mirror of
https://github.com/wren-lang/wren.git
synced 2026-01-11 22:28:45 +01:00
Make strings iterable over their code points.
I'm not sure why, but this also regresses perf: binary_trees - wren .......... 3290 0.30s 96.68% relative to baseline; delta_blue - wren .......... 7948 0.13s 99.06% relative to baseline; fib - wren .......... 3165 0.32s 95.90% relative to baseline; for - wren .......... 8242 0.12s 96.00% relative to baseline; method_call - wren .......... 5417 0.18s 78.74% relative to baseline. Need to investigate.
This commit is contained in:
@ -44,6 +44,8 @@ class Sequence {
|
||||
|
||||
}
|
||||
|
||||
class String is Sequence {}
|
||||
|
||||
class List is Sequence {
|
||||
addAll(other) {
|
||||
for (element in other) {
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
^title List Class
|
||||
^category core
|
||||
|
||||
**TODO**
|
||||
|
||||
Extends [Sequence](sequence.html).
|
||||
|
||||
An indexable contiguous collection of elements. More details [here](../lists.html).
|
||||
|
||||
### **add**(item)
|
||||
|
||||
Appends `item` onto the end of the list.
|
||||
@ -23,7 +23,8 @@ The number of items in the list.
|
||||
|
||||
### **iterate**(iterator), **iteratorValue**(iterator)
|
||||
|
||||
**TODO**
|
||||
Implements the [iterator protocol](../control-flow.html#the-iterator-protocol)
|
||||
for iterating over the elements in the list.
|
||||
|
||||
### **removeAt**(index)
|
||||
|
||||
|
||||
@ -1,11 +1,33 @@
|
||||
^title Sequence Class
|
||||
^category core
|
||||
|
||||
An abstract base class for any iterable object. It provides a number of methods for working with sequences based on the core [iterator protocol](../control-flow.html#the-iterator-protocol).
|
||||
An abstract base class for any iterable object. Any class that implements the
|
||||
core [iterator protocol][] can extend this to get a number of helpful methods.
|
||||
|
||||
[iterator protocol]: ../control-flow.html#the-iterator-protocol
|
||||
|
||||
### **all**(predicate)
|
||||
|
||||
Tests whether all the elements in the list pass the `predicate`.
|
||||
Tests whether all the elements in the sequence pass the `predicate`.
|
||||
|
||||
Iterates over the sequence, passing each element to the function `predicate`.
|
||||
If it returns `false`, stops iterating and returns `false`. Otherwise, returns
|
||||
`true`.
|
||||
|
||||
:::dart
|
||||
[1, 2, 3].all {|n| n > 2} // False.
|
||||
[1, 2, 3].all {|n| n < 4} // True.
|
||||
|
||||
### **map**(transformation)
|
||||
|
||||
Creates a new list by applying `transformation` to each element in the
|
||||
sequence.
|
||||
|
||||
Iterates over the sequence, passing each element to the function
|
||||
`transformation`. Generates a new list from the result of each of those calls.
|
||||
|
||||
:::dart
|
||||
[1, 2, 3].map {|n| n * 2} // [2, 4, 6].
|
||||
|
||||
### **reduce**(function)
|
||||
|
||||
@ -16,3 +38,13 @@ It is a runtime error to call this on an empty sequence.
|
||||
### **reduce**(seed, function)
|
||||
|
||||
Similar to above, but uses `seed` for the initial value of the accumulator. If the sequence is empty, returns `seed`.
|
||||
|
||||
### **where**(predicate)
|
||||
|
||||
Produces a new list containing only the elements in the sequence that pass the
|
||||
`predicate`.
|
||||
|
||||
Iterates over the sequence, passing each element to the function `predicate`.
|
||||
If it returns `true`, adds the element to the result list.
|
||||
|
||||
(1..10).where {|n| n % 2 == 1} // [1, 3, 5, 7, 9].
|
||||
|
||||
@ -56,6 +56,19 @@ Returns the index of the first byte matching `search` in the string or `-1` if
|
||||
|
||||
It is a runtime error if `search` is not a string.
|
||||
|
||||
### **iterate**(iterator), **iteratorValue**(iterator)
|
||||
|
||||
Implements the [iterator protocol](../control-flow.html#the-iterator-protocol)
|
||||
for iterating over the *code points* in the string:
|
||||
|
||||
:::dart
|
||||
var codePoints = []
|
||||
for (c in "(ᵔᴥᵔ)") {
|
||||
codePoints.add(c)
|
||||
}
|
||||
|
||||
IO.print(codePoints) // ["(", "ᵔ", "ᴥ", "ᵔ", ")"].
|
||||
|
||||
### **startsWith**(prefix)
|
||||
|
||||
Checks if the string starts with `prefix`.
|
||||
|
||||
@ -87,6 +87,8 @@ static const char* libSource =
|
||||
"\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"class String is Sequence {}\n"
|
||||
"\n"
|
||||
"class List is Sequence {\n"
|
||||
" addAll(other) {\n"
|
||||
" for (element in other) {\n"
|
||||
@ -1027,6 +1029,42 @@ DEF_NATIVE(string_indexOf)
|
||||
RETURN_NUM(firstOccurrence ? firstOccurrence - string->value : -1);
|
||||
}
|
||||
|
||||
DEF_NATIVE(string_iterate)
|
||||
{
|
||||
ObjString* string = AS_STRING(args[0]);
|
||||
|
||||
// If we're starting the iteration, return the first index.
|
||||
if (IS_NULL(args[1]))
|
||||
{
|
||||
if (string->length == 0) RETURN_FALSE;
|
||||
RETURN_NUM(0);
|
||||
}
|
||||
|
||||
if (!validateInt(vm, args, 1, "Iterator")) return PRIM_ERROR;
|
||||
|
||||
int index = (int)AS_NUM(args[1]);
|
||||
if (index < 0) RETURN_FALSE;
|
||||
|
||||
// Advance to the beginning of the next UTF-8 sequence.
|
||||
do
|
||||
{
|
||||
index++;
|
||||
if (index >= string->length) RETURN_FALSE;
|
||||
} while ((string->value[index] & 0xc0) == 0x80);
|
||||
|
||||
RETURN_NUM(index);
|
||||
}
|
||||
|
||||
DEF_NATIVE(string_iteratorValue)
|
||||
{
|
||||
ObjString* string = AS_STRING(args[0]);
|
||||
int index = validateIndex(vm, args, string->length, 1, "Iterator");
|
||||
// TODO: Test.
|
||||
if (index == -1) return PRIM_ERROR;
|
||||
|
||||
RETURN_VAL(wrenStringCodePointAt(vm, string, index));
|
||||
}
|
||||
|
||||
DEF_NATIVE(string_startsWith)
|
||||
{
|
||||
if (!validateString(vm, args, 1, "Argument")) return PRIM_ERROR;
|
||||
@ -1261,7 +1299,9 @@ void wrenInitializeCore(WrenVM* vm)
|
||||
NATIVE(vm->numClass, "== ", num_eqeq);
|
||||
NATIVE(vm->numClass, "!= ", num_bangeq);
|
||||
|
||||
vm->stringClass = defineClass(vm, "String");
|
||||
wrenInterpret(vm, "", libSource);
|
||||
|
||||
vm->stringClass = AS_CLASS(findGlobal(vm, "String"));
|
||||
NATIVE(vm->stringClass, "+ ", string_plus);
|
||||
NATIVE(vm->stringClass, "== ", string_eqeq);
|
||||
NATIVE(vm->stringClass, "!= ", string_bangeq);
|
||||
@ -1270,24 +1310,11 @@ void wrenInitializeCore(WrenVM* vm)
|
||||
NATIVE(vm->stringClass, "count", string_count);
|
||||
NATIVE(vm->stringClass, "endsWith ", string_endsWith);
|
||||
NATIVE(vm->stringClass, "indexOf ", string_indexOf);
|
||||
NATIVE(vm->stringClass, "iterate ", string_iterate);
|
||||
NATIVE(vm->stringClass, "iteratorValue ", string_iteratorValue);
|
||||
NATIVE(vm->stringClass, "startsWith ", string_startsWith);
|
||||
NATIVE(vm->stringClass, "toString", string_toString);
|
||||
|
||||
// When the base classes are defined, we allocate string objects for their
|
||||
// names. However, we haven't created the string class itself yet, so those
|
||||
// all have NULL class pointers. Now that we have a string class, go back and
|
||||
// fix them up.
|
||||
vm->objectClass->name->obj.classObj = vm->stringClass;
|
||||
vm->classClass->name->obj.classObj = vm->stringClass;
|
||||
vm->boolClass->name->obj.classObj = vm->stringClass;
|
||||
vm->fiberClass->name->obj.classObj = vm->stringClass;
|
||||
vm->fnClass->name->obj.classObj = vm->stringClass;
|
||||
vm->nullClass->name->obj.classObj = vm->stringClass;
|
||||
vm->numClass->name->obj.classObj = vm->stringClass;
|
||||
vm->stringClass->name->obj.classObj = vm->stringClass;
|
||||
|
||||
wrenInterpret(vm, "", libSource);
|
||||
|
||||
vm->listClass = AS_CLASS(findGlobal(vm, "List"));
|
||||
NATIVE(vm->listClass->obj.classObj, " instantiate", list_instantiate);
|
||||
NATIVE(vm->listClass, "[ ]", list_subscript);
|
||||
@ -1309,4 +1336,17 @@ void wrenInitializeCore(WrenVM* vm)
|
||||
NATIVE(vm->rangeClass, "iterate ", range_iterate);
|
||||
NATIVE(vm->rangeClass, "iteratorValue ", range_iteratorValue);
|
||||
NATIVE(vm->rangeClass, "toString", range_toString);
|
||||
|
||||
// While bootstrapping the core types and running the core library, a number
|
||||
// of string objects have been created, many of which were instantiated before
|
||||
// stringClass was stored in the VM. Some of them *must* be created first:
|
||||
// the ObjClass for string itself has a reference to the ObjString for its
|
||||
// name.
|
||||
//
|
||||
// These all currently have a NULL classObj pointer, so go back and assign them
|
||||
// now that the string class is known.
|
||||
for (Obj* obj = vm->first; obj != NULL; obj = obj->next)
|
||||
{
|
||||
if (obj->type == OBJ_STRING) obj->classObj = vm->stringClass;
|
||||
}
|
||||
}
|
||||
|
||||
@ -4,6 +4,9 @@ IO.print(a.iterate(0)) // expect: 1
|
||||
IO.print(a.iterate(1)) // expect: 2
|
||||
IO.print(a.iterate(2)) // expect: 3
|
||||
IO.print(a.iterate(3)) // expect: false
|
||||
|
||||
// Out of bounds.
|
||||
IO.print(a.iterate(123)) // expect: false
|
||||
IO.print(a.iterate(-1)) // expect: false
|
||||
|
||||
// Nothing to iterate in an empty list.
|
||||
|
||||
16
test/string/iterate.wren
Normal file
16
test/string/iterate.wren
Normal file
@ -0,0 +1,16 @@
|
||||
var s = "abçd"
|
||||
IO.print(s.iterate(null)) // expect: 0
|
||||
IO.print(s.iterate(0)) // expect: 1
|
||||
IO.print(s.iterate(1)) // expect: 2
|
||||
// Skip 3 because that's the middle of the ç sequence.
|
||||
IO.print(s.iterate(2)) // expect: 4
|
||||
// Iterating from the middle of a UTF-8 sequence goes to the next one.
|
||||
IO.print(s.iterate(3)) // expect: 4
|
||||
IO.print(s.iterate(4)) // expect: false
|
||||
|
||||
// Out of bounds.
|
||||
IO.print(s.iterate(123)) // expect: false
|
||||
IO.print(s.iterate(-1)) // expect: false
|
||||
|
||||
// Nothing to iterate in an empty string.
|
||||
IO.print("".iterate(null)) // expect: false
|
||||
1
test/string/iterate_iterator_not_int.wren
Normal file
1
test/string/iterate_iterator_not_int.wren
Normal file
@ -0,0 +1 @@
|
||||
"s".iterate(1.5) // expect runtime error: Iterator must be an integer.
|
||||
1
test/string/iterate_iterator_not_num.wren
Normal file
1
test/string/iterate_iterator_not_num.wren
Normal file
@ -0,0 +1 @@
|
||||
"s".iterate("2") // expect runtime error: Iterator must be a number.
|
||||
7
test/string/iterator_value.wren
Normal file
7
test/string/iterator_value.wren
Normal file
@ -0,0 +1,7 @@
|
||||
var s = "abçd"
|
||||
IO.print(s.iteratorValue(0)) // expect: a
|
||||
IO.print(s.iteratorValue(1)) // expect: b
|
||||
IO.print(s.iteratorValue(2)) // expect: ç
|
||||
// Iterator value in middle of UTF-8 sequence is an empty string.
|
||||
IO.print(s.iteratorValue(3) == "") // expect: true
|
||||
IO.print(s.iteratorValue(4)) // expect: d
|
||||
1
test/string/iterator_value_iterator_not_int.wren
Normal file
1
test/string/iterator_value_iterator_not_int.wren
Normal file
@ -0,0 +1 @@
|
||||
"s".iteratorValue(1.5) // expect runtime error: Iterator must be an integer.
|
||||
1
test/string/iterator_value_iterator_not_num.wren
Normal file
1
test/string/iterator_value_iterator_not_num.wren
Normal file
@ -0,0 +1 @@
|
||||
"s".iteratorValue("2") // expect runtime error: Iterator must be a number.
|
||||
1
test/string/iterator_value_iterator_too_large.wren
Normal file
1
test/string/iterator_value_iterator_too_large.wren
Normal file
@ -0,0 +1 @@
|
||||
"123".iteratorValue(4) // expect runtime error: Iterator out of bounds.
|
||||
1
test/string/iterator_value_iterator_too_small.wren
Normal file
1
test/string/iterator_value_iterator_too_small.wren
Normal file
@ -0,0 +1 @@
|
||||
"123".iteratorValue(-5) // expect runtime error: Iterator out of bounds.
|
||||
Reference in New Issue
Block a user