Make strings iterable over their code points.

I'm not sure why, but this also regresses perf:

binary_trees - wren            ..........  3290  0.30s   96.68% relative to baseline
delta_blue - wren              ..........  7948  0.13s   99.06% relative to baseline
fib - wren                     ..........  3165  0.32s   95.90% relative to baseline
for - wren                     ..........  8242  0.12s   96.00% relative to baseline
method_call - wren             ..........  5417  0.18s   78.74% relative to baseline

Need to investigate.
This commit is contained in:
Bob Nystrom
2015-01-22 20:58:22 -08:00
parent a5b00cebe7
commit eb424f5c1a
14 changed files with 141 additions and 21 deletions

View File

@ -44,6 +44,8 @@ class Sequence {
}
class String is Sequence {}
class List is Sequence {
addAll(other) {
for (element in other) {

View File

@ -1,10 +1,10 @@
^title List Class
^category core
**TODO**
Extends [Sequence](sequence.html).
An indexable contiguous collection of elements. More details [here](../lists.html).
### **add**(item)
Appends `item` onto the end of the list.
@ -23,7 +23,8 @@ The number of items in the list.
### **iterate**(iterator), **iteratorValue**(iterator)
**TODO**
Implements the [iterator protocol](../control-flow.html#the-iterator-protocol)
for iterating over the elements in the list.
### **removeAt**(index)

View File

@ -1,11 +1,33 @@
^title Sequence Class
^category core
An abstract base class for any iterable object. It provides a number of methods for working with sequences based on the core [iterator protocol](../control-flow.html#the-iterator-protocol).
An abstract base class for any iterable object. Any class that implements the
core [iterator protocol][] can extend this to get a number of helpful methods.
[iterator protocol]: ../control-flow.html#the-iterator-protocol
### **all**(predicate)
Tests whether all the elements in the list pass the `predicate`.
Tests whether all the elements in the sequence pass the `predicate`.
Iterates over the sequence, passing each element to the function `predicate`.
If it returns `false`, stops iterating and returns `false`. Otherwise, returns
`true`.
:::dart
[1, 2, 3].all {|n| n > 2} // False.
[1, 2, 3].all {|n| n < 4} // True.
### **map**(transformation)
Creates a new list by applying `transformation` to each element in the
sequence.
Iterates over the sequence, passing each element to the function
`transformation`. Generates a new list from the result of each of those calls.
:::dart
[1, 2, 3].map {|n| n * 2} // [2, 4, 6].
### **reduce**(function)
@ -16,3 +38,13 @@ It is a runtime error to call this on an empty sequence.
### **reduce**(seed, function)
Similar to above, but uses `seed` for the initial value of the accumulator. If the sequence is empty, returns `seed`.
### **where**(predicate)
Produces a new list containing only the elements in the sequence that pass the
`predicate`.
Iterates over the sequence, passing each element to the function `predicate`.
If it returns `true`, adds the element to the result list.
:::dart
(1..10).where {|n| n % 2 == 1} // [1, 3, 5, 7, 9].

View File

@ -56,6 +56,19 @@ Returns the index of the first byte matching `search` in the string or `-1` if
It is a runtime error if `search` is not a string.
### **iterate**(iterator), **iteratorValue**(iterator)
Implements the [iterator protocol](../control-flow.html#the-iterator-protocol)
for iterating over the *code points* in the string:
:::dart
var codePoints = []
for (c in "(ᵔᴥᵔ)") {
codePoints.add(c)
}
IO.print(codePoints) // ["(", "ᵔ", "ᴥ", "ᵔ", ")"].
### **startsWith**(prefix)
Checks if the string starts with `prefix`.

View File

@ -87,6 +87,8 @@ static const char* libSource =
"\n"
"}\n"
"\n"
"class String is Sequence {}\n"
"\n"
"class List is Sequence {\n"
" addAll(other) {\n"
" for (element in other) {\n"
@ -1027,6 +1029,42 @@ DEF_NATIVE(string_indexOf)
RETURN_NUM(firstOccurrence ? firstOccurrence - string->value : -1);
}
// Native implementation of String.iterate(iterator).
//
// args[0] is the receiver string and args[1] is the current iterator: either
// null (start a new iteration) or the byte index produced by a previous call.
// Returns the byte index at which the next UTF-8 sequence begins, or false
// when there is nothing left to iterate. Returns PRIM_ERROR if validateInt
// rejects the iterator.
DEF_NATIVE(string_iterate)
{
  ObjString* string = AS_STRING(args[0]);

  // If we're starting the iteration, return the first index.
  if (IS_NULL(args[1]))
  {
    if (string->length == 0) RETURN_FALSE;
    RETURN_NUM(0);
  }

  if (!validateInt(vm, args, 1, "Iterator")) return PRIM_ERROR;

  // Bounds-check while the iterator is still a double. Converting a double
  // that does not fit in an int is undefined behavior in C, and incrementing
  // an index equal to INT_MAX would overflow. Any iterator at or past the end
  // of the string has no successor anyway, so stop here.
  double position = AS_NUM(args[1]);
  if (position < 0 || position >= string->length) RETURN_FALSE;

  int index = (int)position;

  // Advance to the beginning of the next UTF-8 sequence. Bytes whose top two
  // bits are 10 are skipped so the result never lands mid-sequence.
  do
  {
    index++;
    if (index >= string->length) RETURN_FALSE;
  } while ((string->value[index] & 0xc0) == 0x80);

  RETURN_NUM(index);
}
DEF_NATIVE(string_iteratorValue)
{
ObjString* string = AS_STRING(args[0]);
int index = validateIndex(vm, args, string->length, 1, "Iterator");
// TODO: Test.
if (index == -1) return PRIM_ERROR;
RETURN_VAL(wrenStringCodePointAt(vm, string, index));
}
DEF_NATIVE(string_startsWith)
{
if (!validateString(vm, args, 1, "Argument")) return PRIM_ERROR;
@ -1261,7 +1299,9 @@ void wrenInitializeCore(WrenVM* vm)
NATIVE(vm->numClass, "== ", num_eqeq);
NATIVE(vm->numClass, "!= ", num_bangeq);
vm->stringClass = defineClass(vm, "String");
wrenInterpret(vm, "", libSource);
vm->stringClass = AS_CLASS(findGlobal(vm, "String"));
NATIVE(vm->stringClass, "+ ", string_plus);
NATIVE(vm->stringClass, "== ", string_eqeq);
NATIVE(vm->stringClass, "!= ", string_bangeq);
@ -1270,24 +1310,11 @@ void wrenInitializeCore(WrenVM* vm)
NATIVE(vm->stringClass, "count", string_count);
NATIVE(vm->stringClass, "endsWith ", string_endsWith);
NATIVE(vm->stringClass, "indexOf ", string_indexOf);
NATIVE(vm->stringClass, "iterate ", string_iterate);
NATIVE(vm->stringClass, "iteratorValue ", string_iteratorValue);
NATIVE(vm->stringClass, "startsWith ", string_startsWith);
NATIVE(vm->stringClass, "toString", string_toString);
// When the base classes are defined, we allocate string objects for their
// names. However, we haven't created the string class itself yet, so those
// all have NULL class pointers. Now that we have a string class, go back and
// fix them up.
vm->objectClass->name->obj.classObj = vm->stringClass;
vm->classClass->name->obj.classObj = vm->stringClass;
vm->boolClass->name->obj.classObj = vm->stringClass;
vm->fiberClass->name->obj.classObj = vm->stringClass;
vm->fnClass->name->obj.classObj = vm->stringClass;
vm->nullClass->name->obj.classObj = vm->stringClass;
vm->numClass->name->obj.classObj = vm->stringClass;
vm->stringClass->name->obj.classObj = vm->stringClass;
wrenInterpret(vm, "", libSource);
vm->listClass = AS_CLASS(findGlobal(vm, "List"));
NATIVE(vm->listClass->obj.classObj, " instantiate", list_instantiate);
NATIVE(vm->listClass, "[ ]", list_subscript);
@ -1309,4 +1336,17 @@ void wrenInitializeCore(WrenVM* vm)
NATIVE(vm->rangeClass, "iterate ", range_iterate);
NATIVE(vm->rangeClass, "iteratorValue ", range_iteratorValue);
NATIVE(vm->rangeClass, "toString", range_toString);
// While bootstrapping the core types and running the core library, a number
// of string objects have been created, many of which were instantiated before
// stringClass was stored in the VM. Some of them *must* be created first:
// the ObjClass for string itself has a reference to the ObjString for its
// name.
//
// These all currently have a NULL classObj pointer, so go back and assign them
// now that the string class is known.
for (Obj* obj = vm->first; obj != NULL; obj = obj->next)
{
if (obj->type == OBJ_STRING) obj->classObj = vm->stringClass;
}
}

View File

@ -4,6 +4,9 @@ IO.print(a.iterate(0)) // expect: 1
IO.print(a.iterate(1)) // expect: 2
IO.print(a.iterate(2)) // expect: 3
IO.print(a.iterate(3)) // expect: false
// Out of bounds.
IO.print(a.iterate(123)) // expect: false
IO.print(a.iterate(-1)) // expect: false
// Nothing to iterate in an empty list.

16
test/string/iterate.wren Normal file
View File

@ -0,0 +1,16 @@
var s = "abçd"
IO.print(s.iterate(null)) // expect: 0
IO.print(s.iterate(0)) // expect: 1
IO.print(s.iterate(1)) // expect: 2
// Skip 3 because that's the middle of the ç sequence.
IO.print(s.iterate(2)) // expect: 4
// Iterating from the middle of a UTF-8 sequence goes to the next one.
IO.print(s.iterate(3)) // expect: 4
IO.print(s.iterate(4)) // expect: false
// Out of bounds.
IO.print(s.iterate(123)) // expect: false
IO.print(s.iterate(-1)) // expect: false
// Nothing to iterate in an empty string.
IO.print("".iterate(null)) // expect: false

View File

@ -0,0 +1 @@
"s".iterate(1.5) // expect runtime error: Iterator must be an integer.

View File

@ -0,0 +1 @@
"s".iterate("2") // expect runtime error: Iterator must be a number.

View File

@ -0,0 +1,7 @@
var s = "abçd"
IO.print(s.iteratorValue(0)) // expect: a
IO.print(s.iteratorValue(1)) // expect: b
IO.print(s.iteratorValue(2)) // expect: ç
// Iterator value in the middle of a UTF-8 sequence is an empty string.
IO.print(s.iteratorValue(3) == "") // expect: true
IO.print(s.iteratorValue(4)) // expect: d

View File

@ -0,0 +1 @@
"s".iteratorValue(1.5) // expect runtime error: Iterator must be an integer.

View File

@ -0,0 +1 @@
"s".iteratorValue("2") // expect runtime error: Iterator must be a number.

View File

@ -0,0 +1 @@
"123".iteratorValue(4) // expect runtime error: Iterator out of bounds.

View File

@ -0,0 +1 @@
"123".iteratorValue(-5) // expect runtime error: Iterator out of bounds.