From eb424f5c1acc8351124e1cde3002a7f8b4c5295c Mon Sep 17 00:00:00 2001 From: Bob Nystrom Date: Thu, 22 Jan 2015 20:58:22 -0800 Subject: [PATCH] Make strings iterable over their code points. I'm not sure why, but this also regresses perf: binary_trees - wren .......... 3290 0.30s 96.68% relative to baseline delta_blue - wren .......... 7948 0.13s 99.06% relative to baseline fib - wren .......... 3165 0.32s 95.90% relative to baseline for - wren .......... 8242 0.12s 96.00% relative to baseline method_call - wren .......... 5417 0.18s 78.74% relative to baseline Need to investigate. --- builtin/core.wren | 2 + doc/site/core/list.markdown | 7 +- doc/site/core/sequence.markdown | 36 +++++++++- doc/site/core/string.markdown | 13 ++++ src/wren_core.c | 72 ++++++++++++++----- test/list/iterate.wren | 3 + test/string/iterate.wren | 16 +++++ test/string/iterate_iterator_not_int.wren | 1 + test/string/iterate_iterator_not_num.wren | 1 + test/string/iterator_value.wren | 7 ++ .../iterator_value_iterator_not_int.wren | 1 + .../iterator_value_iterator_not_num.wren | 1 + .../iterator_value_iterator_too_large.wren | 1 + .../iterator_value_iterator_too_small.wren | 1 + 14 files changed, 141 insertions(+), 21 deletions(-) create mode 100644 test/string/iterate.wren create mode 100644 test/string/iterate_iterator_not_int.wren create mode 100644 test/string/iterate_iterator_not_num.wren create mode 100644 test/string/iterator_value.wren create mode 100644 test/string/iterator_value_iterator_not_int.wren create mode 100644 test/string/iterator_value_iterator_not_num.wren create mode 100644 test/string/iterator_value_iterator_too_large.wren create mode 100644 test/string/iterator_value_iterator_too_small.wren diff --git a/builtin/core.wren b/builtin/core.wren index dc8a9888..d1e406d6 100644 --- a/builtin/core.wren +++ b/builtin/core.wren @@ -44,6 +44,8 @@ class Sequence { } +class String is Sequence {} + class List is Sequence { addAll(other) { for (element in other) { diff --git 
a/doc/site/core/list.markdown b/doc/site/core/list.markdown index 3df5e479..2e7e5863 100644 --- a/doc/site/core/list.markdown +++ b/doc/site/core/list.markdown @@ -1,10 +1,10 @@ ^title List Class ^category core -**TODO** - Extends [Sequence](sequence.html). +An indexable contiguous collection of elements. More details [here](../lists.html). + ### **add**(item) Appends `item` onto the end of the list. @@ -23,7 +23,8 @@ The number of items in the list. ### **iterate**(iterator), **iteratorValue**(iterator) -**TODO** +Implements the [iterator protocol](../control-flow.html#the-iterator-protocol) +for iterating over the elements in the list. ### **removeAt**(index) diff --git a/doc/site/core/sequence.markdown b/doc/site/core/sequence.markdown index 18ebeb0c..4ed8c942 100644 --- a/doc/site/core/sequence.markdown +++ b/doc/site/core/sequence.markdown @@ -1,11 +1,33 @@ ^title Sequence Class ^category core -An abstract base class for any iterable object. It provides a number of methods for working with sequences based on the core [iterator protocol](../control-flow.html#the-iterator-protocol). +An abstract base class for any iterable object. Any class that implements the +core [iterator protocol][] can extend this to get a number of helpful methods. + +[iterator protocol]: ../control-flow.html#the-iterator-protocol ### **all**(predicate) -Tests whether all the elements in the list pass the `predicate`. +Tests whether all the elements in the sequence pass the `predicate`. + +Iterates over the sequence, passing each element to the function `predicate`. +If it returns `false`, stops iterating and returns `false`. Otherwise, returns +`true`. + + :::dart + [1, 2, 3].all {|n| n > 2} // False. + [1, 2, 3].all {|n| n < 4} // True. + +### **map**(transformation) + +Creates a new list by applying `transformation` to each element in the +sequence. + +Iterates over the sequence, passing each element to the function +`transformation`. 
Generates a new list from the result of each of those calls. + + :::dart + [1, 2, 3].map {|n| n * 2} // [2, 4, 6]. ### **reduce**(function) @@ -16,3 +38,13 @@ It is a runtime error to call this on an empty sequence. ### **reduce**(seed, function) Similar to above, but uses `seed` for the initial value of the accumulator. If the sequence is empty, returns `seed`. + +### **where**(predicate) + +Produces a new list containing only the elements in the sequence that pass the +`predicate`. + +Iterates over the sequence, passing each element to the function `predicate`. +If it returns `true`, adds the element to the result list. + + (1..10).where {|n| n % 2 == 1} // [1, 3, 5, 7, 9]. diff --git a/doc/site/core/string.markdown b/doc/site/core/string.markdown index 88328b5c..9ce97eae 100644 --- a/doc/site/core/string.markdown +++ b/doc/site/core/string.markdown @@ -56,6 +56,19 @@ Returns the index of the first byte matching `search` in the string or `-1` if It is a runtime error if `search` is not a string. +### **iterate**(iterator), **iteratorValue**(iterator) + +Implements the [iterator protocol](../control-flow.html#the-iterator-protocol) +for iterating over the *code points* in the string: + + :::dart + var codePoints = [] + for (c in "(ᵔᴥᵔ)") { + codePoints.add(c) + } + + IO.print(codePoints) // ["(", "ᵔ", "ᴥ", "ᵔ", ")"]. + ### **startsWith**(prefix) Checks if the string starts with `prefix`. diff --git a/src/wren_core.c b/src/wren_core.c index c1ea56e3..d1601b39 100644 --- a/src/wren_core.c +++ b/src/wren_core.c @@ -87,6 +87,8 @@ static const char* libSource = "\n" "}\n" "\n" +"class String is Sequence {}\n" +"\n" "class List is Sequence {\n" " addAll(other) {\n" " for (element in other) {\n" @@ -1027,6 +1029,42 @@ DEF_NATIVE(string_indexOf) RETURN_NUM(firstOccurrence ? firstOccurrence - string->value : -1); } +DEF_NATIVE(string_iterate) +{ + ObjString* string = AS_STRING(args[0]); + + // If we're starting the iteration, return the first index. 
+ if (IS_NULL(args[1])) + { + if (string->length == 0) RETURN_FALSE; + RETURN_NUM(0); + } + + if (!validateInt(vm, args, 1, "Iterator")) return PRIM_ERROR; + + int index = (int)AS_NUM(args[1]); + if (index < 0) RETURN_FALSE; + + // Advance to the beginning of the next UTF-8 sequence. + do + { + index++; + if (index >= string->length) RETURN_FALSE; + } while ((string->value[index] & 0xc0) == 0x80); + + RETURN_NUM(index); +} + +DEF_NATIVE(string_iteratorValue) +{ + ObjString* string = AS_STRING(args[0]); + int index = validateIndex(vm, args, string->length, 1, "Iterator"); + // TODO: Test. + if (index == -1) return PRIM_ERROR; + + RETURN_VAL(wrenStringCodePointAt(vm, string, index)); +} + DEF_NATIVE(string_startsWith) { if (!validateString(vm, args, 1, "Argument")) return PRIM_ERROR; @@ -1261,7 +1299,9 @@ void wrenInitializeCore(WrenVM* vm) NATIVE(vm->numClass, "== ", num_eqeq); NATIVE(vm->numClass, "!= ", num_bangeq); - vm->stringClass = defineClass(vm, "String"); + wrenInterpret(vm, "", libSource); + + vm->stringClass = AS_CLASS(findGlobal(vm, "String")); NATIVE(vm->stringClass, "+ ", string_plus); NATIVE(vm->stringClass, "== ", string_eqeq); NATIVE(vm->stringClass, "!= ", string_bangeq); @@ -1270,24 +1310,11 @@ void wrenInitializeCore(WrenVM* vm) NATIVE(vm->stringClass, "count", string_count); NATIVE(vm->stringClass, "endsWith ", string_endsWith); NATIVE(vm->stringClass, "indexOf ", string_indexOf); + NATIVE(vm->stringClass, "iterate ", string_iterate); + NATIVE(vm->stringClass, "iteratorValue ", string_iteratorValue); NATIVE(vm->stringClass, "startsWith ", string_startsWith); NATIVE(vm->stringClass, "toString", string_toString); - // When the base classes are defined, we allocate string objects for their - // names. However, we haven't created the string class itself yet, so those - // all have NULL class pointers. Now that we have a string class, go back and - // fix them up. 
- vm->objectClass->name->obj.classObj = vm->stringClass; - vm->classClass->name->obj.classObj = vm->stringClass; - vm->boolClass->name->obj.classObj = vm->stringClass; - vm->fiberClass->name->obj.classObj = vm->stringClass; - vm->fnClass->name->obj.classObj = vm->stringClass; - vm->nullClass->name->obj.classObj = vm->stringClass; - vm->numClass->name->obj.classObj = vm->stringClass; - vm->stringClass->name->obj.classObj = vm->stringClass; - - wrenInterpret(vm, "", libSource); - vm->listClass = AS_CLASS(findGlobal(vm, "List")); NATIVE(vm->listClass->obj.classObj, " instantiate", list_instantiate); NATIVE(vm->listClass, "[ ]", list_subscript); @@ -1309,4 +1336,17 @@ void wrenInitializeCore(WrenVM* vm) NATIVE(vm->rangeClass, "iterate ", range_iterate); NATIVE(vm->rangeClass, "iteratorValue ", range_iteratorValue); NATIVE(vm->rangeClass, "toString", range_toString); + + // While bootstrapping the core types and running the core library, a number of + // string objects have been created, many of which were instantiated before + // stringClass was stored in the VM. Some of them *must* be created first: + // the ObjClass for string itself has a reference to the ObjString for its + // name. + // + // These all currently have a NULL classObj pointer, so go back and assign them + // now that the string class is known. + for (Obj* obj = vm->first; obj != NULL; obj = obj->next) + { + if (obj->type == OBJ_STRING) obj->classObj = vm->stringClass; + } } diff --git a/test/list/iterate.wren b/test/list/iterate.wren index 8470229d..14bff801 100644 --- a/test/list/iterate.wren +++ b/test/list/iterate.wren @@ -4,6 +4,9 @@ IO.print(a.iterate(0)) // expect: 1 IO.print(a.iterate(1)) // expect: 2 IO.print(a.iterate(2)) // expect: 3 IO.print(a.iterate(3)) // expect: false + +// Out of bounds. +IO.print(a.iterate(123)) // expect: false IO.print(a.iterate(-1)) // expect: false // Nothing to iterate in an empty list. 
diff --git a/test/string/iterate.wren b/test/string/iterate.wren new file mode 100644 index 00000000..e79861b6 --- /dev/null +++ b/test/string/iterate.wren @@ -0,0 +1,16 @@ +var s = "abçd" +IO.print(s.iterate(null)) // expect: 0 +IO.print(s.iterate(0)) // expect: 1 +IO.print(s.iterate(1)) // expect: 2 +// Skip 3 because that's the middle of the ç sequence. +IO.print(s.iterate(2)) // expect: 4 +// Iterating from the middle of a UTF-8 sequence goes to the next one. +IO.print(s.iterate(3)) // expect: 4 +IO.print(s.iterate(4)) // expect: false + +// Out of bounds. +IO.print(s.iterate(123)) // expect: false +IO.print(s.iterate(-1)) // expect: false + +// Nothing to iterate in an empty string. +IO.print("".iterate(null)) // expect: false diff --git a/test/string/iterate_iterator_not_int.wren b/test/string/iterate_iterator_not_int.wren new file mode 100644 index 00000000..5d6de8e5 --- /dev/null +++ b/test/string/iterate_iterator_not_int.wren @@ -0,0 +1 @@ +"s".iterate(1.5) // expect runtime error: Iterator must be an integer. diff --git a/test/string/iterate_iterator_not_num.wren b/test/string/iterate_iterator_not_num.wren new file mode 100644 index 00000000..62b25b18 --- /dev/null +++ b/test/string/iterate_iterator_not_num.wren @@ -0,0 +1 @@ +"s".iterate("2") // expect runtime error: Iterator must be a number. diff --git a/test/string/iterator_value.wren b/test/string/iterator_value.wren new file mode 100644 index 00000000..343cee4d --- /dev/null +++ b/test/string/iterator_value.wren @@ -0,0 +1,7 @@ +var s = "abçd" +IO.print(s.iteratorValue(0)) // expect: a +IO.print(s.iteratorValue(1)) // expect: b +IO.print(s.iteratorValue(2)) // expect: ç +// Iterator value in middle of UTF sequence is an empty string. 
+IO.print(s.iteratorValue(3) == "") // expect: true +IO.print(s.iteratorValue(4)) // expect: d diff --git a/test/string/iterator_value_iterator_not_int.wren b/test/string/iterator_value_iterator_not_int.wren new file mode 100644 index 00000000..db057390 --- /dev/null +++ b/test/string/iterator_value_iterator_not_int.wren @@ -0,0 +1 @@ +"s".iteratorValue(1.5) // expect runtime error: Iterator must be an integer. diff --git a/test/string/iterator_value_iterator_not_num.wren b/test/string/iterator_value_iterator_not_num.wren new file mode 100644 index 00000000..c72c1428 --- /dev/null +++ b/test/string/iterator_value_iterator_not_num.wren @@ -0,0 +1 @@ +"s".iteratorValue("2") // expect runtime error: Iterator must be a number. diff --git a/test/string/iterator_value_iterator_too_large.wren b/test/string/iterator_value_iterator_too_large.wren new file mode 100644 index 00000000..173a60ea --- /dev/null +++ b/test/string/iterator_value_iterator_too_large.wren @@ -0,0 +1 @@ +"123".iteratorValue(4) // expect runtime error: Iterator out of bounds. diff --git a/test/string/iterator_value_iterator_too_small.wren b/test/string/iterator_value_iterator_too_small.wren new file mode 100644 index 00000000..0bdc9137 --- /dev/null +++ b/test/string/iterator_value_iterator_too_small.wren @@ -0,0 +1 @@ +"123".iteratorValue(-5) // expect runtime error: Iterator out of bounds.