diff --git a/doc/site/modules/core/string.markdown b/doc/site/modules/core/string.markdown index eed1410c..3b1aad57 100644 --- a/doc/site/modules/core/string.markdown +++ b/doc/site/modules/core/string.markdown @@ -120,6 +120,15 @@ Returns the index of the first byte matching `search` in the string or `-1` if It is a runtime error if `search` is not a string. +### **indexOf**(search, start) + +Returns the index of the first byte matching `search` in the string or `-1` if +`search` was not found, starting at byte offset `start`. The start can be +negative to count backwards from the end of the string. + +It is a runtime error if `search` is not a string or `start` is not an integer +index within the string's byte length. + ### **iterate**(iterator), **iteratorValue**(iterator) Implements the [iterator protocol](../../control-flow.html#the-iterator-protocol) diff --git a/src/vm/wren_core.c b/src/vm/wren_core.c index 51c1e9ab..4f91d3ca 100644 --- a/src/vm/wren_core.c +++ b/src/vm/wren_core.c @@ -879,14 +879,14 @@ DEF_PRIMITIVE(string_endsWith) ObjString* string = AS_STRING(args[0]); ObjString* search = AS_STRING(args[1]); - // Corner case, if the search string is longer than return false right away. + // Edge case: If the search string is longer then return false right away. if (search->length > string->length) RETURN_FALSE; RETURN_BOOL(memcmp(string->value + string->length - search->length, search->value, search->length) == 0); } -DEF_PRIMITIVE(string_indexOf) +DEF_PRIMITIVE(string_indexOf1) { if (!validateString(vm, args[1], "Argument")) return false; @@ -897,15 +897,16 @@ DEF_PRIMITIVE(string_indexOf) RETURN_NUM(index == UINT32_MAX ? 
-1 : (int)index); } -DEF_PRIMITIVE(string_indexOf_with_startIndex) +DEF_PRIMITIVE(string_indexOf2) { if (!validateString(vm, args[1], "Argument")) return false; ObjString* string = AS_STRING(args[0]); ObjString* search = AS_STRING(args[1]); - uint32_t startIndex = AS_NUM(args[2]); - - uint32_t index = wrenStringFind(string, search, startIndex); + uint32_t start = validateIndex(vm, args[2], string->length, "Start"); + if (start == UINT32_MAX) return false; + + uint32_t index = wrenStringFind(string, search, start); RETURN_NUM(index == UINT32_MAX ? -1 : (int)index); } @@ -974,7 +975,7 @@ DEF_PRIMITIVE(string_startsWith) ObjString* string = AS_STRING(args[0]); ObjString* search = AS_STRING(args[1]); - // Corner case, if the search string is longer than return false right away. + // Edge case: If the search string is longer then return false right away. if (search->length > string->length) RETURN_FALSE; RETURN_BOOL(memcmp(string->value, search->value, search->length) == 0); @@ -1263,8 +1264,8 @@ void wrenInitializeCore(WrenVM* vm) PRIMITIVE(vm->stringClass, "codePointAt_(_)", string_codePointAt); PRIMITIVE(vm->stringClass, "contains(_)", string_contains); PRIMITIVE(vm->stringClass, "endsWith(_)", string_endsWith); - PRIMITIVE(vm->stringClass, "indexOf(_)", string_indexOf); - PRIMITIVE(vm->stringClass, "indexOf(_,_)", string_indexOf_with_startIndex); + PRIMITIVE(vm->stringClass, "indexOf(_)", string_indexOf1); + PRIMITIVE(vm->stringClass, "indexOf(_,_)", string_indexOf2); PRIMITIVE(vm->stringClass, "iterate(_)", string_iterate); PRIMITIVE(vm->stringClass, "iterateByte_(_)", string_iterateByte); PRIMITIVE(vm->stringClass, "iteratorValue(_)", string_iteratorValue); diff --git a/src/vm/wren_value.c b/src/vm/wren_value.c index 06bfc85b..e715fd8d 100644 --- a/src/vm/wren_value.c +++ b/src/vm/wren_value.c @@ -854,16 +854,16 @@ Value wrenStringCodePointAt(WrenVM* vm, ObjString* string, uint32_t index) } // Uses the Boyer-Moore-Horspool string matching algorithm. 
-uint32_t wrenStringFind(ObjString* haystack, ObjString* needle, uint32_t startIndex) +uint32_t wrenStringFind(ObjString* haystack, ObjString* needle, uint32_t start) { - // Corner case, an empty needle is always found. - if (needle->length == 0) return 0; + // Edge case: An empty needle is always found. + if (needle->length == 0) return start; - // If the needle is longer than the haystack it won't be found. - if (needle->length > (haystack->length - startIndex)) return UINT32_MAX; + // If the needle goes past the haystack it won't be found. + if (start + needle->length > haystack->length) return UINT32_MAX; // If the startIndex is too far it also won't be found. - if (startIndex >= haystack->length) return UINT32_MAX; + if (start >= haystack->length) return UINT32_MAX; // Pre-calculate the shift table. For each character (8-bit value), we // determine how far the search window can be advanced if that character is @@ -893,18 +893,18 @@ uint32_t wrenStringFind(ObjString* haystack, ObjString* needle, uint32_t startIn // Slide the needle across the haystack, looking for the first match or // stopping if the needle goes off the end. char lastChar = needle->value[needleEnd]; - uint32_t range = (haystack->length - startIndex) - needle->length; + uint32_t range = haystack->length - needle->length; - for (uint32_t index = 0; index <= range; ) + for (uint32_t index = start; index <= range; ) { // Compare the last character in the haystack's window to the last character // in the needle. If it matches, see if the whole needle matches. - char c = haystack->value[startIndex + (index + needleEnd)]; + char c = haystack->value[index + needleEnd]; if (lastChar == c && - memcmp(haystack->value + startIndex + index, needle->value, needleEnd) == 0) + memcmp(haystack->value + index, needle->value, needleEnd) == 0) { // Found a match. - return index + startIndex; + return index; } // Otherwise, slide the needle forward. 
diff --git a/test/core/string/index_of.wren b/test/core/string/index_of.wren index c6b3e02d..22fb8f79 100644 --- a/test/core/string/index_of.wren +++ b/test/core/string/index_of.wren @@ -5,12 +5,6 @@ System.print("abcd".indexOf("abcd")) // expect: 0 System.print("abcd".indexOf("abcde")) // expect: -1 System.print("abab".indexOf("ab")) // expect: 0 -System.print("abcd".indexOf("cd", 0)) // expect: 2 -System.print("abcd".indexOf("cd", 1)) // expect: 2 -System.print("abcd".indexOf("cd", 2)) // expect: 2 -System.print("abcd".indexOf("cd", 3)) // expect: -1 -System.print("abcd".indexOf("cd", 10)) // expect: -1 - // More complex cases. System.print("abcdefabcdefg".indexOf("defg")) // expect: 9 System.print("abcdabcdabcd".indexOf("dab")) // expect: 3 diff --git a/test/core/string/index_of_start.wren b/test/core/string/index_of_start.wren new file mode 100644 index 00000000..c850e82e --- /dev/null +++ b/test/core/string/index_of_start.wren @@ -0,0 +1,39 @@ +// An empty string is anywhere you look for it. +System.print("abcd".indexOf("", 0)) // expect: 0 +System.print("abcd".indexOf("", 1)) // expect: 1 +System.print("abcd".indexOf("", 2)) // expect: 2 + +// Overlapping results. +System.print("aaaaa".indexOf("aaaa", 0)) // expect: 0 +System.print("aaaaa".indexOf("aaaa", 1)) // expect: 1 +System.print("aaaaa".indexOf("aaaa", 2)) // expect: -1 + +// It's OK if the needle extends past the end. +System.print("abcd".indexOf("abcde", 0)) // expect: -1 +System.print("abcd".indexOf("cde", 3)) // expect: -1 + +System.print("abcd".indexOf("cd", 0)) // expect: 2 +System.print("abcd".indexOf("cd", 1)) // expect: 2 +System.print("abcd".indexOf("cd", 2)) // expect: 2 +System.print("abcd".indexOf("cd", 3)) // expect: -1 + +// Negative start. +System.print("abcd".indexOf("cd", -4)) // expect: 2 +System.print("abcd".indexOf("cd", -3)) // expect: 2 +System.print("abcd".indexOf("cd", -2)) // expect: 2 +System.print("abcd".indexOf("cd", -1)) // expect: -1 + +// Skips past earlier results. 
+System.print("here as well as here".indexOf("here", 1)) // expect: 16 + +// Non-ASCII. Note that it returns byte indices, not code points. +System.print("søméஃthîng".indexOf("e", 2)) // expect: -1 +System.print("søméஃthîng".indexOf("m", 2)) // expect: 3 +System.print("søméஃthîng".indexOf("thî", 8)) // expect: 9 + +// 8-bit clean. +System.print("a\0b\0c".indexOf("\0", 0)) // expect: 1 +System.print("a\0b\0c".indexOf("a", 0)) // expect: 0 +System.print("a\0b\0c".indexOf("b\0c", 1)) // expect: 2 +System.print("a\0b\0c".indexOf("a\0b\0c\0d", 0)) // expect: -1 +System.print("a\0b\0a\0b".indexOf("a\0b", 0)) // expect: 0 diff --git a/test/core/string/index_of_start_not_int.wren b/test/core/string/index_of_start_not_int.wren new file mode 100644 index 00000000..45897412 --- /dev/null +++ b/test/core/string/index_of_start_not_int.wren @@ -0,0 +1 @@ +"abcd".indexOf("bc", 12.34) // expect runtime error: Start must be an integer. diff --git a/test/core/string/index_of_start_not_num.wren b/test/core/string/index_of_start_not_num.wren new file mode 100644 index 00000000..9b3d1a23 --- /dev/null +++ b/test/core/string/index_of_start_not_num.wren @@ -0,0 +1,2 @@ +"abcd".indexOf("bc", "not num") // expect runtime error: Start must be a number. +System.print("after") \ No newline at end of file diff --git a/test/core/string/index_of_start_too_large.wren b/test/core/string/index_of_start_too_large.wren new file mode 100644 index 00000000..3edb3ac1 --- /dev/null +++ b/test/core/string/index_of_start_too_large.wren @@ -0,0 +1 @@ +"abcd".indexOf("bc", 4) // expect runtime error: Start out of bounds. diff --git a/test/core/string/index_of_start_too_small.wren b/test/core/string/index_of_start_too_small.wren new file mode 100644 index 00000000..ae0cac2d --- /dev/null +++ b/test/core/string/index_of_start_too_small.wren @@ -0,0 +1 @@ +"abcd".indexOf("bc", -5) // expect runtime error: Start out of bounds.