forked from Mirror/wren
Tweak String.indexOf(_,_) a bit.
- Simplify the arithmetic a little in wrenStringFind(). - Allow the start to be negative. - Even more tests. - Docs.
This commit is contained in:
@ -120,6 +120,15 @@ Returns the index of the first byte matching `search` in the string or `-1` if
|
||||
|
||||
It is a runtime error if `search` is not a string.
|
||||
|
||||
### **indexOf**(search, start)
|
||||
|
||||
Returns the index of the first byte matching `search` in the string or `-1` if
|
||||
`search` was not found, starting at byte offset `start`. The start can be
|
||||
negative to count backwards from the end of the string.
|
||||
|
||||
It is a runtime error if `search` is not a string or `start` is not an integer
|
||||
index within the string's byte length.
|
||||
|
||||
### **iterate**(iterator), **iteratorValue**(iterator)
|
||||
|
||||
Implements the [iterator protocol](../../control-flow.html#the-iterator-protocol)
|
||||
|
||||
@ -879,14 +879,14 @@ DEF_PRIMITIVE(string_endsWith)
|
||||
ObjString* string = AS_STRING(args[0]);
|
||||
ObjString* search = AS_STRING(args[1]);
|
||||
|
||||
// Corner case, if the search string is longer than return false right away.
|
||||
// Edge case: If the search string is longer then return false right away.
|
||||
if (search->length > string->length) RETURN_FALSE;
|
||||
|
||||
RETURN_BOOL(memcmp(string->value + string->length - search->length,
|
||||
search->value, search->length) == 0);
|
||||
}
|
||||
|
||||
DEF_PRIMITIVE(string_indexOf)
|
||||
DEF_PRIMITIVE(string_indexOf1)
|
||||
{
|
||||
if (!validateString(vm, args[1], "Argument")) return false;
|
||||
|
||||
@ -897,15 +897,16 @@ DEF_PRIMITIVE(string_indexOf)
|
||||
RETURN_NUM(index == UINT32_MAX ? -1 : (int)index);
|
||||
}
|
||||
|
||||
DEF_PRIMITIVE(string_indexOf_with_startIndex)
|
||||
DEF_PRIMITIVE(string_indexOf2)
|
||||
{
|
||||
if (!validateString(vm, args[1], "Argument")) return false;
|
||||
|
||||
ObjString* string = AS_STRING(args[0]);
|
||||
ObjString* search = AS_STRING(args[1]);
|
||||
uint32_t startIndex = AS_NUM(args[2]);
|
||||
uint32_t start = validateIndex(vm, args[2], string->length, "Start");
|
||||
if (start == UINT32_MAX) return false;
|
||||
|
||||
uint32_t index = wrenStringFind(string, search, startIndex);
|
||||
uint32_t index = wrenStringFind(string, search, start);
|
||||
RETURN_NUM(index == UINT32_MAX ? -1 : (int)index);
|
||||
}
|
||||
|
||||
@ -974,7 +975,7 @@ DEF_PRIMITIVE(string_startsWith)
|
||||
ObjString* string = AS_STRING(args[0]);
|
||||
ObjString* search = AS_STRING(args[1]);
|
||||
|
||||
// Corner case, if the search string is longer than return false right away.
|
||||
// Edge case: If the search string is longer then return false right away.
|
||||
if (search->length > string->length) RETURN_FALSE;
|
||||
|
||||
RETURN_BOOL(memcmp(string->value, search->value, search->length) == 0);
|
||||
@ -1263,8 +1264,8 @@ void wrenInitializeCore(WrenVM* vm)
|
||||
PRIMITIVE(vm->stringClass, "codePointAt_(_)", string_codePointAt);
|
||||
PRIMITIVE(vm->stringClass, "contains(_)", string_contains);
|
||||
PRIMITIVE(vm->stringClass, "endsWith(_)", string_endsWith);
|
||||
PRIMITIVE(vm->stringClass, "indexOf(_)", string_indexOf);
|
||||
PRIMITIVE(vm->stringClass, "indexOf(_,_)", string_indexOf_with_startIndex);
|
||||
PRIMITIVE(vm->stringClass, "indexOf(_)", string_indexOf1);
|
||||
PRIMITIVE(vm->stringClass, "indexOf(_,_)", string_indexOf2);
|
||||
PRIMITIVE(vm->stringClass, "iterate(_)", string_iterate);
|
||||
PRIMITIVE(vm->stringClass, "iterateByte_(_)", string_iterateByte);
|
||||
PRIMITIVE(vm->stringClass, "iteratorValue(_)", string_iteratorValue);
|
||||
|
||||
@ -854,16 +854,16 @@ Value wrenStringCodePointAt(WrenVM* vm, ObjString* string, uint32_t index)
|
||||
}
|
||||
|
||||
// Uses the Boyer-Moore-Horspool string matching algorithm.
|
||||
uint32_t wrenStringFind(ObjString* haystack, ObjString* needle, uint32_t startIndex)
|
||||
uint32_t wrenStringFind(ObjString* haystack, ObjString* needle, uint32_t start)
|
||||
{
|
||||
// Corner case, an empty needle is always found.
|
||||
if (needle->length == 0) return 0;
|
||||
// Edge case: An empty needle is always found.
|
||||
if (needle->length == 0) return start;
|
||||
|
||||
// If the needle is longer than the haystack it won't be found.
|
||||
if (needle->length > (haystack->length - startIndex)) return UINT32_MAX;
|
||||
// If the needle goes past the haystack it won't be found.
|
||||
if (start + needle->length > haystack->length) return UINT32_MAX;
|
||||
|
||||
// If the startIndex is too far it also won't be found.
|
||||
if (startIndex >= haystack->length) return UINT32_MAX;
|
||||
if (start >= haystack->length) return UINT32_MAX;
|
||||
|
||||
// Pre-calculate the shift table. For each character (8-bit value), we
|
||||
// determine how far the search window can be advanced if that character is
|
||||
@ -893,18 +893,18 @@ uint32_t wrenStringFind(ObjString* haystack, ObjString* needle, uint32_t startIn
|
||||
// Slide the needle across the haystack, looking for the first match or
|
||||
// stopping if the needle goes off the end.
|
||||
char lastChar = needle->value[needleEnd];
|
||||
uint32_t range = (haystack->length - startIndex) - needle->length;
|
||||
uint32_t range = haystack->length - needle->length;
|
||||
|
||||
for (uint32_t index = 0; index <= range; )
|
||||
for (uint32_t index = start; index <= range; )
|
||||
{
|
||||
// Compare the last character in the haystack's window to the last character
|
||||
// in the needle. If it matches, see if the whole needle matches.
|
||||
char c = haystack->value[startIndex + (index + needleEnd)];
|
||||
char c = haystack->value[index + needleEnd];
|
||||
if (lastChar == c &&
|
||||
memcmp(haystack->value + startIndex + index, needle->value, needleEnd) == 0)
|
||||
memcmp(haystack->value + index, needle->value, needleEnd) == 0)
|
||||
{
|
||||
// Found a match.
|
||||
return index + startIndex;
|
||||
return index;
|
||||
}
|
||||
|
||||
// Otherwise, slide the needle forward.
|
||||
|
||||
@ -5,12 +5,6 @@ System.print("abcd".indexOf("abcd")) // expect: 0
|
||||
System.print("abcd".indexOf("abcde")) // expect: -1
|
||||
System.print("abab".indexOf("ab")) // expect: 0
|
||||
|
||||
System.print("abcd".indexOf("cd", 0)) // expect: 2
|
||||
System.print("abcd".indexOf("cd", 1)) // expect: 2
|
||||
System.print("abcd".indexOf("cd", 2)) // expect: 2
|
||||
System.print("abcd".indexOf("cd", 3)) // expect: -1
|
||||
System.print("abcd".indexOf("cd", 10)) // expect: -1
|
||||
|
||||
// More complex cases.
|
||||
System.print("abcdefabcdefg".indexOf("defg")) // expect: 9
|
||||
System.print("abcdabcdabcd".indexOf("dab")) // expect: 3
|
||||
|
||||
39
test/core/string/index_of_start.wren
Normal file
39
test/core/string/index_of_start.wren
Normal file
@ -0,0 +1,39 @@
|
||||
// An empty string is anywhere you look for it.
|
||||
System.print("abcd".indexOf("", 0)) // expect: 0
|
||||
System.print("abcd".indexOf("", 1)) // expect: 1
|
||||
System.print("abcd".indexOf("", 2)) // expect: 2
|
||||
|
||||
// Overlapping results.
|
||||
System.print("aaaaa".indexOf("aaaa", 0)) // expect: 0
|
||||
System.print("aaaaa".indexOf("aaaa", 1)) // expect: 1
|
||||
System.print("aaaaa".indexOf("aaaa", 2)) // expect: -1
|
||||
|
||||
// It's OK if the needle extends past the end.
|
||||
System.print("abcd".indexOf("abcde", 0)) // expect: -1
|
||||
System.print("abcd".indexOf("cde", 3)) // expect: -1
|
||||
|
||||
System.print("abcd".indexOf("cd", 0)) // expect: 2
|
||||
System.print("abcd".indexOf("cd", 1)) // expect: 2
|
||||
System.print("abcd".indexOf("cd", 2)) // expect: 2
|
||||
System.print("abcd".indexOf("cd", 3)) // expect: -1
|
||||
|
||||
// Negative start.
|
||||
System.print("abcd".indexOf("cd", -4)) // expect: 2
|
||||
System.print("abcd".indexOf("cd", -3)) // expect: 2
|
||||
System.print("abcd".indexOf("cd", -2)) // expect: 2
|
||||
System.print("abcd".indexOf("cd", -1)) // expect: -1
|
||||
|
||||
// Skips past earlier results.
|
||||
System.print("here as well as here".indexOf("here", 1)) // expect: 16
|
||||
|
||||
// Non-ASCII. Note that it returns byte indices, not code points.
|
||||
System.print("søméஃthîng".indexOf("e", 2)) // expect: -1
|
||||
System.print("søméஃthîng".indexOf("m", 2)) // expect: 3
|
||||
System.print("søméஃthîng".indexOf("thî", 8)) // expect: 9
|
||||
|
||||
// 8-bit clean.
|
||||
System.print("a\0b\0c".indexOf("\0", 0)) // expect: 1
|
||||
System.print("a\0b\0c".indexOf("a", 0)) // expect: 0
|
||||
System.print("a\0b\0c".indexOf("b\0c", 1)) // expect: 2
|
||||
System.print("a\0b\0c".indexOf("a\0b\0c\0d", 0)) // expect: -1
|
||||
System.print("a\0b\0a\0b".indexOf("a\0b", 0)) // expect: 0
|
||||
1
test/core/string/index_of_start_not_int.wren
Normal file
1
test/core/string/index_of_start_not_int.wren
Normal file
@ -0,0 +1 @@
|
||||
"abcd".indexOf("bc", 12.34) // expect runtime error: Start must be an integer.
|
||||
2
test/core/string/index_of_start_not_num.wren
Normal file
2
test/core/string/index_of_start_not_num.wren
Normal file
@ -0,0 +1,2 @@
|
||||
"abcd".indexOf("bc", "not num") // expect runtime error: Start must be a number.
|
||||
System.print("after")
|
||||
1
test/core/string/index_of_start_too_large.wren
Normal file
1
test/core/string/index_of_start_too_large.wren
Normal file
@ -0,0 +1 @@
|
||||
"abcd".indexOf("bc", 4) // expect runtime error: Start out of bounds.
|
||||
1
test/core/string/index_of_start_too_small.wren
Normal file
1
test/core/string/index_of_start_too_small.wren
Normal file
@ -0,0 +1 @@
|
||||
"abcd".indexOf("bc", -5) // expect runtime error: Start out of bounds.
|
||||
Reference in New Issue
Block a user