1
0
forked from Mirror/wren

Tweak String.indexOf(_,_) a bit.

- Simplify the arithmetic a little in wrenStringFind().
- Allow the start to be negative.
- Even more tests.
- Docs.
This commit is contained in:
Bob Nystrom
2016-08-03 22:19:34 -07:00
parent e5dce527ac
commit 6845328661
9 changed files with 74 additions and 26 deletions

View File

@ -120,6 +120,15 @@ Returns the index of the first byte matching `search` in the string or `-1` if
It is a runtime error if `search` is not a string.
### **indexOf**(search, start)
Returns the index of the first byte matching `search` in the string, starting
the search at byte offset `start`, or `-1` if `search` was not found. The start
can be negative to count backwards from the end of the string.
It is a runtime error if `search` is not a string or `start` is not an integer
index within the string's byte length.
### **iterate**(iterator), **iteratorValue**(iterator)
Implements the [iterator protocol](../../control-flow.html#the-iterator-protocol)

View File

@ -879,14 +879,14 @@ DEF_PRIMITIVE(string_endsWith)
ObjString* string = AS_STRING(args[0]);
ObjString* search = AS_STRING(args[1]);
// Corner case, if the search string is longer than return false right away.
// Edge case: If the search string is longer then return false right away.
if (search->length > string->length) RETURN_FALSE;
RETURN_BOOL(memcmp(string->value + string->length - search->length,
search->value, search->length) == 0);
}
DEF_PRIMITIVE(string_indexOf)
DEF_PRIMITIVE(string_indexOf1)
{
if (!validateString(vm, args[1], "Argument")) return false;
@ -897,15 +897,16 @@ DEF_PRIMITIVE(string_indexOf)
RETURN_NUM(index == UINT32_MAX ? -1 : (int)index);
}
DEF_PRIMITIVE(string_indexOf_with_startIndex)
DEF_PRIMITIVE(string_indexOf2)
{
if (!validateString(vm, args[1], "Argument")) return false;
ObjString* string = AS_STRING(args[0]);
ObjString* search = AS_STRING(args[1]);
uint32_t startIndex = AS_NUM(args[2]);
uint32_t start = validateIndex(vm, args[2], string->length, "Start");
if (start == UINT32_MAX) return false;
uint32_t index = wrenStringFind(string, search, startIndex);
uint32_t index = wrenStringFind(string, search, start);
RETURN_NUM(index == UINT32_MAX ? -1 : (int)index);
}
@ -974,7 +975,7 @@ DEF_PRIMITIVE(string_startsWith)
ObjString* string = AS_STRING(args[0]);
ObjString* search = AS_STRING(args[1]);
// Corner case, if the search string is longer than return false right away.
// Edge case: If the search string is longer then return false right away.
if (search->length > string->length) RETURN_FALSE;
RETURN_BOOL(memcmp(string->value, search->value, search->length) == 0);
@ -1263,8 +1264,8 @@ void wrenInitializeCore(WrenVM* vm)
PRIMITIVE(vm->stringClass, "codePointAt_(_)", string_codePointAt);
PRIMITIVE(vm->stringClass, "contains(_)", string_contains);
PRIMITIVE(vm->stringClass, "endsWith(_)", string_endsWith);
PRIMITIVE(vm->stringClass, "indexOf(_)", string_indexOf);
PRIMITIVE(vm->stringClass, "indexOf(_,_)", string_indexOf_with_startIndex);
PRIMITIVE(vm->stringClass, "indexOf(_)", string_indexOf1);
PRIMITIVE(vm->stringClass, "indexOf(_,_)", string_indexOf2);
PRIMITIVE(vm->stringClass, "iterate(_)", string_iterate);
PRIMITIVE(vm->stringClass, "iterateByte_(_)", string_iterateByte);
PRIMITIVE(vm->stringClass, "iteratorValue(_)", string_iteratorValue);

View File

@ -854,16 +854,16 @@ Value wrenStringCodePointAt(WrenVM* vm, ObjString* string, uint32_t index)
}
// Uses the Boyer-Moore-Horspool string matching algorithm.
uint32_t wrenStringFind(ObjString* haystack, ObjString* needle, uint32_t startIndex)
uint32_t wrenStringFind(ObjString* haystack, ObjString* needle, uint32_t start)
{
// Corner case, an empty needle is always found.
if (needle->length == 0) return 0;
// Edge case: An empty needle is always found.
if (needle->length == 0) return start;
// If the needle is longer than the haystack it won't be found.
if (needle->length > (haystack->length - startIndex)) return UINT32_MAX;
// If the needle goes past the haystack it won't be found.
if (start + needle->length > haystack->length) return UINT32_MAX;
// If the startIndex is too far it also won't be found.
if (startIndex >= haystack->length) return UINT32_MAX;
if (start >= haystack->length) return UINT32_MAX;
// Pre-calculate the shift table. For each character (8-bit value), we
// determine how far the search window can be advanced if that character is
@ -893,18 +893,18 @@ uint32_t wrenStringFind(ObjString* haystack, ObjString* needle, uint32_t startIn
// Slide the needle across the haystack, looking for the first match or
// stopping if the needle goes off the end.
char lastChar = needle->value[needleEnd];
uint32_t range = (haystack->length - startIndex) - needle->length;
uint32_t range = haystack->length - needle->length;
for (uint32_t index = 0; index <= range; )
for (uint32_t index = start; index <= range; )
{
// Compare the last character in the haystack's window to the last character
// in the needle. If it matches, see if the whole needle matches.
char c = haystack->value[startIndex + (index + needleEnd)];
char c = haystack->value[index + needleEnd];
if (lastChar == c &&
memcmp(haystack->value + startIndex + index, needle->value, needleEnd) == 0)
memcmp(haystack->value + index, needle->value, needleEnd) == 0)
{
// Found a match.
return index + startIndex;
return index;
}
// Otherwise, slide the needle forward.

View File

@ -5,12 +5,6 @@ System.print("abcd".indexOf("abcd")) // expect: 0
System.print("abcd".indexOf("abcde")) // expect: -1
System.print("abab".indexOf("ab")) // expect: 0
System.print("abcd".indexOf("cd", 0)) // expect: 2
System.print("abcd".indexOf("cd", 1)) // expect: 2
System.print("abcd".indexOf("cd", 2)) // expect: 2
System.print("abcd".indexOf("cd", 3)) // expect: -1
System.print("abcd".indexOf("cd", 10)) // expect: -1
// More complex cases.
System.print("abcdefabcdefg".indexOf("defg")) // expect: 9
System.print("abcdabcdabcd".indexOf("dab")) // expect: 3

View File

@ -0,0 +1,39 @@
// An empty string is anywhere you look for it.
System.print("abcd".indexOf("", 0)) // expect: 0
System.print("abcd".indexOf("", 1)) // expect: 1
System.print("abcd".indexOf("", 2)) // expect: 2
// Overlapping results.
System.print("aaaaa".indexOf("aaaa", 0)) // expect: 0
System.print("aaaaa".indexOf("aaaa", 1)) // expect: 1
System.print("aaaaa".indexOf("aaaa", 2)) // expect: -1
// It's OK if the needle extends past the end.
System.print("abcd".indexOf("abcde", 0)) // expect: -1
System.print("abcd".indexOf("cde", 3)) // expect: -1
System.print("abcd".indexOf("cd", 0)) // expect: 2
System.print("abcd".indexOf("cd", 1)) // expect: 2
System.print("abcd".indexOf("cd", 2)) // expect: 2
System.print("abcd".indexOf("cd", 3)) // expect: -1
// Negative start.
System.print("abcd".indexOf("cd", -4)) // expect: 2
System.print("abcd".indexOf("cd", -3)) // expect: 2
System.print("abcd".indexOf("cd", -2)) // expect: 2
System.print("abcd".indexOf("cd", -1)) // expect: -1
// Skips past earlier results.
System.print("here as well as here".indexOf("here", 1)) // expect: 16
// Non-ASCII. Note that it returns byte indices, not code points.
System.print("søméஃthîng".indexOf("e", 2)) // expect: -1
System.print("søméஃthîng".indexOf("m", 2)) // expect: 3
System.print("søméஃthîng".indexOf("thî", 8)) // expect: 9
// 8-bit clean.
System.print("a\0b\0c".indexOf("\0", 0)) // expect: 1
System.print("a\0b\0c".indexOf("a", 0)) // expect: 0
System.print("a\0b\0c".indexOf("b\0c", 1)) // expect: 2
System.print("a\0b\0c".indexOf("a\0b\0c\0d", 0)) // expect: -1
System.print("a\0b\0a\0b".indexOf("a\0b", 0)) // expect: 0

View File

@ -0,0 +1 @@
"abcd".indexOf("bc", 12.34) // expect runtime error: Start must be an integer.

View File

@ -0,0 +1,2 @@
"abcd".indexOf("bc", "not num") // expect runtime error: Start must be a number.
System.print("after")

View File

@ -0,0 +1 @@
"abcd".indexOf("bc", 4) // expect runtime error: Start out of bounds.

View File

@ -0,0 +1 @@
"abcd".indexOf("bc", -5) // expect runtime error: Start out of bounds.