mirror of
https://github.com/wren-lang/wren.git
synced 2026-01-12 06:38:45 +01:00
Merge branch 'master' into MarcoLizza-8_bit_strings_tests
This commit is contained in:
@ -175,7 +175,8 @@ static bool validateFn(WrenVM* vm, Value* args, int index, const char* argName)
|
||||
{
|
||||
if (IS_FN(args[index]) || IS_CLOSURE(args[index])) return true;
|
||||
|
||||
args[0] = OBJ_VAL(wrenStringConcat(vm, argName, " must be a function."));
|
||||
args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1,
|
||||
" must be a function.", -1));
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -185,7 +186,8 @@ static bool validateNum(WrenVM* vm, Value* args, int index, const char* argName)
|
||||
{
|
||||
if (IS_NUM(args[index])) return true;
|
||||
|
||||
args[0] = OBJ_VAL(wrenStringConcat(vm, argName, " must be a number."));
|
||||
args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1,
|
||||
" must be a number.", -1));
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -196,7 +198,8 @@ static bool validateIntValue(WrenVM* vm, Value* args, double value,
|
||||
{
|
||||
if (trunc(value) == value) return true;
|
||||
|
||||
args[0] = OBJ_VAL(wrenStringConcat(vm, argName, " must be an integer."));
|
||||
args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1,
|
||||
" must be an integer.", -1));
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -226,7 +229,7 @@ static int validateIndexValue(WrenVM* vm, Value* args, int count, double value,
|
||||
// Check bounds.
|
||||
if (index >= 0 && index < count) return index;
|
||||
|
||||
args[0] = OBJ_VAL(wrenStringConcat(vm, argName, " out of bounds."));
|
||||
args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1, " out of bounds.", -1));
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -263,7 +266,8 @@ static bool validateString(WrenVM* vm, Value* args, int index,
|
||||
{
|
||||
if (IS_STRING(args[index])) return true;
|
||||
|
||||
args[0] = OBJ_VAL(wrenStringConcat(vm, argName, " must be a string."));
|
||||
args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1,
|
||||
" must be a string.", -1));
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1056,8 +1060,9 @@ DEF_NATIVE(object_toString)
|
||||
else if (IS_INSTANCE(args[0]))
|
||||
{
|
||||
ObjInstance* instance = AS_INSTANCE(args[0]);
|
||||
RETURN_OBJ(wrenStringConcat(vm, "instance of ",
|
||||
instance->obj.classObj->name->value));
|
||||
ObjString* name = instance->obj.classObj->name;
|
||||
RETURN_OBJ(wrenStringConcat(vm, "instance of ", -1,
|
||||
name->value, name->length));
|
||||
}
|
||||
|
||||
RETURN_VAL(wrenNewString(vm, "<object>", 8));
|
||||
@ -1156,10 +1161,7 @@ DEF_NATIVE(string_contains)
|
||||
ObjString* string = AS_STRING(args[0]);
|
||||
ObjString* search = AS_STRING(args[1]);
|
||||
|
||||
// Corner case, the empty string contains the empty string.
|
||||
if (string->length == 0 && search->length == 0) RETURN_TRUE;
|
||||
|
||||
RETURN_BOOL(strstr(string->value, search->value) != NULL);
|
||||
RETURN_BOOL(wrenStringFind(vm, string, search) != UINT32_MAX);
|
||||
}
|
||||
|
||||
DEF_NATIVE(string_count)
|
||||
@ -1191,9 +1193,9 @@ DEF_NATIVE(string_indexOf)
|
||||
ObjString* string = AS_STRING(args[0]);
|
||||
ObjString* search = AS_STRING(args[1]);
|
||||
|
||||
char* firstOccurrence = strstr(string->value, search->value);
|
||||
uint32_t index = wrenStringFind(vm, string, search);
|
||||
|
||||
RETURN_NUM(firstOccurrence ? firstOccurrence - string->value : -1);
|
||||
RETURN_NUM(index == UINT32_MAX ? -1 : (int)index);
|
||||
}
|
||||
|
||||
DEF_NATIVE(string_iterate)
|
||||
@ -1253,7 +1255,10 @@ DEF_NATIVE(string_toString)
|
||||
DEF_NATIVE(string_plus)
|
||||
{
|
||||
if (!validateString(vm, args, 1, "Right operand")) return PRIM_ERROR;
|
||||
RETURN_OBJ(wrenStringConcat(vm, AS_CSTRING(args[0]), AS_CSTRING(args[1])));
|
||||
ObjString* left = AS_STRING(args[0]);
|
||||
ObjString* right = AS_STRING(args[1]);
|
||||
RETURN_OBJ(wrenStringConcat(vm, left->value, left->length,
|
||||
right->value, right->length));
|
||||
}
|
||||
|
||||
DEF_NATIVE(string_subscript)
|
||||
|
||||
@ -86,7 +86,8 @@ ObjClass* wrenNewClass(WrenVM* vm, ObjClass* superclass, int numFields,
|
||||
wrenPushRoot(vm, (Obj*)name);
|
||||
|
||||
// Create the metaclass.
|
||||
ObjString* metaclassName = wrenStringConcat(vm, name->value, " metaclass");
|
||||
ObjString* metaclassName = wrenStringConcat(vm, name->value, name->length,
|
||||
" metaclass", -1);
|
||||
wrenPushRoot(vm, (Obj*)metaclassName);
|
||||
|
||||
ObjClass* metaclass = wrenNewSingleClass(vm, 0, metaclassName);
|
||||
@ -632,10 +633,11 @@ Value wrenNewUninitializedString(WrenVM* vm, size_t length)
|
||||
return OBJ_VAL(string);
|
||||
}
|
||||
|
||||
ObjString* wrenStringConcat(WrenVM* vm, const char* left, const char* right)
|
||||
ObjString* wrenStringConcat(WrenVM* vm, const char* left, int leftLength,
|
||||
const char* right, int rightLength)
|
||||
{
|
||||
size_t leftLength = strlen(left);
|
||||
size_t rightLength = strlen(right);
|
||||
if (leftLength == -1) leftLength = (int)strlen(left);
|
||||
if (rightLength == -1) rightLength = (int)strlen(right);
|
||||
|
||||
Value value = wrenNewUninitializedString(vm, leftLength + rightLength);
|
||||
ObjString* string = AS_STRING(value);
|
||||
@ -670,6 +672,65 @@ Value wrenStringCodePointAt(WrenVM* vm, ObjString* string, int index)
|
||||
return value;
|
||||
}
|
||||
|
||||
// Uses the Boyer-Moore-Horspool string matching algorithm.
|
||||
uint32_t wrenStringFind(WrenVM* vm, ObjString* haystack, ObjString* needle)
|
||||
{
|
||||
// Corner case, an empty needle is always found.
|
||||
if (needle->length == 0) return 0;
|
||||
|
||||
// If the needle is longer than the haystack it won't be found.
|
||||
if (needle->length > haystack->length) return UINT32_MAX;
|
||||
|
||||
// Pre-calculate the shift table. For each character (8-bit value), we
|
||||
// determine how far the search window can be advanced if that character is
|
||||
// the last character in the haystack where we are searching for the needle
|
||||
// and the needle doesn't match there.
|
||||
uint32_t shift[UINT8_MAX];
|
||||
uint32_t needleEnd = needle->length - 1;
|
||||
|
||||
// By default, we assume the character is not the needle at all. In that case
|
||||
// case, if a match fails on that character, we can advance one whole needle
|
||||
// width since.
|
||||
for (uint32_t index = 0; index < UINT8_MAX; index++)
|
||||
{
|
||||
shift[index] = needle->length;
|
||||
}
|
||||
|
||||
// Then, for every character in the needle, determine how far it is from the
|
||||
// end. If a match fails on that character, we can advance the window such
|
||||
// that it the last character in it lines up with the last place we could
|
||||
// find it in the needle.
|
||||
for (uint32_t index = 0; index < needleEnd; index++)
|
||||
{
|
||||
char c = needle->value[index];
|
||||
shift[(uint8_t)c] = needleEnd - index;
|
||||
}
|
||||
|
||||
// Slide the needle across the haystack, looking for the first match or
|
||||
// stopping if the needle goes off the end.
|
||||
char lastChar = needle->value[needleEnd];
|
||||
uint32_t range = haystack->length - needle->length;
|
||||
|
||||
for (uint32_t index = 0; index <= range; )
|
||||
{
|
||||
// Compare the last character in the haystack's window to the last character
|
||||
// in the needle. If it matches, see if the whole needle matches.
|
||||
char c = haystack->value[index + needleEnd];
|
||||
if (lastChar == c &&
|
||||
memcmp(haystack->value + index, needle->value, needleEnd) == 0)
|
||||
{
|
||||
// Found a match.
|
||||
return index;
|
||||
}
|
||||
|
||||
// Otherwise, slide the needle forward.
|
||||
index += shift[(uint8_t)c];
|
||||
}
|
||||
|
||||
// Not found.
|
||||
return UINT32_MAX;
|
||||
}
|
||||
|
||||
Upvalue* wrenNewUpvalue(WrenVM* vm, Value* value)
|
||||
{
|
||||
Upvalue* upvalue = ALLOCATE(vm, Upvalue);
|
||||
|
||||
@ -659,14 +659,22 @@ Value wrenNewString(WrenVM* vm, const char* text, size_t length);
|
||||
// The caller is expected to fully initialize the buffer after calling.
|
||||
Value wrenNewUninitializedString(WrenVM* vm, size_t length);
|
||||
|
||||
// Creates a new string that is the concatenation of [left] and [right].
|
||||
ObjString* wrenStringConcat(WrenVM* vm, const char* left, const char* right);
|
||||
// Creates a new string that is the concatenation of [left] and [right] (with
|
||||
// length [leftLength] and [rightLength], respectively). If -1 is passed
|
||||
// the string length is automatically calculated.
|
||||
ObjString* wrenStringConcat(WrenVM* vm, const char* left, int leftLength,
|
||||
const char* right, int rightLength);
|
||||
|
||||
// Creates a new string containing the code point in [string] starting at byte
|
||||
// [index]. If [index] points into the middle of a UTF-8 sequence, returns an
|
||||
// empty string.
|
||||
Value wrenStringCodePointAt(WrenVM* vm, ObjString* string, int index);
|
||||
|
||||
// Searches for the first occurrence of [needle] within [haystack] and returns its
|
||||
// zero-based offset. Returns `UINT32_MAX` if [haystack] does not contain
|
||||
// [needle].
|
||||
uint32_t wrenStringFind(WrenVM* vm, ObjString* haystack, ObjString* needle);
|
||||
|
||||
// Creates a new open upvalue pointing to [value] on the stack.
|
||||
Upvalue* wrenNewUpvalue(WrenVM* vm, Value* value);
|
||||
|
||||
|
||||
@ -1,8 +1,16 @@
|
||||
IO.print("abcd".indexOf("")) // expect: 0
|
||||
IO.print("abcd".indexOf("cd")) // expect: 2
|
||||
IO.print("abcd".indexOf("a")) // expect: 0
|
||||
IO.print("abcd".indexOf("abcd")) // expect: 0
|
||||
IO.print("abcd".indexOf("abcde")) // expect: -1
|
||||
IO.print("abab".indexOf("ab")) // expect: 0
|
||||
|
||||
// More complex cases.
|
||||
IO.print("abcdefabcdefg".indexOf("defg")) // expect: 9
|
||||
IO.print("abcdabcdabcd".indexOf("dab")) // expect: 3
|
||||
IO.print("abcdabcdabcdabcd".indexOf("dabcdabc")) // expect: 3
|
||||
IO.print("abcdefg".indexOf("abcdef!")) // expect: -1
|
||||
|
||||
// Non-ASCII. Note that it returns byte indices, not code points.
|
||||
IO.print("søméஃthîng".indexOf("e")) // expect: -1
|
||||
IO.print("søméஃthîng".indexOf("m")) // expect: 3
|
||||
|
||||
Reference in New Issue
Block a user