From 72c38a59cebbd924af4e5d4823e12f47be5e76ea Mon Sep 17 00:00:00 2001 From: Bob Nystrom Date: Fri, 27 Mar 2015 20:44:07 -0700 Subject: [PATCH] More stuff for working with strings and bytes! - "\x" escape sequence to put byte values in strings: "\x34" - String.byteAt(index) gets value of byte in string. - String.bytes returns a raw sequence of bytes for a string. - String.codePointAt(index) gets the code point at an offset as a raw number. --- builtin/core.wren | 14 +++- doc/site/core/string.markdown | 42 +++++++++++ doc/site/values.markdown | 20 +++++- src/vm/wren_compiler.c | 24 +++++-- src/vm/wren_core.c | 70 ++++++++++++++++++- src/vm/wren_utils.c | 53 +++++++++++++- src/vm/wren_utils.h | 4 ++ test/core/string/byte_at.wren | 38 ++++++++++ test/core/string/byte_at_not_int.wren | 1 + test/core/string/byte_at_not_num.wren | 1 + test/core/string/byte_at_too_large.wren | 1 + test/core/string/byte_at_too_small.wren | 1 + test/core/string/bytes.wren | 6 ++ test/core/string/code_point_at.wren | 38 ++++++++++ .../core/string/code_point_at_incomplete.wren | 3 + test/core/string/code_point_at_not_int.wren | 1 + test/core/string/code_point_at_not_num.wren | 1 + test/core/string/code_point_at_too_large.wren | 1 + test/core/string/code_point_at_too_small.wren | 1 + test/core/string_byte_sequence/iterate.wren | 21 ++++++ .../string_byte_sequence/iterate_not_int.wren | 1 + .../iterate_wrong_type.wren | 1 + .../string_byte_sequence/iterator_value.wren | 24 +++++++ .../iterator_value_not_int.wren | 1 + .../iterator_value_not_num.wren | 1 + .../iterator_value_too_large.wren | 1 + .../iterator_value_too_small.wren | 1 + test/core/string_byte_sequence/subscript.wren | 24 +++++++ .../subscript_not_int.wren | 1 + .../subscript_not_num.wren | 1 + .../subscript_too_large.wren | 1 + .../subscript_too_small.wren | 1 + test/language/string/byte_escapes.wren | 12 ++++ .../string/incomplete_byte_escape.wren | 2 + .../string/incomplete_byte_escape_at_eof.wren | 2 + 
test/language/string/invalid_byte_escape.wren | 2 + 36 files changed, 402 insertions(+), 15 deletions(-) create mode 100644 test/core/string/byte_at.wren create mode 100644 test/core/string/byte_at_not_int.wren create mode 100644 test/core/string/byte_at_not_num.wren create mode 100644 test/core/string/byte_at_too_large.wren create mode 100644 test/core/string/byte_at_too_small.wren create mode 100644 test/core/string/bytes.wren create mode 100644 test/core/string/code_point_at.wren create mode 100644 test/core/string/code_point_at_incomplete.wren create mode 100644 test/core/string/code_point_at_not_int.wren create mode 100644 test/core/string/code_point_at_not_num.wren create mode 100644 test/core/string/code_point_at_too_large.wren create mode 100644 test/core/string/code_point_at_too_small.wren create mode 100644 test/core/string_byte_sequence/iterate.wren create mode 100644 test/core/string_byte_sequence/iterate_not_int.wren create mode 100644 test/core/string_byte_sequence/iterate_wrong_type.wren create mode 100644 test/core/string_byte_sequence/iterator_value.wren create mode 100644 test/core/string_byte_sequence/iterator_value_not_int.wren create mode 100644 test/core/string_byte_sequence/iterator_value_not_num.wren create mode 100644 test/core/string_byte_sequence/iterator_value_too_large.wren create mode 100644 test/core/string_byte_sequence/iterator_value_too_small.wren create mode 100644 test/core/string_byte_sequence/subscript.wren create mode 100644 test/core/string_byte_sequence/subscript_not_int.wren create mode 100644 test/core/string_byte_sequence/subscript_not_num.wren create mode 100644 test/core/string_byte_sequence/subscript_too_large.wren create mode 100644 test/core/string_byte_sequence/subscript_too_small.wren create mode 100644 test/language/string/byte_escapes.wren create mode 100644 test/language/string/incomplete_byte_escape.wren create mode 100644 test/language/string/incomplete_byte_escape_at_eof.wren create mode 100644 
test/language/string/invalid_byte_escape.wren diff --git a/builtin/core.wren b/builtin/core.wren index 156f507f..35b6db7a 100644 --- a/builtin/core.wren +++ b/builtin/core.wren @@ -92,7 +92,19 @@ class Sequence { } } -class String is Sequence {} +class String is Sequence { + bytes { new StringByteSequence(this) } +} + +class StringByteSequence is Sequence { + new(string) { + _string = string + } + + [index] { _string.byteAt(index) } + iterate(iterator) { _string.iterateByte_(iterator) } + iteratorValue(iterator) { _string.byteAt(iterator) } +} class List is Sequence { addAll(other) { diff --git a/doc/site/core/string.markdown b/doc/site/core/string.markdown index a02f9b9b..f66f0be8 100644 --- a/doc/site/core/string.markdown +++ b/doc/site/core/string.markdown @@ -45,6 +45,48 @@ It is a runtime error if `codePoint` is not an integer between `0` and ## Methods +### **byteAt**(index) + +Gets the value of the byte at byte offset `index` in the string. + + :::dart + IO.print("hello".byteAt(1)) // 101, for "e". + +If the index is negative, it counts backwards from the end of the string. + + :::dart + IO.print("hello".byteAt(-4)) // 101, for "e". + +It is a runtime error if `index` is not an integer or is out of bounds. + +### **bytes** + +Gets a [`Sequence`](sequence.html) that can be used to access the raw bytes of +the string and ignore any UTF-8 encoding. In addition to the normal sequence +methods, the returned object also has a subscript operator that can be used to +directly index bytes. + + :::dart + IO.print("hello".bytes[1]) // 101, for "e". + +### **codePointAt**(index) + +Gets the value of the UTF-8 encoded code point starting at byte offset `index` +in the string. Unlike the subscript operator, this returns the code point as a +number. + + :::dart + var string = "(ᵔᴥᵔ)" + IO.print(string.codePointAt(0)) // 40, for "(". + IO.print(string.codePointAt(4)) // 7461, for "ᴥ". 
+ +If the byte at `index` does not begin a valid UTF-8 sequence, or the end of the +string is reached before the sequence is complete, returns `-1`. + + :::dart + var string = "(ᵔᴥᵔ)" + IO.print(string.codePointAt(2)) // -1, in the middle of "ᵔ". + ### **contains**(other) Checks if `other` is a substring of the string. diff --git a/doc/site/values.markdown b/doc/site/values.markdown index 7265339d..69a40715 100644 --- a/doc/site/values.markdown +++ b/doc/site/values.markdown @@ -30,8 +30,12 @@ Numbers are instances of the [Num](core/num.html) class. ## Strings -Strings are chunks of text stored as UTF-8. Their class is -[String](core/string.html). String literals are surrounded in double quotes: +A string is an array of bytes. Typically, it stores characters encoded in +UTF-8, but you can put any byte values in there, even zero or invalid UTF-8 +sequences. (You might have some trouble *printing* the latter to your terminal, +though.) + +String literals are surrounded in double quotes: :::dart "hi there" @@ -39,6 +43,7 @@ Strings are chunks of text stored as UTF-8. Their class is A handful of escape characters are supported: :::dart + "\0" // The NUL byte: 0. "\"" // A double quote character. "\\" // A backslash. "\a" // Alarm beep. (Who uses this?) @@ -49,7 +54,16 @@ A handful of escape characters are supported: "\t" // Tab. "\v" // Vertical tab. -A `\u` followed by four hex digits can be used to specify a Unicode code point. +A `\u` followed by four hex digits can be used to specify a Unicode code point: + + :::dart + IO.print("\u0041\u0b83\u00DE") // "AஃÞ" + +A `\x` followed by two hex digits specifies a single unencoded byte: + + IO.print("\x48\x69\x2e") // "Hi." + +Strings are objects of class [String](core/string.html). 
## Ranges diff --git a/src/vm/wren_compiler.c b/src/vm/wren_compiler.c index e28b7a9c..cfb77f16 100644 --- a/src/vm/wren_compiler.c +++ b/src/vm/wren_compiler.c @@ -622,15 +622,15 @@ static void addStringChar(Parser* parser, char c) wrenByteBufferWrite(parser->vm, &parser->string, c); } -// Reads a four hex digit Unicode escape sequence in a string literal. -static void readUnicodeEscape(Parser* parser) +// Reads [digits] hex digits in a string literal and returns their number value. +static int readHexEscape(Parser* parser, int digits, const char* description) { int value = 0; - for (int i = 0; i < 4; i++) + for (int i = 0; i < digits; i++) { if (peekChar(parser) == '"' || peekChar(parser) == '\0') { - lexError(parser, "Incomplete Unicode escape sequence."); + lexError(parser, "Incomplete %s escape sequence.", description); // Don't consume it if it isn't expected. Keeps us from reading past the // end of an unterminated string. @@ -641,13 +641,21 @@ static void readUnicodeEscape(Parser* parser) int digit = readHexDigit(parser); if (digit == -1) { - lexError(parser, "Invalid Unicode escape sequence."); + lexError(parser, "Invalid %s escape sequence.", description); break; } value = (value * 16) | digit; } + return value; +} + +// Reads a four hex digit Unicode escape sequence in a string literal. +static void readUnicodeEscape(Parser* parser) +{ + int value = readHexEscape(parser, 4, "Unicode"); + // Grow the buffer enough for the encoded result. int numBytes = wrenUtf8NumBytes(value); if (numBytes != 0) @@ -696,9 +704,13 @@ static void readString(Parser* parser) case 'n': addStringChar(parser, '\n'); break; case 'r': addStringChar(parser, '\r'); break; case 't': addStringChar(parser, '\t'); break; - case 'v': addStringChar(parser, '\v'); break; case 'u': readUnicodeEscape(parser); break; // TODO: 'U' for 8 octet Unicode escapes. 
+ case 'v': addStringChar(parser, '\v'); break; + case 'x': + addStringChar(parser, (uint8_t)readHexEscape(parser, 2, "byte")); + break; + default: lexError(parser, "Invalid escape character '%c'.", *(parser->currentChar - 1)); diff --git a/src/vm/wren_core.c b/src/vm/wren_core.c index fdd0cf6b..1938cd81 100644 --- a/src/vm/wren_core.c +++ b/src/vm/wren_core.c @@ -138,7 +138,19 @@ static const char* libSource = " }\n" "}\n" "\n" -"class String is Sequence {}\n" +"class String is Sequence {\n" +" bytes { new StringByteSequence(this) }\n" +"}\n" +"\n" +"class StringByteSequence is Sequence {\n" +" new(string) {\n" +" _string = string\n" +" }\n" +"\n" +" [index] { _string.byteAt(index) }\n" +" iterate(iterator) { _string.iterateByte_(iterator) }\n" +" iteratorValue(iterator) { _string.byteAt(iterator) }\n" +"}\n" "\n" "class List is Sequence {\n" " addAll(other) {\n" @@ -307,7 +319,7 @@ static uint32_t calculateRange(WrenVM* vm, Value* args, ObjRange* range, uint32_t* length, int* step) { *step = 0; - + // Corner case: an empty range at zero is allowed on an empty sequence. // This way, list[0..-1] and list[0...list.count] can be used to copy a list // even when empty. @@ -1225,7 +1237,33 @@ DEF_PRIMITIVE(string_fromCodePoint) RETURN_ERROR("Code point cannot be greater than 0x10ffff."); } - RETURN_VAL(wrenStringFromCodePoint(vm, (int)AS_NUM(args[1]))); + RETURN_VAL(wrenStringFromCodePoint(vm, codePoint)); +} + +DEF_PRIMITIVE(string_byteAt) +{ + ObjString* string = AS_STRING(args[0]); + + uint32_t index = validateIndex(vm, args, string->length, 1, "Index"); + if (index == UINT32_MAX) return PRIM_ERROR; + + RETURN_NUM((uint8_t)string->value[index]); +} + +DEF_PRIMITIVE(string_codePointAt) +{ + ObjString* string = AS_STRING(args[0]); + + uint32_t index = validateIndex(vm, args, string->length, 1, "Index"); + if (index == UINT32_MAX) return PRIM_ERROR; + + // If we are in the middle of a UTF-8 sequence, indicate that. 
+ const uint8_t* bytes = (uint8_t*)string->value; + if ((bytes[index] & 0xc0) == 0x80) RETURN_NUM(-1); + + // Decode the UTF-8 sequence. + RETURN_NUM(wrenUtf8Decode((uint8_t*)string->value + index, + string->length - index)); } DEF_PRIMITIVE(string_contains) @@ -1294,6 +1332,29 @@ DEF_PRIMITIVE(string_iterate) RETURN_NUM(index); } +DEF_PRIMITIVE(string_iterateByte) +{ + ObjString* string = AS_STRING(args[0]); + + // If we're starting the iteration, return the first index. + if (IS_NULL(args[1])) + { + if (string->length == 0) RETURN_FALSE; + RETURN_NUM(0); + } + + if (!validateInt(vm, args, 1, "Iterator")) return PRIM_ERROR; + + if (AS_NUM(args[1]) < 0) RETURN_FALSE; + uint32_t index = (uint32_t)AS_NUM(args[1]); + + // Advance to the next byte. + index++; + if (index >= string->length) RETURN_FALSE; + + RETURN_NUM(index); +} + DEF_PRIMITIVE(string_iteratorValue) { ObjString* string = AS_STRING(args[0]); @@ -1533,11 +1594,14 @@ void wrenInitializeCore(WrenVM* vm) PRIMITIVE(vm->stringClass->obj.classObj, "fromCodePoint(_)", string_fromCodePoint); PRIMITIVE(vm->stringClass, "+(_)", string_plus); PRIMITIVE(vm->stringClass, "[_]", string_subscript); + PRIMITIVE(vm->stringClass, "byteAt(_)", string_byteAt); + PRIMITIVE(vm->stringClass, "codePointAt(_)", string_codePointAt); PRIMITIVE(vm->stringClass, "contains(_)", string_contains); PRIMITIVE(vm->stringClass, "count", string_count); PRIMITIVE(vm->stringClass, "endsWith(_)", string_endsWith); PRIMITIVE(vm->stringClass, "indexOf(_)", string_indexOf); PRIMITIVE(vm->stringClass, "iterate(_)", string_iterate); + PRIMITIVE(vm->stringClass, "iterateByte_(_)", string_iterateByte); PRIMITIVE(vm->stringClass, "iteratorValue(_)", string_iteratorValue); PRIMITIVE(vm->stringClass, "startsWith(_)", string_startsWith); PRIMITIVE(vm->stringClass, "toString", string_toString); diff --git a/src/vm/wren_utils.c b/src/vm/wren_utils.c index f3490af6..7ed15c16 100644 --- a/src/vm/wren_utils.c +++ b/src/vm/wren_utils.c @@ -79,14 +79,14 @@ void 
wrenUtf8Encode(int value, uint8_t* bytes) } else if (value <= 0x7ff) { - // Two byte sequence: 110xxxxx 10xxxxxx. + // Two byte sequence: 110xxxxx 10xxxxxx. *bytes = 0xc0 | ((value & 0x7c0) >> 6); bytes++; *bytes = 0x80 | (value & 0x3f); } else if (value <= 0xffff) { - // Three byte sequence: 1110xxxx 10xxxxxx 10xxxxxx. + // Three byte sequence: 1110xxxx 10xxxxxx 10xxxxxx. *bytes = 0xe0 | ((value & 0xf000) >> 12); bytes++; *bytes = 0x80 | ((value & 0xfc0) >> 6); @@ -110,3 +110,52 @@ void wrenUtf8Encode(int value, uint8_t* bytes) ASSERT(false, "Invalid UTF-8 value."); } } + +int wrenUtf8Decode(const uint8_t* bytes, uint32_t length) +{ + // Single byte (i.e. fits in ASCII). + if (*bytes <= 0x7f) return *bytes; + + int value; + uint32_t remainingBytes; + if ((*bytes & 0xe0) == 0xc0) + { + // Two byte sequence: 110xxxxx 10xxxxxx. + value = *bytes & 0x1f; + remainingBytes = 1; + } + else if ((*bytes & 0xf0) == 0xe0) + { + // Three byte sequence: 1110xxxx 10xxxxxx 10xxxxxx. + value = *bytes & 0x0f; + remainingBytes = 2; + } + else if ((*bytes & 0xf8) == 0xf0) + { + // Four byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. + value = *bytes & 0x07; + remainingBytes = 3; + } + else + { + // Invalid UTF-8 sequence. + return -1; + } + + // Don't read past the end of the buffer on truncated UTF-8. + // TODO: Test this. + if (remainingBytes > length - 1) return -1; + + while (remainingBytes > 0) + { + bytes++; + remainingBytes--; + + // Remaining bytes must be of form 10xxxxxx. + if ((*bytes & 0xc0) != 0x80) return -1; + + value = value << 6 | (*bytes & 0x3f); + } + + return value; +} diff --git a/src/vm/wren_utils.h b/src/vm/wren_utils.h index c1cfe19e..70b3ded6 100644 --- a/src/vm/wren_utils.h +++ b/src/vm/wren_utils.h @@ -90,4 +90,8 @@ int wrenUtf8NumBytes(int value); // enough to hold the encoded result. void wrenUtf8Encode(int value, uint8_t* bytes); +// Decodes the UTF-8 sequence in [bytes] (which has max [length]), returning +// the code point. 
+int wrenUtf8Decode(const uint8_t* bytes, uint32_t length); + #endif diff --git a/test/core/string/byte_at.wren b/test/core/string/byte_at.wren new file mode 100644 index 00000000..cf2c20e5 --- /dev/null +++ b/test/core/string/byte_at.wren @@ -0,0 +1,38 @@ +// Bytes: 11111 +// 012345678901234 +// Chars: sø mé ஃ thî ng +var s = "søméஃthîng" + +IO.print(s.byteAt(0)) // expect: 115 +IO.print(s.byteAt(1)) // expect: 195 +IO.print(s.byteAt(2)) // expect: 184 +IO.print(s.byteAt(3)) // expect: 109 +IO.print(s.byteAt(4)) // expect: 195 +IO.print(s.byteAt(5)) // expect: 169 +IO.print(s.byteAt(6)) // expect: 224 +IO.print(s.byteAt(7)) // expect: 174 +IO.print(s.byteAt(8)) // expect: 131 +IO.print(s.byteAt(9)) // expect: 116 +IO.print(s.byteAt(10)) // expect: 104 +IO.print(s.byteAt(11)) // expect: 195 +IO.print(s.byteAt(12)) // expect: 174 +IO.print(s.byteAt(13)) // expect: 110 +IO.print(s.byteAt(14)) // expect: 103 + +IO.print(s.byteAt(-15)) // expect: 115 +IO.print(s.byteAt(-14)) // expect: 195 +IO.print(s.byteAt(-13)) // expect: 184 +IO.print(s.byteAt(-12)) // expect: 109 +IO.print(s.byteAt(-11)) // expect: 195 +IO.print(s.byteAt(-10)) // expect: 169 +IO.print(s.byteAt(-9)) // expect: 224 +IO.print(s.byteAt(-8)) // expect: 174 +IO.print(s.byteAt(-7)) // expect: 131 +IO.print(s.byteAt(-6)) // expect: 116 +IO.print(s.byteAt(-5)) // expect: 104 +IO.print(s.byteAt(-4)) // expect: 195 +IO.print(s.byteAt(-3)) // expect: 174 +IO.print(s.byteAt(-2)) // expect: 110 +IO.print(s.byteAt(-1)) // expect: 103 + +IO.print("\0".byteAt(0)) // expect: 0 diff --git a/test/core/string/byte_at_not_int.wren b/test/core/string/byte_at_not_int.wren new file mode 100644 index 00000000..d9e22b8a --- /dev/null +++ b/test/core/string/byte_at_not_int.wren @@ -0,0 +1 @@ +IO.print("string".byteAt(12.34)) // expect runtime error: Index must be an integer. 
diff --git a/test/core/string/byte_at_not_num.wren b/test/core/string/byte_at_not_num.wren new file mode 100644 index 00000000..23e5c7c3 --- /dev/null +++ b/test/core/string/byte_at_not_num.wren @@ -0,0 +1 @@ +IO.print("string".byteAt("not num")) // expect runtime error: Index must be a number. diff --git a/test/core/string/byte_at_too_large.wren b/test/core/string/byte_at_too_large.wren new file mode 100644 index 00000000..365687f9 --- /dev/null +++ b/test/core/string/byte_at_too_large.wren @@ -0,0 +1 @@ +IO.print("string".byteAt(6)) // expect runtime error: Index out of bounds. diff --git a/test/core/string/byte_at_too_small.wren b/test/core/string/byte_at_too_small.wren new file mode 100644 index 00000000..2a83555f --- /dev/null +++ b/test/core/string/byte_at_too_small.wren @@ -0,0 +1 @@ +IO.print("string".byteAt(-7)) // expect runtime error: Index out of bounds. diff --git a/test/core/string/bytes.wren b/test/core/string/bytes.wren new file mode 100644 index 00000000..f225a81f --- /dev/null +++ b/test/core/string/bytes.wren @@ -0,0 +1,6 @@ +// Bytes: 11111 +// 012345678901234 +// Chars: sø mé ஃ thî ng +var s = "søméஃthîng" + +IO.print(s.bytes is StringByteSequence) // expect: true diff --git a/test/core/string/code_point_at.wren b/test/core/string/code_point_at.wren new file mode 100644 index 00000000..a4ba4319 --- /dev/null +++ b/test/core/string/code_point_at.wren @@ -0,0 +1,38 @@ +// Bytes: 11111 +// 012345678901234 +// Chars: sø mé ஃ thî ng +var s = "søméஃthîng" + +IO.print(s.codePointAt(0)) // expect: 115 +IO.print(s.codePointAt(1)) // expect: 248 +IO.print(s.codePointAt(2)) // expect: -1 +IO.print(s.codePointAt(3)) // expect: 109 +IO.print(s.codePointAt(4)) // expect: 233 +IO.print(s.codePointAt(5)) // expect: -1 +IO.print(s.codePointAt(6)) // expect: 2947 +IO.print(s.codePointAt(7)) // expect: -1 +IO.print(s.codePointAt(8)) // expect: -1 +IO.print(s.codePointAt(9)) // expect: 116 +IO.print(s.codePointAt(10)) // expect: 104 +IO.print(s.codePointAt(11)) // 
expect: 238 +IO.print(s.codePointAt(12)) // expect: -1 +IO.print(s.codePointAt(13)) // expect: 110 +IO.print(s.codePointAt(14)) // expect: 103 + +IO.print(s.codePointAt(-15)) // expect: 115 +IO.print(s.codePointAt(-14)) // expect: 248 +IO.print(s.codePointAt(-13)) // expect: -1 +IO.print(s.codePointAt(-12)) // expect: 109 +IO.print(s.codePointAt(-11)) // expect: 233 +IO.print(s.codePointAt(-10)) // expect: -1 +IO.print(s.codePointAt(-9)) // expect: 2947 +IO.print(s.codePointAt(-8)) // expect: -1 +IO.print(s.codePointAt(-7)) // expect: -1 +IO.print(s.codePointAt(-6)) // expect: 116 +IO.print(s.codePointAt(-5)) // expect: 104 +IO.print(s.codePointAt(-4)) // expect: 238 +IO.print(s.codePointAt(-3)) // expect: -1 +IO.print(s.codePointAt(-2)) // expect: 110 +IO.print(s.codePointAt(-1)) // expect: 103 + +IO.print("\0".codePointAt(0)) // expect: 0 diff --git a/test/core/string/code_point_at_incomplete.wren b/test/core/string/code_point_at_incomplete.wren new file mode 100644 index 00000000..648ed30e --- /dev/null +++ b/test/core/string/code_point_at_incomplete.wren @@ -0,0 +1,3 @@ +// The first two bytes of a three-octet sequence. +var s = "\xe0\xae" +IO.print(s.codePointAt(0)) // expect: -1 diff --git a/test/core/string/code_point_at_not_int.wren b/test/core/string/code_point_at_not_int.wren new file mode 100644 index 00000000..2f1f42f3 --- /dev/null +++ b/test/core/string/code_point_at_not_int.wren @@ -0,0 +1 @@ +IO.print("string".codePointAt(12.34)) // expect runtime error: Index must be an integer. diff --git a/test/core/string/code_point_at_not_num.wren b/test/core/string/code_point_at_not_num.wren new file mode 100644 index 00000000..fc41da89 --- /dev/null +++ b/test/core/string/code_point_at_not_num.wren @@ -0,0 +1 @@ +IO.print("string".codePointAt("not num")) // expect runtime error: Index must be a number. 
diff --git a/test/core/string/code_point_at_too_large.wren b/test/core/string/code_point_at_too_large.wren new file mode 100644 index 00000000..34424ae4 --- /dev/null +++ b/test/core/string/code_point_at_too_large.wren @@ -0,0 +1 @@ +IO.print("string".codePointAt(6)) // expect runtime error: Index out of bounds. diff --git a/test/core/string/code_point_at_too_small.wren b/test/core/string/code_point_at_too_small.wren new file mode 100644 index 00000000..ac264d15 --- /dev/null +++ b/test/core/string/code_point_at_too_small.wren @@ -0,0 +1 @@ +IO.print("string".codePointAt(-7)) // expect runtime error: Index out of bounds. diff --git a/test/core/string_byte_sequence/iterate.wren b/test/core/string_byte_sequence/iterate.wren new file mode 100644 index 00000000..650a61d4 --- /dev/null +++ b/test/core/string_byte_sequence/iterate.wren @@ -0,0 +1,21 @@ +// Bytes: +// 012345678 +// Chars: sø mé ஃ +var bytes = "søméஃ".bytes + +IO.print(bytes.iterate(null)) // expect: 0 +IO.print("".bytes.iterate(null)) // expect: false + +IO.print(bytes.iterate(0)) // expect: 1 +IO.print(bytes.iterate(1)) // expect: 2 +IO.print(bytes.iterate(2)) // expect: 3 +IO.print(bytes.iterate(3)) // expect: 4 +IO.print(bytes.iterate(4)) // expect: 5 +IO.print(bytes.iterate(5)) // expect: 6 +IO.print(bytes.iterate(6)) // expect: 7 +IO.print(bytes.iterate(7)) // expect: 8 +IO.print(bytes.iterate(8)) // expect: false + +// Out of bounds. +IO.print(bytes.iterate(123)) // expect: false +IO.print(bytes.iterate(-1)) // expect: false diff --git a/test/core/string_byte_sequence/iterate_not_int.wren b/test/core/string_byte_sequence/iterate_not_int.wren new file mode 100644 index 00000000..2438e0ab --- /dev/null +++ b/test/core/string_byte_sequence/iterate_not_int.wren @@ -0,0 +1 @@ +"str".bytes.iterate(12.34) // expect runtime error: Iterator must be an integer. 
diff --git a/test/core/string_byte_sequence/iterate_wrong_type.wren b/test/core/string_byte_sequence/iterate_wrong_type.wren new file mode 100644 index 00000000..76a36193 --- /dev/null +++ b/test/core/string_byte_sequence/iterate_wrong_type.wren @@ -0,0 +1 @@ +"str".bytes.iterate("not num") // expect runtime error: Iterator must be a number. diff --git a/test/core/string_byte_sequence/iterator_value.wren b/test/core/string_byte_sequence/iterator_value.wren new file mode 100644 index 00000000..544d3869 --- /dev/null +++ b/test/core/string_byte_sequence/iterator_value.wren @@ -0,0 +1,24 @@ +// Bytes: +// 012345678 +// Chars: sø mé ஃ +var bytes = "søméஃ".bytes + +IO.print(bytes.iteratorValue(0)) // expect: 115 +IO.print(bytes.iteratorValue(1)) // expect: 195 +IO.print(bytes.iteratorValue(2)) // expect: 184 +IO.print(bytes.iteratorValue(3)) // expect: 109 +IO.print(bytes.iteratorValue(4)) // expect: 195 +IO.print(bytes.iteratorValue(5)) // expect: 169 +IO.print(bytes.iteratorValue(6)) // expect: 224 +IO.print(bytes.iteratorValue(7)) // expect: 174 +IO.print(bytes.iteratorValue(8)) // expect: 131 + +IO.print(bytes.iteratorValue(-9)) // expect: 115 +IO.print(bytes.iteratorValue(-8)) // expect: 195 +IO.print(bytes.iteratorValue(-7)) // expect: 184 +IO.print(bytes.iteratorValue(-6)) // expect: 109 +IO.print(bytes.iteratorValue(-5)) // expect: 195 +IO.print(bytes.iteratorValue(-4)) // expect: 169 +IO.print(bytes.iteratorValue(-3)) // expect: 224 +IO.print(bytes.iteratorValue(-2)) // expect: 174 +IO.print(bytes.iteratorValue(-1)) // expect: 131 diff --git a/test/core/string_byte_sequence/iterator_value_not_int.wren b/test/core/string_byte_sequence/iterator_value_not_int.wren new file mode 100644 index 00000000..6cc0cd80 --- /dev/null +++ b/test/core/string_byte_sequence/iterator_value_not_int.wren @@ -0,0 +1 @@ +"abcd".bytes.iteratorValue(12.34) // expect runtime error: Index must be an integer. 
diff --git a/test/core/string_byte_sequence/iterator_value_not_num.wren b/test/core/string_byte_sequence/iterator_value_not_num.wren new file mode 100644 index 00000000..c0020da6 --- /dev/null +++ b/test/core/string_byte_sequence/iterator_value_not_num.wren @@ -0,0 +1 @@ +"abcd".bytes.iteratorValue("not num") // expect runtime error: Index must be a number. diff --git a/test/core/string_byte_sequence/iterator_value_too_large.wren b/test/core/string_byte_sequence/iterator_value_too_large.wren new file mode 100644 index 00000000..fe517b6b --- /dev/null +++ b/test/core/string_byte_sequence/iterator_value_too_large.wren @@ -0,0 +1 @@ +"abcd".bytes.iteratorValue(4) // expect runtime error: Index out of bounds. diff --git a/test/core/string_byte_sequence/iterator_value_too_small.wren b/test/core/string_byte_sequence/iterator_value_too_small.wren new file mode 100644 index 00000000..beb9ffd6 --- /dev/null +++ b/test/core/string_byte_sequence/iterator_value_too_small.wren @@ -0,0 +1 @@ +"abcd".bytes.iteratorValue(-5) // expect runtime error: Index out of bounds. 
diff --git a/test/core/string_byte_sequence/subscript.wren b/test/core/string_byte_sequence/subscript.wren new file mode 100644 index 00000000..0e073978 --- /dev/null +++ b/test/core/string_byte_sequence/subscript.wren @@ -0,0 +1,24 @@ +// Bytes: +// 012345678 +// Chars: sø mé ஃ +var bytes = "søméஃ".bytes + +IO.print(bytes[0]) // expect: 115 +IO.print(bytes[1]) // expect: 195 +IO.print(bytes[2]) // expect: 184 +IO.print(bytes[3]) // expect: 109 +IO.print(bytes[4]) // expect: 195 +IO.print(bytes[5]) // expect: 169 +IO.print(bytes[6]) // expect: 224 +IO.print(bytes[7]) // expect: 174 +IO.print(bytes[8]) // expect: 131 + +IO.print(bytes[-9]) // expect: 115 +IO.print(bytes[-8]) // expect: 195 +IO.print(bytes[-7]) // expect: 184 +IO.print(bytes[-6]) // expect: 109 +IO.print(bytes[-5]) // expect: 195 +IO.print(bytes[-4]) // expect: 169 +IO.print(bytes[-3]) // expect: 224 +IO.print(bytes[-2]) // expect: 174 +IO.print(bytes[-1]) // expect: 131 diff --git a/test/core/string_byte_sequence/subscript_not_int.wren b/test/core/string_byte_sequence/subscript_not_int.wren new file mode 100644 index 00000000..00da9a0a --- /dev/null +++ b/test/core/string_byte_sequence/subscript_not_int.wren @@ -0,0 +1 @@ +"abcd".bytes[12.34] // expect runtime error: Index must be an integer. diff --git a/test/core/string_byte_sequence/subscript_not_num.wren b/test/core/string_byte_sequence/subscript_not_num.wren new file mode 100644 index 00000000..151c8a84 --- /dev/null +++ b/test/core/string_byte_sequence/subscript_not_num.wren @@ -0,0 +1 @@ +"abcd".bytes["not num"] // expect runtime error: Index must be a number. diff --git a/test/core/string_byte_sequence/subscript_too_large.wren b/test/core/string_byte_sequence/subscript_too_large.wren new file mode 100644 index 00000000..9defdedc --- /dev/null +++ b/test/core/string_byte_sequence/subscript_too_large.wren @@ -0,0 +1 @@ +"abcd".bytes[4] // expect runtime error: Index out of bounds. 
diff --git a/test/core/string_byte_sequence/subscript_too_small.wren b/test/core/string_byte_sequence/subscript_too_small.wren new file mode 100644 index 00000000..b9501b8f --- /dev/null +++ b/test/core/string_byte_sequence/subscript_too_small.wren @@ -0,0 +1 @@ +"abcd".bytes[-5] // expect runtime error: Index out of bounds. diff --git a/test/language/string/byte_escapes.wren b/test/language/string/byte_escapes.wren new file mode 100644 index 00000000..184db639 --- /dev/null +++ b/test/language/string/byte_escapes.wren @@ -0,0 +1,12 @@ +var s = "\x00\x12\x34\x56\x78\xab\xCD\xfFf" + +IO.print(s.byteAt(0)) // expect: 0 +IO.print(s.byteAt(1)) // expect: 18 +IO.print(s.byteAt(2)) // expect: 52 +IO.print(s.byteAt(3)) // expect: 86 +IO.print(s.byteAt(4)) // expect: 120 +IO.print(s.byteAt(5)) // expect: 171 +IO.print(s.byteAt(6)) // expect: 205 +IO.print(s.byteAt(7)) // expect: 255 +// "f". +IO.print(s.byteAt(8)) // expect: 102 diff --git a/test/language/string/incomplete_byte_escape.wren b/test/language/string/incomplete_byte_escape.wren new file mode 100644 index 00000000..8bfd3f38 --- /dev/null +++ b/test/language/string/incomplete_byte_escape.wren @@ -0,0 +1,2 @@ +// expect error line 2 +"\x0" \ No newline at end of file diff --git a/test/language/string/incomplete_byte_escape_at_eof.wren b/test/language/string/incomplete_byte_escape_at_eof.wren new file mode 100644 index 00000000..c2b6474f --- /dev/null +++ b/test/language/string/incomplete_byte_escape_at_eof.wren @@ -0,0 +1,2 @@ +// expect error line 2 +"\x0 \ No newline at end of file diff --git a/test/language/string/invalid_byte_escape.wren b/test/language/string/invalid_byte_escape.wren new file mode 100644 index 00000000..04ce2390 --- /dev/null +++ b/test/language/string/invalid_byte_escape.wren @@ -0,0 +1,2 @@ +// expect error line 2 +"\x0!" \ No newline at end of file