From 72c38a59cebbd924af4e5d4823e12f47be5e76ea Mon Sep 17 00:00:00 2001
From: Bob Nystrom <robert@stuffwithstuff.com>
Date: Fri, 27 Mar 2015 20:44:07 -0700
Subject: [PATCH] More stuff for working with strings and bytes!

- "\x" escape sequence to put byte values in strings: "\x34"
- String.byteAt(index) gets value of byte in string.
- String.bytes returns a raw sequence of bytes for a string.
- String.codePointAt(index) gets the code point at an offset as a raw number.
---
 builtin/core.wren                             | 14 +++-
 doc/site/core/string.markdown                 | 42 +++++++++++
 doc/site/values.markdown                      | 20 +++++-
 src/vm/wren_compiler.c                        | 24 +++++--
 src/vm/wren_core.c                            | 70 ++++++++++++++++++-
 src/vm/wren_utils.c                           | 53 +++++++++++++-
 src/vm/wren_utils.h                           |  4 ++
 test/core/string/byte_at.wren                 | 38 ++++++++++
 test/core/string/byte_at_not_int.wren         |  1 +
 test/core/string/byte_at_not_num.wren         |  1 +
 test/core/string/byte_at_too_large.wren       |  1 +
 test/core/string/byte_at_too_small.wren       |  1 +
 test/core/string/bytes.wren                   |  6 ++
 test/core/string/code_point_at.wren           | 38 ++++++++++
 .../core/string/code_point_at_incomplete.wren |  3 +
 test/core/string/code_point_at_not_int.wren   |  1 +
 test/core/string/code_point_at_not_num.wren   |  1 +
 test/core/string/code_point_at_too_large.wren |  1 +
 test/core/string/code_point_at_too_small.wren |  1 +
 test/core/string_byte_sequence/iterate.wren   | 21 ++++++
 .../string_byte_sequence/iterate_not_int.wren |  1 +
 .../iterate_wrong_type.wren                   |  1 +
 .../string_byte_sequence/iterator_value.wren  | 24 +++++++
 .../iterator_value_not_int.wren               |  1 +
 .../iterator_value_not_num.wren               |  1 +
 .../iterator_value_too_large.wren             |  1 +
 .../iterator_value_too_small.wren             |  1 +
 test/core/string_byte_sequence/subscript.wren | 24 +++++++
 .../subscript_not_int.wren                    |  1 +
 .../subscript_not_num.wren                    |  1 +
 .../subscript_too_large.wren                  |  1 +
 .../subscript_too_small.wren                  |  1 +
 test/language/string/byte_escapes.wren        | 12 ++++
 .../string/incomplete_byte_escape.wren        |  2 +
 .../string/incomplete_byte_escape_at_eof.wren |  2 +
 test/language/string/invalid_byte_escape.wren |  2 +
 36 files changed, 402 insertions(+), 15 deletions(-)
 create mode 100644 test/core/string/byte_at.wren
 create mode 100644 test/core/string/byte_at_not_int.wren
 create mode 100644 test/core/string/byte_at_not_num.wren
 create mode 100644 test/core/string/byte_at_too_large.wren
 create mode 100644 test/core/string/byte_at_too_small.wren
 create mode 100644 test/core/string/bytes.wren
 create mode 100644 test/core/string/code_point_at.wren
 create mode 100644 test/core/string/code_point_at_incomplete.wren
 create mode 100644 test/core/string/code_point_at_not_int.wren
 create mode 100644 test/core/string/code_point_at_not_num.wren
 create mode 100644 test/core/string/code_point_at_too_large.wren
 create mode 100644 test/core/string/code_point_at_too_small.wren
 create mode 100644 test/core/string_byte_sequence/iterate.wren
 create mode 100644 test/core/string_byte_sequence/iterate_not_int.wren
 create mode 100644 test/core/string_byte_sequence/iterate_wrong_type.wren
 create mode 100644 test/core/string_byte_sequence/iterator_value.wren
 create mode 100644 test/core/string_byte_sequence/iterator_value_not_int.wren
 create mode 100644 test/core/string_byte_sequence/iterator_value_not_num.wren
 create mode 100644 test/core/string_byte_sequence/iterator_value_too_large.wren
 create mode 100644 test/core/string_byte_sequence/iterator_value_too_small.wren
 create mode 100644 test/core/string_byte_sequence/subscript.wren
 create mode 100644 test/core/string_byte_sequence/subscript_not_int.wren
 create mode 100644 test/core/string_byte_sequence/subscript_not_num.wren
 create mode 100644 test/core/string_byte_sequence/subscript_too_large.wren
 create mode 100644 test/core/string_byte_sequence/subscript_too_small.wren
 create mode 100644 test/language/string/byte_escapes.wren
 create mode 100644 test/language/string/incomplete_byte_escape.wren
 create mode 100644 test/language/string/incomplete_byte_escape_at_eof.wren
 create mode 100644 test/language/string/invalid_byte_escape.wren

diff --git a/builtin/core.wren b/builtin/core.wren
index 156f507f..35b6db7a 100644
--- a/builtin/core.wren
+++ b/builtin/core.wren
@@ -92,7 +92,19 @@ class Sequence {
   }
 }
 
-class String is Sequence {}
+class String is Sequence {
+  bytes { new StringByteSequence(this) }
+}
+
+class StringByteSequence is Sequence {
+  new(string) {
+    _string = string
+  }
+
+  [index] { _string.byteAt(index) }
+  iterate(iterator) { _string.iterateByte_(iterator) }
+  iteratorValue(iterator) { _string.byteAt(iterator) }
+}
 
 class List is Sequence {
   addAll(other) {
diff --git a/doc/site/core/string.markdown b/doc/site/core/string.markdown
index a02f9b9b..f66f0be8 100644
--- a/doc/site/core/string.markdown
+++ b/doc/site/core/string.markdown
@@ -45,6 +45,48 @@ It is a runtime error if `codePoint` is not an integer between `0` and
 
 ## Methods
 
+### **byteAt**(index)
+
+Gets the value of the byte at byte offset `index` in the string.
+
+    :::dart
+    IO.print("hello".byteAt(1)) // 101, for "e".
+
+If the index is negative, it counts backwards from the end of the string.
+
+    :::dart
+    IO.print("hello".byteAt(-4)) // 101, for "e".
+
+It is a runtime error if `index` is not an integer or is out of bounds.
+
+### **bytes**
+
+Gets a [`Sequence`](sequence.html) that can be used to access the raw bytes of
+the string and ignore any UTF-8 encoding. In addition to the normal sequence
+methods, the returned object also has a subscript operator that can be used to
+directly index bytes.
+
+    :::dart
+    IO.print("hello".bytes[1]) // 101, for "e".
+
+### **codePointAt**(index)
+
+Gets the value of the UTF-8 encoded code point starting at byte offset `index`
+in the string. Unlike the subscript operator, this returns the code point as a
+number.
+
+    :::dart
+    var string = "(ᵔᴥᵔ)"
+    IO.print(string.codePointAt(0)) // 40, for "(".
+    IO.print(string.codePointAt(4)) // 7461, for "ᴥ".
+
+If the byte at `index` does not begin a valid UTF-8 sequence, or the end of the
+string is reached before the sequence is complete, returns `-1`.
+
+    :::dart
+    var string = "(ᵔᴥᵔ)"
+    IO.print(string.codePointAt(2)) // -1, in the middle of "ᵔ".
+
 ### **contains**(other)
 
 Checks if `other` is a substring of the string.
diff --git a/doc/site/values.markdown b/doc/site/values.markdown
index 7265339d..69a40715 100644
--- a/doc/site/values.markdown
+++ b/doc/site/values.markdown
@@ -30,8 +30,12 @@ Numbers are instances of the [Num](core/num.html) class.
 
 ## Strings
 
-Strings are chunks of text stored as UTF-8. Their class is
-[String](core/string.html). String literals are surrounded in double quotes:
+A string is an array of bytes. Typically, it stores characters encoded in
+UTF-8, but you can put any byte values in there, even zero or invalid UTF-8
+sequences. (You might have some trouble *printing* the latter to your terminal,
+though.)
+
+String literals are surrounded in double quotes:
 
     :::dart
     "hi there"
@@ -39,6 +43,7 @@ Strings are chunks of text stored as UTF-8. Their class is
 A handful of escape characters are supported:
 
     :::dart
+    "\0" // The NUL byte: 0.
     "\"" // A double quote character.
     "\\" // A backslash.
     "\a" // Alarm beep. (Who uses this?)
@@ -49,7 +54,16 @@ A handful of escape characters are supported:
     "\t" // Tab.
     "\v" // Vertical tab.
 
-A `\u` followed by four hex digits can be used to specify a Unicode code point.
+A `\u` followed by four hex digits can be used to specify a Unicode code point:
+
+    :::dart
+    IO.print("\u0041\u0b83\u00DE") // "AஃÞ"
+
+A `\x` followed by two hex digits specifies a single unencoded byte:
+
+    IO.print("\x48\x69\x2e") // "Hi."
+
+Strings are objects of class [String](core/string.html).
 
 ## Ranges
 
diff --git a/src/vm/wren_compiler.c b/src/vm/wren_compiler.c
index e28b7a9c..cfb77f16 100644
--- a/src/vm/wren_compiler.c
+++ b/src/vm/wren_compiler.c
@@ -622,15 +622,15 @@ static void addStringChar(Parser* parser, char c)
   wrenByteBufferWrite(parser->vm, &parser->string, c);
 }
 
-// Reads a four hex digit Unicode escape sequence in a string literal.
-static void readUnicodeEscape(Parser* parser)
+// Reads [digits] hex digits in a string literal and returns their number value.
+static int readHexEscape(Parser* parser, int digits, const char* description)
 {
   int value = 0;
-  for (int i = 0; i < 4; i++)
+  for (int i = 0; i < digits; i++)
   {
     if (peekChar(parser) == '"' || peekChar(parser) == '\0')
     {
-      lexError(parser, "Incomplete Unicode escape sequence.");
+      lexError(parser, "Incomplete %s escape sequence.", description);
 
       // Don't consume it if it isn't expected. Keeps us from reading past the
       // end of an unterminated string.
@@ -641,13 +641,21 @@ static void readUnicodeEscape(Parser* parser)
     int digit = readHexDigit(parser);
     if (digit == -1)
     {
-      lexError(parser, "Invalid Unicode escape sequence.");
+      lexError(parser, "Invalid %s escape sequence.", description);
       break;
     }
 
     value = (value * 16) | digit;
   }
 
+  return value;
+}
+
+// Reads a four hex digit Unicode escape sequence in a string literal.
+static void readUnicodeEscape(Parser* parser)
+{
+  int value = readHexEscape(parser, 4, "Unicode");
+
   // Grow the buffer enough for the encoded result.
   int numBytes = wrenUtf8NumBytes(value);
   if (numBytes != 0)
@@ -696,9 +704,13 @@ static void readString(Parser* parser)
         case 'n':  addStringChar(parser, '\n'); break;
         case 'r':  addStringChar(parser, '\r'); break;
         case 't':  addStringChar(parser, '\t'); break;
-        case 'v':  addStringChar(parser, '\v'); break;
         case 'u':  readUnicodeEscape(parser); break;
           // TODO: 'U' for 8 octet Unicode escapes.
+        case 'v':  addStringChar(parser, '\v'); break;
+        case 'x':
+          addStringChar(parser, (uint8_t)readHexEscape(parser, 2, "byte"));
+          break;
+
         default:
           lexError(parser, "Invalid escape character '%c'.",
                    *(parser->currentChar - 1));
diff --git a/src/vm/wren_core.c b/src/vm/wren_core.c
index fdd0cf6b..1938cd81 100644
--- a/src/vm/wren_core.c
+++ b/src/vm/wren_core.c
@@ -138,7 +138,19 @@ static const char* libSource =
 "  }\n"
 "}\n"
 "\n"
-"class String is Sequence {}\n"
+"class String is Sequence {\n"
+"  bytes { new StringByteSequence(this) }\n"
+"}\n"
+"\n"
+"class StringByteSequence is Sequence {\n"
+"  new(string) {\n"
+"    _string = string\n"
+"  }\n"
+"\n"
+"  [index] { _string.byteAt(index) }\n"
+"  iterate(iterator) { _string.iterateByte_(iterator) }\n"
+"  iteratorValue(iterator) { _string.byteAt(iterator) }\n"
+"}\n"
 "\n"
 "class List is Sequence {\n"
 "  addAll(other) {\n"
@@ -307,7 +319,7 @@ static uint32_t calculateRange(WrenVM* vm, Value* args, ObjRange* range,
                                uint32_t* length, int* step)
 {
   *step = 0;
-  
+
   // Corner case: an empty range at zero is allowed on an empty sequence.
   // This way, list[0..-1] and list[0...list.count] can be used to copy a list
   // even when empty.
@@ -1225,7 +1237,33 @@ DEF_PRIMITIVE(string_fromCodePoint)
     RETURN_ERROR("Code point cannot be greater than 0x10ffff.");
   }
 
-  RETURN_VAL(wrenStringFromCodePoint(vm, (int)AS_NUM(args[1])));
+  RETURN_VAL(wrenStringFromCodePoint(vm, codePoint));
+}
+
+DEF_PRIMITIVE(string_byteAt)
+{
+  ObjString* string = AS_STRING(args[0]);
+
+  uint32_t index = validateIndex(vm, args, string->length, 1, "Index");
+  if (index == UINT32_MAX) return PRIM_ERROR;
+
+  RETURN_NUM((uint8_t)string->value[index]);
+}
+
+DEF_PRIMITIVE(string_codePointAt)
+{
+  ObjString* string = AS_STRING(args[0]);
+
+  uint32_t index = validateIndex(vm, args, string->length, 1, "Index");
+  if (index == UINT32_MAX) return PRIM_ERROR;
+
+  // If we are in the middle of a UTF-8 sequence, indicate that.
+  const uint8_t* bytes = (uint8_t*)string->value;
+  if ((bytes[index] & 0xc0) == 0x80) RETURN_NUM(-1);
+
+  // Decode the UTF-8 sequence.
+  RETURN_NUM(wrenUtf8Decode((uint8_t*)string->value + index,
+                            string->length - index));
 }
 
 DEF_PRIMITIVE(string_contains)
@@ -1294,6 +1332,29 @@ DEF_PRIMITIVE(string_iterate)
   RETURN_NUM(index);
 }
 
+DEF_PRIMITIVE(string_iterateByte)
+{
+  ObjString* string = AS_STRING(args[0]);
+
+  // If we're starting the iteration, return the first index.
+  if (IS_NULL(args[1]))
+  {
+    if (string->length == 0) RETURN_FALSE;
+    RETURN_NUM(0);
+  }
+
+  if (!validateInt(vm, args, 1, "Iterator")) return PRIM_ERROR;
+
+  if (AS_NUM(args[1]) < 0) RETURN_FALSE;
+  uint32_t index = (uint32_t)AS_NUM(args[1]);
+
+  // Advance to the next byte.
+  index++;
+  if (index >= string->length) RETURN_FALSE;
+
+  RETURN_NUM(index);
+}
+
 DEF_PRIMITIVE(string_iteratorValue)
 {
   ObjString* string = AS_STRING(args[0]);
@@ -1533,11 +1594,14 @@ void wrenInitializeCore(WrenVM* vm)
   PRIMITIVE(vm->stringClass->obj.classObj, "fromCodePoint(_)", string_fromCodePoint);
   PRIMITIVE(vm->stringClass, "+(_)", string_plus);
   PRIMITIVE(vm->stringClass, "[_]", string_subscript);
+  PRIMITIVE(vm->stringClass, "byteAt(_)", string_byteAt);
+  PRIMITIVE(vm->stringClass, "codePointAt(_)", string_codePointAt);
   PRIMITIVE(vm->stringClass, "contains(_)", string_contains);
   PRIMITIVE(vm->stringClass, "count", string_count);
   PRIMITIVE(vm->stringClass, "endsWith(_)", string_endsWith);
   PRIMITIVE(vm->stringClass, "indexOf(_)", string_indexOf);
   PRIMITIVE(vm->stringClass, "iterate(_)", string_iterate);
+  PRIMITIVE(vm->stringClass, "iterateByte_(_)", string_iterateByte);
   PRIMITIVE(vm->stringClass, "iteratorValue(_)", string_iteratorValue);
   PRIMITIVE(vm->stringClass, "startsWith(_)", string_startsWith);
   PRIMITIVE(vm->stringClass, "toString", string_toString);
diff --git a/src/vm/wren_utils.c b/src/vm/wren_utils.c
index f3490af6..7ed15c16 100644
--- a/src/vm/wren_utils.c
+++ b/src/vm/wren_utils.c
@@ -79,14 +79,14 @@ void wrenUtf8Encode(int value, uint8_t* bytes)
   }
   else if (value <= 0x7ff)
   {
-    // Two byte sequence: 110xxxxx	 10xxxxxx.
+    // Two byte sequence: 110xxxxx 10xxxxxx.
     *bytes = 0xc0 | ((value & 0x7c0) >> 6);
     bytes++;
     *bytes = 0x80 | (value & 0x3f);
   }
   else if (value <= 0xffff)
   {
-    // Three byte sequence: 1110xxxx	 10xxxxxx 10xxxxxx.
+    // Three byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.
     *bytes = 0xe0 | ((value & 0xf000) >> 12);
     bytes++;
     *bytes = 0x80 | ((value & 0xfc0) >> 6);
@@ -110,3 +110,52 @@ void wrenUtf8Encode(int value, uint8_t* bytes)
     ASSERT(false, "Invalid UTF-8 value.");
   }
 }
+
+int wrenUtf8Decode(const uint8_t* bytes, uint32_t length)
+{
+  // Single byte (i.e. fits in ASCII).
+  if (*bytes <= 0x7f) return *bytes;
+
+  int value;
+  uint32_t remainingBytes;
+  if ((*bytes & 0xe0) == 0xc0)
+  {
+    // Two byte sequence: 110xxxxx 10xxxxxx.
+    value = *bytes & 0x1f;
+    remainingBytes = 1;
+  }
+  else if ((*bytes & 0xf0) == 0xe0)
+  {
+    // Three byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.
+    value = *bytes & 0x0f;
+    remainingBytes = 2;
+  }
+  else if ((*bytes & 0xf8) == 0xf0)
+  {
+    // Four byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
+    value = *bytes & 0x07;
+    remainingBytes = 3;
+  }
+  else
+  {
+    // Invalid UTF-8 sequence.
+    return -1;
+  }
+
+  // Don't read past the end of the buffer on truncated UTF-8.
+  // Exercised by test/core/string/code_point_at_incomplete.wren.
+  if (remainingBytes > length - 1) return -1;
+
+  while (remainingBytes > 0)
+  {
+    bytes++;
+    remainingBytes--;
+
+    // Remaining bytes must be of form 10xxxxxx.
+    if ((*bytes & 0xc0) != 0x80) return -1;
+
+    value = value << 6 | (*bytes & 0x3f);
+  }
+
+  return value;
+}
diff --git a/src/vm/wren_utils.h b/src/vm/wren_utils.h
index c1cfe19e..70b3ded6 100644
--- a/src/vm/wren_utils.h
+++ b/src/vm/wren_utils.h
@@ -90,4 +90,8 @@ int wrenUtf8NumBytes(int value);
 // enough to hold the encoded result.
 void wrenUtf8Encode(int value, uint8_t* bytes);
 
+// Decodes the UTF-8 sequence starting at [bytes] (which has max [length]),
+// returning the code point, or -1 if the sequence is invalid or incomplete.
+int wrenUtf8Decode(const uint8_t* bytes, uint32_t length);
+
 #endif
diff --git a/test/core/string/byte_at.wren b/test/core/string/byte_at.wren
new file mode 100644
index 00000000..cf2c20e5
--- /dev/null
+++ b/test/core/string/byte_at.wren
@@ -0,0 +1,38 @@
+// Bytes:           11111
+//        012345678901234
+// Chars: sø mé ஃ  thî ng
+var s = "søméஃthîng"
+
+IO.print(s.byteAt(0)) // expect: 115
+IO.print(s.byteAt(1)) // expect: 195
+IO.print(s.byteAt(2)) // expect: 184
+IO.print(s.byteAt(3)) // expect: 109
+IO.print(s.byteAt(4)) // expect: 195
+IO.print(s.byteAt(5)) // expect: 169
+IO.print(s.byteAt(6)) // expect: 224
+IO.print(s.byteAt(7)) // expect: 174
+IO.print(s.byteAt(8)) // expect: 131
+IO.print(s.byteAt(9)) // expect: 116
+IO.print(s.byteAt(10)) // expect: 104
+IO.print(s.byteAt(11)) // expect: 195
+IO.print(s.byteAt(12)) // expect: 174
+IO.print(s.byteAt(13)) // expect: 110
+IO.print(s.byteAt(14)) // expect: 103
+
+IO.print(s.byteAt(-15)) // expect: 115
+IO.print(s.byteAt(-14)) // expect: 195
+IO.print(s.byteAt(-13)) // expect: 184
+IO.print(s.byteAt(-12)) // expect: 109
+IO.print(s.byteAt(-11)) // expect: 195
+IO.print(s.byteAt(-10)) // expect: 169
+IO.print(s.byteAt(-9)) // expect: 224
+IO.print(s.byteAt(-8)) // expect: 174
+IO.print(s.byteAt(-7)) // expect: 131
+IO.print(s.byteAt(-6)) // expect: 116
+IO.print(s.byteAt(-5)) // expect: 104
+IO.print(s.byteAt(-4)) // expect: 195
+IO.print(s.byteAt(-3)) // expect: 174
+IO.print(s.byteAt(-2)) // expect: 110
+IO.print(s.byteAt(-1)) // expect: 103
+
+IO.print("\0".byteAt(0)) // expect: 0
diff --git a/test/core/string/byte_at_not_int.wren b/test/core/string/byte_at_not_int.wren
new file mode 100644
index 00000000..d9e22b8a
--- /dev/null
+++ b/test/core/string/byte_at_not_int.wren
@@ -0,0 +1 @@
+IO.print("string".byteAt(12.34)) // expect runtime error: Index must be an integer.
diff --git a/test/core/string/byte_at_not_num.wren b/test/core/string/byte_at_not_num.wren
new file mode 100644
index 00000000..23e5c7c3
--- /dev/null
+++ b/test/core/string/byte_at_not_num.wren
@@ -0,0 +1 @@
+IO.print("string".byteAt("not num")) // expect runtime error: Index must be a number.
diff --git a/test/core/string/byte_at_too_large.wren b/test/core/string/byte_at_too_large.wren
new file mode 100644
index 00000000..365687f9
--- /dev/null
+++ b/test/core/string/byte_at_too_large.wren
@@ -0,0 +1 @@
+IO.print("string".byteAt(6)) // expect runtime error: Index out of bounds.
diff --git a/test/core/string/byte_at_too_small.wren b/test/core/string/byte_at_too_small.wren
new file mode 100644
index 00000000..2a83555f
--- /dev/null
+++ b/test/core/string/byte_at_too_small.wren
@@ -0,0 +1 @@
+IO.print("string".byteAt(-7)) // expect runtime error: Index out of bounds.
diff --git a/test/core/string/bytes.wren b/test/core/string/bytes.wren
new file mode 100644
index 00000000..f225a81f
--- /dev/null
+++ b/test/core/string/bytes.wren
@@ -0,0 +1,6 @@
+// Bytes:           11111
+//        012345678901234
+// Chars: sø mé ஃ  thî ng
+var s = "søméஃthîng"
+
+IO.print(s.bytes is StringByteSequence) // expect: true
diff --git a/test/core/string/code_point_at.wren b/test/core/string/code_point_at.wren
new file mode 100644
index 00000000..a4ba4319
--- /dev/null
+++ b/test/core/string/code_point_at.wren
@@ -0,0 +1,38 @@
+// Bytes:           11111
+//        012345678901234
+// Chars: sø mé ஃ  thî ng
+var s = "søméஃthîng"
+
+IO.print(s.codePointAt(0)) // expect: 115
+IO.print(s.codePointAt(1)) // expect: 248
+IO.print(s.codePointAt(2)) // expect: -1
+IO.print(s.codePointAt(3)) // expect: 109
+IO.print(s.codePointAt(4)) // expect: 233
+IO.print(s.codePointAt(5)) // expect: -1
+IO.print(s.codePointAt(6)) // expect: 2947
+IO.print(s.codePointAt(7)) // expect: -1
+IO.print(s.codePointAt(8)) // expect: -1
+IO.print(s.codePointAt(9)) // expect: 116
+IO.print(s.codePointAt(10)) // expect: 104
+IO.print(s.codePointAt(11)) // expect: 238
+IO.print(s.codePointAt(12)) // expect: -1
+IO.print(s.codePointAt(13)) // expect: 110
+IO.print(s.codePointAt(14)) // expect: 103
+
+IO.print(s.codePointAt(-15)) // expect: 115
+IO.print(s.codePointAt(-14)) // expect: 248
+IO.print(s.codePointAt(-13)) // expect: -1
+IO.print(s.codePointAt(-12)) // expect: 109
+IO.print(s.codePointAt(-11)) // expect: 233
+IO.print(s.codePointAt(-10)) // expect: -1
+IO.print(s.codePointAt(-9)) // expect: 2947
+IO.print(s.codePointAt(-8)) // expect: -1
+IO.print(s.codePointAt(-7)) // expect: -1
+IO.print(s.codePointAt(-6)) // expect: 116
+IO.print(s.codePointAt(-5)) // expect: 104
+IO.print(s.codePointAt(-4)) // expect: 238
+IO.print(s.codePointAt(-3)) // expect: -1
+IO.print(s.codePointAt(-2)) // expect: 110
+IO.print(s.codePointAt(-1)) // expect: 103
+
+IO.print("\0".codePointAt(0)) // expect: 0
diff --git a/test/core/string/code_point_at_incomplete.wren b/test/core/string/code_point_at_incomplete.wren
new file mode 100644
index 00000000..648ed30e
--- /dev/null
+++ b/test/core/string/code_point_at_incomplete.wren
@@ -0,0 +1,3 @@
+// The first two bytes of a three-octet sequence.
+var s = "\xe0\xae"
+IO.print(s.codePointAt(0)) // expect: -1
diff --git a/test/core/string/code_point_at_not_int.wren b/test/core/string/code_point_at_not_int.wren
new file mode 100644
index 00000000..2f1f42f3
--- /dev/null
+++ b/test/core/string/code_point_at_not_int.wren
@@ -0,0 +1 @@
+IO.print("string".codePointAt(12.34)) // expect runtime error: Index must be an integer.
diff --git a/test/core/string/code_point_at_not_num.wren b/test/core/string/code_point_at_not_num.wren
new file mode 100644
index 00000000..fc41da89
--- /dev/null
+++ b/test/core/string/code_point_at_not_num.wren
@@ -0,0 +1 @@
+IO.print("string".codePointAt("not num")) // expect runtime error: Index must be a number.
diff --git a/test/core/string/code_point_at_too_large.wren b/test/core/string/code_point_at_too_large.wren
new file mode 100644
index 00000000..34424ae4
--- /dev/null
+++ b/test/core/string/code_point_at_too_large.wren
@@ -0,0 +1 @@
+IO.print("string".codePointAt(6)) // expect runtime error: Index out of bounds.
diff --git a/test/core/string/code_point_at_too_small.wren b/test/core/string/code_point_at_too_small.wren
new file mode 100644
index 00000000..ac264d15
--- /dev/null
+++ b/test/core/string/code_point_at_too_small.wren
@@ -0,0 +1 @@
+IO.print("string".codePointAt(-7)) // expect runtime error: Index out of bounds.
diff --git a/test/core/string_byte_sequence/iterate.wren b/test/core/string_byte_sequence/iterate.wren
new file mode 100644
index 00000000..650a61d4
--- /dev/null
+++ b/test/core/string_byte_sequence/iterate.wren
@@ -0,0 +1,21 @@
+// Bytes:
+//        012345678
+// Chars: sø mé ஃ
+var bytes = "søméஃ".bytes
+
+IO.print(bytes.iterate(null)) // expect: 0
+IO.print("".bytes.iterate(null)) // expect: false
+
+IO.print(bytes.iterate(0)) // expect: 1
+IO.print(bytes.iterate(1)) // expect: 2
+IO.print(bytes.iterate(2)) // expect: 3
+IO.print(bytes.iterate(3)) // expect: 4
+IO.print(bytes.iterate(4)) // expect: 5
+IO.print(bytes.iterate(5)) // expect: 6
+IO.print(bytes.iterate(6)) // expect: 7
+IO.print(bytes.iterate(7)) // expect: 8
+IO.print(bytes.iterate(8)) // expect: false
+
+// Out of bounds.
+IO.print(bytes.iterate(123)) // expect: false
+IO.print(bytes.iterate(-1)) // expect: false
diff --git a/test/core/string_byte_sequence/iterate_not_int.wren b/test/core/string_byte_sequence/iterate_not_int.wren
new file mode 100644
index 00000000..2438e0ab
--- /dev/null
+++ b/test/core/string_byte_sequence/iterate_not_int.wren
@@ -0,0 +1 @@
+"str".bytes.iterate(12.34) // expect runtime error: Iterator must be an integer.
diff --git a/test/core/string_byte_sequence/iterate_wrong_type.wren b/test/core/string_byte_sequence/iterate_wrong_type.wren
new file mode 100644
index 00000000..76a36193
--- /dev/null
+++ b/test/core/string_byte_sequence/iterate_wrong_type.wren
@@ -0,0 +1 @@
+"str".bytes.iterate("not num") // expect runtime error: Iterator must be a number.
diff --git a/test/core/string_byte_sequence/iterator_value.wren b/test/core/string_byte_sequence/iterator_value.wren
new file mode 100644
index 00000000..544d3869
--- /dev/null
+++ b/test/core/string_byte_sequence/iterator_value.wren
@@ -0,0 +1,24 @@
+// Bytes:
+//        012345678
+// Chars: sø mé ஃ
+var bytes = "søméஃ".bytes
+
+IO.print(bytes.iteratorValue(0)) // expect: 115
+IO.print(bytes.iteratorValue(1)) // expect: 195
+IO.print(bytes.iteratorValue(2)) // expect: 184
+IO.print(bytes.iteratorValue(3)) // expect: 109
+IO.print(bytes.iteratorValue(4)) // expect: 195
+IO.print(bytes.iteratorValue(5)) // expect: 169
+IO.print(bytes.iteratorValue(6)) // expect: 224
+IO.print(bytes.iteratorValue(7)) // expect: 174
+IO.print(bytes.iteratorValue(8)) // expect: 131
+
+IO.print(bytes.iteratorValue(-9)) // expect: 115
+IO.print(bytes.iteratorValue(-8)) // expect: 195
+IO.print(bytes.iteratorValue(-7)) // expect: 184
+IO.print(bytes.iteratorValue(-6)) // expect: 109
+IO.print(bytes.iteratorValue(-5)) // expect: 195
+IO.print(bytes.iteratorValue(-4)) // expect: 169
+IO.print(bytes.iteratorValue(-3)) // expect: 224
+IO.print(bytes.iteratorValue(-2)) // expect: 174
+IO.print(bytes.iteratorValue(-1)) // expect: 131
diff --git a/test/core/string_byte_sequence/iterator_value_not_int.wren b/test/core/string_byte_sequence/iterator_value_not_int.wren
new file mode 100644
index 00000000..6cc0cd80
--- /dev/null
+++ b/test/core/string_byte_sequence/iterator_value_not_int.wren
@@ -0,0 +1 @@
+"abcd".bytes.iteratorValue(12.34) // expect runtime error: Index must be an integer.
diff --git a/test/core/string_byte_sequence/iterator_value_not_num.wren b/test/core/string_byte_sequence/iterator_value_not_num.wren
new file mode 100644
index 00000000..c0020da6
--- /dev/null
+++ b/test/core/string_byte_sequence/iterator_value_not_num.wren
@@ -0,0 +1 @@
+"abcd".bytes.iteratorValue("not num") // expect runtime error: Index must be a number.
diff --git a/test/core/string_byte_sequence/iterator_value_too_large.wren b/test/core/string_byte_sequence/iterator_value_too_large.wren
new file mode 100644
index 00000000..fe517b6b
--- /dev/null
+++ b/test/core/string_byte_sequence/iterator_value_too_large.wren
@@ -0,0 +1 @@
+"abcd".bytes.iteratorValue(4) // expect runtime error: Index out of bounds.
diff --git a/test/core/string_byte_sequence/iterator_value_too_small.wren b/test/core/string_byte_sequence/iterator_value_too_small.wren
new file mode 100644
index 00000000..beb9ffd6
--- /dev/null
+++ b/test/core/string_byte_sequence/iterator_value_too_small.wren
@@ -0,0 +1 @@
+"abcd".bytes.iteratorValue(-5) // expect runtime error: Index out of bounds.
diff --git a/test/core/string_byte_sequence/subscript.wren b/test/core/string_byte_sequence/subscript.wren
new file mode 100644
index 00000000..0e073978
--- /dev/null
+++ b/test/core/string_byte_sequence/subscript.wren
@@ -0,0 +1,24 @@
+// Bytes:
+//        012345678
+// Chars: sø mé ஃ
+var bytes = "søméஃ".bytes
+
+IO.print(bytes[0]) // expect: 115
+IO.print(bytes[1]) // expect: 195
+IO.print(bytes[2]) // expect: 184
+IO.print(bytes[3]) // expect: 109
+IO.print(bytes[4]) // expect: 195
+IO.print(bytes[5]) // expect: 169
+IO.print(bytes[6]) // expect: 224
+IO.print(bytes[7]) // expect: 174
+IO.print(bytes[8]) // expect: 131
+
+IO.print(bytes[-9]) // expect: 115
+IO.print(bytes[-8]) // expect: 195
+IO.print(bytes[-7]) // expect: 184
+IO.print(bytes[-6]) // expect: 109
+IO.print(bytes[-5]) // expect: 195
+IO.print(bytes[-4]) // expect: 169
+IO.print(bytes[-3]) // expect: 224
+IO.print(bytes[-2]) // expect: 174
+IO.print(bytes[-1]) // expect: 131
diff --git a/test/core/string_byte_sequence/subscript_not_int.wren b/test/core/string_byte_sequence/subscript_not_int.wren
new file mode 100644
index 00000000..00da9a0a
--- /dev/null
+++ b/test/core/string_byte_sequence/subscript_not_int.wren
@@ -0,0 +1 @@
+"abcd".bytes[12.34] // expect runtime error: Index must be an integer.
diff --git a/test/core/string_byte_sequence/subscript_not_num.wren b/test/core/string_byte_sequence/subscript_not_num.wren
new file mode 100644
index 00000000..151c8a84
--- /dev/null
+++ b/test/core/string_byte_sequence/subscript_not_num.wren
@@ -0,0 +1 @@
+"abcd".bytes["not num"] // expect runtime error: Index must be a number.
diff --git a/test/core/string_byte_sequence/subscript_too_large.wren b/test/core/string_byte_sequence/subscript_too_large.wren
new file mode 100644
index 00000000..9defdedc
--- /dev/null
+++ b/test/core/string_byte_sequence/subscript_too_large.wren
@@ -0,0 +1 @@
+"abcd".bytes[4] // expect runtime error: Index out of bounds.
diff --git a/test/core/string_byte_sequence/subscript_too_small.wren b/test/core/string_byte_sequence/subscript_too_small.wren
new file mode 100644
index 00000000..b9501b8f
--- /dev/null
+++ b/test/core/string_byte_sequence/subscript_too_small.wren
@@ -0,0 +1 @@
+"abcd".bytes[-5] // expect runtime error: Index out of bounds.
diff --git a/test/language/string/byte_escapes.wren b/test/language/string/byte_escapes.wren
new file mode 100644
index 00000000..184db639
--- /dev/null
+++ b/test/language/string/byte_escapes.wren
@@ -0,0 +1,12 @@
+var s = "\x00\x12\x34\x56\x78\xab\xCD\xfFf"
+
+IO.print(s.byteAt(0)) // expect: 0
+IO.print(s.byteAt(1)) // expect: 18
+IO.print(s.byteAt(2)) // expect: 52
+IO.print(s.byteAt(3)) // expect: 86
+IO.print(s.byteAt(4)) // expect: 120
+IO.print(s.byteAt(5)) // expect: 171
+IO.print(s.byteAt(6)) // expect: 205
+IO.print(s.byteAt(7)) // expect: 255
+// "f".
+IO.print(s.byteAt(8)) // expect: 102
diff --git a/test/language/string/incomplete_byte_escape.wren b/test/language/string/incomplete_byte_escape.wren
new file mode 100644
index 00000000..8bfd3f38
--- /dev/null
+++ b/test/language/string/incomplete_byte_escape.wren
@@ -0,0 +1,2 @@
+// expect error line 2
+"\x0"
\ No newline at end of file
diff --git a/test/language/string/incomplete_byte_escape_at_eof.wren b/test/language/string/incomplete_byte_escape_at_eof.wren
new file mode 100644
index 00000000..c2b6474f
--- /dev/null
+++ b/test/language/string/incomplete_byte_escape_at_eof.wren
@@ -0,0 +1,2 @@
+// expect error line 2
+"\x0
\ No newline at end of file
diff --git a/test/language/string/invalid_byte_escape.wren b/test/language/string/invalid_byte_escape.wren
new file mode 100644
index 00000000..04ce2390
--- /dev/null
+++ b/test/language/string/invalid_byte_escape.wren
@@ -0,0 +1,2 @@
+// expect error line 2
+"\x0!"
\ No newline at end of file