From 7d45dda3830917f11bbef65a1e1c8501554f069f Mon Sep 17 00:00:00 2001
From: Bob Nystrom <robert@stuffwithstuff.com>
Date: Fri, 27 Mar 2015 07:43:36 -0700
Subject: [PATCH] String.fromCodePoint(). Fix #219.

---
 doc/site/core/bool.markdown                   |  2 +
 doc/site/core/class.markdown                  |  2 +
 doc/site/core/fiber.markdown                  |  4 ++
 doc/site/core/fn.markdown                     |  2 +
 doc/site/core/list.markdown                   |  2 +
 doc/site/core/map.markdown                    |  2 +
 doc/site/core/null.markdown                   |  2 +
 doc/site/core/num.markdown                    | 26 ++++++----
 doc/site/core/object.markdown                 |  2 +
 doc/site/core/range.markdown                  |  2 +
 doc/site/core/sequence.markdown               |  2 +
 doc/site/core/string.markdown                 | 12 +++++
 src/vm/wren_compiler.c                        | 43 ++++-----------
 src/vm/wren_core.c                            | 18 +++++++
 src/vm/wren_utils.c                           | 52 +++++++++++++++++++
 src/vm/wren_utils.h                           |  9 ++++
 src/vm/wren_value.c                           | 13 +++++
 src/vm/wren_value.h                           |  3 ++
 test/core/string/from_code_point.wren         |  5 ++
 test/core/string/from_code_point_not_int.wren |  1 +
 test/core/string/from_code_point_not_num.wren |  1 +
 .../string/from_code_point_too_large.wren     |  3 ++
 .../string/from_code_point_too_small.wren     |  1 +
 23 files changed, 166 insertions(+), 43 deletions(-)
 create mode 100644 test/core/string/from_code_point.wren
 create mode 100644 test/core/string/from_code_point_not_int.wren
 create mode 100644 test/core/string/from_code_point_not_num.wren
 create mode 100644 test/core/string/from_code_point_too_large.wren
 create mode 100644 test/core/string/from_code_point_too_small.wren

diff --git a/doc/site/core/bool.markdown b/doc/site/core/bool.markdown
index a95e968d..fc5b991b 100644
--- a/doc/site/core/bool.markdown
+++ b/doc/site/core/bool.markdown
@@ -3,6 +3,8 @@
 
 Boolean values. There are two instances, `true` and `false`.
 
+## Methods
+
 ### **!** operator
 
 Returns the logical complement of the value.
diff --git a/doc/site/core/class.markdown b/doc/site/core/class.markdown
index 66097aab..b26a827b 100644
--- a/doc/site/core/class.markdown
+++ b/doc/site/core/class.markdown
@@ -1,6 +1,8 @@
 ^title Class Class
 ^category core
 
+## Methods
+
 ### **name**
 
 The name of the class.
diff --git a/doc/site/core/fiber.markdown b/doc/site/core/fiber.markdown
index d5f089db..b93d0095 100644
--- a/doc/site/core/fiber.markdown
+++ b/doc/site/core/fiber.markdown
@@ -13,6 +13,8 @@ fiber is run. Does not immediately start running the fiber.
       IO.print("I won't get printed")
     }
 
+## Static Methods
+
 ### Fiber.**current**
 
 The currently executing fiber.
@@ -69,6 +71,8 @@ Similar to `Fiber.yield` but provides a value to return to the parent fiber's
 
     IO.print(fiber.call()) // "value"
 
+## Methods
+
 ### **call**()
 
 **TODO**
diff --git a/doc/site/core/fn.markdown b/doc/site/core/fn.markdown
index 434982fc..6677be39 100644
--- a/doc/site/core/fn.markdown
+++ b/doc/site/core/fn.markdown
@@ -18,6 +18,8 @@ argument](../functions.html#block-arguments) to some other method.
 
 It is a runtime error if `function` is not a function.
 
+## Methods
+
 ### **arity**
 
 The number of arguments the function requires.
diff --git a/doc/site/core/list.markdown b/doc/site/core/list.markdown
index 498f754c..89961168 100644
--- a/doc/site/core/list.markdown
+++ b/doc/site/core/list.markdown
@@ -5,6 +5,8 @@ Extends [Sequence](sequence.html).
 
 An indexable contiguous collection of elements. More details [here](../lists.html).
 
+## Methods
+
 ### **add**(item)
 
 Appends `item` to the end of the list.
diff --git a/doc/site/core/map.markdown b/doc/site/core/map.markdown
index eac482c0..b5ef691d 100644
--- a/doc/site/core/map.markdown
+++ b/doc/site/core/map.markdown
@@ -3,6 +3,8 @@
 
 An associative collection that maps keys to values. More details [here](../maps.html).
 
+## Methods
+
 ### **clear**()
 
 Removes all entries from the map.
diff --git a/doc/site/core/null.markdown b/doc/site/core/null.markdown
index b51c8bec..4bca4ff9 100644
--- a/doc/site/core/null.markdown
+++ b/doc/site/core/null.markdown
@@ -1,6 +1,8 @@
 ^title Null Class
 ^category core
 
+## Methods
+
 ### **!** operator
 
 Returns `true`, since `null` is considered [false](../control-flow.html#truth).
diff --git a/doc/site/core/num.markdown b/doc/site/core/num.markdown
index 42504c4d..b617b374 100644
--- a/doc/site/core/num.markdown
+++ b/doc/site/core/num.markdown
@@ -1,6 +1,21 @@
 ^title Num Class
 ^category core
 
+## Static Methods
+
+### Num.**fromString**(value)
+
+Attempts to parse `value` as a decimal literal and return it as an instance of
+`Num`. If the number cannot be parsed `null` will be returned.
+
+It is a runtime error if `value` is not a string.
+
+### Num.**pi**
+
+The value of π.
+
+## Methods
+
 ### **abs**
 
 The absolute value of the number.
@@ -131,14 +146,3 @@ from the beginning number to the ending number not including the ending number.
     IO.print(range.min)         // 1.2
     IO.print(range.max)         // 3.4
     IO.print(range.isInclusive) // false
-
-### Num.**fromString**(value)
-
-Attempts to parse `value` as a decimal literal and return it as an instance of
-`Num`. If the number cannot be parsed `null` will be returned.
-
-It is a runtime error if `value` is not a string.
-
-### Num.**pi**
-
-The value of π.
diff --git a/doc/site/core/object.markdown b/doc/site/core/object.markdown
index 2b6b0b2c..b026ae0f 100644
--- a/doc/site/core/object.markdown
+++ b/doc/site/core/object.markdown
@@ -1,6 +1,8 @@
 ^title Object Class
 ^category core
 
+## Methods
+
 ### **!** operator
 
 Returns `false`, since most objects are considered [true](control-flow.html#truth).
diff --git a/doc/site/core/range.markdown b/doc/site/core/range.markdown
index aab8f5b8..88545166 100644
--- a/doc/site/core/range.markdown
+++ b/doc/site/core/range.markdown
@@ -5,6 +5,8 @@
 
 Extends [Sequence](sequence.html).
 
+## Methods
+
 ### **from**
 
 **TODO**
diff --git a/doc/site/core/sequence.markdown b/doc/site/core/sequence.markdown
index 75ab4f14..526b3fd4 100644
--- a/doc/site/core/sequence.markdown
+++ b/doc/site/core/sequence.markdown
@@ -6,6 +6,8 @@ core [iterator protocol][] can extend this to get a number of helpful methods.
 
 [iterator protocol]: ../control-flow.html#the-iterator-protocol
 
+## Methods
+
 ### **all**(predicate)
 
 Tests whether all the elements in the sequence pass the `predicate`.
diff --git a/doc/site/core/string.markdown b/doc/site/core/string.markdown
index 9ce97eae..a02f9b9b 100644
--- a/doc/site/core/string.markdown
+++ b/doc/site/core/string.markdown
@@ -31,6 +31,18 @@ on string *return* byte indices too. So, for example, this does what you want:
 In general, methods on strings will work in terms of code units if they can do
 so efficiently, and will otherwise deal in bytes.
 
+## Static Methods
+
+### String.**fromCodePoint**(codePoint)
+
+Creates a new string containing the UTF-8 encoding of `codePoint`.
+
+It is a runtime error if `codePoint` is not an integer between `0` and
+`0x10ffff`, inclusive.
+
+    :::dart
+    String.fromCodePoint(8225) // "‡"
+
 ## Methods
 
 ### **contains**(other)
diff --git a/src/vm/wren_compiler.c b/src/vm/wren_compiler.c
index 8c3da63c..e28b7a9c 100644
--- a/src/vm/wren_compiler.c
+++ b/src/vm/wren_compiler.c
@@ -648,39 +648,18 @@ static void readUnicodeEscape(Parser* parser)
     value = (value * 16) | digit;
   }
 
-  ByteBuffer* buffer = &parser->string;
+  // Grow the buffer enough for the encoded result.
+  int numBytes = wrenUtf8NumBytes(value);
+  if (numBytes != 0)
+  {
+    // TODO: Function to grow buffer in one allocation.
+    for (int i = 0; i < numBytes; i++)
+    {
+      wrenByteBufferWrite(parser->vm, &parser->string, 0);
+    }
 
-  // UTF-8 encode the value.
-  if (value <= 0x7f)
-  {
-    // Single byte (i.e. fits in ASCII).
-    wrenByteBufferWrite(parser->vm, buffer, value & 0x7f);
-  }
-  else if (value <= 0x7ff)
-  {
-    // Two byte sequence: 110xxxxx	 10xxxxxx.
-    wrenByteBufferWrite(parser->vm, buffer, 0xc0 | ((value & 0x7c0) >> 6));
-    wrenByteBufferWrite(parser->vm, buffer, 0x80 | (value & 0x3f));
-  }
-  else if (value <= 0xffff)
-  {
-    // Three byte sequence: 1110xxxx	 10xxxxxx 10xxxxxx.
-    wrenByteBufferWrite(parser->vm, buffer, 0xe0 | ((value & 0xf000) >> 12));
-    wrenByteBufferWrite(parser->vm, buffer, 0x80 | ((value & 0xfc0) >> 6));
-    wrenByteBufferWrite(parser->vm, buffer, 0x80 | (value & 0x3f));
-  }
-  else if (value <= 0x10ffff)
-  {
-    // Four byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
-    wrenByteBufferWrite(parser->vm, buffer, 0xf0 | ((value & 0x1c0000) >> 18));
-    wrenByteBufferWrite(parser->vm, buffer, 0x80 | ((value & 0x3f000) >> 12));
-    wrenByteBufferWrite(parser->vm, buffer, 0x80 | ((value & 0xfc0) >> 6));
-    wrenByteBufferWrite(parser->vm, buffer, 0x80 | (value & 0x3f));
-  }
-  else
-  {
-    // Invalid Unicode value. See: http://tools.ietf.org/html/rfc3629
-    // TODO: Error.
+    wrenUtf8Encode(value,
+                   parser->string.data + parser->string.count - numBytes);
   }
 }
 
diff --git a/src/vm/wren_core.c b/src/vm/wren_core.c
index f34b2cbb..fdd0cf6b 100644
--- a/src/vm/wren_core.c
+++ b/src/vm/wren_core.c
@@ -1211,6 +1211,23 @@ DEF_PRIMITIVE(range_toString)
   RETURN_VAL(result);
 }
 
+DEF_PRIMITIVE(string_fromCodePoint)
+{
+  if (!validateInt(vm, args, 1, "Code point")) return PRIM_ERROR;
+  // Range-check while the value is still a double: an integral double can lie
+  // far outside int's range, and converting such a value to int is undefined.
+  double codePoint = AS_NUM(args[1]);
+  if (codePoint < 0)
+  {
+    RETURN_ERROR("Code point cannot be negative.");
+  }
+  else if (codePoint > 0x10ffff)
+  {
+    RETURN_ERROR("Code point cannot be greater than 0x10ffff.");
+  }
+  RETURN_VAL(wrenStringFromCodePoint(vm, (int)codePoint));
+}
+
 DEF_PRIMITIVE(string_contains)
 {
   if (!validateString(vm, args, 1, "Argument")) return PRIM_ERROR;
@@ -1513,6 +1530,7 @@ void wrenInitializeCore(WrenVM* vm)
   wrenInterpret(vm, "", libSource);
 
   vm->stringClass = AS_CLASS(wrenFindVariable(vm, "String"));
+  PRIMITIVE(vm->stringClass->obj.classObj, "fromCodePoint(_)", string_fromCodePoint);
   PRIMITIVE(vm->stringClass, "+(_)", string_plus);
   PRIMITIVE(vm->stringClass, "[_]", string_subscript);
   PRIMITIVE(vm->stringClass, "contains(_)", string_contains);
diff --git a/src/vm/wren_utils.c b/src/vm/wren_utils.c
index 97666f44..f3490af6 100644
--- a/src/vm/wren_utils.c
+++ b/src/vm/wren_utils.c
@@ -58,3 +58,55 @@ int wrenSymbolTableFind(SymbolTable* symbols, const char* name, size_t length)
 
   return -1;
 }
+
+int wrenUtf8NumBytes(int value)
+{
+  ASSERT(value >= 0, "Cannot encode a negative value.");
+
+  // Exclusive upper bounds of the 1-, 2-, 3- and 4-byte ranges, tried in order.
+  if (value < 0x80) return 1;
+  if (value < 0x800) return 2;
+  if (value < 0x10000) return 3;
+  return (value < 0x110000) ? 4 : 0;
+}
+
+// Writes the UTF-8 encoding of [value] to [bytes]. The caller must provide at
+// least wrenUtf8NumBytes(value) bytes of space. A value above 0x10ffff cannot
+// be encoded: it only trips the assertion below and nothing is written.
+void wrenUtf8Encode(int value, uint8_t* bytes)
+{
+  // Without this, a negative value would fall into the single-byte branch.
+  ASSERT(value >= 0, "Cannot encode a negative value.");
+
+  if (value <= 0x7f)
+  {
+    // Single byte (i.e. fits in ASCII).
+    *bytes = value & 0x7f;
+  }
+  else if (value <= 0x7ff)
+  {
+    // Two byte sequence: 110xxxxx 10xxxxxx.
+    *bytes++ = 0xc0 | ((value & 0x7c0) >> 6);
+    *bytes = 0x80 | (value & 0x3f);
+  }
+  else if (value <= 0xffff)
+  {
+    // Three byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.
+    *bytes++ = 0xe0 | ((value & 0xf000) >> 12);
+    *bytes++ = 0x80 | ((value & 0xfc0) >> 6);
+    *bytes = 0x80 | (value & 0x3f);
+  }
+  else if (value <= 0x10ffff)
+  {
+    // Four byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
+    *bytes++ = 0xf0 | ((value & 0x1c0000) >> 18);
+    *bytes++ = 0x80 | ((value & 0x3f000) >> 12);
+    *bytes++ = 0x80 | ((value & 0xfc0) >> 6);
+    *bytes = 0x80 | (value & 0x3f);
+  }
+  else
+  {
+    // Invalid Unicode value. See: http://tools.ietf.org/html/rfc3629
+    ASSERT(false, "Invalid UTF-8 value.");
+  }
+}
diff --git a/src/vm/wren_utils.h b/src/vm/wren_utils.h
index e527dad4..c1cfe19e 100644
--- a/src/vm/wren_utils.h
+++ b/src/vm/wren_utils.h
@@ -81,4 +81,13 @@ int wrenSymbolTableEnsure(WrenVM* vm, SymbolTable* symbols,
 // Looks up name in the symbol table. Returns its index if found or -1 if not.
 int wrenSymbolTableFind(SymbolTable* symbols, const char* name, size_t length);
 
+// Returns the number of bytes needed to encode [value] in UTF-8, or 0 if
+// [value] is too large to encode (greater than 0x10ffff). [value] must not be
+// negative.
+int wrenUtf8NumBytes(int value);
+
+// Encodes [value] as a series of UTF-8 bytes in [bytes], which is assumed to
+// be large enough to hold the encoded result.
+void wrenUtf8Encode(int value, uint8_t* bytes);
+
 #endif
diff --git a/src/vm/wren_value.c b/src/vm/wren_value.c
index f63b0837..71a01e46 100644
--- a/src/vm/wren_value.c
+++ b/src/vm/wren_value.c
@@ -650,6 +650,19 @@ Value wrenNumToString(WrenVM* vm, double value)
   return wrenNewString(vm, buffer, length);
 }
 
+Value wrenStringFromCodePoint(WrenVM* vm, int value)
+{
+  // A zero byte count means [value] is beyond the encodable range.
+  int numBytes = wrenUtf8NumBytes(value);
+  ASSERT(numBytes != 0, "Value out of range.");
+
+  // Encode straight into the new string's storage, then call hashString on it.
+  ObjString* result = allocateString(vm, numBytes);
+  wrenUtf8Encode(value, (uint8_t*)result->value);
+  hashString(result);
+  return OBJ_VAL(result);
+}
+
 Value wrenStringFormat(WrenVM* vm, const char* format, ...)
 {
   va_list argList;
diff --git a/src/vm/wren_value.h b/src/vm/wren_value.h
index 41868757..2962c96c 100644
--- a/src/vm/wren_value.h
+++ b/src/vm/wren_value.h
@@ -683,6 +683,9 @@ Value wrenNumToString(WrenVM* vm, double value);
 // @ - A Wren string object.
 Value wrenStringFormat(WrenVM* vm, const char* format, ...);
 
+// Creates a new string with the UTF-8 encoding of [value] (0 to 0x10ffff).
+Value wrenStringFromCodePoint(WrenVM* vm, int value);
+
 // Creates a new string containing the code point in [string] starting at byte
 // [index]. If [index] points into the middle of a UTF-8 sequence, returns an
 // empty string.
diff --git a/test/core/string/from_code_point.wren b/test/core/string/from_code_point.wren
new file mode 100644
index 00000000..b61c8c73
--- /dev/null
+++ b/test/core/string/from_code_point.wren
@@ -0,0 +1,5 @@
+IO.print(String.fromCodePoint(65))   // expect: A
+IO.print(String.fromCodePoint(164))   // expect: ¤
+IO.print(String.fromCodePoint(398))   // expect: Ǝ
+IO.print(String.fromCodePoint(8225))   // expect: ‡
+IO.print(String.fromCodePoint(0x254b))   // expect: ╋
diff --git a/test/core/string/from_code_point_not_int.wren b/test/core/string/from_code_point_not_int.wren
new file mode 100644
index 00000000..8ee9e513
--- /dev/null
+++ b/test/core/string/from_code_point_not_int.wren
@@ -0,0 +1 @@
+IO.print(String.fromCodePoint(12.34))   // expect runtime error: Code point must be an integer.
diff --git a/test/core/string/from_code_point_not_num.wren b/test/core/string/from_code_point_not_num.wren
new file mode 100644
index 00000000..51daa10f
--- /dev/null
+++ b/test/core/string/from_code_point_not_num.wren
@@ -0,0 +1 @@
+IO.print(String.fromCodePoint("not num"))   // expect runtime error: Code point must be a number.
diff --git a/test/core/string/from_code_point_too_large.wren b/test/core/string/from_code_point_too_large.wren
new file mode 100644
index 00000000..b213be62
--- /dev/null
+++ b/test/core/string/from_code_point_too_large.wren
@@ -0,0 +1,3 @@
+// UTF-8 mandates that only values up to 10ffff can be encoded.
+// See: http://tools.ietf.org/html/rfc3629
+IO.print(String.fromCodePoint(0x10ffff + 1))   // expect runtime error: Code point cannot be greater than 0x10ffff.
diff --git a/test/core/string/from_code_point_too_small.wren b/test/core/string/from_code_point_too_small.wren
new file mode 100644
index 00000000..af4a08c6
--- /dev/null
+++ b/test/core/string/from_code_point_too_small.wren
@@ -0,0 +1 @@
+IO.print(String.fromCodePoint(-1))   // expect runtime error: Code point cannot be negative.