Move codePointAt() to separate StringCodePointSequence class.

This commit is contained in:
Bob Nystrom
2015-09-11 07:56:01 -07:00
parent bda9ad880a
commit c0b5ec9f15
30 changed files with 202 additions and 76 deletions

View File

@ -130,6 +130,7 @@ class WhereSequence is Sequence {
class String is Sequence {
bytes { StringByteSequence.new(this) }
codePoints { StringCodePointSequence.new(this) }
}
class StringByteSequence is Sequence {
@ -144,6 +145,18 @@ class StringByteSequence is Sequence {
count { _string.byteCount_ }
}
// A Sequence wrapper around a string that exposes it as numeric code points.
// Instances are created by the `codePoints` getter on String, which passes the
// string itself to the constructor.
class StringCodePointSequence is Sequence {
// Stores the wrapped string; every other member forwards to it.
construct new(string) {
_string = string
}
// Returns the result of the string's private `codePointAt_` primitive for
// [index]. The accompanying tests treat [index] as a byte offset (negative
// values count back from the end) and expect -1 when the offset does not
// begin a valid UTF-8 sequence.
[index] { _string.codePointAt_(index) }
// Iteration protocol: advancing is delegated to the string's own `iterate`,
// so this sequence uses the same iterator values as the string does.
iterate(iterator) { _string.iterate(iterator) }
// Iteration protocol: the value at [iterator] comes from the same
// `codePointAt_` primitive that backs the subscript operator.
iteratorValue(iterator) { _string.codePointAt_(iterator) }
// Forwards to the wrapped string's `count`.
count { _string.count }
}
class List is Sequence {
addAll(other) {
for (element in other) {

View File

@ -45,20 +45,6 @@ It is a runtime error if `codePoint` is not an integer between `0` and
## Methods
### **byteAt**(index)
Gets the value of the byte at byte offset `index` in the string.
:::dart
IO.print("hello".byteAt(1)) // 101, for "e".
If the index is negative, it counts backwards from the end of the string.
:::dart
IO.print("hello".byteAt(-4)) // 101, for "e".
It is a runtime error if `index` is not an integer or is out of bounds.
### **bytes**
Gets a [`Sequence`](sequence.html) that can be used to access the raw bytes of

View File

@ -144,6 +144,7 @@ static const char* coreLibSource =
"\n"
"class String is Sequence {\n"
" bytes { StringByteSequence.new(this) }\n"
" codePoints { StringCodePointSequence.new(this) }\n"
"}\n"
"\n"
"class StringByteSequence is Sequence {\n"
@ -158,6 +159,18 @@ static const char* coreLibSource =
" count { _string.byteCount_ }\n"
"}\n"
"\n"
"class StringCodePointSequence is Sequence {\n"
" construct new(string) {\n"
" _string = string\n"
" }\n"
"\n"
" [index] { _string.codePointAt_(index) }\n"
" iterate(iterator) { _string.iterate(iterator) }\n"
" iteratorValue(iterator) { _string.codePointAt_(iterator) }\n"
"\n"
" count { _string.count }\n"
"}\n"
"\n"
"class List is Sequence {\n"
" addAll(other) {\n"
" for (element in other) {\n"
@ -1418,7 +1431,7 @@ void wrenInitializeCore(WrenVM* vm)
PRIMITIVE(vm->stringClass, "[_]", string_subscript);
PRIMITIVE(vm->stringClass, "byteAt_(_)", string_byteAt);
PRIMITIVE(vm->stringClass, "byteCount_", string_byteCount);
PRIMITIVE(vm->stringClass, "codePointAt(_)", string_codePointAt);
PRIMITIVE(vm->stringClass, "codePointAt_(_)", string_codePointAt);
PRIMITIVE(vm->stringClass, "contains(_)", string_contains);
PRIMITIVE(vm->stringClass, "count", string_count);
PRIMITIVE(vm->stringClass, "endsWith(_)", string_endsWith);

View File

@ -772,8 +772,18 @@ Value wrenStringCodePointAt(WrenVM* vm, ObjString* string, uint32_t index)
{
ASSERT(index < string->length, "Index out of bounds.");
int numBytes = wrenUtf8DecodeNumBytes(string->value[index]);
return wrenNewString(vm, string->value + index, numBytes);
int codePoint = wrenUtf8Decode((uint8_t*)string->value + index,
string->length - index);
if (codePoint == -1)
{
// If it isn't a valid UTF-8 sequence, treat it as a single raw byte.
char bytes[2];
bytes[0] = string->value[index];
bytes[1] = '\0';
return wrenNewString(vm, bytes, 1);
}
return wrenStringFromCodePoint(vm, codePoint);
}
// Uses the Boyer-Moore-Horspool string matching algorithm.

View File

@ -1,38 +0,0 @@
// Bytes: 11111
// 012345678901234
// Chars: sø mé ஃ thî ng
var s = "søméஃthîng"
IO.print(s.codePointAt(0)) // expect: 115
IO.print(s.codePointAt(1)) // expect: 248
IO.print(s.codePointAt(2)) // expect: -1
IO.print(s.codePointAt(3)) // expect: 109
IO.print(s.codePointAt(4)) // expect: 233
IO.print(s.codePointAt(5)) // expect: -1
IO.print(s.codePointAt(6)) // expect: 2947
IO.print(s.codePointAt(7)) // expect: -1
IO.print(s.codePointAt(8)) // expect: -1
IO.print(s.codePointAt(9)) // expect: 116
IO.print(s.codePointAt(10)) // expect: 104
IO.print(s.codePointAt(11)) // expect: 238
IO.print(s.codePointAt(12)) // expect: -1
IO.print(s.codePointAt(13)) // expect: 110
IO.print(s.codePointAt(14)) // expect: 103
IO.print(s.codePointAt(-15)) // expect: 115
IO.print(s.codePointAt(-14)) // expect: 248
IO.print(s.codePointAt(-13)) // expect: -1
IO.print(s.codePointAt(-12)) // expect: 109
IO.print(s.codePointAt(-11)) // expect: 233
IO.print(s.codePointAt(-10)) // expect: -1
IO.print(s.codePointAt(-9)) // expect: 2947
IO.print(s.codePointAt(-8)) // expect: -1
IO.print(s.codePointAt(-7)) // expect: -1
IO.print(s.codePointAt(-6)) // expect: 116
IO.print(s.codePointAt(-5)) // expect: 104
IO.print(s.codePointAt(-4)) // expect: 238
IO.print(s.codePointAt(-3)) // expect: -1
IO.print(s.codePointAt(-2)) // expect: 110
IO.print(s.codePointAt(-1)) // expect: 103
IO.print("\0".codePointAt(0)) // expect: 0

View File

@ -1,8 +0,0 @@
// The first byte of a two-octet sequence.
IO.print("\xc0".codePointAt(0)) // expect: -1
// The first byte of a three-octet sequence.
IO.print("\xe0".codePointAt(0)) // expect: -1
// The first two bytes of a three-octet sequence.
IO.print("\xe0\xae".codePointAt(0)) // expect: -1

View File

@ -1 +0,0 @@
IO.print("string".codePointAt(12.34)) // expect runtime error: Index must be an integer.

View File

@ -1 +0,0 @@
IO.print("string".codePointAt("not num")) // expect runtime error: Index must be a number.

View File

@ -1 +0,0 @@
IO.print("string".codePointAt(6)) // expect runtime error: Index out of bounds.

View File

@ -1 +0,0 @@
IO.print("string".codePointAt(-7)) // expect runtime error: Index out of bounds.

View File

@ -22,3 +22,8 @@ IO.print("a\0b\0c".iterate(1)) // expect: 2
IO.print("a\0b\0c".iterate(2)) // expect: 3
IO.print("a\0b\0c".iterate(3)) // expect: 4
IO.print("a\0b\0c".iterate(4)) // expect: false
// Iterates over invalid UTF-8 one byte at a time.
IO.print("\xef\xf7".iterate(null)) // expect: 0
IO.print("\xef\xf7".iterate(0)) // expect: 1
IO.print("\xef\xf7".iterate(1)) // expect: false

View File

@ -2,8 +2,8 @@ var s = "abçd"
IO.print(s.iteratorValue(0)) // expect: a
IO.print(s.iteratorValue(1)) // expect: b
IO.print(s.iteratorValue(2)) // expect: ç
// Iterator value in middle of UTF sequence is an empty string.
IO.print(s.iteratorValue(3) == "") // expect: true
// Iterator value in middle of UTF sequence is the unencoded byte.
IO.print(s.iteratorValue(3) == "\xa7") // expect: true
IO.print(s.iteratorValue(4)) // expect: d
// 8-bit clean.
@ -13,3 +13,7 @@ IO.print(t.iteratorValue(1) == "\0") // expect: true
IO.print(t.iteratorValue(2) == "b") // expect: true
IO.print(t.iteratorValue(3) == "\0") // expect: true
IO.print(t.iteratorValue(4) == "c") // expect: true
// Returns single byte strings for invalid UTF-8 sequences.
IO.print("\xef\xf7".iteratorValue(0) == "\xef") // expect: true
IO.print("\xef\xf7".iteratorValue(1) == "\xf7") // expect: true

View File

@ -27,12 +27,12 @@ IO.print("søméஃthîng"[-1]) // expect: g
IO.print("søméஃthîng"[-2]) // expect: n
IO.print("søméஃthîng"[-4]) // expect: î
// If the subscript is in the middle of a UTF-8 sequence, yield an empty string.
IO.print("søméஃthîng"[2] == "") // expect: true
IO.print("søméஃthîng"[7] == "") // expect: true
IO.print("søméஃthîng"[8] == "") // expect: true
IO.print("søméஃ"[-1] == "") // expect: true
IO.print("søméஃ"[-2] == "") // expect: true
// If the subscript is in the middle of a UTF-8 sequence, return the raw byte.
IO.print("søméஃthîng"[2] == "\xb8") // expect: true
IO.print("søméஃthîng"[7] == "\xae") // expect: true
IO.print("søméஃthîng"[8] == "\x83") // expect: true
IO.print("søméஃ"[-1] == "\x83") // expect: true
IO.print("søméஃ"[-2] == "\xae") // expect: true
// 8-bit clean.
IO.print("a\0b\0c"[0] == "a") // expect: true
@ -40,3 +40,7 @@ IO.print("a\0b\0c"[1] == "\0") // expect: true
IO.print("a\0b\0c"[2] == "b") // expect: true
IO.print("a\0b\0c"[3] == "\0") // expect: true
IO.print("a\0b\0c"[4] == "c") // expect: true
// Returns single byte strings for invalid UTF-8 sequences.
IO.print("\xef\xf7"[0] == "\xef") // expect: true
IO.print("\xef\xf7"[1] == "\xf7") // expect: true

View File

@ -45,3 +45,5 @@ IO.print("søméஃthîng"[3...10]) // expect: méஃt
IO.print("søméஃthîng"[2..6]) // expect: méஃ
IO.print("søméஃthîng"[2...6]) // expect: mé
IO.print("søméஃthîng"[2...7]) // expect: méஃ
// TODO: Strings including invalid UTF-8.

View File

@ -12,4 +12,4 @@ IO.print("søméஃ".bytes.count) // expect: 9
IO.print("\0\0\0".bytes.count) // expect: 3
// Invalid UTF-8.
IO.print("\xef\x00".bytes.count) // expect: 2
IO.print("\xef\xf7".bytes.count) // expect: 2

View File

@ -0,0 +1,29 @@
var codePoints = "abçd".codePoints
IO.print(codePoints.iterate(null)) // expect: 0
IO.print(codePoints.iterate(0)) // expect: 1
IO.print(codePoints.iterate(1)) // expect: 2
// Skip 3 because that's the middle of the ç sequence.
IO.print(codePoints.iterate(2)) // expect: 4
// Iterating from the middle of a UTF-8 sequence goes to the next one.
IO.print(codePoints.iterate(3)) // expect: 4
IO.print(codePoints.iterate(4)) // expect: false
// Out of bounds.
IO.print(codePoints.iterate(123)) // expect: false
IO.print(codePoints.iterate(-1)) // expect: false
// Nothing to iterate in an empty string.
IO.print("".codePoints.iterate(null)) // expect: false
// 8-bit clean.
IO.print("a\0b\0c".codePoints.iterate(null)) // expect: 0
IO.print("a\0b\0c".codePoints.iterate(0)) // expect: 1
IO.print("a\0b\0c".codePoints.iterate(1)) // expect: 2
IO.print("a\0b\0c".codePoints.iterate(2)) // expect: 3
IO.print("a\0b\0c".codePoints.iterate(3)) // expect: 4
IO.print("a\0b\0c".codePoints.iterate(4)) // expect: false
// Iterates over invalid UTF-8 one byte at a time.
IO.print("\xef\xf7".codePoints.iterate(null)) // expect: 0
IO.print("\xef\xf7".codePoints.iterate(0)) // expect: 1
IO.print("\xef\xf7".codePoints.iterate(1)) // expect: false

View File

@ -0,0 +1 @@
"s".codePoints.iterate(1.5) // expect runtime error: Iterator must be an integer.

View File

@ -0,0 +1 @@
"s".codePoints.iterate("2") // expect runtime error: Iterator must be a number.

View File

@ -0,0 +1,42 @@
// Bytes: 11111
// 012345678901234
// Chars: sø mé ஃ thî ng
var codePoints = "søméஃthîng".codePoints
IO.print(codePoints.iteratorValue(0)) // expect: 115
IO.print(codePoints.iteratorValue(1)) // expect: 248
IO.print(codePoints.iteratorValue(2)) // expect: -1
IO.print(codePoints.iteratorValue(3)) // expect: 109
IO.print(codePoints.iteratorValue(4)) // expect: 233
IO.print(codePoints.iteratorValue(5)) // expect: -1
IO.print(codePoints.iteratorValue(6)) // expect: 2947
IO.print(codePoints.iteratorValue(7)) // expect: -1
IO.print(codePoints.iteratorValue(8)) // expect: -1
IO.print(codePoints.iteratorValue(9)) // expect: 116
IO.print(codePoints.iteratorValue(10)) // expect: 104
IO.print(codePoints.iteratorValue(11)) // expect: 238
IO.print(codePoints.iteratorValue(12)) // expect: -1
IO.print(codePoints.iteratorValue(13)) // expect: 110
IO.print(codePoints.iteratorValue(14)) // expect: 103
IO.print(codePoints.iteratorValue(-15)) // expect: 115
IO.print(codePoints.iteratorValue(-14)) // expect: 248
IO.print(codePoints.iteratorValue(-13)) // expect: -1
IO.print(codePoints.iteratorValue(-12)) // expect: 109
IO.print(codePoints.iteratorValue(-11)) // expect: 233
IO.print(codePoints.iteratorValue(-10)) // expect: -1
IO.print(codePoints.iteratorValue(-9)) // expect: 2947
IO.print(codePoints.iteratorValue(-8)) // expect: -1
IO.print(codePoints.iteratorValue(-7)) // expect: -1
IO.print(codePoints.iteratorValue(-6)) // expect: 116
IO.print(codePoints.iteratorValue(-5)) // expect: 104
IO.print(codePoints.iteratorValue(-4)) // expect: 238
IO.print(codePoints.iteratorValue(-3)) // expect: -1
IO.print(codePoints.iteratorValue(-2)) // expect: 110
IO.print(codePoints.iteratorValue(-1)) // expect: 103
IO.print("\0".codePoints.iteratorValue(0)) // expect: 0
// Returns -1 for invalid UTF-8 sequences.
IO.print("\xef\xf7".codePoints.iteratorValue(0)) // expect: -1
IO.print("\xef\xf7".codePoints.iteratorValue(1)) // expect: -1

View File

@ -0,0 +1,8 @@
// The first byte of a two-octet sequence.
IO.print("\xc0".codePoints.iteratorValue(0)) // expect: -1
// The first byte of a three-octet sequence.
IO.print("\xe0".codePoints.iteratorValue(0)) // expect: -1
// The first two bytes of a three-octet sequence.
IO.print("\xe0\xae".codePoints.iteratorValue(0)) // expect: -1

View File

@ -0,0 +1 @@
IO.print("string".codePoints.iteratorValue(12.34)) // expect runtime error: Index must be an integer.

View File

@ -0,0 +1 @@
IO.print("string".codePoints.iteratorValue("not num")) // expect runtime error: Index must be a number.

View File

@ -0,0 +1 @@
IO.print("string".codePoints.iteratorValue(6)) // expect runtime error: Index out of bounds.

View File

@ -0,0 +1 @@
IO.print("string".codePoints.iteratorValue(-7)) // expect runtime error: Index out of bounds.

View File

@ -0,0 +1,42 @@
// Bytes: 11111
// 012345678901234
// Chars: sø mé ஃ thî ng
var codePoints = "søméஃthîng".codePoints
IO.print(codePoints[0]) // expect: 115
IO.print(codePoints[1]) // expect: 248
IO.print(codePoints[2]) // expect: -1
IO.print(codePoints[3]) // expect: 109
IO.print(codePoints[4]) // expect: 233
IO.print(codePoints[5]) // expect: -1
IO.print(codePoints[6]) // expect: 2947
IO.print(codePoints[7]) // expect: -1
IO.print(codePoints[8]) // expect: -1
IO.print(codePoints[9]) // expect: 116
IO.print(codePoints[10]) // expect: 104
IO.print(codePoints[11]) // expect: 238
IO.print(codePoints[12]) // expect: -1
IO.print(codePoints[13]) // expect: 110
IO.print(codePoints[14]) // expect: 103
IO.print(codePoints[-15]) // expect: 115
IO.print(codePoints[-14]) // expect: 248
IO.print(codePoints[-13]) // expect: -1
IO.print(codePoints[-12]) // expect: 109
IO.print(codePoints[-11]) // expect: 233
IO.print(codePoints[-10]) // expect: -1
IO.print(codePoints[-9]) // expect: 2947
IO.print(codePoints[-8]) // expect: -1
IO.print(codePoints[-7]) // expect: -1
IO.print(codePoints[-6]) // expect: 116
IO.print(codePoints[-5]) // expect: 104
IO.print(codePoints[-4]) // expect: 238
IO.print(codePoints[-3]) // expect: -1
IO.print(codePoints[-2]) // expect: 110
IO.print(codePoints[-1]) // expect: 103
IO.print("\0".codePoints[0]) // expect: 0
// Returns -1 for invalid UTF-8 sequences.
IO.print("\xef\xf7".codePoints[0]) // expect: -1
IO.print("\xef\xf7".codePoints[1]) // expect: -1

View File

@ -0,0 +1,8 @@
// The first byte of a two-octet sequence.
IO.print("\xc0".codePoints[0]) // expect: -1
// The first byte of a three-octet sequence.
IO.print("\xe0".codePoints[0]) // expect: -1
// The first two bytes of a three-octet sequence.
IO.print("\xe0\xae".codePoints[0]) // expect: -1

View File

@ -0,0 +1 @@
IO.print("string".codePoints[12.34]) // expect runtime error: Index must be an integer.

View File

@ -0,0 +1 @@
IO.print("string".codePoints["not num"]) // expect runtime error: Index must be a number.

View File

@ -0,0 +1 @@
IO.print("string".codePoints[6]) // expect runtime error: Index out of bounds.

View File

@ -0,0 +1 @@
IO.print("string".codePoints[-7]) // expect runtime error: Index out of bounds.