mirror of
https://github.com/wren-lang/wren.git
synced 2026-01-11 06:08:41 +01:00
Move codePointAt() to separate CodePointSequence class.
This commit is contained in:
@ -130,6 +130,7 @@ class WhereSequence is Sequence {
|
||||
|
||||
class String is Sequence {
|
||||
bytes { StringByteSequence.new(this) }
|
||||
codePoints { StringCodePointSequence.new(this) }
|
||||
}
|
||||
|
||||
class StringByteSequence is Sequence {
|
||||
@ -144,6 +145,18 @@ class StringByteSequence is Sequence {
|
||||
count { _string.byteCount_ }
|
||||
}
|
||||
|
||||
class StringCodePointSequence is Sequence {
|
||||
construct new(string) {
|
||||
_string = string
|
||||
}
|
||||
|
||||
[index] { _string.codePointAt_(index) }
|
||||
iterate(iterator) { _string.iterate(iterator) }
|
||||
iteratorValue(iterator) { _string.codePointAt_(iterator) }
|
||||
|
||||
count { _string.count }
|
||||
}
|
||||
|
||||
class List is Sequence {
|
||||
addAll(other) {
|
||||
for (element in other) {
|
||||
|
||||
@ -45,20 +45,6 @@ It is a runtime error if `codePoint` is not an integer between `0` and
|
||||
|
||||
## Methods
|
||||
|
||||
### **byteAt**(index)
|
||||
|
||||
Gets the value of the byte at byte offset `index` in the string.
|
||||
|
||||
:::dart
|
||||
IO.print("hello".byteAt(1)) // 101, for "e".
|
||||
|
||||
If the index is negative, it counts backwards from the end of the string.
|
||||
|
||||
:::dart
|
||||
IO.print("hello".byteAt(-4)) // 101, for "e".
|
||||
|
||||
It is a runtime error if `index` is not an integer or is out of bounds.
|
||||
|
||||
### **bytes**
|
||||
|
||||
Gets a [`Sequence`](sequence.html) that can be used to access the raw bytes of
|
||||
|
||||
@ -144,6 +144,7 @@ static const char* coreLibSource =
|
||||
"\n"
|
||||
"class String is Sequence {\n"
|
||||
" bytes { StringByteSequence.new(this) }\n"
|
||||
" codePoints { StringCodePointSequence.new(this) }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"class StringByteSequence is Sequence {\n"
|
||||
@ -158,6 +159,18 @@ static const char* coreLibSource =
|
||||
" count { _string.byteCount_ }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"class StringCodePointSequence is Sequence {\n"
|
||||
" construct new(string) {\n"
|
||||
" _string = string\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" [index] { _string.codePointAt_(index) }\n"
|
||||
" iterate(iterator) { _string.iterate(iterator) }\n"
|
||||
" iteratorValue(iterator) { _string.codePointAt_(iterator) }\n"
|
||||
"\n"
|
||||
" count { _string.count }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"class List is Sequence {\n"
|
||||
" addAll(other) {\n"
|
||||
" for (element in other) {\n"
|
||||
@ -1418,7 +1431,7 @@ void wrenInitializeCore(WrenVM* vm)
|
||||
PRIMITIVE(vm->stringClass, "[_]", string_subscript);
|
||||
PRIMITIVE(vm->stringClass, "byteAt_(_)", string_byteAt);
|
||||
PRIMITIVE(vm->stringClass, "byteCount_", string_byteCount);
|
||||
PRIMITIVE(vm->stringClass, "codePointAt(_)", string_codePointAt);
|
||||
PRIMITIVE(vm->stringClass, "codePointAt_(_)", string_codePointAt);
|
||||
PRIMITIVE(vm->stringClass, "contains(_)", string_contains);
|
||||
PRIMITIVE(vm->stringClass, "count", string_count);
|
||||
PRIMITIVE(vm->stringClass, "endsWith(_)", string_endsWith);
|
||||
|
||||
@ -772,8 +772,18 @@ Value wrenStringCodePointAt(WrenVM* vm, ObjString* string, uint32_t index)
|
||||
{
|
||||
ASSERT(index < string->length, "Index out of bounds.");
|
||||
|
||||
int numBytes = wrenUtf8DecodeNumBytes(string->value[index]);
|
||||
return wrenNewString(vm, string->value + index, numBytes);
|
||||
int codePoint = wrenUtf8Decode((uint8_t*)string->value + index,
|
||||
string->length - index);
|
||||
if (codePoint == -1)
|
||||
{
|
||||
// If it isn't a valid UTF-8 sequence, treat it as a single raw byte.
|
||||
char bytes[2];
|
||||
bytes[0] = string->value[index];
|
||||
bytes[1] = '\0';
|
||||
return wrenNewString(vm, bytes, 1);
|
||||
}
|
||||
|
||||
return wrenStringFromCodePoint(vm, codePoint);
|
||||
}
|
||||
|
||||
// Uses the Boyer-Moore-Horspool string matching algorithm.
|
||||
|
||||
@ -1,38 +0,0 @@
|
||||
// Bytes: 11111
|
||||
// 012345678901234
|
||||
// Chars: sø mé ஃ thî ng
|
||||
var s = "søméஃthîng"
|
||||
|
||||
IO.print(s.codePointAt(0)) // expect: 115
|
||||
IO.print(s.codePointAt(1)) // expect: 248
|
||||
IO.print(s.codePointAt(2)) // expect: -1
|
||||
IO.print(s.codePointAt(3)) // expect: 109
|
||||
IO.print(s.codePointAt(4)) // expect: 233
|
||||
IO.print(s.codePointAt(5)) // expect: -1
|
||||
IO.print(s.codePointAt(6)) // expect: 2947
|
||||
IO.print(s.codePointAt(7)) // expect: -1
|
||||
IO.print(s.codePointAt(8)) // expect: -1
|
||||
IO.print(s.codePointAt(9)) // expect: 116
|
||||
IO.print(s.codePointAt(10)) // expect: 104
|
||||
IO.print(s.codePointAt(11)) // expect: 238
|
||||
IO.print(s.codePointAt(12)) // expect: -1
|
||||
IO.print(s.codePointAt(13)) // expect: 110
|
||||
IO.print(s.codePointAt(14)) // expect: 103
|
||||
|
||||
IO.print(s.codePointAt(-15)) // expect: 115
|
||||
IO.print(s.codePointAt(-14)) // expect: 248
|
||||
IO.print(s.codePointAt(-13)) // expect: -1
|
||||
IO.print(s.codePointAt(-12)) // expect: 109
|
||||
IO.print(s.codePointAt(-11)) // expect: 233
|
||||
IO.print(s.codePointAt(-10)) // expect: -1
|
||||
IO.print(s.codePointAt(-9)) // expect: 2947
|
||||
IO.print(s.codePointAt(-8)) // expect: -1
|
||||
IO.print(s.codePointAt(-7)) // expect: -1
|
||||
IO.print(s.codePointAt(-6)) // expect: 116
|
||||
IO.print(s.codePointAt(-5)) // expect: 104
|
||||
IO.print(s.codePointAt(-4)) // expect: 238
|
||||
IO.print(s.codePointAt(-3)) // expect: -1
|
||||
IO.print(s.codePointAt(-2)) // expect: 110
|
||||
IO.print(s.codePointAt(-1)) // expect: 103
|
||||
|
||||
IO.print("\0".codePointAt(0)) // expect: 0
|
||||
@ -1,8 +0,0 @@
|
||||
// The first byte of a two-octet sequence.
|
||||
IO.print("\xc0".codePointAt(0)) // expect: -1
|
||||
|
||||
// The first byte of a three-octet sequence.
|
||||
IO.print("\xe0".codePointAt(0)) // expect: -1
|
||||
|
||||
// The first two bytes of a three-octet sequence.
|
||||
IO.print("\xe0\xae".codePointAt(0)) // expect: -1
|
||||
@ -1 +0,0 @@
|
||||
IO.print("string".codePointAt(12.34)) // expect runtime error: Index must be an integer.
|
||||
@ -1 +0,0 @@
|
||||
IO.print("string".codePointAt("not num")) // expect runtime error: Index must be a number.
|
||||
@ -1 +0,0 @@
|
||||
IO.print("string".codePointAt(6)) // expect runtime error: Index out of bounds.
|
||||
@ -1 +0,0 @@
|
||||
IO.print("string".codePointAt(-7)) // expect runtime error: Index out of bounds.
|
||||
@ -22,3 +22,8 @@ IO.print("a\0b\0c".iterate(1)) // expect: 2
|
||||
IO.print("a\0b\0c".iterate(2)) // expect: 3
|
||||
IO.print("a\0b\0c".iterate(3)) // expect: 4
|
||||
IO.print("a\0b\0c".iterate(4)) // expect: false
|
||||
|
||||
// Iterates over invalid UTF-8 one byte at a time.
|
||||
IO.print("\xef\xf7".iterate(null)) // expect: 0
|
||||
IO.print("\xef\xf7".iterate(0)) // expect: 1
|
||||
IO.print("\xef\xf7".iterate(1)) // expect: false
|
||||
|
||||
@ -2,8 +2,8 @@ var s = "abçd"
|
||||
IO.print(s.iteratorValue(0)) // expect: a
|
||||
IO.print(s.iteratorValue(1)) // expect: b
|
||||
IO.print(s.iteratorValue(2)) // expect: ç
|
||||
// Iterator value in middle of UTF sequence is an empty string.
|
||||
IO.print(s.iteratorValue(3) == "") // expect: true
|
||||
// Iterator value in middle of UTF sequence is the unencoded byte.
|
||||
IO.print(s.iteratorValue(3) == "\xa7") // expect: true
|
||||
IO.print(s.iteratorValue(4)) // expect: d
|
||||
|
||||
// 8-bit clean.
|
||||
@ -13,3 +13,7 @@ IO.print(t.iteratorValue(1) == "\0") // expect: true
|
||||
IO.print(t.iteratorValue(2) == "b") // expect: true
|
||||
IO.print(t.iteratorValue(3) == "\0") // expect: true
|
||||
IO.print(t.iteratorValue(4) == "c") // expect: true
|
||||
|
||||
// Returns single byte strings for invalid UTF-8 sequences.
|
||||
IO.print("\xef\xf7".iteratorValue(0) == "\xef") // expect: true
|
||||
IO.print("\xef\xf7".iteratorValue(1) == "\xf7") // expect: true
|
||||
|
||||
@ -27,12 +27,12 @@ IO.print("søméஃthîng"[-1]) // expect: g
|
||||
IO.print("søméஃthîng"[-2]) // expect: n
|
||||
IO.print("søméஃthîng"[-4]) // expect: î
|
||||
|
||||
// If the subscript is in the middle of a UTF-8 sequence, yield an empty string.
|
||||
IO.print("søméஃthîng"[2] == "") // expect: true
|
||||
IO.print("søméஃthîng"[7] == "") // expect: true
|
||||
IO.print("søméஃthîng"[8] == "") // expect: true
|
||||
IO.print("søméஃ"[-1] == "") // expect: true
|
||||
IO.print("søméஃ"[-2] == "") // expect: true
|
||||
// If the subscript is in the middle of a UTF-8 sequence, return the raw byte.
|
||||
IO.print("søméஃthîng"[2] == "\xb8") // expect: true
|
||||
IO.print("søméஃthîng"[7] == "\xae") // expect: true
|
||||
IO.print("søméஃthîng"[8] == "\x83") // expect: true
|
||||
IO.print("søméஃ"[-1] == "\x83") // expect: true
|
||||
IO.print("søméஃ"[-2] == "\xae") // expect: true
|
||||
|
||||
// 8-bit clean.
|
||||
IO.print("a\0b\0c"[0] == "a") // expect: true
|
||||
@ -40,3 +40,7 @@ IO.print("a\0b\0c"[1] == "\0") // expect: true
|
||||
IO.print("a\0b\0c"[2] == "b") // expect: true
|
||||
IO.print("a\0b\0c"[3] == "\0") // expect: true
|
||||
IO.print("a\0b\0c"[4] == "c") // expect: true
|
||||
|
||||
// Returns single byte strings for invalid UTF-8 sequences.
|
||||
IO.print("\xef\xf7"[0] == "\xef") // expect: true
|
||||
IO.print("\xef\xf7"[1] == "\xf7") // expect: true
|
||||
|
||||
@ -45,3 +45,5 @@ IO.print("søméஃthîng"[3...10]) // expect: méஃt
|
||||
IO.print("søméஃthîng"[2..6]) // expect: méஃ
|
||||
IO.print("søméஃthîng"[2...6]) // expect: mé
|
||||
IO.print("søméஃthîng"[2...7]) // expect: méஃ
|
||||
|
||||
// TODO: Strings including invalid UTF-8.
|
||||
|
||||
@ -12,4 +12,4 @@ IO.print("søméஃ".bytes.count) // expect: 9
|
||||
IO.print("\0\0\0".bytes.count) // expect: 3
|
||||
|
||||
// Invalid UTF-8.
|
||||
IO.print("\xef\x00".bytes.count) // expect: 2
|
||||
IO.print("\xef\xf7".bytes.count) // expect: 2
|
||||
|
||||
29
test/core/string_code_point_sequence/iterate.wren
Normal file
29
test/core/string_code_point_sequence/iterate.wren
Normal file
@ -0,0 +1,29 @@
|
||||
var codePoints = "abçd".codePoints
|
||||
IO.print(codePoints.iterate(null)) // expect: 0
|
||||
IO.print(codePoints.iterate(0)) // expect: 1
|
||||
IO.print(codePoints.iterate(1)) // expect: 2
|
||||
// Skip 3 because that's the middle of the ç sequence.
|
||||
IO.print(codePoints.iterate(2)) // expect: 4
|
||||
// Iterating from the middle of a UTF-8 sequence goes to the next one.
|
||||
IO.print(codePoints.iterate(3)) // expect: 4
|
||||
IO.print(codePoints.iterate(4)) // expect: false
|
||||
|
||||
// Out of bounds.
|
||||
IO.print(codePoints.iterate(123)) // expect: false
|
||||
IO.print(codePoints.iterate(-1)) // expect: false
|
||||
|
||||
// Nothing to iterate in an empty string.
|
||||
IO.print("".codePoints.iterate(null)) // expect: false
|
||||
|
||||
// 8-bit clean.
|
||||
IO.print("a\0b\0c".codePoints.iterate(null)) // expect: 0
|
||||
IO.print("a\0b\0c".codePoints.iterate(0)) // expect: 1
|
||||
IO.print("a\0b\0c".codePoints.iterate(1)) // expect: 2
|
||||
IO.print("a\0b\0c".codePoints.iterate(2)) // expect: 3
|
||||
IO.print("a\0b\0c".codePoints.iterate(3)) // expect: 4
|
||||
IO.print("a\0b\0c".codePoints.iterate(4)) // expect: false
|
||||
|
||||
// Iterates over invalid UTF-8 one byte at a time.
|
||||
IO.print("\xef\xf7".codePoints.iterate(null)) // expect: 0
|
||||
IO.print("\xef\xf7".codePoints.iterate(0)) // expect: 1
|
||||
IO.print("\xef\xf7".codePoints.iterate(1)) // expect: false
|
||||
@ -0,0 +1 @@
|
||||
"s".codePoints.iterate(1.5) // expect runtime error: Iterator must be an integer.
|
||||
@ -0,0 +1 @@
|
||||
"s".codePoints.iterate("2") // expect runtime error: Iterator must be a number.
|
||||
42
test/core/string_code_point_sequence/iterator_value.wren
Normal file
42
test/core/string_code_point_sequence/iterator_value.wren
Normal file
@ -0,0 +1,42 @@
|
||||
// Bytes: 11111
|
||||
// 012345678901234
|
||||
// Chars: sø mé ஃ thî ng
|
||||
var codePoints = "søméஃthîng".codePoints
|
||||
|
||||
IO.print(codePoints.iteratorValue(0)) // expect: 115
|
||||
IO.print(codePoints.iteratorValue(1)) // expect: 248
|
||||
IO.print(codePoints.iteratorValue(2)) // expect: -1
|
||||
IO.print(codePoints.iteratorValue(3)) // expect: 109
|
||||
IO.print(codePoints.iteratorValue(4)) // expect: 233
|
||||
IO.print(codePoints.iteratorValue(5)) // expect: -1
|
||||
IO.print(codePoints.iteratorValue(6)) // expect: 2947
|
||||
IO.print(codePoints.iteratorValue(7)) // expect: -1
|
||||
IO.print(codePoints.iteratorValue(8)) // expect: -1
|
||||
IO.print(codePoints.iteratorValue(9)) // expect: 116
|
||||
IO.print(codePoints.iteratorValue(10)) // expect: 104
|
||||
IO.print(codePoints.iteratorValue(11)) // expect: 238
|
||||
IO.print(codePoints.iteratorValue(12)) // expect: -1
|
||||
IO.print(codePoints.iteratorValue(13)) // expect: 110
|
||||
IO.print(codePoints.iteratorValue(14)) // expect: 103
|
||||
|
||||
IO.print(codePoints.iteratorValue(-15)) // expect: 115
|
||||
IO.print(codePoints.iteratorValue(-14)) // expect: 248
|
||||
IO.print(codePoints.iteratorValue(-13)) // expect: -1
|
||||
IO.print(codePoints.iteratorValue(-12)) // expect: 109
|
||||
IO.print(codePoints.iteratorValue(-11)) // expect: 233
|
||||
IO.print(codePoints.iteratorValue(-10)) // expect: -1
|
||||
IO.print(codePoints.iteratorValue(-9)) // expect: 2947
|
||||
IO.print(codePoints.iteratorValue(-8)) // expect: -1
|
||||
IO.print(codePoints.iteratorValue(-7)) // expect: -1
|
||||
IO.print(codePoints.iteratorValue(-6)) // expect: 116
|
||||
IO.print(codePoints.iteratorValue(-5)) // expect: 104
|
||||
IO.print(codePoints.iteratorValue(-4)) // expect: 238
|
||||
IO.print(codePoints.iteratorValue(-3)) // expect: -1
|
||||
IO.print(codePoints.iteratorValue(-2)) // expect: 110
|
||||
IO.print(codePoints.iteratorValue(-1)) // expect: 103
|
||||
|
||||
IO.print("\0".codePoints.iteratorValue(0)) // expect: 0
|
||||
|
||||
// Returns -1 for invalid UTF-8 sequences.
|
||||
IO.print("\xef\xf7".codePoints.iteratorValue(0)) // expect: -1
|
||||
IO.print("\xef\xf7".codePoints.iteratorValue(1)) // expect: -1
|
||||
@ -0,0 +1,8 @@
|
||||
// The first byte of a two-octet sequence.
|
||||
IO.print("\xc0".codePoints.iteratorValue(0)) // expect: -1
|
||||
|
||||
// The first byte of a three-octet sequence.
|
||||
IO.print("\xe0".codePoints.iteratorValue(0)) // expect: -1
|
||||
|
||||
// The first two bytes of a three-octet sequence.
|
||||
IO.print("\xe0\xae".codePoints.iteratorValue(0)) // expect: -1
|
||||
@ -0,0 +1 @@
|
||||
IO.print("string".codePoints.iteratorValue(12.34)) // expect runtime error: Index must be an integer.
|
||||
@ -0,0 +1 @@
|
||||
IO.print("string".codePoints.iteratorValue("not num")) // expect runtime error: Index must be a number.
|
||||
@ -0,0 +1 @@
|
||||
IO.print("string".codePoints.iteratorValue(6)) // expect runtime error: Index out of bounds.
|
||||
@ -0,0 +1 @@
|
||||
IO.print("string".codePoints.iteratorValue(-7)) // expect runtime error: Index out of bounds.
|
||||
42
test/core/string_code_point_sequence/subscript.wren
Normal file
42
test/core/string_code_point_sequence/subscript.wren
Normal file
@ -0,0 +1,42 @@
|
||||
// Bytes: 11111
|
||||
// 012345678901234
|
||||
// Chars: sø mé ஃ thî ng
|
||||
var codePoints = "søméஃthîng".codePoints
|
||||
|
||||
IO.print(codePoints[0]) // expect: 115
|
||||
IO.print(codePoints[1]) // expect: 248
|
||||
IO.print(codePoints[2]) // expect: -1
|
||||
IO.print(codePoints[3]) // expect: 109
|
||||
IO.print(codePoints[4]) // expect: 233
|
||||
IO.print(codePoints[5]) // expect: -1
|
||||
IO.print(codePoints[6]) // expect: 2947
|
||||
IO.print(codePoints[7]) // expect: -1
|
||||
IO.print(codePoints[8]) // expect: -1
|
||||
IO.print(codePoints[9]) // expect: 116
|
||||
IO.print(codePoints[10]) // expect: 104
|
||||
IO.print(codePoints[11]) // expect: 238
|
||||
IO.print(codePoints[12]) // expect: -1
|
||||
IO.print(codePoints[13]) // expect: 110
|
||||
IO.print(codePoints[14]) // expect: 103
|
||||
|
||||
IO.print(codePoints[-15]) // expect: 115
|
||||
IO.print(codePoints[-14]) // expect: 248
|
||||
IO.print(codePoints[-13]) // expect: -1
|
||||
IO.print(codePoints[-12]) // expect: 109
|
||||
IO.print(codePoints[-11]) // expect: 233
|
||||
IO.print(codePoints[-10]) // expect: -1
|
||||
IO.print(codePoints[-9]) // expect: 2947
|
||||
IO.print(codePoints[-8]) // expect: -1
|
||||
IO.print(codePoints[-7]) // expect: -1
|
||||
IO.print(codePoints[-6]) // expect: 116
|
||||
IO.print(codePoints[-5]) // expect: 104
|
||||
IO.print(codePoints[-4]) // expect: 238
|
||||
IO.print(codePoints[-3]) // expect: -1
|
||||
IO.print(codePoints[-2]) // expect: 110
|
||||
IO.print(codePoints[-1]) // expect: 103
|
||||
|
||||
IO.print("\0".codePoints[0]) // expect: 0
|
||||
|
||||
// Returns -1 for invalid UTF-8 sequences.
|
||||
IO.print("\xef\xf7".codePoints[0]) // expect: -1
|
||||
IO.print("\xef\xf7".codePoints[1]) // expect: -1
|
||||
@ -0,0 +1,8 @@
|
||||
// The first byte of a two-octet sequence.
|
||||
IO.print("\xc0".codePoints[0]) // expect: -1
|
||||
|
||||
// The first byte of a three-octet sequence.
|
||||
IO.print("\xe0".codePoints[0]) // expect: -1
|
||||
|
||||
// The first two bytes of a three-octet sequence.
|
||||
IO.print("\xe0\xae".codePoints[0]) // expect: -1
|
||||
@ -0,0 +1 @@
|
||||
IO.print("string".codePoints[12.34]) // expect runtime error: Index must be an integer.
|
||||
@ -0,0 +1 @@
|
||||
IO.print("string".codePoints["not num"]) // expect runtime error: Index must be a number.
|
||||
@ -0,0 +1 @@
|
||||
IO.print("string".codePoints[6]) // expect runtime error: Index out of bounds.
|
||||
@ -0,0 +1 @@
|
||||
IO.print("string".codePoints[-7]) // expect runtime error: Index out of bounds.
|
||||
Reference in New Issue
Block a user