diff --git a/doc/site/modules/core/string.markdown b/doc/site/modules/core/string.markdown index 8d28cb4e..b9b459e6 100644 --- a/doc/site/modules/core/string.markdown +++ b/doc/site/modules/core/string.markdown @@ -129,28 +129,12 @@ negative to count backwards from the end of the string. It is a runtime error if `search` is not a string or `start` is not an integer index within the string's byte length. -### **split**(separator) - -Returns a list of one or more strings separated by `separator`. - - :::wren - var string = "abc abc abc" - System.print(string.split(" ")) //> [abc, abc, abc] - -It is a runtime error if `separator` is not a string or is an empty string. - -### **replace**(old, swap) - -Returns a new string with all occurences of `old` replaced with `swap`. - - :::wren - var string = "abc abc abc" - System.print(string.replace(" ", "")) //> abcabcabc - ### **iterate**(iterator), **iteratorValue**(iterator) -Implements the [iterator protocol](../../control-flow.html#the-iterator-protocol) -for iterating over the *code points* in the string: +Implements the [iterator protocol][] for iterating over the *code points* in the +string: + +[iterator protocol]: ../../control-flow.html#the-iterator-protocol :::wren var codePoints = [] @@ -163,12 +147,74 @@ for iterating over the *code points* in the string: If the string contains any bytes that are not valid UTF-8, this iterates over those too, one byte at a time. +### **replace**(old, swap) + +Returns a new string with all occurrences of `old` replaced with `swap`. + + :::wren + var string = "abc abc abc" + System.print(string.replace(" ", "")) //> abcabcabc + +### **split**(separator) + +Returns a list of one or more strings separated by `separator`. + + :::wren + var string = "abc abc abc" + System.print(string.split(" ")) //> [abc, abc, abc] + +It is a runtime error if `separator` is not a string or is an empty string. + ### **startsWith**(prefix) Checks if the string starts with `prefix`. 
It is a runtime error if `prefix` is not a string. +### **trim**() + +Returns a new string with whitespace removed from the beginning and end of this +string. "Whitespace" is space, tab, carriage return, and line feed characters. + + :::wren + System.print(" \nstuff\r\t".trim()) //> stuff + +### **trim**(chars) + +Returns a new string with all code points in `chars` removed from the beginning +and end of this string. + + :::wren + System.print("ᵔᴥᵔᴥᵔbearᵔᴥᴥᵔᵔ".trim("ᵔᴥ")) //> bear + +### **trimEnd**() + +Like `trim()` but only removes from the end of the string. + + :::wren + System.print(" \nstuff\r\t".trimEnd()) //> " \nstuff" + +### **trimEnd**(chars) + +Like `trim(chars)` but only removes from the end of the string. + + :::wren + System.print("ᵔᴥᵔᴥᵔbearᵔᴥᴥᵔᵔ".trimEnd("ᵔᴥ")) //> ᵔᴥᵔᴥᵔbear + +### **trimStart**() + +Like `trim()` but only removes from the beginning of the string. + + :::wren + System.print(" \nstuff\r\t".trimStart()) //> "stuff\r\t" + +### **trimStart**(chars) + +Like `trim(chars)` but only removes from the beginning of the string. + + :::wren + System.print("ᵔᴥᵔᴥᵔbearᵔᴥᴥᵔᵔ".trimStart("ᵔᴥ")) //> bearᵔᴥᴥᵔᵔ + ### **+**(other) operator Returns a new string that concatenates this string and `other`. 
diff --git a/src/vm/wren_core.wren b/src/vm/wren_core.wren
index 2788308c..c2d0af92 100644
--- a/src/vm/wren_core.wren
+++ b/src/vm/wren_core.wren
@@ -236,6 +236,56 @@ class String is Sequence {
     return result
   }
 
+  trim() { trim_("\t\r\n ", true, true) }
+  trim(chars) { trim_(chars, true, true) }
+  trimEnd() { trim_("\t\r\n ", false, true) }
+  trimEnd(chars) { trim_(chars, false, true) }
+  trimStart() { trim_("\t\r\n ", true, false) }
+  trimStart(chars) { trim_(chars, true, false) }
+
+  // Shared implementation behind trim(), trimStart() and trimEnd(). Returns the
+  // part of this string left after dropping every leading (when [trimStart] is
+  // true) and trailing (when [trimEnd] is true) code point found in [chars].
+  // Aborts the fiber if [chars] is not a string.
+  trim_(chars, trimStart, trimEnd) {
+    if (!(chars is String)) {
+      Fiber.abort("Characters must be a string.")
+    }
+
+    var codePoints = chars.codePoints.toList
+
+    // Walk forward with the iterator protocol until a code point that is not
+    // being trimmed is found. [start] ends up false if nothing is left.
+    var start
+    if (trimStart) {
+      while (start = iterate(start)) {
+        if (!codePoints.contains(codePointAt_(start))) break
+      }
+
+      if (start == false) return ""
+    } else {
+      start = 0
+    }
+
+    // Walk backward one byte at a time. An offset where codePointAt_() yields
+    // -1 is stepped over instead of ending the scan.
+    var end
+    if (trimEnd) {
+      end = byteCount_ - 1
+      while (end >= start) {
+        var codePoint = codePointAt_(end)
+        if (codePoint != -1 && !codePoints.contains(codePoint)) break
+        end = end - 1
+      }
+
+      if (end < start) return ""
+    } else {
+      end = -1
+    }
+
+    return this[start..end]
+  }
+
   *(count) {
     if (!(count is Num) || !count.isInteger || count < 0) {
       Fiber.abort("Count must be a non-negative integer.")
diff --git a/src/vm/wren_core.wren.inc b/src/vm/wren_core.wren.inc
index 8251f9f3..63d6648f 100644
--- a/src/vm/wren_core.wren.inc
+++ b/src/vm/wren_core.wren.inc
@@ -238,6 +238,56 @@ static const char* coreModuleSource =
 "    return result\n"
 "  }\n"
 "\n"
+"  trim() { trim_(\"\\t\\r\\n \", true, true) }\n"
+"  trim(chars) { trim_(chars, true, true) }\n"
+"  trimEnd() { trim_(\"\\t\\r\\n \", false, true) }\n"
+"  trimEnd(chars) { trim_(chars, false, true) }\n"
+"  trimStart() { trim_(\"\\t\\r\\n \", true, false) }\n"
+"  trimStart(chars) { trim_(chars, true, false) }\n"
+"\n"
+"  // Shared implementation behind trim(), trimStart() and trimEnd(). Returns the\n"
+"  // part of this string left after dropping every leading (when [trimStart] is\n"
+"  // true) and trailing (when [trimEnd] is true) code point found in [chars].\n"
+"  // Aborts the fiber if [chars] is not a string.\n"
+"  trim_(chars, trimStart, trimEnd) {\n"
+"    if (!(chars is String)) {\n"
+"      Fiber.abort(\"Characters must be a string.\")\n"
+"    }\n"
+"\n"
+"    var codePoints = chars.codePoints.toList\n"
+"\n"
+"    // Walk forward with the iterator protocol until a code point that is not\n"
+"    // being trimmed is found. [start] ends up false if nothing is left.\n"
+"    var start\n"
+"    if (trimStart) {\n"
+"      while (start = iterate(start)) {\n"
+"        if (!codePoints.contains(codePointAt_(start))) break\n"
+"      }\n"
+"\n"
+"      if (start == false) return \"\"\n"
+"    } else {\n"
+"      start = 0\n"
+"    }\n"
+"\n"
+"    // Walk backward one byte at a time. An offset where codePointAt_() yields\n"
+"    // -1 is stepped over instead of ending the scan.\n"
+"    var end\n"
+"    if (trimEnd) {\n"
+"      end = byteCount_ - 1\n"
+"      while (end >= start) {\n"
+"        var codePoint = codePointAt_(end)\n"
+"        if (codePoint != -1 && !codePoints.contains(codePoint)) break\n"
+"        end = end - 1\n"
+"      }\n"
+"\n"
+"      if (end < start) return \"\"\n"
+"    } else {\n"
+"      end = -1\n"
+"    }\n"
+"\n"
+"    return this[start..end]\n"
+"  }\n"
+"\n"
 "  *(count) {\n"
 "    if (!(count is Num) || !count.isInteger || count < 0) {\n"
 "      Fiber.abort(\"Count must be a non-negative integer.\")\n"
diff --git a/test/core/string/trim.wren b/test/core/string/trim.wren
new file mode 100644
index 00000000..23c3fc29
--- /dev/null
+++ b/test/core/string/trim.wren
@@ -0,0 +1,16 @@
+System.print("".trim() == "") // expect: true
+System.print("foo".trim() == "foo") // expect: true
+System.print(" \t\r\nfoo b\tar \t\r\n".trim() == "foo b\tar") // expect: true
+System.print(" \t\r\n \t\r\n".trim() == "") // expect: true
+System.print(" \n\n\tsøméஃthîng \n\n\t".trim() == "søméஃthîng") // expect: true
+
+System.print("".trim("abc") == "") // expect: true
+System.print("foo".trim("abc") == "foo") // expect: true
+System.print("foo".trim("") == "foo") // expect: true
+System.print("cbacbfoobarab".trim("abc") == "foobar") // expect: true
+System.print("abcbacba".trim("abc") == "") // expect: true
+System.print("søméஃthîngsøméஃ".trim("ஃmésø") == "thîng") // expect: true
+
+// 8-bit clean.
+System.print(" \t\ra\0b \t\r".trim() == "a\0b") // expect: true +System.print("\0a\0b\0c\0".trim("c\0a") == "b") // expect: true diff --git a/test/core/string/trim_chars_not_string.wren b/test/core/string/trim_chars_not_string.wren new file mode 100644 index 00000000..ae68b819 --- /dev/null +++ b/test/core/string/trim_chars_not_string.wren @@ -0,0 +1 @@ +"abracadabra".trim(123) // expect runtime error: Characters must be a string. diff --git a/test/core/string/trim_end.wren b/test/core/string/trim_end.wren new file mode 100644 index 00000000..fc082743 --- /dev/null +++ b/test/core/string/trim_end.wren @@ -0,0 +1,16 @@ +System.print("".trimEnd() == "") // expect: true +System.print("foo".trimEnd() == "foo") // expect: true +System.print(" \t\r\nfoo b\tar \t\r\n".trimEnd() == " \t\r\nfoo b\tar") // expect: true +System.print(" \t\r\n \t\r\n".trimEnd() == "") // expect: true +System.print("søméஃthîng \n\n\t".trimEnd() == "søméஃthîng") // expect: true + +System.print("".trimEnd("abc") == "") // expect: true +System.print("foo".trimEnd("abc") == "foo") // expect: true +System.print("foo".trimEnd("") == "foo") // expect: true +System.print("cbacbfoobarab".trimEnd("abc") == "cbacbfoobar") // expect: true +System.print("abcbacba".trimEnd("abc") == "") // expect: true +System.print("søméஃthîngsøméஃ".trimEnd("ஃmésø") == "søméஃthîng") // expect: true + +// 8-bit clean. +System.print(" \t\ra\0b \t\r".trimEnd() == " \t\ra\0b") // expect: true +System.print("\0a\0b\0c\0".trimEnd("c\0") == "\0a\0b") // expect: true diff --git a/test/core/string/trim_end_chars_not_string.wren b/test/core/string/trim_end_chars_not_string.wren new file mode 100644 index 00000000..929ae08d --- /dev/null +++ b/test/core/string/trim_end_chars_not_string.wren @@ -0,0 +1 @@ +"abracadabra".trimEnd(123) // expect runtime error: Characters must be a string. 
diff --git a/test/core/string/trim_start.wren b/test/core/string/trim_start.wren new file mode 100644 index 00000000..2e1eb3a4 --- /dev/null +++ b/test/core/string/trim_start.wren @@ -0,0 +1,16 @@ +System.print("".trimStart() == "") // expect: true +System.print("foo".trimStart() == "foo") // expect: true +System.print(" \t\r\nfoo b\tar \t\r\n".trimStart() == "foo b\tar \t\r\n") // expect: true +System.print(" \t\r\n \t\r\n".trimStart() == "") // expect: true +System.print(" \n\n\tsøméஃthîng".trimStart() == "søméஃthîng") // expect: true + +System.print("".trimStart("abc") == "") // expect: true +System.print("foo".trimStart("abc") == "foo") // expect: true +System.print("foo".trimStart("") == "foo") // expect: true +System.print("cbacbfoobarab".trimStart("abc") == "foobarab") // expect: true +System.print("abcbacba".trimStart("abc") == "") // expect: true +System.print("søméஃthîng".trimStart("ஃmésø") == "thîng") // expect: true + +// 8-bit clean. +System.print(" \t\ra\0b".trimStart() == "a\0b") // expect: true +System.print("\0a\0b\0c\0".trimStart("a\0") == "b\0c\0") // expect: true diff --git a/test/core/string/trim_start_chars_not_string.wren b/test/core/string/trim_start_chars_not_string.wren new file mode 100644 index 00000000..8960c7a3 --- /dev/null +++ b/test/core/string/trim_start_chars_not_string.wren @@ -0,0 +1 @@ +"abracadabra".trimStart(123) // expect runtime error: Characters must be a string.