diff --git a/doc/site/values.markdown b/doc/site/values.markdown
index f56232e8..389babe7 100644
--- a/doc/site/values.markdown
+++ b/doc/site/values.markdown
@@ -50,6 +50,16 @@ String literals are surrounded in double quotes:
 "hi there"
+They can also span multiple lines:
+
+
+"hi
+there,
+again"
+
+
+### Escaping
+
 A handful of escape characters are supported:
@@ -109,6 +119,59 @@ System.print("wow %((1..3).map {|n| n * n}.join())") //> wow 149
An interpolated expression can even contain a string literal which in turn has
its own nested interpolations, but doing that gets unreadable pretty quickly.
+### Raw strings
+
+A string literal can also be created using triple quotes `"""`, which is
+parsed as a raw string. A raw string is no different
+from any other string; it's just parsed in a different way.
+
+**Raw strings do not process escapes and do not apply any interpolation**.
+
+
+"""hi there"""
+
+
+When a raw string spans multiple lines, the newline immediately
+after the opening triple quote is ignored, and the last newline, together
+with any whitespace between it and the closing triple quote, is ignored too.
+
+
+ """
+ Hello world
+ """
+
+
+The value in the string above is ` Hello world`; it contains no newlines.
+The newline after the opening `"""` and the whitespace on the closing line are ignored.
+Note that the leading whitespace on the `Hello world` line is preserved.
+
+A raw string's contents are taken from the file exactly as written, unmodified.
+This means it can contain quotes, invalid syntax, other data formats
+and so on without being modified by Wren.
+
+
+"""
+ {
+ "hello": "wren",
+ "from" : "json"
+ }
+"""
+
+
+One more example: safely embedding Wren code inside a string.
+
+
+"""
+A markdown string with embedded wren code example.
+
+ class Example {
+ construct code() {
+ //
+ }
+ }
+"""
+
+
## Ranges
A range is a little object that represents a consecutive range of numbers. They
diff --git a/src/vm/wren_compiler.c b/src/vm/wren_compiler.c
index 95c470dd..d3b567a6 100644
--- a/src/vm/wren_compiler.c
+++ b/src/vm/wren_compiler.c
@@ -849,6 +849,65 @@ static void readUnicodeEscape(Parser* parser, ByteBuffer* string, int length)
}
}
+// Finishes lexing a raw string literal: text delimited by `"""` that is copied
+// verbatim, with no escape processing and no interpolation.
+//
+// On entry the first opening quote has already been consumed; the remaining
+// two are consumed here. A newline directly after the opening delimiter is
+// dropped. If only spaces or tabs separate the last newline from the closing
+// delimiter, that newline and the whitespace after it are dropped as well.
+//
+// Emits a TOKEN_STRING token whose value holds the resulting text. If the
+// source ends before a closing `"""` is found, reports "Unterminated raw
+// string." and still emits a token built from the text collected so far.
+static void readRawString(Parser* parser)
+{
+  ByteBuffer string;
+  wrenByteBufferInit(&string);
+
+  // Consume the second and third opening quotes.
+  nextChar(parser);
+  nextChar(parser);
+
+  // If there's a newline immediately after the opening delimiter, discard it
+  // so it's not part of the literal.
+  if (peekChar(parser) == '\n') nextChar(parser);
+
+  // Buffer offset at which the most recent newline was seen, or -1 if none.
+  int lastNewline = -1;
+
+  // Equal to lastNewline while nothing but spaces and tabs has followed that
+  // newline; reset to -1 as soon as any other character appears.
+  int whitespace = -1;
+
+  for (;;)
+  {
+    char c = nextChar(parser);
+    char c1 = peekChar(parser);
+    char c2 = peekNextChar(parser);
+
+    if (c == '\n')
+    {
+      lastNewline = string.count;
+      whitespace = lastNewline;
+    }
+
+    if (c == '"' && c1 == '"' && c2 == '"')
+    {
+      // Consume the second and third closing quotes. This happens only on the
+      // terminated path, so the error path below never advances the lexer
+      // beyond the end of the source.
+      nextChar(parser);
+      nextChar(parser);
+      break;
+    }
+
+    if (c != '\n' && c != ' ' && c != '\t') whitespace = -1;
+
+    if (c == '\0' || c1 == '\0' || c2 == '\0')
+    {
+      lexError(parser, "Unterminated raw string.");
+
+      // Leave the lexer sitting on the terminating NUL so nothing is read past
+      // the end of an unterminated string.
+      // NOTE(review): assumes nextChar() advances currentChar by exactly one
+      // and peekChar() reads the character at currentChar, which is what the
+      // original rewind relied on as well.
+      if (c == '\0')
+      {
+        // The terminator itself was consumed; step back onto it.
+        parser->currentChar--;
+      }
+      else
+      {
+        // The terminator is at most two characters ahead; skip up to it.
+        while (peekChar(parser) != '\0') nextChar(parser);
+      }
+      break;
+    }
+
+    wrenByteBufferWrite(parser->vm, &string, c);
+  }
+
+  // When the closing delimiter sits on a line of its own (preceded only by
+  // spaces or tabs), cut the text at the last newline.
+  int count = string.count;
+  if (lastNewline != -1 && whitespace == lastNewline) count = lastNewline;
+
+  parser->next.value = wrenNewStringLength(parser->vm,
+                                           (char*)string.data, count);
+
+  wrenByteBufferClear(parser->vm, &string);
+  makeToken(parser, TOKEN_STRING);
+}
+
// Finishes lexing a string literal.
static void readString(Parser* parser)
{
@@ -1051,7 +1110,13 @@ static void nextToken(Parser* parser)
}
break;
- case '"': readString(parser); return;
+ case '"': {
+ if(peekChar(parser) == '"' && peekNextChar(parser) == '"') {
+ readRawString(parser);
+ return;
+ }
+ readString(parser); return;
+ }
case '_':
readName(parser,
peekChar(parser) == '_' ? TOKEN_STATIC_FIELD : TOKEN_FIELD);
diff --git a/test/language/string/literals.wren b/test/language/string/literals.wren
index a51124f9..8cf8c854 100644
--- a/test/language/string/literals.wren
+++ b/test/language/string/literals.wren
@@ -3,3 +3,28 @@ System.print("a string") // expect: a string
// Non-ASCII.
System.print("A~¶Þॐஃ") // expect: A~¶Þॐஃ
+
+// Raw strings.
+System.print("""A raw string""") // expect: A raw string
+
+var long = "
+ A
+ multi line
+ regular string
+"
+System.print(long) // expect:
+ // expect: A
+ // expect: multi line
+ // expect: regular string
+ // expect:
+
+var raw = """
+ A if*()*
+ multi line /{}()
+ raw string [\]/
+ "json": "value"
+"""
+System.print(raw) // expect: A if*()*
+ // expect: multi line /{}()
+ // expect: raw string [\]/
+ // expect: "json": "value"
\ No newline at end of file
diff --git a/test/language/string/unterminated_raw.wren b/test/language/string/unterminated_raw.wren
new file mode 100644
index 00000000..be05432d
--- /dev/null
+++ b/test/language/string/unterminated_raw.wren
@@ -0,0 +1,2 @@
+// expect error line 2
+"""this string has no close quote
\ No newline at end of file