From e3c76a3e7684d2ea9b32e956343e56f850deea57 Mon Sep 17 00:00:00 2001
From: ruby0x1 <ruby0x1@pm.me>
Date: Sun, 4 Apr 2021 22:28:57 -0700
Subject: [PATCH] Raw strings now ignore whitespace on both ends for
 consistency and clarity

added more tests, updated documentation
---
 doc/site/values.markdown           | 11 ++++-----
 src/vm/wren_compiler.c             | 36 +++++++++++++++++++++---------
 test/language/string/literals.wren | 31 ++++++++++++++++++++++++-
 3 files changed, 62 insertions(+), 16 deletions(-)
diff --git a/doc/site/values.markdown b/doc/site/values.markdown
index a50788b0..faacc101 100644
--- a/doc/site/values.markdown
+++ b/doc/site/values.markdown
@@ -131,9 +131,10 @@ from any other string, it's just parsed in a different way.
 """hi there"""
 </pre>
 
-When a raw string spans multiple lines, the newline immediately 
-after the triple quote will be ignored, and any spaces or tabs after 
-the last newline (before the closing triple quote) will be ignored too.
+When a raw string spans multiple lines and a triple quote is on its own line,
+any whitespace on that line will be ignored. This means the opening and closing
+lines are not counted as part of the string when the triple quotes are on separate
+lines, as long as they only contain whitespace (spaces and tabs).
 
 <pre class="snippet">
   """
@@ -141,8 +142,8 @@ the last newline (before the closing triple quote) will be ignored too.
   """
 </pre>
 
-The value in the string above has no newlines, but the spaces in front
-are preserved. The newline after `"""` and the whitespace on the closing line are ignored.
+The resulting value in the string above has no newlines or trailing whitespace.
+Note that the spaces in front of `Hello` are preserved.
 
 <pre class="snippet">
     Hello world
diff --git a/src/vm/wren_compiler.c b/src/vm/wren_compiler.c
index d3b567a6..32e26129 100644
--- a/src/vm/wren_compiler.c
+++ b/src/vm/wren_compiler.c
@@ -859,12 +859,11 @@ static void readRawString(Parser* parser)
   nextChar(parser);
   nextChar(parser);
 
-  //if there's a newline immediately after, 
-  //discard it so it's not part of the literal
-  if(peekChar(parser) == '\n') nextChar(parser);
-  
+  int skipStart = 0;
+  int firstNewline = -1;
+
+  int skipEnd = -1;
   int lastNewline = -1;
-  int whitespace = -1;
 
   for (;;)
   {
@@ -874,12 +873,24 @@ static void readRawString(Parser* parser)
 
     if(c == '\n') {
       lastNewline = string.count;
-      whitespace = lastNewline;
+      skipEnd = lastNewline;
+      firstNewline = firstNewline == -1 ? string.count : firstNewline;
     }
 
     if(c == '"' && c1 == '"' && c2 == '"') break;
     
-    if(c != '\n' && c != ' ' && c != '\t') whitespace = -1;
+    bool isWhitespace = c == ' ' || c == '\t';
+    skipEnd = c == '\n' || isWhitespace ? skipEnd : -1;
+
+    // If we haven't seen a newline or any non-whitespace character yet
+    // and are still seeing whitespace, count the characters
+    // as skippable until we know otherwise.
+    bool skippable = skipStart != -1 && isWhitespace && firstNewline == -1;
+    skipStart = skippable ? string.count + 1 : skipStart;
+    
+    // We've counted leading whitespace till we hit something else, 
+    // but it's not a newline, so we reset skipStart since we need these characters
+    if (firstNewline == -1 && !isWhitespace && c != '\n') skipStart = -1;
 
     if (c == '\0' || c1 == '\0' || c2 == '\0')
     {
@@ -898,11 +909,16 @@ static void readRawString(Parser* parser)
   nextChar(parser);
   nextChar(parser);
 
+  int offset = 0;
   int count = string.count;
-  if(lastNewline != -1 && whitespace == lastNewline) count = lastNewline;
 
-  parser->next.value = wrenNewStringLength(parser->vm,
-                                              (char*)string.data, count);
+  if(firstNewline != -1 && skipStart == firstNewline) offset = firstNewline + 1;
+  if(lastNewline != -1 && skipEnd == lastNewline) count = lastNewline;
+
+  count -= (offset > count) ? count : offset;
+
+  parser->next.value = wrenNewStringLength(parser->vm, 
+                         ((char*)string.data) + offset, count);
   
   wrenByteBufferClear(parser->vm, &string);
   makeToken(parser, type);
diff --git a/test/language/string/literals.wren b/test/language/string/literals.wren
index 8cf8c854..210bba58 100644
--- a/test/language/string/literals.wren
+++ b/test/language/string/literals.wren
@@ -6,6 +6,8 @@ System.print("A~¶Þॐஃ") // expect: A~¶Þॐஃ
 
 // Raw strings.
 System.print("""A raw string""") // expect: A raw string
+System.print("""   A raw string""") // expect:    A raw string
+System.print("""A raw string   """) // expect: A raw string   
 
 var long = "
   A
@@ -27,4 +29,31 @@ var raw = """
 System.print(raw) // expect:   A if*(<invalid>)*
                   // expect:   multi line /{}()
                   // expect:   raw string [\]/
-                  // expect:   "json": "value"
\ No newline at end of file
+                  // expect:   "json": "value"
+
+// Raw strings ignore whitespace on the line with the """
+
+var noNewlines = """
+no newlines
+"""
+System.print(noNewlines) // expect: no newlines
+
+// Spaces after the """ but before the \n
+var noLeadingSpaces = """    
+no leading spaces
+"""
+System.print(noLeadingSpaces) // expect: no leading spaces
+
+// Spaces before the end """ after the \n
+var noTrailingSpaces = """    
+no trailing spaces
+       """
+System.print(noTrailingSpaces) // expect: no trailing spaces
+
+var newlineBefore = """    
+newline before"""
+System.print(newlineBefore) // expect: newline before
+
+var newlineAfter = """newline after
+"""
+System.print(newlineAfter) // expect: newline after