From 8858161484c1903bfd2de2f21a765738ff2d7fd3 Mon Sep 17 00:00:00 2001 From: jclc Date: Wed, 4 Apr 2018 05:11:59 +0300 Subject: [PATCH 1/4] skip UTF-8 BOM --- src/vm/wren_compiler.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/vm/wren_compiler.c b/src/vm/wren_compiler.c index 66af1432..c7f0de37 100644 --- a/src/vm/wren_compiler.c +++ b/src/vm/wren_compiler.c @@ -3416,20 +3416,27 @@ void definition(Compiler* compiler) ObjFn* wrenCompile(WrenVM* vm, ObjModule* module, const char* source, bool isExpression, bool printErrors) { + // Skip potential UTF-8 BOM + size_t sourceOffset = 0; + if (source[0] == (char) 0xef && source[1] == (char) 0xbb && source[2] == (char) 0xbf) + { + sourceOffset = 3; + } + Parser parser; parser.vm = vm; parser.module = module; - parser.source = source; + parser.source = source + sourceOffset; - parser.tokenStart = source; - parser.currentChar = source; + parser.tokenStart = source + sourceOffset; + parser.currentChar = source + sourceOffset; parser.currentLine = 1; parser.numParens = 0; // Zero-init the current token. This will get copied to previous when // advance() is called below. parser.current.type = TOKEN_ERROR; - parser.current.start = source; + parser.current.start = source + sourceOffset; parser.current.length = 0; parser.current.line = 0; parser.current.value = UNDEFINED_VAL; @@ -3439,6 +3446,7 @@ ObjFn* wrenCompile(WrenVM* vm, ObjModule* module, const char* source, parser.printErrors = printErrors; parser.hasError = false; + // Read the first token. nextToken(&parser); From fe2ca0e89a87a7e0ef1787cf5c963677924c2b75 Mon Sep 17 00:00:00 2001 From: jclc Date: Wed, 4 Apr 2018 05:14:05 +0300 Subject: [PATCH 2/4] remove extra line --- src/vm/wren_compiler.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/vm/wren_compiler.c b/src/vm/wren_compiler.c index c7f0de37..3937d1d2 100644 --- a/src/vm/wren_compiler.c +++ b/src/vm/wren_compiler.c @@ -3446,12 +3446,11 @@ ObjFn* wrenCompile(WrenVM* vm, ObjModule* module, const char* source, parser.printErrors = printErrors; parser.hasError = false; - // Read the first token. nextToken(&parser); int numExistingVariables = module->variables.count; - + Compiler compiler; initCompiler(&compiler, &parser, NULL, true); ignoreNewlines(&compiler); From 4034a6d65a99b2daf85655a6d344f0ec243d1063 Mon Sep 17 00:00:00 2001 From: jclc Date: Sat, 7 Apr 2018 06:04:32 +0300 Subject: [PATCH 3/4] crunched BOM skipping --- src/vm/wren_compiler.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/vm/wren_compiler.c b/src/vm/wren_compiler.c index 3937d1d2..75d0136e 100644 --- a/src/vm/wren_compiler.c +++ b/src/vm/wren_compiler.c @@ -3417,26 +3417,25 @@ ObjFn* wrenCompile(WrenVM* vm, ObjModule* module, const char* source, bool isExpression, bool printErrors) { // Skip potential UTF-8 BOM - size_t sourceOffset = 0; - if (source[0] == (char) 0xef && source[1] == (char) 0xbb && source[2] == (char) 0xbf) + if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) { - sourceOffset = 3; + source += 3; } Parser parser; parser.vm = vm; parser.module = module; - parser.source = source + sourceOffset; + parser.source = source; - parser.tokenStart = source + sourceOffset; - parser.currentChar = source + sourceOffset; + parser.tokenStart = source; + parser.currentChar = source; parser.currentLine = 1; parser.numParens = 0; // Zero-init the current token. This will get copied to previous when // advance() is called below. parser.current.type = TOKEN_ERROR; - parser.current.start = source + sourceOffset; + parser.current.start = source; parser.current.length = 0; parser.current.line = 0; parser.current.value = UNDEFINED_VAL; From e66115c9fc086d3dafdbf20141999e707a7667df Mon Sep 17 00:00:00 2001 From: jclc Date: Tue, 10 Apr 2018 18:53:56 +0300 Subject: [PATCH 4/4] add regression test for #520 --- test/regression/520.wren | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 test/regression/520.wren diff --git a/test/regression/520.wren b/test/regression/520.wren new file mode 100644 index 00000000..6eddf921 --- /dev/null +++ b/test/regression/520.wren @@ -0,0 +1,2 @@ +// This file should have a UTF-8 byte order mark +System.print("ok") // expect: ok