From 3da1cbe2fae72dab44ee19a497593b06d6a00e9f Mon Sep 17 00:00:00 2001 From: Bob Nystrom Date: Wed, 23 Oct 2013 13:51:41 -0700 Subject: [PATCH] Pure recursive descent parser. Also, start hacking on method calls. --- example/hello.wren | 3 +- src/compiler.c | 257 +++++++-------------------------------------- src/compiler.h | 2 +- src/main.c | 7 +- src/vm.c | 51 +++++++-- src/vm.h | 27 +++-- 6 files changed, 110 insertions(+), 237 deletions(-) diff --git a/example/hello.wren b/example/hello.wren index 43983eb2..c3054d45 100644 --- a/example/hello.wren +++ b/example/hello.wren @@ -1 +1,2 @@ -21212 +123.ok +123.yar diff --git a/src/compiler.c b/src/compiler.c index 22c190b6..8618346d 100644 --- a/src/compiler.c +++ b/src/compiler.c @@ -5,7 +5,6 @@ #include "compiler.h" -// Note: if you add new token types, make sure to update the arrays below. typedef enum { TOKEN_LEFT_PAREN, @@ -56,6 +55,8 @@ typedef struct Token_s typedef struct { + VM* vm; + const char* source; size_t sourceLength; @@ -79,31 +80,18 @@ typedef struct int hasError; } Compiler; -typedef void (*CompileFn)(Compiler*, Token*); - -typedef struct -{ - CompileFn fn; - int precedence; -} InfixCompiler; - -// Parsing: - -/* -static void block(Compiler* compiler); -*/ -static void statementLike(Compiler* compiler); +// Grammar: +static void statement(Compiler* compiler); static void expression(Compiler* compiler); -static void compilePrecedence(Compiler* compiler, int precedence); -static void prefixLiteral(Compiler* compiler, Token* token); -static void infixCall(Compiler* compiler, Token* token); -static void infixBinaryOp(Compiler* compiler, Token* token); +static void call(Compiler* compiler); +static void primary(Compiler* compiler); +static void number(Compiler* compiler, Token* token); static TokenType peek(Compiler* compiler); static int match(Compiler* compiler, TokenType expected); static void consume(Compiler* compiler, TokenType expected); static void advance(Compiler* compiler); -// Lexing: +// Tokens: static void readNextToken(Compiler* compiler); static void readName(Compiler* compiler); static void readNumber(Compiler* compiler); @@ -119,91 +107,10 @@ static void makeToken(Compiler* compiler, TokenType type); // Utility: static void error(Compiler* compiler, const char* format, ...); -enum -{ - PREC_NONE, - PREC_LOWEST, - - PREC_EQUALITY, // == != - PREC_COMPARISON, // < > <= >= - PREC_BITWISE, // | & - PREC_TERM, // + - - PREC_FACTOR, // * / % - PREC_CALL // () -}; - -CompileFn prefixCompilers[] = { - NULL, // TOKEN_LEFT_PAREN - NULL, // TOKEN_RIGHT_PAREN - NULL, // TOKEN_LEFT_BRACKET - NULL, // TOKEN_RIGHT_BRACKET - NULL, // TOKEN_LEFT_BRACE - NULL, // TOKEN_RIGHT_BRACE - NULL, // TOKEN_COLON - NULL, // TOKEN_DOT - NULL, // TOKEN_COMMA - NULL, // TOKEN_STAR - NULL, // TOKEN_SLASH - NULL, // TOKEN_PERCENT - NULL, // TOKEN_PLUS - NULL, // TOKEN_MINUS - NULL, // TOKEN_PIPE - NULL, // TOKEN_AMP - NULL, // TOKEN_BANG - NULL, // TOKEN_EQ - NULL, // TOKEN_LT - NULL, // TOKEN_GT - NULL, // TOKEN_LTEQ - NULL, // TOKEN_GTEQ - NULL, // TOKEN_EQEQ - NULL, // TOKEN_BANGEQ - NULL, // TOKEN_VAR - prefixLiteral, // TOKEN_NAME - prefixLiteral, // TOKEN_NUMBER - prefixLiteral, // TOKEN_STRING - NULL, // TOKEN_LINE - NULL, // TOKEN_ERROR - NULL // TOKEN_EOF -}; - -// The indices in this array correspond to TOKEN enum values. -InfixCompiler infixCompilers[] = { - { infixCall, PREC_CALL }, // TOKEN_LEFT_PAREN - { NULL, PREC_NONE }, // TOKEN_RIGHT_PAREN - { NULL, PREC_NONE }, // TOKEN_LEFT_BRACKET - { NULL, PREC_NONE }, // TOKEN_RIGHT_BRACKET - { NULL, PREC_NONE }, // TOKEN_LEFT_BRACE - { NULL, PREC_NONE }, // TOKEN_RIGHT_BRACE - { NULL, PREC_NONE }, // TOKEN_COLON - { NULL, PREC_NONE }, // TOKEN_DOT - { NULL, PREC_NONE }, // TOKEN_COMMA - { infixBinaryOp, PREC_FACTOR }, // TOKEN_STAR - { infixBinaryOp, PREC_FACTOR }, // TOKEN_SLASH - { infixBinaryOp, PREC_FACTOR }, // TOKEN_PERCENT - { infixBinaryOp, PREC_TERM }, // TOKEN_PLUS - { infixBinaryOp, PREC_TERM }, // TOKEN_MINUS - { infixBinaryOp, PREC_BITWISE }, // TOKEN_PIPE - { infixBinaryOp, PREC_BITWISE }, // TOKEN_AMP - { NULL, PREC_NONE }, // TOKEN_BANG - { NULL, PREC_NONE }, // TOKEN_EQ - { infixBinaryOp, PREC_COMPARISON }, // TOKEN_LT - { infixBinaryOp, PREC_COMPARISON }, // TOKEN_GT - { infixBinaryOp, PREC_COMPARISON }, // TOKEN_LTEQ - { infixBinaryOp, PREC_COMPARISON }, // TOKEN_GTEQ - { infixBinaryOp, PREC_EQUALITY }, // TOKEN_EQEQ - { infixBinaryOp, PREC_EQUALITY }, // TOKEN_BANGEQ - { NULL, PREC_NONE }, // TOKEN_VAR - { NULL, PREC_NONE }, // TOKEN_NAME - { NULL, PREC_NONE }, // TOKEN_NUMBER - { NULL, PREC_NONE }, // TOKEN_STRING - { NULL, PREC_NONE }, // TOKEN_LINE - { NULL, PREC_NONE }, // TOKEN_ERROR - { NULL, PREC_NONE } // TOKEN_EOF -}; - -Block* compile(const char* source, size_t sourceLength) +Block* compile(VM* vm, const char* source, size_t sourceLength) { Compiler compiler; + compiler.vm = vm; compiler.source = source; compiler.sourceLength = sourceLength; compiler.hasError = 0; @@ -225,10 +132,10 @@ Block* compile(const char* source, size_t sourceLength) compiler.numCodes = 0; - // TODO(bob): Copied from block(). Unify. do { - statementLike(&compiler); + statement(&compiler); + // TODO(bob): Discard previous value. } while (!match(&compiler, TOKEN_EOF)); compiler.block->bytecode[compiler.numCodes++] = CODE_END; @@ -236,53 +143,9 @@ Block* compile(const char* source, size_t sourceLength) return compiler.hasError ? NULL : compiler.block; } -/* -void block(Compiler* compiler) -{ - consume(compiler, TOKEN_INDENT); - - NodeSequence* sequence = malloc(sizeof(NodeSequence)); - sequence->node.type = NODE_SEQUENCE; - sequence->nodes = NULL; - - NodeList** nodes = &sequence->nodes; - do - { - Node* node = statementLike(compiler); - *nodes = malloc(sizeof(NodeList)); - (*nodes)->node = node; - (*nodes)->next = NULL; - nodes = &(*nodes)->next; - - } while (!match(compiler, TOKEN_OUTDENT)); - - return (Node*)sequence; -} -*/ - -void statementLike(Compiler* compiler) +void statement(Compiler* compiler) { /* - if (match(compiler, TOKEN_IF)) - { - // Compile the condition. - expression(compiler); - - consume(compiler, TOKEN_COLON); - - // Compile the then arm. - block(compiler); - - // Compile the else arm. - if (match(compiler, TOKEN_ELSE)) - { - consume(compiler, TOKEN_COLON); - block(parser); - } - - return; - } - if (match(compiler, TOKEN_VAR)) { Token* name = consume(compiler, TOKEN_NAME); @@ -308,32 +171,42 @@ void statementLike(Compiler* compiler) void expression(Compiler* compiler) { - compilePrecedence(compiler, PREC_LOWEST); + call(compiler); } -void compilePrecedence(Compiler* compiler, int precedence) +// Method calls like: +// +// foo.bar +// foo.bar(arg, arg) +// foo.bar { block } other { block } +void call(Compiler* compiler) { - advance(compiler); - CompileFn prefix = prefixCompilers[compiler->previous.type]; + primary(compiler); - if (prefix == NULL) + if (match(compiler, TOKEN_DOT)) { - // TODO(bob): Handle error better. - error(compiler, "No prefix parser."); - exit(1); - } + consume(compiler, TOKEN_NAME); + int symbol = getSymbol(compiler->vm, + compiler->source + compiler->previous.start, + compiler->previous.end - compiler->previous.start); + printf("symbol %d\n", symbol); - prefix(compiler, &compiler->previous); - while (precedence <= infixCompilers[compiler->current.type].precedence) - { - advance(compiler); - CompileFn infix = infixCompilers[compiler->previous.type].fn; - infix(compiler, &compiler->previous); + // Compile the method call. + compiler->block->bytecode[compiler->numCodes++] = CODE_CALL; + compiler->block->bytecode[compiler->numCodes++] = symbol; } } -void prefixLiteral(Compiler* compiler, Token* token) +void primary(Compiler* compiler) +{ + if (match(compiler, TOKEN_NUMBER)) + { + number(compiler, &compiler->previous); + } +} + +void number(Compiler* compiler, Token* token) { char* end; long value = strtol(compiler->source + token->start, &end, 10); @@ -360,56 +233,6 @@ void prefixLiteral(Compiler* compiler, Token* token) compiler->block->bytecode[compiler->numCodes++] = compiler->block->numConstants - 1; } -void infixCall(Compiler* compiler, Token* token) -{ - printf("infix calls not implemented\n"); - exit(1); - /* - NodeList* args = NULL; - if (match(compiler, TOKEN_RIGHT_PAREN) == NULL) - { - NodeList** arg = &args; - do - { - *arg = malloc(sizeof(NodeList)); - (*arg)->node = expression(parser); - (*arg)->next = NULL; - arg = &(*arg)->next; - } - while (match(compiler, TOKEN_COMMA) != NULL); - - consume(compiler, TOKEN_RIGHT_PAREN); - } - - NodeCall* node = malloc(sizeof(NodeCall)); - node->node.type = NODE_CALL; - node->fn = left; - node->args = args; - - return (Node*)node; - */ -} - -void infixBinaryOp(Compiler* compiler, Token* token) -{ - printf("infix binary ops not implemented\n"); - exit(1); - /* - // TODO(bob): Support right-associative infix. Needs to do precedence - // - 1 here to be right-assoc. - Node* right = parsePrecedence(parser, - infixParsers[token->type].precedence); - - NodeBinaryOp* node = malloc(sizeof(NodeBinaryOp)); - node->node.type = NODE_BINARY_OP; - node->left = left; - node->op = token; - node->right = right; - - return (Node*)node; - */ -} - TokenType peek(Compiler* compiler) { return compiler->current.type; @@ -616,7 +439,7 @@ void error(Compiler* compiler, const char* format, ...) compiler->hasError = 1; printf("Compile error on '"); - for (int i = compiler->current.start; i < compiler->current.end; i++) + for (int i = compiler->previous.start; i < compiler->previous.end; i++) { putchar(compiler->source[i]); } diff --git a/src/compiler.h b/src/compiler.h index 317c66e5..160d218f 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -3,6 +3,6 @@ #include "vm.h" -Block* compile(const char* source, size_t sourceLength); +Block* compile(VM* vm, const char* source, size_t sourceLength); #endif diff --git a/src/main.c b/src/main.c index 556e3f12..2943aa9a 100644 --- a/src/main.c +++ b/src/main.c @@ -25,11 +25,12 @@ int main(int argc, const char * argv[]) // TODO(bob): Validate command line arguments. size_t length; char* source = readFile(argv[1], &length); - Block* block = compile(source, length); - Fiber* fiber = newFiber(); - Value value = interpret(fiber, block); + VM* vm = newVM(); + Block* block = compile(vm, source, length); + Value value = interpret(vm, block); printValue(value); printf("\n"); + freeVM(vm); free(source); return 0; diff --git a/src/vm.c b/src/vm.c index f676bb22..b565115f 100644 --- a/src/vm.c +++ b/src/vm.c @@ -1,20 +1,50 @@ #include #include +#include #include "vm.h" +typedef struct +{ + Value stack[STACK_SIZE]; + int stackSize; +} Fiber; + static Value pop(Fiber* fiber); -Fiber* newFiber() +VM* newVM() { - Fiber* fiber = (Fiber*)malloc(sizeof(Fiber)); - fiber->stackSize = 0; + VM* vm = malloc(sizeof(VM)); + vm->numSymbols = 0; - return fiber; + return vm; } -Value interpret(Fiber* fiber, Block* block) +void freeVM(VM* vm) { + free(vm); +} + +int getSymbol(VM* vm, const char* name, size_t length) +{ + // See if the symbol is already defined. + // TODO(bob): O(n). Do something better. + for (int i = 0; i < vm->numSymbols; i++) + { + if (strncmp(vm->symbols[i], name, length) == 0) return i; + } + + // New symbol, so add it. + vm->symbols[vm->numSymbols] = malloc(length); + strncpy(vm->symbols[vm->numSymbols], name, length); + return vm->numSymbols++; +} + +Value interpret(VM* vm, Block* block) +{ + Fiber fiber; + fiber.stackSize = 0; + int ip = 0; for (;;) { @@ -23,12 +53,19 @@ Value interpret(Fiber* fiber, Block* block) case CODE_CONSTANT: { Value value = block->constants[block->bytecode[ip++]]; - fiber->stack[fiber->stackSize++] = value; + fiber.stack[fiber.stackSize++] = value; + break; + } + + case CODE_CALL: + { + int symbol = block->bytecode[ip++]; + printf("call %d\n", symbol); break; } case CODE_END: - return pop(fiber); + return pop(&fiber); } } } diff --git a/src/vm.h b/src/vm.h index 942299a4..3a2d7581 100644 --- a/src/vm.h +++ b/src/vm.h @@ -26,17 +26,14 @@ typedef struct sObj { typedef Obj* Value; -typedef struct -{ - Value stack[STACK_SIZE]; - int stackSize; -} Fiber; - typedef enum { CODE_CONSTANT, // Load the constant at index [arg]. + CODE_CALL, + // Invoke the method with symbol [arg]. + CODE_END // The current block is done and should be exited. @@ -49,8 +46,22 @@ typedef struct int numConstants; } Block; -Fiber* newFiber(); -Value interpret(Fiber* fiber, Block* block); +#define MAX_SYMBOLS 256 + +typedef struct +{ + // TODO(bob): Make this dynamically sized. + char* symbols[MAX_SYMBOLS]; + int numSymbols; + +} VM; + +VM* newVM(); +void freeVM(VM* vm); + +int getSymbol(VM* vm, const char* name, size_t length); + +Value interpret(VM* vm, Block* block); void printValue(Value value);