1
0
forked from Mirror/wren

Pure recursive descent parser.

Also, start hacking on method calls.
This commit is contained in:
Bob Nystrom
2013-10-23 13:51:41 -07:00
parent 7c0ab503f6
commit 3da1cbe2fa
6 changed files with 110 additions and 237 deletions

View File

@ -1 +1,2 @@
21212
123.ok
123.yar

View File

@ -5,7 +5,6 @@
#include "compiler.h"
// Note: if you add new token types, make sure to update the arrays below.
typedef enum
{
TOKEN_LEFT_PAREN,
@ -56,6 +55,8 @@ typedef struct Token_s
typedef struct
{
VM* vm;
const char* source;
size_t sourceLength;
@ -79,31 +80,18 @@ typedef struct
int hasError;
} Compiler;
// Signature shared by the compile callbacks stored in the prefix and infix
// tables: each one receives the compiler and a pointer to a token.
typedef void (*CompileFn)(Compiler*, Token*);
// One entry of the infix table: the callback for a token type together with
// the precedence that token has when it appears in infix position.
typedef struct
{
// Callback to run for the token, or NULL when the token has no infix form.
CompileFn fn;
// One of the PREC_* values.
int precedence;
} InfixCompiler;
// Parsing:
/*
static void block(Compiler* compiler);
*/
static void statementLike(Compiler* compiler);
// Grammar:
static void statement(Compiler* compiler);
static void expression(Compiler* compiler);
static void compilePrecedence(Compiler* compiler, int precedence);
static void prefixLiteral(Compiler* compiler, Token* token);
static void infixCall(Compiler* compiler, Token* token);
static void infixBinaryOp(Compiler* compiler, Token* token);
static void call(Compiler* compiler);
static void primary(Compiler* compiler);
static void number(Compiler* compiler, Token* token);
static TokenType peek(Compiler* compiler);
static int match(Compiler* compiler, TokenType expected);
static void consume(Compiler* compiler, TokenType expected);
static void advance(Compiler* compiler);
// Lexing:
// Tokens:
static void readNextToken(Compiler* compiler);
static void readName(Compiler* compiler);
static void readNumber(Compiler* compiler);
@ -119,91 +107,10 @@ static void makeToken(Compiler* compiler, TokenType type);
// Utility:
static void error(Compiler* compiler, const char* format, ...);
// Precedence levels used by the infix table. They are declared in ascending
// order and compared numerically, so the order of the enumerators matters:
// PREC_NONE is the smallest value and PREC_CALL the largest.
enum
{
PREC_NONE,
PREC_LOWEST,
PREC_EQUALITY, // == !=
PREC_COMPARISON, // < > <= >=
PREC_BITWISE, // | &
PREC_TERM, // + -
PREC_FACTOR, // * / %
PREC_CALL // ()
};
// Compile callbacks for tokens that can start an expression. The table is
// positional: it is indexed with a token's type, and the trailing comment on
// each entry names the token type that slot is meant for, so the entries must
// stay in the same order as the token type enum. A NULL entry means the token
// has no prefix form.
CompileFn prefixCompilers[] = {
NULL, // TOKEN_LEFT_PAREN
NULL, // TOKEN_RIGHT_PAREN
NULL, // TOKEN_LEFT_BRACKET
NULL, // TOKEN_RIGHT_BRACKET
NULL, // TOKEN_LEFT_BRACE
NULL, // TOKEN_RIGHT_BRACE
NULL, // TOKEN_COLON
NULL, // TOKEN_DOT
NULL, // TOKEN_COMMA
NULL, // TOKEN_STAR
NULL, // TOKEN_SLASH
NULL, // TOKEN_PERCENT
NULL, // TOKEN_PLUS
NULL, // TOKEN_MINUS
NULL, // TOKEN_PIPE
NULL, // TOKEN_AMP
NULL, // TOKEN_BANG
NULL, // TOKEN_EQ
NULL, // TOKEN_LT
NULL, // TOKEN_GT
NULL, // TOKEN_LTEQ
NULL, // TOKEN_GTEQ
NULL, // TOKEN_EQEQ
NULL, // TOKEN_BANGEQ
NULL, // TOKEN_VAR
prefixLiteral, // TOKEN_NAME
prefixLiteral, // TOKEN_NUMBER
prefixLiteral, // TOKEN_STRING
NULL, // TOKEN_LINE
NULL, // TOKEN_ERROR
NULL // TOKEN_EOF
};
// Infix compile callbacks and their precedences. The indices in this array
// correspond to TOKEN enum values, so the entries must stay in the same order
// as the token type enum (the trailing comment on each entry names its slot).
// Tokens with no infix form use { NULL, PREC_NONE }; PREC_NONE is smaller
// than every other precedence level.
InfixCompiler infixCompilers[] = {
{ infixCall, PREC_CALL }, // TOKEN_LEFT_PAREN
{ NULL, PREC_NONE }, // TOKEN_RIGHT_PAREN
{ NULL, PREC_NONE }, // TOKEN_LEFT_BRACKET
{ NULL, PREC_NONE }, // TOKEN_RIGHT_BRACKET
{ NULL, PREC_NONE }, // TOKEN_LEFT_BRACE
{ NULL, PREC_NONE }, // TOKEN_RIGHT_BRACE
{ NULL, PREC_NONE }, // TOKEN_COLON
{ NULL, PREC_NONE }, // TOKEN_DOT
{ NULL, PREC_NONE }, // TOKEN_COMMA
{ infixBinaryOp, PREC_FACTOR }, // TOKEN_STAR
{ infixBinaryOp, PREC_FACTOR }, // TOKEN_SLASH
{ infixBinaryOp, PREC_FACTOR }, // TOKEN_PERCENT
{ infixBinaryOp, PREC_TERM }, // TOKEN_PLUS
{ infixBinaryOp, PREC_TERM }, // TOKEN_MINUS
{ infixBinaryOp, PREC_BITWISE }, // TOKEN_PIPE
{ infixBinaryOp, PREC_BITWISE }, // TOKEN_AMP
{ NULL, PREC_NONE }, // TOKEN_BANG
{ NULL, PREC_NONE }, // TOKEN_EQ
{ infixBinaryOp, PREC_COMPARISON }, // TOKEN_LT
{ infixBinaryOp, PREC_COMPARISON }, // TOKEN_GT
{ infixBinaryOp, PREC_COMPARISON }, // TOKEN_LTEQ
{ infixBinaryOp, PREC_COMPARISON }, // TOKEN_GTEQ
{ infixBinaryOp, PREC_EQUALITY }, // TOKEN_EQEQ
{ infixBinaryOp, PREC_EQUALITY }, // TOKEN_BANGEQ
{ NULL, PREC_NONE }, // TOKEN_VAR
{ NULL, PREC_NONE }, // TOKEN_NAME
{ NULL, PREC_NONE }, // TOKEN_NUMBER
{ NULL, PREC_NONE }, // TOKEN_STRING
{ NULL, PREC_NONE }, // TOKEN_LINE
{ NULL, PREC_NONE }, // TOKEN_ERROR
{ NULL, PREC_NONE } // TOKEN_EOF
};
Block* compile(const char* source, size_t sourceLength)
Block* compile(VM* vm, const char* source, size_t sourceLength)
{
Compiler compiler;
compiler.vm = vm;
compiler.source = source;
compiler.sourceLength = sourceLength;
compiler.hasError = 0;
@ -225,10 +132,10 @@ Block* compile(const char* source, size_t sourceLength)
compiler.numCodes = 0;
// TODO(bob): Copied from block(). Unify.
do
{
statementLike(&compiler);
statement(&compiler);
// TODO(bob): Discard previous value.
} while (!match(&compiler, TOKEN_EOF));
compiler.block->bytecode[compiler.numCodes++] = CODE_END;
@ -236,53 +143,9 @@ Block* compile(const char* source, size_t sourceLength)
return compiler.hasError ? NULL : compiler.block;
}
/*
void block(Compiler* compiler)
{
consume(compiler, TOKEN_INDENT);
NodeSequence* sequence = malloc(sizeof(NodeSequence));
sequence->node.type = NODE_SEQUENCE;
sequence->nodes = NULL;
NodeList** nodes = &sequence->nodes;
do
{
Node* node = statementLike(compiler);
*nodes = malloc(sizeof(NodeList));
(*nodes)->node = node;
(*nodes)->next = NULL;
nodes = &(*nodes)->next;
} while (!match(compiler, TOKEN_OUTDENT));
return (Node*)sequence;
}
*/
void statementLike(Compiler* compiler)
void statement(Compiler* compiler)
{
/*
if (match(compiler, TOKEN_IF))
{
// Compile the condition.
expression(compiler);
consume(compiler, TOKEN_COLON);
// Compile the then arm.
block(compiler);
// Compile the else arm.
if (match(compiler, TOKEN_ELSE))
{
consume(compiler, TOKEN_COLON);
block(parser);
}
return;
}
if (match(compiler, TOKEN_VAR))
{
Token* name = consume(compiler, TOKEN_NAME);
@ -308,32 +171,42 @@ void statementLike(Compiler* compiler)
void expression(Compiler* compiler)
{
compilePrecedence(compiler, PREC_LOWEST);
call(compiler);
}
void compilePrecedence(Compiler* compiler, int precedence)
// Method calls like:
//
// foo.bar
// foo.bar(arg, arg)
// foo.bar { block } other { block }
void call(Compiler* compiler)
{
advance(compiler);
CompileFn prefix = prefixCompilers[compiler->previous.type];
primary(compiler);
if (prefix == NULL)
if (match(compiler, TOKEN_DOT))
{
// TODO(bob): Handle error better.
error(compiler, "No prefix parser.");
exit(1);
}
consume(compiler, TOKEN_NAME);
int symbol = getSymbol(compiler->vm,
compiler->source + compiler->previous.start,
compiler->previous.end - compiler->previous.start);
printf("symbol %d\n", symbol);
prefix(compiler, &compiler->previous);
while (precedence <= infixCompilers[compiler->current.type].precedence)
{
advance(compiler);
CompileFn infix = infixCompilers[compiler->previous.type].fn;
infix(compiler, &compiler->previous);
// Compile the method call.
compiler->block->bytecode[compiler->numCodes++] = CODE_CALL;
compiler->block->bytecode[compiler->numCodes++] = symbol;
}
}
void prefixLiteral(Compiler* compiler, Token* token)
// Compiles a primary expression. A number literal is the only form handled:
// when the next token is a TOKEN_NUMBER it is handed to number(). For any
// other token nothing is compiled and no error is reported.
void primary(Compiler* compiler)
{
  // NOTE(review): match() presumably consumes the token when it matches, so
  // that compiler->previous is the number token afterwards -- confirm.
  if (!match(compiler, TOKEN_NUMBER)) return;

  number(compiler, &compiler->previous);
}
void number(Compiler* compiler, Token* token)
{
char* end;
long value = strtol(compiler->source + token->start, &end, 10);
@ -360,56 +233,6 @@ void prefixLiteral(Compiler* compiler, Token* token)
compiler->block->bytecode[compiler->numCodes++] = compiler->block->numConstants - 1;
}
// Infix callback registered for TOKEN_LEFT_PAREN. Calls are not compiled yet:
// the function writes a fixed message to stdout and terminates the process
// with exit status 1. Neither argument is used.
void infixCall(Compiler* compiler, Token* token)
{
  (void)compiler;
  (void)token;

  fputs("infix calls not implemented\n", stdout);
  exit(1);

  // NOTE(review): the sketch below looks like it was written against an
  // AST-building parser (NodeList/NodeCall) and has not been ported -- confirm
  // before reusing any of it.
  /*
  NodeList* args = NULL;
  if (match(compiler, TOKEN_RIGHT_PAREN) == NULL)
  {
  NodeList** arg = &args;
  do
  {
  *arg = malloc(sizeof(NodeList));
  (*arg)->node = expression(parser);
  (*arg)->next = NULL;
  arg = &(*arg)->next;
  }
  while (match(compiler, TOKEN_COMMA) != NULL);
  consume(compiler, TOKEN_RIGHT_PAREN);
  }
  NodeCall* node = malloc(sizeof(NodeCall));
  node->node.type = NODE_CALL;
  node->fn = left;
  node->args = args;
  return (Node*)node;
  */
}
// Infix callback registered for the binary operator tokens. Binary operators
// are not compiled yet: the function writes a fixed message to stdout and
// terminates the process with exit status 1. Neither argument is used.
void infixBinaryOp(Compiler* compiler, Token* token)
{
  (void)compiler;
  (void)token;

  fputs("infix binary ops not implemented\n", stdout);
  exit(1);

  // NOTE(review): the sketch below looks like it was written against an
  // AST-building parser (NodeBinaryOp) and has not been ported -- confirm
  // before reusing any of it.
  /*
  // TODO(bob): Support right-associative infix. Needs to do precedence
  // - 1 here to be right-assoc.
  Node* right = parsePrecedence(parser,
  infixParsers[token->type].precedence);
  NodeBinaryOp* node = malloc(sizeof(NodeBinaryOp));
  node->node.type = NODE_BINARY_OP;
  node->left = left;
  node->op = token;
  node->right = right;
  return (Node*)node;
  */
}
TokenType peek(Compiler* compiler)
{
return compiler->current.type;
@ -616,7 +439,7 @@ void error(Compiler* compiler, const char* format, ...)
compiler->hasError = 1;
printf("Compile error on '");
for (int i = compiler->current.start; i < compiler->current.end; i++)
for (int i = compiler->previous.start; i < compiler->previous.end; i++)
{
putchar(compiler->source[i]);
}

View File

@ -3,6 +3,6 @@
#include "vm.h"
Block* compile(const char* source, size_t sourceLength);
Block* compile(VM* vm, const char* source, size_t sourceLength);
#endif

View File

@ -25,11 +25,12 @@ int main(int argc, const char * argv[])
// TODO(bob): Validate command line arguments.
size_t length;
char* source = readFile(argv[1], &length);
Block* block = compile(source, length);
Fiber* fiber = newFiber();
Value value = interpret(fiber, block);
VM* vm = newVM();
Block* block = compile(vm, source, length);
Value value = interpret(vm, block);
printValue(value);
printf("\n");
freeVM(vm);
free(source);
return 0;

View File

@ -1,20 +1,50 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "vm.h"
// Execution state used by interpret(): a fixed-capacity stack of values.
typedef struct
{
// Storage for the values; values are pushed at index stackSize.
Value stack[STACK_SIZE];
// Number of values currently on the stack (index of the next free slot).
int stackSize;
} Fiber;
static Value pop(Fiber* fiber);
Fiber* newFiber()
VM* newVM()
{
Fiber* fiber = (Fiber*)malloc(sizeof(Fiber));
fiber->stackSize = 0;
VM* vm = malloc(sizeof(VM));
vm->numSymbols = 0;
return fiber;
return vm;
}
Value interpret(Fiber* fiber, Block* block)
// Releases [vm] together with every symbol name it owns. The names are
// heap-allocated one by one in getSymbol(), so freeing only the VM struct
// would leak all of them. Passing NULL is allowed and does nothing.
void freeVM(VM* vm)
{
  if (vm == NULL) return;

  for (int i = 0; i < vm->numSymbols; i++)
  {
    free(vm->symbols[i]);
  }

  free(vm);
}
// Returns the index of the symbol named by the first [length] bytes of
// [name], adding it to [vm]'s symbol table if it has not been seen before.
//
// [name] does not have to be NUL-terminated; the compiler passes a slice of
// the source text. Each stored copy is NUL-terminated so its length can be
// compared exactly: "foo" must not match a stored "foobar", and looking up
// "foobar" must not read past the end of a stored "foo".
//
// The table holds at most MAX_SYMBOLS names. If it is full, or memory for the
// copy cannot be allocated, a message is written to stderr and the process
// exits with status 1.
int getSymbol(VM* vm, const char* name, size_t length)
{
  // See if the symbol is already defined.
  // TODO(bob): O(n). Do something better.
  for (int i = 0; i < vm->numSymbols; i++)
  {
    const char* existing = vm->symbols[i];
    if (strlen(existing) == length && memcmp(existing, name, length) == 0)
    {
      return i;
    }
  }

  // New symbol, so add it. The table has a fixed capacity; refuse to write
  // past its end.
  if (vm->numSymbols >= MAX_SYMBOLS)
  {
    fprintf(stderr, "Too many symbols (limit is %d).\n", MAX_SYMBOLS);
    exit(1);
  }

  // One extra byte for the terminator the lookup above relies on.
  char* copy = malloc(length + 1);
  if (copy == NULL)
  {
    fprintf(stderr, "Out of memory while adding a symbol.\n");
    exit(1);
  }
  memcpy(copy, name, length);
  copy[length] = '\0';

  vm->symbols[vm->numSymbols] = copy;
  return vm->numSymbols++;
}
Value interpret(VM* vm, Block* block)
{
Fiber fiber;
fiber.stackSize = 0;
int ip = 0;
for (;;)
{
@ -23,12 +53,19 @@ Value interpret(Fiber* fiber, Block* block)
case CODE_CONSTANT:
{
Value value = block->constants[block->bytecode[ip++]];
fiber->stack[fiber->stackSize++] = value;
fiber.stack[fiber.stackSize++] = value;
break;
}
case CODE_CALL:
{
int symbol = block->bytecode[ip++];
printf("call %d\n", symbol);
break;
}
case CODE_END:
return pop(fiber);
return pop(&fiber);
}
}
}

View File

@ -26,17 +26,14 @@ typedef struct sObj {
typedef Obj* Value;
typedef struct
{
Value stack[STACK_SIZE];
int stackSize;
} Fiber;
typedef enum
{
CODE_CONSTANT,
// Load the constant at index [arg].
CODE_CALL,
// Invoke the method with symbol [arg].
CODE_END
// The current block is done and should be exited.
@ -49,8 +46,22 @@ typedef struct
int numConstants;
} Block;
Fiber* newFiber();
Value interpret(Fiber* fiber, Block* block);
#define MAX_SYMBOLS 256
// State shared between the compiler and the interpreter: the table of symbol
// names registered through getSymbol().
typedef struct
{
// TODO(bob): Make this dynamically sized.
// Heap-allocated symbol names; a symbol's index in this array is the value
// getSymbol() returns for it. Only the first numSymbols entries are set.
char* symbols[MAX_SYMBOLS];
// Number of entries of symbols that are in use.
int numSymbols;
} VM;
VM* newVM();
void freeVM(VM* vm);
int getSymbol(VM* vm, const char* name, size_t length);
Value interpret(VM* vm, Block* block);
void printValue(Value value);