Add specialized instructions for loading some locals.

Not having to decode a separate arg byte improves some benchmarks:

binary_trees - wren           ..........  3094  0.32s  101.95% relative to baseline
delta_blue - wren             ..........  6555  0.15s  101.05% relative to baseline
fib - wren                    ..........  2326  0.43s  107.32% relative to baseline
for - wren                    ..........  6183  0.16s  102.81% relative to baseline
method_call - wren            ..........  4746  0.21s  105.97% relative to baseline
This commit is contained in:
Bob Nystrom
2014-12-06 19:59:11 -08:00
parent a94dfdac10
commit ac67386208
5 changed files with 174 additions and 34 deletions

View File

@ -0,0 +1,65 @@
This is the number of times each instruction was executed when running the
delta_blue benchmark:
3753021 CODE_LOAD_LOCAL
2233991 CODE_RETURN
2151580 CODE_LOAD_FIELD_THIS
2121398 CODE_CALL_1
1827535 CODE_CALL_0
1328364 CODE_POP
1136064 CODE_JUMP_IF
715071 CODE_LOAD_GLOBAL
428374 CODE_STORE_FIELD_THIS
424999 CODE_NULL
355344 CODE_STORE_LOCAL
341762 CODE_LOOP
118855 CODE_CONSTANT
93048 CODE_CALL_2
75280 CODE_AND
59920 CODE_JUMP
16842 CODE_LIST
16660 CODE_TRUE
10040 CODE_OR
8200 CODE_LOAD_UPVALUE
8140 CODE_SUPER_1
6540 CODE_FALSE
6076 CODE_STORE_GLOBAL
4000 CODE_SUPER_3
2020 CODE_SUPER_2
2000 CODE_SUPER_0
2000 CODE_CALL_5
2000 CODE_CALL_3
160 CODE_CLOSURE
74 CODE_METHOD_INSTANCE
11 CODE_CLASS
4 CODE_METHOD_STATIC
0 CODE_SUPER_9
0 CODE_SUPER_8
0 CODE_SUPER_7
0 CODE_SUPER_6
0 CODE_SUPER_5
0 CODE_SUPER_4
0 CODE_SUPER_16
0 CODE_SUPER_15
0 CODE_SUPER_14
0 CODE_SUPER_13
0 CODE_SUPER_12
0 CODE_SUPER_11
0 CODE_SUPER_10
0 CODE_STORE_UPVALUE
0 CODE_STORE_FIELD
0 CODE_LOAD_FIELD
0 CODE_IS
0 CODE_CLOSE_UPVALUE
0 CODE_CALL_9
0 CODE_CALL_8
0 CODE_CALL_7
0 CODE_CALL_6
0 CODE_CALL_4
0 CODE_CALL_16
0 CODE_CALL_15
0 CODE_CALL_14
0 CODE_CALL_13
0 CODE_CALL_12
0 CODE_CALL_11
0 CODE_CALL_10

View File

@ -1168,6 +1168,17 @@ static int resolveName(Compiler* compiler, const char* name, int length,
&compiler->parser->vm->globalNames, name, length);
}
// Emits the instruction that loads the local variable stored in [slot].
//
// Slots 0 through 8 are compiled to the dedicated CODE_LOAD_LOCAL_<n> opcode,
// which is a single byte with no operand. Any higher slot is compiled to the
// general CODE_LOAD_LOCAL opcode followed by the slot as its operand byte.
static void loadLocal(Compiler* compiler, int slot)
{
  if (slot > 8)
  {
    // No specialized opcode exists for this slot; pass it as an argument.
    emitByte(compiler, CODE_LOAD_LOCAL, slot);
  }
  else
  {
    // The specialized opcodes are declared consecutively, so offsetting from
    // the first one selects the opcode for this slot.
    emit(compiler, CODE_LOAD_LOCAL_0 + slot);
  }
}
// Copies the identifier from the previously consumed `TOKEN_NAME` into [name],
// which should point to a buffer large enough to contain it. Returns the
// length of the name.
@ -1363,7 +1374,7 @@ static void finishBody(Compiler* compiler, bool isConstructor)
if (!isStatementBody) emit(compiler, CODE_POP);
// The receiver is always stored in the first local slot.
emitByte(compiler, CODE_LOAD_LOCAL, 0);
emit(compiler, CODE_LOAD_LOCAL_0);
}
else if (isStatementBody)
{
@ -1511,7 +1522,14 @@ static void loadThis(Compiler* compiler)
int index = resolveName(compiler, "this", 4, &loadInstruction);
ASSERT(index == -1 || loadInstruction != CODE_LOAD_GLOBAL,
"'this' should not be global.");
emitByte(compiler, loadInstruction, index);
if (loadInstruction == CODE_LOAD_LOCAL)
{
loadLocal(compiler, index);
}
else
{
emitByte(compiler, loadInstruction, index);
}
}
static void grouping(Compiler* compiler, bool allowAssignment)
@ -1671,6 +1689,10 @@ static void variable(Compiler* compiler, bool allowAssignment, int index,
{
emitShort(compiler, loadInstruction, index);
}
else if (loadInstruction == CODE_LOAD_LOCAL)
{
loadLocal(compiler, index);
}
else
{
emitByte(compiler, loadInstruction, index);
@ -2187,6 +2209,15 @@ static int getNumArguments(const uint8_t* bytecode, const Value* constants,
case CODE_CLOSE_UPVALUE:
case CODE_RETURN:
case CODE_END:
case CODE_LOAD_LOCAL_0:
case CODE_LOAD_LOCAL_1:
case CODE_LOAD_LOCAL_2:
case CODE_LOAD_LOCAL_3:
case CODE_LOAD_LOCAL_4:
case CODE_LOAD_LOCAL_5:
case CODE_LOAD_LOCAL_6:
case CODE_LOAD_LOCAL_7:
case CODE_LOAD_LOCAL_8:
return 0;
case CODE_LOAD_LOCAL:
@ -2379,8 +2410,8 @@ static void forStatement(Compiler* compiler)
startLoop(compiler, &loop);
// Advance the iterator by calling the ".iterate" method on the sequence.
emitByte(compiler, CODE_LOAD_LOCAL, seqSlot);
emitByte(compiler, CODE_LOAD_LOCAL, iterSlot);
loadLocal(compiler, seqSlot);
loadLocal(compiler, iterSlot);
emitShort(compiler, CODE_CALL_1, methodSymbol(compiler, "iterate ", 8));
@ -2391,8 +2422,8 @@ static void forStatement(Compiler* compiler)
testExitLoop(compiler);
// Get the current value in the sequence by calling ".iteratorValue".
emitByte(compiler, CODE_LOAD_LOCAL, seqSlot);
emitByte(compiler, CODE_LOAD_LOCAL, iterSlot);
loadLocal(compiler, seqSlot);
loadLocal(compiler, iterSlot);
emitShort(compiler, CODE_CALL_1,
methodSymbol(compiler, "iteratorValue ", 14));
@ -2630,7 +2661,7 @@ static void classDefinition(Compiler* compiler)
}
else
{
emitByte(compiler, CODE_LOAD_LOCAL, symbol);
loadLocal(compiler, symbol);
}
// Define the method.

View File

@ -67,6 +67,16 @@ static int debugPrintInstruction(WrenVM* vm, ObjFn* fn, int i, int* lastLine)
case CODE_FALSE: printf("FALSE\n"); break;
case CODE_TRUE: printf("TRUE\n"); break;
case CODE_LOAD_LOCAL_0: printf("LOAD_LOCAL_0\n"); break;
case CODE_LOAD_LOCAL_1: printf("LOAD_LOCAL_1\n"); break;
case CODE_LOAD_LOCAL_2: printf("LOAD_LOCAL_2\n"); break;
case CODE_LOAD_LOCAL_3: printf("LOAD_LOCAL_3\n"); break;
case CODE_LOAD_LOCAL_4: printf("LOAD_LOCAL_4\n"); break;
case CODE_LOAD_LOCAL_5: printf("LOAD_LOCAL_5\n"); break;
case CODE_LOAD_LOCAL_6: printf("LOAD_LOCAL_6\n"); break;
case CODE_LOAD_LOCAL_7: printf("LOAD_LOCAL_7\n"); break;
case CODE_LOAD_LOCAL_8: printf("LOAD_LOCAL_8\n"); break;
case CODE_LOAD_LOCAL: BYTE_INSTRUCTION("LOAD_LOCAL");
case CODE_STORE_LOCAL: BYTE_INSTRUCTION("STORE_LOCAL");
case CODE_LOAD_UPVALUE: BYTE_INSTRUCTION("LOAD_UPVALUE");

View File

@ -418,6 +418,15 @@ static bool runInterpreter(WrenVM* vm)
&&code_NULL,
&&code_FALSE,
&&code_TRUE,
&&code_LOAD_LOCAL_0,
&&code_LOAD_LOCAL_1,
&&code_LOAD_LOCAL_2,
&&code_LOAD_LOCAL_3,
&&code_LOAD_LOCAL_4,
&&code_LOAD_LOCAL_5,
&&code_LOAD_LOCAL_6,
&&code_LOAD_LOCAL_7,
&&code_LOAD_LOCAL_8,
&&code_LOAD_LOCAL,
&&code_STORE_LOCAL,
&&code_LOAD_UPVALUE,
@ -482,18 +491,20 @@ static bool runInterpreter(WrenVM* vm)
#define INTERPRET_LOOP DISPATCH();
#define CASE_CODE(name) code_##name
#if WREN_DEBUG_TRACE_INSTRUCTIONS
// Prints the stack and instruction before each instruction is executed.
#define DISPATCH() \
{ \
wrenDebugPrintStack(fiber); \
wrenDebugPrintInstruction(vm, fn, (int)(ip - fn->bytecode)); \
instruction = *ip++; \
goto *dispatchTable[instruction]; \
}
#else
#define DISPATCH() goto *dispatchTable[instruction = READ_BYTE()]
#endif
#if WREN_DEBUG_TRACE_INSTRUCTIONS
// Prints the stack and instruction before each instruction is executed.
#define DISPATCH() \
{ \
wrenDebugPrintStack(fiber); \
wrenDebugPrintInstruction(vm, fn, (int)(ip - fn->bytecode)); \
instruction = *ip++; \
goto *dispatchTable[instruction]; \
}
#else
#define DISPATCH() goto *dispatchTable[instruction = READ_BYTE()];
#endif
#else
@ -508,6 +519,33 @@ static bool runInterpreter(WrenVM* vm)
Code instruction;
INTERPRET_LOOP
{
CASE_CODE(LOAD_LOCAL_0):
CASE_CODE(LOAD_LOCAL_1):
CASE_CODE(LOAD_LOCAL_2):
CASE_CODE(LOAD_LOCAL_3):
CASE_CODE(LOAD_LOCAL_4):
CASE_CODE(LOAD_LOCAL_5):
CASE_CODE(LOAD_LOCAL_6):
CASE_CODE(LOAD_LOCAL_7):
CASE_CODE(LOAD_LOCAL_8):
PUSH(fiber->stack[frame->stackStart + instruction - CODE_LOAD_LOCAL_0]);
DISPATCH();
CASE_CODE(LOAD_LOCAL):
PUSH(fiber->stack[frame->stackStart + READ_BYTE()]);
DISPATCH();
CASE_CODE(LOAD_FIELD_THIS):
{
int field = READ_BYTE();
Value receiver = fiber->stack[frame->stackStart];
ASSERT(IS_INSTANCE(receiver), "Receiver should be instance.");
ObjInstance* instance = AS_INSTANCE(receiver);
ASSERT(field < instance->classObj->numFields, "Out of bounds field.");
PUSH(instance->fields[field]);
DISPATCH();
}
CASE_CODE(POP): DROP(); DISPATCH();
CASE_CODE(NULL): PUSH(NULL_VAL); DISPATCH();
CASE_CODE(FALSE): PUSH(FALSE_VAL); DISPATCH();
@ -604,10 +642,6 @@ static bool runInterpreter(WrenVM* vm)
DISPATCH();
}
CASE_CODE(LOAD_LOCAL):
PUSH(fiber->stack[frame->stackStart + READ_BYTE()]);
DISPATCH();
CASE_CODE(STORE_LOCAL):
fiber->stack[frame->stackStart + READ_BYTE()] = PEEK();
DISPATCH();
@ -730,17 +764,6 @@ static bool runInterpreter(WrenVM* vm)
vm->globals.data[READ_SHORT()] = PEEK();
DISPATCH();
CASE_CODE(LOAD_FIELD_THIS):
{
int field = READ_BYTE();
Value receiver = fiber->stack[frame->stackStart];
ASSERT(IS_INSTANCE(receiver), "Receiver should be instance.");
ObjInstance* instance = AS_INSTANCE(receiver);
ASSERT(field < instance->classObj->numFields, "Out of bounds field.");
PUSH(instance->fields[field]);
DISPATCH();
}
CASE_CODE(STORE_FIELD_THIS):
{
int field = READ_BYTE();

View File

@ -40,6 +40,17 @@ typedef enum
// Push true onto the stack.
CODE_TRUE,
// Pushes the value in local slot 0 through 8. The slot number is encoded in
// the opcode itself, so these instructions take no argument.
CODE_LOAD_LOCAL_0,
CODE_LOAD_LOCAL_1,
CODE_LOAD_LOCAL_2,
CODE_LOAD_LOCAL_3,
CODE_LOAD_LOCAL_4,
CODE_LOAD_LOCAL_5,
CODE_LOAD_LOCAL_6,
CODE_LOAD_LOCAL_7,
CODE_LOAD_LOCAL_8,
// Pushes the value in local slot [arg].
CODE_LOAD_LOCAL,