diff --git a/benchmark/fib.txt b/benchmark/fib.txt new file mode 100644 index 00000000..3bb6d4f1 --- /dev/null +++ b/benchmark/fib.txt @@ -0,0 +1,37 @@ +Value is just Obj* and all values are boxed. +→ time ../build/Release/wren fib.wren +9.22746e+06 + +real 0m12.688s +user 0m12.076s +sys 0m0.601s + +--- + +Unboxed singletons and numbers. Value is a struct of ValueType, double, Obj* obj. +→ time ../build/Release/wren fib.wren +9.22746e+06 + +real 0m3.233s +user 0m3.229s +sys 0m0.003s + +--- + +NaN tagged values. +→ time ../build/Release/wren fib.wren +9.22746e+06 + +real 0m3.100s +user 0m3.097s +sys 0m0.002s + +--- + +Hoist bytecode and IP into locals in interpret loop. +→ time ../build/Release/wren fib.wren +9.22746e+06 + +real 0m2.490s +user 0m2.487s +sys 0m0.002s diff --git a/src/common.h b/src/common.h index 7ba52ae8..0a79af22 100644 --- a/src/common.h +++ b/src/common.h @@ -3,11 +3,13 @@ // Define this to stress test the GC. It will perform a collection before every // allocation. -//#define DEBUG_GC_STRESS +#define DEBUG_GC_STRESS // Define this to log memory operations. //#define TRACE_MEMORY +#define NAN_TAGGING + #ifdef DEBUG #define ASSERT(condition, message) \ diff --git a/src/value.c b/src/value.c index 0c2b8288..be2244d4 100644 --- a/src/value.c +++ b/src/value.c @@ -1,13 +1,34 @@ #include "value.h" +#include // TODO(bob): Testing. 
+ int valueIsFn(Value value) { - return value.type == VAL_OBJ && value.obj->type == OBJ_FN; + return IS_OBJ(value) && AS_OBJ(value)->type == OBJ_FN; } int valueIsString(Value value) { - return value.type == VAL_OBJ && value.obj->type == OBJ_STRING; + return IS_OBJ(value) && AS_OBJ(value)->type == OBJ_STRING; +} + +#ifdef NAN_TAGGING + +int valueIsBool(Value value) +{ + return value.bits == TRUE_VAL.bits || value.bits == FALSE_VAL.bits; +} + +Value objectToValue(Obj* obj) +{ + return (Value)(SIGN_BIT | QNAN | (uint64_t)(obj)); +} + +#else + +int valueIsBool(Value value) +{ + return value.type == VAL_FALSE || value.type == VAL_TRUE; } Value objectToValue(Obj* obj) @@ -17,3 +38,5 @@ Value objectToValue(Obj* obj) value.obj = obj; return value; } + +#endif diff --git a/src/value.h b/src/value.h index a2711b1b..07314fc2 100644 --- a/src/value.h +++ b/src/value.h @@ -1,6 +1,10 @@ #ifndef wren_value_h #define wren_value_h +#include + +#include "common.h" + // TODO(bob): This should be in VM. (Or, really, we shouldn't hardcode this at // all and have growable symbol tables.) #define MAX_SYMBOLS 256 @@ -29,14 +33,23 @@ typedef enum typedef struct sObj { - ObjType type; - ObjFlags flags; + ObjType type : 3; + ObjFlags flags : 1; // The next object in the linked list of all currently allocated objects. struct sObj* next; } Obj; -// TODO(bob): Temp. +#ifdef NAN_TAGGING + +typedef union +{ + double num; + uint64_t bits; +} Value; + +#else + typedef struct { ValueType type; @@ -44,6 +57,8 @@ typedef struct Obj* obj; } Value; +#endif + typedef struct sVM VM; typedef struct sFiber Fiber; @@ -106,33 +121,30 @@ typedef struct char* value; } ObjString; -// Get the class value of [value] (0 or 1), which must be a boolean. -#define AS_CLASS(value) ((ObjClass*)(value).obj) -// Get the bool value of [obj] (0 or 1), which must be a boolean. -#define AS_BOOL(value) ((value).type == VAL_TRUE) +// Value -> ObjClass*. 
+#define AS_CLASS(value) ((ObjClass*)AS_OBJ(value)) -// Get the function value of [obj] (0 or 1), which must be a function. -#define AS_FN(value) ((ObjFn*)(value).obj) +// Value -> ObjFn*. +#define AS_FN(value) ((ObjFn*)AS_OBJ(value)) -// Get the double value of [obj], which must be a number. -#define AS_INSTANCE(value) ((ObjInstance*)(value).obj) +// Value -> ObjInstance*. +#define AS_INSTANCE(value) ((ObjInstance*)AS_OBJ(value)) -// Get the double value of [value], which must be a number. +// Value -> double. #define AS_NUM(v) ((v).num) -// Get the const char* value of [v], which must be a string. +// Value -> ObjString*. +#define AS_STRING(v) ((ObjString*)AS_OBJ(v)) + +// Value -> const char*. #define AS_CSTRING(v) (AS_STRING(v)->value) -// Get the ObjString* of [v], which must be a string. -#define AS_STRING(v) ((ObjString*)(v).obj) +// Convert [boolean], an int, to a boolean [Value]. +#define BOOL_VAL(boolean) (boolean ? TRUE_VAL : FALSE_VAL) -// Determines if [value] is a garbage-collected object or not. -#define IS_OBJ(value) ((value).type == VAL_OBJ) - -#define IS_NULL(value) ((value).type == VAL_NULL) -#define IS_NUM(value) ((value).type == VAL_NUM) -#define IS_BOOL(value) ((value).type == VAL_FALSE || (value).type == VAL_TRUE) +// Returns non-zero if [value] is a bool. +#define IS_BOOL(value) (valueIsBool(value)) // Returns non-zero if [value] is a function object. #define IS_FN(value) (valueIsFn(value)) @@ -140,20 +152,146 @@ typedef struct // Returns non-zero if [value] is a string object. #define IS_STRING(value) (valueIsString(value)) + +#ifdef NAN_TAGGING + + +// An IEEE 754 double-precision float is a 64-bit value with bits laid out like: +// +// 1 Sign bit +// | 11 Exponent bits +// | | 52 Mantissa (i.e. fraction) bits +// | | | +// S(Exponent--)(Mantissa-----------------------------------------) +// +// The details of how these are used to represent numbers aren't really +// relevant here as long we don't interfere with them. 
The important bit is NaN. +// +// An IEEE double can represent a few magical values like NaN ("not a number"), +// Infinity, and -Infinity. A NaN is any value where all exponent bits are set: +// +// v--NaN bits +// -111111111111--------------------------------------------------- +// +// Here, "-" means "doesn't matter". Any bit sequence that matches the above is +// a NaN. With all of those "-", it's obvious there are a *lot* of different +// bit patterns that all mean the same thing. NaN tagging takes advantage of +// this. We'll use those available bit patterns to represent things other than +// numbers without giving up any valid numeric values. +// +// NaN values come in two flavors: "signalling" and "quiet". The former are +// intended to halt execution, while the latter just flow through arithmetic +// operations silently. We want the latter. Quiet NaNs are indicated by setting +// the highest mantissa bit: +// +// v--Mantissa bit +// -[NaN ]1-------------------------------------------------- +// +// If all of the NaN bits are set, it's not a number. Otherwise, it is. +// That leaves all of the remaining bits as available for us to play with. We +// stuff a few different kinds of things here: special singleton values like +// "true", "false", and "null", and pointers to objects allocated on the heap. +// We'll use the sign bit to distinguish singleton values from pointers. If it's +// set, it's a pointer. +// +// v--Pointer or singleton? +// S[NaN ]1-----0-------------------------------------------- +// +// For singleton values, we just need to enumerate the different values. We'll +// use the low three bits of the mantissa for that, and only need a couple: +// +// 3 Type bits--v +// 0[NaN ]1------0----------------------------------------[T] +// +// For pointers, we are left with 48 bits of mantissa to store an address. +// That's more than enough room for a 32-bit address. Even 64-bit machines +// only actually use 48 bits for addresses, so we've got plenty. 
We just stuff +// the address right into the mantissa. +// +// Ta-da, double precision numbers, pointers, and a bunch of singleton values, +// all stuffed into a single 64-bit sequence. Even better, we don't have to +// do any masking or work to extract number values: they are unmodified. This +// means math on numbers is fast. + +// A mask that selects the sign bit. +#define SIGN_BIT ((uint64_t)1 << 63) + +// The bits that must be set to indicate a quiet NaN. +#define QNAN ((uint64_t)0x7ffc000000000000) + +// If the NaN bits are set, it's not a number. +#define IS_NUM(value) (((value).bits & QNAN) != QNAN) + +// Singleton values are NaN with the sign bit cleared. (This includes the +// normal value of the actual NaN value used in numeric arithmetic.) +#define IS_SINGLETON(value) (((value).bits & (QNAN | SIGN_BIT)) == QNAN) + +// An object pointer is a NaN with a set sign bit. +#define IS_OBJ(value) (((value).bits & (QNAN | SIGN_BIT)) == (QNAN | SIGN_BIT)) + +#define IS_NULL(value) ((value).bits == (QNAN | TAG_NULL)) + +// Masks out the tag bits used to identify the singleton value. +#define MASK_TAG (7) + +// Tag values for the different singleton values. +#define TAG_NAN (0) +#define TAG_NULL (1) +#define TAG_FALSE (2) +#define TAG_TRUE (3) +#define TAG_UNUSED1 (4) +#define TAG_UNUSED2 (5) +#define TAG_UNUSED3 (6) +#define TAG_UNUSED4 (7) + +// double -> Value. +#define NUM_VAL(n) ((Value)(double)(n)) + +// Value -> 0 or 1. +#define AS_BOOL(value) ((value).bits == TRUE_VAL.bits) + +// Value -> Obj*. +#define AS_OBJ(value) ((Obj*)((value).bits & ~(SIGN_BIT | QNAN))) + +// Singleton values. +#define NULL_VAL ((Value)(uint64_t)(QNAN | TAG_NULL)) +#define FALSE_VAL ((Value)(uint64_t)(QNAN | TAG_FALSE)) +#define TRUE_VAL ((Value)(uint64_t)(QNAN | TAG_TRUE)) + +// Gets the singleton type tag for a Value (which must be a singleton). +#define GET_TAG(value) ((int)((value).bits & MASK_TAG)) + +// Converts a pointer to an Obj to a Value. 
+#define OBJ_VAL(obj) (objectToValue((Obj*)(obj))) + +#else + +// Value -> 0 or 1. +#define AS_BOOL(value) ((value).type == VAL_TRUE) + +// Value -> Obj*. +#define AS_OBJ(v) ((v).obj) + +// Determines if [value] is a garbage-collected object or not. +#define IS_OBJ(value) ((value).type == VAL_OBJ) + +#define IS_NULL(value) ((value).type == VAL_NULL) +#define IS_NUM(value) ((value).type == VAL_NUM) + // Convert [obj], an `Obj*`, to a [Value]. #define OBJ_VAL(obj) (objectToValue((Obj*)(obj))) -// Convert [boolean], an int, to a boolean [Value]. -#define BOOL_VAL(boolean) (boolean ? TRUE_VAL : FALSE_VAL) - -// Convert [n], a raw number, to a [Value]. +// double -> Value. #define NUM_VAL(n) ((Value){ VAL_NUM, n, NULL }) -// TODO(bob): Not C89! +// Singleton values. #define FALSE_VAL ((Value){ VAL_FALSE, 0.0, NULL }) #define NULL_VAL ((Value){ VAL_NULL, 0.0, NULL }) #define TRUE_VAL ((Value){ VAL_TRUE, 0.0, NULL }) +#endif + +int valueIsBool(Value value); int valueIsFn(Value value); int valueIsString(Value value); Value objectToValue(Obj* obj); diff --git a/src/vm.c b/src/vm.c index 14442d92..d62f1b76 100644 --- a/src/vm.c +++ b/src/vm.c @@ -121,7 +121,7 @@ static void markObj(Obj* obj) void markValue(Value value) { if (!IS_OBJ(value)) return; - markObj(value.obj); + markObj(AS_OBJ(value)); } void freeObj(VM* vm, Obj* obj) @@ -574,7 +574,31 @@ void dumpCode(VM* vm, ObjFn* fn) // Returns the class of [object]. static ObjClass* getClass(VM* vm, Value value) -{ +{ // TODO(bob): Unify these. 
+#ifdef NAN_TAGGING + if (IS_NUM(value)) return vm->numClass; + if (IS_OBJ(value)) + { + Obj* obj = AS_OBJ(value); + switch (obj->type) + { + case OBJ_CLASS: return AS_CLASS(value)->metaclass; + case OBJ_FN: return vm->fnClass; + case OBJ_INSTANCE: return AS_INSTANCE(value)->classObj; + case OBJ_STRING: return vm->stringClass; + } + } + + switch (GET_TAG(value)) + { + case TAG_FALSE: return vm->boolClass; + case TAG_NAN: return vm->numClass; + case TAG_NULL: return vm->nullClass; + case TAG_TRUE: return vm->boolClass; + } + + return NULL; +#else switch (value.type) { case VAL_FALSE: return vm->boolClass; @@ -592,6 +616,7 @@ static ObjClass* getClass(VM* vm, Value value) } } } +#endif } Value interpret(VM* vm, ObjFn* fn) @@ -606,13 +631,18 @@ Value interpret(VM* vm, ObjFn* fn) #define PUSH(value) (fiber->stack[fiber->stackSize++] = value) #define POP() (fiber->stack[--fiber->stackSize]) #define PEEK() (fiber->stack[fiber->stackSize - 1]) - #define READ_ARG() (frame->fn->bytecode[frame->ip++]) + #define READ_ARG() (frame->ip++, bytecode[ip++]) + + // Hoist these into local variables. They are accessed frequently in the loop + // but change less frequently. Keeping them in locals and updating them when + // a call frame has been pushed or pop gives a large speed boost. + CallFrame* frame = &fiber->frames[fiber->numFrames - 1]; + int ip = frame->ip; + unsigned char* bytecode = frame->fn->bytecode; for (;;) { - CallFrame* frame = &fiber->frames[fiber->numFrames - 1]; - - Code instruction = frame->fn->bytecode[frame->ip++]; + Code instruction = bytecode[ip++]; switch (instruction) { case CODE_CONSTANT: @@ -755,19 +785,39 @@ Value interpret(VM* vm, ObjFn* fn) case METHOD_FIBER: { + // Store the IP back into the frame. + frame->ip = ip; + Value* args = &fiber->stack[fiber->stackSize - numArgs]; method->fiberPrimitive(vm, fiber, args); + + // These have changed now, so update them. 
+ frame = &fiber->frames[fiber->numFrames - 1]; + ip = frame->ip; + bytecode = frame->fn->bytecode; break; } case METHOD_BLOCK: + // Store the IP back into the frame. + frame->ip = ip; + callFunction(fiber, method->fn, numArgs); + + // These have changed now, so update them. + frame = &fiber->frames[fiber->numFrames - 1]; + ip = frame->ip; + bytecode = frame->fn->bytecode; break; } break; } - case CODE_JUMP: frame->ip += READ_ARG(); break; + case CODE_JUMP:{ + int offset = READ_ARG(); + ip+= offset; + break; + } case CODE_JUMP_IF: { @@ -777,7 +827,7 @@ Value interpret(VM* vm, ObjFn* fn) // False is the only falsey value. if (!AS_BOOL(condition)) { - frame->ip += offset; + ip += offset; } break; } @@ -808,6 +858,11 @@ Value interpret(VM* vm, ObjFn* fn) // Discard the stack slots for the call frame (leaving one slot for the // result). fiber->stackSize = frame->stackStart + 1; + + // These have changed now, so update them. + frame = &fiber->frames[fiber->numFrames - 1]; + ip = frame->ip; + bytecode = frame->fn->bytecode; break; } } @@ -826,6 +881,34 @@ void callFunction(Fiber* fiber, ObjFn* fn, int numArgs) void printValue(Value value) { + // TODO(bob): Unify these. 
+#ifdef NAN_TAGGING + if (IS_NUM(value)) + { + printf("%g", AS_NUM(value)); + } + else if (IS_OBJ(value)) + { + Obj* obj = AS_OBJ(value); + switch (obj->type) + { + case OBJ_CLASS: printf("[class %p]", obj); break; + case OBJ_FN: printf("[fn %p]", obj); break; + case OBJ_INSTANCE: printf("[instance %p]", obj); break; + case OBJ_STRING: printf("%s", AS_CSTRING(value)); break; + } + } + else + { + switch (GET_TAG(value)) + { + case TAG_FALSE: printf("false"); break; + case TAG_NAN: printf("NaN"); break; + case TAG_NULL: printf("null"); break; + case TAG_TRUE: printf("true"); break; + } + } +#else switch (value.type) { case VAL_FALSE: printf("false"); break; @@ -841,6 +924,7 @@ void printValue(Value value) case OBJ_STRING: printf("%s", AS_CSTRING(value)); break; } } +#endif } void pinObj(VM* vm, Obj* obj) diff --git a/wren.xcodeproj/project.pbxproj b/wren.xcodeproj/project.pbxproj index 7b4cd4f7..66a3db41 100644 --- a/wren.xcodeproj/project.pbxproj +++ b/wren.xcodeproj/project.pbxproj @@ -151,6 +151,7 @@ isa = XCBuildConfiguration; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD)"; CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_OBJC_ARC = YES; @@ -163,7 +164,7 @@ CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; COPY_PHASE_STRIP = NO; - GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_C_LANGUAGE_STANDARD = c99; GCC_DYNAMIC_NO_PIC = NO; GCC_ENABLE_OBJC_EXCEPTIONS = YES; GCC_OPTIMIZATION_LEVEL = 0; @@ -172,8 +173,10 @@ "$(inherited)", ); GCC_SYMBOLS_PRIVATE_EXTERN = NO; + GCC_TREAT_WARNINGS_AS_ERRORS = YES; GCC_WARN_64_TO_32_BIT_CONVERSION = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_PEDANTIC = YES; GCC_WARN_UNDECLARED_SELECTOR = YES; GCC_WARN_UNINITIALIZED_AUTOS = YES; GCC_WARN_UNUSED_FUNCTION = YES; @@ -188,6 +191,7 @@ isa = XCBuildConfiguration; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD)"; CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; 
CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_OBJC_ARC = YES; @@ -202,10 +206,12 @@ COPY_PHASE_STRIP = YES; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; ENABLE_NS_ASSERTIONS = NO; - GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_C_LANGUAGE_STANDARD = c99; GCC_ENABLE_OBJC_EXCEPTIONS = YES; + GCC_TREAT_WARNINGS_AS_ERRORS = YES; GCC_WARN_64_TO_32_BIT_CONVERSION = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_PEDANTIC = YES; GCC_WARN_UNDECLARED_SELECTOR = YES; GCC_WARN_UNINITIALIZED_AUTOS = YES; GCC_WARN_UNUSED_FUNCTION = YES;