From 2a1499b04bc65853fdf71db2126e716168d4c5ae Mon Sep 17 00:00:00 2001 From: Robert Nystrom Date: Sat, 27 Jul 2019 13:34:07 -0700 Subject: [PATCH] Fix horrendously bad bit hashing function. hashBits() is used to generate a hash code from the same 64 bits used to represent a Wren number as a double. When building a map containing a large number of integer keys, it's important for this to do a good job scattering the bits across the 32-bit key space. Alas, it does not. Worse, the benchmark to test this happens to stop just before the performance falls off a cliff, so this was easy to overlook. This replaces it with the hash function V8 uses, which has much better performance across the numeric range. --- src/vm/wren_value.c | 34 +++++++++++----------------- test/benchmark/map_numeric.dart.skip | 6 ++--- test/benchmark/map_numeric.lua | 6 ++--- test/benchmark/map_numeric.py | 6 ++--- test/benchmark/map_numeric.rb | 6 ++--- test/benchmark/map_numeric.wren | 6 ++--- util/benchmark.py | 2 +- 7 files changed, 29 insertions(+), 37 deletions(-) diff --git a/src/vm/wren_value.c b/src/vm/wren_value.c index 3ce4366f..ec82bf3c 100644 --- a/src/vm/wren_value.c +++ b/src/vm/wren_value.c @@ -356,24 +356,18 @@ ObjMap* wrenNewMap(WrenVM* vm) return map; } -static inline uint32_t hashBits(DoubleBits bits) +static inline uint32_t hashBits(uint64_t hash) { - uint32_t result = bits.bits32[0] ^ bits.bits32[1]; - - // Slosh the bits around some. Due to the way doubles are represented, small - // integers will have most of low bits of the double respresentation set to - // zero. For example, the above result for 5 is 43d00600. - // - // We map that to an entry index by masking off the high bits which means - // most small integers would all end up in entry zero. That's bad. To avoid - // that, push a bunch of the high bits lower down so they affect the lower - // bits too. - // - // The specific mixing function here was pulled from Java's HashMap - // implementation. - result ^= (result >> 20) ^ (result >> 12); - result ^= (result >> 7) ^ (result >> 4); - return result; + // From v8's ComputeLongHash() which in turn cites: + // Thomas Wang, Integer Hash Functions. + // http://www.concentric.net/~Ttwang/tech/inthash.htm + hash = ~hash + (hash << 18); // hash = (hash << 18) - hash - 1; + hash = hash ^ (hash >> 31); + hash = hash * 21; // hash = (hash + (hash << 2)) + (hash << 4); + hash = hash ^ (hash >> 11); + hash = hash + (hash << 6); + hash = hash ^ (hash >> 22); + return (uint32_t)(hash & 0x3fffffff); } // Generates a hash code for [num]. @@ -382,7 +376,7 @@ static inline uint32_t hashNumber(double num) // Hash the raw bits of the value. DoubleBits bits; bits.num = num; - return hashBits(bits); + return hashBits(bits.bits64); } // Generates a hash code for [object]. @@ -429,9 +423,7 @@ static uint32_t hashValue(Value value) if (IS_OBJ(value)) return hashObject(AS_OBJ(value)); // Hash the raw bits of the unboxed value. - DoubleBits bits; - bits.bits64 = value; - return hashBits(bits); + return hashBits(value); #else switch (value.type) { diff --git a/test/benchmark/map_numeric.dart.skip b/test/benchmark/map_numeric.dart.skip index da1dfab0..8e4bd454 100644 --- a/test/benchmark/map_numeric.dart.skip +++ b/test/benchmark/map_numeric.dart.skip @@ -6,17 +6,17 @@ main() { var map = {}; - for (var i = 1; i <= 1000000; i++) { + for (var i = 1; i <= 2000000; i++) { map[i] = i; } var sum = 0; - for (var i = 1; i <= 1000000; i++) { + for (var i = 1; i <= 2000000; i++) { sum += map[i]; } print(sum); - for (var i = 1; i <= 1000000; i++) { + for (var i = 1; i <= 2000000; i++) { map.remove(i); } diff --git a/test/benchmark/map_numeric.lua b/test/benchmark/map_numeric.lua index e6d43d07..a7b7f2d7 100644 --- a/test/benchmark/map_numeric.lua +++ b/test/benchmark/map_numeric.lua @@ -2,17 +2,17 @@ local start = os.clock() local map = {} -for i = 1, 1000000 do +for i = 1, 2000000 do map[i] = i end local sum = 0 -for i = 1, 1000000 do +for i = 1, 2000000 do sum = sum + map[i] end io.write(string.format("%d\n", sum)) -for i = 1, 1000000 do +for i = 1, 2000000 do map[i] = nil end diff --git a/test/benchmark/map_numeric.py b/test/benchmark/map_numeric.py index 8b90ec14..b91c413a 100644 --- a/test/benchmark/map_numeric.py +++ b/test/benchmark/map_numeric.py @@ -6,15 +6,15 @@ start = time.clock() map = {} -for i in range(1, 1000001): +for i in range(1, 2000001): map[i] = i sum = 0 -for i in range(1, 1000001): +for i in range(1, 2000001): sum = sum + map[i] print(sum) -for i in range(1, 1000001): +for i in range(1, 2000001): del map[i] print("elapsed: " + str(time.clock() - start)) \ No newline at end of file diff --git a/test/benchmark/map_numeric.rb b/test/benchmark/map_numeric.rb index eaf25631..6c2dba27 100644 --- a/test/benchmark/map_numeric.rb +++ b/test/benchmark/map_numeric.rb @@ -2,17 +2,17 @@ start = Time.now map = Hash.new -for i in (1..1000000) +for i in (1..2000000) map[i] = i end sum = 0 -for i in (1..1000000) +for i in (1..2000000) sum = sum + map[i] end puts sum -for i in (1..1000000) +for i in (1..2000000) map.delete(i) end diff --git a/test/benchmark/map_numeric.wren b/test/benchmark/map_numeric.wren index 252bfa0b..dd632301 100644 --- a/test/benchmark/map_numeric.wren +++ b/test/benchmark/map_numeric.wren @@ -2,17 +2,17 @@ var start = System.clock var map = {} -for (i in 1..1000000) { +for (i in 1..2000000) { map[i] = i } var sum = 0 -for (i in 1..1000000) { +for (i in 1..2000000) { sum = sum + map[i] } System.print(sum) -for (i in 1..1000000) { +for (i in 1..2000000) { map.remove(i) } diff --git a/util/benchmark.py b/util/benchmark.py index f21e1979..5cfa323c 100755 --- a/util/benchmark.py +++ b/util/benchmark.py @@ -86,7 +86,7 @@ BENCHMARK("for", r"""499999500000""") BENCHMARK("method_call", r"""true false""") -BENCHMARK("map_numeric", r"""500000500000""") +BENCHMARK("map_numeric", r"""2000001000000""") BENCHMARK("map_string", r"""12799920000""")