#!/usr/bin/python
"""Run every benchmark under every language and report the timings.

For each (benchmark, language) pair the benchmark program is executed
NUM_TRIALS times, its output is validated against the benchmark's expected
output pattern, and the elapsed time it reports is recorded.  A summary line
is printed per pair, followed by a text graph of all recorded times.

Benchmark programs are looked up as "<benchmark name><extension>" relative to
the current working directory.  The script runs on both Python 2.7 and 3.
"""

import math
import os
import re
import subprocess
import sys

# List of (name, compiled output pattern) tuples, filled in by BENCHMARK().
BENCHMARKS = []


def BENCHMARK(name, pattern):
    """Register a benchmark.

    name is the base file name of the benchmark program (without extension).
    pattern is a regex the program's whole output must match from the start;
    its first group must capture the elapsed time as a decimal number.
    """
    BENCHMARKS.append((name, re.compile(pattern, re.MULTILINE)))


BENCHMARK("binary_trees", """stretch tree of depth 15\t check: -1
32768\t trees of depth 4\t check: -32768
8192\t trees of depth 6\t check: -8192
2048\t trees of depth 8\t check: -2048
512\t trees of depth 10\t check: -512
128\t trees of depth 12\t check: -128
32\t trees of depth 14\t check: -32
long lived tree of depth 14\t check: -1
elapsed: (\\d+\\.\\d+)""")

BENCHMARK("fib", r"""832040
832040
832040
832040
832040
elapsed: (\d+\.\d+)""")

# Each entry is (display name, interpreter executable, source file extension).
LANGUAGES = [
    ("wren", "../build/Release/wren", ".wren"),
    ("lua", "lua", ".lua"),
    ("python", "python", ".py"),
    ("ruby", "ruby", ".rb"),
    ("js", "node", ".js")
]

# How many times to run a given benchmark. Should be an odd number to get the
# right median.
NUM_TRIALS = 7

# List of ["<benchmark> - <language>", sorted list of times] entries, appended
# by run_benchmark() and consumed by graph_results().
results = []


def calc_stats(nums):
    """Calculates the mean, median, and std deviation of a list of numbers.

    Returns [mean, median, std_dev].  For an even number of values the median
    is the lower of the two middle values.  The standard deviation is the
    population one (divides by len(nums)).  The input list is not modified.

    Raises ZeroDivisionError if nums is empty.
    """
    # Float divisor so integer input is not truncated on Python 2.
    count = float(len(nums))
    mean = sum(nums) / count

    # Sort a copy; the caller's list must not be reordered as a side effect.
    ordered = sorted(nums)
    # Floor division: a plain "/" yields a float index on Python 3.
    median = ordered[(len(ordered) - 1) // 2]

    squared_diffs = ((n - mean) * (n - mean) for n in ordered)
    std_dev = math.sqrt(sum(squared_diffs) / count)
    return [mean, median, std_dev]


def run_benchmark_once(benchmark, language):
    """Runs one trial of a benchmark and returns its reported elapsed time.

    benchmark is a (name, compiled pattern) entry from BENCHMARKS and language
    is a (name, executable, extension) entry from LANGUAGES.

    Returns the elapsed time as a float, or None (after printing a diagnostic)
    if the interpreter could not be started, exited with a non-zero status, or
    produced output that does not match the benchmark's pattern.
    """
    args = [language[1], benchmark[0] + language[2]]
    try:
        out = subprocess.check_output(args, universal_newlines=True)
    except OSError as error:
        # Typically the interpreter is not installed or not on the PATH.
        print("Could not run {0}: {1}".format(language[1], error))
        return None
    except subprocess.CalledProcessError as error:
        print("{0} exited with status {1}:".format(
            language[1], error.returncode))
        print(error.output)
        return None

    match = benchmark[1].match(out)
    if match is None:
        print("Incorrect output:")
        print(out)
        return None
    return float(match.group(1))


def run_benchmark(benchmark, language):
    """Runs a benchmark NUM_TRIALS times for one language and prints stats.

    Prints a progress dot per successful trial.  If the language has no
    implementation file for the benchmark, or any trial fails, nothing is
    recorded.  Otherwise the sorted times are appended to the global results
    list under the label "<benchmark> - <language>".
    """
    sys.stdout.write("{0} - {1:10s} ".format(benchmark[0], language[0]))
    sys.stdout.flush()

    if not os.path.exists(benchmark[0] + language[2]):
        print("No implementation for this language")
        return

    times = []
    for _ in range(NUM_TRIALS):
        elapsed = run_benchmark_once(benchmark, language)
        # Compare against None explicitly: a reported time of 0.0 is valid.
        if elapsed is None:
            return
        times.append(elapsed)
        sys.stdout.write(".")
        # Flush so the dot shows up while the next trial is still running.
        sys.stdout.flush()

    times.sort()
    stats = calc_stats(times)
    print(" mean: {0:.4f} median: {1:.4f} std_dev: {2:.4f}".format(
        stats[0], stats[1], stats[2]))

    results.append([benchmark[0] + " - " + language[0], times])


def graph_results():
    """Prints a text graph of every recorded time in the global results.

    Each result gets one 80-column row of "-" with an "O" at the position of
    each trial, scaled so that the highest time overall lands in the last
    column.
    """
    print("")

    # Scale everything by the highest time.
    highest = 0
    for result in results:
        slowest = max(result[1])
        if slowest > highest:
            highest = slowest

    print("{0:24s} 0.0 {1:76.4f}".format("", highest))
    for result in results:
        line = ["-"] * 80
        for elapsed in result[1]:
            # If every recorded time is zero there is nothing to scale by.
            column = int(elapsed / highest * 79) if highest else 0
            line[column] = "O"
        print("{0:24s} {1}".format(result[0], "".join(line)))


def main():
    """Runs all benchmarks for all languages, then graphs the results."""
    for benchmark in BENCHMARKS:
        for language in LANGUAGES:
            run_benchmark(benchmark, language)

    graph_results()


if __name__ == "__main__":
    main()