#!/usr/bin/python
"""Runs each benchmark several times in several languages, validates the
output, and compares the timings against Wren and a recorded baseline."""

import argparse
import math
import os
import re
import subprocess
import sys

# Each entry is [name, regex that validates the output, baseline median time].
BENCHMARKS = []

def BENCHMARK(name, pattern):
  # A benchmark must print its expected output followed by its elapsed time.
  regex = re.compile(pattern + "\n" + r"elapsed: (\d+\.\d+)", re.MULTILINE)
  BENCHMARKS.append([name, regex, None])

BENCHMARK("binary_trees", """stretch tree of depth 15\t check: -1
32768\t trees of depth 4\t check: -32768
8192\t trees of depth 6\t check: -8192
2048\t trees of depth 8\t check: -2048
512\t trees of depth 10\t check: -512
128\t trees of depth 12\t check: -128
32\t trees of depth 14\t check: -32
long lived tree of depth 14\t check: -1""")

BENCHMARK("fib", r"""832040
832040
832040
832040
832040""")

BENCHMARK("method_call", r"""true
false""")

# (name, interpreter executable, benchmark file extension) for each language.
LANGUAGES = [
  ("wren",   "../build/Release/wren", ".wren"),
  ("js",     "node",                  ".js"),
  ("lua",    "lua",                   ".lua"),
  ("python", "python",                ".py"),
  ("ruby",   "ruby",                  ".rb")
]

# How many times to run a given benchmark. Should be an odd number so there is
# a single middle sample to take as the median.
NUM_TRIALS = 7

results = []

def green(text):
  if sys.platform == 'win32':
    return text
  return '\033[32m' + text + '\033[0m'

def red(text):
  if sys.platform == 'win32':
    return text
  return '\033[31m' + text + '\033[0m'

def yellow(text):
  if sys.platform == 'win32':
    return text
  return '\033[33m' + text + '\033[0m'

def calc_stats(nums):
  """Calculates the mean, median, and std deviation of a list of numbers."""
  mean = sum(nums) / len(nums)
  nums.sort()
  # Integer division: for an odd-length list this is the middle sample.
  median = nums[(len(nums) - 1) // 2]
  diffs = ((n - mean) * (n - mean) for n in nums)
  std_dev = math.sqrt(sum(diffs) / len(nums))
  return [mean, median, std_dev]

def run_benchmark_once(benchmark, language):
  args = [language[1], benchmark[0] + language[2]]
  out = subprocess.check_output(args, universal_newlines=True)
  match = benchmark[1].match(out)
  if match:
    return float(match.group(1))

  print "Incorrect output:"
  print out
  return None

def run_benchmark_language(benchmark, language):
  name = "{0} - {1}".format(benchmark[0], language[0])
  print "{0:22s}".format(name),

  if not os.path.exists(benchmark[0] + language[2]):
    print "No implementation for this language"
    return

  times = []
  for i in range(0, NUM_TRIALS):
    time = run_benchmark_once(benchmark, language)
    # Compare against None explicitly so a (theoretical) 0.0 time isn't
    # mistaken for a failed run.
    if time is None:
      return
    times.append(time)
    sys.stdout.write(".")

  times.sort()
  stats = calc_stats(times)

  comparison = ""
  if language[0] == "wren":
    if benchmark[2] is not None:
      ratio = 100 * stats[1] / benchmark[2]
      comparison = "{0:.2f}% of baseline".format(ratio)
      # Slower than the baseline is bad; faster is good.
      if ratio > 105:
        comparison = red(comparison)
      if ratio < 95:
        comparison = green(comparison)
    else:
      comparison = "no baseline"
  else:
    # Hack: assumes wren is the first language run.
    wren_time = results[0][2]
    ratio = stats[1] / wren_time
    comparison = "{0:.2f}x wren".format(ratio)
    # Another language beating wren shows up red.
    if ratio < 1:
      comparison = red(comparison)
    if ratio > 1:
      comparison = green(comparison)

  print " mean: {0:.2f} median: {1:.2f} std_dev: {2:.2f} {3:s}".format(
      stats[0], stats[1], stats[2], comparison)

  results.append([name, times, stats[1]])
  return stats

def run_benchmark(benchmark, languages):
  for language in LANGUAGES:
    if not languages or language[0] in languages:
      run_benchmark_language(benchmark, language)
  graph_results()
  del results[:]

def graph_results():
  print

  INCREMENT = {
    '-': 'o',
    'o': 'O',
    'O': '0',
    '0': '0'
  }

  # Scale everything by the highest time.
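  # The chart below is 68 columns wide; each run is bucketed into the column
  # proportional to its time, and repeated hits on the same column step the
  # glyph through '-' -> 'o' -> 'O' -> '0' (via the INCREMENT table above), so
  # clusters of similar timings read as darker marks.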
  highest = 0
  for result in results:
    time = max(result[1])
    if time > highest:
      highest = time

  print "{0:22s}0.0 {1:64.4f}".format("", highest)
  for result in results:
    line = ["-"] * 68
    for time in result[1]:
      index = int(time / highest * 67)
      line[index] = INCREMENT[line[index]]
    print "{0:22s}{1}".format(result[0], "".join(line))
  print

def read_baseline():
  if os.path.exists("baseline.txt"):
    with open("baseline.txt") as f:
      for line in f.readlines():
        name, mean, median = line.split(",")
        for benchmark in BENCHMARKS:
          if benchmark[0] == name:
            benchmark[2] = float(median)

def generate_baseline():
  print "generating baseline"
  baseline_text = ""
  for benchmark in BENCHMARKS:
    stats = run_benchmark_language(benchmark, LANGUAGES[0])
    # Skip benchmarks that failed to run or produced bad output.
    if stats is None:
      continue
    baseline_text += ("{},{},{}\n".format(
        benchmark[0], stats[0], stats[1]))

  # Write them to a file.
  with open("baseline.txt", 'w') as out:
    out.write(baseline_text)

def main():
  parser = argparse.ArgumentParser(description="Run the benchmarks")
  parser.add_argument("benchmark", nargs='?',
      default="all",
      help="The benchmark to run")
  parser.add_argument("--generate-baseline",
      action="store_true",
      help="Generate a baseline file")
  parser.add_argument("-l", "--language",
      action="append",
      help="Which language(s) to run benchmarks for")
  args = parser.parse_args()

  if args.generate_baseline:
    generate_baseline()
    return

  read_baseline()

  if args.benchmark == "all":
    for benchmark in BENCHMARKS:
      run_benchmark(benchmark, args.language)
    return

  # Run the given benchmark.
  for benchmark in BENCHMARKS:
    if benchmark[0] == args.benchmark:
      run_benchmark(benchmark, args.language)

if __name__ == "__main__":
  main()
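# Example invocations (a sketch; assumes the interpreters listed in LANGUAGES
# are installed and the benchmark files sit in the working directory):
#
#   ./benchmark.py                        # all benchmarks, every language
#   ./benchmark.py fib -l wren -l lua     # one benchmark, two languages
#   ./benchmark.py --generate-baseline    # record wren's times in baseline.txt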