#!/usr/bin/python
# Runs the benchmarks in this directory for each language and compares the
# resulting times and scores.

import argparse
import math
import os
import re
import subprocess
import sys

# How many times to run a given benchmark.
NUM_TRIALS = 10

# Each entry is [name, regex that validates the output and captures the
# elapsed time, baseline score (filled in by read_baseline)].
BENCHMARKS = []

def BENCHMARK(name, pattern):
  regex = re.compile(pattern + "\n" + r"elapsed: (\d+\.\d+)", re.MULTILINE)
  BENCHMARKS.append([name, regex, None])

BENCHMARK("binary_trees", """stretch tree of depth 13 check: -1
8192 trees of depth 4 check: -8192
2048 trees of depth 6 check: -2048
512 trees of depth 8 check: -512
128 trees of depth 10 check: -128
32 trees of depth 12 check: -32
long lived tree of depth 12 check: -1""")

BENCHMARK("fib", r"""317811
317811
317811
317811
317811""")

BENCHMARK("for", r"""499999500000""")

BENCHMARK("method_call", r"""true
false""")

# (display name, command to invoke, file extension).
LANGUAGES = [
  ("wren",           ["../build/Release/wren"], ".wren"),
  ("lua",            ["lua"],                   ".lua"),
  # ("luajit",       ["luajit"],                ".lua"),
  ("luajit (-joff)", ["luajit", "-joff"],       ".lua"),
  ("python",         ["python"],                ".py"),
  ("ruby",           ["ruby"],                  ".rb")
]

results = []

def green(text):
  if sys.platform == 'win32':
    return text
  return '\033[32m' + text + '\033[0m'

def red(text):
  if sys.platform == 'win32':
    return text
  return '\033[31m' + text + '\033[0m'

def yellow(text):
  if sys.platform == 'win32':
    return text
  return '\033[33m' + text + '\033[0m'

def get_score(time):
  """
  Converts time into a "score". This is the inverse of the time with an
  arbitrary scale applied to get the number in a nice range. The goal here is
  to have benchmark results where faster = bigger number.
  """
  return 1000.0 / time

def run_trial(benchmark, language):
  """Runs one benchmark one time for one language."""
  args = []
  args.extend(language[1])
  args.append(benchmark[0] + language[2])
  out = subprocess.check_output(args, universal_newlines=True)
  match = benchmark[1].match(out)
  if match:
    return float(match.group(1))
  else:
    print "Incorrect output:"
    print out
    return None

def run_benchmark_language(benchmark, language):
  """Runs one benchmark for a number of trials for one language."""
  name = "{0} - {1}".format(benchmark[0], language[0])
  print "{0:30s}".format(name),

  if not os.path.exists(benchmark[0] + language[2]):
    print "No implementation for this language"
    return

  times = []
  for i in range(0, NUM_TRIALS):
    time = run_trial(benchmark, language)
    if time is None:
      return
    times.append(time)
    sys.stdout.write(".")

  best = min(times)
  score = get_score(best)

  comparison = ""
  if language[0] == "wren":
    if benchmark[2] is not None:
      ratio = 100 * score / benchmark[2]
      comparison = "{:6.2f}% relative to baseline".format(ratio)
      if ratio > 105:
        comparison = green(comparison)
      if ratio < 95:
        comparison = red(comparison)
    else:
      comparison = "no baseline"
  else:
    # Hack: assumes wren is the first language.
    wren_score = results[0][2]
    ratio = 100.0 * wren_score / score
    comparison = "{:6.2f}%".format(ratio)
    if ratio > 105:
      comparison = green(comparison)
    if ratio < 95:
      comparison = red(comparison)

  print " {:5.0f} {:4.2f}s {:s}".format(score, best, comparison)

  results.append([name, times, score])
  return score

def run_benchmark(benchmark, languages):
  """Runs one benchmark for the given languages (or all of them)."""
  num_languages = 0
  for language in LANGUAGES:
    if not languages or language[0] in languages:
      num_languages += 1
      run_benchmark_language(benchmark, language)

  if num_languages > 1:
    graph_results()
  del results[0:len(results)]

def graph_results():
  print

  # Each trial is plotted as one marker; when several trials land in the same
  # column, the marker "darkens" one step.
  INCREMENT = {
    '-': 'o',
    'o': 'O',
    'O': '0',
    '0': '0'
  }

  # Scale everything by the highest score.
  highest = 0
  for result in results:
    score = get_score(min(result[1]))
    if score > highest:
      highest = score

  print "{0:30s}0 {1:66.0f}".format("", highest)
  for result in results:
    line = ["-"] * 68
    for time in result[1]:
      index = int(get_score(time) / highest * 67)
      line[index] = INCREMENT[line[index]]
    print "{0:30s}{1}".format(result[0], "".join(line))
  print

def read_baseline():
  """Loads the baseline scores recorded by --generate-baseline, if any."""
  if os.path.exists("baseline.txt"):
    with open("baseline.txt") as f:
      for line in f.readlines():
        name, best = line.split(",")
        for benchmark in BENCHMARKS:
          if benchmark[0] == name:
            benchmark[2] = float(best)

def generate_baseline():
  print "generating baseline"
  baseline_text = ""
  for benchmark in BENCHMARKS:
    best = run_benchmark_language(benchmark, LANGUAGES[0])
    baseline_text += ("{},{}\n".format(benchmark[0], best))

  # Write them to a file.
  with open("baseline.txt", 'w') as out:
    out.write(baseline_text)

def main():
  parser = argparse.ArgumentParser(description="Run the benchmarks")
  parser.add_argument("benchmark", nargs='?',
      default="all",
      help="The benchmark to run")
  parser.add_argument("--generate-baseline",
      action="store_true",
      help="Generate a baseline file")
  parser.add_argument("-l", "--language",
      action="append",
      help="Which language(s) to run benchmarks for")

  args = parser.parse_args()

  if args.generate_baseline:
    generate_baseline()
    return

  read_baseline()

  # Run all benchmarks.
  if args.benchmark == "all":
    for benchmark in BENCHMARKS:
      run_benchmark(benchmark, args.language)
    return

  # Run the given benchmark.
  for benchmark in BENCHMARKS:
    if benchmark[0] == args.benchmark:
      run_benchmark(benchmark, args.language)

main()
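
# Example invocations, as a sketch. These assume the script is saved as
# benchmark.py, is run from this benchmark directory (the benchmark programs
# live here as <name>.wren, <name>.lua, etc.), and that the wren binary has
# been built at ../build/Release/wren as listed in LANGUAGES above.
#
#   ./benchmark.py                        # every benchmark, every language
#   ./benchmark.py fib -l wren -l lua     # only "fib", only wren and lua
#   ./benchmark.py --generate-baseline    # record wren's scores in baseline.txt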