#!/usr/bin/python
import argparse
import math
import os
import os.path
import re
import subprocess
import sys

# Runs the benchmarks for each language and compares the results.
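#
# Example invocations (illustrative; see the argparse setup in main() below):
#
#   ./run_bench                       # run every benchmark for every language
#   ./run_bench fib                   # run a single benchmark by name
#   ./run_bench -l wren -l lua        # only benchmark the given languages
#   ./run_bench --generate-baseline   # record wren's scores to baseline.txt
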
WREN_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
BENCHMARK_DIR = os.path.join(WREN_DIR, 'benchmark')
# How many times to run a given benchmark.
NUM_TRIALS = 10
BENCHMARKS = []

def BENCHMARK(name, pattern):
    regex = re.compile(pattern + "\n" + r"elapsed: (\d+\.\d+)", re.MULTILINE)
    BENCHMARKS.append([name, regex, None])
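
# Each entry in BENCHMARKS is [name, regex, baseline score]. The regex matches
# the benchmark's expected output followed by its "elapsed: <seconds>" line.
# The baseline slot starts as None and is filled in by read_baseline().
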
BENCHMARK("binary_trees", """stretch tree of depth 13 check: -1
8192 trees of depth 4 check: -8192
2048 trees of depth 6 check: -2048
512 trees of depth 8 check: -512
128 trees of depth 10 check: -128
32 trees of depth 12 check: -32
long lived tree of depth 12 check: -1""")
BENCHMARK("fib", r"""317811
317811
317811
317811
317811""")
BENCHMARK("for", r"""499999500000""")
BENCHMARK("method_call", r"""true
false""")

LANGUAGES = [
    ("wren", ["../wren"], ".wren"),
    ("lua", ["lua"], ".lua"),
    ("luajit (-joff)", ["luajit", "-joff"], ".lua"),
    ("python", ["python"], ".py"),
    ("ruby", ["ruby"], ".rb")
]
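
# Each LANGUAGES entry is (name, interpreter command, file extension).
# run_trial() appends the benchmark file to the command, so the "fib"
# benchmark under luajit runs something like (illustrative path):
#
#   ["luajit", "-joff", "/path/to/wren/benchmark/fib.lua"]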

# Results for the current benchmark: one [name, times, score] entry per
# language, cleared between benchmarks.
results = []

# ANSI color helpers. Windows consoles don't interpret the escape codes, so
# text is passed through unchanged there.
def green(text):
    if sys.platform == 'win32':
        return text
    return '\033[32m' + text + '\033[0m'


def red(text):
    if sys.platform == 'win32':
        return text
    return '\033[31m' + text + '\033[0m'


def yellow(text):
    if sys.platform == 'win32':
        return text
    return '\033[33m' + text + '\033[0m'

def get_score(time):
    """
    Converts time into a "score". This is the inverse of the time with an
    arbitrary scale applied to get the number in a nice range. The goal here
    is to have benchmark results where faster = bigger number.
    """
    return 1000.0 / time
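
# For example, a trial that finishes in 0.25 seconds scores 1000.0 / 0.25 =
# 4000, while one that takes 2 seconds scores 500.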

def run_trial(benchmark, language):
    """Runs one benchmark one time for one language."""
    args = []
    args.extend(language[1])
    args.append(os.path.join(BENCHMARK_DIR, benchmark[0] + language[2]))
    out = subprocess.check_output(args, universal_newlines=True)
    match = benchmark[1].match(out)
    if match:
        return float(match.group(1))
    else:
        print "Incorrect output:"
        print out
        return None

def run_benchmark_language(benchmark, language):
    """Runs one benchmark for a number of trials for one language."""
    name = "{0} - {1}".format(benchmark[0], language[0])
    print "{0:30s}".format(name),

    if not os.path.exists(os.path.join(
            BENCHMARK_DIR, benchmark[0] + language[2])):
        print "No implementation for this language"
        return

    times = []
    for i in range(0, NUM_TRIALS):
        time = run_trial(benchmark, language)
        if time is None:
            # The trial produced incorrect output, so abort this benchmark.
            return
        times.append(time)
        sys.stdout.write(".")

    best = min(times)
    score = get_score(best)

    comparison = ""
    if language[0] == "wren":
        if benchmark[2] is not None:
            ratio = 100 * score / benchmark[2]
            comparison = "{:6.2f}% relative to baseline".format(ratio)
            # Color anything more than 5% off the baseline.
            if ratio > 105:
                comparison = green(comparison)
            if ratio < 95:
                comparison = red(comparison)
        else:
            comparison = "no baseline"
    else:
        # Hack: assumes wren is the first language run.
        wren_score = results[0][2]
        ratio = 100.0 * wren_score / score
        comparison = "{:6.2f}%".format(ratio)
        if ratio > 105:
            comparison = green(comparison)
        if ratio < 95:
            comparison = red(comparison)

    print " {:5.0f} {:4.2f}s {:s}".format(score, best, comparison)
    results.append([name, times, score])
    return score
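
# A completed line of per-language output looks roughly like (illustrative
# numbers; one dot per trial):
#
#   fib - wren                     ..........  1234 0.81s  97.50% relative to baseline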

def run_benchmark(benchmark, languages):
    """Runs one benchmark for the given languages (or all of them)."""
    num_languages = 0
    for language in LANGUAGES:
        if not languages or language[0] in languages:
            num_languages += 1
            run_benchmark_language(benchmark, language)

    if num_languages > 1:
        graph_results()

    # Clear the results for the next benchmark.
    del results[:]

def graph_results():
    print

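    # When several trials land on the same column of a row, the mark there
    # darkens one step per hit: '-' becomes 'o', then 'O', then '0'.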
    INCREMENT = {
        '-': 'o',
        'o': 'O',
        'O': '0',
        '0': '0'
    }

    # Scale everything by the highest score.
    highest = 0
    for result in results:
        score = get_score(min(result[1]))
        if score > highest: highest = score

    print "{0:30s}0 {1:66.0f}".format("", highest)
    for result in results:
        line = ["-"] * 68
        for time in result[1]:
            index = int(get_score(time) / highest * 67)
            line[index] = INCREMENT[line[index]]
        print "{0:30s}{1}".format(result[0], "".join(line))
    print
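
# Each graph row is a 68-column strip scaled so that the best score seen sits
# at the right edge; every trial drops a mark at the column for its score, so
# faster (higher-scoring) runs cluster toward the right.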

def read_baseline():
    if os.path.exists("baseline.txt"):
        with open("baseline.txt") as f:
            for line in f.readlines():
                name, best = line.split(",")
                for benchmark in BENCHMARKS:
                    if benchmark[0] == name:
                        benchmark[2] = float(best)
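
# baseline.txt, written by generate_baseline() below and parsed above, holds
# one "name,score" pair per line, e.g. (illustrative values):
#
#   binary_trees,812.5
#   fib,1493.4
#
# Note that the path is relative to the current working directory, so the
# baseline is only found when the script is run from the benchmark directory.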

def generate_baseline():
    print "generating baseline"
    baseline_text = ""
    for benchmark in BENCHMARKS:
        best = run_benchmark_language(benchmark, LANGUAGES[0])
        baseline_text += "{},{}\n".format(benchmark[0], best)

    # Write them to a file.
    with open("baseline.txt", 'w') as out:
        out.write(baseline_text)

def main():
    parser = argparse.ArgumentParser(description="Run the benchmarks")
    parser.add_argument("benchmark", nargs='?',
                        default="all",
                        help="The benchmark to run")
    parser.add_argument("--generate-baseline",
                        action="store_true",
                        help="Generate a baseline file")
    parser.add_argument("-l", "--language",
                        action="append",
                        help="Which language(s) to run benchmarks for")
    args = parser.parse_args()

    if args.generate_baseline:
        generate_baseline()
        return

    read_baseline()

    # Run all benchmarks.
    if args.benchmark == "all":
        for benchmark in BENCHMARKS:
            run_benchmark(benchmark, args.language)
        return

    # Run the given benchmark.
    for benchmark in BENCHMARKS:
        if benchmark[0] == args.benchmark:
            run_benchmark(benchmark, args.language)

if __name__ == '__main__':
    main()