#!/usr/bin/python

import argparse
import math
import os
import re
import subprocess
import sys

# How many times to run a given benchmark.
NUM_TRIALS = 10

BENCHMARKS = []

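# Registers a benchmark. Each entry in BENCHMARKS is [name, regex, baseline]:
# the regex matches the benchmark's expected output followed by its
# "elapsed: <seconds>" line, and the baseline score is filled in later by
# read_baseline().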
def BENCHMARK(name, pattern):
  regex = re.compile(pattern + "\n" + r"elapsed: (\d+\.\d+)", re.MULTILINE)
  BENCHMARKS.append([name, regex, None])

BENCHMARK("binary_trees", """stretch tree of depth 13 check: -1
8192 trees of depth 4 check: -8192
2048 trees of depth 6 check: -2048
512 trees of depth 8 check: -512
128 trees of depth 10 check: -128
32 trees of depth 12 check: -32
long lived tree of depth 12 check: -1""")

BENCHMARK("fib", r"""317811
317811
317811
317811
317811""")

BENCHMARK("method_call", r"""true
false""")

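# Each language is a (name, interpreter command, file extension) tuple. The
# benchmark program for a language is looked up as <benchmark name> +
# <extension> (e.g. "fib.wren") relative to the current working directory.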
LANGUAGES = [
  ("wren",   "../build/Release/wren", ".wren"),
  ("lua",    "lua",                   ".lua"),
  ("python", "python",                ".py"),
  ("ruby",   "ruby",                  ".rb")
]

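# Accumulates [name, times, score] entries for the benchmark currently being
# run; cleared again after its results have been graphed.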
results = []

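# ANSI color helpers. The escape codes are skipped on Windows, where the
# console may not interpret them.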
def green(text):
  if sys.platform == 'win32':
    return text
  return '\033[32m' + text + '\033[0m'

def red(text):
  if sys.platform == 'win32':
    return text
  return '\033[31m' + text + '\033[0m'

def yellow(text):
  if sys.platform == 'win32':
    return text
  return '\033[33m' + text + '\033[0m'


def get_score(time):
  """
  Converts time into a "score". This is the inverse of the time with an
  arbitrary scale applied to get the number in a nice range. The goal here is
  to have benchmark results where faster = bigger number.
  """
  return 1000.0 / time


def run_trial(benchmark, language):
  """Runs one benchmark one time for one language."""
  args = [language[1], benchmark[0] + language[2]]
  out = subprocess.check_output(args, universal_newlines=True)
  match = benchmark[1].match(out)
  if match:
    return float(match.group(1))
  else:
    print "Incorrect output:"
    print out
    return None


def run_benchmark_language(benchmark, language):
  """Runs one benchmark for a number of trials for one language."""
  name = "{0} - {1}".format(benchmark[0], language[0])
  print "{0:22s}".format(name),

  if not os.path.exists(benchmark[0] + language[2]):
    print "No implementation for this language"
    return

  times = []
  for i in range(0, NUM_TRIALS):
    time = run_trial(benchmark, language)
    if not time:
      return
    times.append(time)
    sys.stdout.write(".")

  best = min(times)
  score = get_score(best)

  comparison = ""
  if language[0] == "wren":
    if benchmark[2] != None:
      ratio = 100 * score / benchmark[2]
      comparison = "{:6.2f}% relative to baseline".format(ratio)
      if ratio > 105:
        comparison = green(comparison)
      if ratio < 95:
        comparison = red(comparison)
    else:
      comparison = "no baseline"
  else:
    # Hack: assumes wren is first language.
    wren_score = results[0][2]
    ratio = 100.0 * wren_score / score
    comparison = "{:6.2f}%".format(ratio)
    if ratio > 105:
      comparison = green(comparison)
    if ratio < 95:
      comparison = red(comparison)

  print " {:4.0f} {:4.2f}s {:s}".format(score, best, comparison)

  results.append([name, times, score])
  return score


def run_benchmark(benchmark, languages):
  """Runs one benchmark for the given languages (or all of them)."""
  for language in LANGUAGES:
    if not languages or language[0] in languages:
      run_benchmark_language(benchmark, language)
  graph_results()
  del results[0:len(results)]

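# Prints a crude ASCII scatter plot of the collected results: one row per
# result, with each trial's score bucketed into one of 68 columns. Repeated
# hits in the same column step the marker through '-' -> 'o' -> 'O' -> '0'.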
def graph_results():
  print

  INCREMENT = {
    '-': 'o',
    'o': 'O',
    'O': '0',
    '0': '0'
  }

  # Scale everything by the highest score.
  highest = 0
  for result in results:
    score = get_score(min(result[1]))
    if score > highest: highest = score

  print "{0:22s}0 {1:66.0f}".format("", highest)
  for result in results:
    line = ["-"] * 68
    for time in result[1]:
      index = int(get_score(time) / highest * 67)
      line[index] = INCREMENT[line[index]]
    print "{0:22s}{1}".format(result[0], "".join(line))
  print

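# Loads baseline.txt, if present. Each line is "<benchmark name>,<score>"; the
# score becomes that benchmark's baseline for the relative comparison.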
def read_baseline():
  if os.path.exists("baseline.txt"):
    with open("baseline.txt") as f:
      for line in f.readlines():
        name, best = line.split(",")
        for benchmark in BENCHMARKS:
          if benchmark[0] == name:
            benchmark[2] = float(best)

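# Runs every benchmark for the first language (wren) and writes the resulting
# scores to baseline.txt for later runs to compare against.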
def generate_baseline():
  print "generating baseline"
  baseline_text = ""
  for benchmark in BENCHMARKS:
    best = run_benchmark_language(benchmark, LANGUAGES[0])
    baseline_text += ("{},{}\n".format(benchmark[0], best))

  # Write them to a file.
  with open("baseline.txt", 'w') as out:
    out.write(baseline_text)

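# Parses the command line: an optional benchmark name (default "all"), an
# optional --generate-baseline flag, and -l/--language, which may be repeated
# to restrict which languages are run.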
def main():
  parser = argparse.ArgumentParser(description="Run the benchmarks")
  parser.add_argument("benchmark", nargs='?',
      default="all",
      help="The benchmark to run")
  parser.add_argument("--generate-baseline",
      action="store_true",
      help="Generate a baseline file")
  parser.add_argument("-l", "--language",
      action="append",
      help="Which language(s) to run benchmarks for")

  args = parser.parse_args()

  if args.generate_baseline:
    generate_baseline()
    return

  read_baseline()

  # Run all benchmarks.
  if args.benchmark == "all":
    for benchmark in BENCHMARKS:
      run_benchmark(benchmark, args.language)
    return

  # Run the given benchmark.
  for benchmark in BENCHMARKS:
    if benchmark[0] == args.benchmark:
      run_benchmark(benchmark, args.language)


main()