Mirror of https://github.com/wren-lang/wren.git (synced 2026-01-18)
Make benchmarks score-based instead of time-based.
@@ -7,9 +7,8 @@ import re
 import subprocess
 import sys
 
-# How many times to run a given benchmark. Should be an odd number to get the
-# right median.
-NUM_TRIALS = 7
+# How many times to run a given benchmark.
+NUM_TRIALS = 10
 
 BENCHMARKS = []
 
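Note: with the median gone, NUM_TRIALS no longer needs to be odd. The new scoring uses only the best (minimum) time, and extra trials can only tighten that minimum, since timing noise is one-sided. A minimal sketch of the best-of-N idea (the time_once workload below is a hypothetical stand-in for the script's run_trial):

    import timeit

    NUM_TRIALS = 10

    def time_once():
      # Hypothetical stand-in for run_trial: time one run of a workload.
      return timeit.timeit("sum(range(10000))", number=100)

    # Noise only ever makes a run slower, never faster, so the minimum
    # over the trials is the best available estimate of the true cost.
    best = min(time_once() for _ in range(NUM_TRIALS))
    print("best of {} trials: {:.4f}s".format(NUM_TRIALS, best))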
@@ -59,14 +58,13 @@ def yellow(text):
   return '\033[33m' + text + '\033[0m'
 
 
-def calc_stats(nums):
-  """Calculates the best, mean, and median of a list of numbers."""
-  mean = sum(nums) / len(nums)
-  nums.sort()
-  median = nums[(len(nums) - 1) / 2]
-  diffs = ((n - mean) * (n - mean) for n in nums)
-  std_dev = math.sqrt(sum(diffs) / len(nums))
-  return [nums[0], mean, median, std_dev]
+def get_score(time):
+  """
+  Converts time into a "score". This is the inverse of the time with an
+  arbitrary scale applied to get the number in a nice range. The goal here is
+  to have benchmark results where faster = bigger number.
+  """
+  return 1000.0 / time
 
 
 def run_trial(benchmark, language):
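Since get_score appears in full above, its key property can be checked directly: the score is inversely proportional to time, so halving the runtime doubles the score, and "faster" reads as "bigger" everywhere downstream:

    def get_score(time):
      # Inverse of the time, scaled by 1000.0 so typical benchmark times
      # (fractions of a second to a few seconds) give scores in a
      # readable hundreds-to-thousands range.
      return 1000.0 / time

    print(get_score(2.0))   # 500.0
    print(get_score(1.0))   # 1000.0 -- half the time, twice the score
    print(get_score(0.25))  # 4000.0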
@@ -99,35 +97,34 @@ def run_benchmark_language(benchmark, language):
     times.append(time)
     sys.stdout.write(".")
 
-  times.sort()
-  stats = calc_stats(times)
+  best = min(times)
+  score = get_score(best)
 
   comparison = ""
   if language[0] == "wren":
     if benchmark[2] != None:
-      ratio = 100 * stats[0] / benchmark[2]
-      comparison = "{0:.2f}% of baseline".format(ratio)
+      ratio = 100 * score / benchmark[2]
+      comparison = "{:6.2f}% relative to baseline".format(ratio)
       if ratio > 105:
-        comparison = red(comparison)
-      if ratio < 95:
         comparison = green(comparison)
+      if ratio < 95:
+        comparison = red(comparison)
     else:
       comparison = "no baseline"
   else:
     # Hack: assumes wren is first language.
-    wren_time = results[0][2]
-    ratio = stats[1] / wren_time
-    comparison = "{0:.2f}x wren".format(ratio)
-    if ratio < 1:
-      comparison = red(comparison)
-    if ratio > 1:
+    wren_score = results[0][2]
+    ratio = 100.0 * wren_score / score
+    comparison = "{:6.2f}%".format(ratio)
+    if ratio > 105:
       comparison = green(comparison)
+    if ratio < 95:
+      comparison = red(comparison)
 
-  print " best: {0:.2f} mean: {1:.2f} median: {2:.2f} {3:s}".format(
-      stats[0], stats[1], stats[2], comparison)
+  print " {:4.0f} {:4.2f}s {:s}".format(score, best, comparison)
 
-  results.append([name, times, stats[0]])
-  return stats
+  results.append([name, times, score])
+  return score
 
 
 def run_benchmark(benchmark, languages):
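Note how the color thresholds flip relative to the old time-based code: with scores, bigger means faster, and both branches now put wren's score in the numerator, so one rule applies throughout. Above 105% is green, below 95% is red, and anything within 5% is left uncolored as noise. A self-contained sketch of that shared rule, with red and green helpers mirroring the yellow() shown above (compare_scores itself is a hypothetical helper, not in the script):

    def green(text): return '\033[32m' + text + '\033[0m'
    def red(text): return '\033[31m' + text + '\033[0m'

    def compare_scores(score, reference):
      # Format score as a percentage of reference, colored green when
      # clearly faster and red when clearly slower.
      ratio = 100.0 * score / reference
      comparison = "{:6.2f}%".format(ratio)
      if ratio > 105:
        comparison = green(comparison)
      if ratio < 95:
        comparison = red(comparison)
      return comparison

    print(compare_scores(1100.0, 1000.0))  # green: 10% faster
    print(compare_scores(900.0, 1000.0))   # red: 10% slower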
@@ -139,37 +136,6 @@ def run_benchmark(benchmark, languages):
   del results[0:len(results)]
 
 
-# TODO(bob): Hook this up so it can be called.
-def solo_benchmark(benchmark, language):
-  """Runs a single language benchmark repeatedly, graphing the results."""
-  base = benchmark[2]
-  total = 0
-  for i in range(0, NUM_TRIALS):
-    time = run_trial(benchmark, language)
-    total += time
-    ratio = 100 * time / base
-
-    # TODO(bob): Show scale.
-
-    line = [" "] * 51
-    line[25] = "|"
-    index = 25 + int((time - base) * 200)
-    if index < 0: index = 0
-    if index > 50: index = 50
-    line[index] = "*"
-
-    comparison = "{0:.4f} ({1:6.2f}%) {2}".format(time, ratio, "".join(line))
-    if ratio > 105:
-      comparison = red(comparison)
-    if ratio < 95:
-      comparison = green(comparison)
-    print comparison
-
-  total /= NUM_TRIALS
-  print "----"
-  print "{0:.4f} ({1:6.2f}%)".format(total, 100 * total / base)
-
-
 def graph_results():
   print
 
@@ -180,17 +146,17 @@ def graph_results():
     '0': '0'
   }
 
-  # Scale everything by the highest time.
+  # Scale everything by the highest score.
   highest = 0
   for result in results:
-    time = max(result[1])
-    if time > highest: highest = time
+    score = get_score(min(result[1]))
+    if score > highest: highest = score
 
-  print "{0:22s}0.0 {1:64.4f}".format("", highest)
+  print "{0:22s}0 {1:66.0f}".format("", highest)
   for result in results:
     line = ["-"] * 68
     for time in result[1]:
-      index = int(time / highest * 67)
+      index = int(get_score(time) / highest * 67)
       line[index] = INCREMENT[line[index]]
     print "{0:22s}{1}".format(result[0], "".join(line))
   print
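graph_results now scales each 68-column histogram by the highest score rather than the highest time, so the fastest trial overall defines the right edge. A sketch of the bucketing with made-up trial times; the INCREMENT table below is an assumed stand-in, since only its '0' entry is visible in the hunk:

    def get_score(time):
      return 1000.0 / time

    # Assumed stand-in for the script's INCREMENT table: each hit on a
    # column bumps the digit drawn there, saturating at '*'.
    INCREMENT = {'-': '1', '1': '2', '2': '3', '3': '4', '4': '5',
                 '5': '6', '6': '7', '7': '8', '8': '9', '9': '*',
                 '*': '*', '0': '0'}

    times = [0.51, 0.52, 0.50, 0.55, 0.53]  # hypothetical trial times
    highest = get_score(min(times))         # best score sets the right edge

    line = ["-"] * 68
    for time in times:
      # Map each trial's score onto one of 68 columns; clustered trials
      # stack up, incrementing the digit shown in that column.
      index = int(get_score(time) / highest * 67)
      line[index] = INCREMENT[line[index]]
    print("".join(line))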
@@ -210,8 +176,8 @@ def generate_baseline():
   print "generating baseline"
   baseline_text = ""
   for benchmark in BENCHMARKS:
-    stats = run_benchmark_language(benchmark, LANGUAGES[0])
-    baseline_text += ("{},{}\n".format(benchmark[0], stats[0]))
+    best = run_benchmark_language(benchmark, LANGUAGES[0])
+    baseline_text += ("{},{}\n".format(benchmark[0], best))
 
   # Write them to a file.
   with open("baseline.txt", 'w') as out:
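generate_baseline now records the best score per benchmark instead of the best time, one "name,score" line per row. A sketch of the round trip with hypothetical file contents, showing how a runner could read the baseline back into the benchmark[2] slot that run_benchmark_language compares against:

    # Hypothetical contents: one "name,score" line per benchmark.
    baseline_text = "fib,512.8\nbinary_trees,96.4\n"
    with open("baseline.txt", 'w') as out:
      out.write(baseline_text)

    # Reading it back into a name -> score lookup.
    baselines = {}
    with open("baseline.txt") as f:
      for row in f:
        name, score = row.strip().split(",")
        baselines[name] = float(score)
    print(baselines["fib"])  # 512.8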