wren/benchmark/run_all (Python, executable, forked from Mirror/wren)

Commit edb9032052 by Bob Nystrom, 2013-11-29 16:19:13 -08:00: "Get binary_trees
benchmark working." Wren is actually doing well in it:

wren      mean: 1.9441 median: 1.9428 std_dev: 0.0260
lua       mean: 3.5992 median: 3.6033 std_dev: 0.0156
python    mean: 3.6667 median: 3.7097 std_dev: 0.1340
ruby      mean: 1.3941 median: 1.3914 std_dev: 0.0091

#!/usr/bin/python
import math
import os
import re
import subprocess
import sys
BENCHMARKS = []
def BENCHMARK(name, pattern):
  BENCHMARKS.append((name, re.compile(pattern, re.MULTILINE)))
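
# Each pattern below is a benchmark's full expected output, ending with a
# capture group around the "elapsed: ..." line that every implementation
# prints; run_benchmark_once() parses that captured group as a float.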
BENCHMARK("binary_trees", """stretch tree of depth 15\t check: -1
32768\t trees of depth 4\t check: -32768
8192\t trees of depth 6\t check: -8192
2048\t trees of depth 8\t check: -2048
512\t trees of depth 10\t check: -512
128\t trees of depth 12\t check: -128
32\t trees of depth 14\t check: -32
long lived tree of depth 14\t check: -1
elapsed: (\\d+\\.\\d+)""")
BENCHMARK("fib", r"""832040
832040
832040
832040
832040
elapsed: (\d+\.\d+)""")
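
# Registering another benchmark follows the same shape; a minimal sketch with
# a hypothetical "my_bench" whose implementations print "42" and then the
# elapsed time:
#
#   BENCHMARK("my_bench", r"42\nelapsed: (\d+\.\d+)")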

# Each entry is (name, interpreter command, source file extension); a
# benchmark's implementation for a language is <benchmark name><extension>.
LANGUAGES = [
  ("wren",   "../build/Release/wren", ".wren"),
  ("lua",    "lua",                   ".lua"),
  ("python", "python",                ".py"),
  ("ruby",   "ruby",                  ".rb"),
  ("js",     "node",                  ".js")
]
# How many times to run a given benchmark. Should be an odd number to get the
# right median.
NUM_TRIALS = 7
results = []

def calc_stats(nums):
  """Calculates the mean, median, and std deviation of a list of numbers."""
  mean = sum(nums) / len(nums)

  nums.sort()
  median = nums[(len(nums) - 1) / 2]

  diffs = ((n - mean) * (n - mean) for n in nums)
  std_dev = math.sqrt(sum(diffs) / len(nums))
  return [mean, median, std_dev]
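
# For example: calc_stats([1.0, 2.0, 3.0]) returns [2.0, 2.0, 0.8164...]
# (the population standard deviation, since it divides by len(nums)).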

def run_benchmark_once(benchmark, language):
  """Runs one benchmark in one language and returns the elapsed time, or
  None if the output didn't match the expected pattern."""
  args = [language[1], benchmark[0] + language[2]]
  out = subprocess.check_output(args, universal_newlines=True)
  match = benchmark[1].match(out)
  if match:
    return float(match.group(1))
  else:
    print "Incorrect output:"
    print out
    return None

def run_benchmark(benchmark, language):
  print "{0} - {1:10s}".format(benchmark[0], language[0]),

  if not os.path.exists(benchmark[0] + language[2]):
    print "No implementation for this language"
    return

  times = []
  for i in range(0, NUM_TRIALS):
    time = run_benchmark_once(benchmark, language)
    # Check against None explicitly: a legitimate 0.0 elapsed time is falsy.
    if time is None:
      return
    times.append(time)
    sys.stdout.write(".")

  times.sort()
  stats = calc_stats(times)
  print " mean: {0:.4f} median: {1:.4f} std_dev: {2:.4f}".format(
      stats[0], stats[1], stats[2])
  results.append([benchmark[0] + " - " + language[0], times])

def graph_results():
  print

  # Scale everything by the highest time.
  highest = 0
  for result in results:
    time = max(result[1])
    if time > highest: highest = time

  print "{0:24s} 0.0 {1:76.4f}".format("", highest)
  for result in results:
    line = ["-"] * 80
    for time in result[1]:
      line[int(time / highest * 79)] = "O"
    print "{0:24s} {1}".format(result[0], "".join(line))

for benchmark in BENCHMARKS:
  for language in LANGUAGES:
    run_benchmark(benchmark, language)
graph_results()
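
# Note: the paths here are relative (../build/Release/wren and the benchmark
# sources), so this script assumes it is run from the benchmark/ directory
# with a Release build of Wren already made.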