Add baseline comparison to benchmark script.

This way, I can track performance improvements and regressions.
2013-12-07 22:14:56 -08:00
parent 42b04c6c90
commit 1e2449893e
3 changed files with 93 additions and 71 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,6 @@ xcuserdata/
 # I leave a temporary Wren script at the top level so that I can quickly test
 # stuff.
 scratch.wren
+
+# The baseline file is machine-specific, so it doesn't get checked in.
+benchmark/baseline.txt
--- a/benchmark/run_bench
+++ b/benchmark/run_bench
@@ -1,5 +1,6 @@
 #!/usr/bin/python

+import argparse
 import math
 import os
 import re
@@ -10,7 +11,7 @@ BENCHMARKS = []

 def BENCHMARK(name, pattern):
  regex = re.compile(pattern + "\n" + r"elapsed: (\d+\.\d+)", re.MULTILINE)
-  BENCHMARKS.append((name, regex))
+  BENCHMARKS.append([name, regex, None])

 BENCHMARK("binary_trees", """stretch tree of depth 15\t check: -1
 32768\t trees of depth 4\t check: -32768
@@ -40,10 +41,26 @@ LANGUAGES = [

 # How many times to run a given benchmark. Should be an odd number to get the
 # right median.
-NUM_TRIALS = 5
+NUM_TRIALS = 7

 results = []

+def green(text):
+  if sys.platform == 'win32':
+    return text
+  return '\033[32m' + text + '\033[0m'
+
+def red(text):
+  if sys.platform == 'win32':
+    return text
+  return '\033[31m' + text + '\033[0m'
+
+def yellow(text):
+  if sys.platform == 'win32':
+    return text
+  return '\033[33m' + text + '\033[0m'
+
+
 def calc_stats(nums):
  """Calculates the mean, median, and std deviation of a list of numbers."""
  mean = sum(nums) / len(nums)
@@ -84,10 +101,33 @@ def run_benchmark_language(benchmark, language):

  times.sort()
  stats = calc_stats(times)
-  print " mean: {0:.4f} median: {1:.4f} std_dev: {2:.4f}".format(
-      stats[0], stats[1], stats[2])

-  results.append([name, times])
+  comparison = ""
+  if language[0] == "wren":
+    if benchmark[2] != None:
+      ratio = 100 * stats[1] / benchmark[2]
+      comparison =  "{0:.2f}% of baseline".format(ratio)
+      if ratio > 105:
+        comparison = red(comparison)
+      if ratio < 95:
+        comparison = green(comparison)
+    else:
+      comparison = "no baseline"
+  else:
+    # Hack: assumes wren is first language.
+    wren_time = results[0][2]
+    ratio = stats[1] / wren_time
+    comparison =  "{0:.2f}x wren".format(ratio)
+    if ratio < 1:
+      comparison = red(comparison)
+    if ratio > 1:
+      comparison = green(comparison)
+
+  print " mean: {0:.2f} median: {1:.2f} std_dev: {2:.2f} {3:s}".format(
+      stats[0], stats[1], stats[2], comparison)
+
+  results.append([name, times, stats[1]])
+  return stats


 def run_benchmark(benchmark):
@@ -123,18 +163,54 @@ def graph_results():
  print


+def read_baseline():
+  if os.path.exists("baseline.txt"):
+    with open("baseline.txt") as f:
+      for line in f.readlines():
+        name, mean, median = line.split(",")
+        for benchmark in BENCHMARKS:
+          if benchmark[0] == name:
+            benchmark[2] = float(median)
+
+
+def generate_baseline():
+  print "generating baseline"
+  baseline_text = ""
+  for benchmark in BENCHMARKS:
+    stats = run_benchmark_language(benchmark, LANGUAGES[0])
+    baseline_text += ("{},{},{}\n".format(
+          benchmark[0], stats[0], stats[1]))
+
+  # Write them to a file.
+  with open("baseline.txt", 'w') as out:
+    out.write(baseline_text)
+
+
 def main():
-  # If a benchmark name is passed, just run it.
-  if len(sys.argv) == 2:
-    benchmark_name = sys.argv[1]
-    for benchmark in BENCHMARKS:
-      if benchmark[0] == benchmark_name:
-        run_benchmark(benchmark)
+  parser = argparse.ArgumentParser(description="Run the benchmarks")
+  parser.add_argument("benchmark", nargs='?',
+      default="all",
+      help="The benchmark to run")
+  parser.add_argument("-b", "--generate-baseline",
+      action="store_true",
+      help="Generate a baseline file")
+
+  args = parser.parse_args()
+
+  if args.generate_baseline:
+    generate_baseline()
    return

-  # Otherwise run them all.
-  for benchmark in BENCHMARKS:
-    run_benchmark(benchmark)
+  read_baseline()

+  if args.benchmark == "all":
+    for benchmark in BENCHMARKS:
+      run_benchmark(benchmark)
+    return
+
+  # Run the given benchmark.
+  for benchmark in BENCHMARKS:
+    if benchmark[0] == args.benchmark:
+      run_benchmark(benchmark)

 main()
--- a/57
+++ b/57
@@ -8,63 +8,6 @@ import sys
 import time
 from datetime import datetime

-TEMPLATE = """
-<!DOCTYPE html>
-<html>
-<head>
-<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
-<title>Wren - {title}</title>
-<link rel="stylesheet" type="text/css" href="style.css" />
-<link href='http://fonts.googleapis.com/css?family=Sanchez:400|Source+Sans+Pro:300,400,700,400italic,700italic|Source+Code+Pro:300,400' rel='stylesheet' type='text/css'>
-</head>
-<body id="top">
-<a href="https://github.com/munificent/wren">
-  <img style="position: absolute; top: 0; right: 0; border: 0; z-index: 100;" src="https://s3.amazonaws.com/github/ribbons/forkme_right_red_aa0000.png" alt="Fork me on GitHub">
-</a>
-<div class="header">
-  <div class="page">
-    <h1><a href="index.html">wren</a></h1>
-    <h2>a minimal class-based scripting language</h2>
-  </div>
-</div>
-<div class="page">
-  <div class="nav">
-    <h2>Welcome</h2>
-    <ul>
-      <li>Getting Started</li>
-    </ul>
-    <h2>Language</h2>
-    <ul>
-      <li><a href="syntax.html">Syntax</a></li>
-      <li>Method calls</li>
-      <li>Variables</li>
-      <li>Blocks</li>
-      <li><a href="flow-control.html">Flow control</a></li>
-    </ul>
-    <h2>Objects</h2>
-    <ul>
-      <li>Primitives</li>
-      <li>Classes</li>
-      <li>Functions</li>
-      <li>Fibers</li>
-      <li>Lists</li>
-      <li>Maps</li>
-    </ul>
-    <h2>Usage</h2>
-    <ul>
-      <li>Standalone</li>
-      <li>Embedding</li>
-    </ul>
-  </div>
-  <div class="content">
-<h1>{title}</h1>
-{html}
-<p class="footer">Last modified on {mod}. By Bob Nystrom.</p>
-</div>
-</body>
-</html>
-"""
-
 def format_file(path, skip_up_to_date):
  basename = os.path.basename(path)
  basename = basename.split('.')[0]