#!/usr/bin/env python
from __future__ import print_function
import argparse
import math
import os
import os.path
import re
import subprocess
import sys
# Runs the benchmarks.
WREN_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
BENCHMARK_DIR = os.path.join(WREN_DIR, 'benchmark')
# How many times to run a given benchmark.
NUM_TRIALS = 10
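
# Each BENCHMARKS entry is [name, expected-output regex, baseline score]. The
# baseline slot starts as None and is filled in later by read_baseline().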
BENCHMARKS = []

def BENCHMARK(name, pattern):
  regex = re.compile(pattern + "\n" + r"elapsed: (\d+\.\d+)", re.MULTILINE)
  BENCHMARKS.append([name, regex, None])

BENCHMARK("binary_trees", """stretch tree of depth 13 check: -1
8192 trees of depth 4 check: -8192
2048 trees of depth 6 check: -2048
512 trees of depth 8 check: -512
128 trees of depth 10 check: -128
32 trees of depth 12 check: -32
long lived tree of depth 12 check: -1""")
BENCHMARK("delta_blue", "7032700")
BENCHMARK("fib", r"""317811
317811
317811
317811
317811""")
BENCHMARK("for", r"""499999500000""")
BENCHMARK("method_call", r"""true
false""")
LANGUAGES = [
  ("wren", ["../wren"], ".wren"),
  ("lua", ["lua"], ".lua"),
  ("luajit (-joff)", ["luajit", "-joff"], ".lua"),
  ("python", ["python"], ".py"),
  ("python3", ["python3"], ".py"),
  ("ruby", ["ruby"], ".rb")
]
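
# Maps benchmark name -> language name -> {"desc", "times", "score"}; filled
# in by run_benchmark() and run_benchmark_language().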
results = {}

def green(text):
  if sys.platform == 'win32':
    return text
  return '\033[32m' + text + '\033[0m'

def red(text):
  if sys.platform == 'win32':
    return text
  return '\033[31m' + text + '\033[0m'

def yellow(text):
  if sys.platform == 'win32':
    return text
  return '\033[33m' + text + '\033[0m'

def get_score(time):
  """
  Converts time into a "score". This is the inverse of the time with an
  arbitrary scale applied to get the number in a nice range. The goal here is
  to have benchmark results where faster = bigger number.
  """
  return 1000.0 / time
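
# A 2.0 second run therefore scores 500, and a 0.5 second run scores 2000.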

def run_trial(benchmark, language):
  """Runs one benchmark one time for one language."""
  args = []
  args.extend(language[1])
  args.append(os.path.join(BENCHMARK_DIR, benchmark[0] + language[2]))

  out = subprocess.check_output(args, universal_newlines=True)
  match = benchmark[1].match(out)
  if match:
    return float(match.group(1))
  else:
    print("Incorrect output:")
    print(out)

  return None

def run_benchmark_language(benchmark, language, benchmark_result):
  """
  Runs one benchmark for a number of trials for one language.
  Adds the result to benchmark_result, which is a map of language names to
  results.
  """
  name = "{0} - {1}".format(benchmark[0], language[0])
  print("{0:30s}".format(name), end=' ')

  if not os.path.exists(os.path.join(
      BENCHMARK_DIR, benchmark[0] + language[2])):
    print("No implementation for this language")
    return

  times = []
  for i in range(0, NUM_TRIALS):
    time = run_trial(benchmark, language)
    if not time:
      return
    times.append(time)
    sys.stdout.write(".")

  best = min(times)
  score = get_score(best)

  comparison = ""
  if language[0] == "wren":
    if benchmark[2] != None:
      ratio = 100 * score / benchmark[2]
      comparison = "{:6.2f}% relative to baseline".format(ratio)
      if ratio > 105:
        comparison = green(comparison)
      if ratio < 95:
        comparison = red(comparison)
    else:
      comparison = "no baseline"
  else:
    # Hack: assumes wren gets run first.
    wren_score = benchmark_result["wren"]["score"]
    ratio = 100.0 * wren_score / score
    comparison = "{:6.2f}%".format(ratio)
    if ratio > 105:
      comparison = green(comparison)
    if ratio < 95:
      comparison = red(comparison)

  print(" {:5.0f} {:4.2f}s {:s}".format(score, best, comparison))

  benchmark_result[language[0]] = {
    "desc": name,
    "times": times,
    "score": score
  }

  return score

def run_benchmark(benchmark, languages):
  """Runs one benchmark for the given languages (or all of them)."""
  benchmark_result = {}
  results[benchmark[0]] = benchmark_result

  num_languages = 0
  for language in LANGUAGES:
    if not languages or language[0] in languages:
      num_languages += 1
      run_benchmark_language(benchmark, language, benchmark_result)

  if num_languages > 1:
    graph_results(benchmark_result)

def graph_results(benchmark_result):
  print()

  INCREMENT = {
    '-': 'o',
    'o': 'O',
    'O': '0',
    '0': '0'
  }
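
  # Marks darken as more trials land in the same chart column: '-' (no hits),
  # then 'o', 'O', and '0'.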

  # Scale everything by the highest score.
  highest = 0
  for language, result in benchmark_result.items():
    score = get_score(min(result["times"]))
    if score > highest: highest = score

  print("{0:30s}0 {1:66.0f}".format("", highest))

  for language, result in benchmark_result.items():
    line = ["-"] * 68
    for time in result["times"]:
      index = int(get_score(time) / highest * 67)
      line[index] = INCREMENT[line[index]]
    print("{0:30s}{1}".format(result["desc"], "".join(line)))

  print()

def read_baseline():
  if os.path.exists("baseline.txt"):
    with open("baseline.txt") as f:
      for line in f.readlines():
        name, best = line.split(",")
        for benchmark in BENCHMARKS:
          if benchmark[0] == name:
            benchmark[2] = float(best)

def generate_baseline():
  print("generating baseline")
  baseline_text = ""
  for benchmark in BENCHMARKS:
    best = run_benchmark_language(benchmark, LANGUAGES[0], {})
    baseline_text += ("{},{}\n".format(benchmark[0], best))

  # Write them to a file.
  with open("baseline.txt", 'w') as out:
    out.write(baseline_text)
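
# baseline.txt holds one "name,score" line per benchmark, where score is the
# value reported by get_score for the wren interpreter (e.g. "fib,<score>").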

def print_html():
  '''Print the results as an HTML chart.'''
  def print_benchmark(benchmark, name):
    print('<h3>{}</h3>'.format(name))
    print('<table class="chart">')

    # Scale everything by the highest score.
    highest = 0
    for language, result in results[benchmark].items():
      score = get_score(min(result["times"]))
      if score > highest: highest = score

    languages = sorted(results[benchmark].keys(),
        key=lambda lang: results[benchmark][lang]["score"], reverse=True)

    for language in languages:
      result = results[benchmark][language]
      score = int(result["score"])
      ratio = int(100 * score / highest)

      css_class = "chart-bar"
      if language == "wren":
        css_class += " wren"

      print(' <tr>')
      print(' <th>{}</th><td><div class="{}" style="width: {}%;">{}&nbsp;</div></td>'.format(
        language, css_class, ratio, score))
      print(' </tr>')

    print('</table>')

  print_benchmark("method_call", "Method Call")
  print_benchmark("delta_blue", "DeltaBlue")
  print_benchmark("binary_trees", "Binary Trees")
  print_benchmark("fib", "Recursive Fibonacci")

def main():
  parser = argparse.ArgumentParser(description="Run the benchmarks")
  parser.add_argument("benchmark", nargs='?',
      default="all",
      help="The benchmark to run")
  parser.add_argument("--generate-baseline",
      action="store_true",
      help="Generate a baseline file")
  parser.add_argument("-l", "--language",
      action="append",
      help="Which language(s) to run benchmarks for")
  parser.add_argument("--output-html",
      action="store_true",
      help="Output the results chart as HTML")

  args = parser.parse_args()

  if args.generate_baseline:
    generate_baseline()
    return

  read_baseline()

  # Run the benchmarks.
  for benchmark in BENCHMARKS:
    if benchmark[0] == args.benchmark or args.benchmark == "all":
      run_benchmark(benchmark, args.language)

  if args.output_html:
    print_html()

main()