| Index: tools/v8-perf-statistics.py
|
| diff --git a/tools/v8-perf-statistics.py b/tools/v8-perf-statistics.py
|
| new file mode 100755
|
| index 0000000000000000000000000000000000000000..188c2e3a868da6b7b08f62776b088d28e0a489f4
|
| --- /dev/null
|
| +++ b/tools/v8-perf-statistics.py
|
| @@ -0,0 +1,112 @@
|
| +#!/usr/bin/env python
|
| +# Copyright 2016 the V8 project authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +'''
|
| +(You'll need packages python-numpy and python-scipy.)
|
| +
|
| +This is a script for comparing v8-perf statistics for benchmarks run
|
| +on different versions of V8. Here's an example of how it can be used:
|
| +
|
| +```bash
|
| +cd v8-perf/benchmarks/Octane2.1
|
| +loop -100 ~/vanilla/v8/out/Release/d8 run-some.js -- code-load > vanilla.txt
|
| +loop -100 ~/v8/out/Release/d8 run-some.js -- code-load > modified.txt
|
| +./v8-perf-statistics.py vanilla.txt modified.txt
|
| +```
|
| +
|
| +where I have defined `loop` to be:
|
| +
|
| +```bash
|
| +function loop () {
|
| + limit=10
|
| + case "$1" in
|
| + -*)
|
| + limit="${1#-}"
|
| + shift
|
| + esac
|
| + for ((i=1; i<=$limit; i++)); do
|
| + printf "%5d " $i
|
| + $*
|
| + done
|
| +}
|
| +```
|
| +
|
| +Multiple files can be supplied to the command line. The base file is
|
| +taken to be the first, but you can change this, e.g.
|
| +
|
| +```bash
|
| +./v8-perf-statistics.py -2 modified-1.txt vanilla.txt modified-2.txt
|
| +```
|
| +'''
|
| +
|
| +import numpy, scipy, scipy.stats
|
| +from math import sqrt
|
| +
|
def statistics(data):
    """Summarize a list of samples.

    Returns a tuple (N, average, median, stdev, low, high, ci) where
    stdev is the *sample* standard deviation and ci is the two-element
    95% confidence interval for the mean, based on Student's t
    distribution with N-1 degrees of freedom.
    """
    count = len(data)
    mean = numpy.average(data)
    mid = numpy.median(data)
    lowest, highest = numpy.min(data), numpy.max(data)
    # ddof=1 gives the sample standard deviation: the divisor used in
    # the variance is count - ddof = N - 1.
    spread = numpy.std(data, ddof=1)
    # Critical t values bounding the central 95% of the t distribution.
    lo_t, hi_t = scipy.stats.t.interval(0.95, count - 1)
    # Shift/scale the critical values into a confidence interval
    # around the mean (standard error = stdev / sqrt(N)).
    interval = [mean + t * spread / sqrt(count) for t in (lo_t, hi_t)]
    return count, mean, mid, spread, lowest, highest, interval
|
| +
|
| +import re, sys
|
| +
|
F = []    # ordered list of input file names from the command line
T = {}    # benchmark key -> {file name -> statistics() tuple}
base = 0  # index into F of the baseline file (overridden by "-N" args)

# Match e.g. "CodeLoad-octane2.1(Score): 13761"
# Allow for an optional number in front, e.g. "1 RegExp-octane2.1(Score): 4951"
# Raw string, and the decimal point is escaped: the original non-raw
# "(\d+(.\d+)?)" let "." match any character, so junk such as "12x34"
# was accepted as a single number.
R = re.compile(r"\s*(\d+)?\s*([^:]*):\s*(\d+(\.\d+)?)\s*")
|
| +
|
# Read every file named on the command line and collect per-benchmark
# score lists, then reduce each list with statistics().
for arg in sys.argv[1:]:
    # "-N" selects the N-th file (1-based) as the comparison baseline.
    if arg[0] == "-":
        base = int(arg[1:]) - 1
        continue
    F.append(arg)
    with open(arg, "rt") as f:
        S = {}  # benchmark key -> list of scores found in this file
        for line in f:
            # Ignore empty lines.
            if not line.strip(): continue
            # Ignore lines printed by the run_perf.py tool.
            if line.startswith(">>> ") or line.startswith("{"): continue
            # The rest should come from the run.js script.
            m = R.match(line)
            assert m
            key = m.group(2)
            # float, not int: the pattern R explicitly allows a decimal
            # fraction in the score, and int() raises ValueError on
            # e.g. "137.5".
            value = float(m.group(3))
            if key not in S: S[key] = []
            S[key].append(value)
        for key, values in S.items():
            if key not in T: T[key] = {}
            T[key][arg] = statistics(values)
|
| +
|
# Resolve the baseline index into the actual baseline file name.
base = F[base]

# Print each benchmark's statistics per file, with the percentage
# difference from the baseline file next to each figure.
for key in T:
    # Single-argument parenthesized print is valid in both Python 2
    # and 3; the original print statements were a SyntaxError under
    # python3, which "#!/usr/bin/env python" commonly resolves to.
    print(key)
    for arg in F:
        def compare(i):
            # Return statistic i for this file plus its %-difference
            # from the baseline (or the marker "base" for the baseline
            # file itself).
            if arg == base:
                return T[key][arg][i], "base"
            diff = T[key][arg][i] - T[key][base][i]
            perc = 100.0 * diff / T[key][base][i]
            return T[key][arg][i], "{:+6.2f}".format(perc)
        print(" {}:".format(arg))
        print(" samples: {:5}".format(T[key][arg][0]))
        print(" average: {:8.2f} {}".format(*compare(1)))
        print(" median: {:8.2f} {}".format(*compare(2)))
        print(" stddev: {:8.2f} {}".format(*compare(3)))
        print(" minimum: {:5} {}".format(*compare(4)))
        print(" maximum: {:5} {}".format(*compare(5)))
        print(" CI 95%: {:8.2f} to {:8.2f}".format(*T[key][arg][6]))
|
|
|