Chromium Code Reviews
| Index: tools/runtime-call-stats.py |
| diff --git a/tools/runtime-call-stats.py b/tools/runtime-call-stats.py |
| new file mode 100755 |
| index 0000000000000000000000000000000000000000..060cb12ffdb162cbfe81a376f66fd8a2a46ed29c |
| --- /dev/null |
| +++ b/tools/runtime-call-stats.py |
| @@ -0,0 +1,433 @@ |
| +#!/usr/bin/env python |
| +# Copyright 2016 the V8 project authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| +''' |
| +Usage: runtime-call-stats.py [-h] <command> ... |
| + |
| +Optional arguments: |
| + -h, --help show this help message and exit |
| + |
| +Commands: |
| + run run chrome with --runtime-call-stats and generate logs |
| + stats process logs and print statistics |
| + json process logs from several versions and generate JSON |
| + help help information |
| + |
| +For help on a specific command, run ./runtime-call-stats.py help <command>. |
| +''' |
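| + |
| +# Example invocations (hypothetical sites and paths): |
| +#   ./runtime-call-stats.py run -n 3 -c /usr/bin/google-chrome www.example.com |
| +#   ./runtime-call-stats.py stats -l 10 -s desc www.example.com#*.txt |
| +#   ./runtime-call-stats.py json logs/ver1 logs/ver2 > results.json |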
| + |
| +import argparse |
| +import json |
| +import os |
| +import re |
| +import shutil |
| +import subprocess |
| +import sys |
| +import tempfile |
| + |
| +import numpy |
| +import scipy |
| +import scipy.stats |
| +from math import sqrt |
| + |
| + |
| +# Run benchmarks. |
| + |
| +DEFAULT_SITES = [ |
| + # top websites (http://alexa.com/topsites): -------------------- |
| + "www.google.de/search?q=v8", |
| + "www.youtube.com", |
| + "www.facebook.com/shakira", |
| + "www.baidu.com/s?wd=v8", |
| + "www.yahoo.co.jp", |
| + "www.amazon.com/s/?field-keywords=v8", |
| + "en.wikipedia.org/wiki/main_page", |
| + "www.qq.com", |
| + "www.twitter.com/taylorswift13", |
| + "www.reddit.com", |
| + "www.ebay.com/sch/i.html?_nkw=v8", |
| + "edition.cnn.com", |
| + "world.taobao.com", |
| + "www.instagram.com/archdigest", |
| + "www.linkedin.com/pub/dir/?first=john&last=doe&search=search", |
| + "www.msn.com/ar-ae", |
| + "www.bing.com/search?q=v8+engine", |
| + "www.pinterest.com/categories/popular/", |
| + "www.sina.com.cn", |
| + "weibo.com", |
| + "yandex.ru/search/?text=v8", |
| +  # framework-driven decisions: ----------------------------------- |
| + # wikipedia content + angularjs |
| + "www.wikiwand.com/en/hill", |
| + # ember website |
| + "meta.discourse.org/", |
| + # backbone js |
| + "reddit.musicplayer.io", |
| + # gwt application |
| + "inbox.google.com", |
| + # webgl |
| + "www.google.de/maps/search/restaurant" |
| +] |
| + |
| +def run_site(site, domain, args): |
| + result_template = "{domain}#{count}.txt" if args.repeat else "{domain}.txt" |
| + count = 0 |
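| +  # Run the site once if --repeat was not given, otherwise args.repeat times. |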
| +  while count == 0 or (args.repeat is not None and count < args.repeat): |
| + count += 1 |
| + result = result_template.format(domain=domain, count=count) |
| + retries = 0 |
| + while args.retries is None or retries < args.retries: |
| + retries += 1 |
| + try: |
| +        temp_user_data_dir = args.user_data_dir is None |
| +        if temp_user_data_dir: |
| +          user_data_dir = tempfile.mkdtemp(prefix="chr_") |
| +        else: |
| +          user_data_dir = args.user_data_dir |
| + js_flags = "--runtime-call-stats" |
| + if args.js_flags: js_flags += " " + args.js_flags |
| + chrome_flags = [ |
| + "--disk-cache-size=1", |
| (nickie, 2016/04/27 12:22:16: I'm removing this option, as some websites do not ...) |
| + "--single-process", |
| + "--no-sandbox", |
| + "--js-flags={}".format(js_flags), |
| + "--no-first-run", |
| + "--user-data-dir={}".format(user_data_dir) |
| + ] |
| + cmd_args = [ |
| + "timeout", str(args.timeout), |
| + args.with_chrome |
| + ] + chrome_flags + [ site ] |
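| +        # The resulting command looks like (illustrative): |
| +        #   timeout 60 /usr/bin/google-chrome --disk-cache-size=1 |
| +        #     --single-process --no-sandbox '--js-flags=--runtime-call-stats' |
| +        #     --no-first-run --user-data-dir=/tmp/chr_XXXX www.example.com |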
| + def fix_for_printing(arg): |
| + m = re.match(r'^--([^=]+)=(.*)$', arg) |
| + if m and (' ' in m.group(2) or m.group(2).startswith('-')): |
| + arg = "--{}='{}'".format(m.group(1), m.group(2)) |
| + elif ' ' in arg: |
| + arg = "'{}'".format(arg) |
| + return arg |
| + print " ".join(map(fix_for_printing, cmd_args)) |
| + print "- " * 40 |
| + with open(result, "wt") as f: |
| + status = subprocess.call(cmd_args, stdout=f) |
| +        # 124 means that timeout killed chrome; 0 means that the user got |
| +        # bored first! If neither of the two happened, then chrome apparently |
| +        # crashed, so it must be run again. |
| + if status != 124 and status != 0: continue |
| + # If the stats file is empty, chrome must be called again. |
| + if os.path.isfile(result) and os.path.getsize(result) > 0: |
| + if args.print_url: |
| + with open(result, "at") as f: |
| + print >> f |
| + print >> f, "URL: {}".format(site) |
| + break |
| + finally: |
| + if temp_user_data_dir: |
| + shutil.rmtree(user_data_dir) |
| + |
| +def do_run(args): |
| + # Determine the websites to benchmark. |
| + if args.sites_file: |
| + sites = [] |
| + try: |
| + with open(args.sites_file, "rt") as f: |
| + for line in f: |
| + line = line.strip() |
| + if not line or line.startswith('#'): continue |
| + sites.append(line) |
| + except IOError as e: |
| + args.error("Cannot read from {}. {}.".format(args.sites_file, e.strerror)) |
| + sys.exit(1) |
| + elif args.sites: |
| + sites = args.sites |
| + else: |
| + sites = DEFAULT_SITES |
| + # Disambiguate domains, if needed. |
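| +  # (For example, if www.google.de appears twice in the list, the runs |
| +  # are labeled www.google.de%1 and www.google.de%2 below.) |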
| + L = [] |
| + domains = {} |
| + for site in sites: |
| + m = re.match(r'^(https?://)?([^/]+)(/.*)?$', site) |
| + if not m: |
| + args.error("Invalid URL {}.".format(site)) |
| + continue |
| + domain = m.group(2) |
| + entry = [site, domain, None] |
| + if domain not in domains: |
| + domains[domain] = entry |
| + else: |
| + if not isinstance(domains[domain], int): |
| + domains[domain][2] = 1 |
| + domains[domain] = 1 |
| + domains[domain] += 1 |
| + entry[2] = domains[domain] |
| + L.append(entry) |
| + # Run them. |
| + for site, domain, count in L: |
| + if count is not None: domain = "{}%{}".format(domain, count) |
| + print site, domain |
| + run_site(site, domain, args) |
| + |
| + |
| +# Calculate statistics. |
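| +# For N samples the 95% confidence half-width is t(0.975, N-1) * s / sqrt(N); |
| +# e.g. (illustrative) for data [10.0, 12.0, 11.0]: mean 11.0, sample stddev |
| +# s = 1.0, t(0.975, 2) ~= 4.30, so ci['abs'] ~= 4.30 / sqrt(3) ~= 2.48. |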
| + |
| +def statistics(data): |
| + N = len(data) |
| + average = numpy.average(data) |
| + median = numpy.median(data) |
| + low = numpy.min(data) |
| +  high = numpy.max(data) |
| + if N > 1: |
| +    # Evaluate the sample standard deviation by setting delta degrees of |
| +    # freedom (ddof) to 1; the divisor used in the calculation is N - ddof. |
| + stddev = numpy.std(data, ddof=1) |
| + # Get the endpoints of the range that contains 95% of the distribution |
| + t_bounds = scipy.stats.t.interval(0.95, N-1) |
| + #assert abs(t_bounds[0] + t_bounds[1]) < 1e-6 |
| +    # Add the mean to the confidence interval endpoints. |
| + ci = { |
| + 'abs': t_bounds[1] * stddev / sqrt(N), |
| + 'low': average + t_bounds[0] * stddev / sqrt(N), |
| + 'high': average + t_bounds[1] * stddev / sqrt(N) |
| + } |
| + else: |
| + stddev = 0 |
| + ci = { 'abs': 0, 'low': average, 'high': average } |
| + if abs(stddev) > 0.0001 and abs(average) > 0.0001: |
| + ci['perc'] = t_bounds[1] * stddev / sqrt(N) / average * 100 |
| + else: |
| + ci['perc'] = 0 |
| + return { 'samples': N, 'average': average, 'median': median, |
| + 'stddev': stddev, 'min': low, 'max': high, 'ci': ci } |
| + |
| +def read_stats(path, S): |
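| +  # S maps each counter name to {'time_list': [...], 'count_list': [...]}, |
| +  # gaining one element per list for every log file processed. |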
| + with open(path, "rt") as f: |
| + # Process the whole file and sum repeating entries. |
| + D = { 'Sum': {'time': 0, 'count': 0} } |
| + for line in f: |
| + line = line.strip() |
| + # Discard headers and footers. |
| + if not line: continue |
| + if line.startswith("Runtime Function"): continue |
| + if line.startswith("===="): continue |
| + if line.startswith("----"): continue |
| + if line.startswith("URL:"): continue |
| + # We have a regular line. |
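| +      # It looks like (illustrative): "LoadIC_Miss 0.52ms 1.2% 3456 7.8%", |
| +      # i.e. name, time, time percent, count, count percent. |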
| + fields = line.split() |
| + key = fields[0] |
| + time = float(fields[1].replace("ms", "")) |
| + count = int(fields[3]) |
| + if key not in D: D[key] = { 'time': 0, 'count': 0 } |
| + D[key]['time'] += time |
| + D[key]['count'] += count |
| +      # Add to the running sum, unless this is the "Total" line. |
| + if key != "Total": |
| + D['Sum']['time'] += time |
| + D['Sum']['count'] += count |
| + # Append the sums as single entries to S. |
| + for key in D: |
| + if key not in S: S[key] = { 'time_list': [], 'count_list': [] } |
| + S[key]['time_list'].append(D[key]['time']) |
| + S[key]['count_list'].append(D[key]['count']) |
| + |
| +def print_stats(S, args): |
| + # Sort by ascending/descending time average, then by ascending/descending |
| + # count average, then by ascending name. |
| + def sort_asc_func(item): |
| + return (item[1]['time_stat']['average'], |
| + item[1]['count_stat']['average'], |
| + item[0]) |
| + def sort_desc_func(item): |
| + return (-item[1]['time_stat']['average'], |
| + -item[1]['count_stat']['average'], |
| + item[0]) |
| +  # The sorting order comes from the command-line arguments. |
| + sort_func = sort_asc_func if args.sort == "asc" else sort_desc_func |
| + # Possibly limit how many elements to print. |
| + L = [item for item in sorted(S.items(), key=sort_func) |
| + if item[0] not in ["Total", "Sum"]] |
| + N = len(L) |
| +  if args.limit == 0: |
| +    low, high = 0, N |
| +  elif args.sort == "desc": |
| +    low, high = 0, min(args.limit, N) |
| +  else: |
| +    low, high = max(N - args.limit, 0), N |
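| +  # (e.g. --limit 5 with ascending sort keeps only the last five entries, |
| +  # i.e. the five with the highest average time.) |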
| + # How to print entries. |
| + def print_entry(key, value): |
| + def stats(s, units=""): |
| + conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc']) |
| + return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf) |
| + print "{:>50s} {} {}".format( |
| + key, |
| + stats(value['time_stat'], units="ms"), |
| + stats(value['count_stat']) |
| + ) |
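| +  # An output row looks like (illustrative): |
| +  #   GC    123.4ms +/- 2.5(2.03%)      567.0 +/- 1.2(0.21%) |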
| + # Print and calculate partial sums, if necessary. |
| + for i in range(low, high): |
| + print_entry(*L[i]) |
| + if args.totals and args.limit != 0: |
| + if i == low: |
| + partial = { 'time_list': [0] * len(L[i][1]['time_list']), |
| + 'count_list': [0] * len(L[i][1]['count_list']) } |
| + assert len(partial['time_list']) == len(L[i][1]['time_list']) |
| + assert len(partial['count_list']) == len(L[i][1]['count_list']) |
| + for j, v in enumerate(L[i][1]['time_list']): |
| + partial['time_list'][j] += v |
| + for j, v in enumerate(L[i][1]['count_list']): |
| + partial['count_list'][j] += v |
| + # Print totals, if necessary. |
| + if args.totals: |
| + print '-' * 80 |
| + if args.limit != 0: |
| + partial['time_stat'] = statistics(partial['time_list']) |
| + partial['count_stat'] = statistics(partial['count_list']) |
| + print_entry("Partial", partial) |
| + print_entry("Sum", S["Sum"]) |
| + print_entry("Total", S["Total"]) |
| + |
| +def do_stats(args): |
| + T = {} |
| + for path in args.logfiles: |
| + filename = os.path.basename(path) |
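| +    # Log files from repeated runs, e.g. www.example.com#1.txt and |
| +    # www.example.com#2.txt, aggregate under the same domain key. |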
| + m = re.match(r'^([^#]+)(#.*)?$', filename) |
| + domain = m.group(1) |
| + if domain not in T: T[domain] = {} |
| + read_stats(path, T[domain]) |
| + for i, domain in enumerate(sorted(T)): |
| + if len(T) > 1: |
| + if i > 0: print |
| + print "{}:".format(domain) |
| + print '=' * 80 |
| + S = T[domain] |
| + for key in S: |
| + S[key]['time_stat'] = statistics(S[key]['time_list']) |
| + S[key]['count_stat'] = statistics(S[key]['count_list']) |
| + print_stats(S, args) |
| + |
| + |
| +# Generate JSON file. |
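| +# The output maps version -> domain -> list of entries of the form |
| +# [name, time_avg, time_ci_abs, time_ci_perc, count_avg, count_ci_abs, |
| +# count_ci_perc]; e.g. (illustrative): |
| +#   {"ver1":{"www.example.com":[["GC",123.4,2.5,2.03,567.0,1.2,0.21]]}} |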
| + |
| +def do_json(args): |
| + J = {} |
| + for path in args.logdirs: |
| + if os.path.isdir(path): |
| + for root, dirs, files in os.walk(path): |
| + version = os.path.basename(root) |
| + if version not in J: J[version] = {} |
| + for filename in files: |
| + if filename.endswith(".txt"): |
| + m = re.match(r'^([^#]+)(#.*)?$', filename) |
| + domain = m.group(1) |
| + if domain not in J[version]: J[version][domain] = {} |
| + read_stats(os.path.join(root, filename), J[version][domain]) |
| + for version, T in J.items(): |
| + for domain, S in T.items(): |
| + A = [] |
| + for name, value in S.items(): |
| + # We don't want the calculated sum in the JSON file. |
| + if name == "Sum": continue |
| + entry = [name] |
| + for x in ['time_list', 'count_list']: |
| + s = statistics(S[name][x]) |
| + entry.append(round(s['average'], 1)) |
| + entry.append(round(s['ci']['abs'], 1)) |
| + entry.append(round(s['ci']['perc'], 2)) |
| + A.append(entry) |
| + T[domain] = A |
| + print json.dumps(J, separators=(',', ':')) |
| + |
| + |
| +# Help. |
| + |
| +def do_help(parser, subparsers, args): |
| + if args.help_cmd: |
| + if args.help_cmd in subparsers: |
| + subparsers[args.help_cmd].print_help() |
| + else: |
| + args.error("Unknown command '{}'".format(args.help_cmd)) |
| + else: |
| + parser.print_help() |
| + |
| + |
| +# Main program, parse command line and execute. |
| + |
| +def main(): |
| + parser = argparse.ArgumentParser() |
| + subparser_adder = parser.add_subparsers(title="commands", dest="command", |
| + metavar="<command>") |
| + subparsers = {} |
| + # Command: run. |
| + subparsers["run"] = subparser_adder.add_parser( |
| + "run", help="run --help") |
| + subparsers["run"].set_defaults( |
| + func=do_run, error=subparsers["run"].error) |
| + subparsers["run"].add_argument( |
| + "--js-flags", type=str, default="", |
| + help="specify additional V8 flags") |
| + subparsers["run"].add_argument( |
| + "--no-url", dest="print_url", action="store_false", default=True, |
| + help="do not include url in statistics file") |
| + subparsers["run"].add_argument( |
| + "-n", "--repeat", type=int, metavar="<num>", |
| + help="specify iterations for each website (default: once)") |
| + subparsers["run"].add_argument( |
| + "-r", "--retries", type=int, metavar="<num>", |
| + help="specify retries if website is down (default: forever)") |
| + subparsers["run"].add_argument( |
| + "-f", "--sites-file", type=str, metavar="<path>", |
| + help="specify file containing benchmark websites") |
| + subparsers["run"].add_argument( |
| + "-t", "--timeout", type=int, metavar="<seconds>", default=60, |
| + help="specify seconds before chrome is killed") |
| + subparsers["run"].add_argument( |
| + "-u", "--user-data-dir", type=str, metavar="<path>", |
| + help="specify user data dir (default is temporary)") |
| + subparsers["run"].add_argument( |
| + "-c", "--with-chrome", type=str, metavar="<path>", |
| + default="/usr/bin/google-chrome", |
| + help="specify chrome executable to use") |
| + subparsers["run"].add_argument( |
| + "sites", type=str, metavar="<URL>", nargs="*", |
| + help="specify benchmark website") |
| + # Command: stats. |
| + subparsers["stats"] = subparser_adder.add_parser( |
| + "stats", help="stats --help") |
| + subparsers["stats"].set_defaults( |
| + func=do_stats, error=subparsers["stats"].error) |
| + subparsers["stats"].add_argument( |
| + "-l", "--limit", type=int, metavar="<num>", default=0, |
| + help="limit how many items to print (default: none)") |
| + subparsers["stats"].add_argument( |
| + "-s", "--sort", choices=["asc", "desc"], default="asc", |
| + help="specify sorting order (default: ascending)") |
| + subparsers["stats"].add_argument( |
| + "-n", "--no-total", dest="totals", action="store_false", default=True, |
| + help="do not print totals") |
| + subparsers["stats"].add_argument( |
| + "logfiles", type=str, metavar="<logfile>", nargs="*", |
| + help="specify log files to parse") |
| + # Command: json. |
| + subparsers["json"] = subparser_adder.add_parser( |
| + "json", help="json --help") |
| + subparsers["json"].set_defaults( |
| + func=do_json, error=subparsers["json"].error) |
| + subparsers["json"].add_argument( |
| + "logdirs", type=str, metavar="<logdir>", nargs="*", |
| + help="specify directories with log files to parse") |
| + # Command: help. |
| + subparsers["help"] = subparser_adder.add_parser( |
| + "help", help="help information") |
| + subparsers["help"].set_defaults( |
| + func=lambda args: do_help(parser, subparsers, args), |
| + error=subparsers["help"].error) |
| + subparsers["help"].add_argument( |
| + "help_cmd", type=str, metavar="<command>", nargs="?", |
| + help="command for which to display help") |
| + # Execute the command. |
| + args = parser.parse_args() |
| + if args.command == "run" and args.sites_file and args.sites: |
| + args.error("if --sites-file is used, no site URLS must be given") |
| + sys.exit(1) |
| + else: |
| + args.func(args) |
| + |
| +if __name__ == "__main__": |
| + sys.exit(main()) |