tools/runtime-call-stats.py - Issue 1922873004: Add script for benchmarking with --runtime-call-stats

Side by Side Diff: tools/runtime-call-stats.py

Issue 1922873004: Add script for benchmarking with --runtime-call-stats (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Update websites and chrome flags Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 #!/usr/bin/env python

	2 # Copyright 2016 the V8 project authors. All rights reserved.

	3 # Use of this source code is governed by a BSD-style license that can be

	4 # found in the LICENSE file.

	5 '''

	6 Usage: runtime-call-stats.py [-h] <command> ...

	7

	8 Optional arguments:

	9 -h, --help show this help message and exit

	10

	11 Commands:

	12 run run chrome with --runtime-call-stats and generate logs

	13 stats process logs and print statistics

	14 json process logs from several versions and generate JSON

	15 help help information

	16

	17 For each command, you can try ./runtime-call-stats.py help command.

	18 '''

	19

	20 import argparse

	21 import json

	22 import os

	23 import re

	24 import shutil

	25 import subprocess

	26 import sys

	27 import tempfile

	28

	29 import numpy

	30 import scipy

	31 import scipy.stats

	32 from math import sqrt

	33

	34

	35 # Run benchmarks.

	36

	37 DEFAULT_SITES = [

	38 # top websites (http://alexa.com/topsites): --------------------

	39 "https://www.google.de/search?q=v8",

	40 "https://www.youtube.com",

	41 "https://www.facebook.com/shakira",

	42 "http://www.baidu.com/s?wd=v8",

	43 "http://www.yahoo.co.jp",

	44 "http://www.amazon.com/s/?field-keywords=v8",

	45 "http://hi.wikipedia.org/wiki/%E0%A4%AE%E0%A5%81%E0%A4%96%E0%A4%AA%E0%A5%83% E0%A4%B7%E0%A5%8D%E0%A4%A0",

	46 "http://www.qq.com",

	47 "http://www.twitter.com/taylorswift13",

	48 "http://www.reddit.com",

	49 "http://www.ebay.fr/sch/i.html?_nkw=v8",

	50 "http://edition.cnn.com",

	51 "http://world.taobao.com",

	52 "http://www.instagram.com/archdigest",

	53 "https://www.linkedin.com/pub/dir/?first=john&last=doe&search=search",

	54 "http://www.msn.com/ar-ae",

	55 "http://www.bing.com/search?q=v8+engine",

	56 "http://www.pinterest.com/categories/popular",

	57 "http://www.sina.com.cn",

	58 "http://weibo.com",

	59 "http://yandex.ru/search/?text=v8",

	60 # wikipedia content + angularjs

	61 "http://www.wikiwand.com/en/hill",

	62 # ember website

	63 "http://meta.discourse.org",

	64 # backbone js

	65 "http://reddit.musicplayer.io",

	66 # gwt application

	67 "http://inbox.google.com",

	68 "http://adwords.google.com",

	69 # webgl / algorithmic case

	70 "http://maps.google.co.jp/maps/search/restaurant+tokyo"

	71 ]

	72

	73 def run_site(site, domain, args):

	74 result_template = "{domain}#{count}.txt" if args.repeat else "{domain}.txt"

	75 count = 0

	76 while count == 0 or args.repeat is not None and count < args.repeat:

	77 count += 1

	78 result = result_template.format(domain=domain, count=count)

	79 retries = 0

	80 while args.retries is None or retries < args.retries:

	81 retries += 1

	82 try:

	83 temp_user_data_dir = args.user_data_dir is None

	84 if temp_user_data_dir:

	85 user_data_dir = tempfile.mkdtemp(prefix="chr_")

	86 js_flags = "--runtime-call-stats"

	87 if args.js_flags: js_flags += " " + args.js_flags

	88 chrome_flags = [

	89 "--no-default-browser-check",

	90 "--disable-translate"
	nickie 2016/04/27 15:08:15 There's a comma missing here... :-) There's a comma missing here... :-)
	91 "--single-process",

	92 "--no-sandbox",

	93 "--js-flags={}".format(js_flags),

	94 "--no-first-run",

	95 "--user-data-dir={}".format(user_data_dir)

	96 ]

	97 if args.chrome_flags:

	98 chrome_flags.extend(args.chrome_flags.split())

	99 cmd_args = [

	100 "timeout", str(args.timeout),

	101 args.with_chrome

	102 ] + chrome_flags + [ site ]

	103 def fix_for_printing(arg):

	104 m = re.match(r'^--([^=]+)=(.*)$', arg)

	105 if m and (' ' in m.group(2) or m.group(2).startswith('-')):

	106 arg = "--{}='{}'".format(m.group(1), m.group(2))

	107 elif ' ' in arg:

	108 arg = "'{}'".format(arg)

	109 return arg

	110 print " ".join(map(fix_for_printing, cmd_args))

	111 print "- " * 40

	112 with open(result, "wt") as f:

	113 status = subprocess.call(cmd_args, stdout=f)

	114 # 124 means timeout killed chrome, 0 means the user was bored first!

	115 # If none of these two happened, then chrome apparently crashed, so

	116 # it must be called again.

	117 if status != 124 and status != 0: continue

	118 # If the stats file is empty, chrome must be called again.

	119 if os.path.isfile(result) and os.path.getsize(result) > 0:

	120 if args.print_url:

	121 with open(result, "at") as f:

	122 print >> f

	123 print >> f, "URL: {}".format(site)

	124 break

	125 finally:

	126 if temp_user_data_dir:

	127 shutil.rmtree(user_data_dir)

	128

	129 def do_run(args):

	130 # Determine the websites to benchmark.

	131 if args.sites_file:

	132 sites = []

	133 try:

	134 with open(args.sites_file, "rt") as f:

	135 for line in f:

	136 line = line.strip()

	137 if not line or line.startswith('#'): continue

	138 sites.append(line)

	139 except IOError as e:

	140 args.error("Cannot read from {}. {}.".format(args.sites_file, e.strerror))

	141 sys.exit(1)

	142 elif args.sites:

	143 sites = args.sites

	144 else:

	145 sites = DEFAULT_SITES

	146 # Disambiguate domains, if needed.

	147 L = []

	148 domains = {}

	149 for site in sites:

	150 m = re.match(r'^(https?://)?([^/]+)(/.*)?$', site)

	151 if not m:

	152 args.error("Invalid URL {}.".format(site))

	153 continue

	154 domain = m.group(2)

	155 entry = [site, domain, None]

	156 if domain not in domains:

	157 domains[domain] = entry

	158 else:

	159 if not isinstance(domains[domain], int):

	160 domains[domain][2] = 1

	161 domains[domain] = 1

	162 domains[domain] += 1

	163 entry[2] = domains[domain]

	164 L.append(entry)

	165 # Run them.

	166 for site, domain, count in L:

	167 if count is not None: domain = "{}%{}".format(domain, count)

	168 print site, domain

	169 run_site(site, domain, args)

	170

	171

	172 # Calculate statistics.

	173

	174 def statistics(data):

	175 N = len(data)

	176 average = numpy.average(data)

	177 median = numpy.median(data)

	178 low = numpy.min(data)

	179 high= numpy.max(data)

	180 if N > 1:

	181 # evaluate sample variance by setting delta degrees of freedom (ddof) to

	182 # 1. The degree used in calculations is N - ddof

	183 stddev = numpy.std(data, ddof=1)

	184 # Get the endpoints of the range that contains 95% of the distribution

	185 t_bounds = scipy.stats.t.interval(0.95, N-1)

	186 #assert abs(t_bounds[0] + t_bounds[1]) < 1e-6

	187 # sum mean to the confidence interval

	188 ci = {

	189 'abs': t_bounds[1] * stddev / sqrt(N),

	190 'low': average + t_bounds[0] * stddev / sqrt(N),

	191 'high': average + t_bounds[1] * stddev / sqrt(N)

	192 }

	193 else:

	194 stddev = 0

	195 ci = { 'abs': 0, 'low': average, 'high': average }

	196 if abs(stddev) > 0.0001 and abs(average) > 0.0001:

	197 ci['perc'] = t_bounds[1] * stddev / sqrt(N) / average * 100

	198 else:

	199 ci['perc'] = 0

	200 return { 'samples': N, 'average': average, 'median': median,

	201 'stddev': stddev, 'min': low, 'max': high, 'ci': ci }

	202

	203 def read_stats(path, S):

	204 with open(path, "rt") as f:

	205 # Process the whole file and sum repeating entries.

	206 D = { 'Sum': {'time': 0, 'count': 0} }

	207 for line in f:

	208 line = line.strip()

	209 # Discard headers and footers.

	210 if not line: continue

	211 if line.startswith("Runtime Function"): continue

	212 if line.startswith("===="): continue

	213 if line.startswith("----"): continue

	214 if line.startswith("URL:"): continue

	215 # We have a regular line.

	216 fields = line.split()

	217 key = fields[0]

	218 time = float(fields[1].replace("ms", ""))

	219 count = int(fields[3])

	220 if key not in D: D[key] = { 'time': 0, 'count': 0 }

	221 D[key]['time'] += time

	222 D[key]['count'] += count

	223 # We calculate the sum, if it's not the "total" line.

	224 if key != "Total":

	225 D['Sum']['time'] += time

	226 D['Sum']['count'] += count

	227 # Append the sums as single entries to S.

	228 for key in D:

	229 if key not in S: S[key] = { 'time_list': [], 'count_list': [] }

	230 S[key]['time_list'].append(D[key]['time'])

	231 S[key]['count_list'].append(D[key]['count'])

	232

	233 def print_stats(S, args):

	234 # Sort by ascending/descending time average, then by ascending/descending

	235 # count average, then by ascending name.

	236 def sort_asc_func(item):

	237 return (item[1]['time_stat']['average'],

	238 item[1]['count_stat']['average'],

	239 item[0])

	240 def sort_desc_func(item):

	241 return (-item[1]['time_stat']['average'],

	242 -item[1]['count_stat']['average'],

	243 item[0])

	244 # Sorting order is in the commend-line arguments.

	245 sort_func = sort_asc_func if args.sort == "asc" else sort_desc_func

	246 # Possibly limit how many elements to print.

	247 L = [item for item in sorted(S.items(), key=sort_func)

	248 if item[0] not in ["Total", "Sum"]]

	249 N = len(L)

	250 if args.limit == 0:

	251 low, high = 0, N

	252 elif args.sort == "desc":

	253 low, high = 0, args.limit

	254 else:

	255 low, high = N-args.limit, N

	256 # How to print entries.

	257 def print_entry(key, value):

	258 def stats(s, units=""):

	259 conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc'])

	260 return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf)

	261 print "{:>50s} {} {}".format(

	262 key,

	263 stats(value['time_stat'], units="ms"),

	264 stats(value['count_stat'])

	265 )

	266 # Print and calculate partial sums, if necessary.

	267 for i in range(low, high):

	268 print_entry(*L[i])

	269 if args.totals and args.limit != 0:

	270 if i == low:

	271 partial = { 'time_list': [0] * len(L[i][1]['time_list']),

	272 'count_list': [0] * len(L[i][1]['count_list']) }

	273 assert len(partial['time_list']) == len(L[i][1]['time_list'])

	274 assert len(partial['count_list']) == len(L[i][1]['count_list'])

	275 for j, v in enumerate(L[i][1]['time_list']):

	276 partial['time_list'][j] += v

	277 for j, v in enumerate(L[i][1]['count_list']):

	278 partial['count_list'][j] += v

	279 # Print totals, if necessary.

	280 if args.totals:

	281 print '-' * 80

	282 if args.limit != 0:

	283 partial['time_stat'] = statistics(partial['time_list'])

	284 partial['count_stat'] = statistics(partial['count_list'])

	285 print_entry("Partial", partial)

	286 print_entry("Sum", S["Sum"])

	287 print_entry("Total", S["Total"])

	288

	289 def do_stats(args):

	290 T = {}

	291 for path in args.logfiles:

	292 filename = os.path.basename(path)

	293 m = re.match(r'^([^#]+)(#.*)?$', filename)

	294 domain = m.group(1)

	295 if domain not in T: T[domain] = {}

	296 read_stats(path, T[domain])

	297 for i, domain in enumerate(sorted(T)):

	298 if len(T) > 1:

	299 if i > 0: print

	300 print "{}:".format(domain)

	301 print '=' * 80

	302 S = T[domain]

	303 for key in S:

	304 S[key]['time_stat'] = statistics(S[key]['time_list'])

	305 S[key]['count_stat'] = statistics(S[key]['count_list'])

	306 print_stats(S, args)

	307

	308

	309 # Generate JSON file.

	310

	311 def do_json(args):

	312 J = {}

	313 for path in args.logdirs:

	314 if os.path.isdir(path):

	315 for root, dirs, files in os.walk(path):

	316 version = os.path.basename(root)

	317 if version not in J: J[version] = {}

	318 for filename in files:

	319 if filename.endswith(".txt"):

	320 m = re.match(r'^([^#]+)(#.*)?$', filename)

	321 domain = m.group(1)

	322 if domain not in J[version]: J[version][domain] = {}

	323 read_stats(os.path.join(root, filename), J[version][domain])

	324 for version, T in J.items():

	325 for domain, S in T.items():

	326 A = []

	327 for name, value in S.items():

	328 # We don't want the calculated sum in the JSON file.

	329 if name == "Sum": continue

	330 entry = [name]

	331 for x in ['time_list', 'count_list']:

	332 s = statistics(S[name][x])

	333 entry.append(round(s['average'], 1))

	334 entry.append(round(s['ci']['abs'], 1))

	335 entry.append(round(s['ci']['perc'], 2))

	336 A.append(entry)

	337 T[domain] = A

	338 print json.dumps(J, separators=(',', ':'))

	339

	340

	341 # Help.

	342

	343 def do_help(parser, subparsers, args):

	344 if args.help_cmd:

	345 if args.help_cmd in subparsers:

	346 subparsers[args.help_cmd].print_help()

	347 else:

	348 args.error("Unknown command '{}'".format(args.help_cmd))

	349 else:

	350 parser.print_help()

	351

	352

	353 # Main program, parse command line and execute.

	354

	355 def main():

	356 parser = argparse.ArgumentParser()

	357 subparser_adder = parser.add_subparsers(title="commands", dest="command",

	358 metavar="<command>")

	359 subparsers = {}

	360 # Command: run.

	361 subparsers["run"] = subparser_adder.add_parser(

	362 "run", help="run --help")

	363 subparsers["run"].set_defaults(

	364 func=do_run, error=subparsers["run"].error)

	365 subparsers["run"].add_argument(

	366 "--chrome-flags", type=str, default="",

	367 help="specify additional chrome flags")

	368 subparsers["run"].add_argument(

	369 "--js-flags", type=str, default="",

	370 help="specify additional V8 flags")

	371 subparsers["run"].add_argument(

	372 "--no-url", dest="print_url", action="store_false", default=True,

	373 help="do not include url in statistics file")

	374 subparsers["run"].add_argument(

	375 "-n", "--repeat", type=int, metavar="<num>",

	376 help="specify iterations for each website (default: once)")

	377 subparsers["run"].add_argument(

	378 "-r", "--retries", type=int, metavar="<num>",

	379 help="specify retries if website is down (default: forever)")

	380 subparsers["run"].add_argument(

	381 "-f", "--sites-file", type=str, metavar="<path>",

	382 help="specify file containing benchmark websites")

	383 subparsers["run"].add_argument(

	384 "-t", "--timeout", type=int, metavar="<seconds>", default=60,

	385 help="specify seconds before chrome is killed")

	386 subparsers["run"].add_argument(

	387 "-u", "--user-data-dir", type=str, metavar="<path>",

	388 help="specify user data dir (default is temporary)")

	389 subparsers["run"].add_argument(

	390 "-c", "--with-chrome", type=str, metavar="<path>",

	391 default="/usr/bin/google-chrome",

	392 help="specify chrome executable to use")

	393 subparsers["run"].add_argument(

	394 "sites", type=str, metavar="<URL>", nargs="*",

	395 help="specify benchmark website")

	396 # Command: stats.

	397 subparsers["stats"] = subparser_adder.add_parser(

	398 "stats", help="stats --help")

	399 subparsers["stats"].set_defaults(

	400 func=do_stats, error=subparsers["stats"].error)

	401 subparsers["stats"].add_argument(

	402 "-l", "--limit", type=int, metavar="<num>", default=0,

	403 help="limit how many items to print (default: none)")

	404 subparsers["stats"].add_argument(

	405 "-s", "--sort", choices=["asc", "desc"], default="asc",

	406 help="specify sorting order (default: ascending)")

	407 subparsers["stats"].add_argument(

	408 "-n", "--no-total", dest="totals", action="store_false", default=True,

	409 help="do not print totals")

	410 subparsers["stats"].add_argument(

	411 "logfiles", type=str, metavar="<logfile>", nargs="*",

	412 help="specify log files to parse")

	413 # Command: json.

	414 subparsers["json"] = subparser_adder.add_parser(

	415 "json", help="json --help")

	416 subparsers["json"].set_defaults(

	417 func=do_json, error=subparsers["json"].error)

	418 subparsers["json"].add_argument(

	419 "logdirs", type=str, metavar="<logdir>", nargs="*",

	420 help="specify directories with log files to parse")

	421 # Command: help.

	422 subparsers["help"] = subparser_adder.add_parser(

	423 "help", help="help information")

	424 subparsers["help"].set_defaults(

	425 func=lambda args: do_help(parser, subparsers, args),

	426 error=subparsers["help"].error)

	427 subparsers["help"].add_argument(

	428 "help_cmd", type=str, metavar="<command>", nargs="?",

	429 help="command for which to display help")

	430 # Execute the command.

	431 args = parser.parse_args()

	432 if args.command == "run" and args.sites_file and args.sites:

	433 args.error("if --sites-file is used, no site URLS must be given")

	434 sys.exit(1)

	435 else:

	436 args.func(args)

	437

	438 if __name__ == "__main__":

	439 sys.exit(main())

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »