#!/usr/bin/env python
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''
Usage: runtime-call-stats.py [-h] <command> ...

Optional arguments:
  -h, --help          show this help message and exit

Commands:
  run                 run chrome with --runtime-call-stats and generate logs
  stats               process logs and print statistics
  json                process logs from several versions and generate JSON
  help                help information

For help on a specific command, run: ./runtime-call-stats.py help <command>
'''
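
# This script requires Python 2 (it uses print statements) plus numpy and
# scipy. Example workflow (illustrative; URLs, file and directory names are
# placeholders, not defaults of this script):
#   ./runtime-call-stats.py run -n 5 "https://www.google.de/search?q=v8"
#   ./runtime-call-stats.py stats www.google.de#*.txt
#   ./runtime-call-stats.py json logs-before logs-after > results.json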

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile

import numpy
import scipy
import scipy.stats
from math import sqrt


# Run benchmarks.

DEFAULT_SITES = [
  # top websites (http://alexa.com/topsites): --------------------
  "https://www.google.de/search?q=v8",
  "https://www.youtube.com",
  "https://www.facebook.com/shakira",
  "http://www.baidu.com/s?wd=v8",
  "http://www.yahoo.co.jp",
  "http://www.amazon.com/s/?field-keywords=v8",
  "http://hi.wikipedia.org/wiki/" \
      "%E0%A4%AE%E0%A5%81%E0%A4%96%E0%A4%AA%E0%A5%83%E0%A4%B7%E0%A5%8D%E0%A4%A0",
  "http://www.qq.com",
  "http://www.twitter.com/taylorswift13",
  "http://www.reddit.com",
  "http://www.ebay.fr/sch/i.html?_nkw=v8",
  "http://edition.cnn.com",
  "http://world.taobao.com",
  "http://www.instagram.com/archdigest",
  "https://www.linkedin.com/pub/dir/?first=john&last=doe&search=search",
  "http://www.msn.com/ar-ae",
  "http://www.bing.com/search?q=v8+engine",
  "http://www.pinterest.com/categories/popular",
  "http://www.sina.com.cn",
  "http://weibo.com",
  "http://yandex.ru/search/?text=v8",
  # framework driven decisions: -----------------------------------
  # wikipedia content + angularjs
  "http://www.wikiwand.com/en/hill",
  # ember website
  "http://meta.discourse.org",
  # backbone js
  "http://reddit.musicplayer.io",
  # gwt application
  "http://inbox.google.com",
  # webgl / algorithmic case
  "http://maps.google.co.jp/maps/search/restaurant+tokyo",
  # whatever framework adwords uses
  "https://adwords.google.com",
]


def print_command(cmd_args):
  def fix_for_printing(arg):
    m = re.match(r'^--([^=]+)=(.*)$', arg)
    if m and (' ' in m.group(2) or m.group(2).startswith('-')):
      arg = "--{}='{}'".format(m.group(1), m.group(2))
    elif ' ' in arg:
      arg = "'{}'".format(arg)
    return arg
  print " ".join(map(fix_for_printing, cmd_args))


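# Start a Web Page Replay server that replays the recorded .wpr archive given
# via --replay-wpr. The fixed ports 4080/4443 correspond to the
# --host-resolver-rules mapping that run_site() passes to Chrome.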
def start_replay_server(args):
  cmd_args = [
      args.replay_bin,
      "--port=4080",
      "--ssl_port=4443",
      "--no-dns_forwarding",
      "--use_closest_match",
      "--no-diff_unknown_requests",
      args.replay_wpr,
  ]
  print "=" * 80
  print_command(cmd_args)
  with open(os.devnull, 'w') as null:
    server = subprocess.Popen(cmd_args, stdout=null, stderr=null)
  print "RUNNING REPLAY SERVER: %s with PID=%s" % (args.replay_bin, server.pid)
  print "=" * 80
  return server


def stop_replay_server(server):
  print("SHUTTING DOWN REPLAY SERVER %s" % server.pid)
  server.terminate()


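# Run a single site in Chrome and write the --runtime-call-stats output to a
# log file named "<domain>.txt" (or "<domain>#<count>.txt" when --repeat is
# used). Runs that crash or produce an empty log are retried.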
def run_site(site, domain, args, timeout=None):
  print "="*80
  print "RUNNING DOMAIN %s" % domain
  print "="*80
  result_template = "{domain}#{count}.txt" if args.repeat else "{domain}.txt"
  count = 0
  while count == 0 or (args.repeat is not None and count < args.repeat):
    count += 1
    result = result_template.format(domain=domain, count=count)
    retries = 0
    while args.retries is None or retries < args.retries:
      retries += 1
      try:
        temp_user_data_dir = args.user_data_dir is None
        if temp_user_data_dir:
          user_data_dir = tempfile.mkdtemp(prefix="chr_")
        else:
          user_data_dir = args.user_data_dir
        js_flags = "--runtime-call-stats"
        if args.js_flags: js_flags += " " + args.js_flags
        chrome_flags = [
            "--no-default-browser-check",
            "--disable-translate",
            "--single-process",
            "--no-sandbox",
            "--js-flags={}".format(js_flags),
            "--no-first-run",
            "--user-data-dir={}".format(user_data_dir),
        ]
        if args.replay_wpr:
          chrome_flags += [
              "--host-resolver-rules=MAP *:80 localhost:4080, " \
                  "MAP *:443 localhost:4443, " \
                  "EXCLUDE localhost",
              "--ignore-certificate-errors",
              "--disable-web-security",
              "--reduce-security-for-testing",
              "--allow-insecure-localhost",
          ]
        if args.chrome_flags:
          chrome_flags += args.chrome_flags.split()
        if timeout is None: timeout = args.timeout
        cmd_args = [
            "timeout", str(timeout),
            args.with_chrome
        ] + chrome_flags + [ site ]
        print "- " * 40
        print_command(cmd_args)
        print "- " * 40
        with open(result, "wt") as f:
          status = subprocess.call(cmd_args, stdout=f)
        # 124 means timeout killed chrome, 0 means the user was bored first!
        # If neither of those happened, chrome apparently crashed, so it must
        # be run again.
        if status != 124 and status != 0:
          print("CHROME CRASHED, REPEATING RUN")
          continue
        # If the stats file is empty, chrome must be run again.
        if os.path.isfile(result) and os.path.getsize(result) > 0:
          if args.print_url:
            with open(result, "at") as f:
              print >> f
              print >> f, "URL: {}".format(site)
          break
        print("EMPTY RESULT, REPEATING RUN")
      finally:
        if temp_user_data_dir:
          shutil.rmtree(user_data_dir)


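# The --sites-file argument accepts either a JSON list of objects or a plain
# text list of URLs, one per line ('#' starts a comment). A minimal JSON
# example (illustrative values):
#   [{"url": "https://www.google.de/search?q=v8", "timeout": 30},
#    {"url": "https://www.youtube.com", "timeline": 10}]
# When 'timeout' is missing, it is derived from the 'timeline' field below.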
def read_sites_file(args):
  try:
    sites = []
    try:
      with open(args.sites_file, "rt") as f:
        for item in json.load(f):
          if 'timeout' not in item:
            # This is more-or-less arbitrary.
            item['timeout'] = int(2.5 * item['timeline'] + 3)
          if item['timeout'] > args.timeout: item['timeout'] = args.timeout
          sites.append(item)
    except ValueError:
      with open(args.sites_file, "rt") as f:
        for line in f:
          line = line.strip()
          if not line or line.startswith('#'): continue
          sites.append({'url': line, 'timeout': args.timeout})
    return sites
  except IOError as e:
    args.error("Cannot read from {}. {}.".format(args.sites_file, e.strerror))
    sys.exit(1)


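# Run the benchmark for every configured site. When several URLs share a
# domain, the domain used for log names gets a "%<n>" suffix so the results
# stay distinguishable.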
def do_run(args):
  # Determine the websites to benchmark.
  if args.sites_file:
    sites = read_sites_file(args)
  elif args.sites:
    sites = [{'url': site, 'timeout': args.timeout} for site in args.sites]
  else:
    sites = [{'url': site, 'timeout': args.timeout} for site in DEFAULT_SITES]
  # Disambiguate domains, if needed.
  L = []
  domains = {}
  for item in sites:
    site = item['url']
    m = re.match(r'^(https?://)?([^/]+)(/.*)?$', site)
    if not m:
      args.error("Invalid URL {}.".format(site))
      continue
    domain = m.group(2)
    entry = [site, domain, None, item['timeout']]
    if domain not in domains:
      domains[domain] = entry
    else:
      if not isinstance(domains[domain], int):
        domains[domain][2] = 1
        domains[domain] = 1
      domains[domain] += 1
      entry[2] = domains[domain]
    L.append(entry)
  replay_server = start_replay_server(args) if args.replay_wpr else None
  try:
    # Run them.
    for site, domain, count, timeout in L:
      if count is not None: domain = "{}%{}".format(domain, count)
      print site, domain, timeout
      run_site(site, domain, args, timeout)
  finally:
    if replay_server:
      stop_replay_server(replay_server)


# Calculate statistics.

def statistics(data):
  N = len(data)
  average = numpy.average(data)
  median = numpy.median(data)
  low = numpy.min(data)
  high = numpy.max(data)
  if N > 1:
    # Compute the sample standard deviation by setting the delta degrees of
    # freedom (ddof) to 1; the divisor used in the calculation is N - ddof.
    stddev = numpy.std(data, ddof=1)
    # Get the endpoints of the range that contains 95% of the distribution.
    t_bounds = scipy.stats.t.interval(0.95, N-1)
    #assert abs(t_bounds[0] + t_bounds[1]) < 1e-6
    # Add the mean to obtain the confidence interval bounds.
    ci = {
      'abs': t_bounds[1] * stddev / sqrt(N),
      'low': average + t_bounds[0] * stddev / sqrt(N),
      'high': average + t_bounds[1] * stddev / sqrt(N)
    }
  else:
    stddev = 0
    ci = { 'abs': 0, 'low': average, 'high': average }
  if abs(stddev) > 0.0001 and abs(average) > 0.0001:
    ci['perc'] = ci['abs'] / average * 100
  else:
    ci['perc'] = 0
  return { 'samples': N, 'average': average, 'median': median,
           'stddev': stddev, 'min': low, 'max': high, 'ci': ci }
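# Example (illustrative numbers): three samples [10.0, 12.0, 14.0] have an
# average of 12.0 and a sample stddev of 2.0; with the two-sided 95%
# t-interval for N-1 = 2 degrees of freedom (t ~= 4.30), the absolute CI
# half-width is about 4.30 * 2.0 / sqrt(3) ~= 4.97, i.e. roughly +/- 41%.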


def read_stats(path, S):
  with open(path, "rt") as f:
    # Process the whole file and sum repeating entries.
    D = { 'Sum': {'time': 0, 'count': 0} }
    for line in f:
      line = line.strip()
      # Discard headers and footers.
      if not line: continue
      if line.startswith("Runtime Function"): continue
      if line.startswith("===="): continue
      if line.startswith("----"): continue
      if line.startswith("URL:"): continue
      # We have a regular line.
      fields = line.split()
      key = fields[0]
      time = float(fields[1].replace("ms", ""))
      count = int(fields[3])
      if key not in D: D[key] = { 'time': 0, 'count': 0 }
      D[key]['time'] += time
      D[key]['count'] += count
      # Add the line to the sum, unless it is the "Total" line.
      if key != "Total":
        D['Sum']['time'] += time
        D['Sum']['count'] += count
    # Append the sums as single entries to S.
    for key in D:
      if key not in S: S[key] = { 'time_list': [], 'count_list': [] }
      S[key]['time_list'].append(D[key]['time'])
      S[key]['count_list'].append(D[key]['count'])
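# For reference, a data line in the --runtime-call-stats table parsed above
# looks roughly like this (illustrative values):
#       JS_Execution          23.45ms  32.71%         1   0.01%
# i.e. name, time in ms, time percentage, count, count percentage.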


def print_stats(S, args):
  # Sort by ascending/descending time average, then by ascending/descending
  # count average, then by ascending name.
  def sort_asc_func(item):
    return (item[1]['time_stat']['average'],
            item[1]['count_stat']['average'],
            item[0])
  def sort_desc_func(item):
    return (-item[1]['time_stat']['average'],
            -item[1]['count_stat']['average'],
            item[0])
  # Sorting order is in the command-line arguments.
  sort_func = sort_asc_func if args.sort == "asc" else sort_desc_func
  # Possibly limit how many elements to print.
  L = [item for item in sorted(S.items(), key=sort_func)
       if item[0] not in ["Total", "Sum"]]
  N = len(L)
  if args.limit == 0:
    low, high = 0, N
  elif args.sort == "desc":
    low, high = 0, min(args.limit, N)
  else:
    low, high = max(N - args.limit, 0), N
  # How to print entries.
  def print_entry(key, value):
    def stats(s, units=""):
      conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc'])
      return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf)
    print "{:>50s} {} {}".format(
      key,
      stats(value['time_stat'], units="ms"),
      stats(value['count_stat'])
    )
  # Print and calculate partial sums, if necessary.
  for i in range(low, high):
    print_entry(*L[i])
    if args.totals and args.limit != 0:
      if i == low:
        partial = { 'time_list': [0] * len(L[i][1]['time_list']),
                    'count_list': [0] * len(L[i][1]['count_list']) }
      assert len(partial['time_list']) == len(L[i][1]['time_list'])
      assert len(partial['count_list']) == len(L[i][1]['count_list'])
      for j, v in enumerate(L[i][1]['time_list']):
        partial['time_list'][j] += v
      for j, v in enumerate(L[i][1]['count_list']):
        partial['count_list'][j] += v
  # Print totals, if necessary.
  if args.totals:
    print '-' * 80
    if args.limit != 0:
      partial['time_stat'] = statistics(partial['time_list'])
      partial['count_stat'] = statistics(partial['count_list'])
      print_entry("Partial", partial)
    print_entry("Sum", S["Sum"])
    print_entry("Total", S["Total"])


def do_stats(args):
  T = {}
  for path in args.logfiles:
    filename = os.path.basename(path)
    m = re.match(r'^([^#]+)(#.*)?$', filename)
    domain = m.group(1)
    if domain not in T: T[domain] = {}
    read_stats(path, T[domain])
  for i, domain in enumerate(sorted(T)):
    if len(T) > 1:
      if i > 0: print
      print "{}:".format(domain)
      print '=' * 80
    S = T[domain]
    for key in S:
      S[key]['time_stat'] = statistics(S[key]['time_list'])
      S[key]['count_stat'] = statistics(S[key]['count_list'])
    print_stats(S, args)


# Generate JSON file.
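# The emitted JSON maps version (the basename of each log directory) ->
# domain -> a list of [name, time average, time CI, time CI %, count average,
# count CI, count CI %] entries, one per runtime function.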

def do_json(args):
  J = {}
  for path in args.logdirs:
    if os.path.isdir(path):
      for root, dirs, files in os.walk(path):
        version = os.path.basename(root)
        if version not in J: J[version] = {}
        for filename in files:
          if filename.endswith(".txt"):
            m = re.match(r'^([^#]+)(#.*)?$', filename)
            domain = m.group(1)
            if domain not in J[version]: J[version][domain] = {}
            read_stats(os.path.join(root, filename), J[version][domain])
  for version, T in J.items():
    for domain, S in T.items():
      A = []
      for name, value in S.items():
        # We don't want the calculated sum in the JSON file.
        if name == "Sum": continue
        entry = [name]
        for x in ['time_list', 'count_list']:
          s = statistics(S[name][x])
          entry.append(round(s['average'], 1))
          entry.append(round(s['ci']['abs'], 1))
          entry.append(round(s['ci']['perc'], 2))
        A.append(entry)
      T[domain] = A
  print json.dumps(J, separators=(',', ':'))


# Help.

def do_help(parser, subparsers, args):
  if args.help_cmd:
    if args.help_cmd in subparsers:
      subparsers[args.help_cmd].print_help()
    else:
      args.error("Unknown command '{}'".format(args.help_cmd))
  else:
    parser.print_help()


# Main program, parse command line and execute.

def main():
  parser = argparse.ArgumentParser()
  subparser_adder = parser.add_subparsers(title="commands", dest="command",
                                          metavar="<command>")
  subparsers = {}
  # Command: run.
  subparsers["run"] = subparser_adder.add_parser(
      "run", help="run --help")
  subparsers["run"].set_defaults(
      func=do_run, error=subparsers["run"].error)
  subparsers["run"].add_argument(
      "--chrome-flags", type=str, default="",
      help="specify additional chrome flags")
  subparsers["run"].add_argument(
      "--js-flags", type=str, default="",
      help="specify additional V8 flags")
  subparsers["run"].add_argument(
      "--no-url", dest="print_url", action="store_false", default=True,
      help="do not include url in statistics file")
  subparsers["run"].add_argument(
      "-n", "--repeat", type=int, metavar="<num>",
      help="specify iterations for each website (default: once)")
  subparsers["run"].add_argument(
      "--replay-wpr", type=str, metavar="<path>",
      help="use the specified web page replay (.wpr) archive")
  subparsers["run"].add_argument(
      "--replay-bin", type=str, metavar="<path>",
      help="specify the replay.py script typically located in " \
           "$CHROMIUM/src/third_party/webpagereplay/replay.py")
  subparsers["run"].add_argument(
      "-r", "--retries", type=int, metavar="<num>",
      help="specify retries if website is down (default: forever)")
  subparsers["run"].add_argument(
      "-f", "--sites-file", type=str, metavar="<path>",
      help="specify file containing benchmark websites")
  subparsers["run"].add_argument(
      "-t", "--timeout", type=int, metavar="<seconds>", default=60,
      help="specify seconds before chrome is killed")
  subparsers["run"].add_argument(
      "-u", "--user-data-dir", type=str, metavar="<path>",
      help="specify user data dir (default is temporary)")
  subparsers["run"].add_argument(
      "-c", "--with-chrome", type=str, metavar="<path>",
      default="/usr/bin/google-chrome",
      help="specify chrome executable to use")
  subparsers["run"].add_argument(
      "sites", type=str, metavar="<URL>", nargs="*",
      help="specify benchmark website")
  # Command: stats.
  subparsers["stats"] = subparser_adder.add_parser(
      "stats", help="stats --help")
  subparsers["stats"].set_defaults(
      func=do_stats, error=subparsers["stats"].error)
  subparsers["stats"].add_argument(
      "-l", "--limit", type=int, metavar="<num>", default=0,
      help="limit how many items to print (default: none)")
  subparsers["stats"].add_argument(
      "-s", "--sort", choices=["asc", "desc"], default="asc",
      help="specify sorting order (default: ascending)")
  subparsers["stats"].add_argument(
      "-n", "--no-total", dest="totals", action="store_false", default=True,
      help="do not print totals")
  subparsers["stats"].add_argument(
      "logfiles", type=str, metavar="<logfile>", nargs="*",
      help="specify log files to parse")
  # Command: json.
  subparsers["json"] = subparser_adder.add_parser(
      "json", help="json --help")
  subparsers["json"].set_defaults(
      func=do_json, error=subparsers["json"].error)
  subparsers["json"].add_argument(
      "logdirs", type=str, metavar="<logdir>", nargs="*",
      help="specify directories with log files to parse")
  # Command: help.
  subparsers["help"] = subparser_adder.add_parser(
      "help", help="help information")
  subparsers["help"].set_defaults(
      func=lambda args: do_help(parser, subparsers, args),
      error=subparsers["help"].error)
  subparsers["help"].add_argument(
      "help_cmd", type=str, metavar="<command>", nargs="?",
      help="command for which to display help")
  # Execute the command.
  args = parser.parse_args()
  if args.command == "run" and args.sites_file and args.sites:
    args.error("if --sites-file is used, no site URLs may be given")
    sys.exit(1)
  elif args.command == "run" and args.replay_wpr and not args.replay_bin:
    args.error("if --replay-wpr is used, --replay-bin must be given")
    sys.exit(1)
  else:
    args.func(args)


if __name__ == "__main__":
  sys.exit(main())