# NOTE(review): residue of a code-review diff-table header ("OLD | NEW")
# converted to a comment; it is not part of the program.
1 #!/usr/bin/env python | |
2 # Copyright 2016 the V8 project authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 ''' | |
6 Usage: runtime-call-stats.py [-h] <command> ... | |
7 | |
8 Optional arguments: | |
9 -h, --help show this help message and exit | |
10 | |
11 Commands: | |
12 run run chrome with --runtime-call-stats and generate logs | |
13 stats process logs and print statistics | |
14 json process logs from several versions and generate JSON | |
15 help help information | |
16 | |
17 For each command, you can try ./runtime-call-stats.py help command. | |
18 ''' | |
19 | |
20 import argparse | |
21 import json | |
22 import os | |
23 import re | |
24 import shutil | |
25 import subprocess | |
26 import sys | |
27 import tempfile | |
28 | |
29 import numpy | |
30 import scipy | |
31 import scipy.stats | |
32 from math import sqrt | |
33 | |
34 | |
35 # Run benchmarks. | |
36 | |
# Websites benchmarked when the user does not supply any.
DEFAULT_SITES = [
    # top websites (http://alexa.com/topsites): --------------------
    "www.google.de/search?q=v8",
    "www.youtube.com",
    "www.facebook.com/shakira",
    "www.baidu.com/s?wd=v8",
    "www.yahoo.co.jp",
    "www.amazon.com/s/?field-keywords=v8",
    "en.wikipedia.org/wiki/main_page",
    "www.qq.com",
    "www.twitter.com/taylorswift13",
    "www.reddit.com",
    "www.ebay.com/sch/i.html?_nkw=v8",
    "edition.cnn.com",
    "world.taobao.com",
    "www.instagram.com/archdigest",
    "www.linkedin.com/pub/dir/?first=john&last=doe&search=search",
    "www.msn.com/ar-ae",
    "www.bing.com/search?q=v8+engine",
    "www.pinterest.com/categories/popular/",
    "www.sina.com.cn",
    "weibo.com",
    "yandex.ru/search/?text=v8",
    # framework driven decisions: -----------------------------------
    # wikipedia content + angularjs
    "www.wikiwand.com/en/hill",
    # ember website
    "meta.discourse.org/",
    # backbone js
    "reddit.musicplayer.io",
    # gwt application
    "inbox.google.com",
    # webgl
    "www.google.de/maps/search/restaurant"
]
72 | |
73 def run_site(site, domain, args): | |
74 result_template = "{domain}#{count}.txt" if args.repeat else "{domain}.txt" | |
75 count = 0 | |
76 while count == 0 or args.repeat is not None and count < args.repeat: | |
77 count += 1 | |
78 result = result_template.format(domain=domain, count=count) | |
79 retries = 0 | |
80 while args.retries is None or retries < args.retries: | |
81 retries += 1 | |
82 try: | |
83 temp_user_data_dir = args.user_data_dir is None | |
84 if temp_user_data_dir: | |
85 user_data_dir = tempfile.mkdtemp(prefix="chr_") | |
86 js_flags = "--runtime-call-stats" | |
87 if args.js_flags: js_flags += " " + args.js_flags | |
88 chrome_flags = [ | |
89 "--disk-cache-size=1", | |
nickie
2016/04/27 12:22:16
I'm removing this option, as some websites do not
| |
90 "--single-process", | |
91 "--no-sandbox", | |
92 "--js-flags={}".format(js_flags), | |
93 "--no-first-run", | |
94 "--user-data-dir={}".format(user_data_dir) | |
95 ] | |
96 cmd_args = [ | |
97 "timeout", str(args.timeout), | |
98 args.with_chrome | |
99 ] + chrome_flags + [ site ] | |
100 def fix_for_printing(arg): | |
101 m = re.match(r'^--([^=]+)=(.*)$', arg) | |
102 if m and (' ' in m.group(2) or m.group(2).startswith('-')): | |
103 arg = "--{}='{}'".format(m.group(1), m.group(2)) | |
104 elif ' ' in arg: | |
105 arg = "'{}'".format(arg) | |
106 return arg | |
107 print " ".join(map(fix_for_printing, cmd_args)) | |
108 print "- " * 40 | |
109 with open(result, "wt") as f: | |
110 status = subprocess.call(cmd_args, stdout=f) | |
111 # 124 means timeout killed chrome, 0 means the user was bored first! | |
112 # If none of these two happened, then chrome apparently crashed, so | |
113 # it must be called again. | |
114 if status != 124 and status != 0: continue | |
115 # If the stats file is empty, chrome must be called again. | |
116 if os.path.isfile(result) and os.path.getsize(result) > 0: | |
117 if args.print_url: | |
118 with open(result, "at") as f: | |
119 print >> f | |
120 print >> f, "URL: {}".format(site) | |
121 break | |
122 finally: | |
123 if temp_user_data_dir: | |
124 shutil.rmtree(user_data_dir) | |
125 | |
126 def do_run(args): | |
127 # Determine the websites to benchmark. | |
128 if args.sites_file: | |
129 sites = [] | |
130 try: | |
131 with open(args.sites_file, "rt") as f: | |
132 for line in f: | |
133 line = line.strip() | |
134 if not line or line.startswith('#'): continue | |
135 sites.append(line) | |
136 except IOError as e: | |
137 args.error("Cannot read from {}. {}.".format(args.sites_file, e.strerror)) | |
138 sys.exit(1) | |
139 elif args.sites: | |
140 sites = args.sites | |
141 else: | |
142 sites = DEFAULT_SITES | |
143 # Disambiguate domains, if needed. | |
144 L = [] | |
145 domains = {} | |
146 for site in sites: | |
147 m = re.match(r'^(https?://)?([^/]+)(/.*)?$', site) | |
148 if not m: | |
149 args.error("Invalid URL {}.".format(site)) | |
150 continue | |
151 domain = m.group(2) | |
152 entry = [site, domain, None] | |
153 if domain not in domains: | |
154 domains[domain] = entry | |
155 else: | |
156 if not isinstance(domains[domain], int): | |
157 domains[domain][2] = 1 | |
158 domains[domain] = 1 | |
159 domains[domain] += 1 | |
160 entry[2] = domains[domain] | |
161 L.append(entry) | |
162 # Run them. | |
163 for site, domain, count in L: | |
164 if count is not None: domain = "{}%{}".format(domain, count) | |
165 print site, domain | |
166 run_site(site, domain, args) | |
167 | |
168 | |
169 # Calculate statistics. | |
170 | |
def statistics(data):
  """Return summary statistics for a list of numbers.

  Returns a dict with 'samples', 'average', 'median', 'stddev', 'min', 'max'
  and 'ci', a 95% confidence interval given as absolute width ('abs'),
  endpoints ('low'/'high') and percentage of the average ('perc').
  """
  N = len(data)
  average = numpy.average(data)
  median = numpy.median(data)
  low = numpy.min(data)
  high = numpy.max(data)
  if N > 1:
    # Evaluate sample variance by setting delta degrees of freedom (ddof) to
    # 1. The degree used in calculations is N - ddof.
    stddev = numpy.std(data, ddof=1)
    # Get the endpoints of the range that contains 95% of the distribution.
    t_bounds = scipy.stats.t.interval(0.95, N-1)
    # Add the mean to the confidence interval.
    ci = {
        'abs': t_bounds[1] * stddev / sqrt(N),
        'low': average + t_bounds[0] * stddev / sqrt(N),
        'high': average + t_bounds[1] * stddev / sqrt(N)
    }
  else:
    stddev = 0
    ci = { 'abs': 0, 'low': average, 'high': average }
  # BUGFIX: derive the percentage from ci['abs'] (same value as before)
  # instead of reading t_bounds, which is only bound in the N > 1 branch.
  if abs(stddev) > 0.0001 and abs(average) > 0.0001:
    ci['perc'] = ci['abs'] / average * 100
  else:
    ci['perc'] = 0
  return { 'samples': N, 'average': average, 'median': median,
           'stddev': stddev, 'min': low, 'max': high, 'ci': ci }
199 | |
def read_stats(path, S):
  """Parse one --runtime-call-stats log file and accumulate results into S.

  S maps counter names to {'time_list': [...], 'count_list': [...]}, one
  element appended per parsed file. A synthetic 'Sum' entry accumulates
  every counter except chrome's own "Total" line.
  """
  with open(path, "rt") as f:
    # Collapse repeated counters within this single file first.
    per_file = { 'Sum': {'time': 0, 'count': 0} }
    for raw in f:
      stripped = raw.strip()
      # Discard blank lines, headers and footers.
      if (not stripped
          or stripped.startswith("Runtime Function")
          or stripped.startswith("====")
          or stripped.startswith("----")
          or stripped.startswith("URL:")):
        continue
      # A regular data line: name, time ("...ms"), percent, count, percent.
      fields = stripped.split()
      name = fields[0]
      time_ms = float(fields[1].replace("ms", ""))
      calls = int(fields[3])
      entry = per_file.setdefault(name, { 'time': 0, 'count': 0 })
      entry['time'] += time_ms
      entry['count'] += calls
      # Fold everything except the "total" line into the sum.
      if name != "Total":
        per_file['Sum']['time'] += time_ms
        per_file['Sum']['count'] += calls
    # Append this file's totals as single samples to S.
    for name, entry in per_file.items():
      if name not in S:
        S[name] = { 'time_list': [], 'count_list': [] }
      S[name]['time_list'].append(entry['time'])
      S[name]['count_list'].append(entry['count'])
229 | |
230 def print_stats(S, args): | |
231 # Sort by ascending/descending time average, then by ascending/descending | |
232 # count average, then by ascending name. | |
233 def sort_asc_func(item): | |
234 return (item[1]['time_stat']['average'], | |
235 item[1]['count_stat']['average'], | |
236 item[0]) | |
237 def sort_desc_func(item): | |
238 return (-item[1]['time_stat']['average'], | |
239 -item[1]['count_stat']['average'], | |
240 item[0]) | |
241 # Sorting order is in the commend-line arguments. | |
242 sort_func = sort_asc_func if args.sort == "asc" else sort_desc_func | |
243 # Possibly limit how many elements to print. | |
244 L = [item for item in sorted(S.items(), key=sort_func) | |
245 if item[0] not in ["Total", "Sum"]] | |
246 N = len(L) | |
247 if args.limit == 0: | |
248 low, high = 0, N | |
249 elif args.sort == "desc": | |
250 low, high = 0, args.limit | |
251 else: | |
252 low, high = N-args.limit, N | |
253 # How to print entries. | |
254 def print_entry(key, value): | |
255 def stats(s, units=""): | |
256 conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc']) | |
257 return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf) | |
258 print "{:>50s} {} {}".format( | |
259 key, | |
260 stats(value['time_stat'], units="ms"), | |
261 stats(value['count_stat']) | |
262 ) | |
263 # Print and calculate partial sums, if necessary. | |
264 for i in range(low, high): | |
265 print_entry(*L[i]) | |
266 if args.totals and args.limit != 0: | |
267 if i == low: | |
268 partial = { 'time_list': [0] * len(L[i][1]['time_list']), | |
269 'count_list': [0] * len(L[i][1]['count_list']) } | |
270 assert len(partial['time_list']) == len(L[i][1]['time_list']) | |
271 assert len(partial['count_list']) == len(L[i][1]['count_list']) | |
272 for j, v in enumerate(L[i][1]['time_list']): | |
273 partial['time_list'][j] += v | |
274 for j, v in enumerate(L[i][1]['count_list']): | |
275 partial['count_list'][j] += v | |
276 # Print totals, if necessary. | |
277 if args.totals: | |
278 print '-' * 80 | |
279 if args.limit != 0: | |
280 partial['time_stat'] = statistics(partial['time_list']) | |
281 partial['count_stat'] = statistics(partial['count_list']) | |
282 print_entry("Partial", partial) | |
283 print_entry("Sum", S["Sum"]) | |
284 print_entry("Total", S["Total"]) | |
285 | |
286 def do_stats(args): | |
287 T = {} | |
288 for path in args.logfiles: | |
289 filename = os.path.basename(path) | |
290 m = re.match(r'^([^#]+)(#.*)?$', filename) | |
291 domain = m.group(1) | |
292 if domain not in T: T[domain] = {} | |
293 read_stats(path, T[domain]) | |
294 for i, domain in enumerate(sorted(T)): | |
295 if len(T) > 1: | |
296 if i > 0: print | |
297 print "{}:".format(domain) | |
298 print '=' * 80 | |
299 S = T[domain] | |
300 for key in S: | |
301 S[key]['time_stat'] = statistics(S[key]['time_list']) | |
302 S[key]['count_stat'] = statistics(S[key]['count_list']) | |
303 print_stats(S, args) | |
304 | |
305 | |
306 # Generate JSON file. | |
307 | |
308 def do_json(args): | |
309 J = {} | |
310 for path in args.logdirs: | |
311 if os.path.isdir(path): | |
312 for root, dirs, files in os.walk(path): | |
313 version = os.path.basename(root) | |
314 if version not in J: J[version] = {} | |
315 for filename in files: | |
316 if filename.endswith(".txt"): | |
317 m = re.match(r'^([^#]+)(#.*)?$', filename) | |
318 domain = m.group(1) | |
319 if domain not in J[version]: J[version][domain] = {} | |
320 read_stats(os.path.join(root, filename), J[version][domain]) | |
321 for version, T in J.items(): | |
322 for domain, S in T.items(): | |
323 A = [] | |
324 for name, value in S.items(): | |
325 # We don't want the calculated sum in the JSON file. | |
326 if name == "Sum": continue | |
327 entry = [name] | |
328 for x in ['time_list', 'count_list']: | |
329 s = statistics(S[name][x]) | |
330 entry.append(round(s['average'], 1)) | |
331 entry.append(round(s['ci']['abs'], 1)) | |
332 entry.append(round(s['ci']['perc'], 2)) | |
333 A.append(entry) | |
334 T[domain] = A | |
335 print json.dumps(J, separators=(',', ':')) | |
336 | |
337 | |
338 # Help. | |
339 | |
def do_help(parser, subparsers, args):
  """Print help for one command, or general help if no command was given."""
  if not args.help_cmd:
    parser.print_help()
  elif args.help_cmd in subparsers:
    subparsers[args.help_cmd].print_help()
  else:
    args.error("Unknown command '{}'".format(args.help_cmd))
348 | |
349 | |
350 # Main program, parse command line and execute. | |
351 | |
def main():
  """Parse the command line and dispatch to the selected subcommand."""
  parser = argparse.ArgumentParser()
  adder = parser.add_subparsers(title="commands", dest="command",
                                metavar="<command>")
  subparsers = {}

  def add_command(name, help_text):
    # Register a subcommand parser and remember it for the "help" command.
    subparsers[name] = adder.add_parser(name, help=help_text)
    return subparsers[name]

  # Command: run.
  run_parser = add_command("run", "run --help")
  run_parser.set_defaults(func=do_run, error=run_parser.error)
  run_parser.add_argument(
      "--js-flags", type=str, default="",
      help="specify additional V8 flags")
  run_parser.add_argument(
      "--no-url", dest="print_url", action="store_false", default=True,
      help="do not include url in statistics file")
  run_parser.add_argument(
      "-n", "--repeat", type=int, metavar="<num>",
      help="specify iterations for each website (default: once)")
  run_parser.add_argument(
      "-r", "--retries", type=int, metavar="<num>",
      help="specify retries if website is down (default: forever)")
  run_parser.add_argument(
      "-f", "--sites-file", type=str, metavar="<path>",
      help="specify file containing benchmark websites")
  run_parser.add_argument(
      "-t", "--timeout", type=int, metavar="<seconds>", default=60,
      help="specify seconds before chrome is killed")
  run_parser.add_argument(
      "-u", "--user-data-dir", type=str, metavar="<path>",
      help="specify user data dir (default is temporary)")
  run_parser.add_argument(
      "-c", "--with-chrome", type=str, metavar="<path>",
      default="/usr/bin/google-chrome",
      help="specify chrome executable to use")
  run_parser.add_argument(
      "sites", type=str, metavar="<URL>", nargs="*",
      help="specify benchmark website")
  # Command: stats.
  stats_parser = add_command("stats", "stats --help")
  stats_parser.set_defaults(func=do_stats, error=stats_parser.error)
  stats_parser.add_argument(
      "-l", "--limit", type=int, metavar="<num>", default=0,
      help="limit how many items to print (default: none)")
  stats_parser.add_argument(
      "-s", "--sort", choices=["asc", "desc"], default="asc",
      help="specify sorting order (default: ascending)")
  stats_parser.add_argument(
      "-n", "--no-total", dest="totals", action="store_false", default=True,
      help="do not print totals")
  stats_parser.add_argument(
      "logfiles", type=str, metavar="<logfile>", nargs="*",
      help="specify log files to parse")
  # Command: json.
  json_parser = add_command("json", "json --help")
  json_parser.set_defaults(func=do_json, error=json_parser.error)
  json_parser.add_argument(
      "logdirs", type=str, metavar="<logdir>", nargs="*",
      help="specify directories with log files to parse")
  # Command: help.
  help_parser = add_command("help", "help information")
  help_parser.set_defaults(
      func=lambda args: do_help(parser, subparsers, args),
      error=help_parser.error)
  help_parser.add_argument(
      "help_cmd", type=str, metavar="<command>", nargs="?",
      help="command for which to display help")
  # Execute the command.
  args = parser.parse_args()
  if args.command == "run" and args.sites_file and args.sites:
    args.error("if --sites-file is used, no site URLS must be given")
    sys.exit(1)
  else:
    args.func(args)
431 | |
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
  sys.exit(main())
# NOTE(review): trailing code-review diff-table footer ("OLD | NEW")
# converted to a comment; it is not part of the program.