Chromium Code Reviews

Side by Side Diff: tools/callstats.py

Issue 1922873004: Add script for benchmarking with --runtime-call-stats (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Bug fix with timeouts Created 4 years, 7 months ago
#!/usr/bin/env python
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''
Usage: runtime-call-stats.py [-h] <command> ...

Optional arguments:
  -h, --help  show this help message and exit

Commands:
  run    run chrome with --runtime-call-stats and generate logs
  stats  process logs and print statistics
  json   process logs from several versions and generate JSON
  help   help information

For each command, you can try ./runtime-call-stats.py help command.
'''
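# Illustrative example invocations (the file and directory names are
# placeholders; the flags correspond to the argument parser defined in
# main() below):
#
#   ./runtime-call-stats.py run -n 3 -f sites.json
#   ./runtime-call-stats.py stats --limit=20 --sort=desc www.google.de.txt
#   ./runtime-call-stats.py json logs-v1 logs-v2 > results.json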

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile

import numpy
import scipy
import scipy.stats
from math import sqrt


# Run benchmarks.

DEFAULT_SITES = [
  # top websites (http://alexa.com/topsites): --------------------
  "https://www.google.de/search?q=v8",
  "https://www.youtube.com",
  "https://www.facebook.com/shakira",
  "http://www.baidu.com/s?wd=v8",
  "http://www.yahoo.co.jp",
  "http://www.amazon.com/s/?field-keywords=v8",
  "http://hi.wikipedia.org/wiki/" \
      "%E0%A4%AE%E0%A5%81%E0%A4%96%E0%A4%AA%E0%A5%83%E0%A4%B7%E0%A5%8D%E0%A4%A0",
  "http://www.qq.com",
  "http://www.twitter.com/taylorswift13",
  "http://www.reddit.com",
  "http://www.ebay.fr/sch/i.html?_nkw=v8",
  "http://edition.cnn.com",
  "http://world.taobao.com",
  "http://www.instagram.com/archdigest",
  "https://www.linkedin.com/pub/dir/?first=john&last=doe&search=search",
  "http://www.msn.com/ar-ae",
  "http://www.bing.com/search?q=v8+engine",
  "http://www.pinterest.com/categories/popular",
  "http://www.sina.com.cn",
  "http://weibo.com",
  "http://yandex.ru/search/?text=v8",
  # framework driven decisions: -----------------------------------
  # wikipedia content + angularjs
  "http://www.wikiwand.com/en/hill",
  # ember website
  "http://meta.discourse.org",
  # backbone js
  "http://reddit.musicplayer.io",
  # gwt application
  "http://inbox.google.com",
  # webgl / algorithmic case
  "http://maps.google.co.jp/maps/search/restaurant+tokyo",
  # whatever framework adwords uses
  "https://adwords.google.com",
]


def print_command(cmd_args):
  def fix_for_printing(arg):
    m = re.match(r'^--([^=]+)=(.*)$', arg)
    if m and (' ' in m.group(2) or m.group(2).startswith('-')):
      arg = "--{}='{}'".format(m.group(1), m.group(2))
    elif ' ' in arg:
      arg = "'{}'".format(arg)
    return arg
  print " ".join(map(fix_for_printing, cmd_args))


def start_replay_server(args):
  cmd_args = [
      args.replay_bin,
      "--port=4080",
      "--ssl_port=4443",
      "--no-dns_forwarding",
      "--use_closest_match",
      "--no-diff_unknown_requests",
      args.replay_wpr,
  ]
  print "=" * 80
  print_command(cmd_args)
  with open(os.devnull, 'w') as null:
    server = subprocess.Popen(cmd_args, stdout=null, stderr=null)
  print "RUNNING REPLAY SERVER: %s with PID=%s" % (args.replay_bin, server.pid)
  print "=" * 80
  return server


def stop_replay_server(server):
  print("SHUTTING DOWN REPLAY SERVER %s" % server.pid)
  server.terminate()


def run_site(site, domain, args, timeout=None):
  print "="*80
  print "RUNNING DOMAIN %s" % domain
  print "="*80
  result_template = "{domain}#{count}.txt" if args.repeat else "{domain}.txt"
  count = 0
  while count == 0 or args.repeat is not None and count < args.repeat:
    count += 1
    result = result_template.format(domain=domain, count=count)
    retries = 0
    while args.retries is None or retries < args.retries:
      retries += 1
      try:
        temp_user_data_dir = args.user_data_dir is None
        if temp_user_data_dir:
          user_data_dir = tempfile.mkdtemp(prefix="chr_")
        else:
          user_data_dir = args.user_data_dir
        js_flags = "--runtime-call-stats"
        if args.js_flags: js_flags += " " + args.js_flags
        chrome_flags = [
            "--no-default-browser-check",
            "--disable-translate",
            "--single-process",
            "--no-sandbox",
            "--js-flags={}".format(js_flags),
            "--no-first-run",
            "--user-data-dir={}".format(user_data_dir),
        ]
        if args.replay_wpr:
          chrome_flags += [
              "--host-resolver-rules=MAP *:80 localhost:4080, " \
                  "MAP *:443 localhost:4443, " \
                  "EXCLUDE localhost",
              "--ignore-certificate-errors",
              "--disable-web-security",
              "--reduce-security-for-testing",
              "--allow-insecure-localhost",
          ]
        if args.chrome_flags:
          chrome_flags += args.chrome_flags.split()
        if timeout is None: timeout = args.timeout
        cmd_args = [
            "timeout", str(timeout),
            args.with_chrome
        ] + chrome_flags + [ site ]
        print "- " * 40
        print_command(cmd_args)
        print "- " * 40
        with open(result, "wt") as f:
          status = subprocess.call(cmd_args, stdout=f)
        # Exit status 124 means the timeout killed chrome; 0 means it exited
        # normally (e.g. the user was bored first). If neither happened,
        # chrome apparently crashed, so it must be run again.
        if status != 124 and status != 0:
          print("CHROME CRASHED, REPEATING RUN")
          continue
        # If the stats file is empty, chrome must be run again.
        if os.path.isfile(result) and os.path.getsize(result) > 0:
          if args.print_url:
            with open(result, "at") as f:
              print >> f
              print >> f, "URL: {}".format(site)
          break
        print("EMPTY RESULT, REPEATING RUN")
      finally:
        if temp_user_data_dir:
          shutil.rmtree(user_data_dir)


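# The sites file may contain either JSON or plain text. A sketch of the two
# accepted layouts (the URL below is a placeholder):
#
#   [{"url": "https://example.com", "timeline": 10, "timeout": 20}, ...]
#
# or one URL per line, where blank lines and lines starting with '#' are
# ignored. A missing "timeout" is derived from "timeline" below.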
def read_sites_file(args):
  try:
    sites = []
    try:
      with open(args.sites_file, "rt") as f:
        for item in json.load(f):
          if 'timeout' not in item:
            # This is more-or-less arbitrary.
            item['timeout'] = int(2.5 * item['timeline'] + 3)
          if item['timeout'] > args.timeout: item['timeout'] = args.timeout
          sites.append(item)
    except ValueError:
      with open(args.sites_file, "rt") as f:
        for line in f:
          line = line.strip()
          if not line or line.startswith('#'): continue
          sites.append({'url': line, 'timeout': args.timeout})
    return sites
  except IOError as e:
    args.error("Cannot read from {}. {}.".format(args.sites_file, e.strerror))
    sys.exit(1)


def do_run(args):
  # Determine the websites to benchmark.
  if args.sites_file:
    sites = read_sites_file(args)
  elif args.sites:
    sites = [{'url': site, 'timeout': args.timeout} for site in args.sites]
  else:
    sites = [{'url': site, 'timeout': args.timeout} for site in DEFAULT_SITES]
  # Disambiguate domains, if needed.
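  # Each entry below is [site, domain, count, timeout]. When a domain occurs
  # more than once, a per-domain counter is kept so that the result files
  # written by run_site() do not overwrite each other.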
  L = []
  domains = {}
  for item in sites:
    site = item['url']
    m = re.match(r'^(https?://)?([^/]+)(/.*)?$', site)
    if not m:
      args.error("Invalid URL {}.".format(site))
      continue
    domain = m.group(2)
    entry = [site, domain, None, item['timeout']]
    if domain not in domains:
      domains[domain] = entry
    else:
      if not isinstance(domains[domain], int):
        domains[domain][2] = 1
        domains[domain] = 1
      domains[domain] += 1
      entry[2] = domains[domain]
    L.append(entry)
  replay_server = None
  if args.replay_wpr:
    replay_server = start_replay_server(args)
  try:
    # Run them.
    for site, domain, count, timeout in L:
      if count is not None: domain = "{}%{}".format(domain, count)
      print site, domain, timeout
      run_site(site, domain, args, timeout)
  finally:
    if replay_server:
      stop_replay_server(replay_server)


# Calculate statistics.

def statistics(data):
  N = len(data)
  average = numpy.average(data)
  median = numpy.median(data)
  low = numpy.min(data)
  high = numpy.max(data)
  if N > 1:
    # Estimate the sample standard deviation with delta degrees of freedom
    # (ddof) set to 1, i.e. the divisor used is N - ddof.
    stddev = numpy.std(data, ddof=1)
    # Get the endpoints of the range that contains 95% of the distribution.
    t_bounds = scipy.stats.t.interval(0.95, N-1)
    #assert abs(t_bounds[0] + t_bounds[1]) < 1e-6
    # Add the mean to the interval endpoints to get the confidence bounds.
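    # The half-width of the interval is t_{0.975, N-1} * stddev / sqrt(N),
    # i.e. the two-sided 95% Student-t bound on the mean of N samples.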
    ci = {
        'abs': t_bounds[1] * stddev / sqrt(N),
        'low': average + t_bounds[0] * stddev / sqrt(N),
        'high': average + t_bounds[1] * stddev / sqrt(N)
    }
  else:
    stddev = 0
    ci = { 'abs': 0, 'low': average, 'high': average }
  if abs(stddev) > 0.0001 and abs(average) > 0.0001:
    ci['perc'] = t_bounds[1] * stddev / sqrt(N) / average * 100
  else:
    ci['perc'] = 0
  return { 'samples': N, 'average': average, 'median': median,
           'stddev': stddev, 'min': low, 'max': high, 'ci': ci }


def read_stats(path, S):
  with open(path, "rt") as f:
    # Process the whole file and sum repeating entries.
    D = { 'Sum': {'time': 0, 'count': 0} }
    for line in f:
      line = line.strip()
      # Discard headers and footers.
      if not line: continue
      if line.startswith("Runtime Function"): continue
      if line.startswith("===="): continue
      if line.startswith("----"): continue
      if line.startswith("URL:"): continue
      # We have a regular line.
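      # Based on the parsing below, a data line is assumed to look roughly
      # like: <name> <time>ms <time-percent> <count> <count-percent>;
      # only fields 0, 1 and 3 are used.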
      fields = line.split()
      key = fields[0]
      time = float(fields[1].replace("ms", ""))
      count = int(fields[3])
      if key not in D: D[key] = { 'time': 0, 'count': 0 }
      D[key]['time'] += time
      D[key]['count'] += count
      # Also accumulate the overall sum, unless this is the "Total" line.
      if key != "Total":
        D['Sum']['time'] += time
        D['Sum']['count'] += count
    # Append the sums as single entries to S.
    for key in D:
      if key not in S: S[key] = { 'time_list': [], 'count_list': [] }
      S[key]['time_list'].append(D[key]['time'])
      S[key]['count_list'].append(D[key]['count'])


def print_stats(S, args):
  # Sort by ascending/descending time average, then by ascending/descending
  # count average, then by ascending name.
  def sort_asc_func(item):
    return (item[1]['time_stat']['average'],
            item[1]['count_stat']['average'],
            item[0])
  def sort_desc_func(item):
    return (-item[1]['time_stat']['average'],
            -item[1]['count_stat']['average'],
            item[0])
  # The sorting order is given by the command-line arguments.
  sort_func = sort_asc_func if args.sort == "asc" else sort_desc_func
  # Possibly limit how many elements to print.
  L = [item for item in sorted(S.items(), key=sort_func)
       if item[0] not in ["Total", "Sum"]]
  N = len(L)
  if args.limit == 0:
    low, high = 0, N
  elif args.sort == "desc":
    low, high = 0, args.limit
  else:
    low, high = N-args.limit, N
  # How to print entries.
  def print_entry(key, value):
    def stats(s, units=""):
      conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc'])
      return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf)
    print "{:>50s} {} {}".format(
        key,
        stats(value['time_stat'], units="ms"),
        stats(value['count_stat'])
    )
  # Print and calculate partial sums, if necessary.
  for i in range(low, high):
    print_entry(*L[i])
    if args.totals and args.limit != 0:
      if i == low:
        partial = { 'time_list': [0] * len(L[i][1]['time_list']),
                    'count_list': [0] * len(L[i][1]['count_list']) }
      assert len(partial['time_list']) == len(L[i][1]['time_list'])
      assert len(partial['count_list']) == len(L[i][1]['count_list'])
      for j, v in enumerate(L[i][1]['time_list']):
        partial['time_list'][j] += v
      for j, v in enumerate(L[i][1]['count_list']):
        partial['count_list'][j] += v
  # Print totals, if necessary.
  if args.totals:
    print '-' * 80
    if args.limit != 0:
      partial['time_stat'] = statistics(partial['time_list'])
      partial['count_stat'] = statistics(partial['count_list'])
      print_entry("Partial", partial)
    print_entry("Sum", S["Sum"])
    print_entry("Total", S["Total"])


def do_stats(args):
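  # Group log files by domain. run_site() names them "{domain}.txt" or
  # "{domain}#{count}.txt", so everything before a '#' identifies the domain.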
  T = {}
  for path in args.logfiles:
    filename = os.path.basename(path)
    m = re.match(r'^([^#]+)(#.*)?$', filename)
    domain = m.group(1)
    if domain not in T: T[domain] = {}
    read_stats(path, T[domain])
  for i, domain in enumerate(sorted(T)):
    if len(T) > 1:
      if i > 0: print
      print "{}:".format(domain)
      print '=' * 80
    S = T[domain]
    for key in S:
      S[key]['time_stat'] = statistics(S[key]['time_list'])
      S[key]['count_stat'] = statistics(S[key]['count_list'])
    print_stats(S, args)


# Generate JSON file.

def do_json(args):
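  # The resulting JSON has the shape { version: { domain: [entry, ...] } },
  # where each entry is [name, time_average, time_ci_abs, time_ci_percent,
  # count_average, count_ci_abs, count_ci_percent].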
  J = {}
  for path in args.logdirs:
    if os.path.isdir(path):
      for root, dirs, files in os.walk(path):
        version = os.path.basename(root)
        if version not in J: J[version] = {}
        for filename in files:
          if filename.endswith(".txt"):
            m = re.match(r'^([^#]+)(#.*)?$', filename)
            domain = m.group(1)
            if domain not in J[version]: J[version][domain] = {}
            read_stats(os.path.join(root, filename), J[version][domain])
  for version, T in J.items():
    for domain, S in T.items():
      A = []
      for name, value in S.items():
        # We don't want the calculated sum in the JSON file.
        if name == "Sum": continue
        entry = [name]
        for x in ['time_list', 'count_list']:
          s = statistics(S[name][x])
          entry.append(round(s['average'], 1))
          entry.append(round(s['ci']['abs'], 1))
          entry.append(round(s['ci']['perc'], 2))
        A.append(entry)
      T[domain] = A
  print json.dumps(J, separators=(',', ':'))


# Help.

def do_help(parser, subparsers, args):
  if args.help_cmd:
    if args.help_cmd in subparsers:
      subparsers[args.help_cmd].print_help()
    else:
      args.error("Unknown command '{}'".format(args.help_cmd))
  else:
    parser.print_help()


# Main program, parse command line and execute.

def main():
  parser = argparse.ArgumentParser()
  subparser_adder = parser.add_subparsers(title="commands", dest="command",
                                          metavar="<command>")
  subparsers = {}
  # Command: run.
  subparsers["run"] = subparser_adder.add_parser(
      "run", help="run --help")
  subparsers["run"].set_defaults(
      func=do_run, error=subparsers["run"].error)
  subparsers["run"].add_argument(
      "--chrome-flags", type=str, default="",
      help="specify additional chrome flags")
  subparsers["run"].add_argument(
      "--js-flags", type=str, default="",
      help="specify additional V8 flags")
  subparsers["run"].add_argument(
      "--no-url", dest="print_url", action="store_false", default=True,
      help="do not include url in statistics file")
  subparsers["run"].add_argument(
      "-n", "--repeat", type=int, metavar="<num>",
      help="specify iterations for each website (default: once)")
  subparsers["run"].add_argument(
      "--replay-wpr", type=str, metavar="<path>",
      help="use the specified web page replay (.wpr) archive")
  subparsers["run"].add_argument(
      "--replay-bin", type=str, metavar="<path>",
      help="specify the replay.py script typically located in " \
           "$CHROMIUM/src/third_party/webpagereplay/replay.py")
  subparsers["run"].add_argument(
      "-r", "--retries", type=int, metavar="<num>",
      help="specify retries if website is down (default: forever)")
  subparsers["run"].add_argument(
      "-f", "--sites-file", type=str, metavar="<path>",
      help="specify file containing benchmark websites")
  subparsers["run"].add_argument(
      "-t", "--timeout", type=int, metavar="<seconds>", default=60,
      help="specify seconds before chrome is killed")
  subparsers["run"].add_argument(
      "-u", "--user-data-dir", type=str, metavar="<path>",
      help="specify user data dir (default is temporary)")
  subparsers["run"].add_argument(
      "-c", "--with-chrome", type=str, metavar="<path>",
      default="/usr/bin/google-chrome",
      help="specify chrome executable to use")
  subparsers["run"].add_argument(
      "sites", type=str, metavar="<URL>", nargs="*",
      help="specify benchmark website")
  # Command: stats.
  subparsers["stats"] = subparser_adder.add_parser(
      "stats", help="stats --help")
  subparsers["stats"].set_defaults(
      func=do_stats, error=subparsers["stats"].error)
  subparsers["stats"].add_argument(
      "-l", "--limit", type=int, metavar="<num>", default=0,
      help="limit how many items to print (default: no limit)")
  subparsers["stats"].add_argument(
      "-s", "--sort", choices=["asc", "desc"], default="asc",
      help="specify sorting order (default: ascending)")
  subparsers["stats"].add_argument(
      "-n", "--no-total", dest="totals", action="store_false", default=True,
      help="do not print totals")
  subparsers["stats"].add_argument(
      "logfiles", type=str, metavar="<logfile>", nargs="*",
      help="specify log files to parse")
  # Command: json.
  subparsers["json"] = subparser_adder.add_parser(
      "json", help="json --help")
  subparsers["json"].set_defaults(
      func=do_json, error=subparsers["json"].error)
  subparsers["json"].add_argument(
      "logdirs", type=str, metavar="<logdir>", nargs="*",
      help="specify directories with log files to parse")
  # Command: help.
  subparsers["help"] = subparser_adder.add_parser(
      "help", help="help information")
  subparsers["help"].set_defaults(
      func=lambda args: do_help(parser, subparsers, args),
      error=subparsers["help"].error)
  subparsers["help"].add_argument(
      "help_cmd", type=str, metavar="<command>", nargs="?",
      help="command for which to display help")
  # Execute the command.
  args = parser.parse_args()
  if args.command == "run" and args.sites_file and args.sites:
    args.error("if --sites-file is used, no site URLs must be given")
    sys.exit(1)
  elif args.command == "run" and args.replay_wpr and not args.replay_bin:
    args.error("if --replay-wpr is used, --replay-bin must be given")
    sys.exit(1)
  else:
    args.func(args)

if __name__ == "__main__":
  sys.exit(main())