Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(73)

Side by Side Diff: tools/runtime-call-stats.py

Issue 1922873004: Add script for benchmarking with --runtime-call-stats (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Update websites and chrome flags Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright 2016 the V8 project authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5 '''
6 Usage: runtime-call-stats.py [-h] <command> ...
7
8 Optional arguments:
9 -h, --help show this help message and exit
10
11 Commands:
12 run run chrome with --runtime-call-stats and generate logs
13 stats process logs and print statistics
14 json process logs from several versions and generate JSON
15 help help information
16
17 For each command, you can try ./runtime-call-stats.py help command.
18 '''
19
20 import argparse
21 import json
22 import os
23 import re
24 import shutil
25 import subprocess
26 import sys
27 import tempfile
28
29 import numpy
30 import scipy
31 import scipy.stats
32 from math import sqrt
33
34
35 # Run benchmarks.
36
# Default set of websites to benchmark: a mix of popular sites (per
# http://alexa.com/topsites) plus pages exercising specific JS frameworks.
DEFAULT_SITES = [
  # top websites (http://alexa.com/topsites): --------------------
  "https://www.google.de/search?q=v8",
  "https://www.youtube.com",
  "https://www.facebook.com/shakira",
  "http://www.baidu.com/s?wd=v8",
  "http://www.yahoo.co.jp",
  "http://www.amazon.com/s/?field-keywords=v8",
  # Bug fix: this URL contained a stray space in the percent-encoded title
  # (a line-wrapping artifact), which made it invalid.
  "http://hi.wikipedia.org/wiki/%E0%A4%AE%E0%A5%81%E0%A4%96%E0%A4%AA%E0%A5%83%E0%A4%B7%E0%A5%8D%E0%A4%A0",
  "http://www.qq.com",
  "http://www.twitter.com/taylorswift13",
  "http://www.reddit.com",
  "http://www.ebay.fr/sch/i.html?_nkw=v8",
  "http://edition.cnn.com",
  "http://world.taobao.com",
  "http://www.instagram.com/archdigest",
  "https://www.linkedin.com/pub/dir/?first=john&last=doe&search=search",
  "http://www.msn.com/ar-ae",
  "http://www.bing.com/search?q=v8+engine",
  "http://www.pinterest.com/categories/popular",
  "http://www.sina.com.cn",
  "http://weibo.com",
  "http://yandex.ru/search/?text=v8",
  # wikipedia content + angularjs
  "http://www.wikiwand.com/en/hill",
  # ember website
  "http://meta.discourse.org",
  # backbone js
  "http://reddit.musicplayer.io",
  # gwt application
  "http://inbox.google.com",
  "http://adwords.google.com",
  # webgl / algorithmic case
  "http://maps.google.co.jp/maps/search/restaurant+tokyo"
]
72
def run_site(site, domain, args):
  # Run chrome on a single site and capture --runtime-call-stats output.
  #
  # site:   the URL to benchmark.
  # domain: disambiguated domain name; determines the result file name.
  # args:   parsed arguments of the "run" command (timeout, retries, ...).
  #
  # One result file is written per repetition; a run is retried when chrome
  # crashes or produces an empty stats file.
  result_template = "{domain}#{count}.txt" if args.repeat else "{domain}.txt"
  count = 0
  while count == 0 or args.repeat is not None and count < args.repeat:
    count += 1
    result = result_template.format(domain=domain, count=count)
    retries = 0
    while args.retries is None or retries < args.retries:
      retries += 1
      try:
        temp_user_data_dir = args.user_data_dir is None
        if temp_user_data_dir:
          user_data_dir = tempfile.mkdtemp(prefix="chr_")
        else:
          # Bug fix: user_data_dir was never assigned when --user-data-dir
          # was supplied, causing an UnboundLocalError below.
          user_data_dir = args.user_data_dir
        js_flags = "--runtime-call-stats"
        if args.js_flags: js_flags += " " + args.js_flags
        chrome_flags = [
            "--no-default-browser-check",
            # Bug fix: a missing comma here used to merge this flag with
            # "--single-process" into one bogus argument.
            "--disable-translate",
            "--single-process",
            "--no-sandbox",
            "--js-flags={}".format(js_flags),
            "--no-first-run",
            "--user-data-dir={}".format(user_data_dir)
        ]
        if args.chrome_flags:
          chrome_flags.extend(args.chrome_flags.split())
        cmd_args = [
            "timeout", str(args.timeout),
            args.with_chrome
        ] + chrome_flags + [ site ]
        def fix_for_printing(arg):
          # Quote arguments for logging so the printed command line can be
          # copy-pasted into a shell.
          m = re.match(r'^--([^=]+)=(.*)$', arg)
          if m and (' ' in m.group(2) or m.group(2).startswith('-')):
            arg = "--{}='{}'".format(m.group(1), m.group(2))
          elif ' ' in arg:
            arg = "'{}'".format(arg)
          return arg
        print(" ".join(map(fix_for_printing, cmd_args)))
        print("- " * 40)
        with open(result, "wt") as f:
          status = subprocess.call(cmd_args, stdout=f)
        # 124 means timeout killed chrome, 0 means the user was bored first!
        # If none of these two happened, then chrome apparently crashed, so
        # it must be called again.
        if status != 124 and status != 0: continue
        # If the stats file is empty, chrome must be called again.
        if os.path.isfile(result) and os.path.getsize(result) > 0:
          if args.print_url:
            with open(result, "at") as f:
              # Record the URL at the end of the stats file.
              f.write("\nURL: {}\n".format(site))
          break
      finally:
        # Remove a temporary user data dir even when chrome crashed.
        if temp_user_data_dir:
          shutil.rmtree(user_data_dir)
128
129 def do_run(args):
130 # Determine the websites to benchmark.
131 if args.sites_file:
132 sites = []
133 try:
134 with open(args.sites_file, "rt") as f:
135 for line in f:
136 line = line.strip()
137 if not line or line.startswith('#'): continue
138 sites.append(line)
139 except IOError as e:
140 args.error("Cannot read from {}. {}.".format(args.sites_file, e.strerror))
141 sys.exit(1)
142 elif args.sites:
143 sites = args.sites
144 else:
145 sites = DEFAULT_SITES
146 # Disambiguate domains, if needed.
147 L = []
148 domains = {}
149 for site in sites:
150 m = re.match(r'^(https?://)?([^/]+)(/.*)?$', site)
151 if not m:
152 args.error("Invalid URL {}.".format(site))
153 continue
154 domain = m.group(2)
155 entry = [site, domain, None]
156 if domain not in domains:
157 domains[domain] = entry
158 else:
159 if not isinstance(domains[domain], int):
160 domains[domain][2] = 1
161 domains[domain] = 1
162 domains[domain] += 1
163 entry[2] = domains[domain]
164 L.append(entry)
165 # Run them.
166 for site, domain, count in L:
167 if count is not None: domain = "{}%{}".format(domain, count)
168 print site, domain
169 run_site(site, domain, args)
170
171
172 # Calculate statistics.
173
def statistics(data):
  """Return summary statistics for a list of samples.

  The result maps 'samples', 'average', 'median', 'stddev', 'min', 'max'
  and 'ci' (95% confidence interval of the mean: 'abs', 'low', 'high',
  'perc').
  """
  n = len(data)
  mean = numpy.average(data)
  med = numpy.median(data)
  smallest = numpy.min(data)
  largest = numpy.max(data)
  if n > 1:
    # Sample standard deviation: ddof=1 makes numpy divide by n - 1.
    dev = numpy.std(data, ddof=1)
    # Endpoints of the two-sided 95% Student-t interval with n - 1 dof.
    t_lo, t_hi = scipy.stats.t.interval(0.95, n - 1)
    # Shift the interval so it is centered on the sample mean.
    ci = {
        'abs': t_hi * dev / sqrt(n),
        'low': mean + t_lo * dev / sqrt(n),
        'high': mean + t_hi * dev / sqrt(n)
    }
  else:
    # A single sample has no spread.
    dev = 0
    ci = {'abs': 0, 'low': mean, 'high': mean}
  # Relative interval width; guard against (near-)zero spread or average.
  if abs(dev) > 0.0001 and abs(mean) > 0.0001:
    ci['perc'] = t_hi * dev / sqrt(n) / mean * 100
  else:
    ci['perc'] = 0
  return {'samples': n, 'average': mean, 'median': med,
          'stddev': dev, 'min': smallest, 'max': largest, 'ci': ci}
202
def read_stats(path, S):
  """Parse one --runtime-call-stats log file and accumulate it into S.

  For every counter name, one total per file is appended to
  S[name]['time_list'] and S[name]['count_list'].  A synthetic 'Sum' entry
  totals all counters except chrome's own 'Total' line.
  """
  totals = {'Sum': {'time': 0, 'count': 0}}
  with open(path, "rt") as f:
    for raw in f:
      entry = raw.strip()
      # Skip blank lines as well as header/separator/footer lines.
      if not entry:
        continue
      if entry.startswith(("Runtime Function", "====", "----", "URL:")):
        continue
      # A regular data line: name, time ("12.3ms"), percent, count, percent.
      columns = entry.split()
      name = columns[0]
      msecs = float(columns[1].replace("ms", ""))
      calls = int(columns[3])
      bucket = totals.setdefault(name, {'time': 0, 'count': 0})
      bucket['time'] += msecs
      bucket['count'] += calls
      # Keep the report's own grand total out of our computed 'Sum'.
      if name != "Total":
        totals['Sum']['time'] += msecs
        totals['Sum']['count'] += calls
  # Append this file's totals as single samples to the accumulator.
  for name, bucket in totals.items():
    lists = S.setdefault(name, {'time_list': [], 'count_list': []})
    lists['time_list'].append(bucket['time'])
    lists['count_list'].append(bucket['count'])
232
def print_stats(S, args):
  """Print the per-counter statistics in S, sorted and possibly truncated.

  S maps counter names to dicts carrying 'time_stat' and 'count_stat'
  entries (as produced by statistics()); args supplies the sorting order
  ('asc'/'desc'), the item limit (0 = unlimited) and whether to print
  totals.
  """
  # Sort by ascending/descending time average, then by ascending/descending
  # count average, then by ascending name.
  def sort_asc_func(item):
    return (item[1]['time_stat']['average'],
            item[1]['count_stat']['average'],
            item[0])
  def sort_desc_func(item):
    return (-item[1]['time_stat']['average'],
            -item[1]['count_stat']['average'],
            item[0])
  # Sorting order is in the command-line arguments.
  sort_func = sort_asc_func if args.sort == "asc" else sort_desc_func
  # Possibly limit how many elements to print.  The synthetic "Total" and
  # "Sum" rows are excluded here and printed separately below.
  L = [item for item in sorted(S.items(), key=sort_func)
       if item[0] not in ["Total", "Sum"]]
  N = len(L)
  if args.limit == 0:
    low, high = 0, N
  elif args.sort == "desc":
    low, high = 0, args.limit
  else:
    low, high = N-args.limit, N
  # NOTE(review): a limit larger than N leaves 'low' negative (asc) or
  # 'high' past the end (desc) -- presumably callers pass limit <= N;
  # TODO confirm.
  # How to print entries.
  def print_entry(key, value):
    # Format one "average +/- ci(perc%)" column for a stats dict.
    def stats(s, units=""):
      conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc'])
      return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf)
    print "{:>50s} {} {}".format(
      key,
      stats(value['time_stat'], units="ms"),
      stats(value['count_stat'])
    )
  # Print and calculate partial sums, if necessary.
  for i in range(low, high):
    print_entry(*L[i])
    if args.totals and args.limit != 0:
      # Lazily initialize the element-wise partial sums on the first
      # printed entry; all sample lists are expected to have equal length.
      if i == low:
        partial = { 'time_list': [0] * len(L[i][1]['time_list']),
                    'count_list': [0] * len(L[i][1]['count_list']) }
      assert len(partial['time_list']) == len(L[i][1]['time_list'])
      assert len(partial['count_list']) == len(L[i][1]['count_list'])
      for j, v in enumerate(L[i][1]['time_list']):
        partial['time_list'][j] += v
      for j, v in enumerate(L[i][1]['count_list']):
        partial['count_list'][j] += v
  # Print totals, if necessary.
  if args.totals:
    print '-' * 80
    if args.limit != 0:
      # "Partial" summarizes only the rows actually printed above.
      partial['time_stat'] = statistics(partial['time_list'])
      partial['count_stat'] = statistics(partial['count_list'])
      print_entry("Partial", partial)
    print_entry("Sum", S["Sum"])
    print_entry("Total", S["Total"])
288
def do_stats(args):
  """'stats' command: aggregate the given log files per domain and print."""
  # T maps a domain name to its accumulated stats (see read_stats).
  T = {}
  for path in args.logfiles:
    filename = os.path.basename(path)
    # Strip the "#<count>" suffix that run_site appends for repeated runs.
    # NOTE(review): a filename starting with '#' would make m None here.
    m = re.match(r'^([^#]+)(#.*)?$', filename)
    domain = m.group(1)
    if domain not in T: T[domain] = {}
    read_stats(path, T[domain])
  for i, domain in enumerate(sorted(T)):
    # Print a per-domain header only when several domains are involved.
    if len(T) > 1:
      if i > 0: print
      print "{}:".format(domain)
      print '=' * 80
    S = T[domain]
    # Derive summary statistics from the collected sample lists.
    for key in S:
      S[key]['time_stat'] = statistics(S[key]['time_list'])
      S[key]['count_stat'] = statistics(S[key]['count_list'])
    print_stats(S, args)
307
308
309 # Generate JSON file.
310
def do_json(args):
  """'json' command: aggregate logs from several versions into one JSON blob.

  Each log directory is walked recursively; the name of the directory
  containing a "*.txt" file is used as the version label.
  """
  # J maps version -> domain -> accumulated stats (see read_stats).
  J = {}
  for path in args.logdirs:
    if os.path.isdir(path):
      for root, dirs, files in os.walk(path):
        version = os.path.basename(root)
        if version not in J: J[version] = {}
        for filename in files:
          if filename.endswith(".txt"):
            # Strip the "#<count>" suffix of repeated runs.
            m = re.match(r'^([^#]+)(#.*)?$', filename)
            domain = m.group(1)
            if domain not in J[version]: J[version][domain] = {}
            read_stats(os.path.join(root, filename), J[version][domain])
  # Replace each domain's raw sample lists by flat rows of rounded numbers:
  # [name, time_avg, time_ci_abs, time_ci_perc,
  #        count_avg, count_ci_abs, count_ci_perc].
  for version, T in J.items():
    for domain, S in T.items():
      A = []
      for name, value in S.items():
        # We don't want the calculated sum in the JSON file.
        if name == "Sum": continue
        entry = [name]
        for x in ['time_list', 'count_list']:
          s = statistics(S[name][x])
          entry.append(round(s['average'], 1))
          entry.append(round(s['ci']['abs'], 1))
          entry.append(round(s['ci']['perc'], 2))
        A.append(entry)
      T[domain] = A
  # Emit compact JSON on stdout.
  print json.dumps(J, separators=(',', ':'))
339
340
341 # Help.
342
def do_help(parser, subparsers, args):
  """'help' command: print help for one sub-command, or general help."""
  cmd = args.help_cmd
  if not cmd:
    # No command given: show the top-level usage.
    parser.print_help()
    return
  if cmd in subparsers:
    subparsers[cmd].print_help()
  else:
    args.error("Unknown command '{}'".format(cmd))
351
352
353 # Main program, parse command line and execute.
354
def main():
  """Parse the command line and dispatch to the selected sub-command.

  Four sub-commands are registered: run, stats, json and help.  Each one
  stores its handler in args.func and its own parser's error() in
  args.error, so handlers can report argument errors uniformly.
  """
  parser = argparse.ArgumentParser()
  subparser_adder = parser.add_subparsers(title="commands", dest="command",
                                          metavar="<command>")
  # Keep all sub-parsers around so that "help <command>" can print their help.
  subparsers = {}
  # Command: run.
  subparsers["run"] = subparser_adder.add_parser(
    "run", help="run --help")
  subparsers["run"].set_defaults(
    func=do_run, error=subparsers["run"].error)
  subparsers["run"].add_argument(
    "--chrome-flags", type=str, default="",
    help="specify additional chrome flags")
  subparsers["run"].add_argument(
    "--js-flags", type=str, default="",
    help="specify additional V8 flags")
  subparsers["run"].add_argument(
    "--no-url", dest="print_url", action="store_false", default=True,
    help="do not include url in statistics file")
  subparsers["run"].add_argument(
    "-n", "--repeat", type=int, metavar="<num>",
    help="specify iterations for each website (default: once)")
  subparsers["run"].add_argument(
    "-r", "--retries", type=int, metavar="<num>",
    help="specify retries if website is down (default: forever)")
  subparsers["run"].add_argument(
    "-f", "--sites-file", type=str, metavar="<path>",
    help="specify file containing benchmark websites")
  subparsers["run"].add_argument(
    "-t", "--timeout", type=int, metavar="<seconds>", default=60,
    help="specify seconds before chrome is killed")
  subparsers["run"].add_argument(
    "-u", "--user-data-dir", type=str, metavar="<path>",
    help="specify user data dir (default is temporary)")
  subparsers["run"].add_argument(
    "-c", "--with-chrome", type=str, metavar="<path>",
    default="/usr/bin/google-chrome",
    help="specify chrome executable to use")
  subparsers["run"].add_argument(
    "sites", type=str, metavar="<URL>", nargs="*",
    help="specify benchmark website")
  # Command: stats.
  subparsers["stats"] = subparser_adder.add_parser(
    "stats", help="stats --help")
  subparsers["stats"].set_defaults(
    func=do_stats, error=subparsers["stats"].error)
  subparsers["stats"].add_argument(
    "-l", "--limit", type=int, metavar="<num>", default=0,
    help="limit how many items to print (default: none)")
  subparsers["stats"].add_argument(
    "-s", "--sort", choices=["asc", "desc"], default="asc",
    help="specify sorting order (default: ascending)")
  subparsers["stats"].add_argument(
    "-n", "--no-total", dest="totals", action="store_false", default=True,
    help="do not print totals")
  subparsers["stats"].add_argument(
    "logfiles", type=str, metavar="<logfile>", nargs="*",
    help="specify log files to parse")
  # Command: json.
  subparsers["json"] = subparser_adder.add_parser(
    "json", help="json --help")
  subparsers["json"].set_defaults(
    func=do_json, error=subparsers["json"].error)
  subparsers["json"].add_argument(
    "logdirs", type=str, metavar="<logdir>", nargs="*",
    help="specify directories with log files to parse")
  # Command: help.
  subparsers["help"] = subparser_adder.add_parser(
    "help", help="help information")
  subparsers["help"].set_defaults(
    # do_help also needs the top-level parser and the sub-parser table.
    func=lambda args: do_help(parser, subparsers, args),
    error=subparsers["help"].error)
  subparsers["help"].add_argument(
    "help_cmd", type=str, metavar="<command>", nargs="?",
    help="command for which to display help")
  # Execute the command.
  args = parser.parse_args()
  if args.command == "run" and args.sites_file and args.sites:
    args.error("if --sites-file is used, no site URLS must be given")
    # args.error() already exits with status 2; this is only a safety net.
    sys.exit(1)
  else:
    args.func(args)

if __name__ == "__main__":
  sys.exit(main())
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698