Chromium Code Reviews

Side by Side Diff: tools/runtime-call-stats.py

Issue 1922873004: Add script for benchmarking with --runtime-call-stats (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Remove calculated sum from JSON file (created 4 years, 7 months ago)
#!/usr/bin/env python
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''
Usage: runtime-call-stats.py [-h] <command> ...

Optional arguments:
  -h, --help  show this help message and exit

Commands:
  run    run chrome with --runtime-call-stats and generate logs
  stats  process logs and print statistics
  json   process logs from several versions and generate JSON
  help   help information

For help on a specific command, try: ./runtime-call-stats.py help <command>
'''
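# A typical session (URLs and directory names are hypothetical; see the
# per-command help for the full set of options):
#   ./runtime-call-stats.py run -n 3 www.example.com
#   ./runtime-call-stats.py stats www.example.com#*.txt
#   ./runtime-call-stats.py json logdir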

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile

import numpy
import scipy
import scipy.stats
from math import sqrt


# Run benchmarks.

DEFAULT_SITES = [
  # top websites (http://alexa.com/topsites): --------------------
  "www.google.de/search?q=v8",
  "www.youtube.com",
  "www.facebook.com/shakira",
  "www.baidu.com/s?wd=v8",
  "www.yahoo.co.jp",
  "www.amazon.com/s/?field-keywords=v8",
  "en.wikipedia.org/wiki/main_page",
  "www.qq.com",
  "www.twitter.com/taylorswift13",
  "www.reddit.com",
  "www.ebay.com/sch/i.html?_nkw=v8",
  "edition.cnn.com",
  "world.taobao.com",
  "www.instagram.com/archdigest",
  "www.linkedin.com/pub/dir/?first=john&last=doe&search=search",
  "www.msn.com/ar-ae",
  "www.bing.com/search?q=v8+engine",
  "www.pinterest.com/categories/popular/",
  "www.sina.com.cn",
  "weibo.com",
  "yandex.ru/search/?text=v8",
  # framework driven decisions: -----------------------------------
  # wikipedia content + angularjs
  "www.wikiwand.com/en/hill",
  # ember website
  "meta.discourse.org/",
  # backbone js
  "reddit.musicplayer.io",
  # gwt application
  "inbox.google.com",
  # webgl
  "www.google.de/maps/search/restaurant"
]

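# Run a single site |args.repeat| times, retrying on crashes, and store the
# collected --runtime-call-stats output in "<domain>.txt" (or in
# "<domain>#<count>.txt" when repeating).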
def run_site(site, domain, args):
  result_template = "{domain}#{count}.txt" if args.repeat else "{domain}.txt"
  count = 0
  while count == 0 or args.repeat is not None and count < args.repeat:
    count += 1
    result = result_template.format(domain=domain, count=count)
    retries = 0
    while args.retries is None or retries < args.retries:
      retries += 1
      try:
        temp_user_data_dir = args.user_data_dir is None
        if temp_user_data_dir:
          user_data_dir = tempfile.mkdtemp(prefix="chr_")
        else:
          user_data_dir = args.user_data_dir
        js_flags = "--runtime-call-stats"
        if args.js_flags: js_flags += " " + args.js_flags
        chrome_flags = [
            "--disk-cache-size=1",
nickie 2016/04/27 12:22:16 I'm removing this option, as some websites do not
90 "--single-process",
91 "--no-sandbox",
92 "--js-flags={}".format(js_flags),
93 "--no-first-run",
94 "--user-data-dir={}".format(user_data_dir)
95 ]
96 cmd_args = [
97 "timeout", str(args.timeout),
98 args.with_chrome
99 ] + chrome_flags + [ site ]
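        # Quote arguments that contain spaces or start with '-', so that the
        # printed command line can be copied and pasted into a shell.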
        def fix_for_printing(arg):
          m = re.match(r'^--([^=]+)=(.*)$', arg)
          if m and (' ' in m.group(2) or m.group(2).startswith('-')):
            arg = "--{}='{}'".format(m.group(1), m.group(2))
          elif ' ' in arg:
            arg = "'{}'".format(arg)
          return arg
        print " ".join(map(fix_for_printing, cmd_args))
        print "- " * 40
        with open(result, "wt") as f:
          status = subprocess.call(cmd_args, stdout=f)
        # 124 means timeout killed chrome, 0 means the user was bored first!
        # If neither of the two happened, then chrome apparently crashed, so
        # it must be called again.
        if status != 124 and status != 0: continue
        # If the stats file is empty, chrome must be called again.
        if os.path.isfile(result) and os.path.getsize(result) > 0:
          if args.print_url:
            with open(result, "at") as f:
              print >> f
              print >> f, "URL: {}".format(site)
          break
      finally:
        if temp_user_data_dir:
          shutil.rmtree(user_data_dir)

def do_run(args):
  # Determine the websites to benchmark.
  if args.sites_file:
    sites = []
    try:
      with open(args.sites_file, "rt") as f:
        for line in f:
          line = line.strip()
          if not line or line.startswith('#'): continue
          sites.append(line)
    except IOError as e:
      args.error("Cannot read from {}. {}.".format(args.sites_file, e.strerror))
      sys.exit(1)
  elif args.sites:
    sites = args.sites
  else:
    sites = DEFAULT_SITES
  # Disambiguate domains, if needed.
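  # Each entry is [site, domain, count]; when a domain occurs more than once,
  # its occurrences are numbered 1, 2, ... so their result files don't collide.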
  L = []
  domains = {}
  for site in sites:
    m = re.match(r'^(https?://)?([^/]+)(/.*)?$', site)
    if not m:
      args.error("Invalid URL {}.".format(site))
      continue
    domain = m.group(2)
    entry = [site, domain, None]
    if domain not in domains:
      domains[domain] = entry
    else:
      if not isinstance(domains[domain], int):
        domains[domain][2] = 1
        domains[domain] = 1
      domains[domain] += 1
      entry[2] = domains[domain]
    L.append(entry)
  # Run them.
  for site, domain, count in L:
    if count is not None: domain = "{}%{}".format(domain, count)
    print site, domain
    run_site(site, domain, args)


# Calculate statistics.

def statistics(data):
  N = len(data)
  average = numpy.average(data)
  median = numpy.median(data)
  low = numpy.min(data)
  high = numpy.max(data)
  if N > 1:
    # Evaluate the sample variance by setting the delta degrees of freedom
    # (ddof) to 1; the divisor used in the calculation is then N - ddof.
    stddev = numpy.std(data, ddof=1)
    # Get the endpoints of the range that contains 95% of the distribution.
    t_bounds = scipy.stats.t.interval(0.95, N-1)
    #assert abs(t_bounds[0] + t_bounds[1]) < 1e-6
    # Add the mean to the confidence interval.
    ci = {
        'abs': t_bounds[1] * stddev / sqrt(N),
        'low': average + t_bounds[0] * stddev / sqrt(N),
        'high': average + t_bounds[1] * stddev / sqrt(N)
    }
  else:
    stddev = 0
    ci = { 'abs': 0, 'low': average, 'high': average }
  if abs(stddev) > 0.0001 and abs(average) > 0.0001:
    ci['perc'] = t_bounds[1] * stddev / sqrt(N) / average * 100
  else:
    ci['perc'] = 0
  return { 'samples': N, 'average': average, 'median': median,
           'stddev': stddev, 'min': low, 'max': high, 'ci': ci }

def read_stats(path, S):
  with open(path, "rt") as f:
    # Process the whole file and sum up repeating entries.
    D = { 'Sum': {'time': 0, 'count': 0} }
    for line in f:
      line = line.strip()
      # Discard headers and footers.
      if not line: continue
      if line.startswith("Runtime Function"): continue
      if line.startswith("===="): continue
      if line.startswith("----"): continue
      if line.startswith("URL:"): continue
      # We have a regular line.
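      # A row looks roughly like (only the name, time and count are used):
      #   <name>  <time>ms  <time-percent>  <count>  <count-percent>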
      fields = line.split()
      key = fields[0]
      time = float(fields[1].replace("ms", ""))
      count = int(fields[3])
      if key not in D: D[key] = { 'time': 0, 'count': 0 }
      D[key]['time'] += time
      D[key]['count'] += count
      # Add to the calculated sum, unless it's the "Total" line.
      if key != "Total":
        D['Sum']['time'] += time
        D['Sum']['count'] += count
    # Append the sums as single entries to S.
    for key in D:
      if key not in S: S[key] = { 'time_list': [], 'count_list': [] }
      S[key]['time_list'].append(D[key]['time'])
      S[key]['count_list'].append(D[key]['count'])

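# Print one line per entry: the name, then the average time with its 95%
# confidence interval, then the average count with its confidence interval.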
def print_stats(S, args):
  # Sort by ascending/descending time average, then by ascending/descending
  # count average, then by ascending name.
  def sort_asc_func(item):
    return (item[1]['time_stat']['average'],
            item[1]['count_stat']['average'],
            item[0])
  def sort_desc_func(item):
    return (-item[1]['time_stat']['average'],
            -item[1]['count_stat']['average'],
            item[0])
  # The sorting order is given in the command-line arguments.
  sort_func = sort_asc_func if args.sort == "asc" else sort_desc_func
  # Possibly limit how many elements to print.
  L = [item for item in sorted(S.items(), key=sort_func)
       if item[0] not in ["Total", "Sum"]]
  N = len(L)
  if args.limit == 0:
    low, high = 0, N
  elif args.sort == "desc":
    low, high = 0, min(args.limit, N)
  else:
    low, high = max(N - args.limit, 0), N
  # How to print entries.
  def print_entry(key, value):
    def stats(s, units=""):
      conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc'])
      return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf)
    print "{:>50s} {} {}".format(
        key,
        stats(value['time_stat'], units="ms"),
        stats(value['count_stat'])
    )
  # Print the entries and calculate partial sums, if necessary.
  for i in range(low, high):
    print_entry(*L[i])
    if args.totals and args.limit != 0:
      if i == low:
        partial = { 'time_list': [0] * len(L[i][1]['time_list']),
                    'count_list': [0] * len(L[i][1]['count_list']) }
      assert len(partial['time_list']) == len(L[i][1]['time_list'])
      assert len(partial['count_list']) == len(L[i][1]['count_list'])
      for j, v in enumerate(L[i][1]['time_list']):
        partial['time_list'][j] += v
      for j, v in enumerate(L[i][1]['count_list']):
        partial['count_list'][j] += v
  # Print the totals, if necessary.
  if args.totals:
    print '-' * 80
    if args.limit != 0:
      partial['time_stat'] = statistics(partial['time_list'])
      partial['count_stat'] = statistics(partial['count_list'])
      print_entry("Partial", partial)
    print_entry("Sum", S["Sum"])
    print_entry("Total", S["Total"])

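# Group the log files by domain (the part of the file name before '#') and
# print the statistics for each domain separately.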
def do_stats(args):
  T = {}
  for path in args.logfiles:
    filename = os.path.basename(path)
    m = re.match(r'^([^#]+)(#.*)?$', filename)
    domain = m.group(1)
    if domain not in T: T[domain] = {}
    read_stats(path, T[domain])
  for i, domain in enumerate(sorted(T)):
    if len(T) > 1:
      if i > 0: print
      print "{}:".format(domain)
      print '=' * 80
    S = T[domain]
    for key in S:
      S[key]['time_stat'] = statistics(S[key]['time_list'])
      S[key]['count_stat'] = statistics(S[key]['count_list'])
    print_stats(S, args)


# Generate JSON file.

def do_json(args):
  J = {}
  for path in args.logdirs:
    if os.path.isdir(path):
      for root, dirs, files in os.walk(path):
        version = os.path.basename(root)
        if version not in J: J[version] = {}
        for filename in files:
          if filename.endswith(".txt"):
            m = re.match(r'^([^#]+)(#.*)?$', filename)
            domain = m.group(1)
            if domain not in J[version]: J[version][domain] = {}
            read_stats(os.path.join(root, filename), J[version][domain])
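  # Reduce to the final JSON shape: version -> domain -> list of entries
  # [name, time_avg, time_ci_abs, time_ci_perc,
  #        count_avg, count_ci_abs, count_ci_perc].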
  for version, T in J.items():
    for domain, S in T.items():
      A = []
      for name, value in S.items():
        # We don't want the calculated sum in the JSON file.
        if name == "Sum": continue
        entry = [name]
        for x in ['time_list', 'count_list']:
          s = statistics(S[name][x])
          entry.append(round(s['average'], 1))
          entry.append(round(s['ci']['abs'], 1))
          entry.append(round(s['ci']['perc'], 2))
        A.append(entry)
      T[domain] = A
  print json.dumps(J, separators=(',', ':'))


# Help.

def do_help(parser, subparsers, args):
  if args.help_cmd:
    if args.help_cmd in subparsers:
      subparsers[args.help_cmd].print_help()
    else:
      args.error("Unknown command '{}'".format(args.help_cmd))
  else:
    parser.print_help()


# Main program, parse command line and execute.

def main():
  parser = argparse.ArgumentParser()
  subparser_adder = parser.add_subparsers(title="commands", dest="command",
                                          metavar="<command>")
  subparsers = {}
  # Command: run.
  subparsers["run"] = subparser_adder.add_parser(
      "run", help="run --help")
  subparsers["run"].set_defaults(
      func=do_run, error=subparsers["run"].error)
  subparsers["run"].add_argument(
      "--js-flags", type=str, default="",
      help="specify additional V8 flags")
  subparsers["run"].add_argument(
      "--no-url", dest="print_url", action="store_false", default=True,
      help="do not include url in statistics file")
  subparsers["run"].add_argument(
      "-n", "--repeat", type=int, metavar="<num>",
      help="specify iterations for each website (default: once)")
  subparsers["run"].add_argument(
      "-r", "--retries", type=int, metavar="<num>",
      help="specify retries if website is down (default: forever)")
  subparsers["run"].add_argument(
      "-f", "--sites-file", type=str, metavar="<path>",
      help="specify file containing benchmark websites")
  subparsers["run"].add_argument(
      "-t", "--timeout", type=int, metavar="<seconds>", default=60,
      help="specify seconds before chrome is killed")
  subparsers["run"].add_argument(
      "-u", "--user-data-dir", type=str, metavar="<path>",
      help="specify user data dir (default is temporary)")
  subparsers["run"].add_argument(
      "-c", "--with-chrome", type=str, metavar="<path>",
      default="/usr/bin/google-chrome",
      help="specify chrome executable to use")
  subparsers["run"].add_argument(
      "sites", type=str, metavar="<URL>", nargs="*",
      help="specify benchmark website")
  # Command: stats.
  subparsers["stats"] = subparser_adder.add_parser(
      "stats", help="stats --help")
  subparsers["stats"].set_defaults(
      func=do_stats, error=subparsers["stats"].error)
  subparsers["stats"].add_argument(
      "-l", "--limit", type=int, metavar="<num>", default=0,
      help="limit how many items to print (default: no limit)")
  subparsers["stats"].add_argument(
      "-s", "--sort", choices=["asc", "desc"], default="asc",
      help="specify sorting order (default: ascending)")
  subparsers["stats"].add_argument(
      "-n", "--no-total", dest="totals", action="store_false", default=True,
      help="do not print totals")
  subparsers["stats"].add_argument(
      "logfiles", type=str, metavar="<logfile>", nargs="*",
      help="specify log files to parse")
  # Command: json.
  subparsers["json"] = subparser_adder.add_parser(
      "json", help="json --help")
  subparsers["json"].set_defaults(
      func=do_json, error=subparsers["json"].error)
  subparsers["json"].add_argument(
      "logdirs", type=str, metavar="<logdir>", nargs="*",
      help="specify directories with log files to parse")
  # Command: help.
  subparsers["help"] = subparser_adder.add_parser(
      "help", help="help information")
  subparsers["help"].set_defaults(
      func=lambda args: do_help(parser, subparsers, args),
      error=subparsers["help"].error)
  subparsers["help"].add_argument(
      "help_cmd", type=str, metavar="<command>", nargs="?",
      help="command for which to display help")
  # Execute the command.
  args = parser.parse_args()
  if args.command == "run" and args.sites_file and args.sites:
    args.error("if --sites-file is used, no site URLs may be given")
    sys.exit(1)
  else:
    args.func(args)

if __name__ == "__main__":
  sys.exit(main())
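
For reference, here is a minimal standalone sketch of the confidence-interval
computation performed by statistics() above; the sample data is made up:

import numpy
import scipy.stats
from math import sqrt

data = [10.2, 11.0, 9.8, 10.5]  # hypothetical per-run times in ms
N = len(data)
average = numpy.average(data)
stddev = numpy.std(data, ddof=1)  # sample standard deviation
# Symmetric t bounds for a 95% interval with N-1 degrees of freedom.
t_bounds = scipy.stats.t.interval(0.95, N - 1)
ci_abs = t_bounds[1] * stddev / sqrt(N)
print "{:.1f} +/- {:.1f}ms (95% CI)".format(average, ci_abs)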