# NOTE(review): residue of a code-review diff-table header ("OLD | NEW")
# converted to a comment; it is not part of the program.
1 #!/usr/bin/env python | |
2 # Copyright 2016 the V8 project authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 ''' | |
6 Usage: runtime-call-stats.py [-h] <command> ... | |
7 | |
8 Optional arguments: | |
9 -h, --help show this help message and exit | |
10 | |
11 Commands: | |
12 run run chrome with --runtime-call-stats and generate logs | |
13 stats process logs and print statistics | |
14 json process logs from several versions and generate JSON | |
15 help help information | |
16 | |
17 For each command, you can try ./runtime-call-stats.py help command. | |
18 ''' | |
19 | |
20 import argparse | |
21 import json | |
22 import os | |
23 import re | |
24 import shutil | |
25 import subprocess | |
26 import sys | |
27 import tempfile | |
28 | |
29 import numpy | |
30 import scipy | |
31 import scipy.stats | |
32 from math import sqrt | |
33 | |
34 | |
35 # Run benchmarks. | |
36 | |
# Websites benchmarked when the user does not supply any.
DEFAULT_SITES = [
    # top websites (http://alexa.com/topsites): --------------------
    "www.google.de/search?q=v8",
    "www.youtube.com",
    "www.facebook.com/shakira",
    "www.baidu.com/s?wd=v8",
    "www.yahoo.co.jp",
    "www.amazon.com/s/?field-keywords=v8",
    "en.wikipedia.org/wiki/main_page",
    "www.qq.com",
    "www.twitter.com/taylorswift13",
    "www.reddit.com",
    "www.ebay.com/sch/i.html?_nkw=v8",
    "edition.cnn.com",
    "world.taobao.com",
    "www.instagram.com/archdigest",
    "www.linkedin.com/pub/dir/?first=john&last=doe&search=search",
    "www.msn.com/ar-ae",
    "www.bing.com/search?q=v8+engine",
    "www.pinterest.com/categories/popular/",
    "www.sina.com.cn",
    "weibo.com",
    "yandex.ru/search/?text=v8",
    # framework driven decisions: -----------------------------------
    # wikipedia content + angularjs
    "www.wikiwand.com/en/hill",
    # ember website
    "meta.discourse.org/",
    # backbone js
    "reddit.musicplayer.io",
    # gwt application
    "inbox.google.com",
    # webgl
    "www.google.de/maps/search/restaurant"
]
72 | |
73 def run_site(site, domain, args): | |
74 result_template = "{domain}#{count}.txt" if args.repeat else "{domain}.txt" | |
75 count = 0 | |
76 while count == 0 or args.repeat is not None and count < args.repeat: | |
77 count += 1 | |
78 result = result_template.format(domain=domain, count=count) | |
79 retries = 0 | |
80 while args.retries is None or retries < args.retries: | |
81 retries += 1 | |
82 try: | |
83 temp_user_data_dir = args.user_data_dir is None | |
84 if temp_user_data_dir: | |
85 user_data_dir = tempfile.mkdtemp(prefix="chr_") | |
86 js_flags = "--runtime-call-stats" | |
87 if args.js_flags: js_flags += " " + args.js_flags | |
88 chrome_flags = [ | |
89 "--disk-cache-size=1", | |
nickie
2016/04/27 12:22:16
I'm removing this option, as some websites do not
| |
90 "--single-process", | |
91 "--no-sandbox", | |
92 "--js-flags={}".format(js_flags), | |
93 "--no-first-run", | |
94 "--user-data-dir={}".format(user_data_dir) | |
95 ] | |
96 cmd_args = [ | |
97 "timeout", str(args.timeout), | |
98 args.with_chrome | |
99 ] + chrome_flags + [ site ] | |
100 def fix_for_printing(arg): | |
101 m = re.match(r'^--([^=]+)=(.*)$', arg) | |
102 if m and (' ' in m.group(2) or m.group(2).startswith('-')): | |
103 arg = "--{}='{}'".format(m.group(1), m.group(2)) | |
104 elif ' ' in arg: | |
105 arg = "'{}'".format(arg) | |
106 return arg | |
107 print " ".join(map(fix_for_printing, cmd_args)) | |
108 print "- " * 40 | |
109 with open(result, "wt") as f: | |
110 status = subprocess.call(cmd_args, stdout=f) | |
111 # 124 means timeout killed chrome, 0 means the user was bored first! | |
112 # If none of these two happened, then chrome apparently crashed, so | |
113 # it must be called again. | |
114 if status != 124 and status != 0: continue | |
115 # If the stats file is empty, chrome must be called again. | |
116 if os.path.isfile(result) and os.path.getsize(result) > 0: | |
117 if args.print_url: | |
118 with open(result, "at") as f: | |
119 print >> f | |
120 print >> f, "URL: {}".format(site) | |
121 break | |
122 finally: | |
123 if temp_user_data_dir: | |
124 shutil.rmtree(user_data_dir) | |
125 | |
126 def do_run(args): | |
127 # Determine the websites to benchmark. | |
128 if args.sites_file: | |
129 sites = [] | |
130 try: | |
131 with open(args.sites_file, "rt") as f: | |
132 for line in f: | |
133 line = line.strip() | |
134 if not line or line.startswith('#'): continue | |
135 sites.append(line) | |
136 except IOError as e: | |
137 args.error("Cannot read from {}. {}.".format(args.sites_file, e.strerror)) | |
138 sys.exit(1) | |
139 elif args.sites: | |
140 sites = args.sites | |
141 else: | |
142 sites = DEFAULT_SITES | |
143 # Disambiguate domains, if needed. | |
144 L = [] | |
145 domains = {} | |
146 for site in sites: | |
147 m = re.match(r'^(https?://)?([^/]+)(/.*)?$', site) | |
148 if not m: | |
149 args.error("Invalid URL {}.".format(site)) | |
150 continue | |
151 domain = m.group(2) | |
152 entry = [site, domain, None] | |
153 if domain not in domains: | |
154 domains[domain] = entry | |
155 else: | |
156 if not isinstance(domains[domain], int): | |
157 domains[domain][2] = 1 | |
158 domains[domain] = 1 | |
159 domains[domain] += 1 | |
160 entry[2] = domains[domain] | |
161 L.append(entry) | |
162 # Run them. | |
163 for site, domain, count in L: | |
164 if count is not None: domain = "{}%{}".format(domain, count) | |
165 print site, domain | |
166 run_site(site, domain, args) | |
167 | |
168 | |
169 # Calculate statistics. | |
170 | |
def statistics(data):
  """Return summary statistics for a list of numbers.

  Returns a dict with 'samples', 'average', 'median', 'stddev', 'min', 'max'
  and 'ci', a 95% confidence interval given as absolute width ('abs'),
  endpoints ('low'/'high') and percentage of the average ('perc').
  """
  N = len(data)
  average = numpy.average(data)
  median = numpy.median(data)
  low = numpy.min(data)
  high = numpy.max(data)
  if N > 1:
    # Evaluate sample variance by setting delta degrees of freedom (ddof) to
    # 1. The degree used in calculations is N - ddof.
    stddev = numpy.std(data, ddof=1)
    # Get the endpoints of the range that contains 95% of the distribution.
    t_bounds = scipy.stats.t.interval(0.95, N-1)
    # Add the mean to the confidence interval.
    ci = {
        'abs': t_bounds[1] * stddev / sqrt(N),
        'low': average + t_bounds[0] * stddev / sqrt(N),
        'high': average + t_bounds[1] * stddev / sqrt(N)
    }
  else:
    stddev = 0
    ci = { 'abs': 0, 'low': average, 'high': average }
  # BUGFIX: derive the percentage from ci['abs'] (same value as before)
  # instead of reading t_bounds, which is only bound in the N > 1 branch.
  if abs(stddev) > 0.0001 and abs(average) > 0.0001:
    ci['perc'] = ci['abs'] / average * 100
  else:
    ci['perc'] = 0
  return { 'samples': N, 'average': average, 'median': median,
           'stddev': stddev, 'min': low, 'max': high, 'ci': ci }
199 | |
def read_stats(path, S):
  """Parse one --runtime-call-stats log file and accumulate results into S.

  S maps counter names to {'time_list': [...], 'count_list': [...]}, one
  element appended per parsed file. A synthetic 'Sum' entry accumulates
  every counter except chrome's own "Total" line.
  """
  with open(path, "rt") as f:
    # Collapse repeated counters within this single file first.
    per_file = { 'Sum': {'time': 0, 'count': 0} }
    for raw in f:
      stripped = raw.strip()
      # Discard blank lines, headers and footers.
      if (not stripped
          or stripped.startswith("Runtime Function")
          or stripped.startswith("====")
          or stripped.startswith("----")
          or stripped.startswith("URL:")):
        continue
      # A regular data line: name, time ("...ms"), percent, count, percent.
      fields = stripped.split()
      name = fields[0]
      time_ms = float(fields[1].replace("ms", ""))
      calls = int(fields[3])
      entry = per_file.setdefault(name, { 'time': 0, 'count': 0 })
      entry['time'] += time_ms
      entry['count'] += calls
      # Fold everything except the "total" line into the sum.
      if name != "Total":
        per_file['Sum']['time'] += time_ms
        per_file['Sum']['count'] += calls
    # Append this file's totals as single samples to S.
    for name, entry in per_file.items():
      if name not in S:
        S[name] = { 'time_list': [], 'count_list': [] }
      S[name]['time_list'].append(entry['time'])
      S[name]['count_list'].append(entry['count'])
229 | |
230 def print_stats(S, args): | |
231 # Sort by ascending/descending time average, then by ascending/descending | |
232 # count average, then by ascending name. | |
233 def sort_asc_func(item): | |
234 return (item[1]['time_stat']['average'], | |
235 item[1]['count_stat']['average'], | |
236 item[0]) | |
237 def sort_desc_func(item): | |
238 return (-item[1]['time_stat']['average'], | |
239 -item[1]['count_stat']['average'], | |
240 item[0]) | |
241 # Sorting order is in the commend-line arguments. | |
242 sort_func = sort_asc_func if args.sort == "asc" else sort_desc_func | |
243 # Possibly limit how many elements to print. | |
244 L = [item for item in sorted(S.items(), key=sort_func) | |
245 if item[0] not in ["Total", "Sum"]] | |
246 N = len(L) | |
247 if args.limit == 0: | |
248 low, high = 0, N | |
249 elif args.sort == "desc": | |
250 low, high = 0, args.limit | |
251 else: | |
252 low, high = N-args.limit, N | |
253 # How to print entries. | |
254 def print_entry(key, value): | |
255 def stats(s, units=""): | |
256 conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc']) | |
257 return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf) | |
258 print "{:>50s} {} {}".format( | |
259 key, | |
260 stats(value['time_stat'], units="ms"), | |
261 stats(value['count_stat']) | |
262 ) | |
263 # Print and calculate partial sums, if necessary. | |
264 for i in range(low, high): | |
265 print_entry(*L[i]) | |
266 if args.totals and args.limit != 0: | |
267 if i == low: | |
268 partial = { 'time_list': [0] * len(L[i][1]['time_list']), | |
269 'count_list': [0] * len(L[i][1]['count_list']) } | |
270 assert len(partial['time_list']) == len(L[i][1]['time_list']) | |
271 assert len(partial['count_list']) == len(L[i][1]['count_list']) | |
272 for j, v in enumerate(L[i][1]['time_list']): | |
273 partial['time_list'][j] += v | |
274 for j, v in enumerate(L[i][1]['count_list']): | |
275 partial['count_list'][j] += v | |
276 # Print totals, if necessary. | |
277 if args.totals: | |
278 print '-' * 80 | |
279 if args.limit != 0: | |
280 partial['time_stat'] = statistics(partial['time_list']) | |
281 partial['count_stat'] = statistics(partial['count_list']) | |
282 print_entry("Partial", partial) | |
283 print_entry("Sum", S["Sum"]) | |
284 print_entry("Total", S["Total"]) | |
285 | |
286 def do_stats(args): | |
287 T = {} | |
288 for path in args.logfiles: | |
289 filename = os.path.basename(path) | |
290 m = re.match(r'^([^#]+)(#.*)?$', filename) | |
291 domain = m.group(1) | |
292 if domain not in T: T[domain] = {} | |
293 read_stats(path, T[domain]) | |
294 for i, domain in enumerate(sorted(T)): | |
295 if len(T) > 1: | |
296 if i > 0: print | |
297 print "{}:".format(domain) | |
298 print '=' * 80 | |
299 S = T[domain] | |
300 for key in S: | |
301 S[key]['time_stat'] = statistics(S[key]['time_list']) | |
302 S[key]['count_stat'] = statistics(S[key]['count_list']) | |
303 print_stats(S, args) | |
304 | |
305 | |
306 # Generate JSON file. | |
307 | |
308 def do_json(args): | |
309 J = {} | |
310 for path in args.logdirs: | |
311 if os.path.isdir(path): | |
312 for root, dirs, files in os.walk(path): | |
313 version = os.path.basename(root) | |
314 if version not in J: J[version] = {} | |
315 for filename in files: | |
316 if filename.endswith(".txt"): | |
317 m = re.match(r'^([^#]+)(#.*)?$', filename) | |
318 domain = m.group(1) | |
319 if domain not in J[version]: J[version][domain] = {} | |
320 read_stats(os.path.join(root, filename), J[version][domain]) | |
321 for version, T in J.items(): | |
322 for domain, S in T.items(): | |
323 A = [] | |
324 for name, value in S.items(): | |
325 # We don't want the calculated sum in the JSON file. | |
326 if name == "Sum": continue | |
327 entry = [name] | |
328 for x in ['time_list', 'count_list']: | |
329 s = statistics(S[name][x]) | |
330 entry.append(round(s['average'], 1)) | |
331 entry.append(round(s['ci']['abs'], 1)) | |
332 entry.append(round(s['ci']['perc'], 2)) | |
333 A.append(entry) | |
334 T[domain] = A | |
335 print json.dumps(J, separators=(',', ':')) | |
336 | |
337 | |
338 # Help. | |
339 | |
def do_help(parser, subparsers, args):
  """Print help for one command, or general help if no command was given."""
  if not args.help_cmd:
    parser.print_help()
  elif args.help_cmd in subparsers:
    subparsers[args.help_cmd].print_help()
  else:
    args.error("Unknown command '{}'".format(args.help_cmd))
348 | |
349 | |
350 # Main program, parse command line and execute. | |
351 | |
def main():
  """Parse the command line and dispatch to the selected subcommand."""
  parser = argparse.ArgumentParser()
  adder = parser.add_subparsers(title="commands", dest="command",
                                metavar="<command>")
  subparsers = {}

  def add_command(name, help_text):
    # Register a subcommand parser and remember it for the "help" command.
    subparsers[name] = adder.add_parser(name, help=help_text)
    return subparsers[name]

  # Command: run.
  run_parser = add_command("run", "run --help")
  run_parser.set_defaults(func=do_run, error=run_parser.error)
  run_parser.add_argument(
      "--js-flags", type=str, default="",
      help="specify additional V8 flags")
  run_parser.add_argument(
      "--no-url", dest="print_url", action="store_false", default=True,
      help="do not include url in statistics file")
  run_parser.add_argument(
      "-n", "--repeat", type=int, metavar="<num>",
      help="specify iterations for each website (default: once)")
  run_parser.add_argument(
      "-r", "--retries", type=int, metavar="<num>",
      help="specify retries if website is down (default: forever)")
  run_parser.add_argument(
      "-f", "--sites-file", type=str, metavar="<path>",
      help="specify file containing benchmark websites")
  run_parser.add_argument(
      "-t", "--timeout", type=int, metavar="<seconds>", default=60,
      help="specify seconds before chrome is killed")
  run_parser.add_argument(
      "-u", "--user-data-dir", type=str, metavar="<path>",
      help="specify user data dir (default is temporary)")
  run_parser.add_argument(
      "-c", "--with-chrome", type=str, metavar="<path>",
      default="/usr/bin/google-chrome",
      help="specify chrome executable to use")
  run_parser.add_argument(
      "sites", type=str, metavar="<URL>", nargs="*",
      help="specify benchmark website")
  # Command: stats.
  stats_parser = add_command("stats", "stats --help")
  stats_parser.set_defaults(func=do_stats, error=stats_parser.error)
  stats_parser.add_argument(
      "-l", "--limit", type=int, metavar="<num>", default=0,
      help="limit how many items to print (default: none)")
  stats_parser.add_argument(
      "-s", "--sort", choices=["asc", "desc"], default="asc",
      help="specify sorting order (default: ascending)")
  stats_parser.add_argument(
      "-n", "--no-total", dest="totals", action="store_false", default=True,
      help="do not print totals")
  stats_parser.add_argument(
      "logfiles", type=str, metavar="<logfile>", nargs="*",
      help="specify log files to parse")
  # Command: json.
  json_parser = add_command("json", "json --help")
  json_parser.set_defaults(func=do_json, error=json_parser.error)
  json_parser.add_argument(
      "logdirs", type=str, metavar="<logdir>", nargs="*",
      help="specify directories with log files to parse")
  # Command: help.
  help_parser = add_command("help", "help information")
  help_parser.set_defaults(
      func=lambda args: do_help(parser, subparsers, args),
      error=help_parser.error)
  help_parser.add_argument(
      "help_cmd", type=str, metavar="<command>", nargs="?",
      help="command for which to display help")
  # Execute the command.
  args = parser.parse_args()
  if args.command == "run" and args.sites_file and args.sites:
    args.error("if --sites-file is used, no site URLS must be given")
    sys.exit(1)
  else:
    args.func(args)
431 | |
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
  sys.exit(main())
# NOTE(review): trailing code-review diff-table footer ("OLD | NEW")
# converted to a comment; it is not part of the program.