#!/usr/bin/env python
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''
Usage: runtime-call-stats.py [-h] <command> ...

Optional arguments:
  -h, --help          show this help message and exit

Commands:
  run                 run chrome with --runtime-call-stats and generate logs
  stats               process logs and print statistics
  json                process logs from several versions and generate JSON
  help                help information

For help on a specific command, run: ./runtime-call-stats.py help <command>
'''
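
# This script requires Python 2 (it uses print statements) plus numpy and
# scipy. Example workflow (illustrative; URLs, file and directory names are
# placeholders, not defaults of this script):
#   ./runtime-call-stats.py run -n 5 "https://www.google.de/search?q=v8"
#   ./runtime-call-stats.py stats www.google.de#*.txt
#   ./runtime-call-stats.py json logs-before logs-after > results.json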

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile

import numpy
import scipy
import scipy.stats
from math import sqrt


# Run benchmarks.

DEFAULT_SITES = [
  # top websites (http://alexa.com/topsites): --------------------
  "https://www.google.de/search?q=v8",
  "https://www.youtube.com",
  "https://www.facebook.com/shakira",
  "http://www.baidu.com/s?wd=v8",
  "http://www.yahoo.co.jp",
  "http://www.amazon.com/s/?field-keywords=v8",
  "http://hi.wikipedia.org/wiki/" \
      "%E0%A4%AE%E0%A5%81%E0%A4%96%E0%A4%AA%E0%A5%83%E0%A4%B7%E0%A5%8D%E0%A4%A0",
  "http://www.qq.com",
  "http://www.twitter.com/taylorswift13",
  "http://www.reddit.com",
  "http://www.ebay.fr/sch/i.html?_nkw=v8",
  "http://edition.cnn.com",
  "http://world.taobao.com",
  "http://www.instagram.com/archdigest",
  "https://www.linkedin.com/pub/dir/?first=john&last=doe&search=search",
  "http://www.msn.com/ar-ae",
  "http://www.bing.com/search?q=v8+engine",
  "http://www.pinterest.com/categories/popular",
  "http://www.sina.com.cn",
  "http://weibo.com",
  "http://yandex.ru/search/?text=v8",
  # framework driven decisions: -----------------------------------
  # wikipedia content + angularjs
  "http://www.wikiwand.com/en/hill",
  # ember website
  "http://meta.discourse.org",
  # backbone js
  "http://reddit.musicplayer.io",
  # gwt application
  "http://inbox.google.com",
  # webgl / algorithmic case
  "http://maps.google.co.jp/maps/search/restaurant+tokyo",
  # whatever framework adwords uses
  "https://adwords.google.com",
]


def print_command(cmd_args):
  def fix_for_printing(arg):
    m = re.match(r'^--([^=]+)=(.*)$', arg)
    if m and (' ' in m.group(2) or m.group(2).startswith('-')):
      arg = "--{}='{}'".format(m.group(1), m.group(2))
    elif ' ' in arg:
      arg = "'{}'".format(arg)
    return arg
  print " ".join(map(fix_for_printing, cmd_args))


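# Start a Web Page Replay server that replays the recorded .wpr archive given
# via --replay-wpr. The fixed ports 4080/4443 correspond to the
# --host-resolver-rules mapping that run_site() passes to Chrome.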
def start_replay_server(args):
  cmd_args = [
      args.replay_bin,
      "--port=4080",
      "--ssl_port=4443",
      "--no-dns_forwarding",
      "--use_closest_match",
      "--no-diff_unknown_requests",
      args.replay_wpr,
  ]
  print "=" * 80
  print_command(cmd_args)
  with open(os.devnull, 'w') as null:
    server = subprocess.Popen(cmd_args, stdout=null, stderr=null)
  print "RUNNING REPLAY SERVER: %s with PID=%s" % (args.replay_bin, server.pid)
  print "=" * 80
  return server


def stop_replay_server(server):
  print("SHUTTING DOWN REPLAY SERVER %s" % server.pid)
  server.terminate()


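# Run a single site in Chrome and write the --runtime-call-stats output to a
# log file named "<domain>.txt" (or "<domain>#<count>.txt" when --repeat is
# used). Runs that crash or produce an empty log are retried.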
def run_site(site, domain, args, timeout=None):
  print "="*80
  print "RUNNING DOMAIN %s" % domain
  print "="*80
  result_template = "{domain}#{count}.txt" if args.repeat else "{domain}.txt"
  count = 0
  while count == 0 or (args.repeat is not None and count < args.repeat):
    count += 1
    result = result_template.format(domain=domain, count=count)
    retries = 0
    while args.retries is None or retries < args.retries:
      retries += 1
      try:
        temp_user_data_dir = args.user_data_dir is None
        if temp_user_data_dir:
          user_data_dir = tempfile.mkdtemp(prefix="chr_")
        else:
          user_data_dir = args.user_data_dir
        js_flags = "--runtime-call-stats"
        if args.js_flags: js_flags += " " + args.js_flags
        chrome_flags = [
            "--no-default-browser-check",
            "--disable-translate",
            "--single-process",
            "--no-sandbox",
            "--js-flags={}".format(js_flags),
            "--no-first-run",
            "--user-data-dir={}".format(user_data_dir),
        ]
        if args.replay_wpr:
          chrome_flags += [
              "--host-resolver-rules=MAP *:80 localhost:4080, " \
                  "MAP *:443 localhost:4443, " \
                  "EXCLUDE localhost",
              "--ignore-certificate-errors",
              "--disable-web-security",
              "--reduce-security-for-testing",
              "--allow-insecure-localhost",
          ]
        if args.chrome_flags:
          chrome_flags += args.chrome_flags.split()
        if timeout is None: timeout = args.timeout
        cmd_args = [
            "timeout", str(timeout),
            args.with_chrome
        ] + chrome_flags + [ site ]
        print "- " * 40
        print_command(cmd_args)
        print "- " * 40
        with open(result, "wt") as f:
          status = subprocess.call(cmd_args, stdout=f)
        # 124 means timeout killed chrome, 0 means the user was bored first!
        # If neither of those happened, chrome apparently crashed, so it must
        # be run again.
        if status != 124 and status != 0:
          print("CHROME CRASHED, REPEATING RUN")
          continue
        # If the stats file is empty, chrome must be run again.
        if os.path.isfile(result) and os.path.getsize(result) > 0:
          if args.print_url:
            with open(result, "at") as f:
              print >> f
              print >> f, "URL: {}".format(site)
          break
        print("EMPTY RESULT, REPEATING RUN")
      finally:
        if temp_user_data_dir:
          shutil.rmtree(user_data_dir)


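# The --sites-file argument accepts either a JSON list of objects or a plain
# text list of URLs, one per line ('#' starts a comment). A minimal JSON
# example (illustrative values):
#   [{"url": "https://www.google.de/search?q=v8", "timeout": 30},
#    {"url": "https://www.youtube.com", "timeline": 10}]
# When 'timeout' is missing, it is derived from the 'timeline' field below.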
def read_sites_file(args):
  try:
    sites = []
    try:
      with open(args.sites_file, "rt") as f:
        for item in json.load(f):
          if 'timeout' not in item:
            # This is more-or-less arbitrary.
            item['timeout'] = int(2.5 * item['timeline'] + 3)
          if item['timeout'] > args.timeout: item['timeout'] = args.timeout
          sites.append(item)
    except ValueError:
      with open(args.sites_file, "rt") as f:
        for line in f:
          line = line.strip()
          if not line or line.startswith('#'): continue
          sites.append({'url': line, 'timeout': args.timeout})
    return sites
  except IOError as e:
    args.error("Cannot read from {}. {}.".format(args.sites_file, e.strerror))
    sys.exit(1)


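# Run the benchmark for every configured site. When several URLs share a
# domain, the domain used for log names gets a "%<n>" suffix so the results
# stay distinguishable.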
def do_run(args):
  # Determine the websites to benchmark.
  if args.sites_file:
    sites = read_sites_file(args)
  elif args.sites:
    sites = [{'url': site, 'timeout': args.timeout} for site in args.sites]
  else:
    sites = [{'url': site, 'timeout': args.timeout} for site in DEFAULT_SITES]
  # Disambiguate domains, if needed.
  L = []
  domains = {}
  for item in sites:
    site = item['url']
    m = re.match(r'^(https?://)?([^/]+)(/.*)?$', site)
    if not m:
      args.error("Invalid URL {}.".format(site))
      continue
    domain = m.group(2)
    entry = [site, domain, None, item['timeout']]
    if domain not in domains:
      domains[domain] = entry
    else:
      if not isinstance(domains[domain], int):
        domains[domain][2] = 1
        domains[domain] = 1
      domains[domain] += 1
      entry[2] = domains[domain]
    L.append(entry)
  replay_server = start_replay_server(args) if args.replay_wpr else None
  try:
    # Run them.
    for site, domain, count, timeout in L:
      if count is not None: domain = "{}%{}".format(domain, count)
      print site, domain, timeout
      run_site(site, domain, args, timeout)
  finally:
    if replay_server:
      stop_replay_server(replay_server)


# Calculate statistics.

def statistics(data):
  N = len(data)
  average = numpy.average(data)
  median = numpy.median(data)
  low = numpy.min(data)
  high = numpy.max(data)
  if N > 1:
    # Compute the sample standard deviation by setting the delta degrees of
    # freedom (ddof) to 1; the divisor used in the calculation is N - ddof.
    stddev = numpy.std(data, ddof=1)
    # Get the endpoints of the range that contains 95% of the distribution.
    t_bounds = scipy.stats.t.interval(0.95, N-1)
    #assert abs(t_bounds[0] + t_bounds[1]) < 1e-6
    # Add the mean to obtain the confidence interval bounds.
    ci = {
      'abs': t_bounds[1] * stddev / sqrt(N),
      'low': average + t_bounds[0] * stddev / sqrt(N),
      'high': average + t_bounds[1] * stddev / sqrt(N)
    }
  else:
    stddev = 0
    ci = { 'abs': 0, 'low': average, 'high': average }
  if abs(stddev) > 0.0001 and abs(average) > 0.0001:
    ci['perc'] = ci['abs'] / average * 100
  else:
    ci['perc'] = 0
  return { 'samples': N, 'average': average, 'median': median,
           'stddev': stddev, 'min': low, 'max': high, 'ci': ci }
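# Example (illustrative numbers): three samples [10.0, 12.0, 14.0] have an
# average of 12.0 and a sample stddev of 2.0; with the two-sided 95%
# t-interval for N-1 = 2 degrees of freedom (t ~= 4.30), the absolute CI
# half-width is about 4.30 * 2.0 / sqrt(3) ~= 4.97, i.e. roughly +/- 41%.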


def read_stats(path, S):
  with open(path, "rt") as f:
    # Process the whole file and sum repeating entries.
    D = { 'Sum': {'time': 0, 'count': 0} }
    for line in f:
      line = line.strip()
      # Discard headers and footers.
      if not line: continue
      if line.startswith("Runtime Function"): continue
      if line.startswith("===="): continue
      if line.startswith("----"): continue
      if line.startswith("URL:"): continue
      # We have a regular line.
      fields = line.split()
      key = fields[0]
      time = float(fields[1].replace("ms", ""))
      count = int(fields[3])
      if key not in D: D[key] = { 'time': 0, 'count': 0 }
      D[key]['time'] += time
      D[key]['count'] += count
      # Add the line to the sum, unless it is the "Total" line.
      if key != "Total":
        D['Sum']['time'] += time
        D['Sum']['count'] += count
    # Append the sums as single entries to S.
    for key in D:
      if key not in S: S[key] = { 'time_list': [], 'count_list': [] }
      S[key]['time_list'].append(D[key]['time'])
      S[key]['count_list'].append(D[key]['count'])
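# For reference, a data line in the --runtime-call-stats table parsed above
# looks roughly like this (illustrative values):
#       JS_Execution          23.45ms  32.71%         1   0.01%
# i.e. name, time in ms, time percentage, count, count percentage.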


def print_stats(S, args):
  # Sort by ascending/descending time average, then by ascending/descending
  # count average, then by ascending name.
  def sort_asc_func(item):
    return (item[1]['time_stat']['average'],
            item[1]['count_stat']['average'],
            item[0])
  def sort_desc_func(item):
    return (-item[1]['time_stat']['average'],
            -item[1]['count_stat']['average'],
            item[0])
  # Sorting order is in the command-line arguments.
  sort_func = sort_asc_func if args.sort == "asc" else sort_desc_func
  # Possibly limit how many elements to print.
  L = [item for item in sorted(S.items(), key=sort_func)
       if item[0] not in ["Total", "Sum"]]
  N = len(L)
  if args.limit == 0:
    low, high = 0, N
  elif args.sort == "desc":
    low, high = 0, min(args.limit, N)
  else:
    low, high = max(N - args.limit, 0), N
  # How to print entries.
  def print_entry(key, value):
    def stats(s, units=""):
      conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc'])
      return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf)
    print "{:>50s} {} {}".format(
      key,
      stats(value['time_stat'], units="ms"),
      stats(value['count_stat'])
    )
  # Print and calculate partial sums, if necessary.
  for i in range(low, high):
    print_entry(*L[i])
    if args.totals and args.limit != 0:
      if i == low:
        partial = { 'time_list': [0] * len(L[i][1]['time_list']),
                    'count_list': [0] * len(L[i][1]['count_list']) }
      assert len(partial['time_list']) == len(L[i][1]['time_list'])
      assert len(partial['count_list']) == len(L[i][1]['count_list'])
      for j, v in enumerate(L[i][1]['time_list']):
        partial['time_list'][j] += v
      for j, v in enumerate(L[i][1]['count_list']):
        partial['count_list'][j] += v
  # Print totals, if necessary.
  if args.totals:
    print '-' * 80
    if args.limit != 0:
      partial['time_stat'] = statistics(partial['time_list'])
      partial['count_stat'] = statistics(partial['count_list'])
      print_entry("Partial", partial)
    print_entry("Sum", S["Sum"])
    print_entry("Total", S["Total"])


def do_stats(args):
  T = {}
  for path in args.logfiles:
    filename = os.path.basename(path)
    m = re.match(r'^([^#]+)(#.*)?$', filename)
    domain = m.group(1)
    if domain not in T: T[domain] = {}
    read_stats(path, T[domain])
  for i, domain in enumerate(sorted(T)):
    if len(T) > 1:
      if i > 0: print
      print "{}:".format(domain)
      print '=' * 80
    S = T[domain]
    for key in S:
      S[key]['time_stat'] = statistics(S[key]['time_list'])
      S[key]['count_stat'] = statistics(S[key]['count_list'])
    print_stats(S, args)


# Generate JSON file.
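# The emitted JSON maps version (the basename of each log directory) ->
# domain -> a list of [name, time average, time CI, time CI %, count average,
# count CI, count CI %] entries, one per runtime function.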

def do_json(args):
  J = {}
  for path in args.logdirs:
    if os.path.isdir(path):
      for root, dirs, files in os.walk(path):
        version = os.path.basename(root)
        if version not in J: J[version] = {}
        for filename in files:
          if filename.endswith(".txt"):
            m = re.match(r'^([^#]+)(#.*)?$', filename)
            domain = m.group(1)
            if domain not in J[version]: J[version][domain] = {}
            read_stats(os.path.join(root, filename), J[version][domain])
  for version, T in J.items():
    for domain, S in T.items():
      A = []
      for name, value in S.items():
        # We don't want the calculated sum in the JSON file.
        if name == "Sum": continue
        entry = [name]
        for x in ['time_list', 'count_list']:
          s = statistics(S[name][x])
          entry.append(round(s['average'], 1))
          entry.append(round(s['ci']['abs'], 1))
          entry.append(round(s['ci']['perc'], 2))
        A.append(entry)
      T[domain] = A
  print json.dumps(J, separators=(',', ':'))


# Help.

def do_help(parser, subparsers, args):
  if args.help_cmd:
    if args.help_cmd in subparsers:
      subparsers[args.help_cmd].print_help()
    else:
      args.error("Unknown command '{}'".format(args.help_cmd))
  else:
    parser.print_help()


# Main program, parse command line and execute.

def main():
  parser = argparse.ArgumentParser()
  subparser_adder = parser.add_subparsers(title="commands", dest="command",
                                          metavar="<command>")
  subparsers = {}
  # Command: run.
  subparsers["run"] = subparser_adder.add_parser(
      "run", help="run --help")
  subparsers["run"].set_defaults(
      func=do_run, error=subparsers["run"].error)
  subparsers["run"].add_argument(
      "--chrome-flags", type=str, default="",
      help="specify additional chrome flags")
  subparsers["run"].add_argument(
      "--js-flags", type=str, default="",
      help="specify additional V8 flags")
  subparsers["run"].add_argument(
      "--no-url", dest="print_url", action="store_false", default=True,
      help="do not include url in statistics file")
  subparsers["run"].add_argument(
      "-n", "--repeat", type=int, metavar="<num>",
      help="specify iterations for each website (default: once)")
  subparsers["run"].add_argument(
      "--replay-wpr", type=str, metavar="<path>",
      help="use the specified web page replay (.wpr) archive")
  subparsers["run"].add_argument(
      "--replay-bin", type=str, metavar="<path>",
      help="specify the replay.py script typically located in " \
           "$CHROMIUM/src/third_party/webpagereplay/replay.py")
  subparsers["run"].add_argument(
      "-r", "--retries", type=int, metavar="<num>",
      help="specify retries if website is down (default: forever)")
  subparsers["run"].add_argument(
      "-f", "--sites-file", type=str, metavar="<path>",
      help="specify file containing benchmark websites")
  subparsers["run"].add_argument(
      "-t", "--timeout", type=int, metavar="<seconds>", default=60,
      help="specify seconds before chrome is killed")
  subparsers["run"].add_argument(
      "-u", "--user-data-dir", type=str, metavar="<path>",
      help="specify user data dir (default is temporary)")
  subparsers["run"].add_argument(
      "-c", "--with-chrome", type=str, metavar="<path>",
      default="/usr/bin/google-chrome",
      help="specify chrome executable to use")
  subparsers["run"].add_argument(
      "sites", type=str, metavar="<URL>", nargs="*",
      help="specify benchmark website")
  # Command: stats.
  subparsers["stats"] = subparser_adder.add_parser(
      "stats", help="stats --help")
  subparsers["stats"].set_defaults(
      func=do_stats, error=subparsers["stats"].error)
  subparsers["stats"].add_argument(
      "-l", "--limit", type=int, metavar="<num>", default=0,
      help="limit how many items to print (default: none)")
  subparsers["stats"].add_argument(
      "-s", "--sort", choices=["asc", "desc"], default="asc",
      help="specify sorting order (default: ascending)")
  subparsers["stats"].add_argument(
      "-n", "--no-total", dest="totals", action="store_false", default=True,
      help="do not print totals")
  subparsers["stats"].add_argument(
      "logfiles", type=str, metavar="<logfile>", nargs="*",
      help="specify log files to parse")
  # Command: json.
  subparsers["json"] = subparser_adder.add_parser(
      "json", help="json --help")
  subparsers["json"].set_defaults(
      func=do_json, error=subparsers["json"].error)
  subparsers["json"].add_argument(
      "logdirs", type=str, metavar="<logdir>", nargs="*",
      help="specify directories with log files to parse")
  # Command: help.
  subparsers["help"] = subparser_adder.add_parser(
      "help", help="help information")
  subparsers["help"].set_defaults(
      func=lambda args: do_help(parser, subparsers, args),
      error=subparsers["help"].error)
  subparsers["help"].add_argument(
      "help_cmd", type=str, metavar="<command>", nargs="?",
      help="command for which to display help")
  # Execute the command.
  args = parser.parse_args()
  if args.command == "run" and args.sites_file and args.sites:
    args.error("if --sites-file is used, no site URLs may be given")
    sys.exit(1)
  elif args.command == "run" and args.replay_wpr and not args.replay_bin:
    args.error("if --replay-wpr is used, --replay-bin must be given")
    sys.exit(1)
  else:
    args.func(args)


if __name__ == "__main__":
  sys.exit(main())