#!/usr/bin/env python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Parses CSV output from the loading_measurement and outputs interesting stats.

Example usage:
$ tools/perf/run_measurement --browser=release \
    --output-format=csv --output=/path/to/loading_measurement_output.csv \
    loading_measurement tools/perf/page_sets/top_1m.py
$ tools/perf/measurements/loading_measurement_analyzer.py \
    --num-slowest-urls=100 --rank-csv-file=/path/to/top-1m.csv \
    /path/to/loading_measurement_output.csv
"""

import collections
import csv
import heapq
import optparse
import os
import re
import sys


class LoadingMeasurementAnalyzer(object):
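  """Aggregates per-URL timing stats from a loading_measurement CSV."""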

  def __init__(self, input_file, options):
    self.ranks = {}
    self.totals = collections.defaultdict(list)
    self.maxes = collections.defaultdict(list)
    self.avgs = collections.defaultdict(list)
    self.load_times = []
    self.cpu_times = []
    self.network_percents = []
    self.num_rows_parsed = 0
    self.num_slowest_urls = options.num_slowest_urls
    self._display_zeros = options.display_zeros
    if options.rank_csv_file:
      self._ParseRankCsvFile(os.path.expanduser(options.rank_csv_file))
    self._ParseInputFile(input_file, options)

  def _ParseInputFile(self, input_file, options):
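    """Accumulates per-category times for each CSV row.

    Columns named *_avg and *_max are tracked separately; every other
    numeric category column is summed into that URL's CPU time.
    """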
    with open(input_file, 'r') as csvfile:
      reader = csv.DictReader(csvfile)
      for row in reader:
        if (options.rank_limit and
            self._GetRank(row['url']) > options.rank_limit):
          continue
        cpu_time = 0
        load_time = float(row['load_time (ms)'])
        if load_time < 0:
          print 'Skipping %s due to negative load time' % row['url']
          continue
        for key, value in row.iteritems():
          if key in ('url', 'load_time (ms)', 'dom_content_loaded_time (ms)'):
            continue
          # Skip empty, missing ('-') and zero-valued cells.
          if not value or value == '-':
            continue
          value = float(value)
          if not value:
            continue
          if '_avg' in key:
            self.avgs[key].append((value, row['url']))
          elif '_max' in key:
            self.maxes[key].append((value, row['url']))
          else:
            self.totals[key].append((value, row['url']))
            cpu_time += value
        self.load_times.append((load_time, row['url']))
        self.cpu_times.append((cpu_time, row['url']))
        if options.show_network:
          # Whatever the CPU categories don't account for counts as network.
          network_time = load_time - cpu_time
          self.totals['Network (ms)'].append((network_time, row['url']))
          self.network_percents.append((network_time / load_time, row['url']))
        self.num_rows_parsed += 1
        if options.max_rows and self.num_rows_parsed == options.max_rows:
          break

  def _ParseRankCsvFile(self, input_file):
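    """Builds the URL-to-rank map from a two-column <rank,url> CSV.

    Rows are assumed to be Alexa-style, with scheme-less URLs, e.g.:
      1,google.com
    """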
    with open(input_file, 'r') as csvfile:
      for row in csv.reader(csvfile):
        assert len(row) == 2
        self.ranks[row[1]] = int(row[0])

  def _GetRank(self, url):
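    """Returns the rank of |url|; unranked URLs sort after all ranked ones."""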
    url = url.replace('http://', '')
    if url in self.ranks:
      return self.ranks[url]
    return len(self.ranks)

  def PrintSummary(self, stdout):
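    """Prints totals, per-category breakdowns, and the slowest URLs.

    Output shape (values illustrative):
      Total URLs: 1000
      Total page load time: 1234s
      Average page load time: 1234ms
      ...then one line per category, largest total first, with its percentage.
    """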
    sum_totals = {}
    units = None
    for key, values in self.totals.iteritems():
      m = re.match('.* [(](.*)[)]', key)
      assert m, 'All keys should have units.'
      assert not units or units == m.group(1), 'All units should be the same.'
      units = m.group(1)
      sum_totals[key] = sum([v[0] for v in values])
    total_cpu_time = sum([v[0] for v in self.cpu_times])
    total_page_load_time = sum([v[0] for v in self.load_times])

    print >> stdout
    print >> stdout, 'Total URLs:', self.num_rows_parsed
    print >> stdout, 'Total page load time: %ds' % int(round(
        total_page_load_time / 1000))
    print >> stdout, 'Average page load time: %dms' % int(round(
        total_page_load_time / self.num_rows_parsed))
    if units == 'ms':
      print >> stdout, 'Total CPU time: %ds' % int(round(total_cpu_time / 1000))
      print >> stdout, 'Average CPU time: %dms' % int(round(
          total_cpu_time / self.num_rows_parsed))
    print >> stdout
    for key, value in sorted(sum_totals.iteritems(), reverse=True,
                             key=lambda i: i[1]):
      # Categories are sorted by descending total, so stop at the first one
      # under 100 units unless --display-zeros was passed.
      if not self._display_zeros and not int(value / 100.):
        break
      output_key = '%60s: ' % re.sub(' [(].*[)]', '', key)
      if units == 'ms':
        output_value = '%10ds ' % (value / 1000)
        output_percent = '%.1f%%' % (100 * value / total_page_load_time)
      else:
        output_value = '%10d%s ' % (value, units)
        output_percent = '%.1f%%' % (100 * value / total_cpu_time)
      print >> stdout, output_key, output_value, output_percent

    if not self.num_slowest_urls:
      return

    for key, values in sorted(self.totals.iteritems(), reverse=True,
                              key=lambda i: sum_totals[i[0]]):
      if not self._display_zeros and not int(sum_totals[key] / 100.):
        break
      print >> stdout
      print >> stdout, 'Top %d slowest %s:' % (self.num_slowest_urls,
                                               re.sub(' [(].*[)]', '', key))
      slowest = heapq.nlargest(self.num_slowest_urls, values)
      for value, url in slowest:
        print >> stdout, '%10d%s\t%s (#%s)' % (value, units, url,
                                               self._GetRank(url))

    if self.network_percents:
      print >> stdout
      print >> stdout, 'Top %d highest network to CPU time ratios:' % (
          self.num_slowest_urls)
      for percent, url in sorted(
          self.network_percents, reverse=True)[:self.num_slowest_urls]:
        percent *= 100
        print >> stdout, '\t', '%.1f%%' % percent, url, '(#%s)' % (
            self._GetRank(url))


def main(arguments, stdout=sys.stdout):
  prog_desc = 'Parses CSV output from the loading_measurement'
  parser = optparse.OptionParser(
      usage=('%prog [options] <csv-file>' + '\n\n' + prog_desc))

  parser.add_option('--max-rows', type='int',
                    help='Only process this many rows')
  parser.add_option('--num-slowest-urls', type='int',
                    help='Output this many slowest URLs for each category')
  parser.add_option('--rank-csv-file', help='A CSV file of <rank,url>')
  parser.add_option('--rank-limit', type='int',
                    help='Only process pages whose rank is at most this value')
  parser.add_option('--show-network', action='store_true',
                    help='Whether to display Network as a category')
  parser.add_option('--display-zeros', action='store_true',
                    help='Whether to display categories with zero time')

  options, args = parser.parse_args(arguments)

  assert len(args) == 1, 'Must pass exactly one CSV file to analyze'
  if options.rank_limit and not options.rank_csv_file:
    print 'Must pass --rank-csv-file with --rank-limit'
    return 1

  LoadingMeasurementAnalyzer(args[0], options).PrintSummary(stdout)

  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))