Chromium Code Reviews

Side by Side Diff: mojo/devtools/common/mojo_benchmark

Issue 1433693004: mojo_benchmark: aggregate results over multiple runs. (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 5 years, 1 month ago
(Line numbers below give old-file and new-file positions: a line present in both versions carries both numbers; a removed line carries only its old number, an added line only its new number.)
1 1 #!/usr/bin/env python
2 2 # Copyright 2015 The Chromium Authors. All rights reserved.
3 3 # Use of this source code is governed by a BSD-style license that can be
4 4 # found in the LICENSE file.
5 5
6 6 """Runner for Mojo application benchmarks."""
7 7
8 8 import argparse
9 9 import logging
10 10 import sys
(...skipping 41 matching lines...)
52 52 []) + _COLD_START_SHELL_ARGS})
53 53 variants.append({
54 54 'variant_name': 'warm start',
55 55 'app': benchmark_spec['app'],
56 56 'duration': benchmark_spec['duration'],
57 57 'measurements': benchmark_spec['measurements'],
58 58 'shell-args': benchmark_spec.get('shell-args', [])})
59 59 return variants
60 60
61 61
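For reference, a minimal sketch of the kind of benchmark list entry these variants are generated from, inferred from the keys read here and in main() below; the benchmark name, app URL, duration and measurement spec are made-up placeholders:

# Hypothetical entry in a benchmark list file. The file is exec'd by
# mojo_benchmark with 'target_os' predefined and is expected to define
# a 'benchmarks' list of specs like this one.
benchmarks = [
    {
        'name': 'example startup',                      # placeholder benchmark name
        'app': 'https://example.org/example_app.mojo',  # placeholder app URL
        'duration': 10,                                  # how long to run; passed to benchmark.run
        'shell-args': [],                                # optional extra shell arguments
        'measurements': [
            {
                'name': 'time_to_run',                        # label used in output and charts
                'spec': 'time_until/example_trace_event',     # placeholder measurement spec
            },
        ],
    },
]
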
62 def _print_benchmark_error(outcome):
63 if not outcome.succeeded:
64 print 'benchmark failed: ' + outcome.error_str
65 if outcome.some_measurements_failed:
66 print 'some measurements failed'
67 print 'output: '
68 print '-' * 72
69 print outcome.output
70 print '-' * 72
71
72
73 def _print_results(benchmark_name, variant_name, results, measurements,
74 aggregate):
75 print '[ %s ] %s ' % (benchmark_name, variant_name)
76 for measurement in measurements:
77 print ' ' + measurement['name'] + ': ',
78 if measurement['spec'] in results:
79 if aggregate:
80 print str(results[measurement['spec']])
81 else:
82 if len(results[measurement['spec']]) == 0:
83 print '?'
84 else:
85 print '%10.4f' % results[measurement['spec']][0]
qsr 2015/11/10 17:05:27 Why is 10.4f the right format for single measurements?
ppi 2015/11/10 18:00:18 Done.
86 else:
87 print '?'
88
89
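To illustrate the two output paths above, roughly what _print_results would print for one variant, first with --aggregate and then for a single run (names and values are made-up placeholders):

[ example startup ] cold start
  time_to_run:  [42.1, 43.8, 41.9]

[ example startup ] cold start
  time_to_run:     42.1000
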
90 def _upload_results(benchmark_name, variant_name, results, measurements,
91 script_args):
92 anything_recorded = False
93 chart_data_recorder = perf_dashboard.ChartDataRecorder(script_args.test_name)
94 chart_name = benchmark_name + '__' + variant_name
95
96 for measurement in measurements:
97 if measurement['spec'] in results:
98 if script_args.aggregate:
qsr 2015/11/10 17:05:27 Do you want to send empty list of values?
ppi 2015/11/10 18:00:18 Done.
99 chart_data_recorder.record_vector(
100 perf_dashboard.normalize_label(chart_name),
101 perf_dashboard.normalize_label(measurement['name']),
102 'ms', results[measurement['spec']])
103 anything_recorded = True
104 elif len(results[measurement['spec']]) > 0:
105 chart_data_recorder.record_scalar(
106 perf_dashboard.normalize_label(chart_name),
107 perf_dashboard.normalize_label(measurement['name']),
108 'ms', results[measurement['spec']][0])
109 anything_recorded = True
110
111 if not anything_recorded:
112 # Don't upload empty packets, see
113 # https://github.com/catapult-project/catapult/issues/1733 .
114 return True
115
116 return perf_dashboard.upload_chart_data(
117 script_args.master_name, script_args.bot_name,
118 script_args.test_name, script_args.builder_name,
119 script_args.build_number, chart_data_recorder.get_chart_data(),
120 script_args.server_url, script_args.dry_run)
121
122
123 def _argparse_aggregate_type(value):
124 try:
125 cast_value = int(value)
126 except ValueError:
127 raise argparse.ArgumentTypeError('value is not a positive integer')
128
129 if cast_value < 1:
130 raise argparse.ArgumentTypeError('value is not a positive integer')
131 return cast_value
132
133
62 134 def main():
63 135 parser = argparse.ArgumentParser(
64 136 formatter_class=argparse.RawDescriptionHelpFormatter,
65 137 description=_DESCRIPTION)
66 138 parser.add_argument('benchmark_list_file', type=file,
67 139 help='a file listing benchmarks to run')
140 parser.add_argument('--aggregate', type=_argparse_aggregate_type,
141 help='aggregate results over multiple runs. The value '
142 'has to be a positive integer indicating the number of '
143 'runs.')
68 144 parser.add_argument('--save-all-traces', action='store_true',
69 145 help='save the traces produced by benchmarks to disk')
70 146 perf_dashboard.add_argparse_server_arguments(parser)
71 147
72 148 # Common shell configuration arguments.
73 149 shell_config.add_shell_arguments(parser)
74 150 script_args = parser.parse_args()
75 151 config = shell_config.get_shell_config(script_args)
76 152
77 153 try:
78 154 shell, common_shell_args = shell_arguments.get_shell(config, [])
79 155 except shell_arguments.ShellConfigurationException as e:
80 156 print e
81 157 return 1
82 158
83 159 target_os = 'android' if script_args.android else 'linux'
84 160 benchmark_list_params = {"target_os": target_os}
85 161 exec script_args.benchmark_list_file in benchmark_list_params
86 162
87 163 exit_code = 0
164 run_count = script_args.aggregate if script_args.aggregate else 1
88 165 for benchmark_spec in benchmark_list_params['benchmarks']:
89 166 benchmark_name = benchmark_spec['name']
167 variants = _generate_benchmark_variants(benchmark_spec)
168 variant_results = {variant_spec['variant_name']: {}
169 for variant_spec in variants}
90 170
91 for variant_spec in _generate_benchmark_variants(benchmark_spec):
171 for _ in xrange(run_count):
172 for variant_spec in variants:
173 variant_name = variant_spec['variant_name']
174 app = variant_spec['app']
175 duration = variant_spec['duration']
176 shell_args = variant_spec.get('shell-args', []) + common_shell_args
177 measurements = variant_spec['measurements']
178
179 output_file = None
180 if script_args.save_all_traces:
181 output_file = 'benchmark-%s-%s-%s.trace' % (
182 benchmark_name.replace(' ', '_'),
183 variant_name.replace(' ', '_'),
184 time.strftime('%Y%m%d%H%M%S'))
185
186 outcome = benchmark.run(
187 shell, shell_args, app, duration, measurements, script_args.verbose,
188 script_args.android, output_file)
189
190 if not outcome.succeeded or outcome.some_measurements_failed:
191 _print_benchmark_error(outcome)
192 exit_code = 1
193
194 if outcome.succeeded:
195 for measurement_spec in outcome.results:
196 if measurement_spec not in variant_results[variant_name]:
197 variant_results[variant_name][measurement_spec] = []
198 variant_results[variant_name][measurement_spec].append(
199 outcome.results[measurement_spec])
200
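After this loop, a hedged sketch of what variant_results would hold for run_count == 3 when every run succeeds; the measurement spec strings and numbers are made-up placeholders:

# One list of per-run values per measurement spec, keyed by variant name.
variant_results = {
    'cold start': {'time_until/example_trace_event': [42.1, 43.8, 41.9]},
    'warm start': {'time_until/example_trace_event': [12.3, 11.9, 12.4]},
}
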
201 for variant_spec in variants:
92 202 variant_name = variant_spec['variant_name']
93 app = variant_spec['app']
94 duration = variant_spec['duration']
95 shell_args = variant_spec.get('shell-args', []) + common_shell_args
96 measurements = variant_spec['measurements']
203 _print_results(benchmark_name, variant_name,
204 variant_results[variant_name],
205 variant_spec['measurements'], script_args.aggregate)
97 206
98 output_file = None
99 if script_args.save_all_traces:
100 output_file = 'benchmark-%s-%s-%s.trace' % (
101 benchmark_name.replace(' ', '_'),
102 variant_name.replace(' ', '_'),
103 time.strftime('%Y%m%d%H%M%S'))
104
105 chart_data_recorder = None
106 207 if script_args.upload:
107 chart_data_recorder = perf_dashboard.ChartDataRecorder(
108 script_args.test_name)
109
110 results = benchmark.run(
111 shell, shell_args, app, duration, measurements, script_args.verbose,
112 script_args.android, output_file)
113
114 print '[ %s ] %s ' % (benchmark_name, variant_name)
115
116 some_measurements_failed = False
117 some_measurements_succeeded = False
118 if results.succeeded:
119 # Iterate over the list of specs, not the dictionary, to detect missing
120 # results and preserve the required order.
121 for measurement in measurements:
122 if measurement['spec'] in results.measurements:
123 result = results.measurements[measurement['spec']]
124 print '%10.4f %s' % (result, measurement['name'])
125
126 if chart_data_recorder:
127 chart_name = benchmark_name + '__' + variant_name
128 chart_data_recorder.record_scalar(
129 perf_dashboard.normalize_label(chart_name),
130 perf_dashboard.normalize_label(measurement['name']),
131 'ms', result)
132 some_measurements_succeeded = True
133 else:
134 print '? %s' % measurement['name']
135 some_measurements_failed = True
136
137 if not results.succeeded or some_measurements_failed:
138 if not results.succeeded:
139 print 'benchmark failed: ' + results.error_str
140 if some_measurements_failed:
141 print 'some measurements failed'
142 print 'output: '
143 print '-' * 72
144 print results.output
145 print '-' * 72
146 exit_code = 1
147
148 if script_args.upload and some_measurements_succeeded:
149 if not perf_dashboard.upload_chart_data(
150 script_args.master_name, script_args.bot_name,
151 script_args.test_name, script_args.builder_name,
152 script_args.build_number, chart_data_recorder.get_chart_data(),
153 script_args.server_url, script_args.dry_run):
208 upload_succeeded = _upload_results(benchmark_name, variant_name,
209 variant_results[variant_name],
210 variant_spec['measurements'],
211 script_args)
212 if not upload_succeeded:
154 213 exit_code = 1
155 214
156 215 return exit_code
157 216
158 217 if __name__ == '__main__':
159 218 sys.exit(main())