Chromium Code Reviews

Side by Side Diff: mojo/devtools/common/mojo_benchmark

Issue 1433693004: mojo_benchmark: aggregate results over multiple runs. (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 5 years, 1 month ago
(Line numbers below give old-file and new-file positions: a line present in both versions carries both numbers; a removed line carries only its old number, an added line only its new number.)
1 1 #!/usr/bin/env python
2 2 # Copyright 2015 The Chromium Authors. All rights reserved.
3 3 # Use of this source code is governed by a BSD-style license that can be
4 4 # found in the LICENSE file.
5 5
6 6 """Runner for Mojo application benchmarks."""
7 7
8 8 import argparse
9 9 import logging
10 10 import sys
(...skipping 41 matching lines...)
52 52 []) + _COLD_START_SHELL_ARGS})
53 53 variants.append({
54 54 'variant_name': 'warm start',
55 55 'app': benchmark_spec['app'],
56 56 'duration': benchmark_spec['duration'],
57 57 'measurements': benchmark_spec['measurements'],
58 58 'shell-args': benchmark_spec.get('shell-args', [])})
59 59 return variants
60 60
61 61
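For reference, a minimal sketch of the kind of benchmark list entry these variants are generated from, inferred from the keys read here and in main() below; the benchmark name, app URL, duration and measurement spec are made-up placeholders:

# Hypothetical entry in a benchmark list file. The file is exec'd by
# mojo_benchmark with 'target_os' predefined and is expected to define
# a 'benchmarks' list of specs like this one.
benchmarks = [
    {
        'name': 'example startup',                      # placeholder benchmark name
        'app': 'https://example.org/example_app.mojo',  # placeholder app URL
        'duration': 10,                                  # how long to run; passed to benchmark.run
        'shell-args': [],                                # optional extra shell arguments
        'measurements': [
            {
                'name': 'time_to_run',                        # label used in output and charts
                'spec': 'time_until/example_trace_event',     # placeholder measurement spec
            },
        ],
    },
]
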
62 def _print_benchmark_error(outcome):
63 if not outcome.succeeded:
64 print 'benchmark failed: ' + outcome.error_str
65 if outcome.some_measurements_failed:
66 print 'some measurements failed'
67 print 'output: '
68 print '-' * 72
69 print outcome.output
70 print '-' * 72
71
72
73 def _print_results(benchmark_name, variant_name, results, measurements,
74 aggregate):
75 print '[ %s ] %s ' % (benchmark_name, variant_name)
76 for measurement in measurements:
77 print ' ' + measurement['name'] + ': ',
78 if measurement['spec'] in results:
79 if aggregate:
80 print str(results[measurement['spec']])
81 else:
82 if len(results[measurement['spec']]) == 0:
83 print '?'
84 else:
85 print '%10.4f' % results[measurement['spec']][0]
qsr 2015/11/10 17:05:27 Why is 10.4f the right format for single measurements?
ppi 2015/11/10 18:00:18 Done.
86 else:
87 print '?'
88
89
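To illustrate the two output paths above, roughly what _print_results would print for one variant, first with --aggregate and then for a single run (names and values are made-up placeholders):

[ example startup ] cold start
  time_to_run:  [42.1, 43.8, 41.9]

[ example startup ] cold start
  time_to_run:     42.1000
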
90 def _upload_results(benchmark_name, variant_name, results, measurements,
91 script_args):
92 anything_recorded = False
93 chart_data_recorder = perf_dashboard.ChartDataRecorder(script_args.test_name)
94 chart_name = benchmark_name + '__' + variant_name
95
96 for measurement in measurements:
97 if measurement['spec'] in results:
98 if script_args.aggregate:
qsr 2015/11/10 17:05:27 Do you want to send empty list of values?
ppi 2015/11/10 18:00:18 Done.
99 chart_data_recorder.record_vector(
100 perf_dashboard.normalize_label(chart_name),
101 perf_dashboard.normalize_label(measurement['name']),
102 'ms', results[measurement['spec']])
103 anything_recorded = True
104 elif len(results[measurement['spec']]) > 0:
105 chart_data_recorder.record_scalar(
106 perf_dashboard.normalize_label(chart_name),
107 perf_dashboard.normalize_label(measurement['name']),
108 'ms', results[measurement['spec']][0])
109 anything_recorded = True
110
111 if not anything_recorded:
112 # Don't upload empty packets, see
113 # https://github.com/catapult-project/catapult/issues/1733 .
114 return True
115
116 return perf_dashboard.upload_chart_data(
117 script_args.master_name, script_args.bot_name,
118 script_args.test_name, script_args.builder_name,
119 script_args.build_number, chart_data_recorder.get_chart_data(),
120 script_args.server_url, script_args.dry_run)
121
122
123 def _argparse_aggregate_type(value):
124 try:
125 cast_value = int(value)
126 except ValueError:
127 raise argparse.ArgumentTypeError('value is not a positive integer')
128
129 if cast_value < 1:
130 raise argparse.ArgumentTypeError('value is not a positive integer')
131 return cast_value
132
133
62 134 def main():
63 135 parser = argparse.ArgumentParser(
64 136 formatter_class=argparse.RawDescriptionHelpFormatter,
65 137 description=_DESCRIPTION)
66 138 parser.add_argument('benchmark_list_file', type=file,
67 139 help='a file listing benchmarks to run')
140 parser.add_argument('--aggregate', type=_argparse_aggregate_type,
141 help='aggregate results over multiple runs. The value '
142 'has to be a positive integer indicating the number of '
143 'runs.')
68 144 parser.add_argument('--save-all-traces', action='store_true',
69 145 help='save the traces produced by benchmarks to disk')
70 146 perf_dashboard.add_argparse_server_arguments(parser)
71 147
72 148 # Common shell configuration arguments.
73 149 shell_config.add_shell_arguments(parser)
74 150 script_args = parser.parse_args()
75 151 config = shell_config.get_shell_config(script_args)
76 152
77 153 try:
78 154 shell, common_shell_args = shell_arguments.get_shell(config, [])
79 155 except shell_arguments.ShellConfigurationException as e:
80 156 print e
81 157 return 1
82 158
83 159 target_os = 'android' if script_args.android else 'linux'
84 160 benchmark_list_params = {"target_os": target_os}
85 161 exec script_args.benchmark_list_file in benchmark_list_params
86 162
87 163 exit_code = 0
164 run_count = script_args.aggregate if script_args.aggregate else 1
88 165 for benchmark_spec in benchmark_list_params['benchmarks']:
89 166 benchmark_name = benchmark_spec['name']
167 variants = _generate_benchmark_variants(benchmark_spec)
168 variant_results = {variant_spec['variant_name']: {}
169 for variant_spec in variants}
90 170
91 for variant_spec in _generate_benchmark_variants(benchmark_spec):
171 for _ in xrange(run_count):
172 for variant_spec in variants:
173 variant_name = variant_spec['variant_name']
174 app = variant_spec['app']
175 duration = variant_spec['duration']
176 shell_args = variant_spec.get('shell-args', []) + common_shell_args
177 measurements = variant_spec['measurements']
178
179 output_file = None
180 if script_args.save_all_traces:
181 output_file = 'benchmark-%s-%s-%s.trace' % (
182 benchmark_name.replace(' ', '_'),
183 variant_name.replace(' ', '_'),
184 time.strftime('%Y%m%d%H%M%S'))
185
186 outcome = benchmark.run(
187 shell, shell_args, app, duration, measurements, script_args.verbose,
188 script_args.android, output_file)
189
190 if not outcome.succeeded or outcome.some_measurements_failed:
191 _print_benchmark_error(outcome)
192 exit_code = 1
193
194 if outcome.succeeded:
195 for measurement_spec in outcome.results:
196 if measurement_spec not in variant_results[variant_name]:
197 variant_results[variant_name][measurement_spec] = []
198 variant_results[variant_name][measurement_spec].append(
199 outcome.results[measurement_spec])
200
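After this loop, a hedged sketch of what variant_results would hold for run_count == 3 when every run succeeds; the measurement spec strings and numbers are made-up placeholders:

# One list of per-run values per measurement spec, keyed by variant name.
variant_results = {
    'cold start': {'time_until/example_trace_event': [42.1, 43.8, 41.9]},
    'warm start': {'time_until/example_trace_event': [12.3, 11.9, 12.4]},
}
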
201 for variant_spec in variants:
92 202 variant_name = variant_spec['variant_name']
93 app = variant_spec['app']
94 duration = variant_spec['duration']
95 shell_args = variant_spec.get('shell-args', []) + common_shell_args
96 measurements = variant_spec['measurements']
203 _print_results(benchmark_name, variant_name,
204 variant_results[variant_name],
205 variant_spec['measurements'], script_args.aggregate)
97 206
98 output_file = None
99 if script_args.save_all_traces:
100 output_file = 'benchmark-%s-%s-%s.trace' % (
101 benchmark_name.replace(' ', '_'),
102 variant_name.replace(' ', '_'),
103 time.strftime('%Y%m%d%H%M%S'))
104
105 chart_data_recorder = None
106 207 if script_args.upload:
107 chart_data_recorder = perf_dashboard.ChartDataRecorder(
108 script_args.test_name)
109
110 results = benchmark.run(
111 shell, shell_args, app, duration, measurements, script_args.verbose,
112 script_args.android, output_file)
113
114 print '[ %s ] %s ' % (benchmark_name, variant_name)
115
116 some_measurements_failed = False
117 some_measurements_succeeded = False
118 if results.succeeded:
119 # Iterate over the list of specs, not the dictionary, to detect missing
120 # results and preserve the required order.
121 for measurement in measurements:
122 if measurement['spec'] in results.measurements:
123 result = results.measurements[measurement['spec']]
124 print '%10.4f %s' % (result, measurement['name'])
125
126 if chart_data_recorder:
127 chart_name = benchmark_name + '__' + variant_name
128 chart_data_recorder.record_scalar(
129 perf_dashboard.normalize_label(chart_name),
130 perf_dashboard.normalize_label(measurement['name']),
131 'ms', result)
132 some_measurements_succeeded = True
133 else:
134 print '? %s' % measurement['name']
135 some_measurements_failed = True
136
137 if not results.succeeded or some_measurements_failed:
138 if not results.succeeded:
139 print 'benchmark failed: ' + results.error_str
140 if some_measurements_failed:
141 print 'some measurements failed'
142 print 'output: '
143 print '-' * 72
144 print results.output
145 print '-' * 72
146 exit_code = 1
147
148 if script_args.upload and some_measurements_succeeded:
149 if not perf_dashboard.upload_chart_data(
150 script_args.master_name, script_args.bot_name,
151 script_args.test_name, script_args.builder_name,
152 script_args.build_number, chart_data_recorder.get_chart_data(),
153 script_args.server_url, script_args.dry_run):
208 upload_succeeded = _upload_results(benchmark_name, variant_name,
209 variant_results[variant_name],
210 variant_spec['measurements'],
211 script_args)
212 if not upload_succeeded:
154 213 exit_code = 1
155 214
156 215 return exit_code
157 216
158 217 if __name__ == '__main__':
159 218 sys.exit(main())