Chromium Code Reviews

Unified diff: mojo/devtools/common/mojo_benchmark

Issue 1433693004: mojo_benchmark: aggregate results over multiple runs. (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Address Ben's comments. Created 5 years, 1 month ago
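
For context on what the patch changes: mojo_benchmark executes the supplied benchmark_list_file as Python (with target_os injected by the runner) and reads a top-level 'benchmarks' list from it, as seen in main() in the diff below. A minimal sketch of such a list file and of an invocation using the new --aggregate flag follows; the file name, app URL, duration value, and measurement 'spec' string are hypothetical placeholders, not values taken from this patch.

# benchmarks.py -- hypothetical benchmark list file, exec'd by mojo_benchmark.
# The runner injects 'target_os'; the file must define 'benchmarks'.
benchmarks = [
    {
        'name': 'example app startup',
        'app': 'https://example.org/sample_app.mojo',  # placeholder app URL
        'duration': 10,                                # placeholder duration
        'shell-args': [],
        'measurements': [
            # 'name' labels the result; 'spec' identifies the measurement
            # (the value below is a placeholder).
            {'name': 'time_to_first_frame',
             'spec': 'time_until/sample_app/first_frame'},
        ],
    },
]

# Run every benchmark variant 5 times and aggregate the per-variant results:
./mojo/devtools/common/mojo_benchmark benchmarks.py --aggregate 5

With --aggregate, each variant is run the requested number of times; _print_results then prints the full list of values collected per measurement and _upload_results records them via record_vector. Without the flag a single run is made and the first (only) value is printed and uploaded as a scalar, as before.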
--- a/mojo/devtools/common/mojo_benchmark
+++ b/mojo/devtools/common/mojo_benchmark
 #!/usr/bin/env python
 # Copyright 2015 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Runner for Mojo application benchmarks."""

 import argparse
 import logging
 import sys
(...skipping 41 matching lines...)
                                        []) + _COLD_START_SHELL_ARGS})
   variants.append({
       'variant_name': 'warm start',
       'app': benchmark_spec['app'],
       'duration': benchmark_spec['duration'],
       'measurements': benchmark_spec['measurements'],
       'shell-args': benchmark_spec.get('shell-args', [])})
   return variants


+def _print_benchmark_error(outcome):
+  if not outcome.succeeded:
+    print 'benchmark failed: ' + outcome.error_str
+  if outcome.some_measurements_failed:
+    print 'some measurements failed'
+  print 'output: '
+  print '-' * 72
+  print outcome.output
+  print '-' * 72
+
+
+def _print_results(benchmark_name, variant_name, results, measurements,
+                   aggregate):
+  print '[ %s ] %s ' % (benchmark_name, variant_name)
+  for measurement in measurements:
+    print ' ' + measurement['name'] + ': ',
+    if measurement['spec'] in results:
+      if aggregate:
+        print str(results[measurement['spec']])
+      else:
+        if len(results[measurement['spec']]) == 0:
+          print '?'
+        else:
+          print '%f' % results[measurement['spec']][0]
+    else:
+      print '?'
+
+
+def _upload_results(benchmark_name, variant_name, results, measurements,
+                    script_args):
+  anything_recorded = False
+  chart_data_recorder = perf_dashboard.ChartDataRecorder(script_args.test_name)
+  chart_name = benchmark_name + '__' + variant_name
+
+  for measurement in measurements:
+    if measurement['spec'] in results:
+      if not results[measurement['spec']]:
+        continue
+
+      if script_args.aggregate:
+        chart_data_recorder.record_vector(
+            perf_dashboard.normalize_label(chart_name),
+            perf_dashboard.normalize_label(measurement['name']),
+            'ms', results[measurement['spec']])
+      else:
+        chart_data_recorder.record_scalar(
+            perf_dashboard.normalize_label(chart_name),
+            perf_dashboard.normalize_label(measurement['name']),
+            'ms', results[measurement['spec']][0])
+      anything_recorded = True
+
+  if not anything_recorded:
+    # Don't upload empty packets, see
+    # https://github.com/catapult-project/catapult/issues/1733 .
+    return True
+
+  return perf_dashboard.upload_chart_data(
+      script_args.master_name, script_args.bot_name,
+      script_args.test_name, script_args.builder_name,
+      script_args.build_number, chart_data_recorder.get_chart_data(),
+      script_args.server_url, script_args.dry_run)
+
+
+def _argparse_aggregate_type(value):
+  try:
+    cast_value = int(value)
+  except ValueError:
+    raise argparse.ArgumentTypeError('value is not a positive integer')
+
+  if cast_value < 1:
+    raise argparse.ArgumentTypeError('value is not a positive integer')
+  return cast_value
+
+
 def main():
   parser = argparse.ArgumentParser(
       formatter_class=argparse.RawDescriptionHelpFormatter,
       description=_DESCRIPTION)
   parser.add_argument('benchmark_list_file', type=file,
                       help='a file listing benchmarks to run')
+  parser.add_argument('--aggregate', type=_argparse_aggregate_type,
+                      help='aggregate results over multiple runs. The value '
+                      'has to be a positive integer indicating the number of '
+                      'runs.')
   parser.add_argument('--save-all-traces', action='store_true',
                       help='save the traces produced by benchmarks to disk')
   perf_dashboard.add_argparse_server_arguments(parser)

   # Common shell configuration arguments.
   shell_config.add_shell_arguments(parser)
   script_args = parser.parse_args()
   config = shell_config.get_shell_config(script_args)

   try:
     shell, common_shell_args = shell_arguments.get_shell(config, [])
   except shell_arguments.ShellConfigurationException as e:
     print e
     return 1

   target_os = 'android' if script_args.android else 'linux'
   benchmark_list_params = {"target_os": target_os}
   exec script_args.benchmark_list_file in benchmark_list_params

   exit_code = 0
+  run_count = script_args.aggregate if script_args.aggregate else 1
   for benchmark_spec in benchmark_list_params['benchmarks']:
     benchmark_name = benchmark_spec['name']
+    variants = _generate_benchmark_variants(benchmark_spec)
+    variant_results = {variant_spec['variant_name']: {}
+                       for variant_spec in variants}

-    for variant_spec in _generate_benchmark_variants(benchmark_spec):
+    for _ in xrange(run_count):
+      for variant_spec in variants:
+        variant_name = variant_spec['variant_name']
+        app = variant_spec['app']
+        duration = variant_spec['duration']
+        shell_args = variant_spec.get('shell-args', []) + common_shell_args
+        measurements = variant_spec['measurements']
+
+        output_file = None
+        if script_args.save_all_traces:
+          output_file = 'benchmark-%s-%s-%s.trace' % (
+              benchmark_name.replace(' ', '_'),
+              variant_name.replace(' ', '_'),
+              time.strftime('%Y%m%d%H%M%S'))
+
+        outcome = benchmark.run(
+            shell, shell_args, app, duration, measurements, script_args.verbose,
+            script_args.android, output_file)
+
+        if not outcome.succeeded or outcome.some_measurements_failed:
+          _print_benchmark_error(outcome)
+          exit_code = 1
+
+        if outcome.succeeded:
+          for measurement_spec in outcome.results:
+            if measurement_spec not in variant_results[variant_name]:
+              variant_results[variant_name][measurement_spec] = []
+            variant_results[variant_name][measurement_spec].append(
+                outcome.results[measurement_spec])
+
+    for variant_spec in variants:
       variant_name = variant_spec['variant_name']
-      app = variant_spec['app']
-      duration = variant_spec['duration']
-      shell_args = variant_spec.get('shell-args', []) + common_shell_args
-      measurements = variant_spec['measurements']
+      _print_results(benchmark_name, variant_name,
+                     variant_results[variant_name],
+                     variant_spec['measurements'], script_args.aggregate)

-      output_file = None
-      if script_args.save_all_traces:
-        output_file = 'benchmark-%s-%s-%s.trace' % (
-            benchmark_name.replace(' ', '_'),
-            variant_name.replace(' ', '_'),
-            time.strftime('%Y%m%d%H%M%S'))
-
-      chart_data_recorder = None
       if script_args.upload:
-        chart_data_recorder = perf_dashboard.ChartDataRecorder(
-            script_args.test_name)
-
-      results = benchmark.run(
-          shell, shell_args, app, duration, measurements, script_args.verbose,
-          script_args.android, output_file)
-
-      print '[ %s ] %s ' % (benchmark_name, variant_name)
-
-      some_measurements_failed = False
-      some_measurements_succeeded = False
-      if results.succeeded:
-        # Iterate over the list of specs, not the dictionary, to detect missing
-        # results and preserve the required order.
-        for measurement in measurements:
-          if measurement['spec'] in results.measurements:
-            result = results.measurements[measurement['spec']]
-            print '%10.4f %s' % (result, measurement['name'])
-
-            if chart_data_recorder:
-              chart_name = benchmark_name + '__' + variant_name
-              chart_data_recorder.record_scalar(
-                  perf_dashboard.normalize_label(chart_name),
-                  perf_dashboard.normalize_label(measurement['name']),
-                  'ms', result)
-            some_measurements_succeeded = True
-          else:
-            print '? %s' % measurement['name']
-            some_measurements_failed = True
-
-      if not results.succeeded or some_measurements_failed:
-        if not results.succeeded:
-          print 'benchmark failed: ' + results.error_str
-        if some_measurements_failed:
-          print 'some measurements failed'
-        print 'output: '
-        print '-' * 72
-        print results.output
-        print '-' * 72
-        exit_code = 1
-
-      if script_args.upload and some_measurements_succeeded:
-        if not perf_dashboard.upload_chart_data(
-            script_args.master_name, script_args.bot_name,
-            script_args.test_name, script_args.builder_name,
-            script_args.build_number, chart_data_recorder.get_chart_data(),
-            script_args.server_url, script_args.dry_run):
+        upload_succeeded = _upload_results(benchmark_name, variant_name,
+                                           variant_results[variant_name],
+                                           variant_spec['measurements'],
+                                           script_args)
+        if not upload_succeeded:
           exit_code = 1

   return exit_code

 if __name__ == '__main__':
   sys.exit(main())
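
As a reading aid (not part of the patch): after the run loop, variant_results maps each variant name to a dict from measurement spec to the list of values collected across runs, which is what _print_results and _upload_results consume. With --aggregate 3 it might look roughly like the sketch below; the 'cold start' name is inferred from _COLD_START_SHELL_ARGS in the truncated part of _generate_benchmark_variants, and the spec string and numbers are made up.

# Hypothetical shape of variant_results for one benchmark after 3 runs (ms values).
variant_results = {
    'cold start': {'time_until/sample_app/first_frame': [112.4, 108.9, 110.2]},
    'warm start': {'time_until/sample_app/first_frame': [54.1, 55.7, 53.9]},
}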