OLD | NEW |
---|---|
1 # Copyright 2013 The Chromium Authors. All rights reserved. | 1 # Copyright 2013 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """Runs perf tests. | 5 """Runs perf tests. |
6 | 6 |
7 Our buildbot infrastructure requires each slave to run steps serially. | 7 Our buildbot infrastructure requires each slave to run steps serially. |
8 This is sub-optimal for Android, where these steps can run independently on | 8 This is sub-optimal for Android, where these steps can run independently on |
9 multiple connected devices. | 9 multiple connected devices. |
10 | 10 |
11 The buildbots will run this script multiple times per cycle: | 11 The buildbots will run this script multiple times per cycle: |
12 - First: all steps listed in --steps will be executed in parallel using all | 12 - First: all steps listed in --steps will be executed in parallel using all |
13 connected devices. Step results will be pickled to disk. Each step has a unique | 13 connected devices. Step results will be pickled to disk. Each step has a unique |
14 name. The result code will be ignored if the step name is listed in | 14 name. The result code will be ignored if the step name is listed in |
15 --flaky-steps. | 15 --flaky-steps. |
16 The buildbot will treat this step as a regular step, and will not process any | 16 The buildbot will treat this step as a regular step, and will not process any |
17 graph data. | 17 graph data. |
18 | 18 |
19 - Then, with --print-step STEP_NAME: at this stage, we'll simply print the file | 19 - Then, with --print-step STEP_NAME: at this stage, we'll simply print the file |
20 with the step results previously saved. The buildbot will then process the graph | 20 with the step results previously saved. The buildbot will then process the graph |
21 data accordingly. | 21 data accordingly. |
22 | 22 |
23 The JSON steps file contains a dictionary in the format: | |
24 { "version": int, | |
25 "steps": { | |
26 "foo": { | |
27 "device_affinity": int, | |
28 "cmd": "script_to_execute foo" | |
29 }, | |
30 "bar": { | |
31 "device_affinity": int, | |
32 "cmd": "script_to_execute bar" | |
33 } | |
34 } | |
35 } | |
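For illustration, a minimal sketch of how a consumer of the new-format steps file might load it. The helper name is an assumption, not part of this CL:

import json

def LoadSteps(path):
  # Parse the new-format steps file described above and return the
  # {step_name: {'device_affinity': int, 'cmd': str}} mapping.
  with open(path) as f:
    data = json.load(f)
  # 'version' is an int per the format above; the CL does not say which
  # values are valid, so no version check is attempted here.
  return data['steps']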
23 | 36 |
24 The JSON steps file contains a list in the format: | 37 # TODO(bulach): remove once it rolls downstream, crbug.com/378862. |
38 The OLD JSON steps file contains a list in the format: | |
25 [ | 39 [ |
26 ["step_name_foo", "script_to_execute foo"], | 40 ["step_name_foo", "script_to_execute foo"], |
27 ["step_name_bar", "script_to_execute bar"] | 41 ["step_name_bar", "script_to_execute bar"] |
28 ] | 42 ] |
29 | 43 |
30 This preserves the order in which the steps are executed. | 44 This preserves the order in which the steps are executed. |
31 | 45 |
32 The JSON flaky steps file contains a list of step names whose results should | 46 The JSON flaky steps file contains a list of step names whose results should |
33 be ignored: | 47 be ignored: |
34 [ | 48 [ |
35 "step_name_foo", | 49 "step_name_foo", |
36 "step_name_bar" | 50 "step_name_bar" |
37 ] | 51 ] |
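A sketch of how the flaky-steps file might be read; the helper name is hypothetical:

import json

def LoadFlakySteps(path):
  # The flaky-steps file is a plain JSON list of step names, so a set
  # gives O(1) membership tests for the masking done in _LaunchPerfTest.
  with open(path) as f:
    return set(json.load(f))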
38 | 52 |
39 Note that script_to_execute must accept at least the following | 53 Note that script_to_execute must accept at least the following |
40 option: | 54 option: |
41 --device: the serial number to be passed to all adb commands. | 55 --device: the serial number to be passed to all adb commands. |
42 """ | 56 """ |
43 | 57 |
58 import collections | |
44 import datetime | 59 import datetime |
45 import logging | 60 import logging |
46 import os | 61 import os |
47 import pickle | 62 import pickle |
48 import sys | 63 import sys |
49 import threading | 64 import threading |
50 import time | 65 import time |
51 | 66 |
52 from pylib import cmd_helper | 67 from pylib import cmd_helper |
53 from pylib import constants | 68 from pylib import constants |
(...skipping 24 matching lines...) | |
78 logging.info('*' * 80) | 93 logging.info('*' * 80) |
79 print persisted_result['output'] | 94 print persisted_result['output'] |
80 | 95 |
81 return persisted_result['exit_code'] | 96 return persisted_result['exit_code'] |
82 | 97 |
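The lines above are apparently the tail of the step-printing stage: it unpickles a previously saved result, replays its output, and returns its exit code. A standalone sketch of that stage, assuming results were pickled as the dicts built in _LaunchPerfTest (the helper name is illustrative):

import os
import pickle

def PrintStep(step_name, output_dir):
  # Replay a previously persisted step result.
  with open(os.path.join(output_dir, step_name), 'rb') as f:
    persisted_result = pickle.load(f)
  print(persisted_result['output'])
  return persisted_result['exit_code']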
83 | 98 |
84 def PrintSummary(test_names): | 99 def PrintSummary(test_names): |
85 logging.info('*' * 80) | 100 logging.info('*' * 80) |
86 logging.info('Sharding summary') | 101 logging.info('Sharding summary') |
87 total_time = 0 | 102 total_time = 0 |
103 device_total_time = collections.defaultdict(int) | |
88 for test_name in test_names: | 104 for test_name in test_names: |
89 file_name = os.path.join(constants.PERF_OUTPUT_DIR, test_name) | 105 file_name = os.path.join(constants.PERF_OUTPUT_DIR, test_name) |
90 if not os.path.exists(file_name): | 106 if not os.path.exists(file_name): |
91 logging.info('%s : No status file found', test_name) | 107 logging.info('%s : No status file found', test_name) |
92 continue | 108 continue |
93 with file(file_name, 'r') as f: | 109 with file(file_name, 'r') as f: |
94 result = pickle.loads(f.read()) | 110 result = pickle.loads(f.read()) |
95 logging.info('%s : exit_code=%d in %d secs at %s', | 111 logging.info('%s : exit_code=%d in %d secs at %s', |
96 result['name'], result['exit_code'], result['total_time'], | 112 result['name'], result['exit_code'], result['total_time'], |
97 result['device']) | 113 result['device']) |
98 total_time += result['total_time'] | 114 total_time += result['total_time'] |
jbudorick 2014/06/03 13:52:52: nit: you don't really need a separate total_time a
bulach 2014/06/03 14:25:52: Done.
| |
115 device_total_time[result['device']] += result['total_time'] | |
116 for device, device_time in device_total_time.iteritems(): | |
117 logging.info('Total for device %s : %d secs', device, device_time) | |
99 logging.info('Total steps time: %d secs', total_time) | 118 logging.info('Total steps time: %d secs', total_time) |
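The new per-device totals rely on collections.defaultdict so unseen serials start at 0. The same pattern in isolation, with made-up sample data:

import collections

results = [
    {'device': '01499B32', 'total_time': 120},  # sample values, not real runs
    {'device': '01499B32', 'total_time': 45},
    {'device': '0288504A', 'total_time': 90},
]
device_total_time = collections.defaultdict(int)
for result in results:
  device_total_time[result['device']] += result['total_time']
for device, device_time in sorted(device_total_time.items()):
  print('Total for device %s : %d secs' % (device, device_time))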
100 | 119 |
101 | 120 |
102 class _HeartBeatLogger(object): | 121 class _HeartBeatLogger(object): |
103 # How often to print the heartbeat on flush(). | 122 # How often to print the heartbeat on flush(). |
104 _PRINT_INTERVAL = 30.0 | 123 _PRINT_INTERVAL = 30.0 |
105 | 124 |
106 def __init__(self): | 125 def __init__(self): |
107 """A file-like class for keeping the buildbot alive.""" | 126 """A file-like class for keeping the buildbot alive.""" |
108 self._len = 0 | 127 self._len = 0 |
(...skipping 15 matching lines...) | |
124 if now - self._tick >= _HeartBeatLogger._PRINT_INTERVAL: | 143 if now - self._tick >= _HeartBeatLogger._PRINT_INTERVAL: |
125 self._tick = now | 144 self._tick = now |
126 print '--single-step output length %d' % self._len | 145 print '--single-step output length %d' % self._len |
127 sys.stdout.flush() | 146 sys.stdout.flush() |
128 | 147 |
129 def stop(self): | 148 def stop(self): |
130 self._stopped.set() | 149 self._stopped.set() |
131 | 150 |
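Part of _HeartBeatLogger is elided above ("skipping 15 matching lines"). A self-contained sketch of the same idea, a file-like object that keeps a quiet buildbot step alive; the elided code is assumed to run a background thread that calls flush() periodically:

import sys
import threading
import time

class HeartBeatLogger(object):
  """File-like sink that prints a heartbeat at most every interval secs."""

  def __init__(self, interval=30.0):
    self._interval = interval
    self._len = 0            # bytes swallowed so far
    self._tick = time.time()
    self._stopped = threading.Event()  # assumed to be polled by a thread

  def write(self, data):
    self._len += len(data)

  def flush(self):
    now = time.time()
    if now - self._tick >= self._interval:
      self._tick = now
      print('--single-step output length %d' % self._len)
      sys.stdout.flush()

  def stop(self):
    self._stopped.set()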
132 | 151 |
133 class TestRunner(base_test_runner.BaseTestRunner): | 152 class TestRunner(base_test_runner.BaseTestRunner): |
134 def __init__(self, test_options, device, tests, flaky_tests): | 153 def __init__(self, test_options, device, shard_index, max_shard, tests, |
154 flaky_tests): | |
135 """A TestRunner instance runs a perf test on a single device. | 155 """A TestRunner instance runs a perf test on a single device. |
136 | 156 |
137 Args: | 157 Args: |
138 test_options: A PerfOptions object. | 158 test_options: A PerfOptions object. |
139 device: Device to run the tests. | 159 device: Device to run the tests. |
160 shard_index: the index of this device. | |
161 max_shard: the total number of shards. | |
140 tests: a dict mapping test_name to command. | 162 tests: a dict mapping test_name to command. |
141 flaky_tests: a list of flaky test_name. | 163 flaky_tests: a list of flaky test_name. |
142 """ | 164 """ |
143 super(TestRunner, self).__init__(device, None, 'Release') | 165 super(TestRunner, self).__init__(device, None, 'Release') |
144 self._options = test_options | 166 self._options = test_options |
167 self._shard_index = shard_index | |
168 self._max_shard = max_shard | |
145 self._tests = tests | 169 self._tests = tests |
146 self._flaky_tests = flaky_tests | 170 self._flaky_tests = flaky_tests |
147 | 171 |
148 @staticmethod | 172 @staticmethod |
149 def _IsBetter(result): | 173 def _IsBetter(result): |
150 if result['actual_exit_code'] == 0: | 174 if result['actual_exit_code'] == 0: |
151 return True | 175 return True |
152 pickled = os.path.join(constants.PERF_OUTPUT_DIR, | 176 pickled = os.path.join(constants.PERF_OUTPUT_DIR, |
153 result['name']) | 177 result['name']) |
154 if not os.path.exists(pickled): | 178 if not os.path.exists(pickled): |
155 return True | 179 return True |
156 with file(pickled, 'r') as f: | 180 with file(pickled, 'r') as f: |
157 previous = pickle.loads(f.read()) | 181 previous = pickle.loads(f.read()) |
158 return result['actual_exit_code'] < previous['actual_exit_code'] | 182 return result['actual_exit_code'] < previous['actual_exit_code'] |
159 | 183 |
160 @staticmethod | 184 @staticmethod |
161 def _SaveResult(result): | 185 def _SaveResult(result): |
162 if TestRunner._IsBetter(result): | 186 if TestRunner._IsBetter(result): |
163 with file(os.path.join(constants.PERF_OUTPUT_DIR, | 187 with file(os.path.join(constants.PERF_OUTPUT_DIR, |
164 result['name']), 'w') as f: | 188 result['name']), 'w') as f: |
165 f.write(pickle.dumps(result)) | 189 f.write(pickle.dumps(result)) |
166 | 190 |
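_IsBetter and _SaveResult together implement keep-the-best-run persistence: a step's pickled result is only overwritten when the new run's exit code is an improvement, so retries cannot clobber a good result with a worse one. The policy in isolation (path and helper name are illustrative):

import os
import pickle

def SaveIfBetter(result, output_dir):
  path = os.path.join(output_dir, result['name'])
  if os.path.exists(path):
    with open(path, 'rb') as f:
      previous = pickle.load(f)
    # Keep the previous result unless the new run succeeded or
    # failed with a strictly smaller exit code.
    if (result['actual_exit_code'] != 0 and
        result['actual_exit_code'] >= previous['actual_exit_code']):
      return
  with open(path, 'wb') as f:
    pickle.dump(result, f)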
191 def _CheckDeviceAffinity(self, test_name): | |
192 """Returns True if test_name has affinity for this shard.""" | |
193 affinity = (self._tests['steps'][test_name]['device_affinity'] % | |
jbudorick 2014/06/03 13:52:52: My point with the shard index vs the serial was th
bulach 2014/06/03 14:25:52: let's split this: 1) this is using the persistent
| |
194 self._max_shard) | |
195 if self._shard_index == affinity: | |
196 return True | |
197 logging.info('Skipping %s on %s (affinity is %s, shard index is %s)', | |
198 test_name, self.device_serial, affinity, self._shard_index) | |
199 return False | |
200 | |
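The affinity check is just a modulo: a step runs only on the shard whose index equals device_affinity % max_shard. A worked example with three shards:

max_shard = 3  # e.g. three connected devices
for device_affinity in range(5):
  shard = device_affinity % max_shard
  print('affinity %d -> shard %d' % (device_affinity, shard))
# Prints: affinity 0 -> shard 0, 1 -> 1, 2 -> 2, 3 -> 0, 4 -> 1.
# Every step lands on exactly one shard, and the mapping is stable
# across runs as long as the number of shards stays the same.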
167 def _LaunchPerfTest(self, test_name): | 201 def _LaunchPerfTest(self, test_name): |
168 """Runs a perf test. | 202 """Runs a perf test. |
169 | 203 |
170 Args: | 204 Args: |
171 test_name: the name of the test to be executed. | 205 test_name: the name of the test to be executed. |
172 | 206 |
173 Returns: | 207 Returns: |
174 A tuple containing (Output, base_test_result.ResultType) | 208 A tuple containing (Output, base_test_result.ResultType) |
175 """ | 209 """ |
210 if not self._CheckDeviceAffinity(test_name): | |
211 return '', base_test_result.ResultType.PASS | |
212 | |
176 try: | 213 try: |
177 logging.warning('Unmapping device ports') | 214 logging.warning('Unmapping device ports') |
178 forwarder.Forwarder.UnmapAllDevicePorts(self.device) | 215 forwarder.Forwarder.UnmapAllDevicePorts(self.device) |
179 self.device.old_interface.RestartAdbdOnDevice() | 216 self.device.old_interface.RestartAdbdOnDevice() |
180 except Exception as e: | 217 except Exception as e: |
181 logging.error('Exception when tearing down device %s', e) | 218 logging.error('Exception when tearing down device %s', e) |
182 | 219 |
183 cmd = ('%s --device %s' % | 220 cmd = ('%s --device %s' % |
184 (self._tests[test_name], self.device.old_interface.GetDevice())) | 221 (self._tests['steps'][test_name]['cmd'], |
222 self.device_serial)) | |
185 logging.info('%s : %s', test_name, cmd) | 223 logging.info('%s : %s', test_name, cmd) |
186 start_time = datetime.datetime.now() | 224 start_time = datetime.datetime.now() |
187 | 225 |
188 timeout = 5400 | 226 timeout = 5400 |
189 if self._options.no_timeout: | 227 if self._options.no_timeout: |
190 timeout = None | 228 timeout = None |
191 full_cmd = cmd | 229 full_cmd = cmd |
192 if self._options.dry_run: | 230 if self._options.dry_run: |
193 full_cmd = 'echo %s' % cmd | 231 full_cmd = 'echo %s' % cmd |
194 | 232 |
195 logfile = sys.stdout | 233 logfile = sys.stdout |
196 if self._options.single_step: | 234 if self._options.single_step: |
197 # Just print a heart-beat so that the outer buildbot scripts won't time out | 235 # Just print a heart-beat so that the outer buildbot scripts won't time out |
198 # without response. | 236 # without response. |
199 logfile = _HeartBeatLogger() | 237 logfile = _HeartBeatLogger() |
200 cwd = os.path.abspath(constants.DIR_SOURCE_ROOT) | 238 cwd = os.path.abspath(constants.DIR_SOURCE_ROOT) |
201 if full_cmd.startswith('src/'): | 239 if full_cmd.startswith('src/'): |
202 cwd = os.path.abspath(os.path.join(constants.DIR_SOURCE_ROOT, os.pardir)) | 240 cwd = os.path.abspath(os.path.join(constants.DIR_SOURCE_ROOT, os.pardir)) |
203 try: | 241 try: |
204 exit_code, output = cmd_helper.GetCmdStatusAndOutputWithTimeout( | 242 exit_code, output = cmd_helper.GetCmdStatusAndOutputWithTimeout( |
205 full_cmd, timeout, cwd=cwd, shell=True, logfile=logfile) | 243 full_cmd, timeout, cwd=cwd, shell=True, logfile=logfile) |
206 finally: | 244 finally: |
207 if self._options.single_step: | 245 if self._options.single_step: |
208 logfile.stop() | 246 logfile.stop() |
209 end_time = datetime.datetime.now() | 247 end_time = datetime.datetime.now() |
210 if exit_code is None: | 248 if exit_code is None: |
211 exit_code = -1 | 249 exit_code = -1 |
212 logging.info('%s : exit_code=%d in %d secs at %s', | 250 logging.info('%s : exit_code=%d in %d secs at %s', |
213 test_name, exit_code, (end_time - start_time).seconds, | 251 test_name, exit_code, (end_time - start_time).seconds, |
214 self.device.old_interface.GetDevice()) | 252 self.device_serial) |
jbudorick 2014/06/03 13:52:52: Sneaking in ahead of me here, I see. (In the futu
bulach 2014/06/03 14:25:52: want me to keep the old way? happy either way :)
| |
215 result_type = base_test_result.ResultType.FAIL | 253 result_type = base_test_result.ResultType.FAIL |
216 if exit_code == 0: | 254 if exit_code == 0: |
217 result_type = base_test_result.ResultType.PASS | 255 result_type = base_test_result.ResultType.PASS |
218 actual_exit_code = exit_code | 256 actual_exit_code = exit_code |
219 if test_name in self._flaky_tests: | 257 if test_name in self._flaky_tests: |
220 # The exit_code is used at the second stage when printing the | 258 # The exit_code is used at the second stage when printing the |
221 # test output. If the test is flaky, force it to "0" to get that step green | 259 # test output. If the test is flaky, force it to "0" to get that step green |
222 # whilst still gathering data for the perf dashboards. | 260 # whilst still gathering data for the perf dashboards. |
223 # The result_type is used by the test_dispatcher to retry the test. | 261 # The result_type is used by the test_dispatcher to retry the test. |
224 exit_code = 0 | 262 exit_code = 0 |
225 | 263 |
226 persisted_result = { | 264 persisted_result = { |
227 'name': test_name, | 265 'name': test_name, |
228 'output': output, | 266 'output': output, |
229 'exit_code': exit_code, | 267 'exit_code': exit_code, |
230 'actual_exit_code': actual_exit_code, | 268 'actual_exit_code': actual_exit_code, |
231 'result_type': result_type, | 269 'result_type': result_type, |
232 'total_time': (end_time - start_time).seconds, | 270 'total_time': (end_time - start_time).seconds, |
233 'device': self.device.old_interface.GetDevice(), | 271 'device': self.device_serial, |
234 'cmd': cmd, | 272 'cmd': cmd, |
235 } | 273 } |
236 self._SaveResult(persisted_result) | 274 self._SaveResult(persisted_result) |
237 | 275 |
238 return (output, result_type) | 276 return (output, result_type) |
239 | 277 |
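To summarize the exit-code plumbing in _LaunchPerfTest: a timed-out command yields exit_code None, which is normalized to -1; flaky steps then have exit_code forced to 0 so the buildbot step stays green, while result_type and actual_exit_code still record the real outcome. The policy compressed into one function (names are illustrative):

def ClassifyExitCode(exit_code, test_name, flaky_tests):
  # None means the command hit the timeout.
  if exit_code is None:
    exit_code = -1
  result_type = 'PASS' if exit_code == 0 else 'FAIL'
  actual_exit_code = exit_code
  if test_name in flaky_tests:
    # Green on the waterfall, but the dispatcher still sees FAIL and retries.
    exit_code = 0
  return exit_code, actual_exit_code, result_type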
240 def RunTest(self, test_name): | 278 def RunTest(self, test_name): |
241 """Run a perf test on the device. | 279 """Run a perf test on the device. |
242 | 280 |
243 Args: | 281 Args: |
244 test_name: String to use for logging the test result. | 282 test_name: String to use for logging the test result. |
245 | 283 |
246 Returns: | 284 Returns: |
247 A tuple of (TestRunResults, retry). | 285 A tuple of (TestRunResults, retry). |
248 """ | 286 """ |
249 _, result_type = self._LaunchPerfTest(test_name) | 287 _, result_type = self._LaunchPerfTest(test_name) |
250 results = base_test_result.TestRunResults() | 288 results = base_test_result.TestRunResults() |
251 results.AddResult(base_test_result.BaseTestResult(test_name, result_type)) | 289 results.AddResult(base_test_result.BaseTestResult(test_name, result_type)) |
252 retry = None | 290 retry = None |
253 if not results.DidRunPass(): | 291 if not results.DidRunPass(): |
254 retry = test_name | 292 retry = test_name |
255 return results, retry | 293 return results, retry |