Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 # Copyright 2013 The Chromium Authors. All rights reserved. | 1 # Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """Runs perf tests. | 5 """Runs perf tests. |
| 6 | 6 |
| 7 Our buildbot infrastructure requires each slave to run steps serially. | 7 Our buildbot infrastructure requires each slave to run steps serially. |
| 8 This is sub-optimal for android, where these steps can run independently on | 8 This is sub-optimal for android, where these steps can run independently on |
| 9 multiple connected devices. | 9 multiple connected devices. |
| 10 | 10 |
| 11 The buildbots will run this script multiple times per cycle: | 11 The buildbots will run this script multiple times per cycle: |
| 12 - First: all steps listed in --steps will be executed in parallel using all | 12 - First: all steps listed in --steps will be executed in parallel using all |
| 13 connected devices. Step results will be pickled to disk. Each step has a unique | 13 connected devices. Step results will be pickled to disk. Each step has a unique |
| 14 name. The result code will be ignored if the step name is listed in | 14 name. The result code will be ignored if the step name is listed in |
| 15 --flaky-steps. | 15 --flaky-steps. |
| 16 The buildbot will treat this step as a regular step, and will not process any | 16 The buildbot will treat this step as a regular step, and will not process any |
| 17 graph data. | 17 graph data. |
| 18 | 18 |
| 19 - Then, with --print-step STEP_NAME: at this stage, we'll simply print the file | 19 - Then, with --print-step STEP_NAME: at this stage, we'll simply print the file |
| 20 with the step results previously saved. The buildbot will then process the graph | 20 with the step results previously saved. The buildbot will then process the graph |
| 21 data accordingly. | 21 data accordingly. |
| 22 | 22 |
| 23 The JSON steps file contains a dictionary in the format: | |
| 24 { "version": int, | |
| 25 "steps": { | |
| 26 "foo": { | |
| 27 "device_affinity": int, | |
| 28 "cmd": "script_to_execute foo" | |
| 29 }, | |
| 30 "bar": { | |
| 31 "device_affinity": int, | |
| 32 "cmd": "script_to_execute bar" | |
| 33 } | |
| 34 } | |
| 35 } | |
| 23 | 36 |
| 24 The JSON steps file contains a dictionary in the format: | 37 # TODO(bulach): remove once it rolls downstream, crbug.com/378862. |
| 38 The OLD JSON steps file contains a dictionary in the format: | |
| 25 [ | 39 [ |
| 26 ["step_name_foo", "script_to_execute foo"], | 40 ["step_name_foo", "script_to_execute foo"], |
| 27 ["step_name_bar", "script_to_execute bar"] | 41 ["step_name_bar", "script_to_execute bar"] |
| 28 ] | 42 ] |
| 29 | 43 |
| 30 This preserves the order in which the steps are executed. | 44 This preserves the order in which the steps are executed. |
| 31 | 45 |
| 32 The JSON flaky steps file contains a list of step names whose results should | 46 The JSON flaky steps file contains a list of step names whose results should |
| 33 be ignored: | 47 be ignored: |
| 34 [ | 48 [ |
| 35 "step_name_foo", | 49 "step_name_foo", |
| 36 "step_name_bar" | 50 "step_name_bar" |
| 37 ] | 51 ] |
| 38 | 52 |
| 39 Note that script_to_execute must take at least the following | 53 Note that script_to_execute must take at least the following |
| 40 option: | 54 option: |
| 41 --device: the serial number to be passed to all adb commands. | 55 --device: the serial number to be passed to all adb commands. |
| 42 """ | 56 """ |
| 43 | 57 |
| 58 import collections | |
| 44 import datetime | 59 import datetime |
| 45 import logging | 60 import logging |
| 46 import os | 61 import os |
| 47 import pickle | 62 import pickle |
| 48 import sys | 63 import sys |
| 49 import threading | 64 import threading |
| 50 import time | 65 import time |
| 51 | 66 |
| 52 from pylib import cmd_helper | 67 from pylib import cmd_helper |
| 53 from pylib import constants | 68 from pylib import constants |
| (...skipping 24 matching lines...) | |
| 78 logging.info('*' * 80) | 93 logging.info('*' * 80) |
| 79 print persisted_result['output'] | 94 print persisted_result['output'] |
| 80 | 95 |
| 81 return persisted_result['exit_code'] | 96 return persisted_result['exit_code'] |
| 82 | 97 |
| 83 | 98 |
| 84 def PrintSummary(test_names): | 99 def PrintSummary(test_names): |
| 85 logging.info('*' * 80) | 100 logging.info('*' * 80) |
| 86 logging.info('Sharding summary') | 101 logging.info('Sharding summary') |
| 87 total_time = 0 | 102 total_time = 0 |
| 103 device_total_time = collections.defaultdict(int) | |
| 88 for test_name in test_names: | 104 for test_name in test_names: |
| 89 file_name = os.path.join(constants.PERF_OUTPUT_DIR, test_name) | 105 file_name = os.path.join(constants.PERF_OUTPUT_DIR, test_name) |
| 90 if not os.path.exists(file_name): | 106 if not os.path.exists(file_name): |
| 91 logging.info('%s : No status file found', test_name) | 107 logging.info('%s : No status file found', test_name) |
| 92 continue | 108 continue |
| 93 with file(file_name, 'r') as f: | 109 with file(file_name, 'r') as f: |
| 94 result = pickle.loads(f.read()) | 110 result = pickle.loads(f.read()) |
| 95 logging.info('%s : exit_code=%d in %d secs at %s', | 111 logging.info('%s : exit_code=%d in %d secs at %s', |
| 96 result['name'], result['exit_code'], result['total_time'], | 112 result['name'], result['exit_code'], result['total_time'], |
| 97 result['device']) | 113 result['device']) |
| 98 total_time += result['total_time'] | 114 total_time += result['total_time'] |
jbudorick 2014/06/03 13:52:52: nit: you don't really need a separate total_time a…
bulach 2014/06/03 14:25:52: Done.
| 115 device_total_time[result['device']] += result['total_time'] | |
| 116 for device, device_time in device_total_time.iteritems(): | |
| 117 logging.info('Total for device %s : %d secs', device, device_time) | |
| 99 logging.info('Total steps time: %d secs', total_time) | 118 logging.info('Total steps time: %d secs', total_time) |
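The per-device accounting this CL adds to PrintSummary is the usual `collections.defaultdict(int)` counter pattern; a standalone sketch with made-up results standing in for the unpickled per-step dicts:

```python
import collections

# Fake step results; device serials are invented for illustration.
results = [
    {'device': '01499B30', 'total_time': 120},
    {'device': '01499B30', 'total_time': 45},
    {'device': '0149A5C2', 'total_time': 90},
]

device_total_time = collections.defaultdict(int)
for result in results:
  device_total_time[result['device']] += result['total_time']
for device, device_time in sorted(device_total_time.items()):
  print('Total for device %s : %d secs' % (device, device_time))
# Total for device 01499B30 : 165 secs
# Total for device 0149A5C2 : 90 secs
```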
| 100 | 119 |
| 101 | 120 |
| 102 class _HeartBeatLogger(object): | 121 class _HeartBeatLogger(object): |
| 103 # How often to print the heartbeat on flush(). | 122 # How often to print the heartbeat on flush(). |
| 104 _PRINT_INTERVAL = 30.0 | 123 _PRINT_INTERVAL = 30.0 |
| 105 | 124 |
| 106 def __init__(self): | 125 def __init__(self): |
| 107 """A file-like class for keeping the buildbot alive.""" | 126 """A file-like class for keeping the buildbot alive.""" |
| 108 self._len = 0 | 127 self._len = 0 |
| (...skipping 15 matching lines...) | |
| 124 if now - self._tick >= _HeartBeatLogger._PRINT_INTERVAL: | 143 if now - self._tick >= _HeartBeatLogger._PRINT_INTERVAL: |
| 125 self._tick = now | 144 self._tick = now |
| 126 print '--single-step output length %d' % self._len | 145 print '--single-step output length %d' % self._len |
| 127 sys.stdout.flush() | 146 sys.stdout.flush() |
| 128 | 147 |
| 129 def stop(self): | 148 def stop(self): |
| 130 self._stopped.set() | 149 self._stopped.set() |
| 131 | 150 |
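Since the diff elides most of `_HeartBeatLogger`, here is a self-contained sketch of the throttled keep-alive pattern the visible lines imply; anything beyond those lines (in particular the body of `write()`) is an assumption:

```python
import sys
import time

class HeartBeatLogger(object):
  """File-like object: accumulates output length on write(), and on
  flush() prints a keep-alive line at most once per interval."""
  _PRINT_INTERVAL = 30.0  # seconds between heartbeats

  def __init__(self):
    self._len = 0
    self._tick = time.time()

  def write(self, data):
    # Assumed body: only the running length is tracked, not the data.
    self._len += len(data)

  def flush(self):
    now = time.time()
    if now - self._tick >= HeartBeatLogger._PRINT_INTERVAL:
      self._tick = now
      print('--single-step output length %d' % self._len)
      sys.stdout.flush()
```

The reviewed class also carries a `threading.Event`-based `stop()` shutdown path, visible above; it is omitted from this sketch.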
| 132 | 151 |
| 133 class TestRunner(base_test_runner.BaseTestRunner): | 152 class TestRunner(base_test_runner.BaseTestRunner): |
| 134 def __init__(self, test_options, device, tests, flaky_tests): | 153 def __init__(self, test_options, device, shard_index, max_shard, tests, |
| 154 flaky_tests): | |
| 135 """A TestRunner instance runs a perf test on a single device. | 155 """A TestRunner instance runs a perf test on a single device. |
| 136 | 156 |
| 137 Args: | 157 Args: |
| 138 test_options: A PerfOptions object. | 158 test_options: A PerfOptions object. |
| 139 device: Device to run the tests. | 159 device: Device to run the tests. |
| 160 shard_index: the shard index assigned to this device. | |
| 161 max_shard: the total number of shards. | |
| 140 tests: a dict mapping test_name to command. | 162 tests: a dict mapping test_name to command. |
| 141 flaky_tests: a list of flaky test_name. | 163 flaky_tests: a list of flaky test_name. |
| 142 """ | 164 """ |
| 143 super(TestRunner, self).__init__(device, None, 'Release') | 165 super(TestRunner, self).__init__(device, None, 'Release') |
| 144 self._options = test_options | 166 self._options = test_options |
| 167 self._shard_index = shard_index | |
| 168 self._max_shard = max_shard | |
| 145 self._tests = tests | 169 self._tests = tests |
| 146 self._flaky_tests = flaky_tests | 170 self._flaky_tests = flaky_tests |
| 147 | 171 |
| 148 @staticmethod | 172 @staticmethod |
| 149 def _IsBetter(result): | 173 def _IsBetter(result): |
| 150 if result['actual_exit_code'] == 0: | 174 if result['actual_exit_code'] == 0: |
| 151 return True | 175 return True |
| 152 pickled = os.path.join(constants.PERF_OUTPUT_DIR, | 176 pickled = os.path.join(constants.PERF_OUTPUT_DIR, |
| 153 result['name']) | 177 result['name']) |
| 154 if not os.path.exists(pickled): | 178 if not os.path.exists(pickled): |
| 155 return True | 179 return True |
| 156 with file(pickled, 'r') as f: | 180 with file(pickled, 'r') as f: |
| 157 previous = pickle.loads(f.read()) | 181 previous = pickle.loads(f.read()) |
| 158 return result['actual_exit_code'] < previous['actual_exit_code'] | 182 return result['actual_exit_code'] < previous['actual_exit_code'] |
| 159 | 183 |
| 160 @staticmethod | 184 @staticmethod |
| 161 def _SaveResult(result): | 185 def _SaveResult(result): |
| 162 if TestRunner._IsBetter(result): | 186 if TestRunner._IsBetter(result): |
| 163 with file(os.path.join(constants.PERF_OUTPUT_DIR, | 187 with file(os.path.join(constants.PERF_OUTPUT_DIR, |
| 164 result['name']), 'w') as f: | 188 result['name']), 'w') as f: |
| 165 f.write(pickle.dumps(result)) | 189 f.write(pickle.dumps(result)) |
| 166 | 190 |
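`_SaveResult` only overwrites the pickled result when `_IsBetter` says so: a pass always wins, a first result is always kept, and otherwise the lower `actual_exit_code` wins. A toy version of that ordering, with the pickle plumbing stripped out:

```python
def IsBetter(new_code, previous_code):
  # Simplified mirror of TestRunner._IsBetter; previous_code is None
  # when no earlier result was persisted for this step.
  if new_code == 0:
    return True
  if previous_code is None:
    return True
  return new_code < previous_code

assert IsBetter(0, 1)      # a pass always replaces a failure
assert IsBetter(1, None)   # the first result is always kept
assert not IsBetter(2, 1)  # a worse failure never overwrites
```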
| 191 def _CheckDeviceAffinity(self, test_name): | |
| 192 """Returns True if test_name has affinity for this shard.""" | |
| 193 affinity = (self._tests['steps'][test_name]['device_affinity'] % | |
jbudorick 2014/06/03 13:52:52: My point with the shard index vs the serial was th…
bulach 2014/06/03 14:25:52: let's split this: 1) this is using the persistent…
| 194 self._max_shard) | |
| 195 if self._shard_index == affinity: | |
| 196 return True | |
| 197 logging.info('Skipping %s on %s (affinity is %s, shard index is %s)', | |
| 198 test_name, self.device_serial, affinity, self._shard_index) | |
| 199 return False | |
| 200 | |
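To make the affinity arithmetic concrete: with three connected devices (`max_shard == 3`), a step whose `device_affinity` is 7 runs only on the device with shard index 1, because `7 % 3 == 1`. A minimal illustration (names hypothetical):

```python
def HasAffinity(device_affinity, shard_index, max_shard):
  # Same modulo check as _CheckDeviceAffinity.
  return device_affinity % max_shard == shard_index

assert HasAffinity(7, shard_index=1, max_shard=3)
assert not HasAffinity(7, shard_index=0, max_shard=3)
```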
| 167 def _LaunchPerfTest(self, test_name): | 201 def _LaunchPerfTest(self, test_name): |
| 168 """Runs a perf test. | 202 """Runs a perf test. |
| 169 | 203 |
| 170 Args: | 204 Args: |
| 171 test_name: the name of the test to be executed. | 205 test_name: the name of the test to be executed. |
| 172 | 206 |
| 173 Returns: | 207 Returns: |
| 174 A tuple containing (Output, base_test_result.ResultType) | 208 A tuple containing (Output, base_test_result.ResultType) |
| 175 """ | 209 """ |
| 210 if not self._CheckDeviceAffinity(test_name): | |
| 211 return '', base_test_result.ResultType.PASS | |
| 212 | |
| 176 try: | 213 try: |
| 177 logging.warning('Unmapping device ports') | 214 logging.warning('Unmapping device ports') |
| 178 forwarder.Forwarder.UnmapAllDevicePorts(self.device) | 215 forwarder.Forwarder.UnmapAllDevicePorts(self.device) |
| 179 self.device.old_interface.RestartAdbdOnDevice() | 216 self.device.old_interface.RestartAdbdOnDevice() |
| 180 except Exception as e: | 217 except Exception as e: |
| 181 logging.error('Exception when tearing down device %s', e) | 218 logging.error('Exception when tearing down device %s', e) |
| 182 | 219 |
| 183 cmd = ('%s --device %s' % | 220 cmd = ('%s --device %s' % |
| 184 (self._tests[test_name], self.device.old_interface.GetDevice())) | 221 (self._tests['steps'][test_name]['cmd'], |
| 222 self.device_serial)) | |
| 185 logging.info('%s : %s', test_name, cmd) | 223 logging.info('%s : %s', test_name, cmd) |
| 186 start_time = datetime.datetime.now() | 224 start_time = datetime.datetime.now() |
| 187 | 225 |
| 188 timeout = 5400 | 226 timeout = 5400 |
| 189 if self._options.no_timeout: | 227 if self._options.no_timeout: |
| 190 timeout = None | 228 timeout = None |
| 191 full_cmd = cmd | 229 full_cmd = cmd |
| 192 if self._options.dry_run: | 230 if self._options.dry_run: |
| 193 full_cmd = 'echo %s' % cmd | 231 full_cmd = 'echo %s' % cmd |
| 194 | 232 |
| 195 logfile = sys.stdout | 233 logfile = sys.stdout |
| 196 if self._options.single_step: | 234 if self._options.single_step: |
| 197 # Just print a heart-beat so that the outer buildbot scripts won't time out | 235 # Just print a heart-beat so that the outer buildbot scripts won't time out |
| 198 # without response. | 236 # without response. |
| 199 logfile = _HeartBeatLogger() | 237 logfile = _HeartBeatLogger() |
| 200 cwd = os.path.abspath(constants.DIR_SOURCE_ROOT) | 238 cwd = os.path.abspath(constants.DIR_SOURCE_ROOT) |
| 201 if full_cmd.startswith('src/'): | 239 if full_cmd.startswith('src/'): |
| 202 cwd = os.path.abspath(os.path.join(constants.DIR_SOURCE_ROOT, os.pardir)) | 240 cwd = os.path.abspath(os.path.join(constants.DIR_SOURCE_ROOT, os.pardir)) |
| 203 try: | 241 try: |
| 204 exit_code, output = cmd_helper.GetCmdStatusAndOutputWithTimeout( | 242 exit_code, output = cmd_helper.GetCmdStatusAndOutputWithTimeout( |
| 205 full_cmd, timeout, cwd=cwd, shell=True, logfile=logfile) | 243 full_cmd, timeout, cwd=cwd, shell=True, logfile=logfile) |
| 206 finally: | 244 finally: |
| 207 if self._options.single_step: | 245 if self._options.single_step: |
| 208 logfile.stop() | 246 logfile.stop() |
| 209 end_time = datetime.datetime.now() | 247 end_time = datetime.datetime.now() |
| 210 if exit_code is None: | 248 if exit_code is None: |
| 211 exit_code = -1 | 249 exit_code = -1 |
| 212 logging.info('%s : exit_code=%d in %d secs at %s', | 250 logging.info('%s : exit_code=%d in %d secs at %s', |
| 213 test_name, exit_code, (end_time - start_time).seconds, | 251 test_name, exit_code, (end_time - start_time).seconds, |
| 214 self.device.old_interface.GetDevice()) | 252 self.device_serial) |
jbudorick 2014/06/03 13:52:52: Sneaking in ahead of me here, I see. (In the futu…
bulach 2014/06/03 14:25:52: want me to keep the old way? happy either way :)
| 215 result_type = base_test_result.ResultType.FAIL | 253 result_type = base_test_result.ResultType.FAIL |
| 216 if exit_code == 0: | 254 if exit_code == 0: |
| 217 result_type = base_test_result.ResultType.PASS | 255 result_type = base_test_result.ResultType.PASS |
| 218 actual_exit_code = exit_code | 256 actual_exit_code = exit_code |
| 219 if test_name in self._flaky_tests: | 257 if test_name in self._flaky_tests: |
| 220 # The exit_code is used at the second stage when printing the | 258 # The exit_code is used at the second stage when printing the |
| 221 # test output. If the test is flaky, force it to 0 to keep that step green | 259 # test output. If the test is flaky, force it to 0 to keep that step green |
| 222 # while still gathering data for the perf dashboards. | 260 # while still gathering data for the perf dashboards. |
| 223 # The result_type is used by the test_dispatcher to retry the test. | 261 # The result_type is used by the test_dispatcher to retry the test. |
| 224 exit_code = 0 | 262 exit_code = 0 |
| 225 | 263 |
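The flaky-step handling keeps three distinct signals per result, as the comment above describes. A toy trace of the values for a flaky step that failed (illustrative only; the real result_type is a base_test_result.ResultType constant):

```python
actual_exit_code = 1   # what the step really returned
result_type = 'FAIL'   # the test_dispatcher may still retry the step
exit_code = 0          # forced: the printed buildbot step stays green

# All three are persisted: exit_code drives the --print-step stage,
# result_type drives retries, and actual_exit_code lets _IsBetter
# prefer a clean rerun over this failure.
assert (actual_exit_code, exit_code) == (1, 0)
```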
| 226 persisted_result = { | 264 persisted_result = { |
| 227 'name': test_name, | 265 'name': test_name, |
| 228 'output': output, | 266 'output': output, |
| 229 'exit_code': exit_code, | 267 'exit_code': exit_code, |
| 230 'actual_exit_code': actual_exit_code, | 268 'actual_exit_code': actual_exit_code, |
| 231 'result_type': result_type, | 269 'result_type': result_type, |
| 232 'total_time': (end_time - start_time).seconds, | 270 'total_time': (end_time - start_time).seconds, |
| 233 'device': self.device.old_interface.GetDevice(), | 271 'device': self.device_serial, |
| 234 'cmd': cmd, | 272 'cmd': cmd, |
| 235 } | 273 } |
| 236 self._SaveResult(persisted_result) | 274 self._SaveResult(persisted_result) |
| 237 | 275 |
| 238 return (output, result_type) | 276 return (output, result_type) |
| 239 | 277 |
| 240 def RunTest(self, test_name): | 278 def RunTest(self, test_name): |
| 241 """Run a perf test on the device. | 279 """Run a perf test on the device. |
| 242 | 280 |
| 243 Args: | 281 Args: |
| 244 test_name: String to use for logging the test result. | 282 test_name: String to use for logging the test result. |
| 245 | 283 |
| 246 Returns: | 284 Returns: |
| 247 A tuple of (TestRunResults, retry). | 285 A tuple of (TestRunResults, retry). |
| 248 """ | 286 """ |
| 249 _, result_type = self._LaunchPerfTest(test_name) | 287 _, result_type = self._LaunchPerfTest(test_name) |
| 250 results = base_test_result.TestRunResults() | 288 results = base_test_result.TestRunResults() |
| 251 results.AddResult(base_test_result.BaseTestResult(test_name, result_type)) | 289 results.AddResult(base_test_result.BaseTestResult(test_name, result_type)) |
| 252 retry = None | 290 retry = None |
| 253 if not results.DidRunPass(): | 291 if not results.DidRunPass(): |
| 254 retry = test_name | 292 retry = test_name |
| 255 return results, retry | 293 return results, retry |
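RunTest signals a retry by returning the failing test's name alongside the results. A toy dispatcher loop against a stand-in runner; `FakeRunner` is hypothetical, and real results are TestRunResults objects, not strings:

```python
class FakeRunner(object):
  # Stand-in for TestRunner, to show the (results, retry) contract.
  def __init__(self):
    self._attempts = 0

  def RunTest(self, test_name):
    self._attempts += 1
    passed = self._attempts > 1     # fail once, then pass
    retry = None if passed else test_name
    return ('pass' if passed else 'fail'), retry

runner = FakeRunner()
results, retry = runner.RunTest('step_name_foo')
while retry is not None:
  results, retry = runner.RunTest(retry)
assert results == 'pass' and runner._attempts == 2
```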