Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # | 2 # |
| 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
| 5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
| 6 | 6 |
| 7 """Helper script to shard build bot steps and save results to disk. | 7 """DEPRECATED! |
| 8 | 8 TODO(bulach): remove me once all other repositories reference |
| 9 Our buildbot infrastructure requires each slave to run steps serially. | 9 'test_runner.py perf' directly. |
| 10 This is sub-optimal for android, where these steps can run independently on | |
| 11 multiple connected devices. | |
| 12 | |
| 13 The buildbots will run this script multiple times per cycle: | |
| 14 - First: all steps listed in -s will be executed in parallel using all | |
| 15 connected devices. Step results will be pickled to disk. Each step has a unique | |
| 16 name. The result code will be ignored if the step name is listed in | |
| 17 --flaky_steps. | |
| 18 The buildbot will treat this step as a regular step, and will not process any | |
| 19 graph data. | |
| 20 | |
| 21 - Then, with -p STEP_NAME: at this stage, we'll simply print the file with the | |
| 22 step results previously saved. The buildbot will then process the graph data | |
| 23 accordingly. | |
| 24 | |
| 25 The JSON steps file contains a dictionary in the format: | |
| 26 { | |
| 27 "step_name_foo": "script_to_execute foo", | |
| 28 "step_name_bar": "script_to_execute bar" | |
| 29 } | |
| 30 | |
| 31 The JSON flaky steps file contains a list of step names whose results should | |
| 32 be ignored: | |
| 33 [ | |
| 34 "step_name_foo", | |
| 35 "step_name_bar" | |
| 36 ] | |
| 37 | |
| 38 Note that script_to_execute must accept at least the following | |
| 39 options: | |
| 40 --device: the serial number to be passed to all adb commands. | |
| 41 --keep_test_server_ports: indicates it's being run as a shard, and shouldn't | |
| 42 reset test server port allocation. | |
| 43 """ | 10 """ |
| 44 | 11 |
| 12 import optparse | |
| 13 import os | |
| 14 import sys | |
| 45 | 15 |
| 46 import datetime | |
| 47 import json | |
| 48 import logging | |
| 49 import multiprocessing | |
| 50 import optparse | |
| 51 import pexpect | |
| 52 import pickle | |
| 53 import os | |
| 54 import signal | |
| 55 import shutil | |
| 56 import sys | |
| 57 import time | |
| 58 | |
| 59 from pylib import android_commands | |
| 60 from pylib import cmd_helper | 16 from pylib import cmd_helper |
| 61 from pylib import constants | |
| 62 from pylib import forwarder | |
| 63 from pylib import ports | |
| 64 | |
| 65 | |
| 66 _OUTPUT_DIR = os.path.join(constants.DIR_SOURCE_ROOT, 'out', 'step_results') | |
| 67 | |
| 68 | |
| 69 def _SaveResult(result): | |
| 70 with file(os.path.join(_OUTPUT_DIR, result['name']), 'w') as f: | |
| 71 f.write(pickle.dumps(result)) | |
| 72 | |
| 73 | |
| 74 def _RunStepsPerDevice(steps): | |
| 75 results = [] | |
| 76 for step in steps: | |
| 77 start_time = datetime.datetime.now() | |
| 78 print 'Starting %s: %s %s at %s' % (step['name'], step['cmd'], | |
| 79 start_time, step['device']) | |
| 80 output, exit_code = pexpect.run( | |
| 81 step['cmd'], cwd=os.path.abspath(constants.DIR_SOURCE_ROOT), | |
| 82 withexitstatus=True, logfile=sys.stdout, timeout=1800, | |
| 83 env=os.environ) | |
| 84 exit_code = exit_code or 0 | |
| 85 end_time = datetime.datetime.now() | |
| 86 exit_msg = '%s %s' % (exit_code, | |
| 87 '(ignored, flaky step)' if step['is_flaky'] else '') | |
| 88 print 'Finished %s: %s %s %s at %s' % (step['name'], exit_msg, step['cmd'], | |
| 89 end_time, step['device']) | |
| 90 if step['is_flaky']: | |
| 91 exit_code = 0 | |
| 92 result = {'name': step['name'], | |
| 93 'output': output, | |
| 94 'exit_code': exit_code, | |
| 95 'total_time': (end_time - start_time).seconds, | |
| 96 'device': step['device']} | |
| 97 _SaveResult(result) | |
| 98 results += [result] | |
| 99 return results | |
| 100 | |
| 101 | |
| 102 def _RunShardedSteps(steps, flaky_steps, devices): | |
| 103 assert steps | |
| 104 assert devices, 'No devices connected?' | |
| 105 if os.path.exists(_OUTPUT_DIR): | |
| 106 assert '/step_results' in _OUTPUT_DIR | |
| 107 shutil.rmtree(_OUTPUT_DIR) | |
| 108 if not os.path.exists(_OUTPUT_DIR): | |
| 109 os.makedirs(_OUTPUT_DIR) | |
| 110 step_names = sorted(steps.keys()) | |
| 111 all_params = [] | |
| 112 num_devices = len(devices) | |
| 113 shard_size = (len(steps) + num_devices - 1) / num_devices | |
| 114 for i, device in enumerate(devices): | |
| 115 steps_per_device = [] | |
| 116 for s in steps.keys()[i * shard_size:(i + 1) * shard_size]: | |
| 117 steps_per_device += [{'name': s, | |
| 118 'device': device, | |
| 119 'is_flaky': s in flaky_steps, | |
| 120 'cmd': steps[s] + ' --device ' + device + | |
| 121 ' --keep_test_server_ports'}] | |
| 122 all_params += [steps_per_device] | |
| 123 print 'Start sharding (note: output is not synchronized...)' | |
| 124 print '*' * 80 | |
| 125 start_time = datetime.datetime.now() | |
| 126 pool = multiprocessing.Pool(processes=num_devices) | |
| 127 async_results = pool.map_async(_RunStepsPerDevice, all_params) | |
| 128 results_per_device = async_results.get(999999) | |
| 129 end_time = datetime.datetime.now() | |
| 130 print '*' * 80 | |
| 131 print 'Finished sharding.' | |
| 132 print 'Summary' | |
| 133 total_time = 0 | |
| 134 for results in results_per_device: | |
| 135 for result in results: | |
| 136 print('%s : exit_code=%d in %d secs at %s' % | |
| 137 (result['name'], result['exit_code'], result['total_time'], | |
| 138 result['device'])) | |
| 139 total_time += result['total_time'] | |
| 140 print 'Step time: %d secs' % ((end_time - start_time).seconds) | |
| 141 print 'Bots time: %d secs' % total_time | |
| 142 # No exit_code for the sharding step: the individual _PrintResults step | |
| 143 # will return the corresponding exit_code. | |
| 144 return 0 | |
| 145 | |
| 146 | |
| 147 def _PrintStepOutput(step_name): | |
| 148 file_name = os.path.join(_OUTPUT_DIR, step_name) | |
| 149 if not os.path.exists(file_name): | |
| 150 print 'File not found ', file_name | |
| 151 return 1 | |
| 152 with file(file_name, 'r') as f: | |
| 153 result = pickle.loads(f.read()) | |
| 154 print result['output'] | |
| 155 return result['exit_code'] | |
| 156 | |
| 157 | |
| 158 def _PrintAllStepsOutput(steps): | |
| 159 with file(steps, 'r') as f: | |
| 160 steps = json.load(f) | |
| 161 ret = 0 | |
| 162 for step_name in steps.keys(): | |
| 163 ret |= _PrintStepOutput(step_name) | |
| 164 return ret | |
| 165 | |
| 166 | |
| 167 def _KillPendingServers(): | |
| 168 for retry in range(5): | |
| 169 for server in ['lighttpd', 'web-page-replay']: | |
| 170 pids = cmd_helper.GetCmdOutput(['pgrep', '-f', server]) | |
| 171 pids = [pid.strip() for pid in pids.split('\n') if pid.strip()] | |
| 172 for pid in pids: | |
| 173 try: | |
| 174 logging.warning('Killing %s %s', server, pid) | |
| 175 os.kill(int(pid), signal.SIGQUIT) | |
| 176 except Exception as e: | |
| 177 logging.warning('Failed killing %s %s %s', server, pid, e) | |
| 178 # Restart the adb server with taskset to set a single CPU affinity. | |
| 179 cmd_helper.RunCmd(['adb', 'kill-server']) | |
| 180 cmd_helper.RunCmd(['taskset', '-c', '0', 'adb', 'start-server']) | |
| 181 cmd_helper.RunCmd(['taskset', '-c', '0', 'adb', 'root']) | |
| 182 i = 1 | |
| 183 while not android_commands.GetAttachedDevices(): | |
| 184 time.sleep(i) | |
| 185 i *= 2 | |
| 186 if i > 10: | |
| 187 break | |
| 188 | 17 |
| 189 | 18 |
| 190 def main(argv): | 19 def main(argv): |
| 191 parser = optparse.OptionParser() | 20 parser = optparse.OptionParser() |
| 192 parser.add_option('-s', '--steps', | 21 parser.add_option('-s', '--steps', |
| 193 help='A JSON file containing all the steps to be ' | 22 help='A JSON file containing all the steps to be ' |
| 194 'sharded.') | 23 'sharded.') |
| 195 parser.add_option('--flaky_steps', | 24 parser.add_option('--flaky_steps', |
| 196 help='A JSON file containing steps that are flaky and ' | 25 help='A JSON file containing steps that are flaky and ' |
| 197 'will have its exit code ignored.') | 26 'will have its exit code ignored.') |
| 198 parser.add_option('-p', '--print_results', | 27 parser.add_option('-p', '--print_results', |
| 199 help='Only prints the results for the previously ' | 28 help='Only prints the results for the previously ' |
| 200 'executed step, do not run it again.') | 29 'executed step, do not run it again.') |
| 201 parser.add_option('-P', '--print_all', | |
| 202 help='Only prints the results for the previously ' | |
| 203 'executed steps, do not run them again.') | |
|
bulach
2013/08/14 10:18:35
this has never been used, pending discussions here
| |
| 204 options, urls = parser.parse_args(argv) | 30 options, urls = parser.parse_args(argv) |
| 205 if options.print_results: | 31 if options.print_results: |
| 206 return _PrintStepOutput(options.print_results) | 32 return cmd_helper.RunCmd(['build/android/test_runner.py', 'perf', |
| 207 if options.print_all: | 33 '--print-step', options.print_results]) |
| 208 return _PrintAllStepsOutput(options.print_all) | 34 flaky_options = [] |
| 209 | |
| 210 # At this point, we should kill everything that may have been left over from | |
| 211 # previous runs. | |
| 212 _KillPendingServers() | |
| 213 | |
| 214 forwarder.Forwarder.UseMultiprocessing() | |
| 215 | |
| 216 # Reset the test port allocation. It's important to do it before starting | |
| 217 # to dispatch any step. | |
| 218 if not ports.ResetTestServerPortAllocation(): | |
| 219 raise Exception('Failed to reset test server port.') | |
| 220 | |
| 221 # Sort the devices so that we'll try to always run a step in the same device. | |
| 222 devices = sorted(android_commands.GetAttachedDevices()) | |
| 223 if not devices: | |
| 224 print 'You must attach a device' | |
| 225 return 1 | |
| 226 | |
| 227 with file(options.steps, 'r') as f: | |
| 228 steps = json.load(f) | |
| 229 flaky_steps = [] | |
| 230 if options.flaky_steps: | 35 if options.flaky_steps: |
| 231 with file(options.flaky_steps, 'r') as f: | 36 flaky_options = ['--flaky-steps', options.flaky_steps] |
| 232 flaky_steps = json.load(f) | 37 return cmd_helper.RunCmd(['build/android/test_runner.py', 'perf', |
| 233 return _RunShardedSteps(steps, flaky_steps, devices) | 38 '--steps', options.steps] + flaky_options) |
| 234 | 39 |
| 235 | 40 |
| 236 if __name__ == '__main__': | 41 if __name__ == '__main__': |
| 237 sys.exit(main(sys.argv)) | 42 sys.exit(main(sys.argv)) |
| OLD | NEW |