Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/env python | |
| 2 # | |
| 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 4 # Use of this source code is governed by a BSD-style license that can be | |
| 5 # found in the LICENSE file. | |
| 6 | |
"""Helper script to shard build bot steps and save results to disk.

Our buildbot infrastructure requires each slave to run steps serially.
This is sub-optimal for android, where these steps can run independently on
multiple connected devices.

The buildbots will run this script multiple times per cycle:
- First, with -s STEPS_FILE (and without -p): all steps will be executed in
  parallel using all connected devices. Step results will be pickled to disk
  (each step has a unique name).
  The buildbot will treat this step as a regular step, and will not process any
  graph data.

- Then, with -p STEP_NAME: at this stage, we'll simply print the file with the
  step results previously saved. The buildbot will then process the graph data
  accordingly.

The steps file is a JSON dictionary that maps each unique step name to the
command line that runs that step, for example:
  {
    "step_name_foo": "script_to_execute foo",
    "step_name_bar": "script_to_execute bar"
  }
' --device DEVICE --keep_test_server_ports' is appended to every command
before it is executed.
"""
| 24 | |
| 25 | |
| 26 import datetime | |
| 27 import json | |
| 28 import logging | |
| 29 import multiprocessing | |
| 30 import optparse | |
| 31 import pexpect | |
| 32 import pickle | |
| 33 import os | |
| 34 import signal | |
| 35 import shutil | |
| 36 import sys | |
| 37 | |
| 38 from pylib import android_commands | |
| 39 from pylib import cmd_helper | |
| 40 from pylib import constants | |
| 41 from pylib import ports | |
| 42 from pylib import test_options_parser | |
| 43 | |
# Directory (under constants.CHROME_DIR) that holds one pickled result file
# per step; it is written by the sharding run and read back by -p STEP_NAME.
_OUTPUT_DIR = os.path.join(constants.CHROME_DIR, 'out', 'step_results')
| 45 | |
| 46 | |
def _SaveResult(result, output_dir=None):
  """Pickles a single step result to disk so it can be printed later.

  Args:
    result: A dict describing a finished step. Its 'name' entry is used as the
        file name, so names must be unique among steps.
    output_dir: Directory to write the file into. Defaults to the module-level
        _OUTPUT_DIR. The directory must already exist.
  """
  if output_dir is None:
    output_dir = _OUTPUT_DIR
  # open() replaces the deprecated file() builtin; binary mode keeps the pickle
  # stream free of any newline translation.
  with open(os.path.join(output_dir, result['name']), 'wb') as f:
    pickle.dump(result, f)
| 50 | |
| 51 | |
def _RunStepsPerDevice(steps):
  """Runs every step assigned to one device, one after the other.

  Each result is saved to disk with _SaveResult as soon as its step finishes.

  Args:
    steps: A list of dicts, each with the keys 'name', 'cmd' and 'device'.

  Returns:
    A list of result dicts, in the order the steps were given, each with the
    keys 'name', 'output', 'exit_code', 'total_time' (whole seconds) and
    'device'.
  """
  results = []
  for step in steps:
    start_time = datetime.datetime.now()
    print('Starting %s: %s %s at %s' % (step['name'], step['cmd'],
                                        start_time, step['device']))
    output, exit_code = pexpect.run(
        step['cmd'], cwd=os.path.abspath(constants.CHROME_DIR),
        withexitstatus=True, logfile=sys.stdout, timeout=1800,
        env=os.environ)
    end_time = datetime.datetime.now()
    print('Finished %s: %s %s at %s' % (step['name'], step['cmd'],
                                        end_time, step['device']))
    if exit_code is None:
      # pexpect reports no exit status when the child did not exit normally
      # (e.g. it was killed after the timeout). Record that as a failure
      # instead of letting it masquerade as a successful exit code 0.
      exit_code = -1
    result = {'name': step['name'],
              'output': output,
              'exit_code': exit_code,
              'total_time': (end_time - start_time).seconds,
              'device': step['device']}
    _SaveResult(result)
    results.append(result)
  return results
| 73 | |
| 74 | |
def _RunShardedSteps(steps, devices):
  """Distributes the steps across the devices and runs the shards in parallel.

  The steps are split, in sorted name order, into one contiguous shard per
  device. Each shard is run by _RunStepsPerDevice in its own worker process,
  and a summary is printed once every shard has finished. _OUTPUT_DIR is
  wiped and recreated before anything runs.

  Args:
    steps: A non-empty dict mapping each step name to its command line.
    devices: A non-empty list of device identifiers; each one is passed to the
        commands of its shard via --device.

  Returns:
    Always 0. The exit code of an individual step is returned later by
    _PrintResults.
  """
  assert steps, 'No steps to shard.'
  assert devices, 'No devices to run the steps on.'
  if os.path.exists(_OUTPUT_DIR):
    # Sanity check before the recursive delete below.
    assert '/step_results' in _OUTPUT_DIR
    shutil.rmtree(_OUTPUT_DIR)
  if not os.path.exists(_OUTPUT_DIR):
    os.makedirs(_OUTPUT_DIR)
  # Shard from the *sorted* names so that, with a sorted device list, a given
  # step keeps landing on the same device from one run to the next.
  step_names = sorted(steps)
  num_devices = len(devices)
  # Ceiling division; '//' keeps the result an integer on every Python version.
  shard_size = (len(step_names) + num_devices - 1) // num_devices
  all_params = []
  for i, device in enumerate(devices):
    shard = step_names[i * shard_size:(i + 1) * shard_size]
    # NOTE(review): a reviewer pointed out that Telemetry does not support
    # --keep_test_server_ports; confirm before sharding Telemetry steps.
    all_params.append(
        [{'name': name,
          'device': device,
          'cmd': (steps[name] + ' --device ' + device +
                  ' --keep_test_server_ports')}
         for name in shard])
  print('Start sharding (note: output is not synchronized...)')
  print('*' * 80)
  start_time = datetime.datetime.now()
  pool = multiprocessing.Pool(processes=num_devices)
  try:
    async_results = pool.map_async(_RunStepsPerDevice, all_params)
    # The huge timeout effectively means "wait forever"; presumably it is
    # passed so that the wait stays interruptible -- TODO confirm.
    results_per_device = async_results.get(999999)
  finally:
    # Always reap the worker processes, including when a shard raised.
    pool.terminate()
    pool.join()
  end_time = datetime.datetime.now()
  print('*' * 80)
  print('Finished sharding.')
  print('Summary')
  total_time = 0
  for results in results_per_device:
    for result in results:
      print('%s : exit_code=%d in %d secs at %s' %
            (result['name'], result['exit_code'], result['total_time'],
             result['device']))
      total_time += result['total_time']
  print('Step time: %d secs' % ((end_time - start_time).seconds))
  print('Bots time: %d secs' % total_time)
  # No exit_code for the sharding step: the individual _PrintResults step
  # will return the corresponding exit_code.
  return 0
| 116 | |
| 117 | |
def _PrintResults(step_name, output_dir=None):
  """Prints the saved output of a previously executed step.

  Args:
    step_name: Name of the step; it is also the name of the result file.
    output_dir: Directory holding the pickled results. Defaults to the
        module-level _OUTPUT_DIR.

  Returns:
    The exit code recorded for the step, or 1 if no result file exists.
  """
  if output_dir is None:
    output_dir = _OUTPUT_DIR
  file_name = os.path.join(output_dir, step_name)
  if not os.path.exists(file_name):
    # The doubled space matches what the original print statement emitted.
    print('File not found  %s' % file_name)
    return 1
  # NOTE: unpickling runs arbitrary code if the file has been tampered with;
  # only files produced by _SaveResult in this script's output directory are
  # expected here.
  with open(file_name, 'rb') as f:
    result = pickle.load(f)
  # NOTE(review): everything goes to stdout; a reviewer noted that the buildbot
  # colors stdout and stderr differently.
  print(result['output'])
  return result['exit_code']
| 127 | |
| 128 | |
def _KillPendingServers():
  """Sends SIGQUIT to leftover lighttpd and web-page-replay processes.

  Makes up to five passes over the process list and stops as soon as a pass
  finds nothing left to signal. A failure to kill a process is logged and
  never raised.
  """
  for _ in range(5):
    found_any = False
    for server in ['lighttpd', 'web-page-replay']:
      pgrep_output = cmd_helper.GetCmdOutput(['pgrep', '-f', server])
      pids = [pid.strip() for pid in pgrep_output.split('\n') if pid.strip()]
      for pid in pids:
        found_any = True
        try:
          logging.warning('Killing %s %s', server, pid)
          os.kill(int(pid), signal.SIGQUIT)
        except (OSError, ValueError) as e:
          # OSError: the process is already gone or cannot be signalled.
          # ValueError: pgrep printed something that is not a pid.
          logging.warning('Failed killing %s %s %s', server, pid, e)
    if not found_any:
      # Nothing matched in this pass, so further passes would be no-ops.
      return
| 140 | |
| 141 | |
def main(argv):
  """Entry point: either prints a saved step result or runs all the steps.

  With -p STEP_NAME the previously saved result of that step is printed.
  Otherwise the steps in the --steps JSON file are sharded across all the
  attached devices.

  Args:
    argv: The full command line, including the program name (as sys.argv).

  Returns:
    The exit code for the process.

  Raises:
    Exception: if the test server port allocation cannot be reset.
  """
  parser = optparse.OptionParser()
  parser.add_option('-s', '--steps',
                    help='A JSON file containing all the steps to be '
                         'sharded.')
  parser.add_option('-p', '--print_results',
                    help='Only prints the results for the previously '
                         'executed step, do not run it again.')
  # NOTE(review): flagged as unused during review. Kept so that command lines
  # which already pass a build type option keep parsing -- confirm before
  # removing.
  test_options_parser.AddBuildTypeOption(parser)
  options, _ = parser.parse_args(argv)
  if options.print_results:
    return _PrintResults(options.print_results)

  # Fail fast, before any server is killed or any port is reset, instead of
  # crashing later when the missing file name is opened.
  if not options.steps:
    parser.error('--steps is required unless --print_results is given.')

  # At this point, we should kill everything that may have been left over from
  # previous runs.
  _KillPendingServers()

  # Reset the test port allocation. It's important to do it before starting
  # to dispatch any step.
  if not ports.ResetTestServerPortAllocation():
    raise Exception('Failed to reset test server port.')

  # Sort the devices so that we'll try to always run a step in the same device.
  devices = sorted(android_commands.GetAttachedDevices())
  if not devices:
    print('You must attach a device')
    return 1

  with open(options.steps, 'r') as f:
    steps = json.load(f)
  return _RunShardedSteps(steps, devices)
| 173 | |
| 174 | |
# Script entry point: the value returned by main() becomes the process exit
# status.
if __name__ == '__main__':
  sys.exit(main(sys.argv))
| OLD | NEW |