Chromium Code Reviews| Index: build/android/bb_run_sharded_steps.py |
| diff --git a/build/android/bb_run_sharded_steps.py b/build/android/bb_run_sharded_steps.py |
| new file mode 100755 |
| index 0000000000000000000000000000000000000000..5a5fd4a1cb2815b5b625cc5dcee94a40f3dc7e01 |
| --- /dev/null |
| +++ b/build/android/bb_run_sharded_steps.py |
| @@ -0,0 +1,176 @@ |
| +#!/usr/bin/env python |
| +# |
| +# Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +"""Helper script to shard build bot steps and save results to disk. |
| + |
| +Our buildbot infrastructure requires each slave to run steps serially. |
| +This is sub-optimal for android, where these steps can run independently on |
| +multiple connected devices. |
| + |
| +The buildbots will run this script multiple times per cycle: |
| +- First, without params: all steps will be executed in parallel using all |
|
tonyg
2012/12/04 23:57:56
Please explain the JSON steps file format here.
bulach
2012/12/07 13:53:00
Done.
|
connected devices. Step results will be pickled to disk (each step has a unique
name). The steps file is JSON: a dictionary mapping each unique step name to
the command line that runs that step.
The buildbot will treat this step as a regular step, and will not process any
graph data.
| + |
| +- Then, with -p STEP_NAME: at this stage, we'll simply print the file with the |
| +step results previously saved. The buildbot will then process the graph data |
| +accordingly. |
| +""" |
| + |
| + |
| +import datetime |
| +import json |
| +import logging |
| +import multiprocessing |
| +import optparse |
| +import pexpect |
| +import pickle |
| +import os |
| +import signal |
| +import shutil |
| +import sys |
| + |
| +from pylib import android_commands |
| +from pylib import cmd_helper |
| +from pylib import constants |
| +from pylib import ports |
| +from pylib import test_options_parser |
| + |
# Directory holding one pickled result file per step; wiped at the start of
# each sharded run and read back by the -p/--print_results invocations.
_OUTPUT_DIR = os.path.join(constants.CHROME_DIR, 'out', 'step_results')
| + |
| + |
def _SaveResult(result):
  """Pickles a single step result to disk, keyed by the step's name.

  Args:
    result: A dict with at least a 'name' key; it is written to
        _OUTPUT_DIR/<name> so it can be printed later with -p.
  """
  # open() rather than the deprecated file() builtin; pickle.dump writes
  # straight to the file object.
  with open(os.path.join(_OUTPUT_DIR, result['name']), 'w') as f:
    pickle.dump(result, f)
| + |
| + |
| +def _RunStepsPerDevice(steps): |
| + results = [] |
| + for step in steps: |
| + start_time = datetime.datetime.now() |
| + print 'Starting %s: %s %s at %s' % (step['name'], step['cmd'], |
| + start_time, step['device']) |
| + output, exit_code = pexpect.run( |
| + step['cmd'], cwd=os.path.abspath(constants.CHROME_DIR), |
| + withexitstatus=True, logfile=sys.stdout, timeout=1800, |
| + env=os.environ) |
| + end_time = datetime.datetime.now() |
| + print 'Finished %s: %s %s at %s' % (step['name'], step['cmd'], |
| + end_time, step['device']) |
|
tonyg
2012/12/04 23:57:56
indentation
bulach
2012/12/07 13:53:00
Done.
|
| + result = {'name': step['name'], |
| + 'output': output, |
| + 'exit_code': exit_code or 0, |
| + 'total_time': (end_time - start_time).seconds, |
| + 'device': step['device']} |
| + _SaveResult(result) |
| + results += [result] |
| + return results |
| + |
| + |
| +def _RunShardedSteps(steps, devices): |
|
tonyg
2012/12/04 23:57:56
assert steps
assert devices
bulach
2012/12/07 13:53:00
Done.
|
| + if os.path.exists(_OUTPUT_DIR): |
| + assert '/step_results' in _OUTPUT_DIR |
| + shutil.rmtree(_OUTPUT_DIR) |
| + if not os.path.exists(_OUTPUT_DIR): |
| + os.makedirs(_OUTPUT_DIR) |
| + step_names = sorted(steps.keys()) |
| + all_params = [] |
| + num_devices = len(devices) |
| + shard_size = (len(steps) + num_devices - 1) / num_devices |
| + for i in range(num_devices): |
| + device = devices[i] |
|
tonyg
2012/12/04 23:57:56
above two lines can be collapsed into:
for i, devi
bulach
2012/12/07 13:53:00
Done.
|
| + steps_per_device = [] |
| + for s in steps.keys()[i * shard_size:(i + 1) * shard_size]: |
| + steps_per_device += [{'name': s, |
| + 'device': device, |
| + 'cmd': steps[s] + ' --device ' + device + |
| + ' --keep_test_server_ports'}] |
|
tonyg
2012/12/04 23:57:56
Telemetry doesn't support --keep_test_server_ports
bulach
2012/12/07 13:53:00
ahn, very good point! added, and fixed the port al
|
| + all_params += [steps_per_device] |
| + print 'Start sharding (note: output is not synchronized...)' |
| + print '*' * 80 |
| + start_time = datetime.datetime.now() |
| + pool = multiprocessing.Pool(processes=num_devices) |
| + async_results = pool.map_async(_RunStepsPerDevice, all_params) |
| + results_per_device = async_results.get(999999) |
| + end_time = datetime.datetime.now() |
| + print '*' * 80 |
| + print 'Finished sharding.' |
| + print 'Summary' |
| + total_time = 0 |
| + for results in results_per_device: |
| + for result in results: |
| + print('%s : exit_code=%d in %d secs at %s' % |
| + (result['name'], result['exit_code'], result['total_time'], |
| + result['device'])) |
| + total_time += result['total_time'] |
| + print 'Step time: %d secs' % ((end_time - start_time).seconds) |
| + print 'Bots time: %d secs' % total_time |
| + # No exit_code for the sharding step: the individual _PrintResults step |
| + # will return the corresponding exit_code. |
| + return 0 |
| + |
| + |
| +def _PrintResults(step_name): |
|
tonyg
2012/12/04 23:57:56
_PrintStepOutput?
bulach
2012/12/07 13:53:00
Done.
|
| + file_name = os.path.join(_OUTPUT_DIR, step_name) |
| + if not os.path.exists(file_name): |
| + print 'File not found ', file_name |
| + return 1 |
| + with file(file_name, 'r') as f: |
| + result = pickle.loads(f.read()) |
| + print result['output'] |
|
tonyg
2012/12/04 23:57:56
The buildbot output colors stdout in black and std
bulach
2012/12/07 13:53:00
oh, that's a good point.. we use pexpect.run, in l
|
| + return result['exit_code'] |
| + |
| + |
def _KillPendingServers():
  """Best-effort kill of test servers left over from previous runs.

  Sends SIGQUIT to any lighttpd or web-page-replay processes, retrying up
  to 5 times in case a process takes a moment to die or respawns.
  Failures to kill are logged and otherwise ignored.
  """
  for _ in range(5):
    found_any = False
    for server in ['lighttpd', 'web-page-replay']:
      pids = cmd_helper.GetCmdOutput(['pgrep', '-f', server])
      pids = [pid.strip() for pid in pids.split('\n') if pid.strip()]
      if pids:
        found_any = True
      for pid in pids:
        try:
          logging.warning('Killing %s %s', server, pid)
          os.kill(int(pid), signal.SIGQUIT)
        except Exception as e:
          # Best effort: the process may already be gone or be owned by
          # another user; never fail the build because of this.
          logging.warning('Failed killing %s %s %s', server, pid, e)
    if not found_any:
      # Nothing matched on this pass; no point in retrying.
      break
| + |
| + |
| +def main(argv): |
| + parser = optparse.OptionParser() |
| + parser.add_option('-s', '--steps', |
| + help='A JSON file containing all the steps to be ' |
| + 'sharded.') |
| + parser.add_option('-p', '--print_results', |
| + help='Only prints the results for the previously ' |
| + 'executed step, do not run it again.') |
| + test_options_parser.AddBuildTypeOption(parser) |
|
tonyg
2012/12/04 23:57:56
This looks unused
bulach
2012/12/07 13:53:00
Done.
|
| + options, urls = parser.parse_args(argv) |
| + if options.print_results: |
| + return _PrintResults(options.print_results) |
| + |
| + # At this point, we should kill everything that may have been left over from |
| + # previous runs. |
| + _KillPendingServers() |
| + |
| + # Reset the test port allocation. It's important to do it before starting |
| + # to dispatch any step. |
| + if not ports.ResetTestServerPortAllocation(): |
| + raise Exception('Failed to reset test server port.') |
| + |
| + # Sort the devices so that we'll try to always run a step in the same device. |
| + devices = sorted(android_commands.GetAttachedDevices()) |
| + if not devices: |
| + print 'You must attach a device' |
| + return 1 |
| + |
| + with file(options.steps, 'r') as f: |
| + steps = json.load(f) |
| + return _RunShardedSteps(steps, devices) |
| + |
| + |
# Script entry point: exit with main()'s return value so buildbot can use
# the process exit code as the step status.
if __name__ == '__main__':
  sys.exit(main(sys.argv))