Index: build/android/bb_run_sharded_steps.py |
diff --git a/build/android/bb_run_sharded_steps.py b/build/android/bb_run_sharded_steps.py |
new file mode 100755 |
index 0000000000000000000000000000000000000000..5a5fd4a1cb2815b5b625cc5dcee94a40f3dc7e01 |
--- /dev/null |
+++ b/build/android/bb_run_sharded_steps.py |
@@ -0,0 +1,176 @@ |
+#!/usr/bin/env python |
+# |
+# Copyright (c) 2012 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+"""Helper script to shard build bot steps and save results to disk. |
+ |
+Our buildbot infrastructure requires each slave to run steps serially. |
+This is sub-optimal for android, where these steps can run independently on |
+multiple connected devices. |
+ |
+The buildbots will run this script multiple times per cycle: |
+- First, without params: all steps will be executed in parallel using all |
tonyg
2012/12/04 23:57:56
Please explain the JSON steps file format here.
bulach
2012/12/07 13:53:00
Done.
|
+connected devices. Step results will be pickled to disk (each step has a unique |
+name). |
+The buildbot will treat this step as a regular step, and will not process any |
+graph data. |
+ |
+- Then, with -p STEP_NAME: at this stage, we'll simply print the file with the |
+step results previously saved. The buildbot will then process the graph data |
+accordingly. |
+""" |
+ |
+ |
+import datetime |
+import json |
+import logging |
+import multiprocessing |
+import optparse |
+import pexpect |
+import pickle |
+import os |
+import signal |
+import shutil |
+import sys |
+ |
+from pylib import android_commands |
+from pylib import cmd_helper |
+from pylib import constants |
+from pylib import ports |
+from pylib import test_options_parser |
+ |
+_OUTPUT_DIR = os.path.join(constants.CHROME_DIR, 'out', 'step_results') |
+ |
+ |
+def _SaveResult(result): |
+ with file(os.path.join(_OUTPUT_DIR, result['name']), 'w') as f: |
+ f.write(pickle.dumps(result)) |
+ |
+ |
+def _RunStepsPerDevice(steps): |
+ results = [] |
+ for step in steps: |
+ start_time = datetime.datetime.now() |
+ print 'Starting %s: %s %s at %s' % (step['name'], step['cmd'], |
+ start_time, step['device']) |
+ output, exit_code = pexpect.run( |
+ step['cmd'], cwd=os.path.abspath(constants.CHROME_DIR), |
+ withexitstatus=True, logfile=sys.stdout, timeout=1800, |
+ env=os.environ) |
+ end_time = datetime.datetime.now() |
+ print 'Finished %s: %s %s at %s' % (step['name'], step['cmd'], |
+ end_time, step['device']) |
tonyg
2012/12/04 23:57:56
indentation
bulach
2012/12/07 13:53:00
Done.
|
+ result = {'name': step['name'], |
+ 'output': output, |
+ 'exit_code': exit_code or 0, |
+ 'total_time': (end_time - start_time).seconds, |
+ 'device': step['device']} |
+ _SaveResult(result) |
+ results += [result] |
+ return results |
+ |
+ |
+def _RunShardedSteps(steps, devices): |
tonyg
2012/12/04 23:57:56
assert steps
assert devices
bulach
2012/12/07 13:53:00
Done.
|
+ if os.path.exists(_OUTPUT_DIR): |
+ assert '/step_results' in _OUTPUT_DIR |
+ shutil.rmtree(_OUTPUT_DIR) |
+ if not os.path.exists(_OUTPUT_DIR): |
+ os.makedirs(_OUTPUT_DIR) |
+ step_names = sorted(steps.keys()) |
+ all_params = [] |
+ num_devices = len(devices) |
+ shard_size = (len(steps) + num_devices - 1) / num_devices |
+ for i in range(num_devices): |
+ device = devices[i] |
tonyg
2012/12/04 23:57:56
above two lines can be collapsed into:
for i, devi
bulach
2012/12/07 13:53:00
Done.
|
+ steps_per_device = [] |
+ for s in steps.keys()[i * shard_size:(i + 1) * shard_size]: |
+ steps_per_device += [{'name': s, |
+ 'device': device, |
+ 'cmd': steps[s] + ' --device ' + device + |
+ ' --keep_test_server_ports'}] |
tonyg
2012/12/04 23:57:56
Telemetry doesn't support --keep_test_server_ports
bulach
2012/12/07 13:53:00
ahn, very good point! added, and fixed the port al
|
+ all_params += [steps_per_device] |
+ print 'Start sharding (note: output is not synchronized...)' |
+ print '*' * 80 |
+ start_time = datetime.datetime.now() |
+ pool = multiprocessing.Pool(processes=num_devices) |
+ async_results = pool.map_async(_RunStepsPerDevice, all_params) |
+ results_per_device = async_results.get(999999) |
+ end_time = datetime.datetime.now() |
+ print '*' * 80 |
+ print 'Finished sharding.' |
+ print 'Summary' |
+ total_time = 0 |
+ for results in results_per_device: |
+ for result in results: |
+ print('%s : exit_code=%d in %d secs at %s' % |
+ (result['name'], result['exit_code'], result['total_time'], |
+ result['device'])) |
+ total_time += result['total_time'] |
+ print 'Step time: %d secs' % ((end_time - start_time).seconds) |
+ print 'Bots time: %d secs' % total_time |
+ # No exit_code for the sharding step: the individual _PrintResults step |
+ # will return the corresponding exit_code. |
+ return 0 |
+ |
+ |
+def _PrintResults(step_name): |
tonyg
2012/12/04 23:57:56
_PrintStepOutput?
bulach
2012/12/07 13:53:00
Done.
|
+ file_name = os.path.join(_OUTPUT_DIR, step_name) |
+ if not os.path.exists(file_name): |
+ print 'File not found ', file_name |
+ return 1 |
+ with file(file_name, 'r') as f: |
+ result = pickle.loads(f.read()) |
+ print result['output'] |
tonyg
2012/12/04 23:57:56
The buildbot output colors stdout in black and std
bulach
2012/12/07 13:53:00
oh, that's a good point.. we use pexpect.run, in l
|
+ return result['exit_code'] |
+ |
+ |
+def _KillPendingServers(): |
+ for retry in range(5): |
+ for server in ['lighttpd', 'web-page-replay']: |
+ pids = cmd_helper.GetCmdOutput(['pgrep', '-f', server]) |
+ pids = [pid.strip() for pid in pids.split('\n') if pid.strip()] |
+ for pid in pids: |
+ try: |
+ logging.warning('Killing %s %s', server, pid) |
+ os.kill(int(pid), signal.SIGQUIT) |
+ except Exception as e: |
+ logging.warning('Failed killing %s %s %s', server, pid, e) |
+ |
+ |
+def main(argv): |
+ parser = optparse.OptionParser() |
+ parser.add_option('-s', '--steps', |
+ help='A JSON file containing all the steps to be ' |
+ 'sharded.') |
+ parser.add_option('-p', '--print_results', |
+ help='Only prints the results for the previously ' |
+ 'executed step, do not run it again.') |
+ test_options_parser.AddBuildTypeOption(parser) |
tonyg
2012/12/04 23:57:56
This looks unused
bulach
2012/12/07 13:53:00
Done.
|
+ options, urls = parser.parse_args(argv) |
+ if options.print_results: |
+ return _PrintResults(options.print_results) |
+ |
+ # At this point, we should kill everything that may have been left over from |
+ # previous runs. |
+ _KillPendingServers() |
+ |
+ # Reset the test port allocation. It's important to do it before starting |
+ # to dispatch any step. |
+ if not ports.ResetTestServerPortAllocation(): |
+ raise Exception('Failed to reset test server port.') |
+ |
+ # Sort the devices so that we'll try to always run a step in the same device. |
+ devices = sorted(android_commands.GetAttachedDevices()) |
+ if not devices: |
+ print 'You must attach a device' |
+ return 1 |
+ |
+ with file(options.steps, 'r') as f: |
+ steps = json.load(f) |
+ return _RunShardedSteps(steps, devices) |
+ |
+ |
+if __name__ == '__main__': |
+ sys.exit(main(sys.argv)) |