OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/env python | |
2 # | |
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
4 # Use of this source code is governed by a BSD-style license that can be | |
5 # found in the LICENSE file. | |
6 | |
7 """Helper script to shard build bot steps and save results to disk. | |
8 | |
9 Our buildbot infrastructure requires each slave to run steps serially. | |
10 This is sub-optimal for Android, where these steps can run independently on | |
11 multiple connected devices. | |
12 | |
13 The buildbots will run this script multiple times per cycle: | |
14 - First, with -s STEPS_FILE (a JSON file mapping each step name to its command line): all steps will be executed in parallel using all | |
tonyg
2012/12/04 23:57:56
Please explain the JSON steps file format here.
bulach
2012/12/07 13:53:00
Done.
| |
15 connected devices. Step results will be pickled to disk (each step has a unique | |
16 name). | |
17 The buildbot will treat this step as a regular step, and will not process any | |
18 graph data. | |
19 | |
20 - Then, with -p STEP_NAME: at this stage, we'll simply print the file with the | |
21 step results previously saved. The buildbot will then process the graph data | |
22 accordingly. | |
23 """ | |
24 | |
25 | |
26 import datetime | |
27 import json | |
28 import logging | |
29 import multiprocessing | |
30 import optparse | |
31 import pexpect | |
32 import pickle | |
33 import os | |
34 import signal | |
35 import shutil | |
36 import sys | |
37 | |
38 from pylib import android_commands | |
39 from pylib import cmd_helper | |
40 from pylib import constants | |
41 from pylib import ports | |
42 from pylib import test_options_parser | |
43 | |
# Directory holding the pickled step results, one file per step name.
_OUTPUT_DIR = os.path.join(constants.CHROME_DIR, 'out', 'step_results')
45 | |
46 | |
def _SaveResult(result):
  """Pickles a step result into a file named after the step in _OUTPUT_DIR.

  Args:
    result: A dict describing a finished step. Its 'name' entry is used as the
        file name, so two results with the same name overwrite each other.
  """
  # open() replaces the deprecated file() builtin. Binary mode keeps the
  # pickled bytes untouched regardless of platform or pickle protocol.
  with open(os.path.join(_OUTPUT_DIR, result['name']), 'wb') as f:
    pickle.dump(result, f)
50 | |
51 | |
def _RunStepsPerDevice(steps):
  """Runs the given steps one after another and saves each result to disk.

  Args:
    steps: A list of dicts, each with 'name', 'cmd' and 'device' entries.

  Returns:
    A list with one result dict per step, in execution order. Each result has
    the keys 'name', 'output', 'exit_code', 'total_time' and 'device'.
  """
  working_dir = os.path.abspath(constants.CHROME_DIR)
  results = []
  for step in steps:
    name, cmd, device = step['name'], step['cmd'], step['device']

    started = datetime.datetime.now()
    print('Starting %s: %s %s at %s' % (name, cmd, started, device))
    output, exit_code = pexpect.run(
        cmd,
        cwd=working_dir,
        withexitstatus=True,
        logfile=sys.stdout,
        timeout=1800,
        env=os.environ)
    finished = datetime.datetime.now()
    print('Finished %s: %s %s at %s' % (name, cmd, finished, device))

    result = {
        'name': name,
        'output': output,
        # NOTE(review): a missing (None) exit status is recorded as 0, i.e. as
        # success. Presumably this can happen when the command times out or is
        # killed by a signal -- confirm that this is intended.
        'exit_code': exit_code or 0,
        'total_time': (finished - started).seconds,
        'device': device,
    }
    _SaveResult(result)
    results.append(result)
  return results
73 | |
74 | |
def _RunShardedSteps(steps, devices):
  """Splits the steps across the devices and runs all shards in parallel.

  The output directory is recreated from scratch, then one worker process per
  device runs its share of the steps and a summary is printed.

  Args:
    steps: A dict mapping each step name to its command line string.
    devices: A non-empty list of device identifiers (strings).

  Returns:
    Always 0: the exit code of each individual step is reported later, when
    its saved result is printed.

  Raises:
    ValueError: If devices is empty.
  """
  if not devices:
    raise ValueError('At least one device is required to shard the steps.')

  if os.path.exists(_OUTPUT_DIR):
    # Sanity check on the path before deleting it recursively.
    assert '/step_results' in _OUTPUT_DIR
    shutil.rmtree(_OUTPUT_DIR)
  if not os.path.exists(_OUTPUT_DIR):
    os.makedirs(_OUTPUT_DIR)

  # Shard over the *sorted* step names so that the step-to-device assignment
  # does not depend on dict ordering.
  step_names = sorted(steps)
  num_devices = len(devices)
  # Ceiling division; '//' keeps the result an integer on Python 3 as well.
  shard_size = (len(step_names) + num_devices - 1) // num_devices

  all_params = []
  for i, device in enumerate(devices):
    shard = step_names[i * shard_size:(i + 1) * shard_size]
    # NOTE(review): a reviewer pointed out that Telemetry does not support
    # --keep_test_server_ports -- confirm that every step accepts this flag.
    all_params.append([
        {'name': name,
         'device': device,
         'cmd': (steps[name] + ' --device ' + device +
                 ' --keep_test_server_ports')}
        for name in shard])

  print('Start sharding (note: output is not synchronized...)')
  print('*' * 80)
  start_time = datetime.datetime.now()
  pool = multiprocessing.Pool(processes=num_devices)
  async_results = pool.map_async(_RunStepsPerDevice, all_params)
  # Presumably the very large timeout is only there to keep the wait
  # interruptible -- TODO confirm.
  results_per_device = async_results.get(999999)
  end_time = datetime.datetime.now()
  print('*' * 80)
  print('Finished sharding.')
  print('Summary')

  total_time = 0
  for results in results_per_device:
    for result in results:
      print('%s : exit_code=%d in %d secs at %s' %
            (result['name'], result['exit_code'], result['total_time'],
             result['device']))
      total_time += result['total_time']
  print('Step time: %d secs' % (end_time - start_time).seconds)
  print('Bots time: %d secs' % total_time)
  # No exit_code for the sharding step: the individual _PrintResults step
  # will return the corresponding exit_code.
  return 0
116 | |
117 | |
def _PrintResults(step_name):
  """Prints the saved output of a previously executed step.

  Args:
    step_name: Name of the step, which is also the name of its result file
        inside _OUTPUT_DIR.

  Returns:
    The exit code stored in the step's result, or 1 if no result file exists.
  """
  file_name = os.path.join(_OUTPUT_DIR, step_name)
  if not os.path.exists(file_name):
    # Produces the same text as the original
    # "print 'File not found ', file_name" statement.
    print('File not found  %s' % file_name)
    return 1
  # open() replaces the deprecated file() builtin, and pickles are read in
  # binary mode. Unpickling executes whatever the file describes, so this
  # must only ever be pointed at files written by this script.
  with open(file_name, 'rb') as f:
    result = pickle.load(f)
  print(result['output'])
  return result['exit_code']
127 | |
128 | |
def _KillPendingServers():
  """Sends SIGQUIT to every lighttpd and web-page-replay process found.

  The pgrep-and-kill pass is repeated five times. Failures to kill a process
  are logged and otherwise ignored.
  """
  server_names = ('lighttpd', 'web-page-replay')
  for _ in range(5):
    for server in server_names:
      pgrep_output = cmd_helper.GetCmdOutput(['pgrep', '-f', server])
      stripped_lines = (line.strip() for line in pgrep_output.split('\n'))
      for pid in [line for line in stripped_lines if line]:
        try:
          logging.warning('Killing %s %s', server, pid)
          os.kill(int(pid), signal.SIGQUIT)
        except Exception as e:
          # Best effort: report the failure and move on to the next pid.
          logging.warning('Failed killing %s %s %s', server, pid, e)
140 | |
141 | |
def main(argv):
  """Either prints a saved step result or runs all steps sharded on devices.

  Args:
    argv: The command line arguments, as in sys.argv.

  Returns:
    The exit code for the process: the saved step's exit code when
    --print_results is given, 1 when no device is attached, otherwise the
    return value of _RunShardedSteps.

  Raises:
    Exception: If the test server port allocation cannot be reset.
    SystemExit: Via parser.error() when neither --print_results nor --steps
        is given.
  """
  parser = optparse.OptionParser()
  parser.add_option('-s', '--steps',
                    help='A JSON file containing all the steps to be '
                         'sharded.')
  parser.add_option('-p', '--print_results',
                    help='Only prints the results for the previously '
                         'executed step, do not run it again.')
  # NOTE(review): flagged as unused during review. Retained so that any
  # options it registers keep being accepted on existing command lines.
  test_options_parser.AddBuildTypeOption(parser)
  options, _ = parser.parse_args(argv)
  if options.print_results:
    return _PrintResults(options.print_results)

  # Fail early with a usage message instead of crashing on open(None) after
  # servers have already been killed and ports reset.
  if not options.steps:
    parser.error('--steps is required unless --print_results is given.')

  # At this point, we should kill everything that may have been left over from
  # previous runs.
  _KillPendingServers()

  # Reset the test port allocation. It's important to do it before starting
  # to dispatch any step.
  if not ports.ResetTestServerPortAllocation():
    raise Exception('Failed to reset test server port.')

  # Sort the devices so that we'll try to always run a step in the same device.
  devices = sorted(android_commands.GetAttachedDevices())
  if not devices:
    print('You must attach a device')
    return 1

  with open(options.steps, 'r') as f:
    steps = json.load(f)
  return _RunShardedSteps(steps, devices)
173 | |
174 | |
# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == '__main__':
  sys.exit(main(sys.argv))
OLD | NEW |