OLD | NEW |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # | 2 # |
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
6 | 6 |
7 """Helper script to shard build bot steps and save results to disk. | 7 """DEPRECATED! |
8 | 8 TODO(bulach): remove me once all other repositories reference |
9 Our buildbot infrastructure requires each slave to run steps serially. | 9 'test_runner.py perf' directly. |
10 This is sub-optimal for android, where these steps can run independently on | |
11 multiple connected devices. | |
12 | |
13 The buildbots will run this script multiple times per cycle: | |
14 - First: all steps listed in -s in will be executed in parallel using all | |
15 connected devices. Step results will be pickled to disk. Each step has a unique | |
16 name. The result code will be ignored if the step name is listed in | |
17 --flaky_steps. | |
18 The buildbot will treat this step as a regular step, and will not process any | |
19 graph data. | |
20 | |
21 - Then, with -p STEP_NAME: at this stage, we'll simply print the file with the | |
22 step results previously saved. The buildbot will then process the graph data | |
23 accordingly. | |
24 | |
25 The JSON steps file contains a dictionary in the format: | |
26 { | |
27 "step_name_foo": "script_to_execute foo", | |
28 "step_name_bar": "script_to_execute bar" | |
29 } | |
30 | |
31 The JSON flaky steps file contains a list with step names which results should | |
32 be ignored: | |
33 [ | |
34 "step_name_foo", | |
35 "step_name_bar" | |
36 ] | |
37 | |
38 Note that script_to_execute necessarily have to take at least the following | |
39 options: | |
40 --device: the serial number to be passed to all adb commands. | |
41 --keep_test_server_ports: indicates it's being run as a shard, and shouldn't | |
42 reset test server port allocation. | |
43 """ | 10 """ |
44 | 11 |
12 import optparse | |
13 import os | |
14 import sys | |
45 | 15 |
46 import datetime | |
47 import json | |
48 import logging | |
49 import multiprocessing | |
50 import optparse | |
51 import pexpect | |
52 import pickle | |
53 import os | |
54 import signal | |
55 import shutil | |
56 import sys | |
57 import time | |
58 | |
59 from pylib import android_commands | |
60 from pylib import cmd_helper | 16 from pylib import cmd_helper |
61 from pylib import constants | |
62 from pylib import forwarder | |
63 from pylib import ports | |
64 | |
65 | |
66 _OUTPUT_DIR = os.path.join(constants.DIR_SOURCE_ROOT, 'out', 'step_results') | |
67 | |
68 | |
69 def _SaveResult(result): | |
70 with file(os.path.join(_OUTPUT_DIR, result['name']), 'w') as f: | |
71 f.write(pickle.dumps(result)) | |
72 | |
73 | |
74 def _RunStepsPerDevice(steps): | |
75 results = [] | |
76 for step in steps: | |
77 start_time = datetime.datetime.now() | |
78 print 'Starting %s: %s %s at %s' % (step['name'], step['cmd'], | |
79 start_time, step['device']) | |
80 output, exit_code = pexpect.run( | |
81 step['cmd'], cwd=os.path.abspath(constants.DIR_SOURCE_ROOT), | |
82 withexitstatus=True, logfile=sys.stdout, timeout=1800, | |
83 env=os.environ) | |
84 exit_code = exit_code or 0 | |
85 end_time = datetime.datetime.now() | |
86 exit_msg = '%s %s' % (exit_code, | |
87 '(ignored, flaky step)' if step['is_flaky'] else '') | |
88 print 'Finished %s: %s %s %s at %s' % (step['name'], exit_msg, step['cmd'], | |
89 end_time, step['device']) | |
90 if step['is_flaky']: | |
91 exit_code = 0 | |
92 result = {'name': step['name'], | |
93 'output': output, | |
94 'exit_code': exit_code, | |
95 'total_time': (end_time - start_time).seconds, | |
96 'device': step['device']} | |
97 _SaveResult(result) | |
98 results += [result] | |
99 return results | |
100 | |
101 | |
102 def _RunShardedSteps(steps, flaky_steps, devices): | |
103 assert steps | |
104 assert devices, 'No devices connected?' | |
105 if os.path.exists(_OUTPUT_DIR): | |
106 assert '/step_results' in _OUTPUT_DIR | |
107 shutil.rmtree(_OUTPUT_DIR) | |
108 if not os.path.exists(_OUTPUT_DIR): | |
109 os.makedirs(_OUTPUT_DIR) | |
110 step_names = sorted(steps.keys()) | |
111 all_params = [] | |
112 num_devices = len(devices) | |
113 shard_size = (len(steps) + num_devices - 1) / num_devices | |
114 for i, device in enumerate(devices): | |
115 steps_per_device = [] | |
116 for s in steps.keys()[i * shard_size:(i + 1) * shard_size]: | |
117 steps_per_device += [{'name': s, | |
118 'device': device, | |
119 'is_flaky': s in flaky_steps, | |
120 'cmd': steps[s] + ' --device ' + device + | |
121 ' --keep_test_server_ports'}] | |
122 all_params += [steps_per_device] | |
123 print 'Start sharding (note: output is not synchronized...)' | |
124 print '*' * 80 | |
125 start_time = datetime.datetime.now() | |
126 pool = multiprocessing.Pool(processes=num_devices) | |
127 async_results = pool.map_async(_RunStepsPerDevice, all_params) | |
128 results_per_device = async_results.get(999999) | |
129 end_time = datetime.datetime.now() | |
130 print '*' * 80 | |
131 print 'Finished sharding.' | |
132 print 'Summary' | |
133 total_time = 0 | |
134 for results in results_per_device: | |
135 for result in results: | |
136 print('%s : exit_code=%d in %d secs at %s' % | |
137 (result['name'], result['exit_code'], result['total_time'], | |
138 result['device'])) | |
139 total_time += result['total_time'] | |
140 print 'Step time: %d secs' % ((end_time - start_time).seconds) | |
141 print 'Bots time: %d secs' % total_time | |
142 # No exit_code for the sharding step: the individual _PrintResults step | |
143 # will return the corresponding exit_code. | |
144 return 0 | |
145 | |
146 | |
147 def _PrintStepOutput(step_name): | |
148 file_name = os.path.join(_OUTPUT_DIR, step_name) | |
149 if not os.path.exists(file_name): | |
150 print 'File not found ', file_name | |
151 return 1 | |
152 with file(file_name, 'r') as f: | |
153 result = pickle.loads(f.read()) | |
154 print result['output'] | |
155 return result['exit_code'] | |
156 | |
157 | |
158 def _PrintAllStepsOutput(steps): | |
159 with file(steps, 'r') as f: | |
160 steps = json.load(f) | |
161 ret = 0 | |
162 for step_name in steps.keys(): | |
163 ret |= _PrintStepOutput(step_name) | |
164 return ret | |
165 | |
166 | |
167 def _KillPendingServers(): | |
168 for retry in range(5): | |
169 for server in ['lighttpd', 'web-page-replay']: | |
170 pids = cmd_helper.GetCmdOutput(['pgrep', '-f', server]) | |
171 pids = [pid.strip() for pid in pids.split('\n') if pid.strip()] | |
172 for pid in pids: | |
173 try: | |
174 logging.warning('Killing %s %s', server, pid) | |
175 os.kill(int(pid), signal.SIGQUIT) | |
176 except Exception as e: | |
177 logging.warning('Failed killing %s %s %s', server, pid, e) | |
178 # Restart the adb server with taskset to set a single CPU affinity. | |
179 cmd_helper.RunCmd(['adb', 'kill-server']) | |
180 cmd_helper.RunCmd(['taskset', '-c', '0', 'adb', 'start-server']) | |
181 cmd_helper.RunCmd(['taskset', '-c', '0', 'adb', 'root']) | |
182 i = 1 | |
183 while not android_commands.GetAttachedDevices(): | |
184 time.sleep(i) | |
185 i *= 2 | |
186 if i > 10: | |
187 break | |
188 | 17 |
189 | 18 |
190 def main(argv): | 19 def main(argv): |
191 parser = optparse.OptionParser() | 20 parser = optparse.OptionParser() |
192 parser.add_option('-s', '--steps', | 21 parser.add_option('-s', '--steps', |
193 help='A JSON file containing all the steps to be ' | 22 help='A JSON file containing all the steps to be ' |
194 'sharded.') | 23 'sharded.') |
195 parser.add_option('--flaky_steps', | 24 parser.add_option('--flaky_steps', |
196 help='A JSON file containing steps that are flaky and ' | 25 help='A JSON file containing steps that are flaky and ' |
197 'will have its exit code ignored.') | 26 'will have its exit code ignored.') |
198 parser.add_option('-p', '--print_results', | 27 parser.add_option('-p', '--print_results', |
199 help='Only prints the results for the previously ' | 28 help='Only prints the results for the previously ' |
200 'executed step, do not run it again.') | 29 'executed step, do not run it again.') |
201 parser.add_option('-P', '--print_all', | |
202 help='Only prints the results for the previously ' | |
203 'executed steps, do not run them again.') | |
bulach
2013/08/14 10:18:35
this has never been used, pending discussions here
| |
204 options, urls = parser.parse_args(argv) | 30 options, urls = parser.parse_args(argv) |
205 if options.print_results: | 31 if options.print_results: |
206 return _PrintStepOutput(options.print_results) | 32 return cmd_helper.RunCmd(['build/android/test_runner.py', 'perf', |
207 if options.print_all: | 33 '--print-step', options.print_results]) |
208 return _PrintAllStepsOutput(options.print_all) | 34 flaky_options = [] |
209 | |
210 # At this point, we should kill everything that may have been left over from | |
211 # previous runs. | |
212 _KillPendingServers() | |
213 | |
214 forwarder.Forwarder.UseMultiprocessing() | |
215 | |
216 # Reset the test port allocation. It's important to do it before starting | |
217 # to dispatch any step. | |
218 if not ports.ResetTestServerPortAllocation(): | |
219 raise Exception('Failed to reset test server port.') | |
220 | |
221 # Sort the devices so that we'll try to always run a step in the same device. | |
222 devices = sorted(android_commands.GetAttachedDevices()) | |
223 if not devices: | |
224 print 'You must attach a device' | |
225 return 1 | |
226 | |
227 with file(options.steps, 'r') as f: | |
228 steps = json.load(f) | |
229 flaky_steps = [] | |
230 if options.flaky_steps: | 35 if options.flaky_steps: |
231 with file(options.flaky_steps, 'r') as f: | 36 flaky_options = ['--flaky-steps', options.flaky_steps] |
232 flaky_steps = json.load(f) | 37 return cmd_helper.RunCmd(['build/android/test_runner.py', 'perf', |
233 return _RunShardedSteps(steps, flaky_steps, devices) | 38 '--steps', options.steps] + flaky_options) |
234 | 39 |
235 | 40 |
236 if __name__ == '__main__': | 41 if __name__ == '__main__': |
237 sys.exit(main(sys.argv)) | 42 sys.exit(main(sys.argv)) |
OLD | NEW |