Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(540)

Side by Side Diff: build/android/bb_run_sharded_steps.py

Issue 11308344: Telemetry: shard tests on android. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 #
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file.
6
7 """Helper script to shard build bot steps and save results to disk.
8
9 Our buildbot infrastructure requires each slave to run steps serially.
10 This is sub-optimal for android, where these steps can run independently on
11 multiple connected devices.
12
13 The buildbots will run this script multiple times per cycle:
14 - First, with -s STEPS_FILE: all steps will be executed in parallel using all
tonyg 2012/12/04 23:57:56 Please explain the JSON steps file format here.
bulach 2012/12/07 13:53:00 Done.
15 connected devices. Step results will be pickled to disk (each step has a unique
16 name).
17 The buildbot will treat this step as a regular step, and will not process any
18 graph data.
19
20 - Then, with -p STEP_NAME: at this stage, we'll simply print the file with the
21 step results previously saved. The buildbot will then process the graph data
22 accordingly.
23 """
24
25
26 import datetime
27 import json
28 import logging
29 import multiprocessing
30 import optparse
31 import pexpect
32 import pickle
33 import os
34 import signal
35 import shutil
36 import sys
37
38 from pylib import android_commands
39 from pylib import cmd_helper
40 from pylib import constants
41 from pylib import ports
42 from pylib import test_options_parser
43
# Directory holding the saved step results: one pickled file per step, named
# after the step.
_OUTPUT_DIR = os.path.join(constants.CHROME_DIR, 'out', 'step_results')
45
46
def _SaveResult(result):
  """Pickles a step result to disk so that a later invocation can print it.

  Args:
    result: A dict describing a finished step. Its 'name' entry is used as the
      file name inside _OUTPUT_DIR; the whole dict is pickled into that file.
  """
  # open() replaces the deprecated file() builtin. Pickle data is bytes, so the
  # file is opened in binary mode.
  with open(os.path.join(_OUTPUT_DIR, result['name']), 'wb') as f:
    pickle.dump(result, f)
50
51
def _RunStepsPerDevice(steps):
  """Runs the given steps one after another, saving each result to disk.

  Args:
    steps: A list of dicts, each with 'name', 'cmd' and 'device' entries.

  Returns:
    A list with one result dict per step, holding 'name', 'output',
    'exit_code', 'total_time' (in seconds) and 'device'.
  """
  all_results = []
  for current in steps:
    started = datetime.datetime.now()
    name = current['name']
    command = current['cmd']
    device = current['device']
    print('Starting %s: %s %s at %s' % (name, command, started, device))
    # The command's output is echoed to stdout while it runs and is also
    # captured so that it can be saved with the result.
    output, exit_code = pexpect.run(
        command, cwd=os.path.abspath(constants.CHROME_DIR),
        withexitstatus=True, logfile=sys.stdout, timeout=1800,
        env=os.environ)
    finished = datetime.datetime.now()
    print('Finished %s: %s %s at %s' % (name, command, finished, device))
    elapsed = finished - started
    step_result = {
        'name': name,
        'output': output,
        # NOTE(review): a falsy exit status (including None) is recorded as 0.
        # Presumably pexpect reports None when the command did not exit
        # normally (e.g. on timeout) -- confirm that treating it as success
        # is intended.
        'exit_code': exit_code or 0,
        'total_time': elapsed.seconds,
        'device': device,
    }
    _SaveResult(step_result)
    all_results.append(step_result)
  return all_results
73
74
def _RunShardedSteps(steps, devices):
  """Distributes the steps over the devices and runs the shards in parallel.

  The output directory is wiped and recreated first, so only results from this
  run are left on disk afterwards.

  Args:
    steps: A dict mapping each step name to its command line (a string).
    devices: A non-empty list of device identifiers. Each one is appended to
      the commands of its shard via --device. An empty list raises
      ZeroDivisionError.

  Returns:
    Always 0: the exit code of each step is reported later, when its saved
    result is printed.
  """
  if os.path.exists(_OUTPUT_DIR):
    # Sanity check before recursively deleting a directory.
    assert '/step_results' in _OUTPUT_DIR
    shutil.rmtree(_OUTPUT_DIR)
  if not os.path.exists(_OUTPUT_DIR):
    os.makedirs(_OUTPUT_DIR)

  # Shard over the *sorted* names so that, for a given set of steps and
  # devices, a step is always assigned to the same device.
  step_names = sorted(steps)
  num_devices = len(devices)
  # Ceiling division: every step must land in some shard.
  shard_size = (len(step_names) + num_devices - 1) // num_devices
  all_params = []
  for i, device in enumerate(devices):
    shard = step_names[i * shard_size:(i + 1) * shard_size]
    all_params.append([
        {'name': name,
         'device': device,
         'cmd': (steps[name] + ' --device ' + device +
                 ' --keep_test_server_ports')}
        for name in shard])

  print('Start sharding (note: output is not synchronized...)')
  print('*' * 80)
  start_time = datetime.datetime.now()
  pool = multiprocessing.Pool(processes=num_devices)
  try:
    async_results = pool.map_async(_RunStepsPerDevice, all_params)
    # NOTE(review): the huge timeout looks like the usual workaround that
    # keeps the wait interruptible (Ctrl-C) -- confirm.
    results_per_device = async_results.get(999999)
    end_time = datetime.datetime.now()
  finally:
    # Either every shard has finished or the wait failed; in both cases the
    # workers are no longer needed, so release them.
    pool.terminate()
    pool.join()

  print('*' * 80)
  print('Finished sharding.')
  print('Summary')
  total_time = 0
  for results in results_per_device:
    for result in results:
      print('%s : exit_code=%d in %d secs at %s' %
            (result['name'], result['exit_code'], result['total_time'],
             result['device']))
      total_time += result['total_time']
  print('Step time: %d secs' % ((end_time - start_time).seconds))
  print('Bots time: %d secs' % total_time)
  # No exit_code for the sharding step: the individual _PrintResults step
  # will return the corresponding exit_code.
  return 0
116
117
def _PrintResults(step_name):
  """Prints the saved output of a previously executed step.

  Args:
    step_name: Name of the step, which is also the name of its result file
      inside _OUTPUT_DIR.

  Returns:
    The exit code stored in the step result, or 1 if no result file exists.
  """
  file_name = os.path.join(_OUTPUT_DIR, step_name)
  if not os.path.exists(file_name):
    # The double space is kept so that the emitted message stays unchanged.
    print('File not found  ' + file_name)
    return 1
  # NOTE(review): unpickling can execute arbitrary code. This is only
  # acceptable as long as the file was written by this script itself
  # (see _SaveResult) and the output directory is trusted.
  with open(file_name, 'rb') as f:
    result = pickle.load(f)
  print(result['output'])
  return result['exit_code']
127
128
def _KillPendingServers():
  """Sends SIGQUIT to lighttpd / web-page-replay processes still running.

  Makes up to five passes over the matching processes and stops as soon as a
  pass finds none. This is best effort: a failure to kill a process is logged
  and otherwise ignored.
  """
  for _ in range(5):
    found_any = False
    for server in ['lighttpd', 'web-page-replay']:
      # Assumes GetCmdOutput returns pgrep's stdout: one pid per line.
      pgrep_output = cmd_helper.GetCmdOutput(['pgrep', '-f', server])
      pids = [pid.strip() for pid in pgrep_output.split('\n') if pid.strip()]
      for pid in pids:
        found_any = True
        try:
          logging.warning('Killing %s %s', server, pid)
          os.kill(int(pid), signal.SIGQUIT)
        except Exception as e:
          # Deliberately broad: cleanup must never abort the run.
          logging.warning('Failed killing %s %s %s', server, pid, e)
    if not found_any:
      # Nothing left to kill, so further passes would only re-run pgrep.
      return
140
141
def main(argv):
  """Runs all steps sharded over the devices, or prints one saved result.

  With -p STEP_NAME only the previously saved result of that step is printed.
  Otherwise -s STEPS_FILE is required and every step in that JSON file is run.

  Args:
    argv: The command line arguments, handed to optparse as given.

  Returns:
    The exit code for the process.

  Raises:
    Exception: If the test server port allocation cannot be reset.
  """
  parser = optparse.OptionParser()
  parser.add_option('-s', '--steps',
                    help='A JSON file containing all the steps to be '
                         'sharded.')
  parser.add_option('-p', '--print_results',
                    help='Only prints the results for the previously '
                         'executed step, do not run it again.')
  test_options_parser.AddBuildTypeOption(parser)
  options, _ = parser.parse_args(argv)
  if options.print_results:
    return _PrintResults(options.print_results)

  # Fail early, before anything is killed or reset, when there is nothing to
  # run. parser.error() prints the usage and exits with status 2.
  if not options.steps:
    parser.error('Either --steps or --print_results must be given.')

  # At this point, we should kill everything that may have been left over from
  # previous runs.
  _KillPendingServers()

  # Reset the test port allocation. It's important to do it before starting
  # to dispatch any step.
  if not ports.ResetTestServerPortAllocation():
    raise Exception('Failed to reset test server port.')

  # Sort the devices so that we'll try to always run a step in the same device.
  devices = sorted(android_commands.GetAttachedDevices())
  if not devices:
    print('You must attach a device')
    return 1

  with open(options.steps, 'r') as f:
    steps = json.load(f)
  return _RunShardedSteps(steps, devices)
173
174
if __name__ == '__main__':
  # Use the value returned by main() as the exit status of the process.
  exit_status = main(sys.argv)
  sys.exit(exit_status)
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698