build/android/bb_run_sharded_steps.py - Issue 11308344: Telemetry: shard tests on android.

Side by Side Diff: build/android/bb_run_sharded_steps.py

Issue 11308344: Telemetry: shard tests on android. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 8 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 #!/usr/bin/env python

	2 #

	3 # Copyright (c) 2012 The Chromium Authors. All rights reserved.

	4 # Use of this source code is governed by a BSD-style license that can be

	5 # found in the LICENSE file.

	6

	7 """Helper script to shard build bot steps and save results to disk.

	8

	9 Our buildbot infrastructure requires each slave to run steps serially.

	10 This is sub-optimal for android, where these steps can run independently on

	11 multiple connected devices.

	12

	13 The buildbots will run this script multiple times per cycle:

	14 - First, without params: all steps will be executed in parallel using all
	tonyg 2012/12/04 23:57:56: Please explain the JSON steps file format here. bulach 2012/12/07 13:53:00 (in reply to tonyg): Done.
	15 connected devices. Step results will be pickled to disk (each step has a unique

	16 name).

	17 The buildbot will treat this step as a regular step, and will not process any

	18 graph data.

	19

	20 - Then, with -p STEP_NAME: at this stage, we'll simply print the file with the

	21 step results previously saved. The buildbot will then process the graph data

	22 accordingly.

	23 """

	24

	25

	26 import datetime

	27 import json

	28 import logging

	29 import multiprocessing

	30 import optparse

	31 import pexpect

	32 import pickle

	33 import os

	34 import signal

	35 import shutil

	36 import sys

	37

	38 from pylib import android_commands

	39 from pylib import cmd_helper

	40 from pylib import constants

	41 from pylib import ports

	42 from pylib import test_options_parser

	43

	44 _OUTPUT_DIR = os.path.join(constants.CHROME_DIR, 'out', 'step_results')

	45

	46

	def _SaveResult(result):
	  """Pickles a step result to a file named after the step.

	  Args:
	    result: A dict describing a finished step. Its 'name' entry is used as
	        the file name inside _OUTPUT_DIR.
	  """
	  result_path = os.path.join(_OUTPUT_DIR, result['name'])
	  with open(result_path, 'w') as result_file:
	    pickle.dump(result, result_file)

	50

	51

	def _RunStepsPerDevice(steps):
	  """Runs the given steps one after another and saves each result to disk.

	  Args:
	    steps: A list of dicts, each with 'name', 'cmd' and 'device' entries.

	  Returns:
	    A list with one result dict per step, in the same order as steps. Each
	    result has 'name', 'output', 'exit_code', 'total_time' (in seconds) and
	    'device' entries.
	  """
	  results = []
	  for step in steps:
	    start_time = datetime.datetime.now()
	    print('Starting %s: %s %s at %s' % (step['name'], step['cmd'],
	                                        start_time, step['device']))
	    # Per the review discussion, pexpect.run collapses the child's stdout and
	    # stderr into a single stream, so the saved output cannot tell them apart.
	    output, exit_code = pexpect.run(
	        step['cmd'], cwd=os.path.abspath(constants.CHROME_DIR),
	        withexitstatus=True, logfile=sys.stdout, timeout=1800,
	        env=os.environ)
	    end_time = datetime.datetime.now()
	    print('Finished %s: %s %s at %s' % (step['name'], step['cmd'],
	                                        end_time, step['device']))
	    if exit_code is None:
	      # pexpect reports no exit status when the child did not exit on its own
	      # (e.g. it hit the timeout above and was killed). Record that as a
	      # failure rather than letting it pass as a successful exit code 0.
	      exit_code = 1
	    result = {'name': step['name'],
	              'output': output,
	              'exit_code': exit_code,
	              'total_time': (end_time - start_time).seconds,
	              'device': step['device']}
	    _SaveResult(result)
	    results.append(result)
	  return results

	73

	74

	def _RunShardedSteps(steps, devices):
	  """Splits the steps across the devices and runs the shards in parallel.

	  Any previous _OUTPUT_DIR is wiped first. Step names are sorted before being
	  sliced into contiguous shards, so that for a given set of steps and devices
	  each step is always assigned to the same device.

	  Args:
	    steps: A non-empty dict mapping a unique step name to the command line
	        (a string) that runs it, as loaded from the JSON steps file.
	    devices: A non-empty list of device identifiers.

	  Returns:
	    Always 0: the sharding step itself carries no exit code, the individual
	    _PrintResults invocations return the one of each step.
	  """
	  assert steps
	  assert devices
	  if os.path.exists(_OUTPUT_DIR):
	    # Sanity check before recursively deleting a directory.
	    assert '/step_results' in _OUTPUT_DIR
	    shutil.rmtree(_OUTPUT_DIR)
	  if not os.path.exists(_OUTPUT_DIR):
	    os.makedirs(_OUTPUT_DIR)

	  step_names = sorted(steps.keys())
	  num_devices = len(devices)
	  # Ceiling division: every step lands in a shard even when the number of
	  # steps is not a multiple of the number of devices.
	  shard_size = (len(step_names) + num_devices - 1) // num_devices
	  all_params = []
	  for i, device in enumerate(devices):
	    shard = step_names[i * shard_size:(i + 1) * shard_size]
	    # Each command gets --device and --keep_test_server_ports appended, so
	    # every sharded command has to accept both flags.
	    all_params.append([{'name': name,
	                        'device': device,
	                        'cmd': steps[name] + ' --device ' + device +
	                               ' --keep_test_server_ports'}
	                       for name in shard])

	  print('Start sharding (note: output is not synchronized...)')
	  print('*' * 80)
	  start_time = datetime.datetime.now()
	  pool = multiprocessing.Pool(processes=num_devices)
	  async_results = pool.map_async(_RunStepsPerDevice, all_params)
	  # NOTE(review): the huge timeout is presumably there so that the wait can be
	  # interrupted from the keyboard -- confirm before changing it.
	  results_per_device = async_results.get(999999)
	  pool.close()
	  pool.join()
	  end_time = datetime.datetime.now()
	  print('*' * 80)
	  print('Finished sharding.')
	  print('Summary')
	  total_time = 0
	  for results in results_per_device:
	    for result in results:
	      print('%s : exit_code=%d in %d secs at %s' %
	            (result['name'], result['exit_code'], result['total_time'],
	             result['device']))
	      total_time += result['total_time']
	  print('Step time: %d secs' % ((end_time - start_time).seconds))
	  print('Bots time: %d secs' % total_time)
	  # No exit_code for the sharding step: the individual _PrintResults step
	  # will return the corresponding exit_code.
	  return 0

	116

	117

	def _PrintResults(step_name):
	  """Prints the saved output of a step and returns its saved exit code.

	  Args:
	    step_name: Name of a step whose result file is looked up in _OUTPUT_DIR.

	  Returns:
	    The 'exit_code' stored in the result file, or 1 when there is no result
	    file for step_name.
	  """
	  file_name = os.path.join(_OUTPUT_DIR, step_name)
	  if os.path.exists(file_name):
	    # The file is a pickle; it is only expected to hold what _SaveResult
	    # wrote, never data from an untrusted source.
	    with open(file_name, 'r') as saved:
	      result = pickle.load(saved)
	    # stdout and stderr of the step were already merged when it was captured
	    # (see the review discussion), so everything is printed to stdout.
	    print(result['output'])
	    return result['exit_code']
	  print(' '.join(['File not found ', file_name]))
	  return 1

	127

	128

	def _KillPendingServers():
	  """Sends SIGQUIT to every lighttpd and web-page-replay process found.

	  Processes are looked up with 'pgrep -f', and the whole lookup-and-kill pass
	  is repeated five times. A failure to kill one process is logged and does not
	  stop the others from being handled.
	  """
	  server_names = ['lighttpd', 'web-page-replay']
	  for _ in range(5):
	    for server in server_names:
	      pgrep_output = cmd_helper.GetCmdOutput(['pgrep', '-f', server])
	      candidates = (line.strip() for line in pgrep_output.split('\n'))
	      for pid in candidates:
	        if not pid:
	          # Blank line in the pgrep output.
	          continue
	        try:
	          logging.warning('Killing %s %s', server, pid)
	          os.kill(int(pid), signal.SIGQUIT)
	        except Exception as e:
	          logging.warning('Failed killing %s %s %s', server, pid, e)

	140

	141

	def main(argv):
	  """Runs all sharded steps, or prints the saved results of a single step.

	  With -p/--print_results, only the saved output of that step is printed.
	  Otherwise the steps listed in the -s/--steps JSON file are sharded across
	  all attached devices.

	  Args:
	    argv: The full command line, as in sys.argv.

	  Returns:
	    The exit code for the process: the saved exit code of the step when
	    printing results, 1 when no device is attached, otherwise the value
	    returned by _RunShardedSteps.

	  Raises:
	    Exception: If the test server port allocation cannot be reset.
	    SystemExit: Via parser.error, when neither --steps nor --print_results
	        is given.
	  """
	  parser = optparse.OptionParser()
	  parser.add_option('-s', '--steps',
	                    help='A JSON file containing all the steps to be '
	                         'sharded.')
	  parser.add_option('-p', '--print_results',
	                    help='Only prints the results for the previously '
	                         'executed step, do not run it again.')
	  # The build type is not read anywhere in this function; the option stays
	  # registered so that command lines which pass it keep being accepted.
	  test_options_parser.AddBuildTypeOption(parser)
	  options, _ = parser.parse_args(argv)
	  if options.print_results:
	    return _PrintResults(options.print_results)

	  # Fail early, before killing servers or touching the port allocation,
	  # instead of crashing later while trying to open a missing steps file.
	  if not options.steps:
	    parser.error('Either --steps or --print_results must be given.')

	  # At this point, we should kill everything that may have been left over from
	  # previous runs.
	  _KillPendingServers()

	  # Reset the test port allocation. It's important to do it before starting
	  # to dispatch any step.
	  if not ports.ResetTestServerPortAllocation():
	    raise Exception('Failed to reset test server port.')

	  # Sort the devices so that we'll try to always run a step in the same device.
	  devices = sorted(android_commands.GetAttachedDevices())
	  if not devices:
	    print('You must attach a device')
	    return 1

	  with open(options.steps, 'r') as f:
	    steps = json.load(f)
	  return _RunShardedSteps(steps, devices)

	173

	174

	175 if __name__ == '__main__':

	176 sys.exit(main(sys.argv))

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »