Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(133)

Side by Side Diff: build/android/pylib/base/test_dispatcher.py

Issue 18770008: [Android] Redesigns the sharder to allow replicated vs distributed tests (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Small fixes to formatting Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright (c) 2013 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 """Implements test sharding logic.""" 5 """Dispatches tests, either sharding or replicating them."""
frankf 2013/07/17 04:07:20 Expand this a little bit. Include the fact, this
gkanwar 2013/07/17 20:31:26 Done.
6 6
7 import logging 7 import logging
8 import threading 8 import threading
9 9
10 from pylib import android_commands 10 from pylib import android_commands
11 from pylib import constants 11 from pylib import constants
12 from pylib import forwarder 12 from pylib import forwarder
13 from pylib.utils import reraiser_thread 13 from pylib.utils import reraiser_thread
14 from pylib.utils import watchdog_timer 14 from pylib.utils import watchdog_timer
15 15
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after
111 def __iter__(self): 111 def __iter__(self):
112 """Iterate through tests in the collection until all have been handled.""" 112 """Iterate through tests in the collection until all have been handled."""
113 while True: 113 while True:
114 r = self._pop() 114 r = self._pop()
115 if r is None: 115 if r is None:
116 break 116 break
117 yield r 117 yield r
118 118
119 119
120 def _RunTestsFromQueue(runner, test_collection, out_results, watcher, 120 def _RunTestsFromQueue(runner, test_collection, out_results, watcher,
121 num_retries): 121 num_retries, tag_results_with_device=False):
122 """Runs tests from the test_collection until empty using the given runner. 122 """Runs tests from the test_collection until empty using the given runner.
123 123
124 Adds TestRunResults objects to the out_results list and may add tests to the 124 Adds TestRunResults objects to the out_results list and may add tests to the
125 out_retry list. 125 out_retry list.
126 126
127 Args: 127 Args:
128 runner: A TestRunner object used to run the tests. 128 runner: A TestRunner object used to run the tests.
129 test_collection: A _TestCollection from which to get _Test objects to run. 129 test_collection: A _TestCollection from which to get _Test objects to run.
130 out_results: A list to add TestRunResults to. 130 out_results: A list to add TestRunResults to.
131 watcher: A watchdog_timer.WatchdogTimer object, used as a shared timeout. 131 watcher: A watchdog_timer.WatchdogTimer object, used as a shared timeout.
132 num_retries: Number of retries for a test. 132 num_retries: Number of retries for a test.
133 tag_results_with_device: If True, appends the name of the device on which
134 the test was run to the test name. Used by ReplicateAndRunTests to
135 identify which device ran each copy of the test, and to ensure each copy
136 of the test is recorded separately.
133 """ 137 """
138
139 # Used to tag all results to identify which device caused failing tests
frankf 2013/07/17 04:07:20 This is misleading. This also tags passing tests,
gkanwar 2013/07/17 20:31:26 Done.
140 def TagTestRunResults(test_run_results):
frankf 2013/07/17 04:07:20 Move comment here.
gkanwar 2013/07/17 20:31:26 Done.
141 new_test_run_results = base_test_result.TestRunResults()
142 for test_result in test_run_results.GetAll():
143 test_result.SetTag(runner.device[-4:])
144 new_test_run_results.AddResult(test_result)
145 return new_test_run_results
146
134 for test in test_collection: 147 for test in test_collection:
135 watcher.Reset() 148 watcher.Reset()
136 try: 149 try:
137 if not android_commands.IsDeviceAttached(runner.device): 150 if not android_commands.IsDeviceAttached(runner.device):
138 # Device is unresponsive, stop handling tests on this device. 151 # Device is unresponsive, stop handling tests on this device.
139 msg = 'Device %s is unresponsive.' % runner.device 152 msg = 'Device %s is unresponsive.' % runner.device
140 logging.warning(msg) 153 logging.warning(msg)
141 raise android_commands.errors.DeviceUnresponsiveError(msg) 154 raise android_commands.errors.DeviceUnresponsiveError(msg)
142 result, retry = runner.RunTest(test.test) 155 result, retry = runner.RunTest(test.test)
156 if tag_results_with_device:
157 result = TagTestRunResults(result)
143 test.tries += 1 158 test.tries += 1
144 if retry and test.tries <= num_retries: 159 if retry and test.tries <= num_retries:
145 # Retry non-passing results, only record passing results. 160 # Retry non-passing results, only record passing results.
146 pass_results = base_test_result.TestRunResults() 161 pass_results = base_test_result.TestRunResults()
147 pass_results.AddResults(result.GetPass()) 162 pass_results.AddResults(result.GetPass())
148 out_results.append(pass_results) 163 out_results.append(pass_results)
149 logging.warning('Will retry test, try #%s.' % test.tries) 164 logging.warning('Will retry test, try #%s.' % test.tries)
150 test_collection.add(_Test(test=retry, tries=test.tries)) 165 test_collection.add(_Test(test=retry, tries=test.tries))
151 else: 166 else:
152 # All tests passed or retry limit reached. Either way, record results. 167 # All tests passed or retry limit reached. Either way, record results.
(...skipping 24 matching lines...) Expand all
177 try: 192 try:
178 index = threadsafe_counter.GetAndIncrement() 193 index = threadsafe_counter.GetAndIncrement()
179 logging.warning('Creating shard %s for device %s.', index, device) 194 logging.warning('Creating shard %s for device %s.', index, device)
180 runner = runner_factory(device, index) 195 runner = runner_factory(device, index)
181 runner.SetUp() 196 runner.SetUp()
182 out_runners.append(runner) 197 out_runners.append(runner)
183 except android_commands.errors.DeviceUnresponsiveError as e: 198 except android_commands.errors.DeviceUnresponsiveError as e:
184 logging.warning('Failed to create shard for %s: [%s]', device, e) 199 logging.warning('Failed to create shard for %s: [%s]', device, e)
185 200
186 201
187 def _RunAllTests(runners, tests, num_retries, timeout=None): 202 def _RunAllTests(runners, test_collection_factory, num_retries, timeout=None,
203 tag_results_with_device=False):
188 """Run all tests using the given TestRunners. 204 """Run all tests using the given TestRunners.
189 205
190 Args: 206 Args:
191 runners: a list of TestRunner objects. 207 runners: a list of TestRunner objects.
192 tests: a list of Tests to run using the given TestRunners. 208 test_collection_factory: a callable to generate a _TestCollection object for
209 each test runner.
193 num_retries: number of retries for a test. 210 num_retries: number of retries for a test.
194 timeout: watchdog timeout in seconds, defaults to the default timeout. 211 timeout: watchdog timeout in seconds, defaults to the default timeout.
212 tag_results_with_device: If True, appends the name of the device on which
213 the test was run to the test name. Used by ReplicateAndRunTests to
frankf 2013/07/17 04:07:20 Please address all comments. This needs to be upda
gkanwar 2013/07/17 20:31:26 Done.
214 identify which device ran each copy of the test, and to ensure each copy
215 of the test is recorded separately.
195 216
196 Returns: 217 Returns:
197 A tuple of (TestRunResults object, exit code) 218 A tuple of (TestRunResults object, exit code)
198 """ 219 """
199 logging.warning('Running %s tests with %s test runners.' % 220 logging.warning('Running tests with %s test runners.' % (len(runners)))
200 (len(tests), len(runners)))
201 tests_collection = _TestCollection([_Test(t) for t in tests])
202 results = [] 221 results = []
203 exit_code = 0 222 exit_code = 0
204 watcher = watchdog_timer.WatchdogTimer(timeout) 223 watcher = watchdog_timer.WatchdogTimer(timeout)
224
205 workers = reraiser_thread.ReraiserThreadGroup( 225 workers = reraiser_thread.ReraiserThreadGroup(
206 [reraiser_thread.ReraiserThread( 226 [reraiser_thread.ReraiserThread(
207 _RunTestsFromQueue, 227 _RunTestsFromQueue,
208 [r, tests_collection, results, watcher, num_retries], 228 [r, test_collection_factory(), results, watcher, num_retries,
229 tag_results_with_device],
209 name=r.device[-4:]) 230 name=r.device[-4:])
210 for r in runners]) 231 for r in runners])
211 run_results = base_test_result.TestRunResults() 232 run_results = base_test_result.TestRunResults()
212 workers.StartAll() 233 workers.StartAll()
213 234
214 # Catch DeviceUnresponsiveErrors and set a warning exit code 235 # Catch DeviceUnresponsiveErrors and set a warning exit code
215 try: 236 try:
216 workers.JoinAll(watcher) 237 workers.JoinAll(watcher)
217 except android_commands.errors.DeviceUnresponsiveError as e: 238 except android_commands.errors.DeviceUnresponsiveError as e:
218 logging.error(e) 239 logging.error(e)
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
260 runners: a list of TestRunner objects. 281 runners: a list of TestRunner objects.
261 timeout: watchdog timeout in seconds, defaults to the default timeout. 282 timeout: watchdog timeout in seconds, defaults to the default timeout.
262 """ 283 """
263 threads = reraiser_thread.ReraiserThreadGroup( 284 threads = reraiser_thread.ReraiserThreadGroup(
264 [reraiser_thread.ReraiserThread(r.TearDown, name=r.device[-4:]) 285 [reraiser_thread.ReraiserThread(r.TearDown, name=r.device[-4:])
265 for r in runners]) 286 for r in runners])
266 threads.StartAll() 287 threads.StartAll()
267 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout)) 288 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout))
268 289
269 290
270 def ShardAndRunTests(runner_factory, devices, tests, build_type='Debug', 291
271 test_timeout=DEFAULT_TIMEOUT, 292 def _GetAttachedDevices(wait_for_debugger=False, test_device=None):
272 setup_timeout=DEFAULT_TIMEOUT, 293 """Get all attached devices.
273 num_retries=2): 294
295 If we are using a debugger, limit to only one device.
296
297 Args:
298 wait_for_debugger: True if this run will use a debugger.
299 test_device: name of a specific device to use.
300
301 Returns:
302 A list of attached devices.
303 """
304 attached_devices = []
305
306 attached_devices = android_commands.GetAttachedDevices()
307 if test_device:
308 assert (test_device in attached_devices,
309 'Did not find device %s among attached device. Attached devices: %s'
310 % (test_device, ', '.join(attached_devices)))
311 attached_devices = [test_device]
312
313 if len(attached_devices) > 1 and wait_for_debugger:
314 logging.warning('Debugger can not be sharded, using first available device')
315 attached_devices = attached_devices[:1]
316
317 return attached_devices
318
319
320 def RunTests(tests, runner_factory, wait_for_debugger, test_device, shard,
321 build_type='Debug',
322 test_timeout=DEFAULT_TIMEOUT,
323 setup_timeout=DEFAULT_TIMEOUT,
324 num_retries=2):
274 """Run all tests on attached devices, retrying tests that don't pass. 325 """Run all tests on attached devices, retrying tests that don't pass.
275 326
276 Args: 327 Args:
277 runner_factory: callable that takes a device and index and returns a 328 tests: List of tests to run.
278 TestRunner object. 329 runner_factory: Callable that takes a device and index and returns a
279 devices: list of attached device serial numbers as strings. 330 TestRunner object.
frankf 2013/07/17 04:07:20 I think we use 2 spaces for indentation or align i
gkanwar 2013/07/17 20:31:26 According to go/pyguide it's 4 spaces: http://www.
frankf 2013/07/17 21:08:41 We diverge from the style guide for some things. Le
280 tests: list of tests to run. 331 wait_for_debugger: True if this test is using a debugger.
281 build_type: either 'Debug' or 'Release'. 332 test_device: A specific device to run tests on, or None.
282 test_timeout: watchdog timeout in seconds for running tests, defaults to the 333 shard: True if we should shard, False if we should replicate tests.
frankf 2013/07/17 04:07:20 Expand this to define replicate.
gkanwar 2013/07/17 20:31:26 Done.
283 default timeout. 334 build_type: Either 'Debug' or 'Release'.
284 setup_timeout: watchdog timeout in seconds for creating and cleaning up 335 test_timeout: Watchdog timeout in seconds for running tests, defaults to the
frankf 2013/07/17 04:07:20 Remove "defaults to default timeout".
gkanwar 2013/07/17 20:31:26 Done.
285 test runners, defaults to the default timeout. 336 default timeout.
286 num_retries: number of retries for a test. 337 setup_timeout: Watchdog timeout in seconds for creating and cleaning up
338 test runners, defaults to the default timeout.
339 num_retries: Number of retries for a test.
287 340
288 Returns: 341 Returns:
289 A tuple of (base_test_result.TestRunResults object, exit code). 342 A tuple of (base_test_result.TestRunResults object, exit code).
290 """ 343 """
291 if not tests: 344 if not tests:
292 logging.error('No tests to run.') 345 logging.error('No tests to run.')
293 return (base_test_result.TestRunResults(), constants.ERROR_EXIT_CODE) 346 return (base_test_result.TestRunResults(), constants.ERROR_EXIT_CODE)
294 347
348 if shard:
349 # Generate a shared _TestCollection object for all test runners, so they
350 # draw from a common pool of tests.
351 shared_test_collection = _TestCollection([_Test(t) for t in tests])
352 test_collection_factory = lambda: shared_test_collection
353 tag_results_with_device = False
354 else:
355 # Generate a unique _TestCollection object for each test runner, but use
356 # the same set of tests.
357 test_collection_factory = lambda: _TestCollection([_Test(t) for t in tests])
358 tag_results_with_device = True
359
360 devices = _GetAttachedDevices(wait_for_debugger, test_device)
361
295 logging.info('Will run %d tests: %s', len(tests), str(tests)) 362 logging.info('Will run %d tests: %s', len(tests), str(tests))
363
296 forwarder.Forwarder.KillHost(build_type) 364 forwarder.Forwarder.KillHost(build_type)
297 runners = _CreateRunners(runner_factory, devices, setup_timeout) 365 runners = _CreateRunners(runner_factory, devices, setup_timeout)
298 try: 366 try:
299 return _RunAllTests(runners, tests, num_retries, test_timeout) 367 return _RunAllTests(runners, test_collection_factory,
368 num_retries, test_timeout, tag_results_with_device)
300 finally: 369 finally:
301 try: 370 try:
302 _TearDownRunners(runners, setup_timeout) 371 _TearDownRunners(runners, setup_timeout)
303 except android_commands.errors.DeviceUnresponsiveError as e: 372 except android_commands.errors.DeviceUnresponsiveError as e:
304 logging.warning('Device unresponsive during TearDown: [%s]', e) 373 logging.warning('Device unresponsive during TearDown: [%s]', e)
305 finally: 374 finally:
306 forwarder.Forwarder.KillHost(build_type) 375 forwarder.Forwarder.KillHost(build_type)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698