Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(228)

Side by Side Diff: build/android/pylib/base/dispatch.py

Issue 18770008: [Android] Redesigns the sharder to allow replicated vs distributed tests (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Adds tagging of tests (for replication) Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright (c) 2013 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 """Implements test sharding logic.""" 5 """Implements test sharding logic."""
frankf 2013/07/16 00:02:18 rename to test_dispatcher.py
gkanwar 2013/07/16 00:47:03 Done.
6 6
7 import logging 7 import logging
8 import threading 8 import threading
9 9
10 from pylib import android_commands 10 from pylib import android_commands
11 from pylib import constants 11 from pylib import constants
12 from pylib import forwarder 12 from pylib import forwarder
13 from pylib.utils import reraiser_thread 13 from pylib.utils import reraiser_thread
14 from pylib.utils import watchdog_timer 14 from pylib.utils import watchdog_timer
15 15
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after
111 def __iter__(self): 111 def __iter__(self):
112 """Iterate through tests in the collection until all have been handled.""" 112 """Iterate through tests in the collection until all have been handled."""
113 while True: 113 while True:
114 r = self._pop() 114 r = self._pop()
115 if r is None: 115 if r is None:
116 break 116 break
117 yield r 117 yield r
118 118
119 119
120 def _RunTestsFromQueue(runner, test_collection, out_results, watcher, 120 def _RunTestsFromQueue(runner, test_collection, out_results, watcher,
121 num_retries): 121 num_retries, tag_results_with_device=False):
122 """Runs tests from the test_collection until empty using the given runner. 122 """Runs tests from the test_collection until empty using the given runner.
123 123
124 Adds TestRunResults objects to the out_results list and may add tests to the 124 Adds TestRunResults objects to the out_results list and may add tests to the
125 out_retry list. 125 out_retry list.
126 126
127 Args: 127 Args:
128 runner: A TestRunner object used to run the tests. 128 runner: A TestRunner object used to run the tests.
129 test_collection: A _TestCollection from which to get _Test objects to run. 129 test_collection: A _TestCollection from which to get _Test objects to run.
130 out_results: A list to add TestRunResults to. 130 out_results: A list to add TestRunResults to.
131 watcher: A watchdog_timer.WatchdogTimer object, used as a shared timeout. 131 watcher: A watchdog_timer.WatchdogTimer object, used as a shared timeout.
132 num_retries: Number of retries for a test. 132 num_retries: Number of retries for a test.
133 tag_results_with_device: If True, prepends the name of the device on which
134 the test was run to the test name. Used by ReplicateAndRunTests to
135 identify which device ran each copy of the test, and to ensure each copy
136 of the test is recorded separately.
133 """ 137 """
138
139 # Used to tag all results to identify which device caused failing tests
140 def TagTestRunResults(test_run_results):
141 new_test_run_results = base_test_result.TestRunResults()
142 for test_result in test_run_results.GetAll():
143 new_result = base_test_result.BaseTestResult(
144 '%s_%s' % (runner.device, test_result.GetName()),
145 test_result.GetType(), test_result.GetLog())
146 new_test_run_results.AddResult(new_result)
147 return new_test_run_results
148
134 for test in test_collection: 149 for test in test_collection:
135 watcher.Reset() 150 watcher.Reset()
136 try: 151 try:
137 if not android_commands.IsDeviceAttached(runner.device): 152 if not android_commands.IsDeviceAttached(runner.device):
138 # Device is unresponsive, stop handling tests on this device. 153 # Device is unresponsive, stop handling tests on this device.
139 msg = 'Device %s is unresponsive.' % runner.device 154 msg = 'Device %s is unresponsive.' % runner.device
140 logging.warning(msg) 155 logging.warning(msg)
141 raise android_commands.errors.DeviceUnresponsiveError(msg) 156 raise android_commands.errors.DeviceUnresponsiveError(msg)
142 result, retry = runner.RunTest(test.test) 157 result, retry = runner.RunTest(test.test)
158 if tag_results_with_device:
159 result = TagTestRunResults(result)
143 test.tries += 1 160 test.tries += 1
144 if retry and test.tries <= num_retries: 161 if retry and test.tries <= num_retries:
145 # Retry non-passing results, only record passing results. 162 # Retry non-passing results, only record passing results.
146 pass_results = base_test_result.TestRunResults() 163 pass_results = base_test_result.TestRunResults()
164 # Tag all results with the device, so we can identify the failing device
165 # for replicated tests.
147 pass_results.AddResults(result.GetPass()) 166 pass_results.AddResults(result.GetPass())
148 out_results.append(pass_results) 167 out_results.append(pass_results)
149 logging.warning('Will retry test, try #%s.' % test.tries) 168 logging.warning('Will retry test, try #%s.' % test.tries)
150 test_collection.add(_Test(test=retry, tries=test.tries)) 169 test_collection.add(_Test(test=retry, tries=test.tries))
151 else: 170 else:
152 # All tests passed or retry limit reached. Either way, record results. 171 # All tests passed or retry limit reached. Either way, record results.
153 out_results.append(result) 172 out_results.append(result)
154 except: 173 except:
155 # An unhandleable exception, ensure tests get run by another device and 174 # An unhandleable exception, ensure tests get run by another device and
156 # reraise this exception on the main thread. 175 # reraise this exception on the main thread.
(...skipping 20 matching lines...) Expand all
177 try: 196 try:
178 index = threadsafe_counter.GetAndIncrement() 197 index = threadsafe_counter.GetAndIncrement()
179 logging.warning('Creating shard %s for device %s.', index, device) 198 logging.warning('Creating shard %s for device %s.', index, device)
180 runner = runner_factory(device, index) 199 runner = runner_factory(device, index)
181 runner.SetUp() 200 runner.SetUp()
182 out_runners.append(runner) 201 out_runners.append(runner)
183 except android_commands.errors.DeviceUnresponsiveError as e: 202 except android_commands.errors.DeviceUnresponsiveError as e:
184 logging.warning('Failed to create shard for %s: [%s]', device, e) 203 logging.warning('Failed to create shard for %s: [%s]', device, e)
185 204
186 205
187 def _RunAllTests(runners, tests, num_retries, timeout=None): 206 def _RunAllTests(runners, test_collection_factory, num_retries, timeout=None,
207 tag_results_with_device=False):
188 """Run all tests using the given TestRunners. 208 """Run all tests using the given TestRunners.
189 209
190 Args: 210 Args:
191 runners: a list of TestRunner objects. 211 runners: a list of TestRunner objects.
192 tests: a list of Tests to run using the given TestRunners. 212 test_collection_factory: a callable to generate a _TestCollection object for
213 each test runner.
193 num_retries: number of retries for a test. 214 num_retries: number of retries for a test.
194 timeout: watchdog timeout in seconds, defaults to the default timeout. 215 timeout: watchdog timeout in seconds, defaults to the default timeout.
216 tag_results_with_device: If True, prepends the name of the device on which
217 the test was run to the test name. Used by ReplicateAndRunTests to
218 identify which device ran each copy of the test, and to ensure each copy
219 of the test is recorded separately.
195 220
196 Returns: 221 Returns:
197 A tuple of (TestRunResults object, exit code) 222 A tuple of (TestRunResults object, exit code)
198 """ 223 """
199 logging.warning('Running %s tests with %s test runners.' % 224 logging.warning('Running tests with %s test runners.' % (len(runners)))
200 (len(tests), len(runners)))
201 tests_collection = _TestCollection([_Test(t) for t in tests])
202 results = [] 225 results = []
203 exit_code = 0 226 exit_code = 0
204 watcher = watchdog_timer.WatchdogTimer(timeout) 227 watcher = watchdog_timer.WatchdogTimer(timeout)
228
205 workers = reraiser_thread.ReraiserThreadGroup( 229 workers = reraiser_thread.ReraiserThreadGroup(
206 [reraiser_thread.ReraiserThread( 230 [reraiser_thread.ReraiserThread(
207 _RunTestsFromQueue, 231 _RunTestsFromQueue,
208 [r, tests_collection, results, watcher, num_retries], 232 [r, test_collection_factory(), results, watcher, num_retries,
233 tag_results_with_device],
209 name=r.device[-4:]) 234 name=r.device[-4:])
210 for r in runners]) 235 for r in runners])
211 run_results = base_test_result.TestRunResults() 236 run_results = base_test_result.TestRunResults()
212 workers.StartAll() 237 workers.StartAll()
213 238
214 # Catch DeviceUnresponsiveErrors and set a warning exit code 239 # Catch DeviceUnresponsiveErrors and set a warning exit code
215 try: 240 try:
216 workers.JoinAll(watcher) 241 workers.JoinAll(watcher)
217 except android_commands.errors.DeviceUnresponsiveError as e: 242 except android_commands.errors.DeviceUnresponsiveError as e:
218 logging.error(e) 243 logging.error(e)
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
260 runners: a list of TestRunner objects. 285 runners: a list of TestRunner objects.
261 timeout: watchdog timeout in seconds, defaults to the default timeout. 286 timeout: watchdog timeout in seconds, defaults to the default timeout.
262 """ 287 """
263 threads = reraiser_thread.ReraiserThreadGroup( 288 threads = reraiser_thread.ReraiserThreadGroup(
264 [reraiser_thread.ReraiserThread(r.TearDown, name=r.device[-4:]) 289 [reraiser_thread.ReraiserThread(r.TearDown, name=r.device[-4:])
265 for r in runners]) 290 for r in runners])
266 threads.StartAll() 291 threads.StartAll()
267 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout)) 292 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout))
268 293
269 294
270 def ShardAndRunTests(runner_factory, devices, tests, build_type='Debug', 295
271 test_timeout=DEFAULT_TIMEOUT, 296 def _GetAttachedDevices(wait_for_debugger=False, test_device=None):
272 setup_timeout=DEFAULT_TIMEOUT, 297 """Get all attached devices.
273 num_retries=2): 298
274 """Run all tests on attached devices, retrying tests that don't pass. 299 If we are using a debugger, limit to only one device.
275 300
276 Args: 301 Args:
277 runner_factory: callable that takes a device and index and returns a 302 wait_for_debugger: True if this run will use a debugger.
278 TestRunner object. 303 test_device: name of a specific device to use.
279 devices: list of attached device serial numbers as strings. 304
280 tests: list of tests to run. 305 Returns:
281 build_type: either 'Debug' or 'Release'. 306 A list of attached devices.
282 test_timeout: watchdog timeout in seconds for running tests, defaults to the 307 """
283 default timeout. 308 attached_devices = []
284 setup_timeout: watchdog timeout in seconds for creating and cleaning up 309
285 test runners, defaults to the default timeout. 310 attached_devices = android_commands.GetAttachedDevices()
286 num_retries: number of retries for a test. 311 if test_device:
312 assert test_device in attached_devices
313 attached_devices = [test_device]
314
315 if len(attached_devices) > 1 and wait_for_debugger:
316 logging.warning('Debugger can not be sharded, using first available device')
317 attached_devices = attached_devices[:1]
318
319 return attached_devices
320
321
322 def ReplicateAndRunTests(tests, wait_for_debugger, test_device,
323 *args, **kwargs):
324 """Replicates the tests for each device, so all devices run every test.
325
326 Args:
327 tests: A list of tests to run.
328 wait_for_debugger: True if this test is using a debugger.
329 test_device: A specific device to run tests on, or None.
330 *args, **kwargs: Args and kwargs to RunTests which we pass through.
287 331
288 Returns: 332 Returns:
289 A tuple of (base_test_result.TestRunResults object, exit code). 333 A tuple of (base_test_result.TestRunResults object, exit code).
290 """ 334 """
335
291 if not tests: 336 if not tests:
292 logging.error('No tests to run.') 337 logging.error('No tests to run.')
293 return (base_test_result.TestRunResults(), constants.ERROR_EXIT_CODE) 338 return (base_test_result.TestRunResults(), constants.ERROR_EXIT_CODE)
294 339
295 logging.info('Will run %d tests: %s', len(tests), str(tests)) 340 logging.info('Will run %d tests: %s', len(tests), str(tests))
341
342 # Generate a unique _TestCollection object for each test runner, but use
343 # the same set of tests.
344 TestCollectionFactory = lambda: _TestCollection([_Test(t) for t in tests])
345
346 devices = _GetAttachedDevices(wait_for_debugger, test_device)
347 return _RunTests(TestCollectionFactory, devices, *args,
348 tag_results_with_device=True, **kwargs)
349
350
351 def ShardAndRunTests(tests, wait_for_debugger, test_device, *args, **kwargs):
352 """Distributes all tests over devices through a shared pool of tests.
frankf 2013/07/16 00:02:18 It's sufficient to say "Shards tests over devices"
gkanwar 2013/07/16 00:47:03 Done.
353
354 Args:
355 tests: A list of tests to run.
356 wait_for_debugger: True if this test is using a debugger.
357 test_device: A specific device to run tests on, or None.
358 *args, **kwargs: Args and kwargs to _RunTests which we pass through.
359
360 Returns:
361 A tuple of (base_test_result.TestRunResults object, exit code).
362 """
363
364 if not tests:
frankf 2013/07/16 00:02:18 There's a lot duplication between these two method
gkanwar 2013/07/16 00:47:03 I ended up combining the two methods back together
365 logging.error('No tests to run.')
366 return (base_test_result.TestRunResults(), constants.ERROR_EXIT_CODE)
367
368 logging.info('Will run %d tests: %s', len(tests), str(tests))
369
370 # Generate a shared _TestCollection object for all test runners, so they draw
371 # from a common pool of tests.
372 shared_test_collection = _TestCollection([_Test(t) for t in tests])
373 TestCollectionFactory = lambda: shared_test_collection
374
375 devices = _GetAttachedDevices(wait_for_debugger, test_device)
376 return _RunTests(TestCollectionFactory, devices, *args,
377 tag_results_with_device=False, **kwargs)
378
379
380 def _RunTests(test_collection_factory, devices, runner_factory,
frankf 2013/07/16 00:02:18 It's convention to move callee above caller
gkanwar 2013/07/16 00:47:03 Done.
381 build_type='Debug',
382 test_timeout=DEFAULT_TIMEOUT,
383 setup_timeout=DEFAULT_TIMEOUT,
384 num_retries=2,
385 tag_results_with_device=False):
386 """Run all tests on attached devices, retrying tests that don't pass.
387
388 Args:
389 test_collection_factory: callable that is used to generate a _TestCollection
390 object for each test runner.
391 devices: list of attached device serial numbers as strings.
392 build_type: either 'Debug' or 'Release'.
393 runner_factory: callable that takes a device and index and returns a
394 TestRunner object.
395 test_timeout: watchdog timeout in seconds for running tests, defaults to the
396 default timeout.
397 setup_timeout: watchdog timeout in seconds for creating and cleaning up
398 test runners, defaults to the default timeout.
399 num_retries: number of retries for a test.
400 tag_results_with_device: If True, prepends the name of the device on which
401 the test was run to the test name. Used by ReplicateAndRunTests to
402 identify which device ran each copy of the test, and to ensure each copy
403 of the test is recorded separately.
404
405 Returns:
406 A tuple of (base_test_result.TestRunResults object, exit code).
407 """
296 forwarder.Forwarder.KillHost(build_type) 408 forwarder.Forwarder.KillHost(build_type)
297 runners = _CreateRunners(runner_factory, devices, setup_timeout) 409 runners = _CreateRunners(runner_factory, devices, setup_timeout)
298 try: 410 try:
299 return _RunAllTests(runners, tests, num_retries, test_timeout) 411 return _RunAllTests(runners, test_collection_factory,
412 num_retries, test_timeout, tag_results_with_device)
300 finally: 413 finally:
301 try: 414 try:
302 _TearDownRunners(runners, setup_timeout) 415 _TearDownRunners(runners, setup_timeout)
303 except android_commands.errors.DeviceUnresponsiveError as e: 416 except android_commands.errors.DeviceUnresponsiveError as e:
304 logging.warning('Device unresponsive during TearDown: [%s]', e) 417 logging.warning('Device unresponsive during TearDown: [%s]', e)
305 finally: 418 finally:
306 forwarder.Forwarder.KillHost(build_type) 419 forwarder.Forwarder.KillHost(build_type)
OLDNEW
« no previous file with comments | « no previous file | build/android/pylib/base/dispatch_unittest.py » ('j') | build/android/pylib/browsertests/setup.py » ('J')

Powered by Google App Engine
This is Rietveld 408576698