Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """Implements test sharding logic.""" | 5 """Implements test sharding logic.""" |
|
frankf
2013/07/16 00:02:18
rename to test_dispatcher.py
gkanwar
2013/07/16 00:47:03
Done.
| |
| 6 | 6 |
| 7 import logging | 7 import logging |
| 8 import threading | 8 import threading |
| 9 | 9 |
| 10 from pylib import android_commands | 10 from pylib import android_commands |
| 11 from pylib import constants | 11 from pylib import constants |
| 12 from pylib import forwarder | 12 from pylib import forwarder |
| 13 from pylib.utils import reraiser_thread | 13 from pylib.utils import reraiser_thread |
| 14 from pylib.utils import watchdog_timer | 14 from pylib.utils import watchdog_timer |
| 15 | 15 |
| (...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 111 def __iter__(self): | 111 def __iter__(self): |
| 112 """Iterate through tests in the collection until all have been handled.""" | 112 """Iterate through tests in the collection until all have been handled.""" |
| 113 while True: | 113 while True: |
| 114 r = self._pop() | 114 r = self._pop() |
| 115 if r is None: | 115 if r is None: |
| 116 break | 116 break |
| 117 yield r | 117 yield r |
| 118 | 118 |
| 119 | 119 |
| 120 def _RunTestsFromQueue(runner, test_collection, out_results, watcher, | 120 def _RunTestsFromQueue(runner, test_collection, out_results, watcher, |
| 121 num_retries): | 121 num_retries, tag_results_with_device=False): |
| 122 """Runs tests from the test_collection until empty using the given runner. | 122 """Runs tests from the test_collection until empty using the given runner. |
| 123 | 123 |
| 124 Adds TestRunResults objects to the out_results list and may add tests to the | 124 Adds TestRunResults objects to the out_results list and may add tests to the |
| 125 out_retry list. | 125 out_retry list. |
| 126 | 126 |
| 127 Args: | 127 Args: |
| 128 runner: A TestRunner object used to run the tests. | 128 runner: A TestRunner object used to run the tests. |
| 129 test_collection: A _TestCollection from which to get _Test objects to run. | 129 test_collection: A _TestCollection from which to get _Test objects to run. |
| 130 out_results: A list to add TestRunResults to. | 130 out_results: A list to add TestRunResults to. |
| 131 watcher: A watchdog_timer.WatchdogTimer object, used as a shared timeout. | 131 watcher: A watchdog_timer.WatchdogTimer object, used as a shared timeout. |
| 132 num_retries: Number of retries for a test. | 132 num_retries: Number of retries for a test. |
| 133 tag_results_with_device: If True, appends the name of the device on which | |
| 134 the test was run to the test name. Used by ReplicateAndRunTests to | |
| 135 identify which device ran each copy of the test, and to ensure each copy | |
| 136 of the test is recorded separately. | |
| 133 """ | 137 """ |
| 138 | |
| 139 # Used to tag all results to identify which device caused failing tests | |
| 140 def TagTestRunResults(test_run_results): | |
| 141 new_test_run_results = base_test_result.TestRunResults() | |
| 142 for test_result in test_run_results.GetAll(): | |
| 143 new_result = base_test_result.BaseTestResult( | |
| 144 '%s_%s' % (runner.device, test_result.GetName()), | |
| 145 test_result.GetType(), test_result.GetLog()) | |
| 146 new_test_run_results.AddResult(new_result) | |
| 147 return new_test_run_results | |
| 148 | |
| 134 for test in test_collection: | 149 for test in test_collection: |
| 135 watcher.Reset() | 150 watcher.Reset() |
| 136 try: | 151 try: |
| 137 if not android_commands.IsDeviceAttached(runner.device): | 152 if not android_commands.IsDeviceAttached(runner.device): |
| 138 # Device is unresponsive, stop handling tests on this device. | 153 # Device is unresponsive, stop handling tests on this device. |
| 139 msg = 'Device %s is unresponsive.' % runner.device | 154 msg = 'Device %s is unresponsive.' % runner.device |
| 140 logging.warning(msg) | 155 logging.warning(msg) |
| 141 raise android_commands.errors.DeviceUnresponsiveError(msg) | 156 raise android_commands.errors.DeviceUnresponsiveError(msg) |
| 142 result, retry = runner.RunTest(test.test) | 157 result, retry = runner.RunTest(test.test) |
| 158 if tag_results_with_device: | |
| 159 result = TagTestRunResults(result) | |
| 143 test.tries += 1 | 160 test.tries += 1 |
| 144 if retry and test.tries <= num_retries: | 161 if retry and test.tries <= num_retries: |
| 145 # Retry non-passing results, only record passing results. | 162 # Retry non-passing results, only record passing results. |
| 146 pass_results = base_test_result.TestRunResults() | 163 pass_results = base_test_result.TestRunResults() |
| 164 # Tag all results with the device, so we can identify the failing device | |
| 165 # for replicated tests. | |
| 147 pass_results.AddResults(result.GetPass()) | 166 pass_results.AddResults(result.GetPass()) |
| 148 out_results.append(pass_results) | 167 out_results.append(pass_results) |
| 149 logging.warning('Will retry test, try #%s.' % test.tries) | 168 logging.warning('Will retry test, try #%s.' % test.tries) |
| 150 test_collection.add(_Test(test=retry, tries=test.tries)) | 169 test_collection.add(_Test(test=retry, tries=test.tries)) |
| 151 else: | 170 else: |
| 152 # All tests passed or retry limit reached. Either way, record results. | 171 # All tests passed or retry limit reached. Either way, record results. |
| 153 out_results.append(result) | 172 out_results.append(result) |
| 154 except: | 173 except: |
| 155 # An unhandleable exception, ensure tests get run by another device and | 174 # An unhandleable exception, ensure tests get run by another device and |
| 156 # reraise this exception on the main thread. | 175 # reraise this exception on the main thread. |
| (...skipping 20 matching lines...) Expand all Loading... | |
| 177 try: | 196 try: |
| 178 index = threadsafe_counter.GetAndIncrement() | 197 index = threadsafe_counter.GetAndIncrement() |
| 179 logging.warning('Creating shard %s for device %s.', index, device) | 198 logging.warning('Creating shard %s for device %s.', index, device) |
| 180 runner = runner_factory(device, index) | 199 runner = runner_factory(device, index) |
| 181 runner.SetUp() | 200 runner.SetUp() |
| 182 out_runners.append(runner) | 201 out_runners.append(runner) |
| 183 except android_commands.errors.DeviceUnresponsiveError as e: | 202 except android_commands.errors.DeviceUnresponsiveError as e: |
| 184 logging.warning('Failed to create shard for %s: [%s]', device, e) | 203 logging.warning('Failed to create shard for %s: [%s]', device, e) |
| 185 | 204 |
| 186 | 205 |
| 187 def _RunAllTests(runners, tests, num_retries, timeout=None): | 206 def _RunAllTests(runners, test_collection_factory, num_retries, timeout=None, |
| 207 tag_results_with_device=False): | |
| 188 """Run all tests using the given TestRunners. | 208 """Run all tests using the given TestRunners. |
| 189 | 209 |
| 190 Args: | 210 Args: |
| 191 runners: a list of TestRunner objects. | 211 runners: a list of TestRunner objects. |
| 192 tests: a list of Tests to run using the given TestRunners. | 212 test_collection_factory: a callable to generate a _TestCollection object for |
| 213 each test runner. | |
| 193 num_retries: number of retries for a test. | 214 num_retries: number of retries for a test. |
| 194 timeout: watchdog timeout in seconds, defaults to the default timeout. | 215 timeout: watchdog timeout in seconds, defaults to the default timeout. |
| 216 tag_results_with_device: If True, appends the name of the device on which | |
| 217 the test was run to the test name. Used by ReplicateAndRunTests to | |
| 218 identify which device ran each copy of the test, and to ensure each copy | |
| 219 of the test is recorded separately. | |
| 195 | 220 |
| 196 Returns: | 221 Returns: |
| 197 A tuple of (TestRunResults object, exit code) | 222 A tuple of (TestRunResults object, exit code) |
| 198 """ | 223 """ |
| 199 logging.warning('Running %s tests with %s test runners.' % | 224 logging.warning('Running tests with %s test runners.' % (len(runners))) |
| 200 (len(tests), len(runners))) | |
| 201 tests_collection = _TestCollection([_Test(t) for t in tests]) | |
| 202 results = [] | 225 results = [] |
| 203 exit_code = 0 | 226 exit_code = 0 |
| 204 watcher = watchdog_timer.WatchdogTimer(timeout) | 227 watcher = watchdog_timer.WatchdogTimer(timeout) |
| 228 | |
| 205 workers = reraiser_thread.ReraiserThreadGroup( | 229 workers = reraiser_thread.ReraiserThreadGroup( |
| 206 [reraiser_thread.ReraiserThread( | 230 [reraiser_thread.ReraiserThread( |
| 207 _RunTestsFromQueue, | 231 _RunTestsFromQueue, |
| 208 [r, tests_collection, results, watcher, num_retries], | 232 [r, test_collection_factory(), results, watcher, num_retries, |
| 233 tag_results_with_device], | |
| 209 name=r.device[-4:]) | 234 name=r.device[-4:]) |
| 210 for r in runners]) | 235 for r in runners]) |
| 211 run_results = base_test_result.TestRunResults() | 236 run_results = base_test_result.TestRunResults() |
| 212 workers.StartAll() | 237 workers.StartAll() |
| 213 | 238 |
| 214 # Catch DeviceUnresponsiveErrors and set a warning exit code | 239 # Catch DeviceUnresponsiveErrors and set a warning exit code |
| 215 try: | 240 try: |
| 216 workers.JoinAll(watcher) | 241 workers.JoinAll(watcher) |
| 217 except android_commands.errors.DeviceUnresponsiveError as e: | 242 except android_commands.errors.DeviceUnresponsiveError as e: |
| 218 logging.error(e) | 243 logging.error(e) |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 260 runners: a list of TestRunner objects. | 285 runners: a list of TestRunner objects. |
| 261 timeout: watchdog timeout in seconds, defaults to the default timeout. | 286 timeout: watchdog timeout in seconds, defaults to the default timeout. |
| 262 """ | 287 """ |
| 263 threads = reraiser_thread.ReraiserThreadGroup( | 288 threads = reraiser_thread.ReraiserThreadGroup( |
| 264 [reraiser_thread.ReraiserThread(r.TearDown, name=r.device[-4:]) | 289 [reraiser_thread.ReraiserThread(r.TearDown, name=r.device[-4:]) |
| 265 for r in runners]) | 290 for r in runners]) |
| 266 threads.StartAll() | 291 threads.StartAll() |
| 267 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout)) | 292 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout)) |
| 268 | 293 |
| 269 | 294 |
| 270 def ShardAndRunTests(runner_factory, devices, tests, build_type='Debug', | 295 |
| 271 test_timeout=DEFAULT_TIMEOUT, | 296 def _GetAttachedDevices(wait_for_debugger=False, test_device=None): |
| 272 setup_timeout=DEFAULT_TIMEOUT, | 297 """Get all attached devices. |
| 273 num_retries=2): | 298 |
| 274 """Run all tests on attached devices, retrying tests that don't pass. | 299 If we are using a debugger, limit to only one device. |
| 275 | 300 |
| 276 Args: | 301 Args: |
| 277 runner_factory: callable that takes a device and index and returns a | 302 wait_for_debugger: True if this run will use a debugger. |
| 278 TestRunner object. | 303 test_device: name of a specific device to use. |
| 279 devices: list of attached device serial numbers as strings. | 304 |
| 280 tests: list of tests to run. | 305 Returns: |
| 281 build_type: either 'Debug' or 'Release'. | 306 A list of attached devices. |
| 282 test_timeout: watchdog timeout in seconds for running tests, defaults to the | 307 """ |
| 283 default timeout. | 308 attached_devices = [] |
| 284 setup_timeout: watchdog timeout in seconds for creating and cleaning up | 309 |
| 285 test runners, defaults to the default timeout. | 310 attached_devices = android_commands.GetAttachedDevices() |
| 286 num_retries: number of retries for a test. | 311 if test_device: |
| 312 assert test_device in attached_devices | |
| 313 attached_devices = [test_device] | |
| 314 | |
| 315 if len(attached_devices) > 1 and wait_for_debugger: | |
| 316 logging.warning('Debugger can not be sharded, using first available device') | |
| 317 attached_devices = attached_devices[:1] | |
| 318 | |
| 319 return attached_devices | |
| 320 | |
| 321 | |
| 322 def ReplicateAndRunTests(tests, wait_for_debugger, test_device, | |
| 323 *args, **kwargs): | |
| 324 """Replicates the tests for each device, so all devices run every test. | |
| 325 | |
| 326 Args: | |
| 327 tests: A list of tests to run. | |
| 328 wait_for_debugger: True if this test is using a debugger. | |
| 329 test_device: A specific device to run tests on, or None. | |
| 330 *args, **kwargs: Args and kwargs to _RunTests which we pass through. | |
| 287 | 331 |
| 288 Returns: | 332 Returns: |
| 289 A tuple of (base_test_result.TestRunResults object, exit code). | 333 A tuple of (base_test_result.TestRunResults object, exit code). |
| 290 """ | 334 """ |
| 335 | |
| 291 if not tests: | 336 if not tests: |
| 292 logging.error('No tests to run.') | 337 logging.error('No tests to run.') |
| 293 return (base_test_result.TestRunResults(), constants.ERROR_EXIT_CODE) | 338 return (base_test_result.TestRunResults(), constants.ERROR_EXIT_CODE) |
| 294 | 339 |
| 295 logging.info('Will run %d tests: %s', len(tests), str(tests)) | 340 logging.info('Will run %d tests: %s', len(tests), str(tests)) |
| 341 | |
| 342 # Generate a unique _TestCollection object for each test runner, but use | |
| 343 # the same set of tests. | |
| 344 TestCollectionFactory = lambda: _TestCollection([_Test(t) for t in tests]) | |
| 345 | |
| 346 devices = _GetAttachedDevices(wait_for_debugger, test_device) | |
| 347 return _RunTests(TestCollectionFactory, devices, *args, | |
| 348 tag_results_with_device=True, **kwargs) | |
| 349 | |
| 350 | |
| 351 def ShardAndRunTests(tests, wait_for_debugger, test_device, *args, **kwargs): | |
| 352 """Distributes all tests over devices through a shared pool of tests. |
|
frankf
2013/07/16 00:02:18
It's sufficient to say "Shards tests over devices"
gkanwar
2013/07/16 00:47:03
Done.
| |
| 353 | |
| 354 Args: | |
| 355 tests: A list of tests to run. | |
| 356 wait_for_debugger: True if this test is using a debugger. | |
| 357 test_device: A specific device to run tests on, or None. | |
| 358 *args, **kwargs: Args and kwargs to _RunTests which we pass through. | |
| 359 | |
| 360 Returns: | |
| 361 A tuple of (base_test_result.TestRunResults object, exit code). | |
| 362 """ | |
| 363 | |
| 364 if not tests: | |
|
frankf
2013/07/16 00:02:18
There's a lot of duplication between these two methods
gkanwar
2013/07/16 00:47:03
I ended up combining the two methods back together
| |
| 365 logging.error('No tests to run.') | |
| 366 return (base_test_result.TestRunResults(), constants.ERROR_EXIT_CODE) | |
| 367 | |
| 368 logging.info('Will run %d tests: %s', len(tests), str(tests)) | |
| 369 | |
| 370 # Generate a shared _TestCollection object for all test runners, so they draw | |
| 371 # from a common pool of tests. | |
| 372 shared_test_collection = _TestCollection([_Test(t) for t in tests]) | |
| 373 TestCollectionFactory = lambda: shared_test_collection | |
| 374 | |
| 375 devices = _GetAttachedDevices(wait_for_debugger, test_device) | |
| 376 return _RunTests(TestCollectionFactory, devices, *args, | |
| 377 tag_results_with_device=False, **kwargs) | |
| 378 | |
| 379 | |
| 380 def _RunTests(test_collection_factory, devices, runner_factory, | |
|
frankf
2013/07/16 00:02:18
It's convention to move callee above caller
gkanwar
2013/07/16 00:47:03
Done.
| |
| 381 build_type='Debug', | |
| 382 test_timeout=DEFAULT_TIMEOUT, | |
| 383 setup_timeout=DEFAULT_TIMEOUT, | |
| 384 num_retries=2, | |
| 385 tag_results_with_device=False): | |
| 386 """Run all tests on attached devices, retrying tests that don't pass. | |
| 387 | |
| 388 Args: | |
| 389 test_collection_factory: callable that is used to generate a _TestCollection | |
| 390 object for each test runner. | |
| 391 devices: list of attached device serial numbers as strings. | |
| 392 build_type: either 'Debug' or 'Release'. | |
| 393 runner_factory: callable that takes a device and index and returns a | |
| 394 TestRunner object. | |
| 395 test_timeout: watchdog timeout in seconds for running tests, defaults to the | |
| 396 default timeout. | |
| 397 setup_timeout: watchdog timeout in seconds for creating and cleaning up | |
| 398 test runners, defaults to the default timeout. | |
| 399 num_retries: number of retries for a test. | |
| 400 tag_results_with_device: If True, appends the name of the device on which | |
| 401 the test was run to the test name. Used by ReplicateAndRunTests to | |
| 402 identify which device ran each copy of the test, and to ensure each copy | |
| 403 of the test is recorded separately. | |
| 404 | |
| 405 Returns: | |
| 406 A tuple of (base_test_result.TestRunResults object, exit code). | |
| 407 """ | |
| 296 forwarder.Forwarder.KillHost(build_type) | 408 forwarder.Forwarder.KillHost(build_type) |
| 297 runners = _CreateRunners(runner_factory, devices, setup_timeout) | 409 runners = _CreateRunners(runner_factory, devices, setup_timeout) |
| 298 try: | 410 try: |
| 299 return _RunAllTests(runners, tests, num_retries, test_timeout) | 411 return _RunAllTests(runners, test_collection_factory, |
| 412 num_retries, test_timeout, tag_results_with_device) | |
| 300 finally: | 413 finally: |
| 301 try: | 414 try: |
| 302 _TearDownRunners(runners, setup_timeout) | 415 _TearDownRunners(runners, setup_timeout) |
| 303 except android_commands.errors.DeviceUnresponsiveError as e: | 416 except android_commands.errors.DeviceUnresponsiveError as e: |
| 304 logging.warning('Device unresponsive during TearDown: [%s]', e) | 417 logging.warning('Device unresponsive during TearDown: [%s]', e) |
| 305 finally: | 418 finally: |
| 306 forwarder.Forwarder.KillHost(build_type) | 419 forwarder.Forwarder.KillHost(build_type) |
| OLD | NEW |