| OLD | NEW |
| 1 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """Implements test sharding logic.""" | 5 """Implements test sharding logic.""" |
| 6 | 6 |
| 7 import logging | 7 import logging |
| 8 import threading | 8 import threading |
| 9 | 9 |
| 10 from pylib import android_commands | 10 from pylib import android_commands |
| 11 from pylib import forwarder | 11 from pylib import forwarder |
| 12 from pylib.utils import reraiser_thread | 12 from pylib.utils import reraiser_thread |
| 13 from pylib.utils import watchdog_timer |
| 13 | 14 |
| 14 import base_test_result | 15 import base_test_result |
| 15 | 16 |
| 16 | 17 |
| 18 DEFAULT_TIMEOUT = 7 * 60 # seven minutes |
| 19 |
| 20 |
| 17 class _ThreadSafeCounter(object): | 21 class _ThreadSafeCounter(object): |
| 18 """A threadsafe counter.""" | 22 """A threadsafe counter.""" |
| 23 |
| 19 def __init__(self): | 24 def __init__(self): |
| 20 self._lock = threading.Lock() | 25 self._lock = threading.Lock() |
| 21 self._value = 0 | 26 self._value = 0 |
| 22 | 27 |
| 23 def GetAndIncrement(self): | 28 def GetAndIncrement(self): |
| 24 """Get the current value and increment it atomically. | 29 """Get the current value and increment it atomically. |
| 25 | 30 |
| 26 Returns: | 31 Returns: |
| 27 The value before incrementing. | 32 The value before incrementing. |
| 28 """ | 33 """ |
| 29 with self._lock: | 34 with self._lock: |
| 30 pre_increment = self._value | 35 pre_increment = self._value |
| 31 self._value += 1 | 36 self._value += 1 |
| 32 return pre_increment | 37 return pre_increment |
| 33 | 38 |
| 34 | 39 |
| 35 class _Test(object): | 40 class _Test(object): |
| 36 """Holds a test with additional metadata.""" | 41 """Holds a test with additional metadata.""" |
| 42 |
| 37 def __init__(self, test, tries=0): | 43 def __init__(self, test, tries=0): |
| 38 """Initializes the _Test object. | 44 """Initializes the _Test object. |
| 39 | 45 |
| 40 Args: | 46 Args: |
| 41 test: the test. | 47 test: the test. |
| 42 tries: number of tries so far. | 48 tries: number of tries so far. |
| 43 """ | 49 """ |
| 44 self.test = test | 50 self.test = test |
| 45 self.tries = tries | 51 self.tries = tries |
| 46 | 52 |
| 47 | 53 |
| 48 class _TestCollection(object): | 54 class _TestCollection(object): |
| 49 """A threadsafe collection of tests. | 55 """A threadsafe collection of tests. |
| 50 | 56 |
| 51 Args: | 57 Args: |
| 52 tests: list of tests to put in the collection. | 58 tests: list of tests to put in the collection. |
| 53 """ | 59 """ |
| 60 |
| 54 def __init__(self, tests=[]): | 61 def __init__(self, tests=[]): |
| 55 self._lock = threading.Lock() | 62 self._lock = threading.Lock() |
| 56 self._tests = [] | 63 self._tests = [] |
| 57 self._tests_in_progress = 0 | 64 self._tests_in_progress = 0 |
| 58 # Used to signal that an item is avaliable or all items have been handled. | 65 # Used to signal that an item is avaliable or all items have been handled. |
| 59 self._item_avaliable_or_all_done = threading.Event() | 66 self._item_avaliable_or_all_done = threading.Event() |
| 60 for t in tests: | 67 for t in tests: |
| 61 self.add(t) | 68 self.add(t) |
| 62 | 69 |
| 63 def _pop(self): | 70 def _pop(self): |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 102 | 109 |
| 103 def __iter__(self): | 110 def __iter__(self): |
| 104 """Iterate through tests in the collection until all have been handled.""" | 111 """Iterate through tests in the collection until all have been handled.""" |
| 105 while True: | 112 while True: |
| 106 r = self._pop() | 113 r = self._pop() |
| 107 if r is None: | 114 if r is None: |
| 108 break | 115 break |
| 109 yield r | 116 yield r |
| 110 | 117 |
| 111 | 118 |
| 112 def _RunTestsFromQueue(runner, test_collection, out_results): | 119 def _RunTestsFromQueue(runner, test_collection, out_results, watcher): |
| 113 """Runs tests from the test_collection until empty using the given runner. | 120 """Runs tests from the test_collection until empty using the given runner. |
| 114 | 121 |
| 115 Adds TestRunResults objects to the out_results list and may add tests to the | 122 Adds TestRunResults objects to the out_results list and may add tests to the |
| 116 out_retry list. | 123 out_retry list. |
| 117 | 124 |
| 118 Args: | 125 Args: |
| 119 runner: A TestRunner object used to run the tests. | 126 runner: A TestRunner object used to run the tests. |
| 120 test_collection: A _TestCollection from which to get _Test objects to run. | 127 test_collection: A _TestCollection from which to get _Test objects to run. |
| 121 out_results: A list to add TestRunResults to. | 128 out_results: A list to add TestRunResults to. |
| 129 watcher: A watchdog_timer.WatchdogTimer object, used as a shared timeout. |
| 122 """ | 130 """ |
| 123 for test in test_collection: | 131 for test in test_collection: |
| 132 watcher.Reset() |
| 124 try: | 133 try: |
| 125 if not android_commands.IsDeviceAttached(runner.device): | 134 if not android_commands.IsDeviceAttached(runner.device): |
| 126 # Device is unresponsive, stop handling tests on this device. | 135 # Device is unresponsive, stop handling tests on this device. |
| 127 msg = 'Device %s is unresponsive.' % runner.device | 136 msg = 'Device %s is unresponsive.' % runner.device |
| 128 logging.warning(msg) | 137 logging.warning(msg) |
| 129 raise android_commands.errors.DeviceUnresponsiveError(msg) | 138 raise android_commands.errors.DeviceUnresponsiveError(msg) |
| 130 result, retry = runner.RunTest(test.test) | 139 result, retry = runner.RunTest(test.test) |
| 131 test.tries += 1 | 140 test.tries += 1 |
| 132 if retry and test.tries <= 3: | 141 if retry and test.tries <= 3: |
| 133 # Retry non-passing results, only record passing results. | 142 # Retry non-passing results, only record passing results. |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 171 try: | 180 try: |
| 172 index = threadsafe_counter.GetAndIncrement() | 181 index = threadsafe_counter.GetAndIncrement() |
| 173 logging.warning('*****Creating shard %s for device %s.', index, device) | 182 logging.warning('*****Creating shard %s for device %s.', index, device) |
| 174 runner = runner_factory(device, index) | 183 runner = runner_factory(device, index) |
| 175 runner.SetUp() | 184 runner.SetUp() |
| 176 out_runners.append(runner) | 185 out_runners.append(runner) |
| 177 except android_commands.errors.DeviceUnresponsiveError as e: | 186 except android_commands.errors.DeviceUnresponsiveError as e: |
| 178 logging.warning('****Failed to create shard for %s: [%s]', device, e) | 187 logging.warning('****Failed to create shard for %s: [%s]', device, e) |
| 179 | 188 |
| 180 | 189 |
| 181 def _RunAllTests(runners, tests): | 190 def _RunAllTests(runners, tests, timeout=None): |
| 182 """Run all tests using the given TestRunners. | 191 """Run all tests using the given TestRunners. |
| 183 | 192 |
| 184 Args: | 193 Args: |
| 185 runners: a list of TestRunner objects. | 194 runners: a list of TestRunner objects. |
| 186 tests: a list of Tests to run using the given TestRunners. | 195 tests: a list of Tests to run using the given TestRunners. |
| 196 timeout: watchdog timeout in seconds, defaults to the default timeout. |
| 187 | 197 |
| 188 Returns: | 198 Returns: |
| 189 A TestRunResults object. | 199 A TestRunResults object. |
| 190 """ | 200 """ |
| 191 logging.warning('****Running %s tests with %s test runners.' % | 201 logging.warning('****Running %s tests with %s test runners.' % |
| 192 (len(tests), len(runners))) | 202 (len(tests), len(runners))) |
| 193 tests_collection = _TestCollection([_Test(t) for t in tests]) | 203 tests_collection = _TestCollection([_Test(t) for t in tests]) |
| 194 results = [] | 204 results = [] |
| 195 workers = reraiser_thread.ReraiserThreadGroup([reraiser_thread.ReraiserThread( | 205 watcher = watchdog_timer.WatchdogTimer(timeout) |
| 196 _RunTestsFromQueue, [r, tests_collection, results]) for r in runners]) | 206 workers = reraiser_thread.ReraiserThreadGroup( |
| 207 [reraiser_thread.ReraiserThread(_RunTestsFromQueue, |
| 208 [r, tests_collection, results, watcher]) |
| 209 for r in runners]) |
| 197 workers.StartAll() | 210 workers.StartAll() |
| 198 workers.JoinAll() | 211 workers.JoinAll(watcher) |
| 199 run_results = base_test_result.TestRunResults() | 212 run_results = base_test_result.TestRunResults() |
| 200 for r in results: | 213 for r in results: |
| 201 run_results.AddTestRunResults(r) | 214 run_results.AddTestRunResults(r) |
| 202 return run_results | 215 return run_results |
| 203 | 216 |
| 204 | 217 |
| 205 def _CreateRunners(runner_factory, devices): | 218 def _CreateRunners(runner_factory, devices, timeout=None): |
| 206 """Creates a test runner for each device and calls SetUp() in parallel. | 219 """Creates a test runner for each device and calls SetUp() in parallel. |
| 207 | 220 |
| 208 Note: if a device is unresponsive the corresponding TestRunner will not be | 221 Note: if a device is unresponsive the corresponding TestRunner will not be |
| 209 included in the returned list. | 222 included in the returned list. |
| 210 | 223 |
| 211 Args: | 224 Args: |
| 212 runner_factory: callable that takes a device and index and returns a | 225 runner_factory: callable that takes a device and index and returns a |
| 213 TestRunner object. | 226 TestRunner object. |
| 214 devices: list of device serial numbers as strings. | 227 devices: list of device serial numbers as strings. |
| 228 timeout: watchdog timeout in seconds, defaults to the default timeout. |
| 215 | 229 |
| 216 Returns: | 230 Returns: |
| 217 A list of TestRunner objects. | 231 A list of TestRunner objects. |
| 218 """ | 232 """ |
| 219 logging.warning('****Creating %s test runners.' % len(devices)) | 233 logging.warning('****Creating %s test runners.' % len(devices)) |
| 220 runners = [] | 234 runners = [] |
| 221 counter = _ThreadSafeCounter() | 235 counter = _ThreadSafeCounter() |
| 222 threads = reraiser_thread.ReraiserThreadGroup( | 236 threads = reraiser_thread.ReraiserThreadGroup( |
| 223 [reraiser_thread.ReraiserThread(_SetUp, [runner_factory, d, runners, | 237 [reraiser_thread.ReraiserThread(_SetUp, [runner_factory, d, runners, |
| 224 counter]) | 238 counter]) |
| 225 for d in devices]) | 239 for d in devices]) |
| 226 threads.StartAll() | 240 threads.StartAll() |
| 227 threads.JoinAll() | 241 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout)) |
| 228 return runners | 242 return runners |
| 229 | 243 |
| 230 | 244 |
| 231 def _TearDownRunners(runners): | 245 def _TearDownRunners(runners, timeout=None): |
| 232 """Calls TearDown() for each test runner in parallel. | 246 """Calls TearDown() for each test runner in parallel. |
| 233 Args: | 247 Args: |
| 234 runners: a list of TestRunner objects. | 248 runners: a list of TestRunner objects. |
| 249 timeout: watchdog timeout in seconds, defaults to the default timeout. |
| 235 """ | 250 """ |
| 236 threads = reraiser_thread.ReraiserThreadGroup( | 251 threads = reraiser_thread.ReraiserThreadGroup( |
| 237 [reraiser_thread.ReraiserThread(runner.TearDown) | 252 [reraiser_thread.ReraiserThread(runner.TearDown) |
| 238 for runner in runners]) | 253 for runner in runners]) |
| 239 threads.StartAll() | 254 threads.StartAll() |
| 240 threads.JoinAll() | 255 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout)) |
| 241 | 256 |
| 242 | 257 |
| 243 def ShardAndRunTests(runner_factory, devices, tests, build_type='Debug'): | 258 def ShardAndRunTests(runner_factory, devices, tests, build_type='Debug', |
| 259 test_timeout=DEFAULT_TIMEOUT, |
| 260 setup_timeout=DEFAULT_TIMEOUT): |
| 244 """Run all tests on attached devices, retrying tests that don't pass. | 261 """Run all tests on attached devices, retrying tests that don't pass. |
| 245 | 262 |
| 246 Args: | 263 Args: |
| 247 runner_factory: callable that takes a device and index and returns a | 264 runner_factory: callable that takes a device and index and returns a |
| 248 TestRunner object. | 265 TestRunner object. |
| 249 devices: list of attached device serial numbers as strings. | 266 devices: list of attached device serial numbers as strings. |
| 250 tests: list of tests to run. | 267 tests: list of tests to run. |
| 251 build_type: either 'Debug' or 'Release'. | 268 build_type: either 'Debug' or 'Release'. |
| 269 test_timeout: watchdog timeout in seconds for running tests, defaults to the |
| 270 default timeout. |
| 271 setup_timeout: watchdog timeout in seconds for creating and cleaning up |
| 272 test runners, defaults to the default timeout. |
| 252 | 273 |
| 253 Returns: | 274 Returns: |
| 254 A base_test_result.TestRunResults object. | 275 A base_test_result.TestRunResults object. |
| 255 """ | 276 """ |
| 256 forwarder.Forwarder.KillHost(build_type) | 277 forwarder.Forwarder.KillHost(build_type) |
| 257 runners = _CreateRunners(runner_factory, devices) | 278 runners = _CreateRunners(runner_factory, devices, setup_timeout) |
| 258 try: | 279 try: |
| 259 return _RunAllTests(runners, tests) | 280 return _RunAllTests(runners, tests, test_timeout) |
| 260 finally: | 281 finally: |
| 261 try: | 282 try: |
| 262 _TearDownRunners(runners) | 283 _TearDownRunners(runners, setup_timeout) |
| 263 except android_commands.errors.DeviceUnresponsiveError as e: | 284 except android_commands.errors.DeviceUnresponsiveError as e: |
| 264 logging.warning('****Device unresponsive during TearDown: [%s]', e) | 285 logging.warning('****Device unresponsive during TearDown: [%s]', e) |
| 265 finally: | 286 finally: |
| 266 forwarder.Forwarder.KillHost(build_type) | 287 forwarder.Forwarder.KillHost(build_type) |
| OLD | NEW |