OLD | NEW |
---|---|
1 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """Implements test sharding logic.""" | 5 """Implements test sharding logic.""" |
6 | 6 |
7 import logging | 7 import logging |
8 import threading | 8 import threading |
9 | 9 |
10 from pylib import android_commands | 10 from pylib import android_commands |
11 from pylib import constants | 11 from pylib import constants |
12 from pylib import forwarder | 12 from pylib import forwarder |
13 from pylib.utils import reraiser_thread | 13 from pylib.utils import reraiser_thread |
14 from pylib.utils import watchdog_timer | 14 from pylib.utils import watchdog_timer |
15 | 15 |
16 import base_test_result | 16 import base_test_result |
17 | 17 |
18 | 18 |
19 DEFAULT_TIMEOUT = 7 * 60 # seven minutes | 19 DEFAULT_TIMEOUT = 7 * 60 # seven minutes |
20 VALID_SHARDING_OPTIONS = ['distribute', 'duplicate'] | |
20 | 21 |
21 | 22 |
22 class _ThreadSafeCounter(object): | 23 class _ThreadSafeCounter(object): |
23 """A threadsafe counter.""" | 24 """A threadsafe counter.""" |
24 | 25 |
25 def __init__(self): | 26 def __init__(self): |
26 self._lock = threading.Lock() | 27 self._lock = threading.Lock() |
27 self._value = 0 | 28 self._value = 0 |
28 | 29 |
29 def GetAndIncrement(self): | 30 def GetAndIncrement(self): |
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
177 try: | 178 try: |
178 index = threadsafe_counter.GetAndIncrement() | 179 index = threadsafe_counter.GetAndIncrement() |
179 logging.warning('Creating shard %s for device %s.', index, device) | 180 logging.warning('Creating shard %s for device %s.', index, device) |
180 runner = runner_factory(device, index) | 181 runner = runner_factory(device, index) |
181 runner.SetUp() | 182 runner.SetUp() |
182 out_runners.append(runner) | 183 out_runners.append(runner) |
183 except android_commands.errors.DeviceUnresponsiveError as e: | 184 except android_commands.errors.DeviceUnresponsiveError as e: |
184 logging.warning('Failed to create shard for %s: [%s]', device, e) | 185 logging.warning('Failed to create shard for %s: [%s]', device, e) |
185 | 186 |
186 | 187 |
187 def _RunAllTests(runners, tests, num_retries, timeout=None): | 188 def _RunAllTests(runners, tests, sharding, num_retries, timeout=None): |
188 """Run all tests using the given TestRunners. | 189 """Run all tests using the given TestRunners. |
189 | 190 |
190 Args: | 191 Args: |
191 runners: a list of TestRunner objects. | 192 runners: a list of TestRunner objects. |
192 tests: a list of Tests to run using the given TestRunners. | 193 tests: a list of Tests to run using the given TestRunners. |
193 num_retries: number of retries for a test. | 194 num_retries: number of retries for a test. |
194 timeout: watchdog timeout in seconds, defaults to the default timeout. | 195 timeout: watchdog timeout in seconds, defaults to the default timeout. |
196 sharding: a string to indicate whether we should distribute all tests as a | |
197 common pool to draw from, or duplicate all tests onto every test runner. | |
198 Must be either 'distribute' or 'duplicate' | |
195 | 199 |
196 Returns: | 200 Returns: |
197 A tuple of (TestRunResults object, exit code) | 201 A tuple of (TestRunResults object, exit code) |
198 """ | 202 """ |
199 logging.warning('Running %s tests with %s test runners.' % | 203 logging.warning('Running %s tests with %s test runners.' % |
200 (len(tests), len(runners))) | 204 (len(tests), len(runners))) |
201 tests_collection = _TestCollection([_Test(t) for t in tests]) | |
202 results = [] | 205 results = [] |
203 exit_code = 0 | 206 exit_code = 0 |
204 watcher = watchdog_timer.WatchdogTimer(timeout) | 207 watcher = watchdog_timer.WatchdogTimer(timeout) |
208 | |
209 if sharding == 'distribute': | |
210 shared_test_collection = _TestCollection([_Test(t) for t in tests]) | |
211 GenerateTestCollection = lambda: shared_test_collection | |
212 elif sharding == 'duplicate': | |
213 GenerateTestCollection = lambda: _TestCollection([_Test(t) for t in tests]) | |
214 else: | |
215 raise ValueError('Unknown sharding option %s. Options are: %s' | |
216 % (sharding, VALID_SHARDING_OPTIONS)) | |
217 | |
218 # Use a lambda to get the test collection for each runner for cleanliness | |
205 workers = reraiser_thread.ReraiserThreadGroup( | 219 workers = reraiser_thread.ReraiserThreadGroup( |
206 [reraiser_thread.ReraiserThread( | 220 [reraiser_thread.ReraiserThread( |
207 _RunTestsFromQueue, | 221 _RunTestsFromQueue, |
208 [r, tests_collection, results, watcher, num_retries], | 222 [r, GenerateTestCollection(), results, watcher, num_retries], |
209 name=r.device[-4:]) | 223 name=r.device[-4:]) |
210 for r in runners]) | 224 for r in runners]) |
211 run_results = base_test_result.TestRunResults() | 225 run_results = base_test_result.TestRunResults() |
212 workers.StartAll() | 226 workers.StartAll() |
213 | 227 |
214 # Catch DeviceUnresponsiveErrors and set a warning exit code | 228 # Catch DeviceUnresponsiveErrors and set a warning exit code |
215 try: | 229 try: |
216 workers.JoinAll(watcher) | 230 workers.JoinAll(watcher) |
217 except android_commands.errors.DeviceUnresponsiveError as e: | 231 except android_commands.errors.DeviceUnresponsiveError as e: |
218 logging.error(e) | 232 logging.error(e) |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
263 threads = reraiser_thread.ReraiserThreadGroup( | 277 threads = reraiser_thread.ReraiserThreadGroup( |
264 [reraiser_thread.ReraiserThread(r.TearDown, name=r.device[-4:]) | 278 [reraiser_thread.ReraiserThread(r.TearDown, name=r.device[-4:]) |
265 for r in runners]) | 279 for r in runners]) |
266 threads.StartAll() | 280 threads.StartAll() |
267 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout)) | 281 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout)) |
268 | 282 |
269 | 283 |
270 def ShardAndRunTests(runner_factory, devices, tests, build_type='Debug', | 284 def ShardAndRunTests(runner_factory, devices, tests, build_type='Debug', |
271 test_timeout=DEFAULT_TIMEOUT, | 285 test_timeout=DEFAULT_TIMEOUT, |
272 setup_timeout=DEFAULT_TIMEOUT, | 286 setup_timeout=DEFAULT_TIMEOUT, |
273 num_retries=2): | 287 num_retries=2, |
288 sharding='distribute'): | |
craigdh
2013/07/12 18:14:15
The behavior is quite different, I think it would
gkanwar
2013/07/12 18:22:41
I was discussing this with Frank in the gdoc, and
frankf
2013/07/12 18:49:19
I still think the semantics is confusing. Sharding
gkanwar
2013/07/12 18:52:04
That sounds reasonable to me. Craig, does that sou
craigdh
2013/07/12 19:03:36
I like the method names, but test_allocator.py is
gkanwar
2013/07/12 19:22:32
We already have a 'dispatch.py' under pylib now (t
| |
274 """Run all tests on attached devices, retrying tests that don't pass. | 289 """Run all tests on attached devices, retrying tests that don't pass. |
275 | 290 |
276 Args: | 291 Args: |
277 runner_factory: callable that takes a device and index and returns a | 292 runner_factory: callable that takes a device and index and returns a |
278 TestRunner object. | 293 TestRunner object. |
279 devices: list of attached device serial numbers as strings. | 294 devices: list of attached device serial numbers as strings. |
280 tests: list of tests to run. | 295 tests: list of tests to run. |
281 build_type: either 'Debug' or 'Release'. | 296 build_type: either 'Debug' or 'Release'. |
282 test_timeout: watchdog timeout in seconds for running tests, defaults to the | 297 test_timeout: watchdog timeout in seconds for running tests, defaults to the |
283 default timeout. | 298 default timeout. |
284 setup_timeout: watchdog timeout in seconds for creating and cleaning up | 299 setup_timeout: watchdog timeout in seconds for creating and cleaning up |
285 test runners, defaults to the default timeout. | 300 test runners, defaults to the default timeout. |
286 num_retries: number of retries for a test. | 301 num_retries: number of retries for a test. |
302 sharding: a string to indicate whether we should distribute all tests as a | |
303 common pool to draw from, or duplicate all tests onto every test runner. | |
304 Must be either 'distribute' or 'duplicate' | |
287 | 305 |
288 Returns: | 306 Returns: |
289 A tuple of (base_test_result.TestRunResults object, exit code). | 307 A tuple of (base_test_result.TestRunResults object, exit code). |
290 """ | 308 """ |
309 | |
craigdh
2013/07/12 18:14:15
remove this new blank line
gkanwar
2013/07/12 18:22:41
Will do.
| |
291 if not tests: | 310 if not tests: |
292 logging.error('No tests to run.') | 311 logging.error('No tests to run.') |
293 return (base_test_result.TestRunResults(), constants.ERROR_EXIT_CODE) | 312 return (base_test_result.TestRunResults(), constants.ERROR_EXIT_CODE) |
294 | 313 |
295 logging.info('Will run %d tests: %s', len(tests), str(tests)) | 314 logging.info('Will run %d tests: %s', len(tests), str(tests)) |
296 forwarder.Forwarder.KillHost(build_type) | 315 forwarder.Forwarder.KillHost(build_type) |
297 runners = _CreateRunners(runner_factory, devices, setup_timeout) | 316 runners = _CreateRunners(runner_factory, devices, setup_timeout) |
298 try: | 317 try: |
299 return _RunAllTests(runners, tests, num_retries, test_timeout) | 318 return _RunAllTests(runners, tests, sharding, num_retries, test_timeout) |
300 finally: | 319 finally: |
301 try: | 320 try: |
302 _TearDownRunners(runners, setup_timeout) | 321 _TearDownRunners(runners, setup_timeout) |
303 except android_commands.errors.DeviceUnresponsiveError as e: | 322 except android_commands.errors.DeviceUnresponsiveError as e: |
304 logging.warning('Device unresponsive during TearDown: [%s]', e) | 323 logging.warning('Device unresponsive during TearDown: [%s]', e) |
305 finally: | 324 finally: |
306 forwarder.Forwarder.KillHost(build_type) | 325 forwarder.Forwarder.KillHost(build_type) |
OLD | NEW |