OLD | NEW |
---|---|
1 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """Implements test sharding logic.""" | 5 """Implements test sharding logic.""" |
frankf
2013/07/16 18:42:32
Update
gkanwar
2013/07/16 20:27:36
Done.
| |
6 | 6 |
7 import logging | 7 import logging |
8 import threading | 8 import threading |
9 | 9 |
10 from pylib import android_commands | 10 from pylib import android_commands |
11 from pylib import constants | 11 from pylib import constants |
12 from pylib import forwarder | 12 from pylib import forwarder |
13 from pylib.utils import reraiser_thread | 13 from pylib.utils import reraiser_thread |
14 from pylib.utils import watchdog_timer | 14 from pylib.utils import watchdog_timer |
15 | 15 |
16 import base_test_result | 16 import base_test_result |
17 | 17 |
18 | 18 |
19 DEFAULT_TIMEOUT = 7 * 60 # seven minutes | 19 DEFAULT_TIMEOUT = 7 * 60 # seven minutes |
20 VALID_TEST_ALLOCATION = ['shard', 'replicate'] | |
20 | 21 |
21 | 22 |
22 class _ThreadSafeCounter(object): | 23 class _ThreadSafeCounter(object): |
23 """A threadsafe counter.""" | 24 """A threadsafe counter.""" |
24 | 25 |
25 def __init__(self): | 26 def __init__(self): |
26 self._lock = threading.Lock() | 27 self._lock = threading.Lock() |
27 self._value = 0 | 28 self._value = 0 |
28 | 29 |
29 def GetAndIncrement(self): | 30 def GetAndIncrement(self): |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
111 def __iter__(self): | 112 def __iter__(self): |
112 """Iterate through tests in the collection until all have been handled.""" | 113 """Iterate through tests in the collection until all have been handled.""" |
113 while True: | 114 while True: |
114 r = self._pop() | 115 r = self._pop() |
115 if r is None: | 116 if r is None: |
116 break | 117 break |
117 yield r | 118 yield r |
118 | 119 |
119 | 120 |
120 def _RunTestsFromQueue(runner, test_collection, out_results, watcher, | 121 def _RunTestsFromQueue(runner, test_collection, out_results, watcher, |
121 num_retries): | 122 num_retries, tag_results_with_device=False): |
122 """Runs tests from the test_collection until empty using the given runner. | 123 """Runs tests from the test_collection until empty using the given runner. |
123 | 124 |
124 Adds TestRunResults objects to the out_results list and may add tests to the | 125 Adds TestRunResults objects to the out_results list and may add tests to the |
125 out_retry list. | 126 out_retry list. |
126 | 127 |
127 Args: | 128 Args: |
128 runner: A TestRunner object used to run the tests. | 129 runner: A TestRunner object used to run the tests. |
129 test_collection: A _TestCollection from which to get _Test objects to run. | 130 test_collection: A _TestCollection from which to get _Test objects to run. |
130 out_results: A list to add TestRunResults to. | 131 out_results: A list to add TestRunResults to. |
131 watcher: A watchdog_timer.WatchdogTimer object, used as a shared timeout. | 132 watcher: A watchdog_timer.WatchdogTimer object, used as a shared timeout. |
132 num_retries: Number of retries for a test. | 133 num_retries: Number of retries for a test. |
134 tag_results_with_device: If True, appends the name of the device on which | |
135 the test was run to the test name. Used by ReplicateAndRunTests to | |
136 identify which device ran each copy of the test, and to ensure each copy | |
137 of the test is recorded separately. | |
133 """ | 138 """ |
139 | |
140 # Used to tag all results to identify which device caused failing tests | |
141 def TagTestRunResults(test_run_results): | |
142 new_test_run_results = base_test_result.TestRunResults() | |
143 for test_result in test_run_results.GetAll(): | |
144 new_result = base_test_result.BaseTestResult( | |
145 '%s_%s' % (runner.device, test_result.GetName()), | |
frankf
2013/07/16 18:42:32
Use last 4 digits of device serial.
gkanwar
2013/07/16 20:27:36
Done.
| |
146 test_result.GetType(), test_result.GetLog()) | |
frankf
2013/07/16 18:42:32
As the name suggests, BaseTestResult can be derived
gkanwar
2013/07/16 20:27:36
Updated to move tag into the BaseTestResult class
| |
147 new_test_run_results.AddResult(new_result) | |
148 return new_test_run_results | |
149 | |
134 for test in test_collection: | 150 for test in test_collection: |
135 watcher.Reset() | 151 watcher.Reset() |
136 try: | 152 try: |
137 if not android_commands.IsDeviceAttached(runner.device): | 153 if not android_commands.IsDeviceAttached(runner.device): |
138 # Device is unresponsive, stop handling tests on this device. | 154 # Device is unresponsive, stop handling tests on this device. |
139 msg = 'Device %s is unresponsive.' % runner.device | 155 msg = 'Device %s is unresponsive.' % runner.device |
140 logging.warning(msg) | 156 logging.warning(msg) |
141 raise android_commands.errors.DeviceUnresponsiveError(msg) | 157 raise android_commands.errors.DeviceUnresponsiveError(msg) |
142 result, retry = runner.RunTest(test.test) | 158 result, retry = runner.RunTest(test.test) |
159 if tag_results_with_device: | |
160 result = TagTestRunResults(result) | |
143 test.tries += 1 | 161 test.tries += 1 |
144 if retry and test.tries <= num_retries: | 162 if retry and test.tries <= num_retries: |
145 # Retry non-passing results, only record passing results. | 163 # Retry non-passing results, only record passing results. |
146 pass_results = base_test_result.TestRunResults() | 164 pass_results = base_test_result.TestRunResults() |
165 # Tag all results with the device, so we can identify the failing device | |
166 # for replicated tests. | |
frankf
2013/07/16 18:42:32
Is this comment misplaced?
gkanwar
2013/07/16 20:27:36
Oops, removed.
| |
147 pass_results.AddResults(result.GetPass()) | 167 pass_results.AddResults(result.GetPass()) |
148 out_results.append(pass_results) | 168 out_results.append(pass_results) |
149 logging.warning('Will retry test, try #%s.' % test.tries) | 169 logging.warning('Will retry test, try #%s.' % test.tries) |
150 test_collection.add(_Test(test=retry, tries=test.tries)) | 170 test_collection.add(_Test(test=retry, tries=test.tries)) |
151 else: | 171 else: |
152 # All tests passed or retry limit reached. Either way, record results. | 172 # All tests passed or retry limit reached. Either way, record results. |
153 out_results.append(result) | 173 out_results.append(result) |
154 except: | 174 except: |
155 # An unhandleable exception, ensure tests get run by another device and | 175 # An unhandleable exception, ensure tests get run by another device and |
156 # reraise this exception on the main thread. | 176 # reraise this exception on the main thread. |
(...skipping 20 matching lines...) Expand all Loading... | |
177 try: | 197 try: |
178 index = threadsafe_counter.GetAndIncrement() | 198 index = threadsafe_counter.GetAndIncrement() |
179 logging.warning('Creating shard %s for device %s.', index, device) | 199 logging.warning('Creating shard %s for device %s.', index, device) |
180 runner = runner_factory(device, index) | 200 runner = runner_factory(device, index) |
181 runner.SetUp() | 201 runner.SetUp() |
182 out_runners.append(runner) | 202 out_runners.append(runner) |
183 except android_commands.errors.DeviceUnresponsiveError as e: | 203 except android_commands.errors.DeviceUnresponsiveError as e: |
184 logging.warning('Failed to create shard for %s: [%s]', device, e) | 204 logging.warning('Failed to create shard for %s: [%s]', device, e) |
185 | 205 |
186 | 206 |
187 def _RunAllTests(runners, tests, num_retries, timeout=None): | 207 def _RunAllTests(runners, test_collection_factory, num_retries, timeout=None, |
208 tag_results_with_device=False): | |
188 """Run all tests using the given TestRunners. | 209 """Run all tests using the given TestRunners. |
189 | 210 |
190 Args: | 211 Args: |
191 runners: a list of TestRunner objects. | 212 runners: a list of TestRunner objects. |
192 tests: a list of Tests to run using the given TestRunners. | 213 test_collection_factory: a callable to generate a _TestCollection object for |
214 each test runner. | |
193 num_retries: number of retries for a test. | 215 num_retries: number of retries for a test. |
194 timeout: watchdog timeout in seconds, defaults to the default timeout. | 216 timeout: watchdog timeout in seconds, defaults to the default timeout. |
217 tag_results_with_device: If True, appends the name of the device on which | |
218 the test was run to the test name. Used by ReplicateAndRunTests to | |
219 identify which device ran each copy of the test, and to ensure each copy | |
220 of the test is recorded separately. | |
195 | 221 |
196 Returns: | 222 Returns: |
197 A tuple of (TestRunResults object, exit code) | 223 A tuple of (TestRunResults object, exit code) |
198 """ | 224 """ |
199 logging.warning('Running %s tests with %s test runners.' % | 225 logging.warning('Running tests with %s test runners.' % (len(runners))) |
200 (len(tests), len(runners))) | |
201 tests_collection = _TestCollection([_Test(t) for t in tests]) | |
202 results = [] | 226 results = [] |
203 exit_code = 0 | 227 exit_code = 0 |
204 watcher = watchdog_timer.WatchdogTimer(timeout) | 228 watcher = watchdog_timer.WatchdogTimer(timeout) |
229 | |
205 workers = reraiser_thread.ReraiserThreadGroup( | 230 workers = reraiser_thread.ReraiserThreadGroup( |
206 [reraiser_thread.ReraiserThread( | 231 [reraiser_thread.ReraiserThread( |
207 _RunTestsFromQueue, | 232 _RunTestsFromQueue, |
208 [r, tests_collection, results, watcher, num_retries], | 233 [r, test_collection_factory(), results, watcher, num_retries, |
234 tag_results_with_device], | |
209 name=r.device[-4:]) | 235 name=r.device[-4:]) |
210 for r in runners]) | 236 for r in runners]) |
211 run_results = base_test_result.TestRunResults() | 237 run_results = base_test_result.TestRunResults() |
212 workers.StartAll() | 238 workers.StartAll() |
213 | 239 |
214 # Catch DeviceUnresponsiveErrors and set a warning exit code | 240 # Catch DeviceUnresponsiveErrors and set a warning exit code |
215 try: | 241 try: |
216 workers.JoinAll(watcher) | 242 workers.JoinAll(watcher) |
217 except android_commands.errors.DeviceUnresponsiveError as e: | 243 except android_commands.errors.DeviceUnresponsiveError as e: |
218 logging.error(e) | 244 logging.error(e) |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
260 runners: a list of TestRunner objects. | 286 runners: a list of TestRunner objects. |
261 timeout: watchdog timeout in seconds, defaults to the default timeout. | 287 timeout: watchdog timeout in seconds, defaults to the default timeout. |
262 """ | 288 """ |
263 threads = reraiser_thread.ReraiserThreadGroup( | 289 threads = reraiser_thread.ReraiserThreadGroup( |
264 [reraiser_thread.ReraiserThread(r.TearDown, name=r.device[-4:]) | 290 [reraiser_thread.ReraiserThread(r.TearDown, name=r.device[-4:]) |
265 for r in runners]) | 291 for r in runners]) |
266 threads.StartAll() | 292 threads.StartAll() |
267 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout)) | 293 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout)) |
268 | 294 |
269 | 295 |
270 def ShardAndRunTests(runner_factory, devices, tests, build_type='Debug', | 296 |
271 test_timeout=DEFAULT_TIMEOUT, | 297 def _GetAttachedDevices(wait_for_debugger=False, test_device=None): |
272 setup_timeout=DEFAULT_TIMEOUT, | 298 """Get all attached devices. |
273 num_retries=2): | 299 |
300 If we are using a debugger, limit to only one device. | |
301 | |
302 Args: | |
303 wait_for_debugger: True if this run will use a debugger. | |
304 test_device: name of a specific device to use. | |
305 | |
306 Returns: | |
307 A list of attached devices. | |
308 """ | |
309 attached_devices = [] | |
310 | |
311 attached_devices = android_commands.GetAttachedDevices() | |
312 if test_device: | |
313 assert test_device in attached_devices | |
frankf
2013/07/16 18:42:32
Add a message to the assert.
gkanwar
2013/07/16 20:27:36
Done.
| |
314 attached_devices = [test_device] | |
315 | |
316 if len(attached_devices) > 1 and wait_for_debugger: | |
317 logging.warning('Debugger can not be sharded, using first available device') | |
318 attached_devices = attached_devices[:1] | |
319 | |
320 return attached_devices | |
321 | |
322 | |
323 def RunTests(tests, runner_factory, | |
324 wait_for_debugger, test_device, | |
325 test_allocation='shard', | |
frankf
2013/07/16 18:42:32
I think a boolean called 'shard' makes more sense,
gkanwar
2013/07/16 20:27:36
Done.
| |
326 build_type='Debug', | |
327 test_timeout=DEFAULT_TIMEOUT, | |
328 setup_timeout=DEFAULT_TIMEOUT, | |
329 num_retries=2): | |
274 """Run all tests on attached devices, retrying tests that don't pass. | 330 """Run all tests on attached devices, retrying tests that don't pass. |
275 | 331 |
276 Args: | 332 Args: |
333 tests: list of tests to run. | |
frankf
2013/07/16 18:42:32
Capital first letter.
gkanwar
2013/07/16 20:27:36
Done.
| |
277 runner_factory: callable that takes a device and index and returns a | 334 runner_factory: callable that takes a device and index and returns a |
278 TestRunner object. | 335 TestRunner object. |
279 devices: list of attached device serial numbers as strings. | 336 wait_for_debugger: True if this test is using a debugger. |
280 tests: list of tests to run. | 337 test_device: A specific device to run tests on, or None. |
338 test_allocation: 'shard' or 'replicate'. | |
281 build_type: either 'Debug' or 'Release'. | 339 build_type: either 'Debug' or 'Release'. |
282 test_timeout: watchdog timeout in seconds for running tests, defaults to the | 340 test_timeout: watchdog timeout in seconds for running tests, defaults to the |
283 default timeout. | 341 default timeout. |
284 setup_timeout: watchdog timeout in seconds for creating and cleaning up | 342 setup_timeout: watchdog timeout in seconds for creating and cleaning up |
285 test runners, defaults to the default timeout. | 343 test runners, defaults to the default timeout. |
286 num_retries: number of retries for a test. | 344 num_retries: number of retries for a test. |
345 tag_results_with_device: If True, appends the name of the device on which | |
346 the test was run to the test name. Used by ReplicateAndRunTests to | |
347 identify which device ran each copy of the test, and to ensure each copy | |
frankf
2013/07/16 18:42:32
Update this
gkanwar
2013/07/16 20:27:36
Done.
| |
348 of the test is recorded separately. | |
287 | 349 |
288 Returns: | 350 Returns: |
289 A tuple of (base_test_result.TestRunResults object, exit code). | 351 A tuple of (base_test_result.TestRunResults object, exit code). |
290 """ | 352 """ |
353 # Validation | |
frankf
2013/07/16 18:42:32
This is obvious. Remove the comment.
gkanwar
2013/07/16 20:27:36
Done.
| |
291 if not tests: | 354 if not tests: |
292 logging.error('No tests to run.') | 355 logging.error('No tests to run.') |
293 return (base_test_result.TestRunResults(), constants.ERROR_EXIT_CODE) | 356 return (base_test_result.TestRunResults(), constants.ERROR_EXIT_CODE) |
294 | 357 |
358 if not test_allocation in VALID_TEST_ALLOCATION: | |
359 logging.error('Unknown test allocation string %s. Options are: %s' | |
360 % (test_allocation, ', '.join(VALID_TEST_ALLOCATION))) | |
361 return (base_test_result.TestRunResults(), constants.ERROR_EXIT_CODE) | |
362 | |
363 if test_allocation == 'shard': | |
364 # Generate a shared _TestCollection object for all test runners, so they | |
365 # draw from a common pool of tests. | |
366 shared_test_collection = _TestCollection([_Test(t) for t in tests]) | |
367 test_collection_factory = lambda: shared_test_collection | |
368 tag_results_with_device = False | |
369 else: | |
370 # Generate a unique _TestCollection object for each test runner, but use | |
371 # the same set of tests. | |
372 test_collection_factory = lambda: _TestCollection([_Test(t) for t in tests]) | |
373 tag_results_with_device = True | |
374 | |
375 devices = _GetAttachedDevices(wait_for_debugger, test_device) | |
376 | |
295 logging.info('Will run %d tests: %s', len(tests), str(tests)) | 377 logging.info('Will run %d tests: %s', len(tests), str(tests)) |
378 | |
296 forwarder.Forwarder.KillHost(build_type) | 379 forwarder.Forwarder.KillHost(build_type) |
297 runners = _CreateRunners(runner_factory, devices, setup_timeout) | 380 runners = _CreateRunners(runner_factory, devices, setup_timeout) |
298 try: | 381 try: |
299 return _RunAllTests(runners, tests, num_retries, test_timeout) | 382 return _RunAllTests(runners, test_collection_factory, |
383 num_retries, test_timeout, tag_results_with_device) | |
300 finally: | 384 finally: |
301 try: | 385 try: |
302 _TearDownRunners(runners, setup_timeout) | 386 _TearDownRunners(runners, setup_timeout) |
303 except android_commands.errors.DeviceUnresponsiveError as e: | 387 except android_commands.errors.DeviceUnresponsiveError as e: |
304 logging.warning('Device unresponsive during TearDown: [%s]', e) | 388 logging.warning('Device unresponsive during TearDown: [%s]', e) |
305 finally: | 389 finally: |
306 forwarder.Forwarder.KillHost(build_type) | 390 forwarder.Forwarder.KillHost(build_type) |
OLD | NEW |