OLD | NEW |
1 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """Implements test sharding logic.""" | 5 """Implements test sharding logic.""" |
6 | 6 |
7 import logging | 7 import logging |
8 import threading | 8 import threading |
9 | 9 |
10 from pylib import android_commands | 10 from pylib import android_commands |
11 from pylib import forwarder | 11 from pylib import forwarder |
12 from pylib.utils import reraiser_thread | 12 from pylib.utils import reraiser_thread |
| 13 from pylib.utils import watchdog_timer |
13 | 14 |
14 import base_test_result | 15 import base_test_result |
15 | 16 |
16 | 17 |
| 18 DEFAULT_TIMEOUT = 7 * 60 # seven minutes |
| 19 |
| 20 |
17 class _ThreadSafeCounter(object): | 21 class _ThreadSafeCounter(object): |
18 """A threadsafe counter.""" | 22 """A threadsafe counter.""" |
| 23 |
19 def __init__(self): | 24 def __init__(self): |
20 self._lock = threading.Lock() | 25 self._lock = threading.Lock() |
21 self._value = 0 | 26 self._value = 0 |
22 | 27 |
23 def GetAndIncrement(self): | 28 def GetAndIncrement(self): |
24 """Get the current value and increment it atomically. | 29 """Get the current value and increment it atomically. |
25 | 30 |
26 Returns: | 31 Returns: |
27 The value before incrementing. | 32 The value before incrementing. |
28 """ | 33 """ |
29 with self._lock: | 34 with self._lock: |
30 pre_increment = self._value | 35 pre_increment = self._value |
31 self._value += 1 | 36 self._value += 1 |
32 return pre_increment | 37 return pre_increment |
33 | 38 |
34 | 39 |
35 class _Test(object): | 40 class _Test(object): |
36 """Holds a test with additional metadata.""" | 41 """Holds a test with additional metadata.""" |
| 42 |
37 def __init__(self, test, tries=0): | 43 def __init__(self, test, tries=0): |
38 """Initializes the _Test object. | 44 """Initializes the _Test object. |
39 | 45 |
40 Args: | 46 Args: |
41 test: the test. | 47 test: the test. |
42 tries: number of tries so far. | 48 tries: number of tries so far. |
43 """ | 49 """ |
44 self.test = test | 50 self.test = test |
45 self.tries = tries | 51 self.tries = tries |
46 | 52 |
47 | 53 |
48 class _TestCollection(object): | 54 class _TestCollection(object): |
49 """A threadsafe collection of tests. | 55 """A threadsafe collection of tests. |
50 | 56 |
51 Args: | 57 Args: |
52 tests: list of tests to put in the collection. | 58 tests: list of tests to put in the collection. |
53 """ | 59 """ |
| 60 |
54 def __init__(self, tests=[]): | 61 def __init__(self, tests=[]): |
55 self._lock = threading.Lock() | 62 self._lock = threading.Lock() |
56 self._tests = [] | 63 self._tests = [] |
57 self._tests_in_progress = 0 | 64 self._tests_in_progress = 0 |
58 # Used to signal that an item is avaliable or all items have been handled. | 65 # Used to signal that an item is avaliable or all items have been handled. |
59 self._item_avaliable_or_all_done = threading.Event() | 66 self._item_avaliable_or_all_done = threading.Event() |
60 for t in tests: | 67 for t in tests: |
61 self.add(t) | 68 self.add(t) |
62 | 69 |
63 def _pop(self): | 70 def _pop(self): |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
102 | 109 |
103 def __iter__(self): | 110 def __iter__(self): |
104 """Iterate through tests in the collection until all have been handled.""" | 111 """Iterate through tests in the collection until all have been handled.""" |
105 while True: | 112 while True: |
106 r = self._pop() | 113 r = self._pop() |
107 if r is None: | 114 if r is None: |
108 break | 115 break |
109 yield r | 116 yield r |
110 | 117 |
111 | 118 |
112 def _RunTestsFromQueue(runner, test_collection, out_results): | 119 def _RunTestsFromQueue(runner, test_collection, out_results, watcher): |
113 """Runs tests from the test_collection until empty using the given runner. | 120 """Runs tests from the test_collection until empty using the given runner. |
114 | 121 |
115 Adds TestRunResults objects to the out_results list and may add tests to the | 122 Adds TestRunResults objects to the out_results list and may add tests to the |
116 out_retry list. | 123 out_retry list. |
117 | 124 |
118 Args: | 125 Args: |
119 runner: A TestRunner object used to run the tests. | 126 runner: A TestRunner object used to run the tests. |
120 test_collection: A _TestCollection from which to get _Test objects to run. | 127 test_collection: A _TestCollection from which to get _Test objects to run. |
121 out_results: A list to add TestRunResults to. | 128 out_results: A list to add TestRunResults to. |
| 129 watcher: A watchdog_timer.WatchdogTimer object, used as a shared timeout. |
122 """ | 130 """ |
123 for test in test_collection: | 131 for test in test_collection: |
| 132 watcher.Reset() |
124 try: | 133 try: |
125 if not android_commands.IsDeviceAttached(runner.device): | 134 if not android_commands.IsDeviceAttached(runner.device): |
126 # Device is unresponsive, stop handling tests on this device. | 135 # Device is unresponsive, stop handling tests on this device. |
127 msg = 'Device %s is unresponsive.' % runner.device | 136 msg = 'Device %s is unresponsive.' % runner.device |
128 logging.warning(msg) | 137 logging.warning(msg) |
129 raise android_commands.errors.DeviceUnresponsiveError(msg) | 138 raise android_commands.errors.DeviceUnresponsiveError(msg) |
130 result, retry = runner.RunTest(test.test) | 139 result, retry = runner.RunTest(test.test) |
131 test.tries += 1 | 140 test.tries += 1 |
132 if retry and test.tries <= 3: | 141 if retry and test.tries <= 3: |
133 # Retry non-passing results, only record passing results. | 142 # Retry non-passing results, only record passing results. |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
171 try: | 180 try: |
172 index = threadsafe_counter.GetAndIncrement() | 181 index = threadsafe_counter.GetAndIncrement() |
173 logging.warning('*****Creating shard %s for device %s.', index, device) | 182 logging.warning('*****Creating shard %s for device %s.', index, device) |
174 runner = runner_factory(device, index) | 183 runner = runner_factory(device, index) |
175 runner.SetUp() | 184 runner.SetUp() |
176 out_runners.append(runner) | 185 out_runners.append(runner) |
177 except android_commands.errors.DeviceUnresponsiveError as e: | 186 except android_commands.errors.DeviceUnresponsiveError as e: |
178 logging.warning('****Failed to create shard for %s: [%s]', device, e) | 187 logging.warning('****Failed to create shard for %s: [%s]', device, e) |
179 | 188 |
180 | 189 |
181 def _RunAllTests(runners, tests): | 190 def _RunAllTests(runners, tests, timeout=None): |
182 """Run all tests using the given TestRunners. | 191 """Run all tests using the given TestRunners. |
183 | 192 |
184 Args: | 193 Args: |
185 runners: a list of TestRunner objects. | 194 runners: a list of TestRunner objects. |
186 tests: a list of Tests to run using the given TestRunners. | 195 tests: a list of Tests to run using the given TestRunners. |
| 196 timeout: watchdog timeout in seconds, defaults to the default timeout. |
187 | 197 |
188 Returns: | 198 Returns: |
189 A TestRunResults object. | 199 A TestRunResults object. |
190 """ | 200 """ |
191 logging.warning('****Running %s tests with %s test runners.' % | 201 logging.warning('****Running %s tests with %s test runners.' % |
192 (len(tests), len(runners))) | 202 (len(tests), len(runners))) |
193 tests_collection = _TestCollection([_Test(t) for t in tests]) | 203 tests_collection = _TestCollection([_Test(t) for t in tests]) |
194 results = [] | 204 results = [] |
195 workers = reraiser_thread.ReraiserThreadGroup([reraiser_thread.ReraiserThread( | 205 watcher = watchdog_timer.WatchdogTimer(timeout) |
196 _RunTestsFromQueue, [r, tests_collection, results]) for r in runners]) | 206 workers = reraiser_thread.ReraiserThreadGroup( |
| 207 [reraiser_thread.ReraiserThread(_RunTestsFromQueue, |
| 208 [r, tests_collection, results, watcher]) |
| 209 for r in runners]) |
197 workers.StartAll() | 210 workers.StartAll() |
198 workers.JoinAll() | 211 workers.JoinAll(watcher) |
199 run_results = base_test_result.TestRunResults() | 212 run_results = base_test_result.TestRunResults() |
200 for r in results: | 213 for r in results: |
201 run_results.AddTestRunResults(r) | 214 run_results.AddTestRunResults(r) |
202 return run_results | 215 return run_results |
203 | 216 |
204 | 217 |
205 def _CreateRunners(runner_factory, devices): | 218 def _CreateRunners(runner_factory, devices, timeout=None): |
206 """Creates a test runner for each device and calls SetUp() in parallel. | 219 """Creates a test runner for each device and calls SetUp() in parallel. |
207 | 220 |
208 Note: if a device is unresponsive the corresponding TestRunner will not be | 221 Note: if a device is unresponsive the corresponding TestRunner will not be |
209 included in the returned list. | 222 included in the returned list. |
210 | 223 |
211 Args: | 224 Args: |
212 runner_factory: callable that takes a device and index and returns a | 225 runner_factory: callable that takes a device and index and returns a |
213 TestRunner object. | 226 TestRunner object. |
214 devices: list of device serial numbers as strings. | 227 devices: list of device serial numbers as strings. |
| 228 timeout: watchdog timeout in seconds, defaults to the default timeout. |
215 | 229 |
216 Returns: | 230 Returns: |
217 A list of TestRunner objects. | 231 A list of TestRunner objects. |
218 """ | 232 """ |
219 logging.warning('****Creating %s test runners.' % len(devices)) | 233 logging.warning('****Creating %s test runners.' % len(devices)) |
220 runners = [] | 234 runners = [] |
221 counter = _ThreadSafeCounter() | 235 counter = _ThreadSafeCounter() |
222 threads = reraiser_thread.ReraiserThreadGroup( | 236 threads = reraiser_thread.ReraiserThreadGroup( |
223 [reraiser_thread.ReraiserThread(_SetUp, [runner_factory, d, runners, | 237 [reraiser_thread.ReraiserThread(_SetUp, [runner_factory, d, runners, |
224 counter]) | 238 counter]) |
225 for d in devices]) | 239 for d in devices]) |
226 threads.StartAll() | 240 threads.StartAll() |
227 threads.JoinAll() | 241 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout)) |
228 return runners | 242 return runners |
229 | 243 |
230 | 244 |
231 def _TearDownRunners(runners): | 245 def _TearDownRunners(runners, timeout=None): |
232 """Calls TearDown() for each test runner in parallel. | 246 """Calls TearDown() for each test runner in parallel. |
233 Args: | 247 Args: |
234 runners: a list of TestRunner objects. | 248 runners: a list of TestRunner objects. |
| 249 timeout: watchdog timeout in seconds, defaults to the default timeout. |
235 """ | 250 """ |
236 threads = reraiser_thread.ReraiserThreadGroup( | 251 threads = reraiser_thread.ReraiserThreadGroup( |
237 [reraiser_thread.ReraiserThread(runner.TearDown) | 252 [reraiser_thread.ReraiserThread(runner.TearDown) |
238 for runner in runners]) | 253 for runner in runners]) |
239 threads.StartAll() | 254 threads.StartAll() |
240 threads.JoinAll() | 255 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout)) |
241 | 256 |
242 | 257 |
243 def ShardAndRunTests(runner_factory, devices, tests, build_type='Debug'): | 258 def ShardAndRunTests(runner_factory, devices, tests, build_type='Debug', |
| 259 test_timeout=DEFAULT_TIMEOUT, |
| 260 setup_timeout=DEFAULT_TIMEOUT): |
244 """Run all tests on attached devices, retrying tests that don't pass. | 261 """Run all tests on attached devices, retrying tests that don't pass. |
245 | 262 |
246 Args: | 263 Args: |
247 runner_factory: callable that takes a device and index and returns a | 264 runner_factory: callable that takes a device and index and returns a |
248 TestRunner object. | 265 TestRunner object. |
249 devices: list of attached device serial numbers as strings. | 266 devices: list of attached device serial numbers as strings. |
250 tests: list of tests to run. | 267 tests: list of tests to run. |
251 build_type: either 'Debug' or 'Release'. | 268 build_type: either 'Debug' or 'Release'. |
| 269 test_timeout: watchdog timeout in seconds for running tests, defaults to the |
| 270 default timeout. |
| 271 setup_timeout: watchdog timeout in seconds for creating and cleaning up |
| 272 test runners, defaults to the default timeout. |
252 | 273 |
253 Returns: | 274 Returns: |
254 A base_test_result.TestRunResults object. | 275 A base_test_result.TestRunResults object. |
255 """ | 276 """ |
256 forwarder.Forwarder.KillHost(build_type) | 277 forwarder.Forwarder.KillHost(build_type) |
257 runners = _CreateRunners(runner_factory, devices) | 278 runners = _CreateRunners(runner_factory, devices, setup_timeout) |
258 try: | 279 try: |
259 return _RunAllTests(runners, tests) | 280 return _RunAllTests(runners, tests, test_timeout) |
260 finally: | 281 finally: |
261 try: | 282 try: |
262 _TearDownRunners(runners) | 283 _TearDownRunners(runners, setup_timeout) |
263 except android_commands.errors.DeviceUnresponsiveError as e: | 284 except android_commands.errors.DeviceUnresponsiveError as e: |
264 logging.warning('****Device unresponsive during TearDown: [%s]', e) | 285 logging.warning('****Device unresponsive during TearDown: [%s]', e) |
265 finally: | 286 finally: |
266 forwarder.Forwarder.KillHost(build_type) | 287 forwarder.Forwarder.KillHost(build_type) |
OLD | NEW |