Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(199)

Side by Side Diff: build/android/pylib/base/shard.py

Issue 13590002: [Android] Print the stack of deadlocked threads when sharding. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: additional '*' * 80 to make frankf happy Created 7 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | build/android/pylib/base/shard_unittest.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright (c) 2013 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 """Implements test sharding logic.""" 5 """Implements test sharding logic."""
6 6
7 import logging 7 import logging
8 import threading 8 import threading
9 9
10 from pylib import android_commands 10 from pylib import android_commands
11 from pylib import forwarder 11 from pylib import forwarder
12 from pylib.utils import reraiser_thread 12 from pylib.utils import reraiser_thread
13 from pylib.utils import watchdog_timer
13 14
14 import base_test_result 15 import base_test_result
15 16
16 17
18 DEFAULT_TIMEOUT = 7 * 60 # seven minutes
19
20
17 class _ThreadSafeCounter(object): 21 class _ThreadSafeCounter(object):
18 """A threadsafe counter.""" 22 """A threadsafe counter."""
23
19 def __init__(self): 24 def __init__(self):
20 self._lock = threading.Lock() 25 self._lock = threading.Lock()
21 self._value = 0 26 self._value = 0
22 27
23 def GetAndIncrement(self): 28 def GetAndIncrement(self):
24 """Get the current value and increment it atomically. 29 """Get the current value and increment it atomically.
25 30
26 Returns: 31 Returns:
27 The value before incrementing. 32 The value before incrementing.
28 """ 33 """
29 with self._lock: 34 with self._lock:
30 pre_increment = self._value 35 pre_increment = self._value
31 self._value += 1 36 self._value += 1
32 return pre_increment 37 return pre_increment
33 38
34 39
35 class _Test(object): 40 class _Test(object):
36 """Holds a test with additional metadata.""" 41 """Holds a test with additional metadata."""
42
37 def __init__(self, test, tries=0): 43 def __init__(self, test, tries=0):
38 """Initializes the _Test object. 44 """Initializes the _Test object.
39 45
40 Args: 46 Args:
41 test: the test. 47 test: the test.
42 tries: number of tries so far. 48 tries: number of tries so far.
43 """ 49 """
44 self.test = test 50 self.test = test
45 self.tries = tries 51 self.tries = tries
46 52
47 53
48 class _TestCollection(object): 54 class _TestCollection(object):
49 """A threadsafe collection of tests. 55 """A threadsafe collection of tests.
50 56
51 Args: 57 Args:
52 tests: list of tests to put in the collection. 58 tests: list of tests to put in the collection.
53 """ 59 """
60
54 def __init__(self, tests=[]): 61 def __init__(self, tests=[]):
55 self._lock = threading.Lock() 62 self._lock = threading.Lock()
56 self._tests = [] 63 self._tests = []
57 self._tests_in_progress = 0 64 self._tests_in_progress = 0
58 # Used to signal that an item is avaliable or all items have been handled. 65 # Used to signal that an item is avaliable or all items have been handled.
59 self._item_avaliable_or_all_done = threading.Event() 66 self._item_avaliable_or_all_done = threading.Event()
60 for t in tests: 67 for t in tests:
61 self.add(t) 68 self.add(t)
62 69
63 def _pop(self): 70 def _pop(self):
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
102 109
103 def __iter__(self): 110 def __iter__(self):
104 """Iterate through tests in the collection until all have been handled.""" 111 """Iterate through tests in the collection until all have been handled."""
105 while True: 112 while True:
106 r = self._pop() 113 r = self._pop()
107 if r is None: 114 if r is None:
108 break 115 break
109 yield r 116 yield r
110 117
111 118
112 def _RunTestsFromQueue(runner, test_collection, out_results): 119 def _RunTestsFromQueue(runner, test_collection, out_results, watcher):
113 """Runs tests from the test_collection until empty using the given runner. 120 """Runs tests from the test_collection until empty using the given runner.
114 121
115 Adds TestRunResults objects to the out_results list and may add tests to the 122 Adds TestRunResults objects to the out_results list and may add tests to the
116 out_retry list. 123 out_retry list.
117 124
118 Args: 125 Args:
119 runner: A TestRunner object used to run the tests. 126 runner: A TestRunner object used to run the tests.
120 test_collection: A _TestCollection from which to get _Test objects to run. 127 test_collection: A _TestCollection from which to get _Test objects to run.
121 out_results: A list to add TestRunResults to. 128 out_results: A list to add TestRunResults to.
129 watcher: A watchdog_timer.WatchdogTimer object, used as a shared timeout.
122 """ 130 """
123 for test in test_collection: 131 for test in test_collection:
132 watcher.Reset()
124 try: 133 try:
125 if not android_commands.IsDeviceAttached(runner.device): 134 if not android_commands.IsDeviceAttached(runner.device):
126 # Device is unresponsive, stop handling tests on this device. 135 # Device is unresponsive, stop handling tests on this device.
127 msg = 'Device %s is unresponsive.' % runner.device 136 msg = 'Device %s is unresponsive.' % runner.device
128 logging.warning(msg) 137 logging.warning(msg)
129 raise android_commands.errors.DeviceUnresponsiveError(msg) 138 raise android_commands.errors.DeviceUnresponsiveError(msg)
130 result, retry = runner.RunTest(test.test) 139 result, retry = runner.RunTest(test.test)
131 test.tries += 1 140 test.tries += 1
132 if retry and test.tries <= 3: 141 if retry and test.tries <= 3:
133 # Retry non-passing results, only record passing results. 142 # Retry non-passing results, only record passing results.
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
171 try: 180 try:
172 index = threadsafe_counter.GetAndIncrement() 181 index = threadsafe_counter.GetAndIncrement()
173 logging.warning('*****Creating shard %s for device %s.', index, device) 182 logging.warning('*****Creating shard %s for device %s.', index, device)
174 runner = runner_factory(device, index) 183 runner = runner_factory(device, index)
175 runner.SetUp() 184 runner.SetUp()
176 out_runners.append(runner) 185 out_runners.append(runner)
177 except android_commands.errors.DeviceUnresponsiveError as e: 186 except android_commands.errors.DeviceUnresponsiveError as e:
178 logging.warning('****Failed to create shard for %s: [%s]', device, e) 187 logging.warning('****Failed to create shard for %s: [%s]', device, e)
179 188
180 189
181 def _RunAllTests(runners, tests): 190 def _RunAllTests(runners, tests, timeout=None):
182 """Run all tests using the given TestRunners. 191 """Run all tests using the given TestRunners.
183 192
184 Args: 193 Args:
185 runners: a list of TestRunner objects. 194 runners: a list of TestRunner objects.
186 tests: a list of Tests to run using the given TestRunners. 195 tests: a list of Tests to run using the given TestRunners.
196 timeout: watchdog timeout in seconds, defaults to the default timeout.
187 197
188 Returns: 198 Returns:
189 A TestRunResults object. 199 A TestRunResults object.
190 """ 200 """
191 logging.warning('****Running %s tests with %s test runners.' % 201 logging.warning('****Running %s tests with %s test runners.' %
192 (len(tests), len(runners))) 202 (len(tests), len(runners)))
193 tests_collection = _TestCollection([_Test(t) for t in tests]) 203 tests_collection = _TestCollection([_Test(t) for t in tests])
194 results = [] 204 results = []
195 workers = reraiser_thread.ReraiserThreadGroup([reraiser_thread.ReraiserThread( 205 watcher = watchdog_timer.WatchdogTimer(timeout)
196 _RunTestsFromQueue, [r, tests_collection, results]) for r in runners]) 206 workers = reraiser_thread.ReraiserThreadGroup(
207 [reraiser_thread.ReraiserThread(_RunTestsFromQueue,
208 [r, tests_collection, results, watcher])
209 for r in runners])
197 workers.StartAll() 210 workers.StartAll()
198 workers.JoinAll() 211 workers.JoinAll(watcher)
199 run_results = base_test_result.TestRunResults() 212 run_results = base_test_result.TestRunResults()
200 for r in results: 213 for r in results:
201 run_results.AddTestRunResults(r) 214 run_results.AddTestRunResults(r)
202 return run_results 215 return run_results
203 216
204 217
205 def _CreateRunners(runner_factory, devices): 218 def _CreateRunners(runner_factory, devices, timeout=None):
206 """Creates a test runner for each device and calls SetUp() in parallel. 219 """Creates a test runner for each device and calls SetUp() in parallel.
207 220
208 Note: if a device is unresponsive the corresponding TestRunner will not be 221 Note: if a device is unresponsive the corresponding TestRunner will not be
209 included in the returned list. 222 included in the returned list.
210 223
211 Args: 224 Args:
212 runner_factory: callable that takes a device and index and returns a 225 runner_factory: callable that takes a device and index and returns a
213 TestRunner object. 226 TestRunner object.
214 devices: list of device serial numbers as strings. 227 devices: list of device serial numbers as strings.
228 timeout: watchdog timeout in seconds, defaults to the default timeout.
215 229
216 Returns: 230 Returns:
217 A list of TestRunner objects. 231 A list of TestRunner objects.
218 """ 232 """
219 logging.warning('****Creating %s test runners.' % len(devices)) 233 logging.warning('****Creating %s test runners.' % len(devices))
220 runners = [] 234 runners = []
221 counter = _ThreadSafeCounter() 235 counter = _ThreadSafeCounter()
222 threads = reraiser_thread.ReraiserThreadGroup( 236 threads = reraiser_thread.ReraiserThreadGroup(
223 [reraiser_thread.ReraiserThread(_SetUp, [runner_factory, d, runners, 237 [reraiser_thread.ReraiserThread(_SetUp, [runner_factory, d, runners,
224 counter]) 238 counter])
225 for d in devices]) 239 for d in devices])
226 threads.StartAll() 240 threads.StartAll()
227 threads.JoinAll() 241 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout))
228 return runners 242 return runners
229 243
230 244
231 def _TearDownRunners(runners): 245 def _TearDownRunners(runners, timeout=None):
232 """Calls TearDown() for each test runner in parallel. 246 """Calls TearDown() for each test runner in parallel.
233 Args: 247 Args:
234 runners: a list of TestRunner objects. 248 runners: a list of TestRunner objects.
249 timeout: watchdog timeout in seconds, defaults to the default timeout.
235 """ 250 """
236 threads = reraiser_thread.ReraiserThreadGroup( 251 threads = reraiser_thread.ReraiserThreadGroup(
237 [reraiser_thread.ReraiserThread(runner.TearDown) 252 [reraiser_thread.ReraiserThread(runner.TearDown)
238 for runner in runners]) 253 for runner in runners])
239 threads.StartAll() 254 threads.StartAll()
240 threads.JoinAll() 255 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout))
241 256
242 257
243 def ShardAndRunTests(runner_factory, devices, tests, build_type='Debug'): 258 def ShardAndRunTests(runner_factory, devices, tests, build_type='Debug',
259 test_timeout=DEFAULT_TIMEOUT,
260 setup_timeout=DEFAULT_TIMEOUT):
244 """Run all tests on attached devices, retrying tests that don't pass. 261 """Run all tests on attached devices, retrying tests that don't pass.
245 262
246 Args: 263 Args:
247 runner_factory: callable that takes a device and index and returns a 264 runner_factory: callable that takes a device and index and returns a
248 TestRunner object. 265 TestRunner object.
249 devices: list of attached device serial numbers as strings. 266 devices: list of attached device serial numbers as strings.
250 tests: list of tests to run. 267 tests: list of tests to run.
251 build_type: either 'Debug' or 'Release'. 268 build_type: either 'Debug' or 'Release'.
269 test_timeout: watchdog timeout in seconds for running tests, defaults to the
270 default timeout.
271 setup_timeout: watchdog timeout in seconds for creating and cleaning up
272 test runners, defaults to the default timeout.
252 273
253 Returns: 274 Returns:
254 A base_test_result.TestRunResults object. 275 A base_test_result.TestRunResults object.
255 """ 276 """
256 forwarder.Forwarder.KillHost(build_type) 277 forwarder.Forwarder.KillHost(build_type)
257 runners = _CreateRunners(runner_factory, devices) 278 runners = _CreateRunners(runner_factory, devices, setup_timeout)
258 try: 279 try:
259 return _RunAllTests(runners, tests) 280 return _RunAllTests(runners, tests, test_timeout)
260 finally: 281 finally:
261 try: 282 try:
262 _TearDownRunners(runners) 283 _TearDownRunners(runners, setup_timeout)
263 except android_commands.errors.DeviceUnresponsiveError as e: 284 except android_commands.errors.DeviceUnresponsiveError as e:
264 logging.warning('****Device unresponsive during TearDown: [%s]', e) 285 logging.warning('****Device unresponsive during TearDown: [%s]', e)
265 finally: 286 finally:
266 forwarder.Forwarder.KillHost(build_type) 287 forwarder.Forwarder.KillHost(build_type)
OLDNEW
« no previous file with comments | « no previous file | build/android/pylib/base/shard_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698