build/android/pylib/base/shard.py - Issue 18323020: Updates the test runner script exit codes

Side by Side Diff: build/android/pylib/base/shard.py

Issue 18323020: Updates the test runner script exit codes (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # Copyright (c) 2013 The Chromium Authors. All rights reserved.	1 # Copyright (c) 2013 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 """Implements test sharding logic."""	5 """Implements test sharding logic."""

6	6

7 import logging	7 import logging

8 import threading	8 import threading

9	9

10 from pylib import android_commands	10 from pylib import android_commands

	11 from pylib import constants

11 from pylib import forwarder	12 from pylib import forwarder

12 from pylib.utils import reraiser_thread	13 from pylib.utils import reraiser_thread

13 from pylib.utils import watchdog_timer	14 from pylib.utils import watchdog_timer

14	15

15 import base_test_result	16 import base_test_result

16	17

17	18

18 DEFAULT_TIMEOUT = 7 * 60 # seven minutes	19 DEFAULT_TIMEOUT = 7 * 60 # seven minutes

19	20

20	21

(...skipping 64 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
85 try:	86 try:

86 return self._tests.pop(0)	87 return self._tests.pop(0)

87 except IndexError:	88 except IndexError:

88 # Another thread beat us to the avaliable test, wait again.	89 # Another thread beat us to the avaliable test, wait again.

89 self._item_avaliable_or_all_done.clear()	90 self._item_avaliable_or_all_done.clear()

90	91

91 def add(self, test):	92 def add(self, test):

92 """Add an test to the collection.	93 """Add an test to the collection.

93	94

94 Args:	95 Args:

95 item: A test to add.	96 test: A test to add.

96 """	97 """

97 with self._lock:	98 with self._lock:

98 self._tests.append(test)	99 self._tests.append(test)

99 self._item_avaliable_or_all_done.set()	100 self._item_avaliable_or_all_done.set()

100 self._tests_in_progress += 1	101 self._tests_in_progress += 1

101	102

102 def test_completed(self):	103 def test_completed(self):

103 """Indicate that a test has been fully handled."""	104 """Indicate that a test has been fully handled."""

104 with self._lock:	105 with self._lock:

105 self._tests_in_progress -= 1	106 self._tests_in_progress -= 1

106 if self._tests_in_progress == 0:	107 if self._tests_in_progress == 0:

107 # All tests have been handled, signal all waiting threads.	108 # All tests have been handled, signal all waiting threads.

108 self._item_avaliable_or_all_done.set()	109 self._item_avaliable_or_all_done.set()

109	110

110 def __iter__(self):	111 def __iter__(self):

111 """Iterate through tests in the collection until all have been handled."""	112 """Iterate through tests in the collection until all have been handled."""

112 while True:	113 while True:

113 r = self._pop()	114 r = self._pop()

114 if r is None:	115 if r is None:

115 break	116 break

116 yield r	117 yield r

117	118

118	119

119 def _RunTestsFromQueue(runner, test_collection, out_results, watcher,	120 def _RunTestsFromQueue(runner, test_collection, out_results, watcher,

120 num_retries):	121 num_retries):

121 """Runs tests from the test_collection until empty using the given runner.	122 """Runs tests from the test_collection until empty using the given runner.

122	123

123 Adds TestRunResults objects to the out_results list and may add tests to the	124 Adds TestRunResults objects to the out_results list and may add tests to the

124 out_retry list.	125 out_retry list.

125	126

126 Args:	127 Args:

127 runner: A TestRunner object used to run the tests.	128 runner: A TestRunner object used to run the tests.

128 test_collection: A _TestCollection from which to get _Test objects to run.	129 test_collection: A _TestCollection from which to get _Test objects to run.

129 out_results: A list to add TestRunResults to.	130 out_results: A list to add TestRunResults to.

130 watcher: A watchdog_timer.WatchdogTimer object, used as a shared timeout.	131 watcher: A watchdog_timer.WatchdogTimer object, used as a shared timeout.

(...skipping 12 matching lines...) Expand all Loading...
143 if retry and test.tries <= num_retries:	144 if retry and test.tries <= num_retries:

144 # Retry non-passing results, only record passing results.	145 # Retry non-passing results, only record passing results.

145 pass_results = base_test_result.TestRunResults()	146 pass_results = base_test_result.TestRunResults()

146 pass_results.AddResults(result.GetPass())	147 pass_results.AddResults(result.GetPass())

147 out_results.append(pass_results)	148 out_results.append(pass_results)

148 logging.warning('Will retry test, try #%s.' % test.tries)	149 logging.warning('Will retry test, try #%s.' % test.tries)

149 test_collection.add(_Test(test=retry, tries=test.tries))	150 test_collection.add(_Test(test=retry, tries=test.tries))

150 else:	151 else:

151 # All tests passed or retry limit reached. Either way, record results.	152 # All tests passed or retry limit reached. Either way, record results.

152 out_results.append(result)	153 out_results.append(result)

153 except android_commands.errors.DeviceUnresponsiveError:

154 # Device is unresponsive, stop handling tests on this device and ensure

155 # current test gets runs by another device. Don't reraise this exception

156 # on the main thread.

157 test_collection.add(test)
frankf 2013/07/03 21:26:37: This decreases robustness. Currently, if we catch this exception, we add the tests back to queue to get retried.
gkanwar 2013/07/03 23:15:18: On 2013/07/03 21:26:37, frankf wrote:
> This decreases robustness. Currently, if we catch this exception, we add the
> tests back to queue to get retried.
I'm not sure I fully understand -- is this decreasing robustness by not adding the tests back to the queue to be retried? The except clause below should catch this exception and do that. The only change is whether we re-raise or silently ignore the exception.
frankf 2013/07/04 00:46:19: Ah, it makes sense.
On 2013/07/03 23:15:18, gkanwar wrote:
> On 2013/07/03 21:26:37, frankf wrote:
> > This decreases robustness. Currently, if we catch this exception, we add the
> > tests back to queue to get retried.
>
> I'm not sure I fully understand -- is this decreasing robustness by not adding
> the tests back to the queue to be retried? The except clause below should catch
> this exception and do that. The only change is whether we re-raise or silently
> ignore the exception.
158 return

159 except:	154 except:

160 # An unhandleable exception, ensure tests get run by another device and	155 # An unhandleable exception, ensure tests get run by another device and

161 # reraise this exception on the main thread.	156 # reraise this exception on the main thread.

162 test_collection.add(test)	157 test_collection.add(test)

163 raise	158 raise

164 finally:	159 finally:

165 # Retries count as separate tasks so always mark the popped test as done.	160 # Retries count as separate tasks so always mark the popped test as done.

166 test_collection.test_completed()	161 test_collection.test_completed()

167	162

168	163

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
205 (len(tests), len(runners)))	200 (len(tests), len(runners)))

206 tests_collection = _TestCollection([_Test(t) for t in tests])	201 tests_collection = _TestCollection([_Test(t) for t in tests])

207 results = []	202 results = []

208 watcher = watchdog_timer.WatchdogTimer(timeout)	203 watcher = watchdog_timer.WatchdogTimer(timeout)

209 workers = reraiser_thread.ReraiserThreadGroup(	204 workers = reraiser_thread.ReraiserThreadGroup(

210 [reraiser_thread.ReraiserThread(	205 [reraiser_thread.ReraiserThread(

211 _RunTestsFromQueue,	206 _RunTestsFromQueue,

212 [r, tests_collection, results, watcher, num_retries],	207 [r, tests_collection, results, watcher, num_retries],

213 name=r.device[-4:])	208 name=r.device[-4:])

214 for r in runners])	209 for r in runners])

	210 run_results = base_test_result.TestRunResults()

215 workers.StartAll()	211 workers.StartAll()

216 workers.JoinAll(watcher)	212

217 run_results = base_test_result.TestRunResults()	213 # Catch DeviceUnresponsiveErrors and set a warning exit code

	214 try:

	215 workers.JoinAll(watcher)

	216 except android_commands.errors.DeviceUnresponsiveError as e:

	217 run_results.exit_code = constants.WARNING_EXIT_CODE
	frankf 2013/07/03 21:26:37: Perhaps a singleton object that flags this exception happened is cleaner in this case, than propagating the exit_code through the results object. If we're returning a warning code, we should also do logging.error() of which device is unresponsive.
	gkanwar 2013/07/03 23:15:18: On 2013/07/03 21:26:37, frankf wrote:
	> Perhaps a singleton object that flags this exception happened is cleaner in this
	> case, than propagating the exit_code through the results object. If we're
	> returning a warning code, we should also do logging.error() of which device is
	> unresponsive.
	That's a good point. Updated to use a singleton instead. Logging added when we set this exit code here.
	218

218 for r in results:	219 for r in results:

219 run_results.AddTestRunResults(r)	220 run_results.AddTestRunResults(r)

220 return run_results	221 return run_results

221	222

222	223

223 def _CreateRunners(runner_factory, devices, timeout=None):	224 def _CreateRunners(runner_factory, devices, timeout=None):

224 """Creates a test runner for each device and calls SetUp() in parallel.	225 """Creates a test runner for each device and calls SetUp() in parallel.

225	226

226 Note: if a device is unresponsive the corresponding TestRunner will not be	227 Note: if a device is unresponsive the corresponding TestRunner will not be

227 included in the returned list.	228 included in the returned list.

(...skipping 15 matching lines...) Expand all Loading...
243 [runner_factory, d, runners, counter],	244 [runner_factory, d, runners, counter],

244 name=d[-4:])	245 name=d[-4:])

245 for d in devices])	246 for d in devices])

246 threads.StartAll()	247 threads.StartAll()

247 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout))	248 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout))

248 return runners	249 return runners

249	250

250	251

251 def _TearDownRunners(runners, timeout=None):	252 def _TearDownRunners(runners, timeout=None):

252 """Calls TearDown() for each test runner in parallel.	253 """Calls TearDown() for each test runner in parallel.

	254

253 Args:	255 Args:

254 runners: a list of TestRunner objects.	256 runners: a list of TestRunner objects.

255 timeout: watchdog timeout in seconds, defaults to the default timeout.	257 timeout: watchdog timeout in seconds, defaults to the default timeout.

256 """	258 """

257 threads = reraiser_thread.ReraiserThreadGroup(	259 threads = reraiser_thread.ReraiserThreadGroup(

258 [reraiser_thread.ReraiserThread(r.TearDown, name=r.device[-4:])	260 [reraiser_thread.ReraiserThread(r.TearDown, name=r.device[-4:])

259 for r in runners])	261 for r in runners])

260 threads.StartAll()	262 threads.StartAll()

261 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout))	263 threads.JoinAll(watchdog_timer.WatchdogTimer(timeout))

262	264

(...skipping 28 matching lines...) Expand all Loading...
291 runners = _CreateRunners(runner_factory, devices, setup_timeout)	293 runners = _CreateRunners(runner_factory, devices, setup_timeout)

292 try:	294 try:

293 return _RunAllTests(runners, tests, num_retries, test_timeout)	295 return _RunAllTests(runners, tests, num_retries, test_timeout)

294 finally:	296 finally:

295 try:	297 try:

296 _TearDownRunners(runners, setup_timeout)	298 _TearDownRunners(runners, setup_timeout)

297 except android_commands.errors.DeviceUnresponsiveError as e:	299 except android_commands.errors.DeviceUnresponsiveError as e:

298 logging.warning('Device unresponsive during TearDown: [%s]', e)	300 logging.warning('Device unresponsive during TearDown: [%s]', e)

299 finally:	301 finally:

300 forwarder.Forwarder.KillHost(build_type)	302 forwarder.Forwarder.KillHost(build_type)

OLD	NEW