Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(157)

Side by Side Diff: build/android/pylib/base_test_sharder.py

Issue 11275078: Android: improves test sharding reliability. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Unrecoverable error Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | build/android/run_tests.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 5
6 import android_commands 6 import android_commands
7 import logging 7 import logging
8 import multiprocessing 8 import multiprocessing
9 9
10 from android_commands import errors
10 from test_result import TestResults 11 from test_result import TestResults
11 12
12 13
13 def _ShardedTestRunnable(test): 14 def _ShardedTestRunnable(test):
14 """Standalone function needed by multiprocessing.Pool.""" 15 """Standalone function needed by multiprocessing.Pool."""
15 log_format = '[' + test.device + '] # %(asctime)-15s: %(message)s' 16 log_format = '[' + test.device + '] # %(asctime)-15s: %(message)s'
16 if logging.getLogger().handlers: 17 if logging.getLogger().handlers:
17 logging.getLogger().handlers[0].setFormatter(logging.Formatter(log_format)) 18 logging.getLogger().handlers[0].setFormatter(logging.Formatter(log_format))
18 else: 19 else:
19 logging.basicConfig(format=log_format) 20 logging.basicConfig(format=log_format)
(...skipping 16 matching lines...) Expand all
36 """Base class for sharding tests across multiple devices. 37 """Base class for sharding tests across multiple devices.
37 38
38 Args: 39 Args:
39 attached_devices: A list of attached devices. 40 attached_devices: A list of attached devices.
40 """ 41 """
41 # See more in SetTestsContainer. 42 # See more in SetTestsContainer.
42 tests_container = None 43 tests_container = None
43 44
44 def __init__(self, attached_devices): 45 def __init__(self, attached_devices):
45 self.attached_devices = attached_devices 46 self.attached_devices = attached_devices
46 self.retries = 1 47 # Worst case scenario: a device will drop offline per run, so we need
48 # to retry until we're out of devices.
49 self.retries = len(self.attached_devices)
47 self.tests = [] 50 self.tests = []
48 51
49 def CreateShardedTestRunner(self, device, index): 52 def CreateShardedTestRunner(self, device, index):
50 """Factory function to create a suite-specific test runner. 53 """Factory function to create a suite-specific test runner.
51 54
52 Args: 55 Args:
53 device: Device serial where this shard will run 56 device: Device serial where this shard will run
54 index: Index of this device in the pool. 57 index: Index of this device in the pool.
55 58
56 Returns: 59 Returns:
(...skipping 19 matching lines...) Expand all
76 logging.warning('Sharding in ' + str(len(self.attached_devices)) + 79 logging.warning('Sharding in ' + str(len(self.attached_devices)) +
77 ' devices.') 80 ' devices.')
78 logging.warning('Note that the output is not synchronized.') 81 logging.warning('Note that the output is not synchronized.')
79 logging.warning('Look for the "Final result" banner in the end.') 82 logging.warning('Look for the "Final result" banner in the end.')
80 logging.warning('*' * 80) 83 logging.warning('*' * 80)
81 final_results = TestResults() 84 final_results = TestResults()
82 for retry in xrange(self.retries): 85 for retry in xrange(self.retries):
83 logging.warning('Try %d of %d', retry + 1, self.retries) 86 logging.warning('Try %d of %d', retry + 1, self.retries)
84 self.SetupSharding(self.tests) 87 self.SetupSharding(self.tests)
85 test_runners = [] 88 test_runners = []
86 for index, device in enumerate(self.attached_devices): 89
87 logging.warning('*' * 80) 90 # Try to create N shards, retrying on failure.
88 logging.warning('Creating shard %d for %s', index, device) 91 try:
89 logging.warning('*' * 80) 92 for index, device in enumerate(self.attached_devices):
90 test_runner = self.CreateShardedTestRunner(device, index) 93 logging.warning('*' * 80)
91 test_runners += [test_runner] 94 logging.warning('Creating shard %d for %s', index, device)
95 logging.warning('*' * 80)
96 test_runner = self.CreateShardedTestRunner(device, index)
97 test_runners += [test_runner]
98 except errors.DeviceUnresponsiveError as e:
99 logging.critical('****Failed to create a shard: [%s]', e)
100 self.attached_devices.remove(device)
101 continue
102
92 logging.warning('Starting...') 103 logging.warning('Starting...')
93 pool = multiprocessing.Pool(len(self.attached_devices), 104 pool = multiprocessing.Pool(len(self.attached_devices),
94 SetTestsContainer, 105 SetTestsContainer,
95 [BaseTestSharder.tests_container]) 106 [BaseTestSharder.tests_container])
96 # map can't handle KeyboardInterrupt exception. It's a python bug. 107 # map can't handle KeyboardInterrupt exception. It's a python bug.
97 # So use map_async instead. 108 # So use map_async instead.
98 async_results = pool.map_async(_ShardedTestRunnable, test_runners) 109 async_results = pool.map_async(_ShardedTestRunnable, test_runners)
99 results_lists = async_results.get(999999) 110 try:
100 111 results_lists = async_results.get(999999)
112 except errors.DeviceUnresponsiveError as e:
113 logging.critical('****Failed to run test: [%s]', e)
114 self.attached_devices = android_commands.GetAttachedDevices()
115 continue
yongsheng 2012/11/01 02:22:49 If one device raises an exception, this might bloc
bulach 2012/11/01 11:41:06 hmm, sorry, I'm not sure what do you mean by "bloc
yongsheng 2012/11/02 00:55:11 you're right. That's what i mean: one exception is
101 test_results = TestResults.FromTestResults(results_lists) 116 test_results = TestResults.FromTestResults(results_lists)
102 # Re-check the attached devices for some devices may 117 # Re-check the attached devices for some devices may
103 # become offline 118 # become offline
104 retry_devices = set(android_commands.GetAttachedDevices()) 119 retry_devices = set(android_commands.GetAttachedDevices())
105 # Remove devices that had exceptions. 120 # Remove devices that had exceptions.
106 retry_devices -= TestResults.DeviceExceptions(results_lists) 121 retry_devices -= TestResults.DeviceExceptions(results_lists)
107 # Retry on devices that didn't have any exception. 122 # Retry on devices that didn't have any exception.
108 self.attached_devices = list(retry_devices) 123 self.attached_devices = list(retry_devices)
109 if (retry == self.retries - 1 or 124 if (retry == self.retries - 1 or
110 len(self.attached_devices) == 0): 125 len(self.attached_devices) == 0):
111 all_passed = final_results.ok + test_results.ok 126 all_passed = final_results.ok + test_results.ok
112 final_results = test_results 127 final_results = test_results
113 final_results.ok = all_passed 128 final_results.ok = all_passed
114 break 129 break
115 else: 130 else:
116 final_results.ok += test_results.ok 131 final_results.ok += test_results.ok
117 self.tests = [] 132 self.tests = []
118 for t in test_results.GetAllBroken(): 133 for t in test_results.GetAllBroken():
119 self.tests += [t.name] 134 self.tests += [t.name]
120 if not self.tests: 135 if not self.tests:
121 break 136 break
137 else:
138 # We ran out of retries, possibly out of healthy devices.
139 # There's no recovery at this point.
140 raise Exception('Unrecoverable error while retrying test runs.')
122 self.OnTestsCompleted(test_runners, final_results) 141 self.OnTestsCompleted(test_runners, final_results)
123 return final_results 142 return final_results
OLDNEW
« no previous file with comments | « no previous file | build/android/run_tests.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698