OLD | NEW |
---|---|
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 | 5 |
6 import logging | 6 import logging |
7 import multiprocessing | 7 import multiprocessing |
8 | 8 |
9 from pylib import android_commands | 9 from pylib import android_commands |
10 from pylib.base.test_result import TestResults | 10 from pylib.base.test_result import TestResults |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
47 self.attached_devices = attached_devices | 47 self.attached_devices = attached_devices |
48 # Worst case scenario: a device will drop offline per run, so we need | 48 # Worst case scenario: a device will drop offline per run, so we need |
49 # to retry until we're out of devices. | 49 # to retry until we're out of devices. |
50 | 50 |
51 # TODO(frankf): There are two sources of flakiness: | 51 # TODO(frankf): There are two sources of flakiness: |
52 # 1. Device flakiness | 52 # 1. Device flakiness |
53 # 2. Test/product flakiness | 53 # 2. Test/product flakiness |
54 # We should differentiate between these. Otherwise, blindly retrying tests | 54 # We should differentiate between these. Otherwise, blindly retrying tests |
55 # might mask test/product flakiness. For type 2, we should follow the | 55 # might mask test/product flakiness. For type 2, we should follow the |
56 # general chrome best practices. | 56 # general chrome best practices. |
57 self.retries = len(self.attached_devices) | 57 self.retries = 3 |
frankf
2013/02/14 03:42:15
Do you have some data why the tot bot was flaky? R
Isaac (away)
2013/02/14 06:38:50
I agree this code would be better if we distinguis
frankf
2013/02/14 06:43:48
Let's define it as a constant at the top of the mo
Isaac (away)
2013/02/14 06:53:10
Done.
Yaron
2013/02/14 16:59:27
It wasn't the bot that was flaky (it only had one
| |
58 self.tests = [] | 58 self.tests = [] |
59 self.build_type = build_type | 59 self.build_type = build_type |
60 | 60 |
61 def CreateShardedTestRunner(self, device, index): | 61 def CreateShardedTestRunner(self, device, index): |
62 """Factory function to create a suite-specific test runner. | 62 """Factory function to create a suite-specific test runner. |
63 | 63 |
64 Args: | 64 Args: |
65 device: Device serial where this shard will run | 65 device: Device serial where this shard will run |
66 index: Index of this device in the pool. | 66 index: Index of this device in the pool. |
67 | 67 |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
99 test_runners = [] | 99 test_runners = [] |
100 | 100 |
101 # Try to create N shards, and retrying on failure. | 101 # Try to create N shards, and retrying on failure. |
102 try: | 102 try: |
103 for index, device in enumerate(self.attached_devices): | 103 for index, device in enumerate(self.attached_devices): |
104 logging.warning('*' * 80) | 104 logging.warning('*' * 80) |
105 logging.warning('Creating shard %d for %s', index, device) | 105 logging.warning('Creating shard %d for %s', index, device) |
106 logging.warning('*' * 80) | 106 logging.warning('*' * 80) |
107 test_runner = self.CreateShardedTestRunner(device, index) | 107 test_runner = self.CreateShardedTestRunner(device, index) |
108 test_runners += [test_runner] | 108 test_runners += [test_runner] |
109 except android_commands.errors.DeviceUnresponsiveError as e: | 109 except android_commands.errors.DeviceUnresponsiveError as e: |
cjhopman
2013/02/14 03:06:34
What happens if 4 devices are offline? It appears
Isaac (away)
2013/02/14 03:17:46
attached_devices is initially set to only the onli
cjhopman
2013/02/14 15:37:07
I would be ok with just failing in that case, but
| |
110 logging.critical('****Failed to create a shard: [%s]', e) | 110 logging.critical('****Failed to create a shard: [%s]', e) |
111 self.attached_devices.remove(device) | 111 self.attached_devices.remove(device) |
112 continue | 112 continue |
113 | 113 |
114 logging.warning('Starting...') | 114 logging.warning('Starting...') |
115 pool = multiprocessing.Pool(len(self.attached_devices), | 115 pool = multiprocessing.Pool(len(self.attached_devices), |
116 SetTestsContainer, | 116 SetTestsContainer, |
117 [BaseTestSharder.tests_container]) | 117 [BaseTestSharder.tests_container]) |
118 # map can't handle KeyboardInterrupt exception. It's a python bug. | 118 # map can't handle KeyboardInterrupt exception. It's a python bug. |
119 # So use map_async instead. | 119 # So use map_async instead. |
(...skipping 27 matching lines...) Expand all Loading... | |
147 for t in test_results.GetAllBroken(): | 147 for t in test_results.GetAllBroken(): |
148 self.tests += [t.name] | 148 self.tests += [t.name] |
149 if not self.tests: | 149 if not self.tests: |
150 break | 150 break |
151 else: | 151 else: |
152 # We ran out retries, possibly out of healthy devices. | 152 # We ran out retries, possibly out of healthy devices. |
153 # There's no recovery at this point. | 153 # There's no recovery at this point. |
154 raise Exception('Unrecoverable error while retrying test runs.') | 154 raise Exception('Unrecoverable error while retrying test runs.') |
155 self._KillHostForwarder() | 155 self._KillHostForwarder() |
156 return final_results | 156 return final_results |
OLD | NEW |