Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 | 5 |
| 6 import logging | 6 import logging |
| 7 import multiprocessing | 7 import multiprocessing |
| 8 | 8 |
| 9 from pylib import android_commands | 9 from pylib import android_commands |
| 10 from pylib.base.test_result import TestResults | 10 from pylib.base.test_result import TestResults |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 47 self.attached_devices = attached_devices | 47 self.attached_devices = attached_devices |
| 48 # Worst case scenario: a device will drop offline per run, so we need | 48 # Worst case scenario: a device will drop offline per run, so we need |
| 49 # to retry until we're out of devices. | 49 # to retry until we're out of devices. |
| 50 | 50 |
| 51 # TODO(frankf): There are two sources of flakiness: | 51 # TODO(frankf): There are two sources of flakiness: |
| 52 # 1. Device flakiness | 52 # 1. Device flakiness |
| 53 # 2. Test/product flakiness | 53 # 2. Test/product flakiness |
| 54 # We should differentiate between these. Otherwise, blindly retrying tests | 54 # We should differentiate between these. Otherwise, blindly retrying tests |
| 55 # might mask test/product flakiness. For type 2, we should follow the | 55 # might mask test/product flakiness. For type 2, we should follow the |
| 56 # general chrome best practices. | 56 # general chrome best practices. |
| 57 self.retries = len(self.attached_devices) | 57 self.retries = 3 |
|
frankf
2013/02/14 03:42:15
Do you have some data why the tot bot was flaky? R
Isaac (away)
2013/02/14 06:38:50
I agree this code would be better if we distinguis
frankf
2013/02/14 06:43:48
Let's define it as a constant at the top of the mo
Isaac (away)
2013/02/14 06:53:10
Done.
Yaron
2013/02/14 16:59:27
It wasn't the bot that was flaky (it only had one
| |
| 58 self.tests = [] | 58 self.tests = [] |
| 59 self.build_type = build_type | 59 self.build_type = build_type |
| 60 | 60 |
| 61 def CreateShardedTestRunner(self, device, index): | 61 def CreateShardedTestRunner(self, device, index): |
| 62 """Factory function to create a suite-specific test runner. | 62 """Factory function to create a suite-specific test runner. |
| 63 | 63 |
| 64 Args: | 64 Args: |
| 65 device: Device serial where this shard will run | 65 device: Device serial where this shard will run |
| 66 index: Index of this device in the pool. | 66 index: Index of this device in the pool. |
| 67 | 67 |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 99 test_runners = [] | 99 test_runners = [] |
| 100 | 100 |
| 101 # Try to create N shards, and retrying on failure. | 101 # Try to create N shards, and retrying on failure. |
| 102 try: | 102 try: |
| 103 for index, device in enumerate(self.attached_devices): | 103 for index, device in enumerate(self.attached_devices): |
| 104 logging.warning('*' * 80) | 104 logging.warning('*' * 80) |
| 105 logging.warning('Creating shard %d for %s', index, device) | 105 logging.warning('Creating shard %d for %s', index, device) |
| 106 logging.warning('*' * 80) | 106 logging.warning('*' * 80) |
| 107 test_runner = self.CreateShardedTestRunner(device, index) | 107 test_runner = self.CreateShardedTestRunner(device, index) |
| 108 test_runners += [test_runner] | 108 test_runners += [test_runner] |
| 109 except android_commands.errors.DeviceUnresponsiveError as e: | 109 except android_commands.errors.DeviceUnresponsiveError as e: |
|
cjhopman
2013/02/14 03:06:34
What happens if 4 devices are offline? It appears
Isaac (away)
2013/02/14 03:17:46
attached_devices is initially set to only the onli
cjhopman
2013/02/14 15:37:07
I would be ok with just failing in that case, but
| |
| 110 logging.critical('****Failed to create a shard: [%s]', e) | 110 logging.critical('****Failed to create a shard: [%s]', e) |
| 111 self.attached_devices.remove(device) | 111 self.attached_devices.remove(device) |
| 112 continue | 112 continue |
| 113 | 113 |
| 114 logging.warning('Starting...') | 114 logging.warning('Starting...') |
| 115 pool = multiprocessing.Pool(len(self.attached_devices), | 115 pool = multiprocessing.Pool(len(self.attached_devices), |
| 116 SetTestsContainer, | 116 SetTestsContainer, |
| 117 [BaseTestSharder.tests_container]) | 117 [BaseTestSharder.tests_container]) |
| 118 # map can't handle KeyboardInterrupt exception. It's a python bug. | 118 # map can't handle KeyboardInterrupt exception. It's a python bug. |
| 119 # So use map_async instead. | 119 # So use map_async instead. |
| (...skipping 27 matching lines...) Expand all Loading... | |
| 147 for t in test_results.GetAllBroken(): | 147 for t in test_results.GetAllBroken(): |
| 148 self.tests += [t.name] | 148 self.tests += [t.name] |
| 149 if not self.tests: | 149 if not self.tests: |
| 150 break | 150 break |
| 151 else: | 151 else: |
| 152 # We ran out retries, possibly out of healthy devices. | 152 # We ran out retries, possibly out of healthy devices. |
| 153 # There's no recovery at this point. | 153 # There's no recovery at this point. |
| 154 raise Exception('Unrecoverable error while retrying test runs.') | 154 raise Exception('Unrecoverable error while retrying test runs.') |
| 155 self._KillHostForwarder() | 155 self._KillHostForwarder() |
| 156 return final_results | 156 return final_results |
| OLD | NEW |