OLD | NEW |
1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """An interface for holding state and result of revisions in a bisect job. | 5 """An interface for holding state and result of revisions in a bisect job. |
6 | 6 |
7 When implementing support for tests other than perf, one should extend this | 7 When implementing support for tests other than perf, one should extend this |
8 class so that the bisect module and recipe can use it. | 8 class so that the bisect module and recipe can use it. |
9 | 9 |
10 See perf_revision_state for an example. | 10 See perf_revision_state for an example. |
11 """ | 11 """ |
12 | 12 |
13 import hashlib | 13 import hashlib |
14 import json | 14 import json |
15 import math | 15 import math |
16 import os | 16 import os |
17 import tempfile | 17 import tempfile |
18 import re | 18 import re |
19 import uuid | 19 import uuid |
20 | 20 |
21 from . import depot_config | 21 from . import depot_config |
| 22 from exceptions import * |
22 | 23 |
23 # These relate to how to increase the number of repetitions during re-test | 24 # These relate to how to increase the number of repetitions during re-test |
24 MINIMUM_SAMPLE_SIZE = 5 | 25 MINIMUM_SAMPLE_SIZE = 5 |
25 INCREASE_FACTOR = 1.5 | 26 INCREASE_FACTOR = 1.5 |
26 # Buildbot job result codes. | 27 # Buildbot job result codes. |
27 # See http://docs.buildbot.net/current/developer/results.html | 28 # See http://docs.buildbot.net/current/developer/results.html |
28 SUCCESS, WARNINGS, FAILURE, SKIPPED, EXCEPTION = range(5) | 29 SUCCESS, WARNINGS, FAILURE, SKIPPED, EXCEPTION = range(5) |
29 | 30 |
30 class RevisionState(object): | 31 class RevisionState(object): |
31 """Abstracts the state of a single revision on a bisect job.""" | 32 """Abstracts the state of a single revision on a bisect job.""" |
32 | 33 |
33 # Possible values for the status attribute of RevisionState: | |
34 ( | |
35 NEW, # A revision_state object that has just been initialized. | |
36 BUILDING, # Requested a build for this revision, waiting for it. | |
37 TESTING, # A test job for this revision was triggered, waiting for it. | |
38 TESTED, # The test job completed with non-failing results. | |
39 FAILED, # Either the build or the test jobs failed or timed out. | |
40 ABORTED, # The build or test job was aborted. (For use in multi-secting). | |
41 SKIPPED, # A revision that was not built or tested for a special reason, | |
42 # such as those ranges that we know are broken, or when nudging | |
43 # revisions. | |
44 NEED_MORE_DATA, # Current number of test values is too small to establish | |
45 # a statistically significant difference between this | |
46 # revision and the revisions known to be good and bad. | |
47 ) = xrange(8) | |
48 | |
49 def __init__(self, bisector, commit_hash, depot_name=None, | 34 def __init__(self, bisector, commit_hash, depot_name=None, |
50 base_revision=None): | 35 base_revision=None): |
51 """Creates a new instance to track the state of a revision. | 36 """Creates a new instance to track the state of a revision. |
52 | 37 |
53 Args: | 38 Args: |
54 bisector (Bisector): The object performing the bisection. | 39 bisector (Bisector): The object performing the bisection. |
55 commit_hash (str): The hash identifying the revision to represent. | 40 commit_hash (str): The hash identifying the revision to represent. |
56 depot_name (str): The name of the depot as specified in DEPS. Must be a | 41 depot_name (str): The name of the depot as specified in DEPS. Must be a |
57 key in depot_config.DEPOT_DEPS_NAME . | 42 key in depot_config.DEPOT_DEPS_NAME . |
58 base_revision (RevisionState): The revision state to patch with the deps | 43 base_revision (RevisionState): The revision state to patch with the deps |
59 change. | 44 change. |
60 """ | 45 """ |
61 super(RevisionState, self).__init__() | 46 super(RevisionState, self).__init__() |
62 self.bisector = bisector | 47 self.bisector = bisector |
63 self._good = None | 48 self._good = None |
| 49 self.failed = False |
64 self.deps = None | 50 self.deps = None |
65 self.test_results_url = None | 51 self.test_results_url = None |
66 self.build_archived = False | 52 self.build_archived = False |
67 self.status = RevisionState.NEW | |
68 self.next_revision = None | 53 self.next_revision = None |
69 self.previous_revision = None | 54 self.previous_revision = None |
70 self.job_name = None | 55 self.job_name = None |
71 self.patch_file = None | 56 self.patch_file = None |
72 self.deps_revision = None | 57 self.deps_revision = None |
73 self.depot_name = depot_name or self.bisector.base_depot | 58 self.depot_name = depot_name or self.bisector.base_depot |
74 self.depot = depot_config.DEPOT_DEPS_NAME[self.depot_name] | 59 self.depot = depot_config.DEPOT_DEPS_NAME[self.depot_name] |
75 self.commit_hash = str(commit_hash) | 60 self.commit_hash = str(commit_hash) |
76 self._rev_str = None | 61 self._rev_str = None |
77 self.base_revision = base_revision | 62 self.base_revision = base_revision |
78 self.revision_overrides = {} | 63 self.revision_overrides = {} |
79 if self.base_revision: | 64 if self.base_revision: |
80 assert self.base_revision.deps_file_contents | 65 assert self.base_revision.deps_file_contents |
81 self.needs_patch = True | 66 self.needs_patch = True |
82 self.revision_overrides[self.depot['src']] = self.commit_hash | 67 self.revision_overrides[self.depot['src']] = self.commit_hash |
83 self.deps_patch, self.deps_file_contents = self.bisector.make_deps_patch( | 68 self.deps_patch, self.deps_file_contents = self.bisector.make_deps_patch( |
84 self.base_revision, self.base_revision.deps_file_contents, | 69 self.base_revision, self.base_revision.deps_file_contents, |
85 self.depot, self.commit_hash) | 70 self.depot, self.commit_hash) |
86 self.deps_sha = hashlib.sha1(self.deps_patch).hexdigest() | 71 self.deps_sha = hashlib.sha1(self.deps_patch).hexdigest() |
87 self.deps_sha_patch = self.bisector.make_deps_sha_file(self.deps_sha) | 72 self.deps_sha_patch = self.bisector.make_deps_sha_file(self.deps_sha) |
88 self.deps = dict(base_revision.deps) | 73 self.deps = dict(base_revision.deps) |
89 self.deps[self.depot_name] = self.commit_hash | 74 self.deps[self.depot_name] = self.commit_hash |
90 else: | 75 else: |
91 self.needs_patch = False | 76 self.needs_patch = False |
92 self.build_url = self.bisector.get_platform_gs_prefix() + self._gs_suffix() | 77 self.build_url = self.bisector.get_platform_gs_prefix() + self._gs_suffix() |
93 self.values = [] | 78 self.valueset_paths = [] |
94 self.mean_value = None | 79 self.chartjson_paths = [] |
95 self.overall_return_code = None | 80 self.debug_values = [] |
| 81 self.return_codes = [] |
| 82 self.mean = None |
96 self.std_dev = None | 83 self.std_dev = None |
97 self._test_config = None | 84 self._test_config = None |
98 self.build_number = None | 85 self.build_number = None |
99 | 86 |
100 if self.bisector.test_type == 'perf': | 87 if self.bisector.test_type == 'perf': |
101 self.repeat_count = MINIMUM_SAMPLE_SIZE | 88 self.repeat_count = MINIMUM_SAMPLE_SIZE |
102 else: | 89 else: |
103 self.repeat_count = self.bisector.bisect_config.get( | 90 self.repeat_count = self.bisector.bisect_config.get( |
104 'repeat_count', MINIMUM_SAMPLE_SIZE) | 91 'repeat_count', MINIMUM_SAMPLE_SIZE) |
105 | 92 |
106 @property | 93 @property |
107 def tested(self): | |
108 return self.status in (RevisionState.TESTED,) | |
109 | |
110 @property | |
111 def in_progress(self): | |
112 return self.status in (RevisionState.BUILDING, RevisionState.TESTING, | |
113 RevisionState.NEED_MORE_DATA) | |
114 | |
115 @property | |
116 def failed(self): | |
117 return self.status == RevisionState.FAILED | |
118 | |
119 @property | |
120 def aborted(self): | |
121 return self.status == RevisionState.ABORTED | |
122 | |
123 @property | |
124 def good(self): | 94 def good(self): |
125 return self._good == True | 95 return self._good == True |
126 | 96 |
127 @property | 97 @property |
128 def bad(self): | 98 def bad(self): |
129 return self._good == False | 99 return self._good == False |
130 | 100 |
131 @good.setter | 101 @good.setter |
132 def good(self, value): | 102 def good(self, value): |
133 self._good = value | 103 self._good = value |
134 | 104 |
135 @bad.setter | 105 @bad.setter |
136 def bad(self, value): | 106 def bad(self, value): |
137 self._good = not value | 107 self._good = not value |
138 | 108 |
| 109 @property |
| 110 def test_run_count(self): |
| 111 return max( |
| 112 len(self.valueset_paths), |
| 113 len(self.chartjson_paths), |
| 114 len(self.return_codes)) |
| 115 |
139 def start_job(self): | 116 def start_job(self): |
140 """Starts a build, or a test job if the build is available.""" | 117 try: |
141 if self.status == RevisionState.NEW and not self._is_build_archived(): | 118 if not self._is_build_archived(): |
142 self._request_build() | 119 self._request_build() |
143 self.status = RevisionState.BUILDING | 120 with self.bisector.api.m.step.nest('Waiting for build'): |
144 return | 121 while not self._is_build_archived(): |
| 122 self.api.m.python.inline( |
| 123 'sleeping', |
| 124 """ |
| 125 import sys |
| 126 import time |
| 127 time.sleep(20*60) |
| 128 sys.exit(0) |
| 129 """) |
| 130 if self._is_build_failed(): |
| 131 self.failed = True |
| 132 return |
145 | 133 |
146 if self._is_build_archived() and self.status in ( | |
147 RevisionState.NEW, RevisionState.BUILDING, | |
148 RevisionState.NEED_MORE_DATA): | |
149 self._do_test() | 134 self._do_test() |
150 self.status = RevisionState.TESTING | 135 while not self._check_revision_good(): |
| 136 min(self, self.bisector.lkgr, self.bisector.fkbr, |
| 137 key=lambda(x): x.test_run_count)._do_test() |
| 138 |
| 139 except UntestableRevisionException: |
| 140 if self.reference_range: |
| 141 # TODO(robertocn): Consider nudging the revision here. |
| 142 raise InconclusiveBisectException() |
151 | 143 |
152 def deps_change(self): | 144 def deps_change(self): |
153 """Uses `git show` to see if a given commit contains a DEPS change.""" | 145 """Uses `git show` to see if a given commit contains a DEPS change.""" |
154 # Avoid checking DEPS changes for dependency repo revisions. | 146 # Avoid checking DEPS changes for dependency repo revisions. |
155 # crbug.com/580681 | 147 # crbug.com/580681 |
156 if self.needs_patch: # pragma: no cover | 148 if self.needs_patch: # pragma: no cover |
157 return False | 149 return False |
158 api = self.bisector.api | 150 api = self.bisector.api |
159 working_dir = api.working_dir | 151 working_dir = api.working_dir |
160 cwd = working_dir.join( | 152 cwd = working_dir.join( |
161 depot_config.DEPOT_DEPS_NAME[self.depot_name]['src']) | 153 depot_config.DEPOT_DEPS_NAME[self.depot_name]['src']) |
162 name = 'Checking DEPS for ' + self.commit_hash | 154 name = 'Checking DEPS for ' + self.commit_hash |
163 step_result = api.m.git( | 155 step_result = api.m.git( |
164 'show', '--name-only', '--pretty=format:', | 156 'show', '--name-only', '--pretty=format:', |
165 self.commit_hash, cwd=cwd, stdout=api.m.raw_io.output(), name=name) | 157 self.commit_hash, cwd=cwd, stdout=api.m.raw_io.output(), name=name, |
166 if self.bisector.dummy_builds and not self.commit_hash.startswith('dcdc'): | 158 step_test_data=lambda: api._test_data['deps_change'][self.commit_hash] |
167 return False | 159 ) |
168 if 'DEPS' in step_result.stdout.splitlines(): # pragma: no cover | 160 if 'DEPS' in step_result.stdout.splitlines(): # pragma: no cover |
169 return True | 161 return True |
170 return False # pragma: no cover | 162 return False # pragma: no cover |
171 | 163 |
172 def _gen_deps_local_scope(self): | 164 def _gen_deps_local_scope(self): |
173 """Defines the Var and From functions in a dict for calling exec. | 165 """Defines the Var and From functions in a dict for calling exec. |
174 | 166 |
175 This is needed for executing the DEPS file. | 167 This is needed for executing the DEPS file. |
176 """ | 168 """ |
177 deps_data = { | 169 deps_data = { |
(...skipping 26 matching lines...) Expand all Loading... |
204 if not self.deps_file_contents: | 196 if not self.deps_file_contents: |
205 self.deps_file_contents = self._read_content( | 197 self.deps_file_contents = self._read_content( |
206 depot_config.DEPOT_DEPS_NAME[self.depot_name]['url'], | 198 depot_config.DEPOT_DEPS_NAME[self.depot_name]['url'], |
207 depot_config.DEPS_FILENAME, | 199 depot_config.DEPS_FILENAME, |
208 self.commit_hash) | 200 self.commit_hash) |
209 else: | 201 else: |
210 step_result = api.m.python( | 202 step_result = api.m.python( |
211 'fetch file %s:%s' % (self.commit_hash, depot_config.DEPS_FILENAME), | 203 'fetch file %s:%s' % (self.commit_hash, depot_config.DEPS_FILENAME), |
212 api.resource('fetch_file.py'), | 204 api.resource('fetch_file.py'), |
213 [depot_config.DEPS_FILENAME, '--commit', self.commit_hash], | 205 [depot_config.DEPS_FILENAME, '--commit', self.commit_hash], |
214 stdout=api.m.raw_io.output()) | 206 stdout=api.m.raw_io.output(), |
| 207 step_test_data=lambda: api._test_data['deps'][self.commit_hash] |
| 208 ) |
215 self.deps_file_contents = step_result.stdout | 209 self.deps_file_contents = step_result.stdout |
216 try: | 210 try: |
217 deps_data = self._gen_deps_local_scope() | 211 deps_data = self._gen_deps_local_scope() |
218 exec(self.deps_file_contents or 'deps = {}', {}, deps_data) | 212 exec (self.deps_file_contents or 'deps = {}') in {}, deps_data |
219 deps_data = deps_data['deps'] | 213 deps_data = deps_data['deps'] |
220 except ImportError: # pragma: no cover | 214 except ImportError: # pragma: no cover |
221 # TODO(robertocn): Implement manual parsing of DEPS when exec fails. | 215 # TODO(robertocn): Implement manual parsing of DEPS when exec fails. |
222 raise NotImplementedError('Path not implemented to manually parse DEPS') | 216 raise NotImplementedError('Path not implemented to manually parse DEPS') |
223 | 217 |
224 revision_regex = re.compile('.git@(?P<revision>[a-fA-F0-9]+)') | 218 revision_regex = re.compile('.git@(?P<revision>[a-fA-F0-9]+)') |
225 results = {} | 219 results = {} |
226 for depot_name, depot_data in depot_config.DEPOT_DEPS_NAME.iteritems(): | 220 for depot_name, depot_data in depot_config.DEPOT_DEPS_NAME.iteritems(): |
227 if (depot_data.get('platform') and | 221 if (depot_data.get('platform') and |
228 depot_data.get('platform') not in recipe_tester_name.lower()): | 222 depot_data.get('platform') not in recipe_tester_name.lower()): |
(...skipping 10 matching lines...) Expand all Loading... |
239 else: # pragma: no cover | 233 else: # pragma: no cover |
240 warning_text = ('Could not parse revision for %s while bisecting ' | 234 warning_text = ('Could not parse revision for %s while bisecting ' |
241 '%s' % (depot_name, self.depot)) | 235 '%s' % (depot_name, self.depot)) |
242 if warning_text not in self.bisector.warnings: | 236 if warning_text not in self.bisector.warnings: |
243 self.bisector.warnings.append(warning_text) | 237 self.bisector.warnings.append(warning_text) |
244 else: | 238 else: |
245 results[depot_name] = None | 239 results[depot_name] = None |
246 self.deps = results | 240 self.deps = results |
247 return | 241 return |
248 | 242 |
249 def update_status(self): | |
250 """Checks on the pending jobs and updates status accordingly. | |
251 | |
252 This method will check for the build to complete and then trigger the test, | |
253 or will wait for the test as appropriate. | |
254 | |
255 To wait for the test we try to get the buildbot job url from GS, and if | |
256 available, we query the status of such job. | |
257 """ | |
258 if self.status == RevisionState.BUILDING: | |
259 if self._is_build_archived(): | |
260 self.start_job() | |
261 elif self._is_build_failed(): | |
262 self.status = RevisionState.FAILED | |
263 elif (self.status in (RevisionState.TESTING, RevisionState.NEED_MORE_DATA) | |
264 and self._results_available()): | |
265 # If we have already decided whether the revision is good or bad we | |
266 # shouldn't check again | |
267 check_revision_goodness = not(self.good or self.bad) | |
268 self._read_test_results( | |
269 check_revision_goodness=check_revision_goodness) | |
270 # We assume _read_test_results may have changed the status to a broken | |
271 # state such as FAILED or ABORTED. | |
272 if self.status in (RevisionState.TESTING, RevisionState.NEED_MORE_DATA): | |
273 self.status = RevisionState.TESTED | |
274 | |
275 def _is_build_archived(self): | 243 def _is_build_archived(self): |
276 """Checks if the revision is already built and archived.""" | 244 """Checks if the revision is already built and archived.""" |
277 if not self.build_archived: | 245 if not self.build_archived: |
278 api = self.bisector.api | 246 api = self.bisector.api |
279 self.build_archived = api.gsutil_file_exists(self.build_url) | 247 self.build_archived = api.gsutil_file_exists(self.build_url) |
280 | 248 |
281 if self.bisector.dummy_builds: | 249 # This is meant to return false the first time it is called, and true on later |
282 self.build_archived = self.in_progress | 250 # calls. This is so that expectations can cover both cases. |
| 251 if self.bisector.dummy_builds and not self.build_archived: |
| 252 self.build_archived = True |
| 253 return False |
283 | 254 |
284 return self.build_archived | 255 return self.build_archived |
285 | 256 |
286 def _fetch_build_info(self, base_url, build_number): | 257 def _fetch_build_info(self, base_url, build_number): |
287 api = self.bisector.api | 258 api = self.bisector.api |
288 build_url = '%s/builds/%s?as_text=1' % (base_url, build_number) | 259 build_url = '%s/builds/%s?as_text=1' % (base_url, build_number) |
289 fetch_result = api.m.url.fetch( build_url, step_name='fetch build details') | 260 fetch_result = api.m.url.fetch( build_url, step_name='fetch build details') |
290 return json.loads(fetch_result or '{}') | 261 return json.loads(fetch_result or '{}') |
291 | 262 |
| 263 # TODO(robertocn): Make this use buildbucket instead. |
292 def _is_build_failed(self): | 264 def _is_build_failed(self): |
293 api = self.bisector.api | 265 api = self.bisector.api |
294 current_build = None | 266 current_build = None |
295 path = 'json/builders/' + self.bisector.get_builder_bot_for_this_platform() | 267 path = 'json/builders/' + self.bisector.get_builder_bot_for_this_platform() |
296 base_url = api.m.properties.get('buildbotURL', 'http://localhost:8041/') | 268 base_url = api.m.properties.get('buildbotURL', 'http://localhost:8041/') |
297 base_url += path | 269 base_url += path |
298 if self.build_number is None: | 270 if self.build_number is None: |
299 try: | 271 try: |
300 # Get all the current builds. | 272 # Get all the current builds. |
301 builder_state_url = base_url + '?as_text=1' | 273 builder_state_url = base_url + '?as_text=1' |
(...skipping 12 matching lines...) Expand all Loading... |
314 # If we cannot get json from buildbot, we cannot determine if a build is | 286 # If we cannot get json from buildbot, we cannot determine if a build is |
315 # failed, hence we consider it in progress until it times out. | 287 # failed, hence we consider it in progress until it times out. |
316 return False | 288 return False |
317 if self.build_number is None: | 289 if self.build_number is None: |
318 # The build hasn't started yet, therefore it's not failed. | 290 # The build hasn't started yet, therefore it's not failed. |
319 return False | 291 return False |
320 if not current_build: | 292 if not current_build: |
321 current_build = self._fetch_build_info(base_url, self.build_number) | 293 current_build = self._fetch_build_info(base_url, self.build_number) |
322 return current_build.get('results') in [FAILURE, SKIPPED, EXCEPTION] | 294 return current_build.get('results') in [FAILURE, SKIPPED, EXCEPTION] |
323 | 295 |
324 def _results_available(self): | |
325 """Checks if the results for the test job have been uploaded.""" | |
326 api = self.bisector.api | |
327 result = api.gsutil_file_exists(self.test_results_url) | |
328 if self.bisector.dummy_builds: | |
329 return self.in_progress | |
330 return result # pragma: no cover | |
331 | |
332 def _gs_suffix(self): | 296 def _gs_suffix(self): |
333 """Provides the expected right half of the build filename. | 297 """Provides the expected right half of the build filename. |
334 | 298 |
335 This takes into account whether the build has a deps patch. | 299 This takes into account whether the build has a deps patch. |
336 """ | 300 """ |
337 top_revision = self | 301 top_revision = self |
338 while top_revision.base_revision: | 302 while top_revision.base_revision: |
339 top_revision = top_revision.base_revision | 303 top_revision = top_revision.base_revision |
340 name_parts = [top_revision.commit_hash] | 304 name_parts = [top_revision.commit_hash] |
341 if self.needs_patch: | 305 if self.needs_patch: |
342 name_parts.append(self.deps_sha) | 306 name_parts.append(self.deps_sha) |
343 return '%s.zip' % '_'.join(name_parts) | 307 return '%s.zip' % '_'.join(name_parts) |
344 | 308 |
345 def _read_test_results(self, check_revision_goodness=True): | 309 def _read_test_results(self, results): |
346 """Gets the test results from GS and checks if the rev is good or bad.""" | 310 # Results will be a dictionary containing paths to chartjsons, paths to |
347 test_results = self._get_test_results() | 311 # valuesets, and a list of return codes. |
348 # Results will contain the keys 'results' and 'output' where output is the | |
349 # stdout of the command, and 'results' is itself a dict with the key | |
350 # 'values' unless the test failed, in which case 'results' will contain | |
351 # the 'error' key explaining the type of error. | |
352 results = test_results['results'] | |
353 if results.get('errors'): | 312 if results.get('errors'): |
354 self.status = RevisionState.FAILED | 313 self.failed = True |
355 if 'MISSING_METRIC' in results.get('errors'): # pragma: no cover | 314 if 'MISSING_METRIC' in results.get('errors'): # pragma: no cover |
356 self.bisector.surface_result('MISSING_METRIC') | 315 self.bisector.surface_result('MISSING_METRIC') |
357 return | 316 raise UntestableRevisionException(results['errors']) |
358 self.values += results['values'] | 317 elif self.bisector.is_return_code_mode(): |
359 api = self.bisector.api | 318 assert len(results['retcodes']) |
360 if test_results.get('retcodes') and test_results['retcodes'][-1] != 0 and ( | 319 self.return_codes.extend(results['retcodes']) |
361 api.m.chromium.c.TARGET_PLATFORM == 'android'): #pragma: no cover | |
362 api.m.chromium_android.device_status() | |
363 current_connected_devices = api.m.chromium_android.devices | |
364 current_device = api.m.bisect_tester.device_to_test | |
365 if current_device not in current_connected_devices: | |
366 # We need to manually raise step failure here because we are catching | |
367 # them further down the line to enable return_code bisects and bisecting | |
368 # on benchmarks that are a little flaky. | |
369 raise api.m.step.StepFailure('Test device disconnected.') | |
370 if self.bisector.is_return_code_mode(): | |
371 retcodes = test_results['retcodes'] | |
372 self.overall_return_code = 0 if all(v == 0 for v in retcodes) else 1 | |
373 # Keeping mean_value for compatibility with dashboard. | |
374 # TODO(robertocn): refactor mean_value, specially when uploading results | |
375 # to dashboard. | |
376 self.mean_value = self.overall_return_code | |
377 elif self.values: | |
378 api = self.bisector.api | |
379 self.mean_value = api.m.math_utils.mean(self.values) | |
380 self.std_dev = api.m.math_utils.standard_deviation(self.values) | |
381 # Values were not found, but the test did not otherwise fail. | |
382 else: | 320 else: |
383 self.status = RevisionState.FAILED | 321 self.valueset_paths.extend(results.get('valueset_paths')) |
384 self.bisector.surface_result('MISSING_METRIC') | 322 self.chartjson_paths.extend(results.get('chartjson_paths')) |
385 return | |
386 # If we have already decided on the goodness of this revision, we shouldn't | |
387 # recheck it. | |
388 if self.good or self.bad: | |
389 check_revision_goodness = False | |
390 # We cannot test the goodness of the initial rev range. | |
391 if (self.bisector.good_rev != self and self.bisector.bad_rev != self and | |
392 check_revision_goodness): | |
393 if self._check_revision_good(): | |
394 self.good = True | |
395 else: | |
396 self.bad = True | |
397 | 323 |
398 def _request_build(self): | 324 def _request_build(self): |
399 """Posts a request to buildbot to build this revision and archive it.""" | 325 """Posts a request to buildbot to build this revision and archive it.""" |
400 api = self.bisector.api | 326 api = self.bisector.api |
401 bot_name = self.bisector.get_builder_bot_for_this_platform() | 327 bot_name = self.bisector.get_builder_bot_for_this_platform() |
402 | 328 |
403 # To allow multiple nested levels, we go to the topmost revision. | 329 # To allow multiple nested levels, we go to the topmost revision. |
404 top_revision = self | 330 top_revision = self |
405 while top_revision.base_revision: | 331 while top_revision.base_revision: |
406 top_revision = top_revision.base_revision | 332 top_revision = top_revision.base_revision |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
450 self._test_config = result | 376 self._test_config = result |
451 return result | 377 return result |
452 | 378 |
453 def _do_test(self): | 379 def _do_test(self): |
454 """Triggers tests for a revision, either locally or via try job. | 380 """Triggers tests for a revision, either locally or via try job. |
455 | 381 |
456 If local testing is enabled (i.e. director/tester merged) then | 382 If local testing is enabled (i.e. director/tester merged) then |
457 the test will be run on the same machine. Otherwise, this posts | 383 the test will be run on the same machine. Otherwise, this posts |
458 a request to buildbot to download and perf-test this build. | 384 a request to buildbot to download and perf-test this build. |
459 """ | 385 """ |
460 if self.bisector.bisect_config.get('dummy_job_names'): | 386 if self.test_run_count: |
461 self.job_name = self.commit_hash + '-test' | 387 self.repeat_count = max(MINIMUM_SAMPLE_SIZE, math.ceil( |
462 else: # pragma: no cover | 388 self.test_run_count * 1.5)) - len(self.test_run_count) |
463 self.job_name = uuid.uuid4().hex | 389 |
464 api = self.bisector.api | 390 api = self.bisector.api |
465 # Stores revision map for different repos eg, android-chrome, src, v8 etc. | 391 # Stores revision map for different repos eg, android-chrome, src, v8 etc. |
466 revision_ladder = {} | 392 revision_ladder = {} |
467 top_revision = self | 393 top_revision = self |
468 revision_ladder[top_revision.depot_name] = top_revision.commit_hash | 394 revision_ladder[top_revision.depot_name] = top_revision.commit_hash |
469 while top_revision.base_revision: # pragma: no cover | 395 while top_revision.base_revision: # pragma: no cover |
470 revision_ladder[top_revision.depot_name] = top_revision.commit_hash | 396 revision_ladder[top_revision.depot_name] = top_revision.commit_hash |
471 top_revision = top_revision.base_revision | 397 top_revision = top_revision.base_revision |
472 perf_test_properties = { | 398 perf_test_properties = { |
473 'builder_name': self.bisector.get_perf_tester_name(), | |
474 'properties': { | 399 'properties': { |
475 'revision': top_revision.commit_hash, | 400 'revision': top_revision.commit_hash, |
476 'parent_got_revision': top_revision.commit_hash, | 401 'parent_got_revision': top_revision.commit_hash, |
477 'parent_build_archive_url': self.build_url, | 402 'parent_build_archive_url': self.build_url, |
478 'bisect_config': self._get_bisect_config_for_tester(), | 403 'bisect_config': self._get_bisect_config_for_tester(), |
479 'job_name': self.job_name, | |
480 'revision_ladder': revision_ladder, | 404 'revision_ladder': revision_ladder, |
481 }, | 405 }, |
482 } | 406 } |
483 self.test_results_url = (self.bisector.api.GS_RESULTS_URL + | 407 self.bisector.last_tested_revision = self |
484 self.job_name + '.results') | 408 skip_download = self.bisector.last_tested_revision == self |
485 if (api.m.bisect_tester.local_test_enabled() or | 409 overrides = perf_test_properties['properties'] |
486 self.bisector.internal_bisect): # pragma: no cover | |
487 skip_download = self.bisector.last_tested_revision == self | |
488 self.bisector.last_tested_revision = self | |
489 overrides = perf_test_properties['properties'] | |
490 api.run_local_test_run(overrides, skip_download=skip_download) | |
491 else: | |
492 step_name = 'Triggering test job for ' + self.commit_hash | |
493 api.m.trigger(perf_test_properties, name=step_name) | |
494 | 410 |
495 def retest(self): # pragma: no cover | 411 def run_test_step_test_data(): |
496 # We need at least 5 samples for applying Mann-Whitney U test | 412 """Returns a single step data object when called. |
497 # with P < 0.01, two-tailed . | |
498 target_sample_size = max(5, math.ceil(len(self.values) * 1.5)) | |
499 self.status = RevisionState.NEED_MORE_DATA | |
500 self.repeat_count = target_sample_size - len(self.values) | |
501 self.start_job() | |
502 self.bisector.wait_for(self) | |
503 | 413 |
504 def _get_test_results(self): | 414 These are expected to be populated by the test_api. |
505 """Tries to get the results of a test job from cloud storage.""" | 415 """ |
506 api = self.bisector.api | 416 if (api._test_data['run_results'].get(self.commit_hash)): |
507 try: | 417 return api._test_data['run_results'][self.commit_hash].pop(0) |
508 stdout = api.m.raw_io.output() | 418 return api._test_data['run_results']['default'] |
509 name = 'Get test results for build ' + self.commit_hash | 419 |
510 step_result = api.m.gsutil.cat(self.test_results_url, stdout=stdout, | 420 |
511 name=name) | 421 self._read_test_results(api.run_local_test_run( |
512 if not step_result.stdout: | 422 overrides, skip_download=skip_download, |
513 raise api.m.step.StepFailure('Test for build %s failed' % | 423 step_test_data=run_test_step_test_data |
514 self.revision_string()) | 424 )) |
515 except api.m.step.StepFailure as sf: # pragma: no cover | |
516 self.bisector.surface_result('TEST_FAILURE') | |
517 return {'results': {'errors': str(sf)}} | |
518 else: | |
519 return json.loads(step_result.stdout) | |
520 | 425 |
521 def _check_revision_good(self): | 426 def _check_revision_good(self): |
522 """Determines if a revision is good or bad. | 427 """Determines if a revision is good or bad. |
523 | 428 |
524 Iteratively increment the sample size of the revision being tested, the last | 429 Returns: |
525 known good revision, and the first known bad revision until a relationship | 430 True if the revision is either good or bad, False if it cannot be |
526 of significant difference can be established betweeb the results of the | 431 determined from the available data. |
527 revision being tested and one of the other two. | 432 """ |
| 433 # Do not reclassify revisions. Important for reference range. |
| 434 if self.good or self.bad: |
| 435 return True |
528 | 436 |
529 If the results do not converge towards finding a significant difference in | |
530 either direction, this is expected to timeout eventually. This scenario | |
531 should be rather rare, since it is expected that the fkbr and lkgr are | |
532 significantly different as a precondition. | |
533 | |
534 Returns: | |
535 True if the results of testing this revision are significantly different | |
536 from those of testing the earliest known bad revision. | |
537 False if they are instead significantly different from those of testing | |
538 the latest known good revision. | |
539 """ | |
540 lkgr = self.bisector.lkgr | 437 lkgr = self.bisector.lkgr |
541 fkbr = self.bisector.fkbr | 438 fkbr = self.bisector.fkbr |
| 439 if self.bisector.is_return_code_mode(): |
| 440 if self.overall_return_code == lkgr.overall_return_code: |
| 441 self.good = True |
| 442 else: |
| 443 self.bad = True |
| 444 return True |
| 445 diff_from_good = self.bisector.compare_revisions(self, lkgr) |
| 446 diff_from_bad = self.bisector.compare_revisions(self, fkbr) |
| 447 if diff_from_good == False and diff_from_bad == False: |
| 448 # We have reached the max number of samples and have not established |
| 449 # difference, give up. |
| 450 raise InconclusiveBisectException() |
| 451 if diff_from_good and diff_from_bad: |
| 452 # Multiple regressions. |
| 453 # For now, proceed bisecting the biggest difference of the means. |
| 454 dist_from_good = abs(self.mean - lkgr.mean) |
| 455 dist_from_bad = abs(self.mean - fkbr.mean) |
| 456 if dist_from_good > dist_from_bad: |
| 457 # TODO(robertocn): Add way to handle the secondary regression |
| 458 #self.bisector.handle_secondary_regression(self, fkbr) |
| 459 self.bad = True |
| 460 return True |
| 461 else: |
| 462 #self.bisector.handle_secondary_regression(lkgr, self) |
| 463 self.good = True |
| 464 return True |
| 465 if diff_from_good: |
| 466 self.bad = True |
| 467 return True |
| 468 elif diff_from_bad: # pragma: no cover |
| 469 self.good = True |
| 470 return True |
| 471 return False |
542 | 472 |
543 if self.bisector.is_return_code_mode(): | |
544 return self.overall_return_code == lkgr.overall_return_code | |
545 | |
546 while True: | |
547 diff_from_good = self.bisector.significantly_different( | |
548 lkgr.values[:len(fkbr.values)], self.values) | |
549 diff_from_bad = self.bisector.significantly_different( | |
550 fkbr.values[:len(lkgr.values)], self.values) | |
551 | |
552 if diff_from_good and diff_from_bad: | |
553 # Multiple regressions. | |
554 # For now, proceed bisecting the biggest difference of the means. | |
555 dist_from_good = abs(self.mean_value - lkgr.mean_value) | |
556 dist_from_bad = abs(self.mean_value - fkbr.mean_value) | |
557 if dist_from_good > dist_from_bad: | |
558 # TODO(robertocn): Add way to handle the secondary regression | |
559 #self.bisector.handle_secondary_regression(self, fkbr) | |
560 return False | |
561 else: | |
562 #self.bisector.handle_secondary_regression(lkgr, self) | |
563 return True | |
564 | |
565 if diff_from_good or diff_from_bad: # pragma: no cover | |
566 return diff_from_bad | |
567 | |
568 self._next_retest() # pragma: no cover | |
569 | 473 |
570 def revision_string(self): | 474 def revision_string(self): |
571 if self._rev_str: | 475 if self._rev_str: |
572 return self._rev_str | 476 return self._rev_str |
573 result = '' | 477 result = '' |
574 if self.base_revision: # pragma: no cover | 478 if self.base_revision: # pragma: no cover |
575 result += self.base_revision.revision_string() + ',' | 479 result += self.base_revision.revision_string() + ',' |
576 commit = self.commit_hash[:10] | 480 commit = self.commit_hash[:10] |
577 if self.depot_name == 'chromium': | 481 if self.depot_name == 'chromium': |
578 try: | 482 try: |
579 commit = str(self.bisector.api.m.commit_position | 483 commit = str(self.bisector.api.m.commit_position |
580 .chromium_commit_position_from_hash(self.commit_hash)) | 484 .chromium_commit_position_from_hash(self.commit_hash)) |
581 except self.bisector.api.m.step.StepFailure: | 485 except self.bisector.api.m.step.StepFailure: |
582 pass # Failure to resolve a commit position is no reason to break. | 486 pass # Failure to resolve a commit position is no reason to break. |
583 result += '%s@%s' % (self.depot_name, commit) | 487 result += '%s@%s' % (self.depot_name, commit) |
584 self._rev_str = result | 488 self._rev_str = result |
585 return self._rev_str | 489 return self._rev_str |
586 | 490 |
587 def _next_retest(self): # pragma: no cover | 491 def __repr__(self): |
588 """Chooses one of current, lkgr, fkbr to retest. | 492 if not self.test_run_count: |
| 493 return ('RevisionState(rev=%s), values=[]' % self.revision_string()) |
| 494 if self.bisector.is_return_code_mode(): |
| 495 return ('RevisionState(rev=%s, mean=%r, overall_return_code=%r, ' |
| 496 'std_dev=%r)') % (self.revision_string(), self.mean, |
| 497 self.overall_return_code, self.std_dev) |
| 498 return ('RevisionState(rev=%s, mean_value=%r, std_dev=%r)' % ( |
| 499 self.revision_string(), self.mean, self.std_dev)) |
589 | 500 |
590 Look for the smallest sample and retest that. If the last tested revision | 501 @property |
591 is tied for the smallest sample, use that to take advantage of the fact | 502 def overall_return_code(self): |
592 that it is already downloaded and unzipped. | 503 if self.bisector.is_return_code_mode(): |
593 """ | 504 if self.return_codes: |
594 next_revision_to_test = min(self.bisector.lkgr, self, self.bisector.fkbr, | 505 if max(self.return_codes): |
595 key=lambda x: len(x.values)) | 506 return 1 |
596 if (len(self.bisector.last_tested_revision.values) == | 507 return 0 |
597 next_revision_to_test.values): | 508 raise ValueError('overall_return_code needs non-empty sample' |
598 self.bisector.last_tested_revision.retest() | 509 ) # pragma: no cover |
599 else: | 510 raise ValueError('overall_return_code only applies to return_code bisects' |
600 next_revision_to_test.retest() | 511 ) # pragma: no cover |
601 | |
602 def __repr__(self): | |
603 if self.overall_return_code is not None: | |
604 return ('RevisionState(rev=%s, values=%r, overall_return_code=%r, ' | |
605 'std_dev=%r)') % (self.revision_string(), self.values, | |
606 self.overall_return_code, self.std_dev) | |
607 return ('RevisionState(rev=%s, values=%r, mean_value=%r, std_dev=%r)' % ( | |
608 self.revision_string(), self.values, self.mean_value, self.std_dev)) | |
OLD | NEW |