Chromium Code Reviews| Index: scripts/slave/recipe_modules/auto_bisect/bisector.py |
| diff --git a/scripts/slave/recipe_modules/auto_bisect/bisector.py b/scripts/slave/recipe_modules/auto_bisect/bisector.py |
| index cd46eb187fbc2b818e67509b6c6a9350f9519313..c613d1be59539e72a907300714849a20cc0ebd7b 100644 |
| --- a/scripts/slave/recipe_modules/auto_bisect/bisector.py |
| +++ b/scripts/slave/recipe_modules/auto_bisect/bisector.py |
| @@ -7,9 +7,9 @@ import re |
| import time |
| import urllib |
| -from . import config_validation |
| -from . import depot_config |
| -from . import revision_state |
| +from auto_bisect import config_validation |
| +from auto_bisect import depot_config |
| +from auto_bisect import revision_state |
| _DEPS_SHA_PATCH = """ |
| diff --git DEPS.sha DEPS.sha |
| @@ -47,13 +47,6 @@ DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25 |
| # How long to re-test the initial good-bad range for until significant |
| # difference is established. |
| REGRESSION_CHECK_TIMEOUT = 20 * 60 * 60 # 20 hours. A build times out after 24. |
| -# If we reach this number of samples on the reference range and have not |
| -# achieved statistical significance, bail. |
| -MAX_REQUIRED_SAMPLES = 15 |
| - |
| -# Significance level to use for determining difference between revisions via |
| -# hypothesis testing. |
| -SIGNIFICANCE_LEVEL = 0.01 |
| _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = ( |
| 'The metric values for the initial "good" and "bad" revisions ' |
| @@ -155,7 +148,9 @@ class Bisector(object): |
| commit_position = self._api.m.commit_position.construct( |
| branch='refs/heads/master', value=rev) |
| try: |
| - return self._api.m.crrev.to_commit_hash(commit_position) |
| + return self._api.m.crrev.to_commit_hash( |
| + commit_position, |
| + step_test_data=lambda: self.api._test_data['hash_cp_map'][rev]) |
| except self.api.m.step.StepFailure: # pragma: no cover |
| self.surface_result('BAD_REV') |
| raise |
| @@ -167,37 +162,39 @@ class Bisector(object): |
| def _is_sha1(s): |
| return bool(re.match('^[0-9A-Fa-f]{40}$', s)) |
| - def significantly_different( |
| - self, list_a, list_b, |
| - significance_level=SIGNIFICANCE_LEVEL): # pragma: no cover |
| - """Uses an external script to run hypothesis testing with scipy. |
| - |
| - The reason why we need an external script is that scipy is not available to |
| - the default python installed in all platforms. We instead rely on an |
| - anaconda environment to provide those packages. |
| - |
| - Args: |
| - list_a, list_b: Two lists representing samples to be compared. |
| - significance_level: Self-describing. As a decimal fraction. |
| - |
| + def compare_revisions(self, revision_a, revision_b): |
| + """ |
| Returns: |
| - A boolean indicating whether the null hypothesis ~(that the lists are |
| - samples from the same population) can be rejected at the specified |
| - significance level. |
| + SIGNIFICANTLY_DIFFERENT if the samples are significantly different. |
| + NEED_MORE_DATA if there is not enough data to tell. |
| +    NOT_SIGNIFICANTLY_DIFFERENT if there's enough data but still can't |
| + tell the samples apart. |
| """ |
| - step_result = self.api.m.python( |
| - 'Checking sample difference', |
| - self.api.resource('significantly_different.py'), |
| - [json.dumps(list_a), json.dumps(list_b), str(significance_level)], |
| - stdout=self.api.m.json.output()) |
| - results = step_result.stdout |
| - if results is None: |
| - assert self.dummy_builds |
| - return True |
| - significantly_different = results['significantly_different'] |
| - step_result.presentation.logs[str(significantly_different)] = [ |
| - 'See json.output for details'] |
| - return significantly_different |
| + output_format = 'chartjson' |
| + values_a = revision_a.chartjson_paths |
| + values_b = revision_b.chartjson_paths |
| + if revision_a.valueset_paths and revision_b.valueset_paths: |
| + output_format = 'valueset' |
| + values_a = revision_a.valueset_paths |
| + values_b = revision_b.valueset_paths |
| + |
| + result = self.api.stat_compare( |
| + values_a, |
| + values_b, |
| + self.bisect_config['metric'], |
| + output_format=output_format, |
| + step_test_data=lambda: self.api.test_api.compare_samples_data( |
| + self.api._test_data.get('revision_data'), revision_a, revision_b)) |
| + |
| + revision_a.debug_values = result['sample_a']['debug_values'] |
| + revision_b.debug_values = result['sample_b']['debug_values'] |
| + |
| + if result['result'] == revision_state.NEED_MORE_DATA: |
| + return revision_state.NEED_MORE_DATA |
| + elif result['result']: |
| + return revision_state.SIGNIFICANTLY_DIFFERENT |
| + else: # pragma: no cover |
| + return revision_state.NOT_SIGNIFICANTLY_DIFFERENT |
| def config_step(self): |
| """Yields a step that prints the bisect config.""" |
| @@ -235,8 +232,8 @@ class Bisector(object): |
| return self._api |
| def compute_relative_change(self): |
| - old_value = float(self.good_rev.mean_value) |
| - new_value = float(self.bad_rev.mean_value) |
| + old_value = float(self.good_rev.mean or 0) |
| + new_value = float(self.bad_rev.mean or 0) |
| if new_value and not old_value: # pragma: no cover |
| self.relative_change = ZERO_TO_NON_ZERO |
| @@ -245,145 +242,26 @@ class Bisector(object): |
| rel_change = self.api.m.math_utils.relative_change(old_value, new_value) |
| self.relative_change = '%.2f%%' % (100 * rel_change) |
| - def make_deps_sha_file(self, deps_sha): |
| - """Make a diff patch that creates DEPS.sha. |
| - |
| - Args: |
| - deps_sha (str): The hex digest of a SHA1 hash of the diff that patches |
| - DEPS. |
| - |
| - Returns: |
| - A string containing a git diff. |
| - """ |
| - return _DEPS_SHA_PATCH % {'deps_sha': deps_sha} |
| - |
| - def _git_intern_file(self, file_contents, cwd, commit_hash): |
| - """Writes a file to the git database and produces its git hash. |
| - |
| - Args: |
| - file_contents (str): The contents of the file to be hashed and interned. |
| - cwd (recipe_config_types.Path): Path to the checkout whose repository the |
| - file is to be written to. |
| - commit_hash (str): An identifier for the step. |
| - |
| - Returns: |
| - A string containing the hash of the interned object. |
| - """ |
| - cmd = 'hash-object -t blob -w --stdin'.split(' ') |
| - stdin = self.api.m.raw_io.input(file_contents) |
| - stdout = self.api.m.raw_io.output() |
| - step_name = 'Hashing modified DEPS file with revision ' + commit_hash |
| - step_result = self.api.m.git(*cmd, cwd=cwd, stdin=stdin, stdout=stdout, |
| - name=step_name) |
| - hash_string = step_result.stdout.splitlines()[0] |
| - try: |
| - if hash_string: |
| - int(hash_string, 16) |
| - return hash_string |
| - except ValueError: # pragma: no cover |
| - reason = 'Git did not output a valid hash for the interned file.' |
| - self.api.m.halt(reason) |
| - raise self.api.m.step.StepFailure(reason) |
| - |
| - def _gen_diff_patch(self, git_object_a, git_object_b, src_alias, dst_alias, |
| - cwd, deps_rev): |
| - """Produces a git diff patch. |
| - |
| - Args: |
| - git_object_a (str): Tree-ish git object identifier. |
| - git_object_b (str): Another tree-ish git object identifier. |
| - src_alias (str): Label to replace the tree-ish identifier on |
| - the resulting diff patch. (git_object_a) |
| - dst_alias (str): Same as above for (git_object_b) |
| - cwd (recipe_config_types.Path): Path to the checkout whose repo contains |
| - the objects to be compared. |
| - deps_rev (str): Deps revision to identify the patch generating step. |
| - |
| - Returns: |
| - A string containing the diff patch as produced by the 'git diff' command. |
| - """ |
| - # The prefixes used in the command below are used to find and replace the |
| - # tree-ish git object id's on the diff output more easily. |
| - cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:' |
| - cmd %= (git_object_a, git_object_b) |
| - cmd = cmd.split(' ') |
| - stdout = self.api.m.raw_io.output() |
| - step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev) |
| - step_result = self.api.m.git(*cmd, cwd=cwd, stdout=stdout, name=step_name) |
| - patch_text = step_result.stdout |
| - src_string = 'IAMSRC:' + git_object_a |
| - dst_string = 'IAMDST:' + git_object_b |
| - patch_text = patch_text.replace(src_string, src_alias) |
| - patch_text = patch_text.replace(dst_string, dst_alias) |
| - return patch_text |
| - |
| - def make_deps_patch(self, base_revision, base_file_contents, |
| - depot, new_commit_hash): |
| - """Make a diff patch that updates a specific dependency revision. |
| - |
| - Args: |
| - base_revision (RevisionState): The revision for which the DEPS file is to |
| - be patched. |
| - base_file_contents (str): The contents of the original DEPS file. |
| - depot (str): The dependency to modify. |
| - new_commit_hash (str): The revision to put in place of the old one. |
| - |
| - Returns: |
| - A pair containing the git diff patch that updates DEPS, and the |
| - full text of the modified DEPS file, both as strings. |
| - """ |
| - original_contents = str(base_file_contents) |
| - patched_contents = str(original_contents) |
| - |
| - # Modify DEPS. |
| - deps_var = depot['deps_var'] |
| - deps_item_regexp = re.compile( |
| - r'(?<=["\']%s["\']: ["\'])([a-fA-F0-9]+)(?=["\'])' % deps_var, |
| - re.MULTILINE) |
| - if not re.search(deps_item_regexp, original_contents): # pragma: no cover |
| - reason = 'DEPS file does not contain entry for ' + deps_var |
| - self.api.m.halt(reason) |
| - raise self.api.m.step.StepFailure(reason) |
| - |
| - patched_contents = re.sub(deps_item_regexp, new_commit_hash, |
| - original_contents) |
| - |
| - cwd = self.api.working_dir.join( |
| - depot_config.DEPOT_DEPS_NAME[base_revision.depot_name]['src']) |
| - deps_file = 'DEPS' |
| - # This is to support backward compatibility on android-chrome because |
| - # .DEPS.git got migrated to DEPS on April 5, 2016 |
| - if (base_revision.depot_name == 'android-chrome' and |
| - self.api.m.path.exists(self.api.m.path.join(cwd, '.DEPS.git'))): |
| - deps_file = '.DEPS.git' # pragma: no cover |
| - |
| - interned_deps_hash = self._git_intern_file( |
| - patched_contents, cwd, new_commit_hash) |
| - |
| - patch_text = self._gen_diff_patch( |
| - '%s:%s' % (base_revision.commit_hash, deps_file), |
| - interned_deps_hash, deps_file, deps_file, |
| - cwd=cwd, |
| - deps_rev=new_commit_hash) |
| - return patch_text, patched_contents |
| - |
| def _expand_initial_revision_range(self): |
| """Sets the initial contents of |self.revisions|.""" |
| with self.api.m.step.nest('Expanding revision range'): |
| good_hash = self.good_rev.commit_hash |
| bad_hash = self.bad_rev.commit_hash |
| + depot = self.good_rev.depot_name |
| step_name = 'for revisions %s:%s' % (good_hash, bad_hash) |
| revisions = self._revision_range( |
| start=good_hash, |
| end=bad_hash, |
| depot_name=self.base_depot, |
| step_name=step_name, |
| - exclude_end=True) |
| + exclude_end=True, |
| + step_test_data=lambda: self.api._test_data['revision_list'][depot] |
|
dtu
2016/09/13 23:57:27
How do you decide whether to pass step_test_data h
RobertoCN
2016/09/21 22:22:48
I also do not like this. Ideally gsutil (for examp
|
| + ) |
| self.revisions = [self.good_rev] + revisions + [self.bad_rev] |
| self._update_revision_list_indexes() |
| def _revision_range(self, start, end, depot_name, base_revision=None, |
| - step_name=None, exclude_end=False): |
| + step_name=None, exclude_end=False, **kwargs): |
| """Returns a list of RevisionState objects between |start| and |end|. |
| When expanding the initial revision range we want to exclude the last |
| @@ -412,7 +290,7 @@ class Bisector(object): |
| step_name, |
| self.api.resource('fetch_intervening_revisions.py'), |
| [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']], |
| - stdout=self.api.m.json.output()) |
| + stdout=self.api.m.json.output(), **kwargs) |
| except self.api.m.step.StepFailure: # pragma: no cover |
| self.surface_result('BAD_REV') |
| raise |
| @@ -521,7 +399,9 @@ class Bisector(object): |
| end=dep_revision_max, |
| depot_name=depot_name, |
| base_revision=min_revision, |
| - step_name=step_name) |
| + step_name=step_name, |
| + step_test_data=lambda: |
| + self.api._test_data['revision_list'][depot_name]) |
| new_revisions = self.revisions[:max_revision.list_index] |
| new_revisions += rev_list |
| new_revisions += self.revisions[max_revision.list_index:] |
| @@ -558,8 +438,8 @@ class Bisector(object): |
| True if the check passes (i.e. no problem), False if the change is not |
| a regression according to the improvement direction. |
| """ |
| - good = self.good_rev.mean_value |
| - bad = self.bad_rev.mean_value |
| + good = self.good_rev.mean |
| + bad = self.bad_rev.mean |
| if self.is_return_code_mode(): |
| return True |
| @@ -594,32 +474,32 @@ class Bisector(object): |
| other in a statistically significant manner. False if such difference could |
| not be established in the time or sample size allowed. |
| """ |
| - if self.test_type == 'return_code': |
| + if self.is_return_code_mode(): |
| return (self.good_rev.overall_return_code != |
| self.bad_rev.overall_return_code) |
| if self.bypass_stats_check: |
| - dummy_result = self.good_rev.values != self.bad_rev.values |
| + self.compare_revisions(self.good_rev, self.bad_rev) |
| + dummy_result = self.good_rev.mean != self.bad_rev.mean |
| if not dummy_result: |
| self._set_insufficient_confidence_warning() |
| return dummy_result |
| + # TODO(robertocn): This step should not be necessary in some cases. |
| with self.api.m.step.nest('Re-testing reference range'): |
| expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT |
| while time.time() < expiration_time: |
| - if len(self.good_rev.values) >= 5 and len(self.bad_rev.values) >= 5: |
| - if self.significantly_different(self.good_rev.values, |
| - self.bad_rev.values): |
| + if (self.good_rev.test_run_count >= 5 |
| + and self.bad_rev.test_run_count >= 5): |
| + if self.compare_revisions(self.good_rev, self.bad_rev): |
| return True |
| - if len(self.good_rev.values) == len(self.bad_rev.values): |
| + if self.good_rev.test_run_count == self.bad_rev.test_run_count: |
| revision_to_retest = self.last_tested_revision |
| else: |
| revision_to_retest = min(self.good_rev, self.bad_rev, |
| - key=lambda x: len(x.values)) |
| - if len(revision_to_retest.values) < MAX_REQUIRED_SAMPLES: |
| - revision_to_retest.retest() |
| - else: |
| - break |
| + key=lambda x: x.test_run_count) |
| + revision_to_retest._do_test() |
| + |
| self._set_insufficient_confidence_warning() |
| return False |
| @@ -641,14 +521,18 @@ class Bisector(object): |
| result = 'bisector.lkgr: %r\n' % self.lkgr |
| result += 'bisector.fkbr: %r\n\n' % self.fkbr |
| result += self._revision_value_table() |
| - if (self.lkgr and self.lkgr.values and self.fkbr and self.fkbr.values): |
| - result += '\n' + self._t_test_results() |
| + if (self.lkgr and self.lkgr.test_run_count and self.fkbr and |
| + self.fkbr.test_run_count): |
| + result += '\n' + '\n'.join([ |
| + 'LKGR values: %r' % list(self.lkgr.debug_values), |
| + 'FKBR values: %r' % list(self.fkbr.debug_values), |
| + ]) |
| return result |
| def _revision_value_table(self): |
| """Returns a string table showing revisions and their values.""" |
| header = [['Revision', 'Values']] |
| - rows = [[r.revision_string(), str(r.values)] for r in self.revisions] |
| + rows = [[r.revision_string(), str(r.debug_values)] for r in self.revisions] |
| return self._pretty_table(header + rows) |
| def _pretty_table(self, data): |
| @@ -657,20 +541,6 @@ class Bisector(object): |
| results.append('%-15s' * len(row) % tuple(row)) |
| return '\n'.join(results) |
| - def _t_test_results(self): |
| - """Returns a string showing t-test results for lkgr and fkbr.""" |
| - t, df, p = self.api.m.math_utils.welchs_t_test( |
| - self.lkgr.values, self.fkbr.values) |
| - lines = [ |
| - 'LKGR values: %r' % self.lkgr.values, |
| - 'FKBR values: %r' % self.fkbr.values, |
| - 't-statistic: %r' % t, |
| - 'deg. of freedom: %r' % df, |
| - 'p-value: %r' % p, |
| - 'Confidence score: %r' % (100 * (1 - p)) |
| - ] |
| - return '\n'.join(lines) |
| - |
| def print_result_debug_info(self): |
| """Prints extra debug info at the end of the bisect process.""" |
| lines = self._results_debug_message().splitlines() |
| @@ -695,7 +565,7 @@ class Bisector(object): |
| candidate_range = [revision for revision in |
| self.revisions[self.lkgr.list_index + 1: |
| self.fkbr.list_index] |
| - if not revision.tested and not revision.failed] |
| + if not revision.failed] |
| if len(candidate_range) == 1: |
| return candidate_range[0] |
| if len(candidate_range) == 0: |
| @@ -738,7 +608,7 @@ class Bisector(object): |
| does not contain a DEPS change. |
| """ |
| if (revision.bad and revision.previous_revision and |
| - revision.previous_revision.good): # pragma: no cover |
| + revision.previous_revision.good): |
| if revision.deps_change() and self._expand_deps_revisions(revision): |
| return False |
| self.culprit = revision |
| @@ -750,32 +620,9 @@ class Bisector(object): |
| return False |
| self.culprit = revision.next_revision |
| return True |
| - return False |
| - |
| - def wait_for_all(self, revision_list): |
| - """Waits for all revisions in list to finish.""" |
| - for r in revision_list: |
| - self.wait_for(r) |
| - |
| - def wait_for(self, revision, nest_check=True): |
| - """Waits for the revision to finish its job.""" |
| - if nest_check and not self.flags.get( |
| - 'do_not_nest_wait_for_revision'): # pragma: no cover |
| - with self.api.m.step.nest('Waiting for ' + revision.revision_string()): |
| - return self.wait_for(revision, nest_check=False) |
| - while True: |
| - revision.update_status() |
| - if revision.in_progress: |
| - self.api.m.python.inline( |
| - 'sleeping', |
| - """ |
| - import sys |
| - import time |
| - time.sleep(20*60) |
| - sys.exit(0) |
| - """) |
| - else: |
| - break |
| + # We'll never get here because revision adjacency is checked before this |
| + # function is called. |
| + assert False # pragma: no cover |
| def _update_candidate_range(self): |
| """Updates lkgr and fkbr (last known good/first known bad) revisions. |
| @@ -783,7 +630,7 @@ class Bisector(object): |
| lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in |
| bisectors.revisions.""" |
| for r in self.revisions: |
| - if r.tested: |
| + if r.test_run_count: |
| if r.good: |
| self.lkgr = r |
| elif r.bad: |
| @@ -821,7 +668,7 @@ class Bisector(object): |
| # TODO(prasadv): Refactor this code to remove hard coded values and use |
| # target_bit from the bot config. crbug.com/640287 |
| - if 'android' in bot_name: |
| + if 'android' in bot_name: # pragma: no cover |
| if any(b in bot_name for b in ['arm64', 'nexus9', 'nexus5X']): |
| return 'android_arm64_perf_bisect_builder' |
| return 'android_perf_bisect_builder' |
| @@ -851,7 +698,7 @@ class Bisector(object): |
| # TODO(prasadv): Refactor this code to remove hard coded values and use |
| # target_bit from the bot config. crbug.com/640287 |
| - if 'android' in bot_name: |
| + if 'android' in bot_name: #pragma: no cover |
| if any(b in bot_name for b in ['arm64', 'nexus9', 'nexus5X']): |
| return 'gs://chrome-perf/Android arm64 Builder/full-build-linux_' |
| return 'gs://chrome-perf/Android Builder/full-build-linux_' |
| @@ -876,7 +723,7 @@ class Bisector(object): |
| def is_return_code_mode(self): |
| """Checks whether this is a bisect on the test's exit code.""" |
| - return self.bisect_config.get('test_type') == 'return_code' |
| + return self.test_type == 'return_code' |
| def surface_result(self, result_string): |
| assert result_string in VALID_RESULT_CODES |
| @@ -891,10 +738,6 @@ class Bisector(object): |
| def get_result(self): |
| """Returns the results as a jsonable object.""" |
| config = self.bisect_config |
| - results_confidence = 0 |
| - if self.culprit: |
| - results_confidence = self.api.m.math_utils.confidence_score( |
| - self.lkgr.values, self.fkbr.values) |
| if self.failed: |
| status = 'failed' |
| @@ -916,9 +759,8 @@ class Bisector(object): |
| 'bisect_bot': self.get_perf_tester_name(), |
| 'command': config['command'], |
| 'test_type': config['test_type'], |
| - 'metric': config['metric'], |
| + 'metric': config.get('metric'), |
| 'change': self.relative_change, |
| - 'score': results_confidence, |
| 'good_revision': self.good_rev.commit_hash, |
| 'bad_revision': self.bad_rev.commit_hash, |
| 'warnings': self.warnings, |
| @@ -950,14 +792,14 @@ class Bisector(object): |
| def _revision_data(self): |
| revision_rows = [] |
| for r in self.revisions: |
| - if r.tested or r.aborted: |
| + if r.test_run_count: |
| revision_rows.append({ |
| 'depot_name': r.depot_name, |
| 'commit_hash': r.commit_hash, |
| 'revision_string': r.revision_string(), |
| - 'mean_value': r.mean_value, |
| + 'mean_value': r.mean, |
| 'std_dev': r.std_dev, |
| - 'values': r.values, |
| + 'values': r.debug_values, |
| 'result': 'good' if r.good else 'bad' if r.bad else 'unknown', |
| }) |
| return revision_rows |