| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging | 5 import logging |
| 6 import math | 6 import math |
| 7 | 7 |
| 8 from common import chrome_dependency_fetcher | 8 from common.chrome_dependency_fetcher import ChromeDependencyFetcher |
| 9 from crash import changelist_classifier | 9 from crash import changelist_classifier |
| 10 from crash.loglinear.changelist_features import min_distance | 10 from crash.loglinear.changelist_features import min_distance |
| 11 from crash.loglinear.changelist_features import top_frame_index | 11 from crash.loglinear.changelist_features import top_frame_index |
| 12 from crash.loglinear.model import ToFeatureFunction | 12 from crash.loglinear.model import ToFeatureFunction |
| 13 from crash.loglinear.model import UnnormalizedLogLinearModel | 13 from crash.loglinear.model import UnnormalizedLogLinearModel |
| 14 from crash.stacktrace import CallStack | 14 from crash.stacktrace import CallStack |
| 15 from crash.stacktrace import Stacktrace | 15 from crash.stacktrace import Stacktrace |
| 16 | 16 |
| 17 | 17 |
| 18 class LogLinearChangelistClassifier(object): | 18 class LogLinearChangelistClassifier(object): |
| 19 """A ``LogLinearModel``-based implementation of CL classification.""" | 19 """A ``LogLinearModel``-based implementation of CL classification.""" |
| 20 | 20 |
| 21 def __init__(self, repository, weights, top_n_frames=7, top_n_suspects=3): | 21 def __init__(self, repository, get_repository, weights, top_n_frames=7, |
| | 22 top_n_suspects=3): |
| 22 """Args: | 23 """Args: |
| 23 repository (Repository): the Git repository for getting CLs to classify. | 24 repository (Repository): the Git repository for getting CLs to classify. |
| | 25 get_repository (callable): a function from DEP URLs to ``Repository`` |
| | 26 objects, so we can get changelogs and blame for each dep. Notably, |
| | 27 to keep the code here generic, we make no assumptions about |
| | 28 which subclass of ``Repository`` this function returns. Thus, |
| | 29 it is up to the caller to decide which class to return and handle |
| | 30 any other arguments that class may require (e.g., an HTTP client |
| | 31 for ``GitilesRepository``). |
| 24 weights (dict of float): the weights for the features. The keys of | 32 weights (dict of float): the weights for the features. The keys of |
| 25 the dictionary are the names of the features the weights are | 33 the dictionary are the names of the features the weights are |
| 26 for. We take this argument as a dict rather than as a list so that | 34 for. We take this argument as a dict rather than as a list so that |
| 27 callers needn't worry about what order to provide the weights in. | 35 callers needn't worry about what order to provide the weights in. |
| 28 top_n_frames (int): how many frames of each callstack to look at. | 36 top_n_frames (int): how many frames of each callstack to look at. |
| 29 top_n_suspects (int): maximum number of suspects to return. | 37 top_n_suspects (int): maximum number of suspects to return. |
| 30 """ | 38 """ |
| 31 self._repository = repository | 39 self._repository = repository |
| | 40 self._dependency_fetcher = ChromeDependencyFetcher(self._repository) |
| | 41 self._get_repository = get_repository |
| 32 self._top_n_frames = top_n_frames | 42 self._top_n_frames = top_n_frames |
| 33 self._top_n_suspects = top_n_suspects | 43 self._top_n_suspects = top_n_suspects |
| 34 | 44 |
| 35 feature_function = ToFeatureFunction([ | 45 feature_function = ToFeatureFunction([ |
| 36 top_frame_index.TopFrameIndexFeature(top_n_frames), | 46 top_frame_index.TopFrameIndexFeature(top_n_frames), |
| 37 min_distance.MinDistanceFeature(), | 47 min_distance.MinDistanceFeature(), |
| 38 ]) | 48 ]) |
| 39 | 49 |
| 40 weight_list = [ | 50 weight_list = [ |
| 41 weights['TopFrameIndex'], | 51 weights['TopFrameIndex'], |
| (...skipping 54 matching lines...) |
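As context for the new constructor signature and the ``get_repository`` argument documented above, here is a minimal usage sketch. It is not part of the change: ``FakeRepository``, the import path, and the ``'MinDistance'`` weight key are illustrative stand-ins (only ``'TopFrameIndex'`` is visible in this diff); a real caller would pass a concrete ``Repository`` subclass such as ``GitilesRepository`` with whatever extra arguments that class requires.

```python
# Illustrative only -- the module path and FakeRepository are hypothetical.
from crash.loglinear.changelist_classifier import LogLinearChangelistClassifier


class FakeRepository(object):
  """Stand-in for a Repository subclass (e.g. GitilesRepository)."""

  def __init__(self, repo_url=None):
    self.repo_url = repo_url


repository = FakeRepository()

classifier = LogLinearChangelistClassifier(
    repository=repository,
    # Maps a DEP url to a Repository for that dep; the classifier makes no
    # assumption about which Repository subclass this callable returns.
    get_repository=lambda dep_url: FakeRepository(dep_url),
    weights={
        'TopFrameIndex': 1.0,  # keys must match the feature names
        'MinDistance': 1.0,    # assumed name for MinDistanceFeature
    },
    top_n_frames=7,
    top_n_suspects=3)

# suspects = classifier(crash_report)  # scored suspect CLs for a crash report
```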
| 96 logging.info('ChangelistClassifier.__call__: Regression range %s:%s', | 106 logging.info('ChangelistClassifier.__call__: Regression range %s:%s', |
| 97 last_good_version, first_bad_version) | 107 last_good_version, first_bad_version) |
| 98 | 108 |
| 99 # We are only interested in the deps in the crash stack (the callstack that | 109 # We are only interested in the deps in the crash stack (the callstack that |
| 100 # caused the crash). | 110 # caused the crash). |
| 101 # TODO(wrengr): we may want to receive the crash deps as an argument, | 111 # TODO(wrengr): we may want to receive the crash deps as an argument, |
| 102 # so that when this method is called via Findit.FindCulprit, we avoid | 112 # so that when this method is called via Findit.FindCulprit, we avoid |
| 103 # doing redundant work creating it. | 113 # doing redundant work creating it. |
| 104 stack_deps = changelist_classifier.GetDepsInCrashStack( | 114 stack_deps = changelist_classifier.GetDepsInCrashStack( |
| 105 report.stacktrace.crash_stack, | 115 report.stacktrace.crash_stack, |
| 106 chrome_dependency_fetcher.ChromeDependencyFetcher( | 116 self._dependency_fetcher.GetDependency( |
| 107 self._repository).GetDependency(report.crashed_version, | 117 report.crashed_version, report.platform)) |
| 108 report.platform)) | |
| 109 | 118 |
| 110 # Map each dep and file to its changelogs, stack infos and blames. | 119 # Map each dep and file to its changelogs, stack infos and blames. |
| 111 dep_rolls = chrome_dependency_fetcher.ChromeDependencyFetcher( | 120 dep_rolls = self._dependency_fetcher.GetDependencyRollsDict( |
| 112 self._repository).GetDependencyRollsDict( | 121 last_good_version, first_bad_version, report.platform) |
| 113 last_good_version, first_bad_version, report.platform) | |
| 114 | 122 |
| 115 # A regression in a dep that was added or deleted (old_revision or | 123 # A regression in a dep that was added or deleted (old_revision or |
| 116 # new_revision is None) cannot be known for sure, and this case rarely | 124 # new_revision is None) cannot be known for sure, and this case rarely |
| 117 # happens, so just filter such rolls out. | 125 # happens, so just filter such rolls out. |
| 118 regression_deps_rolls = {} | 126 regression_deps_rolls = {} |
| 119 for dep_path, dep_roll in dep_rolls.iteritems(): | 127 for dep_path, dep_roll in dep_rolls.iteritems(): |
| 120 if not dep_roll.old_revision or not dep_roll.new_revision: | 128 if not dep_roll.old_revision or not dep_roll.new_revision: |
| 121 logging.info('Skip %s dependency %s', | 129 logging.info('Skip %s dependency %s', |
| 122 'added' if dep_roll.new_revision else 'deleted', dep_path) | 130 'added' if dep_roll.new_revision else 'deleted', dep_path) |
| 123 continue | 131 continue |
| 124 regression_deps_rolls[dep_path] = dep_roll | 132 regression_deps_rolls[dep_path] = dep_roll |
| 125 | 133 |
| 126 dep_to_file_to_changelogs, ignore_cls = ( | 134 dep_to_file_to_changelogs, ignore_cls = ( |
| 127 changelist_classifier.GetChangeLogsForFilesGroupedByDeps( | 135 changelist_classifier.GetChangeLogsForFilesGroupedByDeps( |
| 128 regression_deps_rolls, stack_deps, self._repository)) | 136 regression_deps_rolls, stack_deps, self._repository)) |
| 129 dep_to_file_to_stack_infos = ( | 137 dep_to_file_to_stack_infos = ( |
| 130 changelist_classifier.GetStackInfosForFilesGroupedByDeps( | 138 changelist_classifier.GetStackInfosForFilesGroupedByDeps( |
| 131 report.stacktrace, stack_deps)) | 139 report.stacktrace, stack_deps)) |
| 132 | 140 |
| 133 # Get the possible suspects. | 141 # Get the possible suspects. |
| 134 suspects = changelist_classifier.FindSuspects( | 142 suspects = changelist_classifier.FindSuspects( |
| 135 dep_to_file_to_changelogs, | 143 dep_to_file_to_changelogs, |
| 136 dep_to_file_to_stack_infos, | 144 dep_to_file_to_stack_infos, |
| 137 stack_deps, | 145 stack_deps, |
| 138 self._repository, | 146 self._get_repository, |
| 139 ignore_cls) | 147 ignore_cls) |
| 140 if suspects is None: | 148 if suspects is None: |
| 141 return [] | 149 return [] |
| 142 | 150 |
| 143 # Score the suspects and organize them for outputting/returning. | 151 # Score the suspects and organize them for outputting/returning. |
| 144 features_given_report = self._model.Features(report) | 152 features_given_report = self._model.Features(report) |
| 145 score_given_report = self._model.Score(report) | 153 score_given_report = self._model.Score(report) |
| 146 scored_suspects = [] | 154 scored_suspects = [] |
| 147 for suspect in suspects: | 155 for suspect in suspects: |
| 148 score = score_given_report(suspect) | 156 score = score_given_report(suspect) |
| (...skipping 73 matching lines...) |
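For context on the scoring step above (``self._model.Score(report)`` applied to each suspect): an unnormalized log-linear model scores a candidate as the weighted sum of its feature values, and exponentiates that sum when an unnormalized probability is needed. The sketch below shows only that arithmetic with made-up numbers; it is not the actual ``UnnormalizedLogLinearModel`` API, which also handles computing the feature values from the report.

```python
import math


def loglinear_score(weights, features):
  """Weighted sum of feature values; exp() of it is the unnormalized score.

  Args:
    weights (dict of float): weight per feature name (as passed to __init__).
    features (dict of float): feature value per feature name for one suspect.
  """
  return sum(weights[name] * value for name, value in features.items())


# Made-up example values for the two features configured in __init__.
weights = {'TopFrameIndex': 1.0, 'MinDistance': 2.0}
features = {'TopFrameIndex': 0.9, 'MinDistance': 0.5}
log_score = loglinear_score(weights, features)   # 1.0*0.9 + 2.0*0.5 = 1.9
unnormalized_probability = math.exp(log_score)   # e**1.9
```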
| 222 all_changed_files[changed_file.name] = changed_file | 230 all_changed_files[changed_file.name] = changed_file |
| 223 continue | 231 continue |
| 224 | 232 |
| 225 assert accumulated_changed_file.blame_url == changed_file.blame_url, ( | 233 assert accumulated_changed_file.blame_url == changed_file.blame_url, ( |
| 226 ValueError('Blame URLs do not match: %s != %s' | 234 ValueError('Blame URLs do not match: %s != %s' |
| 227 % (accumulated_changed_file.blame_url, changed_file.blame_url))) | 235 % (accumulated_changed_file.blame_url, changed_file.blame_url))) |
| 228 accumulated_changed_file.reasons.extend(changed_file.reasons or []) | 236 accumulated_changed_file.reasons.extend(changed_file.reasons or []) |
| 229 | 237 |
| 230 return sorted(all_changed_files.values(), | 238 return sorted(all_changed_files.values(), |
| 231 key=lambda changed_file: changed_file.name) | 239 key=lambda changed_file: changed_file.name) |