| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 from collections import defaultdict | 5 from collections import defaultdict |
| 6 import logging | 6 import logging |
| 7 import math | 7 import math |
| 8 | 8 |
| 9 from common.chrome_dependency_fetcher import ChromeDependencyFetcher | 9 from common.chrome_dependency_fetcher import ChromeDependencyFetcher |
| 10 from crash import changelist_classifier | 10 from crash import changelist_classifier |
| 11 from crash.crash_report_with_dependencies import CrashReportWithDependencies | 11 from crash.crash_report_with_dependencies import CrashReportWithDependencies |
| 12 from crash.loglinear.changelist_features import min_distance | 12 from crash.loglinear.changelist_features.min_distance import MinDistanceFeature |
| 13 from crash.loglinear.changelist_features import top_frame_index | 13 from crash.loglinear.changelist_features.top_frame_index import ( |
| 14 from crash.loglinear.feature import FeatureFunction | 14 TopFrameIndexFeature) |
| 15 from crash.loglinear.feature import WrapperMetaFeature |
| 15 from crash.loglinear.model import UnnormalizedLogLinearModel | 16 from crash.loglinear.model import UnnormalizedLogLinearModel |
| 16 from crash.stacktrace import CallStack | 17 from crash.stacktrace import CallStack |
| 17 from crash.stacktrace import Stacktrace | 18 from crash.stacktrace import Stacktrace |
| 18 from crash.suspect import StackInfo | 19 from crash.suspect import StackInfo |
| 19 | 20 |
| 20 | 21 |
| 21 class LogLinearChangelistClassifier(object): | 22 class LogLinearChangelistClassifier(object): |
| 22 """A ``LogLinearModel``-based implementation of CL classification.""" | 23 """A ``LogLinearModel``-based implementation of CL classification.""" |
| 23 | 24 |
| 24 def __init__(self, get_repository, weights, top_n_frames=7, top_n_suspects=3): | 25 def __init__(self, get_repository, meta_feature, meta_weight, |
| 26 top_n_frames=7, top_n_suspects=3): |
| 25 """Args: | 27 """Args: |
| 26 get_repository (callable): a function from DEP urls to ``Repository`` | 28 get_repository (callable): a function from DEP urls to ``Repository`` |
| 27 objects, so we can get changelogs and blame for each dep. Notably, | 29 objects, so we can get changelogs and blame for each dep. Notably, |
| 28 to keep the code here generic, we make no assumptions about | 30 to keep the code here generic, we make no assumptions about |
| 29 which subclass of ``Repository`` this function returns. Thus, | 31 which subclass of ``Repository`` this function returns. Thus, |
| 30 it is up to the caller to decide what class to return and handle | 32 it is up to the caller to decide what class to return and handle |
| 31 any other arguments that class may require (e.g., an http client | 33 any other arguments that class may require (e.g., an http client |
| 32 for ``GitilesRepository``). | 34 for ``GitilesRepository``). |
| 33 weights (dict of float): the weights for the features. The keys of | 35 meta_feature (MetaFeature): All features. |
| 34 the dictionary are the names of the feature that weight is | 36 meta_weight (MetaWeight): All weights, i.e. the weights for the features. |
| 37 The keys of the dictionary are the names of the feature that weight is |
| 35 for. We take this argument as a dict rather than as a list so that | 38 for. We take this argument as a dict rather than as a list so that |
| 36 callers needn't worry about what order to provide the weights in. | 39 callers needn't worry about what order to provide the weights in. |
| 37 top_n_frames (int): how many frames of each callstack to look at. | 40 top_n_frames (int): how many frames of each callstack to look at. |
| 38 top_n_suspects (int): maximum number of suspects to return. | 41 top_n_suspects (int): maximum number of suspects to return. |
| 39 """ | 42 """ |
| 40 self._dependency_fetcher = ChromeDependencyFetcher(get_repository) | 43 self._dependency_fetcher = ChromeDependencyFetcher(get_repository) |
| 41 self._get_repository = get_repository | 44 self._get_repository = get_repository |
| 42 self._top_n_frames = top_n_frames | 45 self._top_n_frames = top_n_frames |
| 43 self._top_n_suspects = top_n_suspects | 46 self._top_n_suspects = top_n_suspects |
| 44 | 47 self._model = UnnormalizedLogLinearModel(meta_feature, meta_weight) |
| 45 feature_function = FeatureFunction([ | |
| 46 top_frame_index.TopFrameIndexFeature(top_n_frames), | |
| 47 min_distance.MinDistanceFeature(), | |
| 48 ]) | |
| 49 | |
| 50 self._model = UnnormalizedLogLinearModel(feature_function, weights) | |
| 51 | 48 |
| 52 def __call__(self, report): | 49 def __call__(self, report): |
| 53 """Finds changelists suspected of being responsible for the crash report. | 50 """Finds changelists suspected of being responsible for the crash report. |
| 54 | 51 |
| 55 Args: | 52 Args: |
| 56 report (CrashReport): the report to be analyzed. | 53 report (CrashReport): the report to be analyzed. |
| 57 | 54 |
| 58 Returns: | 55 Returns: |
| 59 List of ``Suspect``s, sorted by probability from highest to lowest. | 56 List of ``Suspect``s, sorted by probability from highest to lowest. |
| 60 """ | 57 """ |
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 133 | 130 |
| 134 scored_suspects = [] | 131 scored_suspects = [] |
| 135 for suspect in suspects: | 132 for suspect in suspects: |
| 136 score = score_given_report(suspect) | 133 score = score_given_report(suspect) |
| 137 if self._model.LogZeroish(score): | 134 if self._model.LogZeroish(score): |
| 138 logging.debug('Discarding suspect because it has zero probability: %s' | 135 logging.debug('Discarding suspect because it has zero probability: %s' |
| 139 % str(suspect.ToDict())) | 136 % str(suspect.ToDict())) |
| 140 continue | 137 continue |
| 141 | 138 |
| 142 suspect.confidence = score | 139 suspect.confidence = score |
| 140 # features is a ``MetaFeatureValue`` object containing all feature values. |
| 143 features = features_given_report(suspect) | 141 features = features_given_report(suspect) |
| 144 suspect.reasons = self._model.FormatReasons(features.itervalues()) | 142 suspect.reasons = features.reason |
| 145 suspect.changed_files = [ | 143 suspect.changed_files = [changed_file.ToDict() |
| 146 changed_file.ToDict() for changed_file in | 144 for changed_file in features.changed_files] |
| 147 self._model.AggregateChangedFiles(features.itervalues())] | |
| 148 scored_suspects.append(suspect) | 145 scored_suspects.append(suspect) |
| 149 | 146 |
| 150 scored_suspects.sort(key=lambda suspect: suspect.confidence) | 147 scored_suspects.sort(key=lambda suspect: suspect.confidence) |
| 151 return scored_suspects[:self._top_n_suspects] | 148 return scored_suspects[:self._top_n_suspects] |
| OLD | NEW |