Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(241)

Side by Side Diff: appengine/findit/crash/loglinear/changelist_classifier.py

Issue 2625073003: [Predator] Add MetaWeight and MetaFeatureValue to group multiple weights and features together. (Closed)
Patch Set: Rebase. Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 from collections import defaultdict 5 from collections import defaultdict
6 import logging 6 import logging
7 import math 7 import math
8 8
9 from common.chrome_dependency_fetcher import ChromeDependencyFetcher 9 from common.chrome_dependency_fetcher import ChromeDependencyFetcher
10 from crash import changelist_classifier 10 from crash import changelist_classifier
11 from crash.crash_report_with_dependencies import CrashReportWithDependencies 11 from crash.crash_report_with_dependencies import CrashReportWithDependencies
12 from crash.loglinear.changelist_features import min_distance 12 from crash.loglinear.changelist_features.min_distance import MinDistanceFeature
13 from crash.loglinear.changelist_features import top_frame_index 13 from crash.loglinear.changelist_features.top_frame_index import (
14 from crash.loglinear.feature import FeatureFunction 14 TopFrameIndexFeature)
15 from crash.loglinear.feature import WrapperMetaFeature
15 from crash.loglinear.model import UnnormalizedLogLinearModel 16 from crash.loglinear.model import UnnormalizedLogLinearModel
16 from crash.stacktrace import CallStack 17 from crash.stacktrace import CallStack
17 from crash.stacktrace import Stacktrace 18 from crash.stacktrace import Stacktrace
18 from crash.suspect import StackInfo 19 from crash.suspect import StackInfo
19 20
20 21
21 class LogLinearChangelistClassifier(object): 22 class LogLinearChangelistClassifier(object):
22 """A ``LogLinearModel``-based implementation of CL classification.""" 23 """A ``LogLinearModel``-based implementation of CL classification."""
23 24
24 def __init__(self, get_repository, weights, top_n_frames=7, top_n_suspects=3): 25 def __init__(self, get_repository, meta_feature, meta_weight,
26 top_n_frames=7, top_n_suspects=3):
25 """Args: 27 """Args:
26 get_repository (callable): a function from DEP urls to ``Repository`` 28 get_repository (callable): a function from DEP urls to ``Repository``
27 objects, so we can get changelogs and blame for each dep. Notably, 29 objects, so we can get changelogs and blame for each dep. Notably,
28 to keep the code here generic, we make no assumptions about 30 to keep the code here generic, we make no assumptions about
29 which subclass of ``Repository`` this function returns. Thus, 31 which subclass of ``Repository`` this function returns. Thus,
30 it is up to the caller to decide what class to return and handle 32 it is up to the caller to decide what class to return and handle
31 any other arguments that class may require (e.g., an http client 33 any other arguments that class may require (e.g., an http client
32 for ``GitilesRepository``). 34 for ``GitilesRepository``).
33 weights (dict of float): the weights for the features. The keys of 35 meta_feature (MetaFeature): All features.
34 the dictionary are the names of the feature that weight is 36 meta_weight (MetaWeight): All weights. the weights for the features.
37 The keys of the dictionary are the names of the feature that weight is
35 for. We take this argument as a dict rather than as a list so that 38 for. We take this argument as a dict rather than as a list so that
36 callers needn't worry about what order to provide the weights in. 39 callers needn't worry about what order to provide the weights in.
37 top_n_frames (int): how many frames of each callstack to look at. 40 top_n_frames (int): how many frames of each callstack to look at.
38 top_n_suspects (int): maximum number of suspects to return. 41 top_n_suspects (int): maximum number of suspects to return.
39 """ 42 """
40 self._dependency_fetcher = ChromeDependencyFetcher(get_repository) 43 self._dependency_fetcher = ChromeDependencyFetcher(get_repository)
41 self._get_repository = get_repository 44 self._get_repository = get_repository
42 self._top_n_frames = top_n_frames 45 self._top_n_frames = top_n_frames
43 self._top_n_suspects = top_n_suspects 46 self._top_n_suspects = top_n_suspects
44 47 self._model = UnnormalizedLogLinearModel(meta_feature, meta_weight)
45 feature_function = FeatureFunction([
46 top_frame_index.TopFrameIndexFeature(top_n_frames),
47 min_distance.MinDistanceFeature(),
48 ])
49
50 self._model = UnnormalizedLogLinearModel(feature_function, weights)
51 48
52 def __call__(self, report): 49 def __call__(self, report):
53 """Finds changelists suspected of being responsible for the crash report. 50 """Finds changelists suspected of being responsible for the crash report.
54 51
55 Args: 52 Args:
56 report (CrashReport): the report to be analyzed. 53 report (CrashReport): the report to be analyzed.
57 54
58 Returns: 55 Returns:
59 List of ``Suspect``s, sorted by probability from highest to lowest. 56 List of ``Suspect``s, sorted by probability from highest to lowest.
60 """ 57 """
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
133 130
134 scored_suspects = [] 131 scored_suspects = []
135 for suspect in suspects: 132 for suspect in suspects:
136 score = score_given_report(suspect) 133 score = score_given_report(suspect)
137 if self._model.LogZeroish(score): 134 if self._model.LogZeroish(score):
138 logging.debug('Discarding suspect because it has zero probability: %s' 135 logging.debug('Discarding suspect because it has zero probability: %s'
139 % str(suspect.ToDict())) 136 % str(suspect.ToDict()))
140 continue 137 continue
141 138
142 suspect.confidence = score 139 suspect.confidence = score
140 # features is ``MetaFeatureValue`` object containing all feature values.
143 features = features_given_report(suspect) 141 features = features_given_report(suspect)
144 suspect.reasons = self._model.FormatReasons(features.itervalues()) 142 suspect.reasons = features.reason
145 suspect.changed_files = [ 143 suspect.changed_files = [changed_file.ToDict()
146 changed_file.ToDict() for changed_file in 144 for changed_file in features.changed_files]
147 self._model.AggregateChangedFiles(features.itervalues())]
148 scored_suspects.append(suspect) 145 scored_suspects.append(suspect)
149 146
150 scored_suspects.sort(key=lambda suspect: suspect.confidence) 147 scored_suspects.sort(key=lambda suspect: suspect.confidence)
151 return scored_suspects[:self._top_n_suspects] 148 return scored_suspects[:self._top_n_suspects]
OLD | NEW
« no previous file with comments | « appengine/findit/crash/findit_for_chromecrash.py ('k') | appengine/findit/crash/loglinear/feature.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698