Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(172)

Side by Side Diff: appengine/findit/crash/loglinear/changelist_classifier.py

Issue 2617273002: [Predator] Move ``SingleFeatureScore`` to LLM. (Closed)
Patch Set: . Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import logging 5 import logging
6 import math 6 import math
7 7
8 from common.chrome_dependency_fetcher import ChromeDependencyFetcher 8 from common.chrome_dependency_fetcher import ChromeDependencyFetcher
9 from crash import changelist_classifier 9 from crash import changelist_classifier
10 from crash.loglinear.changelist_features import min_distance 10 from crash.loglinear.changelist_features import min_distance
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
42 feature_function = ToFeatureFunction([ 42 feature_function = ToFeatureFunction([
43 top_frame_index.TopFrameIndexFeature(top_n_frames), 43 top_frame_index.TopFrameIndexFeature(top_n_frames),
44 min_distance.MinDistanceFeature(), 44 min_distance.MinDistanceFeature(),
45 ]) 45 ])
46 46
47 weight_list = [ 47 weight_list = [
48 weights['TopFrameIndex'], 48 weights['TopFrameIndex'],
49 weights['MinDistance'], 49 weights['MinDistance'],
50 ] 50 ]
51 51
52 self._model = UnnormalizedLogLinearModel(feature_function, weight_list) 52 self._model = UnnormalizedLogLinearModel(feature_function,
53 53 weight_list, weights)
wrengr 2017/01/09 19:31:51 we shouldn't have to pass both the weight dict and
54 # TODO(crbug.com/674262): remove the need for storing these weights.
55 self._weights = weights
56 54
57 # TODO(crbug.com/673964): something better for detecting "close to log(0)". 55 # TODO(crbug.com/673964): something better for detecting "close to log(0)".
58 def _LogZeroish(self, x): 56 def _LogZeroish(self, x):
59 """Determine whether a float is close enough to log(0). 57 """Determine whether a float is close enough to log(0).
60 58
61 If a ``FeatureValue`` has a (log-domain) score of -inf for a given 59 If a ``FeatureValue`` has a (log-domain) score of -inf for a given
62 ``Suspect``, then that suspect has zero probability of being the 60 ``Suspect``, then that suspect has zero probability of being the
63 culprit. We want to filter these suspects out, to clean up the 61 culprit. We want to filter these suspects out, to clean up the
64 output of classification; so this method encapsulates the logic of 62 output of classification; so this method encapsulates the logic of
65 that check. 63 that check.
66 64
67 Args: 65 Args:
68 x (float): the float to check 66 x (float): the float to check
69 67
70 Returns: 68 Returns:
71 ``True`` if ``x`` is close enough to log(0); else ``False``. 69 ``True`` if ``x`` is close enough to log(0); else ``False``.
72 """ 70 """
73 return x < 0 and math.isinf(x) 71 return x < 0 and math.isinf(x)
74 72
75 def _SingleFeatureScore(self, feature_value):
76 """Returns the score (aka weighted value) of a ``FeatureValue``.
77
78 This function assumes the report's stacktrace has already had any necessary
79 preprocessing (like filtering or truncating) applied.
80
81 Args:
82 feature_value (FeatureValue): the feature value to check.
83
84 Returns:
85 The score of the feature value.
86 """
87 return feature_value.value * self._weights.get(feature_value.name, 0.)
88
89 def __call__(self, report): 73 def __call__(self, report):
90 """Finds changelists suspected of being responsible for the crash report. 74 """Finds changelists suspected of being responsible for the crash report.
91 75
92 Args: 76 Args:
93 report (CrashReport): the report to be analyzed. 77 report (CrashReport): the report to be analyzed.
94 78
95 Returns: 79 Returns:
96 List of ``Suspect``s, sorted by probability from highest to lowest. 80 List of ``Suspect``s, sorted by probability from highest to lowest.
97 """ 81 """
98 if not report.regression_range: 82 if not report.regression_range:
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
181 sorted by feature name, just to ensure that it comes out in some 165 sorted by feature name, just to ensure that it comes out in some
182 canonical order. 166 canonical order.
183 167
184 At present, the float is the log-domain score of the feature 168 At present, the float is the log-domain score of the feature
185 value. However, this isn't the best thing for UX reasons. In the 169 value. However, this isn't the best thing for UX reasons. In the
186 future it might be replaced by the normal-domain score, or by 170 future it might be replaced by the normal-domain score, or by
187 the probability. 171 the probability.
188 """ 172 """
189 formatted_reasons = [] 173 formatted_reasons = []
190 for feature in features: 174 for feature in features:
191 feature_score = self._SingleFeatureScore(feature) 175 feature_score = self._model.SingleFeatureScore(feature)
192 if self._LogZeroish(feature_score): # pragma: no cover 176 if self._LogZeroish(feature_score): # pragma: no cover
193 logging.debug('Discarding reasons from feature %s' 177 logging.debug('Discarding reasons from feature %s'
194 ' because it has zero probability' % feature.name) 178 ' because it has zero probability' % feature.name)
195 continue 179 continue
196 180
197 formatted_reasons.append((feature.name, feature_score, feature.reason)) 181 formatted_reasons.append((feature.name, feature_score, feature.reason))
198 182
199 return sorted(formatted_reasons, 183 return sorted(formatted_reasons,
200 key=lambda formatted_reason: formatted_reason[0]) 184 key=lambda formatted_reason: formatted_reason[0])
201 185
202 def AggregateChangedFiles(self, features): 186 def AggregateChangedFiles(self, features):
203 """Merge multiple ``FeatureValue.changed_files`` lists into one. 187 """Merge multiple ``FeatureValue.changed_files`` lists into one.
204 188
205 Args: 189 Args:
206 features (list of FeatureValue): the values whose ``changed_files`` 190 features (list of FeatureValue): the values whose ``changed_files``
207 lists should be aggregated. 191 lists should be aggregated.
208 192
209 Returns: 193 Returns:
210 A list of ``ChangedFile`` objects sorted by file name. The sorting 194 A list of ``ChangedFile`` objects sorted by file name. The sorting
211 is not essential, but is provided to ease testing by ensuring the 195 is not essential, but is provided to ease testing by ensuring the
212 output is in some canonical order. 196 output is in some canonical order.
213 197
214 Raises: 198 Raises:
215 ``ValueError`` if any file name is given inconsistent ``blame_url``s. 199 ``ValueError`` if any file name is given inconsistent ``blame_url``s.
216 """ 200 """
217 all_changed_files = {} 201 all_changed_files = {}
218 for feature in features: 202 for feature in features:
219 if self._LogZeroish(self._SingleFeatureScore(feature)): # pragma: no cover 203 if self._LogZeroish(
204 self._model.SingleFeatureScore(feature)): # pragma: no cover
220 logging.debug('Discarding changed files from feature %s' 205 logging.debug('Discarding changed files from feature %s'
221 ' because it has zero probability' % feature.name) 206 ' because it has zero probability' % feature.name)
222 continue 207 continue
223 208
224 for changed_file in feature.changed_files or []: 209 for changed_file in feature.changed_files or []:
225 accumulated_changed_file = all_changed_files.get(changed_file.name) 210 accumulated_changed_file = all_changed_files.get(changed_file.name)
226 if accumulated_changed_file is None: 211 if accumulated_changed_file is None:
227 all_changed_files[changed_file.name] = changed_file 212 all_changed_files[changed_file.name] = changed_file
228 continue 213 continue
229 214
230 assert accumulated_changed_file.blame_url == changed_file.blame_url, ( 215 assert accumulated_changed_file.blame_url == changed_file.blame_url, (
231 ValueError('Blame URLs do not match: %s != %s' 216 ValueError('Blame URLs do not match: %s != %s'
232 % (accumulated_changed_file.blame_url, changed_file.blame_url))) 217 % (accumulated_changed_file.blame_url, changed_file.blame_url)))
233 accumulated_changed_file.reasons.extend(changed_file.reasons or []) 218 accumulated_changed_file.reasons.extend(changed_file.reasons or [])
234 219
235 return sorted(all_changed_files.values(), 220 return sorted(all_changed_files.values(),
236 key=lambda changed_file: changed_file.name) 221 key=lambda changed_file: changed_file.name)
OLDNEW
« no previous file with comments | « no previous file | appengine/findit/crash/loglinear/model.py » ('j') | appengine/findit/crash/loglinear/model.py » ('J')

Powered by Google App Engine
This is Rietveld 408576698