Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging | 5 import logging |
| 6 import math | 6 import math |
| 7 | 7 |
| 8 from common.chrome_dependency_fetcher import ChromeDependencyFetcher | 8 from common.chrome_dependency_fetcher import ChromeDependencyFetcher |
| 9 from crash import changelist_classifier | 9 from crash import changelist_classifier |
| 10 from crash.loglinear.changelist_features import min_distance | 10 from crash.loglinear.changelist_features import min_distance |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 42 feature_function = ToFeatureFunction([ | 42 feature_function = ToFeatureFunction([ |
| 43 top_frame_index.TopFrameIndexFeature(top_n_frames), | 43 top_frame_index.TopFrameIndexFeature(top_n_frames), |
| 44 min_distance.MinDistanceFeature(), | 44 min_distance.MinDistanceFeature(), |
| 45 ]) | 45 ]) |
| 46 | 46 |
| 47 weight_list = [ | 47 weight_list = [ |
| 48 weights['TopFrameIndex'], | 48 weights['TopFrameIndex'], |
| 49 weights['MinDistance'], | 49 weights['MinDistance'], |
| 50 ] | 50 ] |
| 51 | 51 |
| 52 self._model = UnnormalizedLogLinearModel(feature_function, weight_list) | 52 self._model = UnnormalizedLogLinearModel(feature_function, |
| 53 | 53 weight_list, weights) |
|
wrengr
2017/01/09 19:31:51
we shouldn't have to pass both the weight dict and
| |
| 54 # TODO(crbug.com/674262): remove the need for storing these weights. | |
| 55 self._weights = weights | |
| 56 | 54 |
| 57 # TODO(crbug.com/673964): something better for detecting "close to log(0)". | 55 # TODO(crbug.com/673964): something better for detecting "close to log(0)". |
| 58 def _LogZeroish(self, x): | 56 def _LogZeroish(self, x): |
| 59 """Determine whether a float is close enough to log(0). | 57 """Determine whether a float is close enough to log(0). |
| 60 | 58 |
| 61 If a ``FeatureValue`` has a (log-domain) score of -inf for a given | 59 If a ``FeatureValue`` has a (log-domain) score of -inf for a given |
| 62 ``Suspect``, then that suspect has zero probability of being the | 60 ``Suspect``, then that suspect has zero probability of being the |
| 63 culprit. We want to filter these suspects out, to clean up the | 61 culprit. We want to filter these suspects out, to clean up the |
| 64 output of classification; so this method encapsulates the logic of | 62 output of classification; so this method encapsulates the logic of |
| 65 that check. | 63 that check. |
| 66 | 64 |
| 67 Args: | 65 Args: |
| 68 x (float): the float to check | 66 x (float): the float to check |
| 69 | 67 |
| 70 Returns: | 68 Returns: |
| 71 ``True`` if ``x`` is close enough to log(0); else ``False``. | 69 ``True`` if ``x`` is close enough to log(0); else ``False``. |
| 72 """ | 70 """ |
| 73 return x < 0 and math.isinf(x) | 71 return x < 0 and math.isinf(x) |
| 74 | 72 |
| 75 def _SingleFeatureScore(self, feature_value): | |
| 76 """Returns the score (aka weighted value) of a ``FeatureValue``. | |
| 77 | |
| 78 This function assumes the report's stacktrace has already had any necessary | |
| 79 preprocessing (like filtering or truncating) applied. | |
| 80 | |
| 81 Args: | |
| 82 feature_value (FeatureValue): the feature value to check. | |
| 83 | |
| 84 Returns: | |
| 85 The score of the feature value. | |
| 86 """ | |
| 87 return feature_value.value * self._weights.get(feature_value.name, 0.) | |
| 88 | |
| 89 def __call__(self, report): | 73 def __call__(self, report): |
| 90 """Finds changelists suspected of being responsible for the crash report. | 74 """Finds changelists suspected of being responsible for the crash report. |
| 91 | 75 |
| 92 Args: | 76 Args: |
| 93 report (CrashReport): the report to be analyzed. | 77 report (CrashReport): the report to be analyzed. |
| 94 | 78 |
| 95 Returns: | 79 Returns: |
| 96 List of ``Suspect``s, sorted by probability from highest to lowest. | 80 List of ``Suspect``s, sorted by probability from highest to lowest. |
| 97 """ | 81 """ |
| 98 if not report.regression_range: | 82 if not report.regression_range: |
| (...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 181 sorted by feature name, just to ensure that it comes out in some | 165 sorted by feature name, just to ensure that it comes out in some |
| 182 canonical order. | 166 canonical order. |
| 183 | 167 |
| 184 At present, the float is the log-domain score of the feature | 168 At present, the float is the log-domain score of the feature |
| 185 value. However, this isn't the best thing for UX reasons. In the | 169 value. However, this isn't the best thing for UX reasons. In the |
| 186 future it might be replaced by the normal-domain score, or by | 170 future it might be replaced by the normal-domain score, or by |
| 187 the probability. | 171 the probability. |
| 188 """ | 172 """ |
| 189 formatted_reasons = [] | 173 formatted_reasons = [] |
| 190 for feature in features: | 174 for feature in features: |
| 191 feature_score = self._SingleFeatureScore(feature) | 175 feature_score = self._model.SingleFeatureScore(feature) |
| 192 if self._LogZeroish(feature_score): # pragma: no cover | 176 if self._LogZeroish(feature_score): # pragma: no cover |
| 193 logging.debug('Discarding reasons from feature %s' | 177 logging.debug('Discarding reasons from feature %s' |
| 194 ' because it has zero probability' % feature.name) | 178 ' because it has zero probability' % feature.name) |
| 195 continue | 179 continue |
| 196 | 180 |
| 197 formatted_reasons.append((feature.name, feature_score, feature.reason)) | 181 formatted_reasons.append((feature.name, feature_score, feature.reason)) |
| 198 | 182 |
| 199 return sorted(formatted_reasons, | 183 return sorted(formatted_reasons, |
| 200 key=lambda formatted_reason: formatted_reason[0]) | 184 key=lambda formatted_reason: formatted_reason[0]) |
| 201 | 185 |
| 202 def AggregateChangedFiles(self, features): | 186 def AggregateChangedFiles(self, features): |
| 203 """Merge multiple ``FeatureValue.changed_files`` lists into one. | 187 """Merge multiple ``FeatureValue.changed_files`` lists into one. |
| 204 | 188 |
| 205 Args: | 189 Args: |
| 206 features (list of FeatureValue): the values whose ``changed_files`` | 190 features (list of FeatureValue): the values whose ``changed_files`` |
| 207 lists should be aggregated. | 191 lists should be aggregated. |
| 208 | 192 |
| 209 Returns: | 193 Returns: |
| 210 A list of ``ChangedFile`` objects sorted by file name. The sorting | 194 A list of ``ChangedFile`` objects sorted by file name. The sorting |
| 211 is not essential, but is provided to ease testing by ensuring the | 195 is not essential, but is provided to ease testing by ensuring the |
| 212 output is in some canonical order. | 196 output is in some canonical order. |
| 213 | 197 |
| 214 Raises: | 198 Raises: |
| 215 ``ValueError`` if any file name is given inconsistent ``blame_url``s. | 199 ``ValueError`` if any file name is given inconsistent ``blame_url``s. |
| 216 """ | 200 """ |
| 217 all_changed_files = {} | 201 all_changed_files = {} |
| 218 for feature in features: | 202 for feature in features: |
| 219 if self._LogZeroish(self._SingleFeatureScore(feature)): # pragma: no cover | 203 if self._LogZeroish( |
| | 204 self._model.SingleFeatureScore(feature)): # pragma: no cover |
| 220 logging.debug('Discarding changed files from feature %s' | 205 logging.debug('Discarding changed files from feature %s' |
| 221 ' because it has zero probability' % feature.name) | 206 ' because it has zero probability' % feature.name) |
| 222 continue | 207 continue |
| 223 | 208 |
| 224 for changed_file in feature.changed_files or []: | 209 for changed_file in feature.changed_files or []: |
| 225 accumulated_changed_file = all_changed_files.get(changed_file.name) | 210 accumulated_changed_file = all_changed_files.get(changed_file.name) |
| 226 if accumulated_changed_file is None: | 211 if accumulated_changed_file is None: |
| 227 all_changed_files[changed_file.name] = changed_file | 212 all_changed_files[changed_file.name] = changed_file |
| 228 continue | 213 continue |
| 229 | 214 |
| 230 assert accumulated_changed_file.blame_url == changed_file.blame_url, ( | 215 assert accumulated_changed_file.blame_url == changed_file.blame_url, ( |
| 231 ValueError('Blame URLs do not match: %s != %s' | 216 ValueError('Blame URLs do not match: %s != %s' |
| 232 % (accumulated_changed_file.blame_url, changed_file.blame_url))) | 217 % (accumulated_changed_file.blame_url, changed_file.blame_url))) |
| 233 accumulated_changed_file.reasons.extend(changed_file.reasons or []) | 218 accumulated_changed_file.reasons.extend(changed_file.reasons or []) |
| 234 | 219 |
| 235 return sorted(all_changed_files.values(), | 220 return sorted(all_changed_files.values(), |
| 236 key=lambda changed_file: changed_file.name) | 221 key=lambda changed_file: changed_file.name) |
| OLD | NEW |