| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging | 5 import logging |
| 6 from collections import defaultdict | 6 from collections import defaultdict |
| 7 from collections import namedtuple | 7 from collections import namedtuple |
| 8 | 8 |
| 9 from common import chrome_dependency_fetcher | 9 from common import chrome_dependency_fetcher |
| 10 from crash import crash_util | 10 from crash import crash_util |
| 11 from crash.results import MatchResults | 11 from crash.suspect import Suspect |
| 12 from crash.suspect import Suspects |
| 12 from crash.scorers.aggregated_scorer import AggregatedScorer | 13 from crash.scorers.aggregated_scorer import AggregatedScorer |
| 13 from crash.scorers.min_distance import MinDistance | 14 from crash.scorers.min_distance import MinDistance |
| 14 from crash.scorers.top_frame_index import TopFrameIndex | 15 from crash.scorers.top_frame_index import TopFrameIndex |
| 15 from crash.stacktrace import CallStack | 16 from crash.stacktrace import CallStack |
| 16 from crash.stacktrace import Stacktrace | 17 from crash.stacktrace import Stacktrace |
| 17 from libs.gitiles.diff import ChangeType | 18 from libs.gitiles.diff import ChangeType |
| 18 | 19 |
| 19 | 20 |
| 20 class ChangelistClassifier(namedtuple('ChangelistClassifier', | 21 class ChangelistClassifier(namedtuple('ChangelistClassifier', |
| 21 ['repository', 'top_n_frames', 'top_n_results', 'confidence_threshold'])): | 22 ['repository', 'top_n_frames', 'top_n_results', 'confidence_threshold'])): |
| 22 __slots__ = () | 23 __slots__ = () |
| 23 | 24 |
| 24 def __new__(cls, repository, | 25 def __new__(cls, repository, |
| 25 top_n_frames, top_n_results=3, confidence_threshold=0.999): | 26 top_n_frames, top_n_results=3, confidence_threshold=0.999): |
| 26 """Args: | 27 """Args: |
| 27 repository (Repository): the Git repository for getting CLs to classify. | 28 repository (Repository): the Git repository for getting CLs to classify. |
| 28 top_n_frames (int): how many frames of each callstack to look at. | 29 top_n_frames (int): how many frames of each callstack to look at. |
| 29 top_n_results (int): maximum number of results to return. | 30 top_n_results (int): maximum number of suspects to return. |
| 30 confidence_threshold (float): In [0,1], above which we only return | 31 confidence_threshold (float): In [0,1], above which we only return |
| 31 the first result. | 32 the first suspect. |
| 32 """ | 33 """ |
| 33 return super(cls, ChangelistClassifier).__new__(cls, | 34 return super(cls, ChangelistClassifier).__new__(cls, |
| 34 repository, top_n_frames, top_n_results, confidence_threshold) | 35 repository, top_n_frames, top_n_results, confidence_threshold) |
| 35 | 36 |
| 36 def __str__(self): # pragma: no cover | 37 def __str__(self): # pragma: no cover |
| 37 return ('%s(top_n_frames=%d, top_n_results=%d, confidence_threshold=%g)' | 38 return ('%s(top_n_frames=%d, top_n_results=%d, confidence_threshold=%g)' |
| 38 % (self.__class__.__name__, | 39 % (self.__class__.__name__, |
| 39 self.top_n_frames, | 40 self.top_n_frames, |
| 40 self.top_n_results, | 41 self.top_n_results, |
| 41 self.confidence_threshold)) | 42 self.confidence_threshold)) |
| 42 | 43 |
| 43 def __call__(self, report): | 44 def __call__(self, report): |
| 44 """Finds changelists suspected of being responsible for the crash report. | 45 """Finds changelists suspected of being responsible for the crash report. |
| 45 | 46 |
| 46 Args: | 47 Args: |
| 47 report (CrashReport): the report to be analyzed. | 48 report (CrashReport): the report to be analyzed. |
| 48 | 49 |
| 49 Returns: | 50 Returns: |
| 50 List of Results, sorted by confidence from highest to lowest. | 51 List of ``Suspect``s, sorted by confidence from highest to lowest. |
| 51 """ | 52 """ |
| 52 if not report.regression_range: | 53 if not report.regression_range: |
| 53 logging.warning('ChangelistClassifier.__call__: Missing regression range ' | 54 logging.warning('ChangelistClassifier.__call__: Missing regression range ' |
| 54 'for report: %s', str(report)) | 55 'for report: %s', str(report)) |
| 55 return [] | 56 return [] |
| 56 last_good_version, first_bad_version = report.regression_range | 57 last_good_version, first_bad_version = report.regression_range |
| 57 logging.info('ChangelistClassifier.__call__: Regression range %s:%s', | 58 logging.info('ChangelistClassifier.__call__: Regression range %s:%s', |
| 58 last_good_version, first_bad_version) | 59 last_good_version, first_bad_version) |
| 59 | 60 |
| 60 # Restrict analysis to just the top n frames in each callstack. | 61 # Restrict analysis to just the top n frames in each callstack. |
| (...skipping 28 matching lines...) Expand all Loading... |
| 89 continue | 90 continue |
| 90 regression_deps_rolls[dep_path] = dep_roll | 91 regression_deps_rolls[dep_path] = dep_roll |
| 91 | 92 |
| 92 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps( | 93 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps( |
| 93 regression_deps_rolls, stack_deps, self.repository) | 94 regression_deps_rolls, stack_deps, self.repository) |
| 94 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps( | 95 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps( |
| 95 stacktrace, stack_deps) | 96 stacktrace, stack_deps) |
| 96 | 97 |
| 97 # TODO: argument order is inconsistent with others. Repository should | 98 # TODO: argument order is inconsistent with others. Repository should |
| 98 # be the last argument. | 99 # be the last argument. |
| 99 results = FindMatchResults(dep_to_file_to_changelogs, | 100 suspects = FindSuspects(dep_to_file_to_changelogs, |
| 100 dep_to_file_to_stack_infos, | 101 dep_to_file_to_stack_infos, |
| 101 stack_deps, self.repository, ignore_cls) | 102 stack_deps, self.repository, ignore_cls) |
| 102 if not results: | 103 if not suspects: |
| 103 return [] | 104 return [] |
| 104 | 105 |
| 105 # TODO(wrengr): we should be able to do this map/filter/sort in one pass. | 106 # Set confidence, reasons, and changed_files. |
| 106 # Set result.confidence, result.reasons and result.changed_files. | |
| 107 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()]) | 107 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()]) |
| 108 map(aggregated_scorer.Score, results) | 108 map(aggregated_scorer.Score, suspects) |
| 109 | 109 |
| 110 # Filter out all the 0 confidence results. | 110 # Filter out all the 0 confidence suspects. |
| 111 results = filter(lambda r: r.confidence != 0, results) | 111 suspects = filter(lambda suspect: suspect.confidence != 0, suspects) |
| 112 if not results: | 112 if not suspects: |
| 113 return [] | 113 return [] |
| 114 | 114 |
| 115 sorted_results = sorted(results, key=lambda r: -r.confidence) | 115 suspects.sort(key=lambda suspect: -suspect.confidence) |
| 116 | 116 |
| 117 max_results = (1 if sorted_results[0].confidence > self.confidence_threshold | 117 max_results = (1 if suspects[0].confidence > self.confidence_threshold |
| 118 else self.top_n_results) | 118 else self.top_n_results) |
| 119 | 119 |
| 120 return sorted_results[:max_results] | 120 return suspects[:max_results] |
| 121 | 121 |
| 122 | 122 |
| 123 def GetDepsInCrashStack(crash_stack, crash_deps): | 123 def GetDepsInCrashStack(crash_stack, crash_deps): |
| 124 """Gets Dependencies in crash stack.""" | 124 """Gets Dependencies in crash stack.""" |
| 125 if not crash_stack: | 125 if not crash_stack: |
| 126 return {} | 126 return {} |
| 127 | 127 |
| 128 stack_deps = {} | 128 stack_deps = {} |
| 129 for frame in crash_stack: | 129 for frame in crash_stack: |
| 130 if frame.dep_path: | 130 if frame.dep_path: |
| (...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 257 if frame.dep_path not in stack_deps: | 257 if frame.dep_path not in stack_deps: |
| 258 continue | 258 continue |
| 259 | 259 |
| 260 dep_to_file_to_stack_infos[frame.dep_path][frame.file_path].append(( | 260 dep_to_file_to_stack_infos[frame.dep_path][frame.file_path].append(( |
| 261 frame, callstack.priority)) | 261 frame, callstack.priority)) |
| 262 | 262 |
| 263 return dep_to_file_to_stack_infos | 263 return dep_to_file_to_stack_infos |
| 264 | 264 |
| 265 | 265 |
| 266 # TODO(katesonia): Remove the repository argument after the refactoring CL is committed. | 266 # TODO(katesonia): Remove the repository argument after the refactoring CL is committed. |
| 267 def FindMatchResults(dep_to_file_to_changelogs, | 267 def FindSuspects(dep_to_file_to_changelogs, |
| 268 dep_to_file_to_stack_infos, | 268 dep_to_file_to_stack_infos, |
| 269 stack_deps, repository, | 269 stack_deps, repository, |
| 270 ignore_cls=None): | 270 ignore_cls=None): |
| 271 """Finds results by matching stacktrace and changelogs in regression range. | 271 """Finds suspects by matching stacktrace and changelogs in regression range. |
| 272 | 272 |
| 273 This method only applies to those crashes with regression range. | 273 This method only applies to those crashes with regression range. |
| 274 | 274 |
| 275 Args: | 275 Args: |
| 276 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path | 276 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path |
| 277 to ChangeLogs that touched this file. | 277 to ChangeLogs that touched this file. |
| 278 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path | 278 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path |
| 279 to a list of stack information of this file. A file may occur in several | 279 to a list of stack information of this file. A file may occur in several |
| 280 frames; one stack info consists of a StackFrame and the callstack priority | 280 frames; one stack info consists of a StackFrame and the callstack priority |
| 281 of it. | 281 of it. |
| 282 stack_deps (dict): Represents all the dependencies shown in the crash stack. | 282 stack_deps (dict): Represents all the dependencies shown in the crash stack. |
| 283 repository (Repository): Repository to get changelogs and blame from. | 283 repository (Repository): Repository to get changelogs and blame from. |
| 284 ignore_cls (set): Set of reverted revisions. | 284 ignore_cls (set): Set of reverted revisions. |
| 285 | 285 |
| 286 Returns: | 286 Returns: |
| 287 A list of MatchResult instances with confidence and reason unset. | 287 A list of ``Suspect`` instances with confidence and reason unset. |
| 288 """ | 288 """ |
| 289 match_results = MatchResults(ignore_cls) | 289 suspects = Suspects(ignore_cls) |
| 290 | 290 |
| 291 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems(): | 291 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems(): |
| 292 file_to_changelogs = dep_to_file_to_changelogs[dep] | 292 file_to_changelogs = dep_to_file_to_changelogs[dep] |
| 293 | 293 |
| 294 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems(): | 294 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems(): |
| 295 for touched_file_path, changelogs in file_to_changelogs.iteritems(): | 295 for touched_file_path, changelogs in file_to_changelogs.iteritems(): |
| 296 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path): | 296 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path): |
| 297 continue | 297 continue |
| 298 | 298 |
| 299 repository.repo_url = stack_deps[dep].repo_url | 299 repository.repo_url = stack_deps[dep].repo_url |
| 300 blame = repository.GetBlame(touched_file_path, | 300 blame = repository.GetBlame(touched_file_path, |
| 301 stack_deps[dep].revision) | 301 stack_deps[dep].revision) |
| 302 | 302 |
| 303 # Generate/update each result(changelog) in changelogs, blame is used | 303 # Generate/update each suspect(changelog) in changelogs, blame is used |
| 304 # to calculate distance between touched lines and crashed lines in file. | 304 # to calculate distance between touched lines and crashed lines in file. |
| 305 match_results.GenerateMatchResults( | 305 suspects.GenerateSuspects( |
| 306 touched_file_path, dep, stack_infos, changelogs, blame) | 306 touched_file_path, dep, stack_infos, changelogs, blame) |
| 307 | 307 |
| 308 return match_results.values() | 308 return suspects.values() |
| OLD | NEW |