Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging | 5 import logging |
| 6 from collections import defaultdict | 6 from collections import defaultdict |
| 7 from collections import namedtuple | 7 from collections import namedtuple |
| 8 | 8 |
| 9 from common import chrome_dependency_fetcher | 9 from common.chrome_dependency_fetcher import ChromeDependencyFetcher |
| 10 from crash import crash_util | 10 from crash import crash_util |
| 11 from crash.suspect import StackInfo | 11 from crash.suspect import StackInfo |
| 12 from crash.suspect import Suspect | 12 from crash.suspect import Suspect |
| 13 from crash.suspect import SuspectMap | 13 from crash.suspect import SuspectMap |
| 14 from crash.scorers.aggregated_scorer import AggregatedScorer | 14 from crash.scorers.aggregated_scorer import AggregatedScorer |
| 15 from crash.scorers.min_distance import MinDistance | 15 from crash.scorers.min_distance import MinDistance |
| 16 from crash.scorers.top_frame_index import TopFrameIndex | 16 from crash.scorers.top_frame_index import TopFrameIndex |
| 17 from crash.stacktrace import CallStack | 17 from crash.stacktrace import CallStack |
| 18 from crash.stacktrace import Stacktrace | 18 from crash.stacktrace import Stacktrace |
| 19 from libs.gitiles.diff import ChangeType | 19 from libs.gitiles.diff import ChangeType |
| 20 | 20 |
| 21 | 21 |
| 22 class ChangelistClassifier(namedtuple('ChangelistClassifier', | 22 class ChangelistClassifier(namedtuple('ChangelistClassifier', |
| 23 ['repository', 'top_n_results', 'confidence_threshold'])): | 23 ['repository', 'get_repository', 'top_n_results', 'confidence_threshold'])): |
| 24 __slots__ = () | 24 __slots__ = () |
| 25 | 25 |
| 26 def __new__(cls, repository, top_n_results=3, confidence_threshold=0.999): | 26 def __new__(cls, repository, get_repository, top_n_results=3, |
| 27 confidence_threshold=0.999): | |
| 27 """Args: | 28 """Args: |
| 28 repository (Repository): the Git repository for getting CLs to classify. | 29 repository (Repository): the Git repository of the main project |
| 30 we're trying to classify CLs from. | |
| 31 get_repository (callable): a function from DEP urls to ``Repository`` | |
| 32 objects, so we can get changelogs and blame for each dep. Notably, | |
| 33 to keep the code here generic, we make no assumptions about | |
| 34 which subclass of ``Repository`` this function returns. Thus, | |
| 35 it is up to the caller to decide what class to return and handle | |
| 36 any other arguments that class may require (e.g., an http client | |
| 37 for ``GitilesRepository``). | |
| 29 top_n_results (int): maximum number of results to return. | 38 top_n_results (int): maximum number of results to return. |
| 30 confidence_threshold (float): In [0,1], above which we only return | 39 confidence_threshold (float): In [0,1], above which we only return |
| 31 the first suspect. | 40 the first suspect. |
| 32 """ | 41 """ |
| 33 return super(cls, ChangelistClassifier).__new__( | 42 return super(cls, ChangelistClassifier).__new__( |
| 34 cls, repository, top_n_results, confidence_threshold) | 43 cls, repository, get_repository, top_n_results, confidence_threshold) |
| 35 | 44 |
| 36 def __str__(self): # pragma: no cover | 45 def __str__(self): # pragma: no cover |
| 37 return ('%s(top_n_results=%d, confidence_threshold=%g)' | 46 return ('%s(top_n_results=%d, confidence_threshold=%g)' |
| 38 % (self.__class__.__name__, | 47 % (self.__class__.__name__, |
| 39 self.top_n_results, | 48 self.top_n_results, |
| 40 self.confidence_threshold)) | 49 self.confidence_threshold)) |
| 41 | 50 |
| 42 def __call__(self, report): | 51 def __call__(self, report): |
| 43 """Finds changelists suspected of being responsible for the crash report. | 52 """Finds changelists suspected of being responsible for the crash report. |
| 44 | 53 |
| 45 This function assumes the report's stacktrace has already had any necessary | 54 This function assumes the report's stacktrace has already had any necessary |
| 46 preprocessing (like filtering or truncating) applied. | 55 preprocessing (like filtering or truncating) applied. |
| 47 | 56 |
| 48 Args: | 57 Args: |
| 49 report (CrashReport): the report to be analyzed. | 58 report (CrashReport): the report to be analyzed. |
| 50 | 59 |
| 51 Returns: | 60 Returns: |
| 52 List of ``Suspect``s, sorted by confidence from highest to lowest. | 61 List of ``Suspect``s, sorted by confidence from highest to lowest. |
| 53 """ | 62 """ |
| 54 if not report.regression_range: | 63 if not report.regression_range: |
| 55 logging.warning('ChangelistClassifier.__call__: Missing regression range ' | 64 logging.warning('ChangelistClassifier.__call__: Missing regression range ' |
| 56 'for report: %s', str(report)) | 65 'for report: %s', str(report)) |
| 57 return [] | 66 return [] |
| 58 last_good_version, first_bad_version = report.regression_range | 67 last_good_version, first_bad_version = report.regression_range |
| 59 logging.info('ChangelistClassifier.__call__: Regression range %s:%s', | 68 logging.info('ChangelistClassifier.__call__: Regression range %s:%s', |
| 60 last_good_version, first_bad_version) | 69 last_good_version, first_bad_version) |
| 61 | 70 |
| 71 dependency_fetcher = ChromeDependencyFetcher(self.repository) | |
| 72 | |
| 62 # We are only interested in the deps in crash stack (the callstack that | 73 # We are only interested in the deps in crash stack (the callstack that |
| 63 # caused the crash). | 74 # caused the crash). |
| 64 # TODO(wrengr): we may want to receive the crash deps as an argument, | 75 # TODO(wrengr): we may want to receive the crash deps as an argument, |
| 65 # so that when this method is called via Findit.FindCulprit, we avoid | 76 # so that when this method is called via Findit.FindCulprit, we avoid |
| 66 # doing redundant work creating it. | 77 # doing redundant work creating it. |
| 67 stack_deps = GetDepsInCrashStack( | 78 stack_deps = GetDepsInCrashStack( |
| 68 report.stacktrace.crash_stack, | 79 report.stacktrace.crash_stack, |
| 69 chrome_dependency_fetcher.ChromeDependencyFetcher( | 80 dependency_fetcher.GetDependency( |
| 70 self.repository).GetDependency(report.crashed_version, | 81 report.crashed_version, report.platform)) |
| 71 report.platform)) | |
| 72 | 82 |
| 73 # Get dep and file to changelogs, stack_info and blame dicts. | 83 # Get dep and file to changelogs, stack_info and blame dicts. |
| 74 dep_rolls = chrome_dependency_fetcher.ChromeDependencyFetcher( | 84 dep_rolls = dependency_fetcher.GetDependencyRollsDict( |
| 75 self.repository).GetDependencyRollsDict( | 85 last_good_version, first_bad_version, report.platform) |
| 76 last_good_version, first_bad_version, report.platform) | |
| 77 | 86 |
| 78 # Regression of a dep added/deleted (old_revision/new_revision is None) can | 87 # Regression of a dep added/deleted (old_revision/new_revision is None) can |
| 79 # not be known for sure and this case rarely happens, so just filter them | 88 # not be known for sure and this case rarely happens, so just filter them |
| 80 # out. | 89 # out. |
| 81 regression_deps_rolls = {} | 90 regression_deps_rolls = {} |
| 82 for dep_path, dep_roll in dep_rolls.iteritems(): | 91 for dep_path, dep_roll in dep_rolls.iteritems(): |
| 83 if not dep_roll.old_revision or not dep_roll.new_revision: | 92 if not dep_roll.old_revision or not dep_roll.new_revision: |
| 84 logging.info('Skip %s denpendency %s', | 93 logging.info('Skip %s denpendency %s', |
| 85 'added' if dep_roll.new_revision else 'deleted', dep_path) | 94 'added' if dep_roll.new_revision else 'deleted', dep_path) |
| 86 continue | 95 continue |
| 87 regression_deps_rolls[dep_path] = dep_roll | 96 regression_deps_rolls[dep_path] = dep_roll |
| 88 | 97 |
| 89 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps( | 98 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps( |
| 90 regression_deps_rolls, stack_deps, self.repository) | 99 regression_deps_rolls, stack_deps, self.repository) |
| 91 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps( | 100 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps( |
| 92 report.stacktrace, stack_deps) | 101 report.stacktrace, stack_deps) |
| 93 | 102 |
| 94 # TODO: argument order is inconsistent from others. Repository should | |
| 95 # be last argument. | |
| 96 suspects = FindSuspects(dep_to_file_to_changelogs, | 103 suspects = FindSuspects(dep_to_file_to_changelogs, |
| 97 dep_to_file_to_stack_infos, | 104 dep_to_file_to_stack_infos, |
| 98 stack_deps, self.repository, ignore_cls) | 105 stack_deps, self.get_repository, ignore_cls) |
| 99 if not suspects: | 106 if not suspects: |
| 100 return [] | 107 return [] |
| 101 | 108 |
| 102 # Set confidence, reasons, and changed_files. | 109 # Set confidence, reasons, and changed_files. |
| 103 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()]) | 110 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()]) |
| 104 map(aggregated_scorer.Score, suspects) | 111 map(aggregated_scorer.Score, suspects) |
| 105 | 112 |
| 106 # Filter all the 0 confidence results. | 113 # Filter all the 0 confidence results. |
| 107 suspects = filter(lambda suspect: suspect.confidence != 0, suspects) | 114 suspects = filter(lambda suspect: suspect.confidence != 0, suspects) |
| 108 if not suspects: | 115 if not suspects: |
| (...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 247 # We only care about those dependencies in crash stack. | 254 # We only care about those dependencies in crash stack. |
| 248 if frame.dep_path not in stack_deps: | 255 if frame.dep_path not in stack_deps: |
| 249 continue | 256 continue |
| 250 | 257 |
| 251 dep_to_file_to_stack_infos[frame.dep_path][frame.file_path].append( | 258 dep_to_file_to_stack_infos[frame.dep_path][frame.file_path].append( |
| 252 StackInfo(frame, callstack.priority)) | 259 StackInfo(frame, callstack.priority)) |
| 253 | 260 |
| 254 return dep_to_file_to_stack_infos | 261 return dep_to_file_to_stack_infos |
| 255 | 262 |
| 256 | 263 |
| 257 # TODO(katesonia): Remove the repository argument after refatoring cl committed. | 264 # TODO(katesonia): Remove the repository argument after refatoring cl committed. |
|
Martin Barbella
2016/12/29 21:51:05
Is this now done, or was additional refactoring pl
Sharu Jiang
2016/12/29 21:58:20
No, this TODO is about moving this function to be
| |
| 258 def FindSuspects(dep_to_file_to_changelogs, | 265 def FindSuspects(dep_to_file_to_changelogs, |
| 259 dep_to_file_to_stack_infos, | 266 dep_to_file_to_stack_infos, |
| 260 stack_deps, repository, | 267 stack_deps, get_repository, |
| 261 ignore_cls=None): | 268 ignore_cls=None): |
| 262 """Finds suspects by matching stacktrace and changelogs in regression range. | 269 """Finds suspects by matching stacktrace and changelogs in regression range. |
| 263 | 270 |
| 264 This method only applies to those crashes with regression range. | 271 This method only applies to those crashes with regression range. |
| 265 | 272 |
| 266 Args: | 273 Args: |
| 267 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path | 274 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path |
| 268 to ChangeLogs that touched this file. | 275 to ChangeLogs that touched this file. |
| 269 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path | 276 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path |
| 270 to a list of stack information of this file. A file may occur in several | 277 to a list of stack information of this file. A file may occur in several |
| 271 frames, one stack info consist of a StackFrame and the callstack priority | 278 frames, one stack info consist of a StackFrame and the callstack priority |
| 272 of it. | 279 of it. |
| 273 stack_deps (dict): Represents all the dependencies shown in the crash stack. | 280 stack_deps (dict): Represents all the dependencies shown in the crash stack. |
| 274 repository (Repository): Repository to get changelogs and blame from. | 281 get_repository (callable): a function from urls to ``Repository`` |
| 282 objects, so we can get changelogs and blame for each dep. | |
| 275 ignore_cls (set): Set of reverted revisions. | 283 ignore_cls (set): Set of reverted revisions. |
| 276 | 284 |
| 277 Returns: | 285 Returns: |
| 278 A list of ``Suspect`` instances with confidence and reason unset. | 286 A list of ``Suspect`` instances with confidence and reason unset. |
| 279 """ | 287 """ |
| 280 suspects = SuspectMap(ignore_cls) | 288 suspects = SuspectMap(ignore_cls) |
| 281 | 289 |
| 282 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems(): | 290 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems(): |
| 283 file_to_changelogs = dep_to_file_to_changelogs[dep] | 291 file_to_changelogs = dep_to_file_to_changelogs[dep] |
| 284 | 292 |
| 285 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems(): | 293 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems(): |
| 286 for touched_file_path, changelogs in file_to_changelogs.iteritems(): | 294 for touched_file_path, changelogs in file_to_changelogs.iteritems(): |
| 287 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path): | 295 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path): |
| 288 continue | 296 continue |
| 289 | 297 |
| 290 repository.repo_url = stack_deps[dep].repo_url | 298 repository = get_repository(stack_deps[dep].repo_url) |
| 291 blame = repository.GetBlame(touched_file_path, | 299 blame = repository.GetBlame(touched_file_path, |
| 292 stack_deps[dep].revision) | 300 stack_deps[dep].revision) |
| 293 | 301 |
| 294 # Generate/update each suspect(changelog) in changelogs, blame is used | 302 # Generate/update each suspect(changelog) in changelogs, blame is used |
| 295 # to calculate distance between touched lines and crashed lines in file. | 303 # to calculate distance between touched lines and crashed lines in file. |
| 296 suspects.GenerateSuspects( | 304 suspects.GenerateSuspects( |
| 297 touched_file_path, dep, stack_infos, changelogs, blame) | 305 touched_file_path, dep, stack_infos, changelogs, blame) |
| 298 | 306 |
| 299 return suspects.values() | 307 return suspects.values() |
| OLD | NEW |