| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging | 5 import logging |
| 6 from collections import defaultdict | 6 from collections import defaultdict |
| 7 from collections import namedtuple | 7 from collections import namedtuple |
| 8 | 8 |
| 9 from common.chrome_dependency_fetcher import ChromeDependencyFetcher | 9 from common.chrome_dependency_fetcher import ChromeDependencyFetcher |
| 10 from crash import crash_util | 10 from crash import crash_util |
| 11 from crash.suspect import StackInfo | 11 from crash.suspect import StackInfo |
| 12 from crash.suspect import Suspect | 12 from crash.suspect import Suspect |
| 13 from crash.suspect import SuspectMap | 13 from crash.suspect import SuspectMap |
| 14 from crash.scorers.aggregated_scorer import AggregatedScorer | 14 from crash.scorers.aggregated_scorer import AggregatedScorer |
| 15 from crash.scorers.min_distance import MinDistance | 15 from crash.scorers.min_distance import MinDistance |
| 16 from crash.scorers.top_frame_index import TopFrameIndex | 16 from crash.scorers.top_frame_index import TopFrameIndex |
| 17 from crash.stacktrace import CallStack | 17 from crash.stacktrace import CallStack |
| 18 from crash.stacktrace import Stacktrace | 18 from crash.stacktrace import Stacktrace |
| 19 from libs.gitiles.diff import ChangeType | 19 from libs.gitiles.diff import ChangeType |
| 20 | 20 |
| 21 | 21 |
| 22 class ChangelistClassifier(namedtuple('ChangelistClassifier', | 22 class ChangelistClassifier(namedtuple('ChangelistClassifier', |
| 23 ['repository', 'get_repository', 'top_n_results', 'confidence_threshold'])): | 23 ['get_repository', 'top_n_results', 'confidence_threshold'])): |
| 24 __slots__ = () | 24 __slots__ = () |
| 25 | 25 |
| 26 def __new__(cls, repository, get_repository, top_n_results=3, | 26 def __new__(cls, get_repository, top_n_results=3, confidence_threshold=0.999): |
| 27 confidence_threshold=0.999): | |
| 28 """Args: | 27 """Args: |
| 29 repository (Repository): the Git repository of the main project | |
| 30 we're trying to classify CLs from. | |
| 31 get_repository (callable): a function from DEP urls to ``Repository`` | 28 get_repository (callable): a function from DEP urls to ``Repository`` |
| 32 objects, so we can get changelogs and blame for each dep. Notably, | 29 objects, so we can get changelogs and blame for each dep. Notably, |
| 33 to keep the code here generic, we make no assumptions about | 30 to keep the code here generic, we make no assumptions about |
| 34 which subclass of ``Repository`` this function returns. Thus, | 31 which subclass of ``Repository`` this function returns. Thus, |
| 35 it is up to the caller to decide what class to return and handle | 32 it is up to the caller to decide what class to return and handle |
| 36 any other arguments that class may require (e.g., an http client | 33 any other arguments that class may require (e.g., an http client |
| 37 for ``GitilesRepository``). | 34 for ``GitilesRepository``). |
| 38 top_n_results (int): maximum number of results to return. | 35 top_n_results (int): maximum number of results to return. |
| 39 confidence_threshold (float): In [0,1], above which we only return | 36 confidence_threshold (float): In [0,1], above which we only return |
| 40 the first suspect. | 37 the first suspect. |
| 41 """ | 38 """ |
| 42 return super(cls, ChangelistClassifier).__new__( | 39 return super(cls, ChangelistClassifier).__new__( |
| 43 cls, repository, get_repository, top_n_results, confidence_threshold) | 40 cls, get_repository, top_n_results, confidence_threshold) |
| 44 | 41 |
| 45 def __str__(self): # pragma: no cover | 42 def __str__(self): # pragma: no cover |
| 46 return ('%s(top_n_results=%d, confidence_threshold=%g)' | 43 return ('%s(top_n_results=%d, confidence_threshold=%g)' |
| 47 % (self.__class__.__name__, | 44 % (self.__class__.__name__, |
| 48 self.top_n_results, | 45 self.top_n_results, |
| 49 self.confidence_threshold)) | 46 self.confidence_threshold)) |
| 50 | 47 |
| 51 def __call__(self, report): | 48 def __call__(self, report): |
| 52 """Finds changelists suspected of being responsible for the crash report. | 49 """Finds changelists suspected of being responsible for the crash report. |
| 53 | 50 |
| 54 This function assumes the report's stacktrace has already had any necessary | 51 This function assumes the report's stacktrace has already had any necessary |
| 55 preprocessing (like filtering or truncating) applied. | 52 preprocessing (like filtering or truncating) applied. |
| 56 | 53 |
| 57 Args: | 54 Args: |
| 58 report (CrashReport): the report to be analyzed. | 55 report (CrashReport): the report to be analyzed. |
| 59 | 56 |
| 60 Returns: | 57 Returns: |
| 61 List of ``Suspect``s, sorted by confidence from highest to lowest. | 58 List of ``Suspect``s, sorted by confidence from highest to lowest. |
| 62 """ | 59 """ |
| 63 if not report.regression_range: | 60 if not report.regression_range: |
| 64 logging.warning('ChangelistClassifier.__call__: Missing regression range ' | 61 logging.warning('ChangelistClassifier.__call__: Missing regression range ' |
| 65 'for report: %s', str(report)) | 62 'for report: %s', str(report)) |
| 66 return [] | 63 return [] |
| 67 last_good_version, first_bad_version = report.regression_range | 64 last_good_version, first_bad_version = report.regression_range |
| 68 logging.info('ChangelistClassifier.__call__: Regression range %s:%s', | 65 logging.info('ChangelistClassifier.__call__: Regression range %s:%s', |
| 69 last_good_version, first_bad_version) | 66 last_good_version, first_bad_version) |
| 70 | 67 |
| 71 dependency_fetcher = ChromeDependencyFetcher(self.repository) | 68 dependency_fetcher = ChromeDependencyFetcher(self.get_repository) |
| 72 | 69 |
| 73 # We are only interested in the deps in crash stack (the callstack that | 70 # We are only interested in the deps in crash stack (the callstack that |
| 74 # caused the crash). | 71 # caused the crash). |
| 75 # TODO(wrengr): we may want to receive the crash deps as an argument, | 72 # TODO(wrengr): we may want to receive the crash deps as an argument, |
| 76 # so that when this method is called via Findit.FindCulprit, we avoid | 73 # so that when this method is called via Findit.FindCulprit, we avoid |
| 77 # doing redundant work creating it. | 74 # doing redundant work creating it. |
| 78 stack_deps = GetDepsInCrashStack( | 75 stack_deps = GetDepsInCrashStack( |
| 79 report.stacktrace.crash_stack, | 76 report.stacktrace.crash_stack, |
| 80 dependency_fetcher.GetDependency( | 77 dependency_fetcher.GetDependency( |
| 81 report.crashed_version, report.platform)) | 78 report.crashed_version, report.platform)) |
| 82 | 79 |
| 83 # Get dep and file to changelogs, stack_info and blame dicts. | 80 # Get dep and file to changelogs, stack_info and blame dicts. |
| 84 dep_rolls = dependency_fetcher.GetDependencyRollsDict( | 81 dep_rolls = dependency_fetcher.GetDependencyRollsDict( |
| 85 last_good_version, first_bad_version, report.platform) | 82 last_good_version, first_bad_version, report.platform) |
| 86 | 83 |
| 87 # Regression of a dep added/deleted (old_revision/new_revision is None) can | 84 # Regression of a dep added/deleted (old_revision/new_revision is None) can |
| 88 # not be known for sure and this case rarely happens, so just filter them | 85 # not be known for sure and this case rarely happens, so just filter them |
| 89 # out. | 86 # out. |
| 90 regression_deps_rolls = {} | 87 regression_deps_rolls = {} |
| 91 for dep_path, dep_roll in dep_rolls.iteritems(): | 88 for dep_path, dep_roll in dep_rolls.iteritems(): |
| 92 if not dep_roll.old_revision or not dep_roll.new_revision: | 89 if not dep_roll.old_revision or not dep_roll.new_revision: |
| 93 logging.info('Skip %s denpendency %s', | 90 logging.info('Skip %s denpendency %s', |
| 94 'added' if dep_roll.new_revision else 'deleted', dep_path) | 91 'added' if dep_roll.new_revision else 'deleted', dep_path) |
| 95 continue | 92 continue |
| 96 regression_deps_rolls[dep_path] = dep_roll | 93 regression_deps_rolls[dep_path] = dep_roll |
| 97 | 94 |
| 98 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps( | 95 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps( |
| 99 regression_deps_rolls, stack_deps, self.repository) | 96 regression_deps_rolls, stack_deps, self.get_repository) |
| 100 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps( | 97 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps( |
| 101 report.stacktrace, stack_deps) | 98 report.stacktrace, stack_deps) |
| 102 | 99 |
| 103 suspects = FindSuspects(dep_to_file_to_changelogs, | 100 suspects = FindSuspects(dep_to_file_to_changelogs, |
| 104 dep_to_file_to_stack_infos, | 101 dep_to_file_to_stack_infos, |
| 105 stack_deps, self.get_repository, ignore_cls) | 102 stack_deps, self.get_repository, ignore_cls) |
| 106 if not suspects: | 103 if not suspects: |
| 107 return [] | 104 return [] |
| 108 | 105 |
| 109 # Set confidence, reasons, and changed_files. | 106 # Set confidence, reasons, and changed_files. |
| (...skipping 21 matching lines...) Expand all Loading... |
| 131 stack_deps = {} | 128 stack_deps = {} |
| 132 for frame in crash_stack: | 129 for frame in crash_stack: |
| 133 if frame.dep_path: | 130 if frame.dep_path: |
| 134 stack_deps[frame.dep_path] = crash_deps[frame.dep_path] | 131 stack_deps[frame.dep_path] = crash_deps[frame.dep_path] |
| 135 | 132 |
| 136 return stack_deps | 133 return stack_deps |
| 137 | 134 |
| 138 | 135 |
| 139 # TODO(katesonia): Remove the repository argument after refatoring cl committed. | 136 # TODO(katesonia): Remove the repository argument after refatoring cl committed. |
| 140 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps, | 137 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps, |
| 141 repository): | 138 get_repository): |
| 142 """Gets a dict containing files touched by changelogs for deps in stack_deps. | 139 """Gets a dict containing files touched by changelogs for deps in stack_deps. |
| 143 | 140 |
| 144 Regression ranges for each dep is determined by regression_deps_rolls. | 141 Regression ranges for each dep is determined by regression_deps_rolls. |
| 145 Changelogs which were reverted are returned in a reverted_cls set. | 142 Changelogs which were reverted are returned in a reverted_cls set. |
| 146 | 143 |
| 147 Args: | 144 Args: |
| 148 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in | 145 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in |
| 149 regression range. | 146 regression range. |
| 150 stack_deps (dict): Represents all the dependencies shown in | 147 stack_deps (dict): Represents all the dependencies shown in |
| 151 the crash stack. | 148 the crash stack. |
| 152 repository (Repository): Repository to get changelogs from. | 149 get_repository (callable): a function from DEP urls to ``Repository`` |
| 150 objects, so we can get changelogs and blame for each dep. Notably, |
| 151 to keep the code here generic, we make no assumptions about |
| 152 which subclass of ``Repository`` this function returns. Thus, |
| 153 it is up to the caller to decide what class to return and handle |
| 154 any other arguments that class may require (e.g., an http client |
| 155 for ``GitilesRepository``). |
| 153 | 156 |
| 154 Returns: | 157 Returns: |
| 155 A tuple (dep_to_file_to_changelogs, reverted_cls). | 158 A tuple (dep_to_file_to_changelogs, reverted_cls). |
| 156 | 159 |
| 157 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path | 160 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path |
| 158 to ChangeLogs that touched this file. | 161 to ChangeLogs that touched this file. |
| 159 For example: | 162 For example: |
| 160 { | 163 { |
| 161 'src/': { | 164 'src/': { |
| 162 'a.cc': [ | 165 'a.cc': [ |
| (...skipping 29 matching lines...) Expand all Loading... |
| 192 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list)) | 195 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list)) |
| 193 reverted_cls = set() | 196 reverted_cls = set() |
| 194 | 197 |
| 195 for dep in stack_deps: | 198 for dep in stack_deps: |
| 196 # If a dep is not in regression range, than it cannot be the dep of | 199 # If a dep is not in regression range, than it cannot be the dep of |
| 197 # culprits. | 200 # culprits. |
| 198 dep_roll = regression_deps_rolls.get(dep) | 201 dep_roll = regression_deps_rolls.get(dep) |
| 199 if not dep_roll: | 202 if not dep_roll: |
| 200 continue | 203 continue |
| 201 | 204 |
| 202 repository.repo_url = dep_roll.repo_url | 205 repository = get_repository(dep_roll.repo_url) |
| 203 changelogs = repository.GetChangeLogs(dep_roll.old_revision, | 206 changelogs = repository.GetChangeLogs(dep_roll.old_revision, |
| 204 dep_roll.new_revision) | 207 dep_roll.new_revision) |
| 205 | 208 |
| 206 for changelog in changelogs or []: | 209 for changelog in changelogs or []: |
| 207 # When someone reverts, we need to skip both the CL doing | 210 # When someone reverts, we need to skip both the CL doing |
| 208 # the reverting as well as the CL that got reverted. If | 211 # the reverting as well as the CL that got reverted. If |
| 209 # ``reverted_revision`` is true, then this CL reverts another one, | 212 # ``reverted_revision`` is true, then this CL reverts another one, |
| 210 # so we skip it and save the CL it reverts in ``reverted_cls`` to | 213 # so we skip it and save the CL it reverts in ``reverted_cls`` to |
| 211 # be filtered out later. | 214 # be filtered out later. |
| 212 if changelog.reverted_revision: | 215 if changelog.reverted_revision: |
| (...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 298 repository = get_repository(stack_deps[dep].repo_url) | 301 repository = get_repository(stack_deps[dep].repo_url) |
| 299 blame = repository.GetBlame(touched_file_path, | 302 blame = repository.GetBlame(touched_file_path, |
| 300 stack_deps[dep].revision) | 303 stack_deps[dep].revision) |
| 301 | 304 |
| 302 # Generate/update each suspect(changelog) in changelogs, blame is used | 305 # Generate/update each suspect(changelog) in changelogs, blame is used |
| 303 # to calculate distance between touched lines and crashed lines in file. | 306 # to calculate distance between touched lines and crashed lines in file. |
| 304 suspects.GenerateSuspects( | 307 suspects.GenerateSuspects( |
| 305 touched_file_path, dep, stack_infos, changelogs, blame) | 308 touched_file_path, dep, stack_infos, changelogs, blame) |
| 306 | 309 |
| 307 return suspects.values() | 310 return suspects.values() |
| OLD | NEW |