| OLD | NEW |
| (Empty) |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 import logging | |
| 6 from collections import defaultdict | |
| 7 from collections import namedtuple | |
| 8 | |
| 9 from common.chrome_dependency_fetcher import ChromeDependencyFetcher | |
| 10 from crash import crash_util | |
| 11 from crash.suspect import StackInfo | |
| 12 from crash.suspect import Suspect | |
| 13 from crash.suspect import SuspectMap | |
| 14 from crash.scorers.aggregated_scorer import AggregatedScorer | |
| 15 from crash.scorers.min_distance import MinDistance | |
| 16 from crash.scorers.top_frame_index import TopFrameIndex | |
| 17 from crash.stacktrace import CallStack | |
| 18 from crash.stacktrace import Stacktrace | |
| 19 from libs.gitiles.diff import ChangeType | |
| 20 | |
| 21 | |
class ChangelistClassifier(namedtuple('ChangelistClassifier',
    ['get_repository', 'top_n_results', 'confidence_threshold'])):
  """Callable which finds the changelists suspected of causing a crash.

  Instances are immutable tuples of configuration. Calling an instance with
  a crash report returns the suspected changelists, ranked by confidence.
  """
  __slots__ = ()

  def __new__(cls, get_repository, top_n_results=3, confidence_threshold=0.999):
    """Args:
      get_repository (callable): a function from DEP urls to ``Repository``
        objects, so we can get changelogs and blame for each dep. Notably,
        to keep the code here generic, we make no assumptions about
        which subclass of ``Repository`` this function returns. Thus,
        it is up to the caller to decide what class to return and handle
        any other arguments that class may require (e.g., an http client
        for ``GitilesRepository``).
      top_n_results (int): maximum number of results to return.
      confidence_threshold (float): In [0,1], above which we only return
        the first suspect.
    """
    # ``super`` takes the defining class first and the type to bind second.
    # With the arguments swapped this only works when ``cls`` is exactly
    # ``ChangelistClassifier``; any subclass raises TypeError.
    return super(ChangelistClassifier, cls).__new__(
        cls, get_repository, top_n_results, confidence_threshold)

  def __str__(self): # pragma: no cover
    return ('%s(top_n_results=%d, confidence_threshold=%g)'
        % (self.__class__.__name__,
           self.top_n_results,
           self.confidence_threshold))

  def __call__(self, report):
    """Finds changelists suspected of being responsible for the crash report.

    This function assumes the report's stacktrace has already had any necessary
    preprocessing (like filtering or truncating) applied.

    Args:
      report (CrashReport): the report to be analyzed.

    Returns:
      List of ``Suspect``s, sorted by confidence from highest to lowest. The
      list is empty when the report has no regression range, when no suspect
      is found, or when every suspect has zero confidence. Otherwise it holds
      a single suspect if the best confidence exceeds
      ``confidence_threshold``, and at most ``top_n_results`` suspects
      otherwise.
    """
    if not report.regression_range:
      logging.warning('ChangelistClassifier.__call__: Missing regression range '
                      'for report: %s', str(report))
      return []
    last_good_version, first_bad_version = report.regression_range
    logging.info('ChangelistClassifier.__call__: Regression range %s:%s',
                 last_good_version, first_bad_version)

    dependency_fetcher = ChromeDependencyFetcher(self.get_repository)

    # We are only interested in the deps in crash stack (the callstack that
    # caused the crash).
    # TODO(wrengr): we may want to receive the crash deps as an argument,
    # so that when this method is called via Findit.FindCulprit, we avoid
    # doing redundant work creating it.
    stack_deps = GetDepsInCrashStack(
        report.stacktrace.crash_stack,
        dependency_fetcher.GetDependency(
            report.crashed_version, report.platform))

    # Get dep and file to changelogs, stack_info and blame dicts.
    dep_rolls = dependency_fetcher.GetDependencyRollsDict(
        last_good_version, first_bad_version, report.platform)

    # Regression of a dep added/deleted (old_revision/new_revision is None) can
    # not be known for sure and this case rarely happens, so just filter them
    # out.
    regression_deps_rolls = {}
    for dep_path, dep_roll in dep_rolls.iteritems():
      if not dep_roll.old_revision or not dep_roll.new_revision:
        logging.info('Skip %s denpendency %s',
                     'added' if dep_roll.new_revision else 'deleted', dep_path)
        continue
      regression_deps_rolls[dep_path] = dep_roll

    dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(
        regression_deps_rolls, stack_deps, self.get_repository)
    dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(
        report.stacktrace, stack_deps)

    suspects = FindSuspects(dep_to_file_to_changelogs,
                            dep_to_file_to_stack_infos,
                            stack_deps, self.get_repository, ignore_cls)
    if not suspects:
      return []

    # Set confidence, reasons, and changed_files.
    aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])
    # ``Score`` is called only for its effect on each suspect, so loop
    # explicitly instead of building a throwaway list with ``map``.
    for suspect in suspects:
      aggregated_scorer.Score(suspect)

    # Filter all the 0 confidence results. A comprehension always yields a
    # list, which the in-place sort below relies on.
    suspects = [suspect for suspect in suspects if suspect.confidence != 0]
    if not suspects:
      return []

    suspects.sort(key=lambda suspect: -suspect.confidence)

    max_results = (1 if suspects[0].confidence > self.confidence_threshold
                   else self.top_n_results)

    return suspects[:max_results]
| 121 | |
| 122 | |
def GetDepsInCrashStack(crash_stack, crash_deps):
  """Collects the dependencies referenced by the frames of the crash stack.

  Args:
    crash_stack: the callstack that caused the crash. It must expose
      ``frames``, each of which has a ``dep_path``. May be falsy (e.g. None).
    crash_deps (dict): Maps dep path to dependency; it is indexed by the
      ``dep_path`` of every frame that has one.

  Returns:
    A dict mapping the dep path of each frame that has one to the matching
    entry of ``crash_deps``. Empty when ``crash_stack`` is falsy.
  """
  if crash_stack:
    return {frame.dep_path: crash_deps[frame.dep_path]
            for frame in crash_stack.frames if frame.dep_path}
  return {}
| 134 | |
| 135 | |
# TODO(katesonia): Remove the repository argument after refactoring cl
# committed.
def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps,
                                       get_repository):
  """Groups the changelogs in the regression range by dep and touched file.

  Only deps present in both ``stack_deps`` and ``regression_deps_rolls`` are
  examined; the revision range of each dep comes from its DependencyRoll.
  Revisions which were reverted are collected in a separate set.

  Args:
    regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
      regression range.
    stack_deps (dict): Represents all the dependencies shown in
      the crash stack.
    get_repository (callable): a function from DEP urls to ``Repository``
      objects, so we can get changelogs and blame for each dep. Notably,
      to keep the code here generic, we make no assumptions about
      which subclass of ``Repository`` this function returns. Thus,
      it is up to the caller to decide what class to return and handle
      any other arguments that class may require (e.g., an http client
      for ``GitilesRepository``).

  Returns:
    A tuple (dep_to_file_to_changelogs, reverted_cls).

    dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
    to ChangeLogs that touched this file.
    For example:
    {
        'src/': {
            'a.cc': [
                ChangeLog.FromDict({
                    'author': {
                        'name': 'test@chromium.org',
                        'email': 'example@chromium.org',
                        'time': 'Thu Mar 31 21:24:43 2016',
                    },
                    'committer': {
                        'name': 'example@chromium.org',
                        'email': 'example@chromium.org',
                        'time': 'Thu Mar 31 21:28:39 2016',
                    },
                    'message': 'dummy',
                    'commit_position': 175976,
                    'touched_files': [
                        {
                            'change_type': 'add',
                            'new_path': 'a.cc',
                            'old_path': 'b/a.cc'
                        },
                        ...
                    ],
                    'commit_url':
                        'https://repo.test/+/bcfd',
                    'code_review_url': 'https://codereview.chromium.org/3281',
                    'revision': 'bcfd',
                    'reverted_revision': None
                }),
            ]
        }
    }

    reverted_cls (set): A set of reverted revisions.
  """
  reverted_cls = set()
  dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list))

  for dep_path in stack_deps:
    # A dep which did not roll in the regression range cannot contain the
    # culprit, so there is nothing to fetch for it.
    roll = regression_deps_rolls.get(dep_path)
    if not roll:
      continue

    commits = get_repository(roll.repo_url).GetChangeLogs(
        roll.old_revision, roll.new_revision)

    for commit in commits or []:
      # A revert takes two CLs out of consideration: the reverting CL itself,
      # which is skipped right here, and the CL it reverts, which is recorded
      # in ``reverted_cls`` so that it can be filtered out later.
      if commit.reverted_revision:
        reverted_cls.add(commit.reverted_revision)
        continue

      for touched in commit.touched_files:
        # Deleted files are not recorded.
        if touched.change_type == ChangeType.DELETE:
          continue

        # Index lazily, so that a dep only shows up in the result once it
        # has at least one recorded file.
        dep_to_file_to_changelogs[dep_path][touched.new_path].append(commit)

  return dep_to_file_to_changelogs, reverted_cls
| 230 | |
| 231 | |
def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps):
  """Groups the frames of every callstack in stacktrace by dep and file.

  Frames whose dep is not in ``stack_deps`` are left out.

  Args:
    stacktrace (Stacktrace): Parsed stacktrace object.
    stack_deps (dict): Represents all the dependencies shown in
      the crash stack.

  Returns:
    A dict, maps dep path to a dict mapping file path to a list of stack
    information of this file. A file may occur in several frames, one
    stack info consists of a StackFrame and the priority of its callstack.

    For example:
    {
        'src/': {
            'a.cc': [
                StackInfo(StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0),
                StackInfo(StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0),
            ]
        }
    }
  """
  grouped = defaultdict(lambda: defaultdict(list))

  for stack in stacktrace.stacks:
    for frame in stack.frames:
      # Only the dependencies which show up in the crash stack matter.
      if frame.dep_path in stack_deps:
        infos = grouped[frame.dep_path][frame.file_path]
        infos.append(StackInfo(frame, stack.priority))

  return grouped
| 269 | |
| 270 | |
# TODO(katesonia): Remove the repository argument after refactoring cl
# committed.
def FindSuspects(dep_to_file_to_changelogs,
                 dep_to_file_to_stack_infos,
                 stack_deps, get_repository,
                 ignore_cls=None):
  """Finds suspects by matching stacktrace and changelogs in regression range.

  This method only applies to those crashes with regression range.

  Args:
    dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
      to ChangeLogs that touched this file. Deps without any changelog may be
      absent; this argument is never modified.
    dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path
      to a list of stack information of this file. A file may occur in several
      frames, one stack info consists of a StackFrame and the callstack
      priority of it.
    stack_deps (dict): Represents all the dependencies shown in the crash stack.
    get_repository (callable): a function from urls to ``Repository``
      objects, so we can get changelogs and blame for each dep.
    ignore_cls (set): Set of reverted revisions.

  Returns:
    A list of ``Suspect`` instances with confidence and reason unset.
  """
  suspects = SuspectMap(ignore_cls)

  for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems():
    # Look the dep up with ``get`` rather than indexing. Indexing raises
    # KeyError on a plain dict when a stack dep has no changelogs, and on a
    # defaultdict it silently inserts an empty entry into the caller's data.
    file_to_changelogs = dep_to_file_to_changelogs.get(dep)
    if not file_to_changelogs:
      continue

    for crashed_file_path, stack_infos in file_to_stack_infos.iteritems():
      for touched_file_path, changelogs in file_to_changelogs.iteritems():
        if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path):
          continue

        repository = get_repository(stack_deps[dep].repo_url)
        blame = repository.GetBlame(touched_file_path,
                                    stack_deps[dep].revision)

        # Generate/update each suspect(changelog) in changelogs, blame is used
        # to calculate distance between touched lines and crashed lines in file.
        suspects.GenerateSuspects(
            touched_file_path, dep, stack_infos, changelogs, blame)

  return suspects.values()
| OLD | NEW |