| OLD | NEW |
| (Empty) |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 from collections import defaultdict | |
| 6 | |
| 7 from common.diff import ChangeType | |
| 8 from common.git_repository import GitRepository | |
| 9 from common.http_client_appengine import HttpClientAppengine | |
| 10 from crash import crash_util | |
| 11 from crash.stacktrace import CallStack | |
| 12 from crash.stacktrace import Stacktrace | |
| 13 from crash.results import MatchResults | |
| 14 from crash.scorers.aggregated_scorer import AggregatedScorer | |
| 15 from crash.scorers.min_distance import MinDistance | |
| 16 from crash.scorers.top_frame_index import TopFrameIndex | |
| 17 | |
| 18 | |
def GetDepsInCrashStack(crash_stack, crash_deps):
  """Collects the dependencies referenced by the frames of the crash stack.

  Args:
    crash_stack: Iterable of frames, each exposing a ``dep_path`` attribute.
      May be None or empty.
    crash_deps (dict): Maps dep_path to the dependency it identifies.

  Returns:
    A dict mapping every non-empty ``dep_path`` found in ``crash_stack`` to
    the matching entry of ``crash_deps``. An empty dict is returned when
    there is no crash stack.

  Raises:
    KeyError: If a frame names a dep_path that is missing from ``crash_deps``.
  """
  if not crash_stack:
    return {}

  # Frames without a dep_path cannot be attributed to any dependency.
  dep_paths = [frame.dep_path for frame in crash_stack if frame.dep_path]
  return {dep_path: crash_deps[dep_path] for dep_path in dep_paths}
| 30 | |
| 31 | |
def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps):
  """Groups the changelogs in the regression range by dep and touched file.

  Only deps that are present in both ``stack_deps`` and
  ``regression_deps_rolls`` are examined; the regression range of each dep is
  taken from its DependencyRoll. Changelogs that revert another revision are
  not grouped; the revisions they revert are collected in ``ignore_cls``.

  Args:
    regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
      regression range.
    stack_deps (dict): Represents all the dependencies shown in
      the crash stack.

  Returns:
    A tuple (dep_to_file_to_changelogs, ignore_cls).

    dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
      to the ChangeLogs that touched this file (deleted files are left out).
    For example:
    {
        'src/': {
            'a.cc': [
                ChangeLog.FromDict({
                    'author_name': 'test@chromium.org',
                    'message': 'dummy',
                    'committer_email': 'example@chromium.org',
                    'commit_position': 175976,
                    'author_email': 'example@chromium.org',
                    'touched_files': [
                        {
                            'change_type': 'add',
                            'new_path': 'a.cc',
                            'old_path': 'b/a.cc'
                        },
                        ...
                    ],
                    'author_time': 'Thu Mar 31 21:24:43 2016',
                    'committer_time': 'Thu Mar 31 21:28:39 2016',
                    'commit_url':
                        'https://repo.test/+/bcfd',
                    'code_review_url': 'https://codereview.chromium.org/3281',
                    'committer_name': 'example@chromium.org',
                    'revision': 'bcfd',
                    'reverted_revision': None
                }),
            ]
        }
    }

    ignore_cls (set): A set of reverted revisions.
  """
  reverted_revisions = set()
  changelogs_by_dep_and_file = defaultdict(lambda: defaultdict(list))

  # A dep that is not in the regression range cannot be the dep of culprits.
  deps_in_range = [dep for dep in stack_deps if dep in regression_deps_rolls]

  for dep in deps_in_range:
    roll = regression_deps_rolls[dep]
    repository = GitRepository(roll.repo_url, HttpClientAppengine())

    for changelog in repository.GetChangeLogs(roll.old_revision,
                                              roll.new_revision):
      reverted_revision = changelog.reverted_revision
      if reverted_revision:
        # A reverting cl is skipped; the revision it reverts is remembered so
        # that it can be filtered out later.
        reverted_revisions.add(reverted_revision)
        continue

      for touched_file in changelog.touched_files:
        if touched_file.change_type != ChangeType.DELETE:
          changelogs_by_dep_and_file[dep][touched_file.new_path].append(
              changelog)

  return changelogs_by_dep_and_file, reverted_revisions
| 111 | |
| 112 | |
def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps):
  """Groups the frames of a stacktrace by dependency and by file.

  Frames whose dep_path is not a key of ``stack_deps`` are left out.

  Args:
    stacktrace (Stacktrace): Parsed stacktrace object; iterating it yields
      callstacks, and iterating a callstack yields its frames.
    stack_deps (dict): Represents all the dependencies shown in
      the crash stack.

  Returns:
    A dict mapping dep path to a dict mapping file path to a list of stack
    information of this file. A file may occur in several frames; one stack
    info consists of a StackFrame and the priority of the callstack holding it.

    For example:
    {
        'src/': {
            'a.cc': [
                (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0),
                (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0),
            ]
        }
    }
  """
  stack_infos_by_dep_and_file = defaultdict(lambda: defaultdict(list))

  for callstack in stacktrace:
    # We only care about frames of dependencies that are in the crash stack.
    relevant_frames = [
        frame for frame in callstack if frame.dep_path in stack_deps]

    for frame in relevant_frames:
      stack_info = (frame, callstack.priority)
      file_to_stack_infos = stack_infos_by_dep_and_file[frame.dep_path]
      file_to_stack_infos[frame.file_path].append(stack_info)

  return stack_infos_by_dep_and_file
| 150 | |
| 151 | |
def FindMatchResults(dep_to_file_to_changelogs,
                     dep_to_file_to_stack_infos,
                     stack_deps,
                     ignore_cls=None):
  """Finds results by matching stacktrace and changelogs in regression range.

  This method only applies to those crashes with regression range.

  Args:
    dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
      to ChangeLogs that touched this file. It is only read, never modified.
    dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path
      to a list of stack information of this file. A file may occur in several
      frames; one stack info consists of a StackFrame and the callstack
      priority of it.
    stack_deps (dict): Represents all the dependencies shown in the crash stack.
      Each value must expose ``repo_url`` and ``revision``.
    ignore_cls (set): Set of reverted revisions.

  Returns:
    The values of the MatchResults collection built from the matches,
    i.e. MatchResult instances with confidence and reason unset.
  """
  match_results = MatchResults(ignore_cls)

  for dep, file_to_stack_infos in dep_to_file_to_stack_infos.items():
    # Use .get() rather than indexing: a dep in the crash stack may have no
    # changelogs at all (e.g. it is outside the regression range). Indexing
    # would insert an empty entry into a caller-owned defaultdict, or raise
    # KeyError for a plain dict.
    file_to_changelogs = dep_to_file_to_changelogs.get(dep)
    if not file_to_changelogs:
      # Nothing can match, so do not even set up a repository client.
      continue

    git_repository = GitRepository(stack_deps[dep].repo_url,
                                   HttpClientAppengine())

    for crashed_file_path, stack_infos in file_to_stack_infos.items():
      # The blame depends only on the crashed file and the dep revision, so it
      # is fetched at most once per crashed file, and only if something
      # matches.
      blame = None

      for touched_file_path, changelogs in file_to_changelogs.items():
        if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path):
          continue

        if blame is None:
          blame = git_repository.GetBlame(crashed_file_path,
                                          stack_deps[dep].revision)

        # Generate/update each result(changelog) in changelogs, blame is used
        # to calculate distance between touched lines and crashed lines in file.
        match_results.GenerateMatchResults(
            crashed_file_path, dep, stack_infos, changelogs, blame)

  return match_results.values()
| 194 | |
| 195 | |
def FindItForCrash(stacktrace, regression_deps_rolls, crashed_deps, top_n):
  """Finds culprit results for crash.

  Args:
    stacktrace (Stacktrace): Parsed Stacktrace object.
    regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
      regression range.
    crashed_deps (dict of Dependencys): Represents all the dependencies of
      crashed revision.
    top_n (int): Top n frames of each stack to be analyzed.

  Returns:
    List of Results, sorted by confidence from highest to lowest. Results with
    zero confidence are dropped. At most 3 results are returned, and only the
    best one when its confidence is above 0.999. An empty list is returned
    when there is no regression range or nothing matches.
  """
  if not regression_deps_rolls:
    return []

  # Findit will only analyze the top n frames in each callstack.
  stack_trace = Stacktrace([
      CallStack(stack.priority,
                format_type=stack.format_type,
                language_type=stack.language_type,
                frame_list=stack[:top_n])
      for stack in stacktrace])

  # We are only interested in the deps in crash stack (the callstack that
  # caused the crash).
  stack_deps = GetDepsInCrashStack(stack_trace.crash_stack, crashed_deps)

  # Get dep and file to changelogs and stack_info dicts.
  dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(
      regression_deps_rolls, stack_deps)
  dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(
      stack_trace, stack_deps)

  results = FindMatchResults(dep_to_file_to_changelogs,
                             dep_to_file_to_stack_infos,
                             stack_deps, ignore_cls)

  if not results:
    return []

  aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])

  # Score() is called for its side effects only (it is expected to set
  # result.confidence, result.reasons and result.changed_files). An explicit
  # loop guarantees that every result is scored; map() would silently do
  # nothing wherever it returns a lazy iterator.
  for result in results:
    aggregated_scorer.Score(result)

  # Filter all the 0 confidence results. A list (not a lazy filter object) is
  # built so that the emptiness check below stays meaningful.
  results = [result for result in results if result.confidence != 0]
  if not results:
    return []

  sorted_results = sorted(results, key=lambda r: -r.confidence)

  # A near-certain top result makes the runner-ups irrelevant.
  if sorted_results[0].confidence > 0.999:
    return sorted_results[:1]

  return sorted_results[:3]
| OLD | NEW |