| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging |
| 5 from collections import defaultdict | 6 from collections import defaultdict |
| 6 | 7 |
| 8 from common import chrome_dependency_fetcher |
| 7 from common.diff import ChangeType | 9 from common.diff import ChangeType |
| 8 from common.git_repository import GitRepository | 10 from common.git_repository import GitRepository |
| 9 from common.http_client_appengine import HttpClientAppengine | 11 from common.http_client_appengine import HttpClientAppengine |
| 10 from crash import crash_util | 12 from crash import crash_util |
| 11 from crash.stacktrace import CallStack | |
| 12 from crash.stacktrace import Stacktrace | |
| 13 from crash.results import MatchResults | 13 from crash.results import MatchResults |
| 14 from crash.scorers.aggregated_scorer import AggregatedScorer | 14 from crash.scorers.aggregated_scorer import AggregatedScorer |
| 15 from crash.scorers.min_distance import MinDistance | 15 from crash.scorers.min_distance import MinDistance |
| 16 from crash.scorers.top_frame_index import TopFrameIndex | 16 from crash.scorers.top_frame_index import TopFrameIndex |
| 17 from crash.stacktrace import CallStack |
| 18 from crash.stacktrace import Stacktrace |
| 19 |
| 20 # TODO(wrengr): make this a namedtuple. |
| 21 class ChangelistClassifier(object): |
| 22 def __init__(self, repository, |
| 23 top_n_frames, top_n_results=3, confidence_threshold=0.999): |
| 24 """Args: |
| 25 repository (Repository): the Git repository for getting CLs to classify. |
| 26 top_n_frames (int): how many frames of each callstack to look at. |
| 27 top_n_results (int): maximum number of results to return. |
| 28 confidence_threshold (float): In [0,1], above which we only return |
| 29 the first result. |
| 30 """ |
| 31 self._repository = repository |
| 32 self.top_n_frames = top_n_frames |
| 33 self.top_n_results = top_n_results |
| 34 self.confidence_threshold = confidence_threshold |
| 35 |
| 36 def __str__(self): # pragma: no cover |
| 37 return ('%s(top_n_frames=%d, top_n_results=%d, confidence_threshold=%g)' |
| 38 % (self.__class__.__name__, |
| 39 self.top_n_frames, |
| 40 self.top_n_results, |
| 41 self.confidence_threshold)) |
| 42 |
| 43 def __call__(self, report): |
| 44 """Finds changelists suspected of being responsible for the crash report. |
| 45 |
| 46 Args: |
| 47 report (CrashReport): the report to be analyzed. |
| 48 |
| 49 Returns: |
| 50 List of Results, sorted by confidence from highest to lowest. |
| 51 """ |
| 52 if not report.regression_range: |
| 53 logging.warning('ChangelistClassifier.__call__: Missing regression range ' |
| 54 'for report: %s', str(report)) |
| 55 return [] |
| 56 last_good_version, first_bad_version = report.regression_range |
| 57 logging.info('ChangelistClassifier.__call__: Regression range %s:%s', |
| 58 last_good_version, first_bad_version) |
| 59 |
| 60 # Restrict analysis to just the top n frames in each callstack. |
| 61 # TODO(wrengr): move this to be a Stacktrace method? |
| 62 stacktrace = Stacktrace([ |
| 63 CallStack(stack.priority, |
| 64 format_type=stack.format_type, |
| 65 language_type=stack.language_type, |
| 66 frame_list=stack[:self.top_n_frames]) |
| 67 for stack in report.stacktrace]) |
| 68 |
| 69 # We are only interested in the deps in crash stack (the callstack that |
| 70 # caused the crash). |
| 71 # TODO(wrengr): we may want to receive the crash deps as an argument, |
| 72 # so that when this method is called via Findit.FindCulprit, we avoid |
| 73 # doing redundant work creating it. |
| 74 stack_deps = GetDepsInCrashStack( |
| 75 report.stacktrace.crash_stack, |
| 76 chrome_dependency_fetcher.ChromeDependencyFetcher(self._repository |
| 77 ).GetDependency(report.crashed_version, report.platform)) |
| 78 |
| 79 # Get dep and file to changelogs, stack_info and blame dicts. |
| 80 regression_deps_rolls = chrome_dependency_fetcher.ChromeDependencyFetcher( |
| 81 self._repository).GetDependencyRollsDict( |
| 82 last_good_version, first_bad_version, report.platform) |
| 83 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps( |
| 84 regression_deps_rolls, stack_deps, self._repository) |
| 85 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps( |
| 86 stacktrace, stack_deps) |
| 87 |
| 88 # TODO: argument order is inconsistent from others. Repository should |
| 89 # be last argument. |
| 90 results = FindMatchResults(dep_to_file_to_changelogs, |
| 91 dep_to_file_to_stack_infos, |
| 92 stack_deps, self._repository, ignore_cls) |
| 93 if not results: |
| 94 return [] |
| 95 |
| 96 # TODO(wrengr): we should be able to do this map/filter/sort in one pass. |
| 97 # Set result.confidence, result.reasons and result.changed_files. |
| 98 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()]) |
| 99 map(aggregated_scorer.Score, results) |
| 100 |
| 101 # Filter all the 0 confidence results. |
| 102 results = filter(lambda r: r.confidence != 0, results) |
| 103 if not results: |
| 104 return [] |
| 105 |
| 106 sorted_results = sorted(results, key=lambda r: -r.confidence) |
| 107 |
| 108 max_results = (1 if sorted_results[0].confidence > self.confidence_threshold |
| 109 else self.top_n_results) |
| 110 |
| 111 return sorted_results[:max_results] |
| 17 | 112 |
| 18 | 113 |
| 19 def GetDepsInCrashStack(crash_stack, crash_deps): | 114 def GetDepsInCrashStack(crash_stack, crash_deps): |
| 20 """Gets Dependencies in crash stack.""" | 115 """Gets Dependencies in crash stack.""" |
| 21 if not crash_stack: | 116 if not crash_stack: |
| 22 return {} | 117 return {} |
| 23 | 118 |
| 24 stack_deps = {} | 119 stack_deps = {} |
| 25 for frame in crash_stack: | 120 for frame in crash_stack: |
| 26 if frame.dep_path: | 121 if frame.dep_path: |
| 27 stack_deps[frame.dep_path] = crash_deps[frame.dep_path] | 122 stack_deps[frame.dep_path] = crash_deps[frame.dep_path] |
| 28 | 123 |
| 29 return stack_deps | 124 return stack_deps |
| 30 | 125 |
| 31 | 126 |
| 32 # TODO(katesonia): Remove the repository argument after refactoring cl committed. | 127 # TODO(katesonia): Remove the repository argument after refactoring cl committed. |
| 33 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps, | 128 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps, |
| 34 repository): | 129 repository): |
| 35 """Gets a dict containing files touched by changelogs for deps in stack_deps. | 130 """Gets a dict containing files touched by changelogs for deps in stack_deps. |
| 36 | 131 |
| 37 Regression ranges for each dep are determined by regression_deps_rolls. | 132 Regression ranges for each dep are determined by regression_deps_rolls. |
| 38 Those changelogs got reverted should be returned in a ignore_cls set. | 133 Changelogs which were reverted are returned in a reverted_cls set. |
| 39 | 134 |
| 40 Args: | 135 Args: |
| 41 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in | 136 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in |
| 42 regression range. | 137 regression range. |
| 43 stack_deps (dict): Represents all the dependencies shown in | 138 stack_deps (dict): Represents all the dependencies shown in |
| 44 the crash stack. | 139 the crash stack. |
| 45 repository (Repository): Repository to get changelogs from. | 140 repository (Repository): Repository to get changelogs from. |
| 46 | 141 |
| 47 Returns: | 142 Returns: |
| 48 A tuple (dep_to_file_to_changelogs, ignore_cls). | 143 A tuple (dep_to_file_to_changelogs, reverted_cls). |
| 49 | 144 |
| 50 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path | 145 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path |
| 51 to ChangeLogs that touched this file. | 146 to ChangeLogs that touched this file. |
| 52 For example: | 147 For example: |
| 53 { | 148 { |
| 54 'src/': { | 149 'src/': { |
| 55 'a.cc': [ | 150 'a.cc': [ |
| 56 ChangeLog.FromDict({ | 151 ChangeLog.FromDict({ |
| 57 'author_name': 'test@chromium.org', | 152 'author_name': 'test@chromium.org', |
| 58 'message': 'dummy', | 153 'message': 'dummy', |
| (...skipping 14 matching lines...) Expand all Loading... |
| 73 'https://repo.test/+/bcfd', | 168 'https://repo.test/+/bcfd', |
| 74 'code_review_url': 'https://codereview.chromium.org/3281', | 169 'code_review_url': 'https://codereview.chromium.org/3281', |
| 75 'committer_name': 'example@chromium.org', | 170 'committer_name': 'example@chromium.org', |
| 76 'revision': 'bcfd', | 171 'revision': 'bcfd', |
| 77 'reverted_revision': None | 172 'reverted_revision': None |
| 78 }), | 173 }), |
| 79 ] | 174 ] |
| 80 } | 175 } |
| 81 } | 176 } |
| 82 | 177 |
| 83 ignore_cls (set): A set of reverted revisions. | 178 reverted_cls (set): A set of reverted revisions. |
| 84 """ | 179 """ |
| 85 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list)) | 180 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list)) |
| 86 ignore_cls = set() | 181 reverted_cls = set() |
| 87 | 182 |
| 88 for dep in stack_deps: | 183 for dep in stack_deps: |
| 89 # If a dep is not in regression range, then it cannot be the dep of | 184 # If a dep is not in regression range, then it cannot be the dep of |
| 90 # culprits. | 185 # culprits. |
| 91 if dep not in regression_deps_rolls: | 186 dep_roll = regression_deps_rolls.get(dep) |
| 187 if not dep_roll: |
| 92 continue | 188 continue |
| 93 | 189 |
| 94 dep_roll = regression_deps_rolls[dep] | 190 dep_roll = regression_deps_rolls[dep] |
| 95 | 191 |
| 96 repository.repo_url = dep_roll.repo_url | 192 repository.repo_url = dep_roll.repo_url |
| 97 changelogs = repository.GetChangeLogs(dep_roll.old_revision, | 193 changelogs = repository.GetChangeLogs(dep_roll.old_revision, |
| 98 dep_roll.new_revision) | 194 dep_roll.new_revision) |
| 99 | 195 |
| 100 for changelog in changelogs: | 196 for changelog in changelogs: |
| 197 # When someone reverts, we need to skip both the CL doing |
| 198 # the reverting as well as the CL that got reverted. If |
| 199 # |reverted_revision| is true, then this CL reverts another one, |
| 200 # so we skip it and save the CL it reverts in |reverted_cls| to |
| 201 # be filtered out later. |
| 101 if changelog.reverted_revision: | 202 if changelog.reverted_revision: |
| 102 # Skip reverting cls and add reverted revisions to ignore_cls to later | 203 reverted_cls.add(changelog.reverted_revision) |
| 103 # filter those reverted revisions. | |
| 104 ignore_cls.add(changelog.reverted_revision) | |
| 105 continue | 204 continue |
| 106 | 205 |
| 107 for touched_file in changelog.touched_files: | 206 for touched_file in changelog.touched_files: |
| 108 if touched_file.change_type == ChangeType.DELETE: | 207 if touched_file.change_type == ChangeType.DELETE: |
| 109 continue | 208 continue |
| 110 | 209 |
| 111 dep_to_file_to_changelogs[dep][touched_file.new_path].append(changelog) | 210 dep_to_file_to_changelogs[dep][touched_file.new_path].append(changelog) |
| 112 | 211 |
| 113 return dep_to_file_to_changelogs, ignore_cls | 212 return dep_to_file_to_changelogs, reverted_cls |
| 114 | 213 |
| 115 | 214 |
| 116 def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps): | 215 def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps): |
| 117 """Gets a dict containing all the stack information of files in stacktrace. | 216 """Gets a dict containing all the stack information of files in stacktrace. |
| 118 | 217 |
| 119 Only gets stack information for files grouped by deps in stack_deps. | 218 Only gets stack information for files grouped by deps in stack_deps. |
| 120 | 219 |
| 121 Args: | 220 Args: |
| 122 stacktrace (Stacktrace): Parsed stacktrace object. | 221 stacktrace (Stacktrace): Parsed stacktrace object. |
| 123 stack_deps (dict): Represents all the dependencies shown in | 222 stack_deps (dict): Represents all the dependencies shown in |
| 124 the crash stack. | 223 the crash stack. |
| 125 | 224 |
| 126 Returns: | 225 Returns: |
| 127 A dict, maps dep path to a dict mapping file path to a list of stack | 226 A dict, maps dep path to a dict mapping file path to a list of stack |
| 128 inforamtion of this file. A file may occur in several frames, one stack info | 227 information of this file. A file may occur in several frames, one |
| 129 consists of a StackFrame and the callstack priority of it. | 228 stack info consists of a StackFrame and the callstack priority of it. |
| 130 | 229 |
| 131 For example: | 230 For example: |
| 132 { | 231 { |
| 133 'src/': { | 232 'src/': { |
| 134 'a.cc': [ | 233 'a.cc': [ |
| 135 (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0), | 234 (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0), |
| 136 (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0), | 235 (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0), |
| 137 ] | 236 ] |
| 138 } | 237 } |
| 139 } | 238 } |
| (...skipping 18 matching lines...) Expand all Loading... |
| 158 stack_deps, repository, | 257 stack_deps, repository, |
| 159 ignore_cls=None): | 258 ignore_cls=None): |
| 160 """Finds results by matching stacktrace and changelogs in regression range. | 259 """Finds results by matching stacktrace and changelogs in regression range. |
| 161 | 260 |
| 162 This method only applies to those crashes with regression range. | 261 This method only applies to those crashes with regression range. |
| 163 | 262 |
| 164 Args: | 263 Args: |
| 165 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path | 264 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path |
| 166 to ChangeLogs that touched this file. | 265 to ChangeLogs that touched this file. |
| 167 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path | 266 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path |
| 168 to a list of stack inforamtion of this file. A file may occur in several | 267 to a list of stack information of this file. A file may occur in several |
| 169 frames, one stack info consists of a StackFrame and the callstack priority | 268 frames, one stack info consists of a StackFrame and the callstack priority |
| 170 of it. | 269 of it. |
| 171 stack_deps (dict): Represents all the dependencies shown in the crash stack. | 270 stack_deps (dict): Represents all the dependencies shown in the crash stack. |
| 172 repository (Repository): Repository to get changelogs and blame from. | 271 repository (Repository): Repository to get changelogs and blame from. |
| 173 ignore_cls (set): Set of reverted revisions. | 272 ignore_cls (set): Set of reverted revisions. |
| 174 | 273 |
| 175 Returns: | 274 Returns: |
| 176 A list of MatchResult instances with confidence and reason unset. | 275 A list of MatchResult instances with confidence and reason unset. |
| 177 """ | 276 """ |
| 178 match_results = MatchResults(ignore_cls) | 277 match_results = MatchResults(ignore_cls) |
| 179 | 278 |
| 180 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems(): | 279 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems(): |
| 181 file_to_changelogs = dep_to_file_to_changelogs[dep] | 280 file_to_changelogs = dep_to_file_to_changelogs[dep] |
| 182 repository.repo_url = stack_deps[dep].repo_url | 281 repository.repo_url = stack_deps[dep].repo_url |
| 183 | 282 |
| 184 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems(): | 283 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems(): |
| 185 for touched_file_path, changelogs in file_to_changelogs.iteritems(): | 284 for touched_file_path, changelogs in file_to_changelogs.iteritems(): |
| 186 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path): | 285 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path): |
| 187 continue | 286 continue |
| 188 | 287 |
| 189 blame = repository.GetBlame(crashed_file_path, | 288 blame = repository.GetBlame(crashed_file_path, |
| 190 stack_deps[dep].revision) | 289 stack_deps[dep].revision) |
| 191 | 290 |
| 192 # Generate/update each result(changelog) in changelogs, blame is used | 291 # Generate/update each result(changelog) in changelogs, blame is used |
| 193 # to calculate distance between touched lines and crashed lines in file. | 292 # to calculate distance between touched lines and crashed lines in file. |
| 194 match_results.GenerateMatchResults( | 293 match_results.GenerateMatchResults( |
| 195 crashed_file_path, dep, stack_infos, changelogs, blame) | 294 crashed_file_path, dep, stack_infos, changelogs, blame) |
| 196 | 295 |
| 197 return match_results.values() | 296 return match_results.values() |
| 198 | |
| 199 | |
| 200 # TODO(katesonia): Remove the repository argument after refactoring cl committed. | |
| 201 def FindItForCrash(stacktrace, regression_deps_rolls, crashed_deps, top_n, | |
| 202 repository): | |
| 203 """Finds culprit results for crash. | |
| 204 | |
| 205 Args: | |
| 206 stacktrace (Stacktrace): Parsed Stacktrace object. | |
| 207 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in | |
| 208 regression range. | |
| 209 crashed_deps (dict of Dependencys): Represents all the dependencies of | |
| 210 crashed revision. | |
| 211 top_n (int): Top n frames of each stack to be analyzed. | |
| 212 repository (Repository): Repository to get changelogs and blame from. | |
| 213 | |
| 214 Returns: | |
| 215 List of Results, sorted by confidence from highest to lowest. | |
| 216 """ | |
| 217 if not regression_deps_rolls: | |
| 218 return [] | |
| 219 | |
| 220 # Findit will only analyze the top n frames in each callstacks. | |
| 221 stack_trace = Stacktrace([ | |
| 222 CallStack(stack.priority, | |
| 223 format_type=stack.format_type, | |
| 224 language_type=stack.language_type, | |
| 225 frame_list=stack[:top_n]) | |
| 226 for stack in stacktrace]) | |
| 227 | |
| 228 # We are only interested in the deps in crash stack (the callstack that | |
| 229 # caused the crash). | |
| 230 stack_deps = GetDepsInCrashStack(stack_trace.crash_stack, crashed_deps) | |
| 231 | |
| 232 # Get dep and file to changelogs, stack_info and blame dicts. | |
| 233 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps( | |
| 234 regression_deps_rolls, stack_deps, repository) | |
| 235 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps( | |
| 236 stack_trace, stack_deps) | |
| 237 | |
| 238 results = FindMatchResults(dep_to_file_to_changelogs, | |
| 239 dep_to_file_to_stack_infos, | |
| 240 stack_deps, repository, ignore_cls) | |
| 241 | |
| 242 if not results: | |
| 243 return [] | |
| 244 | |
| 245 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()]) | |
| 246 | |
| 247 # Set result.confidence, result.reasons and result.changed_files. | |
| 248 map(aggregated_scorer.Score, results) | |
| 249 | |
| 250 # Filter all the 0 confidence results. | |
| 251 results = filter(lambda r: r.confidence != 0, results) | |
| 252 if not results: | |
| 253 return [] | |
| 254 | |
| 255 sorted_results = sorted(results, key=lambda r: -r.confidence) | |
| 256 | |
| 257 if sorted_results[0].confidence > 0.999: | |
| 258 return sorted_results[:1] | |
| 259 | |
| 260 return sorted_results[:3] | |
| OLD | NEW |