Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(122)

Side by Side Diff: appengine/findit/crash/changelist_classifier.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)
Patch Set: Finally fixed the mock tests! Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « appengine/findit/crash/azalea.py ('k') | appengine/findit/crash/classifier.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import logging
5 from collections import defaultdict 6 from collections import defaultdict
6 7
8 from common import chrome_dependency_fetcher
7 from common.diff import ChangeType 9 from common.diff import ChangeType
8 from common.git_repository import GitRepository 10 from common.git_repository import GitRepository
9 from common.http_client_appengine import HttpClientAppengine 11 from common.http_client_appengine import HttpClientAppengine
10 from crash import crash_util 12 from crash import crash_util
11 from crash.stacktrace import CallStack
12 from crash.stacktrace import Stacktrace
13 from crash.results import MatchResults 13 from crash.results import MatchResults
14 from crash.scorers.aggregated_scorer import AggregatedScorer 14 from crash.scorers.aggregated_scorer import AggregatedScorer
15 from crash.scorers.min_distance import MinDistance 15 from crash.scorers.min_distance import MinDistance
16 from crash.scorers.top_frame_index import TopFrameIndex 16 from crash.scorers.top_frame_index import TopFrameIndex
17 from crash.stacktrace import CallStack
18 from crash.stacktrace import Stacktrace
19
20 # TODO(wrengr): make this a namedtuple.
21 class ChangelistClassifier(object):
22 def __init__(self, repository,
23 top_n_frames, top_n_results=3, confidence_threshold=0.999):
24 """Args:
25 repository (Repository): the Git repository for getting CLs to classify.
26 top_n_frames (int): how many frames of each callstack to look at.
27 top_n_results (int): maximum number of results to return.
28 confidence_threshold (float): In [0,1], above which we only return
29 the first result.
30 """
31 self._repository = repository
32 self.top_n_frames = top_n_frames
33 self.top_n_results = top_n_results
34 self.confidence_threshold = confidence_threshold
35
36 def __str__(self): # pragma: no cover
37 return ('%s(top_n_frames=%d, top_n_results=%d, confidence_threshold=%g)'
38 % (self.__class__.__name__,
39 self.top_n_frames,
40 self.top_n_results,
41 self.confidence_threshold))
42
43 def __call__(self, report):
44 """Finds changelists suspected of being responsible for the crash report.
45
46 Args:
47 report (CrashReport): the report to be analyzed.
48
49 Returns:
50 List of Results, sorted by confidence from highest to lowest.
51 """
52 if not report.regression_range:
53 logging.warning('ChangelistClassifier.__call__: Missing regression range '
54 'for report: %s', str(report))
55 return []
56 last_good_version, first_bad_version = report.regression_range
57 logging.info('ChangelistClassifier.__call__: Regression range %s:%s',
58 last_good_version, first_bad_version)
59
60 # Restrict analysis to just the top n frames in each callstack.
61 # TODO(wrengr): move this to be a Stacktrace method?
62 stacktrace = Stacktrace([
63 CallStack(stack.priority,
64 format_type=stack.format_type,
65 language_type=stack.language_type,
66 frame_list=stack[:self.top_n_frames])
67 for stack in report.stacktrace])
68
69 # We are only interested in the deps in crash stack (the callstack that
70 # caused the crash).
71 # TODO(wrengr): we may want to receive the crash deps as an argument,
72 # so that when this method is called via Findit.FindCulprit, we avoid
73 # doing redundant work creating it.
74 stack_deps = GetDepsInCrashStack(
75 report.stacktrace.crash_stack,
76 chrome_dependency_fetcher.ChromeDependencyFetcher(self._repository
77 ).GetDependency(report.crashed_version, report.platform))
78
79 # Get dep and file to changelogs, stack_info and blame dicts.
80 regression_deps_rolls = chrome_dependency_fetcher.ChromeDependencyFetcher(
81 self._repository).GetDependencyRollsDict(
82 last_good_version, first_bad_version, report.platform)
83 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(
84 regression_deps_rolls, stack_deps, self._repository)
85 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(
86 stacktrace, stack_deps)
87
88 # TODO: argument order is inconsistent with others. Repository should
89 # be last argument.
90 results = FindMatchResults(dep_to_file_to_changelogs,
91 dep_to_file_to_stack_infos,
92 stack_deps, self._repository, ignore_cls)
93 if not results:
94 return []
95
96 # TODO(wrengr): we should be able to do this map/filter/sort in one pass.
97 # Set result.confidence, result.reasons and result.changed_files.
98 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])
99 map(aggregated_scorer.Score, results)
100
101 # Filter all the 0 confidence results.
102 results = filter(lambda r: r.confidence != 0, results)
103 if not results:
104 return []
105
106 sorted_results = sorted(results, key=lambda r: -r.confidence)
107
108 max_results = (1 if sorted_results[0].confidence > self.confidence_threshold
109 else self.top_n_results)
110
111 return sorted_results[:max_results]
17 112
18 113
19 def GetDepsInCrashStack(crash_stack, crash_deps): 114 def GetDepsInCrashStack(crash_stack, crash_deps):
20 """Gets Dependencies in crash stack.""" 115 """Gets Dependencies in crash stack."""
21 if not crash_stack: 116 if not crash_stack:
22 return {} 117 return {}
23 118
24 stack_deps = {} 119 stack_deps = {}
25 for frame in crash_stack: 120 for frame in crash_stack:
26 if frame.dep_path: 121 if frame.dep_path:
27 stack_deps[frame.dep_path] = crash_deps[frame.dep_path] 122 stack_deps[frame.dep_path] = crash_deps[frame.dep_path]
28 123
29 return stack_deps 124 return stack_deps
30 125
31 126
32 # TODO(katesonia): Remove the repository argument after refactoring cl committed. 127 # TODO(katesonia): Remove the repository argument after refactoring cl committed.
33 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps, 128 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps,
34 repository): 129 repository):
35 """Gets a dict containing files touched by changelogs for deps in stack_deps. 130 """Gets a dict containing files touched by changelogs for deps in stack_deps.
36 131
37 The regression range for each dep is determined by regression_deps_rolls. 132 The regression range for each dep is determined by regression_deps_rolls.
38 Those changelogs got reverted should be returned in a ignore_cls set. 133 Changelogs which were reverted are returned in a reverted_cls set.
39 134
40 Args: 135 Args:
41 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in 136 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
42 regression range. 137 regression range.
43 stack_deps (dict): Represents all the dependencies shown in 138 stack_deps (dict): Represents all the dependencies shown in
44 the crash stack. 139 the crash stack.
45 repository (Repository): Repository to get changelogs from. 140 repository (Repository): Repository to get changelogs from.
46 141
47 Returns: 142 Returns:
48 A tuple (dep_to_file_to_changelogs, ignore_cls). 143 A tuple (dep_to_file_to_changelogs, reverted_cls).
49 144
50 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path 145 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
51 to ChangeLogs that touched this file. 146 to ChangeLogs that touched this file.
52 For example: 147 For example:
53 { 148 {
54 'src/': { 149 'src/': {
55 'a.cc': [ 150 'a.cc': [
56 ChangeLog.FromDict({ 151 ChangeLog.FromDict({
57 'author_name': 'test@chromium.org', 152 'author_name': 'test@chromium.org',
58 'message': 'dummy', 153 'message': 'dummy',
(...skipping 14 matching lines...) Expand all
73 'https://repo.test/+/bcfd', 168 'https://repo.test/+/bcfd',
74 'code_review_url': 'https://codereview.chromium.org/3281', 169 'code_review_url': 'https://codereview.chromium.org/3281',
75 'committer_name': 'example@chromium.org', 170 'committer_name': 'example@chromium.org',
76 'revision': 'bcfd', 171 'revision': 'bcfd',
77 'reverted_revision': None 172 'reverted_revision': None
78 }), 173 }),
79 ] 174 ]
80 } 175 }
81 } 176 }
82 177
83 ignore_cls (set): A set of reverted revisions. 178 reverted_cls (set): A set of reverted revisions.
84 """ 179 """
85 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list)) 180 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list))
86 ignore_cls = set() 181 reverted_cls = set()
87 182
88 for dep in stack_deps: 183 for dep in stack_deps:
89 # If a dep is not in regression range, then it cannot be the dep of 184 # If a dep is not in regression range, then it cannot be the dep of
90 # culprits. 185 # culprits.
91 if dep not in regression_deps_rolls: 186 dep_roll = regression_deps_rolls.get(dep)
187 if not dep_roll:
92 continue 188 continue
93 189
94 dep_roll = regression_deps_rolls[dep] 190 dep_roll = regression_deps_rolls[dep]
95 191
96 repository.repo_url = dep_roll.repo_url 192 repository.repo_url = dep_roll.repo_url
97 changelogs = repository.GetChangeLogs(dep_roll.old_revision, 193 changelogs = repository.GetChangeLogs(dep_roll.old_revision,
98 dep_roll.new_revision) 194 dep_roll.new_revision)
99 195
100 for changelog in changelogs: 196 for changelog in changelogs:
197 # When someone reverts, we need to skip both the CL doing
198 # the reverting as well as the CL that got reverted. If
199 # |reverted_revision| is true, then this CL reverts another one,
200 # so we skip it and save the CL it reverts in |reverted_cls| to
201 # be filtered out later.
101 if changelog.reverted_revision: 202 if changelog.reverted_revision:
102 # Skip reverting cls and add reverted revisions to ignore_cls to later 203 reverted_cls.add(changelog.reverted_revision)
103 # filter those reverted revisions.
104 ignore_cls.add(changelog.reverted_revision)
105 continue 204 continue
106 205
107 for touched_file in changelog.touched_files: 206 for touched_file in changelog.touched_files:
108 if touched_file.change_type == ChangeType.DELETE: 207 if touched_file.change_type == ChangeType.DELETE:
109 continue 208 continue
110 209
111 dep_to_file_to_changelogs[dep][touched_file.new_path].append(changelog) 210 dep_to_file_to_changelogs[dep][touched_file.new_path].append(changelog)
112 211
113 return dep_to_file_to_changelogs, ignore_cls 212 return dep_to_file_to_changelogs, reverted_cls
114 213
115 214
116 def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps): 215 def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps):
117 """Gets a dict containing all the stack information of files in stacktrace. 216 """Gets a dict containing all the stack information of files in stacktrace.
118 217
119 Only gets stack information for files grouped by deps in stack_deps. 219 Only gets stack information for files grouped by deps in stack_deps.
120 219
121 Args: 220 Args:
122 stacktrace (Stacktrace): Parsed stacktrace object. 221 stacktrace (Stacktrace): Parsed stacktrace object.
123 stack_deps (dict): Represents all the dependencies shown in 223 stack_deps (dict): Represents all the dependencies shown in
124 the crash stack. 223 the crash stack.
125 224
126 Returns: 225 Returns:
127 A dict, maps dep path to a dict mapping file path to a list of stack 226 A dict, maps dep path to a dict mapping file path to a list of stack
128 inforamtion of this file. A file may occur in several frames, one stack info 227 information of this file. A file may occur in several frames, one
129 consist of a StackFrame and the callstack priority of it. 228 stack info consist of a StackFrame and the callstack priority of it.
130 229
131 For example: 230 For example:
132 { 231 {
133 'src/': { 232 'src/': {
134 'a.cc': [ 233 'a.cc': [
135 (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0), 234 (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0),
136 (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0), 235 (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0),
137 ] 236 ]
138 } 237 }
139 } 238 }
(...skipping 18 matching lines...) Expand all
158 stack_deps, repository, 257 stack_deps, repository,
159 ignore_cls=None): 258 ignore_cls=None):
160 """Finds results by matching stacktrace and changelogs in regression range. 259 """Finds results by matching stacktrace and changelogs in regression range.
161 260
162 This method only applies to those crashes with regression range. 261 This method only applies to those crashes with regression range.
163 262
164 Args: 263 Args:
165 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path 264 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
166 to ChangeLogs that touched this file. 265 to ChangeLogs that touched this file.
167 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path 266 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path
168 to a list of stack inforamtion of this file. A file may occur in several 267 to a list of stack information of this file. A file may occur in several
169 frames, one stack info consists of a StackFrame and the callstack priority 269 frames, one stack info consists of a StackFrame and the callstack priority
170 of it. 269 of it.
171 stack_deps (dict): Represents all the dependencies shown in the crash stack. 270 stack_deps (dict): Represents all the dependencies shown in the crash stack.
172 repository (Repository): Repository to get changelogs and blame from. 271 repository (Repository): Repository to get changelogs and blame from.
173 ignore_cls (set): Set of reverted revisions. 272 ignore_cls (set): Set of reverted revisions.
174 273
175 Returns: 274 Returns:
176 A list of MatchResult instances with confidence and reason unset. 275 A list of MatchResult instances with confidence and reason unset.
177 """ 276 """
178 match_results = MatchResults(ignore_cls) 277 match_results = MatchResults(ignore_cls)
179 278
180 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems(): 279 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems():
181 file_to_changelogs = dep_to_file_to_changelogs[dep] 280 file_to_changelogs = dep_to_file_to_changelogs[dep]
182 repository.repo_url = stack_deps[dep].repo_url 281 repository.repo_url = stack_deps[dep].repo_url
183 282
184 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems(): 283 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems():
185 for touched_file_path, changelogs in file_to_changelogs.iteritems(): 284 for touched_file_path, changelogs in file_to_changelogs.iteritems():
186 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path): 285 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path):
187 continue 286 continue
188 287
189 blame = repository.GetBlame(crashed_file_path, 288 blame = repository.GetBlame(crashed_file_path,
190 stack_deps[dep].revision) 289 stack_deps[dep].revision)
191 290
192 # Generate/update each result(changelog) in changelogs, blame is used 291 # Generate/update each result(changelog) in changelogs, blame is used
193 # to calculate distance between touched lines and crashed lines in file. 292 # to calculate distance between touched lines and crashed lines in file.
194 match_results.GenerateMatchResults( 293 match_results.GenerateMatchResults(
195 crashed_file_path, dep, stack_infos, changelogs, blame) 294 crashed_file_path, dep, stack_infos, changelogs, blame)
196 295
197 return match_results.values() 296 return match_results.values()
198
199
200 # TODO(katesonia): Remove the repository argument after refactoring cl committed.
201 def FindItForCrash(stacktrace, regression_deps_rolls, crashed_deps, top_n,
202 repository):
203 """Finds culprit results for crash.
204
205 Args:
206 stacktrace (Stacktrace): Parsed Stacktrace object.
207 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
208 regression range.
209 crashed_deps (dict of Dependencys): Represents all the dependencies of
210 crashed revision.
211 top_n (int): Top n frames of each stack to be analyzed.
212 repository (Repository): Repository to get changelogs and blame from.
213
214 Returns:
215 List of Results, sorted by confidence from highest to lowest.
216 """
217 if not regression_deps_rolls:
218 return []
219
220 # Findit will only analyze the top n frames in each callstacks.
221 stack_trace = Stacktrace([
222 CallStack(stack.priority,
223 format_type=stack.format_type,
224 language_type=stack.language_type,
225 frame_list=stack[:top_n])
226 for stack in stacktrace])
227
228 # We are only interested in the deps in crash stack (the callstack that
229 # caused the crash).
230 stack_deps = GetDepsInCrashStack(stack_trace.crash_stack, crashed_deps)
231
232 # Get dep and file to changelogs, stack_info and blame dicts.
233 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(
234 regression_deps_rolls, stack_deps, repository)
235 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(
236 stack_trace, stack_deps)
237
238 results = FindMatchResults(dep_to_file_to_changelogs,
239 dep_to_file_to_stack_infos,
240 stack_deps, repository, ignore_cls)
241
242 if not results:
243 return []
244
245 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])
246
247 # Set result.confidence, result.reasons and result.changed_files.
248 map(aggregated_scorer.Score, results)
249
250 # Filter all the 0 confidence results.
251 results = filter(lambda r: r.confidence != 0, results)
252 if not results:
253 return []
254
255 sorted_results = sorted(results, key=lambda r: -r.confidence)
256
257 if sorted_results[0].confidence > 0.999:
258 return sorted_results[:1]
259
260 return sorted_results[:3]
OLDNEW
« no previous file with comments | « appengine/findit/crash/azalea.py ('k') | appengine/findit/crash/classifier.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698