Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1106)

Side by Side Diff: appengine/findit/crash/changelist_classifier.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)
Patch Set: Addressing the crash_config.fracas issue Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import logging
5 from collections import defaultdict 6 from collections import defaultdict
6 7
8 from common import chromium_deps
7 from common.diff import ChangeType 9 from common.diff import ChangeType
8 from common.git_repository import GitRepository 10 from common.git_repository import GitRepository
9 from common.http_client_appengine import HttpClientAppengine 11 from common.http_client_appengine import HttpClientAppengine
10 from crash import crash_util 12 from crash import crash_util
11 from crash.stacktrace import CallStack
12 from crash.stacktrace import Stacktrace
13 from crash.results import MatchResults 13 from crash.results import MatchResults
14 from crash.scorers.aggregated_scorer import AggregatedScorer 14 from crash.scorers.aggregated_scorer import AggregatedScorer
15 from crash.scorers.min_distance import MinDistance 15 from crash.scorers.min_distance import MinDistance
16 from crash.scorers.top_frame_index import TopFrameIndex 16 from crash.scorers.top_frame_index import TopFrameIndex
17 from crash.stacktrace import CallStack
18 from crash.stacktrace import Stacktrace
19
20 # TODO(wrengr): make this a namedtuple.
21 class ChangelistClassifier(object):
22 def __init__(self, top_n_frames, top_n_results=3, confidence_threshold=0.999):
23 """Args:
24 top_n_frames (int): how many frames of each callstack to look at.
25 top_n_results (int): maximum number of results to return.
26 confidence_threshold (float): In [0,1], above which we only return
27 the first result.
28 """
29 self.top_n_frames = top_n_frames
30 self.top_n_results = top_n_results
31 self.confidence_threshold = confidence_threshold
32
33 def __str__(self): # pragma: no cover
34 return ('%s(top_n_frames=%d, top_n_results=%d, confidence_threshold=%g)'
35 % (self.__class__.__name__,
36 self.top_n_frames,
37 self.top_n_results,
38 self.confidence_threshold))
39
40 def __call__(self, report):
41 """Finds changelists suspected of being responsible for the crash report.
42
43 Args:
44 report (CrashReport): the report to be analyzed.
45
46 Returns:
47 List of Results, sorted by confidence from highest to lowest.
48 """
49 if not report.regression_range:
50 logging.warning('ChangelistClassifier.__call__: Missing regression range '
51 'for report: %s', str(report))
52 return []
53 last_good_version, first_bad_version = report.regression_range
54 logging.info('ChangelistClassifier.__call__: Regression range %s:%s',
55 last_good_version, first_bad_version)
56
57 # Restrict analysis to just the top n frames in each callstack.
58 # TODO(wrengr): move this to be a Stacktrace method?
59 stacktrace = Stacktrace([
60 CallStack(stack.priority,
61 format_type=stack.format_type,
62 language_type=stack.language_type,
63 frame_list=stack[:self.top_n_frames])
64 for stack in report.stacktrace])
65
66 # We are only interested in the deps in crash stack (the callstack that
67 # caused the crash).
68 # TODO(wrengr): we may want to receive the crash deps as an argument,
69 # so that when this method is called via Findit.FindCulprit, we avoid
70 # doing redundant work creating it.
71 stack_deps = GetDepsInCrashStack(report.stacktrace.crash_stack,
72 chromium_deps.GetChromeDependency(
73 report.crashed_version, report.platform))
74
75 # Get dep and file to changelogs, stack_info and blame dicts.
76 regression_deps_rolls = chromium_deps.GetDEPSRollsDict(
77 last_good_version, first_bad_version, report.platform)
78 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(
79 regression_deps_rolls, stack_deps)
80 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(
81 stacktrace, stack_deps)
82
83 results = FindMatchResults(dep_to_file_to_changelogs,
84 dep_to_file_to_stack_infos,
85 stack_deps, ignore_cls)
86 if not results:
87 return []
88
89 # TODO(wrengr): we should be able to do this map/filter/sort in one pass.
90 # Set result.confidence, result.reasons and result.changed_files.
91 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])
92 map(aggregated_scorer.Score, results)
93
94 # Filter all the 0 confidence results.
95 results = filter(lambda r: r.confidence != 0, results)
96 if not results:
97 return []
98
99 sorted_results = sorted(results, key=lambda r: -r.confidence)
100
101 max_results = (1 if sorted_results[0].confidence > self.confidence_threshold
102 else self.top_n_results)
103
104 return sorted_results[:max_results]
17 105
18 106
19 def GetDepsInCrashStack(crash_stack, crash_deps): 107 def GetDepsInCrashStack(crash_stack, crash_deps):
20 """Gets Dependencies in crash stack.""" 108 """Gets Dependencies in crash stack."""
21 if not crash_stack: 109 if not crash_stack:
22 return {} 110 return {}
23 111
24 stack_deps = {} 112 stack_deps = {}
25 for frame in crash_stack: 113 for frame in crash_stack:
26 if frame.dep_path: 114 if frame.dep_path:
27 stack_deps[frame.dep_path] = crash_deps[frame.dep_path] 115 stack_deps[frame.dep_path] = crash_deps[frame.dep_path]
28 116
29 return stack_deps 117 return stack_deps
30 118
31 119 # TODO(wrengr): come up with a design to clean up these
120 # FooForFilesGroupedByDeps functions.
32 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps): 121 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps):
33 """Gets a dict containing files touched by changelogs for deps in stack_deps. 122 """Gets a dict containing files touched by changelogs for deps in stack_deps.
34 123
35 Regression ranges for each dep are determined by regression_deps_rolls. 124 Regression ranges for each dep are determined by regression_deps_rolls.
36 Those changelogs got reverted should be returned in a ignore_cls set. 125 Changelogs which were reverted are returned in a reverted_cls set.
37 126
38 Args: 127 Args:
39 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in 128 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
40 regression range. 129 regression range.
41 stack_deps (dict): Represents all the dependencies shown in 130 stack_deps (dict): Represents all the dependencies shown in
42 the crash stack. 131 the crash stack.
43 132
44 Returns: 133 Returns:
45 A tuple (dep_to_file_to_changelogs, ignore_cls). 134 A tuple (dep_to_file_to_changelogs, reverted_cls).
46 135
47 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path 136 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
48 to ChangeLogs that touched this file. 137 to ChangeLogs that touched this file.
49 For example: 138 For example:
50 { 139 {
51 'src/': { 140 'src/': {
52 'a.cc': [ 141 'a.cc': [
53 ChangeLog.FromDict({ 142 ChangeLog.FromDict({
54 'author_name': 'test@chromium.org', 143 'author_name': 'test@chromium.org',
55 'message': 'dummy', 144 'message': 'dummy',
(...skipping 14 matching lines...) Expand all
70 'https://repo.test/+/bcfd', 159 'https://repo.test/+/bcfd',
71 'code_review_url': 'https://codereview.chromium.org/3281', 160 'code_review_url': 'https://codereview.chromium.org/3281',
72 'committer_name': 'example@chromium.org', 161 'committer_name': 'example@chromium.org',
73 'revision': 'bcfd', 162 'revision': 'bcfd',
74 'reverted_revision': None 163 'reverted_revision': None
75 }), 164 }),
76 ] 165 ]
77 } 166 }
78 } 167 }
79 168
80 ignore_cls (set): A set of reverted revisions. 169 reverted_cls (set): A set of reverted revisions.
81 """ 170 """
82 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list)) 171 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list))
83 ignore_cls = set() 172 reverted_cls = set()
84 173
85 for dep in stack_deps: 174 for dep in stack_deps:
86 # If a dep is not in regression range, then it cannot be the dep of 175 # If a dep is not in regression range, then it cannot be the dep of
87 # culprits. 176 # culprits.
88 if dep not in regression_deps_rolls: 177 dep_roll = regression_deps_rolls.get(dep)
178 if not dep_roll:
89 continue 179 continue
90 180
91 dep_roll = regression_deps_rolls[dep]
92
93 git_repository = GitRepository(dep_roll.repo_url, HttpClientAppengine()) 181 git_repository = GitRepository(dep_roll.repo_url, HttpClientAppengine())
94 changelogs = git_repository.GetChangeLogs(dep_roll.old_revision, 182 changelogs = git_repository.GetChangeLogs(dep_roll.old_revision,
95 dep_roll.new_revision) 183 dep_roll.new_revision)
96 184
97 for changelog in changelogs: 185 for changelog in changelogs:
186 # When someone reverts, we need to skip both the CL doing
187 # the reverting as well as the CL that got reverted. If
188 # |reverted_revision| is true, then this CL reverts another one,
189 # so we skip it and save the CL it reverts in |reverted_cls| to
190 # be filtered out later.
98 if changelog.reverted_revision: 191 if changelog.reverted_revision:
99 # Skip reverting cls and add reverted revisions to ignore_cls to later 192 reverted_cls.add(changelog.reverted_revision)
100 # filter those reverted revisions.
101 ignore_cls.add(changelog.reverted_revision)
102 continue 193 continue
103 194
104 for touched_file in changelog.touched_files: 195 for touched_file in changelog.touched_files:
105 if touched_file.change_type == ChangeType.DELETE: 196 if touched_file.change_type == ChangeType.DELETE:
106 continue 197 continue
107 198
108 dep_to_file_to_changelogs[dep][touched_file.new_path].append(changelog) 199 dep_to_file_to_changelogs[dep][touched_file.new_path].append(changelog)
109 200
110 return dep_to_file_to_changelogs, ignore_cls 201 return dep_to_file_to_changelogs, reverted_cls
111 202
112 203
113 def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps): 204 def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps):
114 """Gets a dict containing all the stack information of files in stacktrace. 205 """Gets a dict containing all the stack information of files in stacktrace.
115 206
116 Only gets stack information for files grouped by deps in stack_deps. 207 Only gets stack information for files grouped by deps in stack_deps.
117 208
118 Args: 209 Args:
119 stacktrace (Stacktrace): Parsed stacktrace object. 210 stacktrace (Stacktrace): Parsed stacktrace object.
120 stack_deps (dict): Represents all the dependencies shown in 211 stack_deps (dict): Represents all the dependencies shown in
121 the crash stack. 212 the crash stack.
122 213
123 Returns: 214 Returns:
124 A dict, maps dep path to a dict mapping file path to a list of stack 215 A dict, maps dep path to a dict mapping file path to a list of stack
125 inforamtion of this file. A file may occur in several frames, one stack info 216 information of this file. A file may occur in several frames, one
126 consists of a StackFrame and the callstack priority of it. 217 stack info consists of a StackFrame and the callstack priority of it.
127 218
128 For example: 219 For example:
129 { 220 {
130 'src/': { 221 'src/': {
131 'a.cc': [ 222 'a.cc': [
132 (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0), 223 (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0),
133 (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0), 224 (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0),
134 ] 225 ]
135 } 226 }
136 } 227 }
(...skipping 17 matching lines...) Expand all
154 stack_deps, 245 stack_deps,
155 ignore_cls=None): 246 ignore_cls=None):
156 """Finds results by matching stacktrace and changelogs in regression range. 247 """Finds results by matching stacktrace and changelogs in regression range.
157 248
158 This method only applies to those crashes with regression range. 249 This method only applies to those crashes with regression range.
159 250
160 Args: 251 Args:
161 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path 252 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
162 to ChangeLogs that touched this file. 253 to ChangeLogs that touched this file.
163 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path 254 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path
164 to a list of stack inforamtion of this file. A file may occur in several 255 to a list of stack information of this file. A file may occur in several
165 frames, one stack info consist of a StackFrame and the callstack priority 256 frames, one stack info consist of a StackFrame and the callstack priority
166 of it. 257 of it.
167 stack_deps (dict): Represents all the dependencies shown in the crash stack. 258 stack_deps (dict): Represents all the dependencies shown in the crash stack.
168 ignore_cls (set): Set of reverted revisions. 259 ignore_cls (set): Set of reverted revisions.
169 260
170 Returns: 261 Returns:
171 A list of MatchResult instances with confidence and reason unset. 262 A list of MatchResult instances with confidence and reason unset.
172 """ 263 """
173 match_results = MatchResults(ignore_cls) 264 match_results = MatchResults(ignore_cls)
174 265
175 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems(): 266 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems():
176 file_to_changelogs = dep_to_file_to_changelogs[dep] 267 file_to_changelogs = dep_to_file_to_changelogs[dep]
177 git_repository = GitRepository(stack_deps[dep].repo_url, 268 git_repository = GitRepository(stack_deps[dep].repo_url,
178 HttpClientAppengine()) 269 HttpClientAppengine())
179 270
180 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems(): 271 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems():
181 for touched_file_path, changelogs in file_to_changelogs.iteritems(): 272 for touched_file_path, changelogs in file_to_changelogs.iteritems():
182 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path): 273 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path):
183 continue 274 continue
184 275
185 blame = git_repository.GetBlame(crashed_file_path, 276 blame = git_repository.GetBlame(crashed_file_path,
186 stack_deps[dep].revision) 277 stack_deps[dep].revision)
187 278
188 # Generate/update each result(changelog) in changelogs, blame is used 279 # Generate/update each result(changelog) in changelogs, blame is used
189 # to calculate distance between touched lines and crashed lines in file. 280 # to calculate distance between touched lines and crashed lines in file.
190 match_results.GenerateMatchResults( 281 match_results.GenerateMatchResults(
191 crashed_file_path, dep, stack_infos, changelogs, blame) 282 crashed_file_path, dep, stack_infos, changelogs, blame)
192 283
193 return match_results.values() 284 return match_results.values()
194
195
196 def FindItForCrash(stacktrace, regression_deps_rolls, crashed_deps, top_n):
197 """Finds culprit results for crash.
198
199 Args:
200 stacktrace (Stacktrace): Parsed Stacktrace object.
201 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
202 regression range.
203 crashed_deps (dict of Dependencys): Represents all the dependencies of
204 crashed revision.
205 top_n (int): Top n frames of each stack to be analyzed.
206
207 Returns:
208 List of Results, sorted by confidence from highest to lowest.
209 """
210 if not regression_deps_rolls:
211 return []
212
213 # Findit will only analyze the top n frames in each callstack.
214 stack_trace = Stacktrace([
215 CallStack(stack.priority,
216 format_type=stack.format_type,
217 language_type=stack.language_type,
218 frame_list=stack[:top_n])
219 for stack in stacktrace])
220
221 # We are only interested in the deps in crash stack (the callstack that
222 # caused the crash).
223 stack_deps = GetDepsInCrashStack(stack_trace.crash_stack, crashed_deps)
224
225 # Get dep and file to changelogs, stack_info and blame dicts.
226 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(
227 regression_deps_rolls, stack_deps)
228 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(
229 stack_trace, stack_deps)
230
231 results = FindMatchResults(dep_to_file_to_changelogs,
232 dep_to_file_to_stack_infos,
233 stack_deps, ignore_cls)
234
235 if not results:
236 return []
237
238 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])
239
240 # Set result.confidence, result.reasons and result.changed_files.
241 map(aggregated_scorer.Score, results)
242
243 # Filter all the 0 confidence results.
244 results = filter(lambda r: r.confidence != 0, results)
245 if not results:
246 return []
247
248 sorted_results = sorted(results, key=lambda r: -r.confidence)
249
250 if sorted_results[0].confidence > 0.999:
251 return sorted_results[:1]
252
253 return sorted_results[:3]
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698