Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: appengine/findit/crash/changelist_classifier.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)
Patch Set: more debugging Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import logging
5 from collections import defaultdict 6 from collections import defaultdict
6 7
8 from common import chromium_deps
7 from common.diff import ChangeType 9 from common.diff import ChangeType
8 from common.git_repository import GitRepository 10 from common.git_repository import GitRepository
9 from common.http_client_appengine import HttpClientAppengine 11 from common.http_client_appengine import HttpClientAppengine
10 from crash import crash_util 12 from crash import crash_util
11 from crash.stacktrace import CallStack
12 from crash.stacktrace import Stacktrace
13 from crash.results import MatchResults 13 from crash.results import MatchResults
14 from crash.scorers.aggregated_scorer import AggregatedScorer 14 from crash.scorers.aggregated_scorer import AggregatedScorer
15 from crash.scorers.min_distance import MinDistance 15 from crash.scorers.min_distance import MinDistance
16 from crash.scorers.top_frame_index import TopFrameIndex 16 from crash.scorers.top_frame_index import TopFrameIndex
17 from crash.stacktrace import CallStack
18 from crash.stacktrace import Stacktrace
19
20 class ChangelistClassifier(object):
21 def __init__(self, top_n_frames, top_n_results=3, confidence_threshold=0.999):
22 """Args:
23 top_n_frames (int): how many frames of each callstack to look at.
24 top_n_results (int): maximum number of results to return.
25 confidence_threshold (float): In [0,1], above which we only return
26 the first result.
27 """
28 self.top_n_frames = top_n_frames
29 self.top_n_results = top_n_results
30 self.confidence_threshold = confidence_threshold
31
32 def __call__(self, report):
33 """Finds changelists suspected of being responsible for the crash report.
34
35 Args:
36 report (CrashReport): the report to be analyzed.
37
38 Returns:
39 List of Results, sorted by confidence from highest to lowest.
40 """
41 if not report.regression_range:
42 logging.warning('ChangelistClassifier.__call_: Missing regression range '
43 'for report: %s', str(report))
44 return []
45 last_good_version, first_bad_version = report.regression_range
46 logging.info('ChangelistClassifier.__call__: Regression range %s:%s',
47 last_good_version, first_bad_version)
48
49 # Restrict analysis to just the top n frames in each callstack.
50 # TODO(wrengr): move this to be a Stacktrace method?
51 stacktrace = Stacktrace([
52 CallStack(stack.priority,
53 format_type=stack.format_type,
54 language_type=stack.language_type,
55 frame_list=stack[:self.top_n_frames])
56 for stack in report.stacktrace])
57
58 # We are only interested in the deps in crash stack (the callstack that
59 # caused the crash).
60 # TODO(wrengr): we may want to receive the crash deps as an argument,
61 # so that when this method is called via Findit.FindCulprit, we avoid
62 # doing redundant work creating it.
63 stack_deps = GetDepsInCrashStack(report.stacktrace.crash_stack,
64 chromium_deps.GetChromeDependency(
65 report.crashed_version, report.platform))
66
67 # Get dep and file to changelogs, stack_info and blame dicts.
68 regression_deps_rolls = chromium_deps.GetDEPSRollsDict(
69 last_good_version, first_bad_version, report.platform)
70 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(
71 regression_deps_rolls, stack_deps)
72 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(
73 stacktrace, stack_deps)
74
75 results = FindMatchResults(dep_to_file_to_changelogs,
76 dep_to_file_to_stack_infos,
77 stack_deps, ignore_cls)
78 if not results:
79 return []
80
81 # TODO(wrengr): we should be able to do this map/filter/sort in one pass.
82 # Set result.confidence, result.reasons and result.changed_files.
83 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])
84 map(aggregated_scorer.Score, results)
85
86 # Filter all the 0 confidence results.
87 results = filter(lambda r: r.confidence != 0, results)
88 if not results:
89 return []
90
91 sorted_results = sorted(results, key=lambda r: -r.confidence)
92
93 max_results = (1 if sorted_results[0].confidence > self.confidence_threshold
94 else self.top_n_results)
95
96 return sorted_results[:max_results]
17 97
18 98
19 def GetDepsInCrashStack(crash_stack, crash_deps): 99 def GetDepsInCrashStack(crash_stack, crash_deps):
20 """Gets Dependencies in crash stack.""" 100 """Gets Dependencies in crash stack."""
21 if not crash_stack: 101 if not crash_stack:
22 return {} 102 return {}
23 103
24 stack_deps = {} 104 stack_deps = {}
25 for frame in crash_stack: 105 for frame in crash_stack:
26 if frame.dep_path: 106 if frame.dep_path:
27 stack_deps[frame.dep_path] = crash_deps[frame.dep_path] 107 stack_deps[frame.dep_path] = crash_deps[frame.dep_path]
28 108
29 return stack_deps 109 return stack_deps
30 110
31 111 # TODO(wrengr): come up with a design to clean up these
112 # FooForFilesGroupedByDeps functions.
32 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps): 113 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps):
33 """Gets a dict containing files touched by changelogs for deps in stack_deps. 114 """Gets a dict containing files touched by changelogs for deps in stack_deps.
34 115
35 The regression range for each dep is determined by regression_deps_rolls. 116 The regression range for each dep is determined by regression_deps_rolls.
36 Those changelogs got reverted should be returned in a ignore_cls set. 117 Changelogs which were reverted are returned in a reverted_cls set.
37 118
38 Args: 119 Args:
39 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in 120 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
40 regression range. 121 regression range.
41 stack_deps (dict): Represents all the dependencies shown in 122 stack_deps (dict): Represents all the dependencies shown in
42 the crash stack. 123 the crash stack.
43 124
44 Returns: 125 Returns:
45 A tuple (dep_to_file_to_changelogs, ignore_cls). 126 A tuple (dep_to_file_to_changelogs, reverted_cls).
46 127
47 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path 128 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
48 to ChangeLogs that touched this file. 129 to ChangeLogs that touched this file.
49 For example: 130 For example:
50 { 131 {
51 'src/': { 132 'src/': {
52 'a.cc': [ 133 'a.cc': [
53 ChangeLog.FromDict({ 134 ChangeLog.FromDict({
54 'author_name': 'test@chromium.org', 135 'author_name': 'test@chromium.org',
55 'message': 'dummy', 136 'message': 'dummy',
(...skipping 14 matching lines...) Expand all
70 'https://repo.test/+/bcfd', 151 'https://repo.test/+/bcfd',
71 'code_review_url': 'https://codereview.chromium.org/3281', 152 'code_review_url': 'https://codereview.chromium.org/3281',
72 'committer_name': 'example@chromium.org', 153 'committer_name': 'example@chromium.org',
73 'revision': 'bcfd', 154 'revision': 'bcfd',
74 'reverted_revision': None 155 'reverted_revision': None
75 }), 156 }),
76 ] 157 ]
77 } 158 }
78 } 159 }
79 160
80 ignore_cls (set): A set of reverted revisions. 161 reverted_cls (set): A set of reverted revisions.
81 """ 162 """
82 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list)) 163 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list))
83 ignore_cls = set() 164 reverted_cls = set()
84 165
85 for dep in stack_deps: 166 for dep in stack_deps:
86 # If a dep is not in regression range, then it cannot be the dep of 167 # If a dep is not in regression range, then it cannot be the dep of
87 # culprits. 168 # culprits.
88 if dep not in regression_deps_rolls: 169 dep_roll = regression_deps_rolls.get(dep)
170 if not dep_roll:
89 continue 171 continue
90 172
91 dep_roll = regression_deps_rolls[dep]
92
93 git_repository = GitRepository(dep_roll.repo_url, HttpClientAppengine()) 173 git_repository = GitRepository(dep_roll.repo_url, HttpClientAppengine())
94 changelogs = git_repository.GetChangeLogs(dep_roll.old_revision, 174 changelogs = git_repository.GetChangeLogs(dep_roll.old_revision,
95 dep_roll.new_revision) 175 dep_roll.new_revision)
96 176
97 for changelog in changelogs: 177 for changelog in changelogs:
178 # When someone reverts, we need to skip both the CL doing
179 # the reverting as well as the CL that got reverted. If
180 # |reverted_revision| is true, then this CL reverts another one,
181 # so we skip it and save the CL it reverts in |reverted_cls| to
182 # be filtered out later.
98 if changelog.reverted_revision: 183 if changelog.reverted_revision:
99 # Skip reverting cls and add reverted revisions to ignore_cls to later 184 reverted_cls.add(changelog.reverted_revision)
100 # filter those reverted revisions.
101 ignore_cls.add(changelog.reverted_revision)
102 continue 185 continue
103 186
104 for touched_file in changelog.touched_files: 187 for touched_file in changelog.touched_files:
105 if touched_file.change_type == ChangeType.DELETE: 188 if touched_file.change_type == ChangeType.DELETE:
106 continue 189 continue
107 190
108 dep_to_file_to_changelogs[dep][touched_file.new_path].append(changelog) 191 dep_to_file_to_changelogs[dep][touched_file.new_path].append(changelog)
109 192
110 return dep_to_file_to_changelogs, ignore_cls 193 return dep_to_file_to_changelogs, reverted_cls
111 194
112 195
113 def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps): 196 def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps):
114 """Gets a dict containing all the stack information of files in stacktrace. 197 """Gets a dict containing all the stack information of files in stacktrace.
115 198
116 Only gets stack information for files grouped by deps in stack_deps. 199 Only gets stack information for files grouped by deps in stack_deps.
117 200
118 Args: 201 Args:
119 stacktrace (Stacktrace): Parsed stacktrace object. 202 stacktrace (Stacktrace): Parsed stacktrace object.
120 stack_deps (dict): Represents all the dependencies shown in 203 stack_deps (dict): Represents all the dependencies shown in
121 the crash stack. 204 the crash stack.
122 205
123 Returns: 206 Returns:
124 A dict, maps dep path to a dict mapping file path to a list of stack 207 A dict, maps dep path to a dict mapping file path to a list of stack
125 inforamtion of this file. A file may occur in several frames, one stack info 208 information of this file. A file may occur in several frames, one
126 consist of a StackFrame and the callstack priority of it. 209 stack info consist of a StackFrame and the callstack priority of it.
127 210
128 For example: 211 For example:
129 { 212 {
130 'src/': { 213 'src/': {
131 'a.cc': [ 214 'a.cc': [
132 (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0), 215 (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0),
133 (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0), 216 (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0),
134 ] 217 ]
135 } 218 }
136 } 219 }
(...skipping 17 matching lines...) Expand all
154 stack_deps, 237 stack_deps,
155 ignore_cls=None): 238 ignore_cls=None):
156 """Finds results by matching stacktrace and changelogs in regression range. 239 """Finds results by matching stacktrace and changelogs in regression range.
157 240
158 This method only applies to those crashes with regression range. 241 This method only applies to those crashes with regression range.
159 242
160 Args: 243 Args:
161 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path 244 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
162 to ChangeLogs that touched this file. 245 to ChangeLogs that touched this file.
163 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path 246 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path
164 to a list of stack inforamtion of this file. A file may occur in several 247 to a list of stack information of this file. A file may occur in several
165 frames, one stack info consists of a StackFrame and the callstack priority 248 frames, one stack info consists of a StackFrame and the callstack priority
166 of it. 249 of it.
167 stack_deps (dict): Represents all the dependencies shown in the crash stack. 250 stack_deps (dict): Represents all the dependencies shown in the crash stack.
168 ignore_cls (set): Set of reverted revisions. 251 ignore_cls (set): Set of reverted revisions.
169 252
170 Returns: 253 Returns:
171 A list of MatchResult instances with confidence and reason unset. 254 A list of MatchResult instances with confidence and reason unset.
172 """ 255 """
173 match_results = MatchResults(ignore_cls) 256 match_results = MatchResults(ignore_cls)
174 257
175 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems(): 258 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems():
176 file_to_changelogs = dep_to_file_to_changelogs[dep] 259 file_to_changelogs = dep_to_file_to_changelogs[dep]
177 git_repository = GitRepository(stack_deps[dep].repo_url, 260 git_repository = GitRepository(stack_deps[dep].repo_url,
178 HttpClientAppengine()) 261 HttpClientAppengine())
179 262
180 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems(): 263 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems():
181 for touched_file_path, changelogs in file_to_changelogs.iteritems(): 264 for touched_file_path, changelogs in file_to_changelogs.iteritems():
182 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path): 265 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path):
183 continue 266 continue
184 267
185 blame = git_repository.GetBlame(crashed_file_path, 268 blame = git_repository.GetBlame(crashed_file_path,
186 stack_deps[dep].revision) 269 stack_deps[dep].revision)
187 270
188 # Generate/update each result(changelog) in changelogs, blame is used 271 # Generate/update each result(changelog) in changelogs, blame is used
189 # to calculate distance between touched lines and crashed lines in file. 272 # to calculate distance between touched lines and crashed lines in file.
190 match_results.GenerateMatchResults( 273 match_results.GenerateMatchResults(
191 crashed_file_path, dep, stack_infos, changelogs, blame) 274 crashed_file_path, dep, stack_infos, changelogs, blame)
192 275
193 return match_results.values() 276 return match_results.values()
194
195
196 def FindItForCrash(stacktrace, regression_deps_rolls, crashed_deps, top_n):
197 """Finds culprit results for crash.
198
199 Args:
200 stacktrace (Stactrace): Parsed Stactrace object.
201 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
202 regression range.
203 crashed_deps (dict of Dependencys): Represents all the dependencies of
204 crashed revision.
205 top_n (int): Top n frames of each stack to be analyzed.
206
207 Returns:
208 List of Results, sorted by confidence from highest to lowest.
209 """
210 if not regression_deps_rolls:
211 return []
212
213 # Findit will only analyze the top n frames in each callstacks.
214 stack_trace = Stacktrace([
215 CallStack(stack.priority,
216 format_type=stack.format_type,
217 language_type=stack.language_type,
218 frame_list=stack[:top_n])
219 for stack in stacktrace])
220
221 # We are only interested in the deps in crash stack (the callstack that
222 # caused the crash).
223 stack_deps = GetDepsInCrashStack(stack_trace.crash_stack, crashed_deps)
224
225 # Get dep and file to changelogs, stack_info and blame dicts.
226 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(
227 regression_deps_rolls, stack_deps)
228 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(
229 stack_trace, stack_deps)
230
231 results = FindMatchResults(dep_to_file_to_changelogs,
232 dep_to_file_to_stack_infos,
233 stack_deps, ignore_cls)
234
235 if not results:
236 return []
237
238 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])
239
240 # Set result.confidence, result.reasons and result.changed_files.
241 map(aggregated_scorer.Score, results)
242
243 # Filter all the 0 confidence results.
244 results = filter(lambda r: r.confidence != 0, results)
245 if not results:
246 return []
247
248 sorted_results = sorted(results, key=lambda r: -r.confidence)
249
250 if sorted_results[0].confidence > 0.999:
251 return sorted_results[:1]
252
253 return sorted_results[:3]
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698