Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(46)

Side by Side Diff: appengine/findit/crash/changelist_classifier.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)
Patch Set: trying to fix some tests Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import logging
5 from collections import defaultdict 6 from collections import defaultdict
6 7
8 from common import chromium_deps
7 from common.diff import ChangeType 9 from common.diff import ChangeType
8 from common.git_repository import GitRepository 10 from common.git_repository import GitRepository
9 from common.http_client_appengine import HttpClientAppengine 11 from common.http_client_appengine import HttpClientAppengine
10 from crash import crash_util 12 from crash import crash_util
11 from crash.stacktrace import CallStack
12 from crash.stacktrace import Stacktrace
13 from crash.results import MatchResults 13 from crash.results import MatchResults
14 from crash.scorers.aggregated_scorer import AggregatedScorer 14 from crash.scorers.aggregated_scorer import AggregatedScorer
15 from crash.scorers.min_distance import MinDistance 15 from crash.scorers.min_distance import MinDistance
16 from crash.scorers.top_frame_index import TopFrameIndex 16 from crash.scorers.top_frame_index import TopFrameIndex
17 from crash.stacktrace import CallStack
18 from crash.stacktrace import Stacktrace
19
class ChangelistClassifier(object):
  """Callable that ranks changelists suspected of causing a crash.

  An instance is configured once with how many frames of each callstack to
  inspect and how many results to report; calling it with a crash report
  returns the suspected changelists.
  """

  def __init__(self, top_n_frames, top_n_results=3, confidence_threshold=0.999):
    """Args:
      top_n_frames (int): how many frames of each callstack to look at.
      top_n_results (int): maximum number of results to return.
      confidence_threshold (float): In [0,1], above which we only return
        the first result.
    """
    self.top_n_frames = top_n_frames
    self.top_n_results = top_n_results
    self.confidence_threshold = confidence_threshold

  def __call__(self, report):
    """Finds changelists suspected of being responsible for the crash report.

    Args:
      report (CrashReport): the report to be analyzed.

    Returns:
      List of Results, sorted by confidence from highest to lowest. The list
      is empty when the report has no regression range, when nothing in the
      regression range matches the stacktrace, or when every match scores a
      confidence of 0.
    """
    if not report.regression_range:
      return []
    last_good_version, first_bad_version = report.regression_range
    logging.info('Regression range %s:%s', last_good_version, first_bad_version)

    # Restrict analysis to just the top n frames in each callstack.
    # TODO(wrengr): move this to be a Stacktrace method?
    stacktrace = Stacktrace([
        CallStack(stack.priority,
                  format_type=stack.format_type,
                  language_type=stack.language_type,
                  frame_list=stack[:self.top_n_frames])
        for stack in report.stacktrace])

    # We are only interested in the deps in crash stack (the callstack that
    # caused the crash).
    # NOTE(review): this reads the crash stack of the original
    # report.stacktrace, not of the truncated |stacktrace| built above --
    # confirm that is intended.
    # TODO(wrengr): we may want to receive the crash deps as an argument,
    # so that when this method is called via Findit.FindCulprit, we avoid
    # doing redundant work creating it.
    stack_deps = GetDepsInCrashStack(
        report.stacktrace.crash_stack,
        chromium_deps.GetChromeDependency(
            report.crashed_version, report.platform))

    # Get dep and file to changelogs, stack_info and blame dicts.
    regression_deps_rolls = chromium_deps.GetDEPSRollsDict(
        last_good_version, first_bad_version, report.platform)
    dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(
        regression_deps_rolls, stack_deps)
    dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(
        stacktrace, stack_deps)

    results = FindMatchResults(dep_to_file_to_changelogs,
                               dep_to_file_to_stack_infos,
                               stack_deps, ignore_cls)
    if not results:
      return []

    # Set result.confidence, result.reasons and result.changed_files.
    # Scoring is done purely for its side effect on each result, so use an
    # explicit loop: a bare map() call would be lazy (a no-op) on Python 3.
    aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])
    for result in results:
      aggregated_scorer.Score(result)

    return self._SelectTopResults(results)

  def _SelectTopResults(self, scored_results):
    """Drops zero-confidence results and keeps the most confident ones.

    Args:
      scored_results (iterable): Results whose ``confidence`` is already set.

    Returns:
      A list sorted by confidence from highest to lowest. It holds only the
      best result when that result's confidence exceeds
      self.confidence_threshold, otherwise at most self.top_n_results
      entries.
    """
    # Build a real list (not a lazy filter object) so the emptiness check
    # and the indexing below behave the same on Python 2 and Python 3.
    candidates = [r for r in scored_results if r.confidence != 0]
    if not candidates:
      return []

    sorted_results = sorted(candidates, key=lambda r: -r.confidence)

    max_results = (1 if sorted_results[0].confidence > self.confidence_threshold
                   else self.top_n_results)

    return sorted_results[:max_results]
17 94
18 95
def GetDepsInCrashStack(crash_stack, crash_deps):
  """Collects the dependencies referenced by frames of the crash stack.

  Args:
    crash_stack: Iterable of frames, each exposing a ``dep_path`` attribute.
      May be empty or None.
    crash_deps (dict): Maps dep_path to the dependency it identifies.

  Returns:
    A dict mapping every non-empty frame.dep_path found in crash_stack to
    crash_deps[dep_path]. Empty when crash_stack is falsy.

  Raises:
    KeyError: if a frame names a dep_path that is missing from crash_deps.
  """
  if not crash_stack:
    return {}

  # Frames without a dep_path contribute nothing; repeated dep_paths simply
  # map to the same crash_deps entry again.
  return {
      frame.dep_path: crash_deps[frame.dep_path]
      for frame in crash_stack
      if frame.dep_path
  }
30 107
31 108 # TODO(wrengr): come up with a design to clean up these
109 # FooForFilesGroupedByDeps functions.
32 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps): 110 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps):
33 """Gets a dict containing files touched by changelogs for deps in stack_deps. 111 """Gets a dict containing files touched by changelogs for deps in stack_deps.
34 112
35 Regression ranges for each dep is determined by regression_deps_rolls. 113 Regression ranges for each dep is determined by regression_deps_rolls.
36 Those changelogs got reverted should be returned in a ignore_cls set. 114 Changelogs which were reverted are returned in a reverted_cls set.
37 115
38 Args: 116 Args:
39 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in 117 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
40 regression range. 118 regression range.
41 stack_deps (dict): Represents all the dependencies shown in 119 stack_deps (dict): Represents all the dependencies shown in
42 the crash stack. 120 the crash stack.
43 121
44 Returns: 122 Returns:
45 A tuple (dep_to_file_to_changelogs, ignore_cls). 123 A tuple (dep_to_file_to_changelogs, reverted_cls).
46 124
47 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path 125 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
48 to ChangeLogs that touched this file. 126 to ChangeLogs that touched this file.
49 For example: 127 For example:
50 { 128 {
51 'src/': { 129 'src/': {
52 'a.cc': [ 130 'a.cc': [
53 ChangeLog.FromDict({ 131 ChangeLog.FromDict({
54 'author_name': 'test@chromium.org', 132 'author_name': 'test@chromium.org',
55 'message': 'dummy', 133 'message': 'dummy',
(...skipping 14 matching lines...) Expand all
70 'https://repo.test/+/bcfd', 148 'https://repo.test/+/bcfd',
71 'code_review_url': 'https://codereview.chromium.org/3281', 149 'code_review_url': 'https://codereview.chromium.org/3281',
72 'committer_name': 'example@chromium.org', 150 'committer_name': 'example@chromium.org',
73 'revision': 'bcfd', 151 'revision': 'bcfd',
74 'reverted_revision': None 152 'reverted_revision': None
75 }), 153 }),
76 ] 154 ]
77 } 155 }
78 } 156 }
79 157
80 ignore_cls (set): A set of reverted revisions. 158 reverted_cls (set): A set of reverted revisions.
81 """ 159 """
82 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list)) 160 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list))
83 ignore_cls = set() 161 reverted_cls = set()
84 162
85 for dep in stack_deps: 163 for dep in stack_deps:
86 # If a dep is not in regression range, then it cannot be the dep of 164 # If a dep is not in regression range, then it cannot be the dep of
87 # culprits. 165 # culprits.
88 if dep not in regression_deps_rolls: 166 dep_roll = regression_deps_rolls.get(dep)
167 if not dep_roll:
89 continue 168 continue
90 169
91 dep_roll = regression_deps_rolls[dep]
92
93 git_repository = GitRepository(dep_roll.repo_url, HttpClientAppengine()) 170 git_repository = GitRepository(dep_roll.repo_url, HttpClientAppengine())
94 changelogs = git_repository.GetChangeLogs(dep_roll.old_revision, 171 changelogs = git_repository.GetChangeLogs(dep_roll.old_revision,
95 dep_roll.new_revision) 172 dep_roll.new_revision)
96 173
97 for changelog in changelogs: 174 for changelog in changelogs:
175 # When someone reverts, we need to skip both the CL doing
176 # the reverting as well as the CL that got reverted. If
177 # |reverted_revision| is true, then this CL reverts another one,
178 # so we skip it and save the CL it reverts in |reverted_cls| to
179 # be filtered out later.
98 if changelog.reverted_revision: 180 if changelog.reverted_revision:
99 # Skip reverting cls and add reverted revisions to ignore_cls to later 181 reverted_cls.add(changelog.reverted_revision)
100 # filter those reverted revisions.
101 ignore_cls.add(changelog.reverted_revision)
102 continue 182 continue
103 183
104 for touched_file in changelog.touched_files: 184 for touched_file in changelog.touched_files:
105 if touched_file.change_type == ChangeType.DELETE: 185 if touched_file.change_type == ChangeType.DELETE:
106 continue 186 continue
107 187
108 dep_to_file_to_changelogs[dep][touched_file.new_path].append(changelog) 188 dep_to_file_to_changelogs[dep][touched_file.new_path].append(changelog)
109 189
110 return dep_to_file_to_changelogs, ignore_cls 190 return dep_to_file_to_changelogs, reverted_cls
111 191
112 192
113 def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps): 193 def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps):
114 """Gets a dict containing all the stack information of files in stacktrace. 194 """Gets a dict containing all the stack information of files in stacktrace.
115 195
116 Only gets stack information for files grouped by deps in stack_deps. 196 Only gets stack information for files grouped by deps in stack_deps.
117 197
118 Args: 198 Args:
119 stacktrace (Stacktrace): Parsed stacktrace object. 199 stacktrace (Stacktrace): Parsed stacktrace object.
120 stack_deps (dict): Represents all the dependencies shown in 200 stack_deps (dict): Represents all the dependencies shown in
121 the crash stack. 201 the crash stack.
122 202
123 Returns: 203 Returns:
124 A dict, maps dep path to a dict mapping file path to a list of stack 204 A dict, maps dep path to a dict mapping file path to a list of stack
125 inforamtion of this file. A file may occur in several frames, one stack info 205 information of this file. A file may occur in several frames, one
126 consist of a StackFrame and the callstack priority of it. 206 stack info consist of a StackFrame and the callstack priority of it.
127 207
128 For example: 208 For example:
129 { 209 {
130 'src/': { 210 'src/': {
131 'a.cc': [ 211 'a.cc': [
132 (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0), 212 (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0),
133 (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0), 213 (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0),
134 ] 214 ]
135 } 215 }
136 } 216 }
(...skipping 17 matching lines...) Expand all
154 stack_deps, 234 stack_deps,
155 ignore_cls=None): 235 ignore_cls=None):
156 """Finds results by matching stacktrace and changelogs in regression range. 236 """Finds results by matching stacktrace and changelogs in regression range.
157 237
158 This method only applies to those crashes with regression range. 238 This method only applies to those crashes with regression range.
159 239
160 Args: 240 Args:
161 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path 241 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
162 to ChangeLogs that touched this file. 242 to ChangeLogs that touched this file.
163 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path 243 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path
164 to a list of stack inforamtion of this file. A file may occur in several 244 to a list of stack information of this file. A file may occur in several
165 frames, one stack info consist of a StackFrame and the callstack priority 245 frames, one stack info consist of a StackFrame and the callstack priority
166 of it. 246 of it.
167 stack_deps (dict): Represents all the dependencies shown in the crash stack. 247 stack_deps (dict): Represents all the dependencies shown in the crash stack.
168 ignore_cls (set): Set of reverted revisions. 248 ignore_cls (set): Set of reverted revisions.
169 249
170 Returns: 250 Returns:
171 A list of MatchResult instances with confidence and reason unset. 251 A list of MatchResult instances with confidence and reason unset.
172 """ 252 """
173 match_results = MatchResults(ignore_cls) 253 match_results = MatchResults(ignore_cls)
174 254
175 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems(): 255 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems():
176 file_to_changelogs = dep_to_file_to_changelogs[dep] 256 file_to_changelogs = dep_to_file_to_changelogs[dep]
177 git_repository = GitRepository(stack_deps[dep].repo_url, 257 git_repository = GitRepository(stack_deps[dep].repo_url,
178 HttpClientAppengine()) 258 HttpClientAppengine())
179 259
180 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems(): 260 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems():
181 for touched_file_path, changelogs in file_to_changelogs.iteritems(): 261 for touched_file_path, changelogs in file_to_changelogs.iteritems():
182 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path): 262 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path):
183 continue 263 continue
184 264
185 blame = git_repository.GetBlame(crashed_file_path, 265 blame = git_repository.GetBlame(crashed_file_path,
186 stack_deps[dep].revision) 266 stack_deps[dep].revision)
187 267
188 # Generate/update each result(changelog) in changelogs, blame is used 268 # Generate/update each result(changelog) in changelogs, blame is used
189 # to calculate distance between touched lines and crashed lines in file. 269 # to calculate distance between touched lines and crashed lines in file.
190 match_results.GenerateMatchResults( 270 match_results.GenerateMatchResults(
191 crashed_file_path, dep, stack_infos, changelogs, blame) 271 crashed_file_path, dep, stack_infos, changelogs, blame)
192 272
193 return match_results.values() 273 return match_results.values()
194
195
def FindItForCrash(stacktrace, regression_deps_rolls, crashed_deps, top_n,
                   top_n_results=3, confidence_threshold=0.999):
  """Finds culprit results for crash.

  Args:
    stacktrace (Stacktrace): Parsed Stacktrace object.
    regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
      regression range.
    crashed_deps (dict of Dependencies): Represents all the dependencies of
      crashed revision.
    top_n (int): Top n frames of each stack to be analyzed.
    top_n_results (int): Maximum number of results to return.
    confidence_threshold (float): When the best result's confidence is above
      this value, only that single result is returned.

  Returns:
    List of Results, sorted by confidence from highest to lowest. Empty when
    there are no regression deps rolls, no matches, or every match scores a
    confidence of 0.
  """
  if not regression_deps_rolls:
    return []

  # Findit will only analyze the top n frames in each callstack.
  stack_trace = Stacktrace([
      CallStack(stack.priority,
                format_type=stack.format_type,
                language_type=stack.language_type,
                frame_list=stack[:top_n])
      for stack in stacktrace])

  # We are only interested in the deps in crash stack (the callstack that
  # caused the crash).
  stack_deps = GetDepsInCrashStack(stack_trace.crash_stack, crashed_deps)

  # Get dep and file to changelogs, stack_info and blame dicts.
  dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(
      regression_deps_rolls, stack_deps)
  dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(
      stack_trace, stack_deps)

  results = FindMatchResults(dep_to_file_to_changelogs,
                             dep_to_file_to_stack_infos,
                             stack_deps, ignore_cls)
  if not results:
    return []

  # Set result.confidence, result.reasons and result.changed_files.
  # Scoring is done purely for its side effect on each result, so use an
  # explicit loop: a bare map() call would be lazy (a no-op) on Python 3.
  aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])
  for result in results:
    aggregated_scorer.Score(result)

  # Filter all the 0 confidence results. A list comprehension keeps the
  # emptiness check and the indexing below valid on Python 3 as well.
  results = [r for r in results if r.confidence != 0]
  if not results:
    return []

  sorted_results = sorted(results, key=lambda r: -r.confidence)

  # A sufficiently confident top result is reported on its own.
  if sorted_results[0].confidence > confidence_threshold:
    return sorted_results[:1]

  return sorted_results[:top_n_results]
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698