Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(134)

Side by Side Diff: appengine/findit/crash/changelist_classifier.py

Issue 2707603002: [Predator] Generate all changelogs in regression ranges instead of only matched changelogs (Closed)
Patch Set: . Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | appengine/findit/crash/loglinear/changelist_classifier.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 import logging
6 from collections import defaultdict
7 from collections import namedtuple
8
9 from common.chrome_dependency_fetcher import ChromeDependencyFetcher
10 from crash import crash_util
11 from crash.suspect import StackInfo
12 from crash.suspect import Suspect
13 from crash.suspect import SuspectMap
14 from crash.scorers.aggregated_scorer import AggregatedScorer
15 from crash.scorers.min_distance import MinDistance
16 from crash.scorers.top_frame_index import TopFrameIndex
17 from crash.stacktrace import CallStack
18 from crash.stacktrace import Stacktrace
19 from libs.gitiles.diff import ChangeType
20
21
class ChangelistClassifier(namedtuple('ChangelistClassifier',
    ['get_repository', 'top_n_results', 'confidence_threshold'])):
  """Callable that finds changelists suspected of causing a crash.

  Instances are immutable namedtuples holding the configuration; calling an
  instance with a crash report runs the analysis.
  """
  __slots__ = ()

  def __new__(cls, get_repository, top_n_results=3, confidence_threshold=0.999):
    """Creates a classifier.

    Args:
      get_repository (callable): a function from DEP urls to ``Repository``
        objects, so we can get changelogs and blame for each dep. Notably,
        to keep the code here generic, we make no assumptions about
        which subclass of ``Repository`` this function returns. Thus,
        it is up to the caller to decide what class to return and handle
        any other arguments that class may require (e.g., an http client
        for ``GitilesRepository``).
      top_n_results (int): maximum number of results to return.
      confidence_threshold (float): In [0,1], above which we only return
        the first suspect.
    """
    # ``super`` takes the defining class first and the class being
    # instantiated second. With the arguments the other way around,
    # constructing any subclass raises TypeError.
    return super(ChangelistClassifier, cls).__new__(
        cls, get_repository, top_n_results, confidence_threshold)

  def __str__(self):  # pragma: no cover
    return ('%s(top_n_results=%d, confidence_threshold=%g)'
            % (self.__class__.__name__,
               self.top_n_results,
               self.confidence_threshold))

  def __call__(self, report):
    """Finds changelists suspected of being responsible for the crash report.

    This function assumes the report's stacktrace has already had any necessary
    preprocessing (like filtering or truncating) applied.

    Args:
      report (CrashReport): the report to be analyzed.

    Returns:
      List of ``Suspect``s, sorted by confidence from highest to lowest.
      The list is empty when the report has no regression range, when no
      suspect is found, or when every suspect has zero confidence.
    """
    if not report.regression_range:
      logging.warning('ChangelistClassifier.__call__: Missing regression range '
                      'for report: %s', str(report))
      return []

    last_good_version, first_bad_version = report.regression_range
    logging.info('ChangelistClassifier.__call__: Regression range %s:%s',
                 last_good_version, first_bad_version)

    dependency_fetcher = ChromeDependencyFetcher(self.get_repository)

    # We are only interested in the deps in crash stack (the callstack that
    # caused the crash).
    # TODO(wrengr): we may want to receive the crash deps as an argument,
    # so that when this method is called via Findit.FindCulprit, we avoid
    # doing redundant work creating it.
    stack_deps = GetDepsInCrashStack(
        report.stacktrace.crash_stack,
        dependency_fetcher.GetDependency(
            report.crashed_version, report.platform))

    # Get dep and file to changelogs, stack_info and blame dicts.
    dep_rolls = dependency_fetcher.GetDependencyRollsDict(
        last_good_version, first_bad_version, report.platform)

    # Regression of a dep added/deleted (old_revision/new_revision is None) can
    # not be known for sure and this case rarely happens, so just filter them
    # out.
    regression_deps_rolls = {}
    for dep_path, dep_roll in dep_rolls.items():
      if not dep_roll.old_revision or not dep_roll.new_revision:
        logging.info('Skip %s dependency %s',
                     'added' if dep_roll.new_revision else 'deleted', dep_path)
        continue
      regression_deps_rolls[dep_path] = dep_roll

    dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(
        regression_deps_rolls, stack_deps, self.get_repository)
    dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(
        report.stacktrace, stack_deps)

    suspects = FindSuspects(dep_to_file_to_changelogs,
                            dep_to_file_to_stack_infos,
                            stack_deps, self.get_repository, ignore_cls)
    if not suspects:
      return []

    # Set confidence, reasons, and changed_files. An explicit loop is used
    # because scoring is done purely for its side effects on each suspect; a
    # lazy ``map`` would never run it.
    aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])
    for suspect in suspects:
      aggregated_scorer.Score(suspect)

    # Filter all the 0 confidence results. Building a real list guarantees
    # the in-place sort and the slicing below are available.
    suspects = [suspect for suspect in suspects if suspect.confidence != 0]
    if not suspects:
      return []

    suspects.sort(key=lambda suspect: -suspect.confidence)

    # A sufficiently confident top suspect is reported alone.
    max_results = (1 if suspects[0].confidence > self.confidence_threshold
                   else self.top_n_results)

    return suspects[:max_results]
121
122
def GetDepsInCrashStack(crash_stack, crash_deps):
  """Collects the dependencies referenced by frames of the crash stack.

  Args:
    crash_stack: the callstack that caused the crash; when it is falsy an
      empty dict is returned.
    crash_deps (dict): maps dep path to its dependency; it must contain
      every dep path that appears in the frames of ``crash_stack``.

  Returns:
    A dict mapping each dep path found in the frames to the matching entry
    of ``crash_deps``. Frames without a dep path are ignored.
  """
  if not crash_stack:
    return {}

  frame_dep_paths = (frame.dep_path for frame in crash_stack.frames)
  return {dep_path: crash_deps[dep_path]
          for dep_path in frame_dep_paths if dep_path}
134
135
# TODO(katesonia): Remove the repository argument after the refactoring cl is
# committed.
def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps,
                                       get_repository):
  """Groups the changelogs in the regression range by dep and touched file.

  Only deps that appear in ``stack_deps`` are considered, and the range of
  changelogs fetched for each dep comes from its roll in
  ``regression_deps_rolls``.

  Args:
    regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
      regression range.
    stack_deps (dict): Represents all the dependencies shown in
      the crash stack.
    get_repository (callable): a function from DEP urls to ``Repository``
      objects, used to fetch the changelogs of each dep. No assumption is
      made about which subclass of ``Repository`` it returns; the caller
      decides that and supplies whatever that class needs (e.g., an http
      client for ``GitilesRepository``).

  Returns:
    A tuple (dep_to_file_to_changelogs, reverted_cls).

    dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file
      path to the list of ChangeLogs that touched this file (deletions are
      not counted). For example:
      {
          'src/': {
              'a.cc': [
                  ChangeLog.FromDict({
                      'author': {...},
                      'committer': {...},
                      'message': 'dummy',
                      'commit_position': 175976,
                      'touched_files': [
                          {
                              'change_type': 'add',
                              'new_path': 'a.cc',
                              'old_path': 'b/a.cc'
                          },
                          ...
                      ],
                      'commit_url': 'https://repo.test/+/bcfd',
                      'code_review_url': 'https://codereview.chromium.org/3281',
                      'revision': 'bcfd',
                      'reverted_revision': None
                  }),
              ]
          }
      }

    reverted_cls (set): A set of reverted revisions.
  """
  file_changelogs_by_dep = defaultdict(lambda: defaultdict(list))
  reverted_revisions = set()

  for dep_path in stack_deps:
    # A dep that did not roll in the regression range cannot contain the
    # culprit.
    roll = regression_deps_rolls.get(dep_path)
    if not roll:
      continue

    commits = get_repository(roll.repo_url).GetChangeLogs(
        roll.old_revision, roll.new_revision)

    for commit in commits or []:
      reverted = commit.reverted_revision
      if reverted:
        # This commit reverts another one: leave it out, and remember the
        # commit it reverts so that one can be filtered out later too.
        reverted_revisions.add(reverted)
      else:
        for touched in commit.touched_files:
          if touched.change_type != ChangeType.DELETE:
            file_changelogs_by_dep[dep_path][touched.new_path].append(commit)

  return file_changelogs_by_dep, reverted_revisions
230
231
def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps):
  """Groups the stack information of a stacktrace by dep and file.

  Frames whose dep is not in ``stack_deps`` are left out.

  Args:
    stacktrace (Stacktrace): Parsed stacktrace object.
    stack_deps (dict): Represents all the dependencies shown in
      the crash stack.

  Returns:
    A dict mapping dep path to a dict mapping file path to a list of stack
    information for that file. A file may occur in several frames; each
    stack info consists of a StackFrame and the priority of the callstack
    the frame belongs to.

    For example:
    {
        'src/': {
            'a.cc': [
                StackInfo(StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0),
                StackInfo(StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0),
            ]
        }
    }
  """
  stack_infos_by_dep = defaultdict(lambda: defaultdict(list))

  for stack in stacktrace.stacks:
    # Only frames that belong to a dep of the crash stack are of interest.
    frames_in_stack_deps = (
        frame for frame in stack.frames if frame.dep_path in stack_deps)
    for frame in frames_in_stack_deps:
      infos_for_file = stack_infos_by_dep[frame.dep_path][frame.file_path]
      infos_for_file.append(StackInfo(frame, stack.priority))

  return stack_infos_by_dep
269
270
# TODO(katesonia): Remove the repository argument after the refactoring cl is
# committed.
def FindSuspects(dep_to_file_to_changelogs,
                 dep_to_file_to_stack_infos,
                 stack_deps, get_repository,
                 ignore_cls=None):
  """Finds suspects by matching stacktrace and changelogs in regression range.

  This method only applies to those crashes with regression range.

  Args:
    dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
      to ChangeLogs that touched this file. Deps without an entry are
      skipped, and the mapping is not modified.
    dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path
      to a list of stack information of this file. A file may occur in several
      frames, one stack info consist of a StackFrame and the callstack priority
      of it.
    stack_deps (dict): Represents all the dependencies shown in the crash stack.
    get_repository (callable): a function from urls to ``Repository``
      objects, so we can get changelogs and blame for each dep.
    ignore_cls (set): Set of reverted revisions.

  Returns:
    A list of ``Suspect`` instances with confidence and reason unset.
  """
  suspects = SuspectMap(ignore_cls)

  for dep, file_to_stack_infos in dep_to_file_to_stack_infos.items():
    # Look the dep up with ``get`` instead of indexing: indexing raises
    # KeyError on a plain dict and inserts an empty entry into a defaultdict
    # owned by the caller.
    file_to_changelogs = dep_to_file_to_changelogs.get(dep)
    if not file_to_changelogs:
      continue

    # The repository only depends on the dep, so it is fetched at most once
    # per dep, and only if some crashed file actually matches a touched file.
    repository = None

    for crashed_file_path, stack_infos in file_to_stack_infos.items():
      for touched_file_path, changelogs in file_to_changelogs.items():
        if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path):
          continue

        if repository is None:
          repository = get_repository(stack_deps[dep].repo_url)
        blame = repository.GetBlame(touched_file_path,
                                    stack_deps[dep].revision)

        # Generate/update each suspect(changelog) in changelogs, blame is used
        # to calculate distance between touched lines and crashed lines in file.
        suspects.GenerateSuspects(
            touched_file_path, dep, stack_infos, changelogs, blame)

  # Copy into a list so the documented return type holds even when
  # ``values()`` yields a view.
  return list(suspects.values())
OLDNEW
« no previous file with comments | « no previous file | appengine/findit/crash/loglinear/changelist_classifier.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698