Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(40)

Side by Side Diff: appengine/findit/crash/findit_for_crash.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)
Patch Set: Finally fixed the mock tests! Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « appengine/findit/crash/findit_for_clusterfuzz.py ('k') | appengine/findit/crash/occurrence.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 from collections import defaultdict
6
7 from common.diff import ChangeType
8 from common.git_repository import GitRepository
9 from common.http_client_appengine import HttpClientAppengine
10 from crash import crash_util
11 from crash.stacktrace import CallStack
12 from crash.stacktrace import Stacktrace
13 from crash.results import MatchResults
14 from crash.scorers.aggregated_scorer import AggregatedScorer
15 from crash.scorers.min_distance import MinDistance
16 from crash.scorers.top_frame_index import TopFrameIndex
17
18
def GetDepsInCrashStack(crash_stack, crash_deps):
  """Collects the dependencies referenced by frames of the crash stack.

  Args:
    crash_stack: Iterable of frames exposing a ``dep_path`` attribute. May be
      None or empty.
    crash_deps (dict): Maps dep_path to the dependency it identifies.

  Returns:
    A dict mapping every non-empty ``dep_path`` seen in ``crash_stack`` to
    ``crash_deps[dep_path]``. An empty dict when ``crash_stack`` is falsy.
  """
  if not crash_stack:
    return {}

  # Frames with an empty dep_path are skipped; they contribute no entry.
  return {
      frame.dep_path: crash_deps[frame.dep_path]
      for frame in crash_stack
      if frame.dep_path
  }
30
31
32 # TODO(katesonia): Remove the repository argument after the refactoring cl is committed.
def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps,
                                       repository):
  """Groups the changelogs in the regression range by dep and touched file.

  Only deps that appear both in ``stack_deps`` and in
  ``regression_deps_rolls`` are examined; the regression range of each dep is
  taken from its DependencyRoll. Changelogs that revert another change are not
  grouped; the revisions they revert are collected in ``ignore_cls`` instead.

  Note that ``repository.repo_url`` is reassigned for every examined dep.

  Args:
    regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
      regression range.
    stack_deps (dict): Represents all the dependencies shown in
      the crash stack.
    repository (Repository): Repository to get changelogs from.

  Returns:
    A tuple (dep_to_file_to_changelogs, ignore_cls).

    dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
      to the ChangeLogs that touched this file (deleted files are skipped).
      For example:
      {
          'src/': {
              'a.cc': [
                  ChangeLog.FromDict({
                      'revision': 'bcfd',
                      'commit_position': 175976,
                      'touched_files': [
                          {
                              'change_type': 'add',
                              'new_path': 'a.cc',
                              'old_path': 'b/a.cc'
                          },
                          ...
                      ],
                      'reverted_revision': None,
                      ...
                  }),
              ]
          }
      }

    ignore_cls (set): A set of reverted revisions.
  """
  files_by_dep = defaultdict(lambda: defaultdict(list))
  reverted_revisions = set()

  # A dep outside the regression range cannot hold the culprit, so only the
  # deps that were actually rolled are worth querying.
  rolled_deps = (dep for dep in stack_deps if dep in regression_deps_rolls)

  for dep_path in rolled_deps:
    roll = regression_deps_rolls[dep_path]

    repository.repo_url = roll.repo_url
    for changelog in repository.GetChangeLogs(roll.old_revision,
                                              roll.new_revision):
      if changelog.reverted_revision:
        # A reverting cl is never a suspect itself; remember what it reverted
        # so that revision can be filtered out later.
        reverted_revisions.add(changelog.reverted_revision)
      else:
        for touched in changelog.touched_files:
          if touched.change_type != ChangeType.DELETE:
            files_by_dep[dep_path][touched.new_path].append(changelog)

  return files_by_dep, reverted_revisions
114
115
def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps):
  """Groups the frames of a stacktrace by dep and by crashed file.

  Frames whose dep is not in ``stack_deps`` are left out.

  Args:
    stacktrace (Stacktrace): Parsed stacktrace object.
    stack_deps (dict): Represents all the dependencies shown in
      the crash stack.

  Returns:
    A dict mapping dep path to a dict mapping file path to a list of stack
    infos for that file. A file may occur in several frames; each stack info
    is a tuple of the StackFrame and the priority of the callstack it
    belongs to.

    For example:
    {
        'src/': {
            'a.cc': [
                (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0),
                (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0),
            ]
        }
    }
  """
  stack_infos_by_dep = defaultdict(lambda: defaultdict(list))

  for stack in stacktrace:
    for stack_frame in stack:
      # Only dependencies present in the crash stack are of interest.
      if stack_frame.dep_path in stack_deps:
        infos = stack_infos_by_dep[stack_frame.dep_path][stack_frame.file_path]
        infos.append((stack_frame, stack.priority))

  return stack_infos_by_dep
153
154
155 # TODO(katesonia): Remove the repository argument after the refactoring cl is committed.
def FindMatchResults(dep_to_file_to_changelogs,
                     dep_to_file_to_stack_infos,
                     stack_deps, repository,
                     ignore_cls=None):
  """Matches crashed files against files touched in the regression range.

  This method only applies to those crashes with regression range.

  Note that ``repository.repo_url`` is reassigned for every dep visited.

  Args:
    dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
      to ChangeLogs that touched this file.
    dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path
      to a list of stack information of this file. A file may occur in several
      frames; one stack info consists of a StackFrame and the priority of its
      callstack.
    stack_deps (dict): Represents all the dependencies shown in the crash stack.
    repository (Repository): Repository to get changelogs and blame from.
    ignore_cls (set): Set of reverted revisions.

  Returns:
    A list of MatchResult instances with confidence and reason unset.
  """
  matches = MatchResults(ignore_cls)

  for dep, stack_infos_by_file in dep_to_file_to_stack_infos.iteritems():
    changelogs_by_file = dep_to_file_to_changelogs[dep]
    repository.repo_url = stack_deps[dep].repo_url

    for crashed_file_path, stack_infos in stack_infos_by_file.iteritems():
      # Keep only the touched files that refer to the crashed file.
      matching_changelogs = (
          changelogs
          for touched_file_path, changelogs in changelogs_by_file.iteritems()
          if crash_util.IsSameFilePath(crashed_file_path, touched_file_path))

      for changelogs in matching_changelogs:
        # Blame is used to calculate the distance between touched lines and
        # crashed lines in the file.
        blame = repository.GetBlame(crashed_file_path,
                                    stack_deps[dep].revision)

        # Generate/update a result for each changelog in changelogs.
        matches.GenerateMatchResults(
            crashed_file_path, dep, stack_infos, changelogs, blame)

  return matches.values()
198
199
200 # TODO(katesonia): Remove the repository argument after the refactoring cl is committed.
def FindItForCrash(stacktrace, regression_deps_rolls, crashed_deps, top_n,
                   repository):
  """Finds culprit results for crash.

  Args:
    stacktrace (Stacktrace): Parsed Stacktrace object.
    regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
      regression range.
    crashed_deps (dict of Dependencys): Represents all the dependencies of
      crashed revision.
    top_n (int): Top n frames of each stack to be analyzed.
    repository (Repository): Repository to get changelogs and blame from.

  Returns:
    List of Results, sorted by confidence from highest to lowest. At most
    three results are returned, or only the best one when its confidence
    exceeds 0.999. An empty list is returned when there is no regression
    range, nothing matches, or every match scores a confidence of 0.
  """
  if not regression_deps_rolls:
    return []

  # Findit will only analyze the top n frames in each callstack.
  stack_trace = Stacktrace([
      CallStack(stack.priority,
                format_type=stack.format_type,
                language_type=stack.language_type,
                frame_list=stack[:top_n])
      for stack in stacktrace])

  # We are only interested in the deps in crash stack (the callstack that
  # caused the crash).
  stack_deps = GetDepsInCrashStack(stack_trace.crash_stack, crashed_deps)

  # Get dep and file to changelogs, stack_info and blame dicts.
  dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(
      regression_deps_rolls, stack_deps, repository)
  dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(
      stack_trace, stack_deps)

  results = FindMatchResults(dep_to_file_to_changelogs,
                             dep_to_file_to_stack_infos,
                             stack_deps, repository, ignore_cls)

  if not results:
    return []

  aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])

  # Set result.confidence, result.reasons and result.changed_files.
  # Scoring is run purely for its side effect, so use an explicit loop:
  # map() is lazy on Python 3 and would silently never call Score.
  for result in results:
    aggregated_scorer.Score(result)

  # Filter all the 0 confidence results. A list (rather than filter(), which
  # is a lazy, always-truthy iterator on Python 3) keeps the emptiness check
  # and the indexing below valid.
  results = [result for result in results if result.confidence != 0]
  if not results:
    return []

  sorted_results = sorted(results, key=lambda r: -r.confidence)

  # A near-certain top result makes the runner-ups irrelevant.
  if sorted_results[0].confidence > 0.999:
    return sorted_results[:1]

  return sorted_results[:3]
OLDNEW
« no previous file with comments | « appengine/findit/crash/findit_for_clusterfuzz.py ('k') | appengine/findit/crash/occurrence.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698