Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1320)

Side by Side Diff: appengine/findit/crash/findit_for_crash.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)
Patch Set: trying to fix some tests Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 from collections import defaultdict
6
7 from common.diff import ChangeType
8 from common.git_repository import GitRepository
9 from common.http_client_appengine import HttpClientAppengine
10 from crash import crash_util
11 from crash.stacktrace import CallStack
12 from crash.stacktrace import Stacktrace
13 from crash.results import MatchResults
14 from crash.scorers.aggregated_scorer import AggregatedScorer
15 from crash.scorers.min_distance import MinDistance
16 from crash.scorers.top_frame_index import TopFrameIndex
17
18
def GetDepsInCrashStack(crash_stack, crash_deps):
  """Collects the dependencies referenced by the frames of the crash stack.

  Args:
    crash_stack: Iterable of frames, each exposing a ``dep_path`` attribute.
      May be None or empty.
    crash_deps (dict): Maps dep_path to the dependency it identifies.

  Returns:
    A dict mapping every non-empty ``frame.dep_path`` found in crash_stack to
    ``crash_deps[frame.dep_path]``. An empty dict if crash_stack is falsy.

  Raises:
    KeyError: If a frame has a dep_path that is not a key of crash_deps.
  """
  if crash_stack:
    # Frames without a dep_path carry no dependency information; skip them.
    return {
        frame.dep_path: crash_deps[frame.dep_path]
        for frame in crash_stack
        if frame.dep_path
    }

  return {}
30
31
def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps):
  """Groups the changelogs in the regression range by dep and touched file.

  Only deps that appear in stack_deps are considered, and the range of
  revisions inspected for each of them comes from regression_deps_rolls.
  Changelogs that revert another revision are not grouped; the revision they
  revert is recorded in the returned ignore_cls set instead.

  Args:
    regression_deps_rolls (dict): Maps dep_path to the DependencyRoll in the
      regression range.
    stack_deps (dict): Represents all the dependencies shown in the crash
      stack.

  Returns:
    A tuple (dep_to_file_to_changelogs, ignore_cls).

    dep_to_file_to_changelogs (defaultdict): Maps dep_path to a dict mapping
      the new path of each touched (non-deleted) file to the list of
      ChangeLogs that touched it.
      For example:
      {
          'src/': {
              'a.cc': [
                  ChangeLog.FromDict({
                      'author_name': 'test@chromium.org',
                      'message': 'dummy',
                      'committer_email': 'example@chromium.org',
                      'commit_position': 175976,
                      'author_email': 'example@chromium.org',
                      'touched_files': [
                          {
                              'change_type': 'add',
                              'new_path': 'a.cc',
                              'old_path': 'b/a.cc'
                          },
                          ...
                      ],
                      'author_time': 'Thu Mar 31 21:24:43 2016',
                      'committer_time': 'Thu Mar 31 21:28:39 2016',
                      'commit_url':
                          'https://repo.test/+/bcfd',
                      'code_review_url': 'https://codereview.chromium.org/3281',
                      'committer_name': 'example@chromium.org',
                      'revision': 'bcfd',
                      'reverted_revision': None
                  }),
              ]
          }
      }

    ignore_cls (set): The revisions that were reverted within the range.
  """
  files_by_dep = defaultdict(lambda: defaultdict(list))
  reverted_revisions = set()

  # A dep that was not rolled in the regression range cannot contain the
  # culprit, so only deps with a roll are inspected.
  rolled_deps = [dep for dep in stack_deps if dep in regression_deps_rolls]

  for dep in rolled_deps:
    roll = regression_deps_rolls[dep]
    repository = GitRepository(roll.repo_url, HttpClientAppengine())

    for changelog in repository.GetChangeLogs(roll.old_revision,
                                              roll.new_revision):
      reverted = changelog.reverted_revision
      if reverted:
        # A reverting cl is never grouped itself; remember what it reverted so
        # that the reverted revision can be filtered out later.
        reverted_revisions.add(reverted)
        continue

      for touched_file in changelog.touched_files:
        # Deleted files are not recorded.
        if touched_file.change_type == ChangeType.DELETE:
          continue

        files_by_dep[dep][touched_file.new_path].append(changelog)

  return files_by_dep, reverted_revisions
111
112
def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps):
  """Groups the frames of a stacktrace by dep and by file.

  Frames whose dep_path is not in stack_deps are left out.

  Args:
    stacktrace (Stacktrace): Parsed stacktrace object; iterating it yields
      callstacks, and iterating a callstack yields its frames.
    stack_deps (dict): Represents all the dependencies shown in the crash
      stack.

  Returns:
    A defaultdict mapping dep path to a dict mapping file path to the list of
    stack infos of that file. A file may occur in several frames; each stack
    info is a tuple of the StackFrame and the priority of the callstack that
    contains it.

    For example:
    {
        'src/': {
            'a.cc': [
                (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0),
                (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0),
            ]
        }
    }
  """
  stack_infos_by_dep = defaultdict(lambda: defaultdict(list))

  for callstack in stacktrace:
    for frame in callstack:
      dep_path = frame.dep_path
      # Only the dependencies present in the crash stack are of interest.
      if dep_path in stack_deps:
        stack_info = (frame, callstack.priority)
        stack_infos_by_dep[dep_path][frame.file_path].append(stack_info)

  return stack_infos_by_dep
150
151
def FindMatchResults(dep_to_file_to_changelogs,
                     dep_to_file_to_stack_infos,
                     stack_deps,
                     ignore_cls=None):
  """Matches crashed files against the files touched in the regression range.

  This method only applies to crashes that have a regression range.

  Args:
    dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
      to the ChangeLogs that touched this file. It is indexed with every dep of
      dep_to_file_to_stack_infos.
    dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file
      path to a list of stack infos of this file. A file may occur in several
      frames; one stack info consists of a StackFrame and the priority of its
      callstack.
    stack_deps (dict): Represents all the dependencies shown in the crash
      stack; the repo_url and revision of each entry are used to fetch blame.
    ignore_cls (set): Set of reverted revisions, handed to MatchResults.

  Returns:
    The values of the populated MatchResults; according to the original
    documentation, a list of MatchResult instances with confidence and reason
    unset.
  """
  results = MatchResults(ignore_cls)

  for dep, stack_infos_by_file in dep_to_file_to_stack_infos.iteritems():
    changelogs_by_file = dep_to_file_to_changelogs[dep]
    dependency = stack_deps[dep]
    repository = GitRepository(dependency.repo_url, HttpClientAppengine())

    for crashed_path, stack_infos in stack_infos_by_file.iteritems():
      for touched_path, changelogs in changelogs_by_file.iteritems():
        if crash_util.IsSameFilePath(crashed_path, touched_path):
          # Blame is used to calculate the distance between the touched lines
          # and the crashed lines of the file.
          blame = repository.GetBlame(crashed_path, dependency.revision)

          # Generate/update one result per changelog in changelogs.
          results.GenerateMatchResults(
              crashed_path, dep, stack_infos, changelogs, blame)

  return results.values()
194
195
def FindItForCrash(stacktrace, regression_deps_rolls, crashed_deps, top_n):
  """Finds culprit results for crash.

  Args:
    stacktrace (Stacktrace): Parsed Stacktrace object.
    regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
      regression range.
    crashed_deps (dict of Dependency): Represents all the dependencies of the
      crashed revision.
    top_n (int): Top n frames of each stack to be analyzed.

  Returns:
    List of Results, sorted by confidence from highest to lowest. Results with
    0 confidence are dropped. At most 3 results are returned, and only the
    best one when its confidence is above 0.999. The list is empty when there
    is no regression range, no match, or no result with non-zero confidence.
  """
  if not regression_deps_rolls:
    return []

  # Findit will only analyze the top n frames in each callstack.
  stack_trace = Stacktrace([
      CallStack(stack.priority,
                format_type=stack.format_type,
                language_type=stack.language_type,
                frame_list=stack[:top_n])
      for stack in stacktrace])

  # We are only interested in the deps in crash stack (the callstack that
  # caused the crash).
  stack_deps = GetDepsInCrashStack(stack_trace.crash_stack, crashed_deps)

  # Get dep and file to changelogs, stack_info and blame dicts.
  dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(
      regression_deps_rolls, stack_deps)
  dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(
      stack_trace, stack_deps)

  results = FindMatchResults(dep_to_file_to_changelogs,
                             dep_to_file_to_stack_infos,
                             stack_deps, ignore_cls)

  if not results:
    return []

  aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])

  # Set result.confidence, result.reasons and result.changed_files. Score() is
  # called purely for its side effects, so use an explicit loop: a map() call
  # that is evaluated lazily would never score anything.
  for result in results:
    aggregated_scorer.Score(result)

  # Filter all the 0 confidence results. A list (rather than filter()) keeps
  # the emptiness check below meaningful regardless of filter() semantics.
  scored_results = [result for result in results if result.confidence != 0]
  if not scored_results:
    return []

  sorted_results = sorted(scored_results, key=lambda r: -r.confidence)

  # A (nearly) certain top result makes the runners-up irrelevant.
  if sorted_results[0].confidence > 0.999:
    return sorted_results[:1]

  return sorted_results[:3]
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698