Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(195)

Side by Side Diff: appengine/findit/crash/findit_for_crash.py

Issue 1861373003: [Findit] Initial code of findit for crash. Add scorers to apply heuristic rules. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Fix nits and doc strings. Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 from collections import defaultdict
6
7 from common import chromium_deps
8 from common.change_log import ChangeLog
9 from common.git_repository import GitRepository
10 from common.http_client_appengine import HttpClientAppengine
11 from crash import crash_util
12 from crash.results import MatchResult, MatchResults
13 from crash.scorers.aggregator import Aggregator
14 from crash.scorers.top_frame_index import TopFrameIndex
15 from crash.scorers.min_distance import MinDistance
16
17
def GetDepsInCrashStack(crash_stack, crash_deps):
  """Gets the dependencies that appear in the crash stack.

  Args:
    crash_stack (iterable or None): The frames of a single callstack. Each
      frame must expose a ``dep_path`` attribute.
    crash_deps (dict): Maps dep_path to the dependency information of the
      crashed revision.

  Returns:
    A dict mapping every dep_path found in ``crash_stack`` to
    ``crash_deps[dep_path]``. Empty if ``crash_stack`` is empty or None.

  Raises:
    KeyError: If a frame names a dep_path that is missing from ``crash_deps``.
  """
  if not crash_stack:
    return {}

  stack_deps = {}
  for frame in crash_stack:
    # A frame without a dep_path cannot be looked up in crash_deps; skip it
    # instead of failing the whole analysis with a KeyError.
    if not frame.dep_path:
      continue

    stack_deps[frame.dep_path] = crash_deps[frame.dep_path]

  return stack_deps
32
33
def GetDepFileToChangeLogsAndIgnoreCls(regression_deps_rolls, stack_deps):
  """Collects, per dependency, the changelogs that touched each file.

  Only the dependencies in ``stack_deps`` are examined. For each of them the
  changelogs between the old and new revision of its roll are fetched.

  Args:
    regression_deps_rolls (dict): Maps dep_path to the roll of that dependency
      in the regression range. Each roll is indexed with the keys 'repo_url',
      'old_revision' and 'new_revision'.
    stack_deps (dict): Represents all the dependencies shown in the crash
      stack, keyed by dep_path.

  Returns:
    A tuple (dep_file_to_changelogs, ignore_cls).

    dep_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
      to the ChangeLogs that touched this file.

      For example:
      {
          'src/': {
              'a.cc': [
                  ChangeLog.FromDict({
                      'author_name': 'test@chromium.org',
                      'message': 'dummy',
                      'committer_email': 'example@chromium.org',
                      'commit_position': 175976,
                      'author_email': 'example@chromium.org',
                      'touched_files': [
                          {
                              'change_type': 'add',
                              'new_path': 'a.cc',
                              'old_path': 'b/a.cc'
                          },
                          ...
                      ],
                      'author_time': 'Thu Mar 31 21:24:43 2016',
                      'committer_time': 'Thu Mar 31 21:28:39 2016',
                      'commit_url':
                          'https://repo.test/+/bcfd',
                      'code_review_url': 'https://codereview.chromium.org/3281',
                      'committer_name': 'example@chromium.org',
                      'revision': 'bcfd',
                      'reverted_revision': None
                  }),
              ]
          }
      }

    ignore_cls (set): A set of reverted revisions.
  """
  dep_file_to_changelogs = defaultdict(lambda: defaultdict(list))
  ignore_cls = set()

  for dep in stack_deps:
    # A dependency in the crash stack is not guaranteed to have rolled in the
    # regression range; without a roll there are no changelogs to inspect.
    dep_roll = regression_deps_rolls.get(dep)
    if not dep_roll:
      continue

    git_repository = GitRepository(dep_roll['repo_url'], HttpClientAppengine())
    changelogs = git_repository.GetChangeLogs(dep_roll['old_revision'],
                                              dep_roll['new_revision'])

    for changelog in changelogs:
      # Whether a changelog is a revert does not depend on the files it
      # touched, so decide this once per changelog. The reverted revision is
      # recorded so it can be filtered out later while matching cls, and the
      # reverting changelog itself is not recorded against any file.
      if changelog.reverted_revision:
        ignore_cls.add(changelog.reverted_revision)
        continue

      for touched_file in changelog.touched_files:
        # If a file is deleted or is a .h file, skip this file.
        if not touched_file.new_path or touched_file.new_path.endswith('.h'):
          continue

        dep_file_to_changelogs[dep][touched_file.new_path].append(changelog)

  return dep_file_to_changelogs, ignore_cls
110
111
def GetDepFileToStackInfos(stacktrace):
  """Groups the frames of a stacktrace by dependency and by file.

  Frames whose file path ends with '.h' are left out.

  Args:
    stacktrace (Stacktrace): Parsed stacktrace object; iterating it yields
      callstacks, and iterating a callstack yields its frames.

  Returns:
    A dict mapping dep path to a dict mapping file path to a list of stack
    infos for that file. One stack info is a tuple of a StackFrame and the
    priority of the callstack it belongs to; a file that occurs in several
    frames gets one stack info per frame.

    For example:
    {
        'src/': {
            'a.cc': [
                (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0),
                (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0),
            ]
        }
    }
  """
  stack_infos_by_dep = defaultdict(lambda: defaultdict(list))

  for stack in stacktrace:
    non_header_frames = (
        entry for entry in stack if not entry.file_path.endswith('.h'))
    for entry in non_header_frames:
      stack_info = (entry, stack.priority)
      stack_infos_by_dep[entry.dep_path][entry.file_path].append(stack_info)

  return stack_infos_by_dep
144
145
def GetDepFileToBlame(stacktrace, stack_deps):
  """Gets Blames for the files in the stacktrace.

  Only frames whose dependency is in ``stack_deps`` are considered. Each
  (dep_path, file_path) pair is blamed once, at the revision recorded for
  that dependency in ``stack_deps``.

  Args:
    stacktrace (Stacktrace): Parsed stacktrace object; iterating it yields
      callstacks, and iterating a callstack yields its frames.
    stack_deps (dict of Dependencys): Represents all the dependencies shown in
      the crash stack, keyed by dep_path. Each value exposes ``repo_url`` and
      ``revision``.

  Returns:
    A dict, maps dep_path to a dict mapping file path to its Blame.

    For example:
    {
        'src/': {
            'a.cc': Blame(revision, 'a.cc'),
            'b.cc': Blame(revision, 'b.cc')
        }
    }
  """
  dep_file_to_blame = defaultdict(dict)

  for callstack in stacktrace:
    for frame in callstack:
      # We only care about those dependencies in crash stack.
      if frame.dep_path not in stack_deps:
        continue

      # The same file commonly shows up in several frames and callstacks.
      # Path and revision are identical each time, so fetch the blame once
      # instead of once per frame.
      file_to_blame = dep_file_to_blame[frame.dep_path]
      if frame.file_path in file_to_blame:
        continue

      dependency = stack_deps[frame.dep_path]
      git_repository = GitRepository(dependency.repo_url,
                                     HttpClientAppengine())
      file_to_blame[frame.file_path] = git_repository.GetBlame(
          frame.file_path, dependency.revision)

  return dep_file_to_blame
178
179
def FindMatchResults(dep_file_to_changelogs,
                     dep_file_to_stack_infos,
                     dep_file_to_blame,
                     ignore_cls=None):
  """Finds culprit results by matching stacktrace and changelogs in regression
  range. This method only applies to those crashes with regression range.

  Args:
    dep_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
      to ChangeLogs that touched this file.

    dep_file_to_stack_infos (dict): Maps dep path to a dict mapping file path
      to a list of stack information of this file. A file may occur in several
      frames, one stack info consists of a StackFrame and the callstack
      priority of it.

    dep_file_to_blame (dict): Maps dep_path to a dict mapping file path to
      its Blame.

    ignore_cls (set): Set of reverted revisions.

  Returns:
    The values of the MatchResults container built here, i.e. a list of
    MatchResult instances with confidence and reason unset.
  """
  match_results = MatchResults(ignore_cls)

  for dep, file_to_stack_infos in dep_file_to_stack_infos.items():
    # Use .get() so that a dependency with no changelogs neither raises on a
    # plain dict nor inserts an empty entry into a caller-owned defaultdict.
    file_to_changelogs = dep_file_to_changelogs.get(dep)
    if not file_to_changelogs:
      continue

    for crashed_file_path, stack_infos in file_to_stack_infos.items():
      for touched_file_path, changelogs in file_to_changelogs.items():
        if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path):
          continue

        match_results.GenerateMatchResults(
            crashed_file_path, dep, stack_infos, changelogs,
            dep_file_to_blame[dep][crashed_file_path])

  return match_results.values()
219
220
def FindItForCrash(stacktrace, regression_range, crash_revision):
  """Finds culprit results for crash.

  Args:
    stacktrace (Stacktrace): Parsed Stacktrace object.
    regression_range (tuple or None): A tuple of githash strs -
      (good_revision, bad_revision), None if no regression range provided.
    crash_revision (str): The crashed revision githash str.

  Returns:
    List of dicts of culprit results, sorted by confidence from highest to
    lowest. Empty if there is no regression range or nothing matched.
  """
  if not regression_range:
    return []

  good_revision, bad_revision = regression_range

  # Get regression deps and crash deps.
  # Right now we only care about those deps for all platforms.
  # TODO: pass the platform of the crash instead of 'all' once it is available
  # to this function.
  regression_deps_rolls = chromium_deps.GetDEPSRollsDict(
      good_revision, bad_revision, 'all')
  crash_deps = chromium_deps.GetChromeDependency(crash_revision, 'all')

  crash_stack = stacktrace.GetCrashStack()
  stack_deps = GetDepsInCrashStack(crash_stack, crash_deps)

  # Get dep and file to changelogs, stack_info and blame dicts.
  dep_file_to_changelogs, ignore_cls = GetDepFileToChangeLogsAndIgnoreCls(
      regression_deps_rolls, stack_deps)
  dep_file_to_stack_infos = GetDepFileToStackInfos(stacktrace)
  dep_file_to_blame = GetDepFileToBlame(stacktrace, stack_deps)

  results = FindMatchResults(dep_file_to_changelogs,
                             dep_file_to_stack_infos,
                             dep_file_to_blame,
                             ignore_cls)

  if not results:
    return []

  aggregator = Aggregator([TopFrameIndex(), MinDistance()])

  # ScoreAndReason is called for its effect on each result, so use an explicit
  # loop: map() would discard the return values and does nothing at all where
  # it is lazy.
  for result in results:
    aggregator.ScoreAndReason(result)

  return sorted((result.ToDict() for result in results),
                key=lambda r: r['confidence'], reverse=True)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698