Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(125)

Side by Side Diff: appengine/findit/crash/findit_for_crash.py

Issue 1861373003: [Findit] Initial code of findit for crash. Add scorers to apply heuristic rules. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Fix nits. Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 from collections import defaultdict
6
7 from common.diff import ChangeType
8 from common.git_repository import GitRepository
9 from common.http_client_appengine import HttpClientAppengine
10 from crash import crash_util
11 from crash.results import MatchResults
12 from crash.scorers.aggregator import Aggregator
13 from crash.scorers.min_distance import MinDistance
14 from crash.scorers.top_frame_index import TopFrameIndex
15
16
def GetDepsInCrashStack(crash_stack, crash_deps):
  """Collects the dependencies referenced by frames of the crash stack.

  Args:
    crash_stack: Iterable of frames, each exposing a ``dep_path`` attribute.
      May be None or empty.
    crash_deps (dict): Maps dep_path to the dependency of the crashed
      revision.

  Returns:
    A dict mapping the dep_path of every frame in crash_stack to the matching
    entry of crash_deps. An empty dict is returned when crash_stack is falsy.

  Raises:
    KeyError: A frame's dep_path is not a key of crash_deps.
  """
  if crash_stack:
    # Several frames may share one dep_path; they all map to the same entry.
    return {
        frame.dep_path: crash_deps[frame.dep_path] for frame in crash_stack
    }

  return {}
27
28
def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps):
  """Gets a dict containing files touched by changelogs for deps in stack_deps.

  The regression range of each dep is determined by regression_deps_rolls.
  Deps in stack_deps that have no roll in regression_deps_rolls are skipped.
  Revisions that were reverted within the range are collected in ignore_cls,
  and the reverting changelogs themselves are not recorded.

  Args:
    regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
      regression range. Each roll is indexed by 'repo_url', 'old_revision'
      and 'new_revision'.
    stack_deps (dict): Represents all the dependencies shown in
      the crash stack, keyed by dep_path.

  Returns:
    A tuple (dep_to_file_to_changelogs, ignore_cls).

    dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
        to the list of ChangeLogs that touched this file. Files that a
        changelog deleted are not recorded for that changelog.

    For example:
    {
        'src/': {
            'a.cc': [
                ChangeLog.FromDict({
                    'author_name': 'test@chromium.org',
                    'message': 'dummy',
                    'committer_email': 'example@chromium.org',
                    'commit_position': 175976,
                    'author_email': 'example@chromium.org',
                    'touched_files': [
                        {
                            'change_type': 'add',
                            'new_path': 'a.cc',
                            'old_path': 'b/a.cc'
                        },
                        ...
                    ],
                    'author_time': 'Thu Mar 31 21:24:43 2016',
                    'committer_time': 'Thu Mar 31 21:28:39 2016',
                    'commit_url':
                        'https://repo.test/+/bcfd',
                    'code_review_url': 'https://codereview.chromium.org/3281',
                    'committer_name': 'example@chromium.org',
                    'revision': 'bcfd',
                    'reverted_revision': None
                }),
            ]
        }
    }

    ignore_cls (set): A set of reverted revisions.
  """
  dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list))
  ignore_cls = set()

  for dep in stack_deps:
    # A dep that did not roll within the regression range has no changelogs
    # to inspect, so skip it instead of failing with a KeyError.
    if dep not in regression_deps_rolls:
      continue

    dep_roll = regression_deps_rolls[dep]

    git_repository = GitRepository(dep_roll['repo_url'], HttpClientAppengine())
    changelogs = git_repository.GetChangeLogs(dep_roll['old_revision'],
                                              dep_roll['new_revision'])

    for changelog in changelogs:
      if changelog.reverted_revision:
        # Skip reverting cls and add reverted revisions to ignore_cls to later
        # filter those reverted revisions.
        ignore_cls.add(changelog.reverted_revision)
        continue

      for touched_file in changelog.touched_files:
        # A deleted file has no new path that a crash frame could point to.
        if touched_file.change_type == ChangeType.DELETE:
          continue

        dep_to_file_to_changelogs[dep][touched_file.new_path].append(changelog)

  return dep_to_file_to_changelogs, ignore_cls
104
105
def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps):
  """Groups the stack information of crashed files by dependency and file.

  Only frames whose dep_path is in stack_deps are considered, and frames
  located in header files (file path ending with '.h') are left out.

  Args:
    stacktrace (Stacktrace): Parsed stacktrace object; iterating it yields
      callstacks, and iterating a callstack yields its frames.
    stack_deps (dict): Represents all the dependencies shown in
      the crash stack.

  Returns:
    A dict mapping dep path to a dict mapping file path to a list of stack
    information of this file. A file may occur in several frames; one stack
    info is a tuple of a StackFrame and the priority of its callstack.

    For example:
    {
        'src/': {
            'a.cc': [
                (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0),
                (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0),
            ]
        }
    }
  """
  stack_infos_by_dep = defaultdict(lambda: defaultdict(list))

  for callstack in stacktrace:
    for frame in callstack:
      # NOTE(review): the header-file exclusion below was discussed in code
      # review (stgao and Sharu Jiang, 2016/04/21); the comment text is
      # truncated in the available copy, so the outcome is not recorded here.
      in_stack_deps = frame.dep_path in stack_deps
      if in_stack_deps and not frame.file_path.endswith('.h'):
        stack_info = (frame, callstack.priority)
        stack_infos_by_dep[frame.dep_path][frame.file_path].append(stack_info)

  return stack_infos_by_dep
142
143
def GetBlameForFilesGroupedByDeps(stacktrace, stack_deps):
  """Gets Blames of files in stacktrace for deps in stack_deps.

  Each (dep_path, file path) pair is blamed once, even when the file shows up
  in several frames or callstacks.

  Args:
    stacktrace (Stacktrace): Parsed stacktrace object; iterating it yields
      callstacks, and iterating a callstack yields its frames.
    stack_deps (dict of Dependencys): Represents all the dependencies shown in
      the crash stack. Each dependency provides ``repo_url`` and ``revision``.

  Returns:
    A dict, maps dep_path to a dict mapping file path to its Blame.

    For example:
    {
        'src/': {
            'a.cc': Blame(revision, 'a.cc'),
            'b.cc': Blame(revision, 'b.cc')
        }
    }
  """
  dep_to_file_to_blame = defaultdict(dict)
  for callstack in stacktrace:
    for frame in callstack:
      # We only care about those dependencies in crash stack.
      if frame.dep_path not in stack_deps:
        continue

      file_to_blame = dep_to_file_to_blame[frame.dep_path]
      # The blame of a file at a fixed revision does not depend on the frame,
      # so do not fetch it again for a file that was already blamed.
      if frame.file_path in file_to_blame:
        continue

      dependency = stack_deps[frame.dep_path]
      git_repository = GitRepository(dependency.repo_url,
                                     HttpClientAppengine())
      file_to_blame[frame.file_path] = git_repository.GetBlame(
          frame.file_path, dependency.revision)

  return dep_to_file_to_blame
177
178
def FindMatchResults(dep_to_file_to_changelogs,
                     dep_to_file_to_stack_infos,
                     dep_to_file_to_blame,
                     ignore_cls=None):
  """Finds results by matching stacktrace and changelogs in regression range.

  This method only applies to those crashes with regression range.

  Args:
    dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
      to ChangeLogs that touched this file.

    dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path
      to a list of stack information of this file. A file may occur in several
      frames, one stack info consists of a StackFrame and the callstack
      priority of it.

    dep_to_file_to_blame (dict): Maps dep_path to a dict mapping file path to
      its Blame.

    ignore_cls (set): Set of reverted revisions.

  Returns:
    The values() of the MatchResults built from the matches, with confidence
    and reason unset.
  """
  match_results = MatchResults(ignore_cls)

  # items() is used rather than iteritems() so the loops work on both
  # Python 2 and Python 3.
  for dep, file_to_stack_infos in dep_to_file_to_stack_infos.items():
    # Looked up once per dep. get() neither raises for a dep without
    # changelogs nor inserts an empty entry into a defaultdict argument.
    file_to_changelogs = dep_to_file_to_changelogs.get(dep)
    if not file_to_changelogs:
      continue

    for crashed_file_path, stack_infos in file_to_stack_infos.items():
      for touched_file_path, changelogs in file_to_changelogs.items():
        if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path):
          continue

        match_results.GenerateMatchResults(
            crashed_file_path, dep, stack_infos, changelogs,
            dep_to_file_to_blame[dep][crashed_file_path])

  return match_results.values()
220
221
def FindItForCrash(stacktrace, regression_deps_rolls, crashed_deps):
  """Finds culprit results for crash.

  Args:
    stacktrace (Stacktrace): Parsed Stacktrace object.
    regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
      regression range.
    crashed_deps (dict of Dependencys): Represents all the dependencies of
      crashed revision.

  Returns:
    List of dicts of culprit results, sorted by confidence from highest to
    lowest. An empty list is returned when there is no regression range or
    when nothing matches.
  """
  if not regression_deps_rolls:
    return []

  # We are only interested in the deps in crash stack (the callstack that
  # caused the crash).
  stack_deps = GetDepsInCrashStack(stacktrace.GetCrashStack(), crashed_deps)

  # Get dep and file to changelogs, stack_info and blame dicts.
  dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(
      regression_deps_rolls, stack_deps)
  dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(
      stacktrace, stack_deps)
  dep_to_file_to_blame = GetBlameForFilesGroupedByDeps(stacktrace, stack_deps)

  results = FindMatchResults(dep_to_file_to_changelogs,
                             dep_to_file_to_stack_infos,
                             dep_to_file_to_blame,
                             ignore_cls)

  if not results:
    return []

  aggregator = Aggregator([TopFrameIndex(), MinDistance()])

  # ScoreAndReason is called for its side effect on each result. An explicit
  # loop is used because map() is lazy on Python 3 and would never run it.
  for result in results:
    aggregator.ScoreAndReason(result)

  return sorted([result.ToDict() for result in results],
                key=lambda r: -r['confidence'])
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698