Chromium Code Reviews

Side by Side Diff: tools/findit/findit_for_crash.py

Issue 468823003: [Findit] findit algorithms and a cluster-fuzz implementation. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 6 years, 4 months ago
1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 import logging
6 import os
7 from threading import Lock, Thread
8
9 import blame
10 import component_dictionary
11 import crash_utils
12 import git_repository_parser
13 import match_set
14 import svn_repository_parser
15 import utils
16
17
18 LINE_CHANGE_PRIORITY = 1
19 FILE_CHANGE_PRIORITY = 2
20 logging.basicConfig(filename='errors.log', level=logging.WARNING,
stgao 2014/08/14 18:25:42 Let's move this out. logging setup should be done
jeun 2014/08/14 20:54:12 Done.
21 filemode='w+')
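Per the review note above, logging configuration is better done once at process startup rather than at import time. A minimal sketch of what that could look like in a hypothetical entry-point script (the module name findit_main.py and the call into FindItForCrash are assumptions, not part of this CL):

  # findit_main.py -- hypothetical entry point, shown for illustration only.
  import logging

  import findit_for_crash


  def main():
    # Configure logging once at process startup instead of at import time.
    logging.basicConfig(filename='errors.log', filemode='w+',
                        level=logging.WARNING)
    # ... parse inputs here and call findit_for_crash.FindItForCrash(...) ...


  if __name__ == '__main__':
    main()
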
22 URL_MAP = crash_utils.ParseURLsFromConfig('urls.config')
23
24
25 def GenerateMatchEntry(
26 matches, revisions, cl, file_path, file_name, function, component_path,
27 component_name, crashed_lines, stack_frame_indices, file_action,
28 repository_parser):
29 """Generates a match object and adds it to the match set.
30
31 Args:
32 matches: A MatchSet object, a map from CL to a match object.
33 revisions: A dictionary mapping CL number to its revision information,
34 as returned by the ParseChangelog function of the parser.
35 cl: The SVN revision number or git hash.
36 file_path: The path of the file.
37 file_name: The name of the file for which this CL might be the culprit.
38 function: The function that caused the crash.
39 component_path: The path of the component this file is from.
40 component_name: The name of the component the file is from.
41 crashed_lines: The list of the lines in the file that caused the crash.
42 stack_frame_indices: The list of positions of this file within a stack.
43 file_action: Whether the file is modified, added or deleted.
44 repository_parser: The parser object to parse line diff.
45 """
46 # Check if this CL should be ignored.
stgao 2014/08/14 18:25:42 avoided -> ignored?
jeun 2014/08/14 20:54:13 Done.
47 with matches.matches_lock:
48 if cl in matches.cls_to_ignore:
49 return
50
51 # If this CL is not already identified as suspected, create a new entry.
52 if cl not in matches.matches:
53 revision = revisions[cl]
54 match = match_set.Match(revision, component_name)
55 message = revisions[cl]['message']
56 # TODO(jeun): Don't hold lock while issuing http request.
57 match.ParseMessage(message, matches.codereview_api_url)
58
59 # If this match is a revert, add it to the set of CLs to be ignored.
60 if match.is_reverted:
61 matches.cls_to_ignore.add(cl)
62
63 # If this match has info on which CL it is a revert of, add that CL too.
64 if match.revert_of:
65 matches.cls_to_ignore.add(match.revert_of)
66
67 # Return because we do not need to look at this CL anymore.
68 return
69
70 # Add this CL to the set of matches.
71 matches.matches[cl] = match
72
73 # Else, bring in the existing result.
74 else:
75 match = matches.matches[cl]
76
77 # Parse line diff information for this file.
78 (diff_url, changed_line_numbers, changed_line_contents) = (
79 repository_parser.ParseLineDiff(
80 file_path, component_path, file_action, cl))
81
82 # Ignore this match if the component is not supported for svn.
83 if not diff_url:
84 return
85
86 # Find the intersection between the lines that this file crashed on and
87 # the changed lines.
88 (line_intersection, stack_frame_index_intersection) = (
89 crash_utils.Intersection(
90 crashed_lines, stack_frame_indices, changed_line_numbers))
91
92 # Find the minimum distance between the changed lines and crashed lines.
93 min_distance = crash_utils.FindMinLineDistance(crashed_lines,
94 changed_line_numbers)
95
96 # Check whether this CL changes the crashed lines or not.
97 if line_intersection:
98 priority = LINE_CHANGE_PRIORITY
99 else:
100 priority = FILE_CHANGE_PRIORITY
101
102 # Add the parsed information to the object.
103 with matches.matches_lock:
104 match.line_of_crash.append(line_intersection)
105 match.files.append(file_name)
106
107 # Update the min distance only if it is less than the current one.
108 if min_distance < match.min_distance:
109 match.min_distance = min_distance
110
111 # If this CL does not change the crashed line, all occurrences of this
112 # file in the stack have the same priority.
113 if not stack_frame_index_intersection:
114 stack_frame_index_intersection = stack_frame_indices
115 match.stack_frame_indices.append(stack_frame_index_intersection)
116 match.file_urls.append(diff_url)
117
118 # Only record the highest rank of this CL.
119 if priority < match.rank:
120 match.rank = priority
121 match.priorities.append(priority)
122 match.function.append(function)
123
124
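GenerateMatchEntry above assigns LINE_CHANGE_PRIORITY only when the crashed lines intersect the changed lines, and otherwise falls back to FILE_CHANGE_PRIORITY, with the minimum line distance recorded as a later tie-breaker. The two helpers live in crash_utils and are not shown in this file; the sketch below is only the behavior the call sites appear to assume (including the assumption that crashed_lines and stack_frame_indices are parallel lists), not the actual implementation:

  # Illustrative stand-ins for the crash_utils helpers used above.
  def Intersection(crashed_lines, stack_frame_indices, changed_line_numbers):
    """Returns the crashed lines that were changed, and their frame indices."""
    line_intersection = []
    frame_intersection = []
    for line, frames in zip(crashed_lines, stack_frame_indices):
      if line in changed_line_numbers:
        line_intersection.append(line)
        frame_intersection.append(frames)
    return line_intersection, frame_intersection


  def FindMinLineDistance(crashed_lines, changed_line_numbers):
    """Returns the smallest gap between a crashed line and a changed line."""
    distances = [abs(crashed - changed)
                 for crashed in crashed_lines
                 for changed in changed_line_numbers]
    return min(distances) if distances else float('inf')
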
125 def FindMatch(revisions, file_map, file_dict, component_path, component_name,
126 repository_parser, codereview_api_url):
127 """Finds a CL that modifies file in the stacktrace.
128
129 Args:
130 revisions: A dictionary mapping revision number to the CL information.
131 file_map: A dictionary mapping a file to the revisions that modify it.
132 file_dict: A dictionary mapping file to its occurrence in stacktrace.
133 component_path: The path of the component to search for.
134 component_name: The name of the component to search for.
135 repository_parser: The parser object to parse the line diff.
136 codereview_api_url: A code review url to retrieve data from.
137
138 Returns:
139 Matches, a set of match objects.
140 """
141 matches = match_set.MatchSet(codereview_api_url)
142 threads = []
143
144 # For each file in the stacktrace that is not a header file, check against
145 # all CLs to see if any CL changes this file. If so, add it to the set of
146 # suspected CLs.
147 for file_name in file_dict:
148 # Ignore header files.
149 if file_name.endswith('.h'):
150 continue
151
152 # If the file in the stacktrace is not changed in any commits, continue.
153 if file_name not in file_map:
154 continue
155
156 path_dict = file_dict.GetPathDic(file_name)
157 # There can be two files in different paths that have the same
158 # name, so the path also has to be checked.
159 for path in path_dict:
160 crashed_lines = file_dict.GetCrashedLineNumbers(path)
161 stack_frame_nums = file_dict.GetCrashStackFrameindex(path)
162 function = file_dict.GetCrashFunction(path)
163
164 # Iterate through the revisions in which this file is changed.
165 for (cl, file_action, file_path) in file_map[file_name]:
166
stgao 2014/08/14 18:25:42 This empty line could be removed.
jeun 2014/08/14 20:54:13 Done.
167 # If the paths of the two files are different or the commit deletes the
168 # file, continue.
169 if not crash_utils.GuessIfSameSubPath(path, file_path) or (
170 file_action == 'D'):
171 continue
172
173 # Create a new thread for adding this CL.
174 match_thread = Thread(
175 target=GenerateMatchEntry,
176 args=[matches, revisions, cl, file_path, file_name, function,
177 component_path, component_name, crashed_lines,
178 stack_frame_nums, file_action,
179 repository_parser])
180 threads.append(match_thread)
181 match_thread.start()
182
183 # Make sure all threads finish before returning.
184 for match_thread in threads:
185 match_thread.join()
186
187 # Remove CLs that are reverts.
188 matches.RemoveReverts()
189
190 return matches
191
192
193 def FindMatchForComponent(component_path, file_dict, changelog,
194 callstack_priority, results, results_lock):
195 """Parses changelog and finds suspected CLs for a given component.
196
197 Args:
198 component_path: The path of component to look for the culprit CL.
199 file_dict: A dictionary mapping file to its occurrence in stackframe.
200 changelog: The parsed changelog for this component.
201 callstack_priority: The priority of this call stack: 0 if from the crash
202 stack, 1 if from the freed stack, 2 if from the previously-allocated stack.
203 results: A dictionary to store the result.
204 results_lock: A lock that guards results.
stgao 2014/08/14 18:25:42 result_map is not clear.
jeun 2014/08/14 20:54:12 Done.
205 """
206 (repository_parser, component_name, revisions, file_to_revision_map) = \
207 changelog
208
209 # Find match for this component.
210 codereview_api_url = URL_MAP['codereview']['review_url']
211 component_result = FindMatch(
212 revisions, file_to_revision_map, file_dict, component_path,
213 component_name, repository_parser, codereview_api_url)
214 matches = component_result.matches
215
216 # Add all the match results from the dictionary to the list so that they
217 # can be sorted.
218 with results_lock:
219 for cl in matches:
220 match = matches[cl]
221 results.append((callstack_priority, cl, match))
222
223
224 def FindMatchForCallstack(
225 callstack, components, component_to_changelog_map, results,
226 results_lock):
227 """Finds culprit cl for a stack within a stacktrace.
228
229 For each components to look for, create new thread that computes the matches
230 and join the results at the end.
231 Args:
232 callstack: A callstack in a stacktrace to find the result for.
233 components: A set of components to look for.
234 component_to_changelog_map: A map from component to its parsed changelog.
235 results: A list to aggregate results from all stacktraces.
236 results_lock: A lock that guards results.
stgao 2014/08/14 18:25:42 all_result is not clear.
jeun 2014/08/14 20:54:13 Done.
237 """
238 # Create component dictionary from the component and call stack.
239 component_dict = component_dictionary.ComponentDictionary(callstack,
240 components)
241 callstack_priority = callstack.priority
242
243 # Iterate through all components and create a new thread for each.
244 threads = []
245 for component_path in component_dict:
246 file_dict = component_dict.GetFileDict(component_path)
247
248 if component_path not in component_to_changelog_map:
249 continue
250 changelog = component_to_changelog_map[component_path]
251
252 t = Thread(
253 target=FindMatchForComponent,
254 args=[component_path, file_dict, changelog,
255 callstack_priority, results, results_lock])
256 threads.append(t)
257 t.start()
258
259 # Join the threads before returning.
260 for t in threads:
261 t.join()
262
263
264 def FindMatchForStacktrace(stacktrace, components, regression_dict):
265 """Finds the culprit CL for stacktrace.
266
267 The passed stacktrace is either a release-build stacktrace
268 or a debug-build stacktrace.
269
270 Args:
271 stacktrace: A list of parsed stacks within a stacktrace.
272 components: A set of components to look for.
273 regression_dict: A dictionary mapping component path to its regression.
274
275 Returns:
276 A list of match results from all stacks.
277 """
278 # A list to aggregate results from the whole stacktrace.
279 results = []
280 results_lock = Lock()
281
282 # Parse changelog for each of the components and store it.
283 svn_parser = svn_repository_parser.SVNParser(URL_MAP['svn'])
284 git_parser = git_repository_parser.GitParser(regression_dict, URL_MAP['git'])
285
286 component_to_changelog_map = {}
287 for component_path in components:
288 component_object = regression_dict[component_path]
289 range_start = component_object['old_revision']
290 range_end = component_object['new_revision']
291 if range_start == '0':
292 continue
293
294 component_name = regression_dict[component_path]['name']
295
296 # Check if the regression is a git hash or an SVN revision number, and
297 # create the appropriate parser object.
298 is_git = utils.IsGitHash(range_start)
299 if is_git:
300 repository_parser = git_parser
301 else:
302 repository_parser = svn_parser
303
304 # Parse revisions and changed files in regression.
305 (revisions, file_to_revision_map) = repository_parser.ParseChangelog(
306 component_path, range_start, range_end)
307
308 if not (revisions and file_to_revision_map):
309 continue
310
311 component_to_changelog_map[component_path] = (repository_parser,
312 component_name,
313 revisions,
314 file_to_revision_map)
315
316 # Create a separate thread for each call stack in the stacktrace.
317 threads = []
318 for callstack in stacktrace.stack_list:
319 t = Thread(
320 target=FindMatchForCallstack,
321 args=[callstack, components, component_to_changelog_map,
322 results, results_lock])
323 threads.append(t)
324 t.start()
325
326 # Join the threads before returning.
327 for t in threads:
328 t.join()
329
330 return results
331
332
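FindMatchForStacktrace above picks the git or SVN parser by calling utils.IsGitHash on the start of the regression range; the helper itself lives in utils and is not shown here. A minimal sketch of such a check, assuming a git hash is a 40-character hexadecimal string while an SVN revision is purely numeric:

  import re

  # Illustrative only; not the utils.IsGitHash implementation from this CL.
  GIT_HASH_PATTERN = re.compile(r'^[0-9a-fA-F]{40}$')


  def IsGitHash(revision):
    return bool(GIT_HASH_PATTERN.match(str(revision)))
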
333 def SortAndFilterMatches(matches):
334 """Filters the list of potential culprit CLs to remove noise.
335
336 Args:
337 matches: A list containing match results.
338
339 Returns:
340 Filtered match results.
341 """
342 new_matches = []
343 line_changed = False
344 in_crash_state = False
345 highest_priority_stack = crash_utils.INFINITY
346
347 # Sort the matches in a specific order.
348 matches.sort(key=lambda (stack_index, cl, match):
349 (match.rank, stack_index,
350 crash_utils.FindMinStackFrameNumber(match.stack_frame_indices,
351 match.priorities),
352 -len(match.files), match.min_distance))
353
354 # Iterate through the matches to find out what results are significant.
355 for stack_index, cl, match in matches:
356
stgao 2014/08/14 18:25:42 This empty line could be removed.
jeun 2014/08/14 20:54:12 Done.
357 # Check if the current match changes crashed line.
358 is_line_change = (match.rank == LINE_CHANGE_PRIORITY)
359
360 # Check which call stack this match is from.
361 current_stack = stack_index
362 if current_stack < highest_priority_stack:
363 highest_priority_stack = current_stack
364
365 # Check if current match changes a file that occurs in crash state.
366 flattened_stack_frame_indices = [i for sl in match.stack_frame_indices
367 for i in sl]
368 current_in_crash_state = min(flattened_stack_frame_indices) < 5
369
370 # Ignore this match and everything after it if any of the following hold:
371 # - The current match does not change crashed lines, but an earlier match does.
372 # - The current match is not in the crash state, but an earlier match is.
373 # - An earlier match came from a higher-priority stack than the current
374 #   one.
stgao 2014/08/14 18:25:42 Could this comment turned into bullet point?
jeun 2014/08/14 20:54:13 Done.
375 if (line_changed and not is_line_change) or (
376 in_crash_state and not current_in_crash_state) or (
377 current_stack > highest_priority_stack):
378 break
379
380 # Update the variables.
381 if is_line_change:
382 line_changed = True
383 if current_in_crash_state:
384 in_crash_state = True
385
386 # Add current match to the filtered result.
387 new_matches.append((stack_index, cl, match))
388
389 return new_matches
390
391
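The sort in SortAndFilterMatches orders matches by rank (line changes before file-only changes), then by the call stack they came from, then by the earliest stack frame the CL appears at (as computed by crash_utils.FindMinStackFrameNumber), then by the number of changed files (more files first), and finally by minimum line distance. Since Python compares tuples lexicographically, a match with a better rank wins regardless of the later fields; a tiny illustration with made-up key values:

  # Hypothetical sort keys: (rank, stack_index, min_frame, -len(files), distance).
  key_line_change = (1, 0, 3, -2, 10)   # CL that changed a crashed line.
  key_file_change = (2, 0, 0, -5, 0)    # CL that only touched the file.
  assert sorted([key_file_change, key_line_change])[0] == key_line_change
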
392 def GenerateReasonForMatches(matches):
393 """Generates a reason that a match (CL) is a culprit cl.
394
395 Args:
396 matches: A list of match objects.
397 """
398 # Iterate through the matches in the list.
399 for i, _, match in matches:
400 reason = []
stgao 2014/08/14 18:25:42 reasons?
jeun 2014/08/14 20:54:12 I would call it a reason because it is a list of s
401
402 # Zip the files in the match with the reason they are suspected
403 # (how the file is modified) and sort them in a specific order.
404 match_by_priority = zip(
405 match.priorities, match.line_of_crash, match.files,
406 match.file_urls)
407
408 match_by_priority.sort(
409 key=lambda (priority, crashed_line_number, file_name, file_url):
410 (priority, len(file_name)))
411
412 # Iterate through the sorted matches.
413 for i in range(len(match_by_priority)):
414 (priority, crashed_line_number, file_name, file_url) = \
stgao 2014/08/14 18:25:42 file_name or file_path? Same the one for above in
jeun 2014/08/14 20:54:13 It is file_name.
415 match_by_priority[i]
416
417 # If the file in the match is a line change, append an explanation.
418 if priority == LINE_CHANGE_PRIORITY:
419 reason.append(
420 ' Line %s of file %s, which caused the crash according '
421 'to the stacktrace, is changed in this cl.\n' %
422 (crash_utils.PrettifyList(crashed_line_number),
423 crash_utils.PrettifyFiles([(file_name, file_url)])))
424
425 else:
426 # Get all the files that are not line changes.
427 rest_of_the_files = match_by_priority[i:]
428
429 # Handle single vs. multiple files in the string.
430 if len(rest_of_the_files) == 1:
431 file_string = ' File %s is changed in this cl.\n'
432 else:
433 file_string = ' Files %s are changed in this cl.\n'
434
435 # Create a list of file names and their URLs, and prettify the list.
436 file_name_with_url = [(file_name, file_url)
437 for (_, _, file_name, file_url)
438 in rest_of_the_files]
439 pretty_file_name_url = crash_utils.PrettifyFiles(file_name_with_url)
440
441 # Add the reason and break, since the rest of the files are handled.
442 reason.append(file_string % pretty_file_name_url)
443 break
444
445 # Set the reason as a string.
446 match.reason = ''.join(reason)
447
448
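GenerateReasonForMatches relies on crash_utils.PrettifyList and crash_utils.PrettifyFiles to render line numbers and (file name, URL) pairs as readable strings; those helpers are defined elsewhere in this CL and are not shown here. A rough sketch of the kind of formatting the reason strings above appear to expect (the exact output format is an assumption):

  # Illustrative stand-ins; the real helpers live in crash_utils.
  def PrettifyList(items):
    """Renders a list of line numbers such as [10, 12] as '10, 12'."""
    return ', '.join(str(item) for item in items)


  def PrettifyFiles(file_name_url_pairs):
    """Renders (file_name, url) pairs as 'name (url)' strings."""
    return ', '.join('%s (%s)' % (name, url)
                     for name, url in file_name_url_pairs)
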
449 def CombineMatches(matches):
450 """Combine possible duplicates in matches.
451
452 Args:
453 matches: A list of match objects, along with the callstack priority and
454 the CL each one is from.
455 Returns:
456 A combined list of matches.
457 """
458 combined_matches = []
459
460 for stack_index, cl, match in matches:
461 found_match = None
462
463 # Iterate through the list of combined matches.
464 for _, cl_combined, match_combined in combined_matches:
465 # Check if the current match is already higher up in the result.
466 if cl == cl_combined:
467 found_match = match_combined
468
469 # If the current match is not already in the list of matches, add it.
470 if not found_match:
471 combined_matches.append((stack_index, cl, match))
472 continue
473
474 # Combine the reasons if the current match is already in the list.
475 found_match.reason += match.reason
476
477 return combined_matches
478
479
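CombineMatches keeps the first tuple seen for each CL and folds the reasons of later duplicates into it. A small usage sketch with stand-in objects (SimpleMatch and the values below are hypothetical; only the reason attribute is exercised):

  class SimpleMatch(object):
    """Minimal stand-in for match_set.Match, for illustration only."""

    def __init__(self, reason):
      self.reason = reason


  first = SimpleMatch('Changed crashed line 10 of foo.cc.\n')
  duplicate = SimpleMatch('Also touches bar.cc.\n')
  combined = CombineMatches([(0, 'r12345', first), (1, 'r12345', duplicate)])
  # One entry remains for r12345, with both reasons concatenated.
  assert len(combined) == 1
  assert combined[0][2].reason == ('Changed crashed line 10 of foo.cc.\n'
                                   'Also touches bar.cc.\n')
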
480 def FilterAndGenerateReasonForMatches(result):
481 """A wrapper function.
482
483 It generates reasons for the matches and returns a string representation
484 of the filtered results.
485
486 Args:
487 result: A list of match objects.
488
489 Returns:
490 A string representation of filtered results.
491 """
492 new_result = SortAndFilterMatches(result)
493 GenerateReasonForMatches(new_result)
494 combined_matches = CombineMatches(new_result)
495 return crash_utils.MatchListToResultList(combined_matches)
496
497
498 def ParseCrashComponents(main_stack, top_n_frames=3):
499 """Parses the crashing component.
500
501 Args:
502 main_stack: Main stack from the stacktrace.
503 top_n_frames: The number of top frames to regard as crashing frames.
504 Returns:
505 A set of components.
506 """
507 frame_list = main_stack.GetTopNFrames(top_n_frames)
508 components = set()
509
510 for frame in frame_list:
511 components.add(frame.component_path)
512
513 return components
514
515
516 def GenerateAndFilterBlameList(callstack, crash_revision_dict, regression_dict):
517 """A wrapper function.
518
519 Finds blame information for the stack and returns a list of blame results.
520
521 Args:
522 callstack: A callstack to find the blame information.
523 crash_revision_dict: A dictionary mapping component to its crash revision.
524 regression_dict: A dictionary mapping component to its regression.
525
526 Returns:
527 A list of blame results.
528 """
529 # Set up parser objects to use for parsing blame information.
530 svn_parser = svn_repository_parser.SVNParser(URL_MAP['svn'])
531 git_parser = git_repository_parser.GitParser(regression_dict, URL_MAP['git'])
532 parsers = [svn_parser, git_parser]
533
534 # Create and generate the blame objects from the callstack.
535 blame_list = blame.BlameList()
536 blame_list.FindBlame(callstack, crash_revision_dict, regression_dict,
537 parsers)
538
539 blame_list.FilterAndSortBlameList()
540 return crash_utils.BlameListToResultList(blame_list)
541
542
543 def FindItForCrash(stacktrace_list,
544 callstack,
545 parsed_regression,
546 parsed_crash_revision):
547 """Finds the culprit CL from the list of stacktrace.
548
549 Args:
550 stacktrace_list: A list of stacktraces to look for, in the order of
551 decreasing significance.
552 callstack: A callstack object to show blame information for, if there are
553 no results for all stacktraces in the stacktrace_list.
554 parsed_regression: Parsed regression information as a result of parsing
555 the DEPS file.
556 parsed_crash_revision: Parsed crash revision information.
557
558 Returns:
559 A list of result objects.
560 """
561 # If regression information is not available, return blame information.
562 if not parsed_regression:
563 return GenerateAndFilterBlameList(callstack, parsed_crash_revision,
564 parsed_regression)
565
566 for stacktrace in stacktrace_list:
567
stgao 2014/08/14 18:25:42 This empty line could be removed.
jeun 2014/08/14 20:54:12 Done.
568 # Check the next stacktrace if the current one is empty.
569 if not stacktrace.stack_list:
570 continue
571
572 # Get the crash stack for this stacktrace, and extract crashing components
573 # from it.
574 main_stack = stacktrace.GetCrashStack()
575 components = ParseCrashComponents(main_stack)
576
577 result_for_stacktrace = FindMatchForStacktrace(stacktrace, components,
578 parsed_regression)
579
580 # If the result is empty, check the next stacktrace. Else, return the
581 # filtered result.
582 if not result_for_stacktrace:
583 continue
584
585 return FilterAndGenerateReasonForMatches(result_for_stacktrace)
586
587 # If no match is found, return the blame information for the input
588 # callstack.
589 return GenerateAndFilterBlameList(callstack, parsed_crash_revision,
590 parsed_regression)
591
