Chromium Code Reviews

Side by Side Diff: tools/findit/findit_for_crash.py

Issue 468823003: [Findit] findit algorithms and a cluster-fuzz implementation. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addressed codereview. Created 6 years, 4 months ago
1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 import os
6 from threading import Lock, Thread
7
8 import blame
9 from common import utils
10 import component_dictionary
11 import crash_utils
12 import git_repository_parser
13 import match_set
14 import svn_repository_parser
15
16
17 LINE_CHANGE_PRIORITY = 1
18 FILE_CHANGE_PRIORITY = 2
19 _THIS_DIR = os.path.abspath(os.path.dirname(__file__))
20 CONFIG = crash_utils.ParseURLsFromConfig(os.path.join(_THIS_DIR,
21 'config.ini'))
22
23
24 def GenerateMatchEntry(
25 matches, revision_info, revision_number, file_path, function,
26 component_path, component_name, crashed_line_numbers, stack_frame_indices,
27 file_change_type, repository_parser):
28 """Generates a match object and adds it to the match set.
29
30 Args:
31 matches: A matchset object, a map from CL to a match object.
32 revision_info: The revision information, a map from fields (message,
33 changed files, etc) to their values.
34 revision_number: The SVN revision number or git hash.
35 file_path: The path of the file.
36 function: The function that caused a crash.
37 component_path: The path of the component this file is from.
38 component_name: The name of the component the file is from.
39 crashed_line_numbers: The list of the lines in the file that caused
40 the crash.
41 stack_frame_indices: The list of positions of this file within a stack.
42 file_change_type: Whether the file is modified, added or deleted.
43 repository_parser: The parser object to parse the line diff.
44 """
45 # Check if this CL should be ignored.
46 with matches.matches_lock:
47 if revision_number in matches.cls_to_ignore:
48 return
49
50 # If this CL is not already identified as suspected, create a new entry.
51 if revision_number not in matches.matches:
52 match = match_set.Match(revision_info, component_name)
53 message = revision_info['message']
54 # TODO(jeun): Don't hold lock while issuing http request.
55 match.ParseMessage(message, matches.codereview_api_url)
56
57 # If this match is a revert, add to the set of CLs to be ignored.
58 if match.is_revert:
59 matches.cls_to_ignore.add(revision_number)
60
61 # If this match has info on what CL it is reverted from, add that CL.
62 if match.revert_of:
63 matches.cls_to_ignore.add(match.revert_of)
64
65 return
66
67 matches.matches[revision_number] = match
68
69 else:
70 match = matches.matches[revision_number]
71
72 (diff_url, changed_line_numbers, changed_line_contents) = (
73 repository_parser.ParseLineDiff(
74 file_path, component_path, file_change_type, revision_number))
75
76 # Ignore this match if the component is not supported for SVN.
77 if not diff_url:
78 return
79
80 # Find the intersection between the lines that this file crashed on and
81 # the changed lines.
82 (line_number_intersection, stack_frame_index_intersection) = (
83 crash_utils.Intersection(
84 crashed_line_numbers, stack_frame_indices, changed_line_numbers))
85
86 # Find the minimum distance between the changed lines and crashed lines.
87 min_distance = crash_utils.FindMinLineDistance(crashed_line_numbers,
88 changed_line_numbers)
89
90 # Check whether this CL changes the crashed lines or not.
91 if line_number_intersection:
92 priority = LINE_CHANGE_PRIORITY
93 else:
94 priority = FILE_CHANGE_PRIORITY
95
96 # Add the parsed information to the object.
97 with matches.matches_lock:
98 match.crashed_line_numbers.append(line_number_intersection)
99
100 file_name = file_path.split('/')[-1]
101 match.changed_files.append(file_name)
102
103 # Update the min distance only if it is less than the current one.
104 if min_distance < match.min_distance:
105 match.min_distance = min_distance
106
107 # If this CL does not change the crashed line, all occurrences of this
108 # file in the stack have the same priority.
109 if not stack_frame_index_intersection:
110 stack_frame_index_intersection = stack_frame_indices
111 match.stack_frame_indices.append(stack_frame_index_intersection)
112 match.changed_file_urls.append(diff_url)
113 match.priorities.append(priority)
114 match.function_list.append(function)
115
116
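# Illustrative sketch (assumption): the helper below only approximates what
# crash_utils.FindMinLineDistance is expected to compute for the ranking in
# GenerateMatchEntry above -- the smallest gap between any crashed line and
# any changed line. The real implementation lives in crash_utils and may
# differ in detail.
def _FindMinLineDistanceSketch(crashed_line_numbers, changed_line_numbers):
  distances = [abs(crashed - changed)
               for crashed in crashed_line_numbers
               for changed in changed_line_numbers]
  return min(distances) if distances else float('inf')

# Example (hypothetical values): crashed lines [120, 121] against changed
# lines [118, 300] give a minimum distance of 2, so such a CL ranks ahead of
# one whose nearest change is hundreds of lines away from the crash.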
117 def FindMatch(revisions_info_map, file_to_revision_info, file_to_crash_info,
118 component_path, component_name, repository_parser,
119 codereview_api_url):
120 """Finds a CL that modifies file in the stacktrace.
121
122 Args:
123 revisions_info_map: A dictionary mapping revision number to the CL
124 information.
125 file_to_revision_info: A dictionary mapping a file to the revisions that
126 modify it.
127 file_to_crash_info: A dictionary mapping a file to its occurrences in
128 the stacktrace.
129 component_path: The path of the component to search for.
130 component_name: The name of the component to search for.
131 repository_parser: The parser object to parse the line diff.
132 codereview_api_url: A code review url to retrieve data from.
133
134 Returns:
135 Matches, a set of match objects.
136 """
137 matches = match_set.MatchSet(codereview_api_url)
138 threads = []
139
140 # Iterate through the crashed files in the stacktrace.
141 for crashed_file_path in file_to_crash_info:
142 # Ignore header files.
143 if crashed_file_path.endswith('.h'):
144 continue
145
146 # If the file in the stacktrace is not changed in any commits, continue.
147 for changed_file_path in file_to_revision_info:
148 changed_file_name = changed_file_path.split('/')[-1]
149 crashed_file_name = crashed_file_path.split('/')[-1]
150
151 if changed_file_name != crashed_file_name:
152 continue
153
154 if not crash_utils.GuessIfSameSubPath(
155 changed_file_path, crashed_file_path):
156 continue
157
158 crashed_line_numbers = file_to_crash_info.GetCrashedLineNumbers(
159 crashed_file_path)
160 stack_frame_nums = file_to_crash_info.GetCrashStackFrameIndices(
161 crashed_file_path)
162 functions = file_to_crash_info.GetCrashFunctions(crashed_file_path)
163
164 # Iterate through the CLs in which this file path is changed.
165 for (cl, file_change_type) in file_to_revision_info[changed_file_path]:
166 # If the file change is a deletion, ignore this CL.
167 if file_change_type == 'D':
168 continue
169
170 revision = revisions_info_map[cl]
171
172 match_thread = Thread(
173 target=GenerateMatchEntry,
174 args=[matches, revision, cl, changed_file_path, functions,
175 component_path, component_name, crashed_line_numbers,
176 stack_frame_nums, file_change_type,
177 repository_parser])
178 threads.append(match_thread)
179 match_thread.start()
180
181 for match_thread in threads:
182 match_thread.join()
183
184 matches.RemoveRevertedCLs()
185
186 return matches
187
188
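# Illustrative data shapes (hypothetical values) inferred from how FindMatch
# iterates over its arguments; the real mappings are built by the changelog
# parsers, and the path and revision numbers below are made up.
example_file_to_revision_info = {
    'content/browser/foo.cc': [(289123, 'M'), (289200, 'D')],
}
# Each value is a list of (revision, change_type) pairs. Entries whose change
# type is 'D' (deleted) are skipped; the remaining revisions are looked up in
# revisions_info_map and handed to GenerateMatchEntry on a worker thread.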
189 def FindMatchForComponent(component_path, file_to_crash_info, changelog,
190 callstack_priority, results, results_lock):
191 """Parses changelog and finds suspected CLs for a given component.
192
193 Args:
194 component_path: The path of the component in which to look for the culprit CL.
195 file_to_crash_info: A dictionary mapping a file to its occurrences in
196 the stack frames.
197 changelog: The parsed changelog for this component.
198 callstack_priority: The priority of this call stack: 0 if from the crash
199 stack, 1 if from the freed stack, 2 if from the previously allocated stack.
200 results: A dictionary to store the result.
201 results_lock: A lock that guards results.
202 """
203 (repository_parser, component_name, revisions, file_to_revision_map) = \
204 changelog
205
206 # Find match for this component.
207 codereview_api_url = CONFIG['codereview']['review_url']
208 component_result = FindMatch(
209 revisions, file_to_revision_map, file_to_crash_info, component_path,
210 component_name, repository_parser, codereview_api_url)
211 matches = component_result.matches
212
213 # Add all the match results in the dictionary to the list so that they
214 # can be sorted.
215 with results_lock:
216 for cl in matches:
217 match = matches[cl]
218 results.append((callstack_priority, cl, match))
219
220
221 def FindMatchForCallstack(
222 callstack, components, component_to_changelog_map, results,
223 results_lock):
224 """Finds culprit cl for a stack within a stacktrace.
225
226 For each component to look for, create a new thread that computes the matches,
227 and join the results at the end.
228
229 Args:
230 callstack: A callstack in a stacktrace to find the result for.
231 components: A set of components to look for.
232 component_to_changelog_map: A map from component to its parsed changelog.
233 results: A list to aggregate results from all stacktraces.
234 results_lock: A lock that guards results.
235 """
236 # Create a component dictionary from the components and the call stack.
237 component_dict = component_dictionary.ComponentDictionary(callstack,
238 components)
239 callstack_priority = callstack.priority
240
241 # Iterate through all components and create a new thread for each component.
242 threads = []
243 for component_path in component_dict:
244 # If the component to consider in this callstack is not in the parsed list
245 # of components, ignore this one.
246 if component_path not in component_to_changelog_map:
247 continue
248
249 changelog = component_to_changelog_map[component_path]
250 file_to_crash_info = component_dict.GetFileDict(component_path)
251 t = Thread(
252 target=FindMatchForComponent,
253 args=[component_path, file_to_crash_info, changelog,
254 callstack_priority, results, results_lock])
255 threads.append(t)
256 t.start()
257
258 for t in threads:
259 t.join()
260
261
262 def FindMatchForStacktrace(stacktrace, components,
263 component_to_regression_dict):
264 """Finds the culprit CL for stacktrace.
265
266 The passed stacktrace is from either a release build
267 or a debug build.
268
269 Args:
270 stacktrace: A list of parsed stacks within a stacktrace.
271 components: A set of components to look for.
272 component_to_regression_dict: A dictionary mapping component path to
273 its regression.
274
275 Returns:
276 A list of match results from all stacks.
277 """
278 # A list to aggregate results from all the callstacks in the stacktrace.
279 results = []
280 results_lock = Lock()
281
282 # Setup parsers.
283 svn_parser = svn_repository_parser.SVNParser(CONFIG['svn'])
284 git_parser = git_repository_parser.GitParser(component_to_regression_dict,
285 CONFIG['git'])
286
287 # Create a cache of parsed revisions.
288 component_to_changelog_map = {}
289 for component_path in components:
290 component_object = component_to_regression_dict[component_path]
291 range_start = component_object['old_revision']
292 range_end = component_object['new_revision']
293
294 # If range start is 0, the range is too large and the crash was
295 # introduced before the archived build, so ignore this case.
296 if range_start == '0':
297 continue
298
299 component_name = component_to_regression_dict[component_path]['name']
300
301 is_git = utils.IsGitHash(range_start)
302 if is_git:
303 repository_parser = git_parser
304 else:
305 repository_parser = svn_parser
306
307 (revisions, file_to_revision_map) = repository_parser.ParseChangelog(
308 component_path, range_start, range_end)
309
310 # If the returned map from ParseChangelog is empty, we don't need to look
311 # further because either the parsing failed or the changelog is empty.
312 if not (revisions and file_to_revision_map):
313 continue
314
315 component_to_changelog_map[component_path] = (repository_parser,
316 component_name,
317 revisions,
318 file_to_revision_map)
319
320 # Create separate threads for each of the call stacks in the stacktrace.
321 threads = []
322 for callstack in stacktrace.stack_list:
323 t = Thread(
324 target=FindMatchForCallstack,
325 args=[callstack, components, component_to_changelog_map,
326 results, results_lock])
327 threads.append(t)
328 t.start()
329
330 for t in threads:
331 t.join()
332
333 return results
334
335
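# Illustrative sketch (assumption): utils.IsGitHash above is only assumed to
# distinguish a 40-character hex git hash from a numeric SVN revision; the
# real check may accept other forms (e.g. abbreviated hashes).
import re

def _IsGitHashSketch(revision):
  return bool(re.match(r'^[0-9a-fA-F]{40}$', str(revision)))

# Example: '364770' -> False (treated as an SVN revision, use svn_parser);
# a 40-character hex string -> True (treated as a git hash, use git_parser).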
336 def SortMatchesFunction(match_with_stack_priority):
337 """A function to sort the match triple.
338
339 Currently, it sorts the list by:
340 1) The highest priority file change in the CL (changing crashed line is
341 higher priority than just changing the file).
342 2) The callstack this match is computed from (crash stack, freed, allocation).
343 3) The minimum stack frame number of the changed file in the match.
344 4) The number of files this CL changes (the higher, the better).
345 5) The minimum distance between the lines that the CL changes and crashed
346 lines.
347
348 Args:
349 match_with_stack_priority: A match object, with the CL it is from and what
350 callstack it is from.
351
352 Returns:
353 A sort key.
354 """
355 (stack_priority, _, match) = match_with_stack_priority
356
357 return (min(match.priorities),
358 stack_priority,
359 crash_utils.FindMinStackFrameNumber(match.stack_frame_indices,
360 match.priorities),
361 -len(match.changed_files), match.min_distance)
362
363
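# Hypothetical illustration of the ordering produced by the key above (all
# values made up). A CL that changes a crashed line (priority 1) in the crash
# stack (stack priority 0) sorts ahead of one that merely touches the crashed
# file (priority 2), no matter how the remaining tie-breakers compare:
key_line_change = (1, 0, 3, -2, 0)
key_file_only = (2, 0, 1, -5, 40)
assert sorted([key_file_only, key_line_change]) == [key_line_change,
                                                    key_file_only]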
364 def SortAndFilterMatches(matches, num_important_frames=5):
365 """Filters the list of potential culprit CLs to remove noise.
366
367 Args:
368 matches: A list containing match results.
369 num_important_frames: The number of frames at the top of the stack to check
370 when filtering the results. A match with a file
371 that is in the top num_important_frames of the stacktrace
372 is regarded as more probable than others.
373
374 Returns:
375 Filtered match results.
376 """
377 new_matches = []
378 line_changed = False
379 is_important_frame = False
380 highest_priority_stack = crash_utils.INFINITY
381
382 matches.sort(key=SortMatchesFunction)
383
384 # Iterate through the matches to find out what results are significant.
385 for stack_priority, cl, match in matches:
386 # Check if the current match changes a crashed line.
387 is_line_change = (min(match.priorities) == LINE_CHANGE_PRIORITY)
388
389 # Check which stack this match is from, and find the highest-priority
390 # callstack up to this point.
391 current_stack = stack_priority
392 if current_stack < highest_priority_stack:
393 highest_priority_stack = current_stack
394
395 # Check if the current match changes a file that occurs in the crash state.
396 flattened_stack_frame_indices = [frame for frame_indices in
397 match.stack_frame_indices
398 for frame in frame_indices]
399 current_is_important = (
400 min(flattened_stack_frame_indices) < num_important_frames)
401
402 # This match and everything after it should be ignored if:
403 # - The current match does not change crashed lines but earlier matches
404 # do.
405 # - The current match is not in the crash state but earlier matches are.
406 # - Other matches came from a higher-priority stack.
407 if (line_changed and not is_line_change) or (
408 is_important_frame and not current_is_important) or (
409 current_stack > highest_priority_stack):
410 break
411
412 # Update the variables.
413 if is_line_change:
414 line_changed = True
415 if current_is_important:
416 is_important_frame = True
417
418 # Add current match to the filtered result.
419 new_matches.append((stack_priority, cl, match))
420
421 return new_matches
422
423
424 def GenerateReasonForMatches(matches):
425 """Generates a reason that a match (CL) is a culprit cl.
426
427 Args:
428 matches: A list of match objects.
429 """
430 # Iterate through the matches in the list.
431 for i, _, match in matches:
432 reason = []
433
434 # Zip the files in the match by the reason they are suspected
435 # (how the file is modified).
436 match_by_priority = zip(
437 match.priorities, match.crashed_line_numbers, match.changed_files,
438 match.changed_file_urls)
439
440 # Sort the zipped changed files in the match by their priority so that the
441 # changed lines come first in the reason.
442 match_by_priority.sort(
443 key=lambda (priority, crashed_line_number, file_name, url): priority)
444
445 # Iterate through the sorted list.
446 for i in range(len(match_by_priority)):
447 (priority, crashed_line_number, file_name, file_url) = \
448 match_by_priority[i]
449
450 # If the file in the match is a line change, append an explanation.
451 if priority == LINE_CHANGE_PRIORITY:
452 reason.append(
453 'Line %s of file %s which potentially caused the crash '
454 'according to the stacktrace, is changed in this cl.\n' %
455 (crash_utils.PrettifyList(crashed_line_number),
456 crash_utils.PrettifyFiles([(file_name, file_url)])))
457
458 else:
459 # Get all the files that are not line changes.
460 rest_of_the_files = match_by_priority[i:]
461
462 if len(rest_of_the_files) == 1:
463 file_string = 'File %s is changed in this cl.\n'
464 else:
465 file_string = 'Files %s are changed in this cl.\n'
466
467 # Create a list of file names and their URLs, and prettify the list.
468 file_name_with_url = [(file_name, file_url)
469 for (_, _, file_name, file_url)
470 in rest_of_the_files]
471 pretty_file_name_url = crash_utils.PrettifyFiles(file_name_with_url)
472
473 # Add the reason, then break because the rest of the files are handled here.
474 reason.append(file_string % pretty_file_name_url)
475 break
476
477 # Set the reason as a string.
478 match.reason = ''.join(reason)
479
480
481 def CombineMatches(matches):
482 """Combine possible duplicates in matches.
483
484 Args:
485 matches: A list of match objects, each with its callstack priority and
486 the CL it is from.
487 Returns:
488 A combined list of matches.
489 """
490 combined_matches = []
491
492 for stack_index, cl, match in matches:
493 found_match = None
494
495 # Iterate through the list of combined matches.
496 for _, cl_combined, match_combined in combined_matches:
497 # Check if the current CL is already higher up in the result.
498 if cl == cl_combined:
499 found_match = match_combined
500 break
501
502 # If the current match is not already present, add it to the list of matches.
503 if not found_match:
504 combined_matches.append((stack_index, cl, match))
505 continue
506
507 # Combine the reasons if the current match is already in the list.
508 found_match.reason += match.reason
509
510 return combined_matches
511
512
513 def FilterAndGenerateReasonForMatches(result):
514 """A wrapper function.
515
516 It generates reasons for the matches and returns a string representation
517 of the filtered results.
518
519 Args:
520 result: A list of match objects.
521
522 Returns:
523 A string representation of filtered results.
524 """
525 new_result = SortAndFilterMatches(result)
526 GenerateReasonForMatches(new_result)
527 combined_matches = CombineMatches(new_result)
528 return crash_utils.MatchListToResultList(combined_matches)
529
530
531 def ParseCrashComponents(main_stack, top_n_frames=3):
Martin Barbella 2014/08/19 19:52:12 I discussed this with Shuotao a bit and we think i
jeun 2014/08/19 21:37:43 Done.
532 """Parses the crashing component.
533
534 A crashing component is a component that the top_n_frames of the stacktrace
535 are from.
536
537 Args:
538 main_stack: Main stack from the stacktrace.
539 top_n_frames: The number of frames to regard as crashing frames.
540
541 Returns:
542 A set of components.
543 """
544 frame_list = main_stack.GetTopNFrames(top_n_frames)
545 components = set()
546
547 for frame in frame_list:
548 components.add(frame.component_path)
549
550 return components
551
552
553 def GenerateAndFilterBlameList(callstack, component_to_crash_revision_dict,
554 component_to_regression_dict):
555 """A wrapper function.
556
557 Finds blame information for the stack and returns a string representation.
558
559 Args:
560 callstack: A callstack to find the blame information.
561 component_to_crash_revision_dict: A dictionary mapping component to its
562 crash revision.
563 component_to_regression_dict: A dictionary mapping component to its
564 regression.
565
566 Returns:
567 A list of blame results.
568 """
569 # Setup parser objects to use for parsing blame information.
570 svn_parser = svn_repository_parser.SVNParser(CONFIG['svn'])
571 git_parser = git_repository_parser.GitParser(component_to_regression_dict,
572 CONFIG['git'])
573 parsers = {}
574 parsers['svn'] = svn_parser
575 parsers['git'] = git_parser
576
577 # Create and generate the blame objects from the callstack.
578 blame_list = blame.BlameList()
579 blame_list.FindBlame(callstack, component_to_crash_revision_dict,
580 component_to_regression_dict,
581 parsers)
582
583 blame_list.FilterAndSortBlameList()
584 return crash_utils.BlameListToResultList(blame_list)
585
586
587 def FindItForCrash(stacktrace_list,
588 callstack,
589 component_to_regression_dict,
590 component_to_crash_revision_dict):
591 """Finds the culprit CL from the list of stacktrace.
592
593 Args:
594 stacktrace_list: A list of stacktraces to look through, in the order of
595 decreasing significance.
596 callstack: A callstack object to show blame information for, if there are
597 no results for any stacktrace in the stacktrace_list.
598 component_to_regression_dict: Parsed regression information obtained as a
599 result of parsing the DEPS file.
600 component_to_crash_revision_dict: Parsed crash revision information.
601
602 Returns:
603 A list of result objects, with a message describing how the result was created.
604 """
605 # If regression information is not available, return blame information.
606 if not component_to_regression_dict:
607 return_message = (
608 'Regression information is not available. The result is '
609 'the blame information.')
610 result = GenerateAndFilterBlameList(callstack,
611 component_to_crash_revision_dict,
612 component_to_regression_dict)
613 return (return_message, result)
614
615 for stacktrace in stacktrace_list:
616 # Check the next stacktrace if current one is empty.
617 if not stacktrace.stack_list:
618 continue
619
620 # Get the crash stack for this stacktrace, and extract crashing components
621 # from it.
622 main_stack = stacktrace.GetCrashStack()
623 components = ParseCrashComponents(main_stack)
624
625 result_for_stacktrace = FindMatchForStacktrace(
626 stacktrace, components, component_to_regression_dict)
627
628 # If the result is empty, check the next stacktrace. Else, return the
629 # filtered result.
630 if not result_for_stacktrace:
631 continue
632
633 return_message = (
634 'The result is a list of CLs that change the crashed files.')
635 result = FilterAndGenerateReasonForMatches(result_for_stacktrace)
636 return (return_message, result)
637
638 # If no match is found, return the blame information for the input
639 # callstack.
640 return_message = (
641 'There are no CLs that change the crashed files. The result is the '
642 'blame information.')
643 result = GenerateAndFilterBlameList(
644 callstack, component_to_crash_revision_dict,
645 component_to_regression_dict)
646 return (return_message, result)
647
