Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7)

Side by Side Diff: tools/findit/findit_for_crash.py

Issue 468823003: [Findit] findit algorithms and a cluster-fuzz implementation. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addressed code review. Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « tools/findit/findit_for_clusterfuzz.py ('k') | tools/findit/urls.config » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 import logging
6 import os
7 from threading import Lock, Thread
8
9 import blame
10 import component_dictionary
11 import crash_utils
12 import git_repository_parser
13 import match_set
14 import svn_repository_parser
15 import utils
16
17
# Match priorities: a CL that changes the very lines that crashed ranks
# above (numerically below) one that merely touches a crashed file.
LINE_CHANGE_PRIORITY = 1
FILE_CHANGE_PRIORITY = 2
# Service URL map (svn/git/codereview sections), loaded once at import time.
# NOTE(review): resolves 'urls.config' relative to the current working
# directory -- confirm this works when run from outside tools/findit.
URL_MAP = crash_utils.ParseURLsFromConfig('urls.config')
stgao 2014/08/18 22:37:06 The logic to handle the absolute path of the confi
21
22
def GenerateMatchEntry(
    matches, revisions, cl, file_path, file_name, function, component_path,
    component_name, crashed_lines, stack_frame_indices, file_action,
    repository_parser):
  """Generates a match object and adds it to the match set.

  Runs on a worker thread (one per candidate CL/file pair, see FindMatch);
  every access to the shared |matches| object is guarded by
  matches.matches_lock.  The lock is released while the (potentially slow)
  line-diff request runs, then re-acquired to record the results.

  Args:
    matches: A MatchSet object, a map from CL to a match object.
    revisions: The dictionary mapping cl's number to the revision information,
        as returned from a function ParseChangelog in parser.
    cl: The SVN revision number or git hash of the candidate CL.
    file_path: The path of the file.
    file_name: The name of the file that this CL might be the culprit cl.
    function: The function that caused a crash.
    component_path: The path of the component this file is from.
    component_name: The name of the component the file is from.
    crashed_lines: The list of the lines in the file that caused the crash.
    stack_frame_indices: The list of positions of this file within a stack.
    file_action: Whether file is modified ('M'), added or deleted ('D').
    repository_parser: The parser object to parse line diff.
  """
  # Check if this CL should be ignored (it is a revert, or it is the CL
  # some other match reverts).
  with matches.matches_lock:
    if cl in matches.cls_to_ignore:
      return

    # If this CL is not already identified as suspected, create a new entry.
    if cl not in matches.matches:
      revision = revisions[cl]
      match = match_set.Match(revision, component_name)
      message = revisions[cl]['message']
      # TODO(jeun): Don't hold lock while issuing http request.
      match.ParseMessage(message, matches.codereview_api_url)

      # If this match is a revert, add to the set of CLs to be ignored.
      if match.is_reverted:
        matches.cls_to_ignore.add(cl)

        # If this match has info on what CL it is reverted from, add that CL.
        if match.revert_of:
          matches.cls_to_ignore.add(match.revert_of)

        # Return because we do not need to look at this CL anymore.
        return

      # Add this CL to the set of matches.
      matches.matches[cl] = match

    # Else, bring in the existing result.
    else:
      match = matches.matches[cl]

  # Parse line diff information for this file.  Done with the lock released,
  # since this can issue a network request.  |changed_line_contents| is
  # returned by the parser but not needed here.
  (diff_url, changed_line_numbers, changed_line_contents) = (
      repository_parser.ParseLineDiff(
          file_path, component_path, file_action, cl))

  # Ignore this match if the component is not supported for svn.
  if not diff_url:
    return

  # Find the intersection between the lines that this file crashed on and
  # the changed lines.
  (line_intersection, stack_frame_index_intersection) = (
      crash_utils.Intersection(
          crashed_lines, stack_frame_indices, changed_line_numbers))

  # Find the minimum distance between the changed lines and crashed lines.
  min_distance = crash_utils.FindMinLineDistance(crashed_lines,
                                                 changed_line_numbers)

  # Check whether this CL changes the crashed lines or not.
  if line_intersection:
    priority = LINE_CHANGE_PRIORITY
  else:
    priority = FILE_CHANGE_PRIORITY

  # Add the parsed information to the object, under the lock again since
  # other worker threads append to the same match.
  with matches.matches_lock:
    match.line_of_crash.append(line_intersection)
    match.files.append(file_name)

    # Update the min distance only if it is less than the current one.
    if min_distance < match.min_distance:
      match.min_distance = min_distance

    # If this CL does not change the crashed line, all occurrences of this
    # file in the stack have the same priority.
    if not stack_frame_index_intersection:
      stack_frame_index_intersection = stack_frame_indices
    match.stack_frame_indices.append(stack_frame_index_intersection)
    match.file_urls.append(diff_url)

    # Only record the highest rank (lowest priority value) of this CL.
    if priority < match.rank:
      match.rank = priority
    match.priorities.append(priority)
    match.function.append(function)
121
122
def FindMatch(revisions, file_map, file_dict, component_path, component_name,
              repository_parser, codereview_api_url):
  """Finds CLs that modify files appearing in the stacktrace.

  For every non-header file in the stacktrace that some CL in the regression
  range touches, a worker thread checks whether that CL is a suspect.

  Args:
    revisions: A dictionary mapping revision number to the CL information.
    file_map: A dictionary mapping file to the revisions that modify it.
    file_dict: A dictionary mapping file to its occurrence in stacktrace.
    component_path: The path of the component to search for.
    component_name: The name of the component to search for.
    repository_parser: The parser object to parse the line diff.
    codereview_api_url: A code review url to retrieve data from.

  Returns:
    A MatchSet object holding the suspected CLs.
  """
  matches = match_set.MatchSet(codereview_api_url)
  workers = []

  for file_name in file_dict:
    # Header files, and files no CL in the range ever touched, cannot be
    # culprits.
    if file_name.endswith('.h') or file_name not in file_map:
      continue

    # Two files with the same name may live at different paths, so every
    # crashed path for this name is examined separately.
    for path in file_dict.GetPathDic(file_name):
      crashed_lines = file_dict.GetCrashedLineNumbers(path)
      frame_indices = file_dict.GetCrashStackFrameindex(path)
      crashed_function = file_dict.GetCrashFunction(path)

      # Check every CL in the regression range that changes this file.
      for cl, file_action, file_path in file_map[file_name]:
        # A deleted file, or a file at an unrelated path, is not a match.
        if file_action == 'D' or not crash_utils.GuessIfSameSubPath(
            path, file_path):
          continue

        # Evaluate this candidate CL on its own thread.
        worker = Thread(
            target=GenerateMatchEntry,
            args=[matches, revisions, cl, file_path, file_name,
                  crashed_function, component_path, component_name,
                  crashed_lines, frame_indices, file_action,
                  repository_parser])
        workers.append(worker)
        worker.start()

  # Wait for every worker before post-processing the collected matches.
  for worker in workers:
    worker.join()

  # Reverted CLs (and the CLs they revert) are noise, not culprits.
  matches.RemoveReverts()

  return matches
188
189
def FindMatchForComponent(component_path, file_dict, changelog,
                          callstack_priority, results, results_lock):
  """Parses changelog and records suspected CLs for one component.

  Args:
    component_path: The path of component to look for the culprit CL.
    file_dict: A dictionary mapping file to its occurrence in stackframe.
    changelog: The parsed changelog for this component, a tuple of
        (repository_parser, component_name, revisions, file_to_revision_map).
    callstack_priority: The priority of this call stack, 0 if from crash
        stack, 1 if from freed, 2 if from previously allocated.
    results: A shared list in which (priority, cl, match) tuples are stored.
    results_lock: A lock that guards results.
  """
  repository_parser, component_name, revisions, file_to_revision_map = (
      changelog)

  # Search this component's changelog for CLs that touch crashed files.
  component_result = FindMatch(
      revisions, file_to_revision_map, file_dict, component_path,
      component_name, repository_parser,
      URL_MAP['codereview']['review_url'])
  matches = component_result.matches

  # Flatten the match dictionary into the shared result list so that it can
  # be sorted later; the lock guards appends from sibling component threads.
  with results_lock:
    for cl, match in matches.items():
      results.append((callstack_priority, cl, match))
219
220
def FindMatchForCallstack(
    callstack, components, component_to_changelog_map, results,
    results_lock):
  """Finds culprit CLs for a single stack within a stacktrace.

  Spawns one worker thread per component with a parsed changelog, and joins
  all of them before returning; results accumulate into the shared |results|.

  Args:
    callstack: A callstack in a stacktrace to find the result for.
    components: A set of components to look for.
    component_to_changelog_map: A map from component to its parsed changelog.
    results: A list to aggregate results from all stacktraces.
    results_lock: A lock that guards results.
  """
  # Group the stack's files by the component they belong to.
  component_dict = component_dictionary.ComponentDictionary(callstack,
                                                            components)
  stack_priority = callstack.priority

  workers = []
  for component_path in component_dict:
    file_dict = component_dict.GetFileDict(component_path)

    # Components without a parsed changelog cannot be searched.
    if component_path not in component_to_changelog_map:
      continue

    worker = Thread(
        target=FindMatchForComponent,
        args=[component_path, file_dict,
              component_to_changelog_map[component_path],
              stack_priority, results, results_lock])
    workers.append(worker)
    worker.start()

  for worker in workers:
    worker.join()
259
260
def FindMatchForStacktrace(stacktrace, components, regression_dict):
  """Finds the culprit CLs for one stacktrace (release or debug build).

  Args:
    stacktrace: A list of parsed stacks within a stacktrace.
    components: A set of components to look for.
    regression_dict: A dictionary mapping component path to its regression.

  Returns:
    A list of (callstack_priority, cl, match) results from all stacks.
  """
  # Shared accumulator for all callstack threads, guarded by |results_lock|.
  results = []
  results_lock = Lock()

  # Both parser flavors are prepared up front; each component picks one.
  svn_parser = svn_repository_parser.SVNParser(URL_MAP['svn'])
  git_parser = git_repository_parser.GitParser(regression_dict,
                                               URL_MAP['git'])

  # Pre-parse the changelog of every component in its regression range.
  component_to_changelog_map = {}
  for component_path in components:
    regression = regression_dict[component_path]
    range_start = regression['old_revision']
    range_end = regression['new_revision']

    # '0' marks a component without a usable regression range.
    if range_start == '0':
      continue

    component_name = regression_dict[component_path]['name']

    # A git hash means the component's regression is expressed in git;
    # otherwise it is an SVN revision number.
    repository_parser = (
        git_parser if utils.IsGitHash(range_start) else svn_parser)

    # Parse revisions and changed files in the regression range.
    revisions, file_to_revision_map = repository_parser.ParseChangelog(
        component_path, range_start, range_end)

    if not (revisions and file_to_revision_map):
      continue

    component_to_changelog_map[component_path] = (
        repository_parser, component_name, revisions, file_to_revision_map)

  # One worker thread per call stack in the stacktrace.
  workers = []
  for callstack in stacktrace.stack_list:
    worker = Thread(
        target=FindMatchForCallstack,
        args=[callstack, components, component_to_changelog_map,
              results, results_lock])
    workers.append(worker)
    worker.start()

  for worker in workers:
    worker.join()

  return results
328
329
def SortAndFilterMatches(matches):
  """Filters the list of potential culprit CLs to remove noise.

  Matches are sorted by significance, and everything after the first match
  that is strictly less significant than an earlier one is dropped.

  Args:
    matches: A list of (stack_index, cl, match) tuples.

  Returns:
    Filtered and sorted list of (stack_index, cl, match) tuples.
  """
  new_matches = []
  line_changed = False
  in_crash_state = False
  highest_priority_stack = crash_utils.INFINITY

  def _significance_key(entry):
    """Sort key: line-change CLs first, then earlier stacks, then matches
    nearer the stack top, then CLs touching more files, then smaller
    changed-to-crashed line distance.  Uses explicit unpacking instead of a
    tuple-parameter lambda, which is Python-2-only syntax (PEP 3113)."""
    stack_index, _, match = entry
    return (match.rank, stack_index,
            crash_utils.FindMinStackFrameNumber(match.stack_frame_indices,
                                                match.priorities),
            -len(match.files), match.min_distance)

  matches.sort(key=_significance_key)

  # Iterate through the matches to find out what results are significant.
  for stack_index, cl, match in matches:
    # Check if the current match changes the crashed line.
    is_line_change = (match.rank == LINE_CHANGE_PRIORITY)

    # Check which stack this match is from.
    current_stack = stack_index
    if current_stack < highest_priority_stack:
      highest_priority_stack = current_stack

    # A match is "in crash state" if any of its frames is within the top 5.
    flattened_stack_frame_indices = [frame_index
                                     for index_list in
                                     match.stack_frame_indices
                                     for frame_index in index_list]
    # Guard the empty case: a match without any frame indices is treated as
    # not in the crash state, instead of letting min() raise ValueError.
    current_in_crash_state = (
        bool(flattened_stack_frame_indices) and
        min(flattened_stack_frame_indices) < 5)

    # This match and anything lower than this should be ignored if:
    # - Current match does not change crashed lines but there are matches
    #   that do so.
    # - Current match is not in crash state but there are matches in it.
    # - There are other matches that came from a higher priority stack.
    if ((line_changed and not is_line_change) or
        (in_crash_state and not current_in_crash_state) or
        current_stack > highest_priority_stack):
      break

    # Update the filter state.
    if is_line_change:
      line_changed = True
    if current_in_crash_state:
      in_crash_state = True

    # Add current match to the filtered result.
    new_matches.append((stack_index, cl, match))

  return new_matches
386
387
def GenerateReasonForMatches(matches):
  """Generates, for each match (CL), the reason it is a suspected culprit.

  Sets |match.reason| on every match in |matches| as a side effect.

  Args:
    matches: A list of (stack_index, cl, match) tuples.
  """
  # The stack index and cl of each entry are not needed here.
  for _, _, match in matches:
    reason = []

    # Pair each suspected file with why it is suspected (how it was
    # modified), ordered line-change matches first, shorter file names
    # first.  sorted(zip(...)) with an index-based key is used instead of
    # zip(...).sort() with a tuple-parameter lambda: on Python 3, zip
    # returns an iterator without .sort() and tuple-parameter lambdas are
    # a syntax error (PEP 3113).
    match_by_priority = sorted(
        zip(match.priorities, match.line_of_crash, match.files,
            match.file_urls),
        key=lambda entry: (entry[0], len(entry[2])))

    # Note: enumerate avoids shadowing any outer loop variable, which the
    # previous index-based loop did.
    for index, (priority, crashed_line_number, file_name, file_url) in (
        enumerate(match_by_priority)):
      # If the file in the match is a line change, append an explanation.
      if priority == LINE_CHANGE_PRIORITY:
        reason.append(
            ' Line %s of file %s which caused the crash according '
            'to the stacktrace, is changed in this cl.\n' %
            (crash_utils.PrettifyList(crashed_line_number),
             crash_utils.PrettifyFiles([(file_name, file_url)])))

      else:
        # Everything from here on is a mere file change (the list is
        # sorted by priority), so report them together and stop.
        rest_of_the_files = match_by_priority[index:]

        # Take care of single/multiple files in the message.
        if len(rest_of_the_files) == 1:
          file_string = ' File %s is changed in this cl.\n'
        else:
          file_string = ' Files %s are changed in this cl.\n'

        # Create a list of file name and its url, and prettify the list.
        file_name_with_url = [(name, url)
                              for (_, _, name, url) in rest_of_the_files]
        pretty_file_name_url = crash_utils.PrettifyFiles(file_name_with_url)

        reason.append(file_string % pretty_file_name_url)
        break

    # Set the reason as one string.
    match.reason = ''.join(reason)
443
444
def CombineMatches(matches):
  """Combines entries in |matches| that refer to the same CL.

  The first (highest-ranked) entry for a CL is kept; the reasons of later
  duplicates are appended onto it.

  Args:
    matches: A list of (stack_index, cl, match) tuples, along with the
        callstack priority and CL each match is from.

  Returns:
    A combined list of (stack_index, cl, match) tuples, one per CL, in the
    order the CLs first appear.
  """
  combined_matches = []

  for stack_index, cl, match in matches:
    existing_match = None

    # Look for an earlier (higher-ranked) entry for the same CL.
    for _, combined_cl, combined_match in combined_matches:
      if cl == combined_cl:
        existing_match = combined_match
        # CLs in combined_matches are unique, so stop at the first hit
        # (the original scan needlessly kept iterating).
        break

    # If this CL is not already present, keep it as-is.
    if not existing_match:
      combined_matches.append((stack_index, cl, match))
      continue

    # Merge the duplicate's explanation into the entry already kept.
    existing_match.reason += match.reason

  return combined_matches
474
475
def FilterAndGenerateReasonForMatches(result):
  """A wrapper function.

  Sorts and filters the raw matches, attaches a human-readable reason to
  each, merges duplicate CLs, and converts everything to result form via
  crash_utils.MatchListToResultList.

  Args:
    result: A list of match objects.

  Returns:
    The filtered matches converted by crash_utils.MatchListToResultList.
  """
  filtered_matches = SortAndFilterMatches(result)
  GenerateReasonForMatches(filtered_matches)
  return crash_utils.MatchListToResultList(CombineMatches(filtered_matches))
492
493
def ParseCrashComponents(main_stack, top_n_frames=3):
  """Collects the component paths of the topmost crash frames.

  Args:
    main_stack: Main stack from the stacktrace.
    top_n_frames: How many topmost frames to regard as crashing frames.

  Returns:
    A set of component paths.
  """
  return set(frame.component_path
             for frame in main_stack.GetTopNFrames(top_n_frames))
510
511
def GenerateAndFilterBlameList(callstack, crash_revision_dict, regression_dict):
  """A wrapper function.

  Finds blame information for a stack and returns it in result form.

  Args:
    callstack: A callstack to find the blame information for.
    crash_revision_dict: A dictionary mapping component to its crash revision.
    regression_dict: A dictionary mapping component to its regression.

  Returns:
    A list of blame results.
  """
  # Both parser flavors are supplied, since components may be hosted in
  # either SVN or git.
  parsers = [
      svn_repository_parser.SVNParser(URL_MAP['svn']),
      git_repository_parser.GitParser(regression_dict, URL_MAP['git']),
  ]

  # Generate blame entries from the callstack, then filter and sort them
  # before converting to result form.
  blame_list = blame.BlameList()
  blame_list.FindBlame(callstack, crash_revision_dict, regression_dict,
                       parsers)
  blame_list.FilterAndSortBlameList()

  return crash_utils.BlameListToResultList(blame_list)
537
538
def FindItForCrash(stacktrace_list,
                   callstack,
                   parsed_regression,
                   parsed_crash_revision):
  """Finds the culprit CL from the list of stacktraces.

  Tries each stacktrace in order; falls back to blame information when no
  regression range is available or no stacktrace yields a match.

  Args:
    stacktrace_list: A list of stacktraces to look for, in the order of
      decreasing significance.
    callstack: A callstack object to show blame information for, if there are
      no results for all stacktraces in the stacktrace_list.
    parsed_regression: A parsed regression information as a result of parsing
      DEPS file; falsy when no regression range is available.
    parsed_crash_revision: A parsed crash revision information.

  Returns:
    A (return_message, result) pair: |return_message| explains how the
    result was produced, and |result| is either a list of suspected-CL
    results or a list of blame results.
  """
  # If regression information is not available, return blame information.
  if not parsed_regression:
    return_message = \
        """
        Regression information is not available. The result is
        the blame information.
        """
    result = GenerateAndFilterBlameList(callstack, parsed_crash_revision,
                                        parsed_regression)
    return (return_message, result)

  for stacktrace in stacktrace_list:
    # Check the next stacktrace if current one is empty.
    if not stacktrace.stack_list:
      continue

    # Get the crash stack for this stacktrace, and extract crashing
    # components from it.
    main_stack = stacktrace.GetCrashStack()
    components = ParseCrashComponents(main_stack)

    result_for_stacktrace = FindMatchForStacktrace(stacktrace, components,
                                                   parsed_regression)

    # If the result is empty, check the next stacktrace. Else, return the
    # filtered result.
    if not result_for_stacktrace:
      continue

    return_message = \
        'The result is a list of CLs that change the crashed files.'
    result = FilterAndGenerateReasonForMatches(result_for_stacktrace)
    return (return_message, result)

  # If no match is found, return the blame information for the input
  # callstack.
  return_message = \
      """
      There are no CLs that change the crashed files. The result is the
      blame information.
      """
  result = GenerateAndFilterBlameList(callstack, parsed_crash_revision,
                                      parsed_regression)
  return (return_message, result)
601
OLDNEW
« no previous file with comments | « tools/findit/findit_for_clusterfuzz.py ('k') | tools/findit/urls.config » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698