| Index: tools/findit/findit_for_crash.py
|
| diff --git a/tools/findit/findit_for_crash.py b/tools/findit/findit_for_crash.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..e04ffafb2ab4b3c14f59b194757dfdc988b1ec8a
|
| --- /dev/null
|
| +++ b/tools/findit/findit_for_crash.py
|
| @@ -0,0 +1,645 @@
|
| +# Copyright (c) 2014 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +import os
|
| +from threading import Lock, Thread
|
| +
|
| +import blame
|
| +from common import utils
|
| +import component_dictionary
|
| +import crash_utils
|
| +import git_repository_parser
|
| +import match_set
|
| +import svn_repository_parser
|
| +
|
| +
|
# Priority levels for a suspected CL: a CL that changes the crashed line
# itself is stronger evidence than one that merely touches the crashed file.
LINE_CHANGE_PRIORITY = 1
FILE_CHANGE_PRIORITY = 2
_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
# Repository/codereview URLs, parsed once at import time from the config file
# that lives next to this script.
CONFIG = crash_utils.ParseURLsFromConfig(os.path.join(_THIS_DIR,
                                                      'config.ini'))
|
| +
|
| +
|
def GenerateMatchEntry(
    matches, revision_info, revision_number, file_path, function,
    component_path, component_name, crashed_line_numbers, stack_frame_indices,
    file_change_type, repository_parser):
  """Generates a match object and adds it to the match set.

  Runs on a worker thread (one per crashed-file/CL pair, see FindMatch), so
  all access to the shared `matches` object is guarded by
  matches.matches_lock.

  Args:
    matches: A matchset object, a map from CL to a match object.
    revision_info: The revision information, a map from fields (message,
      changed files, etc) to its values.
    revision_number: The SVN revision number or git hash.
    file_path: The path of the file.
    function: The function that caused a crash (a list of functions when
      called from FindMatch — see GetCrashFunctions).
    component_path: The path of the component this file is from.
    component_name: The name of the component the file is from.
    crashed_line_numbers: The list of the lines in the file that caused
      the crash.
    stack_frame_indices: The list of positions of this file within a stack.
    file_change_type: Whether file is modified, added or deleted.
    repository_parser: The parser object to parse line diff.
  """
  # Check if this CL should be ignored.
  with matches.matches_lock:
    if revision_number in matches.cls_to_ignore:
      return

    # If this CL is not already identified as suspected, create a new entry.
    if revision_number not in matches.matches:
      match = match_set.Match(revision_info, component_name)
      message = revision_info['message']
      # TODO(jeun): Don't hold lock while issuing http request.
      match.ParseMessage(message, matches.codereview_api_url)

      # If this match is a revert, add to the set of CLs to be ignored.
      if match.is_revert:
        matches.cls_to_ignore.add(revision_number)

        # If this match has info on what CL it is reverted from, add that CL.
        if match.revert_of:
          matches.cls_to_ignore.add(match.revert_of)

        # A revert CL is never a culprit itself, so do not record it.
        return

      matches.matches[revision_number] = match

    else:
      match = matches.matches[revision_number]

  # Parse the diff outside the lock (presumably this can hit the repository
  # server — TODO confirm against the parser implementations).
  # NOTE(review): changed_line_contents is unpacked but never used below.
  (diff_url, changed_line_numbers, changed_line_contents) = (
      repository_parser.ParseLineDiff(
          file_path, component_path, file_change_type, revision_number))

  # Ignore this match if the component is not supported for svn.
  if not diff_url:
    return

  # Find the intersection between the lines that this file crashed on and
  # the changed lines.
  (line_number_intersection, stack_frame_index_intersection) = (
      crash_utils.Intersection(
          crashed_line_numbers, stack_frame_indices, changed_line_numbers))

  # Find the minimum distance between the changed lines and crashed lines.
  min_distance = crash_utils.FindMinLineDistance(crashed_line_numbers,
                                                 changed_line_numbers)

  # Check whether this CL changes the crashed lines or not.
  if line_number_intersection:
    priority = LINE_CHANGE_PRIORITY
  else:
    priority = FILE_CHANGE_PRIORITY

  # Add the parsed information to the object. Re-acquire the lock since the
  # match object is shared with other worker threads.
  with matches.matches_lock:
    match.crashed_line_numbers.append(line_number_intersection)

    file_name = file_path.split('/')[-1]
    match.changed_files.append(file_name)

    # Update the min distance only if it is less than the current one.
    if min_distance < match.min_distance:
      match.min_distance = min_distance

    # If this CL does not change the crashed line, all occurrence of this
    # file in the stack has the same priority.
    if not stack_frame_index_intersection:
      stack_frame_index_intersection = stack_frame_indices
    match.stack_frame_indices.append(stack_frame_index_intersection)
    match.changed_file_urls.append(diff_url)
    match.priorities.append(priority)
    match.function_list.append(function)
|
| +
|
| +
|
def FindMatch(revisions_info_map, file_to_revision_info, file_to_crash_info,
              component_path, component_name, repository_parser,
              codereview_api_url):
  """Finds CLs that modify files appearing in the stacktrace.

  Spawns one worker thread per (crashed file, CL) pair and joins them all
  before returning.

  Args:
    revisions_info_map: A dictionary mapping revision number to the CL
                        information.
    file_to_revision_info: A dictionary mapping file to the revision that
                           modifies it.
    file_to_crash_info: A dictionary mapping file to its occurrence in
                        stacktrace.
    component_path: The path of the component to search for.
    component_name: The name of the component to search for.
    repository_parser: The parser object to parse the line diff.
    codereview_api_url: A code review url to retrieve data from.

  Returns:
    Matches, a set of match objects.
  """
  matches = match_set.MatchSet(codereview_api_url)
  workers = []

  for crashed_path in file_to_crash_info:
    # Header files are never considered culprit files.
    if crashed_path.endswith('.h'):
      continue

    crashed_name = crashed_path.split('/')[-1]

    for changed_path in file_to_revision_info:
      # Only consider commits that touch a file with the same base name and
      # a plausibly matching sub-path.
      if changed_path.split('/')[-1] != crashed_name:
        continue
      if not crash_utils.GuessIfSameSubPath(changed_path, crashed_path):
        continue

      crashed_lines = file_to_crash_info.GetCrashedLineNumbers(crashed_path)
      frame_indices = file_to_crash_info.GetCrashStackFrameIndices(
          crashed_path)
      crashed_functions = file_to_crash_info.GetCrashFunctions(crashed_path)

      # Fan out over every CL that changed this file.
      for (cl, change_type) in file_to_revision_info[changed_path]:
        # A deleted file cannot be the crash site any more.
        if change_type == 'D':
          continue

        worker = Thread(
            target=GenerateMatchEntry,
            args=[matches, revisions_info_map[cl], cl, changed_path,
                  crashed_functions, component_path, component_name,
                  crashed_lines, frame_indices, change_type,
                  repository_parser])
        workers.append(worker)
        worker.start()

  for worker in workers:
    worker.join()

  matches.RemoveRevertedCLs()

  return matches
|
| +
|
| +
|
def FindMatchForComponent(component_path, file_to_crash_info, changelog,
                          callstack_priority, results, results_lock):
  """Finds suspected CLs for one component and appends them to results.

  Args:
    component_path: The path of component to look for the culprit CL.
    file_to_crash_info: A dictionary mapping file to its occurrence in
                        stackframe.
    changelog: The parsed changelog for this component, a tuple
               (repository_parser, component_name, revisions,
               file_to_revision_map).
    callstack_priority: The priority of this call stack, 0 if from crash
                        stack, 1 if from freed, 2 if from previously
                        allocated.
    results: A dictionary to store the result.
    results_lock: A lock that guards results.
  """
  repository_parser, component_name, revisions, file_to_revision_map = (
      changelog)

  # Find match for this component.
  component_result = FindMatch(
      revisions, file_to_revision_map, file_to_crash_info, component_path,
      component_name, repository_parser, CONFIG['codereview']['review_url'])

  # Flatten the per-CL match dictionary into (priority, cl, match) triples so
  # the caller can sort across components and callstacks.
  with results_lock:
    for cl, match in component_result.matches.items():
      results.append((callstack_priority, cl, match))
|
| +
|
| +
|
def FindMatchForCallstack(
    callstack, components, component_to_changelog_map, results,
    results_lock):
  """Finds culprit CLs for a single stack within a stacktrace.

  Spawns one worker thread per component that has a parsed changelog and
  joins the results at the end.

  Args:
    callstack: A callstack in a stacktrace to find the result for.
    components: A set of components to look for.
    component_to_changelog_map: A map from component to its parsed changelog.
    results: A list to aggregrate results from all stacktraces.
    results_lock: A lock that guards results.
  """
  # Group the callstack's files by the component they belong to.
  component_dict = component_dictionary.ComponentDictionary(callstack,
                                                            components)
  stack_priority = callstack.priority

  workers = []
  for component_path in component_dict:
    # Skip components for which no changelog was parsed.
    changelog = component_to_changelog_map.get(component_path)
    if changelog is None:
      continue

    file_dict = component_dict.GetFileDict(component_path)
    worker = Thread(
        target=FindMatchForComponent,
        args=[component_path, file_dict, changelog, stack_priority,
              results, results_lock])
    workers.append(worker)
    worker.start()

  for worker in workers:
    worker.join()
|
| +
|
| +
|
def FindMatchForStacktrace(stacktrace, components,
                           component_to_regression_dict):
  """Finds the culprit CL for stacktrace.

  The passed stacktrace is either from release build stacktrace
  or debug build stacktrace. Changelogs are parsed once per component and
  cached, then each callstack in the stacktrace is searched on its own
  thread.

  Args:
    stacktrace: A list of parsed stacks within a stacktrace.
    components: A set of components to look for.
    component_to_regression_dict: A dictionary mapping component path to
                                  its regression.

  Returns:
    A list of match results from all stacks, as (callstack_priority, cl,
    match) triples.
  """
  # A list to aggregate results from all the callstacks in the stacktrace.
  results = []
  results_lock = Lock()

  # Setup parsers.
  svn_parser = svn_repository_parser.SVNParser(CONFIG['svn'])
  git_parser = git_repository_parser.GitParser(component_to_regression_dict,
                                               CONFIG['git'])

  # Create a cache of parsed revisions, keyed by component path, so each
  # component's changelog is only parsed once even though several callstacks
  # may reference it.
  component_to_changelog_map = {}
  for component_path in components:
    component_object = component_to_regression_dict[component_path]
    range_start = component_object['old_revision']
    range_end = component_object['new_revision']

    # If range start is 0, the range is too large and the crash has been
    # introduced before the archived build, so ignore this case.
    if range_start == '0':
      continue

    component_name = component_to_regression_dict[component_path]['name']

    # Pick the parser matching the revision scheme: git hashes vs SVN
    # revision numbers.
    is_git = utils.IsGitHash(range_start)
    if is_git:
      repository_parser = git_parser
    else:
      repository_parser = svn_parser

    (revisions, file_to_revision_map) = repository_parser.ParseChangelog(
        component_path, range_start, range_end)

    # If the returned map from ParseChangelog is empty, we don't need to look
    # further because either the parsing failed or the changelog is empty.
    if not (revisions and file_to_revision_map):
      continue

    component_to_changelog_map[component_path] = (repository_parser,
                                                  component_name,
                                                  revisions,
                                                  file_to_revision_map)

  # Create separate threads for each of the call stack in the stacktrace.
  threads = []
  for callstack in stacktrace.stack_list:
    t = Thread(
        target=FindMatchForCallstack,
        args=[callstack, components, component_to_changelog_map,
              results, results_lock])
    threads.append(t)
    t.start()

  for t in threads:
    t.join()

  return results
|
| +
|
| +
|
def SortMatchesFunction(match_with_stack_priority):
  """Sort key for a (stack_priority, cl, match) triple.

  Orders the list by:
  1) The highest priority file change in the CL (changing crashed line is
     higher priority than just changing the file).
  2) The callstack this match is computed (crash stack, freed, allocation).
  3) The minimum stack frame number of the changed file in the match.
  4) The number of files this CL changes (higher the better).
  5) The minimum distance between the lines that the CL changes and crashed
     lines.

  Args:
    match_with_stack_priority: A match object, with the CL it is from and
                               what callstack it is from.

  Returns:
    A sort key.
  """
  stack_priority, _, match = match_with_stack_priority

  best_file_priority = min(match.priorities)
  min_frame_number = crash_utils.FindMinStackFrameNumber(
      match.stack_frame_indices, match.priorities)

  # Negate the file count so more changed files sorts earlier.
  return (best_file_priority,
          stack_priority,
          min_frame_number,
          -len(match.changed_files),
          match.min_distance)
|
| +
|
| +
|
def SortAndFilterMatches(matches, num_important_frames=5):
  """Filters the list of potential culprit CLs to remove noise.

  Sorts `matches` in place (using SortMatchesFunction) and then keeps only
  the leading run of matches that is as strong as the best match seen so
  far; the first weaker match stops the scan.

  Args:
    matches: A list containing match results.
    num_important_frames: A number of frames on the top of the frame to check
                          for when filtering the results. A match with a file
                          that is in top num_important_frames of the
                          stacktrace is regarded more probable than others.

  Returns:
    Filtered match results.
  """
  new_matches = []
  line_changed = False
  is_important_frame = False
  highest_priority_stack = crash_utils.INFINITY
  matches.sort(key=SortMatchesFunction)

  # Iterate through the matches to find out what results are significant.
  for stack_priority, cl, match in matches:
    # Check if the current match changes crashed line.
    is_line_change = (min(match.priorities) == LINE_CHANGE_PRIORITY)

    # Check which stack this match is from, and finds the highest priority
    # callstack up to this point (smaller number = higher priority).
    current_stack = stack_priority
    if current_stack < highest_priority_stack:
      highest_priority_stack = current_stack

    # Check if current match changes a file that occurs in crash state.
    flattened_stack_frame_indices = [frame for frame_indices in
                                     match.stack_frame_indices
                                     for frame in frame_indices]
    current_is_important = (
        min(flattened_stack_frame_indices) < num_important_frames)

    # This match and anything lower than this should be ignored if:
    # - Current match does not change crashed lines but there are matches
    # that do so.
    # - Current match is not in crash state but there are matches in it.
    # - There are other matches that came from higher priority stack.
    # Because the list is sorted, everything after this match is weaker, so
    # a break (not continue) is correct here.
    if (line_changed and not is_line_change) or (
        is_important_frame and not current_is_important) or (
        current_stack > highest_priority_stack):
      break

    # Update the variables.
    if is_line_change:
      line_changed = True
    if current_is_important:
      is_important_frame = True

    # Add current match to the filtered result.
    new_matches.append((stack_priority, cl, match))

  return new_matches
|
| +
|
| +
|
def GenerateReasonForMatches(matches):
  """Generates a reason that a match (CL) is a culprit cl.

  Mutates each match in place, setting match.reason to a human-readable
  explanation string.

  Args:
    matches: A list of (stack_priority, cl, match) triples.
  """
  # Iterate through the matches in the list.
  for _, _, match in matches:
    reason = []

    # Zip the files in the match by the reason they are suspected
    # (how the file is modified), then sort so line changes come first.
    # sorted() over zip() is portable to Python 3, where zip() returns an
    # iterator with no .sort() and tuple-unpacking lambdas are gone.
    match_by_priority = sorted(
        zip(match.priorities, match.crashed_line_numbers,
            match.changed_files, match.changed_file_urls),
        key=lambda entry: entry[0])

    # Iterate through the sorted match.
    for index, (priority, crashed_line_number, file_name, file_url) in (
        enumerate(match_by_priority)):
      # If the file in the match is a line change, append an explanation.
      if priority == LINE_CHANGE_PRIORITY:
        reason.append(
            'Line %s of file %s which potentially caused the crash '
            'according to the stacktrace, is changed in this cl.\n' %
            (crash_utils.PrettifyList(crashed_line_number),
             crash_utils.PrettifyFiles([(file_name, file_url)])))

      else:
        # Since the list is sorted, everything from here on is a plain file
        # change; report them all at once and stop.
        rest_of_the_files = match_by_priority[index:]

        if len(rest_of_the_files) == 1:
          file_string = 'File %s is changed in this cl.\n'
        else:
          file_string = 'Files %s are changed in this cl.\n'

        # Create a list of file name and its url, and prettify the list.
        file_name_with_url = [(name, url)
                              for (_, _, name, url) in rest_of_the_files]
        pretty_file_name_url = crash_utils.PrettifyFiles(file_name_with_url)

        # Add the reason, break because we took care of the rest of the files.
        reason.append(file_string % pretty_file_name_url)
        break

    # Set the reason as string.
    match.reason = ''.join(reason)
|
| +
|
| +
|
def CombineMatches(matches):
  """Combines possible duplicate CLs in matches.

  When the same CL appears more than once, the first occurrence is kept and
  the reasons of later occurrences are appended to it.

  Args:
    matches: A list of (stack_index, cl, match) triples, along with its
             callstack priority and CL it is from.

  Returns:
    A combined list of matches, preserving the input order of first
    occurrences.
  """
  combined_matches = []
  # Map each CL already emitted to its match object, so duplicate detection
  # is O(1) per match instead of a linear rescan of combined_matches.
  match_by_cl = {}

  for stack_index, cl, match in matches:
    first_match = match_by_cl.get(cl)

    # If current match is not already in, add it to the list of matches.
    if first_match is None:
      combined_matches.append((stack_index, cl, match))
      match_by_cl[cl] = match
      continue

    # Combine the reason if the current match is already in there.
    first_match.reason += match.reason

  return combined_matches
|
| +
|
| +
|
def FilterAndGenerateReasonForMatches(result):
  """A wrapper function.

  Filters the raw match results, attaches a human-readable reason to each
  surviving match, merges duplicate CLs and converts the outcome to result
  objects.

  Args:
    result: A list of match objects.

  Returns:
    A string representation of filtered results.
  """
  filtered = SortAndFilterMatches(result)
  GenerateReasonForMatches(filtered)
  return crash_utils.MatchListToResultList(CombineMatches(filtered))
|
| +
|
| +
|
def ParseCrashComponents(main_stack):
  """Parses the crashing components.

  A crashing component is a component that a frame of the crash stack is
  from.

  Args:
    main_stack: Main stack from the stacktrace.

  Returns:
    A set of component paths, one entry per distinct frame component.
  """
  # The original docstring documented a nonexistent top_n_frames parameter;
  # every frame of the stack is considered.
  return set(frame.component_path for frame in main_stack.frame_list)
|
| +
|
| +
|
def GenerateAndFilterBlameList(callstack, component_to_crash_revision_dict,
                               component_to_regression_dict):
  """A wrapper function.

  Finds blame information for stack and returns string representation.

  Args:
    callstack: A callstack to find the blame information.
    component_to_crash_revision_dict: A dictionary mapping component to its
                                      crash revision.
    component_to_regression_dict: A dictionary mapping component to its
                                  regression.

  Returns:
    A list of blame results.
  """
  # Setup parser objects to use for parsing blame information, keyed by the
  # repository type they handle.
  parsers = {
      'svn': svn_repository_parser.SVNParser(CONFIG['svn']),
      'git': git_repository_parser.GitParser(component_to_regression_dict,
                                             CONFIG['git']),
  }

  # Create and generate the blame objects from the callstack, then filter
  # and sort them before converting to result objects.
  blame_list = blame.BlameList()
  blame_list.FindBlame(callstack, component_to_crash_revision_dict,
                       component_to_regression_dict, parsers)
  blame_list.FilterAndSortBlameList()

  return crash_utils.BlameListToResultList(blame_list)
|
| +
|
| +
|
def FindItForCrash(stacktrace_list,
                   callstack,
                   component_to_regression_dict,
                   component_to_crash_revision_dict):
  """Finds the culprit CL from the list of stacktrace.

  Tries each stacktrace in order of significance; falls back to blame
  information when regression data is missing or no stacktrace yields a
  match.

  Args:
    stacktrace_list: A list of stacktraces to look for, in the order of
                     decreasing significance.
    callstack: A callstack object to show blame information for, if there
               are no results for all stacktraces in the stacktrace_list.
    component_to_regression_dict: A parsed regression information as a
                                  result of parsing DEPS file.
    component_to_crash_revision_dict: A parsed crash revision information.

  Returns:
    A list of result objects, with the message how the result is created.
  """
  # Without regression information there is nothing to search; return blame
  # information instead.
  if not component_to_regression_dict:
    blame_result = GenerateAndFilterBlameList(callstack,
                                              component_to_crash_revision_dict,
                                              component_to_regression_dict)
    return_message = (
        'Regression information is not available. The result is '
        'the blame information.')
    return (return_message, blame_result)

  for stacktrace in stacktrace_list:
    # Skip empty stacktraces and try the next one.
    if not stacktrace.stack_list:
      continue

    # Extract the crashing components from this stacktrace's crash stack.
    crash_stack = stacktrace.GetCrashStack()
    components = ParseCrashComponents(crash_stack)

    stacktrace_result = FindMatchForStacktrace(
        stacktrace, components, component_to_regression_dict)

    # No matches for this stacktrace — fall through to the next one.
    if not stacktrace_result:
      continue

    return_message = (
        'The result is a list of CLs that change the crashed files.')
    return (return_message, FilterAndGenerateReasonForMatches(
        stacktrace_result))

  # No match in any stacktrace: return the blame information for the input
  # callstack.
  return_message = (
      'There are no CLs that change the crashed files. The result is the '
      'blame information.')
  blame_result = GenerateAndFilterBlameList(
      callstack, component_to_crash_revision_dict,
      component_to_regression_dict)
  return (return_message, blame_result)
|
| +
|
|
|