| Index: tools/findit/findit_for_crash.py |
| diff --git a/tools/findit/findit_for_crash.py b/tools/findit/findit_for_crash.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..f63b9a20bb61691b6a47c9df3e02fbda8a048d57 |
| --- /dev/null |
| +++ b/tools/findit/findit_for_crash.py |
| @@ -0,0 +1,601 @@ |
| +# Copyright (c) 2014 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +import logging |
| +import os |
| +from threading import Lock, Thread |
| + |
| +import blame |
| +import component_dictionary |
| +import crash_utils |
| +import git_repository_parser |
| +import match_set |
| +import svn_repository_parser |
| +import utils |
| + |
| + |
| +LINE_CHANGE_PRIORITY = 1 |
| +FILE_CHANGE_PRIORITY = 2 |
| +URL_MAP = crash_utils.ParseURLsFromConfig('urls.config') |
|
stgao 2014/08/18 22:37:06: The logic to handle the absolute path of the confi
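crash_utils.ParseURLsFromConfig is implemented elsewhere in tools/findit, so only its output shape can be inferred from this file. Based on how URL_MAP is indexed below (URL_MAP['codereview']['review_url'], URL_MAP['svn'], URL_MAP['git']), a purely illustrative guess at the mapping is:

    # Hypothetical shape only; the real keys and values come from urls.config.
    URL_MAP = {
        'codereview': {'review_url': '<codereview API URL>'},  # used by Match.ParseMessage
        'svn': {},  # passed as-is to svn_repository_parser.SVNParser
        'git': {},  # passed as-is to git_repository_parser.GitParser
    }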
| + |
| + |
| +def GenerateMatchEntry( |
| + matches, revisions, cl, file_path, file_name, function, component_path, |
| + component_name, crashed_lines, stack_frame_indices, file_action, |
| + repository_parser): |
| + """Generates a match object and adds it to the match set. |
| + |
| + Args: |
| + matches: A MatchSet object, a map from CL to its match object. |
| + revisions: A dictionary mapping CL number to its revision information, |
| + as returned by ParseChangelog in the repository parser. |
| + cl: The SVN revision number or git hash. |
|
stgao 2014/08/18 22:37:06: What's the difference between cls in |revisions| a
|
| + file_path: The path of the file. |
| + file_name: The name of the file for which this CL might be the culprit. |
| + function: The function that caused a crash. |
| + component_path: The path of the component this file is from. |
| + component_name: The name of the component the file is from. |
| + crashed_lines: The list of the lines in the file that caused the crash. |
| + stack_frame_indices: The list of positions of this file within a stack. |
| + file_action: Whether the file is modified, added or deleted. |
|
stgao 2014/08/18 22:37:06: file_change_type?
|
| + repository_parser: The parser object used to parse the line diff. |
| + """ |
| + # Check if this CL should be ignored. |
| + with matches.matches_lock: |
| + if cl in matches.cls_to_ignore: |
| + return |
| + |
| + # If this CL is not already identified as suspected, create a new entry. |
| + if cl not in matches.matches: |
| + revision = revisions[cl] |
| + match = match_set.Match(revision, component_name) |
| + message = revisions[cl]['message'] |
| + # TODO(jeun): Don't hold lock while issuing http request. |
| + match.ParseMessage(message, matches.codereview_api_url) |
| + |
| + # If this match is a revert, add to the set of CLs to be ignored. |
| + if match.is_reverted: |
| + matches.cls_to_ignore.add(cl) |
| + |
| + # If this match knows which CL it reverts, ignore that CL as well. |
| + if match.revert_of: |
| + matches.cls_to_ignore.add(match.revert_of) |
| + |
| + # Return because we do not need to look at this CL anymore. |
| + return |
| + |
| + # Add this CL to the set of matches. |
| + matches.matches[cl] = match |
| + |
| + # Otherwise, reuse the existing match entry. |
| + else: |
| + match = matches.matches[cl] |
| + |
| + # Parse line diff information for this file. |
| + (diff_url, changed_line_numbers, changed_line_contents) = ( |
| + repository_parser.ParseLineDiff( |
| + file_path, component_path, file_action, cl)) |
| + |
| + # Ignore this match if the component is not supported for svn. |
| + if not diff_url: |
| + return |
| + |
| + # Find the intersection between the lines that this file crashed on and |
| + # the changed lines. |
| + (line_intersection, stack_frame_index_intersection) = ( |
| + crash_utils.Intersection( |
| + crashed_lines, stack_frame_indices, changed_line_numbers)) |
| + |
| + # Find the minimum distance between the changed lines and crashed lines. |
| + min_distance = crash_utils.FindMinLineDistance(crashed_lines, |
| + changed_line_numbers) |
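crash_utils.Intersection and crash_utils.FindMinLineDistance live in crash_utils.py, so the sketch below is only an assumption about the heuristic they support (parallel flat lists of crashed lines and their frame indices), not the actual implementation:

    # Assumed behavior, for illustration only.
    def _intersection_sketch(crashed_lines, stack_frame_indices, changed_lines):
      """Returns the crashed lines the CL touched, plus their frame indices."""
      line_hits, frame_hits = [], []
      for line, frame in zip(crashed_lines, stack_frame_indices):
        if line in changed_lines:
          line_hits.append(line)
          frame_hits.append(frame)
      return line_hits, frame_hits

    def _min_line_distance_sketch(crashed_lines, changed_lines):
      """Smallest absolute distance between any crashed line and any changed line."""
      if not crashed_lines or not changed_lines:
        return float('inf')
      return min(abs(a - b) for a in crashed_lines for b in changed_lines)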
| + |
| + # Check whether this CL changes the crashed lines or not. |
| + if line_intersection: |
| + priority = LINE_CHANGE_PRIORITY |
| + else: |
| + priority = FILE_CHANGE_PRIORITY |
| + |
| + # Add the parsed information to the object. |
| + with matches.matches_lock: |
| + match.line_of_crash.append(line_intersection) |
| + match.files.append(file_name) |
| + |
| + # Update the min distance only if it is less than the current one. |
| + if min_distance < match.min_distance: |
| + match.min_distance = min_distance |
| + |
| + # If this CL does not change the crashed line, all occurrences of this |
| + # file in the stack have the same priority. |
| + if not stack_frame_index_intersection: |
| + stack_frame_index_intersection = stack_frame_indices |
| + match.stack_frame_indices.append(stack_frame_index_intersection) |
| + match.file_urls.append(diff_url) |
| + |
| + # Only record the highest rank of this CL. |
| + if priority < match.rank: |
| + match.rank = priority |
| + match.priorities.append(priority) |
| + match.function.append(function) |
| + |
| + |
| +def FindMatch(revisions, file_map, file_dict, component_path, component_name, |
| + repository_parser, codereview_api_url): |
| + """Finds a CL that modifies file in the stacktrace. |
| + |
| + Args: |
| + revisions: A dictionary mapping revision number to the CL information. |
| + file_map: A dictionary mapping a file to the revisions that modify it. |
| + file_dict: A dictionary mapping a file to its occurrences in the stacktrace. |
| + component_path: The path of the component to search for. |
| + component_name: The name of the component to search for. |
| + repository_parser: The parser object to parse the line diff. |
| + codereview_api_url: A code review url to retrieve data from. |
| + |
| + Returns: |
| + Matches, a set of match objects. |
| + """ |
| + matches = match_set.MatchSet(codereview_api_url) |
| + threads = [] |
| + |
| + # For each file in the stacktrace that is not a header file, check |
| + # whether any CL changes it. If so, add that CL to the set of |
| + # suspected CLs. |
| + for file_name in file_dict: |
| + # Ignore header files. |
| + if file_name.endswith('.h'): |
| + continue |
| + |
| + # If the file in the stacktrace is not changed in any commits, continue. |
| + if file_name not in file_map: |
| + continue |
| + |
| + path_dict = file_dict.GetPathDic(file_name) |
| + # Two files in different paths can have the same name, so the path |
| + # has to be checked as well. |
| + for path in path_dict: |
| + crashed_lines = file_dict.GetCrashedLineNumbers(path) |
| + stack_frame_nums = file_dict.GetCrashStackFrameindex(path) |
| + function = file_dict.GetCrashFunction(path) |
| + |
| + # Iterate through the revisions in which this file is changed. |
| + for (cl, file_action, file_path) in file_map[file_name]: |
| + # Skip if the paths of the two files differ or if the commit |
| + # deletes the file. |
| + if not crash_utils.GuessIfSameSubPath(path, file_path) or ( |
| + file_action == 'D'): |
| + continue |
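crash_utils.GuessIfSameSubPath is defined in crash_utils.py; its name suggests a fuzzy suffix comparison between the path seen in the stacktrace and the path recorded in the commit. A hypothetical stand-in, assumed rather than taken from that module:

    # Assumed behavior, for illustration only.
    def _guess_if_same_sub_path_sketch(stack_path, commit_path):
      """True if the shorter path matches the trailing components of the longer one."""
      stack_parts = stack_path.replace('\\', '/').strip('/').split('/')
      commit_parts = commit_path.replace('\\', '/').strip('/').split('/')
      shorter = min(len(stack_parts), len(commit_parts))
      return stack_parts[-shorter:] == commit_parts[-shorter:]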
| + |
| + # Create a new thread for adding this CL. |
| + match_thread = Thread( |
| + target=GenerateMatchEntry, |
| + args=[matches, revisions, cl, file_path, file_name, function, |
| + component_path, component_name, crashed_lines, |
| + stack_frame_nums, file_action, |
| + repository_parser]) |
| + threads.append(match_thread) |
| + match_thread.start() |
| + |
| + # Make sure all threads finish before returning. |
| + for match_thread in threads: |
| + match_thread.join() |
| + |
| + # Remove CLs that are reverts. |
| + matches.RemoveReverts() |
| + |
| + return matches |
| + |
| + |
| +def FindMatchForComponent(component_path, file_dict, changelog, |
| + callstack_priority, results, results_lock): |
| + """Parses changelog and finds suspected CLs for a given component. |
| + |
| + Args: |
| + component_path: The path of the component to look for the culprit CL in. |
| + file_dict: A dictionary mapping a file to its occurrences in the stack frames. |
| + changelog: The parsed changelog for this component. |
| + callstack_priority: The priority of this call stack, 0 if from crash stack, |
| + 1 if from freed, 2 if from previously allocated. |
| + results: A dictionary to store the result. |
| + results_lock: A lock that guards results. |
| + """ |
| + (repository_parser, component_name, revisions, file_to_revision_map) = \ |
| + changelog |
| + |
| + # Find match for this component. |
| + codereview_api_url = URL_MAP['codereview']['review_url'] |
| + component_result = FindMatch( |
| + revisions, file_to_revision_map, file_dict, component_path, |
| + component_name, repository_parser, codereview_api_url) |
| + matches = component_result.matches |
| + |
| + # Add all the match results from the dictionary to the list so that |
| + # they can be sorted. |
| + with results_lock: |
| + for cl in matches: |
| + match = matches[cl] |
| + results.append((callstack_priority, cl, match)) |
| + |
| + |
| +def FindMatchForCallstack( |
| + callstack, components, component_to_changelog_map, results, |
| + results_lock): |
| + """Finds culprit cl for a stack within a stacktrace. |
| + |
| + For each components to look for, create new thread that computes the matches |
| + and join the results at the end. |
| + Args: |
| + callstack: A callstack in a stacktrace to find the result for. |
| + components: A set of components to look for. |
| + component_to_changelog_map: A map from component to its parsed changelog. |
| + results: A list to aggregate results from all stacktraces. |
| + results_lock: A lock that guards results. |
| + """ |
| + # Create component dictionary from the component and call stack. |
| + component_dict = component_dictionary.ComponentDictionary(callstack, |
| + components) |
| + callstack_priority = callstack.priority |
| + |
| + # Iterate through all components and create new thread for each component. |
| + threads = [] |
| + for component_path in component_dict: |
| + file_dict = component_dict.GetFileDict(component_path) |
| + |
| + if component_path not in component_to_changelog_map: |
| + continue |
| + changelog = component_to_changelog_map[component_path] |
| + |
| + t = Thread( |
| + target=FindMatchForComponent, |
| + args=[component_path, file_dict, changelog, |
| + callstack_priority, results, results_lock]) |
| + threads.append(t) |
| + t.start() |
| + |
| + # Join the threads before returning. |
| + for t in threads: |
| + t.join() |
| + |
| + |
| +def FindMatchForStacktrace(stacktrace, components, regression_dict): |
| + """Finds the culprit CL for stacktrace. |
| + |
| + The passed stacktrace is either from release build stacktrace |
| + or debug build stacktrace. |
| + |
| + Args: |
| + stacktrace: A list of parsed stacks within a stacktrace. |
| + components: A set of components to look for. |
| + regression_dict: A dictionary mapping component path to its regression. |
| + |
| + Returns: |
| + A list of match results from all stacks. |
| + """ |
| + # A list to aggregate results from the whole stacktrace. |
| + results = [] |
| + results_lock = Lock() |
| + |
| + # Parse changelog for each of the components and store it. |
| + svn_parser = svn_repository_parser.SVNParser(URL_MAP['svn']) |
| + git_parser = git_repository_parser.GitParser(regression_dict, URL_MAP['git']) |
| + |
| + component_to_changelog_map = {} |
| + for component_path in components: |
| + component_object = regression_dict[component_path] |
| + range_start = component_object['old_revision'] |
| + range_end = component_object['new_revision'] |
| + if range_start == '0': |
| + continue |
| + |
| + component_name = regression_dict[component_path]['name'] |
| + |
| + # Check if the regression range is given as a git hash or an SVN revision |
| + # number, and pick the appropriate parser object. |
| + is_git = utils.IsGitHash(range_start) |
| + if is_git: |
| + repository_parser = git_parser |
| + else: |
| + repository_parser = svn_parser |
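utils.IsGitHash is implemented in utils.py; a typical way to distinguish a full git hash from an SVN revision number, shown here only as an assumption about what that helper checks, is a 40-character hexadecimal test:

    import re

    # Assumed check, for illustration only.
    def _is_git_hash_sketch(revision):
      """True for a 40-character hex string, i.e. a full git SHA-1."""
      return bool(re.match(r'^[0-9a-fA-F]{40}$', str(revision)))

    # _is_git_hash_sketch('f63b9a20bb61691b6a47c9df3e02fbda8a048d57')  -> True
    # _is_git_hash_sketch('289145')                                    -> False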
| + |
| + # Parse revisions and changed files in regression. |
| + (revisions, file_to_revision_map) = repository_parser.ParseChangelog( |
| + component_path, range_start, range_end) |
| + |
| + if not (revisions and file_to_revision_map): |
| + continue |
| + |
| + component_to_changelog_map[component_path] = (repository_parser, |
| + component_name, |
| + revisions, |
| + file_to_revision_map) |
| + |
| + # Create a separate thread for each of the call stacks in the stacktrace. |
| + threads = [] |
| + for callstack in stacktrace.stack_list: |
| + t = Thread( |
| + target=FindMatchForCallstack, |
| + args=[callstack, components, component_to_changelog_map, |
| + results, results_lock]) |
| + threads.append(t) |
| + t.start() |
| + |
| + # Join the threads before returning. |
| + for t in threads: |
| + t.join() |
| + |
| + return results |
| + |
| + |
| +def SortAndFilterMatches(matches): |
| + """Filters the list of potential culprit CLs to remove noise. |
| + |
| + Args: |
| + matches: A list containing match results. |
| + |
| + Returns: |
| + Filtered match results. |
| + """ |
| + new_matches = [] |
| + line_changed = False |
| + in_crash_state = False |
| + highest_priority_stack = crash_utils.INFINITY |
| + |
| + # Sort the matches in a specific order. |
| + matches.sort(key=lambda (stack_index, cl, match): |
| + (match.rank, stack_index, |
| + crash_utils.FindMinStackFrameNumber(match.stack_frame_indices, |
| + match.priorities), |
| + -len(match.files), match.min_distance)) |
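The key above (written with Python 2 tuple unpacking) orders matches by rank first (line changes before file-only changes), then by call-stack priority, then by how close to the top of the stack the match appears, then by the number of changed files (more files first, via the negation), and finally by line distance. A standalone illustration of that tuple ordering, with the FindMinStackFrameNumber term replaced by a plain number:

    # (rank, stack_index, min_stack_frame, -num_files, min_distance)
    candidates = [
        (2, 0, 4, -1, 30),  # file-level match in the crash stack
        (1, 0, 0, -2, 0),   # changes the crashed line itself
        (1, 1, 2, -1, 5),   # line change, but from a lower-priority stack
    ]
    print(sorted(candidates))
    # [(1, 0, 0, -2, 0), (1, 1, 2, -1, 5), (2, 0, 4, -1, 30)]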
| + |
| + # Iterate through the matches to find out what results are significant. |
| + for stack_index, cl, match in matches: |
| + # Check if the current match changes a crashed line. |
| + is_line_change = (match.rank == LINE_CHANGE_PRIORITY) |
| + |
| + # Check which call stack this match is from. |
| + current_stack = stack_index |
| + if current_stack < highest_priority_stack: |
| + highest_priority_stack = current_stack |
| + |
| + # Check if current match changes a file that occurs in crash state. |
| + flattened_stack_frame_indices = [i for sl in match.stack_frame_indices |
| + for i in sl] |
| + current_in_crash_state = min(flattened_stack_frame_indices) < 5 |
| + |
| + # This match and anything below it should be ignored if: |
| + # - The current match does not change crashed lines but earlier matches |
| + # do. |
| + # - The current match is not in the crash state but earlier matches are. |
| + # - Earlier matches came from a higher-priority stack. |
| + if (line_changed and not is_line_change) or ( |
| + in_crash_state and not current_in_crash_state) or ( |
| + current_stack > highest_priority_stack): |
| + break |
| + |
| + # Update the variables. |
| + if is_line_change: |
| + line_changed = True |
| + if current_in_crash_state: |
| + in_crash_state = True |
| + |
| + # Add current match to the filtered result. |
| + new_matches.append((stack_index, cl, match)) |
| + |
| + return new_matches |
| + |
| + |
| +def GenerateReasonForMatches(matches): |
| + """Generates a reason that a match (CL) is a culprit cl. |
| + |
| + Args: |
| + matches: A list of match objects. |
| + """ |
| + # Iterate through the matches in the list. |
| + for i, _, match in matches: |
| + reason = [] |
| + |
| + # Zip the files in the match with the reason they are suspected |
| + # (how each file is modified) and sort them in a specific order. |
| + match_by_priority = zip( |
| + match.priorities, match.line_of_crash, match.files, |
| + match.file_urls) |
| + |
| + match_by_priority.sort( |
| + key=lambda (priority, crashed_line_number, file_name, file_url): |
| + (priority, len(file_name))) |
| + |
| + # Iterate through the sorted match. |
| + for i in range(len(match_by_priority)): |
| + (priority, crashed_line_number, file_name, file_url) = \ |
| + match_by_priority[i] |
| + |
| + # If the file in the match is a line change, append an explanation. |
| + if priority == LINE_CHANGE_PRIORITY: |
| + reason.append( |
| + ' Line %s of file %s, which caused the crash according ' |
| + 'to the stacktrace, is changed in this cl.\n' % |
| + (crash_utils.PrettifyList(crashed_line_number), |
| + crash_utils.PrettifyFiles([(file_name, file_url)]))) |
| + |
| + else: |
| + # Get all the files that are not line change. |
| + rest_of_the_files = match_by_priority[i:] |
| + |
| + # Handle singular vs. plural file wording in the message. |
| + if len(rest_of_the_files) == 1: |
| + file_string = ' File %s is changed in this cl.\n' |
| + else: |
| + file_string = ' Files %s are changed in this cl.\n' |
| + |
| + # Create a list of file name and its url, and prettify the list. |
| + file_name_with_url = [(file_name, file_url) |
| + for (_, _, file_name, file_url) |
| + in rest_of_the_files] |
| + pretty_file_name_url = crash_utils.PrettifyFiles(file_name_with_url) |
| + |
| + # Add the reason, break because we took care of the rest of the files. |
| + reason.append(file_string % pretty_file_name_url) |
| + break |
| + |
| + # Set the reason as string. |
| + match.reason = ''.join(reason) |
| + |
| + |
| +def CombineMatches(matches): |
| + """Combine possible duplicates in matches. |
| + |
| + Args: |
| + matches: A list of match objects, along with the callstack priority and |
| + CL each is from. |
| + |
| + Returns: |
| + A combined list of matches. |
| + """ |
| + combined_matches = [] |
| + |
| + for stack_index, cl, match in matches: |
| + found_match = None |
| + |
| + # Iterate through the list of combined matches. |
| + for _, cl_combined, match_combined in combined_matches: |
| + # Check if the current match already appears higher up in the results. |
| + if cl == cl_combined: |
| + found_match = match_combined |
| + |
| + # If the current match is not already present, add it to the list. |
| + if not found_match: |
| + combined_matches.append((stack_index, cl, match)) |
| + continue |
| + |
| + # Combine the reason if the current match is already in there. |
| + found_match.reason += match.reason |
| + |
| + return combined_matches |
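CombineMatches only requires that the match objects carry a mutable reason attribute, so a minimal usage sketch with a stand-in class (the real code passes match_set.Match instances) looks like:

    class _FakeMatch(object):
      def __init__(self, reason):
        self.reason = reason

    duplicated = [
        (0, 'r100', _FakeMatch('  File a.cc is changed in this cl.\n')),
        (1, 'r100', _FakeMatch('  File b.cc is changed in this cl.\n')),
        (0, 'r200', _FakeMatch('  File c.cc is changed in this cl.\n')),
    ]
    combined = CombineMatches(duplicated)
    # combined keeps one entry per CL; the reason for 'r100' now mentions both files.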
| + |
| + |
| +def FilterAndGenerateReasonForMatches(result): |
| + """A wrapper function. |
| + |
| + It generates reasons for the matches and returns a string representation |
| + of the filtered results. |
| + |
| + Args: |
| + result: A list of match objects. |
| + |
| + Returns: |
| + A string representation of filtered results. |
| + """ |
| + new_result = SortAndFilterMatches(result) |
| + GenerateReasonForMatches(new_result) |
| + combined_matches = CombineMatches(new_result) |
| + return crash_utils.MatchListToResultList(combined_matches) |
| + |
| + |
| +def ParseCrashComponents(main_stack, top_n_frames=3): |
| + """Parses the crashing component. |
| + |
| + Args: |
| + main_stack: Main stack from the stacktrace. |
| + top_n_frames: The number of top frames to regard as crashing frames. |
| + |
| + Returns: |
| + A set of components. |
| + """ |
| + frame_list = main_stack.GetTopNFrames(top_n_frames) |
| + components = set() |
| + |
| + for frame in frame_list: |
| + components.add(frame.component_path) |
| + |
| + return components |
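ParseCrashComponents only needs main_stack to expose GetTopNFrames() and each frame to carry a component_path attribute, so a stub-based usage sketch (the class names here are hypothetical) would be:

    import collections

    _Frame = collections.namedtuple('_Frame', ['component_path'])

    class _FakeStack(object):
      def __init__(self, frames):
        self._frames = frames

      def GetTopNFrames(self, n):
        return self._frames[:n]

    stack = _FakeStack([_Frame('src/content/'), _Frame('src/content/'),
                        _Frame('src/v8/'), _Frame('src/net/')])
    ParseCrashComponents(stack)  # -> a set with 'src/content/' and 'src/v8/'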
| + |
| + |
| +def GenerateAndFilterBlameList(callstack, crash_revision_dict, regression_dict): |
| + """A wrapper function. |
| + |
| + Finds blame information for the stack and returns its string representation. |
| + |
| + Args: |
| + callstack: A callstack to find the blame information. |
| + crash_revision_dict: A dictionary mapping component to its crash revision. |
| + regression_dict: A dictionary mapping component to its regression. |
| + |
| + Returns: |
| + A list of blame results. |
| + """ |
| + # Setup parser objects to use for parsing blame information. |
| + svn_parser = svn_repository_parser.SVNParser(URL_MAP['svn']) |
| + git_parser = git_repository_parser.GitParser(regression_dict, URL_MAP['git']) |
| + parsers = [svn_parser, git_parser] |
| + |
| + # Create and generate the blame objects from the callstack. |
| + blame_list = blame.BlameList() |
| + blame_list.FindBlame(callstack, crash_revision_dict, regression_dict, |
| + parsers) |
| + |
| + blame_list.FilterAndSortBlameList() |
| + return crash_utils.BlameListToResultList(blame_list) |
| + |
| + |
| +def FindItForCrash(stacktrace_list, |
| + callstack, |
| + parsed_regression, |
| + parsed_crash_revision): |
| + """Finds the culprit CL from the list of stacktrace. |
| + |
| + Args: |
| + stacktrace_list: A list of stacktraces to look through, in order of |
| + decreasing significance. |
| + callstack: A callstack object to show blame information for, if there are |
| + no results for any stacktrace in the stacktrace_list. |
| + parsed_regression: Parsed regression information, as a result of parsing |
| + the DEPS file. |
| + parsed_crash_revision: Parsed crash revision information. |
| + |
| + Returns: |
| + A list of result objects, with a message on how the result was created. |
| + """ |
| + # If regression information is not available, return blame information. |
| + if not parsed_regression: |
| + return_message = \ |
| + """ |
| + Regression information is not available. The result is |
| + the blame information. |
| + """ |
| + result = GenerateAndFilterBlameList(callstack, parsed_crash_revision, |
| + parsed_regression) |
| + return (return_message, result) |
| + |
| + for stacktrace in stacktrace_list: |
| + # Check the next stacktrace if current one is empty. |
| + if not stacktrace.stack_list: |
| + continue |
| + |
| + # Get the crash stack for this stacktrace, and extract crashing components |
| + # from it. |
| + main_stack = stacktrace.GetCrashStack() |
| + components = ParseCrashComponents(main_stack) |
| + |
| + result_for_stacktrace = FindMatchForStacktrace(stacktrace, components, |
| + parsed_regression) |
| + |
| + # If the result is empty, check the next stacktrace. Else, return the |
| + # filtered result. |
| + if not result_for_stacktrace: |
| + continue |
| + |
| + return_message = \ |
| + 'The result is a list of CLs that change the crashed files.' |
| + result = FilterAndGenerateReasonForMatches(result_for_stacktrace) |
| + return (return_message, result) |
| + |
| + # If no match is found, return the blame information for the input |
| + # callstack. |
| + return_message = \ |
| + """ |
| + There are no CLs that change the crashed files. The result is the |
| + blame information. |
| + """ |
| + result = GenerateAndFilterBlameList(callstack, parsed_crash_revision, |
| + parsed_regression) |
| + return (return_message, result) |
| + |