Chromium Code Reviews| Index: tools/findit/blame.py | 
| diff --git a/tools/findit/blame.py b/tools/findit/blame.py | 
| new file mode 100644 | 
| index 0000000000000000000000000000000000000000..66e02ff1f17829c85938b1f983b1118215f73080 | 
| --- /dev/null | 
| +++ b/tools/findit/blame.py | 
| @@ -0,0 +1,176 @@ | 
| +# Copyright (c) 2014 The Chromium Authors. All rights reserved. | 
| +# Use of this source code is governed by a BSD-style license that can be | 
| +# found in the LICENSE file. | 
| + | 
| +from threading import Lock, Thread | 
| + | 
| +import crash_utils | 
| +import utils | 
| + | 
| + | 
| +class Blame(object): | 
| + """Represents a blame object. | 
| + | 
| + The object contains blame information for one line of stack, and this | 
| + information is shown when there are no CLs that change the crashing files. | 
| + Attributes: | 
| + content: The content of the line to find the blame for. | 
| 
 
aarya
2014/08/14 20:22:20
s/content/line_content
 
jeun
2014/08/14 22:16:48
Done.
 
 | 
| + component_name: The name of the component this line is in. | 
| 
 
aarya
2014/08/14 21:14:39
s/this line is in/for this line.
 
jeun
2014/08/14 22:16:48
Done.
 
 | 
| + stack_frame_index: The stack frame index of this file. | 
| + file_name: The name of the file. | 
| + line_number: The line that caused a crash. | 
| + author: The author of this line on the latest revision. | 
| + crash_revision: The revision that caused the crash. | 
| + revision: The latest revision of this line before the crash revision. | 
| + url: The url of the change for the revision. | 
| + range_start: The starting range of the regression for this component. | 
| + range_end: The ending range of the regression. | 
| + | 
| + """ | 
| + | 
| + def __init__(self, content, component_name, stack_frame_index, file_name, | 
| + line_number, author, crash_revision, revision, url, | 
| + range_start, range_end): | 
| + # Set all the variables from the arguments. | 
| + self.content = content | 
| + self.component_name = component_name | 
| + self.stack_frame_index = stack_frame_index | 
| + self.file = file_name | 
| + self.line_number = line_number | 
| + self.author = author | 
| + self.revision = revision | 
| + self.url = url | 
| + self.distance = crash_utils.INFINITY | 
| + self.range_start = range_start | 
| + self.range_end = range_end | 
| + revision = int(revision) | 
| + | 
| + # Calculate the distance, where it measures how far the last revision is | 
| 
 
aarya
2014/08/14 20:22:20
Can you stop by and help me understand what do you
 
jeun
2014/08/14 22:16:48
Done.
 
 | 
| + # from the regression. | 
| + if range_start and range_end: | 
| + self.distance = min(abs(revision - range_start), | 
| + abs(revision - range_end)) | 
| + | 
| + # If the regression is in SVN but it does not have regression info, check | 
| + # how far the last revision is from crash revision. | 
| + elif not utils.IsGitHash(crash_revision): | 
| + self.distance = abs(int(crash_revision) - revision) | 
| + | 
| + | 
| +class BlameList(object): | 
| + """Represents a list of blame objects. | 
| + | 
| + Thread-safe. | 
| + """ | 
| + | 
| + def __init__(self): | 
| + self.blame_list = [] | 
| + self.blame_list_lock = Lock() | 
| + | 
| + def __getitem__(self, index): | 
| + return self.blame_list[index] | 
| + | 
| + def FindBlame(self, callstack, crash_revision_dict, regression_dict, parsers, | 
| + top_n_frames=10): | 
| + """Given a stack within a stacktrace, retrieves blame information. | 
| + | 
| + Only either first 'top_n_frames' or the length of stack, whichever is | 
| + shorter, results are returned. The default value of 'top_n_frames' is 10. | 
| + | 
| + Args: | 
| + callstack: The list of stack frames. | 
| + crash_revision_dict: A dictionary that maps component to its crash | 
| + revision. | 
| + regression_dict: A dictionary that maps component to its revision | 
| + range. | 
| + parsers: A list of two parsers, svn_parser and git_parser | 
| + top_n_frames: A number of stack frames to show the blame result for. | 
| + """ | 
| + # Only return blame information for first 'top_n_frames' frames. | 
| + stack_frames = callstack.GetTopNFrames(top_n_frames) | 
| + | 
| + threads = [] | 
| + # Iterate through frames in stack. | 
| + for stack_frame in stack_frames: | 
| + # If the component this line is from does not have a crash revision, | 
| + # It is not possible to get blame information so ignore this line. | 
| 
 
aarya
2014/08/14 20:22:20
s/It/it
s/so/, so
 
jeun
2014/08/14 22:16:47
Done.
 
 | 
| + component_path = stack_frame.component_path | 
| + if component_path not in crash_revision_dict: | 
| + continue | 
| + | 
| + crash_revision = crash_revision_dict[component_path]['revision'] | 
| + range_start = None | 
| + range_end = None | 
| + is_git = utils.IsGitHash(crash_revision) | 
| + | 
| + repository_parser = parsers[1 if is_git else 0] | 
| 
 
aarya
2014/08/14 20:22:20
parsers should be a dict. parsers['git'] and parse
 
jeun
2014/08/14 22:16:47
Done.
 
 | 
| + | 
| + # If the revision is in SVN, and if regression information is available, | 
| + # get it. Not for Git because we cannot calculate the distance. | 
| + if not is_git: | 
| + if regression_dict and component_path in regression_dict: | 
| + component_object = regression_dict[component_path] | 
| + range_start = int(component_object['old_revision']) | 
| + range_end = int(component_object['new_revision']) | 
| + | 
| + # Generate blame entry, one thread for one entry. | 
| + blame_thread = Thread( | 
| + target=self.__GenerateBlameEntry, | 
| + args=[repository_parser, stack_frame, crash_revision, | 
| + range_start, range_end]) | 
| + threads.append(blame_thread) | 
| + blame_thread.start() | 
| + | 
| + # Join the results before returning. | 
| + for blame_thread in threads: | 
| + blame_thread.join() | 
| + | 
| + def __GenerateBlameEntry(self, repository_parser, stack_frame, | 
| 
 
aarya
2014/08/14 20:22:21
Why does the name start with __
 
jeun
2014/08/14 22:16:48
It is because the function is used only in this cl
 
 | 
| + crash_revision, range_start, range_end): | 
| + """Generates blame list from the arguments.""" | 
| + stack_frame_index = stack_frame.index | 
| + component_path = stack_frame.component_path | 
| + component_name = stack_frame.component_name | 
| + file_name = stack_frame.file_name | 
| + file_path = stack_frame.file_path | 
| + line = stack_frame.crashed_line_number | 
| 
 
aarya
2014/08/14 20:22:20
s/crashed_line_number/crash_line_number
s/line/cra
 
jeun
2014/08/14 22:16:48
Done.
 
 | 
| + | 
| + # Parse blame information. | 
| + parsed_blame_info = repository_parser.ParseBlameInfo( | 
| + component_path, file_path, line, crash_revision) | 
| + | 
| + # If it fails to retrieve information, do not do anything. | 
| + if not parsed_blame_info: | 
| 
 
aarya
2014/08/14 20:22:21
can you check list length so that we don't error o
 
jeun
2014/08/14 22:16:48
Done.
 
 | 
| + return | 
| + | 
| + # Create blame object from the parsed info and add it to the list. | 
| + (content, revision, author, url) = parsed_blame_info | 
| + blame = Blame(content, component_name, stack_frame_index, file_name, line, | 
| + author, crash_revision, revision, url, | 
| + range_start, range_end) | 
| + | 
| + with self.blame_list_lock: | 
| + self.blame_list.append(blame) | 
| + | 
| + def FilterAndSortBlameList(self): | 
| + """Filters and sorts the blame list.""" | 
| + # Sort the blame list by its distance, and its position in stack. | 
| + self.blame_list.sort(key=lambda blame: (blame.distance, | 
| + blame.stack_frame_index)) | 
| + | 
| + filtered_blame_list = [] | 
| + | 
| + for blame in self.blame_list: | 
| + # If regression information is available, check if it needs to be | 
| + # filtered. | 
| + if blame.range_start and blame.range_end: | 
| + | 
| + # Discards results that are too far from the regression. | 
| + # For example, if regression is 10000:11000, it is very not | 
| + # likely that a commit from revision 1000 would have caused a crash. | 
| + if (blame.distance > blame.range_start / 4) and ( | 
| 
 
aarya
2014/08/14 20:22:21
This rule makes no sense, why are you adding this
 
jeun
2014/08/14 22:16:48
Done.
 
 | 
| + blame.distance > blame.range_end / 4): | 
| + continue | 
| + | 
| + filtered_blame_list.append(blame) | 
| + self.blame_list = filtered_blame_list |