tools/findit/blame.py - Issue 421223003: [Findit] Plain objects to represent the returned result from running the algorithm,

Unified Diff: tools/findit/blame.py

Issue 421223003: [Findit] Plain objects to represent the returned result from running the algorithm, (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: addressed codereview. Created 6 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tools/findit/blame.py

diff --git a/tools/findit/blame.py b/tools/findit/blame.py

new file mode 100644

index 0000000000000000000000000000000000000000..66e02ff1f17829c85938b1f983b1118215f73080

--- /dev/null

+++ b/tools/findit/blame.py

@@ -0,0 +1,176 @@

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+from threading import Lock, Thread

+import crash_utils

+import utils

+class Blame(object):

+ """Represents a blame object.

+ The object contains blame information for one line of stack, and this

+ information is shown when there are no CLs that change the crashing files.

+ Attributes:

+ content: The content of the line to find the blame for.

aarya 2014/08/14 20:22:20 s/content/line_content

jeun 2014/08/14 22:16:48 Done.

+ component_name: The name of the component this line is in.

aarya 2014/08/14 21:14:39 s/this line is in/for this line.

jeun 2014/08/14 22:16:48 Done.

+ stack_frame_index: The stack frame index of this file.

+ file_name: The name of the file.

+ line_number: The line that caused a crash.

+ author: The author of this line on the latest revision.

+ crash_revision: The revision that caused the crash.

+ revision: The latest revision of this line before the crash revision.

+ url: The url of the change for the revision.

+ range_start: The starting range of the regression for this component.

+ range_end: The ending range of the regression.

+ """

+ def __init__(self, content, component_name, stack_frame_index, file_name,

+ line_number, author, crash_revision, revision, url,

+ range_start, range_end):

+ # Set all the variables from the arguments.

+ self.content = content

+ self.component_name = component_name

+ self.stack_frame_index = stack_frame_index

+ self.file = file_name

+ self.line_number = line_number

+ self.author = author

+ self.revision = revision

+ self.url = url

+ self.distance = crash_utils.INFINITY

+ self.range_start = range_start

+ self.range_end = range_end

+ revision = int(revision)

+ # Calculate the distance, which measures how far the last revision is

aarya 2014/08/14 20:22:20 Can you stop by and help me understand what do you

jeun 2014/08/14 22:16:48 Done.

+ # from the regression.

+ if range_start and range_end:

+ self.distance = min(abs(revision - range_start),

+ abs(revision - range_end))

+ # If the regression is in SVN but it does not have regression info, check

+ # how far the last revision is from crash revision.

+ elif not utils.IsGitHash(crash_revision):

+ self.distance = abs(int(crash_revision) - revision)

+class BlameList(object):

+ """Represents a list of blame objects.

+ Thread-safe.

+ """

+ def __init__(self):

+ self.blame_list = []

+ self.blame_list_lock = Lock()

+ def __getitem__(self, index):

+ return self.blame_list[index]

+ def FindBlame(self, callstack, crash_revision_dict, regression_dict, parsers,

+ top_n_frames=10):

+ """Given a stack within a stacktrace, retrieves blame information.

+ Blame entries are generated only for the first 'top_n_frames' frames, or

+ for the whole stack if it is shorter. 'top_n_frames' defaults to 10.

+ Args:

+ callstack: The list of stack frames.

+ crash_revision_dict: A dictionary that maps component to its crash

+ revision.

+ regression_dict: A dictionary that maps component to its revision

+ range.

+ parsers: A list of two parsers: svn_parser at index 0, git_parser at 1.

+ top_n_frames: A number of stack frames to show the blame result for.

+ """

+ # Only return blame information for first 'top_n_frames' frames.

+ stack_frames = callstack.GetTopNFrames(top_n_frames)

+ threads = []

+ # Iterate through frames in stack.

+ for stack_frame in stack_frames:

+ # If the component this line is from does not have a crash revision,

+ # It is not possible to get blame information so ignore this line.

aarya 2014/08/14 20:22:20 s/It/it s/so/, so

jeun 2014/08/14 22:16:47 Done.

+ component_path = stack_frame.component_path

+ if component_path not in crash_revision_dict:

+ continue

+ crash_revision = crash_revision_dict[component_path]['revision']

+ range_start = None

+ range_end = None

+ is_git = utils.IsGitHash(crash_revision)

+ repository_parser = parsers[1 if is_git else 0]

aarya 2014/08/14 20:22:20 parsers should be a dict. parsers['git'] and parse

jeun 2014/08/14 22:16:47 Done.

+ # If the revision is in SVN, and if regression information is available,

+ # get it. Not for Git because we cannot calculate the distance.

+ if not is_git:

+ if regression_dict and component_path in regression_dict:

+ component_object = regression_dict[component_path]

+ range_start = int(component_object['old_revision'])

+ range_end = int(component_object['new_revision'])

+ # Generate blame entry, one thread for one entry.

+ blame_thread = Thread(

+ target=self.__GenerateBlameEntry,

+ args=[repository_parser, stack_frame, crash_revision,

+ range_start, range_end])

+ threads.append(blame_thread)

+ blame_thread.start()

+ # Join the results before returning.

+ for blame_thread in threads:

+ blame_thread.join()

+ def __GenerateBlameEntry(self, repository_parser, stack_frame,

aarya 2014/08/14 20:22:21 Why does the name start with __

jeun 2014/08/14 22:16:48 It is because the function is used only in this cl

+ crash_revision, range_start, range_end):

+ """Generates one blame entry from the arguments and adds it to the list."""

+ stack_frame_index = stack_frame.index

+ component_path = stack_frame.component_path

+ component_name = stack_frame.component_name

+ file_name = stack_frame.file_name

+ file_path = stack_frame.file_path

+ line = stack_frame.crashed_line_number

aarya 2014/08/14 20:22:20 s/crashed_line_number/crash_line_number s/line/cra

jeun 2014/08/14 22:16:48 Done.

+ # Parse blame information.

+ parsed_blame_info = repository_parser.ParseBlameInfo(

+ component_path, file_path, line, crash_revision)

+ # If it fails to retrieve information, do not do anything.

+ if not parsed_blame_info:

aarya 2014/08/14 20:22:21 can you check list length so that we don't error o

jeun 2014/08/14 22:16:48 Done.

+ return

+ # Create blame object from the parsed info and add it to the list.

+ (content, revision, author, url) = parsed_blame_info

+ blame = Blame(content, component_name, stack_frame_index, file_name, line,

+ author, crash_revision, revision, url,

+ range_start, range_end)

+ with self.blame_list_lock:

+ self.blame_list.append(blame)

+ def FilterAndSortBlameList(self):

+ """Filters and sorts the blame list."""

+ # Sort the blame list by its distance, and its position in stack.

+ self.blame_list.sort(key=lambda blame: (blame.distance,

+ blame.stack_frame_index))

+ filtered_blame_list = []

+ for blame in self.blame_list:

+ # If regression information is available, check if it needs to be

+ # filtered.

+ if blame.range_start and blame.range_end:

+ # Discards results that are too far from the regression.

+ # For example, if regression is 10000:11000, it is highly

+ # unlikely that a commit from revision 1000 would have caused a crash.

+ if (blame.distance > blame.range_start / 4) and (

aarya 2014/08/14 20:22:21 This rule makes no sense, why are you adding this

jeun 2014/08/14 22:16:48 Done.

+ blame.distance > blame.range_end / 4):

+ continue

+ filtered_blame_list.append(blame)

+ self.blame_list = filtered_blame_list

« no previous file with comments | « no previous file | tools/findit/match_set.py » ('j') | tools/findit/match_set.py » ('J')