tools/findit/matchset.py - Issue 421223003: [Findit] Plain objects to represent the returned result from running the algorithm,

Unified Diff: tools/findit/matchset.py

Issue 421223003: [Findit] Plain objects to represent the returned result from running the algorithm, (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 6 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tools/findit/matchset.py

diff --git a/tools/findit/matchset.py b/tools/findit/matchset.py

new file mode 100644

index 0000000000000000000000000000000000000000..8b5d6ee7277e3257140ea098ffe69934c40117ce

--- /dev/null

+++ b/tools/findit/matchset.py

@@ -0,0 +1,209 @@

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+import re

stgao 2014/08/04 20:44:57 Add a new line before this one.

jeun 2014/08/06 21:33:04 Done.

+from threading import Lock

+import findit_for_crash_utils as findit_utils

stgao 2014/08/04 20:44:57 seems not up-to-day.

jeun 2014/08/06 21:33:05 Done.

+LINE_CHANGE_PRIORITY = 1

+FILE_CHANGE_PRIORITY = 2

+pl = Lock()

stgao 2014/08/04 20:44:57 Please use full name.

jeun 2014/08/06 21:33:05 Done.

+class MatchSet(object):

+ """Represents a set of matches."""

+ def __init__(self, codereview_api_url):

+ self.codereview_api_url = codereview_api_url

+ self.matches = {}

+ self.cls_to_avoid = set()

+ self.matches_lock = Lock()

+ def GenerateMatchEntry(

stgao 2014/08/04 20:44:58 I think this function is closely-related to the al

jeun 2014/08/06 21:33:05 We can talk about this. I feel like this function

+ self, revisions, cl, file_path, file_name, function, component,

+ crashed_lines, stack_frame_index, file_action, repository_parser):

+ """Generates a match object.

+ A match is a CL that is suspected to have caused the crash. A match object

stgao 2014/08/04 20:44:57 It might be better to move the Match class up to t

jeun 2014/08/06 21:33:04 Done.

+ contains a line that the file crashed, a name of the function, the minimum

+ distance from the crashed line and the line this file changed, list of the

+ files that this CL changes, URLs of the files the CL changes, the author of

+ the CL, the component, list of the stack numbers,the priority of the CL,

+ review address and reviewers.

+ Args:

+ revisions: a dictionary mapping cl's number to the revision information,

+ as returned from ParseRevisions

stgao 2014/08/04 20:44:57 ParseRevisions is unclear here.

jeun 2014/08/06 21:33:04 Done.

+ cl: a SVN revision number or git hash

+ file_path: path of the file

+ file_name: a name of the file that this CL might be the culprit cl

+ function: function that caused an crash

+ component: a component the file is from

+ crashed_lines: a list of the lines in the file that caused the crash

+ stack_frame_index: a list of positions of this file within a stack

+ file_action: whether file is modified, added or deleted

+ repository_parser: a parser object to parse line diff

+ """

+ # Check if this CL should be avoided

+ self.matches_lock.acquire()

+ if cl in self.cls_to_avoid:

stgao 2014/08/04 20:44:57 avoid -> ignore?

jeun 2014/08/06 21:33:05 Done.

+ self.matches_lock.release()

+ return

+ # If this CL is not already identified as suspected, create a new entry.

+ if cl not in self.matches:

+ revision = revisions[cl]

+ match = Match(revision, component)

+ message = revisions[cl]['message']

+ match.ParseMessage(message, self.codereview_api_url)

stgao 2014/08/04 20:44:58 This function call will be time-consuming as it is

jeun 2014/08/06 21:33:04 Done.

+ # If this match is a revert, add to the set of CLs to be avoided

+ if match.isrevert:

+ self.cls_to_avoid.add(cl)

+ # If this match has info on what CL it is reverted from, add that CL

+ if match.revert_of:

+ self.cls_to_avoid.add(match.revert_of)

stgao 2014/08/04 20:44:58 What if the cl revert_of was already analyzed and

jeun 2014/08/06 21:33:05 I remove all reverts at the end again.

+ self.matches_lock.release()

+ return

+ self.matches[cl] = match

+ # Else, bring in the existing result

+ else:

+ match = self.matches[cl]

+ self.matches_lock.release()

stgao 2014/08/04 20:44:57 If code above raise an exception, the lock won't b

jeun 2014/08/06 21:33:04 Done.

+ # Parse line diff information for this file

+ (diff_url, changed_line_numbers, changed_line_contents) = (

+ repository_parser.ParseLineDiff(file_path, component, file_action, cl))

+ # Find the intersection between the lines that this file crashed on and

+ # the changed lines

+ (line_intersection, stack_frame_index_intersection) = findit_utils.Intersection(

stgao 2014/08/04 20:44:58 style: > 80 chars.

jeun 2014/08/06 21:33:05 Done.

+ crashed_lines, stack_frame_index, changed_line_numbers)

+ # Find the minimum distance between the changed lines and crashed lines

+ min_distance = findit_utils.FindMinLineDistance(crashed_lines,

+ changed_line_numbers)

+ # Check whether this CL changes the crashed lines or not

+ if line_intersection:

+ priority = LINE_CHANGE_PRIORITY

+ else:

+ priority = FILE_CHANGE_PRIORITY

+ # Add the parsed information to the object

+ with self.matches_lock:

+ match.line_of_crash.append(line_intersection)

+ match.file.append(file_name)

+ # Update the min distance only if it is less than the current one

+ if min_distance < match.min_distance:

+ match.min_distance = min_distance

+ # If this CL does not change the crashed line, all occurrence of this

+ # file in the stack has the same significance

+ if not stack_frame_index_intersection:

+ stack_frame_index_intersection = stack_frame_index

+ match.stack_frame_index.append(stack_frame_index_intersection)

+ match.file_url.append(diff_url)

+ # Only record the highest rank of this CL

+ if priority < match.rank:

+ match.rank = priority

+ match.priorities.append(priority)

+ match.function.append(function)

+ def RemoveReverts(self):

+ """Removes CLs that are revert."""

+ for cl in self.matches:

+ if cl in self.cls_to_avoid:

+ del self.matches[cl]

+class Match(object):

+ """Represents a match entry.

+ Attributes:

+ line_of_crash: list of lines that caused crash for this CL

+ function: list of functions that caused the crash

+ min_distance: minimum difference between the lines that CL changed and

+ lines that caused the crash

+ file: list of files that the CL changed

stgao 2014/08/04 20:44:58 files?

jeun 2014/08/06 21:33:05 Done.

+ file_url: list of URLs for the file

stgao 2014/08/04 20:44:57 file_urls?

jeun 2014/08/06 21:33:05 Done.

+ owner: owner of the CL

+ component: component that this CL belongs to

+ stack_frame_index: for files that caused crash, list of where in the

stgao 2014/08/04 20:44:57 The name doesn't indicate it is a list.

jeun 2014/08/06 21:33:04 Done.

+ stackframe they occur

+ rank: the highest priority among the files the CL changes

stgao 2014/08/04 20:44:57 What's the concept of priority here? Would you min

jeun 2014/08/06 21:33:05 Done.

+ priorities: for each files, whether it changes the crashed line or is a

+ simple file change

+ url: URL of the CL

+ review_address: a codereview address that reviews this CL

stgao 2014/08/04 20:44:57 review_url?

jeun 2014/08/06 21:33:05 Done.

+ reviewers: list of people that reviewed this CL

+ reason: reason why this CL is suspected

+ """

+ REVERT_PATTERN = re.compile(r'(revert\w*) r?(\d+)', re.I)

+ def __init__(self, revision, component):

+ self.isrevert = False

+ self.revert_of = None

+ self.line_of_crash = []

+ self.function = []

+ self.min_distance = float('inf')

+ self.file = []

+ self.file_url = []

+ self.owner = revision['author']

+ self.component = component

+ self.stack_frame_index = []

+ self.rank = float('inf')

+ self.priorities = []

+ self.url = revision['url']

+ self.review_address = 'N/A'

+ self.reviewers = ['N/A']

+ self.reason = None

+ def ParseMessage(self, message, codereview_api_url):

+ """Parses the message.

+ It checks the message to extract the code review website and list of

+ reviewers, and it also checks if the CL is a revert of another CL.

+ Args:

+ message: a message to parse

+ codereview_api_url: URL to retrieve codereview data from

+ """

+ for line in message.splitlines():

+ line = line.strip()

+ # Check if the line has the code review information

+ if line.startswith('Review URL: '):

+ # Get review number for the code review site from the line

+ parts = line.split('Review URL: ')

+ self.review_address = parts[1].strip()

+ review_number = self.review_address.split('/')[-1]

stgao 2014/08/04 20:44:57 Usually, it is called issue number.

jeun 2014/08/06 21:33:04 Done.

+ # Get JSON from the code review site, ignore the line if it fails

+ url = codereview_api_url % review_number

+ json_string = findit_utils.GetDataFromURL(url)

+ if not json_string:

+ print 'Failed to retrieve code review information from %s' % url

+ continue

+ # Load the JSON from the string, and get the list of reviewers

+ code_review = findit_utils.LoadJSON(json_string)

+ if code_review:

+ self.reviewers = code_review['reviewers']

+ # Check if this CL is a revert of other CL

+ if line.lower().startswith('revert'):

+ self.isrevert = True

+ # Check if the line says what CL this CL is a revert of

+ revert = self.REVERT_PATTERN.match(line)

+ if revert:

+ self.revert_of = revert.group(2)

+ return

« tools/findit/blame.py ('K') | « tools/findit/blame.py ('k') | no next file » | no next file with comments »