Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(540)

Unified Diff: tools/findit/matchset.py

Issue 421223003: [Findit] Plain objects to represent the returned result from running the algorithm, (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« tools/findit/blame.py ('K') | « tools/findit/blame.py ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/findit/matchset.py
diff --git a/tools/findit/matchset.py b/tools/findit/matchset.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b5d6ee7277e3257140ea098ffe69934c40117ce
--- /dev/null
+++ b/tools/findit/matchset.py
@@ -0,0 +1,209 @@
+# Copyright (c) 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+import re
stgao 2014/08/04 20:44:57 Add a new line before this one.
jeun 2014/08/06 21:33:04 Done.
+from threading import Lock
+import findit_for_crash_utils as findit_utils
stgao 2014/08/04 20:44:57 seems not up-to-day.
jeun 2014/08/06 21:33:05 Done.
+
+LINE_CHANGE_PRIORITY = 1
+FILE_CHANGE_PRIORITY = 2
+pl = Lock()
stgao 2014/08/04 20:44:57 Please use full name.
jeun 2014/08/06 21:33:05 Done.
+
+
+class MatchSet(object):
+ """Represents a set of matches."""
+
+ def __init__(self, codereview_api_url):
+ self.codereview_api_url = codereview_api_url
+ self.matches = {}
+ self.cls_to_avoid = set()
+ self.matches_lock = Lock()
+
+ def GenerateMatchEntry(
stgao 2014/08/04 20:44:58 I think this function is closely-related to the al
jeun 2014/08/06 21:33:05 We can talk about this. I feel like this function
+ self, revisions, cl, file_path, file_name, function, component,
+ crashed_lines, stack_frame_index, file_action, repository_parser):
+ """Generates a match object.
+
+ A match is a CL that is suspected to have caused the crash. A match object
stgao 2014/08/04 20:44:57 It might be better to move the Match class up to t
jeun 2014/08/06 21:33:04 Done.
+ contains a line that the file crashed, a name of the function, the minimum
+ distance from the crashed line and the line this file changed, list of the
+ files that this CL changes, URLs of the files the CL changes, the author of
+ the CL, the component, list of the stack numbers,the priority of the CL,
+ review address and reviewers.
+
+ Args:
+ revisions: a dictionary mapping cl's number to the revision information,
+ as returned from ParseRevisions
stgao 2014/08/04 20:44:57 ParseRevisions is unclear here.
jeun 2014/08/06 21:33:04 Done.
+ cl: a SVN revision number or git hash
+ file_path: path of the file
+ file_name: a name of the file that this CL might be the culprit cl
+ function: function that caused an crash
+ component: a component the file is from
+ crashed_lines: a list of the lines in the file that caused the crash
+ stack_frame_index: a list of positions of this file within a stack
+ file_action: whether file is modified, added or deleted
+ repository_parser: a parser object to parse line diff
+ """
+ # Check if this CL should be avoided
+ self.matches_lock.acquire()
+ if cl in self.cls_to_avoid:
stgao 2014/08/04 20:44:57 avoid -> ignore?
jeun 2014/08/06 21:33:05 Done.
+ self.matches_lock.release()
+ return
+
+ # If this CL is not already identified as suspected, create a new entry.
+ if cl not in self.matches:
+ revision = revisions[cl]
+ match = Match(revision, component)
+ message = revisions[cl]['message']
+ match.ParseMessage(message, self.codereview_api_url)
stgao 2014/08/04 20:44:58 This function call will be time-consuming as it is
jeun 2014/08/06 21:33:04 Done.
+
+ # If this match is a revert, add to the set of CLs to be avoided
+ if match.isrevert:
+ self.cls_to_avoid.add(cl)
+
+ # If this match has info on what CL it is reverted from, add that CL
+ if match.revert_of:
+ self.cls_to_avoid.add(match.revert_of)
stgao 2014/08/04 20:44:58 What if the cl revert_of was already analyzed and
jeun 2014/08/06 21:33:05 I remove all reverts at the end again.
+
+ self.matches_lock.release()
+ return
+ self.matches[cl] = match
+
+ # Else, bring in the existing result
+ else:
+ match = self.matches[cl]
+
+ self.matches_lock.release()
stgao 2014/08/04 20:44:57 If code above raise an exception, the lock won't b
jeun 2014/08/06 21:33:04 Done.
+
+ # Parse line diff information for this file
+ (diff_url, changed_line_numbers, changed_line_contents) = (
+ repository_parser.ParseLineDiff(file_path, component, file_action, cl))
+
+ # Find the intersection between the lines that this file crashed on and
+ # the changed lines
+ (line_intersection, stack_frame_index_intersection) = findit_utils.Intersection(
stgao 2014/08/04 20:44:58 style: > 80 chars.
jeun 2014/08/06 21:33:05 Done.
+ crashed_lines, stack_frame_index, changed_line_numbers)
+
+ # Find the minimum distance between the changed lines and crashed lines
+ min_distance = findit_utils.FindMinLineDistance(crashed_lines,
+ changed_line_numbers)
+
+ # Check whether this CL changes the crashed lines or not
+ if line_intersection:
+ priority = LINE_CHANGE_PRIORITY
+ else:
+ priority = FILE_CHANGE_PRIORITY
+
+ # Add the parsed information to the object
+ with self.matches_lock:
+ match.line_of_crash.append(line_intersection)
+ match.file.append(file_name)
+
+ # Update the min distance only if it is less than the current one
+ if min_distance < match.min_distance:
+ match.min_distance = min_distance
+
+ # If this CL does not change the crashed line, all occurrence of this
+ # file in the stack has the same significance
+ if not stack_frame_index_intersection:
+ stack_frame_index_intersection = stack_frame_index
+ match.stack_frame_index.append(stack_frame_index_intersection)
+ match.file_url.append(diff_url)
+
+ # Only record the highest rank of this CL
+ if priority < match.rank:
+ match.rank = priority
+ match.priorities.append(priority)
+ match.function.append(function)
+
+ def RemoveReverts(self):
+ """Removes CLs that are revert."""
+ for cl in self.matches:
+ if cl in self.cls_to_avoid:
+ del self.matches[cl]
+
+
+class Match(object):
+ """Represents a match entry.
+
+ Attributes:
+ line_of_crash: list of lines that caused crash for this CL
+ function: list of functions that caused the crash
+ min_distance: minimum difference between the lines that CL changed and
+ lines that caused the crash
+ file: list of files that the CL changed
stgao 2014/08/04 20:44:58 files?
jeun 2014/08/06 21:33:05 Done.
+ file_url: list of URLs for the file
stgao 2014/08/04 20:44:57 file_urls?
jeun 2014/08/06 21:33:05 Done.
+ owner: owner of the CL
+ component: component that this CL belongs to
+ stack_frame_index: for files that caused crash, list of where in the
stgao 2014/08/04 20:44:57 The name doesn't indicate it is a list.
jeun 2014/08/06 21:33:04 Done.
+ stackframe they occur
+ rank: the highest priority among the files the CL changes
stgao 2014/08/04 20:44:57 What's the concept of priority here? Would you min
jeun 2014/08/06 21:33:05 Done.
+ priorities: for each files, whether it changes the crashed line or is a
+ simple file change
+ url: URL of the CL
+ review_address: a codereview address that reviews this CL
stgao 2014/08/04 20:44:57 review_url?
jeun 2014/08/06 21:33:05 Done.
+ reviewers: list of people that reviewed this CL
+ reason: reason why this CL is suspected
+ """
+ REVERT_PATTERN = re.compile(r'(revert\w*) r?(\d+)', re.I)
+
+ def __init__(self, revision, component):
+ self.isrevert = False
+ self.revert_of = None
+ self.line_of_crash = []
+ self.function = []
+ self.min_distance = float('inf')
+ self.file = []
+ self.file_url = []
+ self.owner = revision['author']
+ self.component = component
+ self.stack_frame_index = []
+ self.rank = float('inf')
+ self.priorities = []
+ self.url = revision['url']
+ self.review_address = 'N/A'
+ self.reviewers = ['N/A']
+ self.reason = None
+
+ def ParseMessage(self, message, codereview_api_url):
+ """Parses the message.
+
+ It checks the message to extract the code review website and list of
+ reviewers, and it also checks if the CL is a revert of another CL.
+
+ Args:
+ message: a message to parse
+ codereview_api_url: URL to retrieve codereview data from
+ """
+ for line in message.splitlines():
+ line = line.strip()
+
+ # Check if the line has the code review information
+ if line.startswith('Review URL: '):
+
+ # Get review number for the code review site from the line
+ parts = line.split('Review URL: ')
+ self.review_address = parts[1].strip()
+ review_number = self.review_address.split('/')[-1]
stgao 2014/08/04 20:44:57 Usually, it is called issue number.
jeun 2014/08/06 21:33:04 Done.
+
+ # Get JSON from the code review site, ignore the line if it fails
+ url = codereview_api_url % review_number
+ json_string = findit_utils.GetDataFromURL(url)
+ if not json_string:
+ print 'Failed to retrieve code review information from %s' % url
+ continue
+
+ # Load the JSON from the string, and get the list of reviewers
+ code_review = findit_utils.LoadJSON(json_string)
+ if code_review:
+ self.reviewers = code_review['reviewers']
+
+ # Check if this CL is a revert of other CL
+ if line.lower().startswith('revert'):
+ self.isrevert = True
+
+ # Check if the line says what CL this CL is a revert of
+ revert = self.REVERT_PATTERN.match(line)
+ if revert:
+ self.revert_of = revert.group(2)
+ return
« tools/findit/blame.py ('K') | « tools/findit/blame.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698