Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(144)

Unified Diff: tools/findit/matchset.py

Issue 421223003: [Findit] Plain objects to represent the returned result from running the algorithm, (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: addressed codereview Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« tools/findit/blame.py ('K') | « tools/findit/blame.py ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/findit/matchset.py
diff --git a/tools/findit/matchset.py b/tools/findit/matchset.py
new file mode 100644
index 0000000000000000000000000000000000000000..08f1e8bc192e5a70c20a85a60b8b2ef0b3aa233f
--- /dev/null
+++ b/tools/findit/matchset.py
@@ -0,0 +1,209 @@
+# Copyright (c) 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import logging
+import re
+from threading import Lock
+
+import crash_utils
+
+LINE_CHANGE_PRIORITY = 1
+FILE_CHANGE_PRIORITY = 2
+
+
+class Match(object):
+ """Represents a match entry.
+
+ A match is a CL that is suspected to have caused the crash. A match object
+ contains information about files it changes, their owners, etc.
+
+ Attributes:
+ line_of_crash: The list of lines that caused crash for this CL.
+ function: The list of functions that caused the crash.
+ min_distance: The minimum difference between the lines that CL changed and
+ lines that caused the crash.
+ files: The list of files that the CL changed.
+ file_urls: The list of URLs for the file.
+ owner: The owner of the CL.
+ component: The component that this CL belongs to.
+ stack_frame_indices: For files that caused crash, list of where in the
+ stackframe they occur.
+ rank: The highest priority among the files the CL changes.
+ priorities: For each files, whether it changes the crashed line
+ (priority = 1) or is a simple file change (priority = 2).
+ url: URL of the CL.
+ review_url: The codereview address that reviews this CL.
+ reviewers: The list of people that reviewed this CL.
+ reason: The reason why this CL is suspected.
+ """
+ REVERT_PATTERN = re.compile(r'(revert\w*) r?(\d+)', re.I)
+
+ def __init__(self, revision, component):
+ self.isrevert = False
+ self.revert_of = None
+ self.line_of_crash = []
+ self.function = []
+ self.min_distance = float('inf')
+ self.files = []
+ self.file_urls = []
+ self.owner = revision['author']
+ self.component = component
+ self.stack_frame_indices = []
+ self.rank = float('inf')
+ self.priorities = []
+ self.url = revision['url']
+ self.review_url = 'N/A'
+ self.reviewers = ['N/A']
+ self.reason = None
+
+ def ParseMessage(self, message, codereview_api_url):
+ """Parses the message.
+
+ It checks the message to extract the code review website and list of
+ reviewers, and it also checks if the CL is a revert of another CL.
+
+ Args:
+ message: The message to parse.
+ codereview_api_url: URL to retrieve codereview data from.
+ """
+ for line in message.splitlines():
+ line = line.strip()
+
+ # Check if the line has the code review information.
+ if line.startswith('Review URL: '):
+
+ # Get review number for the code review site from the line.
+ parts = line.split('Review URL: ')
+ self.review_url = parts[1].strip()
+ issue_number = self.review_url.split('/')[-1]
+
+ # Get JSON from the code review site, ignore the line if it fails.
+ url = codereview_api_url % issue_number
+ json_string = crash_utils.GetDataFromURL(url)
+ if not json_string:
+ logging.warning('Failed to retrieve code review information from %s',
+ url)
+ continue
+
+ # Load the JSON from the string, and get the list of reviewers.
+ code_review = crash_utils.LoadJSON(json_string)
+ if code_review:
+ self.reviewers = code_review['reviewers']
+
+ # Check if this CL is a revert of other CL.
+ if line.lower().startswith('revert'):
+ self.isrevert = True
+
+ # Check if the line says what CL this CL is a revert of.
+ revert = self.REVERT_PATTERN.match(line)
+ if revert:
+ self.revert_of = revert.group(2)
+ return
+
+
+class MatchSet(object):
+ """Represents a set of matches."""
+
+ def __init__(self, codereview_api_url):
+ self.codereview_api_url = codereview_api_url
+ self.matches = {}
+ self.cls_to_ignore = set()
+ self.matches_lock = Lock()
+
+ def GenerateMatchEntry(
+ self, revisions, cl, file_path, file_name, function, component,
+ crashed_lines, stack_frame_indices, file_action, repository_parser):
+ """Generates a match object.
+
+ Args:
+ revisions: The dictionary mapping cl's number to the revision information,
+ as returned from a function ParseRevisions in parser.
+ cl: The SVN revision number or git hash.
+ file_path: The path of the file.
+ file_name: The name of the file that this CL might be the culprit cl.
+ function: The function that caused an crash.
+ component: The component the file is from.
+ crashed_lines: The list of the lines in the file that caused the crash.
+ stack_frame_indices: The list of positions of this file within a stack.
+ file_action: Whether file is modified, added or deleted.
+ repository_parser: The parser object to parse line diff.
+ """
+ # Check if this CL should be avoided.
+ with self.matches_lock:
+ if cl in self.cls_to_ignore:
+ return
+
+ # If this CL is not already identified as suspected, create a new entry.
+ if cl not in self.matches:
+ revision = revisions[cl]
+ match = Match(revision, component)
+ message = revisions[cl]['message']
+ # TODO(jeun): Don't hold lock while issuing http request.
+ match.ParseMessage(message, self.codereview_api_url)
+
+ # If this match is a revert, add to the set of CLs to be avoided.
+ if match.isrevert:
stgao 2014/08/09 19:51:02 |isrevert| -> |is_reverted|?
+ self.cls_to_ignore.add(cl)
+
+ # If this match has info on what CL it is reverted from, add that CL.
+ if match.revert_of:
+ self.cls_to_ignore.add(match.revert_of)
+
+ # Return because we do not need to look at this CL anymore.
+ return
+
+ # Add this CL to the set of matches.
+ self.matches[cl] = match
+
+ # Else, bring in the existing result.
+ else:
+ match = self.matches[cl]
+
+ # Parse line diff information for this file.
+ (diff_url, changed_line_numbers, changed_line_contents) = (
+ repository_parser.ParseLineDiff(file_path, component, file_action, cl))
+
+ # Find the intersection between the lines that this file crashed on and
+ # the changed lines.
+ (line_intersection, stack_frame_index_intersection) = (
+ crash_utils.Intersection(
+ crashed_lines, stack_frame_indices, changed_line_numbers))
+
+ # Find the minimum distance between the changed lines and crashed lines.
+ min_distance = crash_utils.FindMinLineDistance(crashed_lines,
+ changed_line_numbers)
+
+ # Check whether this CL changes the crashed lines or not.
+ if line_intersection:
+ priority = LINE_CHANGE_PRIORITY
+ else:
+ priority = FILE_CHANGE_PRIORITY
+
+ # Add the parsed information to the object.
+ with self.matches_lock:
+ match.line_of_crash.append(line_intersection)
+ match.files.append(file_name)
+
+ # Update the min distance only if it is less than the current one.
+ if min_distance < match.min_distance:
+ match.min_distance = min_distance
+
+ # If this CL does not change the crashed line, all occurrence of this
+ # file in the stack has the same significance.
+ if not stack_frame_index_intersection:
+ stack_frame_index_intersection = stack_frame_indices
+ match.stack_frame_indices.append(stack_frame_index_intersection)
+ match.file_urls.append(diff_url)
+
+ # Only record the highest rank of this CL.
+ if priority < match.rank:
+ match.rank = priority
+ match.priorities.append(priority)
+ match.function.append(function)
+
+ def RemoveReverts(self):
+ """Removes CLs that are revert."""
+ for cl in self.matches:
+ if cl in self.cls_to_ignore:
+ del self.matches[cl]
« tools/findit/blame.py ('K') | « tools/findit/blame.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698