Index: tools/findit/matchset.py |
diff --git a/tools/findit/matchset.py b/tools/findit/matchset.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..08f1e8bc192e5a70c20a85a60b8b2ef0b3aa233f |
--- /dev/null |
+++ b/tools/findit/matchset.py |
@@ -0,0 +1,209 @@ |
+# Copyright (c) 2014 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+import logging |
+import re |
+from threading import Lock |
+ |
+import crash_utils |
+ |
+LINE_CHANGE_PRIORITY = 1 |
+FILE_CHANGE_PRIORITY = 2 |
+ |
+ |
+class Match(object): |
+ """Represents a match entry. |
+ |
+ A match is a CL that is suspected to have caused the crash. A match object |
+ contains information about files it changes, their owners, etc. |
+ |
+ Attributes: |
+ line_of_crash: The list of lines that caused crash for this CL. |
+ function: The list of functions that caused the crash. |
+ min_distance: The minimum difference between the lines that CL changed and |
+ lines that caused the crash. |
+ files: The list of files that the CL changed. |
+ file_urls: The list of URLs for the file. |
+ owner: The owner of the CL. |
+ component: The component that this CL belongs to. |
+ stack_frame_indices: For files that caused crash, list of where in the |
+ stackframe they occur. |
+ rank: The highest priority among the files the CL changes. |
+ priorities: For each files, whether it changes the crashed line |
+ (priority = 1) or is a simple file change (priority = 2). |
+ url: URL of the CL. |
+ review_url: The codereview address that reviews this CL. |
+ reviewers: The list of people that reviewed this CL. |
+ reason: The reason why this CL is suspected. |
+ """ |
+ REVERT_PATTERN = re.compile(r'(revert\w*) r?(\d+)', re.I) |
+ |
+ def __init__(self, revision, component): |
+ self.isrevert = False |
+ self.revert_of = None |
+ self.line_of_crash = [] |
+ self.function = [] |
+ self.min_distance = float('inf') |
+ self.files = [] |
+ self.file_urls = [] |
+ self.owner = revision['author'] |
+ self.component = component |
+ self.stack_frame_indices = [] |
+ self.rank = float('inf') |
+ self.priorities = [] |
+ self.url = revision['url'] |
+ self.review_url = 'N/A' |
+ self.reviewers = ['N/A'] |
+ self.reason = None |
+ |
+ def ParseMessage(self, message, codereview_api_url): |
+ """Parses the message. |
+ |
+ It checks the message to extract the code review website and list of |
+ reviewers, and it also checks if the CL is a revert of another CL. |
+ |
+ Args: |
+ message: The message to parse. |
+ codereview_api_url: URL to retrieve codereview data from. |
+ """ |
+ for line in message.splitlines(): |
+ line = line.strip() |
+ |
+ # Check if the line has the code review information. |
+ if line.startswith('Review URL: '): |
+ |
+ # Get review number for the code review site from the line. |
+ parts = line.split('Review URL: ') |
+ self.review_url = parts[1].strip() |
+ issue_number = self.review_url.split('/')[-1] |
+ |
+ # Get JSON from the code review site, ignore the line if it fails. |
+ url = codereview_api_url % issue_number |
+ json_string = crash_utils.GetDataFromURL(url) |
+ if not json_string: |
+ logging.warning('Failed to retrieve code review information from %s', |
+ url) |
+ continue |
+ |
+ # Load the JSON from the string, and get the list of reviewers. |
+ code_review = crash_utils.LoadJSON(json_string) |
+ if code_review: |
+ self.reviewers = code_review['reviewers'] |
+ |
+ # Check if this CL is a revert of other CL. |
+ if line.lower().startswith('revert'): |
+ self.isrevert = True |
+ |
+ # Check if the line says what CL this CL is a revert of. |
+ revert = self.REVERT_PATTERN.match(line) |
+ if revert: |
+ self.revert_of = revert.group(2) |
+ return |
+ |
+ |
+class MatchSet(object): |
+ """Represents a set of matches.""" |
+ |
+ def __init__(self, codereview_api_url): |
+ self.codereview_api_url = codereview_api_url |
+ self.matches = {} |
+ self.cls_to_ignore = set() |
+ self.matches_lock = Lock() |
+ |
+ def GenerateMatchEntry( |
+ self, revisions, cl, file_path, file_name, function, component, |
+ crashed_lines, stack_frame_indices, file_action, repository_parser): |
+ """Generates a match object. |
+ |
+ Args: |
+ revisions: The dictionary mapping cl's number to the revision information, |
+ as returned from a function ParseRevisions in parser. |
+ cl: The SVN revision number or git hash. |
+ file_path: The path of the file. |
+ file_name: The name of the file that this CL might be the culprit cl. |
+ function: The function that caused an crash. |
+ component: The component the file is from. |
+ crashed_lines: The list of the lines in the file that caused the crash. |
+ stack_frame_indices: The list of positions of this file within a stack. |
+ file_action: Whether file is modified, added or deleted. |
+ repository_parser: The parser object to parse line diff. |
+ """ |
+ # Check if this CL should be avoided. |
+ with self.matches_lock: |
+ if cl in self.cls_to_ignore: |
+ return |
+ |
+ # If this CL is not already identified as suspected, create a new entry. |
+ if cl not in self.matches: |
+ revision = revisions[cl] |
+ match = Match(revision, component) |
+ message = revisions[cl]['message'] |
+ # TODO(jeun): Don't hold lock while issuing http request. |
+ match.ParseMessage(message, self.codereview_api_url) |
+ |
+ # If this match is a revert, add to the set of CLs to be avoided. |
+ if match.isrevert: |
stgao
2014/08/09 19:51:02
|isrevert| -> |is_reverted|?
|
+ self.cls_to_ignore.add(cl) |
+ |
+ # If this match has info on what CL it is reverted from, add that CL. |
+ if match.revert_of: |
+ self.cls_to_ignore.add(match.revert_of) |
+ |
+ # Return because we do not need to look at this CL anymore. |
+ return |
+ |
+ # Add this CL to the set of matches. |
+ self.matches[cl] = match |
+ |
+ # Else, bring in the existing result. |
+ else: |
+ match = self.matches[cl] |
+ |
+ # Parse line diff information for this file. |
+ (diff_url, changed_line_numbers, changed_line_contents) = ( |
+ repository_parser.ParseLineDiff(file_path, component, file_action, cl)) |
+ |
+ # Find the intersection between the lines that this file crashed on and |
+ # the changed lines. |
+ (line_intersection, stack_frame_index_intersection) = ( |
+ crash_utils.Intersection( |
+ crashed_lines, stack_frame_indices, changed_line_numbers)) |
+ |
+ # Find the minimum distance between the changed lines and crashed lines. |
+ min_distance = crash_utils.FindMinLineDistance(crashed_lines, |
+ changed_line_numbers) |
+ |
+ # Check whether this CL changes the crashed lines or not. |
+ if line_intersection: |
+ priority = LINE_CHANGE_PRIORITY |
+ else: |
+ priority = FILE_CHANGE_PRIORITY |
+ |
+ # Add the parsed information to the object. |
+ with self.matches_lock: |
+ match.line_of_crash.append(line_intersection) |
+ match.files.append(file_name) |
+ |
+ # Update the min distance only if it is less than the current one. |
+ if min_distance < match.min_distance: |
+ match.min_distance = min_distance |
+ |
+ # If this CL does not change the crashed line, all occurrence of this |
+ # file in the stack has the same significance. |
+ if not stack_frame_index_intersection: |
+ stack_frame_index_intersection = stack_frame_indices |
+ match.stack_frame_indices.append(stack_frame_index_intersection) |
+ match.file_urls.append(diff_url) |
+ |
+ # Only record the highest rank of this CL. |
+ if priority < match.rank: |
+ match.rank = priority |
+ match.priorities.append(priority) |
+ match.function.append(function) |
+ |
+ def RemoveReverts(self): |
+ """Removes CLs that are revert.""" |
+ for cl in self.matches: |
+ if cl in self.cls_to_ignore: |
+ del self.matches[cl] |