Chromium Code Reviews| Index: tools/findit/matchset.py |
| diff --git a/tools/findit/matchset.py b/tools/findit/matchset.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..8b5d6ee7277e3257140ea098ffe69934c40117ce |
| --- /dev/null |
| +++ b/tools/findit/matchset.py |
| @@ -0,0 +1,209 @@ |
| +# Copyright (c) 2014 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| +import re |
|
stgao
2014/08/04 20:44:57
Add a new line before this one.
jeun
2014/08/06 21:33:04
Done.
|
| +from threading import Lock |
| +import findit_for_crash_utils as findit_utils |
|
stgao
2014/08/04 20:44:57
seems not up-to-day.
jeun
2014/08/06 21:33:05
Done.
|
| + |
| +LINE_CHANGE_PRIORITY = 1 |
| +FILE_CHANGE_PRIORITY = 2 |
| +pl = Lock() |
|
stgao
2014/08/04 20:44:57
Please use full name.
jeun
2014/08/06 21:33:05
Done.
|
| + |
| + |
| +class MatchSet(object): |
| + """Represents a set of matches.""" |
| + |
| + def __init__(self, codereview_api_url): |
| + self.codereview_api_url = codereview_api_url |
| + self.matches = {} |
| + self.cls_to_avoid = set() |
| + self.matches_lock = Lock() |
| + |
| + def GenerateMatchEntry( |
|
stgao
2014/08/04 20:44:58
I think this function is closely-related to the al
jeun
2014/08/06 21:33:05
We can talk about this. I feel like this function
|
| + self, revisions, cl, file_path, file_name, function, component, |
| + crashed_lines, stack_frame_index, file_action, repository_parser): |
| + """Generates a match object. |
| + |
| + A match is a CL that is suspected to have caused the crash. A match object |
|
stgao
2014/08/04 20:44:57
It might be better to move the Match class up to t
jeun
2014/08/06 21:33:04
Done.
|
| + contains a line that the file crashed, a name of the function, the minimum |
| + distance from the crashed line and the line this file changed, list of the |
| + files that this CL changes, URLs of the files the CL changes, the author of |
| + the CL, the component, list of the stack numbers,the priority of the CL, |
| + review address and reviewers. |
| + |
| + Args: |
| + revisions: a dictionary mapping cl's number to the revision information, |
| + as returned from ParseRevisions |
|
stgao
2014/08/04 20:44:57
ParseRevisions is unclear here.
jeun
2014/08/06 21:33:04
Done.
|
| + cl: a SVN revision number or git hash |
| + file_path: path of the file |
| + file_name: a name of the file that this CL might be the culprit cl |
| + function: function that caused an crash |
| + component: a component the file is from |
| + crashed_lines: a list of the lines in the file that caused the crash |
| + stack_frame_index: a list of positions of this file within a stack |
| + file_action: whether file is modified, added or deleted |
| + repository_parser: a parser object to parse line diff |
| + """ |
| + # Check if this CL should be avoided |
| + self.matches_lock.acquire() |
| + if cl in self.cls_to_avoid: |
|
stgao
2014/08/04 20:44:57
avoid -> ignore?
jeun
2014/08/06 21:33:05
Done.
|
| + self.matches_lock.release() |
| + return |
| + |
| + # If this CL is not already identified as suspected, create a new entry. |
| + if cl not in self.matches: |
| + revision = revisions[cl] |
| + match = Match(revision, component) |
| + message = revisions[cl]['message'] |
| + match.ParseMessage(message, self.codereview_api_url) |
|
stgao
2014/08/04 20:44:58
This function call will be time-consuming as it is
jeun
2014/08/06 21:33:04
Done.
|
| + |
| + # If this match is a revert, add to the set of CLs to be avoided |
| + if match.isrevert: |
| + self.cls_to_avoid.add(cl) |
| + |
| + # If this match has info on what CL it is reverted from, add that CL |
| + if match.revert_of: |
| + self.cls_to_avoid.add(match.revert_of) |
|
stgao
2014/08/04 20:44:58
What if the cl revert_of was already analyzed and
jeun
2014/08/06 21:33:05
I remove all reverts at the end again.
|
| + |
| + self.matches_lock.release() |
| + return |
| + self.matches[cl] = match |
| + |
| + # Else, bring in the existing result |
| + else: |
| + match = self.matches[cl] |
| + |
| + self.matches_lock.release() |
|
stgao
2014/08/04 20:44:57
If code above raise an exception, the lock won't b
jeun
2014/08/06 21:33:04
Done.
|
| + |
| + # Parse line diff information for this file |
| + (diff_url, changed_line_numbers, changed_line_contents) = ( |
| + repository_parser.ParseLineDiff(file_path, component, file_action, cl)) |
| + |
| + # Find the intersection between the lines that this file crashed on and |
| + # the changed lines |
| + (line_intersection, stack_frame_index_intersection) = findit_utils.Intersection( |
|
stgao
2014/08/04 20:44:58
style: > 80 chars.
jeun
2014/08/06 21:33:05
Done.
|
| + crashed_lines, stack_frame_index, changed_line_numbers) |
| + |
| + # Find the minimum distance between the changed lines and crashed lines |
| + min_distance = findit_utils.FindMinLineDistance(crashed_lines, |
| + changed_line_numbers) |
| + |
| + # Check whether this CL changes the crashed lines or not |
| + if line_intersection: |
| + priority = LINE_CHANGE_PRIORITY |
| + else: |
| + priority = FILE_CHANGE_PRIORITY |
| + |
| + # Add the parsed information to the object |
| + with self.matches_lock: |
| + match.line_of_crash.append(line_intersection) |
| + match.file.append(file_name) |
| + |
| + # Update the min distance only if it is less than the current one |
| + if min_distance < match.min_distance: |
| + match.min_distance = min_distance |
| + |
| + # If this CL does not change the crashed line, all occurrence of this |
| + # file in the stack has the same significance |
| + if not stack_frame_index_intersection: |
| + stack_frame_index_intersection = stack_frame_index |
| + match.stack_frame_index.append(stack_frame_index_intersection) |
| + match.file_url.append(diff_url) |
| + |
| + # Only record the highest rank of this CL |
| + if priority < match.rank: |
| + match.rank = priority |
| + match.priorities.append(priority) |
| + match.function.append(function) |
| + |
| + def RemoveReverts(self): |
| + """Removes CLs that are revert.""" |
| + for cl in self.matches: |
| + if cl in self.cls_to_avoid: |
| + del self.matches[cl] |
| + |
| + |
| +class Match(object): |
| + """Represents a match entry. |
| + |
| + Attributes: |
| + line_of_crash: list of lines that caused crash for this CL |
| + function: list of functions that caused the crash |
| + min_distance: minimum difference between the lines that CL changed and |
| + lines that caused the crash |
| + file: list of files that the CL changed |
|
stgao
2014/08/04 20:44:58
files?
jeun
2014/08/06 21:33:05
Done.
|
| + file_url: list of URLs for the file |
|
stgao
2014/08/04 20:44:57
file_urls?
jeun
2014/08/06 21:33:05
Done.
|
| + owner: owner of the CL |
| + component: component that this CL belongs to |
| + stack_frame_index: for files that caused crash, list of where in the |
|
stgao
2014/08/04 20:44:57
The name doesn't indicate it is a list.
jeun
2014/08/06 21:33:04
Done.
|
| + stackframe they occur |
| + rank: the highest priority among the files the CL changes |
|
stgao
2014/08/04 20:44:57
What's the concept of priority here?
Would you min
jeun
2014/08/06 21:33:05
Done.
|
| + priorities: for each files, whether it changes the crashed line or is a |
| + simple file change |
| + url: URL of the CL |
| + review_address: a codereview address that reviews this CL |
|
stgao
2014/08/04 20:44:57
review_url?
jeun
2014/08/06 21:33:05
Done.
|
| + reviewers: list of people that reviewed this CL |
| + reason: reason why this CL is suspected |
| + """ |
| + REVERT_PATTERN = re.compile(r'(revert\w*) r?(\d+)', re.I) |
| + |
| + def __init__(self, revision, component): |
| + self.isrevert = False |
| + self.revert_of = None |
| + self.line_of_crash = [] |
| + self.function = [] |
| + self.min_distance = float('inf') |
| + self.file = [] |
| + self.file_url = [] |
| + self.owner = revision['author'] |
| + self.component = component |
| + self.stack_frame_index = [] |
| + self.rank = float('inf') |
| + self.priorities = [] |
| + self.url = revision['url'] |
| + self.review_address = 'N/A' |
| + self.reviewers = ['N/A'] |
| + self.reason = None |
| + |
| + def ParseMessage(self, message, codereview_api_url): |
| + """Parses the message. |
| + |
| + It checks the message to extract the code review website and list of |
| + reviewers, and it also checks if the CL is a revert of another CL. |
| + |
| + Args: |
| + message: a message to parse |
| + codereview_api_url: URL to retrieve codereview data from |
| + """ |
| + for line in message.splitlines(): |
| + line = line.strip() |
| + |
| + # Check if the line has the code review information |
| + if line.startswith('Review URL: '): |
| + |
| + # Get review number for the code review site from the line |
| + parts = line.split('Review URL: ') |
| + self.review_address = parts[1].strip() |
| + review_number = self.review_address.split('/')[-1] |
|
stgao
2014/08/04 20:44:57
Usually, it is called issue number.
jeun
2014/08/06 21:33:04
Done.
|
| + |
| + # Get JSON from the code review site, ignore the line if it fails |
| + url = codereview_api_url % review_number |
| + json_string = findit_utils.GetDataFromURL(url) |
| + if not json_string: |
| + print 'Failed to retrieve code review information from %s' % url |
| + continue |
| + |
| + # Load the JSON from the string, and get the list of reviewers |
| + code_review = findit_utils.LoadJSON(json_string) |
| + if code_review: |
| + self.reviewers = code_review['reviewers'] |
| + |
| + # Check if this CL is a revert of other CL |
| + if line.lower().startswith('revert'): |
| + self.isrevert = True |
| + |
| + # Check if the line says what CL this CL is a revert of |
| + revert = self.REVERT_PATTERN.match(line) |
| + if revert: |
| + self.revert_of = revert.group(2) |
| + return |