Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. | |
|
stgao
2014/08/12 19:12:18
Rename file to "match_set.py"?
jeun
2014/08/12 20:21:05
Done.
| |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 import logging | |
| 6 import re | |
| 7 from threading import Lock | |
| 8 | |
| 9 import crash_utils | |
| 10 | |
# Priorities assigned to each file a suspected CL touches. A smaller number is
# a stronger signal: MatchSet keeps the smallest priority seen as the rank of
# the CL.
# The CL changes at least one of the lines that crashed.
LINE_CHANGE_PRIORITY = 1
# The CL changes the file, but none of the crashed lines.
FILE_CHANGE_PRIORITY = 2
| 13 | |
| 14 | |
class Match(object):
  """Represents a match entry.

  A match is a CL that is suspected to have caused the crash. A match object
  contains information about files it changes, their owners, etc.

  Attributes:
    is_reverted: True if the commit message has a line starting with 'revert'
                 (case-insensitive), i.e. this CL looks like a revert.
    revert_of: The revision number this CL reverts, as a string, if the
               message names one; None otherwise.
    line_of_crash: The list of lines that caused crash for this CL.
    function: The list of functions that caused the crash.
    min_distance: The minimum difference between the lines that CL changed and
                  lines that caused the crash.
    files: The list of files that the CL changed.
    file_urls: The list of URLs for the file.
    owner: The owner of the CL.
    component_name: The name of the component that this CL belongs to.
    stack_frame_indices: For files that caused crash, list of where in the
                         stackframe they occur.
    rank: The highest priority among the files the CL changes.
    priorities: For each file, whether it changes the crashed line
                (priority = 1) or is a simple file change (priority = 2).
    url: URL of the CL.
    review_url: The codereview address that reviews this CL.
    reviewers: The list of people that reviewed this CL.
    reason: The reason why this CL is suspected.
  """
  # Matches e.g. 'Revert 12345' or 'Reverted r12345'; group 2 is the revision.
  REVERT_PATTERN = re.compile(r'(revert\w*) r?(\d+)', re.I)

  def __init__(self, revision, component_name):
    """Initializes an empty match for one CL.

    Args:
      revision: Mapping with the CL's information; the 'author' and 'url'
                entries are read here.
      component_name: The name of the component that this CL belongs to.
    """
    self.is_reverted = False
    self.revert_of = None
    self.line_of_crash = []
    self.function = []
    self.min_distance = crash_utils.INFINITY
    self.files = []
    self.file_urls = []
    self.owner = revision['author']
    self.component_name = component_name
    self.stack_frame_indices = []
    self.rank = crash_utils.INFINITY
    self.priorities = []
    self.url = revision['url']
    # Placeholders that are kept when the message has no usable review data.
    self.review_url = 'N/A'
    self.reviewers = ['N/A']
    self.reason = None

  def ParseMessage(self, message, codereview_api_url):
    """Parses the message.

    It checks the message to extract the code review website and list of
    reviewers, and it also checks if the CL is a revert of another CL.
    Parsing stops at the first line that starts with 'revert'.

    Args:
      message: The message to parse.
      codereview_api_url: URL to retrieve codereview data from. It is a format
                          string that is expanded with the issue number.
    """
    for line in message.splitlines():
      line = line.strip()

      # Check if the line has the code review information.
      if line.startswith('Review URL: '):

        # Get review number for the code review site from the line.
        self.review_url = line.split('Review URL: ')[1].strip()
        # Drop trailing slashes first, otherwise a URL such as
        # 'https://host/12345/' would yield an empty issue number.
        issue_number = self.review_url.rstrip('/').split('/')[-1]

        # Get JSON from the code review site, ignore the line if it fails.
        url = codereview_api_url % issue_number
        json_string = crash_utils.GetDataFromURL(url)
        if not json_string:
          logging.warning('Failed to retrieve code review information from %s',
                          url)
          continue

        # Load the JSON from the string, and get the list of reviewers. Keep
        # the placeholder if the response carries no reviewer list.
        code_review = crash_utils.LoadJSON(json_string)
        if code_review and 'reviewers' in code_review:
          self.reviewers = code_review['reviewers']

      # Check if this CL is a revert of other CL.
      if line.lower().startswith('revert'):
        self.is_reverted = True

        # Check if the line says what CL this CL is a revert of.
        revert = self.REVERT_PATTERN.match(line)
        if revert:
          self.revert_of = revert.group(2)

        # A revert is going to be ignored, nothing else needs to be parsed.
        return
| 103 | |
| 104 | |
class MatchSet(object):
  """Represents a set of matches.

  Attributes:
    codereview_api_url: URL to retrieve codereview data from; handed to
                        Match.ParseMessage for every new match.
    matches: Dictionary mapping a CL to its Match object.
    cls_to_ignore: Set of CLs that are reverts, or that were reverted by
                   another CL, and therefore must not be reported.
    matches_lock: Lock held by GenerateMatchEntry while it reads or updates
                  matches, cls_to_ignore and the Match objects.
  """

  def __init__(self, codereview_api_url):
    """Initializes an empty set of matches.

    Args:
      codereview_api_url: URL to retrieve codereview data from.
    """
    self.codereview_api_url = codereview_api_url
    self.matches = {}
    self.cls_to_ignore = set()
    self.matches_lock = Lock()

  def GenerateMatchEntry(
      self, revisions, cl, file_path, file_name, function, component_path,
      component_name, crashed_lines, stack_frame_indices, file_action,
      repository_parser):
    """Generates a match object.

    Creates the Match for the CL if there is none yet, then records what the
    CL did to the given file. Nothing is recorded if the CL is a revert, was
    reverted, or no diff URL could be obtained for the file.

    Args:
      revisions: The dictionary mapping cl's number to the revision information,
                 as returned from a function ParseChangelog in parser.
      cl: The SVN revision number or git hash.
      file_path: The path of the file.
      file_name: The name of the file that this CL might be the culprit cl.
      function: The function that caused a crash.
      component_path: The path of the component this file is from.
      component_name: The name of the component the file is from.
      crashed_lines: The list of the lines in the file that caused the crash.
      stack_frame_indices: The list of positions of this file within a stack.
      file_action: Whether file is modified, added or deleted.
      repository_parser: The parser object to parse line diff.
    """
    with self.matches_lock:
      # Check if this CL should be avoided.
      if cl in self.cls_to_ignore:
        return

      # Bring in the existing result, if this CL is already suspected.
      if cl in self.matches:
        match = self.matches[cl]

      # Otherwise, create a new entry.
      else:
        revision = revisions[cl]
        match = Match(revision, component_name)
        # TODO(jeun): Don't hold lock while issuing http request.
        match.ParseMessage(revision['message'], self.codereview_api_url)

        # If this match is a revert, add to the set of CLs to be avoided.
        if match.is_reverted:
          self.cls_to_ignore.add(cl)

          # If this match has info on what CL it is reverted from, add that CL.
          # That CL may already be in matches; RemoveReverts takes it out.
          if match.revert_of:
            self.cls_to_ignore.add(match.revert_of)

          # Return because we do not need to look at this CL anymore.
          return

        # Add this CL to the set of matches.
        self.matches[cl] = match

    # Parse line diff information for this file. The changed line contents
    # (third element) are not needed here.
    (diff_url, changed_line_numbers, _) = (
        repository_parser.ParseLineDiff(
            file_path, component_path, file_action, cl))

    if not diff_url:
      return

    # Find the intersection between the lines that this file crashed on and
    # the changed lines.
    (line_intersection, stack_frame_index_intersection) = (
        crash_utils.Intersection(
            crashed_lines, stack_frame_indices, changed_line_numbers))

    # Find the minimum distance between the changed lines and crashed lines.
    min_distance = crash_utils.FindMinLineDistance(crashed_lines,
                                                   changed_line_numbers)

    # Check whether this CL changes the crashed lines or not.
    if line_intersection:
      priority = LINE_CHANGE_PRIORITY
    else:
      priority = FILE_CHANGE_PRIORITY

    # Add the parsed information to the object.
    with self.matches_lock:
      match.line_of_crash.append(line_intersection)
      match.files.append(file_name)

      # Update the min distance only if it is less than the current one.
      if min_distance < match.min_distance:
        match.min_distance = min_distance

      # If this CL does not change the crashed line, all occurrence of this
      # file in the stack has the same significance.
      if not stack_frame_index_intersection:
        stack_frame_index_intersection = stack_frame_indices
      match.stack_frame_indices.append(stack_frame_index_intersection)
      match.file_urls.append(diff_url)

      # Only record the highest rank of this CL.
      if priority < match.rank:
        match.rank = priority
      match.priorities.append(priority)
      match.function.append(function)

  def RemoveReverts(self):
    """Removes CLs that are revert."""
    # Iterate over a snapshot of the keys: deleting from a dictionary while
    # iterating over it raises RuntimeError.
    for cl in list(self.matches):
      if cl in self.cls_to_ignore:
        del self.matches[cl]
| OLD | NEW |