Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(316)

Side by Side Diff: tools/findit/matchset.py

Issue 421223003: [Findit] Plain objects to represent the returned result from running the algorithm, (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: addressed code review / added git support. Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« tools/findit/blame.py ('K') | « tools/findit/blame.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
stgao 2014/08/12 19:12:18 Rename file to "match_set.py"?
jeun 2014/08/12 20:21:05 Done.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 import logging
6 import re
7 from threading import Lock
8
9 import crash_utils
10
# Priority given to a file when the CL changes one of the crashed lines.
11 LINE_CHANGE_PRIORITY = 1
# Priority given to a file when the CL changes the file but not a crashed line.
12 FILE_CHANGE_PRIORITY = 2
13
14
class Match(object):
  """Represents a match entry.

  A match is a CL that is suspected to have caused the crash. A match object
  contains information about files it changes, their owners, etc.

  Attributes:
    is_reverted: True if the CL message has a line starting with 'revert'
        (case-insensitive).
    revert_of: The revision number (as a string) named on that revert line,
        or None if the line does not name one.
    line_of_crash: The list of lines that caused crash for this CL.
    function: The list of functions that caused the crash.
    min_distance: The minimum difference between the lines that CL changed and
        lines that caused the crash.
    files: The list of files that the CL changed.
    file_urls: The list of URLs for the file.
    owner: The owner of the CL.
    component_name: The name of the component that this CL belongs to.
    stack_frame_indices: For files that caused crash, list of where in the
        stackframe they occur.
    rank: The highest priority among the files the CL changes.
    priorities: For each file, whether it changes the crashed line
        (priority = 1) or is a simple file change (priority = 2).
    url: URL of the CL.
    review_url: The codereview address that reviews this CL.
    reviewers: The list of people that reviewed this CL.
    reason: The reason why this CL is suspected.
  """
  REVERT_PATTERN = re.compile(r'(revert\w*) r?(\d+)', re.I)

  def __init__(self, revision, component_name):
    """Initializes an empty match for one CL.

    Args:
      revision: Dictionary describing the CL; its 'author' and 'url' entries
          are read here.
      component_name: The name of the component that this CL belongs to.
    """
    self.is_reverted = False
    self.revert_of = None
    self.line_of_crash = []
    self.function = []
    self.min_distance = crash_utils.INFINITY
    self.files = []
    self.file_urls = []
    self.owner = revision['author']
    self.component_name = component_name
    self.stack_frame_indices = []
    self.rank = crash_utils.INFINITY
    self.priorities = []
    self.url = revision['url']
    # Placeholders used until ParseMessage finds real review information.
    self.review_url = 'N/A'
    self.reviewers = ['N/A']
    self.reason = None

  def ParseMessage(self, message, codereview_api_url):
    """Parses the message.

    It checks the message to extract the code review website and list of
    reviewers, and it also checks if the CL is a revert of another CL.
    Parsing stops at the first line that starts with 'revert'.

    Args:
      message: The message to parse.
      codereview_api_url: URL to retrieve codereview data from. It is used as
          a %-format string that receives the issue number.
    """
    review_url_prefix = 'Review URL: '

    for line in message.splitlines():
      line = line.strip()

      # Check if the line has the code review information.
      if line.startswith(review_url_prefix):
        # Get review number for the code review site from the line.
        self.review_url = line.split(review_url_prefix)[1].strip()
        # Ignore trailing slashes so that '.../12345/' still yields '12345'
        # instead of an empty issue number.
        issue_number = self.review_url.rstrip('/').split('/')[-1]

        # Get JSON from the code review site, ignore the line if it fails.
        url = codereview_api_url % issue_number
        json_string = crash_utils.GetDataFromURL(url)
        if not json_string:
          logging.warning('Failed to retrieve code review information from %s',
                          url)
          continue

        # Load the JSON from the string, and get the list of reviewers. Keep
        # the current value if the response carries no 'reviewers' entry.
        code_review = crash_utils.LoadJSON(json_string)
        if code_review and 'reviewers' in code_review:
          self.reviewers = code_review['reviewers']

      # Check if this CL is a revert of other CL.
      if line.lower().startswith('revert'):
        self.is_reverted = True

        # Check if the line says what CL this CL is a revert of.
        revert = self.REVERT_PATTERN.match(line)
        if revert:
          self.revert_of = revert.group(2)
        return
103
104
class MatchSet(object):
  """Represents a set of matches.

  Attributes:
    codereview_api_url: URL to retrieve codereview data from; handed to
        Match.ParseMessage for every new match.
    matches: Dictionary mapping a CL to its Match object.
    cls_to_ignore: Set of CLs that are reverts, or that a revert names as the
        CL it reverts.
    matches_lock: Lock held by GenerateMatchEntry while it reads or updates
        matches, cls_to_ignore and the Match objects.
  """

  def __init__(self, codereview_api_url):
    """Initializes an empty match set.

    Args:
      codereview_api_url: URL to retrieve codereview data from.
    """
    self.codereview_api_url = codereview_api_url
    self.matches = {}
    self.cls_to_ignore = set()
    self.matches_lock = Lock()

  def GenerateMatchEntry(
      self, revisions, cl, file_path, file_name, function, component_path,
      component_name, crashed_lines, stack_frame_indices, file_action,
      repository_parser):
    """Generates a match object.

    Creates the Match for this CL if it does not exist yet, then records the
    information about one file the CL changes. Nothing is recorded when the CL
    is in the ignore set, when it turns out to be a revert, or when no diff
    URL can be obtained for the file.

    Args:
      revisions: The dictionary mapping cl's number to the revision information,
          as returned from a function ParseChangelog in parser.
      cl: The SVN revision number or git hash.
      file_path: The path of the file.
      file_name: The name of the file that this CL might be the culprit cl.
      function: The function that caused a crash.
      component_path: The path of the component this file is from.
      component_name: The name of the component the file is from.
      crashed_lines: The list of the lines in the file that caused the crash.
      stack_frame_indices: The list of positions of this file within a stack.
      file_action: Whether file is modified, added or deleted.
      repository_parser: The parser object to parse line diff.
    """
    with self.matches_lock:
      # Check if this CL should be avoided.
      if cl in self.cls_to_ignore:
        return

      # If this CL is not already identified as suspected, create a new entry.
      if cl not in self.matches:
        revision = revisions[cl]
        match = Match(revision, component_name)
        # TODO(jeun): Don't hold lock while issuing http request.
        match.ParseMessage(revision['message'], self.codereview_api_url)

        # If this match is a revert, add to the set of CLs to be avoided.
        if match.is_reverted:
          self.cls_to_ignore.add(cl)

          # If this match has info on what CL it is reverted from, add that CL.
          if match.revert_of:
            self.cls_to_ignore.add(match.revert_of)

          # Return because we do not need to look at this CL anymore.
          return

        # Add this CL to the set of matches.
        self.matches[cl] = match

      # Else, bring in the existing result.
      else:
        match = self.matches[cl]

    # Parse line diff information for this file. This runs without the lock.
    (diff_url, changed_line_numbers, changed_line_contents) = (
        repository_parser.ParseLineDiff(
            file_path, component_path, file_action, cl))

    if not diff_url:
      return

    # Find the intersection between the lines that this file crashed on and
    # the changed lines.
    (line_intersection, stack_frame_index_intersection) = (
        crash_utils.Intersection(
            crashed_lines, stack_frame_indices, changed_line_numbers))

    # Find the minimum distance between the changed lines and crashed lines.
    min_distance = crash_utils.FindMinLineDistance(crashed_lines,
                                                   changed_line_numbers)

    # Check whether this CL changes the crashed lines or not.
    if line_intersection:
      priority = LINE_CHANGE_PRIORITY
    else:
      priority = FILE_CHANGE_PRIORITY

    # Add the parsed information to the object.
    with self.matches_lock:
      match.line_of_crash.append(line_intersection)
      match.files.append(file_name)

      # Update the min distance only if it is less than the current one.
      if min_distance < match.min_distance:
        match.min_distance = min_distance

      # If this CL does not change the crashed line, all occurrence of this
      # file in the stack has the same significance.
      if not stack_frame_index_intersection:
        stack_frame_index_intersection = stack_frame_indices
      match.stack_frame_indices.append(stack_frame_index_intersection)
      match.file_urls.append(diff_url)

      # Only record the highest rank of this CL.
      if priority < match.rank:
        match.rank = priority
      match.priorities.append(priority)
      match.function.append(function)

  def RemoveReverts(self):
    """Removes every CL in cls_to_ignore from matches.

    This method does not acquire matches_lock.
    """
    # Iterate over a snapshot of the keys: deleting from a dictionary while
    # iterating over it raises RuntimeError.
    for cl in list(self.matches):
      if cl in self.cls_to_ignore:
        del self.matches[cl]
OLDNEW
« tools/findit/blame.py ('K') | « tools/findit/blame.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698