Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(524)

Side by Side Diff: tools/findit/git_repository_parser.py

Issue 478763003: [Findit] Bug fixing and implemented some feature requests. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addressed codereview and removed all references to logging Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2014 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import base64 5 import base64
6 import logging
7 import os
8 import xml.dom.minidom as minidom 6 import xml.dom.minidom as minidom
9 from xml.parsers.expat import ExpatError 7 from xml.parsers.expat import ExpatError
10 8
11 import crash_utils 9 import crash_utils
12 from repository_parser_interface import ParserInterface 10 from repository_parser_interface import ParserInterface
13 11
12 FILE_CHANGE_TYPE_MAP = {
13 'add': 'A',
14 'delete': 'D',
15 'modify': 'M'
16 }
17
14 18
15 class GitParser(ParserInterface): 19 class GitParser(ParserInterface):
16 """Parser for Git repository in googlesource. 20 """Parser for Git repository in googlesource.
17 21
18 Attributes: 22 Attributes:
19 parsed_deps: A map from component path to its repository name, regression, 23 parsed_deps: A map from component path to its repository name, regression,
20 etc. 24 etc.
21 url_parts_map: A map from url type to its url parts. These parts are added 25 url_parts_map: A map from url type to its url parts. These parts are added
22 to the base url to form different urls. 26 to the base url to form different urls.
23 """ 27 """
24 28
25 def __init__(self, parsed_deps, url_parts_map): 29 def __init__(self, parsed_deps, url_parts_map):
26 self.component_to_url_map = parsed_deps 30 self.component_to_url_map = parsed_deps
27 self.url_parts_map = url_parts_map 31 self.url_parts_map = url_parts_map
28 32
29 def ParseChangelog(self, component_path, range_start, range_end): 33 def ParseChangelog(self, component_path, range_start, range_end):
30 file_to_revision_map = {} 34 file_to_revision_map = {}
31 revision_map = {} 35 revision_map = {}
32 base_url = self.component_to_url_map[component_path]['repository'] 36 base_url = self.component_to_url_map[component_path]['repository']
33 changelog_url = base_url + self.url_parts_map['changelog_url'] 37 changelog_url = base_url + self.url_parts_map['changelog_url']
34 revision_url = base_url + self.url_parts_map['revision_url'] 38 revision_url = base_url + self.url_parts_map['revision_url']
35 39
36 # Retrieve data from the url, return empty maps if it fails. Html url is a 40 # Retrieve data from the url, return empty maps if it fails. Html url is a
37 # url where the changelog can be parsed from html. 41 # url where the changelog can be parsed from html.
38 url = changelog_url % (range_start, range_end) 42 url = changelog_url % (range_start, range_end)
39 html_url = url + '?pretty=fuller' 43 html_url = url + '?pretty=fuller'
40 response = crash_utils.GetDataFromURL(html_url) 44 response = crash_utils.GetDataFromURL(html_url)
41 if not response: 45 if not response:
42 logging.error('Failed to retrieve changelog from %s', html_url)
43 return (revision_map, file_to_revision_map) 46 return (revision_map, file_to_revision_map)
44 47
45 # Parse xml out of the returned string. If it fails, return empty map. 48 # Parse xml out of the returned string. If it fails, try parsing
49 # from JSON objects.
46 try: 50 try:
47 dom = minidom.parseString(response) 51 dom = minidom.parseString(response)
48 except ExpatError: 52 except ExpatError:
49 logging.error('Failed to parse changelog from %s', url) 53 self.ParseChangelogFromJSON(range_start, range_end, changelog_url,
54 revision_url, revision_map,
55 file_to_revision_map)
50 return (revision_map, file_to_revision_map) 56 return (revision_map, file_to_revision_map)
51 57
52 # The revision information is in the divs from the third to the second 58 # The revision information is in the divs from the third to the second
53 # to last one. 59 # to last one.
54 divs = dom.getElementsByTagName('div')[2:-1] 60 divs = dom.getElementsByTagName('div')[2:-1]
55 pres = dom.getElementsByTagName('pre') 61 pres = dom.getElementsByTagName('pre')
56 uls = dom.getElementsByTagName('ul') 62 uls = dom.getElementsByTagName('ul')
57 63
58 # Divs, pres and uls each contain revision information for one CL, so 64 # Divs, pres and uls each contain revision information for one CL, so
59 # they should have the same length. 65 # they should have the same length.
(...skipping 26 matching lines...) Expand all
86 92
87 # Set url of this CL. 93 # Set url of this CL.
88 revision_url_part = self.url_parts_map['revision_url'] % githash 94 revision_url_part = self.url_parts_map['revision_url'] % githash
89 revision['url'] = base_url + revision_url_part 95 revision['url'] = base_url + revision_url_part
90 96
91 # Go through changed files, they are in li. 97 # Go through changed files, they are in li.
92 lis = ul.getElementsByTagName('li') 98 lis = ul.getElementsByTagName('li')
93 for li in lis: 99 for li in lis:
94 # Retrieve path and action of the changed file 100 # Retrieve path and action of the changed file
95 file_path = li.getElementsByTagName('a')[0].firstChild.nodeValue 101 file_path = li.getElementsByTagName('a')[0].firstChild.nodeValue
96 file_action = li.getElementsByTagName('span')[0].getAttribute('class') 102 file_change_type = li.getElementsByTagName('span')[
103 0].getAttribute('class')
97 104
98 # Normalize file action so that it is the same as the SVN parser. 105 # Normalize file action so that it is the same as the SVN parser.
99 if file_action == 'add': 106 file_change_type = FILE_CHANGE_TYPE_MAP[file_change_type]
100 file_action = 'A'
101 elif file_action == 'delete':
102 file_action = 'D'
103 elif file_action == 'modify':
104 file_action = 'M'
105 107
106 # Add the changed file to the map. 108 # Add the changed file to the map.
107 changed_file = os.path.basename(file_path) 109 if file_path not in file_to_revision_map:
108 if changed_file not in file_to_revision_map: 110 file_to_revision_map[file_path] = []
109 file_to_revision_map[changed_file] = [] 111 file_to_revision_map[file_path].append((githash, file_change_type))
110 file_to_revision_map[changed_file].append((githash, file_action,
111 file_path))
112 112
113 # Add this revision object to the map. 113 # Add this revision object to the map.
114 revision_map[githash] = revision 114 revision_map[githash] = revision
115 115
116 # Parse one revision for the start range, because googlesource does not 116 # Parse one revision for the start range, because googlesource does not
117 # include the start of the range. 117 # include the start of the range.
118 self.ParseRevision(revision_url, range_start, revision_map, 118 self.ParseRevision(revision_url, range_start, revision_map,
119 file_to_revision_map) 119 file_to_revision_map)
120 120
121 return (revision_map, file_to_revision_map) 121 return (revision_map, file_to_revision_map)
122 122
123 def ParseChangelogFromJSON(self, range_start, range_end, changelog_url, 123 def ParseChangelogFromJSON(self, range_start, range_end, changelog_url,
124 revision_url, revision_map, file_to_revision_map): 124 revision_url, revision_map, file_to_revision_map):
125 """Parses changelog by going over the JSON file. 125 """Parses changelog by going over the JSON file.
126 126
127 Args: 127 Args:
128 range_start: Starting range of the regression. 128 range_start: Starting range of the regression.
129 range_end: Ending range of the regression. 129 range_end: Ending range of the regression.
130 changelog_url: The url to retrieve changelog from. 130 changelog_url: The url to retrieve changelog from.
131 revision_url: The url to retrieve individual revision from. 131 revision_url: The url to retrieve individual revision from.
132 revision_map: A map from a git hash number to its revision information. 132 revision_map: A map from a git hash number to its revision information.
133 file_to_revision_map: A map from file to a git hash in which it occurs. 133 file_to_revision_map: A map from file to a git hash in which it occurs.
134 """ 134 """
135 # Compute URLs from given range, and retrieves changelog. Stop if it fails. 135 # Compute URLs from given range, and retrieves changelog. Stop if it fails.
136 changelog_url %= (range_start, range_end) 136 changelog_url %= (range_start, range_end)
137 json_url = changelog_url + '?format=json' 137 json_url = changelog_url + '?format=json'
138 response = crash_utils.GetDataFromURL(json_url) 138 response = crash_utils.GetDataFromURL(json_url)
139 if not response: 139 if not response:
140 logging.error('Failed to retrieve changelog from %s.', json_url)
141 return 140 return
142 141
143 # Parse changelog from the returned object. The returned string should 142 # Parse changelog from the returned object. The returned string should
144 # start with ")}]'\n", so start from the 6th character. 143 # start with ")}]'\n", so start from the 6th character.
145 revisions = crash_utils.LoadJSON(response[5:]) 144 revisions = crash_utils.LoadJSON(response[5:])
146 if not revisions: 145 if not revisions:
147 logging.error('Failed to parse changelog from %s.', json_url)
148 return 146 return
149 147
150 # Parse individual revision in the log. 148 # Parse individual revision in the log.
151 for revision in revisions['log']: 149 for revision in revisions['log']:
152 githash = revision['commit'] 150 githash = revision['commit']
153 self.ParseRevision(revision_url, githash, revision_map, 151 self.ParseRevision(revision_url, githash, revision_map,
154 file_to_revision_map) 152 file_to_revision_map)
155 153
156 # Parse the revision with range_start, because googlesource ignores 154 # Parse the revision with range_start, because googlesource ignores
157 # that one. 155 # that one.
158 self.ParseRevision(revision_url, range_start, revision_map, 156 self.ParseRevision(revision_url, range_start, revision_map,
159 file_to_revision_map) 157 file_to_revision_map)
160 158
161 def ParseRevision(self, revision_url, githash, revision_map, 159 def ParseRevision(self, revision_url, githash, revision_map,
162 file_to_revision_map): 160 file_to_revision_map):
163 161
164 # Retrieve data from the URL, return if it fails. 162 # Retrieve data from the URL, return if it fails.
165 url = revision_url % githash 163 url = revision_url % githash
166 response = crash_utils.GetDataFromURL(url + '?format=json') 164 response = crash_utils.GetDataFromURL(url + '?format=json')
167 if not response: 165 if not response:
168 logging.warning('Failed to retrieve revision from %s.', url)
169 return 166 return
170 167
171 # Load JSON object from the string. If it fails, terminate the function. 168 # Load JSON object from the string. If it fails, terminate the function.
172 json_revision = crash_utils.LoadJSON(response[5:]) 169 json_revision = crash_utils.LoadJSON(response[5:])
173 if not json_revision: 170 if not json_revision:
174 logging.warning('Failed to parse revision from %s.', url)
175 return 171 return
176 172
177 # Create a map representing object and get githash from the JSON object. 173 # Create a map representing object and get githash from the JSON object.
178 revision = {} 174 revision = {}
179 githash = json_revision['commit'] 175 githash = json_revision['commit']
180 176
181 # Set author, message and URL of this CL. 177 # Set author, message and URL of this CL.
182 revision['author'] = json_revision['author']['name'] 178 revision['author'] = json_revision['author']['name']
183 revision['message'] = json_revision['message'] 179 revision['message'] = json_revision['message']
184 revision['url'] = url 180 revision['url'] = url
185 181
186 # Iterate through the changed files. 182 # Iterate through the changed files.
187 for diff in json_revision['tree_diff']: 183 for diff in json_revision['tree_diff']:
188 file_path = diff['new_path'] 184 file_path = diff['new_path']
189 file_action = diff['type'] 185 file_change_type = diff['type']
190 186
191 # Normalize file action so that it fits with svn_repository_parser. 187 # Normalize file action so that it fits with svn_repository_parser.
192 if file_action == 'add': 188 file_change_type = FILE_CHANGE_TYPE_MAP[file_change_type]
193 file_action = 'A'
194 elif file_action == 'delete':
195 file_action = 'D'
196 elif file_action == 'modify':
197 file_action = 'M'
198 189
199 # Add the file to the map. 190 # Add the file to the map.
200 changed_file = os.path.basename(file_path) 191 if file_path not in file_to_revision_map:
201 if changed_file not in file_to_revision_map: 192 file_to_revision_map[file_path] = []
202 file_to_revision_map[changed_file] = [] 193 file_to_revision_map[file_path].append((githash, file_change_type))
203 file_to_revision_map[changed_file].append(
204 (githash, file_action, file_path))
205 194
206 # Add this CL to the map. 195 # Add this CL to the map.
207 revision_map[githash] = revision 196 revision_map[githash] = revision
208 197
209 return 198 return
210 199
211 def ParseLineDiff(self, path, component, file_action, githash): 200 def ParseLineDiff(self, path, component, file_change_type, githash):
212 changed_line_numbers = [] 201 changed_line_numbers = []
213 changed_line_contents = [] 202 changed_line_contents = []
214 base_url = self.component_to_url_map[component]['repository'] 203 base_url = self.component_to_url_map[component]['repository']
215 backup_url = (base_url + self.url_parts_map['revision_url']) % githash 204 backup_url = (base_url + self.url_parts_map['revision_url']) % githash
216 205
217 # If the file is added (not modified), treat it as if it is not changed. 206 # If the file is added (not modified), treat it as if it is not changed.
218 if file_action == 'A': 207 if file_change_type == 'A':
219 return (backup_url, changed_line_numbers, changed_line_contents) 208 return (backup_url, changed_line_numbers, changed_line_contents)
220 209
221 # Retrieves the diff data from URL, and if it fails, return empty lines. 210 # Retrieves the diff data from URL, and if it fails, return empty lines.
222 url = (base_url + self.url_parts_map['diff_url']) % (githash, path) 211 url = (base_url + self.url_parts_map['diff_url']) % (githash, path)
223 data = crash_utils.GetDataFromURL(url + '?format=text') 212 data = crash_utils.GetDataFromURL(url + '?format=text')
224 if not data: 213 if not data:
225 logging.error('Failed to get diff from %s.', url)
226 return (backup_url, changed_line_numbers, changed_line_contents) 214 return (backup_url, changed_line_numbers, changed_line_contents)
227 215
228 # Decode the returned object to line diff info 216 # Decode the returned object to line diff info
229 diff = base64.b64decode(data).splitlines() 217 diff = base64.b64decode(data).splitlines()
230 218
231 # Iterate through the lines in diff. Set current line to -1 so that we know 219 # Iterate through the lines in diff. Set current line to -1 so that we know
232 # that current line is part of the diff chunk. 220 # that current line is part of the diff chunk.
233 current_line = -1 221 current_line = -1
234 for line in diff: 222 for line in diff:
235 line = line.strip() 223 line = line.strip()
(...skipping 17 matching lines...) Expand all
253 return (url, changed_line_numbers, changed_line_contents) 241 return (url, changed_line_numbers, changed_line_contents)
254 242
255 def ParseBlameInfo(self, component, file_path, line, revision): 243 def ParseBlameInfo(self, component, file_path, line, revision):
256 base_url = self.component_to_url_map[component]['repository'] 244 base_url = self.component_to_url_map[component]['repository']
257 245
258 # Retrieve blame JSON file from googlesource. If it fails, return None. 246 # Retrieve blame JSON file from googlesource. If it fails, return None.
259 url_part = self.url_parts_map['blame_url'] % (revision, file_path) 247 url_part = self.url_parts_map['blame_url'] % (revision, file_path)
260 blame_url = base_url + url_part 248 blame_url = base_url + url_part
261 json_string = crash_utils.GetDataFromURL(blame_url) 249 json_string = crash_utils.GetDataFromURL(blame_url)
262 if not json_string: 250 if not json_string:
263 logging.error('Failed to retrieve annotation information from %s.',
264 blame_url)
265 return 251 return
266 252
267 # Parse JSON object from the string. The returned string should 253 # Parse JSON object from the string. The returned string should
268 # start with ")}]'\n", so start from the 6th character. 254 # start with ")}]'\n", so start from the 6th character.
269 annotation = crash_utils.LoadJSON(json_string[5:]) 255 annotation = crash_utils.LoadJSON(json_string[5:])
270 if not annotation: 256 if not annotation:
271 logging.error('Failed to parse annotation information from %s.',
272 blame_url)
273 return 257 return
274 258
275 # Go through the regions, which is a list of consecutive lines with same 259 # Go through the regions, which is a list of consecutive lines with same
276 # author/revision. 260 # author/revision.
277 for blame_line in annotation['regions']: 261 for blame_line in annotation['regions']:
278 start = blame_line['start'] 262 start = blame_line['start']
279 count = blame_line['count'] 263 count = blame_line['count']
280 264
281 # For each region, check if the line we want the blame info of is in this 265 # For each region, check if the line we want the blame info of is in this
282 # region. 266 # region.
283 if start <= line and line <= start + count - 1: 267 if start <= line and line <= start + count - 1:
284 # If we are in the right region, get the information from the line. 268 # If we are in the right region, get the information from the line.
285 revision = blame_line['commit'] 269 revision = blame_line['commit']
286 author = blame_line['author']['name'] 270 author = blame_line['author']['name']
287 revision_url_parts = self.url_parts_map['revision_url'] % revision 271 revision_url_parts = self.url_parts_map['revision_url'] % revision
288 revision_url = base_url + revision_url_parts 272 revision_url = base_url + revision_url_parts
289 # TODO(jeun): Add a way to get content from JSON object. 273 # TODO(jeun): Add a way to get content from JSON object.
290 content = None 274 content = None
291 275
292 return (content, revision, author, revision_url) 276 (revision_info, _) = self.ParseChangelog(component, revision, revision)
277 message = revision_info[revision]['message']
278 return (content, revision, author, revision_url, message)
293 279
294 # Return none if the region does not exist. 280 # Return none if the region does not exist.
295 return None 281 return None
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698