OLD | NEW |
1 # Copyright 2014 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import base64 | 5 import base64 |
6 import logging | |
7 import os | |
8 import xml.dom.minidom as minidom | 6 import xml.dom.minidom as minidom |
9 from xml.parsers.expat import ExpatError | 7 from xml.parsers.expat import ExpatError |
10 | 8 |
11 import crash_utils | 9 import crash_utils |
12 from repository_parser_interface import ParserInterface | 10 from repository_parser_interface import ParserInterface |
13 | 11 |
| 12 FILE_CHANGE_TYPE_MAP = { |
| 13 'add': 'A', |
| 14 'delete': 'D', |
| 15 'modify': 'M' |
| 16 } |
| 17 |
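Pulling the repeated if/elif normalization into FILE_CHANGE_TYPE_MAP is a good cleanup, with one caveat: a plain dict lookup raises KeyError on any change type the map does not cover. A minimal sketch of a defensive variant (the 'rename' type is hypothetical; gitiles may or may not report it):

FILE_CHANGE_TYPE_MAP = {
    'add': 'A',
    'delete': 'D',
    'modify': 'M'
}

def NormalizeChangeType(change_type):
  # Fall back to the raw value for any type the map does not cover,
  # rather than raising KeyError mid-parse.
  return FILE_CHANGE_TYPE_MAP.get(change_type, change_type)

assert NormalizeChangeType('add') == 'A'
assert NormalizeChangeType('rename') == 'rename'  # hypothetical type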
14 | 18 |
15 class GitParser(ParserInterface): | 19 class GitParser(ParserInterface): |
16 """Parser for Git repository in googlesource. | 20 """Parser for Git repository in googlesource. |
17 | 21 |
18 Attributes: | 22 Attributes: |
19 parsed_deps: A map from component path to its repository name, regression, | 23 parsed_deps: A map from component path to its repository name, regression, |
20 etc. | 24 etc. |
21 url_parts_map: A map from url type to its url parts. These parts are added | 25 url_parts_map: A map from url type to its url parts. These parts are added |
22 to the base url to form different urls. | 26 to the base url to form different urls. |
23 """ | 27 """ |
24 | 28 |
25 def __init__(self, parsed_deps, url_parts_map): | 29 def __init__(self, parsed_deps, url_parts_map): |
26 self.component_to_url_map = parsed_deps | 30 self.component_to_url_map = parsed_deps |
27 self.url_parts_map = url_parts_map | 31 self.url_parts_map = url_parts_map |
28 | 32 |
29 def ParseChangelog(self, component_path, range_start, range_end): | 33 def ParseChangelog(self, component_path, range_start, range_end): |
30 file_to_revision_map = {} | 34 file_to_revision_map = {} |
31 revision_map = {} | 35 revision_map = {} |
32 base_url = self.component_to_url_map[component_path]['repository'] | 36 base_url = self.component_to_url_map[component_path]['repository'] |
33 changelog_url = base_url + self.url_parts_map['changelog_url'] | 37 changelog_url = base_url + self.url_parts_map['changelog_url'] |
34 revision_url = base_url + self.url_parts_map['revision_url'] | 38 revision_url = base_url + self.url_parts_map['revision_url'] |
35 | 39 |
36 # Retrieve data from the url; return empty maps if it fails. Html url is a | 40 # Retrieve data from the url; return empty maps if it fails. Html url is a |
37 # url where the changelog can be parsed from html. | 41 # url where the changelog can be parsed from html. |
38 url = changelog_url % (range_start, range_end) | 42 url = changelog_url % (range_start, range_end) |
39 html_url = url + '?pretty=fuller' | 43 html_url = url + '?pretty=fuller' |
40 response = crash_utils.GetDataFromURL(html_url) | 44 response = crash_utils.GetDataFromURL(html_url) |
41 if not response: | 45 if not response: |
42 logging.error('Failed to retrieve changelog from %s', html_url) | |
43 return (revision_map, file_to_revision_map) | 46 return (revision_map, file_to_revision_map) |
44 | 47 |
45 # Parse xml out of the returned string. If it fails, return empty map. | 48 # Parse xml out of the returned string. If it fails, try parsing |
 | 49 # from JSON objects. |
46 try: | 50 try: |
47 dom = minidom.parseString(response) | 51 dom = minidom.parseString(response) |
48 except ExpatError: | 52 except ExpatError: |
49 logging.error('Failed to parse changelog from %s', url) | 53 self.ParseChangelogFromJSON(range_start, range_end, changelog_url, |
| 54 revision_url, revision_map, |
| 55 file_to_revision_map) |
50 return (revision_map, file_to_revision_map) | 56 return (revision_map, file_to_revision_map) |
51 | 57 |
52 # The revision information is in the third through the | 58 # The revision information is in the third through the |
53 # second-to-last divs. | 59 # second-to-last divs. |
54 divs = dom.getElementsByTagName('div')[2:-1] | 60 divs = dom.getElementsByTagName('div')[2:-1] |
55 pres = dom.getElementsByTagName('pre') | 61 pres = dom.getElementsByTagName('pre') |
56 uls = dom.getElementsByTagName('ul') | 62 uls = dom.getElementsByTagName('ul') |
57 | 63 |
58 # Divs, pres and uls each contain revision information for one CL, so | 64 # Divs, pres and uls each contain revision information for one CL, so |
59 # they should have the same length. | 65 # they should have the same length. |
(...skipping 26 matching lines...) |
86 | 92 |
87 # Set url of this CL. | 93 # Set url of this CL. |
88 revision_url_part = self.url_parts_map['revision_url'] % githash | 94 revision_url_part = self.url_parts_map['revision_url'] % githash |
89 revision['url'] = base_url + revision_url_part | 95 revision['url'] = base_url + revision_url_part |
90 | 96 |
91 # Go through changed files, they are in li. | 97 # Go through changed files, they are in li. |
92 lis = ul.getElementsByTagName('li') | 98 lis = ul.getElementsByTagName('li') |
93 for li in lis: | 99 for li in lis: |
94 # Retrieve path and action of the changed file | 100 # Retrieve path and action of the changed file |
95 file_path = li.getElementsByTagName('a')[0].firstChild.nodeValue | 101 file_path = li.getElementsByTagName('a')[0].firstChild.nodeValue |
96 file_action = li.getElementsByTagName('span')[0].getAttribute('class') | 102 file_change_type = li.getElementsByTagName('span')[ |
| 103 0].getAttribute('class') |
97 | 104 |
98 # Normalize the file action so that it is the same as in the SVN parser. | 105 # Normalize the file change type so that it matches the SVN parser. |
99 if file_action == 'add': | 106 file_change_type = FILE_CHANGE_TYPE_MAP[file_change_type] |
100 file_action = 'A' | |
101 elif file_action == 'delete': | |
102 file_action = 'D' | |
103 elif file_action == 'modify': | |
104 file_action = 'M' | |
105 | 107 |
106 # Add the changed file to the map. | 108 # Add the changed file to the map. |
107 changed_file = os.path.basename(file_path) | 109 if file_path not in file_to_revision_map: |
108 if changed_file not in file_to_revision_map: | 110 file_to_revision_map[file_path] = [] |
109 file_to_revision_map[changed_file] = [] | 111 file_to_revision_map[file_path].append((githash, file_change_type)) |
110 file_to_revision_map[changed_file].append((githash, file_action, | |
111 file_path)) | |
112 | 112 |
113 # Add this revision object to the map. | 113 # Add this revision object to the map. |
114 revision_map[githash] = revision | 114 revision_map[githash] = revision |
115 | 115 |
116 # Parse one revision for the start range, because googlesource does not | 116 # Parse one revision for the start range, because googlesource does not |
117 # include the start of the range. | 117 # include the start of the range. |
118 self.ParseRevision(revision_url, range_start, revision_map, | 118 self.ParseRevision(revision_url, range_start, revision_map, |
119 file_to_revision_map) | 119 file_to_revision_map) |
120 | 120 |
121 return (revision_map, file_to_revision_map) | 121 return (revision_map, file_to_revision_map) |
122 | 122 |
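For readers unfamiliar with how the URL templates compose: the repository base comes from parsed_deps and the path templates from url_parts_map. A small sketch with hypothetical values (the real maps are built elsewhere from DEPS and config, and the gitiles path formats shown are assumptions):

# Hypothetical inputs; the real ones come from DEPS parsing and config.
parsed_deps = {
    'src/': {'repository': 'https://chromium.googlesource.com/chromium/src'}
}
url_parts_map = {
    'changelog_url': '/+log/%s..%s',  # assumed gitiles log path format
    'revision_url': '/+/%s'
}

parser = GitParser(parsed_deps, url_parts_map)  # the class above
base_url = parsed_deps['src/']['repository']
changelog_url = base_url + url_parts_map['changelog_url']

# ParseChangelog fills in the regression range:
url = changelog_url % ('1a2b3c', '4d5e6f')
# -> https://chromium.googlesource.com/chromium/src/+log/1a2b3c..4d5e6f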
123 def ParseChangelogFromJSON(self, range_start, range_end, changelog_url, | 123 def ParseChangelogFromJSON(self, range_start, range_end, changelog_url, |
124 revision_url, revision_map, file_to_revision_map): | 124 revision_url, revision_map, file_to_revision_map): |
125 """Parses changelog by going over the JSON file. | 125 """Parses changelog by going over the JSON file. |
126 | 126 |
127 Args: | 127 Args: |
128 range_start: Starting range of the regression. | 128 range_start: Starting range of the regression. |
129 range_end: Ending range of the regression. | 129 range_end: Ending range of the regression. |
130 changelog_url: The url to retrieve changelog from. | 130 changelog_url: The url to retrieve changelog from. |
131 revision_url: The url to retrieve individual revision from. | 131 revision_url: The url to retrieve individual revision from. |
132 revision_map: A map from a git hash number to its revision information. | 132 revision_map: A map from a git hash number to its revision information. |
133 file_to_revision_map: A map from file to a git hash in which it occurs. | 133 file_to_revision_map: A map from file to a git hash in which it occurs. |
134 """ | 134 """ |
135 # Compute URLs from the given range and retrieve the changelog. Stop if it fails. | 135 # Compute URLs from the given range and retrieve the changelog. Stop if it fails. |
136 changelog_url %= (range_start, range_end) | 136 changelog_url %= (range_start, range_end) |
137 json_url = changelog_url + '?format=json' | 137 json_url = changelog_url + '?format=json' |
138 response = crash_utils.GetDataFromURL(json_url) | 138 response = crash_utils.GetDataFromURL(json_url) |
139 if not response: | 139 if not response: |
140 logging.error('Failed to retrieve changelog from %s.', json_url) | |
141 return | 140 return |
142 | 141 |
143 # Parse changelog from the returned object. The returned string should | 142 # Parse changelog from the returned object. The returned string should |
144 # start with ")}]'\n", so start from the 6th character. | 143 # start with ")}]'\n", so start from the 6th character. |
145 revisions = crash_utils.LoadJSON(response[5:]) | 144 revisions = crash_utils.LoadJSON(response[5:]) |
146 if not revisions: | 145 if not revisions: |
147 logging.error('Failed to parse changelog from %s.', json_url) | |
148 return | 146 return |
149 | 147 |
150 # Parse individual revision in the log. | 148 # Parse individual revision in the log. |
151 for revision in revisions['log']: | 149 for revision in revisions['log']: |
152 githash = revision['commit'] | 150 githash = revision['commit'] |
153 self.ParseRevision(revision_url, githash, revision_map, | 151 self.ParseRevision(revision_url, githash, revision_map, |
154 file_to_revision_map) | 152 file_to_revision_map) |
155 | 153 |
156 # Parse the revision with range_start, because googlesource ignores | 154 # Parse the revision with range_start, because googlesource ignores |
157 # that one. | 155 # that one. |
158 self.ParseRevision(revision_url, range_start, revision_map, | 156 self.ParseRevision(revision_url, range_start, revision_map, |
159 file_to_revision_map) | 157 file_to_revision_map) |
160 | 158 |
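Both JSON paths above strip the first five characters before parsing; that is the anti-XSSI prefix gitiles prepends to JSON bodies, as the comments note. A sketch of what crash_utils.LoadJSON presumably does (the None-on-failure contract is inferred from the call sites, not confirmed):

import json

def LoadJSON(json_string):
  # Parse a JSON string, returning None on malformed input so callers
  # can bail out with a simple truthiness check.
  try:
    return json.loads(json_string)
  except ValueError:
    return None

# Callers strip the five-character anti-XSSI prefix first:
#   revisions = LoadJSON(response[5:])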
161 def ParseRevision(self, revision_url, githash, revision_map, | 159 def ParseRevision(self, revision_url, githash, revision_map, |
162 file_to_revision_map): | 160 file_to_revision_map): |
163 | 161 |
164 # Retrieve data from the URL, return if it fails. | 162 # Retrieve data from the URL, return if it fails. |
165 url = revision_url % githash | 163 url = revision_url % githash |
166 response = crash_utils.GetDataFromURL(url + '?format=json') | 164 response = crash_utils.GetDataFromURL(url + '?format=json') |
167 if not response: | 165 if not response: |
168 logging.warning('Failed to retrieve revision from %s.', url) | |
169 return | 166 return |
170 | 167 |
171 # Load JSON object from the string. If it fails, terminate the function. | 168 # Load JSON object from the string. If it fails, terminate the function. |
172 json_revision = crash_utils.LoadJSON(response[5:]) | 169 json_revision = crash_utils.LoadJSON(response[5:]) |
173 if not json_revision: | 170 if not json_revision: |
174 logging.warning('Failed to parse revision from %s.', url) | |
175 return | 171 return |
176 | 172 |
177 # Create a map representing the revision and get the githash from the JSON object. | 173 # Create a map representing the revision and get the githash from the JSON object. |
178 revision = {} | 174 revision = {} |
179 githash = json_revision['commit'] | 175 githash = json_revision['commit'] |
180 | 176 |
181 # Set author, message and URL of this CL. | 177 # Set author, message and URL of this CL. |
182 revision['author'] = json_revision['author']['name'] | 178 revision['author'] = json_revision['author']['name'] |
183 revision['message'] = json_revision['message'] | 179 revision['message'] = json_revision['message'] |
184 revision['url'] = url | 180 revision['url'] = url |
185 | 181 |
186 # Iterate through the changed files. | 182 # Iterate through the changed files. |
187 for diff in json_revision['tree_diff']: | 183 for diff in json_revision['tree_diff']: |
188 file_path = diff['new_path'] | 184 file_path = diff['new_path'] |
189 file_action = diff['type'] | 185 file_change_type = diff['type'] |
190 | 186 |
191 # Normalize the file action so that it fits with svn_repository_parser. | 187 # Normalize the file change type so that it fits with svn_repository_parser. |
192 if file_action == 'add': | 188 file_change_type = FILE_CHANGE_TYPE_MAP[file_change_type] |
193 file_action = 'A' | |
194 elif file_action == 'delete': | |
195 file_action = 'D' | |
196 elif file_action == 'modify': | |
197 file_action = 'M' | |
198 | 189 |
199 # Add the file to the map. | 190 # Add the file to the map. |
200 changed_file = os.path.basename(file_path) | 191 if file_path not in file_to_revision_map: |
201 if changed_file not in file_to_revision_map: | 192 file_to_revision_map[file_path] = [] |
202 file_to_revision_map[changed_file] = [] | 193 file_to_revision_map[file_path].append((githash, file_change_type)) |
203 file_to_revision_map[changed_file].append( | |
204 (githash, file_action, file_path)) | |
205 | 194 |
206 # Add this CL to the map. | 195 # Add this CL to the map. |
207 revision_map[githash] = revision | 196 revision_map[githash] = revision |
208 | 197 |
209 return | 198 return |
210 | 199 |
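The switch from basename keys to full-path keys in file_to_revision_map is a real fix: two files named util.cc in different directories no longer share a bucket. The check-then-append pattern could also be a collections.defaultdict; a sketch with hypothetical tree_diff data:

from collections import defaultdict

file_to_revision_map = defaultdict(list)
githash = 'deadbeef'  # hypothetical commit hash

# Shape mirrors the gitiles tree_diff entries used above.
tree_diff = [
    {'new_path': 'foo/util.cc', 'type': 'modify'},
    {'new_path': 'bar/util.cc', 'type': 'add'},
]
for diff in tree_diff:
  change_type = FILE_CHANGE_TYPE_MAP[diff['type']]
  # Full-path keys keep foo/util.cc and bar/util.cc separate, where
  # basename keys would have merged them.
  file_to_revision_map[diff['new_path']].append((githash, change_type))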
211 def ParseLineDiff(self, path, component, file_action, githash): | 200 def ParseLineDiff(self, path, component, file_change_type, githash): |
212 changed_line_numbers = [] | 201 changed_line_numbers = [] |
213 changed_line_contents = [] | 202 changed_line_contents = [] |
214 base_url = self.component_to_url_map[component]['repository'] | 203 base_url = self.component_to_url_map[component]['repository'] |
215 backup_url = (base_url + self.url_parts_map['revision_url']) % githash | 204 backup_url = (base_url + self.url_parts_map['revision_url']) % githash |
216 | 205 |
217 # If the file is added (not modified), treat it as if it is not changed. | 206 # If the file is added (not modified), treat it as if it is not changed. |
218 if file_action == 'A': | 207 if file_change_type == 'A': |
219 return (backup_url, changed_line_numbers, changed_line_contents) | 208 return (backup_url, changed_line_numbers, changed_line_contents) |
220 | 209 |
221 # Retrieve the diff data from the URL; if it fails, return empty lines. | 210 # Retrieve the diff data from the URL; if it fails, return empty lines. |
222 url = (base_url + self.url_parts_map['diff_url']) % (githash, path) | 211 url = (base_url + self.url_parts_map['diff_url']) % (githash, path) |
223 data = crash_utils.GetDataFromURL(url + '?format=text') | 212 data = crash_utils.GetDataFromURL(url + '?format=text') |
224 if not data: | 213 if not data: |
225 logging.error('Failed to get diff from %s.', url) | |
226 return (backup_url, changed_line_numbers, changed_line_contents) | 214 return (backup_url, changed_line_numbers, changed_line_contents) |
227 | 215 |
228 # Decode the returned object into line diff info. | 216 # Decode the returned object into line diff info. |
229 diff = base64.b64decode(data).splitlines() | 217 diff = base64.b64decode(data).splitlines() |
230 | 218 |
231 # Iterate through the lines in the diff. current_line is -1 while we are | 219 # Iterate through the lines in the diff. current_line is -1 while we are |
232 # not yet inside a diff chunk. | 220 # not yet inside a diff chunk. |
233 current_line = -1 | 221 current_line = -1 |
234 for line in diff: | 222 for line in diff: |
235 line = line.strip() | 223 line = line.strip() |
(...skipping 17 matching lines...) |
253 return (url, changed_line_numbers, changed_line_contents) | 241 return (url, changed_line_numbers, changed_line_contents) |
254 | 242 |
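The folded loop above walks the base64-decoded diff and records added lines. A standalone sketch of the technique, assuming the decoded text is a standard unified diff (the production loop hidden by the fold may differ in details):

import re

def ChangedLinesFromUnifiedDiff(diff_text):
  # Recover new-file line numbers and contents of added lines by
  # following the '@@ -a,b +c,d @@' hunk headers.
  changed_line_numbers = []
  changed_line_contents = []
  current_line = -1  # -1 means we are not inside a hunk yet.
  for line in diff_text.splitlines():
    match = re.match(r'@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@', line)
    if match:
      current_line = int(match.group(1))
      continue
    if current_line == -1:
      continue  # File headers before the first hunk.
    if line.startswith('\\'):
      continue  # '\ No newline at end of file' marker.
    if line.startswith('-'):
      continue  # Removed lines do not advance the new-file counter.
    if line.startswith('+'):
      changed_line_numbers.append(current_line)
      changed_line_contents.append(line[1:].strip())
    current_line += 1
  return changed_line_numbers, changed_line_contents

numbers, contents = ChangedLinesFromUnifiedDiff(
    '@@ -1,2 +1,3 @@\n context\n+added line\n context')
assert numbers == [2] and contents == ['added line']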
255 def ParseBlameInfo(self, component, file_path, line, revision): | 243 def ParseBlameInfo(self, component, file_path, line, revision): |
256 base_url = self.component_to_url_map[component]['repository'] | 244 base_url = self.component_to_url_map[component]['repository'] |
257 | 245 |
258 # Retrieve blame JSON file from googlesource. If it fails, return None. | 246 # Retrieve blame JSON file from googlesource. If it fails, return None. |
259 url_part = self.url_parts_map['blame_url'] % (revision, file_path) | 247 url_part = self.url_parts_map['blame_url'] % (revision, file_path) |
260 blame_url = base_url + url_part | 248 blame_url = base_url + url_part |
261 json_string = crash_utils.GetDataFromURL(blame_url) | 249 json_string = crash_utils.GetDataFromURL(blame_url) |
262 if not json_string: | 250 if not json_string: |
263 logging.error('Failed to retrieve annotation information from %s.', | |
264 blame_url) | |
265 return | 251 return |
266 | 252 |
267 # Parse JSON object from the string. The returned string should | 253 # Parse JSON object from the string. The returned string should |
268 # start with ")}]'\n", so start from the 6th character. | 254 # start with ")}]'\n", so start from the 6th character. |
269 annotation = crash_utils.LoadJSON(json_string[5:]) | 255 annotation = crash_utils.LoadJSON(json_string[5:]) |
270 if not annotation: | 256 if not annotation: |
271 logging.error('Failed to parse annotation information from %s.', | |
272 blame_url) | |
273 return | 257 return |
274 | 258 |
275 # Go through the regions; each region is a run of consecutive lines with | 259 # Go through the regions; each region is a run of consecutive lines with |
276 # the same author/revision. | 260 # the same author/revision. |
277 for blame_line in annotation['regions']: | 261 for blame_line in annotation['regions']: |
278 start = blame_line['start'] | 262 start = blame_line['start'] |
279 count = blame_line['count'] | 263 count = blame_line['count'] |
280 | 264 |
281 # For each region, check whether the line we want blame info for falls in | 265 # For each region, check whether the line we want blame info for falls in |
282 # this region. | 266 # this region. |
283 if start <= line <= start + count - 1: | 267 if start <= line <= start + count - 1: |
284 # If we are in the right region, get the information from the line. | 268 # If we are in the right region, get the information from the line. |
285 revision = blame_line['commit'] | 269 revision = blame_line['commit'] |
286 author = blame_line['author']['name'] | 270 author = blame_line['author']['name'] |
287 revision_url_parts = self.url_parts_map['revision_url'] % revision | 271 revision_url_parts = self.url_parts_map['revision_url'] % revision |
288 revision_url = base_url + revision_url_parts | 272 revision_url = base_url + revision_url_parts |
289 # TODO(jeun): Add a way to get content from JSON object. | 273 # TODO(jeun): Add a way to get content from JSON object. |
290 content = None | 274 content = None |
291 | 275 |
292 return (content, revision, author, revision_url) | 276 (revision_info, _) = self.ParseChangelog(component, revision, revision) |
| 277 message = revision_info[revision]['message'] |
| 278 return (content, revision, author, revision_url, message) |
293 | 279 |
294 # Return None if no region contains the line. | 280 # Return None if no region contains the line. |
295 return None | 281 return None |
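The region scan is a linear search over (start, count) runs; a self-contained sketch with hypothetical gitiles-style blame data follows. Note also that the NEW code's extra ParseChangelog(component, revision, revision) call to fetch the commit message adds one more network round-trip per blame lookup.

def FindBlameRegion(regions, line):
  # Each region covers lines [start, start + count - 1]; return the
  # region containing `line`, or None if no region does.
  for region in regions:
    start = region['start']
    count = region['count']
    if start <= line <= start + count - 1:
      return region
  return None

# Hypothetical annotation data in the shape the loop above expects.
regions = [
    {'start': 1, 'count': 10, 'commit': 'aaa', 'author': {'name': 'x'}},
    {'start': 11, 'count': 5, 'commit': 'bbb', 'author': {'name': 'y'}},
]
assert FindBlameRegion(regions, 12)['commit'] == 'bbb'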