OLD | NEW |
1 # Copyright 2014 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import logging | |
6 import os | |
7 import xml.dom.minidom as minidom | 5 import xml.dom.minidom as minidom |
8 from xml.parsers.expat import ExpatError | 6 from xml.parsers.expat import ExpatError |
9 | 7 |
10 import crash_utils | 8 import crash_utils |
11 from repository_parser_interface import ParserInterface | 9 from repository_parser_interface import ParserInterface |
12 | 10 |
13 | 11 |
14 # This number is 6 because each linediff page in src.chromium.org should | 12 # This number is 6 because each linediff page in src.chromium.org should |
15 # contain the following tables: table with revision number, table with actual | 13 # contain the following tables: table with revision number, table with actual |
16 # diff, table with dropdown menu, table with legend, a border table and a table | 14 # diff, table with dropdown menu, table with legend, a border table and a table |
(...skipping 17 matching lines...) Expand all Loading... |
34 | 32 |
35 def ParseChangelog(self, component, range_start, range_end): | 33 def ParseChangelog(self, component, range_start, range_end): |
36 file_to_revision_map = {} | 34 file_to_revision_map = {} |
37 revision_map = {} | 35 revision_map = {} |
38 | 36 |
39 # Check if the current component is supported by reading the components | 37 # Check if the current component is supported by reading the components |
40 # parsed from config file. If it is not, fail. | 38 # parsed from config file. If it is not, fail. |
41 | 39 |
42 url_map = self.component_to_urls_map.get(component) | 40 url_map = self.component_to_urls_map.get(component) |
43 if not url_map: | 41 if not url_map: |
44 logging.error('Component %s is not currently supported.', component) | |
45 return (revision_map, file_to_revision_map) | 42 return (revision_map, file_to_revision_map) |
46 | 43 |
47 # Retrieve data from the url, return empty map if fails. | 44 # Retrieve data from the url, return empty map if fails. |
48 revision_range_str = '%s:%s' % (range_start, range_end) | 45 revision_range_str = '%s:%s' % (range_start, range_end) |
49 url = url_map['changelog_url'] % revision_range_str | 46 url = url_map['changelog_url'] % revision_range_str |
50 response = crash_utils.GetDataFromURL(url) | 47 response = crash_utils.GetDataFromURL(url) |
51 if not response: | 48 if not response: |
52 logging.error('Failed to retrieve changelog from %s, range %s.', | |
53 url, revision_range_str) | |
54 return (revision_map, file_to_revision_map) | 49 return (revision_map, file_to_revision_map) |
55 | 50 |
56 # Parse xml out of the returned string. If it fails, return empty map. | 51 # Parse xml out of the returned string. If it fails, return empty map. |
57 try: | 52 try: |
58 xml_revisions = minidom.parseString(response) | 53 xml_revisions = minidom.parseString(response) |
59 except ExpatError: | 54 except ExpatError: |
60 logging.error('Failed to parse changelog from %s, range %s.', | |
61 url, revision_range_str) | |
62 return (revision_map, file_to_revision_map) | 55 return (revision_map, file_to_revision_map) |
63 | 56 |
64 # Iterate through the returned XML object. | 57 # Iterate through the returned XML object. |
65 revisions = xml_revisions.getElementsByTagName('logentry') | 58 revisions = xml_revisions.getElementsByTagName('logentry') |
66 for revision in revisions: | 59 for revision in revisions: |
67 # Create a new revision object for each of the revisions. | 60 # Create a new revision object for each of the revisions. |
68 revision_object = {} | 61 revision_object = {} |
69 | 62 |
70 # Set author of the CL. | 63 # Set author of the CL. |
71 revision_object['author'] = revision.getElementsByTagName( | 64 revision_object['author'] = revision.getElementsByTagName( |
72 'author')[0].firstChild.nodeValue | 65 'author')[0].firstChild.nodeValue |
73 | 66 |
74 # Get the revision number from xml. | 67 # Get the revision number from xml. |
75 revision_number = int(revision.getAttribute('revision')) | 68 revision_number = int(revision.getAttribute('revision')) |
76 | 69 |
77 # Iterate through the changed paths in the CL. | 70 # Iterate through the changed paths in the CL. |
78 paths = revision.getElementsByTagName('paths') | 71 paths = revision.getElementsByTagName('paths') |
79 if paths: | 72 if paths: |
80 for changed_path in paths[0].getElementsByTagName('path'): | 73 for changed_path in paths[0].getElementsByTagName('path'): |
81 # Get path, file action and file name from the xml. | 74 # Get path and file change type from the xml. |
82 file_path = changed_path.firstChild.nodeValue | 75 file_path = changed_path.firstChild.nodeValue |
83 file_action = changed_path.getAttribute('action') | 76 file_change_type = changed_path.getAttribute('action') |
84 changed_file = os.path.basename(file_path) | 77 |
| 78 if file_path.startswith('/trunk/'): |
| 79 file_path = file_path[len('/trunk/'):] |
85 | 80 |
86 # Add file to the map. | 81 # Add file to the map. |
87 if changed_file not in file_to_revision_map: | 82 if file_path not in file_to_revision_map: |
88 file_to_revision_map[changed_file] = [] | 83 file_to_revision_map[file_path] = [] |
89 file_to_revision_map[changed_file].append( | 84 file_to_revision_map[file_path].append( |
90 (revision_number, file_action, file_path)) | 85 (revision_number, file_change_type)) |
91 | 86 |
92 # Set commit message of the CL. | 87 # Set commit message of the CL. |
93 revision_object['message'] = revision.getElementsByTagName('msg')[ | 88 revision_object['message'] = revision.getElementsByTagName('msg')[ |
94 0].firstChild.nodeValue | 89 0].firstChild.nodeValue |
95 | 90 |
96 # Set url of this CL. | 91 # Set url of this CL. |
97 revision_url = url_map['revision_url'] % revision_number | 92 revision_url = url_map['revision_url'] % revision_number |
98 revision_object['url'] = revision_url | 93 revision_object['url'] = revision_url |
99 | 94 |
100 # Add this CL to the revision map. | 95 # Add this CL to the revision map. |
101 revision_map[revision_number] = revision_object | 96 revision_map[revision_number] = revision_object |
102 | 97 |
103 return (revision_map, file_to_revision_map) | 98 return (revision_map, file_to_revision_map) |
104 | 99 |
105 def ParseLineDiff(self, path, component, file_action, revision_number): | 100 def ParseLineDiff(self, path, component, file_change_type, revision_number): |
106 changed_line_numbers = [] | 101 changed_line_numbers = [] |
107 changed_line_contents = [] | 102 changed_line_contents = [] |
108 | 103 |
109 url_map = self.component_to_urls_map.get(component) | 104 url_map = self.component_to_urls_map.get(component) |
110 if not url_map: | 105 if not url_map: |
111 logging.error('Component %s is not currently supported.', component) | |
112 return (None, None, None) | 106 return (None, None, None) |
113 | 107 |
114 # If the file is added (not modified), treat it as if it is not changed. | 108 # If the file is added (not modified), treat it as if it is not changed. |
115 backup_url = url_map['revision_url'] % revision_number | 109 backup_url = url_map['revision_url'] % revision_number |
116 if file_action == 'A': | 110 if file_change_type == 'A': |
117 return (backup_url, changed_line_numbers, changed_line_contents) | 111 return (backup_url, changed_line_numbers, changed_line_contents) |
118 | 112 |
119 # Retrieve data from the url. If no data is retrieved, return empty lists. | 113 # Retrieve data from the url. If no data is retrieved, return empty lists. |
120 url = url_map['diff_url'] % (path, revision_number - 1, | 114 url = url_map['diff_url'] % (path, revision_number - 1, |
121 revision_number, revision_number) | 115 revision_number, revision_number) |
122 data = crash_utils.GetDataFromURL(url) | 116 data = crash_utils.GetDataFromURL(url) |
123 if not data: | 117 if not data: |
124 logging.error('Failed to get line changes from %s.', url) | |
125 return (backup_url, changed_line_numbers, changed_line_contents) | 118 return (backup_url, changed_line_numbers, changed_line_contents) |
126 | 119 |
127 line_diff_html = minidom.parseString(data) | 120 line_diff_html = minidom.parseString(data) |
128 tables = line_diff_html.getElementsByTagName('table') | 121 tables = line_diff_html.getElementsByTagName('table') |
129 # If the page does not have NUM_TABLES_IN_LINEDIFF_PAGE tables, there | 122 # If the page does not have NUM_TABLES_IN_LINEDIFF_PAGE tables, there |
130 # must be an error in the html page. | 123 # must be an error in the html page. |
131 if len(tables) != NUM_TABLES_IN_LINEDIFF_PAGE: | 124 if len(tables) != NUM_TABLES_IN_LINEDIFF_PAGE: |
132 logging.error('Failed to retrieve the diff of revision %d from %s.', | |
133 revision_number, url) | |
134 return (backup_url, changed_line_numbers, changed_line_contents) | 125 return (backup_url, changed_line_numbers, changed_line_contents) |
135 | 126 |
136 # Diff content is in the second table. Each line of the diff content | 127 # Diff content is in the second table. Each line of the diff content |
137 # is in <tr>. | 128 # is in <tr>. |
138 trs = tables[1].getElementsByTagName('tr') | 129 trs = tables[1].getElementsByTagName('tr') |
139 prefix_len = len('vc_diff_') | 130 prefix_len = len('vc_diff_') |
140 | 131 |
141 # Filter trs so that it only contains diff chunk with contents. | 132 # Filter trs so that it only contains diff chunk with contents. |
142 filtered_trs = [] | 133 filtered_trs = [] |
143 for tr in trs: | 134 for tr in trs: |
(...skipping 12 matching lines...) Expand all Loading... |
156 return (backup_url, changed_line_numbers, changed_line_contents) | 147 return (backup_url, changed_line_numbers, changed_line_contents) |
157 | 148 |
158 filtered_trs.append(tr) | 149 filtered_trs.append(tr) |
159 | 150 |
160 # Iterate through filtered trs, and grab line diff information. | 151 # Iterate through filtered trs, and grab line diff information. |
161 for tr in filtered_trs: | 152 for tr in filtered_trs: |
162 tds = tr.getElementsByTagName('td') | 153 tds = tr.getElementsByTagName('td') |
163 | 154 |
164 # If there aren't 3 tds, this line should not contain line diff. | 155 # If there aren't 3 tds, this line should not contain line diff. |
165 if len(tds) != NUM_TDS_IN_LINEDIFF_PAGE: | 156 if len(tds) != NUM_TDS_IN_LINEDIFF_PAGE: |
166 logging.warning('Failed to get a line of new file in revision %d.', | |
167 revision_number) | |
168 continue | 157 continue |
169 | 158 |
170 # If line number information is not in hyperlink, ignore this line. | 159 # If line number information is not in hyperlink, ignore this line. |
171 try: | 160 try: |
172 line_num = tds[0].getElementsByTagName('a')[0].firstChild.nodeValue | 161 line_num = tds[0].getElementsByTagName('a')[0].firstChild.nodeValue |
173 left_diff_type = tds[1].getAttribute('class')[prefix_len:] | 162 left_diff_type = tds[1].getAttribute('class')[prefix_len:] |
174 right_diff_type = tds[2].getAttribute('class')[prefix_len:] | 163 right_diff_type = tds[2].getAttribute('class')[prefix_len:] |
175 except IndexError: | 164 except IndexError: |
176 logging.warning('Failed to get a line of file in revision %d.', | |
177 revision_number) | |
178 continue | 165 continue |
179 | 166 |
180 # Treat the line as modified only if the left and right diffs both have | 167 # Treat the line as modified only if the left and right diffs both have |
181 # type 'change' or have different change types, and if the change is not | 168 # type 'change' or have different change types, and if the change is not |
182 # a deletion. | 169 # a deletion. |
183 if (left_diff_type != right_diff_type) or ( | 170 if (left_diff_type != right_diff_type) or ( |
184 left_diff_type == 'change' and right_diff_type == 'change'): | 171 left_diff_type == 'change' and right_diff_type == 'change'): |
185 | 172 |
186 # Check if the line content is not empty. | 173 # Check if the line content is not empty. |
187 try: | 174 try: |
188 new_line = tds[2].firstChild.nodeValue | 175 new_line = tds[2].firstChild.nodeValue |
189 except AttributeError: | 176 except AttributeError: |
190 new_line = '' | 177 new_line = '' |
191 | 178 |
192 if not (left_diff_type == 'remove' and right_diff_type == 'empty'): | 179 if not (left_diff_type == 'remove' and right_diff_type == 'empty'): |
193 changed_line_numbers.append(int(line_num)) | 180 changed_line_numbers.append(int(line_num)) |
194 changed_line_contents.append(new_line.strip()) | 181 changed_line_contents.append(new_line.strip()) |
195 | 182 |
196 return (url, changed_line_numbers, changed_line_contents) | 183 return (url, changed_line_numbers, changed_line_contents) |
197 | 184 |
198 def ParseBlameInfo(self, component, file_path, line, revision): | 185 def ParseBlameInfo(self, component, file_path, line, revision): |
199 url_map = self.component_to_urls_map.get(component) | 186 url_map = self.component_to_urls_map.get(component) |
200 if not url_map: | 187 if not url_map: |
201 logging.error('Component %s is not currently supported.', component) | |
202 return None | 188 return None |
203 | 189 |
204 # Retrieve blame data from url, return None if fails. | 190 # Retrieve blame data from url, return None if fails. |
205 url = url_map['blame_url'] % (file_path, revision, revision) | 191 url = url_map['blame_url'] % (file_path, revision, revision) |
206 data = crash_utils.GetDataFromURL(url) | 192 data = crash_utils.GetDataFromURL(url) |
207 if not data: | 193 if not data: |
208 logging.error('Failed to retrieve annotation information from %s.', | |
209 url) | |
210 return None | 194 return None |
211 | 195 |
212 blame_html = minidom.parseString(data) | 196 blame_html = minidom.parseString(data) |
213 | 197 |
214 title = blame_html.getElementsByTagName('title') | 198 title = blame_html.getElementsByTagName('title') |
215 # If the returned html page is an exception page, return None. | 199 # If the returned html page is an exception page, return None. |
216 if title[0].firstChild.nodeValue == 'ViewVC Exception': | 200 if title[0].firstChild.nodeValue == 'ViewVC Exception': |
217 logging.error('Failed to retrieve blame information from %s.', url) | |
218 return None | 201 return None |
219 | 202 |
220 # Each of the blame result is in <tr>. | 203 # Each of the blame result is in <tr>. |
221 blame_results = blame_html.getElementsByTagName('tr') | 204 blame_results = blame_html.getElementsByTagName('tr') |
222 blame_result = blame_results[line] | 205 try: |
| 206 blame_result = blame_results[line] |
| 207 except IndexError: |
| 208 return None |
223 | 209 |
224 # There must be 4 <td> for each <tr>. If not, this page is wrong. | 210 # There must be 4 <td> for each <tr>. If not, this page is wrong. |
225 tds = blame_result.getElementsByTagName('td') | 211 tds = blame_result.getElementsByTagName('td') |
226 if len(tds) != 4: | 212 if len(tds) != 4: |
227 logging.error('Failed to retrieve blame information from %s.', url) | |
228 return None | 213 return None |
229 | 214 |
230 # The third <td> has the line content, separated by <span>s. Combine | 215 # The third <td> has the line content, separated by <span>s. Combine |
231 # those to get a string of changed line. If it has nothing, the line | 216 # those to get a string of changed line. If it has nothing, the line |
232 # is empty. | 217 # is empty. |
233 line_content = '' | 218 line_content = '' |
234 if tds[3].hasChildNodes(): | 219 if tds[3].hasChildNodes(): |
235 contents = tds[3].childNodes | 220 contents = tds[3].childNodes |
236 | 221 |
237 for content in contents: | 222 for content in contents: |
(...skipping 12 matching lines...) Expand all Loading... |
250 blame_result = blame_results[line] | 235 blame_result = blame_results[line] |
251 tds = blame_result.getElementsByTagName('td') | 236 tds = blame_result.getElementsByTagName('td') |
252 author = tds[1].firstChild.nodeValue | 237 author = tds[1].firstChild.nodeValue |
253 | 238 |
254 # Revision can either be in hyperlink or plain text. | 239 # Revision can either be in hyperlink or plain text. |
255 try: | 240 try: |
256 revision = tds[2].getElementsByTagName('a')[0].firstChild.nodeValue | 241 revision = tds[2].getElementsByTagName('a')[0].firstChild.nodeValue |
257 except IndexError: | 242 except IndexError: |
258 revision = tds[2].firstChild.nodeValue | 243 revision = tds[2].firstChild.nodeValue |
259 | 244 |
| 245 (revision_info, _) = self.ParseChangelog(component, revision, revision) |
| 246 message = revision_info[int(revision)]['message'] |
| 247 |
260 # Return the parsed information. | 248 # Return the parsed information. |
261 revision_url = url_map['revision_url'] % int(revision) | 249 revision_url = url_map['revision_url'] % int(revision) |
262 return (line_content, revision, author, revision_url) | 250 return (line_content, revision, author, revision_url, message) |
OLD | NEW |