| OLD | NEW |
| 1 # Copyright 2014 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging | |
| 6 import os | |
| 7 import xml.dom.minidom as minidom | 5 import xml.dom.minidom as minidom |
| 8 from xml.parsers.expat import ExpatError | 6 from xml.parsers.expat import ExpatError |
| 9 | 7 |
| 10 import crash_utils | 8 import crash_utils |
| 11 from repository_parser_interface import ParserInterface | 9 from repository_parser_interface import ParserInterface |
| 12 | 10 |
| 13 | 11 |
| 14 # This number is 6 because each linediff page in src.chromium.org should | 12 # This number is 6 because each linediff page in src.chromium.org should |
| 15 # contain the following tables: table with revision number, table with actual | 13 # contain the following tables: table with revision number, table with actual |
| 16 # diff, table with dropdown menu, table with legend, a border table and a table | 14 # diff, table with dropdown menu, table with legend, a border table and a table |
| (...skipping 17 matching lines...) Expand all Loading... |
| 34 | 32 |
| 35 def ParseChangelog(self, component, range_start, range_end): | 33 def ParseChangelog(self, component, range_start, range_end): |
| 36 file_to_revision_map = {} | 34 file_to_revision_map = {} |
| 37 revision_map = {} | 35 revision_map = {} |
| 38 | 36 |
| 39 # Check if the current component is supported by reading the components | 37 # Check if the current component is supported by reading the components |
| 40 # parsed from config file. If it is not, fail. | 38 # parsed from config file. If it is not, fail. |
| 41 | 39 |
| 42 url_map = self.component_to_urls_map.get(component) | 40 url_map = self.component_to_urls_map.get(component) |
| 43 if not url_map: | 41 if not url_map: |
| 44 logging.error('Component %s is not currently supported.', component) | |
| 45 return (revision_map, file_to_revision_map) | 42 return (revision_map, file_to_revision_map) |
| 46 | 43 |
| 47 # Retrieve data from the url, return empty map if fails. | 44 # Retrieve data from the url, return empty map if fails. |
| 48 revision_range_str = '%s:%s' % (range_start, range_end) | 45 revision_range_str = '%s:%s' % (range_start, range_end) |
| 49 url = url_map['changelog_url'] % revision_range_str | 46 url = url_map['changelog_url'] % revision_range_str |
| 50 response = crash_utils.GetDataFromURL(url) | 47 response = crash_utils.GetDataFromURL(url) |
| 51 if not response: | 48 if not response: |
| 52 logging.error('Failed to retrieve changelog from %s, range %s.', | |
| 53 url, revision_range_str) | |
| 54 return (revision_map, file_to_revision_map) | 49 return (revision_map, file_to_revision_map) |
| 55 | 50 |
| 56 # Parse xml out of the returned string. If it fails, return empty map. | 51 # Parse xml out of the returned string. If it fails, return empty map. |
| 57 try: | 52 try: |
| 58 xml_revisions = minidom.parseString(response) | 53 xml_revisions = minidom.parseString(response) |
| 59 except ExpatError: | 54 except ExpatError: |
| 60 logging.error('Failed to parse changelog from %s, range %s.', | |
| 61 url, revision_range_str) | |
| 62 return (revision_map, file_to_revision_map) | 55 return (revision_map, file_to_revision_map) |
| 63 | 56 |
| 64 # Iterate through the returned XML object. | 57 # Iterate through the returned XML object. |
| 65 revisions = xml_revisions.getElementsByTagName('logentry') | 58 revisions = xml_revisions.getElementsByTagName('logentry') |
| 66 for revision in revisions: | 59 for revision in revisions: |
| 67 # Create new revision object for each of the revisions. | 60 # Create new revision object for each of the revisions. |
| 68 revision_object = {} | 61 revision_object = {} |
| 69 | 62 |
| 70 # Set author of the CL. | 63 # Set author of the CL. |
| 71 revision_object['author'] = revision.getElementsByTagName( | 64 revision_object['author'] = revision.getElementsByTagName( |
| 72 'author')[0].firstChild.nodeValue | 65 'author')[0].firstChild.nodeValue |
| 73 | 66 |
| 74 # Get the revision number from xml. | 67 # Get the revision number from xml. |
| 75 revision_number = int(revision.getAttribute('revision')) | 68 revision_number = int(revision.getAttribute('revision')) |
| 76 | 69 |
| 77 # Iterate through the changed paths in the CL. | 70 # Iterate through the changed paths in the CL. |
| 78 paths = revision.getElementsByTagName('paths') | 71 paths = revision.getElementsByTagName('paths') |
| 79 if paths: | 72 if paths: |
| 80 for changed_path in paths[0].getElementsByTagName('path'): | 73 for changed_path in paths[0].getElementsByTagName('path'): |
| 81 # Get path, file action and file name from the xml. | 74 # Get path and file change type from the xml. |
| 82 file_path = changed_path.firstChild.nodeValue | 75 file_path = changed_path.firstChild.nodeValue |
| 83 file_action = changed_path.getAttribute('action') | 76 file_change_type = changed_path.getAttribute('action') |
| 84 changed_file = os.path.basename(file_path) | 77 |
| 78 if file_path.startswith('/trunk/'): |
| 79 file_path = file_path[len('/trunk/'):] |
| 85 | 80 |
| 86 # Add file to the map. | 81 # Add file to the map. |
| 87 if changed_file not in file_to_revision_map: | 82 if file_path not in file_to_revision_map: |
| 88 file_to_revision_map[changed_file] = [] | 83 file_to_revision_map[file_path] = [] |
| 89 file_to_revision_map[changed_file].append( | 84 file_to_revision_map[file_path].append( |
| 90 (revision_number, file_action, file_path)) | 85 (revision_number, file_change_type)) |
| 91 | 86 |
| 92 # Set commit message of the CL. | 87 # Set commit message of the CL. |
| 93 revision_object['message'] = revision.getElementsByTagName('msg')[ | 88 revision_object['message'] = revision.getElementsByTagName('msg')[ |
| 94 0].firstChild.nodeValue | 89 0].firstChild.nodeValue |
| 95 | 90 |
| 96 # Set url of this CL. | 91 # Set url of this CL. |
| 97 revision_url = url_map['revision_url'] % revision_number | 92 revision_url = url_map['revision_url'] % revision_number |
| 98 revision_object['url'] = revision_url | 93 revision_object['url'] = revision_url |
| 99 | 94 |
| 100 # Add this CL to the revision map. | 95 # Add this CL to the revision map. |
| 101 revision_map[revision_number] = revision_object | 96 revision_map[revision_number] = revision_object |
| 102 | 97 |
| 103 return (revision_map, file_to_revision_map) | 98 return (revision_map, file_to_revision_map) |
| 104 | 99 |
| 105 def ParseLineDiff(self, path, component, file_action, revision_number): | 100 def ParseLineDiff(self, path, component, file_change_type, revision_number): |
| 106 changed_line_numbers = [] | 101 changed_line_numbers = [] |
| 107 changed_line_contents = [] | 102 changed_line_contents = [] |
| 108 | 103 |
| 109 url_map = self.component_to_urls_map.get(component) | 104 url_map = self.component_to_urls_map.get(component) |
| 110 if not url_map: | 105 if not url_map: |
| 111 logging.error('Component %s is not currently supported.', component) | |
| 112 return (None, None, None) | 106 return (None, None, None) |
| 113 | 107 |
| 114 # If the file is added (not modified), treat it as if it is not changed. | 108 # If the file is added (not modified), treat it as if it is not changed. |
| 115 backup_url = url_map['revision_url'] % revision_number | 109 backup_url = url_map['revision_url'] % revision_number |
| 116 if file_action == 'A': | 110 if file_change_type == 'A': |
| 117 return (backup_url, changed_line_numbers, changed_line_contents) | 111 return (backup_url, changed_line_numbers, changed_line_contents) |
| 118 | 112 |
| 119 # Retrieve data from the url. If no data is retrieved, return empty lists. | 113 # Retrieve data from the url. If no data is retrieved, return empty lists. |
| 120 url = url_map['diff_url'] % (path, revision_number - 1, | 114 url = url_map['diff_url'] % (path, revision_number - 1, |
| 121 revision_number, revision_number) | 115 revision_number, revision_number) |
| 122 data = crash_utils.GetDataFromURL(url) | 116 data = crash_utils.GetDataFromURL(url) |
| 123 if not data: | 117 if not data: |
| 124 logging.error('Failed to get line changes from %s.', url) | |
| 125 return (backup_url, changed_line_numbers, changed_line_contents) | 118 return (backup_url, changed_line_numbers, changed_line_contents) |
| 126 | 119 |
| 127 line_diff_html = minidom.parseString(data) | 120 line_diff_html = minidom.parseString(data) |
| 128 tables = line_diff_html.getElementsByTagName('table') | 121 tables = line_diff_html.getElementsByTagName('table') |
| 129 # If there are not NUM_TABLES tables in the html page, there should be an | 122 # If there are not NUM_TABLES tables in the html page, there should be an |
| 130 # error in the html page. | 123 # error in the html page. |
| 131 if len(tables) != NUM_TABLES_IN_LINEDIFF_PAGE: | 124 if len(tables) != NUM_TABLES_IN_LINEDIFF_PAGE: |
| 132 logging.error('Failed to retrieve the diff of revision %d from %s.', | |
| 133 revision_number, url) | |
| 134 return (backup_url, changed_line_numbers, changed_line_contents) | 125 return (backup_url, changed_line_numbers, changed_line_contents) |
| 135 | 126 |
| 136 # Diff content is in the second table. Each line of the diff content | 127 # Diff content is in the second table. Each line of the diff content |
| 137 # is in <tr>. | 128 # is in <tr>. |
| 138 trs = tables[1].getElementsByTagName('tr') | 129 trs = tables[1].getElementsByTagName('tr') |
| 139 prefix_len = len('vc_diff_') | 130 prefix_len = len('vc_diff_') |
| 140 | 131 |
| 141 # Filter trs so that it only contains diff chunk with contents. | 132 # Filter trs so that it only contains diff chunk with contents. |
| 142 filtered_trs = [] | 133 filtered_trs = [] |
| 143 for tr in trs: | 134 for tr in trs: |
| (...skipping 12 matching lines...) Expand all Loading... |
| 156 return (backup_url, changed_line_numbers, changed_line_contents) | 147 return (backup_url, changed_line_numbers, changed_line_contents) |
| 157 | 148 |
| 158 filtered_trs.append(tr) | 149 filtered_trs.append(tr) |
| 159 | 150 |
| 160 # Iterate through filtered trs, and grab line diff information. | 151 # Iterate through filtered trs, and grab line diff information. |
| 161 for tr in filtered_trs: | 152 for tr in filtered_trs: |
| 162 tds = tr.getElementsByTagName('td') | 153 tds = tr.getElementsByTagName('td') |
| 163 | 154 |
| 164 # If there aren't 3 tds, this line should not contain line diff. | 155 # If there aren't 3 tds, this line should not contain line diff. |
| 165 if len(tds) != NUM_TDS_IN_LINEDIFF_PAGE: | 156 if len(tds) != NUM_TDS_IN_LINEDIFF_PAGE: |
| 166 logging.warning('Failed to get a line of new file in revision %d.', | |
| 167 revision_number) | |
| 168 continue | 157 continue |
| 169 | 158 |
| 170 # If line number information is not in hyperlink, ignore this line. | 159 # If line number information is not in hyperlink, ignore this line. |
| 171 try: | 160 try: |
| 172 line_num = tds[0].getElementsByTagName('a')[0].firstChild.nodeValue | 161 line_num = tds[0].getElementsByTagName('a')[0].firstChild.nodeValue |
| 173 left_diff_type = tds[1].getAttribute('class')[prefix_len:] | 162 left_diff_type = tds[1].getAttribute('class')[prefix_len:] |
| 174 right_diff_type = tds[2].getAttribute('class')[prefix_len:] | 163 right_diff_type = tds[2].getAttribute('class')[prefix_len:] |
| 175 except IndexError: | 164 except IndexError: |
| 176 logging.warning('Failed to get a line of file in revision %d.', | |
| 177 revision_number) | |
| 178 continue | 165 continue |
| 179 | 166 |
| 180 # Treat the line as modified only if both left and right diff has type | 167 # Treat the line as modified only if both left and right diff has type |
| 181 # changed or both have different change type, and if the change is not | 168 # changed or both have different change type, and if the change is not |
| 182 # deletion. | 169 # deletion. |
| 183 if (left_diff_type != right_diff_type) or ( | 170 if (left_diff_type != right_diff_type) or ( |
| 184 left_diff_type == 'change' and right_diff_type == 'change'): | 171 left_diff_type == 'change' and right_diff_type == 'change'): |
| 185 | 172 |
| 186 # Check if the line content is not empty. | 173 # Check if the line content is not empty. |
| 187 try: | 174 try: |
| 188 new_line = tds[2].firstChild.nodeValue | 175 new_line = tds[2].firstChild.nodeValue |
| 189 except AttributeError: | 176 except AttributeError: |
| 190 new_line = '' | 177 new_line = '' |
| 191 | 178 |
| 192 if not (left_diff_type == 'remove' and right_diff_type == 'empty'): | 179 if not (left_diff_type == 'remove' and right_diff_type == 'empty'): |
| 193 changed_line_numbers.append(int(line_num)) | 180 changed_line_numbers.append(int(line_num)) |
| 194 changed_line_contents.append(new_line.strip()) | 181 changed_line_contents.append(new_line.strip()) |
| 195 | 182 |
| 196 return (url, changed_line_numbers, changed_line_contents) | 183 return (url, changed_line_numbers, changed_line_contents) |
| 197 | 184 |
| 198 def ParseBlameInfo(self, component, file_path, line, revision): | 185 def ParseBlameInfo(self, component, file_path, line, revision): |
| 199 url_map = self.component_to_urls_map.get(component) | 186 url_map = self.component_to_urls_map.get(component) |
| 200 if not url_map: | 187 if not url_map: |
| 201 logging.error('Component %s is not currently supported.', component) | |
| 202 return None | 188 return None |
| 203 | 189 |
| 204 # Retrieve blame data from url, return None if fails. | 190 # Retrieve blame data from url, return None if fails. |
| 205 url = url_map['blame_url'] % (file_path, revision, revision) | 191 url = url_map['blame_url'] % (file_path, revision, revision) |
| 206 data = crash_utils.GetDataFromURL(url) | 192 data = crash_utils.GetDataFromURL(url) |
| 207 if not data: | 193 if not data: |
| 208 logging.error('Failed to retrieve annotation information from %s.', | |
| 209 url) | |
| 210 return None | 194 return None |
| 211 | 195 |
| 212 blame_html = minidom.parseString(data) | 196 blame_html = minidom.parseString(data) |
| 213 | 197 |
| 214 title = blame_html.getElementsByTagName('title') | 198 title = blame_html.getElementsByTagName('title') |
| 215 # If the returned html page is an exception page, return None. | 199 # If the returned html page is an exception page, return None. |
| 216 if title[0].firstChild.nodeValue == 'ViewVC Exception': | 200 if title[0].firstChild.nodeValue == 'ViewVC Exception': |
| 217 logging.error('Failed to retrieve blame information from %s.', url) | |
| 218 return None | 201 return None |
| 219 | 202 |
| 220 # Each of the blame result is in <tr>. | 203 # Each of the blame result is in <tr>. |
| 221 blame_results = blame_html.getElementsByTagName('tr') | 204 blame_results = blame_html.getElementsByTagName('tr') |
| 222 blame_result = blame_results[line] | 205 try: |
| 206 blame_result = blame_results[line] |
| 207 except IndexError: |
| 208 return None |
| 223 | 209 |
| 224 # There must be 4 <td> for each <tr>. If not, this page is wrong. | 210 # There must be 4 <td> for each <tr>. If not, this page is wrong. |
| 225 tds = blame_result.getElementsByTagName('td') | 211 tds = blame_result.getElementsByTagName('td') |
| 226 if len(tds) != 4: | 212 if len(tds) != 4: |
| 227 logging.error('Failed to retrieve blame information from %s.', url) | |
| 228 return None | 213 return None |
| 229 | 214 |
| 230 # The third <td> has the line content, separated by <span>s. Combine | 215 # The third <td> has the line content, separated by <span>s. Combine |
| 231 # those to get a string of changed line. If it has nothing, the line | 216 # those to get a string of changed line. If it has nothing, the line |
| 232 # is empty. | 217 # is empty. |
| 233 line_content = '' | 218 line_content = '' |
| 234 if tds[3].hasChildNodes(): | 219 if tds[3].hasChildNodes(): |
| 235 contents = tds[3].childNodes | 220 contents = tds[3].childNodes |
| 236 | 221 |
| 237 for content in contents: | 222 for content in contents: |
| (...skipping 12 matching lines...) Expand all Loading... |
| 250 blame_result = blame_results[line] | 235 blame_result = blame_results[line] |
| 251 tds = blame_result.getElementsByTagName('td') | 236 tds = blame_result.getElementsByTagName('td') |
| 252 author = tds[1].firstChild.nodeValue | 237 author = tds[1].firstChild.nodeValue |
| 253 | 238 |
| 254 # Revision can either be in hyperlink or plain text. | 239 # Revision can either be in hyperlink or plain text. |
| 255 try: | 240 try: |
| 256 revision = tds[2].getElementsByTagName('a')[0].firstChild.nodeValue | 241 revision = tds[2].getElementsByTagName('a')[0].firstChild.nodeValue |
| 257 except IndexError: | 242 except IndexError: |
| 258 revision = tds[2].firstChild.nodeValue | 243 revision = tds[2].firstChild.nodeValue |
| 259 | 244 |
| 245 (revision_info, _) = self.ParseChangelog(component, revision, revision) |
| 246 message = revision_info[int(revision)]['message'] |
| 247 |
| 260 # Return the parsed information. | 248 # Return the parsed information. |
| 261 revision_url = url_map['revision_url'] % int(revision) | 249 revision_url = url_map['revision_url'] % int(revision) |
| 262 return (line_content, revision, author, revision_url) | 250 return (line_content, revision, author, revision_url, message) |
| OLD | NEW |