Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(324)

Side by Side Diff: tools/findit/svn_repository_parser.py

Issue 478763003: [Findit] Bug fixing and implemented some feature requests. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addressed codereview and removed all references to logging Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« tools/findit/crash_utils.py ('K') | « tools/findit/stacktrace.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2014 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import logging
6 import os
7 import xml.dom.minidom as minidom 5 import xml.dom.minidom as minidom
8 from xml.parsers.expat import ExpatError 6 from xml.parsers.expat import ExpatError
9 7
10 import crash_utils 8 import crash_utils
11 from repository_parser_interface import ParserInterface 9 from repository_parser_interface import ParserInterface
12 10
13 11
14 # This number is 6 because each linediff page in src.chromium.org should 12 # This number is 6 because each linediff page in src.chromium.org should
15 # contain the following tables: table with revision number, table with actual 13 # contain the following tables: table with revision number, table with actual
16 # diff, table with dropdown menu, table with legend, a border table and a table 14 # diff, table with dropdown menu, table with legend, a border table and a table
(...skipping 17 matching lines...) Expand all
34 32
def ParseChangelog(self, component, range_start, range_end):
  """Parses an SVN changelog for |component| over a revision range.

  Fetches the changelog XML for revisions [range_start, range_end] from the
  component's configured 'changelog_url' and extracts, for each revision,
  the author, commit message, revision URL and the list of changed files.

  Args:
    component: Name of the component; must be a key in
        self.component_to_urls_map (parsed from the config file).
    range_start: First revision number of the range.
    range_end: Last revision number of the range.

  Returns:
    A tuple (revision_map, file_to_revision_map) where:
      revision_map: {revision_number: {'author': ..., 'message': ...,
          'url': ...}}
      file_to_revision_map: {file_path: [(revision_number,
          file_change_type), ...]}
    Both maps are empty if the component is unsupported, the URL fetch
    fails, or the response is not parseable XML.
  """
  file_to_revision_map = {}
  revision_map = {}

  # Check if the current component is supported by reading the components
  # parsed from config file. If it is not, return empty maps.
  url_map = self.component_to_urls_map.get(component)
  if not url_map:
    return (revision_map, file_to_revision_map)

  # Retrieve data from the url; return empty maps if the fetch fails.
  revision_range_str = '%s:%s' % (range_start, range_end)
  url = url_map['changelog_url'] % revision_range_str
  response = crash_utils.GetDataFromURL(url)
  if not response:
    return (revision_map, file_to_revision_map)

  # Parse xml out of the returned string. If it fails, return empty maps.
  try:
    xml_revisions = minidom.parseString(response)
  except ExpatError:
    return (revision_map, file_to_revision_map)

  # One <logentry> element per revision in the returned range.
  for revision in xml_revisions.getElementsByTagName('logentry'):
    # Create a new revision object for each revision.
    revision_object = {}

    # Set author of the CL.
    revision_object['author'] = revision.getElementsByTagName(
        'author')[0].firstChild.nodeValue

    # Get the revision number from xml.
    revision_number = int(revision.getAttribute('revision'))

    # Iterate through the changed paths in the CL.
    paths = revision.getElementsByTagName('paths')
    if paths:
      for changed_path in paths[0].getElementsByTagName('path'):
        # Get path and file change type from the xml.
        file_path = changed_path.firstChild.nodeValue
        file_change_type = changed_path.getAttribute('action')

        # Strip the '/trunk/' prefix so the path matches the
        # source-tree-relative paths used elsewhere in findit.
        if file_path.startswith('/trunk/'):
          file_path = file_path[len('/trunk/'):]

        # Add the file to the map (idiomatic setdefault instead of a
        # manual membership check + insert).
        file_to_revision_map.setdefault(file_path, []).append(
            (revision_number, file_change_type))

    # Set commit message of the CL.
    revision_object['message'] = revision.getElementsByTagName('msg')[
        0].firstChild.nodeValue

    # Set url of this CL.
    revision_object['url'] = url_map['revision_url'] % revision_number

    # Add this CL to the revision map.
    revision_map[revision_number] = revision_object

  return (revision_map, file_to_revision_map)
104 99
105 def ParseLineDiff(self, path, component, file_action, revision_number): 100 def ParseLineDiff(self, path, component, file_change_type, revision_number):
106 changed_line_numbers = [] 101 changed_line_numbers = []
107 changed_line_contents = [] 102 changed_line_contents = []
108 103
109 url_map = self.component_to_urls_map.get(component) 104 url_map = self.component_to_urls_map.get(component)
110 if not url_map: 105 if not url_map:
111 logging.error('Component %s is not currently supported.', component)
112 return (None, None, None) 106 return (None, None, None)
113 107
114 # If the file is added (not modified), treat it as if it is not changed. 108 # If the file is added (not modified), treat it as if it is not changed.
115 backup_url = url_map['revision_url'] % revision_number 109 backup_url = url_map['revision_url'] % revision_number
116 if file_action == 'A': 110 if file_change_type == 'A':
117 return (backup_url, changed_line_numbers, changed_line_contents) 111 return (backup_url, changed_line_numbers, changed_line_contents)
118 112
119 # Retrieve data from the url. If no data is retrieved, return empty lists. 113 # Retrieve data from the url. If no data is retrieved, return empty lists.
120 url = url_map['diff_url'] % (path, revision_number - 1, 114 url = url_map['diff_url'] % (path, revision_number - 1,
121 revision_number, revision_number) 115 revision_number, revision_number)
122 data = crash_utils.GetDataFromURL(url) 116 data = crash_utils.GetDataFromURL(url)
123 if not data: 117 if not data:
124 logging.error('Failed to get line changes from %s.', url)
125 return (backup_url, changed_line_numbers, changed_line_contents) 118 return (backup_url, changed_line_numbers, changed_line_contents)
126 119
127 line_diff_html = minidom.parseString(data) 120 line_diff_html = minidom.parseString(data)
128 tables = line_diff_html.getElementsByTagName('table') 121 tables = line_diff_html.getElementsByTagName('table')
129 # If there are not NUM_TABLES tables in the html page, there should be an 122 # If there are not NUM_TABLES tables in the html page, there should be an
130 # error in the html page. 123 # error in the html page.
131 if len(tables) != NUM_TABLES_IN_LINEDIFF_PAGE: 124 if len(tables) != NUM_TABLES_IN_LINEDIFF_PAGE:
132 logging.error('Failed to retrieve the diff of revision %d from %s.',
133 revision_number, url)
134 return (backup_url, changed_line_numbers, changed_line_contents) 125 return (backup_url, changed_line_numbers, changed_line_contents)
135 126
136 # Diff content is in the second table. Each line of the diff content 127 # Diff content is in the second table. Each line of the diff content
137 # is in <tr>. 128 # is in <tr>.
138 trs = tables[1].getElementsByTagName('tr') 129 trs = tables[1].getElementsByTagName('tr')
139 prefix_len = len('vc_diff_') 130 prefix_len = len('vc_diff_')
140 131
141 # Filter trs so that it only contains diff chunk with contents. 132 # Filter trs so that it only contains diff chunk with contents.
142 filtered_trs = [] 133 filtered_trs = []
143 for tr in trs: 134 for tr in trs:
(...skipping 12 matching lines...) Expand all
156 return (backup_url, changed_line_numbers, changed_line_contents) 147 return (backup_url, changed_line_numbers, changed_line_contents)
157 148
158 filtered_trs.append(tr) 149 filtered_trs.append(tr)
159 150
160 # Iterate through filtered trs, and grab line diff information. 151 # Iterate through filtered trs, and grab line diff information.
161 for tr in filtered_trs: 152 for tr in filtered_trs:
162 tds = tr.getElementsByTagName('td') 153 tds = tr.getElementsByTagName('td')
163 154
164 # If there aren't 3 tds, this line does should not contain line diff. 155 # If there aren't 3 tds, this line does should not contain line diff.
165 if len(tds) != NUM_TDS_IN_LINEDIFF_PAGE: 156 if len(tds) != NUM_TDS_IN_LINEDIFF_PAGE:
166 logging.warning('Failed to get a line of new file in revision %d.',
167 revision_number)
168 continue 157 continue
169 158
170 # If line number information is not in hyperlink, ignore this line. 159 # If line number information is not in hyperlink, ignore this line.
171 try: 160 try:
172 line_num = tds[0].getElementsByTagName('a')[0].firstChild.nodeValue 161 line_num = tds[0].getElementsByTagName('a')[0].firstChild.nodeValue
173 left_diff_type = tds[1].getAttribute('class')[prefix_len:] 162 left_diff_type = tds[1].getAttribute('class')[prefix_len:]
174 right_diff_type = tds[2].getAttribute('class')[prefix_len:] 163 right_diff_type = tds[2].getAttribute('class')[prefix_len:]
175 except IndexError: 164 except IndexError:
176 logging.warning('Failed to get a line of file in revision %d.',
177 revision_number)
178 continue 165 continue
179 166
180 # Treat the line as modified only if both left and right diff has type 167 # Treat the line as modified only if both left and right diff has type
181 # changed or both have different change type, and if the change is not 168 # changed or both have different change type, and if the change is not
182 # deletion. 169 # deletion.
183 if (left_diff_type != right_diff_type) or ( 170 if (left_diff_type != right_diff_type) or (
184 left_diff_type == 'change' and right_diff_type == 'change'): 171 left_diff_type == 'change' and right_diff_type == 'change'):
185 172
186 # Check if the line content is not empty. 173 # Check if the line content is not empty.
187 try: 174 try:
188 new_line = tds[2].firstChild.nodeValue 175 new_line = tds[2].firstChild.nodeValue
189 except AttributeError: 176 except AttributeError:
190 new_line = '' 177 new_line = ''
191 178
192 if not (left_diff_type == 'remove' and right_diff_type == 'empty'): 179 if not (left_diff_type == 'remove' and right_diff_type == 'empty'):
193 changed_line_numbers.append(int(line_num)) 180 changed_line_numbers.append(int(line_num))
194 changed_line_contents.append(new_line.strip()) 181 changed_line_contents.append(new_line.strip())
195 182
196 return (url, changed_line_numbers, changed_line_contents) 183 return (url, changed_line_numbers, changed_line_contents)
197 184
198 def ParseBlameInfo(self, component, file_path, line, revision): 185 def ParseBlameInfo(self, component, file_path, line, revision):
199 url_map = self.component_to_urls_map.get(component) 186 url_map = self.component_to_urls_map.get(component)
200 if not url_map: 187 if not url_map:
201 logging.error('Component %s is not currently supported.', component)
202 return None 188 return None
203 189
204 # Retrieve blame data from url, return None if fails. 190 # Retrieve blame data from url, return None if fails.
205 url = url_map['blame_url'] % (file_path, revision, revision) 191 url = url_map['blame_url'] % (file_path, revision, revision)
206 data = crash_utils.GetDataFromURL(url) 192 data = crash_utils.GetDataFromURL(url)
207 if not data: 193 if not data:
208 logging.error('Failed to retrieve annotation information from %s.',
209 url)
210 return None 194 return None
211 195
212 blame_html = minidom.parseString(data) 196 blame_html = minidom.parseString(data)
213 197
214 title = blame_html.getElementsByTagName('title') 198 title = blame_html.getElementsByTagName('title')
215 # If the returned html page is an exception page, return None. 199 # If the returned html page is an exception page, return None.
216 if title[0].firstChild.nodeValue == 'ViewVC Exception': 200 if title[0].firstChild.nodeValue == 'ViewVC Exception':
217 logging.error('Failed to retrieve blame information from %s.', url)
218 return None 201 return None
219 202
220 # Each of the blame result is in <tr>. 203 # Each of the blame result is in <tr>.
221 blame_results = blame_html.getElementsByTagName('tr') 204 blame_results = blame_html.getElementsByTagName('tr')
222 blame_result = blame_results[line] 205 try:
206 blame_result = blame_results[line]
207 except IndexError:
208 return None
223 209
224 # There must be 4 <td> for each <tr>. If not, this page is wrong. 210 # There must be 4 <td> for each <tr>. If not, this page is wrong.
225 tds = blame_result.getElementsByTagName('td') 211 tds = blame_result.getElementsByTagName('td')
226 if len(tds) != 4: 212 if len(tds) != 4:
227 logging.error('Failed to retrieve blame information from %s.', url)
228 return None 213 return None
229 214
230 # The third <td> has the line content, separated by <span>s. Combine 215 # The third <td> has the line content, separated by <span>s. Combine
231 # those to get a string of changed line. If it has nothing, the line 216 # those to get a string of changed line. If it has nothing, the line
232 # is empty. 217 # is empty.
233 line_content = '' 218 line_content = ''
234 if tds[3].hasChildNodes(): 219 if tds[3].hasChildNodes():
235 contents = tds[3].childNodes 220 contents = tds[3].childNodes
236 221
237 for content in contents: 222 for content in contents:
(...skipping 12 matching lines...) Expand all
250 blame_result = blame_results[line] 235 blame_result = blame_results[line]
251 tds = blame_result.getElementsByTagName('td') 236 tds = blame_result.getElementsByTagName('td')
252 author = tds[1].firstChild.nodeValue 237 author = tds[1].firstChild.nodeValue
253 238
254 # Revision can either be in hyperlink or plain text. 239 # Revision can either be in hyperlink or plain text.
255 try: 240 try:
256 revision = tds[2].getElementsByTagName('a')[0].firstChild.nodeValue 241 revision = tds[2].getElementsByTagName('a')[0].firstChild.nodeValue
257 except IndexError: 242 except IndexError:
258 revision = tds[2].firstChild.nodeValue 243 revision = tds[2].firstChild.nodeValue
259 244
245 (revision_info, _) = self.ParseChangelog(component, revision, revision)
246 message = revision_info[int(revision)]['message']
247
260 # Return the parsed information. 248 # Return the parsed information.
261 revision_url = url_map['revision_url'] % int(revision) 249 revision_url = url_map['revision_url'] % int(revision)
262 return (line_content, revision, author, revision_url) 250 return (line_content, revision, author, revision_url, message)
OLDNEW
« tools/findit/crash_utils.py ('K') | « tools/findit/stacktrace.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698