OLD | NEW |
1 # Copyright 2014 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import logging | |
6 import os | |
7 import xml.dom.minidom as minidom | 5 import xml.dom.minidom as minidom |
8 from xml.parsers.expat import ExpatError | 6 from xml.parsers.expat import ExpatError |
9 | 7 |
10 import crash_utils | 8 import crash_utils |
11 from repository_parser_interface import ParserInterface | 9 from repository_parser_interface import ParserInterface |
12 | 10 |
13 | 11 |
14 # This number is 6 because each linediff page in src.chromium.org should | 12 # This number is 6 because each linediff page in src.chromium.org should |
15 # contain the following tables: table with revision number, table with actual | 13 # contain the following tables: table with revision number, table with actual |
16 # diff, table with dropdown menu, table with legend, a border table and a table | 14 # diff, table with dropdown menu, table with legend, a border table and a table |
(...skipping 17 matching lines...) Expand all Loading... |
34 | 32 |
35 def ParseChangelog(self, component, range_start, range_end): | 33 def ParseChangelog(self, component, range_start, range_end): |
36 file_to_revision_map = {} | 34 file_to_revision_map = {} |
37 revision_map = {} | 35 revision_map = {} |
38 | 36 |
39 # Check if the current component is supported by reading the components | 37 # Check if the current component is supported by reading the components |
40 # parsed from config file. If it is not, fail. | 38 # parsed from config file. If it is not, fail. |
41 | 39 |
42 url_map = self.component_to_urls_map.get(component) | 40 url_map = self.component_to_urls_map.get(component) |
43 if not url_map: | 41 if not url_map: |
44 logging.error('Component %s is not currently supported.', component) | |
45 return (revision_map, file_to_revision_map) | 42 return (revision_map, file_to_revision_map) |
46 | 43 |
47 # Retrieve data from the url, return empty map if fails. | 44 # Retrieve data from the url, return empty map if fails. |
48 revision_range_str = '%s:%s' % (range_start, range_end) | 45 revision_range_str = '%s:%s' % (range_start, range_end) |
49 url = url_map['changelog_url'] % revision_range_str | 46 url = url_map['changelog_url'] % revision_range_str |
50 response = crash_utils.GetDataFromURL(url) | 47 response = crash_utils.GetDataFromURL(url) |
51 if not response: | 48 if not response: |
52 logging.error('Failed to retrieve changelog from %s, range %s.', | |
53 url, revision_range_str) | |
54 return (revision_map, file_to_revision_map) | 49 return (revision_map, file_to_revision_map) |
55 | 50 |
56 # Parse xml out of the returned string. If it fails, return empty map. | 51 # Parse xml out of the returned string. If it fails, return empty map. |
57 try: | 52 try: |
58 xml_revisions = minidom.parseString(response) | 53 xml_revisions = minidom.parseString(response) |
59 except ExpatError: | 54 except ExpatError: |
60 logging.error('Failed to parse changelog from %s, range %s.', | |
61 url, revision_range_str) | |
62 return (revision_map, file_to_revision_map) | 55 return (revision_map, file_to_revision_map) |
63 | 56 |
64 # Iterate through the returned XML object. | 57 # Iterate through the returned XML object. |
65 revisions = xml_revisions.getElementsByTagName('logentry') | 58 revisions = xml_revisions.getElementsByTagName('logentry') |
66 for revision in revisions: | 59 for revision in revisions: |
67 # Create a new revision object for each of the revisions. | 60 # Create a new revision object for each of the revisions. |
68 revision_object = {} | 61 revision_object = {} |
69 | 62 |
70 # Set author of the CL. | 63 # Set author of the CL. |
71 revision_object['author'] = revision.getElementsByTagName( | 64 revision_object['author'] = revision.getElementsByTagName( |
72 'author')[0].firstChild.nodeValue | 65 'author')[0].firstChild.nodeValue |
73 | 66 |
74 # Get the revision number from xml. | 67 # Get the revision number from xml. |
75 revision_number = int(revision.getAttribute('revision')) | 68 revision_number = int(revision.getAttribute('revision')) |
76 | 69 |
77 # Iterate through the changed paths in the CL. | 70 # Iterate through the changed paths in the CL. |
78 paths = revision.getElementsByTagName('paths') | 71 paths = revision.getElementsByTagName('paths') |
79 if paths: | 72 if paths: |
80 for changed_path in paths[0].getElementsByTagName('path'): | 73 for changed_path in paths[0].getElementsByTagName('path'): |
81 # Get path, file action and file name from the xml. | 74 # Get path and file change type from the xml. |
82 file_path = changed_path.firstChild.nodeValue | 75 file_path = changed_path.firstChild.nodeValue |
83 file_action = changed_path.getAttribute('action') | 76 file_change_type = changed_path.getAttribute('action') |
84 changed_file = os.path.basename(file_path) | 77 |
| 78 if file_path.startswith('/trunk/'): |
| 79 file_path = file_path[len('/trunk/'):] |
85 | 80 |
86 # Add file to the map. | 81 # Add file to the map. |
87 if changed_file not in file_to_revision_map: | 82 if file_path not in file_to_revision_map: |
88 file_to_revision_map[changed_file] = [] | 83 file_to_revision_map[file_path] = [] |
89 file_to_revision_map[changed_file].append( | 84 file_to_revision_map[file_path].append( |
90 (revision_number, file_action, file_path)) | 85 (revision_number, file_change_type)) |
91 | 86 |
92 # Set commit message of the CL. | 87 # Set commit message of the CL. |
93 revision_object['message'] = revision.getElementsByTagName('msg')[ | 88 revision_object['message'] = revision.getElementsByTagName('msg')[ |
94 0].firstChild.nodeValue | 89 0].firstChild.nodeValue |
95 | 90 |
96 # Set url of this CL. | 91 # Set url of this CL. |
97 revision_url = url_map['revision_url'] % revision_number | 92 revision_url = url_map['revision_url'] % revision_number |
98 revision_object['url'] = revision_url | 93 revision_object['url'] = revision_url |
99 | 94 |
100 # Add this CL to the revision map. | 95 # Add this CL to the revision map. |
101 revision_map[revision_number] = revision_object | 96 revision_map[revision_number] = revision_object |
102 | 97 |
103 return (revision_map, file_to_revision_map) | 98 return (revision_map, file_to_revision_map) |
104 | 99 |
105 def ParseLineDiff(self, path, component, file_action, revision_number): | 100 def ParseLineDiff(self, path, component, file_change_type, revision_number): |
106 changed_line_numbers = [] | 101 changed_line_numbers = [] |
107 changed_line_contents = [] | 102 changed_line_contents = [] |
108 | 103 |
109 url_map = self.component_to_urls_map.get(component) | 104 url_map = self.component_to_urls_map.get(component) |
110 if not url_map: | 105 if not url_map: |
111 logging.error('Component %s is not currently supported.', component) | |
112 return (None, None, None) | 106 return (None, None, None) |
113 | 107 |
114 # If the file is added (not modified), treat it as if it is not changed. | 108 # If the file is added (not modified), treat it as if it is not changed. |
115 backup_url = url_map['revision_url'] % revision_number | 109 backup_url = url_map['revision_url'] % revision_number |
116 if file_action == 'A': | 110 if file_change_type == 'A': |
117 return (backup_url, changed_line_numbers, changed_line_contents) | 111 return (backup_url, changed_line_numbers, changed_line_contents) |
118 | 112 |
119 # Retrieve data from the url. If no data is retrieved, return empty lists. | 113 # Retrieve data from the url. If no data is retrieved, return empty lists. |
120 url = url_map['diff_url'] % (path, revision_number - 1, | 114 url = url_map['diff_url'] % (path, revision_number - 1, |
121 revision_number, revision_number) | 115 revision_number, revision_number) |
122 data = crash_utils.GetDataFromURL(url) | 116 data = crash_utils.GetDataFromURL(url) |
123 if not data: | 117 if not data: |
124 logging.error('Failed to get line changes from %s.', url) | |
125 return (backup_url, changed_line_numbers, changed_line_contents) | 118 return (backup_url, changed_line_numbers, changed_line_contents) |
126 | 119 |
127 line_diff_html = minidom.parseString(data) | 120 line_diff_html = minidom.parseString(data) |
128 tables = line_diff_html.getElementsByTagName('table') | 121 tables = line_diff_html.getElementsByTagName('table') |
129 # If the page does not have NUM_TABLES_IN_LINEDIFF_PAGE tables, there | 122 # If the page does not have NUM_TABLES_IN_LINEDIFF_PAGE tables, there |
130 # must be an error in the html page. | 123 # must be an error in the html page. |
131 if len(tables) != NUM_TABLES_IN_LINEDIFF_PAGE: | 124 if len(tables) != NUM_TABLES_IN_LINEDIFF_PAGE: |
132 logging.error('Failed to retrieve the diff of revision %d from %s.', | |
133 revision_number, url) | |
134 return (backup_url, changed_line_numbers, changed_line_contents) | 125 return (backup_url, changed_line_numbers, changed_line_contents) |
135 | 126 |
136 # Diff content is in the second table. Each line of the diff content | 127 # Diff content is in the second table. Each line of the diff content |
137 # is in <tr>. | 128 # is in <tr>. |
138 trs = tables[1].getElementsByTagName('tr') | 129 trs = tables[1].getElementsByTagName('tr') |
139 prefix_len = len('vc_diff_') | 130 prefix_len = len('vc_diff_') |
140 | 131 |
141 # Filter trs so that it only contains diff chunk with contents. | 132 # Filter trs so that it only contains diff chunk with contents. |
142 filtered_trs = [] | 133 filtered_trs = [] |
143 for tr in trs: | 134 for tr in trs: |
(...skipping 12 matching lines...) Expand all Loading... |
156 return (backup_url, changed_line_numbers, changed_line_contents) | 147 return (backup_url, changed_line_numbers, changed_line_contents) |
157 | 148 |
158 filtered_trs.append(tr) | 149 filtered_trs.append(tr) |
159 | 150 |
160 # Iterate through filtered trs, and grab line diff information. | 151 # Iterate through filtered trs, and grab line diff information. |
161 for tr in filtered_trs: | 152 for tr in filtered_trs: |
162 tds = tr.getElementsByTagName('td') | 153 tds = tr.getElementsByTagName('td') |
163 | 154 |
164 # If there aren't 3 tds, this line should not contain line diff. | 155 # If there aren't 3 tds, this line should not contain line diff. |
165 if len(tds) != NUM_TDS_IN_LINEDIFF_PAGE: | 156 if len(tds) != NUM_TDS_IN_LINEDIFF_PAGE: |
166 logging.warning('Failed to get a line of new file in revision %d.', | |
167 revision_number) | |
168 continue | 157 continue |
169 | 158 |
170 # If line number information is not in hyperlink, ignore this line. | 159 # If line number information is not in hyperlink, ignore this line. |
171 try: | 160 try: |
172 line_num = tds[0].getElementsByTagName('a')[0].firstChild.nodeValue | 161 line_num = tds[0].getElementsByTagName('a')[0].firstChild.nodeValue |
173 left_diff_type = tds[1].getAttribute('class')[prefix_len:] | 162 left_diff_type = tds[1].getAttribute('class')[prefix_len:] |
174 right_diff_type = tds[2].getAttribute('class')[prefix_len:] | 163 right_diff_type = tds[2].getAttribute('class')[prefix_len:] |
175 except IndexError: | 164 except IndexError: |
176 logging.warning('Failed to get a line of file in revision %d.', | |
177 revision_number) | |
178 continue | 165 continue |
179 | 166 |
180 # Treat the line as modified only if the left and right diffs both have | 167 # Treat the line as modified only if the left and right diffs both have |
181 # type 'change' or have different change types, and if the change is not | 168 # type 'change' or have different change types, and if the change is not |
182 # a deletion. | 169 # a deletion. |
183 if (left_diff_type != right_diff_type) or ( | 170 if (left_diff_type != right_diff_type) or ( |
184 left_diff_type == 'change' and right_diff_type == 'change'): | 171 left_diff_type == 'change' and right_diff_type == 'change'): |
185 | 172 |
186 # Check if the line content is not empty. | 173 # Check if the line content is not empty. |
187 try: | 174 try: |
188 new_line = tds[2].firstChild.nodeValue | 175 new_line = tds[2].firstChild.nodeValue |
189 except AttributeError: | 176 except AttributeError: |
190 new_line = '' | 177 new_line = '' |
191 | 178 |
192 if not (left_diff_type == 'remove' and right_diff_type == 'empty'): | 179 if not (left_diff_type == 'remove' and right_diff_type == 'empty'): |
193 changed_line_numbers.append(int(line_num)) | 180 changed_line_numbers.append(int(line_num)) |
194 changed_line_contents.append(new_line.strip()) | 181 changed_line_contents.append(new_line.strip()) |
195 | 182 |
196 return (url, changed_line_numbers, changed_line_contents) | 183 return (url, changed_line_numbers, changed_line_contents) |
197 | 184 |
198 def ParseBlameInfo(self, component, file_path, line, revision): | 185 def ParseBlameInfo(self, component, file_path, line, revision): |
199 url_map = self.component_to_urls_map.get(component) | 186 url_map = self.component_to_urls_map.get(component) |
200 if not url_map: | 187 if not url_map: |
201 logging.error('Component %s is not currently supported.', component) | |
202 return None | 188 return None |
203 | 189 |
204 # Retrieve blame data from url, return None if fails. | 190 # Retrieve blame data from url, return None if fails. |
205 url = url_map['blame_url'] % (file_path, revision, revision) | 191 url = url_map['blame_url'] % (file_path, revision, revision) |
206 data = crash_utils.GetDataFromURL(url) | 192 data = crash_utils.GetDataFromURL(url) |
207 if not data: | 193 if not data: |
208 logging.error('Failed to retrieve annotation information from %s.', | |
209 url) | |
210 return None | 194 return None |
211 | 195 |
212 blame_html = minidom.parseString(data) | 196 blame_html = minidom.parseString(data) |
213 | 197 |
214 title = blame_html.getElementsByTagName('title') | 198 title = blame_html.getElementsByTagName('title') |
215 # If the returned html page is an exception page, return None. | 199 # If the returned html page is an exception page, return None. |
216 if title[0].firstChild.nodeValue == 'ViewVC Exception': | 200 if title[0].firstChild.nodeValue == 'ViewVC Exception': |
217 logging.error('Failed to retrieve blame information from %s.', url) | |
218 return None | 201 return None |
219 | 202 |
220 # Each of the blame result is in <tr>. | 203 # Each of the blame result is in <tr>. |
221 blame_results = blame_html.getElementsByTagName('tr') | 204 blame_results = blame_html.getElementsByTagName('tr') |
222 blame_result = blame_results[line] | 205 try: |
| 206 blame_result = blame_results[line] |
| 207 except IndexError: |
| 208 return None |
223 | 209 |
224 # There must be 4 <td> for each <tr>. If not, this page is wrong. | 210 # There must be 4 <td> for each <tr>. If not, this page is wrong. |
225 tds = blame_result.getElementsByTagName('td') | 211 tds = blame_result.getElementsByTagName('td') |
226 if len(tds) != 4: | 212 if len(tds) != 4: |
227 logging.error('Failed to retrieve blame information from %s.', url) | |
228 return None | 213 return None |
229 | 214 |
230 # The third <td> has the line content, separated by <span>s. Combine | 215 # The third <td> has the line content, separated by <span>s. Combine |
231 # those to get a string of changed line. If it has nothing, the line | 216 # those to get a string of changed line. If it has nothing, the line |
232 # is empty. | 217 # is empty. |
233 line_content = '' | 218 line_content = '' |
234 if tds[3].hasChildNodes(): | 219 if tds[3].hasChildNodes(): |
235 contents = tds[3].childNodes | 220 contents = tds[3].childNodes |
236 | 221 |
237 for content in contents: | 222 for content in contents: |
(...skipping 12 matching lines...) Expand all Loading... |
250 blame_result = blame_results[line] | 235 blame_result = blame_results[line] |
251 tds = blame_result.getElementsByTagName('td') | 236 tds = blame_result.getElementsByTagName('td') |
252 author = tds[1].firstChild.nodeValue | 237 author = tds[1].firstChild.nodeValue |
253 | 238 |
254 # Revision can either be in hyperlink or plain text. | 239 # Revision can either be in hyperlink or plain text. |
255 try: | 240 try: |
256 revision = tds[2].getElementsByTagName('a')[0].firstChild.nodeValue | 241 revision = tds[2].getElementsByTagName('a')[0].firstChild.nodeValue |
257 except IndexError: | 242 except IndexError: |
258 revision = tds[2].firstChild.nodeValue | 243 revision = tds[2].firstChild.nodeValue |
259 | 244 |
| 245 (revision_info, _) = self.ParseChangelog(component, revision, revision) |
| 246 message = revision_info[int(revision)]['message'] |
| 247 |
260 # Return the parsed information. | 248 # Return the parsed information. |
261 revision_url = url_map['revision_url'] % int(revision) | 249 revision_url = url_map['revision_url'] % int(revision) |
262 return (line_content, revision, author, revision_url) | 250 return (line_content, revision, author, revision_url, message) |
OLD | NEW |