chrome/common/extensions/docs/server2/subversion_file_system.py - Issue 1151283007: Docserver overhaul: Gitiles away from me.

Side by Side Diff: chrome/common/extensions/docs/server2/subversion_file_system.py

Issue 1151283007: Docserver overhaul: Gitiles away from me. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Remove inform_users template to fix presubmit failure (it's now a redirect) Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « chrome/common/extensions/docs/server2/strings_data_source.py ('k') | chrome/common/extensions/docs/server2/subversion_file_system_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
	(Empty)
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.

4

5 import posixpath

6 import traceback

7 import xml.dom.minidom as xml

8 from xml.parsers.expat import ExpatError

9

10 from appengine_url_fetcher import AppEngineUrlFetcher

11 from appengine_wrappers import IsDownloadError

12 from docs_server_utils import StringIdentity

13 from file_system import (

14 FileNotFoundError, FileSystem, FileSystemError, StatInfo)

15 from future import Future

16 import url_constants

17

18

def _ParseHTML(html):
  '''Parses |html| into a minidom document, tolerating mismatched tags.

  Unfortunately, the viewvc page has a stray </div> tag, so whenever expat
  rejects the markup, the line it complained about is dropped and the parse is
  retried on what is left.

  Returns the parsed xml.dom.minidom document.

  Raises ExpatError when dropping the reported line cannot make progress (the
  input is empty, or the reported line number is outside the text), instead of
  retrying forever.
  '''
  # Iterative rather than recursive so that a page with many bad lines cannot
  # exhaust the interpreter's recursion limit.
  while True:
    try:
      return xml.parseString(html)
    except ExpatError as e:
      lines = html.split('\n')
      if not 1 <= e.lineno <= len(lines):
        # Nothing to remove; retrying would fail the same way forever.
        raise
      del lines[e.lineno - 1]
      remaining = '\n'.join(lines)
      if remaining == html:
        # Only happens once the text is already empty: no progress possible.
        raise
      html = remaining

29

def _InnerText(node):
  '''Returns the concatenated text of |node| and all of its descendants, much
  like node.innerText in the JS DOM, with surrounding whitespace stripped.

  Note that the text contributed by each descendant is itself stripped before
  being joined, so whitespace between sibling nodes is not preserved.
  '''
  own_text = node.nodeValue
  pieces = [own_text] if own_text else []
  # Nodes without a childNodes attribute simply contribute no children.
  pieces.extend(_InnerText(child) for child in getattr(node, 'childNodes', ()))
  return ''.join(pieces).strip()

40

def _CreateStatInfo(html):
  '''Builds a StatInfo from the markup of a viewvc directory listing.

  |html| is parsed with _ParseHTML and every <table> in it is scanned for the
  directory's own revision and for the revisions of the entries it lists.

  Returns StatInfo(parent_version, child_versions), where |parent_version| is
  the directory revision as a string (None if no such row was found) and
  |child_versions| maps each listed name to its revision as a string.

  Raises FileSystemError if the "Directory revision:" row does not hold
  exactly 2 links, or if a second directory revision is encountered, and
  ValueError if the directory revision is not an integer.
  '''
  parent_version = None
  child_versions = {}

  # Try all of the tables until we find the ones that contain the data (the
  # directory and file versions are in different tables).
  for table in _ParseHTML(html).getElementsByTagName('table'):
    # Within the table there is a list of files. However, there may be some
    # things beforehand; a header, "parent directory" list, etc. We will deal
    # with that below by being generous and just ignoring such rows.
    for row in table.getElementsByTagName('tr'):
      cells = row.getElementsByTagName('td')

      # The version of the directory will eventually appear in the soup of
      # table rows, like this:
      #
      # <tr>
      #   <td>Directory revision:</td>
      #   <td><a href=... title="Revision 214692">214692</a> (of...)</td>
      # </tr>
      #
      # So look out for that.
      if len(cells) == 2 and _InnerText(cells[0]) == 'Directory revision:':
        links = cells[1].getElementsByTagName('a')
        if len(links) != 2:
          raise FileSystemError(
              ('ViewVC assumption invalid: directory '
               'revision content did not have 2 <a> '
               ' elements, instead %s') % _InnerText(cells[1]))
        this_parent_version = _InnerText(links[0])
        int(this_parent_version)  # sanity check; ValueError if not a number
        if parent_version is not None:
          # The whole message is parenthesized before formatting: '%' binds
          # tighter than '+', so formatting only the trailing fragment with
          # both values raises TypeError instead of this error.
          raise FileSystemError(
              ('There was already a parent version %s, and '
               ' we just found a second at %s') %
              (parent_version, this_parent_version))
        parent_version = this_parent_version

      # The version of each file is a list of rows with 5 cells: name, version,
      # age, author, and last log entry. Maybe the columns will change; we're
      # at the mercy of viewvc, but this constant can be easily updated.
      if len(cells) != 5:
        continue
      name_element, version_element = cells[:2]

      name = _InnerText(name_element)  # note: will end in / for directories
      try:
        version = int(_InnerText(version_element))
      except ValueError:
        # Not a revision number (e.g. a header row); ignore this row.
        continue
      child_versions[name] = str(version)

    # Stop scanning further tables once both pieces of data have been found.
    if parent_version and child_versions:
      break

  return StatInfo(parent_version, child_versions)

97

98

class SubversionFileSystem(FileSystem):
  '''FileSystem that fetches resources from src.chromium.org.

  File contents are requested through |file_fetcher| and version information
  through |stat_fetcher| (viewvc); both are optionally pinned to a revision.
  '''
  @staticmethod
  def Create(branch='trunk', revision=None):
    '''Creates a SubversionFileSystem for |branch|, optionally at |revision|.

    'trunk' maps to trunk/src; any other branch maps to branches/<branch>/src.
    '''
    svn_path = 'trunk/src' if branch == 'trunk' else 'branches/%s/src' % branch
    file_fetcher = AppEngineUrlFetcher(
        '%s/%s' % (url_constants.SVN_URL, svn_path))
    stat_fetcher = AppEngineUrlFetcher(
        '%s/%s' % (url_constants.VIEWVC_URL, svn_path))
    return SubversionFileSystem(
        file_fetcher, stat_fetcher, svn_path, revision=revision)

  def __init__(self, file_fetcher, stat_fetcher, svn_path, revision=None):
    self._file_fetcher = file_fetcher
    self._stat_fetcher = stat_fetcher
    self._svn_path = svn_path
    self._revision = revision

  def Read(self, paths, skip_not_found=False):
    '''Starts fetching every path in |paths| and returns a Future of a dict
    mapping each path to its content.

    Paths ending in '/' resolve to the list of names linked from the fetched
    listing (without '..'); other paths resolve to the raw response content.
    With |skip_not_found|, paths that 404 or fail with a download error are
    left out of the result instead of raising FileNotFoundError. Any other
    failure raises FileSystemError when the Future is resolved.
    '''
    # |file_fetcher| gets from svn.chromium.org which uses p= for version.
    if self._revision is None:
      query = None
    else:
      query = 'p=%s' % self._revision

    def with_revision(path):
      if query is None:
        return path
      return '%s?%s' % (path, query)

    def names_in_listing(listing):
      anchors = xml.parseString(listing).getElementsByTagName('a')
      names = [anchor.childNodes[0].data for anchor in anchors]
      try:
        names.remove('..')
      except ValueError:
        pass  # No parent-directory entry in this listing.
      return names

    # All fetches are started up front; each entry is a (path, Future) tuple.
    fetches = []
    for path in paths:
      fetches.append((path, self._file_fetcher.FetchAsync(with_revision(path))))

    def resolve():
      contents = {}
      for path, pending in fetches:
        try:
          response = pending.Get()
        except Exception as e:
          if IsDownloadError(e):
            if skip_not_found:
              continue
            error_class = FileNotFoundError
          else:
            error_class = FileSystemError
          raise error_class('%s fetching %s for Get: %s' %
                            (type(e).__name__, path, traceback.format_exc()))

        status = response.status_code
        if status == 404:
          if skip_not_found:
            continue
          raise FileNotFoundError(
              'Got 404 when fetching %s for Get, content %s' %
              (path, response.content))
        if status != 200:
          raise FileSystemError(
              'Got %s when fetching %s for Get, content %s' %
              (status, path, response.content))

        if path.endswith('/'):
          contents[path] = names_in_listing(response.content)
        else:
          contents[path] = response.content
      return contents

    return Future(callback=resolve)

  def Refresh(self):
    '''Nothing to refresh; returns an already-resolved Future.'''
    return Future(value=())

  def StatAsync(self, path):
    '''Returns a Future of the StatInfo for |path|.

    The listing of the directory containing |path| is fetched from viewvc. For
    '' or a path ending in '/', the directory's full StatInfo is returned;
    otherwise a StatInfo holding only that file's version is returned.

    Resolving raises FileNotFoundError on download errors, on any non-200
    response, or when the file is not in the listing, and FileSystemError on
    other fetch failures or when the directory version cannot be found.
    '''
    directory, filename = posixpath.split(path)
    if self._revision is not None:
      # |stat_fetcher| uses viewvc which uses pathrev= for version.
      directory += '?pathrev=%s' % self._revision

    pending = self._stat_fetcher.FetchAsync(directory)

    def resolve():
      try:
        response = pending.Get()
      except Exception as e:
        if IsDownloadError(e):
          error_class = FileNotFoundError
        else:
          error_class = FileSystemError
        raise error_class('%s fetching %s for Stat: %s' %
                          (type(e).__name__, path, traceback.format_exc()))

      status = response.status_code
      if status == 404:
        raise FileNotFoundError('Got 404 when fetching %s for Stat, '
                                'content %s' % (path, response.content))
      if status != 200:
        raise FileNotFoundError(
            'Got %s when fetching %s for Stat, content %s' %
            (status, path, response.content))

      dir_stat = _CreateStatInfo(response.content)
      if dir_stat.version is None:
        raise FileSystemError('Failed to find version of dir %s' % directory)

      wants_directory = not path or path.endswith('/')
      if wants_directory:
        return dir_stat

      known_versions = dir_stat.child_versions
      if filename in known_versions:
        return StatInfo(known_versions[filename])
      raise FileNotFoundError(
          '%s from %s was not in child versions for Stat' % (filename, path))

    return Future(callback=resolve)

  def GetIdentity(self):
    '''Returns an identity string built from the class name and svn path.'''
    # NOTE: no revision here, since it would mess up the caching of reads. It
    # probably doesn't matter since all the caching classes will use the result
    # of Stat to decide whether to re-read - and Stat has a ceiling of the
    # revision - so when the revision changes, so might Stat. That is enough.
    identity_parts = [self.__class__.__name__, StringIdentity(self._svn_path)]
    return '@'.join(identity_parts)

OLD	NEW