OLD | NEW |
| (Empty) |
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 import posixpath | |
6 import traceback | |
7 import xml.dom.minidom as xml | |
8 from xml.parsers.expat import ExpatError | |
9 | |
10 from appengine_url_fetcher import AppEngineUrlFetcher | |
11 from appengine_wrappers import IsDownloadError | |
12 from docs_server_utils import StringIdentity | |
13 from file_system import ( | |
14 FileNotFoundError, FileSystem, FileSystemError, StatInfo) | |
15 from future import Future | |
16 import url_constants | |
17 | |
18 | |
def _ParseHTML(html):
  '''Parses |html| with minidom, discarding lines that expat rejects.

  Unfortunately, the viewvc page has a stray </div> tag, so this takes care
  of all mismatched tags: each time the parser raises ExpatError, the line it
  reports is dropped and parsing is retried.

  Returns the parsed minidom Document.

  Raises ExpatError if dropping the reported line cannot make progress, i.e.
  the reported line number is outside the remaining input or only one line is
  left. Without that check the retry would repeat forever on input such as
  the empty string.
  '''
  lines = html.split('\n')
  while True:
    try:
      return xml.parseString('\n'.join(lines))
    except ExpatError as e:
      bad_index = e.lineno - 1  # expat line numbers are 1-based
      # Every retry must strictly shrink the input; otherwise give up and let
      # the caller see the real parse error.
      if len(lines) <= 1 or not 0 <= bad_index < len(lines):
        raise
      del lines[bad_index]
29 | |
def _InnerText(node):
  '''Returns the concatenated text of |node| and its descendants.

  Like node.innerText in JS DOM, except that surrounding whitespace is
  stripped - both from the final result and from the text contributed by each
  child, since children are collected through recursive calls.
  '''
  own_text = node.nodeValue or ''
  # Nodes without a childNodes attribute simply contribute no child text.
  children = getattr(node, 'childNodes', ())
  pieces = [own_text] + [_InnerText(child) for child in children]
  return ''.join(pieces).strip()
40 | |
def _CreateStatInfo(html):
  '''Builds a StatInfo from the HTML of a viewvc directory listing.

  The directory's own version is read from the table row whose first cell is
  "Directory revision:"; child versions are read from every 5-cell row whose
  second cell parses as an integer.

  Returns StatInfo(parent_version, child_versions), where |parent_version| is
  None if no "Directory revision:" row was found and |child_versions| maps
  each child name to its version as a string.

  Raises FileSystemError if the "Directory revision:" cell does not hold
  exactly 2 <a> elements, or if a second such row is encountered. A
  non-numeric directory revision propagates the ValueError raised by int().
  '''
  parent_version = None
  child_versions = {}

  # Try all of the tables until we find the ones that contain the data (the
  # directory and file versions are in different tables).
  for table in _ParseHTML(html).getElementsByTagName('table'):
    # Within the table there is a list of files. However, there may be some
    # things beforehand; a header, "parent directory" list, etc. We will deal
    # with that below by being generous and just ignoring such rows.
    rows = table.getElementsByTagName('tr')

    for row in rows:
      cells = row.getElementsByTagName('td')

      # The version of the directory will eventually appear in the soup of
      # table rows, like this:
      #
      # <tr>
      #   <td>Directory revision:</td>
      #   <td><a href=... title="Revision 214692">214692</a> (of...)</td>
      # </tr>
      #
      # So look out for that.
      if len(cells) == 2 and _InnerText(cells[0]) == 'Directory revision:':
        links = cells[1].getElementsByTagName('a')
        if len(links) != 2:
          raise FileSystemError(('ViewVC assumption invalid: directory '
                                 'revision content did not have 2 <a> '
                                 ' elements, instead %s') %
                                _InnerText(cells[1]))
        this_parent_version = _InnerText(links[0])
        int(this_parent_version)  # sanity check
        if parent_version is not None:
          # The whole message is parenthesised before formatting: '%' binds
          # tighter than '+', so formatting only the trailing fragment with
          # two arguments raised TypeError instead of this error.
          raise FileSystemError(('There was already a parent version %s, and '
                                 ' we just found a second at %s') %
                                (parent_version, this_parent_version))
        parent_version = this_parent_version

      # The version of each file is a list of rows with 5 cells: name, version,
      # age, author, and last log entry. Maybe the columns will change; we're
      # at the mercy of viewvc, but this constant can be easily updated.
      if len(cells) != 5:
        continue
      name_element, version_element, _, __, ___ = cells

      name = _InnerText(name_element)  # note: will end in / for directories
      try:
        version = int(_InnerText(version_element))
      except ValueError:
        # Not a version row (e.g. a header); int() on text only fails with
        # ValueError, so nothing broader needs to be swallowed here.
        continue
      child_versions[name] = str(version)

    # Stop scanning further tables once both pieces of data have been found.
    if parent_version and child_versions:
      break

  return StatInfo(parent_version, child_versions)
97 | |
98 | |
class SubversionFileSystem(FileSystem):
  '''FileSystem that fetches resources from src.chromium.org.

  Contents are read through |file_fetcher|; versions are scraped from the
  viewvc pages served by |stat_fetcher|.
  '''
  @staticmethod
  def Create(branch='trunk', revision=None):
    '''Returns a SubversionFileSystem for |branch|, optionally pinned to
    |revision|. The trunk lives under trunk/src, any other branch under
    branches/<branch>/src.
    '''
    svn_path = 'trunk/src' if branch == 'trunk' else 'branches/%s/src' % branch
    file_fetcher = AppEngineUrlFetcher(
        '%s/%s' % (url_constants.SVN_URL, svn_path))
    stat_fetcher = AppEngineUrlFetcher(
        '%s/%s' % (url_constants.VIEWVC_URL, svn_path))
    return SubversionFileSystem(
        file_fetcher, stat_fetcher, svn_path, revision=revision)

  def __init__(self, file_fetcher, stat_fetcher, svn_path, revision=None):
    '''Stores the fetcher used by Read (|file_fetcher|), the fetcher used by
    StatAsync (|stat_fetcher|), the svn path used for the identity, and the
    optional revision to pin requests to.
    '''
    self._svn_path = svn_path
    self._revision = revision
    self._file_fetcher = file_fetcher
    self._stat_fetcher = stat_fetcher

  def Read(self, paths, skip_not_found=False):
    '''Starts fetching every path in |paths| and returns a Future of a dict
    mapping each path to its content. Paths ending in '/' map to the list of
    link texts found in the fetched listing, minus one '..' entry.

    When resolved, raises FileNotFoundError for download errors and 404s
    (unless |skip_not_found|, in which case those paths are omitted), and
    FileSystemError for any other fetch exception or non-200 status.
    '''
    # The file fetcher gets from svn.chromium.org which uses p= for version.
    if self._revision is None:
      query = None
    else:
      query = 'p=%s' % self._revision

    def with_query(path):
      if query is None:
        return path
      return '%s?%s' % (path, query)

    def parse_listing(content):
      anchors = xml.parseString(content).getElementsByTagName('a')
      names = [anchor.childNodes[0].data for anchor in anchors]
      # Drop the parent-directory link (first occurrence only) if present.
      try:
        names.remove('..')
      except ValueError:
        pass
      return names

    # (path, Future) pairs; every fetch is kicked off before anything is
    # resolved.
    pending = []
    for path in paths:
      pending.append((path, self._file_fetcher.FetchAsync(with_query(path))))

    def resolve():
      contents = {}
      for path, future in pending:
        try:
          response = future.Get()
        except Exception as e:
          if IsDownloadError(e):
            if skip_not_found:
              continue
            error_class = FileNotFoundError
          else:
            error_class = FileSystemError
          raise error_class('%s fetching %s for Get: %s' %
                            (type(e).__name__, path, traceback.format_exc()))
        status = response.status_code
        if status == 404:
          if skip_not_found:
            continue
          raise FileNotFoundError(
              'Got 404 when fetching %s for Get, content %s' %
              (path, response.content))
        if status != 200:
          raise FileSystemError('Got %s when fetching %s for Get, content %s' %
                                (status, path, response.content))
        if path.endswith('/'):
          contents[path] = parse_listing(response.content)
        else:
          contents[path] = response.content
      return contents

    return Future(callback=resolve)

  def Refresh(self):
    '''Nothing to refresh; returns an already-resolved Future of ().
    '''
    return Future(value=())

  def StatAsync(self, path):
    '''Fetches the viewvc page for the directory part of |path| and returns a
    Future of a StatInfo. For '' or a path ending in '/', that is the StatInfo
    of the whole directory; otherwise a StatInfo holding only the version of
    the named child.

    When resolved, raises FileNotFoundError if the fetch hits a download
    error, the status is not 200, or the child is missing from the listing,
    and FileSystemError for other fetch exceptions or when the directory
    version cannot be found.
    '''
    parent_dir, child_name = posixpath.split(path)
    if self._revision is None:
      stat_url = parent_dir
    else:
      # The stat fetcher uses viewvc which uses pathrev= for version.
      stat_url = '%s?pathrev=%s' % (parent_dir, self._revision)

    pending = self._stat_fetcher.FetchAsync(stat_url)

    def resolve():
      try:
        response = pending.Get()
      except Exception as e:
        if IsDownloadError(e):
          error_class = FileNotFoundError
        else:
          error_class = FileSystemError
        raise error_class('%s fetching %s for Stat: %s' %
                          (type(e).__name__, path, traceback.format_exc()))

      status = response.status_code
      if status == 404:
        raise FileNotFoundError(
            'Got 404 when fetching %s for Stat, content %s' %
            (path, response.content))
      if status != 200:
        # NOTE(review): Read raises FileSystemError for the equivalent case;
        # confirm whether FileNotFoundError is intended here.
        raise FileNotFoundError('Got %s when fetching %s for Stat, content %s' %
                                (status, path, response.content))

      dir_stat = _CreateStatInfo(response.content)
      if dir_stat.version is None:
        raise FileSystemError('Failed to find version of dir %s' % stat_url)
      if not path or path.endswith('/'):
        return dir_stat
      versions = dir_stat.child_versions
      if child_name in versions:
        return StatInfo(versions[child_name])
      raise FileNotFoundError(
          '%s from %s was not in child versions for Stat' % (child_name, path))

    return Future(callback=resolve)

  def GetIdentity(self):
    '''Returns "<class name>@<StringIdentity of the svn path>".
    '''
    # NOTE: no revision here, since it would mess up the caching of reads. It
    # probably doesn't matter since all the caching classes will use the result
    # of Stat to decide whether to re-read - and Stat has a ceiling of the
    # revision - so when the revision changes, so might Stat. That is enough.
    identity_parts = [self.__class__.__name__, StringIdentity(self._svn_path)]
    return '@'.join(identity_parts)
OLD | NEW |