OLD | NEW |
---|---|
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import re | 5 import re |
6 import xml.dom.minidom as xml | 6 import xml.dom.minidom as xml |
7 from xml.parsers.expat import ExpatError | |
7 | 8 |
8 import file_system | 9 import file_system |
9 from future import Future | 10 from future import Future |
10 | 11 |
11 class SubversionFileSystem(file_system.FileSystem): | 12 class SubversionFileSystem(file_system.FileSystem): |
12 """Class to fetch resources from src.chromium.org. | 13 """Class to fetch resources from src.chromium.org. |
13 """ | 14 """ |
14 def __init__(self, fetcher): | 15 def __init__(self, fetcher, stat_fetcher): |
15 self._fetcher = fetcher | 16 self._fetcher = fetcher |
17 self._stat_fetcher = stat_fetcher | |
16 | 18 |
17 def Read(self, paths, binary=False): | 19 def Read(self, paths, binary=False): |
18 return Future(delegate=_AsyncFetchFuture(paths, self._fetcher, binary)) | 20 return Future(delegate=_AsyncFetchFuture(paths, self._fetcher, binary)) |
19 | 21 |
22 def _ParseHTML(self, html): | |
23 """Unfortunately, the viewvc page has a stray </div> tag, so this takes care | |
24 of all mismatched tags. | |
25 """ | |
26 try: | |
27 return xml.parseString(html) | |
28 except ExpatError as e: | |
29 new_html = [] | |
30 for lineno, line in enumerate(html.split('\n')): | |
31 if e.lineno != lineno + 1: | |
32 new_html.append(line) | |
33 return self._ParseHTML('\n'.join(new_html)) | |
34 | |
35 def _CreateStatInfo(self, html): | |
36 dom = self._ParseHTML(html) | |
37 # Brace yourself, this is about to get ugly. The page returned from viewvc | |
38 # was not the prettiest. | |
39 tds = dom.getElementsByTagName('td') | |
40 a_list = [] | |
41 found = False | |
42 dir_revision = None | |
43 for td in tds: | |
44 if found: | |
45 dir_revision = td.getElementsByTagName('a')[0].firstChild.nodeValue | |
46 found = False | |
47 a_list.extend(td.getElementsByTagName('a')) | |
48 if (td.firstChild is not None and | |
49 td.firstChild.nodeValue == 'Directory revision:'): | |
50 found = True | |
51 child_revisions = {} | |
52 for i, a in enumerate(a_list): | |
53 try: | |
54 next_a = a_list[i + 1] | |
55 except IndexError: | |
56 break | |
not at google - send to devlin
2012/08/13 23:02:14
Exceptions aren't a good way of signaling control flow; use an explicit bounds check on i + 1 instead of catching IndexError.
not at google - send to devlin
2012/08/14 04:00:08
and by that I mean >= not <
cduvall
2012/08/14 18:15:00
Done.
| |
57 name = a.getAttribute('name') | |
58 if name: | |
59 rev = next_a.getElementsByTagName('strong')[0] | |
60 if 'file' in next_a.getAttribute('title'): | |
61 child_revisions[name] = rev.firstChild.nodeValue | |
62 else: | |
63 child_revisions[name + '/'] = rev.firstChild.nodeValue | |
64 return self.StatInfo(dir_revision, child_revisions) | |
65 | |
20 def Stat(self, path): | 66 def Stat(self, path): |
21 directory = path.rsplit('/', 1)[0] | 67 directory = path.rsplit('/', 1)[0] |
22 result = self._fetcher.Fetch(directory + '/') | 68 result = self._stat_fetcher.Fetch(directory + '/') |
23 if result.status_code == 404: | 69 if result.status_code == 404: |
24 raise file_system.FileNotFoundError(path) | 70 raise file_system.FileNotFoundError(path) |
25 return self.StatInfo(int(re.search('([0-9]+)', result.content).group(0))) | 71 stat_info = self._CreateStatInfo(result.content) |
72 if not path.endswith('/'): | |
73 filename = path.rsplit('/', 1)[-1] | |
74 if filename not in stat_info.child_versions: | |
75 raise file_system.FileNotFoundError(path) | |
76 stat_info.version = stat_info.child_versions[filename] | |
77 return stat_info | |
26 | 78 |
27 class _AsyncFetchFuture(object): | 79 class _AsyncFetchFuture(object): |
28 def __init__(self, paths, fetcher, binary): | 80 def __init__(self, paths, fetcher, binary): |
29 # A list of tuples of the form (path, Future). | 81 # A list of tuples of the form (path, Future). |
30 self._fetches = [] | 82 self._fetches = [] |
31 self._value = {} | 83 self._value = {} |
32 self._error = None | 84 self._error = None |
33 self._fetches = [(path, fetcher.FetchAsync(path)) for path in paths] | 85 self._fetches = [(path, fetcher.FetchAsync(path)) for path in paths] |
34 self._binary = binary | 86 self._binary = binary |
35 | 87 |
(...skipping 12 matching lines...) | |
48 elif path.endswith('/'): | 100 elif path.endswith('/'): |
49 self._value[path] = self._ListDir(result.content) | 101 self._value[path] = self._ListDir(result.content) |
50 elif not self._binary: | 102 elif not self._binary: |
51 self._value[path] = file_system._ProcessFileData(result.content, path) | 103 self._value[path] = file_system._ProcessFileData(result.content, path) |
52 else: | 104 else: |
53 self._value[path] = result.content | 105 self._value[path] = result.content |
54 if self._error is not None: | 106 if self._error is not None: |
55 raise self._error | 107 raise self._error |
56 return self._value | 108 return self._value |
57 | 109 |
OLD | NEW |