Chromium Code Reviews
| Index: chrome/common/extensions/docs/server2/subversion_file_system.py |
| diff --git a/chrome/common/extensions/docs/server2/subversion_file_system.py b/chrome/common/extensions/docs/server2/subversion_file_system.py |
| index b5d26429776543cff0e0a62ed5ec4791ec4d89dc..019c8e87abdfed3d8b96cdc2b36e7c2903b949b1 100644 |
| --- a/chrome/common/extensions/docs/server2/subversion_file_system.py |
| +++ b/chrome/common/extensions/docs/server2/subversion_file_system.py |
| @@ -4,6 +4,7 @@ |
| import re |
| import xml.dom.minidom as xml |
| +from xml.parsers.expat import ExpatError |
| import file_system |
| from future import Future |
| @@ -11,16 +12,57 @@ from future import Future |
| class SubversionFileSystem(file_system.FileSystem): |
| """Class to fetch resources from src.chromium.org. |
| """ |
| - def __init__(self, fetcher): |
| + def __init__(self, fetcher, stat_fetcher): |
| self._fetcher = fetcher |
| + self._stat_fetcher = stat_fetcher |
| def Read(self, paths, binary=False): |
| return Future(delegate=_AsyncFetchFuture(paths, self._fetcher, binary)) |
| + def _ParseHTML(self, html): |
| + """Unfortunately, the viewvc page has a stray </div> tag, so this takes care |
| + of all mismatched tags. |
| + """ |
| + try: |
| + return xml.parseString(html) |
| + except ExpatError as e: |
| + new_html = [] |
| + for lineno, line in enumerate(html.split('\n')): |
| + if e.lineno != lineno + 1: |
| + new_html.append(line) |
| + return self._ParseHTML('\n'.join(new_html)) |
| + |
| + def _CreateStatInfo(self, html): |
| + dom = self._ParseHTML(html) |
| + # Brace yourself, this is about to get ugly. The page returned from viewvc |
| + # was not the prettiest. |
|
not at google - send to devlin
2012/08/10 06:42:24
Heh. Let's hope it doesn't change; but if it does, [...]
|
| + tds = dom.getElementsByTagName('td') |
| + a_list = [] |
| + found = False |
| + dir_revision = None |
| + for td in tds: |
| + if found: |
| + dir_revision = td.getElementsByTagName('a')[0].firstChild.nodeValue |
| + found = False |
| + a_list.extend(td.getElementsByTagName('a')) |
| + if (td.firstChild is not None and |
| + td.firstChild.nodeValue == 'Directory revision:'): |
| + found = True |
| + child_revisions = {} |
| + for i in range(len(a_list)): |
| + name = a_list[i].getAttribute('name') |
| + if name: |
| + rev = a_list[i + 1].getElementsByTagName('strong')[0] |
| + if 'file' in a_list[i + 1].getAttribute('title'): |
| + child_revisions[name] = rev.firstChild.nodeValue |
| + else: |
| + child_revisions[name + '/'] = rev.firstChild.nodeValue |
| + return self.StatInfo(dir_revision, child_revisions) |
| + |
| def Stat(self, path): |
| directory = path.rsplit('/', 1)[0] |
| - dir_html = self._fetcher.Fetch(directory + '/').content |
| - return self.StatInfo(int(re.search('([0-9]+)', dir_html).group(0))) |
| + dir_html = self._stat_fetcher.Fetch(directory + '/').content |
| + return self._CreateStatInfo(dir_html) |
| class _AsyncFetchFuture(object): |
| def __init__(self, paths, fetcher, binary): |