Chromium Code Reviews| Index: chrome/common/extensions/docs/server2/subversion_file_system.py |
| diff --git a/chrome/common/extensions/docs/server2/subversion_file_system.py b/chrome/common/extensions/docs/server2/subversion_file_system.py |
| index a27d788ddfb66e7e47402911586c56741d5d2f05..0d3113df76886c3083f17b67319a18e4e5f3163b 100644 |
| --- a/chrome/common/extensions/docs/server2/subversion_file_system.py |
| +++ b/chrome/common/extensions/docs/server2/subversion_file_system.py |
| @@ -4,6 +4,7 @@ |
| import re |
| import xml.dom.minidom as xml |
| +from xml.parsers.expat import ExpatError |
| import file_system |
| from future import Future |
| @@ -11,18 +12,64 @@ from future import Future |
| class SubversionFileSystem(file_system.FileSystem): |
| """Class to fetch resources from src.chromium.org. |
| """ |
| - def __init__(self, fetcher): |
| + def __init__(self, fetcher, stat_fetcher): |
| self._fetcher = fetcher |
| + self._stat_fetcher = stat_fetcher |
| def Read(self, paths, binary=False): |
| return Future(delegate=_AsyncFetchFuture(paths, self._fetcher, binary)) |
| + def _ParseHTML(self, html): |
| + """Unfortunately, the viewvc page has a stray </div> tag, so this takes care |
| + of all mismatched tags. |
| + """ |
| + try: |
| + return xml.parseString(html) |
|
not at google - send to devlin
2012/08/13 05:34:15
Wow, it kinda-works with an XML parser? Amazing.
|
| + except ExpatError as e: |
| + new_html = [] |
| + for lineno, line in enumerate(html.split('\n')): |
| + if e.lineno != lineno + 1: |
| + new_html.append(line) |
| + return self._ParseHTML('\n'.join(new_html)) |
|
not at google - send to devlin
2012/08/13 05:34:15
I love a good list comprehension challenge.
retur...
cduvall
2012/08/13 19:45:45
I didn't use one of the HTML parsing libraries bec...
not at google - send to devlin
2012/08/13 23:02:14
No list comprehension? :(
cduvall
2012/08/14 18:15:00
Oops forgot to change it! Done :)
|
| + |
| + def _CreateStatInfo(self, html): |
| + dom = self._ParseHTML(html) |
| + # Brace yourself, this is about to get ugly. The page returned from viewvc |
| + # was not the prettiest. |
|
not at google - send to devlin
2012/08/13 05:34:15
yup
|
| + tds = dom.getElementsByTagName('td') |
| + a_list = [] |
| + found = False |
| + dir_revision = None |
| + for td in tds: |
| + if found: |
| + dir_revision = td.getElementsByTagName('a')[0].firstChild.nodeValue |
| + found = False |
| + a_list.extend(td.getElementsByTagName('a')) |
| + if (td.firstChild is not None and |
| + td.firstChild.nodeValue == 'Directory revision:'): |
| + found = True |
| + child_revisions = {} |
| + for i in range(len(a_list)): |
|
not at google - send to devlin
2012/08/13 05:34:15
is there something special about a_list that means...
cduvall
2012/08/13 19:45:45
No, no there isn't. That's just me being strange.
|
| + name = a_list[i].getAttribute('name') |
| + if name: |
| + rev = a_list[i + 1].getElementsByTagName('strong')[0] |
|
not at google - send to devlin
2012/08/13 05:34:15
so we know for sure that this won't be past the en...
cduvall
2012/08/13 19:45:45
If the HTML doesn't change it won't go past the en...
|
| + if 'file' in a_list[i + 1].getAttribute('title'): |
| + child_revisions[name] = rev.firstChild.nodeValue |
| + else: |
| + child_revisions[name + '/'] = rev.firstChild.nodeValue |
| + return self.StatInfo(dir_revision, child_revisions) |
| + |
| def Stat(self, path): |
| directory = path.rsplit('/', 1)[0] |
| - result = self._fetcher.Fetch(directory + '/') |
| + result = self._stat_fetcher.Fetch(directory + '/') |
| if result.status_code == 404: |
| raise file_system.FileNotFoundError(path) |
| - return self.StatInfo(int(re.search('([0-9]+)', result.content).group(0))) |
| + stat_info = self._CreateStatInfo(result.content) |
| + if not path.endswith('/'): |
| + filename = path.rsplit('/', 1)[-1] |
| + if filename in stat_info.child_versions: |
| + stat_info.version = stat_info.child_versions[filename] |
|
not at google - send to devlin
2012/08/13 05:34:15
throw FileNotFoundException if it isn't?
cduvall
2012/08/13 19:45:45
Done.
|
| + return stat_info |
| class _AsyncFetchFuture(object): |
| def __init__(self, paths, fetcher, binary): |