Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 from HTMLParser import HTMLParser | 5 from HTMLParser import HTMLParser |
| 6 import logging | |
| 7 import re | 6 import re |
| 8 | 7 |
| 9 from docs_server_utils import FormatKey | 8 from docs_server_utils import FormatKey |
| 10 from file_system import FileNotFoundError | 9 from file_system import FileNotFoundError |
| | 10 import file_system_cache as fs_cache |
| 11 from third_party.handlebar import Handlebar | 11 from third_party.handlebar import Handlebar |
| 12 | 12 |
| 13 class _IntroParser(HTMLParser): | 13 class _IntroParser(HTMLParser): |
| 14 """ An HTML parser which will parse table of contents and page title info out | 14 """ An HTML parser which will parse table of contents and page title info out |
| 15 of an intro. | 15 of an intro. |
| 16 """ | 16 """ |
| 17 def __init__(self): | 17 def __init__(self): |
| 18 HTMLParser.__init__(self) | 18 HTMLParser.__init__(self) |
| 19 self.toc = [] | 19 self.toc = [] |
| 20 self.page_title = None | 20 self.page_title = None |
| (...skipping 25 matching lines...) Expand all Loading... | |
| 46 return | 46 return |
| 47 if self._recent_tag == 'h1': | 47 if self._recent_tag == 'h1': |
| 48 if self.page_title is None: | 48 if self.page_title is None: |
| 49 self.page_title = data | 49 self.page_title = data |
| 50 else: | 50 else: |
| 51 self.page_title += data | 51 self.page_title += data |
| 52 elif self._recent_tag in ['h2', 'h3']: | 52 elif self._recent_tag in ['h2', 'h3']: |
| 53 self._current_heading['title'] += data | 53 self._current_heading['title'] += data |
| 54 | 54 |
| 55 class IntroDataSource(object): | 55 class IntroDataSource(object): |
| | 56 class Factory(object): |
| | 57 def __init__(self, cache_builder, base_paths): |
| | 58 self._cache = cache_builder.build(self._MakeIntroDict, |
| | 59 fs_cache.FS_CACHE_INTRO) |
| | 60 self._base_paths = base_paths |
| | 61 self._intro_regex = re.compile('<h1[^>.]*?>.*?</h1>', flags=re.DOTALL) |
|
not at google - send to devlin
2012/08/20 05:27:10
nit: `self._intro_regex` doesn't need to be an instance variable?
cduvall
2012/08/20 21:28:09
Done.
| |
| | 62 |
| | 63 def _MakeIntroDict(self, intro): |
| | 64 parser = _IntroParser() |
| | 65 parser.feed(intro) |
| | 66 intro = re.sub(self._intro_regex, '', intro, count=1) |
| | 67 return { |
| | 68 'intro': Handlebar(intro), |
| | 69 'toc': parser.toc, |
| | 70 'title': parser.page_title |
| | 71 } |
| | 72 |
| | 73 def Create(self): |
| | 74 return IntroDataSource(self._cache, self._base_paths) |
| | 75 |
| 56 """This class fetches the intros for a given API. From this intro, a table | 76 """This class fetches the intros for a given API. From this intro, a table |
| 57 of contents dictionary is created, which contains the headings in the intro. | 77 of contents dictionary is created, which contains the headings in the intro. |
| 58 """ | 78 """ |
| 59 def __init__(self, cache_builder, base_paths): | 79 def __init__(self, cache, base_paths): |
| 60 self._cache = cache_builder.build(self._MakeIntroDict) | 80 self._cache = cache |
| 61 self._base_paths = base_paths | 81 self._base_paths = base_paths |
| 62 self._intro_regex = re.compile('<h1[^>.]*?>.*?</h1>', flags=re.DOTALL) | |
| 63 | |
| 64 def _MakeIntroDict(self, intro): | |
| 65 parser = _IntroParser() | |
| 66 parser.feed(intro) | |
| 67 intro = re.sub(self._intro_regex, '', intro, count=1) | |
| 68 return { | |
| 69 'intro': Handlebar(intro), | |
| 70 'toc': parser.toc, | |
| 71 'title': parser.page_title | |
| 72 } | |
| 73 | 82 |
| 74 def __getitem__(self, key): | 83 def __getitem__(self, key): |
| 75 return self.get(key) | 84 return self.get(key) |
| 76 | 85 |
| 77 def get(self, key): | 86 def get(self, key): |
| 78 real_path = FormatKey(key) | 87 real_path = FormatKey(key) |
| 79 error = None | 88 error = None |
| 80 for base_path in self._base_paths: | 89 for base_path in self._base_paths: |
| 81 try: | 90 try: |
| 82 return self._cache.GetFromFile(base_path + '/' + real_path) | 91 return self._cache.GetFromFile(base_path + '/' + real_path) |
| 83 except FileNotFoundError as error: | 92 except FileNotFoundError as error: |
| 84 pass | 93 pass |
| 85 raise ValueError(str(error) + ': No intro found for "%s".' % key) | 94 raise ValueError(str(error) + ': No intro found for "%s".' % key) |
| OLD | NEW |