| OLD | NEW |
| 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging | |
| 6 | |
| 7 from HTMLParser import HTMLParser | 5 from HTMLParser import HTMLParser |
| 6 import re |
| 8 | 7 |
| 9 from docs_server_utils import FormatKey | 8 from docs_server_utils import FormatKey |
| 10 from third_party.handlebar import Handlebar | 9 from third_party.handlebar import Handlebar |
| 11 | 10 |
| 12 class _IntroParser(HTMLParser): | 11 class _IntroParser(HTMLParser): |
| 13 """ An HTML parser which will parse table of contents and page title info out | 12 """ An HTML parser which will parse table of contents and page title info out |
| 14 of an intro. | 13 of an intro. |
| 15 """ | 14 """ |
| 16 def __init__(self): | 15 def __init__(self): |
| 17 HTMLParser.__init__(self) | 16 HTMLParser.__init__(self) |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 51 elif self._recent_tag in ['h2', 'h3']: | 50 elif self._recent_tag in ['h2', 'h3']: |
| 52 self._current_heading['title'] += data | 51 self._current_heading['title'] += data |
| 53 | 52 |
| 54 class IntroDataSource(object): | 53 class IntroDataSource(object): |
| 55 """This class fetches the intros for a given API. From this intro, a table | 54 """This class fetches the intros for a given API. From this intro, a table |
| 56 of contents dictionary is created, which contains the headings in the intro. | 55 of contents dictionary is created, which contains the headings in the intro. |
| 57 """ | 56 """ |
| 58 def __init__(self, cache_builder, base_paths): | 57 def __init__(self, cache_builder, base_paths): |
| 59 self._cache = cache_builder.build(self._MakeIntroDict) | 58 self._cache = cache_builder.build(self._MakeIntroDict) |
| 60 self._base_paths = base_paths | 59 self._base_paths = base_paths |
| 60 self._intro_regex = re.compile('<h1[^>.]*?>.*?</h1>', flags=re.DOTALL) |
| 61 | 61 |
| 62 def _MakeIntroDict(self, intro): | 62 def _MakeIntroDict(self, intro): |
| 63 parser = _IntroParser() | 63 parser = _IntroParser() |
| 64 parser.feed(intro) | 64 parser.feed(intro) |
| 65 intro = re.sub(self._intro_regex, '', intro, count=1) |
| 65 return { | 66 return { |
| 66 'intro': Handlebar(intro), | 67 'intro': Handlebar(intro), |
| 67 'toc': parser.toc, | 68 'toc': parser.toc, |
| 68 'title': parser.page_title | 69 'title': parser.page_title |
| 69 } | 70 } |
| 70 | 71 |
| 71 def __getitem__(self, key): | 72 def __getitem__(self, key): |
| 72 return self.get(key) | 73 return self.get(key) |
| 73 | 74 |
| 74 def get(self, key): | 75 def get(self, key): |
| 75 real_path = FormatKey(key) | 76 real_path = FormatKey(key) |
| 76 for base_path in self._base_paths: | 77 for base_path in self._base_paths: |
| 77 try: | 78 try: |
| 78 return self._cache.GetFromFile(base_path + '/' + real_path) | 79 return self._cache.GetFromFile(base_path + '/' + real_path) |
| 79 except Exception: | 80 except Exception: |
| 80 pass | 81 pass |
| 81 return None | 82 return None |
| OLD | NEW |