| OLD | NEW |
| 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 from HTMLParser import HTMLParser | 5 from HTMLParser import HTMLParser |
| 6 import logging | 6 import logging |
| 7 import os | 7 import os |
| 8 import re | 8 import re |
| 9 | 9 |
| 10 from docs_server_utils import FormatKey | 10 from docs_server_utils import FormatKey |
| 11 from file_system import FileNotFoundError | 11 from file_system import FileNotFoundError |
| 12 import compiled_file_system as compiled_fs | 12 import compiled_file_system as compiled_fs |
| 13 from third_party.handlebar import Handlebar | 13 from third_party.handlebar import Handlebar |
| 14 | 14 |
| 15 # TODO(kalman): rename this HTMLDataSource or other, then have separate intro | 15 # TODO(kalman): rename this HTMLDataSource or other, then have separate intro |
| 16 # article data sources created as instances of it. | 16 # article data sources created as instances of it. |
| 17 | 17 |
| 18 _H1_REGEX = re.compile('<h1[^>.]*?>.*?</h1>', flags=re.DOTALL) | 18 _H1_REGEX = re.compile('<h1[^>.]*?>.*?</h1>', flags=re.DOTALL) |
| 19 | 19 |
| 20 class _IntroParser(HTMLParser): | 20 class _IntroParser(HTMLParser): |
| 21 """ An HTML parser which will parse table of contents and page title info out | 21 ''' An HTML parser which will parse table of contents and page title info out |
| 22 of an intro. | 22 of an intro. |
| 23 """ | 23 ''' |
| 24 def __init__(self): | 24 def __init__(self): |
| 25 HTMLParser.__init__(self) | 25 HTMLParser.__init__(self) |
| 26 self.toc = [] | 26 self.toc = [] |
| 27 self.page_title = None | 27 self.page_title = None |
| 28 self._recent_tag = None | 28 self._recent_tag = None |
| 29 self._current_heading = {} | 29 self._current_heading = {} |
| 30 | 30 |
| 31 def handle_starttag(self, tag, attrs): | 31 def handle_starttag(self, tag, attrs): |
| 32 id_ = '' | 32 id_ = '' |
| 33 if tag not in ['h1', 'h2', 'h3']: | 33 if tag not in ['h1', 'h2', 'h3']: |
| (...skipping 19 matching lines...) Expand all Loading... |
| 53 return | 53 return |
| 54 if self._recent_tag == 'h1': | 54 if self._recent_tag == 'h1': |
| 55 if self.page_title is None: | 55 if self.page_title is None: |
| 56 self.page_title = data | 56 self.page_title = data |
| 57 else: | 57 else: |
| 58 self.page_title += data | 58 self.page_title += data |
| 59 elif self._recent_tag in ['h2', 'h3']: | 59 elif self._recent_tag in ['h2', 'h3']: |
| 60 self._current_heading['title'] += data | 60 self._current_heading['title'] += data |
| 61 | 61 |
| 62 class IntroDataSource(object): | 62 class IntroDataSource(object): |
| 63 """This class fetches the intros for a given API. From this intro, a table | 63 '''This class fetches the intros for a given API. From this intro, a table |
| 64 of contents dictionary is created, which contains the headings in the intro. | 64 of contents dictionary is created, which contains the headings in the intro. |
| 65 """ | 65 ''' |
| 66 class Factory(object): | 66 class Factory(object): |
| 67 def __init__(self, compiled_fs_factory, ref_resolver_factory, base_paths): | 67 def __init__(self, compiled_fs_factory, ref_resolver_factory, base_paths): |
| 68 self._cache = compiled_fs_factory.Create(self._MakeIntroDict, | 68 self._cache = compiled_fs_factory.Create(self._MakeIntroDict, |
| 69 IntroDataSource) | 69 IntroDataSource) |
| 70 self._ref_resolver = ref_resolver_factory.Create() | 70 self._ref_resolver = ref_resolver_factory.Create() |
| 71 self._base_paths = base_paths | 71 self._base_paths = base_paths |
| 72 | 72 |
| 73 def _MakeIntroDict(self, intro_path, intro): | 73 def _MakeIntroDict(self, intro_path, intro): |
| 74 # Guess the name of the API from the path to the intro. | 74 # Guess the name of the API from the path to the intro. |
| 75 api_name = os.path.splitext(intro_path.split('/')[-1])[0] | 75 api_name = os.path.splitext(intro_path.split('/')[-1])[0] |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 110 return self._cache.GetFromFile('%s/%s' % (base_path, path)) | 110 return self._cache.GetFromFile('%s/%s' % (base_path, path)) |
| 111 for base_path in self._base_paths: | 111 for base_path in self._base_paths: |
| 112 try: | 112 try: |
| 113 return get_from_base_path(base_path) | 113 return get_from_base_path(base_path) |
| 114 except FileNotFoundError: | 114 except FileNotFoundError: |
| 115 continue | 115 continue |
| 116 # Not found. Do the first operation again so that we get a stack trace - we | 116 # Not found. Do the first operation again so that we get a stack trace - we |
| 117 # know that it'll fail. | 117 # know that it'll fail. |
| 118 get_from_base_path(self._base_paths[0]) | 118 get_from_base_path(self._base_paths[0]) |
| 119 raise AssertionError() | 119 raise AssertionError() |
| OLD | NEW |