Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 from HTMLParser import HTMLParser | 5 from HTMLParser import HTMLParser |
| 6 import functools | |
|
not at google - send to devlin
2013/05/03 15:56:25
f < H (import ordering: `functools` should be listed before `HTMLParser`)
jshumway
2013/05/10 02:08:36
Done.
| |
| 6 import logging | 7 import logging |
| 7 import os | 8 import os |
| 8 import re | 9 import re |
| 9 | 10 |
| 10 from docs_server_utils import FormatKey | 11 from docs_server_utils import FormatKey |
| 11 from file_system import FileNotFoundError | 12 from file_system import FileNotFoundError |
| 12 import compiled_file_system as compiled_fs | |
| 13 from third_party.handlebar import Handlebar | 13 from third_party.handlebar import Handlebar |
| 14 | 14 |
| 15 # TODO(kalman): rename this HTMLDataSource or other, then have separate intro | 15 # TODO(kalman): rename this HTMLDataSource or other, then have separate intro |
| 16 # article data sources created as instances of it. | 16 # article data sources created as instances of it. |
| 17 | 17 |
| 18 _H1_REGEX = re.compile('<h1[^>.]*?>.*?</h1>', flags=re.DOTALL) | 18 _H1_REGEX = re.compile('<h1[^>.]*?>.*?</h1>', flags=re.DOTALL) |
| 19 | 19 |
| 20 class _IntroParser(HTMLParser): | 20 class _IntroParser(HTMLParser): |
| 21 """ An HTML parser which will parse table of contents and page title info out | 21 """ An HTML parser which will parse table of contents and page title info out |
| 22 of an intro. | 22 of an intro. |
| (...skipping 29 matching lines...) Expand all Loading... | |
| 52 if self._recent_tag is None: | 52 if self._recent_tag is None: |
| 53 return | 53 return |
| 54 if self._recent_tag == 'h1': | 54 if self._recent_tag == 'h1': |
| 55 if self.page_title is None: | 55 if self.page_title is None: |
| 56 self.page_title = data | 56 self.page_title = data |
| 57 else: | 57 else: |
| 58 self.page_title += data | 58 self.page_title += data |
| 59 elif self._recent_tag in ['h2', 'h3']: | 59 elif self._recent_tag in ['h2', 'h3']: |
| 60 self._current_heading['title'] += data | 60 self._current_heading['title'] += data |
| 61 | 61 |
| 62 def _MakeIntroDict(ref_resolver, intro_path, intro): | |
| 63 # Guess the name of the API from the path to the intro. | |
| 64 api_name = os.path.splitext(intro_path.split('/')[-1])[0] | |
| 65 intro_with_links = ref_resolver.ResolveAllLinks(intro, namespace=api_name) | |
| 66 apps_parser = _IntroParser() | |
| 67 apps_parser.feed(Handlebar(intro_with_links).render({ 'is_apps': True }).text) | |
| 68 extensions_parser = _IntroParser() | |
| 69 extensions_parser.feed(Handlebar(intro_with_links).render( | |
| 70 { 'is_apps': False }).text) | |
| 71 # TODO(cduvall): Use the normal template rendering system, so we can check | |
| 72 # errors. | |
| 73 if extensions_parser.page_title != apps_parser.page_title: | |
| 74 logging.error( | |
| 75 'Title differs for apps and extensions: Apps: %s, Extensions: %s.' % | |
| 76 (extensions_parser.page_title, apps_parser.page_title)) | |
| 77 # The templates will render the heading themselves, so remove it from the | |
| 78 # HTML content. | |
| 79 intro_with_links = re.sub(_H1_REGEX, '', intro_with_links, count=1) | |
| 80 return { | |
| 81 'intro': Handlebar(intro_with_links), | |
| 82 'title': apps_parser.page_title, | |
| 83 'apps_toc': apps_parser.toc, | |
| 84 'extensions_toc': extensions_parser.toc, | |
| 85 } | |
| 86 | |
| 62 class IntroDataSource(object): | 87 class IntroDataSource(object): |
| 63 """This class fetches the intros for a given API. From this intro, a table | 88 """ Allows a template to access the contents of a directory or one of its |
| 64 of contents dictionary is created, which contains the headings in the intro. | 89 subdirectories. |
| 90 | |
| 91 Each IntroDataSource has a dictionary of sub IntroDataSources that are used to | |
| 92 access files in subdirectories. Each child IDS shares a cache and identity_fs | |
| 93 with its parent. If subdirectory paths were added to base_paths instead, | |
| 94 lookup time would have really bad time complexity. | |
| 65 """ | 95 """ |
| 66 class Factory(object): | 96 class Factory(object): |
| 67 def __init__(self, compiled_fs_factory, ref_resolver_factory, base_paths): | 97 def __init__(self, compiled_fs_factory, ref_resolver_factory, base_paths): |
| 68 self._cache = compiled_fs_factory.Create(self._MakeIntroDict, | 98 self._cache = compiled_fs_factory.Create( |
| 69 IntroDataSource) | 99 functools.partial(_MakeIntroDict, ref_resolver_factory.Create()), |
| 70 self._ref_resolver = ref_resolver_factory.Create() | 100 IntroDataSource) |
| 71 self._base_paths = base_paths | 101 self._base_paths = base_paths |
| 72 | 102 self._identity_fs = compiled_fs_factory.CreateIdentity(IntroDataSource) |
|
not at google - send to devlin
2013/05/03 15:56:25
hm ok, I don't think any of the changes to this file…
jshumway
2013/05/10 02:08:36
I reverted my changes to this file and everything…
| |
| 73 def _MakeIntroDict(self, intro_path, intro): | |
| 74 # Guess the name of the API from the path to the intro. | |
| 75 api_name = os.path.splitext(intro_path.split('/')[-1])[0] | |
| 76 intro_with_links = self._ref_resolver.ResolveAllLinks(intro, | |
| 77 namespace=api_name) | |
| 78 apps_parser = _IntroParser() | |
| 79 apps_parser.feed(Handlebar(intro_with_links).render( | |
| 80 { 'is_apps': True }).text) | |
| 81 extensions_parser = _IntroParser() | |
| 82 extensions_parser.feed(Handlebar(intro_with_links).render( | |
| 83 { 'is_apps': False }).text) | |
| 84 # TODO(cduvall): Use the normal template rendering system, so we can check | |
| 85 # errors. | |
| 86 if extensions_parser.page_title != apps_parser.page_title: | |
| 87 logging.error( | |
| 88 'Title differs for apps and extensions: Apps: %s, Extensions: %s.' % | |
| 89 (extensions_parser.page_title, apps_parser.page_title)) | |
| 90 # The templates will render the heading themselves, so remove it from the | |
| 91 # HTML content. | |
| 92 intro_with_links = re.sub(_H1_REGEX, '', intro_with_links, count=1) | |
| 93 return { | |
| 94 'intro': Handlebar(intro_with_links), | |
| 95 'title': apps_parser.page_title, | |
| 96 'apps_toc': apps_parser.toc, | |
| 97 'extensions_toc': extensions_parser.toc, | |
| 98 } | |
| 99 | 103 |
| 100 def Create(self): | 104 def Create(self): |
| 101 return IntroDataSource(self._cache, self._base_paths) | 105 return IntroDataSource(self._cache, self._identity_fs, self._base_paths) |
| 102 | 106 |
| 103 def __init__(self, cache, base_paths): | 107 def __init__(self, cache, identity_fs, base_paths): |
| 104 self._cache = cache | 108 self._cache = cache |
| 105 self._base_paths = base_paths | 109 self._base_paths = base_paths |
| 110 self._identity_fs = identity_fs | |
| 111 | |
| 112 # Datasources for subdirectories. | |
| 113 self._subIDS = {} | |
| 106 | 114 |
| 107 def get(self, key): | 115 def get(self, key): |
| 108 path = FormatKey(key) | 116 path = FormatKey(key) |
| 117 | |
| 109 def get_from_base_path(base_path): | 118 def get_from_base_path(base_path): |
| 110 return self._cache.GetFromFile('%s/%s' % (base_path, path)) | 119 return self._cache.GetFromFile(os.path.join(base_path, path)) |
|
not at google - send to devlin
2013/05/03 15:56:25
caches always use '/' as a path separator, the cla…
jshumway
2013/05/10 02:08:36
My bad
| |
| 120 | |
| 111 for base_path in self._base_paths: | 121 for base_path in self._base_paths: |
| 112 try: | 122 try: |
| 113 return get_from_base_path(base_path) | 123 return get_from_base_path(base_path) |
| 114 except FileNotFoundError: | 124 except FileNotFoundError: |
| 115 continue | 125 continue |
| 126 | |
| 127 # File was not found, check if key is a subdirectory. | |
| 128 for base_path in self._base_paths: | |
| 129 subpath = os.path.join(base_path, key) | |
| 130 try: | |
| 131 subfiles = self._identity_fs.GetFromFileListing(subpath) | |
| 132 except FileNotFoundError: | |
| 133 continue | |
| 134 | |
| 135 if subfiles: | |
| 136 self._subIDS[subpath] = IntroDataSource( | |
| 137 self._cache, self._identity_fs, [subpath]) | |
| 138 return self._subIDS[subpath] | |
| 139 | |
| 116 # Not found. Do the first operation again so that we get a stack trace - we | 140 # Not found. Do the first operation again so that we get a stack trace - we |
| 117 # know that it'll fail. | 141 # know that it'll fail. |
| 118 get_from_base_path(self._base_paths[0]) | 142 get_from_base_path(self._base_paths[0]) |
| 119 raise AssertionError() | 143 raise AssertionError |
| OLD | NEW |