OLD | NEW |
---|---|
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 from HTMLParser import HTMLParser | 5 from HTMLParser import HTMLParser |
6 import functools | |
not at google - send to devlin
2013/05/03 15:56:25
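f < H (imports are sorted alphabetically, ignoring case: `import functools` belongs before `from HTMLParser import HTMLParser`)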
jshumway
2013/05/10 02:08:36
Done.
| |
6 import logging | 7 import logging |
7 import os | 8 import os |
8 import re | 9 import re |
9 | 10 |
10 from docs_server_utils import FormatKey | 11 from docs_server_utils import FormatKey |
11 from file_system import FileNotFoundError | 12 from file_system import FileNotFoundError |
12 import compiled_file_system as compiled_fs | |
13 from third_party.handlebar import Handlebar | 13 from third_party.handlebar import Handlebar |
14 | 14 |
15 # TODO(kalman): rename this HTMLDataSource or other, then have separate intro | 15 # TODO(kalman): rename this HTMLDataSource or other, then have separate intro |
16 # article data sources created as instances of it. | 16 # article data sources created as instances of it. |
17 | 17 |
18 _H1_REGEX = re.compile('<h1[^>.]*?>.*?</h1>', flags=re.DOTALL) | 18 _H1_REGEX = re.compile('<h1[^>.]*?>.*?</h1>', flags=re.DOTALL) |
19 | 19 |
20 class _IntroParser(HTMLParser): | 20 class _IntroParser(HTMLParser): |
21 """ An HTML parser which will parse table of contents and page title info out | 21 """ An HTML parser which will parse table of contents and page title info out |
22 of an intro. | 22 of an intro. |
(...skipping 29 matching lines...) Expand all Loading... | |
52 if self._recent_tag is None: | 52 if self._recent_tag is None: |
53 return | 53 return |
54 if self._recent_tag == 'h1': | 54 if self._recent_tag == 'h1': |
55 if self.page_title is None: | 55 if self.page_title is None: |
56 self.page_title = data | 56 self.page_title = data |
57 else: | 57 else: |
58 self.page_title += data | 58 self.page_title += data |
59 elif self._recent_tag in ['h2', 'h3']: | 59 elif self._recent_tag in ['h2', 'h3']: |
60 self._current_heading['title'] += data | 60 self._current_heading['title'] += data |
61 | 61 |
62 def _MakeIntroDict(ref_resolver, intro_path, intro): | |
63 # Guess the name of the API from the path to the intro. | |
64 api_name = os.path.splitext(intro_path.split('/')[-1])[0] | |
65 intro_with_links = ref_resolver.ResolveAllLinks(intro, namespace=api_name) | |
66 apps_parser = _IntroParser() | |
67 apps_parser.feed(Handlebar(intro_with_links).render({ 'is_apps': True }).text) | |
68 extensions_parser = _IntroParser() | |
69 extensions_parser.feed(Handlebar(intro_with_links).render( | |
70 { 'is_apps': False }).text) | |
71 # TODO(cduvall): Use the normal template rendering system, so we can check | |
72 # errors. | |
73 if extensions_parser.page_title != apps_parser.page_title: | |
74 logging.error( | |
75 'Title differs for apps and extensions: Apps: %s, Extensions: %s.' % | |
76 (extensions_parser.page_title, apps_parser.page_title)) | |
77 # The templates will render the heading themselves, so remove it from the | |
78 # HTML content. | |
79 intro_with_links = re.sub(_H1_REGEX, '', intro_with_links, count=1) | |
80 return { | |
81 'intro': Handlebar(intro_with_links), | |
82 'title': apps_parser.page_title, | |
83 'apps_toc': apps_parser.toc, | |
84 'extensions_toc': extensions_parser.toc, | |
85 } | |
86 | |
62 class IntroDataSource(object): | 87 class IntroDataSource(object): |
63 """This class fetches the intros for a given API. From this intro, a table | 88 """ Allows a template to access the contents of a directory or one of its |
64 of contents dictionary is created, which contains the headings in the intro. | 89 subdirectories. |
90 | |
91 Each IntroDataSource has a dictionary of sub IntroDataSources that are used to | |
92 access files in subdirectories. Each child IDS shares a cache and identity_fs | |
93 with its parent. If subdirectory paths were added to base_paths instead, | |
94 lookup time would have really bad time complexity. | |
65 """ | 95 """ |
66 class Factory(object): | 96 class Factory(object): |
67 def __init__(self, compiled_fs_factory, ref_resolver_factory, base_paths): | 97 def __init__(self, compiled_fs_factory, ref_resolver_factory, base_paths): |
68 self._cache = compiled_fs_factory.Create(self._MakeIntroDict, | 98 self._cache = compiled_fs_factory.Create( |
69 IntroDataSource) | 99 functools.partial(_MakeIntroDict, ref_resolver_factory.Create()), |
70 self._ref_resolver = ref_resolver_factory.Create() | 100 IntroDataSource) |
71 self._base_paths = base_paths | 101 self._base_paths = base_paths |
72 | 102 self._identity_fs = compiled_fs_factory.CreateIdentity(IntroDataSource) |
not at google - send to devlin
2013/05/03 15:56:25
hm ok, I don't think any of the changes to this file… [comment truncated in export]
jshumway
2013/05/10 02:08:36
I reverted my changes to this file and everything… [comment truncated in export]
| |
73 def _MakeIntroDict(self, intro_path, intro): | |
74 # Guess the name of the API from the path to the intro. | |
75 api_name = os.path.splitext(intro_path.split('/')[-1])[0] | |
76 intro_with_links = self._ref_resolver.ResolveAllLinks(intro, | |
77 namespace=api_name) | |
78 apps_parser = _IntroParser() | |
79 apps_parser.feed(Handlebar(intro_with_links).render( | |
80 { 'is_apps': True }).text) | |
81 extensions_parser = _IntroParser() | |
82 extensions_parser.feed(Handlebar(intro_with_links).render( | |
83 { 'is_apps': False }).text) | |
84 # TODO(cduvall): Use the normal template rendering system, so we can check | |
85 # errors. | |
86 if extensions_parser.page_title != apps_parser.page_title: | |
87 logging.error( | |
88 'Title differs for apps and extensions: Apps: %s, Extensions: %s.' % | |
89 (extensions_parser.page_title, apps_parser.page_title)) | |
90 # The templates will render the heading themselves, so remove it from the | |
91 # HTML content. | |
92 intro_with_links = re.sub(_H1_REGEX, '', intro_with_links, count=1) | |
93 return { | |
94 'intro': Handlebar(intro_with_links), | |
95 'title': apps_parser.page_title, | |
96 'apps_toc': apps_parser.toc, | |
97 'extensions_toc': extensions_parser.toc, | |
98 } | |
99 | 103 |
100 def Create(self): | 104 def Create(self): |
101 return IntroDataSource(self._cache, self._base_paths) | 105 return IntroDataSource(self._cache, self._identity_fs, self._base_paths) |
102 | 106 |
103 def __init__(self, cache, base_paths): | 107 def __init__(self, cache, identity_fs, base_paths): |
104 self._cache = cache | 108 self._cache = cache |
105 self._base_paths = base_paths | 109 self._base_paths = base_paths |
110 self._identity_fs = identity_fs | |
111 | |
112 # Datasources for subdirectories. | |
113 self._subIDS = {} | |
106 | 114 |
107 def get(self, key): | 115 def get(self, key): |
108 path = FormatKey(key) | 116 path = FormatKey(key) |
117 | |
109 def get_from_base_path(base_path): | 118 def get_from_base_path(base_path): |
110 return self._cache.GetFromFile('%s/%s' % (base_path, path)) | 119 return self._cache.GetFromFile(os.path.join(base_path, path)) |
not at google - send to devlin
2013/05/03 15:56:25
caches always use '/' as a path separator, the cla… [comment truncated in export]
jshumway
2013/05/10 02:08:36
My bad
| |
120 | |
111 for base_path in self._base_paths: | 121 for base_path in self._base_paths: |
112 try: | 122 try: |
113 return get_from_base_path(base_path) | 123 return get_from_base_path(base_path) |
114 except FileNotFoundError: | 124 except FileNotFoundError: |
115 continue | 125 continue |
126 | |
127 # File was not found, check if key is a subdirectory. | |
128 for base_path in self._base_paths: | |
129 subpath = os.path.join(base_path, key) | |
130 try: | |
131 subfiles = self._identity_fs.GetFromFileListing(subpath) | |
132 except FileNotFoundError: | |
133 continue | |
134 | |
135 if subfiles: | |
136 self._subIDS[subpath] = IntroDataSource( | |
137 self._cache, self._identity_fs, [subpath]) | |
138 return self._subIDS[subpath] | |
139 | |
116 # Not found. Do the first operation again so that we get a stack trace - we | 140 # Not found. Do the first operation again so that we get a stack trace - we |
117 # know that it'll fail. | 141 # know that it'll fail. |
118 get_from_base_path(self._base_paths[0]) | 142 get_from_base_path(self._base_paths[0]) |
119 raise AssertionError() | 143 raise AssertionError |
OLD | NEW |