OLD | NEW |
1 # Copyright 2013 The Chromium Authors. All rights reserved. | 1 # Copyright 2013 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 from HTMLParser import HTMLParser | 5 from HTMLParser import HTMLParser |
6 import logging | 6 import logging |
7 | 7 |
8 | 8 |
9 class ParseResult(object): | 9 class ParseResult(object): |
10 '''The result of |ParseDocument|: | 10 '''The result of |ParseDocument|: |
(...skipping 30 matching lines...) Expand all Loading... |
41 ''' | 41 ''' |
42 | 42 |
43 def __init__(self, tag, attributes): | 43 def __init__(self, tag, attributes): |
44 self.attributes = attributes | 44 self.attributes = attributes |
45 self.name = '' | 45 self.name = '' |
46 self.entries = [] | 46 self.entries = [] |
47 # Callers shouldn't care about the tag, but we need it for sanity checking, | 47 # Callers shouldn't care about the tag, but we need it for sanity checking, |
48 # so make it private. In particular we pretend that anything but the first | 48 # so make it private. In particular we pretend that anything but the first |
49 # h1 is an h2, and it'd be odd to expose that. | 49 # h1 is an h2, and it'd be odd to expose that. |
50 self._tag = tag | 50 self._tag = tag |
| 51 # Documents can override the name of the entry using title="". |
| 52 self._has_explicit_name = False |
51 | 53 |
52 def __repr__(self): | 54 def __repr__(self): |
53 return '<%s>%s</%s>' % (self._tag, self.name, self._tag) | 55 return '<%s>%s</%s>' % (self._tag, self.name, self._tag) |
54 | 56 |
55 def __str__(self): | 57 def __str__(self): |
56 return repr(self) | 58 return repr(self) |
57 | 59 |
58 | 60 |
59 def ParseDocument(document, expect_title=False): | 61 def ParseDocument(document, expect_title=False): |
60 '''Parses the title and a document structure from |document| and returns a | 62 '''Parses the title and a document structure from |document| and returns a |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
118 return | 120 return |
119 | 121 |
120 if tag != 'h1' and tag not in _HEADER_TAGS: | 122 if tag != 'h1' and tag not in _HEADER_TAGS: |
121 return | 123 return |
122 | 124 |
123 if self._processing_entry is not None: | 125 if self._processing_entry is not None: |
124 self._WarnWithPosition('Found <%s> in the middle of processing a <%s>' % | 126 self._WarnWithPosition('Found <%s> in the middle of processing a <%s>' % |
125 (tag, self._processing_entry._tag)) | 127 (tag, self._processing_entry._tag)) |
126 return | 128 return |
127 | 129 |
128 self._processing_entry = DocumentStructureEntry(tag, dict(attrs)) | 130 attrs_dict = dict(attrs) |
| 131 self._processing_entry = DocumentStructureEntry(tag, attrs_dict) |
| 132 |
| 133 explicit_name = attrs_dict.pop('title', None) |
| 134 if explicit_name == '': |
| 135 # Don't create a TOC entry at all if the tag has specified title="". |
| 136 return |
| 137 if explicit_name is not None: |
| 138 self._processing_entry.name = explicit_name |
| 139 self._processing_entry._has_explicit_name = True |
129 | 140 |
130 if tag == 'h1' and self._title_entry is not None: | 141 if tag == 'h1' and self._title_entry is not None: |
131 self._WarnWithPosition('Found multiple <h1> tags. Subsequent <h1> tags ' | 142 self._WarnWithPosition('Found multiple <h1> tags. Subsequent <h1> tags ' |
132 'will be classified as <h2> for the purpose of ' | 143 'will be classified as <h2> for the purpose of ' |
133 'the structure') | 144 'the structure') |
134 tag = 'h2' | 145 tag = 'h2' |
135 | 146 |
136 if tag == 'h1': | 147 if tag == 'h1': |
137 self._title_entry = self._processing_entry | 148 self._title_entry = self._processing_entry |
138 else: | 149 else: |
(...skipping 23 matching lines...) Expand all Loading... |
162 | 173 |
163 if self._processing_entry._tag != tag: | 174 if self._processing_entry._tag != tag: |
164 self._WarnWithPosition('Found closing </%s> while processing a <%s>' % | 175 self._WarnWithPosition('Found closing </%s> while processing a <%s>' % |
165 (tag, self._processing_entry._tag)) | 176 (tag, self._processing_entry._tag)) |
166 # Note: no early return, it's more likely that the mismatched header was | 177 # Note: no early return, it's more likely that the mismatched header was |
167 # a typo rather than a misplaced closing header tag. | 178 # a typo rather than a misplaced closing header tag. |
168 | 179 |
169 self._processing_entry = None | 180 self._processing_entry = None |
170 | 181 |
171 def handle_data(self, data): | 182 def handle_data(self, data): |
172 if self._processing_entry is not None: | 183 if (self._processing_entry is not None and |
| 184 not self._processing_entry._has_explicit_name): |
173 # += is inefficient, but probably fine here because the chances of a | 185 # += is inefficient, but probably fine here because the chances of a |
174 # large number of nested tags within header tags is pretty low. | 186 # large number of nested tags within header tags is pretty low. |
175 self._processing_entry.name += data | 187 self._processing_entry.name += data |
176 | 188 |
177 def close(self): | 189 def close(self): |
178 HTMLParser.close(self) | 190 HTMLParser.close(self) |
179 | 191 |
180 self._OnSectionBoundary() | 192 self._OnSectionBoundary() |
181 | 193 |
182 if self._processing_entry is not None: | 194 if self._processing_entry is not None: |
(...skipping 18 matching lines...) Expand all Loading... |
201 | 213 |
202 def _OnSectionBoundary(self): | 214 def _OnSectionBoundary(self): |
203 # Only start a new section if the previous section was non-empty. | 215 # Only start a new section if the previous section was non-empty. |
204 if self._processing_section.structure: | 216 if self._processing_section.structure: |
205 self._sections.append(self._processing_section) | 217 self._sections.append(self._processing_section) |
206 self._processing_section = DocumentSection() | 218 self._processing_section = DocumentSection() |
207 | 219 |
208 def _WarnWithPosition(self, message): | 220 def _WarnWithPosition(self, message): |
209 line, col = self.getpos() | 221 line, col = self.getpos() |
210 self._warnings.append('%s (line %s, column %s)' % (message, line, col + 1)) | 222 self._warnings.append('%s (line %s, column %s)' % (message, line, col + 1)) |
OLD | NEW |