OLD | NEW |
1 # Copyright 2013 The Chromium Authors. All rights reserved. | 1 # Copyright 2013 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 from HTMLParser import HTMLParser | 5 from HTMLParser import HTMLParser |
6 import logging | 6 import logging |
7 | 7 |
8 | 8 |
9 class ParseResult(object): | 9 class ParseResult(object): |
10 '''The result of |ParseDocument|: | 10 '''The result of |ParseDocument|: |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
86 return document, 'No opening <h1> was found' | 86 return document, 'No opening <h1> was found' |
87 title_end = min_index('/h1>', '/H1>') | 87 title_end = min_index('/h1>', '/H1>') |
88 if title_end == -1: | 88 if title_end == -1: |
89 return document, 'No closing </h1> was found' | 89 return document, 'No closing </h1> was found' |
90 if title_end < title_start: | 90 if title_end < title_start: |
91 return document, 'The </h1> appeared before the <h1>' | 91 return document, 'The </h1> appeared before the <h1>' |
92 | 92 |
93 return (document[:title_start] + document[title_end + 4:], None) | 93 return (document[:title_start] + document[title_end + 4:], None) |
94 | 94 |
95 | 95 |
96 _HEADER_TAGS = ['h2', 'h3'] | 96 _HEADER_TAGS = ['h2', 'h3', 'h4'] |
97 | 97 |
98 | 98 |
99 class _DocumentParser(HTMLParser): | 99 class _DocumentParser(HTMLParser): |
100 '''HTMLParser for ParseDocument. | 100 '''HTMLParser for ParseDocument. |
101 ''' | 101 ''' |
102 | 102 |
103 def __init__(self, expect_title): | 103 def __init__(self, expect_title): |
104 HTMLParser.__init__(self) | 104 HTMLParser.__init__(self) |
105 # Public. | 105 # Public. |
106 self.parse_result = None | 106 self.parse_result = None |
(...skipping 25 matching lines...) Expand all Loading... |
132 'will be classified as <h2> for the purpose of ' | 132 'will be classified as <h2> for the purpose of ' |
133 'the structure') | 133 'the structure') |
134 tag = 'h2' | 134 tag = 'h2' |
135 | 135 |
136 if tag == 'h1': | 136 if tag == 'h1': |
137 self._title_entry = self._processing_entry | 137 self._title_entry = self._processing_entry |
138 else: | 138 else: |
139 belongs_to = self._processing_section.structure | 139 belongs_to = self._processing_section.structure |
140 for header in _HEADER_TAGS[:_HEADER_TAGS.index(tag)]: | 140 for header in _HEADER_TAGS[:_HEADER_TAGS.index(tag)]: |
141 if len(belongs_to) == 0: | 141 if len(belongs_to) == 0: |
142 self._WarnWithPosition('Found <%s> without any preceding <%s>' % | 142 # TODO(kalman): Re-enable this warning once the reference pages have |
143 (tag, header)) | 143 # their references fixed. |
| 144 #self._WarnWithPosition('Found <%s> without any preceding <%s>' % |
| 145 # (tag, header)) |
144 break | 146 break |
145 belongs_to = belongs_to[-1].entries | 147 belongs_to = belongs_to[-1].entries |
146 belongs_to.append(self._processing_entry) | 148 belongs_to.append(self._processing_entry) |
147 | 149 |
148 def handle_endtag(self, tag): | 150 def handle_endtag(self, tag): |
149 if tag == 'section': | 151 if tag == 'section': |
150 self._OnSectionBoundary() | 152 self._OnSectionBoundary() |
151 return | 153 return |
152 | 154 |
153 if tag != 'h1' and tag not in _HEADER_TAGS: | 155 if tag != 'h1' and tag not in _HEADER_TAGS: |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
199 | 201 |
def _OnSectionBoundary(self):
  '''Finishes the section being processed, provided it has any content.

  If the current section's |structure| is empty, nothing happens and the same
  section keeps being used. Otherwise the section is appended to |_sections|
  and a fresh DocumentSection becomes the section being processed.
  '''
  finished_section = self._processing_section
  if not finished_section.structure:
    # Nothing was added since the last boundary; keep using this section.
    return
  self._sections.append(finished_section)
  self._processing_section = DocumentSection()
205 | 207 |
206 def _WarnWithPosition(self, message): | 208 def _WarnWithPosition(self, message): |
207 line, col = self.getpos() | 209 line, col = self.getpos() |
208 self._warnings.append('%s (line %s, column %s)' % (message, line, col + 1)) | 210 self._warnings.append('%s (line %s, column %s)' % (message, line, col + 1)) |
OLD | NEW |