OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright 2013 The Chromium Authors. All rights reserved. | 2 # Copyright 2013 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 import unittest | 6 import unittest |
7 | 7 |
8 from document_parser import ParseDocument, RemoveTitle | 8 from document_parser import ParseDocument, RemoveTitle |
9 | 9 |
10 | 10 |
(...skipping 18 matching lines...) Expand all Loading... |
29 <h2>Grapefruit</h3> | 29 <h2>Grapefruit</h3> |
30 Grapefruit closed a h2 with a h3. This should be a warning. | 30 Grapefruit closed a h2 with a h3. This should be a warning. |
31 | 31 |
32 <h1 id='not-main'>Not the main header</h1> | 32 <h1 id='not-main'>Not the main header</h1> |
33 But it should still show up in the TOC as though it were an h2. | 33 But it should still show up in the TOC as though it were an h2. |
34 | 34 |
35 <h2>Not <h3>a banana</h2> | 35 <h2>Not <h3>a banana</h2> |
36 The embedded h3 should be ignored. | 36 The embedded h3 should be ignored. |
37 | 37 |
38 <h4>It's a h4</h4> | 38 <h4>It's a h4</h4> |
39 h4 are not considered part of the document structure. | 39 h4 are part of the document structure, but this is not inside a h3. |
40 | 40 |
41 <h3>Plantains</h3> | 41 <h3>Plantains</h3> |
42 Now I'm just getting lazy. | 42 Now I'm just getting lazy. |
| 43 |
| 44 <h4>Another h4</h4> |
| 45 This h4 is inside a h3 so will show up. |
| 46 |
| 47 <h5>Header 5</h5> |
| 48 Header 5s are not parsed. |
43 ''' | 49 ''' |
44 | 50 |
45 | 51 |
46 _WHOLE_DOCUMENT_WITHOUT_TITLE = ''' | 52 _WHOLE_DOCUMENT_WITHOUT_TITLE = ''' |
47 Preamble before heading. | 53 Preamble before heading. |
48 | 54 |
49 | 55 |
50 Some intro to the content. | 56 Some intro to the content. |
51 | 57 |
52 <h2 id='banana' class='header'>Bananas</h2> | 58 <h2 id='banana' class='header'>Bananas</h2> |
(...skipping 11 matching lines...) Expand all Loading... |
64 <h2>Grapefruit</h3> | 70 <h2>Grapefruit</h3> |
65 Grapefruit closed a h2 with a h3. This should be a warning. | 71 Grapefruit closed a h2 with a h3. This should be a warning. |
66 | 72 |
67 <h1 id='not-main'>Not the main header</h1> | 73 <h1 id='not-main'>Not the main header</h1> |
68 But it should still show up in the TOC as though it were an h2. | 74 But it should still show up in the TOC as though it were an h2. |
69 | 75 |
70 <h2>Not <h3>a banana</h2> | 76 <h2>Not <h3>a banana</h2> |
71 The embedded h3 should be ignored. | 77 The embedded h3 should be ignored. |
72 | 78 |
73 <h4>It's a h4</h4> | 79 <h4>It's a h4</h4> |
74 h4 are not considered part of the document structure. | 80 h4 are part of the document structure, but this is not inside a h3. |
75 | 81 |
76 <h3>Plantains</h3> | 82 <h3>Plantains</h3> |
77 Now I'm just getting lazy. | 83 Now I'm just getting lazy. |
| 84 |
| 85 <h4>Another h4</h4> |
| 86 This h4 is inside a h3 so will show up. |
| 87 |
| 88 <h5>Header 5</h5> |
| 89 Header 5s are not parsed. |
78 ''' | 90 ''' |
79 | 91 |
80 | 92 |
81 class DocumentParserUnittest(unittest.TestCase): | 93 class DocumentParserUnittest(unittest.TestCase): |
82 | 94 |
83 def testEmptyDocument(self): | 95 def testEmptyDocument(self): |
84 self.assertEqual(('', 'No opening <h1> was found'), RemoveTitle('')) | 96 self.assertEqual(('', 'No opening <h1> was found'), RemoveTitle('')) |
85 | 97 |
86 result = ParseDocument('') | 98 result = ParseDocument('') |
87 self.assertEqual(None, result.title) | 99 self.assertEqual(None, result.title) |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
137 self.assertEqual((_WHOLE_DOCUMENT_WITHOUT_TITLE, None), | 149 self.assertEqual((_WHOLE_DOCUMENT_WITHOUT_TITLE, None), |
138 RemoveTitle(_WHOLE_DOCUMENT)) | 150 RemoveTitle(_WHOLE_DOCUMENT)) |
139 result = ParseDocument(_WHOLE_DOCUMENT, expect_title=True) | 151 result = ParseDocument(_WHOLE_DOCUMENT, expect_title=True) |
140 self.assertEqual('Main header', result.title) | 152 self.assertEqual('Main header', result.title) |
141 self.assertEqual({'id': 'main', 'class': 'header'}, result.title_attributes) | 153 self.assertEqual({'id': 'main', 'class': 'header'}, result.title_attributes) |
142 self.assertEqual([ | 154 self.assertEqual([ |
143 'Found closing </h3> while processing a <h2> (line 19, column 15)', | 155 'Found closing </h3> while processing a <h2> (line 19, column 15)', |
144 'Found multiple <h1> tags. Subsequent <h1> tags will be classified as ' | 156 'Found multiple <h1> tags. Subsequent <h1> tags will be classified as ' |
145 '<h2> for the purpose of the structure (line 22, column 1)', | 157 '<h2> for the purpose of the structure (line 22, column 1)', |
146 'Found <h3> in the middle of processing a <h2> (line 25, column 9)', | 158 'Found <h3> in the middle of processing a <h2> (line 25, column 9)', |
| 159 # TODO(kalman): Re-enable this warning once the reference pages have |
| 160 # their references fixed. |
| 161 #'Found <h4> without any preceding <h3> (line 28, column 1)', |
147 ], result.warnings) | 162 ], result.warnings) |
148 | 163 |
149 # The non-trivial table of contents assertions... | 164 # The non-trivial table of contents assertions... |
150 self.assertEqual(1, len(result.sections)) | 165 self.assertEqual(1, len(result.sections)) |
151 entries = result.sections[0].structure | 166 entries = result.sections[0].structure |
152 | 167 |
153 self.assertEqual(5, len(entries), entries) | 168 self.assertEqual(5, len(entries), entries) |
154 entry0, entry1, entry2, entry3, entry4 = entries | 169 entry0, entry1, entry2, entry3, entry4 = entries |
155 | 170 |
156 self.assertEqual('Bananas', entry0.name) | 171 self.assertEqual('Bananas', entry0.name) |
(...skipping 15 matching lines...) Expand all Loading... |
172 self.assertEqual('Grapefruit', entry2.name) | 187 self.assertEqual('Grapefruit', entry2.name) |
173 self.assertEqual({}, entry2.attributes) | 188 self.assertEqual({}, entry2.attributes) |
174 self.assertEqual([], entry2.entries) | 189 self.assertEqual([], entry2.entries) |
175 | 190 |
176 self.assertEqual('Not the main header', entry3.name) | 191 self.assertEqual('Not the main header', entry3.name) |
177 self.assertEqual({'id': 'not-main'}, entry3.attributes) | 192 self.assertEqual({'id': 'not-main'}, entry3.attributes) |
178 self.assertEqual([], entry3.entries) | 193 self.assertEqual([], entry3.entries) |
179 | 194 |
180 self.assertEqual('Not a banana', entry4.name) | 195 self.assertEqual('Not a banana', entry4.name) |
181 self.assertEqual({}, entry4.attributes) | 196 self.assertEqual({}, entry4.attributes) |
182 self.assertEqual(1, len(entry4.entries)) | 197 self.assertEqual(2, len(entry4.entries)) |
183 entry4_1, = entry4.entries | 198 entry4_1, entry4_2 = entry4.entries |
184 | 199 |
185 self.assertEqual('Plantains', entry4_1.name) | 200 self.assertEqual('It\'s a h4', entry4_1.name) |
186 self.assertEqual({}, entry4_1.attributes) | 201 self.assertEqual({}, entry4_1.attributes) |
187 self.assertEqual([], entry4_1.entries) | 202 self.assertEqual([], entry4_1.entries) |
188 | 203 |
| 204 self.assertEqual('Plantains', entry4_2.name) |
| 205 self.assertEqual({}, entry4_2.attributes) |
| 206 self.assertEqual(1, len(entry4_2.entries)) |
| 207 entry4_2_1, = entry4_2.entries |
| 208 |
| 209 self.assertEqual('Another h4', entry4_2_1.name) |
| 210 self.assertEqual({}, entry4_2_1.attributes) |
| 211 self.assertEqual([], entry4_2_1.entries) |
| 212 |
189 def testSingleExplicitSection(self): | 213 def testSingleExplicitSection(self): |
190 def test(document): | 214 def test(document): |
191 result = ParseDocument(document, expect_title=True) | 215 result = ParseDocument(document, expect_title=True) |
192 self.assertEqual([], result.warnings) | 216 self.assertEqual([], result.warnings) |
193 self.assertEqual('Header', result.title) | 217 self.assertEqual('Header', result.title) |
194 self.assertEqual(1, len(result.sections)) | 218 self.assertEqual(1, len(result.sections)) |
195 section0, = result.sections | 219 section0, = result.sections |
196 entry0, = section0.structure | 220 entry0, = section0.structure |
197 self.assertEqual('An inner header', entry0.name) | 221 self.assertEqual('An inner header', entry0.name) |
198 # A single section, one with the title inside the section, the other out. | 222 # A single section, one with the title inside the section, the other out. |
(...skipping 29 matching lines...) Expand all Loading... |
228 def assert_single_header(section, name): | 252 def assert_single_header(section, name): |
229 self.assertEqual(1, len(section.structure)) | 253 self.assertEqual(1, len(section.structure)) |
230 self.assertEqual(name, section.structure[0].name) | 254 self.assertEqual(name, section.structure[0].name) |
231 assert_single_header(section0, 'First header') | 255 assert_single_header(section0, 'First header') |
232 assert_single_header(section1, 'Second header') | 256 assert_single_header(section1, 'Second header') |
233 assert_single_header(section2, 'Third header') | 257 assert_single_header(section2, 'Third header') |
234 | 258 |
235 | 259 |
236 if __name__ == '__main__': | 260 if __name__ == '__main__': |
237 unittest.main() | 261 unittest.main() |
OLD | NEW |