| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright 2013 The Chromium Authors. All rights reserved. | 2 # Copyright 2013 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 import unittest | 6 import unittest |
| 7 | 7 |
| 8 from document_parser import ParseDocument, RemoveTitle | 8 from document_parser import ParseDocument, RemoveTitle |
| 9 | 9 |
| 10 | 10 |
| (...skipping 18 matching lines...) Expand all Loading... |
| 29 <h2>Grapefruit</h3> | 29 <h2>Grapefruit</h3> |
| 30 Grapefruit closed a h2 with a h3. This should be a warning. | 30 Grapefruit closed a h2 with a h3. This should be a warning. |
| 31 | 31 |
| 32 <h1 id='not-main'>Not the main header</h1> | 32 <h1 id='not-main'>Not the main header</h1> |
| 33 But it should still show up in the TOC as though it were an h2. | 33 But it should still show up in the TOC as though it were an h2. |
| 34 | 34 |
| 35 <h2>Not <h3>a banana</h2> | 35 <h2>Not <h3>a banana</h2> |
| 36 The embedded h3 should be ignored. | 36 The embedded h3 should be ignored. |
| 37 | 37 |
| 38 <h4>It's a h4</h4> | 38 <h4>It's a h4</h4> |
| 39 h4 are not considered part of the document structure. | 39 h4 are part of the document structure, but this is not inside a h3. |
| 40 | 40 |
| 41 <h3>Plantains</h3> | 41 <h3>Plantains</h3> |
| 42 Now I'm just getting lazy. | 42 Now I'm just getting lazy. |
| 43 |
| 44 <h4>Another h4</h4> |
| 45 This h4 is inside a h3 so will show up. |
| 46 |
| 47 <h5>Header 5</h5> |
| 48 Header 5s are not parsed. |
| 43 ''' | 49 ''' |
| 44 | 50 |
| 45 | 51 |
| 46 _WHOLE_DOCUMENT_WITHOUT_TITLE = ''' | 52 _WHOLE_DOCUMENT_WITHOUT_TITLE = ''' |
| 47 Preamble before heading. | 53 Preamble before heading. |
| 48 | 54 |
| 49 | 55 |
| 50 Some intro to the content. | 56 Some intro to the content. |
| 51 | 57 |
| 52 <h2 id='banana' class='header'>Bananas</h2> | 58 <h2 id='banana' class='header'>Bananas</h2> |
| (...skipping 11 matching lines...) Expand all Loading... |
| 64 <h2>Grapefruit</h3> | 70 <h2>Grapefruit</h3> |
| 65 Grapefruit closed a h2 with a h3. This should be a warning. | 71 Grapefruit closed a h2 with a h3. This should be a warning. |
| 66 | 72 |
| 67 <h1 id='not-main'>Not the main header</h1> | 73 <h1 id='not-main'>Not the main header</h1> |
| 68 But it should still show up in the TOC as though it were an h2. | 74 But it should still show up in the TOC as though it were an h2. |
| 69 | 75 |
| 70 <h2>Not <h3>a banana</h2> | 76 <h2>Not <h3>a banana</h2> |
| 71 The embedded h3 should be ignored. | 77 The embedded h3 should be ignored. |
| 72 | 78 |
| 73 <h4>It's a h4</h4> | 79 <h4>It's a h4</h4> |
| 74 h4 are not considered part of the document structure. | 80 h4 are part of the document structure, but this is not inside a h3. |
| 75 | 81 |
| 76 <h3>Plantains</h3> | 82 <h3>Plantains</h3> |
| 77 Now I'm just getting lazy. | 83 Now I'm just getting lazy. |
| 84 |
| 85 <h4>Another h4</h4> |
| 86 This h4 is inside a h3 so will show up. |
| 87 |
| 88 <h5>Header 5</h5> |
| 89 Header 5s are not parsed. |
| 78 ''' | 90 ''' |
| 79 | 91 |
| 80 | 92 |
| 81 class DocumentParserUnittest(unittest.TestCase): | 93 class DocumentParserUnittest(unittest.TestCase): |
| 82 | 94 |
| 83 def testEmptyDocument(self): | 95 def testEmptyDocument(self): |
| 84 self.assertEqual(('', 'No opening <h1> was found'), RemoveTitle('')) | 96 self.assertEqual(('', 'No opening <h1> was found'), RemoveTitle('')) |
| 85 | 97 |
| 86 result = ParseDocument('') | 98 result = ParseDocument('') |
| 87 self.assertEqual(None, result.title) | 99 self.assertEqual(None, result.title) |
| (...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 137 self.assertEqual((_WHOLE_DOCUMENT_WITHOUT_TITLE, None), | 149 self.assertEqual((_WHOLE_DOCUMENT_WITHOUT_TITLE, None), |
| 138 RemoveTitle(_WHOLE_DOCUMENT)) | 150 RemoveTitle(_WHOLE_DOCUMENT)) |
| 139 result = ParseDocument(_WHOLE_DOCUMENT, expect_title=True) | 151 result = ParseDocument(_WHOLE_DOCUMENT, expect_title=True) |
| 140 self.assertEqual('Main header', result.title) | 152 self.assertEqual('Main header', result.title) |
| 141 self.assertEqual({'id': 'main', 'class': 'header'}, result.title_attributes) | 153 self.assertEqual({'id': 'main', 'class': 'header'}, result.title_attributes) |
| 142 self.assertEqual([ | 154 self.assertEqual([ |
| 143 'Found closing </h3> while processing a <h2> (line 19, column 15)', | 155 'Found closing </h3> while processing a <h2> (line 19, column 15)', |
| 144 'Found multiple <h1> tags. Subsequent <h1> tags will be classified as ' | 156 'Found multiple <h1> tags. Subsequent <h1> tags will be classified as ' |
| 145 '<h2> for the purpose of the structure (line 22, column 1)', | 157 '<h2> for the purpose of the structure (line 22, column 1)', |
| 146 'Found <h3> in the middle of processing a <h2> (line 25, column 9)', | 158 'Found <h3> in the middle of processing a <h2> (line 25, column 9)', |
| 159 # TODO(kalman): Re-enable this warning once the reference pages have |
| 160 # their references fixed. |
| 161 #'Found <h4> without any preceding <h3> (line 28, column 1)', |
| 147 ], result.warnings) | 162 ], result.warnings) |
| 148 | 163 |
| 149 # The non-trivial table of contents assertions... | 164 # The non-trivial table of contents assertions... |
| 150 self.assertEqual(1, len(result.sections)) | 165 self.assertEqual(1, len(result.sections)) |
| 151 entries = result.sections[0].structure | 166 entries = result.sections[0].structure |
| 152 | 167 |
| 153 self.assertEqual(5, len(entries), entries) | 168 self.assertEqual(5, len(entries), entries) |
| 154 entry0, entry1, entry2, entry3, entry4 = entries | 169 entry0, entry1, entry2, entry3, entry4 = entries |
| 155 | 170 |
| 156 self.assertEqual('Bananas', entry0.name) | 171 self.assertEqual('Bananas', entry0.name) |
| (...skipping 15 matching lines...) Expand all Loading... |
| 172 self.assertEqual('Grapefruit', entry2.name) | 187 self.assertEqual('Grapefruit', entry2.name) |
| 173 self.assertEqual({}, entry2.attributes) | 188 self.assertEqual({}, entry2.attributes) |
| 174 self.assertEqual([], entry2.entries) | 189 self.assertEqual([], entry2.entries) |
| 175 | 190 |
| 176 self.assertEqual('Not the main header', entry3.name) | 191 self.assertEqual('Not the main header', entry3.name) |
| 177 self.assertEqual({'id': 'not-main'}, entry3.attributes) | 192 self.assertEqual({'id': 'not-main'}, entry3.attributes) |
| 178 self.assertEqual([], entry3.entries) | 193 self.assertEqual([], entry3.entries) |
| 179 | 194 |
| 180 self.assertEqual('Not a banana', entry4.name) | 195 self.assertEqual('Not a banana', entry4.name) |
| 181 self.assertEqual({}, entry4.attributes) | 196 self.assertEqual({}, entry4.attributes) |
| 182 self.assertEqual(1, len(entry4.entries)) | 197 self.assertEqual(2, len(entry4.entries)) |
| 183 entry4_1, = entry4.entries | 198 entry4_1, entry4_2 = entry4.entries |
| 184 | 199 |
| 185 self.assertEqual('Plantains', entry4_1.name) | 200 self.assertEqual('It\'s a h4', entry4_1.name) |
| 186 self.assertEqual({}, entry4_1.attributes) | 201 self.assertEqual({}, entry4_1.attributes) |
| 187 self.assertEqual([], entry4_1.entries) | 202 self.assertEqual([], entry4_1.entries) |
| 188 | 203 |
| 204 self.assertEqual('Plantains', entry4_2.name) |
| 205 self.assertEqual({}, entry4_2.attributes) |
| 206 self.assertEqual(1, len(entry4_2.entries)) |
| 207 entry4_2_1, = entry4_2.entries |
| 208 |
| 209 self.assertEqual('Another h4', entry4_2_1.name) |
| 210 self.assertEqual({}, entry4_2_1.attributes) |
| 211 self.assertEqual([], entry4_2_1.entries) |
| 212 |
| 189 def testSingleExplicitSection(self): | 213 def testSingleExplicitSection(self): |
| 190 def test(document): | 214 def test(document): |
| 191 result = ParseDocument(document, expect_title=True) | 215 result = ParseDocument(document, expect_title=True) |
| 192 self.assertEqual([], result.warnings) | 216 self.assertEqual([], result.warnings) |
| 193 self.assertEqual('Header', result.title) | 217 self.assertEqual('Header', result.title) |
| 194 self.assertEqual(1, len(result.sections)) | 218 self.assertEqual(1, len(result.sections)) |
| 195 section0, = result.sections | 219 section0, = result.sections |
| 196 entry0, = section0.structure | 220 entry0, = section0.structure |
| 197 self.assertEqual('An inner header', entry0.name) | 221 self.assertEqual('An inner header', entry0.name) |
| 198 # A single section, one with the title inside the section, the other out. | 222 # A single section, one with the title inside the section, the other out. |
| (...skipping 29 matching lines...) Expand all Loading... |
| 228 def assert_single_header(section, name): | 252 def assert_single_header(section, name): |
| 229 self.assertEqual(1, len(section.structure)) | 253 self.assertEqual(1, len(section.structure)) |
| 230 self.assertEqual(name, section.structure[0].name) | 254 self.assertEqual(name, section.structure[0].name) |
| 231 assert_single_header(section0, 'First header') | 255 assert_single_header(section0, 'First header') |
| 232 assert_single_header(section1, 'Second header') | 256 assert_single_header(section1, 'Second header') |
| 233 assert_single_header(section2, 'Third header') | 257 assert_single_header(section2, 'Third header') |
| 234 | 258 |
| 235 | 259 |
| 236 if __name__ == '__main__': | 260 if __name__ == '__main__': |
| 237 unittest.main() | 261 unittest.main() |
| OLD | NEW |