chrome/common/extensions/docs/server2/document_parser_test.py - Issue 103413002: Docserver: Include <h4> tags in the table of contents, it's needed for the API

Side by Side Diff: chrome/common/extensions/docs/server2/document_parser_test.py

Issue 103413002: Docserver: Include <h4> tags in the table of contents, it's needed for the API (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 7 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 #!/usr/bin/env python	1 #!/usr/bin/env python

2 # Copyright 2013 The Chromium Authors. All rights reserved.	2 # Copyright 2013 The Chromium Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be	3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.	4 # found in the LICENSE file.

5	5

6 import unittest	6 import unittest

7	7

8 from document_parser import ParseDocument, RemoveTitle	8 from document_parser import ParseDocument, RemoveTitle

9	9

10	10

(...skipping 18 matching lines...) Expand all Loading...
29 <h2>Grapefruit</h3>	29 <h2>Grapefruit</h3>

30 Grapefruit closed a h2 with a h3. This should be a warning.	30 Grapefruit closed a h2 with a h3. This should be a warning.

31	31

32 <h1 id='not-main'>Not the main header</h1>	32 <h1 id='not-main'>Not the main header</h1>

33 But it should still show up in the TOC as though it were an h2.	33 But it should still show up in the TOC as though it were an h2.

34	34

35 <h2>Not <h3>a banana</h2>	35 <h2>Not <h3>a banana</h2>

36 The embedded h3 should be ignored.	36 The embedded h3 should be ignored.

37	37

38 <h4>It's a h4</h4>	38 <h4>It's a h4</h4>

39 h4 are not considered part of the document structure.	39 h4 are part of the document structure, but this is not inside a h3.

40	40

41 <h3>Plantains</h3>	41 <h3>Plantains</h3>

42 Now I'm just getting lazy.	42 Now I'm just getting lazy.

	43

	44 <h4>Another h4</h4>

	45 This h4 is inside a h3 so will show up.

	46

	47 <h5>Header 5</h5>

	48 Header 5s are not parsed.

43 '''	49 '''

44	50

45	51

46 _WHOLE_DOCUMENT_WITHOUT_TITLE = '''	52 _WHOLE_DOCUMENT_WITHOUT_TITLE = '''

47 Preamble before heading.	53 Preamble before heading.

48	54

49	55

50 Some intro to the content.	56 Some intro to the content.

51	57

52 <h2 id='banana' class='header'>Bananas</h2>	58 <h2 id='banana' class='header'>Bananas</h2>

(...skipping 11 matching lines...) Expand all Loading...
64 <h2>Grapefruit</h3>	70 <h2>Grapefruit</h3>

65 Grapefruit closed a h2 with a h3. This should be a warning.	71 Grapefruit closed a h2 with a h3. This should be a warning.

66	72

67 <h1 id='not-main'>Not the main header</h1>	73 <h1 id='not-main'>Not the main header</h1>

68 But it should still show up in the TOC as though it were an h2.	74 But it should still show up in the TOC as though it were an h2.

69	75

70 <h2>Not <h3>a banana</h2>	76 <h2>Not <h3>a banana</h2>

71 The embedded h3 should be ignored.	77 The embedded h3 should be ignored.

72	78

73 <h4>It's a h4</h4>	79 <h4>It's a h4</h4>

74 h4 are not considered part of the document structure.	80 h4 are part of the document structure, but this is not inside a h3.

75	81

76 <h3>Plantains</h3>	82 <h3>Plantains</h3>

77 Now I'm just getting lazy.	83 Now I'm just getting lazy.

	84

	85 <h4>Another h4</h4>

	86 This h4 is inside a h3 so will show up.

	87

	88 <h5>Header 5</h5>

	89 Header 5s are not parsed.

78 '''	90 '''

79	91

80	92

81 class DocumentParserUnittest(unittest.TestCase):	93 class DocumentParserUnittest(unittest.TestCase):

82	94

83 def testEmptyDocument(self):	95 def testEmptyDocument(self):

84 self.assertEqual(('', 'No opening <h1> was found'), RemoveTitle(''))	96 self.assertEqual(('', 'No opening <h1> was found'), RemoveTitle(''))

85	97

86 result = ParseDocument('')	98 result = ParseDocument('')

87 self.assertEqual(None, result.title)	99 self.assertEqual(None, result.title)

(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
137 self.assertEqual((_WHOLE_DOCUMENT_WITHOUT_TITLE, None),	149 self.assertEqual((_WHOLE_DOCUMENT_WITHOUT_TITLE, None),

138 RemoveTitle(_WHOLE_DOCUMENT))	150 RemoveTitle(_WHOLE_DOCUMENT))

139 result = ParseDocument(_WHOLE_DOCUMENT, expect_title=True)	151 result = ParseDocument(_WHOLE_DOCUMENT, expect_title=True)

140 self.assertEqual('Main header', result.title)	152 self.assertEqual('Main header', result.title)

141 self.assertEqual({'id': 'main', 'class': 'header'}, result.title_attributes)	153 self.assertEqual({'id': 'main', 'class': 'header'}, result.title_attributes)

142 self.assertEqual([	154 self.assertEqual([

143 'Found closing </h3> while processing a <h2> (line 19, column 15)',	155 'Found closing </h3> while processing a <h2> (line 19, column 15)',

144 'Found multiple <h1> tags. Subsequent <h1> tags will be classified as '	156 'Found multiple <h1> tags. Subsequent <h1> tags will be classified as '

145 '<h2> for the purpose of the structure (line 22, column 1)',	157 '<h2> for the purpose of the structure (line 22, column 1)',

146 'Found <h3> in the middle of processing a <h2> (line 25, column 9)',	158 'Found <h3> in the middle of processing a <h2> (line 25, column 9)',

	159 # TODO(kalman): Re-enable this warning once the reference pages have

	160 # their references fixed.

	161 #'Found <h4> without any preceding <h3> (line 28, column 1)',

147 ], result.warnings)	162 ], result.warnings)

148	163

149 # The non-trivial table of contents assertions...	164 # The non-trivial table of contents assertions...

150 self.assertEqual(1, len(result.sections))	165 self.assertEqual(1, len(result.sections))

151 entries = result.sections[0].structure	166 entries = result.sections[0].structure

152	167

153 self.assertEqual(5, len(entries), entries)	168 self.assertEqual(5, len(entries), entries)

154 entry0, entry1, entry2, entry3, entry4 = entries	169 entry0, entry1, entry2, entry3, entry4 = entries

155	170

156 self.assertEqual('Bananas', entry0.name)	171 self.assertEqual('Bananas', entry0.name)

(...skipping 15 matching lines...) Expand all Loading...
172 self.assertEqual('Grapefruit', entry2.name)	187 self.assertEqual('Grapefruit', entry2.name)

173 self.assertEqual({}, entry2.attributes)	188 self.assertEqual({}, entry2.attributes)

174 self.assertEqual([], entry2.entries)	189 self.assertEqual([], entry2.entries)

175	190

176 self.assertEqual('Not the main header', entry3.name)	191 self.assertEqual('Not the main header', entry3.name)

177 self.assertEqual({'id': 'not-main'}, entry3.attributes)	192 self.assertEqual({'id': 'not-main'}, entry3.attributes)

178 self.assertEqual([], entry3.entries)	193 self.assertEqual([], entry3.entries)

179	194

180 self.assertEqual('Not a banana', entry4.name)	195 self.assertEqual('Not a banana', entry4.name)

181 self.assertEqual({}, entry4.attributes)	196 self.assertEqual({}, entry4.attributes)

182 self.assertEqual(1, len(entry4.entries))	197 self.assertEqual(2, len(entry4.entries))

183 entry4_1, = entry4.entries	198 entry4_1, entry4_2 = entry4.entries

184	199

185 self.assertEqual('Plantains', entry4_1.name)	200 self.assertEqual('It\'s a h4', entry4_1.name)

186 self.assertEqual({}, entry4_1.attributes)	201 self.assertEqual({}, entry4_1.attributes)

187 self.assertEqual([], entry4_1.entries)	202 self.assertEqual([], entry4_1.entries)

188	203

	204 self.assertEqual('Plantains', entry4_2.name)

	205 self.assertEqual({}, entry4_2.attributes)

	206 self.assertEqual(1, len(entry4_2.entries))

	207 entry4_2_1, = entry4_2.entries

	208

	209 self.assertEqual('Another h4', entry4_2_1.name)

	210 self.assertEqual({}, entry4_2_1.attributes)

	211 self.assertEqual([], entry4_2_1.entries)

	212

189 def testSingleExplicitSection(self):	213 def testSingleExplicitSection(self):

190 def test(document):	214 def test(document):

191 result = ParseDocument(document, expect_title=True)	215 result = ParseDocument(document, expect_title=True)

192 self.assertEqual([], result.warnings)	216 self.assertEqual([], result.warnings)

193 self.assertEqual('Header', result.title)	217 self.assertEqual('Header', result.title)

194 self.assertEqual(1, len(result.sections))	218 self.assertEqual(1, len(result.sections))

195 section0, = result.sections	219 section0, = result.sections

196 entry0, = section0.structure	220 entry0, = section0.structure

197 self.assertEqual('An inner header', entry0.name)	221 self.assertEqual('An inner header', entry0.name)

198 # A single section, one with the title inside the section, the other out.	222 # A single section, one with the title inside the section, the other out.

(...skipping 29 matching lines...) Expand all Loading...
228 def assert_single_header(section, name):	252 def assert_single_header(section, name):

229 self.assertEqual(1, len(section.structure))	253 self.assertEqual(1, len(section.structure))

230 self.assertEqual(name, section.structure[0].name)	254 self.assertEqual(name, section.structure[0].name)

231 assert_single_header(section0, 'First header')	255 assert_single_header(section0, 'First header')

232 assert_single_header(section1, 'Second header')	256 assert_single_header(section1, 'Second header')

233 assert_single_header(section2, 'Third header')	257 assert_single_header(section2, 'Third header')

234	258

235	259

236 if __name__ == '__main__':	260 if __name__ == '__main__':

237 unittest.main()	261 unittest.main()

OLD	NEW