OLD | NEW |
| (Empty) |
1 # -*- test-case-name: twisted.web.test.test_xml -*- | |
2 # | |
3 # Copyright (c) 2001-2004 Twisted Matrix Laboratories. | |
4 # See LICENSE for details. | |
5 | |
6 | |
7 """Some fairly inadequate testcases for Twisted XML support.""" | |
8 | |
9 from __future__ import nested_scopes | |
10 | |
11 from twisted.trial.unittest import TestCase | |
12 | |
13 from twisted.web import sux | |
14 | |
15 from twisted.web import microdom | |
16 | |
17 from twisted.web import domhelpers | |
18 | |
class Sux0r(sux.XMLParser):
    """
    L{sux.XMLParser} subclass that records the start-tag and text events
    passed to it, so tests can inspect them afterwards.
    """

    def __init__(self):
        # Each token is a tuple whose first item names the kind of event.
        self.tokens = []

    def gotTagStart(self, name, attrs):
        """
        Record a C{("start", name, attrs)} token.
        """
        self.tokens.append(("start", name, attrs))

    def gotText(self, text):
        """
        Record a C{("text", text)} token.
        """
        self.tokens.append(("text", text))

    def getTagStarts(self):
        """
        Return a list of the recorded tokens whose kind is C{'start'}, in
        the order they were recorded.
        """
        starts = []
        for token in self.tokens:
            if token[0] == 'start':
                starts.append(token)
        return starts
31 | |
class SUXTest(TestCase):
    """
    Tests for the L{sux} parser, driven through L{Sux0r}.
    """

    def testBork(self):
        """
        Three unclosed C{<bork>} tags are reported as three start tokens.
        """
        parser = Sux0r()
        parser.connectionMade()
        parser.dataReceived("<bork><bork><bork>")
        startTokens = parser.getTagStarts()
        self.failUnlessEqual(len(startTokens), 3)
40 | |
41 | |
42 class MicroDOMTest(TestCase): | |
43 | |
44 def test_leadingTextDropping(self): | |
45 """ | |
46 Make sure that if there's no top-level node lenient-mode won't | |
47 drop leading text that's outside of any elements. | |
48 """ | |
49 s = "Hi orders! <br>Well. <br>" | |
50 d = microdom.parseString(s, beExtremelyLenient=True) | |
51 self.assertEquals(d.firstChild().toxml(), | |
52 '<html>Hi orders! <br />Well. <br /></html>') | |
53 | |
54 def test_trailingTextDropping(self): | |
55 """ | |
56 Ensure that no *trailing* text in a mal-formed | |
57 no-top-level-element document(s) will not be dropped. | |
58 """ | |
59 s = "<br>Hi orders!" | |
60 d = microdom.parseString(s, beExtremelyLenient=True) | |
61 self.assertEquals(d.firstChild().toxml(), | |
62 '<html><br />Hi orders!</html>') | |
63 | |
64 | |
65 def test_noTags(self): | |
66 """ | |
67 A string with nothing that looks like a tag at all should just | |
68 be parsed as body text. | |
69 """ | |
70 s = "Hi orders!" | |
71 d = microdom.parseString(s, beExtremelyLenient=True) | |
72 self.assertEquals(d.firstChild().toxml(), | |
73 "<html>Hi orders!</html>") | |
74 | |
75 | |
76 def test_surroundingCrap(self): | |
77 """ | |
78 If a document is surrounded by non-xml text, the text should | |
79 be remain in the XML. | |
80 """ | |
81 s = "Hi<br> orders!" | |
82 d = microdom.parseString(s, beExtremelyLenient=True) | |
83 self.assertEquals(d.firstChild().toxml(), | |
84 "<html>Hi<br /> orders!</html>") | |
85 | |
86 | |
87 def testCaseSensitiveSoonCloser(self): | |
88 s = """ | |
89 <HTML><BODY> | |
90 <P ALIGN="CENTER"> | |
91 <A HREF="http://www.apache.org/"><IMG SRC="/icons/apache_pb.gif"
></A> | |
92 </P> | |
93 | |
94 <P> | |
95 This is an insane set of text nodes that should NOT be gathered
under | |
96 the A tag above. | |
97 </P> | |
98 </BODY></HTML> | |
99 """ | |
100 d = microdom.parseString(s, beExtremelyLenient=1) | |
101 l = domhelpers.findNodesNamed(d.documentElement, 'a') | |
102 n = domhelpers.gatherTextNodes(l[0],1).replace(' ',' ') | |
103 self.assertEquals(n.find('insane'), -1) | |
104 | |
105 | |
106 def test_lenientParenting(self): | |
107 """ | |
108 Test that C{parentNode} attributes are set to meaningful values when | |
109 we are parsing HTML that lacks a root node. | |
110 """ | |
111 # Spare the rod, ruin the child. | |
112 s = "<br/><br/>" | |
113 d = microdom.parseString(s, beExtremelyLenient=1) | |
114 self.assertIdentical(d.documentElement, | |
115 d.documentElement.firstChild().parentNode) | |
116 | |
117 | |
118 def test_lenientParentSingle(self): | |
119 """ | |
120 Test that the C{parentNode} attribute is set to a meaningful value | |
121 when we parse an HTML document that has a non-Element root node. | |
122 """ | |
123 s = "Hello" | |
124 d = microdom.parseString(s, beExtremelyLenient=1) | |
125 self.assertIdentical(d.documentElement, | |
126 d.documentElement.firstChild().parentNode) | |
127 | |
128 | |
129 def testUnEntities(self): | |
130 s = """ | |
131 <HTML> | |
132 This HTML goes between Stupid <=CrAzY!=> Dumb. | |
133 </HTML> | |
134 """ | |
135 d = microdom.parseString(s, beExtremelyLenient=1) | |
136 n = domhelpers.gatherTextNodes(d) | |
137 self.assertNotEquals(n.find('>'), -1) | |
138 | |
139 def testEmptyError(self): | |
140 self.assertRaises(sux.ParseError, microdom.parseString, "") | |
141 | |
142 def testTameDocument(self): | |
143 s = """ | |
144 <test> | |
145 <it> | |
146 <is> | |
147 <a> | |
148 test | |
149 </a> | |
150 </is> | |
151 </it> | |
152 </test> | |
153 """ | |
154 d = microdom.parseString(s) | |
155 self.assertEquals( | |
156 domhelpers.gatherTextNodes(d.documentElement).strip() ,'test') | |
157 | |
158 def testAwfulTagSoup(self): | |
159 s = """ | |
160 <html> | |
161 <head><title> I send you this message to have your advice!!!!</titl e | |
162 </headd> | |
163 | |
164 <body bgcolor alink hlink vlink> | |
165 | |
166 <h1><BLINK>SALE</blINK> TWENTY MILLION EMAILS & FUR COAT NOW | |
167 FREE WITH `ENLARGER'</h1> | |
168 | |
169 YES THIS WONDERFUL AWFER IS NOW HERER!!! | |
170 | |
171 <script LANGUAGE="javascript"> | |
172 function give_answers() { | |
173 if (score < 70) { | |
174 alert("I hate you"); | |
175 }} | |
176 </script><a href=/foo.com/lalal name=foo>lalal</a> | |
177 </body> | |
178 </HTML> | |
179 """ | |
180 d = microdom.parseString(s, beExtremelyLenient=1) | |
181 l = domhelpers.findNodesNamed(d.documentElement, 'blink') | |
182 self.assertEquals(len(l), 1) | |
183 | |
184 def testScriptLeniency(self): | |
185 s = """ | |
186 <script>(foo < bar) and (bar > foo)</script> | |
187 <script language="javascript">foo </scrip bar </script> | |
188 <script src="foo"> | |
189 <script src="foo">baz</script> | |
190 <script /><script></script> | |
191 """ | |
192 d = microdom.parseString(s, beExtremelyLenient=1) | |
193 self.assertEquals(d.firstChild().firstChild().firstChild().data, | |
194 "(foo < bar) and (bar > foo)") | |
195 self.assertEquals( | |
196 d.firstChild().getElementsByTagName("script")[1].firstChild().data, | |
197 "foo </scrip bar ") | |
198 | |
199 def testScriptLeniencyIntelligence(self): | |
200 # if there is comment or CDATA in script, the autoquoting in bEL mode | |
201 # should not happen | |
202 s = """<script><!-- lalal --></script>""" | |
203 self.assertEquals( | |
204 microdom.parseString(s, beExtremelyLenient=1).firstChild().toxml(),
s) | |
205 s = """<script><![CDATA[lalal]]></script>""" | |
206 self.assertEquals( | |
207 microdom.parseString(s, beExtremelyLenient=1).firstChild().toxml(),
s) | |
208 s = """<script> // <![CDATA[ | |
209 lalal | |
210 //]]></script>""" | |
211 self.assertEquals( | |
212 microdom.parseString(s, beExtremelyLenient=1).firstChild().toxml(),
s) | |
213 | |
214 def testPreserveCase(self): | |
215 s = '<eNcApSuLaTe><sUxor></sUxor><bOrk><w00T>TeXt</W00t></BoRk></EnCaPsU
lAtE>' | |
216 s2 = s.lower().replace('text', 'TeXt') | |
217 # these are the only two option permutations that *can* parse the above | |
218 d = microdom.parseString(s, caseInsensitive=1, preserveCase=1) | |
219 d2 = microdom.parseString(s, caseInsensitive=1, preserveCase=0) | |
220 # caseInsensitive=0 preserveCase=0 is not valid, it's converted to | |
221 # caseInsensitive=0 preserveCase=1 | |
222 d3 = microdom.parseString(s2, caseInsensitive=0, preserveCase=1) | |
223 d4 = microdom.parseString(s2, caseInsensitive=1, preserveCase=0) | |
224 d5 = microdom.parseString(s2, caseInsensitive=1, preserveCase=1) | |
225 # this is slightly contrived, toxml() doesn't need to be identical | |
226 # for the documents to be equivalent (i.e. <b></b> to <b/>), | |
227 # however this assertion tests preserving case for start and | |
228 # end tags while still matching stuff like <bOrk></BoRk> | |
229 self.assertEquals(d.documentElement.toxml(), s) | |
230 self.assert_(d.isEqualToDocument(d2), "%r != %r" % (d.toxml(), d2.toxml(
))) | |
231 self.assert_(d2.isEqualToDocument(d3), "%r != %r" % (d2.toxml(), d3.toxm
l())) | |
232 # caseInsensitive=0 on the left, NOT perserveCase=1 on the right | |
233 ## XXX THIS TEST IS TURNED OFF UNTIL SOMEONE WHO CARES ABOUT FIXING IT D
OES | |
234 #self.failIf(d3.isEqualToDocument(d2), "%r == %r" % (d3.toxml(), d2.toxm
l())) | |
235 self.assert_(d3.isEqualToDocument(d4), "%r != %r" % (d3.toxml(), d4.toxm
l())) | |
236 self.assert_(d4.isEqualToDocument(d5), "%r != %r" % (d4.toxml(), d5.toxm
l())) | |
237 | |
238 def testDifferentQuotes(self): | |
239 s = '<test a="a" b=\'b\' />' | |
240 d = microdom.parseString(s) | |
241 e = d.documentElement | |
242 self.assertEquals(e.getAttribute('a'), 'a') | |
243 self.assertEquals(e.getAttribute('b'), 'b') | |
244 | |
245 def testLinebreaks(self): | |
246 s = '<test \na="a"\n\tb="#b" />' | |
247 d = microdom.parseString(s) | |
248 e = d.documentElement | |
249 self.assertEquals(e.getAttribute('a'), 'a') | |
250 self.assertEquals(e.getAttribute('b'), '#b') | |
251 | |
252 def testMismatchedTags(self): | |
253 for s in '<test>', '<test> </tset>', '</test>': | |
254 self.assertRaises(microdom.MismatchedTags, microdom.parseString, s) | |
255 | |
256 def testComment(self): | |
257 s = "<bar><!--<foo />--></bar>" | |
258 d = microdom.parseString(s) | |
259 e = d.documentElement | |
260 self.assertEquals(e.nodeName, "bar") | |
261 c = e.childNodes[0] | |
262 self.assert_(isinstance(c, microdom.Comment)) | |
263 self.assertEquals(c.value, "<foo />") | |
264 c2 = c.cloneNode() | |
265 self.assert_(c is not c2) | |
266 self.assertEquals(c2.toxml(), "<!--<foo />-->") | |
267 | |
268 def testText(self): | |
269 d = microdom.parseString("<bar>xxxx</bar>").documentElement | |
270 text = d.childNodes[0] | |
271 self.assert_(isinstance(text, microdom.Text)) | |
272 self.assertEquals(text.value, "xxxx") | |
273 clone = text.cloneNode() | |
274 self.assert_(clone is not text) | |
275 self.assertEquals(clone.toxml(), "xxxx") | |
276 | |
277 def testEntities(self): | |
278 nodes = microdom.parseString("<b>&AB;</b>").documentElement.chil
dNodes | |
279 self.assertEquals(len(nodes), 2) | |
280 self.assertEquals(nodes[0].data, "&") | |
281 self.assertEquals(nodes[1].data, "AB;") | |
282 self.assertEquals(nodes[0].cloneNode().toxml(), "&") | |
283 for n in nodes: | |
284 self.assert_(isinstance(n, microdom.EntityReference)) | |
285 | |
286 def testCData(self): | |
287 s = '<x><![CDATA[</x>\r\n & foo]]></x>' | |
288 cdata = microdom.parseString(s).documentElement.childNodes[0] | |
289 self.assert_(isinstance(cdata, microdom.CDATASection)) | |
290 self.assertEquals(cdata.data, "</x>\r\n & foo") | |
291 self.assertEquals(cdata.cloneNode().toxml(), "<![CDATA[</x>\r\n & foo]]>
") | |
292 | |
293 def testSingletons(self): | |
294 s = "<foo><b/><b /><b\n/></foo>" | |
295 s2 = "<foo><b/><b/><b/></foo>" | |
296 nodes = microdom.parseString(s).documentElement.childNodes | |
297 nodes2 = microdom.parseString(s2).documentElement.childNodes | |
298 self.assertEquals(len(nodes), 3) | |
299 for (n, n2) in zip(nodes, nodes2): | |
300 self.assert_(isinstance(n, microdom.Element)) | |
301 self.assertEquals(n.nodeName, "b") | |
302 self.assert_(n.isEqualToNode(n2)) | |
303 | |
304 def testAttributes(self): | |
305 s = '<foo a="b" />' | |
306 node = microdom.parseString(s).documentElement | |
307 | |
308 self.assertEquals(node.getAttribute("a"), "b") | |
309 self.assertEquals(node.getAttribute("c"), None) | |
310 self.assert_(node.hasAttribute("a")) | |
311 self.assert_(not node.hasAttribute("c")) | |
312 a = node.getAttributeNode("a") | |
313 self.assertEquals(a.value, "b") | |
314 | |
315 node.setAttribute("foo", "bar") | |
316 self.assertEquals(node.getAttribute("foo"), "bar") | |
317 | |
318 def testChildren(self): | |
319 s = "<foo><bar /><baz /><bax>foo</bax></foo>" | |
320 d = microdom.parseString(s).documentElement | |
321 self.assertEquals([n.nodeName for n in d.childNodes], ["bar", "baz", "ba
x"]) | |
322 self.assertEquals(d.lastChild().nodeName, "bax") | |
323 self.assertEquals(d.firstChild().nodeName, "bar") | |
324 self.assert_(d.hasChildNodes()) | |
325 self.assert_(not d.firstChild().hasChildNodes()) | |
326 | |
327 def testMutate(self): | |
328 s = "<foo />" | |
329 s1 = '<foo a="b"><bar/><foo/></foo>' | |
330 s2 = '<foo a="b">foo</foo>' | |
331 d = microdom.parseString(s).documentElement | |
332 d1 = microdom.parseString(s1).documentElement | |
333 d2 = microdom.parseString(s2).documentElement | |
334 | |
335 d.appendChild(d.cloneNode()) | |
336 d.setAttribute("a", "b") | |
337 child = d.childNodes[0] | |
338 self.assertEquals(child.getAttribute("a"), None) | |
339 self.assertEquals(child.nodeName, "foo") | |
340 | |
341 d.insertBefore(microdom.Element("bar"), child) | |
342 self.assertEquals(d.childNodes[0].nodeName, "bar") | |
343 self.assertEquals(d.childNodes[1], child) | |
344 for n in d.childNodes: | |
345 self.assertEquals(n.parentNode, d) | |
346 self.assert_(d.isEqualToNode(d1)) | |
347 | |
348 d.removeChild(child) | |
349 self.assertEquals(len(d.childNodes), 1) | |
350 self.assertEquals(d.childNodes[0].nodeName, "bar") | |
351 | |
352 t = microdom.Text("foo") | |
353 d.replaceChild(t, d.firstChild()) | |
354 self.assertEquals(d.firstChild(), t) | |
355 self.assert_(d.isEqualToNode(d2)) | |
356 | |
357 def testSearch(self): | |
358 s = "<foo><bar id='me' /><baz><foo /></baz></foo>" | |
359 s2 = "<fOo><bAr id='me' /><bAz><fOO /></bAz></fOo>" | |
360 d = microdom.parseString(s) | |
361 d2 = microdom.parseString(s2, caseInsensitive=0, preserveCase=1) | |
362 d3 = microdom.parseString(s2, caseInsensitive=1, preserveCase=1) | |
363 | |
364 root = d.documentElement | |
365 self.assertEquals(root.firstChild(), d.getElementById('me')) | |
366 self.assertEquals(d.getElementsByTagName("foo"), | |
367 [root, root.lastChild().firstChild()]) | |
368 | |
369 root = d2.documentElement | |
370 self.assertEquals(root.firstChild(), d2.getElementById('me')) | |
371 self.assertEquals(d2.getElementsByTagName('fOo'), [root]) | |
372 self.assertEquals(d2.getElementsByTagName('fOO'), | |
373 [root.lastChild().firstChild()]) | |
374 self.assertEquals(d2.getElementsByTagName('foo'), []) | |
375 | |
376 root = d3.documentElement | |
377 self.assertEquals(root.firstChild(), d3.getElementById('me')) | |
378 self.assertEquals(d3.getElementsByTagName('FOO'), | |
379 [root, root.lastChild().firstChild()]) | |
380 self.assertEquals(d3.getElementsByTagName('fOo'), | |
381 [root, root.lastChild().firstChild()]) | |
382 | |
383 def testDoctype(self): | |
384 s = ('<?xml version="1.0"?>' | |
385 '<!DOCTYPE foo PUBLIC "baz" "http://www.example.com/example.dtd">' | |
386 '<foo></foo>') | |
387 s2 = '<foo/>' | |
388 d = microdom.parseString(s) | |
389 d2 = microdom.parseString(s2) | |
390 self.assertEquals(d.doctype, | |
391 'foo PUBLIC "baz" "http://www.example.com/example.dtd"
') | |
392 self.assertEquals(d.toxml(), s) | |
393 self.failIf(d.isEqualToDocument(d2)) | |
394 self.failUnless(d.documentElement.isEqualToNode(d2.documentElement)) | |
395 | |
396 samples = [("<img/>", "<img />"), | |
397 ("<foo A='b'>x</foo>", '<foo A="b">x</foo>'), | |
398 ("<foo><BAR /></foo>", "<foo><BAR></BAR></foo>"), | |
399 ("<foo>hello there & yoyoy</foo>", | |
400 "<foo>hello there & yoyoy</foo>"), | |
401 ] | |
402 | |
403 def testOutput(self): | |
404 for s, out in self.samples: | |
405 d = microdom.parseString(s, caseInsensitive=0) | |
406 d2 = microdom.parseString(out, caseInsensitive=0) | |
407 testOut = d.documentElement.toxml() | |
408 self.assertEquals(out, testOut) | |
409 self.assert_(d.isEqualToDocument(d2)) | |
410 | |
411 def testErrors(self): | |
412 for s in ["<foo>&am</foo>", "<foo", "<f>&</f>", "<() />"]: | |
413 self.assertRaises(Exception, microdom.parseString, s) | |
414 | |
415 def testCaseInsensitive(self): | |
416 s = "<foo a='b'><BAx>x</bax></FOO>" | |
417 s2 = '<foo a="b"><bax>x</bax></foo>' | |
418 s3 = "<FOO a='b'><BAx>x</BAx></FOO>" | |
419 s4 = "<foo A='b'>x</foo>" | |
420 d = microdom.parseString(s) | |
421 d2 = microdom.parseString(s2) | |
422 d3 = microdom.parseString(s3, caseInsensitive=1) | |
423 d4 = microdom.parseString(s4, caseInsensitive=1, preserveCase=1) | |
424 d5 = microdom.parseString(s4, caseInsensitive=1, preserveCase=0) | |
425 d6 = microdom.parseString(s4, caseInsensitive=0, preserveCase=0) | |
426 out = microdom.parseString(s).documentElement.toxml() | |
427 self.assertRaises(microdom.MismatchedTags, microdom.parseString, | |
428 s, caseInsensitive=0) | |
429 self.assertEquals(out, s2) | |
430 self.failUnless(d.isEqualToDocument(d2)) | |
431 self.failUnless(d.isEqualToDocument(d3)) | |
432 self.failUnless(d4.documentElement.hasAttribute('a')) | |
433 self.failIf(d6.documentElement.hasAttribute('a')) | |
434 self.assertEquals(d4.documentElement.toxml(), '<foo A="b">x</foo>') | |
435 self.assertEquals(d5.documentElement.toxml(), '<foo a="b">x</foo>') | |
436 def testEatingWhitespace(self): | |
437 s = """<hello> | |
438 </hello>""" | |
439 d = microdom.parseString(s) | |
440 self.failUnless(not d.documentElement.hasChildNodes(), | |
441 d.documentElement.childNodes) | |
442 self.failUnless(d.isEqualToDocument(microdom.parseString('<hello></hello
>'))) | |
443 | |
444 def testLenientAmpersand(self): | |
445 prefix = "<?xml version='1.0'?>" | |
446 # we use <pre> so space will be preserved | |
447 for i, o in [("&", "&"), | |
448 ("& ", "& "), | |
449 ("&", "&"), | |
450 ("&hello monkey", "&hello monkey")]: | |
451 d = microdom.parseString("%s<pre>%s</pre>" | |
452 % (prefix, i), beExtremelyLenient=1) | |
453 self.assertEquals(d.documentElement.toxml(), "<pre>%s</pre>" % o) | |
454 # non-space preserving | |
455 d = microdom.parseString("<t>hello & there</t>", beExtremelyLenient=1) | |
456 self.assertEquals(d.documentElement.toxml(), "<t>hello & there</t>") | |
457 | |
458 def testInsensitiveLenient(self): | |
459 # testing issue #537 | |
460 d = microdom.parseString( | |
461 "<?xml version='1.0'?><bar><xA><y>c</Xa> <foo></bar>", | |
462 beExtremelyLenient=1) | |
463 self.assertEquals(d.documentElement.firstChild().toxml(), "<xa><y>c</y><
/xa>") | |
464 | |
465 def testSpacing(self): | |
466 # testing issue #414 | |
467 s = "<?xml version='1.0'?><p><q>smart</q> <code>HairDryer</code></p>" | |
468 d = microdom.parseString(s, beExtremelyLenient=1) | |
469 expected = "<p><q>smart</q> <code>HairDryer</code></p>" | |
470 actual = d.documentElement.toxml() | |
471 self.assertEquals(expected, actual) | |
472 | |
473 testSpacing.todo = "AAARGH white space swallowing screws this up" | |
474 | |
475 def testLaterCloserSimple(self): | |
476 s = "<ul><li>foo<li>bar<li>baz</ul>" | |
477 d = microdom.parseString(s, beExtremelyLenient=1) | |
478 expected = "<ul><li>foo</li><li>bar</li><li>baz</li></ul>" | |
479 actual = d.documentElement.toxml() | |
480 self.assertEquals(expected, actual) | |
481 | |
482 def testLaterCloserCaseInsensitive(self): | |
483 s = "<DL><p><DT>foo<DD>bar</DL>" | |
484 d = microdom.parseString(s, beExtremelyLenient=1) | |
485 expected = "<dl><p></p><dt>foo</dt><dd>bar</dd></dl>" | |
486 actual = d.documentElement.toxml() | |
487 self.assertEquals(expected, actual) | |
488 | |
489 def testLaterCloserTable(self): | |
490 s = ("<table>" | |
491 "<tr><th>name<th>value<th>comment" | |
492 "<tr><th>this<td>tag<td>soup" | |
493 "<tr><th>must<td>be<td>handled" | |
494 "</table>") | |
495 expected = ("<table>" | |
496 "<tr><th>name</th><th>value</th><th>comment</th></tr>" | |
497 "<tr><th>this</th><td>tag</td><td>soup</td></tr>" | |
498 "<tr><th>must</th><td>be</td><td>handled</td></tr>" | |
499 "</table>") | |
500 d = microdom.parseString(s, beExtremelyLenient=1) | |
501 actual = d.documentElement.toxml() | |
502 self.assertEquals(expected, actual) | |
503 testLaterCloserTable.todo = "Table parsing needs to be fixed." | |
504 | |
505 def testLaterCloserDL(self): | |
506 s = ("<dl>" | |
507 "<dt>word<dd>definition" | |
508 "<dt>word<dt>word<dd>definition<dd>definition" | |
509 "</dl>") | |
510 expected = ("<dl>" | |
511 "<dt>word</dt><dd>definition</dd>" | |
512 "<dt>word</dt><dt>word</dt><dd>definition</dd><dd>definition
</dd>" | |
513 "</dl>") | |
514 d = microdom.parseString(s, beExtremelyLenient=1) | |
515 actual = d.documentElement.toxml() | |
516 self.assertEquals(expected, actual) | |
517 | |
518 def testLaterCloserDL2(self): | |
519 s = ("<dl>" | |
520 "<dt>word<dd>definition<p>more definition" | |
521 "<dt>word" | |
522 "</dl>") | |
523 expected = ("<dl>" | |
524 "<dt>word</dt><dd>definition<p>more definition</p></dd>" | |
525 "<dt>word</dt>" | |
526 "</dl>") | |
527 d = microdom.parseString(s, beExtremelyLenient=1) | |
528 actual = d.documentElement.toxml() | |
529 self.assertEquals(expected, actual) | |
530 | |
531 testLaterCloserDL2.todo = "unclosed <p> messes it up." | |
532 | |
533 def testUnicodeTolerance(self): | |
534 import struct | |
535 s = '<foo><bar><baz /></bar></foo>' | |
536 j =(u'<?xml version="1.0" encoding="UCS-2" ?>\r\n<JAPANESE>\r\n' | |
537 u'<TITLE>\u5c02\u9580\u5bb6\u30ea\u30b9\u30c8 </TITLE></JAPANESE>') | |
538 j2=('\xff\xfe<\x00?\x00x\x00m\x00l\x00 \x00v\x00e\x00r\x00s\x00i\x00o' | |
539 '\x00n\x00=\x00"\x001\x00.\x000\x00"\x00 \x00e\x00n\x00c\x00o\x00d' | |
540 '\x00i\x00n\x00g\x00=\x00"\x00U\x00C\x00S\x00-\x002\x00"\x00 \x00?' | |
541 '\x00>\x00\r\x00\n\x00<\x00J\x00A\x00P\x00A\x00N\x00E\x00S\x00E' | |
542 '\x00>\x00\r\x00\n\x00<\x00T\x00I\x00T\x00L\x00E\x00>\x00\x02\\' | |
543 '\x80\x95\xb6[\xea0\xb90\xc80 \x00<\x00/\x00T\x00I\x00T\x00L\x00E' | |
544 '\x00>\x00<\x00/\x00J\x00A\x00P\x00A\x00N\x00E\x00S\x00E\x00>\x00') | |
545 def reverseBytes(s): | |
546 fmt = str(len(s) / 2) + 'H' | |
547 return struct.pack('<' + fmt, *struct.unpack('>' + fmt, s)) | |
548 urd = microdom.parseString(reverseBytes(s.encode('UTF-16'))) | |
549 ud = microdom.parseString(s.encode('UTF-16')) | |
550 sd = microdom.parseString(s) | |
551 self.assert_(ud.isEqualToDocument(sd)) | |
552 self.assert_(ud.isEqualToDocument(urd)) | |
553 ud = microdom.parseString(j) | |
554 urd = microdom.parseString(reverseBytes(j2)) | |
555 sd = microdom.parseString(j2) | |
556 self.assert_(ud.isEqualToDocument(sd)) | |
557 self.assert_(ud.isEqualToDocument(urd)) | |
558 | |
559 # test that raw text still gets encoded | |
560 # test that comments get encoded | |
561 j3=microdom.parseString(u'<foo/>') | |
562 hdr='<?xml version="1.0"?>' | |
563 div=microdom.lmx().text(u'\u221a', raw=1).node | |
564 de=j3.documentElement | |
565 de.appendChild(div) | |
566 de.appendChild(j3.createComment(u'\u221a')) | |
567 self.assertEquals(j3.toxml(), hdr+ | |
568 u'<foo><div>\u221a</div><!--\u221a--></foo>'.encode('u
tf8')) | |
569 | |
570 def testNamedChildren(self): | |
571 tests = {"<foo><bar /><bar unf='1' /><bar>asdfadsf</bar>" | |
572 "<bam/></foo>" : 3, | |
573 '<foo>asdf</foo>' : 0, | |
574 '<foo><bar><bar></bar></bar></foo>' : 1, | |
575 } | |
576 for t in tests.keys(): | |
577 node = microdom.parseString(t).documentElement | |
578 result = domhelpers.namedChildren(node, 'bar') | |
579 self.assertEquals(len(result), tests[t]) | |
580 if result: | |
581 self.assert_(hasattr(result[0], 'tagName')) | |
582 | |
583 def testCloneNode(self): | |
584 s = '<foo a="b"><bax>x</bax></foo>' | |
585 node = microdom.parseString(s).documentElement | |
586 clone = node.cloneNode(deep=1) | |
587 self.failIfEquals(node, clone) | |
588 self.assertEquals(len(node.childNodes), len(clone.childNodes)) | |
589 c1, c2 = node.firstChild(), clone.firstChild() | |
590 self.failIfEquals(c1, c2) | |
591 self.assertEquals(len(c1.childNodes), len(c2.childNodes)) | |
592 self.failIfEquals(c1.firstChild(), c2.firstChild()) | |
593 self.assertEquals(s, clone.toxml()) | |
594 self.assertEquals(node.namespace, clone.namespace) | |
595 | |
596 def testCloneDocument(self): | |
597 s = ('<?xml version="1.0"?>' | |
598 '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"' | |
599 '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><foo></f
oo>') | |
600 | |
601 node = microdom.parseString(s) | |
602 clone = node.cloneNode(deep=1) | |
603 self.failIfEquals(node, clone) | |
604 self.assertEquals(len(node.childNodes), len(clone.childNodes)) | |
605 self.assertEquals(s, clone.toxml()) | |
606 | |
607 self.failUnless(clone.isEqualToDocument(node)) | |
608 self.failUnless(node.isEqualToDocument(clone)) | |
609 | |
610 | |
611 def testLMX(self): | |
612 n = microdom.Element("p") | |
613 lmx = microdom.lmx(n) | |
614 lmx.text("foo") | |
615 b = lmx.b(a="c") | |
616 b.foo()["z"] = "foo" | |
617 b.foo() | |
618 b.add("bar", c="y") | |
619 | |
620 s = '<p>foo<b a="c"><foo z="foo"></foo><foo></foo><bar c="y"></bar></b><
/p>' | |
621 self.assertEquals(s, n.toxml()) | |
622 | |
623 def testDict(self): | |
624 n = microdom.Element("p") | |
625 d = {n : 1} # will fail if Element is unhashable | |
626 | |
627 def testEscaping(self): | |
628 # issue 590 | |
629 raw = "&'some \"stuff\"', <what up?>" | |
630 cooked = "&'some "stuff"', <what up?>" | |
631 esc1 = microdom.escape(raw) | |
632 self.assertEquals(esc1, cooked) | |
633 self.assertEquals(microdom.unescape(esc1), raw) | |
634 | |
635 def testNamespaces(self): | |
636 s = ''' | |
637 <x xmlns="base"> | |
638 <y /> | |
639 <y q="1" x:q="2" y:q="3" /> | |
640 <y:y xml:space="1">here is some space </y:y> | |
641 <y:y /> | |
642 <x:y /> | |
643 </x> | |
644 ''' | |
645 d = microdom.parseString(s) | |
646 # at least make sure it doesn't traceback | |
647 s2 = d.toprettyxml() | |
648 self.assertEquals(d.documentElement.namespace, | |
649 "base") | |
650 self.assertEquals(d.documentElement.getElementsByTagName("y")[0].namespa
ce, | |
651 "base") | |
652 self.assertEquals( | |
653 d.documentElement.getElementsByTagName("y")[1].getAttributeNS('base'
,'q'), | |
654 '1') | |
655 | |
656 d2 = microdom.parseString(s2) | |
657 self.assertEquals(d2.documentElement.namespace, | |
658 "base") | |
659 self.assertEquals(d2.documentElement.getElementsByTagName("y")[0].namesp
ace, | |
660 "base") | |
661 self.assertEquals( | |
662 d2.documentElement.getElementsByTagName("y")[1].getAttributeNS('base
','q'), | |
663 '1') | |
664 | |
665 def testNamespaceDelete(self): | |
666 """ | |
667 Test that C{toxml} can support xml structures that remove namespaces. | |
668 """ | |
669 s1 = ('<?xml version="1.0"?><html xmlns="http://www.w3.org/TR/REC-html40
">' | |
670 '<body xmlns=""></body></html>') | |
671 s2 = microdom.parseString(s1).toxml() | |
672 self.assertEquals(s1, s2) | |
673 | |
674 def testNamespaceInheritance(self): | |
675 """ | |
676 Check that unspecified namespace is a thing separate from undefined | |
677 namespace. This test added after discovering some weirdness in Lore. | |
678 """ | |
679 # will only work if childNodes is mutated. not sure why. | |
680 child = microdom.Element('ol') | |
681 parent = microdom.Element('div', namespace='http://www.w3.org/1999/xhtml
') | |
682 parent.childNodes = [child] | |
683 self.assertEquals(parent.toxml(), | |
684 '<div xmlns="http://www.w3.org/1999/xhtml"><ol></ol></
div>') | |
685 | |
686 | |
class TestBrokenHTML(TestCase):
    """
    Tests for microdom's handling of very bad HTML when
    C{beExtremelyLenient} is enabled. They are inspired by HTML generated by
    a mailer that breaks up very long lines by splitting them with '!\n '.
    The expected behaviour is loosely modelled on the way Firefox treats
    very bad HTML.
    """

    def checkParsed(self, input, expected, beExtremelyLenient=1):
        """
        Parse C{input} and assert that the XML of the resulting document
        element equals C{expected}.

        @param input: the markup to parse.
        @param expected: the expected C{toxml()} output of the document
            element.
        @param beExtremelyLenient: passed through to
            L{microdom.parseString}.
        """
        document = microdom.parseString(
            input, beExtremelyLenient=beExtremelyLenient)
        serialized = document.documentElement.toxml()
        self.assertEquals(serialized, expected)


    def test_brokenAttributeName(self):
        """
        A broken attribute name is handled on a best-effort basis. The
        important thing is that no exception is raised.
        """
        broken = '<body><h1><div al!\n ign="center">Foo</div></h1></body>'
        wanted = ('<body><h1><div ign="center" al="True">'
                  'Foo</div></h1></body>')
        self.checkParsed(broken, wanted)


    def test_brokenAttributeValue(self):
        """
        A broken attribute value is kept as it was written.
        """
        broken = '<body><h1><div align="cen!\n ter">Foo</div></h1></body>'
        wanted = '<body><h1><div align="cen!\n ter">Foo</div></h1></body>'
        self.checkParsed(broken, wanted)


    def test_brokenOpeningTag(self):
        """
        A broken opening tag is handled on a best-effort basis. The
        important thing is that no exception is raised.
        """
        broken = '<body><h1><sp!\n an>Hello World!</span></h1></body>'
        wanted = '<body><h1><sp an="True">Hello World!</sp></h1></body>'
        self.checkParsed(broken, wanted)


    def test_brokenSelfClosingTag(self):
        """
        A broken self-closing tag is handled on a best-effort basis. The
        important thing is that no exception is raised.
        """
        cases = [
            ('<body><span /!\n></body>', '<body><span></span></body>'),
            ('<span!\n />', '<span></span>'),
        ]
        for broken, wanted in cases:
            self.checkParsed(broken, wanted)


    def test_brokenClosingTag(self):
        """
        A broken closing tag is handled on a best-effort basis. The
        important thing is that no exception is raised.
        """
        wanted = '<body><h1><span>Hello World!</span></h1></body>'
        # Three placements of the break that all yield the same document.
        sameResult = (
            '<body><h1><span>Hello World!</sp!\nan></h1></body>',
            '<body><h1><span>Hello World!</!\nspan></h1></body>',
            '<body><h1><span>Hello World!</span!\n></h1></body>',
        )
        for broken in sameResult:
            self.checkParsed(broken, wanted)
        # A break before the slash yields an extra '!' element instead.
        broken = '<body><h1><span>Hello World!<!\n/span></h1></body>'
        wanted = '<body><h1><span>Hello World!<!></!></span></h1></body>'
        self.checkParsed(broken, wanted)
760 | |
OLD | NEW |