| OLD | NEW |
| (Empty) |
| 1 # -*- test-case-name: twisted.web.test.test_xml -*- | |
| 2 # | |
| 3 # Copyright (c) 2001-2004 Twisted Matrix Laboratories. | |
| 4 # See LICENSE for details. | |
| 5 | |
| 6 | |
| 7 """Some fairly inadequate testcases for Twisted XML support.""" | |
| 8 | |
| 9 from __future__ import nested_scopes | |
| 10 | |
| 11 from twisted.trial.unittest import TestCase | |
| 12 | |
| 13 from twisted.web import sux | |
| 14 | |
| 15 from twisted.web import microdom | |
| 16 | |
| 17 from twisted.web import domhelpers | |
| 18 | |
class Sux0r(sux.XMLParser):
    """
    Minimal L{sux.XMLParser} subclass that records the tag-start and text
    events passed to it so tests can inspect them afterwards.
    """

    def __init__(self):
        # Each recorded event is a tuple whose first item names its kind.
        self.tokens = []

    def getTagStarts(self):
        """
        Return the recorded C{"start"} tokens in the order they were stored.
        """
        starts = []
        for event in self.tokens:
            if event[0] == 'start':
                starts.append(event)
        return starts

    def gotTagStart(self, name, attrs):
        """
        Record an opening tag as a C{("start", name, attrs)} token.
        """
        startToken = ("start", name, attrs)
        self.tokens.append(startToken)

    def gotText(self, text):
        """
        Record character data as a C{("text", text)} token.
        """
        textToken = ("text", text)
        self.tokens.append(textToken)
| 31 | |
class SUXTest(TestCase):
    """
    Tests for the low-level parser, driven through L{Sux0r}.
    """

    def testBork(self):
        """
        Three opening tags fed to the parser produce three start tokens.
        """
        parser = Sux0r()
        parser.connectionMade()
        parser.dataReceived("<bork><bork><bork>")
        tagStarts = parser.getTagStarts()
        self.failUnlessEqual(len(tagStarts), 3)
| 40 | |
| 41 | |
| 42 class MicroDOMTest(TestCase): | |
| 43 | |
def test_leadingTextDropping(self):
    """
    In lenient mode, text that precedes the first element of a document
    with no top-level node is kept rather than dropped.
    """
    document = microdom.parseString("Hi orders! <br>Well. <br>",
                                    beExtremelyLenient=True)
    rendered = document.firstChild().toxml()
    self.assertEquals(rendered,
                      '<html>Hi orders! <br />Well. <br /></html>')
| 53 | |
def test_trailingTextDropping(self):
    """
    In lenient mode, text that follows the last element of a malformed
    document with no top-level element is kept rather than dropped.
    """
    document = microdom.parseString("<br>Hi orders!",
                                    beExtremelyLenient=True)
    rendered = document.firstChild().toxml()
    self.assertEquals(rendered, '<html><br />Hi orders!</html>')
| 63 | |
| 64 | |
def test_noTags(self):
    """
    Input containing nothing that looks like a tag is parsed as plain
    body text.
    """
    document = microdom.parseString("Hi orders!", beExtremelyLenient=True)
    rendered = document.firstChild().toxml()
    self.assertEquals(rendered, "<html>Hi orders!</html>")
| 74 | |
| 75 | |
def test_surroundingCrap(self):
    """
    Non-XML text on either side of an element remains in the parsed XML.
    """
    document = microdom.parseString("Hi<br> orders!",
                                    beExtremelyLenient=True)
    rendered = document.firstChild().toxml()
    self.assertEquals(rendered, "<html>Hi<br /> orders!</html>")
| 85 | |
| 86 | |
def testCaseSensitiveSoonCloser(self):
    """
    Text in a later paragraph is not gathered under an earlier C{A} tag
    when upper-case HTML is parsed in lenient mode.
    """
    s = """
<HTML><BODY>
<P ALIGN="CENTER">
<A HREF="http://www.apache.org/"><IMG SRC="/icons/apache_pb.gif"></A>
</P>

<P>
This is an insane set of text nodes that should NOT be gathered under
the A tag above.
</P>
</BODY></HTML>
"""
    d = microdom.parseString(s, beExtremelyLenient=1)
    anchors = domhelpers.findNodesNamed(d.documentElement, 'a')
    # Spell the non-breaking space as its entity: replacing a plain space
    # with a plain space is a no-op, and a raw non-ASCII byte has no place
    # in a source file without an encoding declaration.
    n = domhelpers.gatherTextNodes(anchors[0], 1).replace('&nbsp;', ' ')
    self.assertEquals(n.find('insane'), -1)
| 104 | |
| 105 | |
def test_lenientParenting(self):
    """
    When lenient parsing handles HTML that lacks a root node, the
    C{parentNode} of the document element's first child is the document
    element itself.
    """
    # Spare the rod, ruin the child.
    document = microdom.parseString("<br/><br/>", beExtremelyLenient=1)
    firstChild = document.documentElement.firstChild()
    self.assertIdentical(document.documentElement, firstChild.parentNode)
| 116 | |
| 117 | |
def test_lenientParentSingle(self):
    """
    When a leniently parsed HTML document has a non-Element root node,
    the C{parentNode} of the document element's first child is the
    document element itself.
    """
    document = microdom.parseString("Hello", beExtremelyLenient=1)
    firstChild = document.documentElement.firstChild()
    self.assertIdentical(document.documentElement, firstChild.parentNode)
| 127 | |
| 128 | |
def testUnEntities(self):
    """
    A literal C{>} in leniently parsed HTML is still present in the text
    gathered from the document.
    """
    markup = """
<HTML>
This HTML goes between Stupid <=CrAzY!=> Dumb.
</HTML>
"""
    document = microdom.parseString(markup, beExtremelyLenient=1)
    gathered = domhelpers.gatherTextNodes(document)
    self.assertNotEquals(gathered.find('>'), -1)
| 138 | |
def testEmptyError(self):
    """
    Parsing an empty string raises L{sux.ParseError}.
    """
    emptyDocument = ""
    self.assertRaises(sux.ParseError, microdom.parseString, emptyDocument)
| 141 | |
| 142 def testTameDocument(self): | |
| 143 s = """ | |
| 144 <test> | |
| 145 <it> | |
| 146 <is> | |
| 147 <a> | |
| 148 test | |
| 149 </a> | |
| 150 </is> | |
| 151 </it> | |
| 152 </test> | |
| 153 """ | |
| 154 d = microdom.parseString(s) | |
| 155 self.assertEquals( | |
| 156 domhelpers.gatherTextNodes(d.documentElement).strip() ,'test') | |
| 157 | |
def testAwfulTagSoup(self):
    """
    Lenient parsing of badly broken HTML still finds exactly one
    C{blink} element.
    """
    soup = """
<html>
<head><title> I send you this message to have your advice!!!!</titl e
</headd>

<body bgcolor alink hlink vlink>

<h1><BLINK>SALE</blINK> TWENTY MILLION EMAILS & FUR COAT NOW
FREE WITH `ENLARGER'</h1>

YES THIS WONDERFUL AWFER IS NOW HERER!!!

<script LANGUAGE="javascript">
function give_answers() {
if (score < 70) {
alert("I hate you");
}}
</script><a href=/foo.com/lalal name=foo>lalal</a>
</body>
</HTML>
"""
    document = microdom.parseString(soup, beExtremelyLenient=1)
    blinks = domhelpers.findNodesNamed(document.documentElement, 'blink')
    self.assertEquals(len(blinks), 1)
| 183 | |
def testScriptLeniency(self):
    """
    In lenient mode the contents of C{script} elements are kept as text,
    including C{<}, C{>} and something that merely resembles a closing
    tag.
    """
    markup = """
<script>(foo < bar) and (bar > foo)</script>
<script language="javascript">foo </scrip bar </script>
<script src="foo">
<script src="foo">baz</script>
<script /><script></script>
"""
    document = microdom.parseString(markup, beExtremelyLenient=1)
    firstScriptText = document.firstChild().firstChild().firstChild()
    self.assertEquals(firstScriptText.data, "(foo < bar) and (bar > foo)")
    scripts = document.firstChild().getElementsByTagName("script")
    self.assertEquals(scripts[1].firstChild().data, "foo </scrip bar ")
| 198 | |
def testScriptLeniencyIntelligence(self):
    """
    Scripts that contain a comment or CDATA serialize back to the exact
    input in lenient mode.
    """
    # if there is comment or CDATA in script, the autoquoting in bEL mode
    # should not happen
    scripts = (
        """<script><!-- lalal --></script>""",
        """<script><![CDATA[lalal]]></script>""",
        """<script> // <![CDATA[
lalal
//]]></script>""",
    )
    for script in scripts:
        document = microdom.parseString(script, beExtremelyLenient=1)
        self.assertEquals(document.firstChild().toxml(), script)
| 213 | |
def testPreserveCase(self):
    """
    Mixed-case markup parses under the case-insensitive option
    permutations, serializes back unchanged when case is preserved, and
    compares equal to documents parsed from its lower-cased form.
    """
    mixed = ('<eNcApSuLaTe><sUxor></sUxor><bOrk><w00T>TeXt</W00t></BoRk>'
             '</EnCaPsUlAtE>')
    lowered = mixed.lower().replace('text', 'TeXt')
    # these are the only two option permutations that *can* parse the above
    d = microdom.parseString(mixed, caseInsensitive=1, preserveCase=1)
    d2 = microdom.parseString(mixed, caseInsensitive=1, preserveCase=0)
    # caseInsensitive=0 preserveCase=0 is not valid, it's converted to
    # caseInsensitive=0 preserveCase=1
    d3 = microdom.parseString(lowered, caseInsensitive=0, preserveCase=1)
    d4 = microdom.parseString(lowered, caseInsensitive=1, preserveCase=0)
    d5 = microdom.parseString(lowered, caseInsensitive=1, preserveCase=1)
    # this is slightly contrived, toxml() doesn't need to be identical
    # for the documents to be equivalent (i.e. <b></b> to <b/>),
    # however this assertion tests preserving case for start and
    # end tags while still matching stuff like <bOrk></BoRk>
    self.assertEquals(d.documentElement.toxml(), mixed)
    # caseInsensitive=0 on the left, NOT preserveCase=1 on the right
    ## XXX THIS TEST IS TURNED OFF UNTIL SOMEONE WHO CARES ABOUT FIXING IT DOES
    #self.failIf(d3.isEqualToDocument(d2), "%r == %r" % (d3.toxml(), d2.toxml()))
    # (the disabled check above sat between the d2/d3 and d3/d4 pairs)
    for left, right in [(d, d2), (d2, d3), (d3, d4), (d4, d5)]:
        self.assert_(left.isEqualToDocument(right),
                     "%r != %r" % (left.toxml(), right.toxml()))
| 237 | |
def testDifferentQuotes(self):
    """
    Attribute values may be delimited by double or single quotes.
    """
    document = microdom.parseString('<test a="a" b=\'b\' />')
    element = document.documentElement
    for name in ('a', 'b'):
        self.assertEquals(element.getAttribute(name), name)
| 244 | |
def testLinebreaks(self):
    """
    Attributes separated by newlines and tabs are still parsed.
    """
    document = microdom.parseString('<test \na="a"\n\tb="#b" />')
    element = document.documentElement
    self.assertEquals(element.getAttribute('a'), 'a')
    self.assertEquals(element.getAttribute('b'), '#b')
| 251 | |
| 252 def testMismatchedTags(self): | |
| 253 for s in '<test>', '<test> </tset>', '</test>': | |
| 254 self.assertRaises(microdom.MismatchedTags, microdom.parseString, s) | |
| 255 | |
def testComment(self):
    """
    Markup inside a comment becomes a L{microdom.Comment} node whose
    clone is a distinct object that serializes back to the comment.
    """
    document = microdom.parseString("<bar><!--<foo />--></bar>")
    root = document.documentElement
    self.assertEquals(root.nodeName, "bar")
    comment = root.childNodes[0]
    self.assert_(isinstance(comment, microdom.Comment))
    self.assertEquals(comment.value, "<foo />")
    duplicate = comment.cloneNode()
    self.assert_(comment is not duplicate)
    self.assertEquals(duplicate.toxml(), "<!--<foo />-->")
| 267 | |
def testText(self):
    """
    Character data becomes a L{microdom.Text} node whose clone is a
    distinct object that serializes to the same text.
    """
    root = microdom.parseString("<bar>xxxx</bar>").documentElement
    textNode = root.childNodes[0]
    self.assert_(isinstance(textNode, microdom.Text))
    self.assertEquals(textNode.value, "xxxx")
    duplicate = textNode.cloneNode()
    self.assert_(duplicate is not textNode)
    self.assertEquals(duplicate.toxml(), "xxxx")
| 276 | |
def testEntities(self):
    """
    Two adjacent entity references each parse into their own
    L{microdom.EntityReference} node whose data is the reference text.
    """
    # The references are spelled out as entities.  With the ampersand
    # written bare, the input holds a single reference and the two-node
    # expectations below cannot hold.
    nodes = microdom.parseString(
        "<b>&amp;&#12AB;</b>").documentElement.childNodes
    self.assertEquals(len(nodes), 2)
    self.assertEquals(nodes[0].data, "&amp;")
    self.assertEquals(nodes[1].data, "&#12AB;")
    self.assertEquals(nodes[0].cloneNode().toxml(), "&amp;")
    for n in nodes:
        self.assert_(isinstance(n, microdom.EntityReference))
| 285 | |
def testCData(self):
    """
    A CDATA section becomes a L{microdom.CDATASection} whose data is the
    raw content and whose clone serializes back to the original section.
    """
    content = "</x>\r\n & foo"
    markup = '<x><![CDATA[</x>\r\n & foo]]></x>'
    section = microdom.parseString(markup).documentElement.childNodes[0]
    self.assert_(isinstance(section, microdom.CDATASection))
    self.assertEquals(section.data, content)
    self.assertEquals(section.cloneNode().toxml(),
                      "<![CDATA[</x>\r\n & foo]]>")
| 292 | |
def testSingletons(self):
    """
    Self-closing tags parse to equal elements whether or not whitespace
    or a newline precedes the closing slash.
    """
    spaced = microdom.parseString(
        "<foo><b/><b /><b\n/></foo>").documentElement.childNodes
    compact = microdom.parseString(
        "<foo><b/><b/><b/></foo>").documentElement.childNodes
    self.assertEquals(len(spaced), 3)
    for (node, reference) in zip(spaced, compact):
        self.assert_(isinstance(node, microdom.Element))
        self.assertEquals(node.nodeName, "b")
        self.assert_(node.isEqualToNode(reference))
| 303 | |
def testAttributes(self):
    """
    Attributes can be read, tested for presence, fetched as nodes and
    set on a parsed element.
    """
    element = microdom.parseString('<foo a="b" />').documentElement

    # Reading: a present attribute and an absent one.
    self.assertEquals(element.getAttribute("a"), "b")
    self.assertEquals(element.getAttribute("c"), None)
    self.assert_(element.hasAttribute("a"))
    self.assert_(not element.hasAttribute("c"))
    attributeNode = element.getAttributeNode("a")
    self.assertEquals(attributeNode.value, "b")

    # Writing: a new attribute is readable straight away.
    element.setAttribute("foo", "bar")
    self.assertEquals(element.getAttribute("foo"), "bar")
| 317 | |
def testChildren(self):
    """
    C{childNodes}, C{firstChild}, C{lastChild} and C{hasChildNodes}
    reflect the parsed child elements.
    """
    markup = "<foo><bar /><baz /><bax>foo</bax></foo>"
    root = microdom.parseString(markup).documentElement
    childNames = [child.nodeName for child in root.childNodes]
    self.assertEquals(childNames, ["bar", "baz", "bax"])
    self.assertEquals(root.lastChild().nodeName, "bax")
    self.assertEquals(root.firstChild().nodeName, "bar")
    self.assert_(root.hasChildNodes())
    self.assert_(not root.firstChild().hasChildNodes())
| 326 | |
def testMutate(self):
    """
    Appending, inserting, removing and replacing children, together with
    setting an attribute, mutate an element into the expected shapes.
    """
    root = microdom.parseString("<foo />").documentElement
    withChildren = microdom.parseString(
        '<foo a="b"><bar/><foo/></foo>').documentElement
    withText = microdom.parseString('<foo a="b">foo</foo>').documentElement

    # The clone is taken before the attribute is set, so it lacks it.
    root.appendChild(root.cloneNode())
    root.setAttribute("a", "b")
    clone = root.childNodes[0]
    self.assertEquals(clone.getAttribute("a"), None)
    self.assertEquals(clone.nodeName, "foo")

    root.insertBefore(microdom.Element("bar"), clone)
    self.assertEquals(root.childNodes[0].nodeName, "bar")
    self.assertEquals(root.childNodes[1], clone)
    for node in root.childNodes:
        self.assertEquals(node.parentNode, root)
    self.assert_(root.isEqualToNode(withChildren))

    root.removeChild(clone)
    self.assertEquals(len(root.childNodes), 1)
    self.assertEquals(root.childNodes[0].nodeName, "bar")

    replacement = microdom.Text("foo")
    root.replaceChild(replacement, root.firstChild())
    self.assertEquals(root.firstChild(), replacement)
    self.assert_(root.isEqualToNode(withText))
| 356 | |
def testSearch(self):
    """
    C{getElementById} and C{getElementsByTagName} find nodes, and the
    tag-name search honours the case-sensitivity the document was parsed
    with.
    """
    lower = "<foo><bar id='me' /><baz><foo /></baz></foo>"
    mixed = "<fOo><bAr id='me' /><bAz><fOO /></bAz></fOo>"
    plain = microdom.parseString(lower)
    sensitive = microdom.parseString(mixed, caseInsensitive=0,
                                     preserveCase=1)
    insensitive = microdom.parseString(mixed, caseInsensitive=1,
                                       preserveCase=1)

    root = plain.documentElement
    self.assertEquals(root.firstChild(), plain.getElementById('me'))
    self.assertEquals(plain.getElementsByTagName("foo"),
                      [root, root.lastChild().firstChild()])

    # Case-sensitive: each spelling matches only its own element.
    root = sensitive.documentElement
    self.assertEquals(root.firstChild(), sensitive.getElementById('me'))
    self.assertEquals(sensitive.getElementsByTagName('fOo'), [root])
    self.assertEquals(sensitive.getElementsByTagName('fOO'),
                      [root.lastChild().firstChild()])
    self.assertEquals(sensitive.getElementsByTagName('foo'), [])

    # Case-insensitive: any spelling matches both elements.
    root = insensitive.documentElement
    self.assertEquals(root.firstChild(), insensitive.getElementById('me'))
    bothFoos = [root, root.lastChild().firstChild()]
    self.assertEquals(insensitive.getElementsByTagName('FOO'), bothFoos)
    self.assertEquals(insensitive.getElementsByTagName('fOo'),
                      [root, root.lastChild().firstChild()])
| 382 | |
def testDoctype(self):
    """
    A DOCTYPE declaration is exposed as C{doctype}, is preserved by
    C{toxml}, and takes part in document (but not element) equality.
    """
    declaration = 'foo PUBLIC "baz" "http://www.example.com/example.dtd"'
    full = ('<?xml version="1.0"?>'
            '<!DOCTYPE foo PUBLIC "baz" "http://www.example.com/example.dtd">'
            '<foo></foo>')
    withDoctype = microdom.parseString(full)
    bare = microdom.parseString('<foo/>')
    self.assertEquals(withDoctype.doctype, declaration)
    self.assertEquals(withDoctype.toxml(), full)
    self.failIf(withDoctype.isEqualToDocument(bare))
    self.failUnless(
        withDoctype.documentElement.isEqualToNode(bare.documentElement))
| 395 | |
# (input, expected serialized output) pairs iterated over by testOutput.
samples = [("<img/>", "<img />"),
           ("<foo A='b'>x</foo>", '<foo A="b">x</foo>'),
           ("<foo><BAR /></foo>", "<foo><BAR></BAR></foo>"),
           # The ampersand is written as an entity: testErrors expects a
           # bare "&" to make the strict parser raise.
           ("<foo>hello there &amp; yoyoy</foo>",
            "<foo>hello there &amp; yoyoy</foo>"),
           ]
| 402 | |
def testOutput(self):
    """
    Each sample input serializes to its expected output, and the two
    parse to equal documents.
    """
    for source, expected in self.samples:
        parsedSource = microdom.parseString(source, caseInsensitive=0)
        parsedExpected = microdom.parseString(expected, caseInsensitive=0)
        rendered = parsedSource.documentElement.toxml()
        self.assertEquals(expected, rendered)
        self.assert_(parsedSource.isEqualToDocument(parsedExpected))
| 410 | |
def testErrors(self):
    """
    Each malformed input makes C{parseString} raise an exception.
    """
    malformed = ("<foo>&am</foo>", "<foo", "<f>&</f>", "<() />")
    for markup in malformed:
        self.assertRaises(Exception, microdom.parseString, markup)
| 414 | |
def testCaseInsensitive(self):
    """
    Exercise the C{caseInsensitive} and C{preserveCase} options on tag
    and attribute names.
    """
    mismatched = "<foo a='b'><BAx>x</bax></FOO>"
    lowered = '<foo a="b"><bax>x</bax></foo>'
    upperTags = "<FOO a='b'><BAx>x</BAx></FOO>"
    upperAttr = "<foo A='b'>x</foo>"

    d = microdom.parseString(mismatched)
    d2 = microdom.parseString(lowered)
    d3 = microdom.parseString(upperTags, caseInsensitive=1)
    d4 = microdom.parseString(upperAttr, caseInsensitive=1, preserveCase=1)
    d5 = microdom.parseString(upperAttr, caseInsensitive=1, preserveCase=0)
    d6 = microdom.parseString(upperAttr, caseInsensitive=0, preserveCase=0)
    out = microdom.parseString(mismatched).documentElement.toxml()

    # Tags that differ only in case are a mismatch for a case-sensitive
    # parse.
    self.assertRaises(microdom.MismatchedTags, microdom.parseString,
                      mismatched, caseInsensitive=0)
    self.assertEquals(out, lowered)
    self.failUnless(d.isEqualToDocument(d2))
    self.failUnless(d.isEqualToDocument(d3))
    self.failUnless(d4.documentElement.hasAttribute('a'))
    self.failIf(d6.documentElement.hasAttribute('a'))
    self.assertEquals(d4.documentElement.toxml(), '<foo A="b">x</foo>')
    self.assertEquals(d5.documentElement.toxml(), '<foo a="b">x</foo>')
def testEatingWhitespace(self):
    """
    An element containing only whitespace parses with no child nodes and
    equals the same element written with no content.
    """
    markup = """<hello>
</hello>"""
    document = microdom.parseString(markup)
    root = document.documentElement
    self.failUnless(not root.hasChildNodes(), root.childNodes)
    empty = microdom.parseString('<hello></hello>')
    self.failUnless(document.isEqualToDocument(empty))
| 443 | |
def testLenientAmpersand(self):
    """
    In lenient mode a bare C{&} is accepted and serialized as the
    C{&amp;} entity, while an existing C{&amp;} is left as it is.
    """
    prefix = "<?xml version='1.0'?>"
    # we use <pre> so space will be preserved
    # Expected outputs carry the entity: the serialized form of an
    # ampersand is "&amp;", never a bare "&".
    for i, o in [("&", "&amp;"),
                 ("& ", "&amp; "),
                 ("&amp;", "&amp;"),
                 ("&hello monkey", "&amp;hello monkey")]:
        d = microdom.parseString("%s<pre>%s</pre>"
                                 % (prefix, i), beExtremelyLenient=1)
        self.assertEquals(d.documentElement.toxml(), "<pre>%s</pre>" % o)
    # non-space preserving
    d = microdom.parseString("<t>hello & there</t>", beExtremelyLenient=1)
    self.assertEquals(d.documentElement.toxml(),
                      "<t>hello &amp; there</t>")
| 457 | |
def testInsensitiveLenient(self):
    """
    In lenient mode a closing tag whose case differs from its opening tag
    closes that element, and the unclosed inner element with it.
    """
    # testing issue #537
    markup = "<?xml version='1.0'?><bar><xA><y>c</Xa> <foo></bar>"
    document = microdom.parseString(markup, beExtremelyLenient=1)
    firstChild = document.documentElement.firstChild()
    self.assertEquals(firstChild.toxml(), "<xa><y>c</y></xa>")
| 464 | |
def testSpacing(self):
    """
    A single space between two inline elements should survive a lenient
    parse (marked todo below).
    """
    # testing issue #414
    markup = "<?xml version='1.0'?><p><q>smart</q> <code>HairDryer</code></p>"
    document = microdom.parseString(markup, beExtremelyLenient=1)
    expected = "<p><q>smart</q> <code>HairDryer</code></p>"
    actual = document.documentElement.toxml()
    self.assertEquals(expected, actual)

testSpacing.todo = "AAARGH white space swallowing screws this up"
| 474 | |
def testLaterCloserSimple(self):
    """
    Unclosed C{li} elements are closed by the next C{li} or by the end
    of the list in lenient mode.
    """
    document = microdom.parseString("<ul><li>foo<li>bar<li>baz</ul>",
                                    beExtremelyLenient=1)
    actual = document.documentElement.toxml()
    expected = "<ul><li>foo</li><li>bar</li><li>baz</li></ul>"
    self.assertEquals(expected, actual)
| 481 | |
def testLaterCloserCaseInsensitive(self):
    """
    Implicit closing of unclosed elements also works for upper-case tag
    names in lenient mode.
    """
    document = microdom.parseString("<DL><p><DT>foo<DD>bar</DL>",
                                    beExtremelyLenient=1)
    actual = document.documentElement.toxml()
    expected = "<dl><p></p><dt>foo</dt><dd>bar</dd></dl>"
    self.assertEquals(expected, actual)
| 488 | |
def testLaterCloserTable(self):
    """
    Unclosed table rows and cells should be closed implicitly in lenient
    mode (marked todo below).
    """
    soup = ("<table>"
            "<tr><th>name<th>value<th>comment"
            "<tr><th>this<td>tag<td>soup"
            "<tr><th>must<td>be<td>handled"
            "</table>")
    wellFormed = ("<table>"
                  "<tr><th>name</th><th>value</th><th>comment</th></tr>"
                  "<tr><th>this</th><td>tag</td><td>soup</td></tr>"
                  "<tr><th>must</th><td>be</td><td>handled</td></tr>"
                  "</table>")
    document = microdom.parseString(soup, beExtremelyLenient=1)
    rendered = document.documentElement.toxml()
    self.assertEquals(wellFormed, rendered)

testLaterCloserTable.todo = "Table parsing needs to be fixed."
| 504 | |
def testLaterCloserDL(self):
    """
    Unclosed C{dt} and C{dd} elements in a definition list are closed
    implicitly in lenient mode.
    """
    soup = ("<dl>"
            "<dt>word<dd>definition"
            "<dt>word<dt>word<dd>definition<dd>definition"
            "</dl>")
    wellFormed = ("<dl>"
                  "<dt>word</dt><dd>definition</dd>"
                  "<dt>word</dt><dt>word</dt>"
                  "<dd>definition</dd><dd>definition</dd>"
                  "</dl>")
    document = microdom.parseString(soup, beExtremelyLenient=1)
    rendered = document.documentElement.toxml()
    self.assertEquals(wellFormed, rendered)
| 517 | |
def testLaterCloserDL2(self):
    """
    An unclosed C{p} inside a C{dd} should be closed together with the
    C{dd} in lenient mode (marked todo below).
    """
    soup = ("<dl>"
            "<dt>word<dd>definition<p>more definition"
            "<dt>word"
            "</dl>")
    wellFormed = ("<dl>"
                  "<dt>word</dt><dd>definition<p>more definition</p></dd>"
                  "<dt>word</dt>"
                  "</dl>")
    document = microdom.parseString(soup, beExtremelyLenient=1)
    rendered = document.documentElement.toxml()
    self.assertEquals(wellFormed, rendered)

testLaterCloserDL2.todo = "unclosed <p> messes it up."
| 532 | |
def testUnicodeTolerance(self):
    """
    UTF-16 input in either byte order and unicode input parse to
    documents equal to their counterparts, and non-ASCII raw text and
    comments are UTF-8 encoded by C{toxml}.
    """
    import struct
    s = '<foo><bar><baz /></bar></foo>'
    j =(u'<?xml version="1.0" encoding="UCS-2" ?>\r\n<JAPANESE>\r\n'
        u'<TITLE>\u5c02\u9580\u5bb6\u30ea\u30b9\u30c8 </TITLE></JAPANESE>')
    j2=('\xff\xfe<\x00?\x00x\x00m\x00l\x00 \x00v\x00e\x00r\x00s\x00i\x00o'
        '\x00n\x00=\x00"\x001\x00.\x000\x00"\x00 \x00e\x00n\x00c\x00o\x00d'
        '\x00i\x00n\x00g\x00=\x00"\x00U\x00C\x00S\x00-\x002\x00"\x00 \x00?'
        '\x00>\x00\r\x00\n\x00<\x00J\x00A\x00P\x00A\x00N\x00E\x00S\x00E'
        '\x00>\x00\r\x00\n\x00<\x00T\x00I\x00T\x00L\x00E\x00>\x00\x02\\'
        '\x80\x95\xb6[\xea0\xb90\xc80 \x00<\x00/\x00T\x00I\x00T\x00L\x00E'
        '\x00>\x00<\x00/\x00J\x00A\x00P\x00A\x00N\x00E\x00S\x00E\x00>\x00')
    def reverseBytes(s):
        # Swap the two bytes of every 16-bit unit.  Floor division keeps
        # the struct repeat count an integer whatever "/" means.
        fmt = str(len(s) // 2) + 'H'
        return struct.pack('<' + fmt, *struct.unpack('>' + fmt, s))
    urd = microdom.parseString(reverseBytes(s.encode('UTF-16')))
    ud = microdom.parseString(s.encode('UTF-16'))
    sd = microdom.parseString(s)
    self.assert_(ud.isEqualToDocument(sd))
    self.assert_(ud.isEqualToDocument(urd))
    ud = microdom.parseString(j)
    urd = microdom.parseString(reverseBytes(j2))
    sd = microdom.parseString(j2)
    self.assert_(ud.isEqualToDocument(sd))
    self.assert_(ud.isEqualToDocument(urd))

    # test that raw text still gets encoded
    # test that comments get encoded
    j3=microdom.parseString(u'<foo/>')
    hdr='<?xml version="1.0"?>'
    div=microdom.lmx().text(u'\u221a', raw=1).node
    de=j3.documentElement
    de.appendChild(div)
    de.appendChild(j3.createComment(u'\u221a'))
    self.assertEquals(j3.toxml(), hdr+
                      u'<foo><div>\u221a</div><!--\u221a--></foo>'.encode('utf8'))
| 569 | |
def testNamedChildren(self):
    """
    C{domhelpers.namedChildren} returns the expected number of C{bar}
    children for each sample, and what it returns has a C{tagName}.
    """
    expectedCounts = {
        "<foo><bar /><bar unf='1' /><bar>asdfadsf</bar>"
        "<bam/></foo>": 3,
        '<foo>asdf</foo>': 0,
        '<foo><bar><bar></bar></bar></foo>': 1,
    }
    for markup, count in expectedCounts.items():
        root = microdom.parseString(markup).documentElement
        matches = domhelpers.namedChildren(root, 'bar')
        self.assertEquals(len(matches), count)
        if matches:
            self.assert_(hasattr(matches[0], 'tagName'))
| 582 | |
def testCloneNode(self):
    """
    A deep clone of an element has the same shape, serialization and
    namespace as the original, while its nodes do not compare equal to
    the originals.
    """
    markup = '<foo a="b"><bax>x</bax></foo>'
    original = microdom.parseString(markup).documentElement
    duplicate = original.cloneNode(deep=1)
    self.failIfEquals(original, duplicate)
    self.assertEquals(len(original.childNodes), len(duplicate.childNodes))
    originalChild = original.firstChild()
    duplicateChild = duplicate.firstChild()
    self.failIfEquals(originalChild, duplicateChild)
    self.assertEquals(len(originalChild.childNodes),
                      len(duplicateChild.childNodes))
    self.failIfEquals(originalChild.firstChild(),
                      duplicateChild.firstChild())
    self.assertEquals(markup, duplicate.toxml())
    self.assertEquals(original.namespace, duplicate.namespace)
| 595 | |
def testCloneDocument(self):
    """
    A deep clone of a whole document serializes to the original markup
    and is equal to the original in both directions.
    """
    markup = ('<?xml version="1.0"?>'
              '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'
              '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
              '<foo></foo>')

    original = microdom.parseString(markup)
    duplicate = original.cloneNode(deep=1)
    self.failIfEquals(original, duplicate)
    self.assertEquals(len(original.childNodes), len(duplicate.childNodes))
    self.assertEquals(markup, duplicate.toxml())

    self.failUnless(duplicate.isEqualToDocument(original))
    self.failUnless(original.isEqualToDocument(duplicate))
| 609 | |
| 610 | |
def testLMX(self):
    """
    Building children through C{microdom.lmx} yields the expected
    serialized markup on the wrapped element.
    """
    paragraph = microdom.Element("p")
    builder = microdom.lmx(paragraph)
    builder.text("foo")
    bold = builder.b(a="c")
    firstFoo = bold.foo()
    firstFoo["z"] = "foo"
    bold.foo()
    bold.add("bar", c="y")

    expected = ('<p>foo<b a="c"><foo z="foo"></foo><foo></foo>'
                '<bar c="y"></bar></b></p>')
    self.assertEquals(expected, paragraph.toxml())
| 622 | |
def testDict(self):
    """
    A L{microdom.Element} can be used as a dictionary key.
    """
    element = microdom.Element("p")
    # Building the dict raises if Element is unhashable.
    mapping = {element: 1}
| 626 | |
def testEscaping(self):
    """
    C{microdom.escape} turns markup-significant characters into entities
    and C{microdom.unescape} reverses it.
    """
    # issue 590
    raw = "&'some \"stuff\"', <what up?>"
    # The escaped form uses entities.  Literal double quotes here would
    # end the string early and make the module unparseable.
    cooked = "&amp;'some &quot;stuff&quot;', &lt;what up?&gt;"
    esc1 = microdom.escape(raw)
    self.assertEquals(esc1, cooked)
    self.assertEquals(microdom.unescape(esc1), raw)
| 634 | |
def testNamespaces(self):
    """
    Namespace information is available on parsed elements and attributes,
    and survives a round trip through C{toprettyxml}.
    """
    markup = '''
<x xmlns="base">
<y />
<y q="1" x:q="2" y:q="3" />
<y:y xml:space="1">here is some space </y:y>
<y:y />
<x:y />
</x>
'''
    def checkNamespaces(document):
        # The root and its first <y> are in "base"; the second <y>
        # carries q="1" in that namespace.
        root = document.documentElement
        self.assertEquals(root.namespace, "base")
        self.assertEquals(root.getElementsByTagName("y")[0].namespace,
                          "base")
        secondY = root.getElementsByTagName("y")[1]
        self.assertEquals(secondY.getAttributeNS('base', 'q'), '1')

    parsed = microdom.parseString(markup)
    # at least make sure it doesn't traceback
    pretty = parsed.toprettyxml()
    checkNamespaces(parsed)

    reparsed = microdom.parseString(pretty)
    checkNamespaces(reparsed)
| 664 | |
def testNamespaceDelete(self):
    """
    C{toxml} reproduces a document in which a child element resets the
    default namespace with C{xmlns=""}.
    """
    original = ('<?xml version="1.0"?>'
                '<html xmlns="http://www.w3.org/TR/REC-html40">'
                '<body xmlns=""></body></html>')
    roundTripped = microdom.parseString(original).toxml()
    self.assertEquals(original, roundTripped)
| 673 | |
def testNamespaceInheritance(self):
    """
    A child with no namespace specified, placed under a namespaced
    parent, serializes without an C{xmlns} attribute of its own. This
    test was added after discovering some weirdness in Lore.
    """
    # will only work if childNodes is mutated. not sure why.
    orderedList = microdom.Element('ol')
    container = microdom.Element('div',
                                 namespace='http://www.w3.org/1999/xhtml')
    container.childNodes = [orderedList]
    expected = '<div xmlns="http://www.w3.org/1999/xhtml"><ol></ol></div>'
    self.assertEquals(container.toxml(), expected)
| 685 | |
| 686 | |
class TestBrokenHTML(TestCase):
    """
    Tests for microdom's handling of very bad HTML when
    C{beExtremelyLenient} is enabled. The inputs are inspired by HTML
    generated by a mailer that breaks up very long lines by splitting
    them with '!\n '. The expected behaviour is loosely modelled on the
    way Firefox treats very bad HTML.
    """

    def checkParsed(self, input, expected, beExtremelyLenient=1):
        """
        Parse C{input} and assert that the XML of the resulting document
        element equals C{expected}.
        """
        document = microdom.parseString(
            input, beExtremelyLenient=beExtremelyLenient)
        rendered = document.documentElement.toxml()
        self.assertEquals(rendered, expected)


    def test_brokenAttributeName(self):
        """
        A line break inside an attribute name is handled on a best-effort
        basis. The important thing is that no exception is raised.
        """
        broken = '<body><h1><div al!\n ign="center">Foo</div></h1></body>'
        repaired = ('<body><h1><div ign="center" al="True">'
                    'Foo</div></h1></body>')
        self.checkParsed(broken, repaired)


    def test_brokenAttributeValue(self):
        """
        A line break inside an attribute value is kept as part of the
        value.
        """
        markup = '<body><h1><div align="cen!\n ter">Foo</div></h1></body>'
        self.checkParsed(
            markup,
            '<body><h1><div align="cen!\n ter">Foo</div></h1></body>')


    def test_brokenOpeningTag(self):
        """
        A line break inside an opening tag name is handled on a
        best-effort basis. The important thing is that no exception is
        raised.
        """
        broken = '<body><h1><sp!\n an>Hello World!</span></h1></body>'
        repaired = '<body><h1><sp an="True">Hello World!</sp></h1></body>'
        self.checkParsed(broken, repaired)


    def test_brokenSelfClosingTag(self):
        """
        A line break inside a self-closing tag is handled on a
        best-effort basis. The important thing is that no exception is
        raised.
        """
        cases = [
            ('<body><span /!\n></body>', '<body><span></span></body>'),
            ('<span!\n />', '<span></span>'),
        ]
        for broken, repaired in cases:
            self.checkParsed(broken, repaired)


    def test_brokenClosingTag(self):
        """
        A line break inside a closing tag is handled on a best-effort
        basis. The important thing is that no exception is raised.
        """
        # Breaks inside or just after the closing tag name still close
        # the span.
        closed = '<body><h1><span>Hello World!</span></h1></body>'
        brokenClosers = (
            '<body><h1><span>Hello World!</sp!\nan></h1></body>',
            '<body><h1><span>Hello World!</!\nspan></h1></body>',
            '<body><h1><span>Hello World!</span!\n></h1></body>',
        )
        for broken in brokenClosers:
            self.checkParsed(broken, closed)
        # A break between "<" and "/" yields an element named "!".
        self.checkParsed(
            '<body><h1><span>Hello World!<!\n/span></h1></body>',
            '<body><h1><span>Hello World!<!></!></span></h1></body>')
| 760 | |
| OLD | NEW |