Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(70)

Side by Side Diff: third_party/twisted_8_1/twisted/web/microdom.py

Issue 12261012: Remove third_party/twisted_8_1 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/build
Patch Set: Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 # -*- test-case-name: twisted.web.test.test_xml -*-
2 #
3 # Copyright (c) 2001-2004 Twisted Matrix Laboratories.
4 # See LICENSE for details.
5
6 #
7
8 """Micro Document Object Model: a partial DOM implementation with SUX.
9
10 This is an implementation of what we consider to be the useful subset of the
11 DOM. The chief advantage of this library is that, not being burdened with
12 standards compliance, it can remain very stable between versions. We can also
13 implement utility 'pythonic' ways to access and mutate the XML tree.
14
15 Since this has not been subjected to a serious trial by fire, it is not recommended
16 to use this outside of Twisted applications. However, it seems to work just
17 fine for the documentation generator, which parses a fairly representative
18 sample of XML.
19
20 Microdom mainly focuses on working with HTML and XHTML.
21 """
22
23 from __future__ import nested_scopes
24
25 # System Imports
26 import re
27 from cStringIO import StringIO
28
29 # Twisted Imports
30 from twisted.web.sux import XMLParser, ParseError
31 from twisted.python.util import InsensitiveDict
32
33 # create NodeList class
34 from types import ListType as NodeList
35 from types import StringTypes, UnicodeType
36
def getElementsByTagName(iNode, name):
    """Collect the nodes at or below C{iNode} whose C{nodeName} is C{name}.

    The subtree is walked depth-first in document (pre-)order, starting with
    C{iNode} itself.  The name comparison is case-sensitive.

    @param iNode: root of the subtree to search; it and all of its
        descendants must provide C{nodeName} and C{childNodes}.
    @param name: the exact node name to match.
    @return: a list of the matching nodes, in document order.
    """
    matches = []
    # Work stack: the next node to visit is always the last item, so children
    # are pushed in reverse to be visited left-to-right.  Popping from the end
    # avoids shifting the whole list for every node visited.
    pending = [iNode]
    while pending:
        node = pending.pop()
        if node.nodeName == name:
            matches.append(node)
        children = list(node.childNodes)
        children.reverse()
        pending.extend(children)
    return matches
47
def getElementsByTagNameNoCase(iNode, name):
    """Collect the nodes at or below C{iNode} named C{name}, ignoring case.

    Both C{name} and each node's C{nodeName} are lower-cased before they are
    compared.  The subtree is walked depth-first in document (pre-)order,
    starting with C{iNode} itself.

    @param iNode: root of the subtree to search; it and all of its
        descendants must provide C{nodeName} and C{childNodes}.
    @param name: the node name to match, in any case.
    @return: a list of the matching nodes, in document order.
    """
    name = name.lower()
    matches = []
    # Work stack: the next node to visit is always the last item, so children
    # are pushed in reverse to be visited left-to-right.  Popping from the end
    # avoids shifting the whole list for every node visited.
    pending = [iNode]
    while pending:
        node = pending.pop()
        if node.nodeName.lower() == name:
            matches.append(node)
        children = list(node.childNodes)
        children.reverse()
        pending.extend(children)
    return matches
59
# Order is important: '&' must be escaped first (and, in the reversed tables,
# unescaped last), otherwise the ampersands belonging to the other entities
# would themselves be escaped a second time.
HTML_ESCAPE_CHARS = (('&', '&amp;'), # don't add any entities before this one
                     ('<', '&lt;'),
                     ('>', '&gt;'),
                     ('"', '&quot;'))
# Same pairs in reverse order, for use when undoing the escaping.
REV_HTML_ESCAPE_CHARS = list(HTML_ESCAPE_CHARS)
REV_HTML_ESCAPE_CHARS.reverse()

# XML additionally knows the apostrophe entity.
XML_ESCAPE_CHARS = HTML_ESCAPE_CHARS + (("'", '&apos;'),)
REV_XML_ESCAPE_CHARS = list(XML_ESCAPE_CHARS)
REV_XML_ESCAPE_CHARS.reverse()
71
def unescape(text, chars=REV_HTML_ESCAPE_CHARS):
    """Perform the exact opposite of 'escape'.

    @param text: the string in which entities are to be replaced.
    @param chars: sequence of C{(character, entity)} pairs; every occurrence
        of each entity is replaced by its character, in the order given.
    @return: the string with the entities replaced.
    """
    for plain, entity in chars:
        text = text.replace(entity, plain)
    return text
77
def escape(text, chars=HTML_ESCAPE_CHARS):
    """Escape a few XML special chars with XML entities.

    @param text: the string to escape.
    @param chars: sequence of C{(character, entity)} pairs; every occurrence
        of each character is replaced by its entity, in the order given.
    @return: the escaped string.
    """
    for plain, entity in chars:
        text = text.replace(plain, entity)
    return text
83
84
class MismatchedTags(Exception):
    """Raised when a closing tag does not match the tag that was expected.

    @ivar filename: name of the input being parsed.
    @ivar expect: name of the tag that should have been closed.
    @ivar got: name of the closing tag actually found (or a marker string).
    @ivar begLine: line at which the expected tag began.
    @ivar begCol: column at which the expected tag began.
    @ivar endLine: line at which the mismatch was detected.
    @ivar endCol: column at which the mismatch was detected.
    """

    def __init__(self, filename, expect, got, endLine, endCol, begLine, begCol):
        # Note the parameter order: the *end* position comes before the
        # *begin* position.
        self.filename = filename
        self.expect = expect
        self.got = got
        self.begLine = begLine
        self.begCol = begCol
        self.endLine = endLine
        self.endCol = endCol

    def __str__(self):
        return ("expected </%s>, got </%s> line: %s col: %s, began line: %s col: %s"
                % (self.expect, self.got, self.endLine, self.endCol, self.begLine,
                   self.begCol))
95
96
class Node(object):
    """Base class of all microdom nodes.

    @ivar parentNode: the node this node is attached to, or C{None}.
    @ivar childNodes: list of this node's children, in document order.
    """
    nodeName = "Node"

    def __init__(self, parentNode=None):
        self.parentNode = parentNode
        self.childNodes = []

    def isEqualToNode(self, n):
        """Compare this node's children with those of C{n}, pair by pair.

        @return: 1 if both nodes have the same number of children and each
            child reports itself equal to its counterpart, otherwise 0.
        """
        # zip() silently stops at the shorter list, so a differing number of
        # children has to be rejected explicitly.
        if len(self.childNodes) != len(n.childNodes):
            return 0
        for a, b in zip(self.childNodes, n.childNodes):
            if not a.isEqualToNode(b):
                return 0
        return 1

    def writexml(self, stream, indent='', addindent='', newl='', strip=0,
                 nsprefixes={}, namespace=''):
        """Serialize this node to C{stream}; subclasses must implement it."""
        # NOTE: the shared default dict for nsprefixes is never modified here.
        raise NotImplementedError()

    def toxml(self, indent='', addindent='', newl='', strip=0, nsprefixes={},
              namespace=''):
        """Return what L{writexml} writes for this node, as a string."""
        s = StringIO()
        self.writexml(s, indent, addindent, newl, strip, nsprefixes, namespace)
        rv = s.getvalue()
        return rv

    def writeprettyxml(self, stream, indent='', addindent=' ', newl='\n', strip=0):
        """Like L{writexml}, defaulting to one-space indents and newlines."""
        return self.writexml(stream, indent, addindent, newl, strip)

    def toprettyxml(self, indent='', addindent=' ', newl='\n', strip=0):
        """Like L{toxml}, defaulting to one-space indents and newlines."""
        return self.toxml(indent, addindent, newl, strip)

    def cloneNode(self, deep=0, parent=None):
        """Return a copy of this node; subclasses must implement it."""
        raise NotImplementedError()

    def hasChildNodes(self):
        """Return 1 if this node has at least one child, otherwise 0."""
        if self.childNodes:
            return 1
        else:
            return 0

    def appendChild(self, child):
        """Add C{child} as the last child and make this node its parent."""
        assert isinstance(child, Node)
        self.childNodes.append(child)
        child.parentNode = self

    def insertBefore(self, new, ref):
        """Insert C{new} directly before the existing child C{ref}.

        @raise ValueError: if C{ref} is not a child of this node.
        @return: C{new}.
        """
        i = self.childNodes.index(ref)
        new.parentNode = self
        self.childNodes.insert(i, new)
        return new

    def removeChild(self, child):
        """Detach C{child} if it is a child of this node.

        @return: C{child}, whether or not it was actually removed.
        """
        if child in self.childNodes:
            self.childNodes.remove(child)
            child.parentNode = None
        return child

    def replaceChild(self, newChild, oldChild):
        """Put C{newChild} in the position currently held by C{oldChild}.

        C{oldChild} is detached (its parent becomes C{None}) and this node
        becomes the parent of C{newChild}.

        @raise AssertionError: if C{newChild} is not a L{Node} or C{oldChild}
            does not have this node as its parent.
        @raise ValueError: if C{oldChild} is not in C{childNodes}.
        """
        assert isinstance(newChild, Node)
        #if newChild.parentNode:
        #    newChild.parentNode.removeChild(newChild)
        # The condition and the message must be two separate operands of the
        # assert statement; wrapped together in parentheses they would form a
        # non-empty tuple, which is always true.
        assert oldChild.parentNode is self, (
            'oldChild (%s): oldChild.parentNode (%s) != self (%s)'
            % (oldChild, oldChild.parentNode, self))
        self.childNodes[self.childNodes.index(oldChild)] = newChild
        oldChild.parentNode = None
        newChild.parentNode = self

    def lastChild(self):
        """Return the last child.

        @raise IndexError: if this node has no children.
        """
        return self.childNodes[-1]

    def firstChild(self):
        """Return the first child, or C{None} if this node has no children."""
        if len(self.childNodes):
            return self.childNodes[0]
        return None

    #def get_ownerDocument(self):
    #    """This doesn't really get the owner document; microdom nodes
    #    don't even have one necessarily.  This gets the root node,
    #    which is usually what you really meant.
    #    *NOT DOM COMPLIANT.*
    #    """
    #    node=self
    #    while (node.parentNode): node=node.parentNode
    #    return node
    #ownerDocument=node.get_ownerDocument()
    # leaving commented for discussion; see also domhelpers.getParents(node)
183
class Document(Node):
    """A document node, holding at most one child: the document element."""

    # Text written between "<!DOCTYPE " and ">" by writexml; falsy for none.
    doctype = None

    def __init__(self, documentElement=None):
        Node.__init__(self)
        if documentElement:
            self.appendChild(documentElement)

    def cloneNode(self, deep=0, parent=None):
        """Return a new Document with the same doctype.

        With C{deep} set the document element is cloned recursively;
        otherwise the very same element object is appended to the new
        document (which re-parents it).
        """
        duplicate = Document()
        duplicate.doctype = self.doctype
        rootElement = self.documentElement
        if deep:
            rootElement = rootElement.cloneNode(1, self)
        duplicate.appendChild(rootElement)
        return duplicate

    def isEqualToDocument(self, n):
        """Documents are equal if their doctypes and children are equal."""
        return (self.doctype == n.doctype) and self.isEqualToNode(n)

    def get_documentElement(self):
        """Return the first child; raises IndexError on an empty document."""
        return self.childNodes[0]
    documentElement = property(get_documentElement)

    def appendChild(self, c):
        """Attach the document element; a second child is refused."""
        assert not self.childNodes, "Only one element per document."
        Node.appendChild(self, c)

    def writexml(self, stream, indent='', addindent='', newl='', strip=0,
                 nsprefixes={}, namespace=''):
        """Write the XML declaration, the doctype (if any) and the element."""
        emit = stream.write
        emit('<?xml version="1.0"?>' + newl)
        if self.doctype:
            emit("<!DOCTYPE "+self.doctype+">" + newl)
        self.documentElement.writexml(stream, indent, addindent, newl, strip,
                                      nsprefixes, namespace)

    # of dubious utility (?)
    def createElement(self, name, **kw):
        """Return a new, unattached Element."""
        return Element(name, **kw)

    def createTextNode(self, text):
        """Return a new, unattached Text node."""
        return Text(text)

    def createComment(self, text):
        """Return a new, unattached Comment node."""
        return Comment(text)

    def getElementsByTagName(self, name):
        """Search the whole document for nodes named C{name}.

        Case is ignored when the document element is case-insensitive.
        """
        if not self.documentElement.caseInsensitive:
            return getElementsByTagName(self, name)
        return getElementsByTagNameNoCase(self, name)

    def getElementById(self, id):
        """Return the first node, breadth-first, whose "id" attribute is C{id}.

        Returns C{None} implicitly when no node matches.
        """
        queue = self.childNodes[:]
        position = 0
        while position < len(queue):
            candidate = queue[position]
            position += 1
            if candidate.childNodes:
                queue.extend(candidate.childNodes)
            if (hasattr(candidate, 'getAttribute')
                    and candidate.getAttribute("id") == id):
                return candidate
245
246
class EntityReference(Node):
    """A node standing for an entity reference such as C{&name;}."""

    def __init__(self, eref, parentNode=None):
        Node.__init__(self, parentNode)
        self.eref = eref
        # The serialized form: the entity name between '&' and ';'.
        serialized = "&" + eref + ";"
        self.nodeValue = serialized
        self.data = serialized

    def isEqualToEntityReference(self, n):
        """Return a true value only for an EntityReference with equal fields."""
        if isinstance(n, EntityReference):
            return (self.eref == n.eref) and (self.nodeValue == n.nodeValue)
        return 0

    def writexml(self, stream, indent='', addindent='', newl='', strip=0,
                 nsprefixes={}, namespace=''):
        """Write the reference verbatim; formatting arguments are ignored."""
        stream.write(self.nodeValue)

    def cloneNode(self, deep=0, parent=None):
        """Return a new reference to the same entity, attached to C{parent}."""
        return EntityReference(self.eref, parent)
265
266
class CharacterData(Node):
    """Base class for nodes that carry a piece of text.

    The text is available under three names: C{value}, C{data} and
    C{nodeValue}.
    """

    def __init__(self, data, parentNode=None):
        Node.__init__(self, parentNode)
        self.nodeValue = data
        self.data = data
        self.value = data

    def isEqualToCharacterData(self, n):
        """Compare the C{value} attributes of the two nodes."""
        return n.value == self.value
275
276
class Comment(CharacterData):
    """A comment node."""

    def writexml(self, stream, indent='', addindent='', newl='', strip=0,
                 nsprefixes={}, namespace=''):
        """Write the comment text between C{<!--} and C{-->}.

        Unicode text is encoded as UTF-8 first; the formatting arguments are
        ignored.
        """
        body = self.data
        if isinstance(body, UnicodeType):
            body = body.encode('utf8')
        stream.write("<!--%s-->" % body)

    def cloneNode(self, deep=0, parent=None):
        """Return a new Comment with the same text, attached to C{parent}."""
        return Comment(self.nodeValue, parent)
289
290
class Text(CharacterData):
    """A text node.

    @ivar raw: when true, the text is written without escaping or
        whitespace stripping.
    """

    def __init__(self, data, parentNode=None, raw=0):
        CharacterData.__init__(self, data, parentNode)
        self.raw = raw

    def cloneNode(self, deep=0, parent=None):
        """Return a new Text with the same value and raw flag."""
        return Text(self.nodeValue, parent, self.raw)

    def writexml(self, stream, indent='', addindent='', newl='', strip=0,
                 nsprefixes={}, namespace=''):
        """Write the text to C{stream}.

        Non-string values are converted with C{str}.  Unless the node is raw,
        runs of whitespace are collapsed to single spaces when C{strip} is
        set, and the text is escaped.  Unicode results are encoded as UTF-8.
        """
        output = self.nodeValue
        if not isinstance(output, StringTypes):
            output = str(output)
        if not self.raw:
            if strip:
                output = ' '.join(output.split())
            output = escape(output)
        if isinstance(output, UnicodeType):
            output = output.encode('utf8')
        stream.write(output)

    def __repr__(self):
        return "Text(%r)" % (self.nodeValue,)
319
320
class CDATASection(CharacterData):
    """A CDATA section node."""

    def cloneNode(self, deep=0, parent=None):
        """Return a new CDATASection with the same text."""
        return CDATASection(self.nodeValue, parent)

    def writexml(self, stream, indent='', addindent='', newl='', strip=0,
                 nsprefixes={}, namespace=''):
        """Write the text, unescaped, between the CDATA delimiters."""
        emit = stream.write
        emit("<![CDATA[")
        emit(self.nodeValue)
        emit("]]>")
330
331 def _genprefix():
332 i = 0
333 while True:
334 yield 'p' + str(i)
335 i = i + 1
336 genprefix = _genprefix().next
337
class _Attr(CharacterData):
    """Support class for getAttributeNode."""
340
class Element(Node):
    """An element node: a tag name, attributes and child nodes.

    @ivar tagName: the tag name (also available as C{nodeName}).
    @ivar endTagName: the name written in the closing tag.
    @ivar attributes: mapping of attribute name to value.  Namespaced
        attributes are keyed by a C{(namespace, name)} tuple.
    @ivar namespace: the element's namespace, or C{None}.
    @ivar nsprefixes: mapping of namespace to prefix declared on this
        element, or C{None} if L{addPrefixes} was never called.
    """

    preserveCase = 0
    caseInsensitive = 1
    nsprefixes = None

    def __init__(self, tagName, attributes=None, parentNode=None,
                 filename=None, markpos=None,
                 caseInsensitive=1, preserveCase=0,
                 namespace=None):
        """
        @param tagName: the tag name; lower-cased unless C{preserveCase}.
        @param attributes: optional dict of attributes.  NOTE: its values are
            unescaped in place, i.e. the caller's dict is modified.
        @param parentNode: the parent node, if any.
        @param filename: source file name, used by C{repr} and C{str}.
        @param markpos: source position, a C{(line, column)} pair.
        @param caseInsensitive: if true, attribute lookup ignores case.
        @param preserveCase: if true, keep the case of the tag name.
        @param namespace: the element's namespace, or C{None}.
        """
        Node.__init__(self, parentNode)
        self.preserveCase = preserveCase or not caseInsensitive
        self.caseInsensitive = caseInsensitive
        if not preserveCase:
            tagName = tagName.lower()
        if attributes is None:
            self.attributes = {}
        else:
            self.attributes = attributes
            for k, v in self.attributes.items():
                self.attributes[k] = unescape(v)

        if caseInsensitive:
            self.attributes = InsensitiveDict(self.attributes,
                                              preserve=preserveCase)

        self.endTagName = self.nodeName = self.tagName = tagName
        self._filename = filename
        self._markpos = markpos
        self.namespace = namespace

    def addPrefixes(self, pfxs):
        """Record namespace-to-prefix declarations for this element.

        The first mapping passed in is stored as-is (not copied); later ones
        are merged into it.
        """
        if self.nsprefixes is None:
            self.nsprefixes = pfxs
        else:
            self.nsprefixes.update(pfxs)

    def endTag(self, endTagName):
        """Set the closing tag name, lower-cased unless case is preserved."""
        if not self.preserveCase:
            endTagName = endTagName.lower()
        self.endTagName = endTagName

    def isEqualToElement(self, n):
        """Compare attributes and node names (ignoring case if insensitive)."""
        if self.caseInsensitive:
            return ((self.attributes == n.attributes)
                    and (self.nodeName.lower() == n.nodeName.lower()))
        return (self.attributes == n.attributes) and (self.nodeName == n.nodeName)

    def cloneNode(self, deep=0, parent=None):
        """Return a copy of this element attached to C{parent}.

        Attributes are copied; children are cloned recursively only when
        C{deep} is true, otherwise the clone has no children.
        """
        clone = Element(
            self.tagName, parentNode=parent, namespace=self.namespace,
            preserveCase=self.preserveCase, caseInsensitive=self.caseInsensitive)
        clone.attributes.update(self.attributes)
        if deep:
            clone.childNodes = [child.cloneNode(1, clone)
                                for child in self.childNodes]
        else:
            clone.childNodes = []
        return clone

    def getElementsByTagName(self, name):
        """Search this element and its descendants for nodes named C{name}."""
        if self.caseInsensitive:
            return getElementsByTagNameNoCase(self, name)
        return getElementsByTagName(self, name)

    def hasAttributes(self):
        """Always returns 1, even when the attribute mapping is empty."""
        return 1

    def getAttribute(self, name, default=None):
        """Return the value of attribute C{name}, or C{default}."""
        return self.attributes.get(name, default)

    def getAttributeNS(self, ns, name, default=None):
        """Return the value of attribute C{name} in namespace C{ns}.

        An attribute stored under the plain C{name} is also accepted when
        C{ns} is the element's own namespace.
        """
        nsk = (ns, name)
        if nsk in self.attributes:
            return self.attributes[nsk]
        if ns == self.namespace:
            return self.attributes.get(name, default)
        return default

    def getAttributeNode(self, name):
        """Return the attribute's value wrapped in an L{_Attr} node."""
        return _Attr(self.getAttribute(name), self)

    def setAttribute(self, name, attr):
        """Set attribute C{name} to the value C{attr}."""
        self.attributes[name] = attr

    def removeAttribute(self, name):
        """Delete attribute C{name} if it is present."""
        if name in self.attributes:
            del self.attributes[name]

    def hasAttribute(self, name):
        """Tell whether attribute C{name} is present."""
        return name in self.attributes

    def writexml(self, stream, indent='', addindent='', newl='', strip=0,
                 nsprefixes={}, namespace=''):
        """Serialize this element and its children to C{stream}.

        @param stream: object with a C{write} method.
        @param indent: indentation already in effect for this element.
        @param addindent: indentation added for each nesting level.
        @param newl: line separator written around block elements.
        @param strip: passed on to the children.
        @param nsprefixes: mapping of namespace to prefix already declared by
            the ancestors; it is read but not modified.
        @param namespace: the default namespace in effect in the parent.
        """
        # write beginning
        ALLOWSINGLETON = ('img', 'br', 'hr', 'base', 'meta', 'link', 'param',
                          'area', 'input', 'col', 'basefont', 'isindex',
                          'frame')
        BLOCKELEMENTS = ('html', 'head', 'body', 'noscript', 'ins', 'del',
                         'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'script',
                         'ul', 'ol', 'dl', 'pre', 'hr', 'blockquote',
                         'address', 'p', 'div', 'fieldset', 'table', 'tr',
                         'form', 'object', 'fieldset', 'applet', 'map')
        FORMATNICELY = ('tr', 'ul', 'ol', 'head')

        # this should never be necessary unless people start
        # changing .tagName on the fly(?)
        if not self.preserveCase:
            self.endTagName = self.tagName
        w = stream.write
        # newprefixes: declarations of this element that no ancestor has
        # made yet and which therefore must be written as xmlns:* attributes.
        if self.nsprefixes:
            newprefixes = self.nsprefixes.copy()
            for ns in nsprefixes.keys():
                if ns in newprefixes:
                    del newprefixes[ns]
        else:
            newprefixes = {}

        begin = ['<']
        if self.tagName in BLOCKELEMENTS:
            begin = [newl, indent] + begin
        bext = begin.extend
        writeattr = lambda _atr, _val: bext((' ', _atr, '="', escape(_val), '"'))
        if namespace != self.namespace and self.namespace is not None:
            if self.namespace in nsprefixes:
                prefix = nsprefixes[self.namespace]
                bext(prefix+':'+self.tagName)
            else:
                bext(self.tagName)
                writeattr("xmlns", self.namespace)
        else:
            bext(self.tagName)
        j = ''.join
        for attr, val in self.attributes.iteritems():
            if isinstance(attr, tuple):
                # namespaced attribute: reuse a known prefix or invent one
                ns, key = attr
                if ns in nsprefixes:
                    prefix = nsprefixes[ns]
                else:
                    prefix = genprefix()
                    newprefixes[ns] = prefix
                assert val is not None
                writeattr(prefix+':'+key,val)
            else:
                assert val is not None
                writeattr(attr, val)
        if newprefixes:
            for ns, prefix in newprefixes.iteritems():
                if prefix:
                    writeattr('xmlns:'+prefix, ns)
            newprefixes.update(nsprefixes)
            downprefixes = newprefixes
        else:
            downprefixes = nsprefixes
        w(j(begin))
        if self.childNodes:
            w(">")
            newindent = indent + addindent
            for child in self.childNodes:
                if self.tagName in BLOCKELEMENTS and \
                   self.tagName in FORMATNICELY:
                    w(j((newl, newindent)))
                child.writexml(stream, newindent, addindent, newl, strip,
                               downprefixes, self.namespace)
            if self.tagName in BLOCKELEMENTS:
                w(j((newl, indent)))
            w(j(("</", self.endTagName, '>')))

        elif self.tagName.lower() not in ALLOWSINGLETON:
            # empty, but not a tag that may be written as <tag />
            w(j(('></', self.endTagName, '>')))
        else:
            w(" />")

    def __repr__(self):
        rep = "Element(%s" % repr(self.nodeName)
        if self.attributes:
            rep += ", attributes=%r" % (self.attributes,)
        if self._filename:
            rep += ", filename=%r" % (self._filename,)
        if self._markpos:
            rep += ", markpos=%r" % (self._markpos,)
        return rep + ')'

    def __str__(self):
        rep = "<" + self.nodeName
        if self._filename or self._markpos:
            rep += " ("
        if self._filename:
            rep += repr(self._filename)
        if self._markpos:
            rep += " line %s column %s" % self._markpos
        if self._filename or self._markpos:
            rep += ")"
        for item in self.attributes.items():
            rep += " %s=%r" % item
        if self.hasChildNodes():
            rep += " >...</%s>" % self.nodeName
        else:
            rep += " />"
        return rep
540
def _unescapeDict(d):
    """Return a new dict with the same keys as C{d} and unescaped values."""
    return dict([(key, unescape(value)) for key, value in d.items()])
546
547 def _reverseDict(d):
548 dd = {}
549 for k, v in d.items():
550 dd[v]=k
551 return dd
552
class MicroDOMParser(XMLParser):
    """Builds microdom trees from the parse events of L{XMLParser}.

    Finished top-level nodes are collected in C{self.documents}.

    @ivar elementstack: the elements that are currently open, outermost first.
    @ivar nsstack: stack of C{(prefix-to-namespace dict, element, reverse
        dict)} tuples; a new entry is pushed by each element that declares
        namespaces.
    @ivar documents: the top-level nodes completed so far.
    @ivar beExtremelyLenient: if true, tolerate mismatched and unclosed tags
        instead of raising L{MismatchedTags}.
    """

    # <dash> glyph: a quick scan thru the DTD says BODY, AREA, LINK, IMG, HR,
    # P, DT, DD, LI, INPUT, OPTION, THEAD, TFOOT, TBODY, COLGROUP, COL, TR, TH,
    # TD, HEAD, BASE, META, HTML all have optional closing tags

    # Tags that are closed immediately after being opened (lenient mode only).
    soonClosers = 'area link br img hr input base meta'.split()
    # Maps an open tag to the tags whose start implicitly closes it
    # (lenient mode only).
    laterClosers = {'p': ['p', 'dt'],
                    'dt': ['dt','dd'],
                    'dd': ['dt', 'dd'],
                    'li': ['li'],
                    'tbody': ['thead', 'tfoot', 'tbody'],
                    'thead': ['thead', 'tfoot', 'tbody'],
                    'tfoot': ['thead', 'tfoot', 'tbody'],
                    'colgroup': ['colgroup'],
                    'col': ['col'],
                    'tr': ['tr'],
                    'td': ['td'],
                    'th': ['th'],
                    'head': ['body'],
                    'title': ['head', 'body'], # this looks wrong...
                    'option': ['option'],
                    }


    def __init__(self, beExtremelyLenient=0, caseInsensitive=1, preserveCase=0,
                 soonClosers=soonClosers, laterClosers=laterClosers):
        self.elementstack = []
        d = {'xmlns': 'xmlns', '': None}
        dr = _reverseDict(d)
        self.nsstack = [(d,None,dr)]
        self.documents = []
        self._mddoctype = None
        self.beExtremelyLenient = beExtremelyLenient
        self.caseInsensitive = caseInsensitive
        self.preserveCase = preserveCase or not caseInsensitive
        self.soonClosers = soonClosers
        self.laterClosers = laterClosers
        # self.indentlevel = 0

    def shouldPreserveSpace(self):
        """Return 1 if any open element asks for whitespace to be kept.

        That is the case inside a C{pre} element or inside an element whose
        C{xml:space} attribute is C{preserve}.
        """
        for el in self.elementstack:
            if el.tagName == 'pre' or el.getAttribute("xml:space", '') == 'preserve':
                return 1
        return 0

    def _getparent(self):
        """Return the innermost open element, or C{None}."""
        if self.elementstack:
            return self.elementstack[-1]
        else:
            return None

    # Leading "//" or "/*" comment opener, with surrounding whitespace.
    COMMENT = re.compile(r"\s*/[/*]\s*")

    def _fixScriptElement(self, el):
        """Turn the text of a script element into a Comment or CDATA node.

        Only acts in lenient mode and only if C{el} has exactly one child
        which is a L{Text} whose content parses as a single comment or CDATA
        section.
        """
        # this deals with case where there is comment or CDATA inside
        # <script> tag and we want to do the right thing with it
        if not self.beExtremelyLenient or not len(el.childNodes) == 1:
            return
        c = el.firstChild()
        if isinstance(c, Text):
            # deal with nasty people who do stuff like:
            #   <script> // <!--
            #      x = 1;
            #   // --></script>
            # tidy does this, for example.
            prefix = ""
            oldvalue = c.value
            match = self.COMMENT.match(oldvalue)
            if match:
                prefix = match.group()
                oldvalue = oldvalue[len(prefix):]

            # now see if contents are actual node and comment or CDATA
            try:
                e = parseString("<a>%s</a>" % oldvalue).childNodes[0]
            except (ParseError, MismatchedTags):
                return
            if len(e.childNodes) != 1:
                return
            e = e.firstChild()
            if isinstance(e, (CDATASection, Comment)):
                el.childNodes = []
                if prefix:
                    el.childNodes.append(Text(prefix))
                el.childNodes.append(e)

    def gotDoctype(self, doctype):
        """Remember the doctype for the document being built."""
        self._mddoctype = doctype

    def gotTagStart(self, name, attributes):
        """Open a new element named C{name} with the given attributes.

        Namespace declarations (C{xmlns...} attributes) are removed from the
        attributes and recorded; attributes with a known prefix are re-keyed
        as C{(namespace, name)} tuples.
        """
        # print ' '*self.indentlevel, 'start tag',name
        # self.indentlevel += 1
        parent = self._getparent()
        if (self.beExtremelyLenient and isinstance(parent, Element)):
            parentName = parent.tagName
            myName = name
            if self.caseInsensitive:
                parentName = parentName.lower()
                myName = myName.lower()
            if myName in self.laterClosers.get(parentName, []):
                # this tag implicitly closes its would-be parent
                self.gotTagEnd(parent.tagName)
                parent = self._getparent()
        attributes = _unescapeDict(attributes)
        namespaces = self.nsstack[-1][0]
        newspaces = {}
        # Iterate over a snapshot: the loop body deletes from the dict.
        for k, v in list(attributes.items()):
            if k.startswith('xmlns'):
                spacenames = k.split(':',1)
                if len(spacenames) == 2:
                    newspaces[spacenames[1]] = v
                else:
                    newspaces[''] = v
                del attributes[k]
        if newspaces:
            namespaces = namespaces.copy()
            namespaces.update(newspaces)
        # Iterate over a snapshot: the loop body adds and deletes keys.
        for k, v in list(attributes.items()):
            ksplit = k.split(':', 1)
            if len(ksplit) == 2:
                pfx, tv = ksplit
                if pfx != 'xml' and pfx in namespaces:
                    attributes[namespaces[pfx], tv] = v
                    del attributes[k]
        el = Element(name, attributes, parent,
                     self.filename, self.saveMark(),
                     caseInsensitive=self.caseInsensitive,
                     preserveCase=self.preserveCase,
                     namespace=namespaces.get(''))
        revspaces = _reverseDict(newspaces)
        el.addPrefixes(revspaces)

        if newspaces:
            rscopy = self.nsstack[-1][2].copy()
            rscopy.update(revspaces)
            self.nsstack.append((namespaces, el, rscopy))
        self.elementstack.append(el)
        if parent:
            parent.appendChild(el)
        if (self.beExtremelyLenient and el.tagName in self.soonClosers):
            self.gotTagEnd(name)

    def _gotStandalone(self, factory, data):
        """Create a childless node and attach it to the open element.

        Without an open element the node becomes a top-level document node
        in lenient mode and is dropped otherwise.
        """
        parent = self._getparent()
        te = factory(data, parent)
        if parent:
            parent.appendChild(te)
        elif self.beExtremelyLenient:
            self.documents.append(te)

    def gotText(self, data):
        """Add a Text node, unless it is ignorable whitespace."""
        if data.strip() or self.shouldPreserveSpace():
            self._gotStandalone(Text, data)

    def gotComment(self, data):
        """Add a Comment node."""
        self._gotStandalone(Comment, data)

    def gotEntityReference(self, entityRef):
        """Add an EntityReference node."""
        self._gotStandalone(EntityReference, entityRef)

    def gotCData(self, cdata):
        """Add a CDATASection node."""
        self._gotStandalone(CDATASection, cdata)

    def gotTagEnd(self, name):
        """Close the innermost open element in response to C{</name>}.

        @raise MismatchedTags: in strict mode, if nothing is open or the
            open element's name or namespace does not match.
        """
        # print ' '*self.indentlevel, 'end tag',name
        # self.indentlevel -= 1
        if not self.elementstack:
            if self.beExtremelyLenient:
                return
            raise MismatchedTags(*((self.filename, "NOTHING", name)
                                   +self.saveMark()+(0,0)))
        el = self.elementstack.pop()
        pfxdix = self.nsstack[-1][2]
        if self.nsstack[-1][1] is el:
            nstuple = self.nsstack.pop()
        else:
            nstuple = None
        if self.caseInsensitive:
            tn = el.tagName.lower()
            cname = name.lower()
        else:
            tn = el.tagName
            cname = name

        nsplit = name.split(':',1)
        if len(nsplit) == 2:
            pfx, newname = nsplit
            ns = pfxdix.get(pfx,None)
            if ns is not None:
                if el.namespace != ns:
                    if not self.beExtremelyLenient:
                        raise MismatchedTags(*((self.filename, el.tagName, name)
                                               +self.saveMark()+el._markpos))
        if not (tn == cname):
            if self.beExtremelyLenient:
                if self.elementstack:
                    lastEl = self.elementstack[0]
                    # look outwards for an open element this tag could close
                    for idx in xrange(len(self.elementstack)):
                        if self.elementstack[-(idx+1)].tagName == cname:
                            self.elementstack[-(idx+1)].endTag(name)
                            break
                    else:
                        # this was a garbage close tag; wait for a real one
                        self.elementstack.append(el)
                        if nstuple is not None:
                            self.nsstack.append(nstuple)
                        return
                    # implicitly close everything inside the matched element
                    del self.elementstack[-(idx+1):]
                    if not self.elementstack:
                        self.documents.append(lastEl)
                        return
            else:
                raise MismatchedTags(*((self.filename, el.tagName, name)
                                       +self.saveMark()+el._markpos))
        el.endTag(name)
        if not self.elementstack:
            self.documents.append(el)
        if self.beExtremelyLenient and el.tagName == "script":
            self._fixScriptElement(el)

    def connectionLost(self, reason):
        """Finish parsing; elements still open are an error in strict mode."""
        XMLParser.connectionLost(self, reason) # This can cause more events!
        if self.elementstack:
            if self.beExtremelyLenient:
                self.documents.append(self.elementstack[0])
            else:
                raise MismatchedTags(*((self.filename, self.elementstack[-1],
                                        "END_OF_FILE")
                                       +self.saveMark()
                                       +self.elementstack[-1]._markpos))
784
785
def parse(readable, *args, **kwargs):
    """Parse HTML or XML readable.

    @param readable: an object with a C{read} method, or otherwise a file
        name, which is opened in binary mode (and closed again) here.
    @param args: positional arguments for L{MicroDOMParser}.
    @param kwargs: keyword arguments for L{MicroDOMParser}.
    @return: a L{Document}.  In lenient mode the top-level nodes are wrapped
        in an C{html} element unless there is exactly one and it is an
        element.
    @raise ParseError: if the input contains no top-level nodes.
    """
    openedHere = not hasattr(readable, "read")
    if openedHere:
        readable = open(readable, "rb")
    try:
        mdp = MicroDOMParser(*args, **kwargs)
        mdp.filename = getattr(readable, "name", "<xmlfile />")
        mdp.makeConnection(None)
        if hasattr(readable,"getvalue"):
            # in-memory buffer: hand over the whole content at once
            mdp.dataReceived(readable.getvalue())
        else:
            r = readable.read(1024)
            while r:
                mdp.dataReceived(r)
                r = readable.read(1024)
        mdp.connectionLost(None)
    finally:
        # Only close what was opened here; a caller's object stays open.
        if openedHere:
            readable.close()

    if not mdp.documents:
        raise ParseError(mdp.filename, 0, 0, "No top-level Nodes in document")

    if mdp.beExtremelyLenient:
        if len(mdp.documents) == 1:
            d = mdp.documents[0]
            if not isinstance(d, Element):
                el = Element("html")
                el.appendChild(d)
                d = el
        else:
            d = Element("html")
            for child in mdp.documents:
                d.appendChild(child)
    else:
        d = mdp.documents[0]
    doc = Document(d)
    doc.doctype = mdp._mddoctype
    return doc
821
def parseString(st, *args, **kw):
    """Parse the HTML or XML held in the string C{st}.

    Unicode strings are encoded as UTF-16 first.  The remaining arguments
    are passed on to L{parse}.
    """
    if isinstance(st, UnicodeType):
        # this isn't particularly ideal, but it does work.
        st = st.encode('UTF-16')
    return parse(StringIO(st), *args, **kw)
827
828
def parseXML(readable):
    """Parse an XML readable object, keeping names case-sensitive."""
    xmlOptions = {'caseInsensitive': 0, 'preserveCase': 1}
    return parse(readable, **xmlOptions)
832
833
def parseXMLString(st):
    """Parse a string of XML, keeping names case-sensitive."""
    xmlOptions = {'caseInsensitive': 0, 'preserveCase': 1}
    return parseString(st, **xmlOptions)
837
838
839 # Utility
840
class lmx:
    """Easy creation of XML.

    Wraps a node.  Calling any attribute that does not start with an
    underscore, e.g. C{x.div(...)}, adds a child element of that name and
    returns a new C{lmx} wrapping it.  Item access reads and writes the
    attributes of the wrapped node.
    """

    def __init__(self, node='div'):
        # A string is taken as the tag name of a new element to wrap.
        if isinstance(node, StringTypes):
            node = Element(node)
        self.node = node

    def __getattr__(self, name):
        isPrivate = (name[0] == '_')
        if isPrivate:
            raise AttributeError("no private attrs")
        return lambda **kw: self.add(name,**kw)

    def __setitem__(self, key, val):
        self.node.setAttribute(key, val)

    def __getitem__(self, key):
        return self.node.getAttribute(key)

    def text(self, txt, raw=0):
        """Append a Text node to the wrapped node and return C{self}."""
        self.node.appendChild(Text(txt, raw=raw))
        return self

    def add(self, tagName, **kw):
        """Append a child element and return an C{lmx} wrapping it.

        Keyword arguments become attributes of the new element; one leading
        underscore is removed from each keyword name.
        """
        created = Element(tagName, caseInsensitive=0, preserveCase=0)
        self.node.appendChild(created)
        wrapper = lmx(created)
        for attrName, attrValue in kw.items():
            if attrName[0] == '_':
                attrName = attrName[1:]
            wrapper[attrName] = attrValue
        return wrapper
OLDNEW
« no previous file with comments | « third_party/twisted_8_1/twisted/web/http.py ('k') | third_party/twisted_8_1/twisted/web/monitor.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698