Index: third_party/twisted_8_1/twisted/web/microdom.py |
diff --git a/third_party/twisted_8_1/twisted/web/microdom.py b/third_party/twisted_8_1/twisted/web/microdom.py |
deleted file mode 100644 |
index b7b430cf2da0815d0f3d30b27ab91ba3171e600b..0000000000000000000000000000000000000000 |
--- a/third_party/twisted_8_1/twisted/web/microdom.py |
+++ /dev/null |
@@ -1,873 +0,0 @@ |
-# -*- test-case-name: twisted.web.test.test_xml -*- |
-# |
-# Copyright (c) 2001-2004 Twisted Matrix Laboratories. |
-# See LICENSE for details. |
- |
-# |
- |
-"""Micro Document Object Model: a partial DOM implementation with SUX. |
- |
-This is an implementation of what we consider to be the useful subset of the |
-DOM. The chief advantage of this library is that, not being burdened with |
-standards compliance, it can remain very stable between versions. We can also |
-implement utility 'pythonic' ways to access and mutate the XML tree. |
- |
-Since this has not subjected to a serious trial by fire, it is not recommended |
-to use this outside of Twisted applications. However, it seems to work just |
-fine for the documentation generator, which parses a fairly representative |
-sample of XML. |
- |
-Microdom mainly focuses on working with HTML and XHTML. |
-""" |
- |
-from __future__ import nested_scopes |
- |
-# System Imports |
-import re |
-from cStringIO import StringIO |
- |
-# Twisted Imports |
-from twisted.web.sux import XMLParser, ParseError |
-from twisted.python.util import InsensitiveDict |
- |
-# create NodeList class |
-from types import ListType as NodeList |
-from types import StringTypes, UnicodeType |
- |
-def getElementsByTagName(iNode, name): |
- matches = [] |
- matches_append = matches.append # faster lookup. don't do this at home |
- slice=[iNode] |
- while len(slice)>0: |
- c = slice.pop(0) |
- if c.nodeName == name: |
- matches_append(c) |
- slice[:0] = c.childNodes |
- return matches |
- |
-def getElementsByTagNameNoCase(iNode, name): |
- name = name.lower() |
- matches = [] |
- matches_append = matches.append |
- slice=[iNode] |
- while len(slice)>0: |
- c = slice.pop(0) |
- if c.nodeName.lower() == name: |
- matches_append(c) |
- slice[:0] = c.childNodes |
- return matches |
- |
-# order is important |
-HTML_ESCAPE_CHARS = (('&', '&amp;'), # don't add any entities before this one |
- ('<', '&lt;'), |
- ('>', '&gt;'), |
- ('"', '&quot;')) |
-REV_HTML_ESCAPE_CHARS = list(HTML_ESCAPE_CHARS) |
-REV_HTML_ESCAPE_CHARS.reverse() |
- |
-XML_ESCAPE_CHARS = HTML_ESCAPE_CHARS + (("'", '&apos;'),) |
-REV_XML_ESCAPE_CHARS = list(XML_ESCAPE_CHARS) |
-REV_XML_ESCAPE_CHARS.reverse() |
- |
-def unescape(text, chars=REV_HTML_ESCAPE_CHARS): |
- "Perform the exact opposite of 'escape'." |
- for s, h in chars: |
- text = text.replace(h, s) |
- return text |
- |
-def escape(text, chars=HTML_ESCAPE_CHARS): |
- "Escape a few XML special chars with XML entities." |
- for s, h in chars: |
- text = text.replace(s, h) |
- return text |
- |
- |
-class MismatchedTags(Exception): |
- |
- def __init__(self, filename, expect, got, endLine, endCol, begLine, begCol): |
- (self.filename, self.expect, self.got, self.begLine, self.begCol, self.endLine, |
- self.endCol) = filename, expect, got, begLine, begCol, endLine, endCol |
- |
- def __str__(self): |
- return ("expected </%s>, got </%s> line: %s col: %s, began line: %s col: %s" |
- % (self.expect, self.got, self.endLine, self.endCol, self.begLine, |
- self.begCol)) |
- |
- |
-class Node(object): |
- nodeName = "Node" |
- |
- def __init__(self, parentNode=None): |
- self.parentNode = parentNode |
- self.childNodes = [] |
- |
- def isEqualToNode(self, n): |
- for a, b in zip(self.childNodes, n.childNodes): |
- if not a.isEqualToNode(b): |
- return 0 |
- return 1 |
- |
- def writexml(self, stream, indent='', addindent='', newl='', strip=0, |
- nsprefixes={}, namespace=''): |
- raise NotImplementedError() |
- |
- def toxml(self, indent='', addindent='', newl='', strip=0, nsprefixes={}, |
- namespace=''): |
- s = StringIO() |
- self.writexml(s, indent, addindent, newl, strip, nsprefixes, namespace) |
- rv = s.getvalue() |
- return rv |
- |
- def writeprettyxml(self, stream, indent='', addindent=' ', newl='\n', strip=0): |
- return self.writexml(stream, indent, addindent, newl, strip) |
- |
- def toprettyxml(self, indent='', addindent=' ', newl='\n', strip=0): |
- return self.toxml(indent, addindent, newl, strip) |
- |
- def cloneNode(self, deep=0, parent=None): |
- raise NotImplementedError() |
- |
- def hasChildNodes(self): |
- if self.childNodes: |
- return 1 |
- else: |
- return 0 |
- |
- def appendChild(self, child): |
- assert isinstance(child, Node) |
- self.childNodes.append(child) |
- child.parentNode = self |
- |
- def insertBefore(self, new, ref): |
- i = self.childNodes.index(ref) |
- new.parentNode = self |
- self.childNodes.insert(i, new) |
- return new |
- |
- def removeChild(self, child): |
- if child in self.childNodes: |
- self.childNodes.remove(child) |
- child.parentNode = None |
- return child |
- |
- def replaceChild(self, newChild, oldChild): |
- assert isinstance(newChild, Node) |
- #if newChild.parentNode: |
- # newChild.parentNode.removeChild(newChild) |
- assert (oldChild.parentNode is self, |
- ('oldChild (%s): oldChild.parentNode (%s) != self (%s)' |
- % (oldChild, oldChild.parentNode, self))) |
- self.childNodes[self.childNodes.index(oldChild)] = newChild |
- oldChild.parentNode = None |
- newChild.parentNode = self |
- |
- def lastChild(self): |
- return self.childNodes[-1] |
- |
- def firstChild(self): |
- if len(self.childNodes): |
- return self.childNodes[0] |
- return None |
- |
- #def get_ownerDocument(self): |
- # """This doesn't really get the owner document; microdom nodes |
- # don't even have one necessarily. This gets the root node, |
- # which is usually what you really meant. |
- # *NOT DOM COMPLIANT.* |
- # """ |
- # node=self |
- # while (node.parentNode): node=node.parentNode |
- # return node |
- #ownerDocument=node.get_ownerDocument() |
- # leaving commented for discussion; see also domhelpers.getParents(node) |
- |
-class Document(Node): |
- |
- def __init__(self, documentElement=None): |
- Node.__init__(self) |
- if documentElement: |
- self.appendChild(documentElement) |
- |
- def cloneNode(self, deep=0, parent=None): |
- d = Document() |
- d.doctype = self.doctype |
- if deep: |
- newEl = self.documentElement.cloneNode(1, self) |
- else: |
- newEl = self.documentElement |
- d.appendChild(newEl) |
- return d |
- |
- doctype = None |
- |
- def isEqualToDocument(self, n): |
- return (self.doctype == n.doctype) and self.isEqualToNode(n) |
- |
- def get_documentElement(self): |
- return self.childNodes[0] |
- documentElement=property(get_documentElement) |
- |
- def appendChild(self, c): |
- assert not self.childNodes, "Only one element per document." |
- Node.appendChild(self, c) |
- |
- def writexml(self, stream, indent='', addindent='', newl='', strip=0, |
- nsprefixes={}, namespace=''): |
- stream.write('<?xml version="1.0"?>' + newl) |
- if self.doctype: |
- stream.write("<!DOCTYPE "+self.doctype+">" + newl) |
- self.documentElement.writexml(stream, indent, addindent, newl, strip, |
- nsprefixes, namespace) |
- |
- # of dubious utility (?) |
- def createElement(self, name, **kw): |
- return Element(name, **kw) |
- |
- def createTextNode(self, text): |
- return Text(text) |
- |
- def createComment(self, text): |
- return Comment(text) |
- |
- def getElementsByTagName(self, name): |
- if self.documentElement.caseInsensitive: |
- return getElementsByTagNameNoCase(self, name) |
- return getElementsByTagName(self, name) |
- |
- def getElementById(self, id): |
- childNodes = self.childNodes[:] |
- while childNodes: |
- node = childNodes.pop(0) |
- if node.childNodes: |
- childNodes.extend(node.childNodes) |
- if hasattr(node, 'getAttribute') and node.getAttribute("id") == id: |
- return node |
- |
- |
-class EntityReference(Node): |
- |
- def __init__(self, eref, parentNode=None): |
- Node.__init__(self, parentNode) |
- self.eref = eref |
- self.nodeValue = self.data = "&" + eref + ";" |
- |
- def isEqualToEntityReference(self, n): |
- if not isinstance(n, EntityReference): |
- return 0 |
- return (self.eref == n.eref) and (self.nodeValue == n.nodeValue) |
- |
- def writexml(self, stream, indent='', addindent='', newl='', strip=0, |
- nsprefixes={}, namespace=''): |
- stream.write(self.nodeValue) |
- |
- def cloneNode(self, deep=0, parent=None): |
- return EntityReference(self.eref, parent) |
- |
- |
-class CharacterData(Node): |
- |
- def __init__(self, data, parentNode=None): |
- Node.__init__(self, parentNode) |
- self.value = self.data = self.nodeValue = data |
- |
- def isEqualToCharacterData(self, n): |
- return self.value == n.value |
- |
- |
-class Comment(CharacterData): |
- """A comment node.""" |
- |
- def writexml(self, stream, indent='', addindent='', newl='', strip=0, |
- nsprefixes={}, namespace=''): |
- val=self.data |
- if isinstance(val, UnicodeType): |
- val=val.encode('utf8') |
- stream.write("<!--%s-->" % val) |
- |
- def cloneNode(self, deep=0, parent=None): |
- return Comment(self.nodeValue, parent) |
- |
- |
-class Text(CharacterData): |
- |
- def __init__(self, data, parentNode=None, raw=0): |
- CharacterData.__init__(self, data, parentNode) |
- self.raw = raw |
- |
- def cloneNode(self, deep=0, parent=None): |
- return Text(self.nodeValue, parent, self.raw) |
- |
- def writexml(self, stream, indent='', addindent='', newl='', strip=0, |
- nsprefixes={}, namespace=''): |
- if self.raw: |
- val = self.nodeValue |
- if not isinstance(val, StringTypes): |
- val = str(self.nodeValue) |
- else: |
- v = self.nodeValue |
- if not isinstance(v, StringTypes): |
- v = str(v) |
- if strip: |
- v = ' '.join(v.split()) |
- val = escape(v) |
- if isinstance(val, UnicodeType): |
- val = val.encode('utf8') |
- stream.write(val) |
- |
- def __repr__(self): |
- return "Text(%s" % repr(self.nodeValue) + ')' |
- |
- |
-class CDATASection(CharacterData): |
- def cloneNode(self, deep=0, parent=None): |
- return CDATASection(self.nodeValue, parent) |
- |
- def writexml(self, stream, indent='', addindent='', newl='', strip=0, |
- nsprefixes={}, namespace=''): |
- stream.write("<![CDATA[") |
- stream.write(self.nodeValue) |
- stream.write("]]>") |
- |
-def _genprefix(): |
- i = 0 |
- while True: |
- yield 'p' + str(i) |
- i = i + 1 |
-genprefix = _genprefix().next |
- |
-class _Attr(CharacterData): |
- "Support class for getAttributeNode." |
- |
-class Element(Node): |
- |
- preserveCase = 0 |
- caseInsensitive = 1 |
- nsprefixes = None |
- |
- def __init__(self, tagName, attributes=None, parentNode=None, |
- filename=None, markpos=None, |
- caseInsensitive=1, preserveCase=0, |
- namespace=None): |
- Node.__init__(self, parentNode) |
- self.preserveCase = preserveCase or not caseInsensitive |
- self.caseInsensitive = caseInsensitive |
- if not preserveCase: |
- tagName = tagName.lower() |
- if attributes is None: |
- self.attributes = {} |
- else: |
- self.attributes = attributes |
- for k, v in self.attributes.items(): |
- self.attributes[k] = unescape(v) |
- |
- if caseInsensitive: |
- self.attributes = InsensitiveDict(self.attributes, |
- preserve=preserveCase) |
- |
- self.endTagName = self.nodeName = self.tagName = tagName |
- self._filename = filename |
- self._markpos = markpos |
- self.namespace = namespace |
- |
- def addPrefixes(self, pfxs): |
- if self.nsprefixes is None: |
- self.nsprefixes = pfxs |
- else: |
- self.nsprefixes.update(pfxs) |
- |
- def endTag(self, endTagName): |
- if not self.preserveCase: |
- endTagName = endTagName.lower() |
- self.endTagName = endTagName |
- |
- def isEqualToElement(self, n): |
- if self.caseInsensitive: |
- return ((self.attributes == n.attributes) |
- and (self.nodeName.lower() == n.nodeName.lower())) |
- return (self.attributes == n.attributes) and (self.nodeName == n.nodeName) |
- |
- def cloneNode(self, deep=0, parent=None): |
- clone = Element( |
- self.tagName, parentNode=parent, namespace=self.namespace, |
- preserveCase=self.preserveCase, caseInsensitive=self.caseInsensitive) |
- clone.attributes.update(self.attributes) |
- if deep: |
- clone.childNodes = [child.cloneNode(1, clone) for child in self.childNodes] |
- else: |
- clone.childNodes = [] |
- return clone |
- |
- def getElementsByTagName(self, name): |
- if self.caseInsensitive: |
- return getElementsByTagNameNoCase(self, name) |
- return getElementsByTagName(self, name) |
- |
- def hasAttributes(self): |
- return 1 |
- |
- def getAttribute(self, name, default=None): |
- return self.attributes.get(name, default) |
- |
- def getAttributeNS(self, ns, name, default=None): |
- nsk = (ns, name) |
- if self.attributes.has_key(nsk): |
- return self.attributes[nsk] |
- if ns == self.namespace: |
- return self.attributes.get(name, default) |
- return default |
- |
- def getAttributeNode(self, name): |
- return _Attr(self.getAttribute(name), self) |
- |
- def setAttribute(self, name, attr): |
- self.attributes[name] = attr |
- |
- def removeAttribute(self, name): |
- if name in self.attributes: |
- del self.attributes[name] |
- |
- def hasAttribute(self, name): |
- return name in self.attributes |
- |
- def writexml(self, stream, indent='', addindent='', newl='', strip=0, |
- nsprefixes={}, namespace=''): |
- # write beginning |
- ALLOWSINGLETON = ('img', 'br', 'hr', 'base', 'meta', 'link', 'param', |
- 'area', 'input', 'col', 'basefont', 'isindex', |
- 'frame') |
- BLOCKELEMENTS = ('html', 'head', 'body', 'noscript', 'ins', 'del', |
- 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'script', |
- 'ul', 'ol', 'dl', 'pre', 'hr', 'blockquote', |
- 'address', 'p', 'div', 'fieldset', 'table', 'tr', |
- 'form', 'object', 'fieldset', 'applet', 'map') |
- FORMATNICELY = ('tr', 'ul', 'ol', 'head') |
- |
- # this should never be necessary unless people start |
- # changing .tagName on the fly(?) |
- if not self.preserveCase: |
- self.endTagName = self.tagName |
- w = stream.write |
- if self.nsprefixes: |
- newprefixes = self.nsprefixes.copy() |
- for ns in nsprefixes.keys(): |
- if ns in newprefixes: |
- del newprefixes[ns] |
- else: |
- newprefixes = {} |
- |
- begin = ['<'] |
- if self.tagName in BLOCKELEMENTS: |
- begin = [newl, indent] + begin |
- bext = begin.extend |
- writeattr = lambda _atr, _val: bext((' ', _atr, '="', escape(_val), '"')) |
- if namespace != self.namespace and self.namespace is not None: |
- if nsprefixes.has_key(self.namespace): |
- prefix = nsprefixes[self.namespace] |
- bext(prefix+':'+self.tagName) |
- else: |
- bext(self.tagName) |
- writeattr("xmlns", self.namespace) |
- else: |
- bext(self.tagName) |
- j = ''.join |
- for attr, val in self.attributes.iteritems(): |
- if isinstance(attr, tuple): |
- ns, key = attr |
- if nsprefixes.has_key(ns): |
- prefix = nsprefixes[ns] |
- else: |
- prefix = genprefix() |
- newprefixes[ns] = prefix |
- assert val is not None |
- writeattr(prefix+':'+key,val) |
- else: |
- assert val is not None |
- writeattr(attr, val) |
- if newprefixes: |
- for ns, prefix in newprefixes.iteritems(): |
- if prefix: |
- writeattr('xmlns:'+prefix, ns) |
- newprefixes.update(nsprefixes) |
- downprefixes = newprefixes |
- else: |
- downprefixes = nsprefixes |
- w(j(begin)) |
- if self.childNodes: |
- w(">") |
- newindent = indent + addindent |
- for child in self.childNodes: |
- if self.tagName in BLOCKELEMENTS and \ |
- self.tagName in FORMATNICELY: |
- w(j((newl, newindent))) |
- child.writexml(stream, newindent, addindent, newl, strip, |
- downprefixes, self.namespace) |
- if self.tagName in BLOCKELEMENTS: |
- w(j((newl, indent))) |
- w(j(("</", self.endTagName, '>'))) |
- |
- elif self.tagName.lower() not in ALLOWSINGLETON: |
- w(j(('></', self.endTagName, '>'))) |
- else: |
- w(" />") |
- |
- def __repr__(self): |
- rep = "Element(%s" % repr(self.nodeName) |
- if self.attributes: |
- rep += ", attributes=%r" % (self.attributes,) |
- if self._filename: |
- rep += ", filename=%r" % (self._filename,) |
- if self._markpos: |
- rep += ", markpos=%r" % (self._markpos,) |
- return rep + ')' |
- |
- def __str__(self): |
- rep = "<" + self.nodeName |
- if self._filename or self._markpos: |
- rep += " (" |
- if self._filename: |
- rep += repr(self._filename) |
- if self._markpos: |
- rep += " line %s column %s" % self._markpos |
- if self._filename or self._markpos: |
- rep += ")" |
- for item in self.attributes.items(): |
- rep += " %s=%r" % item |
- if self.hasChildNodes(): |
- rep += " >...</%s>" % self.nodeName |
- else: |
- rep += " />" |
- return rep |
- |
-def _unescapeDict(d): |
- dd = {} |
- for k, v in d.items(): |
- dd[k] = unescape(v) |
- return dd |
- |
-def _reverseDict(d): |
- dd = {} |
- for k, v in d.items(): |
- dd[v]=k |
- return dd |
- |
-class MicroDOMParser(XMLParser): |
- |
- # <dash> glyph: a quick scan thru the DTD says BODY, AREA, LINK, IMG, HR, |
- # P, DT, DD, LI, INPUT, OPTION, THEAD, TFOOT, TBODY, COLGROUP, COL, TR, TH, |
- # TD, HEAD, BASE, META, HTML all have optional closing tags |
- |
- soonClosers = 'area link br img hr input base meta'.split() |
- laterClosers = {'p': ['p', 'dt'], |
- 'dt': ['dt','dd'], |
- 'dd': ['dt', 'dd'], |
- 'li': ['li'], |
- 'tbody': ['thead', 'tfoot', 'tbody'], |
- 'thead': ['thead', 'tfoot', 'tbody'], |
- 'tfoot': ['thead', 'tfoot', 'tbody'], |
- 'colgroup': ['colgroup'], |
- 'col': ['col'], |
- 'tr': ['tr'], |
- 'td': ['td'], |
- 'th': ['th'], |
- 'head': ['body'], |
- 'title': ['head', 'body'], # this looks wrong... |
- 'option': ['option'], |
- } |
- |
- |
- def __init__(self, beExtremelyLenient=0, caseInsensitive=1, preserveCase=0, |
- soonClosers=soonClosers, laterClosers=laterClosers): |
- self.elementstack = [] |
- d = {'xmlns': 'xmlns', '': None} |
- dr = _reverseDict(d) |
- self.nsstack = [(d,None,dr)] |
- self.documents = [] |
- self._mddoctype = None |
- self.beExtremelyLenient = beExtremelyLenient |
- self.caseInsensitive = caseInsensitive |
- self.preserveCase = preserveCase or not caseInsensitive |
- self.soonClosers = soonClosers |
- self.laterClosers = laterClosers |
- # self.indentlevel = 0 |
- |
- def shouldPreserveSpace(self): |
- for edx in xrange(len(self.elementstack)): |
- el = self.elementstack[-edx] |
- if el.tagName == 'pre' or el.getAttribute("xml:space", '') == 'preserve': |
- return 1 |
- return 0 |
- |
- def _getparent(self): |
- if self.elementstack: |
- return self.elementstack[-1] |
- else: |
- return None |
- |
- COMMENT = re.compile(r"\s*/[/*]\s*") |
- |
- def _fixScriptElement(self, el): |
- # this deals with case where there is comment or CDATA inside |
- # <script> tag and we want to do the right thing with it |
- if not self.beExtremelyLenient or not len(el.childNodes) == 1: |
- return |
- c = el.firstChild() |
- if isinstance(c, Text): |
- # deal with nasty people who do stuff like: |
- # <script> // <!-- |
- # x = 1; |
- # // --></script> |
- # tidy does this, for example. |
- prefix = "" |
- oldvalue = c.value |
- match = self.COMMENT.match(oldvalue) |
- if match: |
- prefix = match.group() |
- oldvalue = oldvalue[len(prefix):] |
- |
- # now see if contents are actual node and comment or CDATA |
- try: |
- e = parseString("<a>%s</a>" % oldvalue).childNodes[0] |
- except (ParseError, MismatchedTags): |
- return |
- if len(e.childNodes) != 1: |
- return |
- e = e.firstChild() |
- if isinstance(e, (CDATASection, Comment)): |
- el.childNodes = [] |
- if prefix: |
- el.childNodes.append(Text(prefix)) |
- el.childNodes.append(e) |
- |
- def gotDoctype(self, doctype): |
- self._mddoctype = doctype |
- |
- def gotTagStart(self, name, attributes): |
- # print ' '*self.indentlevel, 'start tag',name |
- # self.indentlevel += 1 |
- parent = self._getparent() |
- if (self.beExtremelyLenient and isinstance(parent, Element)): |
- parentName = parent.tagName |
- myName = name |
- if self.caseInsensitive: |
- parentName = parentName.lower() |
- myName = myName.lower() |
- if myName in self.laterClosers.get(parentName, []): |
- self.gotTagEnd(parent.tagName) |
- parent = self._getparent() |
- attributes = _unescapeDict(attributes) |
- namespaces = self.nsstack[-1][0] |
- newspaces = {} |
- for k, v in attributes.items(): |
- if k.startswith('xmlns'): |
- spacenames = k.split(':',1) |
- if len(spacenames) == 2: |
- newspaces[spacenames[1]] = v |
- else: |
- newspaces[''] = v |
- del attributes[k] |
- if newspaces: |
- namespaces = namespaces.copy() |
- namespaces.update(newspaces) |
- for k, v in attributes.items(): |
- ksplit = k.split(':', 1) |
- if len(ksplit) == 2: |
- pfx, tv = ksplit |
- if pfx != 'xml' and namespaces.has_key(pfx): |
- attributes[namespaces[pfx], tv] = v |
- del attributes[k] |
- el = Element(name, attributes, parent, |
- self.filename, self.saveMark(), |
- caseInsensitive=self.caseInsensitive, |
- preserveCase=self.preserveCase, |
- namespace=namespaces.get('')) |
- revspaces = _reverseDict(newspaces) |
- el.addPrefixes(revspaces) |
- |
- if newspaces: |
- rscopy = self.nsstack[-1][2].copy() |
- rscopy.update(revspaces) |
- self.nsstack.append((namespaces, el, rscopy)) |
- self.elementstack.append(el) |
- if parent: |
- parent.appendChild(el) |
- if (self.beExtremelyLenient and el.tagName in self.soonClosers): |
- self.gotTagEnd(name) |
- |
- def _gotStandalone(self, factory, data): |
- parent = self._getparent() |
- te = factory(data, parent) |
- if parent: |
- parent.appendChild(te) |
- elif self.beExtremelyLenient: |
- self.documents.append(te) |
- |
- def gotText(self, data): |
- if data.strip() or self.shouldPreserveSpace(): |
- self._gotStandalone(Text, data) |
- |
- def gotComment(self, data): |
- self._gotStandalone(Comment, data) |
- |
- def gotEntityReference(self, entityRef): |
- self._gotStandalone(EntityReference, entityRef) |
- |
- def gotCData(self, cdata): |
- self._gotStandalone(CDATASection, cdata) |
- |
- def gotTagEnd(self, name): |
- # print ' '*self.indentlevel, 'end tag',name |
- # self.indentlevel -= 1 |
- if not self.elementstack: |
- if self.beExtremelyLenient: |
- return |
- raise MismatchedTags(*((self.filename, "NOTHING", name) |
- +self.saveMark()+(0,0))) |
- el = self.elementstack.pop() |
- pfxdix = self.nsstack[-1][2] |
- if self.nsstack[-1][1] is el: |
- nstuple = self.nsstack.pop() |
- else: |
- nstuple = None |
- if self.caseInsensitive: |
- tn = el.tagName.lower() |
- cname = name.lower() |
- else: |
- tn = el.tagName |
- cname = name |
- |
- nsplit = name.split(':',1) |
- if len(nsplit) == 2: |
- pfx, newname = nsplit |
- ns = pfxdix.get(pfx,None) |
- if ns is not None: |
- if el.namespace != ns: |
- if not self.beExtremelyLenient: |
- raise MismatchedTags(*((self.filename, el.tagName, name) |
- +self.saveMark()+el._markpos)) |
- if not (tn == cname): |
- if self.beExtremelyLenient: |
- if self.elementstack: |
- lastEl = self.elementstack[0] |
- for idx in xrange(len(self.elementstack)): |
- if self.elementstack[-(idx+1)].tagName == cname: |
- self.elementstack[-(idx+1)].endTag(name) |
- break |
- else: |
- # this was a garbage close tag; wait for a real one |
- self.elementstack.append(el) |
- if nstuple is not None: |
- self.nsstack.append(nstuple) |
- return |
- del self.elementstack[-(idx+1):] |
- if not self.elementstack: |
- self.documents.append(lastEl) |
- return |
- else: |
- raise MismatchedTags(*((self.filename, el.tagName, name) |
- +self.saveMark()+el._markpos)) |
- el.endTag(name) |
- if not self.elementstack: |
- self.documents.append(el) |
- if self.beExtremelyLenient and el.tagName == "script": |
- self._fixScriptElement(el) |
- |
- def connectionLost(self, reason): |
- XMLParser.connectionLost(self, reason) # This can cause more events! |
- if self.elementstack: |
- if self.beExtremelyLenient: |
- self.documents.append(self.elementstack[0]) |
- else: |
- raise MismatchedTags(*((self.filename, self.elementstack[-1], |
- "END_OF_FILE") |
- +self.saveMark() |
- +self.elementstack[-1]._markpos)) |
- |
- |
-def parse(readable, *args, **kwargs): |
- """Parse HTML or XML readable.""" |
- if not hasattr(readable, "read"): |
- readable = open(readable, "rb") |
- mdp = MicroDOMParser(*args, **kwargs) |
- mdp.filename = getattr(readable, "name", "<xmlfile />") |
- mdp.makeConnection(None) |
- if hasattr(readable,"getvalue"): |
- mdp.dataReceived(readable.getvalue()) |
- else: |
- r = readable.read(1024) |
- while r: |
- mdp.dataReceived(r) |
- r = readable.read(1024) |
- mdp.connectionLost(None) |
- |
- if not mdp.documents: |
- raise ParseError(mdp.filename, 0, 0, "No top-level Nodes in document") |
- |
- if mdp.beExtremelyLenient: |
- if len(mdp.documents) == 1: |
- d = mdp.documents[0] |
- if not isinstance(d, Element): |
- el = Element("html") |
- el.appendChild(d) |
- d = el |
- else: |
- d = Element("html") |
- for child in mdp.documents: |
- d.appendChild(child) |
- else: |
- d = mdp.documents[0] |
- doc = Document(d) |
- doc.doctype = mdp._mddoctype |
- return doc |
- |
-def parseString(st, *args, **kw): |
- if isinstance(st, UnicodeType): |
- # this isn't particularly ideal, but it does work. |
- return parse(StringIO(st.encode('UTF-16')), *args, **kw) |
- return parse(StringIO(st), *args, **kw) |
- |
- |
-def parseXML(readable): |
- """Parse an XML readable object.""" |
- return parse(readable, caseInsensitive=0, preserveCase=1) |
- |
- |
-def parseXMLString(st): |
- """Parse an XML readable object.""" |
- return parseString(st, caseInsensitive=0, preserveCase=1) |
- |
- |
-# Utility |
- |
-class lmx: |
- """Easy creation of XML.""" |
- |
- def __init__(self, node='div'): |
- if isinstance(node, StringTypes): |
- node = Element(node) |
- self.node = node |
- |
- def __getattr__(self, name): |
- if name[0] == '_': |
- raise AttributeError("no private attrs") |
- return lambda **kw: self.add(name,**kw) |
- |
- def __setitem__(self, key, val): |
- self.node.setAttribute(key, val) |
- |
- def __getitem__(self, key): |
- return self.node.getAttribute(key) |
- |
- def text(self, txt, raw=0): |
- nn = Text(txt, raw=raw) |
- self.node.appendChild(nn) |
- return self |
- |
- def add(self, tagName, **kw): |
- newNode = Element(tagName, caseInsensitive=0, preserveCase=0) |
- self.node.appendChild(newNode) |
- xf = lmx(newNode) |
- for k, v in kw.items(): |
- if k[0] == '_': |
- k = k[1:] |
- xf[k]=v |
- return xf |