| Index: third_party/markdown/serializers.py
|
| diff --git a/third_party/markdown/serializers.py b/third_party/markdown/serializers.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..b19d61c93d05d7284a5da42434e8ea2266c5fce5
|
| --- /dev/null
|
| +++ b/third_party/markdown/serializers.py
|
| @@ -0,0 +1,277 @@
|
| +# markdown/serializers.py
|
| +#
|
| +# Add x/html serialization to ElementTree
|
| +# Taken from ElementTree 1.3 preview with slight modifications
|
| +#
|
| +# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved.
|
| +#
|
| +# fredrik@pythonware.com
|
| +# http://www.pythonware.com
|
| +#
|
| +# --------------------------------------------------------------------
|
| +# The ElementTree toolkit is
|
| +#
|
| +# Copyright (c) 1999-2007 by Fredrik Lundh
|
| +#
|
| +# By obtaining, using, and/or copying this software and/or its
|
| +# associated documentation, you agree that you have read, understood,
|
| +# and will comply with the following terms and conditions:
|
| +#
|
| +# Permission to use, copy, modify, and distribute this software and
|
| +# its associated documentation for any purpose and without fee is
|
| +# hereby granted, provided that the above copyright notice appears in
|
| +# all copies, and that both that copyright notice and this permission
|
| +# notice appear in supporting documentation, and that the name of
|
| +# Secret Labs AB or the author not be used in advertising or publicity
|
| +# pertaining to distribution of the software without specific, written
|
| +# prior permission.
|
| +#
|
| +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
| +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
| +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
| +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
| +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| +# OF THIS SOFTWARE.
|
| +# --------------------------------------------------------------------
|
| +
|
| +
|
| +from __future__ import absolute_import
|
| +from __future__ import unicode_literals
|
| +from . import util
|
| +# Local aliases for the ElementTree API exposed through ``util.etree``.
|
| +ElementTree = util.etree.ElementTree
|
| +QName = util.etree.QName
|
| +# Prefer ``test_comment`` when the etree module provides it; otherwise
|
| +# fall back to the regular ``Comment`` factory.
|
| +if hasattr(util.etree, 'test_comment'):
|
| +    Comment = util.etree.test_comment
|
| +else:
|
| +    Comment = util.etree.Comment
|
| +PI = util.etree.PI
|
| +ProcessingInstruction = util.etree.ProcessingInstruction
|
| +
|
| +# Public API of this module.
|
| +__all__ = ['to_html_string', 'to_xhtml_string']
|
| +
|
| +# Tags that are serialized without a closing tag (and as " />" in xhtml).
|
| +# NOTE: "meta" and "param" must be separate entries; without the comma the
|
| +# adjacent string literals concatenate into the bogus tag "metaparam".
|
| +HTML_EMPTY = set((
|
| +    "area", "base", "basefont", "br", "col", "frame", "hr",
|
| +    "img", "input", "isindex", "link", "meta", "param",
|
| +))
|
| +
|
| +# Maps well-known namespace URIs to the prefix used for them when no
|
| +# prefix has been assigned yet (consulted by _namespaces).
|
| +_namespace_map = {
|
| + # "well-known" namespace prefixes
|
| + "http://www.w3.org/XML/1998/namespace": "xml",
|
| + "http://www.w3.org/1999/xhtml": "html",
|
| + "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
|
| + "http://schemas.xmlsoap.org/wsdl/": "wsdl",
|
| + # xml schema
|
| + "http://www.w3.org/2001/XMLSchema": "xs",
|
| + "http://www.w3.org/2001/XMLSchema-instance": "xsi",
|
| + # dublin core
|
| + "http://purl.org/dc/elements/1.1/": "dc",
|
| +}
|
| +
|
| +
|
| +def _raise_serialization_error(text):
|
| +    """Raise TypeError reporting that *text* cannot be serialized.
|
| +
|
| +    The message includes the repr of *text* and the name of its type.
|
| +    """
|
| +    raise TypeError(
|
| +        "cannot serialize %r (type %s)" % (text, type(text).__name__)
|
| +    )
|
| +
|
| +def _encode(text, encoding):
|
| +    """Encode *text* to *encoding*, emitting XML character references
|
| +    for characters the target encoding cannot represent.
|
| +
|
| +    Raises TypeError (via _raise_serialization_error) when *text* cannot
|
| +    be encoded because it is not a string-like object.
|
| +    """
|
| +    try:
|
| +        return text.encode(encoding, "xmlcharrefreplace")
|
| +    except (TypeError, AttributeError):
|
| +        _raise_serialization_error(text)
|
| +
|
| +def _escape_cdata(text):
|
| +    """Return *text* with ``&``, ``<`` and ``>`` escaped for character data.
|
| +
|
| +    Raises TypeError (via _raise_serialization_error) if *text* is not a
|
| +    string-like object.
|
| +    """
|
| +    try:
|
| +        # it's worth avoiding do-nothing calls for strings that are
|
| +        # shorter than 500 character, or so.  assume that's, by far,
|
| +        # the most common case in most applications.
|
| +        if "&" in text:
|
| +            # "&" goes first so the entities produced below are not
|
| +            # escaped a second time.
|
| +            text = text.replace("&", "&amp;")
|
| +        if "<" in text:
|
| +            text = text.replace("<", "&lt;")
|
| +        if ">" in text:
|
| +            text = text.replace(">", "&gt;")
|
| +        return text
|
| +    except (TypeError, AttributeError):
|
| +        _raise_serialization_error(text)
|
| +
|
| +
|
| +def _escape_attrib(text):
|
| +    """Return *text* escaped for use as a double-quoted attribute value.
|
| +
|
| +    Escapes ``&``, ``<``, ``>`` and ``"``, and writes newlines as the
|
| +    character reference ``&#10;``.
|
| +
|
| +    Raises TypeError (via _raise_serialization_error) if *text* is not a
|
| +    string-like object.
|
| +    """
|
| +    try:
|
| +        if "&" in text:
|
| +            # "&" goes first so the entities produced below are not
|
| +            # escaped a second time.
|
| +            text = text.replace("&", "&amp;")
|
| +        if "<" in text:
|
| +            text = text.replace("<", "&lt;")
|
| +        if ">" in text:
|
| +            text = text.replace(">", "&gt;")
|
| +        if "\"" in text:
|
| +            text = text.replace("\"", "&quot;")
|
| +        if "\n" in text:
|
| +            text = text.replace("\n", "&#10;")
|
| +        return text
|
| +    except (TypeError, AttributeError):
|
| +        _raise_serialization_error(text)
|
| +
|
| +def _escape_attrib_html(text):
|
| +    """Return *text* escaped for use as a double-quoted HTML attribute value.
|
| +
|
| +    Escapes ``&``, ``<``, ``>`` and ``"``; unlike _escape_attrib, newlines
|
| +    are left untouched.
|
| +
|
| +    Raises TypeError (via _raise_serialization_error) if *text* is not a
|
| +    string-like object.
|
| +    """
|
| +    try:
|
| +        if "&" in text:
|
| +            # "&" goes first so the entities produced below are not
|
| +            # escaped a second time.
|
| +            text = text.replace("&", "&amp;")
|
| +        if "<" in text:
|
| +            text = text.replace("<", "&lt;")
|
| +        if ">" in text:
|
| +            text = text.replace(">", "&gt;")
|
| +        if "\"" in text:
|
| +            text = text.replace("\"", "&quot;")
|
| +        return text
|
| +    except (TypeError, AttributeError):
|
| +        _raise_serialization_error(text)
|
| +
|
| +
|
| +def _serialize_html(write, elem, qnames, namespaces, format):
|
| +    """Serialize *elem* and its subtree as (X)HTML through *write*.
|
| +
|
| +    write      -- callable invoked with each output string fragment.
|
| +    elem       -- element to serialize; its tail text is written too.
|
| +    qnames     -- mapping from tag/attribute names to their serialized
|
| +                  form (as built by _namespaces).
|
| +    namespaces -- mapping of namespace URI to prefix to declare on this
|
| +                  element, or a false value; children are always
|
| +                  serialized with None.
|
| +    format     -- "html" or "xhtml".
|
| +    """
|
| +    tag = elem.tag
|
| +    text = elem.text
|
| +    if tag is Comment:
|
| +        write("<!--%s-->" % _escape_cdata(text))
|
| +    elif tag is ProcessingInstruction:
|
| +        write("<?%s?>" % _escape_cdata(text))
|
| +    else:
|
| +        tag = qnames[tag]
|
| +        if tag is None:
|
| +            # No serialized tag: emit only the text and the children.
|
| +            if text:
|
| +                write(_escape_cdata(text))
|
| +            for e in elem:
|
| +                _serialize_html(write, e, qnames, None, format)
|
| +        else:
|
| +            write("<" + tag)
|
| +            items = elem.items()
|
| +            if items or namespaces:
|
| +                # sorted() instead of list.sort(): items() may be a dict
|
| +                # view (Python 3), which has no sort() method.
|
| +                items = sorted(items)  # lexical order
|
| +                for k, v in items:
|
| +                    if isinstance(k, QName):
|
| +                        k = k.text
|
| +                    if isinstance(v, QName):
|
| +                        v = qnames[v.text]
|
| +                    else:
|
| +                        v = _escape_attrib_html(v)
|
| +                    if qnames[k] == v and format == 'html':
|
| +                        # handle boolean attributes
|
| +                        write(" %s" % v)
|
| +                    else:
|
| +                        write(" %s=\"%s\"" % (qnames[k], v))
|
| +                if namespaces:
|
| +                    # sort on prefix
|
| +                    items = sorted(namespaces.items(), key=lambda x: x[1])
|
| +                    for v, k in items:
|
| +                        if k:
|
| +                            k = ":" + k
|
| +                        write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v)))
|
| +            if format == "xhtml" and tag in HTML_EMPTY:
|
| +                # Self-closing form; text and children are not written.
|
| +                write(" />")
|
| +            else:
|
| +                write(">")
|
| +                tag = tag.lower()
|
| +                if text:
|
| +                    if tag == "script" or tag == "style":
|
| +                        # script/style content is written unescaped
|
| +                        write(text)
|
| +                    else:
|
| +                        write(_escape_cdata(text))
|
| +                for e in elem:
|
| +                    _serialize_html(write, e, qnames, None, format)
|
| +                if tag not in HTML_EMPTY:
|
| +                    write("</" + tag + ">")
|
| +    if elem.tail:
|
| +        write(_escape_cdata(elem.tail))
|
| +
|
| +def _write_html(root,
|
| +                encoding=None,
|
| +                default_namespace=None,
|
| +                format="html"):
|
| +    """Serialize the tree rooted at *root* and return the result.
|
| +
|
| +    root              -- root element; must not be None.
|
| +    encoding          -- when None the joined text is returned as is,
|
| +                         otherwise it is encoded with _encode.
|
| +    default_namespace -- passed through to _namespaces.
|
| +    format            -- "html" or "xhtml", passed to _serialize_html.
|
| +    """
|
| +    assert root is not None
|
| +    data = []
|
| +    write = data.append
|
| +    qnames, namespaces = _namespaces(root, default_namespace)
|
| +    _serialize_html(write, root, qnames, namespaces, format)
|
| +    if encoding is None:
|
| +        return "".join(data)
|
| +    else:
|
| +        # _encode requires the target encoding as its second argument.
|
| +        return _encode("".join(data), encoding)
|
| +
|
| +
|
| +# --------------------------------------------------------------------
|
| +# serialization support
|
| +
|
| +def _namespaces(elem, default_namespace=None):
|
| +    """Identify the qualified names and namespaces used in a tree.
|
| +
|
| +    Walks *elem* and all of its descendants and returns a tuple
|
| +    ``(qnames, namespaces)``: *qnames* maps every tag, attribute name and
|
| +    QName value found to its serialized ``prefix:local`` form, and
|
| +    *namespaces* maps namespace URIs to prefixes.
|
| +
|
| +    Raises ValueError if *default_namespace* is given and a non-qualified
|
| +    name is found, and TypeError (via _raise_serialization_error) for
|
| +    tags or names that cannot be serialized.
|
| +    """
|
| +
|
| +    # maps qnames to *encoded* prefix:local names
|
| +    qnames = {None: None}
|
| +
|
| +    # maps uri:s to prefixes
|
| +    namespaces = {}
|
| +    if default_namespace:
|
| +        namespaces[default_namespace] = ""
|
| +
|
| +    def add_qname(qname):
|
| +        # calculate serialized qname representation
|
| +        try:
|
| +            if qname[:1] == "{":
|
| +                uri, tag = qname[1:].split("}", 1)
|
| +                prefix = namespaces.get(uri)
|
| +                if prefix is None:
|
| +                    prefix = _namespace_map.get(uri)
|
| +                    if prefix is None:
|
| +                        prefix = "ns%d" % len(namespaces)
|
| +                    if prefix != "xml":
|
| +                        namespaces[uri] = prefix
|
| +                if prefix:
|
| +                    qnames[qname] = "%s:%s" % (prefix, tag)
|
| +                else:
|
| +                    qnames[qname] = tag  # default element
|
| +            else:
|
| +                if default_namespace:
|
| +                    raise ValueError(
|
| +                        "cannot use non-qualified names with "
|
| +                        "default_namespace option"
|
| +                    )
|
| +                qnames[qname] = qname
|
| +        except TypeError:
|
| +            _raise_serialization_error(qname)
|
| +
|
| +    # populate qname and namespaces table
|
| +    try:
|
| +        iterate = elem.iter
|
| +    except AttributeError:
|
| +        iterate = elem.getiterator  # cET compatibility
|
| +    for node in iterate():
|
| +        tag = node.tag
|
| +        if isinstance(tag, QName):
|
| +            # Nested test: a QName tag that is already registered must
|
| +            # not fall through to the "cannot serialize" branch below.
|
| +            if tag.text not in qnames:
|
| +                add_qname(tag.text)
|
| +        elif isinstance(tag, util.string_type):
|
| +            if tag not in qnames:
|
| +                add_qname(tag)
|
| +        elif tag is not None and tag is not Comment and tag is not PI:
|
| +            _raise_serialization_error(tag)
|
| +        for key, value in node.items():
|
| +            if isinstance(key, QName):
|
| +                key = key.text
|
| +            if key not in qnames:
|
| +                add_qname(key)
|
| +            if isinstance(value, QName) and value.text not in qnames:
|
| +                add_qname(value.text)
|
| +        text = node.text
|
| +        if isinstance(text, QName) and text.text not in qnames:
|
| +            add_qname(text.text)
|
| +    return qnames, namespaces
|
| +
|
| +def to_html_string(element):
|
| +    """Return the tree rooted at *element* serialized in "html" format."""
|
| +    return _write_html(ElementTree(element).getroot(), format="html")
|
| +
|
| +def to_xhtml_string(element):
|
| +    """Return the tree rooted at *element* serialized in "xhtml" format."""
|
| +    return _write_html(ElementTree(element).getroot(), format="xhtml")
|
|
|