| OLD | NEW |
| (Empty) |
| 1 # markdown/serializers.py | |
| 2 # | |
| 3 # Add x/html serialization to ElementTree | |
| 4 # Taken from ElementTree 1.3 preview with slight modifications | |
| 5 # | |
| 6 # Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved. | |
| 7 # | |
| 8 # fredrik@pythonware.com | |
| 9 # http://www.pythonware.com | |
| 10 # | |
| 11 # -------------------------------------------------------------------- | |
| 12 # The ElementTree toolkit is | |
| 13 # | |
| 14 # Copyright (c) 1999-2007 by Fredrik Lundh | |
| 15 # | |
| 16 # By obtaining, using, and/or copying this software and/or its | |
| 17 # associated documentation, you agree that you have read, understood, | |
| 18 # and will comply with the following terms and conditions: | |
| 19 # | |
| 20 # Permission to use, copy, modify, and distribute this software and | |
| 21 # its associated documentation for any purpose and without fee is | |
| 22 # hereby granted, provided that the above copyright notice appears in | |
| 23 # all copies, and that both that copyright notice and this permission | |
| 24 # notice appear in supporting documentation, and that the name of | |
| 25 # Secret Labs AB or the author not be used in advertising or publicity | |
| 26 # pertaining to distribution of the software without specific, written | |
| 27 # prior permission. | |
| 28 # | |
| 29 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD | |
| 30 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- | |
| 31 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR | |
| 32 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY | |
| 33 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, | |
| 34 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS | |
| 35 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE | |
| 36 # OF THIS SOFTWARE. | |
| 37 # -------------------------------------------------------------------- | |
| 38 | |
| 39 | |
| 40 from __future__ import absolute_import | |
| 41 from __future__ import unicode_literals | |
| 42 from . import util | |
# Module-level aliases for the etree names used by the serializer below.
ElementTree = util.etree.ElementTree
QName = util.etree.QName
# Use the etree module's ``test_comment`` attribute when it has one,
# otherwise fall back to its regular ``Comment`` factory.
Comment = (
    util.etree.test_comment
    if hasattr(util.etree, 'test_comment')
    else util.etree.Comment
)
PI = util.etree.PI
ProcessingInstruction = util.etree.ProcessingInstruction

__all__ = ['to_html_string', 'to_xhtml_string']
| 53 | |
# Lower-cased tag names of elements serialized without a closing tag
# (and as ``<tag />`` in XHTML output).
# NOTE: every entry needs its own comma; adjacent string literals are
# silently concatenated ("meta" "param" would become "metaparam").
HTML_EMPTY = set((
    "area", "base", "basefont", "br", "col", "frame", "hr",
    "img", "input", "isindex", "link", "meta", "param",
))
| 61 | |
| 62 _namespace_map = { | |
| 63 # "well-known" namespace prefixes | |
| 64 "http://www.w3.org/XML/1998/namespace": "xml", | |
| 65 "http://www.w3.org/1999/xhtml": "html", | |
| 66 "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", | |
| 67 "http://schemas.xmlsoap.org/wsdl/": "wsdl", | |
| 68 # xml schema | |
| 69 "http://www.w3.org/2001/XMLSchema": "xs", | |
| 70 "http://www.w3.org/2001/XMLSchema-instance": "xsi", | |
| 71 # dublic core | |
| 72 "http://purl.org/dc/elements/1.1/": "dc", | |
| 73 } | |
| 74 | |
| 75 | |
| 76 def _raise_serialization_error(text): # pragma: no cover | |
| 77 raise TypeError( | |
| 78 "cannot serialize %r (type %s)" % (text, type(text).__name__) | |
| 79 ) | |
| 80 | |
| 81 | |
| 82 def _encode(text, encoding): | |
| 83 try: | |
| 84 return text.encode(encoding, "xmlcharrefreplace") | |
| 85 except (TypeError, AttributeError): # pragma: no cover | |
| 86 _raise_serialization_error(text) | |
| 87 | |
| 88 | |
| 89 def _escape_cdata(text): | |
| 90 # escape character data | |
| 91 try: | |
| 92 # it's worth avoiding do-nothing calls for strings that are | |
| 93 # shorter than 500 character, or so. assume that's, by far, | |
| 94 # the most common case in most applications. | |
| 95 if "&" in text: | |
| 96 text = text.replace("&", "&") | |
| 97 if "<" in text: | |
| 98 text = text.replace("<", "<") | |
| 99 if ">" in text: | |
| 100 text = text.replace(">", ">") | |
| 101 return text | |
| 102 except (TypeError, AttributeError): # pragma: no cover | |
| 103 _raise_serialization_error(text) | |
| 104 | |
| 105 | |
| 106 def _escape_attrib(text): | |
| 107 # escape attribute value | |
| 108 try: | |
| 109 if "&" in text: | |
| 110 text = text.replace("&", "&") | |
| 111 if "<" in text: | |
| 112 text = text.replace("<", "<") | |
| 113 if ">" in text: | |
| 114 text = text.replace(">", ">") | |
| 115 if "\"" in text: | |
| 116 text = text.replace("\"", """) | |
| 117 if "\n" in text: | |
| 118 text = text.replace("\n", " ") | |
| 119 return text | |
| 120 except (TypeError, AttributeError): # pragma: no cover | |
| 121 _raise_serialization_error(text) | |
| 122 | |
| 123 | |
| 124 def _escape_attrib_html(text): | |
| 125 # escape attribute value | |
| 126 try: | |
| 127 if "&" in text: | |
| 128 text = text.replace("&", "&") | |
| 129 if "<" in text: | |
| 130 text = text.replace("<", "<") | |
| 131 if ">" in text: | |
| 132 text = text.replace(">", ">") | |
| 133 if "\"" in text: | |
| 134 text = text.replace("\"", """) | |
| 135 return text | |
| 136 except (TypeError, AttributeError): # pragma: no cover | |
| 137 _raise_serialization_error(text) | |
| 138 | |
| 139 | |
def _serialize_html(write, elem, qnames, namespaces, format):
    """Recursively serialize *elem* and its subtree as HTML or XHTML.

    write      -- callable receiving each output fragment (a string).
    elem       -- the element to serialize.
    qnames     -- mapping of tag/attribute names to their serialized
                  form, as built by ``_namespaces``.
    namespaces -- mapping of namespace URI to prefix to declare on this
                  element; recursive calls pass ``None``, so only the
                  outermost element carries ``xmlns`` declarations.
    format     -- ``"html"`` or ``"xhtml"``.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    else:
        tag = qnames[tag]
        if tag is None:
            # No tag to emit: write only the text and the children.
            if text:
                write(_escape_cdata(text))
            for e in elem:
                _serialize_html(write, e, qnames, None, format)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                items = sorted(items)  # lexical order
                for k, v in items:
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib_html(v)
                    if qnames[k] == v and format == 'html':
                        # handle boolean attributes
                        write(" %s" % v)
                    else:
                        write(" %s=\"%s\"" % (qnames[k], v))
                if namespaces:
                    # sort on prefix; ``sorted`` is required because a
                    # dict's ``items()`` view has no ``sort`` method on
                    # Python 3.
                    ns_items = sorted(namespaces.items(),
                                      key=lambda x: x[1])
                    for v, k in ns_items:
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v)))
            lowered = tag.lower()
            if format == "xhtml" and lowered in HTML_EMPTY:
                write(" />")
            else:
                write(">")
                if text:
                    if lowered in ["script", "style"]:
                        # script/style content is written unescaped
                        write(text)
                    else:
                        write(_escape_cdata(text))
                for e in elem:
                    _serialize_html(write, e, qnames, None, format)
                if lowered not in HTML_EMPTY:
                    write("</" + tag + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail))
| 193 | |
| 194 | |
def _write_html(root,
                encoding=None,
                default_namespace=None,
                format="html"):
    """Serialize the tree rooted at *root* and return the result.

    root              -- root element of the tree; must not be ``None``.
    encoding          -- when ``None`` the text string is returned;
                         otherwise the output is passed through
                         ``_encode`` with this encoding.
    default_namespace -- forwarded to ``_namespaces``.
    format            -- ``"html"`` or ``"xhtml"``; forwarded to
                         ``_serialize_html``.
    """
    assert root is not None
    data = []
    write = data.append
    qnames, namespaces = _namespaces(root, default_namespace)
    _serialize_html(write, root, qnames, namespaces, format)
    output = "".join(data)
    if encoding is None:
        return output
    # ``_encode`` needs the target encoding as its second argument.
    return _encode(output, encoding)
| 208 | |
| 209 | |
| 210 # -------------------------------------------------------------------- | |
| 211 # serialization support | |
| 212 | |
def _namespaces(elem, default_namespace=None):
    """Identify the qualified names and namespaces used in a tree.

    Walks *elem* and all of its descendants, looking at tags, attribute
    keys and ``QName`` attribute values / text.

    Returns a ``(qnames, namespaces)`` tuple: ``qnames`` maps each name
    to its serialized ``prefix:local`` form and ``namespaces`` maps each
    namespace URI to its prefix.

    Raises ``ValueError`` when *default_namespace* is given and an
    unqualified name is encountered. Unsupported tags are reported
    through ``_raise_serialization_error``.
    """
    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] == "{":
                uri, tag = qname[1:].split("}", 1)
                prefix = namespaces.get(uri)
                if prefix is None:
                    prefix = _namespace_map.get(uri)
                    if prefix is None:
                        prefix = "ns%d" % len(namespaces)
                    if prefix != "xml":
                        # the "xml" prefix is never declared
                        namespaces[uri] = prefix
                if prefix:
                    qnames[qname] = "%s:%s" % (prefix, tag)
                else:
                    qnames[qname] = tag  # default element
            else:
                if default_namespace:
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                    )
                qnames[qname] = qname
        except TypeError:  # pragma: no cover
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator  # cET compatibility
    for node in iterate():
        tag = node.tag
        if isinstance(tag, QName):
            # The membership test is nested so that a QName tag which is
            # already registered is skipped rather than falling through
            # to the "unserializable tag" branch below.
            if tag.text not in qnames:
                add_qname(tag.text)
        elif isinstance(tag, util.string_type):
            if tag not in qnames:
                add_qname(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        text = node.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces
| 275 | |
| 276 | |
def to_html_string(element):
    """Return *element* and its subtree serialized in "html" format."""
    root = ElementTree(element).getroot()
    return _write_html(root, format="html")
| 279 | |
| 280 | |
def to_xhtml_string(element):
    """Return *element* and its subtree serialized in "xhtml" format."""
    root = ElementTree(element).getroot()
    return _write_html(root, format="xhtml")
| OLD | NEW |