OLD | NEW |
| (Empty) |
1 # markdown/serializers.py | |
2 # | |
3 # Add x/html serialization to ElementTree | |
4 # Taken from ElementTree 1.3 preview with slight modifications | |
5 # | |
6 # Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved. | |
7 # | |
8 # fredrik@pythonware.com | |
9 # http://www.pythonware.com | |
10 # | |
11 # -------------------------------------------------------------------- | |
12 # The ElementTree toolkit is | |
13 # | |
14 # Copyright (c) 1999-2007 by Fredrik Lundh | |
15 # | |
16 # By obtaining, using, and/or copying this software and/or its | |
17 # associated documentation, you agree that you have read, understood, | |
18 # and will comply with the following terms and conditions: | |
19 # | |
20 # Permission to use, copy, modify, and distribute this software and | |
21 # its associated documentation for any purpose and without fee is | |
22 # hereby granted, provided that the above copyright notice appears in | |
23 # all copies, and that both that copyright notice and this permission | |
24 # notice appear in supporting documentation, and that the name of | |
25 # Secret Labs AB or the author not be used in advertising or publicity | |
26 # pertaining to distribution of the software without specific, written | |
27 # prior permission. | |
28 # | |
29 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD | |
30 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- | |
31 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR | |
32 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY | |
33 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, | |
34 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS | |
35 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE | |
36 # OF THIS SOFTWARE. | |
37 # -------------------------------------------------------------------- | |
38 | |
39 | |
40 from __future__ import absolute_import | |
41 from __future__ import unicode_literals | |
42 from . import util | |
# Module-level shortcuts for the ElementTree names used by the serializer.
ElementTree = util.etree.ElementTree
QName = util.etree.QName
# Use ``test_comment`` when the etree module exposes it; otherwise use the
# regular ``Comment`` factory.
Comment = (
    util.etree.test_comment
    if hasattr(util.etree, 'test_comment')
    else util.etree.Comment
)  # pragma: no cover
PI = util.etree.PI
ProcessingInstruction = util.etree.ProcessingInstruction

# Names exported by ``from ... import *``.
__all__ = ['to_html_string', 'to_xhtml_string']
53 | |
# Lower-case tag names of HTML elements that never have content or a
# closing tag. Each name needs its own comma: adjacent string literals are
# silently concatenated, which previously turned "meta" and "param" into the
# single bogus entry "metaparam".
HTML_EMPTY = {
    "area", "base", "basefont", "br", "col", "frame", "hr",
    "img", "input", "isindex", "link", "meta", "param",
}
61 | |
# Maps namespace URIs to the prefix used for them when an element or
# attribute in that namespace has no prefix assigned yet.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # Dublin Core
    "http://purl.org/dc/elements/1.1/": "dc",
}
74 | |
75 | |
76 def _raise_serialization_error(text): # pragma: no cover | |
77 raise TypeError( | |
78 "cannot serialize %r (type %s)" % (text, type(text).__name__) | |
79 ) | |
80 | |
81 | |
82 def _encode(text, encoding): | |
83 try: | |
84 return text.encode(encoding, "xmlcharrefreplace") | |
85 except (TypeError, AttributeError): # pragma: no cover | |
86 _raise_serialization_error(text) | |
87 | |
88 | |
89 def _escape_cdata(text): | |
90 # escape character data | |
91 try: | |
92 # it's worth avoiding do-nothing calls for strings that are | |
93 # shorter than 500 character, or so. assume that's, by far, | |
94 # the most common case in most applications. | |
95 if "&" in text: | |
96 text = text.replace("&", "&") | |
97 if "<" in text: | |
98 text = text.replace("<", "<") | |
99 if ">" in text: | |
100 text = text.replace(">", ">") | |
101 return text | |
102 except (TypeError, AttributeError): # pragma: no cover | |
103 _raise_serialization_error(text) | |
104 | |
105 | |
106 def _escape_attrib(text): | |
107 # escape attribute value | |
108 try: | |
109 if "&" in text: | |
110 text = text.replace("&", "&") | |
111 if "<" in text: | |
112 text = text.replace("<", "<") | |
113 if ">" in text: | |
114 text = text.replace(">", ">") | |
115 if "\"" in text: | |
116 text = text.replace("\"", """) | |
117 if "\n" in text: | |
118 text = text.replace("\n", " ") | |
119 return text | |
120 except (TypeError, AttributeError): # pragma: no cover | |
121 _raise_serialization_error(text) | |
122 | |
123 | |
124 def _escape_attrib_html(text): | |
125 # escape attribute value | |
126 try: | |
127 if "&" in text: | |
128 text = text.replace("&", "&") | |
129 if "<" in text: | |
130 text = text.replace("<", "<") | |
131 if ">" in text: | |
132 text = text.replace(">", ">") | |
133 if "\"" in text: | |
134 text = text.replace("\"", """) | |
135 return text | |
136 except (TypeError, AttributeError): # pragma: no cover | |
137 _raise_serialization_error(text) | |
138 | |
139 | |
def _serialize_html(write, elem, qnames, namespaces, format):
    """Recursively serialize ``elem`` and its subtree as HTML or XHTML.

    Arguments:
        write: callable invoked with each fragment of output text.
        elem: the element to serialize.
        qnames: mapping from tag and attribute names to their serialized
            form (see ``_namespaces``). A tag that maps to ``None`` is
            omitted: only its text and children are written.
        namespaces: mapping of namespace URI to prefix to declare as
            ``xmlns`` attributes on this element, or a false value for
            none. Children are always serialized with ``None``.
        format: ``"html"`` or ``"xhtml"``. In ``"xhtml"`` elements listed
            in ``HTML_EMPTY`` are written self-closed (``<br />``); in
            ``"html"`` an attribute whose value equals its name is written
            in minimized form (``checked``).
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    else:
        tag = qnames[tag]
        if tag is None:
            # No tag of its own: emit the content only.
            if text:
                write(_escape_cdata(text))
            for e in elem:
                _serialize_html(write, e, qnames, None, format)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                items = sorted(items)  # lexical order
                for k, v in items:
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib_html(v)
                    if qnames[k] == v and format == 'html':
                        # handle boolean attributes
                        write(" %s" % v)
                    else:
                        write(" %s=\"%s\"" % (qnames[k], v))
                if namespaces:
                    # dict.items() is a view without .sort() on Python 3,
                    # so build the prefix-ordered list with sorted().
                    ns_items = sorted(namespaces.items(),
                                      key=lambda x: x[1])  # sort on prefix
                    for v, k in ns_items:
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v)))
            if format == "xhtml" and tag.lower() in HTML_EMPTY:
                write(" />")
            else:
                write(">")
                if text:
                    if tag.lower() in ["script", "style"]:
                        # script/style content is written verbatim
                        write(text)
                    else:
                        write(_escape_cdata(text))
                for e in elem:
                    _serialize_html(write, e, qnames, None, format)
                if tag.lower() not in HTML_EMPTY:
                    write("</" + tag + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail))
193 | |
194 | |
def _write_html(root,
                encoding=None,
                default_namespace=None,
                format="html"):
    """Serialize the tree rooted at ``root`` and return the result.

    Arguments:
        root: the root element; must not be ``None``.
        encoding: when ``None`` the joined text is returned as is;
            otherwise it is passed through ``_encode`` with this encoding.
        default_namespace: forwarded to ``_namespaces``.
        format: ``"html"`` or ``"xhtml"``, forwarded to
            ``_serialize_html``.
    """
    assert root is not None
    data = []
    write = data.append
    qnames, namespaces = _namespaces(root, default_namespace)
    _serialize_html(write, root, qnames, namespaces, format)
    if encoding is None:
        return "".join(data)
    else:
        # _encode requires the target encoding as its second argument.
        return _encode("".join(data), encoding)
208 | |
209 | |
210 # -------------------------------------------------------------------- | |
211 # serialization support | |
212 | |
def _namespaces(elem, default_namespace=None):
    """Collect the qualified names and namespaces used in a tree.

    Walks ``elem`` and every descendant, registering each tag, attribute
    name, ``QName`` attribute value and ``QName`` text.

    Arguments:
        elem: root element of the tree to inspect.
        default_namespace: optional namespace URI to serialize without a
            prefix. When given, every name in the tree must be
            namespace-qualified.

    Returns:
        A ``(qnames, namespaces)`` tuple. ``qnames`` maps each name to its
        serialized ``prefix:local`` (or plain) form; ``namespaces`` maps
        namespace URIs to prefixes.

    Raises:
        ValueError: a non-qualified name is found while
            ``default_namespace`` is set.
        TypeError: a tag is neither a string, a ``QName``, ``None``,
            ``Comment`` nor ``PI`` (raised via
            ``_raise_serialization_error``).
    """
    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] == "{":
                uri, tag = qname[1:].split("}", 1)
                prefix = namespaces.get(uri)
                if prefix is None:
                    prefix = _namespace_map.get(uri)
                    if prefix is None:
                        prefix = "ns%d" % len(namespaces)
                    if prefix != "xml":
                        # the "xml" prefix is never recorded for declaration
                        namespaces[uri] = prefix
                if prefix:
                    qnames[qname] = "%s:%s" % (prefix, tag)
                else:
                    qnames[qname] = tag  # default element
            else:
                if default_namespace:
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                    )
                qnames[qname] = qname
        except TypeError:  # pragma: no cover
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator  # cET compatibility
    for node in iterate():
        tag = node.tag
        if isinstance(tag, QName):
            # Keep the membership test nested: a QName tag that is already
            # registered must not fall through to the error branch below.
            if tag.text not in qnames:
                add_qname(tag.text)
        elif isinstance(tag, util.string_type):
            if tag not in qnames:
                add_qname(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        text = node.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces
275 | |
276 | |
def to_html_string(element):
    """Return ``element`` and its subtree serialized in the "html" format."""
    root = ElementTree(element).getroot()
    return _write_html(root, format="html")
279 | |
280 | |
def to_xhtml_string(element):
    """Return ``element`` and its subtree serialized in the "xhtml" format."""
    root = ElementTree(element).getroot()
    return _write_html(root, format="xhtml")
OLD | NEW |