OLD | NEW |
1 # markdown is released under the BSD license | |
2 # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later) | |
3 # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) | |
4 # Copyright 2004 Manfred Stienstra (the original version) | |
5 # | |
6 # All rights reserved. | |
7 # | |
8 # Redistribution and use in source and binary forms, with or without | |
9 # modification, are permitted provided that the following conditions are met: | |
10 # | |
11 # * Redistributions of source code must retain the above copyright | |
12 # notice, this list of conditions and the following disclaimer. | |
13 # * Redistributions in binary form must reproduce the above copyright | |
14 # notice, this list of conditions and the following disclaimer in the | |
15 # documentation and/or other materials provided with the distribution. | |
16 # * Neither the name of the <organization> nor the | |
17 # names of its contributors may be used to endorse or promote products | |
18 # derived from this software without specific prior written permission. | |
19 # | |
20 # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY | |
21 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
22 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
23 # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT | |
24 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
30 # POSSIBILITY OF SUCH DAMAGE. | |
31 | |
32 | |
33 # markdown/serializers.py | 1 # markdown/serializers.py |
34 # | 2 # |
35 # Add x/html serialization to ElementTree | 3 # Add x/html serialization to ElementTree |
36 # Taken from ElementTree 1.3 preview with slight modifications | 4 # Taken from ElementTree 1.3 preview with slight modifications |
37 # | 5 # |
38 # Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved. | 6 # Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved. |
39 # | 7 # |
40 # fredrik@pythonware.com | 8 # fredrik@pythonware.com |
41 # http://www.pythonware.com | 9 # http://www.pythonware.com |
42 # | 10 # |
(...skipping 24 matching lines...) Expand all Loading... |
67 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE | 35 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE |
68 # OF THIS SOFTWARE. | 36 # OF THIS SOFTWARE. |
69 # -------------------------------------------------------------------- | 37 # -------------------------------------------------------------------- |
70 | 38 |
71 | 39 |
72 from __future__ import absolute_import | 40 from __future__ import absolute_import |
73 from __future__ import unicode_literals | 41 from __future__ import unicode_literals |
74 from . import util | 42 from . import util |
# Local aliases for the ElementTree objects used by the serializer.
ElementTree = util.etree.ElementTree
QName = util.etree.QName
# Use ``util.etree.test_comment`` as the comment marker when the etree
# module exposes it; otherwise fall back to the regular ``Comment`` factory.
if hasattr(util.etree, 'test_comment'):  # pragma: no cover
    Comment = util.etree.test_comment
else:  # pragma: no cover
    Comment = util.etree.Comment
PI = util.etree.PI
ProcessingInstruction = util.etree.ProcessingInstruction

__all__ = ['to_html_string', 'to_xhtml_string']
85 | 53 |
# Tags that never take content. `_serialize_html` writes no closing tag for
# them and, in "xhtml" format, emits them self-closed (" />").
# NOTE: every entry needs its own comma -- adjacent string literals are
# silently concatenated ("meta" "param" would become "metaparam").
HTML_EMPTY = set((
    "area", "base", "basefont", "br", "col", "frame", "hr",
    "img", "input", "isindex", "link", "meta", "param",
))
93 | 61 |
# Default prefixes for well-known namespace URIs. Consulted when a URI has
# no prefix registered yet in the per-document namespace table.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # XML Schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # Dublin Core
    "http://purl.org/dc/elements/1.1/": "dc",
}
106 | 74 |
107 | 75 |
def _raise_serialization_error(text):  # pragma: no cover
    """Raise a ``TypeError`` reporting that *text* cannot be serialized.

    The message contains the ``repr`` of the offending value and the name
    of its type. This function never returns normally.
    """
    raise TypeError(
        "cannot serialize %r (type %s)" % (text, type(text).__name__)
    )
112 | 80 |
| 81 |
def _encode(text, encoding):
    """Encode *text* with *encoding* and return the encoded result.

    Characters the target encoding cannot represent are replaced by XML
    numeric character references (the ``xmlcharrefreplace`` error handler).

    Raises ``TypeError`` (via ``_raise_serialization_error``) when *text*
    cannot be encoded this way, e.g. because it has no ``encode`` method.
    """
    try:
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):  # pragma: no cover
        _raise_serialization_error(text)
118 | 87 |
| 88 |
def _escape_cdata(text):
    """Return *text* with ``&``, ``<`` and ``>`` replaced by entities.

    Used for element text, tails, comments and processing instructions.

    Raises ``TypeError`` (via ``_raise_serialization_error``) when *text*
    does not behave like a string.
    """
    try:
        # it's worth avoiding do-nothing calls for strings that are
        # shorter than 500 characters, or so.  assume that's, by far,
        # the most common case in most applications.
        # "&" must be handled first so the entities inserted below are
        # not escaped a second time.
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        return text
    except (TypeError, AttributeError):  # pragma: no cover
        _raise_serialization_error(text)
134 | 104 |
135 | 105 |
def _escape_attrib(text):
    """Return *text* escaped for use inside a double-quoted XML attribute.

    Replaces ``&``, ``<``, ``>`` and ``"`` with entities and each newline
    with the character reference ``&#10;``.

    Raises ``TypeError`` (via ``_raise_serialization_error``) when *text*
    does not behave like a string.
    """
    try:
        # "&" must be handled first so the entities inserted below are
        # not escaped a second time.
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        if "\n" in text:
            text = text.replace("\n", "&#10;")
        return text
    except (TypeError, AttributeError):  # pragma: no cover
        _raise_serialization_error(text)
152 | 122 |
| 123 |
def _escape_attrib_html(text):
    """Return *text* escaped for use inside a double-quoted HTML attribute.

    Replaces ``&``, ``<``, ``>`` and ``"`` with entities. Unlike
    ``_escape_attrib``, newlines are left untouched.

    Raises ``TypeError`` (via ``_raise_serialization_error``) when *text*
    does not behave like a string.
    """
    try:
        # "&" must be handled first so the entities inserted below are
        # not escaped a second time.
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        return text
    except (TypeError, AttributeError):  # pragma: no cover
        _raise_serialization_error(text)
167 | 138 |
168 | 139 |
def _serialize_html(write, elem, qnames, namespaces, format):
    """Serialize *elem* and its subtree as (X)HTML through *write*.

    Parameters:
        write: callable invoked with each output chunk (a string).
        elem: the element to serialize; its ``tag``, ``text``, ``tail``,
            ``items()`` and child elements are read.
        qnames: mapping from tag / attribute names to the names to emit.
            A tag mapped to ``None`` is written without start/end tags
            (only its text and children).
        namespaces: mapping of namespace URI -> prefix whose ``xmlns``
            declarations are written on this element, or a false value
            for none. Children are always serialized with ``None``.
        format: ``"html"`` or ``"xhtml"``. In ``"xhtml"`` tags listed in
            ``HTML_EMPTY`` are self-closed; in ``"html"`` an attribute whose
            value equals its name is written in minimized (boolean) form.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    else:
        tag = qnames[tag]
        if tag is None:
            # No tag to emit: write the contents only.
            if text:
                write(_escape_cdata(text))
            for e in elem:
                _serialize_html(write, e, qnames, None, format)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                items = sorted(items)  # lexical order
                for k, v in items:
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib_html(v)
                    if qnames[k] == v and format == 'html':
                        # handle boolean attributes
                        write(" %s" % v)
                    else:
                        write(" %s=\"%s\"" % (qnames[k], v))
                if namespaces:
                    # sorted() rather than .items().sort(): on Python 3
                    # dict.items() is a view object without a sort method.
                    ns_items = sorted(
                        namespaces.items(), key=lambda x: x[1]
                    )  # sort on prefix
                    for v, k in ns_items:
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v)))
            if format == "xhtml" and tag.lower() in HTML_EMPTY:
                write(" />")
            else:
                write(">")
                if text:
                    # script/style content is written verbatim, unescaped
                    if tag.lower() in ("script", "style"):
                        write(text)
                    else:
                        write(_escape_cdata(text))
                for e in elem:
                    _serialize_html(write, e, qnames, None, format)
                if tag.lower() not in HTML_EMPTY:
                    write("</" + tag + ">")
    # The tail follows the element regardless of which branch ran above.
    if elem.tail:
        write(_escape_cdata(elem.tail))
223 | 193 |
| 194 |
224 def _write_html(root, | 195 def _write_html(root, |
225 encoding=None, | 196 encoding=None, |
226 default_namespace=None, | 197 default_namespace=None, |
227 format="html"): | 198 format="html"): |
228 assert root is not None | 199 assert root is not None |
229 data = [] | 200 data = [] |
230 write = data.append | 201 write = data.append |
231 qnames, namespaces = _namespaces(root, default_namespace) | 202 qnames, namespaces = _namespaces(root, default_namespace) |
232 _serialize_html(write, root, qnames, namespaces, format) | 203 _serialize_html(write, root, qnames, namespaces, format) |
233 if encoding is None: | 204 if encoding is None: |
(...skipping 24 matching lines...) Expand all Loading... |
258 prefix = namespaces.get(uri) | 229 prefix = namespaces.get(uri) |
259 if prefix is None: | 230 if prefix is None: |
260 prefix = _namespace_map.get(uri) | 231 prefix = _namespace_map.get(uri) |
261 if prefix is None: | 232 if prefix is None: |
262 prefix = "ns%d" % len(namespaces) | 233 prefix = "ns%d" % len(namespaces) |
263 if prefix != "xml": | 234 if prefix != "xml": |
264 namespaces[uri] = prefix | 235 namespaces[uri] = prefix |
265 if prefix: | 236 if prefix: |
266 qnames[qname] = "%s:%s" % (prefix, tag) | 237 qnames[qname] = "%s:%s" % (prefix, tag) |
267 else: | 238 else: |
268 qnames[qname] = tag # default element | 239 qnames[qname] = tag # default element |
269 else: | 240 else: |
270 if default_namespace: | 241 if default_namespace: |
271 raise ValueError( | 242 raise ValueError( |
272 "cannot use non-qualified names with " | 243 "cannot use non-qualified names with " |
273 "default_namespace option" | 244 "default_namespace option" |
274 ) | 245 ) |
275 qnames[qname] = qname | 246 qnames[qname] = qname |
276 except TypeError: | 247 except TypeError: # pragma: no cover |
277 _raise_serialization_error(qname) | 248 _raise_serialization_error(qname) |
278 | 249 |
279 # populate qname and namespaces table | 250 # populate qname and namespaces table |
280 try: | 251 try: |
281 iterate = elem.iter | 252 iterate = elem.iter |
282 except AttributeError: | 253 except AttributeError: |
283 iterate = elem.getiterator # cET compatibility | 254 iterate = elem.getiterator # cET compatibility |
284 for elem in iterate(): | 255 for elem in iterate(): |
285 tag = elem.tag | 256 tag = elem.tag |
286 if isinstance(tag, QName) and tag.text not in qnames: | 257 if isinstance(tag, QName) and tag.text not in qnames: |
287 add_qname(tag.text) | 258 add_qname(tag.text) |
288 elif isinstance(tag, util.string_type): | 259 elif isinstance(tag, util.string_type): |
289 if tag not in qnames: | 260 if tag not in qnames: |
290 add_qname(tag) | 261 add_qname(tag) |
291 elif tag is not None and tag is not Comment and tag is not PI: | 262 elif tag is not None and tag is not Comment and tag is not PI: |
292 _raise_serialization_error(tag) | 263 _raise_serialization_error(tag) |
293 for key, value in elem.items(): | 264 for key, value in elem.items(): |
294 if isinstance(key, QName): | 265 if isinstance(key, QName): |
295 key = key.text | 266 key = key.text |
296 if key not in qnames: | 267 if key not in qnames: |
297 add_qname(key) | 268 add_qname(key) |
298 if isinstance(value, QName) and value.text not in qnames: | 269 if isinstance(value, QName) and value.text not in qnames: |
299 add_qname(value.text) | 270 add_qname(value.text) |
300 text = elem.text | 271 text = elem.text |
301 if isinstance(text, QName) and text.text not in qnames: | 272 if isinstance(text, QName) and text.text not in qnames: |
302 add_qname(text.text) | 273 add_qname(text.text) |
303 return qnames, namespaces | 274 return qnames, namespaces |
304 | 275 |
| 276 |
def to_html_string(element):
    """Serialize *element* and its subtree using the ``"html"`` format.

    The element is wrapped in an ``ElementTree`` and its root is passed to
    ``_write_html`` with the default encoding and namespace settings; the
    value returned by ``_write_html`` is returned unchanged.
    """
    return _write_html(ElementTree(element).getroot(), format="html")
307 | 279 |
| 280 |
def to_xhtml_string(element):
    """Serialize *element* and its subtree using the ``"xhtml"`` format.

    The element is wrapped in an ``ElementTree`` and its root is passed to
    ``_write_html`` with the default encoding and namespace settings; the
    value returned by ``_write_html`` is returned unchanged.
    """
    return _write_html(ElementTree(element).getroot(), format="xhtml")
OLD | NEW |