Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(19)

Side by Side Diff: third_party/Python-Markdown/markdown/serializers.py

Issue 1389543003: Revert of Check in a simple pure-python based Markdown previewer. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@add
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # markdown/serializers.py
2 #
3 # Add x/html serialization to ElementTree
4 # Taken from ElementTree 1.3 preview with slight modifications
5 #
6 # Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved.
7 #
8 # fredrik@pythonware.com
9 # http://www.pythonware.com
10 #
11 # --------------------------------------------------------------------
12 # The ElementTree toolkit is
13 #
14 # Copyright (c) 1999-2007 by Fredrik Lundh
15 #
16 # By obtaining, using, and/or copying this software and/or its
17 # associated documentation, you agree that you have read, understood,
18 # and will comply with the following terms and conditions:
19 #
20 # Permission to use, copy, modify, and distribute this software and
21 # its associated documentation for any purpose and without fee is
22 # hereby granted, provided that the above copyright notice appears in
23 # all copies, and that both that copyright notice and this permission
24 # notice appear in supporting documentation, and that the name of
25 # Secret Labs AB or the author not be used in advertising or publicity
26 # pertaining to distribution of the software without specific, written
27 # prior permission.
28 #
29 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
30 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
31 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
32 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
33 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
34 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
35 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
36 # OF THIS SOFTWARE.
37 # --------------------------------------------------------------------
38
39
40 from __future__ import absolute_import
41 from __future__ import unicode_literals
42 from . import util
# Local aliases for the etree primitives used by the serializer below.
ElementTree = util.etree.ElementTree
QName = util.etree.QName
ProcessingInstruction = util.etree.ProcessingInstruction
PI = util.etree.PI
# Use the etree module's ``test_comment`` attribute as the comment marker
# when it exists; otherwise fall back to the regular ``Comment`` factory.
# NOTE(review): presumably ``test_comment`` is set by the etree loader for
# the C implementation -- confirm against util/etree_loader.
Comment = (  # pragma: no cover
    util.etree.test_comment
    if hasattr(util.etree, 'test_comment')
    else util.etree.Comment
)

__all__ = ['to_html_string', 'to_xhtml_string']
53
# Tags treated as empty (void) elements by the serializer: they never get a
# closing tag, and are written as ``<tag />`` in XHTML output.
# Every entry must be followed by a comma: adjacent string literals are
# silently concatenated (a missing comma once turned "meta" and "param"
# into the single bogus entry "metaparam").
HTML_EMPTY = set((
    "area", "base", "basefont", "br", "col", "frame", "hr",
    "img", "input", "isindex", "link", "meta", "param",
))
61
62 _namespace_map = {
63 # "well-known" namespace prefixes
64 "http://www.w3.org/XML/1998/namespace": "xml",
65 "http://www.w3.org/1999/xhtml": "html",
66 "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
67 "http://schemas.xmlsoap.org/wsdl/": "wsdl",
68 # xml schema
69 "http://www.w3.org/2001/XMLSchema": "xs",
70 "http://www.w3.org/2001/XMLSchema-instance": "xsi",
71 # dublic core
72 "http://purl.org/dc/elements/1.1/": "dc",
73 }
74
75
76 def _raise_serialization_error(text): # pragma: no cover
77 raise TypeError(
78 "cannot serialize %r (type %s)" % (text, type(text).__name__)
79 )
80
81
82 def _encode(text, encoding):
83 try:
84 return text.encode(encoding, "xmlcharrefreplace")
85 except (TypeError, AttributeError): # pragma: no cover
86 _raise_serialization_error(text)
87
88
89 def _escape_cdata(text):
90 # escape character data
91 try:
92 # it's worth avoiding do-nothing calls for strings that are
93 # shorter than 500 character, or so. assume that's, by far,
94 # the most common case in most applications.
95 if "&" in text:
96 text = text.replace("&", "&")
97 if "<" in text:
98 text = text.replace("<", "&lt;")
99 if ">" in text:
100 text = text.replace(">", "&gt;")
101 return text
102 except (TypeError, AttributeError): # pragma: no cover
103 _raise_serialization_error(text)
104
105
106 def _escape_attrib(text):
107 # escape attribute value
108 try:
109 if "&" in text:
110 text = text.replace("&", "&amp;")
111 if "<" in text:
112 text = text.replace("<", "&lt;")
113 if ">" in text:
114 text = text.replace(">", "&gt;")
115 if "\"" in text:
116 text = text.replace("\"", "&quot;")
117 if "\n" in text:
118 text = text.replace("\n", "&#10;")
119 return text
120 except (TypeError, AttributeError): # pragma: no cover
121 _raise_serialization_error(text)
122
123
124 def _escape_attrib_html(text):
125 # escape attribute value
126 try:
127 if "&" in text:
128 text = text.replace("&", "&amp;")
129 if "<" in text:
130 text = text.replace("<", "&lt;")
131 if ">" in text:
132 text = text.replace(">", "&gt;")
133 if "\"" in text:
134 text = text.replace("\"", "&quot;")
135 return text
136 except (TypeError, AttributeError): # pragma: no cover
137 _raise_serialization_error(text)
138
139
def _serialize_html(write, elem, qnames, namespaces, format):
    """Recursively serialize *elem* and its subtree as HTML or XHTML.

    Arguments:
        write: callable invoked with each string fragment of the output.
        elem: the element to serialize.
        qnames: mapping from tag/attribute names to their serialized form
            (as built by ``_namespaces``); a tag mapped to ``None`` is
            written without markup, only its text and children.
        namespaces: mapping of namespace URI to prefix whose ``xmlns``
            declarations are written on this element, or ``None``.
            Recursive calls pass ``None`` so declarations appear only on
            the element this function was first called with.
        format: ``"html"`` or ``"xhtml"``.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    else:
        tag = qnames[tag]
        if tag is None:
            if text:
                write(_escape_cdata(text))
            for e in elem:
                _serialize_html(write, e, qnames, None, format)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                items = sorted(items)  # lexical order
                for k, v in items:
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib_html(v)
                    if qnames[k] == v and format == 'html':
                        # handle boolean attributes: in HTML an attribute
                        # whose value equals its name is written minimized
                        write(" %s" % v)
                    else:
                        write(" %s=\"%s\"" % (qnames[k], v))
                if namespaces:
                    # sorted() rather than list.sort(): on Python 3
                    # dict.items() is a view without a sort() method.
                    ns_items = sorted(
                        namespaces.items(), key=lambda x: x[1]
                    )  # sort on prefix
                    for uri, prefix in ns_items:
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix, _escape_attrib(uri)
                        ))
            if format == "xhtml" and tag.lower() in HTML_EMPTY:
                # XHTML empty elements are self-closing
                write(" />")
            else:
                write(">")
                if text:
                    if tag.lower() in ["script", "style"]:
                        # script/style content is written verbatim
                        write(text)
                    else:
                        write(_escape_cdata(text))
                for e in elem:
                    _serialize_html(write, e, qnames, None, format)
                if tag.lower() not in HTML_EMPTY:
                    write("</" + tag + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail))
193
194
def _write_html(root,
                encoding=None,
                default_namespace=None,
                format="html"):
    """Serialize the tree rooted at *root* and return the result.

    Arguments:
        root: root element of the tree; must not be ``None``.
        encoding: when ``None`` (the default) the text is returned as a
            string; otherwise it is passed through ``_encode`` with this
            encoding.
        default_namespace: forwarded to ``_namespaces``.
        format: ``"html"`` or ``"xhtml"``, forwarded to
            ``_serialize_html``.
    """
    assert root is not None
    data = []
    write = data.append
    qnames, namespaces = _namespaces(root, default_namespace)
    _serialize_html(write, root, qnames, namespaces, format)
    serialized = "".join(data)
    if encoding is None:
        return serialized
    # _encode() requires the target encoding as its second argument.
    return _encode(serialized, encoding)
208
209
210 # --------------------------------------------------------------------
211 # serialization support
212
def _namespaces(elem, default_namespace=None):
    """Identify the qualified names and namespaces used in a tree.

    Walks *elem* and all of its descendants, registering every tag,
    attribute name and ``QName`` attribute value / text.

    Arguments:
        elem: root element of the tree to scan.
        default_namespace: optional namespace URI to be mapped to the
            empty prefix.

    Returns:
        A ``(qnames, namespaces)`` tuple: ``qnames`` maps each name to
        its serialized ``prefix:local`` (or plain) form, ``namespaces``
        maps namespace URIs to prefixes.

    Raises:
        ValueError: if *default_namespace* is given and a non-qualified
            name is found.
        TypeError: (via ``_raise_serialization_error``) for a tag that is
            neither a string, a ``QName``, ``None``, ``Comment`` nor
            ``PI``.
    """
    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] == "{":
                uri, tag = qname[1:].split("}", 1)
                prefix = namespaces.get(uri)
                if prefix is None:
                    prefix = _namespace_map.get(uri)
                    if prefix is None:
                        # no well-known prefix: generate one
                        prefix = "ns%d" % len(namespaces)
                    if prefix != "xml":
                        # the "xml" prefix is never recorded for declaration
                        namespaces[uri] = prefix
                if prefix:
                    qnames[qname] = "%s:%s" % (prefix, tag)
                else:
                    qnames[qname] = tag  # default element
            else:
                if default_namespace:
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                    )
                qnames[qname] = qname
        except TypeError:  # pragma: no cover
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator  # cET compatibility
    for node in iterate():
        tag = node.tag
        if isinstance(tag, QName):
            # The "already registered" check is nested on purpose: a
            # QName tag seen before must not fall through to the error
            # branch below.
            if tag.text not in qnames:
                add_qname(tag.text)
        elif isinstance(tag, util.string_type):
            if tag not in qnames:
                add_qname(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        text = node.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces
275
276
def to_html_string(element):
    """Return *element* and its subtree serialized in ``html`` format."""
    root = ElementTree(element).getroot()
    return _write_html(root, format="html")
279
280
def to_xhtml_string(element):
    """Return *element* and its subtree serialized in ``xhtml`` format."""
    root = ElementTree(element).getroot()
    return _write_html(root, format="xhtml")
OLDNEW
« no previous file with comments | « third_party/Python-Markdown/markdown/preprocessors.py ('k') | third_party/Python-Markdown/markdown/treeprocessors.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698