third_party/Python-Markdown/markdown/serializers.py - Issue 1392733002: Re-land "Check in a simple pure-python based Markdown previewer."

Side by Side Diff: third_party/Python-Markdown/markdown/serializers.py

Issue 1392733002: Re-land "Check in a simple pure-python based Markdown previewer." (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: clarify comment re: licenses, add bug #, use --no-find-copies Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 # markdown/serializers.py

	2 #

	3 # Add x/html serialization to ElementTree

	4 # Taken from ElementTree 1.3 preview with slight modifications

	5 #

	6 # Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved.

	7 #

	8 # fredrik@pythonware.com

	9 # http://www.pythonware.com

	10 #

	11 # --------------------------------------------------------------------

	12 # The ElementTree toolkit is

	13 #

	14 # Copyright (c) 1999-2007 by Fredrik Lundh

	15 #

	16 # By obtaining, using, and/or copying this software and/or its

	17 # associated documentation, you agree that you have read, understood,

	18 # and will comply with the following terms and conditions:

	19 #

	20 # Permission to use, copy, modify, and distribute this software and

	21 # its associated documentation for any purpose and without fee is

	22 # hereby granted, provided that the above copyright notice appears in

	23 # all copies, and that both that copyright notice and this permission

	24 # notice appear in supporting documentation, and that the name of

	25 # Secret Labs AB or the author not be used in advertising or publicity

	26 # pertaining to distribution of the software without specific, written

	27 # prior permission.

	28 #

	29 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD

	30 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-

	31 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR

	32 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY

	33 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,

	34 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS

	35 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE

	36 # OF THIS SOFTWARE.

	37 # --------------------------------------------------------------------

	38

	39

	40 from __future__ import absolute_import

	41 from __future__ import unicode_literals

	42 from . import util

	43 ElementTree = util.etree.ElementTree

	44 QName = util.etree.QName

	45 if hasattr(util.etree, 'test_comment'): # pragma: no cover

	46 Comment = util.etree.test_comment

	47 else: # pragma: no cover

	48 Comment = util.etree.Comment

	49 PI = util.etree.PI

	50 ProcessingInstruction = util.etree.ProcessingInstruction

	51

	52 __all__ = ['to_html_string', 'to_xhtml_string']

	53

	54 HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",

	55 "img", "input", "isindex", "link", "meta" "param")

	56

	57 try:

	58 HTML_EMPTY = set(HTML_EMPTY)

	59 except NameError: # pragma: no cover

	60 pass

	61

# Fallback table mapping namespace URIs to conventional prefixes.  It is
# consulted by _namespaces() when a URI has not been given a prefix yet.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # Dublin Core
    "http://purl.org/dc/elements/1.1/": "dc",
}

	74

	75

	76 def _raise_serialization_error(text): # pragma: no cover

	77 raise TypeError(

	78 "cannot serialize %r (type %s)" % (text, type(text).__name__)

	79 )

	80

	81

	82 def _encode(text, encoding):

	83 try:

	84 return text.encode(encoding, "xmlcharrefreplace")

	85 except (TypeError, AttributeError): # pragma: no cover

	86 _raise_serialization_error(text)

	87

	88

	89 def _escape_cdata(text):

	90 # escape character data

	91 try:

	92 # it's worth avoiding do-nothing calls for strings that are

	93 # shorter than 500 character, or so. assume that's, by far,

	94 # the most common case in most applications.

	95 if "&" in text:

	96 text = text.replace("&", "&")

	97 if "<" in text:

	98 text = text.replace("<", "<")

	99 if ">" in text:

	100 text = text.replace(">", ">")

	101 return text

	102 except (TypeError, AttributeError): # pragma: no cover

	103 _raise_serialization_error(text)

	104

	105

	106 def _escape_attrib(text):

	107 # escape attribute value

	108 try:

	109 if "&" in text:

	110 text = text.replace("&", "&")

	111 if "<" in text:

	112 text = text.replace("<", "<")

	113 if ">" in text:

	114 text = text.replace(">", ">")

	115 if "\"" in text:

	116 text = text.replace("\"", """)

	117 if "\n" in text:

	118 text = text.replace("\n", " ")

	119 return text

	120 except (TypeError, AttributeError): # pragma: no cover

	121 _raise_serialization_error(text)

	122

	123

	124 def _escape_attrib_html(text):

	125 # escape attribute value

	126 try:

	127 if "&" in text:

	128 text = text.replace("&", "&")

	129 if "<" in text:

	130 text = text.replace("<", "<")

	131 if ">" in text:

	132 text = text.replace(">", ">")

	133 if "\"" in text:

	134 text = text.replace("\"", """)

	135 return text

	136 except (TypeError, AttributeError): # pragma: no cover

	137 _raise_serialization_error(text)

	138

	139

	140 def _serialize_html(write, elem, qnames, namespaces, format):

	141 tag = elem.tag

	142 text = elem.text

	143 if tag is Comment:

	144 write("<!--%s-->" % _escape_cdata(text))

	145 elif tag is ProcessingInstruction:

	146 write("<?%s?>" % _escape_cdata(text))

	147 else:

	148 tag = qnames[tag]

	149 if tag is None:

	150 if text:

	151 write(_escape_cdata(text))

	152 for e in elem:

	153 _serialize_html(write, e, qnames, None, format)

	154 else:

	155 write("<" + tag)

	156 items = elem.items()

	157 if items or namespaces:

	158 items = sorted(items) # lexical order

	159 for k, v in items:

	160 if isinstance(k, QName):

	161 k = k.text

	162 if isinstance(v, QName):

	163 v = qnames[v.text]

	164 else:

	165 v = _escape_attrib_html(v)

	166 if qnames[k] == v and format == 'html':

	167 # handle boolean attributes

	168 write(" %s" % v)

	169 else:

	170 write(" %s=\"%s\"" % (qnames[k], v))

	171 if namespaces:

	172 items = namespaces.items()

	173 items.sort(key=lambda x: x[1]) # sort on prefix

	174 for v, k in items:

	175 if k:

	176 k = ":" + k

	177 write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v)))

	178 if format == "xhtml" and tag.lower() in HTML_EMPTY:

	179 write(" />")

	180 else:

	181 write(">")

	182 if text:

	183 if tag.lower() in ["script", "style"]:

	184 write(text)

	185 else:

	186 write(_escape_cdata(text))

	187 for e in elem:

	188 _serialize_html(write, e, qnames, None, format)

	189 if tag.lower() not in HTML_EMPTY:

	190 write("</" + tag + ">")

	191 if elem.tail:

	192 write(_escape_cdata(elem.tail))

	193

	194

	195 def _write_html(root,

	196 encoding=None,

	197 default_namespace=None,

	198 format="html"):

	199 assert root is not None

	200 data = []

	201 write = data.append

	202 qnames, namespaces = _namespaces(root, default_namespace)

	203 _serialize_html(write, root, qnames, namespaces, format)

	204 if encoding is None:

	205 return "".join(data)

	206 else:

	207 return _encode("".join(data))

	208

	209

	210 # --------------------------------------------------------------------

	211 # serialization support

	212

	213 def _namespaces(elem, default_namespace=None):

	214 # identify namespaces used in this tree

	215

	216 # maps qnames to encoded prefix:local names

	217 qnames = {None: None}

	218

	219 # maps uri:s to prefixes

	220 namespaces = {}

	221 if default_namespace:

	222 namespaces[default_namespace] = ""

	223

	224 def add_qname(qname):

	225 # calculate serialized qname representation

	226 try:

	227 if qname[:1] == "{":

	228 uri, tag = qname[1:].split("}", 1)

	229 prefix = namespaces.get(uri)

	230 if prefix is None:

	231 prefix = _namespace_map.get(uri)

	232 if prefix is None:

	233 prefix = "ns%d" % len(namespaces)

	234 if prefix != "xml":

	235 namespaces[uri] = prefix

	236 if prefix:

	237 qnames[qname] = "%s:%s" % (prefix, tag)

	238 else:

	239 qnames[qname] = tag # default element

	240 else:

	241 if default_namespace:

	242 raise ValueError(

	243 "cannot use non-qualified names with "

	244 "default_namespace option"

	245 )

	246 qnames[qname] = qname

	247 except TypeError: # pragma: no cover

	248 _raise_serialization_error(qname)

	249

	250 # populate qname and namespaces table

	251 try:

	252 iterate = elem.iter

	253 except AttributeError:

	254 iterate = elem.getiterator # cET compatibility

	255 for elem in iterate():

	256 tag = elem.tag

	257 if isinstance(tag, QName) and tag.text not in qnames:

	258 add_qname(tag.text)

	259 elif isinstance(tag, util.string_type):

	260 if tag not in qnames:

	261 add_qname(tag)

	262 elif tag is not None and tag is not Comment and tag is not PI:

	263 _raise_serialization_error(tag)

	264 for key, value in elem.items():

	265 if isinstance(key, QName):

	266 key = key.text

	267 if key not in qnames:

	268 add_qname(key)

	269 if isinstance(value, QName) and value.text not in qnames:

	270 add_qname(value.text)

	271 text = elem.text

	272 if isinstance(text, QName) and text.text not in qnames:

	273 add_qname(text.text)

	274 return qnames, namespaces

	275

	276

	277 def to_html_string(element):

	278 return _write_html(ElementTree(element).getroot(), format="html")

	279

	280

	281 def to_xhtml_string(element):

	282 return _write_html(ElementTree(element).getroot(), format="xhtml")

OLD	NEW

« no previous file with comments | « third_party/Python-Markdown/markdown/preprocessors.py ('k') | third_party/Python-Markdown/markdown/treeprocessors.py » ('j') | no next file with comments »