third_party/markdown/__init__.py - Issue 93743005: Support markdown template for html editor

Side by Side Diff: third_party/markdown/__init__.py

Issue 93743005: Support markdown template for html editor (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 7 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 """

	2 Python Markdown

	3 ===============

	4

	5 Python Markdown converts Markdown to HTML and can be used as a library or

	6 called from the command line.

	7

	8 ## Basic usage as a module:

	9

	10 import markdown

	11 html = markdown.markdown(your_text_string)

	12

	13 See <http://packages.python.org/Markdown/> for more

	14 information and instructions on how to extend the functionality of

	15 Python Markdown. Read that before you try modifying this file.

	16

	17 ## Authors and License

	18

	19 Started by [Manfred Stienstra](http://www.dwerg.net/). Continued and

	20 maintained by [Yuri Takhteyev](http://www.freewisdom.org), [Waylan

	21 Limberg](http://achinghead.com/) and [Artem Yunusov](http://blog.splyer.com).

	22

	23 Contact: markdown@freewisdom.org

	24

	25 Copyright 2007-2013 The Python Markdown Project (v. 1.7 and later)

	26 Copyright 200? Django Software Foundation (OrderedDict implementation)

	27 Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)

	28 Copyright 2004 Manfred Stienstra (the original version)

	29

	30 License: BSD (see LICENSE for details).

	31 """

	32

	33 from __future__ import absolute_import

	34 from __future__ import unicode_literals

	35 from .__version__ import version, version_info

	36 import re

	37 import codecs

	38 import sys

	39 import logging

	40 from . import util

	41 from .preprocessors import build_preprocessors

	42 from .blockprocessors import build_block_parser

	43 from .treeprocessors import build_treeprocessors

	44 from .inlinepatterns import build_inlinepatterns

	45 from .postprocessors import build_postprocessors

	46 from .extensions import Extension

	47 from .serializers import to_html_string, to_xhtml_string

	48

	49 __all__ = ['Markdown', 'markdown', 'markdownFromFile']

	50

	51 logger = logging.getLogger('MARKDOWN')

	52

	53

	54 class Markdown(object):

	55 """Convert Markdown to HTML."""

	56

	57 doc_tag = "div" # Element used to wrap document - later removed

	58

	59 option_defaults = {

	60 'html_replacement_text' : '[HTML_REMOVED]',

	61 'tab_length' : 4,

	62 'enable_attributes' : True,

	63 'smart_emphasis' : True,

	64 'lazy_ol' : True,

	65 }

	66

	67 output_formats = {

	68 'html' : to_html_string,

	69 'html4' : to_html_string,

	70 'html5' : to_html_string,

	71 'xhtml' : to_xhtml_string,

	72 'xhtml1': to_xhtml_string,

	73 'xhtml5': to_xhtml_string,

	74 }

	75

	76 ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',

	77 '(', ')', '>', '#', '+', '-', '.', '!']

	78

	79 def __init__(self, args, *kwargs):

	80 """

	81 Creates a new Markdown instance.

	82

	83 Keyword arguments:

	84

	85 * extensions: A list of extensions.

	86 If they are of type string, the module mdx_name.py will be loaded.

	87 If they are a subclass of markdown.Extension, they will be used

	88 as-is.

	89 * extension_configs: Configuration settingis for extensions.

	90 * output_format: Format of output. Supported formats are:

	91 * "xhtml1": Outputs XHTML 1.x. Default.

	92 * "xhtml5": Outputs XHTML style tags of HTML 5

	93 * "xhtml": Outputs latest supported version of XHTML (currently XHTM L 1.1).

	94 * "html4": Outputs HTML 4

	95 * "html5": Outputs HTML style tags of HTML 5

	96 * "html": Outputs latest supported version of HTML (currently HTML 4 ).

	97 Note that it is suggested that the more specific formats ("xhtml1"

	98 and "html4") be used as "xhtml" or "html" may change in the future

	99 if it makes sense at that time.

	100 * safe_mode: Disallow raw html. One of "remove", "replace" or "escape".

	101 * html_replacement_text: Text used when safe_mode is set to "replace".

	102 * tab_length: Length of tabs in the source. Default: 4

	103 * enable_attributes: Enable the conversion of attributes. Default: True

	104 * smart_emphasis: Treat `_connected_words_` intelegently Default: True

	105 * lazy_ol: Ignore number of first item of ordered lists. Default: True

	106

	107 """

	108

	109 # For backward compatibility, loop through old positional args

	110 pos = ['extensions', 'extension_configs', 'safe_mode', 'output_format']

	111 c = 0

	112 for arg in args:

	113 if pos[c] not in kwargs:

	114 kwargs[pos[c]] = arg

	115 c += 1

	116 if c == len(pos):

	117 # ignore any additional args

	118 break

	119

	120 # Loop through kwargs and assign defaults

	121 for option, default in self.option_defaults.items():

	122 setattr(self, option, kwargs.get(option, default))

	123

	124 self.safeMode = kwargs.get('safe_mode', False)

	125 if self.safeMode and 'enable_attributes' not in kwargs:

	126 # Disable attributes in safeMode when not explicitly set

	127 self.enable_attributes = False

	128

	129 self.registeredExtensions = []

	130 self.docType = ""

	131 self.stripTopLevelTags = True

	132

	133 self.build_parser()

	134

	135 self.references = {}

	136 self.htmlStash = util.HtmlStash()

	137 self.set_output_format(kwargs.get('output_format', 'xhtml1'))

	138 self.registerExtensions(extensions=kwargs.get('extensions', []),

	139 configs=kwargs.get('extension_configs', {}))

	140 self.reset()

	141

	142 def build_parser(self):

	143 """ Build the parser from the various parts. """

	144 self.preprocessors = build_preprocessors(self)

	145 self.parser = build_block_parser(self)

	146 self.inlinePatterns = build_inlinepatterns(self)

	147 self.treeprocessors = build_treeprocessors(self)

	148 self.postprocessors = build_postprocessors(self)

	149 return self

	150

	151 def registerExtensions(self, extensions, configs):

	152 """

	153 Register extensions with this instance of Markdown.

	154

	155 Keyword arguments:

	156

	157 * extensions: A list of extensions, which can either

	158 be strings or objects. See the docstring on Markdown.

	159 * configs: A dictionary mapping module names to config options.

	160

	161 """

	162 for ext in extensions:

	163 if isinstance(ext, util.string_type):

	164 ext = self.build_extension(ext, configs.get(ext, []))

	165 if isinstance(ext, Extension):

	166 ext.extendMarkdown(self, globals())

	167 elif ext is not None:

	168 raise TypeError(

	169 'Extension "%s.%s" must be of type: "markdown.Extension"'

	170 % (ext.__class__.__module__, ext.__class__.__name__))

	171

	172 return self

	173

	174 def build_extension(self, ext_name, configs = []):

	175 """Build extension by name, then return the module.

	176

	177 The extension name may contain arguments as part of the string in the

	178 following format: "extname(key1=value1,key2=value2)"

	179

	180 """

	181

	182 # Parse extensions config params (ignore the order)

	183 configs = dict(configs)

	184 pos = ext_name.find("(") # find the first "("

	185 if pos > 0:

	186 ext_args = ext_name[pos+1:-1]

	187 ext_name = ext_name[:pos]

	188 pairs = [x.split("=") for x in ext_args.split(",")]

	189 configs.update([(x.strip(), y.strip()) for (x, y) in pairs])

	190

	191 # Setup the module name

	192 module_name = ext_name

	193 if '.' not in ext_name:

	194 module_name = '.'.join(['third_party.markdown.extensions', ext_name] )

	195

	196 # Try loading the extension first from one place, then another

	197 try: # New style (markdown.extensons.<extension>)

	198 module = __import__(module_name, {}, {}, [module_name.rpartition('.' )[0]])

	199 except ImportError:

	200 module_name_old_style = '_'.join(['mdx', ext_name])

	201 try: # Old style (mdx_<extension>)

	202 module = __import__(module_name_old_style)

	203 except ImportError as e:

	204 message = "Failed loading extension '%s' from '%s' or '%s'" \

	205 % (ext_name, module_name, module_name_old_style)

	206 e.args = (message,) + e.args[1:]

	207 raise

	208

	209 # If the module is loaded successfully, we expect it to define a

	210 # function called makeExtension()

	211 try:

	212 return module.makeExtension(configs.items())

	213 except AttributeError as e:

	214 message = e.args[0]

	215 message = "Failed to initiate extension " \

	216 "'%s': %s" % (ext_name, message)

	217 e.args = (message,) + e.args[1:]

	218 raise

	219

	220 def registerExtension(self, extension):

	221 """ This gets called by the extension """

	222 self.registeredExtensions.append(extension)

	223 return self

	224

	225 def reset(self):

	226 """

	227 Resets all state variables so that we can start with a new text.

	228 """

	229 self.htmlStash.reset()

	230 self.references.clear()

	231

	232 for extension in self.registeredExtensions:

	233 if hasattr(extension, 'reset'):

	234 extension.reset()

	235

	236 return self

	237

	238 def set_output_format(self, format):

	239 """ Set the output format for the class instance. """

	240 self.output_format = format.lower()

	241 try:

	242 self.serializer = self.output_formats[self.output_format]

	243 except KeyError as e:

	244 valid_formats = list(self.output_formats.keys())

	245 valid_formats.sort()

	246 message = 'Invalid Output Format: "%s". Use one of %s.' \

	247 % (self.output_format,

	248 '"' + '", "'.join(valid_formats) + '"')

	249 e.args = (message,) + e.args[1:]

	250 raise

	251 return self

	252

	253 def convert(self, source):

	254 """

	255 Convert markdown to serialized XHTML or HTML.

	256

	257 Keyword arguments:

	258

	259 * source: Source text as a Unicode string.

	260

	261 Markdown processing takes place in five steps:

	262

	263 1. A bunch of "preprocessors" munge the input text.

	264 2. BlockParser() parses the high-level structural elements of the

	265 pre-processed text into an ElementTree.

	266 3. A bunch of "treeprocessors" are run against the ElementTree. One

	267 such treeprocessor runs InlinePatterns against the ElementTree,

	268 detecting inline markup.

	269 4. Some post-processors are run against the text after the ElementTree

	270 has been serialized into text.

	271 5. The output is written to a string.

	272

	273 """

	274

	275 # Fixup the source text

	276 if not source.strip():

	277 return '' # a blank unicode string

	278

	279 try:

	280 source = util.text_type(source)

	281 except UnicodeDecodeError as e:

	282 # Customise error message while maintaining original trackback

	283 e.reason += '. -- Note: Markdown only accepts unicode input!'

	284 raise

	285

	286 # Split into lines and run the line preprocessors.

	287 self.lines = source.split("\n")

	288 for prep in self.preprocessors.values():

	289 self.lines = prep.run(self.lines)

	290

	291 # Parse the high-level elements.

	292 root = self.parser.parseDocument(self.lines).getroot()

	293

	294 # Run the tree-processors

	295 for treeprocessor in self.treeprocessors.values():

	296 newRoot = treeprocessor.run(root)

	297 if newRoot:

	298 root = newRoot

	299

	300 # Serialize _properly_. Strip top-level tags.

	301 output = self.serializer(root)

	302 if self.stripTopLevelTags:

	303 try:

	304 start = output.index('<%s>'%self.doc_tag)+len(self.doc_tag)+2

	305 end = output.rindex('</%s>'%self.doc_tag)

	306 output = output[start:end].strip()

	307 except ValueError:

	308 if output.strip().endswith('<%s />'%self.doc_tag):

	309 # We have an empty document

	310 output = ''

	311 else:

	312 # We have a serious problem

	313 raise ValueError('Markdown failed to strip top-level tags. D ocument=%r' % output.strip())

	314

	315 # Run the text post-processors

	316 for pp in self.postprocessors.values():

	317 output = pp.run(output)

	318

	319 return output.strip()

	320

	321 def convertFile(self, input=None, output=None, encoding=None):

	322 """Converts a markdown file and returns the HTML as a unicode string.

	323

	324 Decodes the file using the provided encoding (defaults to utf-8),

	325 passes the file content to markdown, and outputs the html to either

	326 the provided stream or the file with provided name, using the same

	327 encoding as the source file. The 'xmlcharrefreplace' error handler is

	328 used when encoding the output.

	329

	330 Note: This is the only place that decoding and encoding of unicode

	331 takes place in Python-Markdown. (All other code is unicode-in /

	332 unicode-out.)

	333

	334 Keyword arguments:

	335

	336 * input: File object or path. Reads from stdin if `None`.

	337 * output: File object or path. Writes to stdout if `None`.

	338 * encoding: Encoding of input and output files. Defaults to utf-8.

	339

	340 """

	341

	342 encoding = encoding or "utf-8"

	343

	344 # Read the source

	345 if input:

	346 if isinstance(input, util.string_type):

	347 input_file = codecs.open(input, mode="r", encoding=encoding)

	348 else:

	349 input_file = codecs.getreader(encoding)(input)

	350 text = input_file.read()

	351 input_file.close()

	352 else:

	353 text = sys.stdin.read()

	354 if not isinstance(text, util.text_type):

	355 text = text.decode(encoding)

	356

	357 text = text.lstrip('\ufeff') # remove the byte-order mark

	358

	359 # Convert

	360 html = self.convert(text)

	361

	362 # Write to file or stdout

	363 if output:

	364 if isinstance(output, util.string_type):

	365 output_file = codecs.open(output, "w",

	366 encoding=encoding,

	367 errors="xmlcharrefreplace")

	368 output_file.write(html)

	369 output_file.close()

	370 else:

	371 writer = codecs.getwriter(encoding)

	372 output_file = writer(output, errors="xmlcharrefreplace")

	373 output_file.write(html)

	374 # Don't close here. User may want to write more.

	375 else:

	376 # Encode manually and write bytes to stdout.

	377 html = html.encode(encoding, "xmlcharrefreplace")

	378 try:

	379 # Write bytes directly to buffer (Python 3).

	380 sys.stdout.buffer.write(html)

	381 except AttributeError:

	382 # Probably Python 2, which works with bytes by default.

	383 sys.stdout.write(html)

	384

	385 return self

	386

	387

	388 """

	389 EXPORTED FUNCTIONS

	390 =============================================================================

	391

	392 Those are the two functions we really mean to export: markdown() and

	393 markdownFromFile().

	394 """

	395

	396 def markdown(text, args, *kwargs):

	397 """Convert a markdown string to HTML and return HTML as a unicode string.

	398

	399 This is a shortcut function for `Markdown` class to cover the most

	400 basic use case. It initializes an instance of Markdown, loads the

	401 necessary extensions and runs the parser on the given text.

	402

	403 Keyword arguments:

	404

	405 * text: Markdown formatted text as Unicode or ASCII string.

	406 * Any arguments accepted by the Markdown class.

	407

	408 Returns: An HTML document as a string.

	409

	410 """

	411 md = Markdown(args, *kwargs)

	412 return md.convert(text)

	413

	414

	415 def markdownFromFile(args, *kwargs):

	416 """Read markdown code from a file and write it to a file or a stream.

	417

	418 This is a shortcut function which initializes an instance of Markdown,

	419 and calls the convertFile method rather than convert.

	420

	421 Keyword arguments:

	422

	423 * input: a file name or readable object.

	424 * output: a file name or writable object.

	425 * encoding: Encoding of input and output.

	426 * Any arguments accepted by the Markdown class.

	427

	428 """

	429 # For backward compatibility loop through positional args

	430 pos = ['input', 'output', 'extensions', 'encoding']

	431 c = 0

	432 for arg in args:

	433 if pos[c] not in kwargs:

	434 kwargs[pos[c]] = arg

	435 c += 1

	436 if c == len(pos):

	437 break

	438

	439 md = Markdown(**kwargs)

	440 md.convertFile(kwargs.get('input', None),

	441 kwargs.get('output', None),

	442 kwargs.get('encoding', None))

	443

OLD	NEW

Side by Side Diff: third_party/markdown/__init__.py

Side by Side Diff: third_party/markdown/__init__.py