third_party/Python-Markdown/markdown/__init__.py - Issue 1392733002: Re-land "Check in a simple pure-python based Markdown previewer."

Side by Side Diff: third_party/Python-Markdown/markdown/__init__.py

Issue 1392733002: Re-land "Check in a simple pure-python based Markdown previewer." (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: clarify comment re: licenses, add bug #, use --no-find-copies Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 """

	2 Python Markdown

	3 ===============

	4

	5 Python Markdown converts Markdown to HTML and can be used as a library or

	6 called from the command line.

	7

	8 ## Basic usage as a module:

	9

	10 import markdown

	11 html = markdown.markdown(your_text_string)

	12

	13 See <https://pythonhosted.org/Markdown/> for more

	14 information and instructions on how to extend the functionality of

	15 Python Markdown. Read that before you try modifying this file.

	16

	17 ## Authors and License

	18

	19 Started by [Manfred Stienstra](http://www.dwerg.net/). Continued and

	20 maintained by [Yuri Takhteyev](http://www.freewisdom.org), [Waylan

	21 Limberg](http://achinghead.com/) and [Artem Yunusov](http://blog.splyer.com).

	22

	23 Contact: markdown@freewisdom.org

	24

	25 Copyright 2007-2013 The Python Markdown Project (v. 1.7 and later)

	26 Copyright 200? Django Software Foundation (OrderedDict implementation)

	27 Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)

	28 Copyright 2004 Manfred Stienstra (the original version)

	29

	30 License: BSD (see LICENSE for details).

	31 """

	32

	33 from __future__ import absolute_import

	34 from __future__ import unicode_literals

	35 from .__version__ import version, version_info # noqa

	36 import codecs

	37 import sys

	38 import logging

	39 import warnings

	40 import importlib

	41 from . import util

	42 from .preprocessors import build_preprocessors

	43 from .blockprocessors import build_block_parser

	44 from .treeprocessors import build_treeprocessors

	45 from .inlinepatterns import build_inlinepatterns

	46 from .postprocessors import build_postprocessors

	47 from .extensions import Extension

	48 from .serializers import to_html_string, to_xhtml_string

	49

	50 __all__ = ['Markdown', 'markdown', 'markdownFromFile']

	51

	52

	53 logger = logging.getLogger('MARKDOWN')

	54

	55

	56 class Markdown(object):

	57 """Convert Markdown to HTML."""

	58

	59 doc_tag = "div" # Element used to wrap document - later removed

	60

	61 option_defaults = {

	62 'html_replacement_text': '[HTML_REMOVED]',

	63 'tab_length': 4,

	64 'enable_attributes': True,

	65 'smart_emphasis': True,

	66 'lazy_ol': True,

	67 }

	68

	69 output_formats = {

	70 'html': to_html_string,

	71 'html4': to_html_string,

	72 'html5': to_html_string,

	73 'xhtml': to_xhtml_string,

	74 'xhtml1': to_xhtml_string,

	75 'xhtml5': to_xhtml_string,

	76 }

	77

	78 ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',

	79 '(', ')', '>', '#', '+', '-', '.', '!']

	80

	81 def __init__(self, args, *kwargs):

	82 """

	83 Creates a new Markdown instance.

	84

	85 Keyword arguments:

	86

	87 * extensions: A list of extensions.

	88 If they are of type string, the module mdx_name.py will be loaded.

	89 If they are a subclass of markdown.Extension, they will be used

	90 as-is.

	91 * extension_configs: Configuration settings for extensions.

	92 * output_format: Format of output. Supported formats are:

	93 * "xhtml1": Outputs XHTML 1.x. Default.

	94 * "xhtml5": Outputs XHTML style tags of HTML 5

	95 * "xhtml": Outputs latest supported version of XHTML

	96 (currently XHTML 1.1).

	97 * "html4": Outputs HTML 4

	98 * "html5": Outputs HTML style tags of HTML 5

	99 * "html": Outputs latest supported version of HTML

	100 (currently HTML 4).

	101 Note that it is suggested that the more specific formats ("xhtml1"

	102 and "html4") be used as "xhtml" or "html" may change in the future

	103 if it makes sense at that time.

	104 * safe_mode: Deprecated! Disallow raw html. One of "remove", "replace"

	105 or "escape".

	106 * html_replacement_text: Deprecated! Text used when safe_mode is set

	107 to "replace".

	108 * tab_length: Length of tabs in the source. Default: 4

	109 * enable_attributes: Enable the conversion of attributes. Default: True

	110 * smart_emphasis: Treat `_connected_words_` intelligently Default: True

	111 * lazy_ol: Ignore number of first item of ordered lists. Default: True

	112

	113 """

	114

	115 # For backward compatibility, loop through old positional args

	116 pos = ['extensions', 'extension_configs', 'safe_mode', 'output_format']

	117 for c, arg in enumerate(args):

	118 if pos[c] not in kwargs:

	119 kwargs[pos[c]] = arg

	120 if c+1 == len(pos): # pragma: no cover

	121 # ignore any additional args

	122 break

	123 if len(args):

	124 warnings.warn('Positional arguments are deprecated in Markdown. '

	125 'Use keyword arguments only.',

	126 DeprecationWarning)

	127

	128 # Loop through kwargs and assign defaults

	129 for option, default in self.option_defaults.items():

	130 setattr(self, option, kwargs.get(option, default))

	131

	132 self.safeMode = kwargs.get('safe_mode', False)

	133 if self.safeMode and 'enable_attributes' not in kwargs:

	134 # Disable attributes in safeMode when not explicitly set

	135 self.enable_attributes = False

	136

	137 if 'safe_mode' in kwargs:

	138 warnings.warn('"safe_mode" is deprecated in Python-Markdown. '

	139 'Use an HTML sanitizer (like '

	140 'Bleach http://bleach.readthedocs.org/) '

	141 'if you are parsing untrusted markdown text. '

	142 'See the 2.6 release notes for more info',

	143 DeprecationWarning)

	144

	145 if 'html_replacement_text' in kwargs:

	146 warnings.warn('The "html_replacement_text" keyword is '

	147 'deprecated along with "safe_mode".',

	148 DeprecationWarning)

	149

	150 self.registeredExtensions = []

	151 self.docType = ""

	152 self.stripTopLevelTags = True

	153

	154 self.build_parser()

	155

	156 self.references = {}

	157 self.htmlStash = util.HtmlStash()

	158 self.registerExtensions(extensions=kwargs.get('extensions', []),

	159 configs=kwargs.get('extension_configs', {}))

	160 self.set_output_format(kwargs.get('output_format', 'xhtml1'))

	161 self.reset()

	162

	163 def build_parser(self):

	164 """ Build the parser from the various parts. """

	165 self.preprocessors = build_preprocessors(self)

	166 self.parser = build_block_parser(self)

	167 self.inlinePatterns = build_inlinepatterns(self)

	168 self.treeprocessors = build_treeprocessors(self)

	169 self.postprocessors = build_postprocessors(self)

	170 return self

	171

	172 def registerExtensions(self, extensions, configs):

	173 """

	174 Register extensions with this instance of Markdown.

	175

	176 Keyword arguments:

	177

	178 * extensions: A list of extensions, which can either

	179 be strings or objects. See the docstring on Markdown.

	180 * configs: A dictionary mapping module names to config options.

	181

	182 """

	183 for ext in extensions:

	184 if isinstance(ext, util.string_type):

	185 ext = self.build_extension(ext, configs.get(ext, {}))

	186 if isinstance(ext, Extension):

	187 ext.extendMarkdown(self, globals())

	188 logger.debug(

	189 'Successfully loaded extension "%s.%s".'

	190 % (ext.__class__.__module__, ext.__class__.__name__)

	191 )

	192 elif ext is not None:

	193 raise TypeError(

	194 'Extension "%s.%s" must be of type: "markdown.Extension"'

	195 % (ext.__class__.__module__, ext.__class__.__name__))

	196

	197 return self

	198

	199 def build_extension(self, ext_name, configs):

	200 """Build extension by name, then return the module.

	201

	202 The extension name may contain arguments as part of the string in the

	203 following format: "extname(key1=value1,key2=value2)"

	204

	205 """

	206

	207 configs = dict(configs)

	208

	209 # Parse extensions config params (ignore the order)

	210 pos = ext_name.find("(") # find the first "("

	211 if pos > 0:

	212 ext_args = ext_name[pos+1:-1]

	213 ext_name = ext_name[:pos]

	214 pairs = [x.split("=") for x in ext_args.split(",")]

	215 configs.update([(x.strip(), y.strip()) for (x, y) in pairs])

	216 warnings.warn('Setting configs in the Named Extension string is '

	217 'deprecated. It is recommended that you '

	218 'pass an instance of the extension class to '

	219 'Markdown or use the "extension_configs" keyword. '

	220 'The current behavior will raise an error in version 2 .7. '

	221 'See the Release Notes for Python-Markdown version '

	222 '2.6 for more info.', DeprecationWarning)

	223

	224 # Get class name (if provided): `path.to.module:ClassName`

	225 ext_name, class_name = ext_name.split(':', 1) \

	226 if ':' in ext_name else (ext_name, '')

	227

	228 # Try loading the extension first from one place, then another

	229 try:

	230 # Assume string uses dot syntax (`path.to.some.module`)

	231 module = importlib.import_module(ext_name)

	232 logger.debug(

	233 'Successfuly imported extension module "%s".' % ext_name

	234 )

	235 # For backward compat (until deprecation)

	236 # check that this is an extension.

	237 if ('.' not in ext_name and not (hasattr(module, 'makeExtension') or

	238 (class_name and hasattr(module, class_name)))):

	239 # We have a name conflict

	240 # eg: extensions=['tables'] and PyTables is installed

	241 raise ImportError

	242 except ImportError:

	243 # Preppend `markdown.extensions.` to name

	244 module_name = '.'.join(['markdown.extensions', ext_name])

	245 try:

	246 module = importlib.import_module(module_name)

	247 logger.debug(

	248 'Successfuly imported extension module "%s".' %

	249 module_name

	250 )

	251 warnings.warn('Using short names for Markdown\'s builtin '

	252 'extensions is deprecated. Use the '

	253 'full path to the extension with Python\'s dot '

	254 'notation (eg: "%s" instead of "%s"). The '

	255 'current behavior will raise an error in version '

	256 '2.7. See the Release Notes for '

	257 'Python-Markdown version 2.6 for more info.' %

	258 (module_name, ext_name),

	259 DeprecationWarning)

	260 except ImportError:

	261 # Preppend `mdx_` to name

	262 module_name_old_style = '_'.join(['mdx', ext_name])

	263 try:

	264 module = importlib.import_module(module_name_old_style)

	265 logger.debug(

	266 'Successfuly imported extension module "%s".' %

	267 module_name_old_style)

	268 warnings.warn('Markdown\'s behavior of prepending "mdx_" '

	269 'to an extension name is deprecated. '

	270 'Use the full path to the '

	271 'extension with Python\'s dot notation '

	272 '(eg: "%s" instead of "%s"). The current '

	273 'behavior will raise an error in version 2.7. '

	274 'See the Release Notes for Python-Markdown '

	275 'version 2.6 for more info.' %

	276 (module_name_old_style, ext_name),

	277 DeprecationWarning)

	278 except ImportError as e:

	279 message = "Failed loading extension '%s' from '%s', '%s' " \

	280 "or '%s'" % (ext_name, ext_name, module_name,

	281 module_name_old_style)

	282 e.args = (message,) + e.args[1:]

	283 raise

	284

	285 if class_name:

	286 # Load given class name from module.

	287 return getattr(module, class_name)(**configs)

	288 else:

	289 # Expect makeExtension() function to return a class.

	290 try:

	291 return module.makeExtension(**configs)

	292 except AttributeError as e:

	293 message = e.args[0]

	294 message = "Failed to initiate extension " \

	295 "'%s': %s" % (ext_name, message)

	296 e.args = (message,) + e.args[1:]

	297 raise

	298

	299 def registerExtension(self, extension):

	300 """ This gets called by the extension """

	301 self.registeredExtensions.append(extension)

	302 return self

	303

	304 def reset(self):

	305 """

	306 Resets all state variables so that we can start with a new text.

	307 """

	308 self.htmlStash.reset()

	309 self.references.clear()

	310

	311 for extension in self.registeredExtensions:

	312 if hasattr(extension, 'reset'):

	313 extension.reset()

	314

	315 return self

	316

	317 def set_output_format(self, format):

	318 """ Set the output format for the class instance. """

	319 self.output_format = format.lower()

	320 try:

	321 self.serializer = self.output_formats[self.output_format]

	322 except KeyError as e:

	323 valid_formats = list(self.output_formats.keys())

	324 valid_formats.sort()

	325 message = 'Invalid Output Format: "%s". Use one of %s.' \

	326 % (self.output_format,

	327 '"' + '", "'.join(valid_formats) + '"')

	328 e.args = (message,) + e.args[1:]

	329 raise

	330 return self

	331

	332 def convert(self, source):

	333 """

	334 Convert markdown to serialized XHTML or HTML.

	335

	336 Keyword arguments:

	337

	338 * source: Source text as a Unicode string.

	339

	340 Markdown processing takes place in five steps:

	341

	342 1. A bunch of "preprocessors" munge the input text.

	343 2. BlockParser() parses the high-level structural elements of the

	344 pre-processed text into an ElementTree.

	345 3. A bunch of "treeprocessors" are run against the ElementTree. One

	346 such treeprocessor runs InlinePatterns against the ElementTree,

	347 detecting inline markup.

	348 4. Some post-processors are run against the text after the ElementTree

	349 has been serialized into text.

	350 5. The output is written to a string.

	351

	352 """

	353

	354 # Fixup the source text

	355 if not source.strip():

	356 return '' # a blank unicode string

	357

	358 try:

	359 source = util.text_type(source)

	360 except UnicodeDecodeError as e:

	361 # Customise error message while maintaining original trackback

	362 e.reason += '. -- Note: Markdown only accepts unicode input!'

	363 raise

	364

	365 # Split into lines and run the line preprocessors.

	366 self.lines = source.split("\n")

	367 for prep in self.preprocessors.values():

	368 self.lines = prep.run(self.lines)

	369

	370 # Parse the high-level elements.

	371 root = self.parser.parseDocument(self.lines).getroot()

	372

	373 # Run the tree-processors

	374 for treeprocessor in self.treeprocessors.values():

	375 newRoot = treeprocessor.run(root)

	376 if newRoot is not None:

	377 root = newRoot

	378

	379 # Serialize _properly_. Strip top-level tags.

	380 output = self.serializer(root)

	381 if self.stripTopLevelTags:

	382 try:

	383 start = output.index(

	384 '<%s>' % self.doc_tag) + len(self.doc_tag) + 2

	385 end = output.rindex('</%s>' % self.doc_tag)

	386 output = output[start:end].strip()

	387 except ValueError: # pragma: no cover

	388 if output.strip().endswith('<%s />' % self.doc_tag):

	389 # We have an empty document

	390 output = ''

	391 else:

	392 # We have a serious problem

	393 raise ValueError('Markdown failed to strip top-level '

	394 'tags. Document=%r' % output.strip())

	395

	396 # Run the text post-processors

	397 for pp in self.postprocessors.values():

	398 output = pp.run(output)

	399

	400 return output.strip()

	401

	402 def convertFile(self, input=None, output=None, encoding=None):

	403 """Converts a Markdown file and returns the HTML as a Unicode string.

	404

	405 Decodes the file using the provided encoding (defaults to utf-8),

	406 passes the file content to markdown, and outputs the html to either

	407 the provided stream or the file with provided name, using the same

	408 encoding as the source file. The 'xmlcharrefreplace' error handler is

	409 used when encoding the output.

	410

	411 Note: This is the only place that decoding and encoding of Unicode

	412 takes place in Python-Markdown. (All other code is Unicode-in /

	413 Unicode-out.)

	414

	415 Keyword arguments:

	416

	417 * input: File object or path. Reads from stdin if `None`.

	418 * output: File object or path. Writes to stdout if `None`.

	419 * encoding: Encoding of input and output files. Defaults to utf-8.

	420

	421 """

	422

	423 encoding = encoding or "utf-8"

	424

	425 # Read the source

	426 if input:

	427 if isinstance(input, util.string_type):

	428 input_file = codecs.open(input, mode="r", encoding=encoding)

	429 else:

	430 input_file = codecs.getreader(encoding)(input)

	431 text = input_file.read()

	432 input_file.close()

	433 else:

	434 text = sys.stdin.read()

	435 if not isinstance(text, util.text_type):

	436 text = text.decode(encoding)

	437

	438 text = text.lstrip('\ufeff') # remove the byte-order mark

	439

	440 # Convert

	441 html = self.convert(text)

	442

	443 # Write to file or stdout

	444 if output:

	445 if isinstance(output, util.string_type):

	446 output_file = codecs.open(output, "w",

	447 encoding=encoding,

	448 errors="xmlcharrefreplace")

	449 output_file.write(html)

	450 output_file.close()

	451 else:

	452 writer = codecs.getwriter(encoding)

	453 output_file = writer(output, errors="xmlcharrefreplace")

	454 output_file.write(html)

	455 # Don't close here. User may want to write more.

	456 else:

	457 # Encode manually and write bytes to stdout.

	458 html = html.encode(encoding, "xmlcharrefreplace")

	459 try:

	460 # Write bytes directly to buffer (Python 3).

	461 sys.stdout.buffer.write(html)

	462 except AttributeError:

	463 # Probably Python 2, which works with bytes by default.

	464 sys.stdout.write(html)

	465

	466 return self

	467

	468

	469 """

	470 EXPORTED FUNCTIONS

	471 =============================================================================

	472

	473 Those are the two functions we really mean to export: markdown() and

	474 markdownFromFile().

	475 """

	476

	477

	478 def markdown(text, args, *kwargs):

	479 """Convert a Markdown string to HTML and return HTML as a Unicode string.

	480

	481 This is a shortcut function for `Markdown` class to cover the most

	482 basic use case. It initializes an instance of Markdown, loads the

	483 necessary extensions and runs the parser on the given text.

	484

	485 Keyword arguments:

	486

	487 * text: Markdown formatted text as Unicode or ASCII string.

	488 * Any arguments accepted by the Markdown class.

	489

	490 Returns: An HTML document as a string.

	491

	492 """

	493 md = Markdown(args, *kwargs)

	494 return md.convert(text)

	495

	496

	497 def markdownFromFile(args, *kwargs):

	498 """Read markdown code from a file and write it to a file or a stream.

	499

	500 This is a shortcut function which initializes an instance of Markdown,

	501 and calls the convertFile method rather than convert.

	502

	503 Keyword arguments:

	504

	505 * input: a file name or readable object.

	506 * output: a file name or writable object.

	507 * encoding: Encoding of input and output.

	508 * Any arguments accepted by the Markdown class.

	509

	510 """

	511 # For backward compatibility loop through positional args

	512 pos = ['input', 'output', 'extensions', 'encoding']

	513 c = 0

	514 for arg in args:

	515 if pos[c] not in kwargs:

	516 kwargs[pos[c]] = arg

	517 c += 1

	518 if c == len(pos):

	519 break

	520 if len(args):

	521 warnings.warn('Positional arguments are depreacted in '

	522 'Markdown and will raise an error in version 2.7. '

	523 'Use keyword arguments only.',

	524 DeprecationWarning)

	525

	526 md = Markdown(**kwargs)

	527 md.convertFile(kwargs.get('input', None),

	528 kwargs.get('output', None),

	529 kwargs.get('encoding', None))

OLD	NEW

« no previous file with comments | « third_party/Python-Markdown/README.md ('k') | third_party/Python-Markdown/markdown/__main__.py » ('j') | no next file with comments »

Side by Side Diff: third_party/Python-Markdown/markdown/__init__.py

Side by Side Diff: third_party/Python-Markdown/markdown/__init__.py