Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: third_party/Python-Markdown/markdown/__init__.py

Issue 1389543003: Revert of Check in a simple pure-python based Markdown previewer. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@add
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/Python-Markdown/README.md ('k') | third_party/Python-Markdown/markdown/__main__.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 """
2 Python Markdown
3 ===============
4
5 Python Markdown converts Markdown to HTML and can be used as a library or
6 called from the command line.
7
8 ## Basic usage as a module:
9
10 import markdown
11 html = markdown.markdown(your_text_string)
12
13 See <https://pythonhosted.org/Markdown/> for more
14 information and instructions on how to extend the functionality of
15 Python Markdown. Read that before you try modifying this file.
16
17 ## Authors and License
18
19 Started by [Manfred Stienstra](http://www.dwerg.net/). Continued and
20 maintained by [Yuri Takhteyev](http://www.freewisdom.org), [Waylan
21 Limberg](http://achinghead.com/) and [Artem Yunusov](http://blog.splyer.com).
22
23 Contact: markdown@freewisdom.org
24
25 Copyright 2007-2013 The Python Markdown Project (v. 1.7 and later)
26 Copyright 200? Django Software Foundation (OrderedDict implementation)
27 Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
28 Copyright 2004 Manfred Stienstra (the original version)
29
30 License: BSD (see LICENSE for details).
31 """
32
33 from __future__ import absolute_import
34 from __future__ import unicode_literals
35 from .__version__ import version, version_info # noqa
36 import codecs
37 import sys
38 import logging
39 import warnings
40 import importlib
41 from . import util
42 from .preprocessors import build_preprocessors
43 from .blockprocessors import build_block_parser
44 from .treeprocessors import build_treeprocessors
45 from .inlinepatterns import build_inlinepatterns
46 from .postprocessors import build_postprocessors
47 from .extensions import Extension
48 from .serializers import to_html_string, to_xhtml_string
49
# Public API of the package: the names bound by ``from markdown import *``.
__all__ = ['Markdown', 'markdown', 'markdownFromFile']


# Logger shared by this module; it is registered under the name 'MARKDOWN'.
logger = logging.getLogger('MARKDOWN')
54
55
class Markdown(object):
    """Convert Markdown to HTML.

    An instance bundles the preprocessors, block parser, inline patterns,
    tree processors and postprocessors built by `build_parser()`, plus any
    registered extensions, and exposes `convert()` / `convertFile()` to run
    them over a document.
    """

    doc_tag = "div"     # Element used to wrap document - later removed

    # Options copied onto the instance as attributes by `__init__`, unless
    # overridden by a keyword argument of the same name.
    option_defaults = {
        'html_replacement_text': '[HTML_REMOVED]',
        'tab_length': 4,
        'enable_attributes': True,
        'smart_emphasis': True,
        'lazy_ol': True,
    }

    # Maps each accepted `output_format` name to its serializer function.
    output_formats = {
        'html': to_html_string,
        'html4': to_html_string,
        'html5': to_html_string,
        'xhtml': to_xhtml_string,
        'xhtml1': to_xhtml_string,
        'xhtml5': to_xhtml_string,
    }

    ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',
                     '(', ')', '>', '#', '+', '-', '.', '!']

    def __init__(self, *args, **kwargs):
        """
        Creates a new Markdown instance.

        Keyword arguments:

        * extensions: A list of extensions.
            Strings are resolved to an extension by `build_extension()`.
            Instances of markdown.Extension are used as-is.
        * extension_configs: Configuration settings for extensions.
        * output_format: Format of output. Supported formats are:
            * "xhtml1": Outputs XHTML 1.x. Default.
            * "xhtml5": Outputs XHTML style tags of HTML 5
            * "xhtml": Outputs latest supported version of XHTML
              (currently XHTML 1.1).
            * "html4": Outputs HTML 4
            * "html5": Outputs HTML style tags of HTML 5
            * "html": Outputs latest supported version of HTML
              (currently HTML 4).
            Note that it is suggested that the more specific formats ("xhtml1"
            and "html4") be used as "xhtml" or "html" may change in the future
            if it makes sense at that time.
        * safe_mode: Deprecated! Disallow raw html. One of "remove", "replace"
          or "escape".
        * html_replacement_text: Deprecated! Text used when safe_mode is set
          to "replace".
        * tab_length: Length of tabs in the source. Default: 4
        * enable_attributes: Enable the conversion of attributes. Default: True
        * smart_emphasis: Treat `_connected_words_` intelligently Default: True
        * lazy_ol: Ignore number of first item of ordered lists. Default: True

        Positional arguments are still accepted, in the order `extensions`,
        `extension_configs`, `safe_mode`, `output_format`, but emit a
        DeprecationWarning. An explicit keyword wins over a positional value.

        """

        # For backward compatibility, map old positional args onto keywords.
        # zip() stops at the shorter sequence, so surplus args are ignored.
        pos = ['extensions', 'extension_configs', 'safe_mode', 'output_format']
        for name, arg in zip(pos, args):
            kwargs.setdefault(name, arg)
        if args:
            warnings.warn('Positional arguments are deprecated in Markdown. '
                          'Use keyword arguments only.',
                          DeprecationWarning)

        # Loop through kwargs and assign defaults
        for option, default in self.option_defaults.items():
            setattr(self, option, kwargs.get(option, default))

        self.safeMode = kwargs.get('safe_mode', False)
        if self.safeMode and 'enable_attributes' not in kwargs:
            # Disable attributes in safeMode when not explicitly set
            self.enable_attributes = False

        if 'safe_mode' in kwargs:
            warnings.warn('"safe_mode" is deprecated in Python-Markdown. '
                          'Use an HTML sanitizer (like '
                          'Bleach http://bleach.readthedocs.org/) '
                          'if you are parsing untrusted markdown text. '
                          'See the 2.6 release notes for more info',
                          DeprecationWarning)

        if 'html_replacement_text' in kwargs:
            warnings.warn('The "html_replacement_text" keyword is '
                          'deprecated along with "safe_mode".',
                          DeprecationWarning)

        self.registeredExtensions = []
        self.docType = ""
        self.stripTopLevelTags = True

        self.build_parser()

        self.references = {}
        self.htmlStash = util.HtmlStash()
        self.registerExtensions(extensions=kwargs.get('extensions', []),
                                configs=kwargs.get('extension_configs', {}))
        self.set_output_format(kwargs.get('output_format', 'xhtml1'))
        self.reset()

    def build_parser(self):
        """ Build the parser from the various parts. Returns `self`. """
        self.preprocessors = build_preprocessors(self)
        self.parser = build_block_parser(self)
        self.inlinePatterns = build_inlinepatterns(self)
        self.treeprocessors = build_treeprocessors(self)
        self.postprocessors = build_postprocessors(self)
        return self

    def registerExtensions(self, extensions, configs):
        """
        Register extensions with this instance of Markdown.

        Keyword arguments:

        * extensions: A list of extensions, which can either
           be strings or objects.  See the docstring on Markdown.
        * configs: A dictionary mapping module names to config options.

        Entries that are `None` are skipped. Any other entry that is not a
        markdown.Extension (after string names have been built) raises
        TypeError. Returns `self`.

        """
        for ext in extensions:
            if isinstance(ext, util.string_type):
                ext = self.build_extension(ext, configs.get(ext, {}))
            if isinstance(ext, Extension):
                ext.extendMarkdown(self, globals())
                logger.debug(
                    'Successfully loaded extension "%s.%s".',
                    ext.__class__.__module__, ext.__class__.__name__
                )
            elif ext is not None:
                raise TypeError(
                    'Extension "%s.%s" must be of type: "markdown.Extension"'
                    % (ext.__class__.__module__, ext.__class__.__name__))

        return self

    def build_extension(self, ext_name, configs):
        """Build extension by name, then return the extension object.

        The extension name may contain arguments as part of the string in the
        following format: "extname(key1=value1,key2=value2)". It may also
        name a class explicitly as "path.to.module:ClassName".

        The module is looked up as `ext_name`, then as
        `markdown.extensions.<ext_name>`, then as `mdx_<ext_name>`; an
        ImportError naming all three is raised if none can be imported.

        """

        # Copy so that inline arguments never mutate the caller's mapping.
        configs = dict(configs)

        # Parse extensions config params (ignore the order)
        pos = ext_name.find("(")  # find the first "("
        if pos > 0:
            ext_args = ext_name[pos+1:-1]
            ext_name = ext_name[:pos]
            # Split on the first "=" only, so values may themselves
            # contain "=" without breaking the (key, value) unpacking.
            pairs = [x.split("=", 1) for x in ext_args.split(",")]
            configs.update([(x.strip(), y.strip()) for (x, y) in pairs])
            warnings.warn('Setting configs in the Named Extension string is '
                          'deprecated. It is recommended that you '
                          'pass an instance of the extension class to '
                          'Markdown or use the "extension_configs" keyword. '
                          'The current behavior will raise an error in '
                          'version 2.7. '
                          'See the Release Notes for Python-Markdown version '
                          '2.6 for more info.', DeprecationWarning)

        # Get class name (if provided): `path.to.module:ClassName`
        ext_name, class_name = ext_name.split(':', 1) \
            if ':' in ext_name else (ext_name, '')

        # Try loading the extension first from one place, then another
        try:
            # Assume string uses dot syntax (`path.to.some.module`)
            module = importlib.import_module(ext_name)
            logger.debug(
                'Successfuly imported extension module "%s".' % ext_name
            )
            # For backward compat (until deprecation)
            # check that this is an extension.
            if ('.' not in ext_name and not (hasattr(module, 'makeExtension') or
               (class_name and hasattr(module, class_name)))):
                # We have a name conflict
                # eg: extensions=['tables'] and PyTables is installed
                raise ImportError
        except ImportError:
            # Prepend `markdown.extensions.` to name
            module_name = '.'.join(['markdown.extensions', ext_name])
            try:
                module = importlib.import_module(module_name)
                logger.debug(
                    'Successfuly imported extension module "%s".' %
                    module_name
                )
                warnings.warn('Using short names for Markdown\'s builtin '
                              'extensions is deprecated. Use the '
                              'full path to the extension with Python\'s dot '
                              'notation (eg: "%s" instead of "%s"). The '
                              'current behavior will raise an error in version '
                              '2.7. See the Release Notes for '
                              'Python-Markdown version 2.6 for more info.' %
                              (module_name, ext_name),
                              DeprecationWarning)
            except ImportError:
                # Prepend `mdx_` to name
                module_name_old_style = '_'.join(['mdx', ext_name])
                try:
                    module = importlib.import_module(module_name_old_style)
                    logger.debug(
                        'Successfuly imported extension module "%s".' %
                        module_name_old_style)
                    warnings.warn('Markdown\'s behavior of prepending "mdx_" '
                                  'to an extension name is deprecated. '
                                  'Use the full path to the '
                                  'extension with Python\'s dot notation '
                                  '(eg: "%s" instead of "%s"). The current '
                                  'behavior will raise an error in version 2.7. '
                                  'See the Release Notes for Python-Markdown '
                                  'version 2.6 for more info.' %
                                  (module_name_old_style, ext_name),
                                  DeprecationWarning)
                except ImportError as e:
                    # Replace the message but keep the original traceback.
                    message = "Failed loading extension '%s' from '%s', '%s' " \
                        "or '%s'" % (ext_name, ext_name, module_name,
                                     module_name_old_style)
                    e.args = (message,) + e.args[1:]
                    raise

        if class_name:
            # Load given class name from module.
            return getattr(module, class_name)(**configs)
        else:
            # Expect  makeExtension() function to return a class.
            try:
                return module.makeExtension(**configs)
            except AttributeError as e:
                message = e.args[0]
                message = "Failed to initiate extension " \
                          "'%s': %s" % (ext_name, message)
                e.args = (message,) + e.args[1:]
                raise

    def registerExtension(self, extension):
        """ This gets called by the extension. Returns `self`. """
        self.registeredExtensions.append(extension)
        return self

    def reset(self):
        """
        Resets all state variables so that we can start with a new text.

        Clears the HTML stash and the references, then calls `reset()` on
        every registered extension that defines it. Returns `self`.
        """
        self.htmlStash.reset()
        self.references.clear()

        for extension in self.registeredExtensions:
            if hasattr(extension, 'reset'):
                extension.reset()

        return self

    def set_output_format(self, format):
        """ Set the output format for the class instance.

        The name is lower-cased before lookup in `output_formats`. An unknown
        name raises KeyError with a message listing the valid formats.
        Returns `self`.
        """
        self.output_format = format.lower()
        try:
            self.serializer = self.output_formats[self.output_format]
        except KeyError as e:
            valid_formats = sorted(self.output_formats)
            message = 'Invalid Output Format: "%s". Use one of %s.' \
                % (self.output_format,
                   '"' + '", "'.join(valid_formats) + '"')
            e.args = (message,) + e.args[1:]
            raise
        return self

    def convert(self, source):
        """
        Convert markdown to serialized XHTML or HTML.

        Keyword arguments:

        * source: Source text as a Unicode string.

        Markdown processing takes place in five steps:

        1. A bunch of "preprocessors" munge the input text.
        2. BlockParser() parses the high-level structural elements of the
           pre-processed text into an ElementTree.
        3. A bunch of "treeprocessors" are run against the ElementTree. One
           such treeprocessor runs InlinePatterns against the ElementTree,
           detecting inline markup.
        4. Some post-processors are run against the text after the ElementTree
           has been serialized into text.
        5. The output is written to a string.

        Returns the converted text with surrounding whitespace stripped, or
        an empty string when `source` is blank.

        """

        # Fixup the source text
        if not source.strip():
            return ''  # a blank unicode string

        try:
            source = util.text_type(source)
        except UnicodeDecodeError as e:
            # Customise error message while maintaining original traceback
            e.reason += '. -- Note: Markdown only accepts unicode input!'
            raise

        # Split into lines and run the line preprocessors.
        self.lines = source.split("\n")
        for prep in self.preprocessors.values():
            self.lines = prep.run(self.lines)

        # Parse the high-level elements.
        root = self.parser.parseDocument(self.lines).getroot()

        # Run the tree-processors; one may return a replacement root.
        for treeprocessor in self.treeprocessors.values():
            newRoot = treeprocessor.run(root)
            if newRoot is not None:
                root = newRoot

        # Serialize _properly_.  Strip top-level tags.
        output = self.serializer(root)
        if self.stripTopLevelTags:
            try:
                start = output.index(
                    '<%s>' % self.doc_tag) + len(self.doc_tag) + 2
                end = output.rindex('</%s>' % self.doc_tag)
                output = output[start:end].strip()
            except ValueError:  # pragma: no cover
                if output.strip().endswith('<%s />' % self.doc_tag):
                    # We have an empty document
                    output = ''
                else:
                    # We have a serious problem
                    raise ValueError('Markdown failed to strip top-level '
                                     'tags. Document=%r' % output.strip())

        # Run the text post-processors
        for pp in self.postprocessors.values():
            output = pp.run(output)

        return output.strip()

    def convertFile(self, input=None, output=None, encoding=None):
        """Converts a Markdown file and writes the resulting HTML.

        Decodes the file using the provided encoding (defaults to utf-8),
        passes the file content to markdown, and outputs the html to either
        the provided stream or the file with provided name, using the same
        encoding as the source file. The 'xmlcharrefreplace' error handler is
        used when encoding the output.

        **Note:** This is the only place that decoding and encoding of Unicode
        takes place in Python-Markdown.  (All other code is Unicode-in /
        Unicode-out.)

        Keyword arguments:

        * input: File object or path. Reads from stdin if `None`.
        * output: File object or path. Writes to stdout if `None`.
        * encoding: Encoding of input and output files. Defaults to utf-8.

        Returns `self`.

        """

        encoding = encoding or "utf-8"

        # Read the source
        if input:
            if isinstance(input, util.string_type):
                input_file = codecs.open(input, mode="r", encoding=encoding)
            else:
                input_file = codecs.getreader(encoding)(input)
            try:
                text = input_file.read()
            finally:
                # Release the handle even if reading/decoding fails.
                input_file.close()
        else:
            text = sys.stdin.read()
            if not isinstance(text, util.text_type):
                text = text.decode(encoding)

        text = text.lstrip('\ufeff')  # remove the byte-order mark

        # Convert
        html = self.convert(text)

        # Write to file or stdout
        if output:
            if isinstance(output, util.string_type):
                output_file = codecs.open(output, "w",
                                          encoding=encoding,
                                          errors="xmlcharrefreplace")
                try:
                    output_file.write(html)
                finally:
                    # We opened this file, so always close it.
                    output_file.close()
            else:
                writer = codecs.getwriter(encoding)
                output_file = writer(output, errors="xmlcharrefreplace")
                output_file.write(html)
                # Don't close here. User may want to write more.
        else:
            # Encode manually and write bytes to stdout.
            html = html.encode(encoding, "xmlcharrefreplace")
            try:
                # Write bytes directly to buffer (Python 3).
                sys.stdout.buffer.write(html)
            except AttributeError:
                # Probably Python 2, which works with bytes by default.
                sys.stdout.write(html)

        return self
467
468
469 """
470 EXPORTED FUNCTIONS
471 =============================================================================
472
473 Those are the two functions we really mean to export: markdown() and
474 markdownFromFile().
475 """
476
477
def markdown(text, *args, **kwargs):
    """Render a Markdown string to HTML in a single call.

    Convenience wrapper for the common case: a fresh `Markdown` instance is
    created from the supplied arguments and immediately used to convert
    `text`.

    Keyword arguments:

    * text: Markdown formatted text as Unicode or ASCII string.
    * Any arguments accepted by the Markdown class.

    Returns: whatever `Markdown.convert()` returns for `text`.

    """
    return Markdown(*args, **kwargs).convert(text)
495
496
def markdownFromFile(*args, **kwargs):
    """Read markdown code from a file and write it to a file or a stream.

    This is a shortcut function which initializes an instance of Markdown,
    and calls the convertFile method rather than convert.

    Keyword arguments:

    * input: a file name or readable object.
    * output: a file name or writable object.
    * encoding: Encoding of input and output.
    * Any arguments accepted by the Markdown class.

    Positional arguments are still accepted, in the order `input`, `output`,
    `extensions`, `encoding`, but emit a DeprecationWarning. An explicit
    keyword wins over a positional value. Returns None.

    """
    # For backward compatibility, map positional args onto keywords.
    # zip() stops at the shorter sequence, so surplus args are ignored.
    pos = ['input', 'output', 'extensions', 'encoding']
    for name, arg in zip(pos, args):
        kwargs.setdefault(name, arg)
    if args:
        warnings.warn('Positional arguments are deprecated in '
                      'Markdown and will raise an error in version 2.7. '
                      'Use keyword arguments only.',
                      DeprecationWarning)

    # All keywords (including input/output/encoding) are forwarded to the
    # constructor, exactly as before.
    md = Markdown(**kwargs)
    md.convertFile(kwargs.get('input', None),
                   kwargs.get('output', None),
                   kwargs.get('encoding', None))
OLDNEW
« no previous file with comments | « third_party/Python-Markdown/README.md ('k') | third_party/Python-Markdown/markdown/__main__.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698