Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(63)

Side by Side Diff: third_party/markdown/__init__.py

Issue 93743005: Support markdown template for html editor (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 7 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 """
2 Python Markdown
3 ===============
4
5 Python Markdown converts Markdown to HTML and can be used as a library or
6 called from the command line.
7
8 ## Basic usage as a module:
9
10 import markdown
11 html = markdown.markdown(your_text_string)
12
13 See <http://packages.python.org/Markdown/> for more
14 information and instructions on how to extend the functionality of
15 Python Markdown. Read that before you try modifying this file.
16
17 ## Authors and License
18
19 Started by [Manfred Stienstra](http://www.dwerg.net/). Continued and
20 maintained by [Yuri Takhteyev](http://www.freewisdom.org), [Waylan
21 Limberg](http://achinghead.com/) and [Artem Yunusov](http://blog.splyer.com).
22
23 Contact: markdown@freewisdom.org
24
25 Copyright 2007-2013 The Python Markdown Project (v. 1.7 and later)
26 Copyright 200? Django Software Foundation (OrderedDict implementation)
27 Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
28 Copyright 2004 Manfred Stienstra (the original version)
29
30 License: BSD (see LICENSE for details).
31 """
32
33 from __future__ import absolute_import
34 from __future__ import unicode_literals
35 from .__version__ import version, version_info
36 import re
37 import codecs
38 import sys
39 import logging
40 from . import util
41 from .preprocessors import build_preprocessors
42 from .blockprocessors import build_block_parser
43 from .treeprocessors import build_treeprocessors
44 from .inlinepatterns import build_inlinepatterns
45 from .postprocessors import build_postprocessors
46 from .extensions import Extension
47 from .serializers import to_html_string, to_xhtml_string
48
49 __all__ = ['Markdown', 'markdown', 'markdownFromFile']
50
51 logger = logging.getLogger('MARKDOWN')
52
53
class Markdown(object):
    """Convert Markdown to HTML.

    An instance bundles the preprocessors, block parser, inline patterns,
    treeprocessors and postprocessors built in `build_parser()`, plus any
    registered extensions. The same instance can be reused for several
    documents; call `reset()` between them to clear per-document state.
    """

    doc_tag = "div"     # Element used to wrap document - later removed

    # Options copied onto the instance as attributes by __init__ (a keyword
    # argument of the same name overrides the default).
    option_defaults = {
        'html_replacement_text': '[HTML_REMOVED]',
        'tab_length': 4,
        'enable_attributes': True,
        'smart_emphasis': True,
        'lazy_ol': True,
    }

    # Maps each accepted output_format name to its serializer function.
    output_formats = {
        'html': to_html_string,
        'html4': to_html_string,
        'html5': to_html_string,
        'xhtml': to_xhtml_string,
        'xhtml1': to_xhtml_string,
        'xhtml5': to_xhtml_string,
    }

    ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',
                     '(', ')', '>', '#', '+', '-', '.', '!']

    def __init__(self, *args, **kwargs):
        """
        Creates a new Markdown instance.

        Positional arguments are accepted for backward compatibility only
        and are mapped, in order, to `extensions`, `extension_configs`,
        `safe_mode` and `output_format`. Any further positional arguments
        are ignored, and an explicit keyword argument always wins over a
        positional one.

        Keyword arguments:

        * extensions: A list of extensions.
           If they are of type string, the module mdx_name.py will be loaded.
           If they are a subclass of markdown.Extension, they will be used
           as-is.
        * extension_configs: Configuration settings for extensions.
        * output_format: Format of output. Supported formats are:
            * "xhtml1": Outputs XHTML 1.x. Default.
            * "xhtml5": Outputs XHTML style tags of HTML 5
            * "xhtml": Outputs latest supported version of XHTML
              (currently XHTML 1.1).
            * "html4": Outputs HTML 4
            * "html5": Outputs HTML style tags of HTML 5
            * "html": Outputs latest supported version of HTML
              (currently HTML 4).
            Note that it is suggested that the more specific formats ("xhtml1"
            and "html4") be used as "xhtml" or "html" may change in the future
            if it makes sense at that time.
        * safe_mode: Disallow raw html. One of "remove", "replace" or "escape".
        * html_replacement_text: Text used when safe_mode is set to "replace".
        * tab_length: Length of tabs in the source. Default: 4
        * enable_attributes: Enable the conversion of attributes. Default: True
        * smart_emphasis: Treat `_connected_words_` intelligently.
          Default: True
        * lazy_ol: Ignore number of first item of ordered lists. Default: True

        """

        # For backward compatibility, map old positional args onto their
        # keyword names. zip() stops at the shorter sequence, so surplus
        # positional args are ignored; setdefault() keeps explicit keywords.
        pos = ['extensions', 'extension_configs', 'safe_mode', 'output_format']
        for name, value in zip(pos, args):
            kwargs.setdefault(name, value)

        # Loop through kwargs and assign defaults
        for option, default in self.option_defaults.items():
            setattr(self, option, kwargs.get(option, default))

        self.safeMode = kwargs.get('safe_mode', False)
        if self.safeMode and 'enable_attributes' not in kwargs:
            # Disable attributes in safeMode when not explicitly set
            self.enable_attributes = False

        self.registeredExtensions = []
        self.docType = ""
        self.stripTopLevelTags = True

        self.build_parser()

        self.references = {}
        self.htmlStash = util.HtmlStash()
        self.set_output_format(kwargs.get('output_format', 'xhtml1'))
        self.registerExtensions(extensions=kwargs.get('extensions', []),
                                configs=kwargs.get('extension_configs', {}))
        self.reset()

    def build_parser(self):
        """ Build the parser from the various parts. Returns self. """
        self.preprocessors = build_preprocessors(self)
        self.parser = build_block_parser(self)
        self.inlinePatterns = build_inlinepatterns(self)
        self.treeprocessors = build_treeprocessors(self)
        self.postprocessors = build_postprocessors(self)
        return self

    def registerExtensions(self, extensions, configs):
        """
        Register extensions with this instance of Markdown.

        Keyword arguments:

        * extensions: A list of extensions, which can either
           be strings or objects.  See the docstring on Markdown.
        * configs: A dictionary mapping module names to config options.

        Entries that are `None` are skipped. Raises TypeError for any other
        entry that is not a `markdown.Extension` instance. Returns self.

        """
        for ext in extensions:
            if isinstance(ext, util.string_type):
                ext = self.build_extension(ext, configs.get(ext, []))
            if isinstance(ext, Extension):
                ext.extendMarkdown(self, globals())
            elif ext is not None:
                raise TypeError(
                    'Extension "%s.%s" must be of type: "markdown.Extension"'
                    % (ext.__class__.__module__, ext.__class__.__name__))

        return self

    def build_extension(self, ext_name, configs=None):
        """Build extension by name, then return the module.

        The extension name may contain arguments as part of the string in the
        following format: "extname(key1=value1,key2=value2)"

        Keyword arguments:

        * ext_name: Extension name, optionally followed by inline arguments.
        * configs: Optional iterable of (key, value) pairs (or a mapping);
          inline arguments in `ext_name` override entries with the same key.

        Returns whatever the extension module's `makeExtension()` returns.

        Raises ImportError if the module cannot be imported under either the
        new-style or old-style name, AttributeError if it does not define
        `makeExtension`, and ValueError if an inline argument has no "=".

        """

        # Parse extensions config params (ignore the order). Work on a copy
        # so the caller's object is never modified.
        configs = dict(configs) if configs else {}
        pos = ext_name.find("(")  # find the first "("
        if pos > 0:
            ext_args = ext_name[pos+1:-1]
            ext_name = ext_name[:pos]
            for pair in ext_args.split(","):
                if not pair.strip():
                    # Tolerate "extname()" and stray/trailing commas.
                    continue
                # Split on the first "=" only so values may contain "=".
                key, sep, value = pair.partition("=")
                if not sep:
                    raise ValueError(
                        "Invalid argument '%s' for extension '%s': "
                        "expected 'key=value'" % (pair.strip(), ext_name))
                configs[key.strip()] = value.strip()

        # Setup the module name
        module_name = ext_name
        if '.' not in ext_name:
            module_name = '.'.join(['third_party.markdown.extensions',
                                    ext_name])

        # Try loading the extension first from one place, then another
        try:  # New style (markdown.extensions.<extension>)
            # A non-empty fromlist makes __import__ return the module named
            # by module_name instead of the top-level package.
            module = __import__(module_name, {}, {},
                                [module_name.rpartition('.')[0]])
        except ImportError:
            module_name_old_style = '_'.join(['mdx', ext_name])
            try:  # Old style (mdx_<extension>)
                module = __import__(module_name_old_style)
            except ImportError as e:
                message = "Failed loading extension '%s' from '%s' or '%s'" \
                    % (ext_name, module_name, module_name_old_style)
                e.args = (message,) + e.args[1:]
                raise

        # If the module is loaded successfully, we expect it to define a
        # function called makeExtension()
        try:
            return module.makeExtension(configs.items())
        except AttributeError as e:
            message = e.args[0]
            message = "Failed to initiate extension " \
                      "'%s': %s" % (ext_name, message)
            e.args = (message,) + e.args[1:]
            raise

    def registerExtension(self, extension):
        """ This gets called by the extension. Returns self. """
        self.registeredExtensions.append(extension)
        return self

    def reset(self):
        """
        Resets all state variables so that we can start with a new text.

        Clears the HTML stash and the reference table, then calls `reset()`
        on every registered extension that defines one. Returns self.
        """
        self.htmlStash.reset()
        self.references.clear()

        for extension in self.registeredExtensions:
            if hasattr(extension, 'reset'):
                extension.reset()

        return self

    def set_output_format(self, format):
        """ Set the output format for the class instance.

        The name is matched case-insensitively against `output_formats`.
        Raises KeyError (with a message listing the valid names) if the
        format is unknown. Returns self.
        """
        self.output_format = format.lower()
        try:
            self.serializer = self.output_formats[self.output_format]
        except KeyError as e:
            valid_formats = list(self.output_formats.keys())
            valid_formats.sort()
            message = 'Invalid Output Format: "%s". Use one of %s.' \
                % (self.output_format,
                   '"' + '", "'.join(valid_formats) + '"')
            e.args = (message,) + e.args[1:]
            raise
        return self

    def convert(self, source):
        """
        Convert markdown to serialized XHTML or HTML.

        Keyword arguments:

        * source: Source text as a Unicode string.

        Returns the converted document as a string; blank input yields ''.
        Raises ValueError if the wrapping top-level tag cannot be found in
        the serialized output.

        Markdown processing takes place in five steps:

        1. A bunch of "preprocessors" munge the input text.
        2. BlockParser() parses the high-level structural elements of the
           pre-processed text into an ElementTree.
        3. A bunch of "treeprocessors" are run against the ElementTree. One
           such treeprocessor runs InlinePatterns against the ElementTree,
           detecting inline markup.
        4. Some post-processors are run against the text after the ElementTree
           has been serialized into text.
        5. The output is written to a string.

        """

        # Fixup the source text
        if not source.strip():
            return ''  # a blank unicode string

        try:
            source = util.text_type(source)
        except UnicodeDecodeError as e:
            # Customise error message while maintaining original traceback
            e.reason += '. -- Note: Markdown only accepts unicode input!'
            raise

        # Split into lines and run the line preprocessors.
        self.lines = source.split("\n")
        for prep in self.preprocessors.values():
            self.lines = prep.run(self.lines)

        # Parse the high-level elements.
        root = self.parser.parseDocument(self.lines).getroot()

        # Run the tree-processors
        for treeprocessor in self.treeprocessors.values():
            newRoot = treeprocessor.run(root)
            # Compare against None explicitly: an ElementTree element with
            # no children is falsy, so a plain truth test would discard a
            # legitimately returned (but childless) replacement root.
            if newRoot is not None:
                root = newRoot

        # Serialize _properly_.  Strip top-level tags.
        output = self.serializer(root)
        if self.stripTopLevelTags:
            try:
                # Skip past the opening "<doc_tag>" (tag name plus "<", ">").
                start = output.index('<%s>' % self.doc_tag) \
                    + len(self.doc_tag) + 2
                end = output.rindex('</%s>' % self.doc_tag)
                output = output[start:end].strip()
            except ValueError:
                if output.strip().endswith('<%s />' % self.doc_tag):
                    # We have an empty document
                    output = ''
                else:
                    # We have a serious problem
                    raise ValueError(
                        'Markdown failed to strip top-level tags. '
                        'Document=%r' % output.strip())

        # Run the text post-processors
        for pp in self.postprocessors.values():
            output = pp.run(output)

        return output.strip()

    def convertFile(self, input=None, output=None, encoding=None):
        """Converts a markdown file and writes the HTML to a file or stream.

        Decodes the file using the provided encoding (defaults to utf-8),
        passes the file content to markdown, and outputs the html to either
        the provided stream or the file with provided name, using the same
        encoding as the source file. The 'xmlcharrefreplace' error handler is
        used when encoding the output.

        **Note:** This is the only place that decoding and encoding of unicode
        takes place in Python-Markdown.  (All other code is unicode-in /
        unicode-out.)

        Keyword arguments:

        * input: File object or path. Reads from stdin if `None`.
        * output: File object or path. Writes to stdout if `None`.
        * encoding: Encoding of input and output files. Defaults to utf-8.

        Returns self.

        """

        encoding = encoding or "utf-8"

        # Read the source
        if input:
            if isinstance(input, util.string_type):
                input_file = codecs.open(input, mode="r", encoding=encoding)
            else:
                input_file = codecs.getreader(encoding)(input)
            try:
                text = input_file.read()
            finally:
                # Release the handle even when reading/decoding fails.
                input_file.close()
        else:
            text = sys.stdin.read()
            if not isinstance(text, util.text_type):
                text = text.decode(encoding)

        text = text.lstrip('\ufeff')  # remove the byte-order mark

        # Convert
        html = self.convert(text)

        # Write to file or stdout
        if output:
            if isinstance(output, util.string_type):
                output_file = codecs.open(output, "w",
                                          encoding=encoding,
                                          errors="xmlcharrefreplace")
                try:
                    output_file.write(html)
                finally:
                    # We opened this file, so always close it.
                    output_file.close()
            else:
                writer = codecs.getwriter(encoding)
                output_file = writer(output, errors="xmlcharrefreplace")
                output_file.write(html)
                # Don't close here. User may want to write more.
        else:
            # Encode manually and write bytes to stdout.
            html = html.encode(encoding, "xmlcharrefreplace")
            try:
                # Write bytes directly to buffer (Python 3).
                sys.stdout.buffer.write(html)
            except AttributeError:
                # Probably Python 2, which works with bytes by default.
                sys.stdout.write(html)

        return self
386
387
388 """
389 EXPORTED FUNCTIONS
390 =============================================================================
391
392 Those are the two functions we really mean to export: markdown() and
393 markdownFromFile().
394 """
395
def markdown(text, *args, **kwargs):
    """Render a Markdown string as HTML in a single call.

    Convenience wrapper for the common case: a fresh `Markdown` instance is
    created from the supplied arguments and `text` is passed to its
    `convert()` method.

    Keyword arguments:

    * text: Markdown formatted text as Unicode or ASCII string.
    * Any arguments accepted by the Markdown class.

    Returns: An HTML document as a string.

    """
    return Markdown(*args, **kwargs).convert(text)
413
414
def markdownFromFile(*args, **kwargs):
    """Convert Markdown read from a file and write the result out.

    Convenience wrapper that creates a `Markdown` instance from the keyword
    arguments and calls its `convertFile()` method (instead of `convert()`).

    Keyword arguments:

    * input: a file name or readable object.
    * output: a file name or writable object.
    * encoding: Encoding of input and output.
    * Any arguments accepted by the Markdown class.

    """
    # Backward compatibility: fold positional arguments into kwargs using
    # their legacy order. An explicit keyword always takes precedence, and
    # positional arguments beyond the fourth are dropped.
    legacy_names = ('input', 'output', 'extensions', 'encoding')
    for key, value in zip(legacy_names, args):
        kwargs.setdefault(key, value)

    source = kwargs.get('input', None)
    target = kwargs.get('output', None)
    charset = kwargs.get('encoding', None)

    converter = Markdown(**kwargs)
    converter.convertFile(source, target, charset)
443
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698