third_party/Python-Markdown/markdown/__init__.py - Issue 1389543003: Revert of Check in a simple pure-python based Markdown previewer.

Side by Side Diff: third_party/Python-Markdown/markdown/__init__.py

Issue 1389543003: Revert of Check in a simple pure-python based Markdown previewer. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@add

Patch Set: Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 """

2 Python Markdown

3 ===============

4

5 Python Markdown converts Markdown to HTML and can be used as a library or

6 called from the command line.

7

8 ## Basic usage as a module:

9

10 import markdown

11 html = markdown.markdown(your_text_string)

12

13 See <https://pythonhosted.org/Markdown/> for more

14 information and instructions on how to extend the functionality of

15 Python Markdown. Read that before you try modifying this file.

16

17 ## Authors and License

18

19 Started by [Manfred Stienstra](http://www.dwerg.net/). Continued and

20 maintained by [Yuri Takhteyev](http://www.freewisdom.org), [Waylan

21 Limberg](http://achinghead.com/) and [Artem Yunusov](http://blog.splyer.com).

22

23 Contact: markdown@freewisdom.org

24

25 Copyright 2007-2013 The Python Markdown Project (v. 1.7 and later)

26 Copyright 200? Django Software Foundation (OrderedDict implementation)

27 Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)

28 Copyright 2004 Manfred Stienstra (the original version)

29

30 License: BSD (see LICENSE for details).

31 """

32

33 from __future__ import absolute_import

34 from __future__ import unicode_literals

35 from .__version__ import version, version_info # noqa

36 import codecs

37 import sys

38 import logging

39 import warnings

40 import importlib

41 from . import util

42 from .preprocessors import build_preprocessors

43 from .blockprocessors import build_block_parser

44 from .treeprocessors import build_treeprocessors

45 from .inlinepatterns import build_inlinepatterns

46 from .postprocessors import build_postprocessors

47 from .extensions import Extension

48 from .serializers import to_html_string, to_xhtml_string

49

50 __all__ = ['Markdown', 'markdown', 'markdownFromFile']

51

52

53 logger = logging.getLogger('MARKDOWN')

54

55

class Markdown(object):
    """Convert Markdown to HTML.

    An instance holds the processing pipeline (preprocessors, block parser,
    inline patterns, treeprocessors and postprocessors), the registered
    extensions and the selected output serializer.  Most methods return
    ``self`` so calls can be chained.
    """

    doc_tag = "div"     # Element used to wrap document - later removed

    # Options copied onto the instance by __init__ unless overridden by a
    # keyword argument of the same name.
    option_defaults = {
        'html_replacement_text': '[HTML_REMOVED]',
        'tab_length':            4,
        'enable_attributes':     True,
        'smart_emphasis':        True,
        'lazy_ol':               True,
    }

    # Maps an output format name to the serializer used by convert().
    output_formats = {
        'html':   to_html_string,
        'html4':  to_html_string,
        'html5':  to_html_string,
        'xhtml':  to_xhtml_string,
        'xhtml1': to_xhtml_string,
        'xhtml5': to_xhtml_string,
    }

    ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',
                     '(', ')', '>', '#', '+', '-', '.', '!']

    def __init__(self, *args, **kwargs):
        """
        Creates a new Markdown instance.

        Keyword arguments:

        * extensions: A list of extensions.
           If they are of type string, the module mdx_name.py will be loaded.
           If they are a subclass of markdown.Extension, they will be used
           as-is.
        * extension_configs: Configuration settings for extensions.
        * output_format: Format of output. Supported formats are:
            * "xhtml1": Outputs XHTML 1.x. Default.
            * "xhtml5": Outputs XHTML style tags of HTML 5
            * "xhtml": Outputs latest supported version of XHTML
              (currently XHTML 1.1).
            * "html4": Outputs HTML 4
            * "html5": Outputs HTML style tags of HTML 5
            * "html": Outputs latest supported version of HTML
              (currently HTML 4).
            Note that it is suggested that the more specific formats ("xhtml1"
            and "html4") be used as "xhtml" or "html" may change in the future
            if it makes sense at that time.
        * safe_mode: Deprecated! Disallow raw html. One of "remove", "replace"
          or "escape".
        * html_replacement_text: Deprecated! Text used when safe_mode is set
          to "replace".
        * tab_length: Length of tabs in the source. Default: 4
        * enable_attributes: Enable the conversion of attributes. Default: True
        * smart_emphasis: Treat `_connected_words_` intelligently Default: True
        * lazy_ol: Ignore number of first item of ordered lists. Default: True

        Positional arguments are still accepted for backward compatibility
        (in the order extensions, extension_configs, safe_mode,
        output_format) but trigger a DeprecationWarning.

        """

        # For backward compatibility, map old positional args onto their
        # keyword names.  An explicit keyword wins over a positional value,
        # and positional values beyond the known names are ignored.
        pos = ['extensions', 'extension_configs', 'safe_mode', 'output_format']
        for name, arg in zip(pos, args):
            kwargs.setdefault(name, arg)
        if args:
            warnings.warn('Positional arguments are deprecated in Markdown. '
                          'Use keyword arguments only.',
                          DeprecationWarning)

        # Loop through kwargs and assign defaults
        for option, default in self.option_defaults.items():
            setattr(self, option, kwargs.get(option, default))

        self.safeMode = kwargs.get('safe_mode', False)
        if self.safeMode and 'enable_attributes' not in kwargs:
            # Disable attributes in safeMode when not explicitly set
            self.enable_attributes = False

        if 'safe_mode' in kwargs:
            warnings.warn('"safe_mode" is deprecated in Python-Markdown. '
                          'Use an HTML sanitizer (like '
                          'Bleach http://bleach.readthedocs.org/) '
                          'if you are parsing untrusted markdown text. '
                          'See the 2.6 release notes for more info',
                          DeprecationWarning)

        if 'html_replacement_text' in kwargs:
            warnings.warn('The "html_replacement_text" keyword is '
                          'deprecated along with "safe_mode".',
                          DeprecationWarning)

        self.registeredExtensions = []
        self.docType = ""
        self.stripTopLevelTags = True

        self.build_parser()

        self.references = {}
        self.htmlStash = util.HtmlStash()
        self.registerExtensions(extensions=kwargs.get('extensions', []),
                                configs=kwargs.get('extension_configs', {}))
        self.set_output_format(kwargs.get('output_format', 'xhtml1'))
        self.reset()

    def build_parser(self):
        """ Build the parser from the various parts. Returns self. """
        # NOTE(review): the builders receive this instance; keep the order
        # unchanged in case a later builder reads what an earlier one set.
        self.preprocessors = build_preprocessors(self)
        self.parser = build_block_parser(self)
        self.inlinePatterns = build_inlinepatterns(self)
        self.treeprocessors = build_treeprocessors(self)
        self.postprocessors = build_postprocessors(self)
        return self

    def registerExtensions(self, extensions, configs):
        """
        Register extensions with this instance of Markdown.

        Keyword arguments:

        * extensions: A list of extensions, which can either
           be strings or objects.  See the docstring on Markdown.
        * configs: A dictionary mapping module names to config options.

        Entries that are None are skipped.  Raises TypeError for any other
        entry that is not (or does not resolve to) an Extension instance.
        Returns self.

        """
        for ext in extensions:
            if isinstance(ext, util.string_type):
                # Named extension: import it and instantiate it with the
                # config registered under that same name (if any).
                ext = self.build_extension(ext, configs.get(ext, {}))
            if isinstance(ext, Extension):
                ext.extendMarkdown(self, globals())
                logger.debug(
                    'Successfully loaded extension "%s.%s".'
                    % (ext.__class__.__module__, ext.__class__.__name__)
                )
            elif ext is not None:
                raise TypeError(
                    'Extension "%s.%s" must be of type: "markdown.Extension"'
                    % (ext.__class__.__module__, ext.__class__.__name__))

        return self

    def build_extension(self, ext_name, configs):
        """Build an extension by name and return the resulting object.

        The extension name may contain arguments as part of the string in the
        following format: "extname(key1=value1,key2=value2)"

        A class may be selected with "path.to.module:ClassName"; otherwise
        the module's makeExtension() is called.  The module is looked up
        under the given name, then as "markdown.extensions.<name>", then as
        "mdx_<name>".

        Raises ImportError when none of the three lookups succeeds and
        AttributeError when the module has no makeExtension().

        """

        # Work on a copy so the caller's mapping is never mutated.
        configs = dict(configs)

        # Parse extensions config params (ignore the order)
        pos = ext_name.find("(")  # find the first "("
        if pos > 0:
            ext_args = ext_name[pos+1:-1]
            ext_name = ext_name[:pos]
            # Split on the first "=" only, so that a value may itself
            # contain "=" without breaking the (key, value) unpacking.
            pairs = [x.split("=", 1) for x in ext_args.split(",")]
            configs.update([(x.strip(), y.strip()) for (x, y) in pairs])
            warnings.warn('Setting configs in the Named Extension string is '
                          'deprecated. It is recommended that you '
                          'pass an instance of the extension class to '
                          'Markdown or use the "extension_configs" keyword. '
                          'The current behavior will raise an error in '
                          'version 2.7. '
                          'See the Release Notes for Python-Markdown version '
                          '2.6 for more info.', DeprecationWarning)

        # Get class name (if provided): `path.to.module:ClassName`
        ext_name, class_name = ext_name.split(':', 1) \
            if ':' in ext_name else (ext_name, '')

        # Try loading the extension first from one place, then another
        try:
            # Assume string uses dot syntax (`path.to.some.module`)
            module = importlib.import_module(ext_name)
            logger.debug(
                'Successfuly imported extension module "%s".' % ext_name
            )
            # For backward compat (until deprecation)
            # check that this is an extension.
            if ('.' not in ext_name and not (hasattr(module, 'makeExtension') or
               (class_name and hasattr(module, class_name)))):
                # We have a name conflict
                # eg: extensions=['tables'] and PyTables is installed
                raise ImportError
        except ImportError:
            # Prepend `markdown.extensions.` to name
            module_name = '.'.join(['markdown.extensions', ext_name])
            try:
                module = importlib.import_module(module_name)
                logger.debug(
                    'Successfuly imported extension module "%s".' %
                    module_name
                )
                warnings.warn('Using short names for Markdown\'s builtin '
                              'extensions is deprecated. Use the '
                              'full path to the extension with Python\'s dot '
                              'notation (eg: "%s" instead of "%s"). The '
                              'current behavior will raise an error in version '
                              '2.7. See the Release Notes for '
                              'Python-Markdown version 2.6 for more info.' %
                              (module_name, ext_name),
                              DeprecationWarning)
            except ImportError:
                # Prepend `mdx_` to name
                module_name_old_style = '_'.join(['mdx', ext_name])
                try:
                    module = importlib.import_module(module_name_old_style)
                    logger.debug(
                        'Successfuly imported extension module "%s".' %
                        module_name_old_style)
                    warnings.warn('Markdown\'s behavior of prepending "mdx_" '
                                  'to an extension name is deprecated. '
                                  'Use the full path to the '
                                  'extension with Python\'s dot notation '
                                  '(eg: "%s" instead of "%s"). The current '
                                  'behavior will raise an error in version 2.7. '
                                  'See the Release Notes for Python-Markdown '
                                  'version 2.6 for more info.' %
                                  (module_name_old_style, ext_name),
                                  DeprecationWarning)
                except ImportError as e:
                    # Re-raise the same exception (original traceback kept)
                    # with a message naming every location that was tried.
                    message = "Failed loading extension '%s' from '%s', '%s' " \
                        "or '%s'" % (ext_name, ext_name, module_name,
                                     module_name_old_style)
                    e.args = (message,) + e.args[1:]
                    raise

        if class_name:
            # Load given class name from module.
            return getattr(module, class_name)(**configs)
        else:
            # Expect the module to expose a makeExtension() factory.
            try:
                return module.makeExtension(**configs)
            except AttributeError as e:
                message = e.args[0]
                message = "Failed to initiate extension " \
                          "'%s': %s" % (ext_name, message)
                e.args = (message,) + e.args[1:]
                raise

    def registerExtension(self, extension):
        """ This gets called by the extension. Returns self. """
        self.registeredExtensions.append(extension)
        return self

    def reset(self):
        """
        Resets all state variables so that we can start with a new text.

        Also calls reset() on every registered extension that defines it.
        Returns self.
        """
        self.htmlStash.reset()
        self.references.clear()

        for extension in self.registeredExtensions:
            if hasattr(extension, 'reset'):
                extension.reset()

        return self

    def set_output_format(self, format):
        """ Set the output format for the class instance.

        The name is matched case-insensitively against `output_formats`.
        Raises KeyError (with a message listing the valid names) when the
        format is unknown.  Returns self.
        """
        self.output_format = format.lower()
        try:
            self.serializer = self.output_formats[self.output_format]
        except KeyError as e:
            valid_formats = sorted(self.output_formats)
            message = 'Invalid Output Format: "%s". Use one of %s.' \
                % (self.output_format,
                   '"' + '", "'.join(valid_formats) + '"')
            e.args = (message,) + e.args[1:]
            raise
        return self

    def convert(self, source):
        """
        Convert markdown to serialized XHTML or HTML.

        Keyword arguments:

        * source: Source text as a Unicode string.

        Markdown processing takes place in five steps:

        1. A bunch of "preprocessors" munge the input text.
        2. BlockParser() parses the high-level structural elements of the
           pre-processed text into an ElementTree.
        3. A bunch of "treeprocessors" are run against the ElementTree. One
           such treeprocessor runs InlinePatterns against the ElementTree,
           detecting inline markup.
        4. Some post-processors are run against the text after the ElementTree
           has been serialized into text.
        5. The output is written to a string.

        Returns the converted text with surrounding whitespace stripped, or
        an empty string when the source is blank.

        """

        # Fixup the source text
        if not source.strip():
            return ''  # a blank unicode string

        try:
            source = util.text_type(source)
        except UnicodeDecodeError as e:
            # Customise error message while maintaining original traceback
            e.reason += '. -- Note: Markdown only accepts unicode input!'
            raise

        # Split into lines and run the line preprocessors.
        self.lines = source.split("\n")
        for prep in self.preprocessors.values():
            self.lines = prep.run(self.lines)

        # Parse the high-level elements.
        root = self.parser.parseDocument(self.lines).getroot()

        # Run the tree-processors; one that returns None leaves the current
        # root in place.
        for treeprocessor in self.treeprocessors.values():
            newRoot = treeprocessor.run(root)
            if newRoot is not None:
                root = newRoot

        # Serialize _properly_.  Strip top-level tags.
        output = self.serializer(root)
        if self.stripTopLevelTags:
            try:
                # "+ 2" accounts for the "<" and ">" around the tag name.
                start = output.index(
                    '<%s>' % self.doc_tag) + len(self.doc_tag) + 2
                end = output.rindex('</%s>' % self.doc_tag)
                output = output[start:end].strip()
            except ValueError:  # pragma: no cover
                if output.strip().endswith('<%s />' % self.doc_tag):
                    # We have an empty document
                    output = ''
                else:
                    # We have a serious problem
                    raise ValueError('Markdown failed to strip top-level '
                                     'tags. Document=%r' % output.strip())

        # Run the text post-processors
        for pp in self.postprocessors.values():
            output = pp.run(output)

        return output.strip()

    def convertFile(self, input=None, output=None, encoding=None):
        """Converts a Markdown file and writes the resulting HTML.

        Decodes the file using the provided encoding (defaults to utf-8),
        passes the file content to markdown, and outputs the html to either
        the provided stream or the file with provided name, using the same
        encoding as the source file. The 'xmlcharrefreplace' error handler is
        used when encoding the output.

        Note: This is the only place that decoding and encoding of Unicode
        takes place in Python-Markdown.  (All other code is Unicode-in /
        Unicode-out.)

        Keyword arguments:

        * input: File object or path. Reads from stdin if `None`.
        * output: File object or path. Writes to stdout if `None`.
        * encoding: Encoding of input and output files. Defaults to utf-8.

        Returns self.

        """

        encoding = encoding or "utf-8"

        # Read the source
        if input:
            if isinstance(input, util.string_type):
                input_file = codecs.open(input, mode="r", encoding=encoding)
            else:
                input_file = codecs.getreader(encoding)(input)
            try:
                text = input_file.read()
            finally:
                # Release the handle even if reading or decoding fails.
                input_file.close()
        else:
            text = sys.stdin.read()
            if not isinstance(text, util.text_type):
                text = text.decode(encoding)

        text = text.lstrip('\ufeff')  # remove the byte-order mark

        # Convert
        html = self.convert(text)

        # Write to file or stdout
        if output:
            if isinstance(output, util.string_type):
                output_file = codecs.open(output, "w",
                                          encoding=encoding,
                                          errors="xmlcharrefreplace")
                try:
                    output_file.write(html)
                finally:
                    # We opened this file, so we always close it.
                    output_file.close()
            else:
                writer = codecs.getwriter(encoding)
                output_file = writer(output, errors="xmlcharrefreplace")
                output_file.write(html)
                # Don't close here. User may want to write more.
        else:
            # Encode manually and write bytes to stdout.
            html = html.encode(encoding, "xmlcharrefreplace")
            try:
                # Write bytes directly to buffer (Python 3).
                sys.stdout.buffer.write(html)
            except AttributeError:
                # Probably Python 2, which works with bytes by default.
                sys.stdout.write(html)

        return self

467

468

469 """

470 EXPORTED FUNCTIONS

471 =============================================================================

472

473 Those are the two functions we really mean to export: markdown() and

474 markdownFromFile().

475 """

476

477

def markdown(text, *args, **kwargs):
    """Convert a Markdown string to HTML and return HTML as a Unicode string.

    This is a shortcut function for `Markdown` class to cover the most
    basic use case.  It initializes an instance of Markdown, loads the
    necessary extensions and runs the parser on the given text.

    Keyword arguments:

    * text: Markdown formatted text as Unicode or ASCII string.
    * Any arguments accepted by the Markdown class.

    Returns: An HTML document as a string.

    """
    # Forward everything but the text untouched to the Markdown constructor.
    md = Markdown(*args, **kwargs)
    return md.convert(text)

495

496

def markdownFromFile(*args, **kwargs):
    """Read markdown code from a file and write it to a file or a stream.

    This is a shortcut function which initializes an instance of Markdown,
    and calls the convertFile method rather than convert.

    Keyword arguments:

    * input: a file name or readable object.
    * output: a file name or writable object.
    * encoding: Encoding of input and output.
    * Any arguments accepted by the Markdown class.

    Positional arguments are still accepted for backward compatibility (in
    the order input, output, extensions, encoding) but trigger a
    DeprecationWarning.

    """
    # For backward compatibility, map positional args onto their keyword
    # names.  An explicit keyword wins over a positional value, and
    # positional values beyond the known names are ignored.
    pos = ['input', 'output', 'extensions', 'encoding']
    for name, arg in zip(pos, args):
        kwargs.setdefault(name, arg)
    if args:
        warnings.warn('Positional arguments are deprecated in '
                      'Markdown and will raise an error in version 2.7. '
                      'Use keyword arguments only.',
                      DeprecationWarning)

    # The full kwargs (including input/output/encoding) are handed to the
    # Markdown constructor, exactly as before.
    md = Markdown(**kwargs)
    md.convertFile(kwargs.get('input', None),
                   kwargs.get('output', None),
                   kwargs.get('encoding', None))

OLD	NEW

« no previous file with comments | « third_party/Python-Markdown/README.md ('k') | third_party/Python-Markdown/markdown/__main__.py » ('j') | no next file with comments »

Side by Side Diff: third_party/Python-Markdown/markdown/__init__.py

Side by Side Diff: third_party/Python-Markdown/markdown/__init__.py