trunk/src/third_party/markdown/__init__.py - Issue 132753002: Revert 243980 "Docserver: Support markdown for HTML content."

Side by Side Diff: trunk/src/third_party/markdown/__init__.py

Issue 132753002: Revert 243980 "Docserver: Support markdown for HTML content." (Closed) Base URL: svn://svn.chromium.org/chrome/

Patch Set: Created 6 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 """

2 Python Markdown

3 ===============

4

5 Python Markdown converts Markdown to HTML and can be used as a library or

6 called from the command line.

7

8 ## Basic usage as a module:

9

10 import markdown

11 html = markdown.markdown(your_text_string)

12

13 See <http://packages.python.org/Markdown/> for more

14 information and instructions on how to extend the functionality of

15 Python Markdown. Read that before you try modifying this file.

16

17 ## Authors and License

18

19 Started by [Manfred Stienstra](http://www.dwerg.net/). Continued and

20 maintained by [Yuri Takhteyev](http://www.freewisdom.org), [Waylan

21 Limberg](http://achinghead.com/) and [Artem Yunusov](http://blog.splyer.com).

22

23 Contact: markdown@freewisdom.org

24

25 Copyright 2007-2013 The Python Markdown Project (v. 1.7 and later)

26 Copyright 200? Django Software Foundation (OrderedDict implementation)

27 Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)

28 Copyright 2004 Manfred Stienstra (the original version)

29

30 License: BSD (see LICENSE for details).

31 """

32

33 from __future__ import absolute_import

34 from __future__ import unicode_literals

35 from .__version__ import version, version_info

36 import re

37 import codecs

38 import sys

39 import logging

40 from . import util

41 from .preprocessors import build_preprocessors

42 from .blockprocessors import build_block_parser

43 from .treeprocessors import build_treeprocessors

44 from .inlinepatterns import build_inlinepatterns

45 from .postprocessors import build_postprocessors

46 from .extensions import Extension

47 from .serializers import to_html_string, to_xhtml_string

48

49 __all__ = ['Markdown', 'markdown', 'markdownFromFile']

50

51 logger = logging.getLogger('MARKDOWN')

52

53

class Markdown(object):
    """Convert Markdown to HTML."""

    doc_tag = "div"     # Element used to wrap document - later removed

    # Options copied onto the instance by __init__ unless overridden by a
    # keyword argument of the same name.
    option_defaults = {
        'html_replacement_text' : '[HTML_REMOVED]',
        'tab_length'            : 4,
        'enable_attributes'     : True,
        'smart_emphasis'        : True,
        'lazy_ol'               : True,
    }

    # Maps each accepted output format name to its serializer function.
    output_formats = {
        'html'  : to_html_string,
        'html4' : to_html_string,
        'html5' : to_html_string,
        'xhtml' : to_xhtml_string,
        'xhtml1': to_xhtml_string,
        'xhtml5': to_xhtml_string,
    }

    ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',
                     '(', ')', '>', '#', '+', '-', '.', '!']

    def __init__(self, *args, **kwargs):
        """
        Creates a new Markdown instance.

        Keyword arguments:

        * extensions: A list of extensions.
           If they are of type string, the module mdx_name.py will be loaded.
           If they are a subclass of markdown.Extension, they will be used
           as-is.
        * extension_configs: Configuration settings for extensions.
        * output_format: Format of output. Supported formats are:
            * "xhtml1": Outputs XHTML 1.x. Default.
            * "xhtml5": Outputs XHTML style tags of HTML 5
            * "xhtml": Outputs latest supported version of XHTML
              (currently XHTML 1.1).
            * "html4": Outputs HTML 4
            * "html5": Outputs HTML style tags of HTML 5
            * "html": Outputs latest supported version of HTML
              (currently HTML 4).
            Note that it is suggested that the more specific formats ("xhtml1"
            and "html4") be used as "xhtml" or "html" may change in the future
            if it makes sense at that time.
        * safe_mode: Disallow raw html. One of "remove", "replace" or "escape".
        * html_replacement_text: Text used when safe_mode is set to "replace".
        * tab_length: Length of tabs in the source. Default: 4
        * enable_attributes: Enable the conversion of attributes. Default: True
        * smart_emphasis: Treat `_connected_words_` intelligently Default: True
        * lazy_ol: Ignore number of first item of ordered lists. Default: True

        Positional arguments are accepted for backward compatibility and are
        interpreted, in order, as: extensions, extension_configs, safe_mode,
        output_format. Extra positional arguments are ignored, and an explicit
        keyword argument always wins over its positional counterpart.

        """

        # For backward compatibility, map old positional args onto keywords.
        # zip() stops at the shorter sequence, so surplus args are ignored;
        # setdefault() keeps any explicitly passed keyword.
        pos = ['extensions', 'extension_configs', 'safe_mode', 'output_format']
        for name, arg in zip(pos, args):
            kwargs.setdefault(name, arg)

        # Loop through kwargs and assign defaults
        for option, default in self.option_defaults.items():
            setattr(self, option, kwargs.get(option, default))

        self.safeMode = kwargs.get('safe_mode', False)
        if self.safeMode and 'enable_attributes' not in kwargs:
            # Disable attributes in safeMode when not explicitly set
            self.enable_attributes = False

        self.registeredExtensions = []
        self.docType = ""
        self.stripTopLevelTags = True

        self.build_parser()

        self.references = {}
        self.htmlStash = util.HtmlStash()
        self.set_output_format(kwargs.get('output_format', 'xhtml1'))
        self.registerExtensions(extensions=kwargs.get('extensions', []),
                                configs=kwargs.get('extension_configs', {}))
        self.reset()

    def build_parser(self):
        """ Build the parser from the various parts. Returns self. """
        self.preprocessors = build_preprocessors(self)
        self.parser = build_block_parser(self)
        self.inlinePatterns = build_inlinepatterns(self)
        self.treeprocessors = build_treeprocessors(self)
        self.postprocessors = build_postprocessors(self)
        return self

    def registerExtensions(self, extensions, configs):
        """
        Register extensions with this instance of Markdown.

        Keyword arguments:

        * extensions: A list of extensions, which can either
           be strings or objects.  See the docstring on Markdown.
        * configs: A dictionary mapping module names to config options.

        Raises TypeError for an entry that is neither a string, an
        Extension instance, nor None. Returns self.

        """
        for ext in extensions:
            if isinstance(ext, util.string_type):
                # Strings name a module to load; its config is looked up
                # under the same string.
                ext = self.build_extension(ext, configs.get(ext, []))
            if isinstance(ext, Extension):
                ext.extendMarkdown(self, globals())
            elif ext is not None:
                raise TypeError(
                    'Extension "%s.%s" must be of type: "markdown.Extension"'
                    % (ext.__class__.__module__, ext.__class__.__name__))

        return self

    def build_extension(self, ext_name, configs=()):
        """Build extension by name, then return the module.

        The extension name may contain arguments as part of the string in the
        following format: "extname(key1=value1,key2=value2)"

        * ext_name: Extension name, optionally followed by "(...)" arguments.
        * configs: Iterable of (key, value) pairs (or a mapping) of config
          options; arguments embedded in `ext_name` override these.

        Returns whatever the loaded module's makeExtension() returns.
        Raises ImportError if the module cannot be imported under either
        naming scheme, and AttributeError if makeExtension() fails with one.

        """

        # Parse extensions config params (ignore the order).
        # dict() copies, so the caller's object is never mutated.
        configs = dict(configs)
        pos = ext_name.find("(")  # find the first "("
        if pos > 0:
            ext_args = ext_name[pos+1:-1]
            ext_name = ext_name[:pos]
            # Split on the first "=" only so values may themselves contain "=".
            pairs = [x.split("=", 1) for x in ext_args.split(",")]
            configs.update([(x.strip(), y.strip()) for (x, y) in pairs])

        # Setup the module name
        module_name = ext_name
        if '.' not in ext_name:
            module_name = '.'.join(['third_party.markdown.extensions',
                                    ext_name])

        # Try loading the extension first from one place, then another
        try:  # New style (markdown.extensions.<extension>)
            module = __import__(module_name, {}, {},
                                [module_name.rpartition('.')[0]])
        except ImportError:
            module_name_old_style = '_'.join(['mdx', ext_name])
            try:  # Old style (mdx_<extension>)
                module = __import__(module_name_old_style)
            except ImportError as e:
                message = "Failed loading extension '%s' from '%s' or '%s'" \
                    % (ext_name, module_name, module_name_old_style)
                e.args = (message,) + e.args[1:]
                raise

        # If the module is loaded successfully, we expect it to define a
        # function called makeExtension()
        try:
            return module.makeExtension(configs.items())
        except AttributeError as e:
            message = e.args[0]
            message = "Failed to initiate extension " \
                      "'%s': %s" % (ext_name, message)
            e.args = (message,) + e.args[1:]
            raise

    def registerExtension(self, extension):
        """ This gets called by the extension. Returns self. """
        self.registeredExtensions.append(extension)
        return self

    def reset(self):
        """
        Resets all state variables so that we can start with a new text.

        Clears the HTML stash and the references, then calls reset() on
        every registered extension that defines it. Returns self.
        """
        self.htmlStash.reset()
        self.references.clear()

        for extension in self.registeredExtensions:
            if hasattr(extension, 'reset'):
                extension.reset()

        return self

    def set_output_format(self, format):
        """ Set the output format for the class instance.

        The name is lower-cased before lookup in `output_formats`.
        Raises KeyError (with a message listing the valid names) for an
        unknown format. Returns self.
        """
        self.output_format = format.lower()
        try:
            self.serializer = self.output_formats[self.output_format]
        except KeyError as e:
            valid_formats = sorted(self.output_formats.keys())
            message = 'Invalid Output Format: "%s". Use one of %s.' \
                % (self.output_format,
                   '"' + '", "'.join(valid_formats) + '"')
            e.args = (message,) + e.args[1:]
            raise
        return self

    def convert(self, source):
        """
        Convert markdown to serialized XHTML or HTML.

        Keyword arguments:

        * source: Source text as a Unicode string.

        Markdown processing takes place in five steps:

        1. A bunch of "preprocessors" munge the input text.
        2. BlockParser() parses the high-level structural elements of the
           pre-processed text into an ElementTree.
        3. A bunch of "treeprocessors" are run against the ElementTree. One
           such treeprocessor runs InlinePatterns against the ElementTree,
           detecting inline markup.
        4. Some post-processors are run against the text after the ElementTree
           has been serialized into text.
        5. The output is written to a string.

        Returns the converted text, stripped of surrounding whitespace
        (an empty string for blank input). Raises ValueError if the
        wrapping top-level tag cannot be located in the serialized output.

        """

        # Fixup the source text
        if not source.strip():
            return ''  # a blank unicode string

        try:
            source = util.text_type(source)
        except UnicodeDecodeError as e:
            # Customise error message while maintaining original traceback
            e.reason += '. -- Note: Markdown only accepts unicode input!'
            raise

        # Split into lines and run the line preprocessors.
        self.lines = source.split("\n")
        for prep in self.preprocessors.values():
            self.lines = prep.run(self.lines)

        # Parse the high-level elements.
        root = self.parser.parseDocument(self.lines).getroot()

        # Run the tree-processors
        for treeprocessor in self.treeprocessors.values():
            newRoot = treeprocessor.run(root)
            # Compare against None explicitly: an ElementTree element with
            # no children is falsy, so a plain truth test would discard a
            # legitimately returned (but childless) replacement root.
            if newRoot is not None:
                root = newRoot

        # Serialize _properly_.  Strip top-level tags.
        output = self.serializer(root)
        if self.stripTopLevelTags:
            try:
                start = (output.index('<%s>' % self.doc_tag)
                         + len(self.doc_tag) + 2)
                end = output.rindex('</%s>' % self.doc_tag)
                output = output[start:end].strip()
            except ValueError:
                if output.strip().endswith('<%s />' % self.doc_tag):
                    # We have an empty document
                    output = ''
                else:
                    # We have a serious problem
                    raise ValueError(
                        'Markdown failed to strip top-level tags. '
                        'Document=%r' % output.strip())

        # Run the text post-processors
        for pp in self.postprocessors.values():
            output = pp.run(output)

        return output.strip()

    def convertFile(self, input=None, output=None, encoding=None):
        """Converts a markdown file and writes the HTML to a file or stream.

        Decodes the file using the provided encoding (defaults to utf-8),
        passes the file content to markdown, and outputs the html to either
        the provided stream or the file with provided name, using the same
        encoding as the source file. The 'xmlcharrefreplace' error handler is
        used when encoding the output.

        Note: This is the only place that decoding and encoding of unicode
        takes place in Python-Markdown.  (All other code is unicode-in /
        unicode-out.)

        Keyword arguments:

        * input: File object or path. Reads from stdin if `None`.
        * output: File object or path. Writes to stdout if `None`.
        * encoding: Encoding of input and output files. Defaults to utf-8.

        Returns self.

        """

        encoding = encoding or "utf-8"

        # Read the source
        if input:
            if isinstance(input, util.string_type):
                input_file = codecs.open(input, mode="r", encoding=encoding)
            else:
                input_file = codecs.getreader(encoding)(input)
            try:
                text = input_file.read()
            finally:
                # Close even if reading/decoding fails so the handle
                # is not leaked.
                input_file.close()
        else:
            text = sys.stdin.read()
            if not isinstance(text, util.text_type):
                text = text.decode(encoding)

        text = text.lstrip('\ufeff')  # remove the byte-order mark

        # Convert
        html = self.convert(text)

        # Write to file or stdout
        if output:
            if isinstance(output, util.string_type):
                output_file = codecs.open(output, "w",
                                          encoding=encoding,
                                          errors="xmlcharrefreplace")
                try:
                    output_file.write(html)
                finally:
                    output_file.close()
            else:
                writer = codecs.getwriter(encoding)
                output_file = writer(output, errors="xmlcharrefreplace")
                output_file.write(html)
                # Don't close here. User may want to write more.
        else:
            # Encode manually and write bytes to stdout.
            html = html.encode(encoding, "xmlcharrefreplace")
            # Python 3 exposes the byte stream as sys.stdout.buffer; on
            # Python 2 sys.stdout itself accepts bytes. Looking the
            # attribute up separately avoids masking an AttributeError
            # raised from inside write().
            stream = getattr(sys.stdout, 'buffer', sys.stdout)
            stream.write(html)

        return self

386

387

388 """

389 EXPORTED FUNCTIONS

390 =============================================================================

391

392 Those are the two functions we really mean to export: markdown() and

393 markdownFromFile().

394 """

395

def markdown(text, *args, **kwargs):
    """Convert a markdown string to HTML and return HTML as a unicode string.

    This is a shortcut function for `Markdown` class to cover the most
    basic use case.  It initializes an instance of Markdown, loads the
    necessary extensions and runs the parser on the given text.

    Keyword arguments:

    * text: Markdown formatted text as Unicode or ASCII string.
    * Any arguments accepted by the Markdown class.

    Returns: An HTML document as a string.

    """
    # Forward everything except the text itself to the Markdown constructor.
    md = Markdown(*args, **kwargs)
    return md.convert(text)

413

414

def markdownFromFile(*args, **kwargs):
    """Read markdown code from a file and write it to a file or a stream.

    This is a shortcut function which initializes an instance of Markdown,
    and calls the convertFile method rather than convert.

    Keyword arguments:

    * input: a file name or readable object.
    * output: a file name or writable object.
    * encoding: Encoding of input and output.
    * Any arguments accepted by the Markdown class.

    Positional arguments are accepted for backward compatibility and are
    interpreted, in order, as: input, output, extensions, encoding. Extra
    positional arguments are ignored, and an explicit keyword argument
    always wins over its positional counterpart.

    """
    # For backward compatibility map positional args onto keywords.
    # zip() stops after the last known name; setdefault() never overrides
    # a keyword the caller passed explicitly.
    pos = ['input', 'output', 'extensions', 'encoding']
    for name, arg in zip(pos, args):
        kwargs.setdefault(name, arg)

    # The full kwargs dict (including input/output/encoding) is handed to
    # Markdown, which only reads the keys it knows about.
    md = Markdown(**kwargs)
    md.convertFile(kwargs.get('input', None),
                   kwargs.get('output', None),
                   kwargs.get('encoding', None))

443

OLD	NEW

« no previous file with comments | « trunk/src/third_party/markdown/README.chromium ('k') | trunk/src/third_party/markdown/__main__.py » ('j') | no next file with comments »

Side by Side Diff: trunk/src/third_party/markdown/__init__.py

Side by Side Diff: trunk/src/third_party/markdown/__init__.py