third_party/Python-Markdown/markdown/blockprocessors.py - Issue 1356203004: Check in a simple pure-python based Markdown previewer.

Side by Side Diff: third_party/Python-Markdown/markdown/blockprocessors.py

Issue 1356203004: Check in a simple pure-python based Markdown previewer. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@add

Patch Set: fix license file Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # markdown is released under the BSD license

2 # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)

3 # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)

4 # Copyright 2004 Manfred Stienstra (the original version)

5 #

6 # All rights reserved.

7 #

8 # Redistribution and use in source and binary forms, with or without

9 # modification, are permitted provided that the following conditions are met:

10 #

11 # * Redistributions of source code must retain the above copyright

12 # notice, this list of conditions and the following disclaimer.

13 # * Redistributions in binary form must reproduce the above copyright

14 # notice, this list of conditions and the following disclaimer in the

15 # documentation and/or other materials provided with the distribution.

16 # * Neither the name of the <organization> nor the

17 # names of its contributors may be used to endorse or promote products

18 # derived from this software without specific prior written permission.

19 #

20 # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY

21 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED

22 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE

23 # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT

24 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

30 # POSSIBILITY OF SUCH DAMAGE.

31

32

33 """	1 """

34 CORE MARKDOWN BLOCKPARSER	2 CORE MARKDOWN BLOCKPARSER

35 ===========================================================================	3 ===========================================================================

36	4

37 This parser handles basic parsing of Markdown blocks. It doesn't concern itself	5 This parser handles basic parsing of Markdown blocks. It doesn't concern

38 with inline elements such as bold or italics, but rather just catches	6 itself with inline elements such as bold or italics, but rather just

39 blocks, lists, quotes, etc.	7 catches blocks, lists, quotes, etc.

40	8

41 The BlockParser is made up of a bunch of BlockProcessors, each handling a	9 The BlockParser is made up of a bunch of BlockProcessors, each handling a

42 different type of block. Extensions may add/replace/remove BlockProcessors	10 different type of block. Extensions may add/replace/remove BlockProcessors

43 as they need to alter how markdown blocks are parsed.	11 as they need to alter how markdown blocks are parsed.

44 """	12 """

45	13

46 from __future__ import absolute_import	14 from __future__ import absolute_import

47 from __future__ import division	15 from __future__ import division

48 from __future__ import unicode_literals	16 from __future__ import unicode_literals

49 import logging	17 import logging

50 import re	18 import re

51 from . import util	19 from . import util

52 from .blockparser import BlockParser	20 from .blockparser import BlockParser

53	21

54 logger = logging.getLogger('MARKDOWN')	22 logger = logging.getLogger('MARKDOWN')

55	23

56	24

57 def build_block_parser(md_instance, **kwargs):	25 def build_block_parser(md_instance, **kwargs):

58 """ Build the default block parser used by Markdown. """	26 """ Build the default block parser used by Markdown. """

59 parser = BlockParser(md_instance)	27 parser = BlockParser(md_instance)

60 parser.blockprocessors['empty'] = EmptyBlockProcessor(parser)	28 parser.blockprocessors['empty'] = EmptyBlockProcessor(parser)

61 parser.blockprocessors['indent'] = ListIndentProcessor(parser)	29 parser.blockprocessors['indent'] = ListIndentProcessor(parser)

62 parser.blockprocessors['code'] = CodeBlockProcessor(parser)	30 parser.blockprocessors['code'] = CodeBlockProcessor(parser)

63 parser.blockprocessors['hashheader'] = HashHeaderProcessor(parser)	31 parser.blockprocessors['hashheader'] = HashHeaderProcessor(parser)

64 parser.blockprocessors['setextheader'] = SetextHeaderProcessor(parser)	32 parser.blockprocessors['setextheader'] = SetextHeaderProcessor(parser)

65 parser.blockprocessors['hr'] = HRProcessor(parser)	33 parser.blockprocessors['hr'] = HRProcessor(parser)

66 parser.blockprocessors['olist'] = OListProcessor(parser)	34 parser.blockprocessors['olist'] = OListProcessor(parser)

67 parser.blockprocessors['ulist'] = UListProcessor(parser)	35 parser.blockprocessors['ulist'] = UListProcessor(parser)

68 parser.blockprocessors['quote'] = BlockQuoteProcessor(parser)	36 parser.blockprocessors['quote'] = BlockQuoteProcessor(parser)

69 parser.blockprocessors['paragraph'] = ParagraphProcessor(parser)	37 parser.blockprocessors['paragraph'] = ParagraphProcessor(parser)

70 return parser	38 return parser

71	39

72	40

73 class BlockProcessor:	41 class BlockProcessor:

74 """ Base class for block processors.	42 """ Base class for block processors.

75	43

76 Each subclass will provide the methods below to work with the source and	44 Each subclass will provide the methods below to work with the source and

77 tree. Each processor will need to define it's own ``test`` and ``run``	45 tree. Each processor will need to define it's own ``test`` and ``run``

78 methods. The ``test`` method should return True or False, to indicate	46 methods. The ``test`` method should return True or False, to indicate

79 whether the current block should be processed by this processor. If the	47 whether the current block should be processed by this processor. If the

80 test passes, the parser will call the processors ``run`` method.	48 test passes, the parser will call the processors ``run`` method.

81	49

82 """	50 """

83	51

84 def __init__(self, parser):	52 def __init__(self, parser):

85 self.parser = parser	53 self.parser = parser

(...skipping 21 matching lines...) Expand all Loading...
107	75

108 def looseDetab(self, text, level=1):	76 def looseDetab(self, text, level=1):

109 """ Remove a tab from front of lines but allowing dedented lines. """	77 """ Remove a tab from front of lines but allowing dedented lines. """

110 lines = text.split('\n')	78 lines = text.split('\n')

111 for i in range(len(lines)):	79 for i in range(len(lines)):

112 if lines[i].startswith(' '*self.tab_length*level):	80 if lines[i].startswith(' '*self.tab_length*level):

113 lines[i] = lines[i][self.tab_length*level:]	81 lines[i] = lines[i][self.tab_length*level:]

114 return '\n'.join(lines)	82 return '\n'.join(lines)

115	83

116 def test(self, parent, block):	84 def test(self, parent, block):

117 """ Test for block type. Must be overridden by subclasses.	85 """ Test for block type. Must be overridden by subclasses.

118	86

119 As the parser loops through processors, it will call the ``test`` method	87 As the parser loops through processors, it will call the ``test``

120 on each to determine if the given block of text is of that type. This	88 method on each to determine if the given block of text is of that

121 method must return a boolean ``True`` or ``False``. The actual method of	89 type. This method must return a boolean ``True`` or ``False``. The

122 testing is left to the needs of that particular block type. It could	90 actual method of testing is left to the needs of that particular

123 be as simple as ``block.startswith(some_string)`` or a complex regular	91 block type. It could be as simple as ``block.startswith(some_string)``

124 expression. As the block type may be different depending on the parent	92 or a complex regular expression. As the block type may be different

125 of the block (i.e. inside a list), the parent etree element is also	93 depending on the parent of the block (i.e. inside a list), the parent

126 provided and may be used as part of the test.	94 etree element is also provided and may be used as part of the test.

127	95

128 Keywords:	96 Keywords:

129	97

130 * ``parent``: A etree element which will be the parent of the block.	98 * ``parent``: A etree element which will be the parent of the block.

131 * ``block``: A block of text from the source which has been split at	99 * ``block``: A block of text from the source which has been split at

132 blank lines.	100 blank lines.

133 """	101 """

134 pass	102 pass # pragma: no cover

135	103

136 def run(self, parent, blocks):	104 def run(self, parent, blocks):

137 """ Run processor. Must be overridden by subclasses.	105 """ Run processor. Must be overridden by subclasses.

138	106

139 When the parser determines the appropriate type of a block, the parser	107 When the parser determines the appropriate type of a block, the parser

140 will call the corresponding processor's ``run`` method. This method	108 will call the corresponding processor's ``run`` method. This method

141 should parse the individual lines of the block and append them to	109 should parse the individual lines of the block and append them to

142 the etree.	110 the etree.

143	111

144 Note that both the ``parent`` and ``etree`` keywords are pointers	112 Note that both the ``parent`` and ``etree`` keywords are pointers

145 to instances of the objects which should be edited in place. Each	113 to instances of the objects which should be edited in place. Each

146 processor must make changes to the existing objects as there is no	114 processor must make changes to the existing objects as there is no

147 mechanism to return new/different objects to replace them.	115 mechanism to return new/different objects to replace them.

148	116

149 This means that this method should be adding SubElements or adding text	117 This means that this method should be adding SubElements or adding text

150 to the parent, and should remove (``pop``) or add (``insert``) items to	118 to the parent, and should remove (``pop``) or add (``insert``) items to

151 the list of blocks.	119 the list of blocks.

152	120

153 Keywords:	121 Keywords:

154	122

155 * ``parent``: A etree element which is the parent of the current block.	123 * ``parent``: A etree element which is the parent of the current block.

156 * ``blocks``: A list of all remaining blocks of the document.	124 * ``blocks``: A list of all remaining blocks of the document.

157 """	125 """

158 pass	126 pass # pragma: no cover

159	127

160	128

161 class ListIndentProcessor(BlockProcessor):	129 class ListIndentProcessor(BlockProcessor):

162 """ Process children of list items.	130 """ Process children of list items.

163	131

164 Example:	132 Example:

165 * a list item	133 * a list item

166 process this part	134 process this part

167	135

168 or this part	136 or this part

169	137

170 """	138 """

171	139

172 ITEM_TYPES = ['li']	140 ITEM_TYPES = ['li']

173 LIST_TYPES = ['ul', 'ol']	141 LIST_TYPES = ['ul', 'ol']

174	142

175 def __init__(self, *args):	143 def __init__(self, *args):

176 BlockProcessor.__init__(self, *args)	144 BlockProcessor.__init__(self, *args)

177 self.INDENT_RE = re.compile(r'^(([ ]{%s})+)'% self.tab_length)	145 self.INDENT_RE = re.compile(r'^(([ ]{%s})+)' % self.tab_length)

178	146

179 def test(self, parent, block):	147 def test(self, parent, block):

180 return block.startswith(' '*self.tab_length) and \	148 return block.startswith(' '*self.tab_length) and \

181 not self.parser.state.isstate('detabbed') and \	149 not self.parser.state.isstate('detabbed') and \

182 (parent.tag in self.ITEM_TYPES or \	150 (parent.tag in self.ITEM_TYPES or

183 (len(parent) and parent[-1] and \	151 (len(parent) and parent[-1] is not None and

184 (parent[-1].tag in self.LIST_TYPES)	152 (parent[-1].tag in self.LIST_TYPES)))

185 )

186 )

187	153

188 def run(self, parent, blocks):	154 def run(self, parent, blocks):

189 block = blocks.pop(0)	155 block = blocks.pop(0)

190 level, sibling = self.get_level(parent, block)	156 level, sibling = self.get_level(parent, block)

191 block = self.looseDetab(block, level)	157 block = self.looseDetab(block, level)

192	158

193 self.parser.state.set('detabbed')	159 self.parser.state.set('detabbed')

194 if parent.tag in self.ITEM_TYPES:	160 if parent.tag in self.ITEM_TYPES:

195 # It's possible that this parent has a 'ul' or 'ol' child list	161 # It's possible that this parent has a 'ul' or 'ol' child list

196 # with a member. If that is the case, then that should be the	162 # with a member. If that is the case, then that should be the

197 # parent. This is intended to catch the edge case of an indented	163 # parent. This is intended to catch the edge case of an indented

198 # list whose first member was parsed previous to this point	164 # list whose first member was parsed previous to this point

199 # see OListProcessor	165 # see OListProcessor

200 if len(parent) and parent[-1].tag in self.LIST_TYPES:	166 if len(parent) and parent[-1].tag in self.LIST_TYPES:

201 self.parser.parseBlocks(parent[-1], [block])	167 self.parser.parseBlocks(parent[-1], [block])

202 else:	168 else:

203 # The parent is already a li. Just parse the child block.	169 # The parent is already a li. Just parse the child block.

204 self.parser.parseBlocks(parent, [block])	170 self.parser.parseBlocks(parent, [block])

205 elif sibling.tag in self.ITEM_TYPES:	171 elif sibling.tag in self.ITEM_TYPES:

206 # The sibling is a li. Use it as parent.	172 # The sibling is a li. Use it as parent.

207 self.parser.parseBlocks(sibling, [block])	173 self.parser.parseBlocks(sibling, [block])

(...skipping 10 matching lines...) Expand all Loading...
218 sibling[-1].insert(0, p)	184 sibling[-1].insert(0, p)

219 self.parser.parseChunk(sibling[-1], block)	185 self.parser.parseChunk(sibling[-1], block)

220 else:	186 else:

221 self.create_item(sibling, block)	187 self.create_item(sibling, block)

222 self.parser.state.reset()	188 self.parser.state.reset()

223	189

224 def create_item(self, parent, block):	190 def create_item(self, parent, block):

225 """ Create a new li and parse the block with it as the parent. """	191 """ Create a new li and parse the block with it as the parent. """

226 li = util.etree.SubElement(parent, 'li')	192 li = util.etree.SubElement(parent, 'li')

227 self.parser.parseBlocks(li, [block])	193 self.parser.parseBlocks(li, [block])

228	194

229 def get_level(self, parent, block):	195 def get_level(self, parent, block):

230 """ Get level of indent based on list level. """	196 """ Get level of indent based on list level. """

231 # Get indent level	197 # Get indent level

232 m = self.INDENT_RE.match(block)	198 m = self.INDENT_RE.match(block)

233 if m:	199 if m:

234 indent_level = len(m.group(1))/self.tab_length	200 indent_level = len(m.group(1))/self.tab_length

235 else:	201 else:

236 indent_level = 0	202 indent_level = 0

237 if self.parser.state.isstate('list'):	203 if self.parser.state.isstate('list'):

238 # We're in a tightlist - so we already are at correct parent.	204 # We're in a tightlist - so we already are at correct parent.

239 level = 1	205 level = 1

240 else:	206 else:

241 # We're in a looselist - so we need to find parent.	207 # We're in a looselist - so we need to find parent.

242 level = 0	208 level = 0

243 # Step through children of tree to find matching indent level.	209 # Step through children of tree to find matching indent level.

244 while indent_level > level:	210 while indent_level > level:

245 child = self.lastChild(parent)	211 child = self.lastChild(parent)

246 if child and (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES):	212 if (child is not None and

	213 (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES)):

247 if child.tag in self.LIST_TYPES:	214 if child.tag in self.LIST_TYPES:

248 level += 1	215 level += 1

249 parent = child	216 parent = child

250 else:	217 else:

251 # No more child levels. If we're short of indent_level,	218 # No more child levels. If we're short of indent_level,

252 # we have a code block. So we stop here.	219 # we have a code block. So we stop here.

253 break	220 break

254 return level, parent	221 return level, parent

255	222

256	223

257 class CodeBlockProcessor(BlockProcessor):	224 class CodeBlockProcessor(BlockProcessor):

258 """ Process code blocks. """	225 """ Process code blocks. """

259	226

260 def test(self, parent, block):	227 def test(self, parent, block):

261 return block.startswith(' '*self.tab_length)	228 return block.startswith(' '*self.tab_length)

262	229

263 def run(self, parent, blocks):	230 def run(self, parent, blocks):

264 sibling = self.lastChild(parent)	231 sibling = self.lastChild(parent)

265 block = blocks.pop(0)	232 block = blocks.pop(0)

266 theRest = ''	233 theRest = ''

267 if sibling and sibling.tag == "pre" and len(sibling) \	234 if (sibling is not None and sibling.tag == "pre" and

268 and sibling[0].tag == "code":	235 len(sibling) and sibling[0].tag == "code"):

269 # The previous block was a code block. As blank lines do not start	236 # The previous block was a code block. As blank lines do not start

270 # new code blocks, append this block to the previous, adding back	237 # new code blocks, append this block to the previous, adding back

271 # linebreaks removed from the split into a list.	238 # linebreaks removed from the split into a list.

272 code = sibling[0]	239 code = sibling[0]

273 block, theRest = self.detab(block)	240 block, theRest = self.detab(block)

274 code.text = util.AtomicString('%s\n%s\n' % (code.text, block.rstrip()))	241 code.text = util.AtomicString(

	242 '%s\n%s\n' % (code.text, block.rstrip())

	243 )

275 else:	244 else:

276 # This is a new codeblock. Create the elements and insert text.	245 # This is a new codeblock. Create the elements and insert text.

277 pre = util.etree.SubElement(parent, 'pre')	246 pre = util.etree.SubElement(parent, 'pre')

278 code = util.etree.SubElement(pre, 'code')	247 code = util.etree.SubElement(pre, 'code')

279 block, theRest = self.detab(block)	248 block, theRest = self.detab(block)

280 code.text = util.AtomicString('%s\n' % block.rstrip())	249 code.text = util.AtomicString('%s\n' % block.rstrip())

281 if theRest:	250 if theRest:

282 # This block contained unindented line(s) after the first indented	251 # This block contained unindented line(s) after the first indented

283 # line. Insert these lines as the first block of the master blocks	252 # line. Insert these lines as the first block of the master blocks

284 # list for future processing.	253 # list for future processing.

285 blocks.insert(0, theRest)	254 blocks.insert(0, theRest)

286	255

287	256

288 class BlockQuoteProcessor(BlockProcessor):	257 class BlockQuoteProcessor(BlockProcessor):

289	258

290 RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)')	259 RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)')

291	260

292 def test(self, parent, block):	261 def test(self, parent, block):

293 return bool(self.RE.search(block))	262 return bool(self.RE.search(block))

294	263

295 def run(self, parent, blocks):	264 def run(self, parent, blocks):

296 block = blocks.pop(0)	265 block = blocks.pop(0)

297 m = self.RE.search(block)	266 m = self.RE.search(block)

298 if m:	267 if m:

299 before = block[:m.start()] # Lines before blockquote	268 before = block[:m.start()] # Lines before blockquote

300 # Pass lines before blockquote in recursively for parsing first.	269 # Pass lines before blockquote in recursively for parsing first.

301 self.parser.parseBlocks(parent, [before])	270 self.parser.parseBlocks(parent, [before])

302 # Remove ``> `` from beginning of each line.	271 # Remove ``> `` from beginning of each line.

303 block = '\n'.join([self.clean(line) for line in	272 block = '\n'.join(

304 block[m.start():].split('\n')])	273 [self.clean(line) for line in block[m.start():].split('\n')]

	274 )

305 sibling = self.lastChild(parent)	275 sibling = self.lastChild(parent)

306 if sibling and sibling.tag == "blockquote":	276 if sibling is not None and sibling.tag == "blockquote":

307 # Previous block was a blockquote so set that as this blocks parent	277 # Previous block was a blockquote so set that as this blocks parent

308 quote = sibling	278 quote = sibling

309 else:	279 else:

310 # This is a new blockquote. Create a new parent element.	280 # This is a new blockquote. Create a new parent element.

311 quote = util.etree.SubElement(parent, 'blockquote')	281 quote = util.etree.SubElement(parent, 'blockquote')

312 # Recursively parse block with blockquote as parent.	282 # Recursively parse block with blockquote as parent.

313 # change parser state so blockquotes embedded in lists use p tags	283 # change parser state so blockquotes embedded in lists use p tags

314 self.parser.state.set('blockquote')	284 self.parser.state.set('blockquote')

315 self.parser.parseChunk(quote, block)	285 self.parser.parseChunk(quote, block)

316 self.parser.state.reset()	286 self.parser.state.reset()

317	287

318 def clean(self, line):	288 def clean(self, line):

319 """ Remove ``>`` from beginning of a line. """	289 """ Remove ``>`` from beginning of a line. """

320 m = self.RE.match(line)	290 m = self.RE.match(line)

321 if line.strip() == ">":	291 if line.strip() == ">":

322 return ""	292 return ""

323 elif m:	293 elif m:

324 return m.group(2)	294 return m.group(2)

325 else:	295 else:

326 return line	296 return line

327	297

	298

328 class OListProcessor(BlockProcessor):	299 class OListProcessor(BlockProcessor):

329 """ Process ordered list blocks. """	300 """ Process ordered list blocks. """

330	301

331 TAG = 'ol'	302 TAG = 'ol'

332 # Detect an item (``1. item``). ``group(1)`` contains contents of item.	303 # Detect an item (``1. item``). ``group(1)`` contains contents of item.

333 RE = re.compile(r'^[ ]{0,3}\d+\.[ ]+(.*)')	304 RE = re.compile(r'^[ ]{0,3}\d+\.[ ]+(.*)')

334 # Detect items on secondary lines. they can be of either list type.	305 # Detect items on secondary lines. they can be of either list type.

335 CHILD_RE = re.compile(r'^[ ]{0,3}((\d+\.)|[*+-])[ ]+(.*)')	306 CHILD_RE = re.compile(r'^[ ]{0,3}((\d+\.)|[*+-])[ ]+(.*)')

336 # Detect indented (nested) items of either type	307 # Detect indented (nested) items of either type

337 INDENT_RE = re.compile(r'^[ ]{4,7}((\d+\.)|[*+-])[ ]+.*')	308 INDENT_RE = re.compile(r'^[ ]{4,7}((\d+\.)|[*+-])[ ]+.*')

338 # The integer (python string) with which the lists starts (default=1)	309 # The integer (python string) with which the lists starts (default=1)

339 # Eg: If list is initialized as	310 # Eg: If list is initialized as

340 # 3. Item	311 # 3. Item

341 # The ol tag will get start="3" attribute	312 # The ol tag will get start="3" attribute

342 STARTSWITH = '1'	313 STARTSWITH = '1'

343 # List of allowed sibling tags.	314 # List of allowed sibling tags.

344 SIBLING_TAGS = ['ol', 'ul']	315 SIBLING_TAGS = ['ol', 'ul']

345	316

346 def test(self, parent, block):	317 def test(self, parent, block):

347 return bool(self.RE.match(block))	318 return bool(self.RE.match(block))

348	319

349 def run(self, parent, blocks):	320 def run(self, parent, blocks):

350 # Check for multiple items in one block.	321 # Check for multiple items in one block.

351 items = self.get_items(blocks.pop(0))	322 items = self.get_items(blocks.pop(0))

352 sibling = self.lastChild(parent)	323 sibling = self.lastChild(parent)

353	324

354 if sibling and sibling.tag in self.SIBLING_TAGS:	325 if sibling is not None and sibling.tag in self.SIBLING_TAGS:

355 # Previous block was a list item, so set that as parent	326 # Previous block was a list item, so set that as parent

356 lst = sibling	327 lst = sibling

357 # make sure previous item is in a p- if the item has text, then it	328 # make sure previous item is in a p- if the item has text,

358 # it isn't in a p	329 # then it isn't in a p

359 if lst[-1].text:	330 if lst[-1].text:

360 # since it's possible there are other children for this sibling,	331 # since it's possible there are other children for this

361 # we can't just SubElement the p, we need to insert it as the	332 # sibling, we can't just SubElement the p, we need to

362 # first item	333 # insert it as the first item.

363 p = util.etree.Element('p')	334 p = util.etree.Element('p')

364 p.text = lst[-1].text	335 p.text = lst[-1].text

365 lst[-1].text = ''	336 lst[-1].text = ''

366 lst[-1].insert(0, p)	337 lst[-1].insert(0, p)

367 # if the last item has a tail, then the tail needs to be put in a p	338 # if the last item has a tail, then the tail needs to be put in a p

368 # likely only when a header is not followed by a blank line	339 # likely only when a header is not followed by a blank line

369 lch = self.lastChild(lst[-1])	340 lch = self.lastChild(lst[-1])

370 if lch is not None and lch.tail:	341 if lch is not None and lch.tail:

371 p = util.etree.SubElement(lst[-1], 'p')	342 p = util.etree.SubElement(lst[-1], 'p')

372 p.text = lch.tail.lstrip()	343 p.text = lch.tail.lstrip()

373 lch.tail = ''	344 lch.tail = ''

374	345

375 # parse first block differently as it gets wrapped in a p.	346 # parse first block differently as it gets wrapped in a p.

376 li = util.etree.SubElement(lst, 'li')	347 li = util.etree.SubElement(lst, 'li')

377 self.parser.state.set('looselist')	348 self.parser.state.set('looselist')

378 firstitem = items.pop(0)	349 firstitem = items.pop(0)

379 self.parser.parseBlocks(li, [firstitem])	350 self.parser.parseBlocks(li, [firstitem])

380 self.parser.state.reset()	351 self.parser.state.reset()

381 elif parent.tag in ['ol', 'ul']:	352 elif parent.tag in ['ol', 'ul']:

382 # this catches the edge case of a multi-item indented list whose	353 # this catches the edge case of a multi-item indented list whose

383 # first item is in a blank parent-list item:	354 # first item is in a blank parent-list item:

384 # * * subitem1	355 # * * subitem1

385 # * subitem2	356 # * subitem2

386 # see also ListIndentProcessor	357 # see also ListIndentProcessor

387 lst = parent	358 lst = parent

388 else:	359 else:

389 # This is a new list so create parent with appropriate tag.	360 # This is a new list so create parent with appropriate tag.

390 lst = util.etree.SubElement(parent, self.TAG)	361 lst = util.etree.SubElement(parent, self.TAG)

391 # Check if a custom start integer is set	362 # Check if a custom start integer is set

392 if not self.parser.markdown.lazy_ol and self.STARTSWITH !='1':	363 if not self.parser.markdown.lazy_ol and self.STARTSWITH != '1':

393 lst.attrib['start'] = self.STARTSWITH	364 lst.attrib['start'] = self.STARTSWITH

394	365

395 self.parser.state.set('list')	366 self.parser.state.set('list')

396 # Loop through items in block, recursively parsing each with the	367 # Loop through items in block, recursively parsing each with the

397 # appropriate parent.	368 # appropriate parent.

398 for item in items:	369 for item in items:

399 if item.startswith(' '*self.tab_length):	370 if item.startswith(' '*self.tab_length):

400 # Item is indented. Parse with last item as parent	371 # Item is indented. Parse with last item as parent

401 self.parser.parseBlocks(lst[-1], [item])	372 self.parser.parseBlocks(lst[-1], [item])

402 else:	373 else:

403 # New item. Create li and parse with it as parent	374 # New item. Create li and parse with it as parent

404 li = util.etree.SubElement(lst, 'li')	375 li = util.etree.SubElement(lst, 'li')

405 self.parser.parseBlocks(li, [item])	376 self.parser.parseBlocks(li, [item])

406 self.parser.state.reset()	377 self.parser.state.reset()

407	378

408 def get_items(self, block):	379 def get_items(self, block):

409 """ Break a block into list items. """	380 """ Break a block into list items. """

410 items = []	381 items = []

411 for line in block.split('\n'):	382 for line in block.split('\n'):

412 m = self.CHILD_RE.match(line)	383 m = self.CHILD_RE.match(line)

413 if m:	384 if m:

414 # This is a new list item	385 # This is a new list item

415 # Check first item for the start index	386 # Check first item for the start index

416 if not items and self.TAG=='ol':	387 if not items and self.TAG == 'ol':

417 # Detect the integer value of first list item	388 # Detect the integer value of first list item

418 INTEGER_RE = re.compile('(\d+)')	389 INTEGER_RE = re.compile('(\d+)')

419 self.STARTSWITH = INTEGER_RE.match(m.group(1)).group()	390 self.STARTSWITH = INTEGER_RE.match(m.group(1)).group()

420 # Append to the list	391 # Append to the list

421 items.append(m.group(3))	392 items.append(m.group(3))

422 elif self.INDENT_RE.match(line):	393 elif self.INDENT_RE.match(line):

423 # This is an indented (possibly nested) item.	394 # This is an indented (possibly nested) item.

424 if items[-1].startswith(' '*self.tab_length):	395 if items[-1].startswith(' '*self.tab_length):

425 # Previous item was indented. Append to that item.	396 # Previous item was indented. Append to that item.

426 items[-1] = '%s\n%s' % (items[-1], line)	397 items[-1] = '%s\n%s' % (items[-1], line)

(...skipping 18 matching lines...) Expand all Loading...
445 # Detect a header at start of any line in block	416 # Detect a header at start of any line in block

446 RE = re.compile(r'(^|\n)(?P<level>#{1,6})(?P<header>.*?)#*(\n|$)')	417 RE = re.compile(r'(^|\n)(?P<level>#{1,6})(?P<header>.*?)#*(\n|$)')

447	418

448 def test(self, parent, block):	419 def test(self, parent, block):

449 return bool(self.RE.search(block))	420 return bool(self.RE.search(block))

450	421

451 def run(self, parent, blocks):	422 def run(self, parent, blocks):

452 block = blocks.pop(0)	423 block = blocks.pop(0)

453 m = self.RE.search(block)	424 m = self.RE.search(block)

454 if m:	425 if m:

455 before = block[:m.start()] # All lines before header	426 before = block[:m.start()] # All lines before header

456 after = block[m.end():] # All lines after header	427 after = block[m.end():] # All lines after header

457 if before:	428 if before:

458 # As the header was not the first line of the block and the	429 # As the header was not the first line of the block and the

459 # lines before the header must be parsed first,	430 # lines before the header must be parsed first,

460 # recursively parse this lines as a block.	431 # recursively parse this lines as a block.

461 self.parser.parseBlocks(parent, [before])	432 self.parser.parseBlocks(parent, [before])

462 # Create header using named groups from RE	433 # Create header using named groups from RE

463 h = util.etree.SubElement(parent, 'h%d' % len(m.group('level')))	434 h = util.etree.SubElement(parent, 'h%d' % len(m.group('level')))

464 h.text = m.group('header').strip()	435 h.text = m.group('header').strip()

465 if after:	436 if after:

466 # Insert remaining lines as first block for future parsing.	437 # Insert remaining lines as first block for future parsing.

467 blocks.insert(0, after)	438 blocks.insert(0, after)

468 else:	439 else: # pragma: no cover

469 # This should never happen, but just in case...	440 # This should never happen, but just in case...

470 logger.warn("We've got a problem header: %r" % block)	441 logger.warn("We've got a problem header: %r" % block)

471	442

472	443

473 class SetextHeaderProcessor(BlockProcessor):	444 class SetextHeaderProcessor(BlockProcessor):

474 """ Process Setext-style Headers. """	445 """ Process Setext-style Headers. """

475	446

476 # Detect Setext-style header. Must be first 2 lines of block.	447 # Detect Setext-style header. Must be first 2 lines of block.

477 RE = re.compile(r'^.*?\n[=-]+[ ]*(\n|$)', re.MULTILINE)	448 RE = re.compile(r'^.*?\n[=-]+[ ]*(\n|$)', re.MULTILINE)

478	449

(...skipping 41 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
520 self.parser.parseBlocks(parent, [prelines])	491 self.parser.parseBlocks(parent, [prelines])

521 # create hr	492 # create hr

522 util.etree.SubElement(parent, 'hr')	493 util.etree.SubElement(parent, 'hr')

523 # check for lines in block after hr.	494 # check for lines in block after hr.

524 postlines = block[self.match.end():].lstrip('\n')	495 postlines = block[self.match.end():].lstrip('\n')

525 if postlines:	496 if postlines:

526 # Add lines after hr to master blocks for later parsing.	497 # Add lines after hr to master blocks for later parsing.

527 blocks.insert(0, postlines)	498 blocks.insert(0, postlines)

528	499

529	500

530

531 class EmptyBlockProcessor(BlockProcessor):	501 class EmptyBlockProcessor(BlockProcessor):

532 """ Process blocks that are empty or start with an empty line. """	502 """ Process blocks that are empty or start with an empty line. """

533	503

534 def test(self, parent, block):	504 def test(self, parent, block):

535 return not block or block.startswith('\n')	505 return not block or block.startswith('\n')

536	506

537 def run(self, parent, blocks):	507 def run(self, parent, blocks):

538 block = blocks.pop(0)	508 block = blocks.pop(0)

539 filler = '\n\n'	509 filler = '\n\n'

540 if block:	510 if block:

541 # Starts with empty line	511 # Starts with empty line

542 # Only replace a single line.	512 # Only replace a single line.

543 filler = '\n'	513 filler = '\n'

544 # Save the rest for later.	514 # Save the rest for later.

545 theRest = block[1:]	515 theRest = block[1:]

546 if theRest:	516 if theRest:

547 # Add remaining lines to master blocks for later.	517 # Add remaining lines to master blocks for later.

548 blocks.insert(0, theRest)	518 blocks.insert(0, theRest)

549 sibling = self.lastChild(parent)	519 sibling = self.lastChild(parent)

550 if sibling and sibling.tag == 'pre' and len(sibling) and sibling[0].tag == 'code':	520 if (sibling is not None and sibling.tag == 'pre' and

	521 len(sibling) and sibling[0].tag == 'code'):

551 # Last block is a codeblock. Append to preserve whitespace.	522 # Last block is a codeblock. Append to preserve whitespace.

552 sibling[0].text = util.AtomicString('%s%s' % (sibling[0].text, filler))	523 sibling[0].text = util.AtomicString(

	524 '%s%s' % (sibling[0].text, filler)

	525 )

553	526

554	527

555 class ParagraphProcessor(BlockProcessor):	528 class ParagraphProcessor(BlockProcessor):

556 """ Process Paragraph blocks. """	529 """ Process Paragraph blocks. """

557	530

558 def test(self, parent, block):	531 def test(self, parent, block):

559 return True	532 return True

560	533

561 def run(self, parent, blocks):	534 def run(self, parent, blocks):

562 block = blocks.pop(0)	535 block = blocks.pop(0)

563 if block.strip():	536 if block.strip():

564 # Not a blank block. Add to parent, otherwise throw it away.	537 # Not a blank block. Add to parent, otherwise throw it away.

565 if self.parser.state.isstate('list'):	538 if self.parser.state.isstate('list'):

566 # The parent is a tight-list.	539 # The parent is a tight-list.

567 #	540 #

568 # Check for any children. This will likely only happen in a	541 # Check for any children. This will likely only happen in a

569 # tight-list when a header isn't followed by a blank line.	542 # tight-list when a header isn't followed by a blank line.

570 # For example:	543 # For example:

571 #	544 #

572 # * # Header	545 # * # Header

573 # Line 2 of list item - not part of header.	546 # Line 2 of list item - not part of header.

574 sibling = self.lastChild(parent)	547 sibling = self.lastChild(parent)

575 if sibling is not None:	548 if sibling is not None:

576 # Insert after sibling.	549 # Insert after sibling.

577 if sibling.tail:	550 if sibling.tail:

578 sibling.tail = '%s\n%s' % (sibling.tail, block)	551 sibling.tail = '%s\n%s' % (sibling.tail, block)

579 else:	552 else:

580 sibling.tail = '\n%s' % block	553 sibling.tail = '\n%s' % block

581 else:	554 else:

582 # Append to parent.text	555 # Append to parent.text

583 if parent.text:	556 if parent.text:

584 parent.text = '%s\n%s' % (parent.text, block)	557 parent.text = '%s\n%s' % (parent.text, block)

585 else:	558 else:

586 parent.text = block.lstrip()	559 parent.text = block.lstrip()

587 else:	560 else:

588 # Create a regular paragraph	561 # Create a regular paragraph

589 p = util.etree.SubElement(parent, 'p')	562 p = util.etree.SubElement(parent, 'p')

590 p.text = block.lstrip()	563 p.text = block.lstrip()

OLD	NEW

« no previous file with comments | « third_party/Python-Markdown/markdown/blockparser.py ('k') | third_party/Python-Markdown/markdown/extensions/__init__.py » ('j') | no next file with comments »