OLD | NEW |
(Empty) | |
| 1 # markdown is released under the BSD license |
| 2 # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later) |
| 3 # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) |
| 4 # Copyright 2004 Manfred Stienstra (the original version) |
| 5 # |
| 6 # All rights reserved. |
| 7 # |
| 8 # Redistribution and use in source and binary forms, with or without |
| 9 # modification, are permitted provided that the following conditions are met: |
| 10 # |
| 11 # * Redistributions of source code must retain the above copyright |
| 12 # notice, this list of conditions and the following disclaimer. |
| 13 # * Redistributions in binary form must reproduce the above copyright |
| 14 # notice, this list of conditions and the following disclaimer in the |
| 15 # documentation and/or other materials provided with the distribution. |
| 16 # * Neither the name of the <organization> nor the |
| 17 # names of its contributors may be used to endorse or promote products |
| 18 # derived from this software without specific prior written permission. |
| 19 # |
| 20 # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY |
| 21 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| 22 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| 23 # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT |
| 24 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 30 # POSSIBILITY OF SUCH DAMAGE. |
| 31 |
| 32 |
| 33 """ |
| 34 CORE MARKDOWN BLOCKPARSER |
| 35 =========================================================================== |
| 36 |
| 37 This parser handles basic parsing of Markdown blocks. It doesn't concern itself |
| 38 with inline elements such as **bold** or *italics*, but rather just catches |
| 39 blocks, lists, quotes, etc. |
| 40 |
| 41 The BlockParser is made up of a bunch of BlockProcessors, each handling a |
| 42 different type of block. Extensions may add/replace/remove BlockProcessors |
| 43 as they need to alter how markdown blocks are parsed. |
| 44 """ |
| 45 |
| 46 from __future__ import absolute_import |
| 47 from __future__ import division |
| 48 from __future__ import unicode_literals |
| 49 import logging |
| 50 import re |
| 51 from . import util |
| 52 from .blockparser import BlockParser |
| 53 |
# Module-level logger registered under the name 'MARKDOWN'. In this file it is
# used by HashHeaderProcessor to report a header block it could not parse.
logger = logging.getLogger('MARKDOWN')
| 55 |
| 56 |
def build_block_parser(md_instance, **kwargs):
    """ Assemble the stock block parser for a Markdown instance.

    A ``BlockParser`` is created for ``md_instance`` and one instance of every
    built-in block processor is registered on ``parser.blockprocessors`` under
    its conventional key, in the order listed below.

    Keywords:

    * ``md_instance``: The Markdown instance handed to ``BlockParser``.
    * ``kwargs``: Accepted for call compatibility; not used by this function.

    Returns the populated ``BlockParser``.
    """
    parser = BlockParser(md_instance)
    # (key, processor class) pairs, registered strictly in this sequence.
    default_processors = (
        ('empty', EmptyBlockProcessor),
        ('indent', ListIndentProcessor),
        ('code', CodeBlockProcessor),
        ('hashheader', HashHeaderProcessor),
        ('setextheader', SetextHeaderProcessor),
        ('hr', HRProcessor),
        ('olist', OListProcessor),
        ('ulist', UListProcessor),
        ('quote', BlockQuoteProcessor),
        ('paragraph', ParagraphProcessor),
    )
    for key, processor_class in default_processors:
        parser.blockprocessors[key] = processor_class(parser)
    return parser
| 71 |
| 72 |
class BlockProcessor(object):
    """ Base class for block processors.

    Each subclass will provide the methods below to work with the source and
    tree. Each processor will need to define its own ``test`` and ``run``
    methods. The ``test`` method should return True or False, to indicate
    whether the current block should be processed by this processor. If the
    test passes, the parser will call the processor's ``run`` method.

    """

    def __init__(self, parser):
        """ Keep a reference to ``parser`` and cache its tab length.

        ``parser.markdown.tab_length`` is read once here and stored as
        ``self.tab_length`` for use by ``detab`` and ``looseDetab``.
        """
        self.parser = parser
        self.tab_length = parser.markdown.tab_length

    def lastChild(self, parent):
        """ Return the last child of an etree element, or None if it has none. """
        if len(parent):
            return parent[-1]
        return None

    def detab(self, text):
        """ Remove a tab from the front of each line of the given text.

        Lines are consumed from the top while they are either indented by at
        least ``tab_length`` spaces (the indent is stripped) or blank (replaced
        by an empty string). Processing stops at the first line that is
        neither.

        Returns a 2-tuple ``(detabbed, rest)``: the processed leading lines
        and the untouched remaining lines, each joined with newlines.
        """
        indent = ' ' * self.tab_length
        newtext = []
        lines = text.split('\n')
        for line in lines:
            if line.startswith(indent):
                newtext.append(line[self.tab_length:])
            elif not line.strip():
                newtext.append('')
            else:
                break
        return '\n'.join(newtext), '\n'.join(lines[len(newtext):])

    def looseDetab(self, text, level=1):
        """ Remove a tab from front of lines but allowing dedented lines.

        Every line that starts with ``tab_length * level`` spaces has exactly
        that many characters removed; all other lines are kept unchanged.
        """
        width = self.tab_length * level
        indent = ' ' * width
        return '\n'.join(
            line[width:] if line.startswith(indent) else line
            for line in text.split('\n')
        )

    def test(self, parent, block):
        """ Test for block type. Must be overridden by subclasses.

        As the parser loops through processors, it will call the ``test``
        method on each to determine if the given block of text is of that
        type. This method must return a boolean ``True`` or ``False``. The
        actual method of testing is left to the needs of that particular
        block type. It could be as simple as
        ``block.startswith(some_string)`` or a complex regular expression. As
        the block type may be different depending on the parent of the block
        (i.e. inside a list), the parent etree element is also provided and
        may be used as part of the test.

        Keywords:

        * ``parent``: An etree element which will be the parent of the block.
        * ``block``: A block of text from the source which has been split at
          blank lines.
        """
        pass

    def run(self, parent, blocks):
        """ Run processor. Must be overridden by subclasses.

        When the parser determines the appropriate type of a block, the
        parser will call the corresponding processor's ``run`` method. This
        method should parse the individual lines of the block and append
        them to the etree.

        Note that both the ``parent`` and ``blocks`` keywords are pointers
        to instances of the objects which should be edited in place. Each
        processor must make changes to the existing objects as there is no
        mechanism to return new/different objects to replace them.

        This means that this method should be adding SubElements or adding
        text to the parent, and should remove (``pop``) or add (``insert``)
        items to the list of blocks.

        Keywords:

        * ``parent``: An etree element which is the parent of the current
          block.
        * ``blocks``: A list of all remaining blocks of the document.
        """
        pass
| 159 |
| 160 |
class ListIndentProcessor(BlockProcessor):
    """ Process children of list items.

    Example:
        * a list item
            process this part

            or this part

    """

    ITEM_TYPES = ['li']
    LIST_TYPES = ['ul', 'ol']

    def __init__(self, *args):
        BlockProcessor.__init__(self, *args)
        # Matches one or more whole indent units at the start of a block.
        self.INDENT_RE = re.compile(r'^(([ ]{%s})+)' % self.tab_length)

    def test(self, parent, block):
        """ Return True for an indented block that belongs to a list.

        The block must start with a full indent, the parser must not already
        be in the ``detabbed`` state, and ``parent`` must either be a list
        item or have a list with at least one child as its last child.
        """
        if not block.startswith(' ' * self.tab_length):
            return False
        if self.parser.state.isstate('detabbed'):
            return False
        if parent.tag in self.ITEM_TYPES:
            return True
        # Truth-testing an Element is deprecated; ``len()`` states the same
        # "has children" condition explicitly.
        return bool(
            len(parent) and
            len(parent[-1]) and
            parent[-1].tag in self.LIST_TYPES
        )

    def run(self, parent, blocks):
        """ Detab the first block and parse it under the proper list node. """
        block = blocks.pop(0)
        level, sibling = self.get_level(parent, block)
        block = self.looseDetab(block, level)

        self.parser.state.set('detabbed')
        if parent.tag in self.ITEM_TYPES:
            # It's possible that this parent has a 'ul' or 'ol' child list
            # with a member. If that is the case, then that should be the
            # parent. This is intended to catch the edge case of an indented
            # list whose first member was parsed previous to this point;
            # see OListProcessor.
            if len(parent) and parent[-1].tag in self.LIST_TYPES:
                self.parser.parseBlocks(parent[-1], [block])
            else:
                # The parent is already a li. Just parse the child block.
                self.parser.parseBlocks(parent, [block])
        elif sibling.tag in self.ITEM_TYPES:
            # The sibling is a li. Use it as parent.
            self.parser.parseBlocks(sibling, [block])
        elif len(sibling) and sibling[-1].tag in self.ITEM_TYPES:
            # The parent is a list (``ol`` or ``ul``) which has children.
            # Assume the last child li is the parent of this block.
            if sibling[-1].text:
                # If the parent li has text, that text needs to be moved to
                # a p. The p must be 'inserted' at beginning of list in the
                # event that other children already exist, i.e. a nested
                # sublist.
                p = util.etree.Element('p')
                p.text = sibling[-1].text
                sibling[-1].text = ''
                sibling[-1].insert(0, p)
            self.parser.parseChunk(sibling[-1], block)
        else:
            self.create_item(sibling, block)
        self.parser.state.reset()

    def create_item(self, parent, block):
        """ Create a new li and parse the block with it as the parent. """
        li = util.etree.SubElement(parent, 'li')
        self.parser.parseBlocks(li, [block])

    def get_level(self, parent, block):
        """ Get level of indent based on list level.

        Returns a 2-tuple ``(level, element)``: the list nesting level that
        was reached and the element found at that level.
        """
        # Get indent level. INDENT_RE only matches whole indent units, so
        # integer division is exact.
        m = self.INDENT_RE.match(block)
        if m:
            indent_level = len(m.group(1)) // self.tab_length
        else:
            indent_level = 0
        if self.parser.state.isstate('list'):
            # We're in a tightlist - so we already are at correct parent.
            level = 1
        else:
            # We're in a looselist - so we need to find parent.
            level = 0
        # Step through children of tree to find matching indent level.
        while indent_level > level:
            child = self.lastChild(parent)
            # Only list / list-item elements that themselves have children
            # are descended into. The explicit ``len()`` replaces the
            # deprecated Element truth test without changing that rule.
            if (child is not None and len(child) and
                    (child.tag in self.LIST_TYPES or
                     child.tag in self.ITEM_TYPES)):
                if child.tag in self.LIST_TYPES:
                    level += 1
                parent = child
            else:
                # No more child levels. If we're short of indent_level,
                # we have a code block. So we stop here.
                break
        return level, parent
| 255 |
| 256 |
class CodeBlockProcessor(BlockProcessor):
    """ Process code blocks. """

    def test(self, parent, block):
        """ Return True when the block starts with a full indent. """
        return block.startswith(' ' * self.tab_length)

    def run(self, parent, blocks):
        """ Append the first block to a ``<pre><code>`` element.

        The block is detabbed; its leading indented/blank lines become code
        text. If the last child of ``parent`` is already a ``pre`` wrapping a
        ``code`` element, the text is appended to it, otherwise a new pair is
        created. Any trailing unindented lines are pushed back onto
        ``blocks``.
        """
        sibling = self.lastChild(parent)
        block = blocks.pop(0)
        block, theRest = self.detab(block)
        # ``is not None`` instead of the deprecated Element truth test; the
        # ``len(sibling)`` check already requires the ``pre`` to have a child.
        if (sibling is not None and sibling.tag == "pre" and
                len(sibling) and sibling[0].tag == "code"):
            # The previous block was a code block. As blank lines do not
            # start new code blocks, append this block to the previous,
            # adding back linebreaks removed from the split into a list.
            code = sibling[0]
            code.text = util.AtomicString(
                '%s\n%s\n' % (code.text, block.rstrip())
            )
        else:
            # This is a new codeblock. Create the elements and insert text.
            pre = util.etree.SubElement(parent, 'pre')
            code = util.etree.SubElement(pre, 'code')
            code.text = util.AtomicString('%s\n' % block.rstrip())
        if theRest:
            # This block contained unindented line(s) after the first
            # indented line. Insert these lines as the first block of the
            # master blocks list for future processing.
            blocks.insert(0, theRest)
| 286 |
| 287 |
class BlockQuoteProcessor(BlockProcessor):
    """ Process blockquote blocks (lines introduced by ``>``). """

    # A ``>`` at the start of any line, preceded by at most three spaces.
    # ``group(2)`` holds the text that follows the marker.
    RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)')

    def test(self, parent, block):
        """ Return True when any line of the block starts a blockquote. """
        return bool(self.RE.search(block))

    def run(self, parent, blocks):
        """ Parse the first block into a ``blockquote`` element.

        Lines before the first quoted line are parsed into ``parent`` first.
        The quote markers are then stripped and the remainder is parsed into
        the previous sibling if that is a ``blockquote``, or into a newly
        created one.
        """
        block = blocks.pop(0)
        m = self.RE.search(block)
        if m:
            before = block[:m.start()]  # Lines before blockquote
            # Pass lines before blockquote in recursively for parsing first.
            self.parser.parseBlocks(parent, [before])
            # Remove ``> `` from beginning of each line.
            block = '\n'.join(
                [self.clean(line) for line in block[m.start():].split('\n')]
            )
        sibling = self.lastChild(parent)
        # Compare against None rather than truth-testing the element: an
        # Element without children is falsy, so an empty previous blockquote
        # would wrongly be ignored and a second one created.
        if sibling is not None and sibling.tag == "blockquote":
            # Previous block was a blockquote so set that as this block's
            # parent.
            quote = sibling
        else:
            # This is a new blockquote. Create a new parent element.
            quote = util.etree.SubElement(parent, 'blockquote')
        # Recursively parse block with blockquote as parent.
        # Change parser state so blockquotes embedded in lists use p tags.
        self.parser.state.set('blockquote')
        self.parser.parseChunk(quote, block)
        self.parser.state.reset()

    def clean(self, line):
        """ Remove ``>`` from beginning of a line.

        A line consisting only of ``>`` (ignoring surrounding whitespace)
        becomes an empty string; a line that does not match is returned
        unchanged.
        """
        m = self.RE.match(line)
        if line.strip() == ">":
            return ""
        elif m:
            return m.group(2)
        else:
            return line
| 327 |
class OListProcessor(BlockProcessor):
    """ Process ordered list blocks. """

    TAG = 'ol'
    # Detect an item (``1. item``). ``group(1)`` contains contents of item.
    RE = re.compile(r'^[ ]{0,3}\d+\.[ ]+(.*)')
    # Detect items on secondary lines. They can be of either list type.
    CHILD_RE = re.compile(r'^[ ]{0,3}((\d+\.)|[*+-])[ ]+(.*)')
    # Detect indented (nested) items of either type.
    INDENT_RE = re.compile(r'^[ ]{4,7}((\d+\.)|[*+-])[ ]+.*')
    # Extracts the leading integer of a list marker such as ``3.``.
    # Compiled once here (as a raw string) instead of once per block.
    INTEGER_RE = re.compile(r'(\d+)')
    # The integer (python string) with which the list starts (default=1).
    # E.g. if the list is initialized as
    #   3. Item
    # the ol tag will get a start="3" attribute.
    STARTSWITH = '1'
    # List of allowed sibling tags.
    SIBLING_TAGS = ['ol', 'ul']

    def test(self, parent, block):
        """ Return True when the block starts with a list item marker. """
        return bool(self.RE.match(block))

    def run(self, parent, blocks):
        """ Parse the first block as list items.

        The items are added to the previous sibling list when there is one
        (turning it into a loose list), to ``parent`` when that is itself a
        list, or to a newly created list element otherwise.
        """
        # Check for multiple items in one block.
        items = self.get_items(blocks.pop(0))
        sibling = self.lastChild(parent)

        # ``len(sibling)`` replaces the deprecated Element truth test and
        # guarantees that ``lst[-1]`` below exists.
        if (sibling is not None and len(sibling) and
                sibling.tag in self.SIBLING_TAGS):
            # Previous block was a list item, so set that as parent.
            lst = sibling
            # Make sure previous item is in a p - if the item has text, then
            # it isn't in a p.
            if lst[-1].text:
                # Since it's possible there are other children for this
                # sibling, we can't just SubElement the p, we need to insert
                # it as the first item.
                p = util.etree.Element('p')
                p.text = lst[-1].text
                lst[-1].text = ''
                lst[-1].insert(0, p)
            # If the last item has a tail, then the tail needs to be put in
            # a p; likely only when a header is not followed by a blank line.
            lch = self.lastChild(lst[-1])
            if lch is not None and lch.tail:
                p = util.etree.SubElement(lst[-1], 'p')
                p.text = lch.tail.lstrip()
                lch.tail = ''

            # Parse first block differently as it gets wrapped in a p.
            li = util.etree.SubElement(lst, 'li')
            self.parser.state.set('looselist')
            firstitem = items.pop(0)
            self.parser.parseBlocks(li, [firstitem])
            self.parser.state.reset()
        elif parent.tag in ['ol', 'ul']:
            # This catches the edge case of a multi-item indented list whose
            # first item is in a blank parent-list item:
            # * * subitem1
            #     * subitem2
            # see also ListIndentProcessor
            lst = parent
        else:
            # This is a new list so create parent with appropriate tag.
            lst = util.etree.SubElement(parent, self.TAG)
            # Check if a custom start integer is set.
            if not self.parser.markdown.lazy_ol and self.STARTSWITH != '1':
                lst.attrib['start'] = self.STARTSWITH

        self.parser.state.set('list')
        # Loop through items in block, recursively parsing each with the
        # appropriate parent.
        for item in items:
            if item.startswith(' ' * self.tab_length):
                # Item is indented. Parse with last item as parent.
                self.parser.parseBlocks(lst[-1], [item])
            else:
                # New item. Create li and parse with it as parent.
                li = util.etree.SubElement(lst, 'li')
                self.parser.parseBlocks(li, [item])
        self.parser.state.reset()

    def get_items(self, block):
        """ Break a block into list items.

        Returns a list of strings, one per item. For an ``ol`` the number of
        the first item is also stored on ``self.STARTSWITH`` as a side
        effect.
        """
        items = []
        for line in block.split('\n'):
            m = self.CHILD_RE.match(line)
            if m:
                # This is a new list item.
                # Check first item for the start index.
                if not items and self.TAG == 'ol':
                    # Detect the integer value of first list item.
                    self.STARTSWITH = self.INTEGER_RE.match(m.group(1)).group()
                # Append to the list.
                items.append(m.group(3))
            elif self.INDENT_RE.match(line):
                # This is an indented (possibly nested) item.
                if items[-1].startswith(' ' * self.tab_length):
                    # Previous item was indented. Append to that item.
                    items[-1] = '%s\n%s' % (items[-1], line)
                else:
                    items.append(line)
            else:
                # This is another line of previous item. Append to that item.
                items[-1] = '%s\n%s' % (items[-1], line)
        return items
| 433 |
| 434 |
class UListProcessor(OListProcessor):
    """ Process unordered list blocks.

    All parsing logic is inherited from ``OListProcessor``; only the list tag
    and the item-detection pattern differ.
    """

    # Element tag created for a new list.
    TAG = 'ul'
    # Detect an item introduced by ``*``, ``+`` or ``-``, preceded by at most
    # three spaces. ``group(1)`` contains the contents of the item.
    RE = re.compile(r'^[ ]{0,3}[*+-][ ]+(.*)')
| 440 |
| 441 |
class HashHeaderProcessor(BlockProcessor):
    """ Process Hash Headers. """

    # Detect a header at start of any line in block.
    RE = re.compile(r'(^|\n)(?P<level>#{1,6})(?P<header>.*?)#*(\n|$)')

    def test(self, parent, block):
        """ Return True when any line of the block is a hash header. """
        return bool(self.RE.search(block))

    def run(self, parent, blocks):
        """ Turn the first hash header of the first block into an ``hN`` element.

        Lines before the header are parsed into ``parent`` first; lines after
        it are pushed back onto ``blocks`` for later processing.
        """
        block = blocks.pop(0)
        m = self.RE.search(block)
        if m:
            before = block[:m.start()]  # All lines before header
            after = block[m.end():]     # All lines after header
            if before:
                # As the header was not the first line of the block and the
                # lines before the header must be parsed first,
                # recursively parse these lines as a block.
                self.parser.parseBlocks(parent, [before])
            # Create header using named groups from RE.
            h = util.etree.SubElement(parent, 'h%d' % len(m.group('level')))
            h.text = m.group('header').strip()
            if after:
                # Insert remaining lines as first block for future parsing.
                blocks.insert(0, after)
        else:
            # This should never happen, but just in case...
            # ``Logger.warn`` is a deprecated alias (removed in Python 3.13);
            # ``warning`` with lazy arguments emits the same message.
            logger.warning("We've got a problem header: %r", block)
| 471 |
| 472 |
class SetextHeaderProcessor(BlockProcessor):
    """ Process Setext-style Headers. """

    # Detect Setext-style header. Must be first 2 lines of block.
    RE = re.compile(r'^.*?\n[=-]+[ ]*(\n|$)', re.MULTILINE)

    def test(self, parent, block):
        """ Return True when the block opens with a Setext-style header. """
        return bool(self.RE.match(block))

    def run(self, parent, blocks):
        """ Build an ``h1``/``h2`` from the first two lines of the first block.

        The first line supplies the header text and the second line selects
        the level. Any further lines are handed back to ``blocks``.
        """
        lines = blocks.pop(0).split('\n')
        # An underline starting with ``=`` is level 1; anything else is 2.
        level = 1 if lines[1].startswith('=') else 2
        header = util.etree.SubElement(parent, 'h%d' % level)
        header.text = lines[0].strip()
        remaining = lines[2:]
        if remaining:
            # Block contains additional lines. Add to master blocks for later.
            blocks.insert(0, '\n'.join(remaining))
| 494 |
| 495 |
class HRProcessor(BlockProcessor):
    """ Process Horizontal Rules. """

    RE = r'^[ ]{0,3}((-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,})[ ]*'
    # Detect hr on any line of a block.
    SEARCH_RE = re.compile(RE, re.MULTILINE)

    def test(self, parent, block):
        """ Return True when some line of the block is a horizontal rule.

        On success the match object is stored on ``self.match`` so that
        ``run`` can reuse it.
        """
        found = self.SEARCH_RE.search(block)
        if not found:
            return False
        # No atomic grouping in python so we simulate it here for
        # performance. The regex only matches what would be in the atomic
        # group - the HR. The match must then run to the end of the block or
        # be followed directly by a newline.
        stop = found.end()
        if stop != len(block) and block[stop] != '\n':
            return False
        # Save match object on class instance so we can use it later.
        self.match = found
        return True

    def run(self, parent, blocks):
        """ Insert an ``hr`` element, splitting the block around the rule.

        Relies on ``self.match`` as stored by the preceding ``test`` call.
        """
        block = blocks.pop(0)
        rule = self.match
        # Lines ahead of the rule are parsed first so they come before it.
        leading = block[:rule.start()].rstrip('\n')
        if leading:
            self.parser.parseBlocks(parent, [leading])
        # Create the hr itself.
        util.etree.SubElement(parent, 'hr')
        # Lines following the rule go back to the master blocks for later.
        trailing = block[rule.end():].lstrip('\n')
        if trailing:
            blocks.insert(0, trailing)
| 528 |
| 529 |
| 530 |
class EmptyBlockProcessor(BlockProcessor):
    """ Process blocks that are empty or start with an empty line. """

    def test(self, parent, block):
        """ Return True for an empty block or one starting with a newline. """
        return not block or block.startswith('\n')

    def run(self, parent, blocks):
        """ Consume the blank part of the first block.

        An empty block is dropped entirely. For a block that starts with a
        newline only that first character is removed and the rest is pushed
        back onto ``blocks``. If the last child of ``parent`` is a code block
        the consumed whitespace is appended to its text.
        """
        block = blocks.pop(0)
        filler = '\n\n'
        if block:
            # Starts with empty line.
            # Only replace a single line.
            filler = '\n'
            # Save the rest for later.
            theRest = block[1:]
            if theRest:
                # Add remaining lines to master blocks for later.
                blocks.insert(0, theRest)
        sibling = self.lastChild(parent)
        # ``is not None`` instead of the deprecated Element truth test; the
        # ``len(sibling)`` check already requires the ``pre`` to have a child.
        if (sibling is not None and sibling.tag == 'pre' and
                len(sibling) and sibling[0].tag == 'code'):
            # Last block is a codeblock. Append to preserve whitespace.
            sibling[0].text = util.AtomicString(
                '%s%s' % (sibling[0].text, filler)
            )
| 553 |
| 554 |
class ParagraphProcessor(BlockProcessor):
    """ Process Paragraph blocks. """

    def test(self, parent, block):
        """ Accept every block. """
        return True

    def run(self, parent, blocks):
        """ Add the first block to ``parent`` as paragraph text.

        Whitespace-only blocks are discarded. Outside the ``list`` parser
        state a ``p`` element is created. Inside it, the text is attached
        directly to ``parent`` or to the tail of its last child.
        """
        block = blocks.pop(0)
        if not block.strip():
            # A blank block: throw it away.
            return

        if not self.parser.state.isstate('list'):
            # Create a regular paragraph.
            paragraph = util.etree.SubElement(parent, 'p')
            paragraph.text = block.lstrip()
            return

        # The parent is a tight-list.
        #
        # Check for any children. This will likely only happen in a
        # tight-list when a header isn't followed by a blank line.
        # For example:
        #
        #     * # Header
        #     Line 2 of list item - not part of header.
        last = self.lastChild(parent)
        if last is None:
            # No children: the text belongs to parent.text.
            if parent.text:
                parent.text = '%s\n%s' % (parent.text, block)
            else:
                parent.text = block.lstrip()
        elif last.tail:
            # Insert after the last child, keeping its existing tail.
            last.tail = '%s\n%s' % (last.tail, block)
        else:
            last.tail = '\n%s' % block
OLD | NEW |