OLD | NEW |
1 # markdown is released under the BSD license | |
2 # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later) | |
3 # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) | |
4 # Copyright 2004 Manfred Stienstra (the original version) | |
5 # | |
6 # All rights reserved. | |
7 # | |
8 # Redistribution and use in source and binary forms, with or without | |
9 # modification, are permitted provided that the following conditions are met: | |
10 # | |
11 # * Redistributions of source code must retain the above copyright | |
12 # notice, this list of conditions and the following disclaimer. | |
13 # * Redistributions in binary form must reproduce the above copyright | |
14 # notice, this list of conditions and the following disclaimer in the | |
15 # documentation and/or other materials provided with the distribution. | |
16 # * Neither the name of the <organization> nor the | |
17 # names of its contributors may be used to endorse or promote products | |
18 # derived from this software without specific prior written permission. | |
19 # | |
20 # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY | |
21 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
22 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
23 # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT | |
24 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
30 # POSSIBILITY OF SUCH DAMAGE. | |
31 | |
32 | |
33 """ | 1 """ |
34 CORE MARKDOWN BLOCKPARSER | 2 CORE MARKDOWN BLOCKPARSER |
35 =========================================================================== | 3 =========================================================================== |
36 | 4 |
37 This parser handles basic parsing of Markdown blocks. It doesn't concern itself | 5 This parser handles basic parsing of Markdown blocks. It doesn't concern |
38 with inline elements such as **bold** or *italics*, but rather just catches | 6 itself with inline elements such as **bold** or *italics*, but rather just |
39 blocks, lists, quotes, etc. | 7 catches blocks, lists, quotes, etc. |
40 | 8 |
41 The BlockParser is made up of a bunch of BlockProcessors, each handling a | 9 The BlockParser is made up of a bunch of BlockProcessors, each handling a |
42 different type of block. Extensions may add/replace/remove BlockProcessors | 10 different type of block. Extensions may add/replace/remove BlockProcessors |
43 as they need to alter how markdown blocks are parsed. | 11 as they need to alter how markdown blocks are parsed. |
44 """ | 12 """ |
45 | 13 |
46 from __future__ import absolute_import | 14 from __future__ import absolute_import |
47 from __future__ import division | 15 from __future__ import division |
48 from __future__ import unicode_literals | 16 from __future__ import unicode_literals |
49 import logging | 17 import logging |
50 import re | 18 import re |
51 from . import util | 19 from . import util |
52 from .blockparser import BlockParser | 20 from .blockparser import BlockParser |
53 | 21 |
54 logger = logging.getLogger('MARKDOWN') | 22 logger = logging.getLogger('MARKDOWN') |
55 | 23 |
56 | 24 |
57 def build_block_parser(md_instance, **kwargs): | 25 def build_block_parser(md_instance, **kwargs): |
58 """ Build the default block parser used by Markdown. """ | 26 """ Build the default block parser used by Markdown. """ |
59 parser = BlockParser(md_instance) | 27 parser = BlockParser(md_instance) |
60 parser.blockprocessors['empty'] = EmptyBlockProcessor(parser) | 28 parser.blockprocessors['empty'] = EmptyBlockProcessor(parser) |
61 parser.blockprocessors['indent'] = ListIndentProcessor(parser) | 29 parser.blockprocessors['indent'] = ListIndentProcessor(parser) |
62 parser.blockprocessors['code'] = CodeBlockProcessor(parser) | 30 parser.blockprocessors['code'] = CodeBlockProcessor(parser) |
63 parser.blockprocessors['hashheader'] = HashHeaderProcessor(parser) | 31 parser.blockprocessors['hashheader'] = HashHeaderProcessor(parser) |
64 parser.blockprocessors['setextheader'] = SetextHeaderProcessor(parser) | 32 parser.blockprocessors['setextheader'] = SetextHeaderProcessor(parser) |
65 parser.blockprocessors['hr'] = HRProcessor(parser) | 33 parser.blockprocessors['hr'] = HRProcessor(parser) |
66 parser.blockprocessors['olist'] = OListProcessor(parser) | 34 parser.blockprocessors['olist'] = OListProcessor(parser) |
67 parser.blockprocessors['ulist'] = UListProcessor(parser) | 35 parser.blockprocessors['ulist'] = UListProcessor(parser) |
68 parser.blockprocessors['quote'] = BlockQuoteProcessor(parser) | 36 parser.blockprocessors['quote'] = BlockQuoteProcessor(parser) |
69 parser.blockprocessors['paragraph'] = ParagraphProcessor(parser) | 37 parser.blockprocessors['paragraph'] = ParagraphProcessor(parser) |
70 return parser | 38 return parser |
71 | 39 |
72 | 40 |
73 class BlockProcessor: | 41 class BlockProcessor: |
74 """ Base class for block processors. | 42 """ Base class for block processors. |
75 | 43 |
76 Each subclass will provide the methods below to work with the source and | 44 Each subclass will provide the methods below to work with the source and |
77 tree. Each processor will need to define its own ``test`` and ``run`` | 45 tree. Each processor will need to define its own ``test`` and ``run`` |
78 methods. The ``test`` method should return True or False, to indicate | 46 methods. The ``test`` method should return True or False, to indicate |
79 whether the current block should be processed by this processor. If the | 47 whether the current block should be processed by this processor. If the |
80 test passes, the parser will call the processor's ``run`` method. | 48 test passes, the parser will call the processor's ``run`` method. |
81 | 49 |
82 """ | 50 """ |
83 | 51 |
84 def __init__(self, parser): | 52 def __init__(self, parser): |
85 self.parser = parser | 53 self.parser = parser |
(...skipping 21 matching lines...) Expand all Loading... |
107 | 75 |
108 def looseDetab(self, text, level=1): | 76 def looseDetab(self, text, level=1): |
109 """ Remove a tab from front of lines but allowing dedented lines. """ | 77 """ Remove a tab from front of lines but allowing dedented lines. """ |
110 lines = text.split('\n') | 78 lines = text.split('\n') |
111 for i in range(len(lines)): | 79 for i in range(len(lines)): |
112 if lines[i].startswith(' '*self.tab_length*level): | 80 if lines[i].startswith(' '*self.tab_length*level): |
113 lines[i] = lines[i][self.tab_length*level:] | 81 lines[i] = lines[i][self.tab_length*level:] |
114 return '\n'.join(lines) | 82 return '\n'.join(lines) |
115 | 83 |
116 def test(self, parent, block): | 84 def test(self, parent, block): |
117 """ Test for block type. Must be overridden by subclasses. | 85 """ Test for block type. Must be overridden by subclasses. |
118 | 86 |
119 As the parser loops through processors, it will call the ``test`` method | 87 As the parser loops through processors, it will call the ``test`` |
120 on each to determine if the given block of text is of that type. This | 88 method on each to determine if the given block of text is of that |
121 method must return a boolean ``True`` or ``False``. The actual method of | 89 type. This method must return a boolean ``True`` or ``False``. The |
122 testing is left to the needs of that particular block type. It could | 90 actual method of testing is left to the needs of that particular |
123 be as simple as ``block.startswith(some_string)`` or a complex regular | 91 block type. It could be as simple as ``block.startswith(some_string)`` |
124 expression. As the block type may be different depending on the parent | 92 or a complex regular expression. As the block type may be different |
125 of the block (i.e. inside a list), the parent etree element is also | 93 depending on the parent of the block (i.e. inside a list), the parent |
126 provided and may be used as part of the test. | 94 etree element is also provided and may be used as part of the test. |
127 | 95 |
128 Keywords: | 96 Keywords: |
129 | 97 |
130 * ``parent``: An etree element which will be the parent of the block. | 98 * ``parent``: An etree element which will be the parent of the block. |
131 * ``block``: A block of text from the source which has been split at | 99 * ``block``: A block of text from the source which has been split at |
132 blank lines. | 100 blank lines. |
133 """ | 101 """ |
134 pass | 102 pass # pragma: no cover |
135 | 103 |
136 def run(self, parent, blocks): | 104 def run(self, parent, blocks): |
137 """ Run processor. Must be overridden by subclasses. | 105 """ Run processor. Must be overridden by subclasses. |
138 | 106 |
139 When the parser determines the appropriate type of a block, the parser | 107 When the parser determines the appropriate type of a block, the parser |
140 will call the corresponding processor's ``run`` method. This method | 108 will call the corresponding processor's ``run`` method. This method |
141 should parse the individual lines of the block and append them to | 109 should parse the individual lines of the block and append them to |
142 the etree. | 110 the etree. |
143 | 111 |
144 Note that both the ``parent`` and ``blocks`` keywords are pointers | 112 Note that both the ``parent`` and ``blocks`` keywords are pointers |
145 to instances of the objects which should be edited in place. Each | 113 to instances of the objects which should be edited in place. Each |
146 processor must make changes to the existing objects as there is no | 114 processor must make changes to the existing objects as there is no |
147 mechanism to return new/different objects to replace them. | 115 mechanism to return new/different objects to replace them. |
148 | 116 |
149 This means that this method should be adding SubElements or adding text | 117 This means that this method should be adding SubElements or adding text |
150 to the parent, and should remove (``pop``) or add (``insert``) items to | 118 to the parent, and should remove (``pop``) or add (``insert``) items to |
151 the list of blocks. | 119 the list of blocks. |
152 | 120 |
153 Keywords: | 121 Keywords: |
154 | 122 |
155 * ``parent``: An etree element which is the parent of the current block. | 123 * ``parent``: An etree element which is the parent of the current block. |
156 * ``blocks``: A list of all remaining blocks of the document. | 124 * ``blocks``: A list of all remaining blocks of the document. |
157 """ | 125 """ |
158 pass | 126 pass # pragma: no cover |
159 | 127 |
160 | 128 |
161 class ListIndentProcessor(BlockProcessor): | 129 class ListIndentProcessor(BlockProcessor): |
162 """ Process children of list items. | 130 """ Process children of list items. |
163 | 131 |
164 Example: | 132 Example: |
165 * a list item | 133 * a list item |
166 process this part | 134 process this part |
167 | 135 |
168 or this part | 136 or this part |
169 | 137 |
170 """ | 138 """ |
171 | 139 |
172 ITEM_TYPES = ['li'] | 140 ITEM_TYPES = ['li'] |
173 LIST_TYPES = ['ul', 'ol'] | 141 LIST_TYPES = ['ul', 'ol'] |
174 | 142 |
175 def __init__(self, *args): | 143 def __init__(self, *args): |
176 BlockProcessor.__init__(self, *args) | 144 BlockProcessor.__init__(self, *args) |
177 self.INDENT_RE = re.compile(r'^(([ ]{%s})+)'% self.tab_length) | 145 self.INDENT_RE = re.compile(r'^(([ ]{%s})+)' % self.tab_length) |
178 | 146 |
179 def test(self, parent, block): | 147 def test(self, parent, block): |
180 return block.startswith(' '*self.tab_length) and \ | 148 return block.startswith(' '*self.tab_length) and \ |
181 not self.parser.state.isstate('detabbed') and \ | 149 not self.parser.state.isstate('detabbed') and \ |
182 (parent.tag in self.ITEM_TYPES or \ | 150 (parent.tag in self.ITEM_TYPES or |
183 (len(parent) and parent[-1] and \ | 151 (len(parent) and parent[-1] is not None and |
184 (parent[-1].tag in self.LIST_TYPES) | 152 (parent[-1].tag in self.LIST_TYPES))) |
185 ) | |
186 ) | |
187 | 153 |
188 def run(self, parent, blocks): | 154 def run(self, parent, blocks): |
189 block = blocks.pop(0) | 155 block = blocks.pop(0) |
190 level, sibling = self.get_level(parent, block) | 156 level, sibling = self.get_level(parent, block) |
191 block = self.looseDetab(block, level) | 157 block = self.looseDetab(block, level) |
192 | 158 |
193 self.parser.state.set('detabbed') | 159 self.parser.state.set('detabbed') |
194 if parent.tag in self.ITEM_TYPES: | 160 if parent.tag in self.ITEM_TYPES: |
195 # It's possible that this parent has a 'ul' or 'ol' child list | 161 # It's possible that this parent has a 'ul' or 'ol' child list |
196 # with a member. If that is the case, then that should be the | 162 # with a member. If that is the case, then that should be the |
197 # parent. This is intended to catch the edge case of an indented | 163 # parent. This is intended to catch the edge case of an indented |
198 # list whose first member was parsed previous to this point | 164 # list whose first member was parsed previous to this point |
199 # see OListProcessor | 165 # see OListProcessor |
200 if len(parent) and parent[-1].tag in self.LIST_TYPES: | 166 if len(parent) and parent[-1].tag in self.LIST_TYPES: |
201 self.parser.parseBlocks(parent[-1], [block]) | 167 self.parser.parseBlocks(parent[-1], [block]) |
202 else: | 168 else: |
203 # The parent is already a li. Just parse the child block. | 169 # The parent is already a li. Just parse the child block. |
204 self.parser.parseBlocks(parent, [block]) | 170 self.parser.parseBlocks(parent, [block]) |
205 elif sibling.tag in self.ITEM_TYPES: | 171 elif sibling.tag in self.ITEM_TYPES: |
206 # The sibling is a li. Use it as parent. | 172 # The sibling is a li. Use it as parent. |
207 self.parser.parseBlocks(sibling, [block]) | 173 self.parser.parseBlocks(sibling, [block]) |
(...skipping 10 matching lines...) Expand all Loading... |
218 sibling[-1].insert(0, p) | 184 sibling[-1].insert(0, p) |
219 self.parser.parseChunk(sibling[-1], block) | 185 self.parser.parseChunk(sibling[-1], block) |
220 else: | 186 else: |
221 self.create_item(sibling, block) | 187 self.create_item(sibling, block) |
222 self.parser.state.reset() | 188 self.parser.state.reset() |
223 | 189 |
224 def create_item(self, parent, block): | 190 def create_item(self, parent, block): |
225 """ Create a new li and parse the block with it as the parent. """ | 191 """ Create a new li and parse the block with it as the parent. """ |
226 li = util.etree.SubElement(parent, 'li') | 192 li = util.etree.SubElement(parent, 'li') |
227 self.parser.parseBlocks(li, [block]) | 193 self.parser.parseBlocks(li, [block]) |
228 | 194 |
229 def get_level(self, parent, block): | 195 def get_level(self, parent, block): |
230 """ Get level of indent based on list level. """ | 196 """ Get level of indent based on list level. """ |
231 # Get indent level | 197 # Get indent level |
232 m = self.INDENT_RE.match(block) | 198 m = self.INDENT_RE.match(block) |
233 if m: | 199 if m: |
234 indent_level = len(m.group(1))/self.tab_length | 200 indent_level = len(m.group(1))/self.tab_length |
235 else: | 201 else: |
236 indent_level = 0 | 202 indent_level = 0 |
237 if self.parser.state.isstate('list'): | 203 if self.parser.state.isstate('list'): |
238 # We're in a tightlist - so we already are at correct parent. | 204 # We're in a tightlist - so we already are at correct parent. |
239 level = 1 | 205 level = 1 |
240 else: | 206 else: |
241 # We're in a looselist - so we need to find parent. | 207 # We're in a looselist - so we need to find parent. |
242 level = 0 | 208 level = 0 |
243 # Step through children of tree to find matching indent level. | 209 # Step through children of tree to find matching indent level. |
244 while indent_level > level: | 210 while indent_level > level: |
245 child = self.lastChild(parent) | 211 child = self.lastChild(parent) |
246 if child and (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES): | 212 if (child is not None and |
| 213 (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES)): |
247 if child.tag in self.LIST_TYPES: | 214 if child.tag in self.LIST_TYPES: |
248 level += 1 | 215 level += 1 |
249 parent = child | 216 parent = child |
250 else: | 217 else: |
251 # No more child levels. If we're short of indent_level, | 218 # No more child levels. If we're short of indent_level, |
252 # we have a code block. So we stop here. | 219 # we have a code block. So we stop here. |
253 break | 220 break |
254 return level, parent | 221 return level, parent |
255 | 222 |
256 | 223 |
257 class CodeBlockProcessor(BlockProcessor): | 224 class CodeBlockProcessor(BlockProcessor): |
258 """ Process code blocks. """ | 225 """ Process code blocks. """ |
259 | 226 |
260 def test(self, parent, block): | 227 def test(self, parent, block): |
261 return block.startswith(' '*self.tab_length) | 228 return block.startswith(' '*self.tab_length) |
262 | 229 |
263 def run(self, parent, blocks): | 230 def run(self, parent, blocks): |
264 sibling = self.lastChild(parent) | 231 sibling = self.lastChild(parent) |
265 block = blocks.pop(0) | 232 block = blocks.pop(0) |
266 theRest = '' | 233 theRest = '' |
267 if sibling and sibling.tag == "pre" and len(sibling) \ | 234 if (sibling is not None and sibling.tag == "pre" and |
268 and sibling[0].tag == "code": | 235 len(sibling) and sibling[0].tag == "code"): |
269 # The previous block was a code block. As blank lines do not start | 236 # The previous block was a code block. As blank lines do not start |
270 # new code blocks, append this block to the previous, adding back | 237 # new code blocks, append this block to the previous, adding back |
271 # linebreaks removed from the split into a list. | 238 # linebreaks removed from the split into a list. |
272 code = sibling[0] | 239 code = sibling[0] |
273 block, theRest = self.detab(block) | 240 block, theRest = self.detab(block) |
274 code.text = util.AtomicString('%s\n%s\n' % (code.text, block.rstrip())) | 241 code.text = util.AtomicString( |
| 242 '%s\n%s\n' % (code.text, block.rstrip()) |
| 243 ) |
275 else: | 244 else: |
276 # This is a new codeblock. Create the elements and insert text. | 245 # This is a new codeblock. Create the elements and insert text. |
277 pre = util.etree.SubElement(parent, 'pre') | 246 pre = util.etree.SubElement(parent, 'pre') |
278 code = util.etree.SubElement(pre, 'code') | 247 code = util.etree.SubElement(pre, 'code') |
279 block, theRest = self.detab(block) | 248 block, theRest = self.detab(block) |
280 code.text = util.AtomicString('%s\n' % block.rstrip()) | 249 code.text = util.AtomicString('%s\n' % block.rstrip()) |
281 if theRest: | 250 if theRest: |
282 # This block contained unindented line(s) after the first indented | 251 # This block contained unindented line(s) after the first indented |
283 # line. Insert these lines as the first block of the master blocks | 252 # line. Insert these lines as the first block of the master blocks |
284 # list for future processing. | 253 # list for future processing. |
285 blocks.insert(0, theRest) | 254 blocks.insert(0, theRest) |
286 | 255 |
287 | 256 |
288 class BlockQuoteProcessor(BlockProcessor): | 257 class BlockQuoteProcessor(BlockProcessor): |
289 | 258 |
290 RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)') | 259 RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)') |
291 | 260 |
292 def test(self, parent, block): | 261 def test(self, parent, block): |
293 return bool(self.RE.search(block)) | 262 return bool(self.RE.search(block)) |
294 | 263 |
295 def run(self, parent, blocks): | 264 def run(self, parent, blocks): |
296 block = blocks.pop(0) | 265 block = blocks.pop(0) |
297 m = self.RE.search(block) | 266 m = self.RE.search(block) |
298 if m: | 267 if m: |
299 before = block[:m.start()] # Lines before blockquote | 268 before = block[:m.start()] # Lines before blockquote |
300 # Pass lines before blockquote in recursively for parsing first. | 269 # Pass lines before blockquote in recursively for parsing first. |
301 self.parser.parseBlocks(parent, [before]) | 270 self.parser.parseBlocks(parent, [before]) |
302 # Remove ``> `` from beginning of each line. | 271 # Remove ``> `` from beginning of each line. |
303 block = '\n'.join([self.clean(line) for line in | 272 block = '\n'.join( |
304 block[m.start():].split('\n')]) | 273 [self.clean(line) for line in block[m.start():].split('\n')] |
| 274 ) |
305 sibling = self.lastChild(parent) | 275 sibling = self.lastChild(parent) |
306 if sibling and sibling.tag == "blockquote": | 276 if sibling is not None and sibling.tag == "blockquote": |
307 # Previous block was a blockquote so set that as this block's parent | 277 # Previous block was a blockquote so set that as this block's parent |
308 quote = sibling | 278 quote = sibling |
309 else: | 279 else: |
310 # This is a new blockquote. Create a new parent element. | 280 # This is a new blockquote. Create a new parent element. |
311 quote = util.etree.SubElement(parent, 'blockquote') | 281 quote = util.etree.SubElement(parent, 'blockquote') |
312 # Recursively parse block with blockquote as parent. | 282 # Recursively parse block with blockquote as parent. |
313 # change parser state so blockquotes embedded in lists use p tags | 283 # change parser state so blockquotes embedded in lists use p tags |
314 self.parser.state.set('blockquote') | 284 self.parser.state.set('blockquote') |
315 self.parser.parseChunk(quote, block) | 285 self.parser.parseChunk(quote, block) |
316 self.parser.state.reset() | 286 self.parser.state.reset() |
317 | 287 |
318 def clean(self, line): | 288 def clean(self, line): |
319 """ Remove ``>`` from beginning of a line. """ | 289 """ Remove ``>`` from beginning of a line. """ |
320 m = self.RE.match(line) | 290 m = self.RE.match(line) |
321 if line.strip() == ">": | 291 if line.strip() == ">": |
322 return "" | 292 return "" |
323 elif m: | 293 elif m: |
324 return m.group(2) | 294 return m.group(2) |
325 else: | 295 else: |
326 return line | 296 return line |
327 | 297 |
| 298 |
328 class OListProcessor(BlockProcessor): | 299 class OListProcessor(BlockProcessor): |
329 """ Process ordered list blocks. """ | 300 """ Process ordered list blocks. """ |
330 | 301 |
331 TAG = 'ol' | 302 TAG = 'ol' |
332 # Detect an item (``1. item``). ``group(1)`` contains contents of item. | 303 # Detect an item (``1. item``). ``group(1)`` contains contents of item. |
333 RE = re.compile(r'^[ ]{0,3}\d+\.[ ]+(.*)') | 304 RE = re.compile(r'^[ ]{0,3}\d+\.[ ]+(.*)') |
334 # Detect items on secondary lines. they can be of either list type. | 305 # Detect items on secondary lines. they can be of either list type. |
335 CHILD_RE = re.compile(r'^[ ]{0,3}((\d+\.)|[*+-])[ ]+(.*)') | 306 CHILD_RE = re.compile(r'^[ ]{0,3}((\d+\.)|[*+-])[ ]+(.*)') |
336 # Detect indented (nested) items of either type | 307 # Detect indented (nested) items of either type |
337 INDENT_RE = re.compile(r'^[ ]{4,7}((\d+\.)|[*+-])[ ]+.*') | 308 INDENT_RE = re.compile(r'^[ ]{4,7}((\d+\.)|[*+-])[ ]+.*') |
338 # The integer (python string) with which the list starts (default=1) | 309 # The integer (python string) with which the list starts (default=1) |
339 # Eg: If list is initialized as | 310 # Eg: If list is initialized as |
340 # 3. Item | 311 # 3. Item |
341 # The ol tag will get start="3" attribute | 312 # The ol tag will get start="3" attribute |
342 STARTSWITH = '1' | 313 STARTSWITH = '1' |
343 # List of allowed sibling tags. | 314 # List of allowed sibling tags. |
344 SIBLING_TAGS = ['ol', 'ul'] | 315 SIBLING_TAGS = ['ol', 'ul'] |
345 | 316 |
346 def test(self, parent, block): | 317 def test(self, parent, block): |
347 return bool(self.RE.match(block)) | 318 return bool(self.RE.match(block)) |
348 | 319 |
349 def run(self, parent, blocks): | 320 def run(self, parent, blocks): |
350 # Check for multiple items in one block. | 321 # Check for multiple items in one block. |
351 items = self.get_items(blocks.pop(0)) | 322 items = self.get_items(blocks.pop(0)) |
352 sibling = self.lastChild(parent) | 323 sibling = self.lastChild(parent) |
353 | 324 |
354 if sibling and sibling.tag in self.SIBLING_TAGS: | 325 if sibling is not None and sibling.tag in self.SIBLING_TAGS: |
355 # Previous block was a list item, so set that as parent | 326 # Previous block was a list item, so set that as parent |
356 lst = sibling | 327 lst = sibling |
357 # make sure previous item is in a p- if the item has text, then it | 328 # make sure previous item is in a p- if the item has text, |
358 # it isn't in a p | 329 # then it isn't in a p |
359 if lst[-1].text: | 330 if lst[-1].text: |
360 # since it's possible there are other children for this sibling, | 331 # since it's possible there are other children for this |
361 # we can't just SubElement the p, we need to insert it as the | 332 # sibling, we can't just SubElement the p, we need to |
362 # first item | 333 # insert it as the first item. |
363 p = util.etree.Element('p') | 334 p = util.etree.Element('p') |
364 p.text = lst[-1].text | 335 p.text = lst[-1].text |
365 lst[-1].text = '' | 336 lst[-1].text = '' |
366 lst[-1].insert(0, p) | 337 lst[-1].insert(0, p) |
367 # if the last item has a tail, then the tail needs to be put in a p | 338 # if the last item has a tail, then the tail needs to be put in a p |
368 # likely only when a header is not followed by a blank line | 339 # likely only when a header is not followed by a blank line |
369 lch = self.lastChild(lst[-1]) | 340 lch = self.lastChild(lst[-1]) |
370 if lch is not None and lch.tail: | 341 if lch is not None and lch.tail: |
371 p = util.etree.SubElement(lst[-1], 'p') | 342 p = util.etree.SubElement(lst[-1], 'p') |
372 p.text = lch.tail.lstrip() | 343 p.text = lch.tail.lstrip() |
373 lch.tail = '' | 344 lch.tail = '' |
374 | 345 |
375 # parse first block differently as it gets wrapped in a p. | 346 # parse first block differently as it gets wrapped in a p. |
376 li = util.etree.SubElement(lst, 'li') | 347 li = util.etree.SubElement(lst, 'li') |
377 self.parser.state.set('looselist') | 348 self.parser.state.set('looselist') |
378 firstitem = items.pop(0) | 349 firstitem = items.pop(0) |
379 self.parser.parseBlocks(li, [firstitem]) | 350 self.parser.parseBlocks(li, [firstitem]) |
380 self.parser.state.reset() | 351 self.parser.state.reset() |
381 elif parent.tag in ['ol', 'ul']: | 352 elif parent.tag in ['ol', 'ul']: |
382 # this catches the edge case of a multi-item indented list whose | 353 # this catches the edge case of a multi-item indented list whose |
383 # first item is in a blank parent-list item: | 354 # first item is in a blank parent-list item: |
384 # * * subitem1 | 355 # * * subitem1 |
385 # * subitem2 | 356 # * subitem2 |
386 # see also ListIndentProcessor | 357 # see also ListIndentProcessor |
387 lst = parent | 358 lst = parent |
388 else: | 359 else: |
389 # This is a new list so create parent with appropriate tag. | 360 # This is a new list so create parent with appropriate tag. |
390 lst = util.etree.SubElement(parent, self.TAG) | 361 lst = util.etree.SubElement(parent, self.TAG) |
391 # Check if a custom start integer is set | 362 # Check if a custom start integer is set |
392 if not self.parser.markdown.lazy_ol and self.STARTSWITH !='1': | 363 if not self.parser.markdown.lazy_ol and self.STARTSWITH != '1': |
393 lst.attrib['start'] = self.STARTSWITH | 364 lst.attrib['start'] = self.STARTSWITH |
394 | 365 |
395 self.parser.state.set('list') | 366 self.parser.state.set('list') |
396 # Loop through items in block, recursively parsing each with the | 367 # Loop through items in block, recursively parsing each with the |
397 # appropriate parent. | 368 # appropriate parent. |
398 for item in items: | 369 for item in items: |
399 if item.startswith(' '*self.tab_length): | 370 if item.startswith(' '*self.tab_length): |
400 # Item is indented. Parse with last item as parent | 371 # Item is indented. Parse with last item as parent |
401 self.parser.parseBlocks(lst[-1], [item]) | 372 self.parser.parseBlocks(lst[-1], [item]) |
402 else: | 373 else: |
403 # New item. Create li and parse with it as parent | 374 # New item. Create li and parse with it as parent |
404 li = util.etree.SubElement(lst, 'li') | 375 li = util.etree.SubElement(lst, 'li') |
405 self.parser.parseBlocks(li, [item]) | 376 self.parser.parseBlocks(li, [item]) |
406 self.parser.state.reset() | 377 self.parser.state.reset() |
407 | 378 |
408 def get_items(self, block): | 379 def get_items(self, block): |
409 """ Break a block into list items. """ | 380 """ Break a block into list items. """ |
410 items = [] | 381 items = [] |
411 for line in block.split('\n'): | 382 for line in block.split('\n'): |
412 m = self.CHILD_RE.match(line) | 383 m = self.CHILD_RE.match(line) |
413 if m: | 384 if m: |
414 # This is a new list item | 385 # This is a new list item |
415 # Check first item for the start index | 386 # Check first item for the start index |
416 if not items and self.TAG=='ol': | 387 if not items and self.TAG == 'ol': |
417 # Detect the integer value of first list item | 388 # Detect the integer value of first list item |
418 INTEGER_RE = re.compile('(\d+)') | 389 INTEGER_RE = re.compile('(\d+)') |
419 self.STARTSWITH = INTEGER_RE.match(m.group(1)).group() | 390 self.STARTSWITH = INTEGER_RE.match(m.group(1)).group() |
420 # Append to the list | 391 # Append to the list |
421 items.append(m.group(3)) | 392 items.append(m.group(3)) |
422 elif self.INDENT_RE.match(line): | 393 elif self.INDENT_RE.match(line): |
423 # This is an indented (possibly nested) item. | 394 # This is an indented (possibly nested) item. |
424 if items[-1].startswith(' '*self.tab_length): | 395 if items[-1].startswith(' '*self.tab_length): |
425 # Previous item was indented. Append to that item. | 396 # Previous item was indented. Append to that item. |
426 items[-1] = '%s\n%s' % (items[-1], line) | 397 items[-1] = '%s\n%s' % (items[-1], line) |
(...skipping 18 matching lines...) Expand all Loading... |
445 # Detect a header at start of any line in block | 416 # Detect a header at start of any line in block |
446 RE = re.compile(r'(^|\n)(?P<level>#{1,6})(?P<header>.*?)#*(\n|$)') | 417 RE = re.compile(r'(^|\n)(?P<level>#{1,6})(?P<header>.*?)#*(\n|$)') |
447 | 418 |
448 def test(self, parent, block): | 419 def test(self, parent, block): |
449 return bool(self.RE.search(block)) | 420 return bool(self.RE.search(block)) |
450 | 421 |
451 def run(self, parent, blocks): | 422 def run(self, parent, blocks): |
452 block = blocks.pop(0) | 423 block = blocks.pop(0) |
453 m = self.RE.search(block) | 424 m = self.RE.search(block) |
454 if m: | 425 if m: |
455 before = block[:m.start()] # All lines before header | 426 before = block[:m.start()] # All lines before header |
456 after = block[m.end():] # All lines after header | 427 after = block[m.end():] # All lines after header |
457 if before: | 428 if before: |
458 # As the header was not the first line of the block and the | 429 # As the header was not the first line of the block and the |
459 # lines before the header must be parsed first, | 430 # lines before the header must be parsed first, |
460 # recursively parse these lines as a block. | 431 # recursively parse these lines as a block. |
461 self.parser.parseBlocks(parent, [before]) | 432 self.parser.parseBlocks(parent, [before]) |
462 # Create header using named groups from RE | 433 # Create header using named groups from RE |
463 h = util.etree.SubElement(parent, 'h%d' % len(m.group('level'))) | 434 h = util.etree.SubElement(parent, 'h%d' % len(m.group('level'))) |
464 h.text = m.group('header').strip() | 435 h.text = m.group('header').strip() |
465 if after: | 436 if after: |
466 # Insert remaining lines as first block for future parsing. | 437 # Insert remaining lines as first block for future parsing. |
467 blocks.insert(0, after) | 438 blocks.insert(0, after) |
468 else: | 439 else: # pragma: no cover |
469 # This should never happen, but just in case... | 440 # This should never happen, but just in case... |
470 logger.warn("We've got a problem header: %r" % block) | 441 logger.warn("We've got a problem header: %r" % block) |
471 | 442 |
472 | 443 |
473 class SetextHeaderProcessor(BlockProcessor): | 444 class SetextHeaderProcessor(BlockProcessor): |
474 """ Process Setext-style Headers. """ | 445 """ Process Setext-style Headers. """ |
475 | 446 |
476 # Detect Setext-style header. Must be first 2 lines of block. | 447 # Detect Setext-style header. Must be first 2 lines of block. |
477 RE = re.compile(r'^.*?\n[=-]+[ ]*(\n|$)', re.MULTILINE) | 448 RE = re.compile(r'^.*?\n[=-]+[ ]*(\n|$)', re.MULTILINE) |
478 | 449 |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
520 self.parser.parseBlocks(parent, [prelines]) | 491 self.parser.parseBlocks(parent, [prelines]) |
521 # create hr | 492 # create hr |
522 util.etree.SubElement(parent, 'hr') | 493 util.etree.SubElement(parent, 'hr') |
523 # check for lines in block after hr. | 494 # check for lines in block after hr. |
524 postlines = block[self.match.end():].lstrip('\n') | 495 postlines = block[self.match.end():].lstrip('\n') |
525 if postlines: | 496 if postlines: |
526 # Add lines after hr to master blocks for later parsing. | 497 # Add lines after hr to master blocks for later parsing. |
527 blocks.insert(0, postlines) | 498 blocks.insert(0, postlines) |
528 | 499 |
529 | 500 |
530 | |
class EmptyBlockProcessor(BlockProcessor):
    """ Handle blocks that are empty or that begin with a blank line. """

    def test(self, parent, block):
        """ Accept an empty block or one whose first character is a newline. """
        if block:
            return block.startswith('\n')
        return True

    def run(self, parent, blocks):
        """ Consume the blank part of the next block.

        A completely empty block counts as two newlines. A block that only
        starts with a blank line counts as one newline, and whatever follows
        that first character is put back at the front of ``blocks``.

        The newlines are appended to the text of the last child of
        ``parent`` only when that child is a ``pre`` element whose first
        child is a ``code`` element; otherwise they are discarded.
        """
        current = blocks.pop(0)
        if current:
            # Starts with an empty line: only a single line is replaced.
            padding = '\n'
            remainder = current[1:]
            if remainder:
                # Everything after the leading newline is parsed later.
                blocks.insert(0, remainder)
        else:
            padding = '\n\n'

        last = self.lastChild(parent)
        if last is None or last.tag != 'pre':
            return
        if not len(last) or last[0].tag != 'code':
            return

        # Last block is a codeblock. Append to preserve whitespace.
        code = last[0]
        code.text = util.AtomicString('%s%s' % (code.text, padding))
553 | 526 |
554 | 527 |
class ParagraphProcessor(BlockProcessor):
    """ Turn a block of text into paragraph content. """

    def test(self, parent, block):
        """ Always accept the block. """
        return True

    def run(self, parent, blocks):
        """ Add the next block to ``parent`` as paragraph text.

        The block is removed from ``blocks``; a block containing only
        whitespace is thrown away. When the parser state is 'list', the
        text is attached to ``parent`` directly (no ``p`` element is
        created); otherwise a new ``p`` child receives it.
        """
        text = blocks.pop(0)
        if not text.strip():
            # Blank block: nothing to add.
            return

        if not self.parser.state.isstate('list'):
            # Create a regular paragraph
            para = util.etree.SubElement(parent, 'p')
            para.text = text.lstrip()
            return

        # The parent is a tight-list.
        #
        # Check for any children. This will likely only happen in a
        # tight-list when a header isn't followed by a blank line.
        # For example:
        #
        #     * # Header
        #     Line 2 of list item - not part of header.
        prev = self.lastChild(parent)
        if prev is not None:
            # Insert after the existing child by extending its tail.
            if prev.tail:
                prev.tail = '%s\n%s' % (prev.tail, text)
            else:
                prev.tail = '\n%s' % text
        elif parent.text:
            # No children yet: extend the text already on the parent.
            parent.text = '%s\n%s' % (parent.text, text)
        else:
            parent.text = text.lstrip()
OLD | NEW |