| Index: third_party/markdown/preprocessors.py
|
| diff --git a/third_party/markdown/preprocessors.py b/third_party/markdown/preprocessors.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..3f1cfe7777fbacde0ae2a68ac910e343426e9fb7
|
| --- /dev/null
|
| +++ b/third_party/markdown/preprocessors.py
|
| @@ -0,0 +1,330 @@
|
| +# markdown is released under the BSD license
|
| +# Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
|
| +# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
|
| +# Copyright 2004 Manfred Stienstra (the original version)
|
| +#
|
| +# All rights reserved.
|
| +#
|
| +# Redistribution and use in source and binary forms, with or without
|
| +# modification, are permitted provided that the following conditions are met:
|
| +#
|
| +# * Redistributions of source code must retain the above copyright
|
| +# notice, this list of conditions and the following disclaimer.
|
| +# * Redistributions in binary form must reproduce the above copyright
|
| +# notice, this list of conditions and the following disclaimer in the
|
| +# documentation and/or other materials provided with the distribution.
|
| +# * Neither the name of the <organization> nor the
|
| +# names of its contributors may be used to endorse or promote products
|
| +# derived from this software without specific prior written permission.
|
| +#
|
| +# THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
|
| +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
| +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
| +# DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
|
| +# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
| +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
| +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
| +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
| +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
| +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
| +# POSSIBILITY OF SUCH DAMAGE.
|
| +
|
| +
|
| +"""
|
| +PRE-PROCESSORS
|
| +=============================================================================
|
| +
|
| +Preprocessors work on source text before we start doing anything too
|
| +complicated.
|
| +"""
|
| +
|
| +from __future__ import absolute_import
|
| +from __future__ import unicode_literals
|
| +from . import util
|
| +from . import odict
|
| +import re
|
| +
|
| +
|
| +def build_preprocessors(md_instance, **kwargs):
|
| + """ Build the default set of preprocessors used by Markdown. """
|
| + preprocessors = odict.OrderedDict()
|
| + preprocessors['normalize_whitespace'] = NormalizeWhitespace(md_instance)
|
| + if md_instance.safeMode != 'escape':
|
| + preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance)
|
| + preprocessors["reference"] = ReferencePreprocessor(md_instance)
|
| + return preprocessors
|
| +
|
| +
|
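| +# Illustrative sketch only; it is not part of the upstream Python-Markdown
|
| +# source. It shows how the ordered dict returned by build_preprocessors()
|
| +# is meant to be consumed: each preprocessor's run() is fed the lines
|
| +# produced by the one before it. `md_instance` is assumed to be a
|
| +# markdown.Markdown-like object (providing safeMode, tab_length, etc.).
|
| +def _example_preprocess(md_instance, text):
|
| +    """ Run the default preprocessor chain over raw source text. """
|
| +    lines = text.split("\n")
|
| +    for prep in build_preprocessors(md_instance).values():
|
| +        lines = prep.run(lines)
|
| +    return lines
|
| +
|
| +
|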
| +class Preprocessor(util.Processor):
|
| + """
|
| + Preprocessors are run after the text is broken into lines.
|
| +
|
| + Each preprocessor implements a "run" method that takes a pointer to a
|
| + list of lines of the document, modifies it as necessary and returns
|
| + either the same pointer or a pointer to a new list.
|
| +
|
| + Preprocessors must extend markdown.Preprocessor.
|
| +
|
| + """
|
| + def run(self, lines):
|
| + """
|
| + Each subclass of Preprocessor should override the `run` method, which
|
| + takes the document as a list of strings split by newlines and returns
|
| + the (possibly modified) list of lines.
|
| +
|
| + """
|
| + pass
|
| +
|
| +
|
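| +# A minimal sketch of the contract described in the docstring above: a
|
| +# hypothetical subclass (not part of the upstream file) whose run() takes
|
| +# the document as a list of lines and returns a new list of lines.
|
| +class _ExampleCommentStripper(Preprocessor):
|
| +    """ Drop lines starting with '//' before any other processing. """
|
| +
|
| +    def run(self, lines):
|
| +        return [line for line in lines if not line.startswith("//")]
|
| +
|
| +
|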
| +class NormalizeWhitespace(Preprocessor):
|
| +    """ Normalize whitespace for consistent parsing. """
|
| +
|
| + def run(self, lines):
|
| + source = '\n'.join(lines)
|
| + source = source.replace(util.STX, "").replace(util.ETX, "")
|
| + source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
|
| + source = source.expandtabs(self.markdown.tab_length)
|
| + source = re.sub(r'(?<=\n) +\n', '\n', source)
|
| + return source.split('\n')
|
| +
|
| +
|
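| +# Hedged example (not part of the upstream file) of what the normalization
|
| +# above does: CRLF/CR collapse to LF, tabs expand to md_instance.tab_length
|
| +# columns, space-only lines become empty, and two newlines are appended.
|
| +def _example_normalize(md_instance):
|
| +    """ With tab_length == 4, returns ['a   b', '', 'c', '', '']. """
|
| +    return NormalizeWhitespace(md_instance).run(["a\tb\r", "   ", "c"])
|
| +
|
| +
|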
| +class HtmlBlockPreprocessor(Preprocessor):
|
| + """Remove html blocks from the text and store them for later retrieval."""
|
| +
|
| + right_tag_patterns = ["</%s>", "%s>"]
|
| + attrs_pattern = r"""
|
| + \s+(?P<attr>[^>"'/= ]+)=(?P<q>['"])(?P<value>.*?)(?P=q) # attr="value"
|
| + | # OR
|
| + \s+(?P<attr1>[^>"'/= ]+)=(?P<value1>[^> ]+) # attr=value
|
| + | # OR
|
| + \s+(?P<attr2>[^>"'/= ]+) # attr
|
| + """
|
| + left_tag_pattern = r'^\<(?P<tag>[^> ]+)(?P<attrs>(%s)*)\s*\/?\>?' % attrs_pattern
|
| + attrs_re = re.compile(attrs_pattern, re.VERBOSE)
|
| + left_tag_re = re.compile(left_tag_pattern, re.VERBOSE)
|
| + markdown_in_raw = False
|
| +
|
| + def _get_left_tag(self, block):
|
| + m = self.left_tag_re.match(block)
|
| + if m:
|
| + tag = m.group('tag')
|
| + raw_attrs = m.group('attrs')
|
| + attrs = {}
|
| + if raw_attrs:
|
| + for ma in self.attrs_re.finditer(raw_attrs):
|
| + if ma.group('attr'):
|
| + if ma.group('value'):
|
| + attrs[ma.group('attr').strip()] = ma.group('value')
|
| + else:
|
| + attrs[ma.group('attr').strip()] = ""
|
| + elif ma.group('attr1'):
|
| + if ma.group('value1'):
|
| + attrs[ma.group('attr1').strip()] = ma.group('value1')
|
| + else:
|
| + attrs[ma.group('attr1').strip()] = ""
|
| + elif ma.group('attr2'):
|
| + attrs[ma.group('attr2').strip()] = ""
|
| + return tag, len(m.group(0)), attrs
|
| + else:
|
| + tag = block[1:].split(">", 1)[0].lower()
|
| + return tag, len(tag)+2, {}
|
| +
|
| + def _recursive_tagfind(self, ltag, rtag, start_index, block):
|
| + while 1:
|
| + i = block.find(rtag, start_index)
|
| + if i == -1:
|
| + return -1
|
| + j = block.find(ltag, start_index)
|
| + # if no ltag, or rtag found before another ltag, return index
|
| + if (j > i or j == -1):
|
| + return i + len(rtag)
|
| + # another ltag found before rtag, use end of ltag as starting
|
| + # point and search again
|
| + j = block.find('>', j)
|
| + start_index = self._recursive_tagfind(ltag, rtag, j + 1, block)
|
| + if start_index == -1:
|
| +                # HTML potentially malformed: ltag has no corresponding
|
| +                # rtag
|
| + return -1
|
| +
|
| + def _get_right_tag(self, left_tag, left_index, block):
|
| + for p in self.right_tag_patterns:
|
| + tag = p % left_tag
|
| + i = self._recursive_tagfind("<%s" % left_tag, tag, left_index, block)
|
| + if i > 2:
|
| + return tag.lstrip("<").rstrip(">"), i
|
| + return block.rstrip()[-left_index:-1].lower(), len(block)
|
| +
|
| + def _equal_tags(self, left_tag, right_tag):
|
| + if left_tag[0] in ['?', '@', '%']: # handle PHP, etc.
|
| + return True
|
| + if ("/" + left_tag) == right_tag:
|
| + return True
|
| + if (right_tag == "--" and left_tag == "--"):
|
| + return True
|
| + elif left_tag == right_tag[1:] \
|
| + and right_tag[0] == "/":
|
| + return True
|
| + else:
|
| + return False
|
| +
|
| + def _is_oneliner(self, tag):
|
| + return (tag in ['hr', 'hr/'])
|
| +
|
| + def run(self, lines):
|
| + text = "\n".join(lines)
|
| + new_blocks = []
|
| + text = text.rsplit("\n\n")
|
| + items = []
|
| + left_tag = ''
|
| + right_tag = ''
|
| + in_tag = False # flag
|
| +
|
| + while text:
|
| + block = text[0]
|
| + if block.startswith("\n"):
|
| + block = block[1:]
|
| + text = text[1:]
|
| +
|
| + if block.startswith("\n"):
|
| + block = block[1:]
|
| +
|
| + if not in_tag:
|
| + if block.startswith("<") and len(block.strip()) > 1:
|
| +
|
| + if block[1] == "!":
|
| + # is a comment block
|
| + left_tag, left_index, attrs = "--", 2, {}
|
| + else:
|
| + left_tag, left_index, attrs = self._get_left_tag(block)
|
| + right_tag, data_index = self._get_right_tag(left_tag,
|
| + left_index,
|
| + block)
|
| + # keep checking conditions below and maybe just append
|
| +
|
| + if data_index < len(block) \
|
| + and (util.isBlockLevel(left_tag)
|
| + or left_tag == '--'):
|
| + text.insert(0, block[data_index:])
|
| + block = block[:data_index]
|
| +
|
| + if not (util.isBlockLevel(left_tag) \
|
| + or block[1] in ["!", "?", "@", "%"]):
|
| + new_blocks.append(block)
|
| + continue
|
| +
|
| + if self._is_oneliner(left_tag):
|
| + new_blocks.append(block.strip())
|
| + continue
|
| +
|
| + if block.rstrip().endswith(">") \
|
| + and self._equal_tags(left_tag, right_tag):
|
| + if self.markdown_in_raw and 'markdown' in attrs.keys():
|
| + start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?',
|
| + '', block[:left_index])
|
| + end = block[-len(right_tag)-2:]
|
| + block = block[left_index:-len(right_tag)-2]
|
| + new_blocks.append(
|
| + self.markdown.htmlStash.store(start))
|
| + new_blocks.append(block)
|
| + new_blocks.append(
|
| + self.markdown.htmlStash.store(end))
|
| + else:
|
| + new_blocks.append(
|
| + self.markdown.htmlStash.store(block.strip()))
|
| + continue
|
| + else:
|
| + # if is block level tag and is not complete
|
| +
|
| + if util.isBlockLevel(left_tag) or left_tag == "--" \
|
| + and not block.rstrip().endswith(">"):
|
| + items.append(block.strip())
|
| + in_tag = True
|
| + else:
|
| + new_blocks.append(
|
| + self.markdown.htmlStash.store(block.strip()))
|
| +
|
| + continue
|
| +
|
| + new_blocks.append(block)
|
| +
|
| + else:
|
| + items.append(block)
|
| +
|
| + right_tag, data_index = self._get_right_tag(left_tag, 0, block)
|
| +
|
| + if self._equal_tags(left_tag, right_tag):
|
| + # if find closing tag
|
| +
|
| + if data_index < len(block):
|
| + # we have more text after right_tag
|
| + items[-1] = block[:data_index]
|
| + text.insert(0, block[data_index:])
|
| +
|
| + in_tag = False
|
| + if self.markdown_in_raw and 'markdown' in attrs.keys():
|
| + start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?',
|
| + '', items[0][:left_index])
|
| + items[0] = items[0][left_index:]
|
| + end = items[-1][-len(right_tag)-2:]
|
| + items[-1] = items[-1][:-len(right_tag)-2]
|
| + new_blocks.append(
|
| + self.markdown.htmlStash.store(start))
|
| + new_blocks.extend(items)
|
| + new_blocks.append(
|
| + self.markdown.htmlStash.store(end))
|
| + else:
|
| + new_blocks.append(
|
| + self.markdown.htmlStash.store('\n\n'.join(items)))
|
| + items = []
|
| +
|
| + if items:
|
| + if self.markdown_in_raw and 'markdown' in attrs.keys():
|
| + start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?',
|
| + '', items[0][:left_index])
|
| + items[0] = items[0][left_index:]
|
| + end = items[-1][-len(right_tag)-2:]
|
| + items[-1] = items[-1][:-len(right_tag)-2]
|
| + new_blocks.append(
|
| + self.markdown.htmlStash.store(start))
|
| + new_blocks.extend(items)
|
| + if end.strip():
|
| + new_blocks.append(
|
| + self.markdown.htmlStash.store(end))
|
| + else:
|
| + new_blocks.append(
|
| + self.markdown.htmlStash.store('\n\n'.join(items)))
|
| + #new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)))
|
| + new_blocks.append('\n')
|
| +
|
| + new_text = "\n\n".join(new_blocks)
|
| + return new_text.split("\n")
|
| +
|
| +
|
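| +# Rough usage sketch, not part of the upstream file. A block-level HTML
|
| +# chunk is pulled out of the text by HtmlBlockPreprocessor.run(), stored in
|
| +# md_instance.htmlStash, and replaced by an opaque placeholder that later
|
| +# stages swap back in. `md_instance` is assumed to be markdown.Markdown-like.
|
| +def _example_html_block(md_instance):
|
| +    """ Returns the lines with the raw <div> block replaced by a placeholder. """
|
| +    lines = ["<div>", "<p>raw html</p>", "</div>", "", "Markdown *text*."]
|
| +    return HtmlBlockPreprocessor(md_instance).run(lines)
|
| +
|
| +
|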
| +class ReferencePreprocessor(Preprocessor):
|
| + """ Remove reference definitions from text and store for later use. """
|
| +
|
| + TITLE = r'[ ]*(\"(.*)\"|\'(.*)\'|\((.*)\))[ ]*'
|
| + RE = re.compile(r'^[ ]{0,3}\[([^\]]*)\]:\s*([^ ]*)[ ]*(%s)?$' % TITLE, re.DOTALL)
|
| + TITLE_RE = re.compile(r'^%s$' % TITLE)
|
| +
|
| +    def run(self, lines):
|
| +        new_text = []
|
| + while lines:
|
| + line = lines.pop(0)
|
| + m = self.RE.match(line)
|
| + if m:
|
| + id = m.group(1).strip().lower()
|
| + link = m.group(2).lstrip('<').rstrip('>')
|
| + t = m.group(5) or m.group(6) or m.group(7)
|
| + if not t:
|
| +                    # Check next line for title; guard against a reference
|
| +                    # definition on the last line of the document.
|
| +                    if lines:
|
| +                        tm = self.TITLE_RE.match(lines[0])
|
| +                        if tm:
|
| +                            lines.pop(0)
|
| +                            t = tm.group(2) or tm.group(3) or tm.group(4)
|
| + self.markdown.references[id] = (link, t)
|
| + else:
|
| + new_text.append(line)
|
| +
|
| + return new_text #+ "\n"
|
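| +
|
| +
|
| +# A small usage sketch (not in the upstream file): feed one reference
|
| +# definition through ReferencePreprocessor. Assuming a markdown.Markdown-like
|
| +# `md_instance`, the definition line is removed from the output and
|
| +# md_instance.references['example'] becomes
|
| +# ('http://example.com/', 'Example Title').
|
| +def _example_reference(md_instance):
|
| +    """ Returns ['Body text.'] and stores the reference definition. """
|
| +    lines = ['[example]: http://example.com/ "Example Title"', 'Body text.']
|
| +    return ReferencePreprocessor(md_instance).run(lines)
|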
|
|