Index: third_party/markdown/preprocessors.py |
diff --git a/third_party/markdown/preprocessors.py b/third_party/markdown/preprocessors.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..3f1cfe7777fbacde0ae2a68ac910e343426e9fb7 |
--- /dev/null |
+++ b/third_party/markdown/preprocessors.py |
@@ -0,0 +1,330 @@ |
+# markdown is released under the BSD license |
+# Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later) |
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) |
+# Copyright 2004 Manfred Stienstra (the original version) |
+# |
+# All rights reserved. |
+# |
+# Redistribution and use in source and binary forms, with or without |
+# modification, are permitted provided that the following conditions are met: |
+# |
+# * Redistributions of source code must retain the above copyright |
+# notice, this list of conditions and the following disclaimer. |
+# * Redistributions in binary form must reproduce the above copyright |
+# notice, this list of conditions and the following disclaimer in the |
+# documentation and/or other materials provided with the distribution. |
+# * Neither the name of the <organization> nor the |
+# names of its contributors may be used to endorse or promote products |
+# derived from this software without specific prior written permission. |
+# |
+# THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY |
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
+# DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT |
+# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
+# POSSIBILITY OF SUCH DAMAGE. |
+ |
+ |
+""" |
+PRE-PROCESSORS |
+============================================================================= |
+ |
+Preprocessors work on source text before we start doing anything too |
+complicated. |
+""" |
+ |
+from __future__ import absolute_import |
+from __future__ import unicode_literals |
+from . import util |
+from . import odict |
+import re |
+ |
+ |
def build_preprocessors(md_instance, **kwargs):
    """
    Assemble the stock preprocessors for a Markdown instance.

    The result is an ordered mapping of name -> preprocessor, in this order:
    'normalize_whitespace', then 'html_block', then 'reference'. The
    'html_block' entry is left out when ``md_instance.safeMode`` equals
    'escape'. Extra keyword arguments are accepted but not used here.

    """
    registry = odict.OrderedDict()
    registry['normalize_whitespace'] = NormalizeWhitespace(md_instance)

    keep_html_blocks = md_instance.safeMode != 'escape'
    if keep_html_blocks:
        registry["html_block"] = HtmlBlockPreprocessor(md_instance)

    registry["reference"] = ReferencePreprocessor(md_instance)
    return registry
+ |
+ |
class Preprocessor(util.Processor):
    """
    Base class for preprocessors, which run after the text has been broken
    into lines.

    A preprocessor provides a "run" method that is handed the list of lines
    of the document. It may change that list as needed and hands back either
    the very same list or a newly built one.

    Preprocessors must extend markdown.Preprocessor.

    """
    def run(self, lines):
        """
        Process the document, given as a list of strings split by newlines,
        and return the (possibly modified) list of lines.

        Subclasses should override this method; this base version does
        nothing and returns None.

        """
        return None
+ |
+ |
class NormalizeWhitespace(Preprocessor):
    """ Normalize whitespace for consistent parsing. """

    def run(self, lines):
        """
        Return the document lines with line endings, tabs and blank lines
        normalized.

        The lines are joined, cleaned up as one string and split again on
        newlines, so the returned list is a new list.

        """
        text = '\n'.join(lines)

        # Drop any STX/ETX marker characters already present in the input.
        for marker in (util.STX, util.ETX):
            text = text.replace(marker, "")

        # Unify line endings, then make sure the text ends in a blank line.
        text = text.replace("\r\n", "\n")
        text = text.replace("\r", "\n")
        text = text + "\n\n"

        text = text.expandtabs(self.markdown.tab_length)

        # A line holding nothing but spaces becomes a truly empty line.
        text = re.sub(r'(?<=\n) +\n', '\n', text)
        return text.split('\n')
+ |
+ |
class HtmlBlockPreprocessor(Preprocessor):
    """Remove html blocks from the text and store them for later retrieval.

    The document is split into blocks at blank lines. A block that starts
    with a block-level tag (as judged by ``util.isBlockLevel``), with "<!",
    or with "<?", "<@" or "<%" is handed to ``self.markdown.htmlStash.store``
    and whatever that call returns is placed in the output instead of the
    raw HTML (presumably a placeholder that is swapped back later; the stash
    itself is not visible here). A block-level or comment block that is not
    closed within one block is accumulated across following blocks until its
    closing tag is found.
    """

    # Templates for candidate closing-tag strings. _get_right_tag fills each
    # one with the left tag name and tries them in this order:
    # "</tag>", then "tag>".
    right_tag_patterns = ["</%s>", "%s>"]
    # One attribute, in one of three spellings. This pattern is only ever
    # compiled with re.VERBOSE (see below), so the layout whitespace and the
    # "#" notes inside the string are ignored by the regex engine.
    attrs_pattern = r"""
        \s+(?P<attr>[^>"'/= ]+)=(?P<q>['"])(?P<value>.*?)(?P=q) # attr="value"
        | # OR
        \s+(?P<attr1>[^>"'/= ]+)=(?P<value1>[^> ]+) # attr=value
        | # OR
        \s+(?P<attr2>[^>"'/= ]+) # attr
        """
    # Opening tag anchored at the start of a block: "<", a tag name (any run
    # of characters other than ">" and space), any number of attributes,
    # optional whitespace, an optional "/" and an optional ">".
    left_tag_pattern = r'^\<(?P<tag>[^> ]+)(?P<attrs>(%s)*)\s*\/?\>?' % attrs_pattern
    attrs_re = re.compile(attrs_pattern, re.VERBOSE)
    left_tag_re = re.compile(left_tag_pattern, re.VERBOSE)
    # When true, a block whose opening tag carries a "markdown" attribute has
    # only its opening and closing tags stashed; the content in between stays
    # in the text. Off by default in this class.
    markdown_in_raw = False

    def _get_left_tag(self, block):
        """Parse the opening tag at the start of ``block``.

        Returns a 3-tuple ``(tag, length, attrs)``: the tag name, the number
        of characters the opening tag occupies at the start of ``block``,
        and a dict mapping attribute names to their values ("" for an
        attribute that has no value or an empty one).
        """
        m = self.left_tag_re.match(block)
        if m:
            # Note: the tag name is returned as written (not lower-cased) on
            # this path, unlike the fallback below.
            tag = m.group('tag')
            raw_attrs = m.group('attrs')
            attrs = {}
            if raw_attrs:
                for ma in self.attrs_re.finditer(raw_attrs):
                    # Exactly one of the three alternatives matched; pick
                    # the name/value groups that belong to it.
                    if ma.group('attr'):
                        # attr="value" / attr='value'
                        if ma.group('value'):
                            attrs[ma.group('attr').strip()] = ma.group('value')
                        else:
                            attrs[ma.group('attr').strip()] = ""
                    elif ma.group('attr1'):
                        # attr=value (unquoted)
                        if ma.group('value1'):
                            attrs[ma.group('attr1').strip()] = ma.group('value1')
                        else:
                            attrs[ma.group('attr1').strip()] = ""
                    elif ma.group('attr2'):
                        # bare attr with no value
                        attrs[ma.group('attr2').strip()] = ""
            return tag, len(m.group(0)), attrs
        else:
            # The regex did not match (e.g. "<" directly followed by ">" or
            # a space). Fall back to everything between the first character
            # and the first ">", lower-cased; the +2 counts the surrounding
            # "<" and ">".
            tag = block[1:].split(">", 1)[0].lower()
            return tag, len(tag)+2, {}

    def _recursive_tagfind(self, ltag, rtag, start_index, block):
        """Find the ``rtag`` that balances an already-opened ``ltag``.

        Searches ``block`` from ``start_index``. Every ``ltag`` met before
        the next ``rtag`` is treated as a nested opening tag, and its own
        closing ``rtag`` is skipped via a recursive call. Returns the index
        just past the balancing ``rtag``, or -1 if there is none.
        """
        while 1:
            i = block.find(rtag, start_index)
            if i == -1:
                return -1
            j = block.find(ltag, start_index)
            # if no ltag, or rtag found before another ltag, return index
            if (j > i or j == -1):
                return i + len(rtag)
            # another ltag found before rtag, use end of ltag as starting
            # point and search again
            j = block.find('>', j)
            # The recursive call returns the position just past the nested
            # tag's closing rtag; the loop then resumes the search from there.
            start_index = self._recursive_tagfind(ltag, rtag, j + 1, block)
            if start_index == -1:
                # HTML potentially malformed- ltag has no corresponding
                # rtag
                return -1

    def _get_right_tag(self, left_tag, left_index, block):
        """Locate the closing tag for ``left_tag`` in ``block``.

        ``left_index`` is where the search starts (just past the opening tag,
        or 0 when called for a continuation block). Returns a 2-tuple
        ``(right_tag, index)`` where ``right_tag`` is the closing tag text
        without its leading "<" and trailing ">" and ``index`` is the offset
        just past it in ``block``.
        """
        for p in self.right_tag_patterns:
            tag = p % left_tag
            i = self._recursive_tagfind("<%s" % left_tag, tag, left_index, block)
            # Any result of 2 or less, including -1 for "not found", is
            # rejected and the next pattern is tried.
            if i > 2:
                return tag.lstrip("<").rstrip(">"), i
        # No pattern matched: report the whole block as consumed and derive a
        # "tag" from the tail of the stripped block (its last left_index
        # characters minus the final one). With left_index == 0 the slice is
        # [0:-1], i.e. everything except the last character.
        return block.rstrip()[-left_index:-1].lower(), len(block)

    def _equal_tags(self, left_tag, right_tag):
        """Return True if ``right_tag`` is accepted as closing ``left_tag``.

        Both arguments are tag texts without the surrounding "<" and ">".
        """
        if left_tag[0] in ['?', '@', '%']: # handle PHP, etc.
            return True
        if ("/" + left_tag) == right_tag:
            return True
        # "--" on both sides is the marker pair run() uses for comments.
        if (right_tag == "--" and left_tag == "--"):
            return True
        # Same test as the "/" + left_tag comparison above, spelled the
        # other way round.
        elif left_tag == right_tag[1:] \
            and right_tag[0] == "/":
            return True
        else:
            return False

    def _is_oneliner(self, tag):
        """Return True for tags that stand alone without a closing tag (hr)."""
        return (tag in ['hr', 'hr/'])

    def run(self, lines):
        """Stash raw HTML blocks and return the remaining document lines.

        ``lines`` is the document as a list of strings. The return value is a
        new list of lines in which the HTML blocks described in the class
        docstring have been replaced by the values returned from
        ``self.markdown.htmlStash.store``.
        """
        text = "\n".join(lines)
        new_blocks = []
        # From here on ``text`` is a work queue of blocks (the document split
        # at blank lines), consumed from the front.
        text = text.rsplit("\n\n")
        # Blocks collected so far for an HTML element that is still open.
        items = []
        left_tag = ''
        right_tag = ''
        in_tag = False # True while inside an element opened in an earlier block

        while text:
            block = text[0]
            if block.startswith("\n"):
                block = block[1:]
            text = text[1:]

            # A second leading newline, if present, is dropped as well.
            if block.startswith("\n"):
                block = block[1:]

            if not in_tag:
                if block.startswith("<") and len(block.strip()) > 1:

                    if block[1] == "!":
                        # is a comment block
                        left_tag, left_index, attrs = "--", 2, {}
                    else:
                        left_tag, left_index, attrs = self._get_left_tag(block)
                    # data_index is the offset just past the closing tag, or
                    # len(block) when no closing tag was found.
                    right_tag, data_index = self._get_right_tag(left_tag,
                                                                left_index,
                                                                block)
                    # keep checking conditions below and maybe just append

                    # Text following the closing tag inside the same block is
                    # pushed back onto the queue to be handled as its own
                    # block.
                    if data_index < len(block) \
                        and (util.isBlockLevel(left_tag)
                        or left_tag == '--'):
                        text.insert(0, block[data_index:])
                        block = block[:data_index]

                    # Neither block-level nor a "<!", "<?", "<@", "<%"
                    # construct: leave the block in the text untouched.
                    if not (util.isBlockLevel(left_tag) \
                        or block[1] in ["!", "?", "@", "%"]):
                        new_blocks.append(block)
                        continue

                    # One-liners are kept in the text (stripped), not stashed.
                    if self._is_oneliner(left_tag):
                        new_blocks.append(block.strip())
                        continue

                    # The element opens and closes within this one block.
                    if block.rstrip().endswith(">") \
                        and self._equal_tags(left_tag, right_tag):
                        if self.markdown_in_raw and 'markdown' in attrs.keys():
                            # Stash the opening tag (minus its markdown
                            # attribute) and the closing tag separately and
                            # keep the content between them as ordinary text.
                            start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?',
                                           '', block[:left_index])
                            # len(right_tag) + 2 characters: presumably the
                            # closing tag including its "<" and ">".
                            end = block[-len(right_tag)-2:]
                            block = block[left_index:-len(right_tag)-2]
                            new_blocks.append(
                                self.markdown.htmlStash.store(start))
                            new_blocks.append(block)
                            new_blocks.append(
                                self.markdown.htmlStash.store(end))
                        else:
                            new_blocks.append(
                                self.markdown.htmlStash.store(block.strip()))
                        continue
                    else:
                        # if is block level tag and is not complete

                        # NOTE(review): "and" binds tighter than "or", so
                        # this reads as
                        #   isBlockLevel(left_tag)
                        #   or (left_tag == "--" and not ...endswith(">"))
                        # i.e. every unclosed block-level tag starts
                        # accumulation, even if the block ends with ">".
                        # Confirm this grouping is the intended one.
                        if util.isBlockLevel(left_tag) or left_tag == "--" \
                            and not block.rstrip().endswith(">"):
                            items.append(block.strip())
                            in_tag = True
                        else:
                            new_blocks.append(
                                self.markdown.htmlStash.store(block.strip()))

                        continue

                # Not an HTML block at all: pass it through unchanged.
                new_blocks.append(block)

            else:
                # Inside an element opened by an earlier block; left_tag,
                # left_index and attrs still hold the values from that block.
                items.append(block)

                right_tag, data_index = self._get_right_tag(left_tag, 0, block)

                if self._equal_tags(left_tag, right_tag):
                    # if find closing tag

                    if data_index < len(block):
                        # we have more text after right_tag
                        items[-1] = block[:data_index]
                        text.insert(0, block[data_index:])

                    in_tag = False
                    if self.markdown_in_raw and 'markdown' in attrs.keys():
                        # Same split as for a single-block element: opening
                        # tag from the first item, closing tag from the last.
                        start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?',
                                       '', items[0][:left_index])
                        items[0] = items[0][left_index:]
                        end = items[-1][-len(right_tag)-2:]
                        items[-1] = items[-1][:-len(right_tag)-2]
                        new_blocks.append(
                            self.markdown.htmlStash.store(start))
                        new_blocks.extend(items)
                        new_blocks.append(
                            self.markdown.htmlStash.store(end))
                    else:
                        # The accumulated blocks are stashed as one unit,
                        # rejoined with the blank lines that separated them.
                        new_blocks.append(
                            self.markdown.htmlStash.store('\n\n'.join(items)))
                    items = []

        # The input ended while an element was still open: flush what was
        # accumulated.
        if items:
            if self.markdown_in_raw and 'markdown' in attrs.keys():
                start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?',
                               '', items[0][:left_index])
                items[0] = items[0][left_index:]
                end = items[-1][-len(right_tag)-2:]
                items[-1] = items[-1][:-len(right_tag)-2]
                new_blocks.append(
                    self.markdown.htmlStash.store(start))
                new_blocks.extend(items)
                # Unlike the in-loop case, the tail is stashed only if it
                # contains something other than whitespace.
                if end.strip():
                    new_blocks.append(
                        self.markdown.htmlStash.store(end))
            else:
                new_blocks.append(
                    self.markdown.htmlStash.store('\n\n'.join(items)))
            #new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)))
            new_blocks.append('\n')

        # Rejoin the blocks with blank lines and hand back a list of lines.
        new_text = "\n\n".join(new_blocks)
        return new_text.split("\n")
+ |
+ |
class ReferencePreprocessor(Preprocessor):
    """ Remove reference definitions from text and store for later use.

    A definition is a whole line of the form ``[id]: link "title"``, indented
    by at most three spaces. The title is optional and may also be wrapped in
    single quotes or parentheses, or sit alone on the following line.
    """

    # A title wrapped in double quotes, single quotes or parentheses, with
    # optional spaces on either side.
    TITLE = r'[ ]*(\"(.*)\"|\'(.*)\'|\((.*)\))[ ]*'
    # Groups: 1 = id, 2 = link, 5/6/7 = title text for the double-quoted,
    # single-quoted and parenthesised form respectively.
    RE = re.compile(r'^[ ]{0,3}\[([^\]]*)\]:\s*([^ ]*)[ ]*(%s)?$' % TITLE, re.DOTALL)
    # A line consisting of nothing but a title; groups 2/3/4 = title text.
    TITLE_RE = re.compile(r'^%s$' % TITLE)

    def run(self, lines):
        """
        Collect reference definitions and return the remaining lines.

        Each definition found is recorded in ``self.markdown.references``
        under its stripped, lower-cased id as a ``(link, title)`` tuple;
        ``title`` is None (or empty) when no title was given, and a later
        definition of the same id replaces an earlier one. Definition lines,
        and a following title-only line that was consumed, are left out of
        the returned list. The ``lines`` argument itself is not modified.

        """
        new_text = []
        total = len(lines)
        pos = 0
        # Walk by index rather than popping from the front of the list:
        # that keeps the pass linear and leaves the caller's list intact.
        while pos < total:
            line = lines[pos]
            pos += 1
            m = self.RE.match(line)
            if not m:
                new_text.append(line)
                continue

            ref_id = m.group(1).strip().lower()
            # The link may be written as <link>; drop the angle brackets.
            link = m.group(2).lstrip('<').rstrip('>')
            title = m.group(5) or m.group(6) or m.group(7)
            # Look at the next line for a title, but only if there is a next
            # line: a definition may be the very last line of the input.
            if not title and pos < total:
                tm = self.TITLE_RE.match(lines[pos])
                if tm:
                    pos += 1
                    title = tm.group(2) or tm.group(3) or tm.group(4)
            self.markdown.references[ref_id] = (link, title)

        return new_text