Chromium Code Reviews

Unified Diff: third_party/markdown/preprocessors.py

Issue 133433002: Docserver: Support markdown for HTML content. Request thirdparty submission review. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: change the version of app & cron.yaml Created 6 years, 11 months ago
Index: third_party/markdown/preprocessors.py
diff --git a/third_party/markdown/preprocessors.py b/third_party/markdown/preprocessors.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f1cfe7777fbacde0ae2a68ac910e343426e9fb7
--- /dev/null
+++ b/third_party/markdown/preprocessors.py
@@ -0,0 +1,330 @@
+# markdown is released under the BSD license
+# Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+# Copyright 2004 Manfred Stienstra (the original version)
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of the <organization> nor the
+#   names of its contributors may be used to endorse or promote products
+#   derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
+# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+
+"""
+PRE-PROCESSORS
+=============================================================================
+
+Preprocessors work on source text before we start doing anything too
+complicated.
+"""
+
+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import util
+from . import odict
+import re
+
+
+def build_preprocessors(md_instance, **kwargs):
+    """ Build the default set of preprocessors used by Markdown. """
+    preprocessors = odict.OrderedDict()
+    preprocessors['normalize_whitespace'] = NormalizeWhitespace(md_instance)
+    if md_instance.safeMode != 'escape':
+        preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance)
+    preprocessors["reference"] = ReferencePreprocessor(md_instance)
+    return preprocessors
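
The ordered dict returned here is consumed by the Markdown core, which runs each preprocessor over the document's lines in registration order. A rough sketch of that driver loop, for review context only (it assumes the stock Markdown.convert() wiring and is not part of this file):

    # Illustrative sketch, not part of the patch.
    lines = text.split("\n")
    for prep in md_instance.preprocessors.values():
        lines = prep.run(lines)  # each pass may return a brand-new list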
+
+
+class Preprocessor(util.Processor):
+    """
+    Preprocessors are run after the text is broken into lines.
+
+    Each preprocessor implements a "run" method that takes a pointer to a
+    list of lines of the document, modifies it as necessary and returns
+    either the same pointer or a pointer to a new list.
+
+    Preprocessors must extend markdown.Preprocessor.
+
+    """
+    def run(self, lines):
+        """
+        Each subclass of Preprocessor should override the `run` method, which
+        takes the document as a list of strings split by newlines and returns
+        the (possibly modified) list of lines.
+
+        """
+        pass
+
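The contract described above boils down to: accept the document as a list of lines and return a (possibly new) list of lines. A minimal hypothetical subclass, purely for illustration (the class name and behaviour are invented and not part of this patch):

    # Illustrative sketch, not part of the patch.
    class StripTodoLines(Preprocessor):
        """ Drop any line that starts with 'TODO:'. """
        def run(self, lines):
            return [line for line in lines if not line.startswith('TODO:')]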
+
+class NormalizeWhitespace(Preprocessor):
+    """ Normalize whitespace for consistent parsing. """
+
+    def run(self, lines):
+        source = '\n'.join(lines)
+        source = source.replace(util.STX, "").replace(util.ETX, "")
+        source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
+        source = source.expandtabs(self.markdown.tab_length)
+        source = re.sub(r'(?<=\n) +\n', '\n', source)
+        return source.split('\n')
+
+
+class HtmlBlockPreprocessor(Preprocessor):
+    """Remove html blocks from the text and store them for later retrieval."""
+
+    right_tag_patterns = ["</%s>", "%s>"]
+    attrs_pattern = r"""
+        \s+(?P<attr>[^>"'/= ]+)=(?P<q>['"])(?P<value>.*?)(?P=q)  # attr="value"
+        |                                                        # OR
+        \s+(?P<attr1>[^>"'/= ]+)=(?P<value1>[^> ]+)              # attr=value
+        |                                                        # OR
+        \s+(?P<attr2>[^>"'/= ]+)                                 # attr
+        """
+    left_tag_pattern = r'^\<(?P<tag>[^> ]+)(?P<attrs>(%s)*)\s*\/?\>?' % attrs_pattern
+    attrs_re = re.compile(attrs_pattern, re.VERBOSE)
+    left_tag_re = re.compile(left_tag_pattern, re.VERBOSE)
+    markdown_in_raw = False
+
+    def _get_left_tag(self, block):
+        m = self.left_tag_re.match(block)
+        if m:
+            tag = m.group('tag')
+            raw_attrs = m.group('attrs')
+            attrs = {}
+            if raw_attrs:
+                for ma in self.attrs_re.finditer(raw_attrs):
+                    if ma.group('attr'):
+                        if ma.group('value'):
+                            attrs[ma.group('attr').strip()] = ma.group('value')
+                        else:
+                            attrs[ma.group('attr').strip()] = ""
+                    elif ma.group('attr1'):
+                        if ma.group('value1'):
+                            attrs[ma.group('attr1').strip()] = ma.group('value1')
+                        else:
+                            attrs[ma.group('attr1').strip()] = ""
+                    elif ma.group('attr2'):
+                        attrs[ma.group('attr2').strip()] = ""
+            return tag, len(m.group(0)), attrs
+        else:
+            tag = block[1:].split(">", 1)[0].lower()
+            return tag, len(tag)+2, {}
+
+    def _recursive_tagfind(self, ltag, rtag, start_index, block):
+        while 1:
+            i = block.find(rtag, start_index)
+            if i == -1:
+                return -1
+            j = block.find(ltag, start_index)
+            # if no ltag, or rtag found before another ltag, return index
+            if (j > i or j == -1):
+                return i + len(rtag)
+            # another ltag found before rtag, use end of ltag as starting
+            # point and search again
+            j = block.find('>', j)
+            start_index = self._recursive_tagfind(ltag, rtag, j + 1, block)
+            if start_index == -1:
+                # HTML potentially malformed- ltag has no corresponding
+                # rtag
+                return -1
+
+    def _get_right_tag(self, left_tag, left_index, block):
+        for p in self.right_tag_patterns:
+            tag = p % left_tag
+            i = self._recursive_tagfind("<%s" % left_tag, tag, left_index, block)
+            if i > 2:
+                return tag.lstrip("<").rstrip(">"), i
+        return block.rstrip()[-left_index:-1].lower(), len(block)
+
+    def _equal_tags(self, left_tag, right_tag):
+        if left_tag[0] in ['?', '@', '%']: # handle PHP, etc.
+            return True
+        if ("/" + left_tag) == right_tag:
+            return True
+        if (right_tag == "--" and left_tag == "--"):
+            return True
+        elif left_tag == right_tag[1:] \
+                and right_tag[0] == "/":
+            return True
+        else:
+            return False
+
+    def _is_oneliner(self, tag):
+        return (tag in ['hr', 'hr/'])
+
+    def run(self, lines):
+        text = "\n".join(lines)
+        new_blocks = []
+        text = text.rsplit("\n\n")
+        items = []
+        left_tag = ''
+        right_tag = ''
+        in_tag = False # flag
+
+        while text:
+            block = text[0]
+            if block.startswith("\n"):
+                block = block[1:]
+            text = text[1:]
+
+            if block.startswith("\n"):
+                block = block[1:]
+
+            if not in_tag:
+                if block.startswith("<") and len(block.strip()) > 1:
+
+                    if block[1] == "!":
+                        # is a comment block
+                        left_tag, left_index, attrs = "--", 2, {}
+                    else:
+                        left_tag, left_index, attrs = self._get_left_tag(block)
+                    right_tag, data_index = self._get_right_tag(left_tag,
+                                                                left_index,
+                                                                block)
+                    # keep checking conditions below and maybe just append
+
+                    if data_index < len(block) \
+                            and (util.isBlockLevel(left_tag)
+                                 or left_tag == '--'):
+                        text.insert(0, block[data_index:])
+                        block = block[:data_index]
+
+                    if not (util.isBlockLevel(left_tag) \
+                            or block[1] in ["!", "?", "@", "%"]):
+                        new_blocks.append(block)
+                        continue
+
+                    if self._is_oneliner(left_tag):
+                        new_blocks.append(block.strip())
+                        continue
+
+                    if block.rstrip().endswith(">") \
+                            and self._equal_tags(left_tag, right_tag):
+                        if self.markdown_in_raw and 'markdown' in attrs.keys():
+                            start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?',
+                                           '', block[:left_index])
+                            end = block[-len(right_tag)-2:]
+                            block = block[left_index:-len(right_tag)-2]
+                            new_blocks.append(
+                                self.markdown.htmlStash.store(start))
+                            new_blocks.append(block)
+                            new_blocks.append(
+                                self.markdown.htmlStash.store(end))
+                        else:
+                            new_blocks.append(
+                                self.markdown.htmlStash.store(block.strip()))
+                        continue
+                    else:
+                        # if is block level tag and is not complete
+
+                        if util.isBlockLevel(left_tag) or left_tag == "--" \
+                                and not block.rstrip().endswith(">"):
+                            items.append(block.strip())
+                            in_tag = True
+                        else:
+                            new_blocks.append(
+                                self.markdown.htmlStash.store(block.strip()))
+
+                        continue
+
+                new_blocks.append(block)
+
+            else:
+                items.append(block)
+
+                right_tag, data_index = self._get_right_tag(left_tag, 0, block)
+
+                if self._equal_tags(left_tag, right_tag):
+                    # if find closing tag
+
+                    if data_index < len(block):
+                        # we have more text after right_tag
+                        items[-1] = block[:data_index]
+                        text.insert(0, block[data_index:])
+
+                    in_tag = False
+                    if self.markdown_in_raw and 'markdown' in attrs.keys():
+                        start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?',
+                                       '', items[0][:left_index])
+                        items[0] = items[0][left_index:]
+                        end = items[-1][-len(right_tag)-2:]
+                        items[-1] = items[-1][:-len(right_tag)-2]
+                        new_blocks.append(
+                            self.markdown.htmlStash.store(start))
+                        new_blocks.extend(items)
+                        new_blocks.append(
+                            self.markdown.htmlStash.store(end))
+                    else:
+                        new_blocks.append(
+                            self.markdown.htmlStash.store('\n\n'.join(items)))
+                    items = []
+
+        if items:
+            if self.markdown_in_raw and 'markdown' in attrs.keys():
+                start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?',
+                               '', items[0][:left_index])
+                items[0] = items[0][left_index:]
+                end = items[-1][-len(right_tag)-2:]
+                items[-1] = items[-1][:-len(right_tag)-2]
+                new_blocks.append(
+                    self.markdown.htmlStash.store(start))
+                new_blocks.extend(items)
+                if end.strip():
+                    new_blocks.append(
+                        self.markdown.htmlStash.store(end))
+            else:
+                new_blocks.append(
+                    self.markdown.htmlStash.store('\n\n'.join(items)))
+            #new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)))
+            new_blocks.append('\n')
+
+        new_text = "\n\n".join(new_blocks)
+        return new_text.split("\n")
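
The net effect is that block-level raw HTML is lifted out of the source and replaced with opaque htmlStash placeholders; the postprocessors (third_party/markdown/postprocessors.py in this same change) substitute the stored HTML back in after block parsing. A rough usage sketch, assuming the vendored package is importable as markdown (illustrative only, not part of the patch):

    # Illustrative sketch, not part of the patch.
    import markdown
    md = markdown.Markdown()
    pre = HtmlBlockPreprocessor(md)
    out = pre.run(['<div class="note">', 'raw <b>html</b>', '</div>', '',
                   'Plain *markdown*.'])
    # out now contains a placeholder line where the <div> block was;
    # the original HTML sits in md.htmlStash until postprocessing.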
+
+
+class ReferencePreprocessor(Preprocessor):
+    """ Remove reference definitions from text and store for later use. """
+
+    TITLE = r'[ ]*(\"(.*)\"|\'(.*)\'|\((.*)\))[ ]*'
+    RE = re.compile(r'^[ ]{0,3}\[([^\]]*)\]:\s*([^ ]*)[ ]*(%s)?$' % TITLE, re.DOTALL)
+    TITLE_RE = re.compile(r'^%s$' % TITLE)
+
+    def run (self, lines):
+        new_text = [];
+        while lines:
+            line = lines.pop(0)
+            m = self.RE.match(line)
+            if m:
+                id = m.group(1).strip().lower()
+                link = m.group(2).lstrip('<').rstrip('>')
+                t = m.group(5) or m.group(6) or m.group(7)
+                if not t:
+                    # Check next line for title
+                    tm = self.TITLE_RE.match(lines[0])
+                    if tm:
+                        lines.pop(0)
+                        t = tm.group(2) or tm.group(3) or tm.group(4)
+                self.markdown.references[id] = (link, t)
+            else:
+                new_text.append(line)
+
+        return new_text #+ "\n"
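
For context, the definitions this strips take the form `[id]: url "optional title"`, with the title optionally on the following line. A small sketch of the observable effect, again assuming the vendored package is importable as markdown (illustrative only, not part of the patch):

    # Illustrative sketch, not part of the patch.
    import markdown
    md = markdown.Markdown()
    ref = ReferencePreprocessor(md)
    remaining = ref.run(['[docs]: http://example.com/ "Project docs"',
                         'See [the docs][docs].'])
    # remaining == ['See [the docs][docs].']
    # md.references['docs'] == ('http://example.com/', 'Project docs')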