Index: third_party/Python-Markdown/markdown/preprocessors.py |
diff --git a/third_party/Python-Markdown/markdown/preprocessors.py b/third_party/Python-Markdown/markdown/preprocessors.py |
deleted file mode 100644 |
index 7fd38d331fb5685a4c06f23e646c9d4d40e69b8b..0000000000000000000000000000000000000000 |
--- a/third_party/Python-Markdown/markdown/preprocessors.py |
+++ /dev/null |
@@ -1,345 +0,0 @@ |
-""" |
-PRE-PROCESSORS |
-============================================================================= |
- |
-Preprocessors work on source text before we start doing anything too |
-complicated. |
-""" |
- |
-from __future__ import absolute_import |
-from __future__ import unicode_literals |
-from . import util |
-from . import odict |
-import re |
- |
- |
-def build_preprocessors(md_instance, **kwargs): |
- """ Build the default set of preprocessors used by Markdown. """ |
- preprocessors = odict.OrderedDict() |
- preprocessors['normalize_whitespace'] = NormalizeWhitespace(md_instance) |
- if md_instance.safeMode != 'escape': |
- preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance) |
- preprocessors["reference"] = ReferencePreprocessor(md_instance) |
- return preprocessors |
- |
- |
-class Preprocessor(util.Processor): |
- """ |
- Preprocessors are run after the text is broken into lines. |
- |
- Each preprocessor implements a "run" method that takes a pointer to a |
- list of lines of the document, modifies it as necessary and returns |
- either the same pointer or a pointer to a new list. |
- |
- Preprocessors must extend markdown.Preprocessor. |
- |
- """ |
- def run(self, lines): |
- """ |
- Each subclass of Preprocessor should override the `run` method, which |
- takes the document as a list of strings split by newlines and returns |
- the (possibly modified) list of lines. |
- |
- """ |
- pass # pragma: no cover |
- |
- |
-class NormalizeWhitespace(Preprocessor): |
- """ Normalize whitespace for consistant parsing. """ |
- |
- def run(self, lines): |
- source = '\n'.join(lines) |
- source = source.replace(util.STX, "").replace(util.ETX, "") |
- source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n" |
- source = source.expandtabs(self.markdown.tab_length) |
- source = re.sub(r'(?<=\n) +\n', '\n', source) |
- return source.split('\n') |
- |
- |
-class HtmlBlockPreprocessor(Preprocessor): |
- """Remove html blocks from the text and store them for later retrieval.""" |
- |
- right_tag_patterns = ["</%s>", "%s>"] |
- attrs_pattern = r""" |
- \s+(?P<attr>[^>"'/= ]+)=(?P<q>['"])(?P<value>.*?)(?P=q) # attr="value" |
- | # OR |
- \s+(?P<attr1>[^>"'/= ]+)=(?P<value1>[^> ]+) # attr=value |
- | # OR |
- \s+(?P<attr2>[^>"'/= ]+) # attr |
- """ |
- left_tag_pattern = r'^\<(?P<tag>[^> ]+)(?P<attrs>(%s)*)\s*\/?\>?' % \ |
- attrs_pattern |
- attrs_re = re.compile(attrs_pattern, re.VERBOSE) |
- left_tag_re = re.compile(left_tag_pattern, re.VERBOSE) |
- markdown_in_raw = False |
- |
- def _get_left_tag(self, block): |
- m = self.left_tag_re.match(block) |
- if m: |
- tag = m.group('tag') |
- raw_attrs = m.group('attrs') |
- attrs = {} |
- if raw_attrs: |
- for ma in self.attrs_re.finditer(raw_attrs): |
- if ma.group('attr'): |
- if ma.group('value'): |
- attrs[ma.group('attr').strip()] = ma.group('value') |
- else: |
- attrs[ma.group('attr').strip()] = "" |
- elif ma.group('attr1'): |
- if ma.group('value1'): |
- attrs[ma.group('attr1').strip()] = ma.group( |
- 'value1' |
- ) |
- else: |
- attrs[ma.group('attr1').strip()] = "" |
- elif ma.group('attr2'): |
- attrs[ma.group('attr2').strip()] = "" |
- return tag, len(m.group(0)), attrs |
- else: |
- tag = block[1:].split(">", 1)[0].lower() |
- return tag, len(tag)+2, {} |
- |
- def _recursive_tagfind(self, ltag, rtag, start_index, block): |
- while 1: |
- i = block.find(rtag, start_index) |
- if i == -1: |
- return -1 |
- j = block.find(ltag, start_index) |
- # if no ltag, or rtag found before another ltag, return index |
- if (j > i or j == -1): |
- return i + len(rtag) |
- # another ltag found before rtag, use end of ltag as starting |
- # point and search again |
- j = block.find('>', j) |
- start_index = self._recursive_tagfind(ltag, rtag, j + 1, block) |
- if start_index == -1: |
- # HTML potentially malformed- ltag has no corresponding |
- # rtag |
- return -1 |
- |
- def _get_right_tag(self, left_tag, left_index, block): |
- for p in self.right_tag_patterns: |
- tag = p % left_tag |
- i = self._recursive_tagfind( |
- "<%s" % left_tag, tag, left_index, block |
- ) |
- if i > 2: |
- return tag.lstrip("<").rstrip(">"), i |
- return block.rstrip()[-left_index:-1].lower(), len(block) |
- |
- def _equal_tags(self, left_tag, right_tag): |
- if left_tag[0] in ['?', '@', '%']: # handle PHP, etc. |
- return True |
- if ("/" + left_tag) == right_tag: |
- return True |
- if (right_tag == "--" and left_tag == "--"): |
- return True |
- elif left_tag == right_tag[1:] and right_tag[0] == "/": |
- return True |
- else: |
- return False |
- |
- def _is_oneliner(self, tag): |
- return (tag in ['hr', 'hr/']) |
- |
- def _stringindex_to_listindex(self, stringindex, items): |
- """ |
- Same effect as concatenating the strings in items, |
- finding the character to which stringindex refers in that string, |
- and returning the index of the item in which that character resides. |
- """ |
- items.append('dummy') |
- i, count = 0, 0 |
- while count <= stringindex: |
- count += len(items[i]) |
- i += 1 |
- return i - 1 |
- |
- def _nested_markdown_in_html(self, items): |
- """Find and process html child elements of the given element block.""" |
- for i, item in enumerate(items): |
- if self.left_tag_re.match(item): |
- left_tag, left_index, attrs = \ |
- self._get_left_tag(''.join(items[i:])) |
- right_tag, data_index = self._get_right_tag( |
- left_tag, left_index, ''.join(items[i:])) |
- right_listindex = \ |
- self._stringindex_to_listindex(data_index, items[i:]) + i |
- if 'markdown' in attrs.keys(): |
- items[i] = items[i][left_index:] # remove opening tag |
- placeholder = self.markdown.htmlStash.store_tag( |
- left_tag, attrs, i + 1, right_listindex + 1) |
- items.insert(i, placeholder) |
- if len(items) - right_listindex <= 1: # last nest, no tail |
- right_listindex -= 1 |
- items[right_listindex] = items[right_listindex][ |
- :-len(right_tag) - 2] # remove closing tag |
- else: # raw html |
- if len(items) - right_listindex <= 1: # last element |
- right_listindex -= 1 |
- offset = 1 if i == right_listindex else 0 |
- placeholder = self.markdown.htmlStash.store('\n\n'.join( |
- items[i:right_listindex + offset])) |
- del items[i:right_listindex + offset] |
- items.insert(i, placeholder) |
- return items |
- |
- def run(self, lines): |
- text = "\n".join(lines) |
- new_blocks = [] |
- text = text.rsplit("\n\n") |
- items = [] |
- left_tag = '' |
- right_tag = '' |
- in_tag = False # flag |
- |
- while text: |
- block = text[0] |
- if block.startswith("\n"): |
- block = block[1:] |
- text = text[1:] |
- |
- if block.startswith("\n"): |
- block = block[1:] |
- |
- if not in_tag: |
- if block.startswith("<") and len(block.strip()) > 1: |
- |
- if block[1:4] == "!--": |
- # is a comment block |
- left_tag, left_index, attrs = "--", 2, {} |
- else: |
- left_tag, left_index, attrs = self._get_left_tag(block) |
- right_tag, data_index = self._get_right_tag(left_tag, |
- left_index, |
- block) |
- # keep checking conditions below and maybe just append |
- |
- if data_index < len(block) and (util.isBlockLevel(left_tag) or left_tag == '--'): |
- text.insert(0, block[data_index:]) |
- block = block[:data_index] |
- |
- if not (util.isBlockLevel(left_tag) or block[1] in ["!", "?", "@", "%"]): |
- new_blocks.append(block) |
- continue |
- |
- if self._is_oneliner(left_tag): |
- new_blocks.append(block.strip()) |
- continue |
- |
- if block.rstrip().endswith(">") \ |
- and self._equal_tags(left_tag, right_tag): |
- if self.markdown_in_raw and 'markdown' in attrs.keys(): |
- block = block[left_index:-len(right_tag) - 2] |
- new_blocks.append(self.markdown.htmlStash. |
- store_tag(left_tag, attrs, 0, 2)) |
- new_blocks.extend([block]) |
- else: |
- new_blocks.append( |
- self.markdown.htmlStash.store(block.strip())) |
- continue |
- else: |
- # if is block level tag and is not complete |
- if (not self._equal_tags(left_tag, right_tag)) and \ |
- (util.isBlockLevel(left_tag) or left_tag == "--"): |
- items.append(block.strip()) |
- in_tag = True |
- else: |
- new_blocks.append( |
- self.markdown.htmlStash.store(block.strip()) |
- ) |
- continue |
- |
- else: |
- new_blocks.append(block) |
- |
- else: |
- items.append(block) |
- |
- right_tag, data_index = self._get_right_tag(left_tag, 0, block) |
- |
- if self._equal_tags(left_tag, right_tag): |
- # if find closing tag |
- |
- if data_index < len(block): |
- # we have more text after right_tag |
- items[-1] = block[:data_index] |
- text.insert(0, block[data_index:]) |
- |
- in_tag = False |
- if self.markdown_in_raw and 'markdown' in attrs.keys(): |
- items[0] = items[0][left_index:] |
- items[-1] = items[-1][:-len(right_tag) - 2] |
- if items[len(items) - 1]: # not a newline/empty string |
- right_index = len(items) + 3 |
- else: |
- right_index = len(items) + 2 |
- new_blocks.append(self.markdown.htmlStash.store_tag( |
- left_tag, attrs, 0, right_index)) |
- placeholderslen = len(self.markdown.htmlStash.tag_data) |
- new_blocks.extend( |
- self._nested_markdown_in_html(items)) |
- nests = len(self.markdown.htmlStash.tag_data) - \ |
- placeholderslen |
- self.markdown.htmlStash.tag_data[-1 - nests][ |
- 'right_index'] += nests - 2 |
- else: |
- new_blocks.append( |
- self.markdown.htmlStash.store('\n\n'.join(items))) |
- items = [] |
- |
- if items: |
- if self.markdown_in_raw and 'markdown' in attrs.keys(): |
- items[0] = items[0][left_index:] |
- items[-1] = items[-1][:-len(right_tag) - 2] |
- if items[len(items) - 1]: # not a newline/empty string |
- right_index = len(items) + 3 |
- else: |
- right_index = len(items) + 2 |
- new_blocks.append( |
- self.markdown.htmlStash.store_tag( |
- left_tag, attrs, 0, right_index)) |
- placeholderslen = len(self.markdown.htmlStash.tag_data) |
- new_blocks.extend(self._nested_markdown_in_html(items)) |
- nests = len(self.markdown.htmlStash.tag_data) - placeholderslen |
- self.markdown.htmlStash.tag_data[-1 - nests][ |
- 'right_index'] += nests - 2 |
- else: |
- new_blocks.append( |
- self.markdown.htmlStash.store('\n\n'.join(items))) |
- new_blocks.append('\n') |
- |
- new_text = "\n\n".join(new_blocks) |
- return new_text.split("\n") |
- |
- |
-class ReferencePreprocessor(Preprocessor): |
- """ Remove reference definitions from text and store for later use. """ |
- |
- TITLE = r'[ ]*(\"(.*)\"|\'(.*)\'|\((.*)\))[ ]*' |
- RE = re.compile( |
- r'^[ ]{0,3}\[([^\]]*)\]:\s*([^ ]*)[ ]*(%s)?$' % TITLE, re.DOTALL |
- ) |
- TITLE_RE = re.compile(r'^%s$' % TITLE) |
- |
- def run(self, lines): |
- new_text = [] |
- while lines: |
- line = lines.pop(0) |
- m = self.RE.match(line) |
- if m: |
- id = m.group(1).strip().lower() |
- link = m.group(2).lstrip('<').rstrip('>') |
- t = m.group(5) or m.group(6) or m.group(7) |
- if not t: |
- # Check next line for title |
- tm = self.TITLE_RE.match(lines[0]) |
- if tm: |
- lines.pop(0) |
- t = tm.group(2) or tm.group(3) or tm.group(4) |
- self.markdown.references[id] = (link, t) |
- else: |
- new_text.append(line) |
- |
- return new_text # + "\n" |