Index: third_party/markdown/treeprocessors.py |
diff --git a/third_party/markdown/treeprocessors.py b/third_party/markdown/treeprocessors.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..109358beb703bc244f6adb7eaf6876317c97d656 |
--- /dev/null |
+++ b/third_party/markdown/treeprocessors.py |
@@ -0,0 +1,392 @@ |
+# markdown is released under the BSD license |
+# Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later) |
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) |
+# Copyright 2004 Manfred Stienstra (the original version) |
+# |
+# All rights reserved. |
+# |
+# Redistribution and use in source and binary forms, with or without |
+# modification, are permitted provided that the following conditions are met: |
+# |
+# * Redistributions of source code must retain the above copyright |
+# notice, this list of conditions and the following disclaimer. |
+# * Redistributions in binary form must reproduce the above copyright |
+# notice, this list of conditions and the following disclaimer in the |
+# documentation and/or other materials provided with the distribution. |
+# * Neither the name of the <organization> nor the |
+# names of its contributors may be used to endorse or promote products |
+# derived from this software without specific prior written permission. |
+# |
+# THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY |
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
+# DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT |
+# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
+# POSSIBILITY OF SUCH DAMAGE. |
+ |
+ |
+from __future__ import unicode_literals |
+from __future__ import absolute_import |
+from . import util |
+from . import odict |
+from . import inlinepatterns |
+ |
+ |
+def build_treeprocessors(md_instance, **kwargs): |
+    """ Create the default treeprocessors, registered as "inline" then "prettify". """ |
+    registry = odict.OrderedDict() |
+    # Each processor is constructed with the Markdown instance, in this order. |
+    for key, factory in (("inline", InlineProcessor), |
+                         ("prettify", PrettifyTreeprocessor)): |
+        registry[key] = factory(md_instance) |
+    return registry |
+ |
+ |
+def isString(s): |
+    """ Return True only for plain strings; AtomicString and non-strings give False. """ |
+    is_atomic = isinstance(s, util.AtomicString) |
+    return not is_atomic and isinstance(s, util.string_type) |
+ |
+ |
+class Treeprocessor(util.Processor): |
+    """ |
+    Base class for processors that work on the ElementTree object before |
+    it is serialized. |
+ |
+    A concrete treeprocessor supplies a "run" method that receives the |
+    ElementTree, changes it as needed and returns an ElementTree object. |
+ |
+    Treeprocessors must extend markdown.Treeprocessor. |
+ |
+    """ |
+    def run(self, root): |
+        """ |
+        Hook to be implemented by subclasses; this base version does nothing. |
+ |
+        `root` is the root ElementTree. An implementation may return a new |
+        ElementTree object, which then replaces the existing root, or it |
+        may modify the current tree in place and return None. |
+        """ |
+        return None |
+ |
+ |
+class InlineProcessor(Treeprocessor): |
+ """ |
+ A Treeprocessor that traverses a tree, applying inline patterns. |
+ |
+ Text matched by an inline pattern is replaced with a placeholder string |
+ and the node produced by the pattern is kept in self.stashed_nodes |
+ (reset at the start of every run()). The placeholders are then resolved |
+ back into elements by __processPlaceholders. |
+ """ |
+ # NOTE(review): Element.getchildren() is used throughout this class; it |
+ # was removed from xml.etree.ElementTree in Python 3.9 (list(elem) is the |
+ # portable spelling) - confirm which interpreters must be supported. |
+ |
+ def __init__(self, md): |
+ self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX |
+ self.__placeholder_suffix = util.ETX |
+ # presumably the 4 stands for the width of the "%04d" id generated in |
+ # __makePlaceholder; this length is not read anywhere else in the class. |
+ self.__placeholder_length = 4 + len(self.__placeholder_prefix) \ |
+ + len(self.__placeholder_suffix) |
+ self.__placeholder_re = util.INLINE_PLACEHOLDER_RE |
+ self.markdown = md |
+ |
+ def __makePlaceholder(self, type): |
+ """ |
+ Generate a placeholder. |
+ |
+ The id is the zero-padded number of nodes stashed so far; `type` is |
+ accepted but not used. |
+ |
+ Returns: tuple of (placeholder string, id). |
+ |
+ """ |
+ id = "%04d" % len(self.stashed_nodes) |
+ hash = util.INLINE_PLACEHOLDER % id |
+ return hash, id |
+ |
+ def __findPlaceholder(self, data, index): |
+ """ |
+ Extract id from data string, start from index |
+ |
+ Keyword arguments: |
+ |
+ * data: string |
+ * index: index, from which we start search |
+ |
+ Returns: placeholder id and string index, after the found placeholder. |
+ If no placeholder is found, returns None and index + 1. |
+ |
+ """ |
+ m = self.__placeholder_re.search(data, index) |
+ if m: |
+ return m.group(1), m.end() |
+ else: |
+ return None, index + 1 |
+ |
+ def __stashNode(self, node, type): |
+ """ |
+ Add node to stash. |
+ |
+ `type` is only forwarded to __makePlaceholder. |
+ |
+ Returns: the placeholder string under which the node was stored. |
+ |
+ """ |
+ placeholder, id = self.__makePlaceholder(type) |
+ self.stashed_nodes[id] = node |
+ return placeholder |
+ |
+ def __handleInline(self, data, patternIndex=0): |
+ """ |
+ Process string with inline patterns and replace it |
+ with placeholders |
+ |
+ Keyword arguments: |
+ |
+ * data: A line of Markdown text |
+ * patternIndex: The index of the inlinePattern to start with |
+ |
+ Returns: String with placeholders. |
+ |
+ """ |
+ # AtomicString data is never run through the inline patterns. |
+ if not isinstance(data, util.AtomicString): |
+ startIndex = 0 |
+ while patternIndex < len(self.markdown.inlinePatterns): |
+ data, matched, startIndex = self.__applyPattern( |
+ self.markdown.inlinePatterns.value_for_index(patternIndex), |
+ data, patternIndex, startIndex) |
+ # The same pattern is tried again as long as it keeps matching; |
+ # we only advance to the next pattern after a miss. |
+ if not matched: |
+ patternIndex += 1 |
+ return data |
+ |
+ def __processElementText(self, node, subnode, isText=True): |
+ """ |
+ Process placeholders in Element.text or Element.tail |
+ of Elements popped from self.stashed_nodes. |
+ |
+ Keyword arguments: |
+ |
+ * node: parent node |
+ * subnode: processing node |
+ * isText: bool variable, True - it's text, False - it's tail |
+ |
+ Returns: None |
+ |
+ """ |
+ # Detach the string being processed from subnode before resolving it. |
+ if isText: |
+ text = subnode.text |
+ subnode.text = None |
+ else: |
+ text = subnode.tail |
+ subnode.tail = None |
+ |
+ childResult = self.__processPlaceholders(text, subnode) |
+ |
+ if not isText and node is not subnode: |
+ pos = node.getchildren().index(subnode) |
+ node.remove(subnode) |
+ else: |
+ pos = 0 |
+ |
+ # Reversing first means repeated insert(pos, ...) leaves the new |
+ # children in their original order. |
+ childResult.reverse() |
+ for newChild in childResult: |
+ node.insert(pos, newChild) |
+ |
+ def __processPlaceholders(self, data, parent): |
+ """ |
+ Process string with placeholders and generate ElementTree tree. |
+ |
+ Keyword arguments: |
+ |
+ * data: string with placeholders instead of ElementTree elements. |
+ * parent: Element, which contains processing inline data |
+ |
+ Returns: list with ElementTree elements with applied inline patterns. |
+ |
+ """ |
+ def linkText(text): |
+ # Attach plain text to the tail of the last element collected so |
+ # far, or to parent.text when nothing has been collected yet. |
+ if text: |
+ if result: |
+ if result[-1].tail: |
+ result[-1].tail += text |
+ else: |
+ result[-1].tail = text |
+ else: |
+ if parent.text: |
+ parent.text += text |
+ else: |
+ parent.text = text |
+ result = [] |
+ strartIndex = 0 |
+ while data: |
+ index = data.find(self.__placeholder_prefix, strartIndex) |
+ if index != -1: |
+ id, phEndIndex = self.__findPlaceholder(data, index) |
+ |
+ if id in self.stashed_nodes: |
+ node = self.stashed_nodes.get(id) |
+ |
+ if index > 0: |
+ text = data[strartIndex:index] |
+ linkText(text) |
+ |
+ if not isString(node): # it's Element |
+ # Resolve placeholders nested in the stashed element itself |
+ # and in its direct children. |
+ for child in [node] + node.getchildren(): |
+ if child.tail: |
+ if child.tail.strip(): |
+ self.__processElementText(node, child,False) |
+ if child.text: |
+ if child.text.strip(): |
+ self.__processElementText(child, child) |
+ else: # it's just a string |
+ linkText(node) |
+ strartIndex = phEndIndex |
+ continue |
+ |
+ strartIndex = phEndIndex |
+ result.append(node) |
+ |
+ else: # wrong placeholder |
+ # Keep the prefix as literal text and resume right after it. |
+ end = index + len(self.__placeholder_prefix) |
+ linkText(data[strartIndex:end]) |
+ strartIndex = end |
+ else: |
+ # No more placeholders: the rest is plain text and ends the loop. |
+ text = data[strartIndex:] |
+ if isinstance(data, util.AtomicString): |
+ # We don't want to lose the AtomicString |
+ text = util.AtomicString(text) |
+ linkText(text) |
+ data = "" |
+ |
+ return result |
+ |
+ def __applyPattern(self, pattern, data, patternIndex, startIndex=0): |
+ """ |
+ Check if the line fits the pattern, create the necessary |
+ elements, add it to stashed_nodes. |
+ |
+ Keyword arguments: |
+ |
+ * data: the text to be processed |
+ * pattern: the pattern to be checked |
+ * patternIndex: index of current pattern |
+ * startIndex: string index, from which we start searching |
+ |
+ Returns: tuple of (data, matched, startIndex), where data is the |
+ string with placeholders instead of ElementTree elements, matched |
+ tells whether the pattern matched, and startIndex is the index to |
+ pass to the next call. |
+ |
+ """ |
+ match = pattern.getCompiledRegExp().match(data[startIndex:]) |
+ leftData = data[:startIndex] |
+ |
+ if not match: |
+ return data, False, 0 |
+ |
+ node = pattern.handleMatch(match) |
+ |
+ if node is None: |
+ # The pattern matched but produced nothing: leave data untouched |
+ # and resume from where the last regex group starts. |
+ return data, True, len(leftData)+match.span(len(match.groups()))[0] |
+ |
+ if not isString(node): |
+ if not isinstance(node.text, util.AtomicString): |
+ # We need to process current node too |
+ for child in [node] + node.getchildren(): |
+ # NOTE(review): this tests `node`, not `child`; node is already |
+ # known not to be a string here, so the check looks redundant - |
+ # confirm intent. |
+ if not isString(node): |
+ # Text continues with the next pattern, tail with this one. |
+ if child.text: |
+ child.text = self.__handleInline(child.text, |
+ patternIndex + 1) |
+ if child.tail: |
+ child.tail = self.__handleInline(child.tail, |
+ patternIndex) |
+ |
+ placeholder = self.__stashNode(node, pattern.type()) |
+ |
+ # Rebuild the string with the placeholder between the first and the |
+ # last regex group of the match. |
+ return "%s%s%s%s" % (leftData, |
+ match.group(1), |
+ placeholder, match.groups()[-1]), True, 0 |
+ |
+ def run(self, tree): |
+ """Apply inline patterns to a parsed Markdown tree. |
+ |
+ Iterate over ElementTree, find elements with inline tag, apply inline |
+ patterns and append newly created Elements to tree. If you don't |
+ want to process your data with inline patterns, instead of normal string, |
+ use subclass AtomicString: |
+ |
+ node.text = markdown.AtomicString("This will not be processed.") |
+ |
+ Arguments: |
+ |
+ * tree: ElementTree object, representing Markdown tree. |
+ |
+ Returns: ElementTree object with applied inline patterns. |
+ |
+ """ |
+ # Start every run with an empty stash. |
+ self.stashed_nodes = {} |
+ |
+ stack = [tree] |
+ |
+ while stack: |
+ currElement = stack.pop() |
+ # (child, new elements) pairs; insertion is deferred until the |
+ # loop over the children has finished. |
+ insertQueue = [] |
+ for child in currElement.getchildren(): |
+ if child.text and not isinstance(child.text, util.AtomicString): |
+ text = child.text |
+ child.text = None |
+ lst = self.__processPlaceholders(self.__handleInline( |
+ text), child) |
+ # Newly built elements are queued so they get visited as well. |
+ stack += lst |
+ insertQueue.append((child, lst)) |
+ if child.tail: |
+ tail = self.__handleInline(child.tail) |
+ # Throwaway parent that receives the leading plain text of the tail. |
+ dumby = util.etree.Element('d') |
+ tailResult = self.__processPlaceholders(tail, dumby) |
+ if dumby.text: |
+ child.tail = dumby.text |
+ else: |
+ child.tail = None |
+ # Elements found in the tail become siblings right after child. |
+ pos = currElement.getchildren().index(child) + 1 |
+ tailResult.reverse() |
+ for newChild in tailResult: |
+ currElement.insert(pos, newChild) |
+ if child.getchildren(): |
+ stack.append(child) |
+ |
+ for element, lst in insertQueue: |
+ if self.markdown.enable_attributes: |
+ if element.text and isString(element.text): |
+ element.text = \ |
+ inlinepatterns.handleAttributes(element.text, |
+ element) |
+ i = 0 |
+ for newChild in lst: |
+ if self.markdown.enable_attributes: |
+ # Processing attributes |
+ if newChild.tail and isString(newChild.tail): |
+ newChild.tail = \ |
+ inlinepatterns.handleAttributes(newChild.tail, |
+ element) |
+ if newChild.text and isString(newChild.text): |
+ newChild.text = \ |
+ inlinepatterns.handleAttributes(newChild.text, |
+ newChild) |
+ element.insert(i, newChild) |
+ i += 1 |
+ return tree |
+ |
+ |
+class PrettifyTreeprocessor(Treeprocessor): |
+    """ Add linebreaks to the html document. """ |
+ |
+    def _prettifyETree(self, elem): |
+        """ |
+        Recursively add linebreaks to ElementTree children. |
+ |
+        Only elements that util.isBlockLevel() accepts are descended into; |
+        'code' and 'pre' are excluded from that descent. Existing |
+        non-whitespace text and tails are never overwritten. |
+        """ |
+ |
+        i = "\n" |
+        if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']: |
+            # Break after the opening tag only when the element starts with |
+            # a block-level child and carries no real text of its own. |
+            if (not elem.text or not elem.text.strip()) \ |
+                    and len(elem) and util.isBlockLevel(elem[0].tag): |
+                elem.text = i |
+            for e in elem: |
+                if util.isBlockLevel(e.tag): |
+                    self._prettifyETree(e) |
+            if not elem.tail or not elem.tail.strip(): |
+                elem.tail = i |
+        if not elem.tail or not elem.tail.strip(): |
+            elem.tail = i |
+ |
+    def run(self, root): |
+        """ |
+        Add linebreaks to ElementTree root object. |
+ |
+        The tree is modified in place and nothing is returned. |
+        """ |
+ |
+        self._prettifyETree(root) |
+        # Element.getiterator() was removed in Python 3.9. Prefer iter() and |
+        # only fall back to getiterator() on ElementTree builds without it. |
+        iter_tag = getattr(root, 'iter', None) or root.getiterator |
+        # Do <br />'s separately as they are often in the middle of |
+        # inline content and missed by _prettifyETree. |
+        for br in iter_tag('br'): |
+            if not br.tail or not br.tail.strip(): |
+                br.tail = '\n' |
+            else: |
+                br.tail = '\n%s' % br.tail |
+        # Clean up extra empty lines at end of code blocks. |
+        for pre in iter_tag('pre'): |
+            if len(pre) and pre[0].tag == 'code': |
+                code = pre[0] |
+                # A code element without text has text None; calling |
+                # rstrip() on it would raise AttributeError, so skip it. |
+                if code.text is not None: |
+                    code.text = code.text.rstrip() + '\n' |