Index: third_party/Python-Markdown/markdown/treeprocessors.py |
diff --git a/third_party/Python-Markdown/markdown/treeprocessors.py b/third_party/Python-Markdown/markdown/treeprocessors.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..d06f192885f18ea3154025cb7b2c89841500f446 |
--- /dev/null |
+++ b/third_party/Python-Markdown/markdown/treeprocessors.py |
@@ -0,0 +1,371 @@ |
+from __future__ import unicode_literals |
+from __future__ import absolute_import |
+from . import util |
+from . import odict |
+from . import inlinepatterns |
+ |
+ |
+def build_treeprocessors(md_instance, **kwargs): |
+    """ |
+    Build the default treeprocessors for Markdown. |
+ |
+    Keyword arguments: |
+ |
+    * md_instance: the Markdown instance handed to every treeprocessor. |
+    * kwargs: accepted but not used by this function. |
+ |
+    Returns: an `odict.OrderedDict` holding the "inline" processor first |
+    and the "prettify" processor second. |
+ |
+    """ |
+    defaults = ( |
+        ("inline", InlineProcessor), |
+        ("prettify", PrettifyTreeprocessor), |
+    ) |
+    registry = odict.OrderedDict() |
+    for key, processor_class in defaults: |
+        registry[key] = processor_class(md_instance) |
+    return registry |
+ |
+ |
+def isString(s): |
+    """ |
+    Check if it's a string that may still be processed. |
+ |
+    Returns: True when `s` is an instance of `util.string_type` but not of |
+    `util.AtomicString`; False for an AtomicString or any other object. |
+ |
+    """ |
+    return (not isinstance(s, util.AtomicString) |
+            and isinstance(s, util.string_type)) |
+ |
+ |
+class Treeprocessor(util.Processor): |
+    """ |
+    Base class for processors that run on the ElementTree object before |
+    serialization. |
+ |
+    A subclass supplies a "run" method which receives the root of the |
+    tree and either edits it in place or hands back a replacement. |
+ |
+    Treeprocessors must extend markdown.Treeprocessor. |
+ |
+    """ |
+    def run(self, root): |
+        """ |
+        Process the tree rooted at `root`; subclasses override this. |
+ |
+        Return another ElementTree object to have it replace the existing |
+        root, or modify the current tree and return None. This base |
+        implementation does nothing and returns None. |
+        """ |
+        return None  # pragma: no cover |
+ |
+ |
+class InlineProcessor(Treeprocessor): |
+    """ |
+    A Treeprocessor that traverses a tree, applying inline patterns. |
+ |
+    Text matched by a pattern is replaced with a placeholder string while |
+    the node the pattern produced is kept in `self.stashed_nodes`. Once the |
+    patterns have been applied, the placeholders are exchanged for the |
+    stashed nodes. |
+    """ |
+ |
+    def __init__(self, md): |
+        """ |
+        Keyword arguments: |
+ |
+        * md: the Markdown instance; its `inlinePatterns` are the patterns |
+          applied by this processor. |
+ |
+        """ |
+        self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX |
+        self.__placeholder_suffix = util.ETX |
+        # Presumably the 4 is the width of the "%04d" id built in |
+        # __makePlaceholder -- confirm against util.INLINE_PLACEHOLDER. |
+        self.__placeholder_length = ( |
+            4 + len(self.__placeholder_prefix) |
+            + len(self.__placeholder_suffix) |
+        ) |
+        self.__placeholder_re = util.INLINE_PLACEHOLDER_RE |
+        self.markdown = md |
+        self.inlinePatterns = md.inlinePatterns |
+        # run() replaces this with a fresh dict on every call; creating it |
+        # here keeps the helper methods from raising AttributeError when |
+        # they are reached before the first run(). |
+        self.stashed_nodes = {} |
+ |
+    def __makePlaceholder(self, type): |
+        """ |
+        Generate a placeholder |
+ |
+        Keyword arguments: |
+ |
+        * type: not used by this method. |
+ |
+        Returns: the placeholder string and the id it encodes. The id is |
+        the current number of stashed nodes, zero-padded to four digits. |
+ |
+        """ |
+        stash_id = "%04d" % len(self.stashed_nodes) |
+        placeholder = util.INLINE_PLACEHOLDER % stash_id |
+        return placeholder, stash_id |
+ |
+    def __findPlaceholder(self, data, index): |
+        """ |
+        Extract id from data string, start from index |
+ |
+        Keyword arguments: |
+ |
+        * data: string |
+        * index: index, from which we start search |
+ |
+        Returns: placeholder id and string index, after the found |
+        placeholder. When nothing is found the id is None and the returned |
+        index is `index + 1`. |
+ |
+        """ |
+        m = self.__placeholder_re.search(data, index) |
+        if m: |
+            return m.group(1), m.end() |
+        return None, index + 1 |
+ |
+    def __stashNode(self, node, type): |
+        """ |
+        Add node to stash |
+ |
+        Keyword arguments: |
+ |
+        * node: the object to store under a newly generated id |
+        * type: forwarded to __makePlaceholder, which ignores it |
+ |
+        Returns: the placeholder string that stands in for `node`. |
+ |
+        """ |
+        placeholder, stash_id = self.__makePlaceholder(type) |
+        self.stashed_nodes[stash_id] = node |
+        return placeholder |
+ |
+    def __handleInline(self, data, patternIndex=0): |
+        """ |
+        Process string with inline patterns and replace it |
+        with placeholders |
+ |
+        Keyword arguments: |
+ |
+        * data: A line of Markdown text |
+        * patternIndex: The index of the inlinePattern to start with |
+ |
+        Returns: String with placeholders. An AtomicString is returned |
+        untouched. |
+ |
+        """ |
+        if not isinstance(data, util.AtomicString): |
+            startIndex = 0 |
+            while patternIndex < len(self.inlinePatterns): |
+                data, matched, startIndex = self.__applyPattern( |
+                    self.inlinePatterns.value_for_index(patternIndex), |
+                    data, patternIndex, startIndex) |
+                # The same pattern is retried until it stops matching; |
+                # only then do we move on to the next one. |
+                if not matched: |
+                    patternIndex += 1 |
+        return data |
+ |
+    def __processElementText(self, node, subnode, isText=True): |
+        """ |
+        Process placeholders in Element.text or Element.tail |
+        of Elements popped from self.stashed_nodes. |
+ |
+        Keywords arguments: |
+ |
+        * node: parent node |
+        * subnode: processing node |
+        * isText: bool variable, True - it's text, False - it's tail |
+ |
+        Returns: None |
+ |
+        """ |
+        # Detach the string first: __processPlaceholders writes the plain |
+        # text parts back onto subnode.text / subnode.tail itself. |
+        if isText: |
+            text = subnode.text |
+            subnode.text = None |
+        else: |
+            text = subnode.tail |
+            subnode.tail = None |
+ |
+        childResult = self.__processPlaceholders(text, subnode, isText) |
+ |
+        # Elements found in a child's tail go right after that child; |
+        # everything else goes to the front of `node`. |
+        if not isText and node is not subnode: |
+            pos = list(node).index(subnode) + 1 |
+        else: |
+            pos = 0 |
+ |
+        # Inserting in reverse at a fixed position keeps the original order. |
+        childResult.reverse() |
+        for newChild in childResult: |
+            node.insert(pos, newChild) |
+ |
+    def __processPlaceholders(self, data, parent, isText=True): |
+        """ |
+        Process string with placeholders and generate ElementTree tree. |
+ |
+        Keyword arguments: |
+ |
+        * data: string with placeholders instead of ElementTree elements. |
+        * parent: Element, which contains processing inline data |
+        * isText: True when `data` came from parent.text, False when it |
+          came from parent.tail; decides where leading text is written. |
+ |
+        Returns: list with ElementTree elements with applied inline patterns. |
+ |
+        """ |
+        def linkText(text): |
+            """ Attach text to the last result element or to the parent. """ |
+            if text: |
+                if result: |
+                    if result[-1].tail: |
+                        result[-1].tail += text |
+                    else: |
+                        result[-1].tail = text |
+                elif not isText: |
+                    if parent.tail: |
+                        parent.tail += text |
+                    else: |
+                        parent.tail = text |
+                else: |
+                    if parent.text: |
+                        parent.text += text |
+                    else: |
+                        parent.text = text |
+        result = [] |
+        startIndex = 0 |
+        while data: |
+            index = data.find(self.__placeholder_prefix, startIndex) |
+            if index != -1: |
+                stash_id, phEndIndex = self.__findPlaceholder(data, index) |
+ |
+                if stash_id in self.stashed_nodes: |
+                    node = self.stashed_nodes.get(stash_id) |
+ |
+                    if index > 0: |
+                        text = data[startIndex:index] |
+                        linkText(text) |
+ |
+                    if not isString(node):  # it's Element |
+                        # Stashed elements may themselves carry |
+                        # placeholders in their text and tails. |
+                        for child in [node] + list(node): |
+                            if child.tail: |
+                                if child.tail.strip(): |
+                                    self.__processElementText( |
+                                        node, child, False |
+                                    ) |
+                            if child.text: |
+                                if child.text.strip(): |
+                                    self.__processElementText(child, child) |
+                    else:  # it's just a string |
+                        linkText(node) |
+                        startIndex = phEndIndex |
+                        continue |
+ |
+                    startIndex = phEndIndex |
+                    result.append(node) |
+ |
+                else:  # wrong placeholder |
+                    # Keep the prefix as literal text and resume the |
+                    # search right after it. |
+                    end = index + len(self.__placeholder_prefix) |
+                    linkText(data[startIndex:end]) |
+                    startIndex = end |
+            else: |
+                text = data[startIndex:] |
+                if isinstance(data, util.AtomicString): |
+                    # We don't want to lose the AtomicString |
+                    text = util.AtomicString(text) |
+                linkText(text) |
+                # Nothing left to scan; this ends the loop. |
+                data = "" |
+ |
+        return result |
+ |
+    def __applyPattern(self, pattern, data, patternIndex, startIndex=0): |
+        """ |
+        Check if the line fits the pattern, create the necessary |
+        elements, add it to stashed_nodes. |
+ |
+        Keyword arguments: |
+ |
+        * data: the text to be processed |
+        * pattern: the pattern to be checked |
+        * patternIndex: index of current pattern |
+        * startIndex: string index, from which we start searching |
+ |
+        Returns: a tuple `(data, matched, startIndex)`: |
+ |
+        * no match: `data` unchanged, False, 0 |
+        * match but `pattern.handleMatch` returned None: `data` unchanged, |
+          True, and the position in `data` where the match's last group |
+          starts |
+        * otherwise: `data` with the matched part replaced by a |
+          placeholder, True, 0 |
+ |
+        """ |
+        match = pattern.getCompiledRegExp().match(data[startIndex:]) |
+        leftData = data[:startIndex] |
+ |
+        if not match: |
+            return data, False, 0 |
+ |
+        node = pattern.handleMatch(match) |
+ |
+        if node is None: |
+            return data, True, len(leftData)+match.span(len(match.groups()))[0] |
+ |
+        if not isString(node): |
+            if not isinstance(node.text, util.AtomicString): |
+                # We need to process current node too |
+                for child in [node] + list(node): |
+                    # Text is handed only to the patterns after this one; |
+                    # tails restart at the current pattern. |
+                    if child.text: |
+                        child.text = self.__handleInline( |
+                            child.text, patternIndex + 1 |
+                        ) |
+                    if child.tail: |
+                        child.tail = self.__handleInline( |
+                            child.tail, patternIndex |
+                        ) |
+ |
+        placeholder = self.__stashNode(node, pattern.type()) |
+ |
+        # NOTE(review): relies on the pattern's first group holding the |
+        # text before the match and its last group the text after it -- |
+        # confirm against inlinepatterns. |
+        return "%s%s%s%s" % (leftData, |
+                             match.group(1), |
+                             placeholder, match.groups()[-1]), True, 0 |
+ |
+    def run(self, tree): |
+        """Apply inline patterns to a parsed Markdown tree. |
+ |
+        Iterate over ElementTree, find elements with inline tag, apply inline |
+        patterns and append newly created Elements to tree. If you don't |
+        want to process your data with inline patterns, instead of normal |
+        string, use subclass AtomicString: |
+ |
+            node.text = markdown.AtomicString("This will not be processed.") |
+ |
+        Arguments: |
+ |
+        * tree: ElementTree object, representing Markdown tree. |
+ |
+        Returns: ElementTree object with applied inline patterns. |
+ |
+        """ |
+        self.stashed_nodes = {} |
+ |
+        stack = [tree] |
+ |
+        while stack: |
+            currElement = stack.pop() |
+            insertQueue = [] |
+            for child in currElement: |
+                if child.text and not isinstance( |
+                    child.text, util.AtomicString |
+                ): |
+                    text = child.text |
+                    child.text = None |
+                    lst = self.__processPlaceholders( |
+                        self.__handleInline(text), child |
+                    ) |
+                    # New elements are visited later and are attached to |
+                    # `child` once this loop is done. |
+                    stack += lst |
+                    insertQueue.append((child, lst)) |
+                if child.tail: |
+                    tail = self.__handleInline(child.tail) |
+                    # A throwaway element collects the leading plain text |
+                    # of the tail in its own `tail`. |
+                    dummy = util.etree.Element('d') |
+                    child.tail = None |
+                    tailResult = self.__processPlaceholders(tail, dummy, False) |
+                    if dummy.tail: |
+                        child.tail = dummy.tail |
+                    pos = list(currElement).index(child) + 1 |
+                    # Reverse insertion at a fixed position keeps order. |
+                    tailResult.reverse() |
+                    for newChild in tailResult: |
+                        currElement.insert(pos, newChild) |
+                if len(child): |
+                    stack.append(child) |
+ |
+            for element, lst in insertQueue: |
+                if self.markdown.enable_attributes: |
+                    if element.text and isString(element.text): |
+                        element.text = inlinepatterns.handleAttributes( |
+                            element.text, element |
+                        ) |
+                for i, newChild in enumerate(lst): |
+                    if self.markdown.enable_attributes: |
+                        # Processing attributes |
+                        if newChild.tail and isString(newChild.tail): |
+                            newChild.tail = inlinepatterns.handleAttributes( |
+                                newChild.tail, element |
+                            ) |
+                        if newChild.text and isString(newChild.text): |
+                            newChild.text = inlinepatterns.handleAttributes( |
+                                newChild.text, newChild |
+                            ) |
+                    element.insert(i, newChild) |
+        return tree |
+ |
+ |
+class PrettifyTreeprocessor(Treeprocessor): |
+    """ Add linebreaks to the html document. """ |
+ |
+    def _prettifyETree(self, elem): |
+        """ |
+        Recursively add linebreaks to ElementTree children. |
+ |
+        For a block-level element other than 'code' or 'pre', blank text |
+        in front of a block-level first child becomes a newline and the |
+        block-level children are processed recursively. For every element |
+        a missing or whitespace-only tail becomes a newline. |
+ |
+        Returns: None |
+ |
+        """ |
+        i = "\n" |
+        if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']: |
+            if ((not elem.text or not elem.text.strip()) |
+                    and len(elem) and util.isBlockLevel(elem[0].tag)): |
+                elem.text = i |
+            for e in elem: |
+                if util.isBlockLevel(e.tag): |
+                    self._prettifyETree(e) |
+        # One tail check is enough: it runs for every element, block-level |
+        # or not, so repeating it inside the branch above adds nothing. |
+        if not elem.tail or not elem.tail.strip(): |
+            elem.tail = i |
+ |
+    def run(self, root): |
+        """ |
+        Add linebreaks to ElementTree root object. |
+ |
+        The tree is modified in place. |
+ |
+        Returns: None |
+ |
+        """ |
+ |
+        self._prettifyETree(root) |
+        # Element.getiterator() was removed in Python 3.9. Use iter() and |
+        # fall back only for element implementations that lack it. |
+        iter_tag = getattr(root, 'iter', None) or root.getiterator |
+        # Do <br />'s separately as they are often in the middle of |
+        # inline content and missed by _prettifyETree. |
+        for br in iter_tag('br'): |
+            if not br.tail or not br.tail.strip(): |
+                br.tail = '\n' |
+            else: |
+                br.tail = '\n%s' % br.tail |
+        # Clean up extra empty lines at end of code blocks. |
+        for pre in iter_tag('pre'): |
+            if len(pre) and pre[0].tag == 'code': |
+                # A <code> element without text has text None; treat it |
+                # as an empty string instead of failing on .rstrip(). |
+                code_text = pre[0].text or '' |
+                pre[0].text = util.AtomicString(code_text.rstrip() + '\n') |