| Index: third_party/markdown/treeprocessors.py
|
| diff --git a/third_party/markdown/treeprocessors.py b/third_party/markdown/treeprocessors.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..e6d3dc93816c7f7e82391a20aaf1feba9bc62795
|
| --- /dev/null
|
| +++ b/third_party/markdown/treeprocessors.py
|
| @@ -0,0 +1,360 @@
|
| +from __future__ import unicode_literals
|
| +from __future__ import absolute_import
|
| +from . import util
|
| +from . import odict
|
| +from . import inlinepatterns
|
| +
|
| +
|
def build_treeprocessors(md_instance, **kwargs):
    """ Return the default treeprocessors for Markdown, keyed by name.

    The result is an ``odict.OrderedDict`` holding an ``InlineProcessor``
    under ``"inline"`` followed by a ``PrettifyTreeprocessor`` under
    ``"prettify"``; both are constructed with ``md_instance``.  Extra
    keyword arguments are accepted but not used here.
    """
    defaults = (
        ("inline", InlineProcessor),
        ("prettify", PrettifyTreeprocessor),
    )
    registry = odict.OrderedDict()
    for name, processor_class in defaults:
        registry[name] = processor_class(md_instance)
    return registry
|
| +
|
| +
|
def isString(s):
    """ Tell whether `s` is a plain (non-atomic) string.

    ``util.AtomicString`` instances are reported as non-strings, as is any
    value that is not an instance of ``util.string_type``.
    """
    if isinstance(s, util.AtomicString):
        return False
    return isinstance(s, util.string_type)
|
| +
|
| +
|
class Treeprocessor(util.Processor):
    """
    Base class for processors that run on the ElementTree object before
    serialization.

    A Treeprocessor implements a "run" method which receives an
    ElementTree, modifies it as necessary and returns an ElementTree
    object.

    Treeprocessors must extend markdown.Treeprocessor.

    """
    def run(self, root):
        """
        Hook to be implemented by subclasses of Treeprocessor.

        `root` is the root ElementTree.  An implementation may return
        another ElementTree object, in which case the existing root is
        replaced, or it may modify the current tree and return None.
        This base implementation does nothing and returns None.
        """
        return None
|
| +
|
| +
|
class InlineProcessor(Treeprocessor):
    """
    A Treeprocessor that traverses a tree, applying inline patterns.

    Nodes produced by the patterns are kept in ``self.stashed_nodes`` (reset
    at the start of every ``run``) and are referenced from the text through
    placeholder strings until they are spliced back into the tree.
    """

    def __init__(self, md):
        """ Store the Markdown instance and the placeholder settings. """
        self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
        self.__placeholder_suffix = util.ETX
        # NOTE(review): 4 presumably corresponds to the zero-padded id width
        # produced by __makePlaceholder ("%04d") -- confirm against
        # util.INLINE_PLACEHOLDER.
        self.__placeholder_length = (4 + len(self.__placeholder_prefix)
                                     + len(self.__placeholder_suffix))
        self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
        self.markdown = md

    def __makePlaceholder(self, type):
        """
        Generate a placeholder.

        The id is the current number of stashed nodes, zero-padded to four
        digits.  `type` is accepted but not used.

        Returns: tuple of (placeholder string, id).

        """
        node_id = "%04d" % len(self.stashed_nodes)
        placeholder = util.INLINE_PLACEHOLDER % node_id
        return placeholder, node_id

    def __findPlaceholder(self, data, index):
        """
        Extract id from data string, start from index

        Keyword arguments:

        * data: string
        * index: index, from which we start search

        Returns: placeholder id and string index, after the found placeholder.
        When nothing is found the id is None and the returned index is
        `index + 1`.

        """
        m = self.__placeholder_re.search(data, index)
        if m:
            return m.group(1), m.end()
        return None, index + 1

    def __stashNode(self, node, type):
        """ Add node to stash and return the placeholder that refers to it. """
        placeholder, node_id = self.__makePlaceholder(type)
        self.stashed_nodes[node_id] = node
        return placeholder

    def __handleInline(self, data, patternIndex=0):
        """
        Process string with inline patterns and replace it
        with placeholders

        Keyword arguments:

        * data: A line of Markdown text
        * patternIndex: The index of the inlinePattern to start with

        Returns: String with placeholders.  AtomicString input is returned
        untouched.

        """
        if not isinstance(data, util.AtomicString):
            startIndex = 0
            # A pattern is re-applied until it stops matching; only then do
            # we advance to the next one.
            while patternIndex < len(self.markdown.inlinePatterns):
                data, matched, startIndex = self.__applyPattern(
                    self.markdown.inlinePatterns.value_for_index(patternIndex),
                    data, patternIndex, startIndex)
                if not matched:
                    patternIndex += 1
        return data

    def __processElementText(self, node, subnode, isText=True):
        """
        Process placeholders in Element.text or Element.tail
        of Elements popped from self.stashed_nodes.

        Keywords arguments:

        * node: parent node
        * subnode: processing node
        * isText: bool variable, True - it's text, False - it's tail

        Returns: None

        """
        if isText:
            text = subnode.text
            subnode.text = None
        else:
            text = subnode.tail
            subnode.tail = None

        childResult = self.__processPlaceholders(text, subnode)

        if not isText and node is not subnode:
            # list(node) replaces Element.getchildren(), which was removed
            # in Python 3.9.
            pos = list(node).index(subnode)
            # NOTE(review): subnode itself is detached from node here and
            # only the elements found in its tail are inserted in its
            # place -- confirm this is intended.
            node.remove(subnode)
        else:
            pos = 0

        # Inserting in reverse at a fixed position keeps the original order.
        for newChild in reversed(childResult):
            node.insert(pos, newChild)

    def __processPlaceholders(self, data, parent):
        """
        Process string with placeholders and generate ElementTree tree.

        Keyword arguments:

        * data: string with placeholders instead of ElementTree elements.
        * parent: Element, which contains processing inline data

        Returns: list with ElementTree elements with applied inline patterns.
        Plain text found between placeholders is attached to the tail of the
        last collected element or, when none has been collected yet, to
        `parent.text`.

        """
        def linkText(text):
            if not text:
                return
            if result:
                # Text following an element belongs to that element's tail.
                if result[-1].tail:
                    result[-1].tail += text
                else:
                    result[-1].tail = text
            elif parent.text:
                parent.text += text
            else:
                parent.text = text

        result = []
        start_index = 0
        while data:
            index = data.find(self.__placeholder_prefix, start_index)
            if index == -1:
                # No more placeholders: flush the remaining text and stop.
                text = data[start_index:]
                if isinstance(data, util.AtomicString):
                    # We don't want to lose the AtomicString
                    text = util.AtomicString(text)
                linkText(text)
                data = ""
                continue

            node_id, ph_end_index = self.__findPlaceholder(data, index)

            if node_id not in self.stashed_nodes:
                # Wrong placeholder: emit the prefix as ordinary text and
                # keep scanning right after it.
                end = index + len(self.__placeholder_prefix)
                linkText(data[start_index:end])
                start_index = end
                continue

            node = self.stashed_nodes.get(node_id)

            if index > 0:
                linkText(data[start_index:index])

            if isString(node):
                # It's just a string: merge it into the surrounding text.
                linkText(node)
                start_index = ph_end_index
                continue

            # It's an Element: expand placeholders nested in the text and
            # tail of the node and of each of its children.  list(node)
            # replaces Element.getchildren(), removed in Python 3.9.
            for child in [node] + list(node):
                if child.tail:
                    if child.tail.strip():
                        self.__processElementText(node, child, False)
                if child.text:
                    if child.text.strip():
                        self.__processElementText(child, child)

            start_index = ph_end_index
            result.append(node)

        return result

    def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
        """
        Check if the line fits the pattern, create the necessary
        elements, add it to stashed_nodes.

        Keyword arguments:

        * data: the text to be processed
        * pattern: the pattern to be checked
        * patternIndex: index of current pattern
        * startIndex: string index, from which we start searching

        Returns: tuple of (data, matched, startIndex) where `data` is the
        string with placeholders instead of ElementTree elements, `matched`
        tells whether the pattern matched, and `startIndex` is the index at
        which the next search should start.

        """
        match = pattern.getCompiledRegExp().match(data[startIndex:])
        leftData = data[:startIndex]

        if not match:
            return data, False, 0

        node = pattern.handleMatch(match)

        if node is None:
            # The pattern matched but produced nothing: leave the data as is
            # and resume at the start of the pattern's last group.
            return data, True, len(leftData)+match.span(len(match.groups()))[0]

        if not isString(node):
            if not isinstance(node.text, util.AtomicString):
                # We need to process current node too.  list(node) replaces
                # Element.getchildren(), removed in Python 3.9.
                for child in [node] + list(node):
                    if child.text:
                        # Text is handed to the patterns after this one.
                        child.text = self.__handleInline(child.text,
                                                         patternIndex + 1)
                    if child.tail:
                        # Tail starts again from the current pattern.
                        child.tail = self.__handleInline(child.tail,
                                                         patternIndex)

        placeholder = self.__stashNode(node, pattern.type())

        return "%s%s%s%s" % (leftData,
                             match.group(1),
                             placeholder, match.groups()[-1]), True, 0

    def run(self, tree):
        """Apply inline patterns to a parsed Markdown tree.

        Iterate over ElementTree, find elements with inline tag, apply inline
        patterns and append newly created Elements to tree.  If you don't
        want to process your data with inline patterns, instead of normal
        string, use subclass AtomicString:

            node.text = markdown.AtomicString("This will not be processed.")

        Arguments:

        * tree: ElementTree object, representing Markdown tree.

        Returns: ElementTree object with applied inline patterns.

        """
        self.stashed_nodes = {}

        stack = [tree]

        while stack:
            currElement = stack.pop()
            insertQueue = []
            # Iterate over a snapshot: new siblings are inserted into
            # currElement inside the loop.  list(elem) replaces
            # Element.getchildren(), which was removed in Python 3.9.
            for child in list(currElement):
                if child.text and not isinstance(child.text, util.AtomicString):
                    text = child.text
                    child.text = None
                    lst = self.__processPlaceholders(
                        self.__handleInline(text), child)
                    stack += lst
                    insertQueue.append((child, lst))
                if child.tail:
                    tail = self.__handleInline(child.tail)
                    # Throw-away element that collects the leading plain
                    # text of the tail.
                    dumby = util.etree.Element('d')
                    tailResult = self.__processPlaceholders(tail, dumby)
                    if dumby.text:
                        child.tail = dumby.text
                    else:
                        child.tail = None
                    pos = list(currElement).index(child) + 1
                    # Reverse insertion at a fixed position keeps the order.
                    for newChild in reversed(tailResult):
                        currElement.insert(pos, newChild)
                if len(child):
                    stack.append(child)

            for element, lst in insertQueue:
                if self.markdown.enable_attributes:
                    if element.text and isString(element.text):
                        element.text = inlinepatterns.handleAttributes(
                            element.text, element)
                for position, newChild in enumerate(lst):
                    if self.markdown.enable_attributes:
                        # Processing attributes
                        if newChild.tail and isString(newChild.tail):
                            newChild.tail = inlinepatterns.handleAttributes(
                                newChild.tail, element)
                        if newChild.text and isString(newChild.text):
                            newChild.text = inlinepatterns.handleAttributes(
                                newChild.text, newChild)
                    element.insert(position, newChild)
        return tree
|
| +
|
| +
|
class PrettifyTreeprocessor(Treeprocessor):
    """ Add linebreaks to the html document. """

    def _prettifyETree(self, elem):
        """ Recursively add linebreaks to ElementTree children.

        For block-level elements other than `code` and `pre`, the text is
        set to a newline when it is empty or whitespace and the first child
        is block level, and block-level children are processed recursively.
        In every case an empty or whitespace-only tail becomes a newline.
        """
        newline = "\n"
        if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
            if (not elem.text or not elem.text.strip()) \
                    and len(elem) and util.isBlockLevel(elem[0].tag):
                elem.text = newline
            for e in elem:
                if util.isBlockLevel(e.tag):
                    self._prettifyETree(e)
        # A single tail check is sufficient: it runs for block-level and
        # non-block-level elements alike.
        if not elem.tail or not elem.tail.strip():
            elem.tail = newline

    def run(self, root):
        """ Add linebreaks to ElementTree root object.

        Modifies `root` in place and returns None.
        """
        # Element.getiterator() was removed in Python 3.9; prefer iter() and
        # fall back only where iter() is not available.
        iter_tag = getattr(root, 'iter', None) or root.getiterator

        self._prettifyETree(root)
        # Do <br />'s separately as they are often in the middle of
        # inline content and missed by _prettifyETree.
        for br in iter_tag('br'):
            if not br.tail or not br.tail.strip():
                br.tail = '\n'
            else:
                br.tail = '\n%s' % br.tail
        # Clean up extra empty lines at end of code blocks.
        for pre in iter_tag('pre'):
            if len(pre) and pre[0].tag == 'code':
                # An empty <code> element has text None; treat it as an
                # empty string instead of raising AttributeError.
                pre[0].text = (pre[0].text or '').rstrip() + '\n'
|
|
|