Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(277)

Unified Diff: third_party/markdown/treeprocessors.py

Issue 133433002: Docserver: Support markdown for HTML content. Request thirdparty submission review. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: change the version of app & cron.yaml Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/markdown/serializers.py ('k') | third_party/markdown/util.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/markdown/treeprocessors.py
diff --git a/third_party/markdown/treeprocessors.py b/third_party/markdown/treeprocessors.py
new file mode 100644
index 0000000000000000000000000000000000000000..109358beb703bc244f6adb7eaf6876317c97d656
--- /dev/null
+++ b/third_party/markdown/treeprocessors.py
@@ -0,0 +1,392 @@
+# markdown is released under the BSD license
+# Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+# Copyright 2004 Manfred Stienstra (the original version)
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of the <organization> nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
+# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+
+from __future__ import unicode_literals
+from __future__ import absolute_import
+from . import util
+from . import odict
+from . import inlinepatterns
+
+
+def build_treeprocessors(md_instance, **kwargs):
+    """ Return an OrderedDict holding the default Markdown treeprocessors.
+
+    "inline" is registered first and "prettify" second; each processor is
+    constructed with `md_instance`. Extra keyword arguments are accepted
+    but not used here.
+    """
+    default_processors = (
+        ("inline", InlineProcessor),
+        ("prettify", PrettifyTreeprocessor),
+    )
+    registry = odict.OrderedDict()
+    for key, processor_class in default_processors:
+        registry[key] = processor_class(md_instance)
+    return registry
+
+
+def isString(s):
+    """ Return True if `s` is a `util.string_type` but not an AtomicString.
+
+    `util.AtomicString` instances are rejected before the
+    `util.string_type` check is made.
+    """
+    if isinstance(s, util.AtomicString):
+        return False
+    return isinstance(s, util.string_type)
+
+
+class Treeprocessor(util.Processor):
+    """
+    Base class for processors that work on the parsed ElementTree.
+
+    Treeprocessors are run on the ElementTree object before serialization.
+    Each one exposes a single `run` method which receives the tree and
+    either edits it in place or hands back a replacement.
+
+    Treeprocessors must extend markdown.Treeprocessor.
+
+    """
+    def run(self, root):
+        """
+        Process the tree rooted at `root`.
+
+        Subclasses of Treeprocessor should override this method. It may
+        return another ElementTree object, in which case the existing root
+        ElementTree is replaced, or it may modify the current tree and
+        return None. This base implementation does nothing.
+        """
+        return None
+
+
+class InlineProcessor(Treeprocessor):
+    """
+    A Treeprocessor that traverses a tree, applying inline patterns.
+
+    Nodes produced by the patterns are kept in `self.stashed_nodes` (reset
+    at the start of every `run`) and are represented in the text by
+    placeholder strings until `__processPlaceholders` swaps them back in.
+    """
+
+    def __init__(self, md):
+        """ Keep a reference to the Markdown instance `md` and cache the
+        placeholder settings taken from `util`. """
+        self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
+        self.__placeholder_suffix = util.ETX
+        # The 4 presumably accounts for the "%04d" id generated in
+        # __makePlaceholder.
+        self.__placeholder_length = (4 + len(self.__placeholder_prefix)
+                                     + len(self.__placeholder_suffix))
+        self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
+        self.markdown = md
+
+    def __makePlaceholder(self, type):
+        """
+        Generate a placeholder for the next node to be stashed.
+
+        Keyword arguments:
+
+        * type: not used; kept so existing callers keep working
+
+        Returns: tuple of (placeholder string, id string).
+
+        """
+        # The id is the current stash size, zero-padded to four digits.
+        nodeId = "%04d" % len(self.stashed_nodes)
+        placeholder = util.INLINE_PLACEHOLDER % nodeId
+        return placeholder, nodeId
+
+    def __findPlaceholder(self, data, index):
+        """
+        Extract id from data string, start from index
+
+        Keyword arguments:
+
+        * data: string
+        * index: index, from which we start search
+
+        Returns: placeholder id and string index, after the found
+        placeholder. When nothing matches, the id is None and the returned
+        index is `index + 1`.
+
+        """
+        m = self.__placeholder_re.search(data, index)
+        if not m:
+            return None, index + 1
+        return m.group(1), m.end()
+
+    def __stashNode(self, node, type):
+        """
+        Add node to stash.
+
+        Returns: the placeholder string that now stands for `node`.
+
+        """
+        placeholder, nodeId = self.__makePlaceholder(type)
+        self.stashed_nodes[nodeId] = node
+        return placeholder
+
+    def __handleInline(self, data, patternIndex=0):
+        """
+        Process string with inline patterns and replace it
+        with placeholders
+
+        Keyword arguments:
+
+        * data: A line of Markdown text
+        * patternIndex: The index of the inlinePattern to start with
+
+        Returns: String with placeholders. AtomicString input is returned
+        unchanged.
+
+        """
+        if not isinstance(data, util.AtomicString):
+            startIndex = 0
+            patterns = self.markdown.inlinePatterns
+            while patternIndex < len(patterns):
+                data, matched, startIndex = self.__applyPattern(
+                    patterns.value_for_index(patternIndex),
+                    data, patternIndex, startIndex)
+                # Keep applying the same pattern until it stops matching.
+                if not matched:
+                    patternIndex += 1
+        return data
+
+    def __processElementText(self, node, subnode, isText=True):
+        """
+        Process placeholders in Element.text or Element.tail
+        of Elements popped from self.stashed_nodes.
+
+        Keywords arguments:
+
+        * node: parent node
+        * subnode: processing node
+        * isText: bool variable, True - it's text, False - it's tail
+
+        Returns: None
+
+        """
+        if isText:
+            text = subnode.text
+            subnode.text = None
+        else:
+            text = subnode.tail
+            subnode.tail = None
+
+        childResult = self.__processPlaceholders(text, subnode)
+
+        if not isText and node is not subnode:
+            # list(node) replaces Element.getchildren(), which was removed
+            # in Python 3.9.
+            pos = list(node).index(subnode)
+            node.remove(subnode)
+        else:
+            pos = 0
+
+        # Inserting the reversed list at a fixed position keeps the
+        # children in their original order.
+        childResult.reverse()
+        for newChild in childResult:
+            node.insert(pos, newChild)
+
+    def __processPlaceholders(self, data, parent):
+        """
+        Process string with placeholders and generate ElementTree tree.
+
+        Keyword arguments:
+
+        * data: string with placeholders instead of ElementTree elements.
+        * parent: Element, which contains processing inline data
+
+        Returns: list with ElementTree elements with applied inline patterns.
+
+        """
+        def linkText(text):
+            # Attach text to the tail of the last collected element, or to
+            # the parent's text when no element has been collected yet.
+            if text:
+                if result:
+                    if result[-1].tail:
+                        result[-1].tail += text
+                    else:
+                        result[-1].tail = text
+                else:
+                    if parent.text:
+                        parent.text += text
+                    else:
+                        parent.text = text
+
+        result = []
+        startIndex = 0
+        while data:
+            index = data.find(self.__placeholder_prefix, startIndex)
+            if index != -1:
+                nodeId, phEndIndex = self.__findPlaceholder(data, index)
+
+                if nodeId in self.stashed_nodes:
+                    node = self.stashed_nodes.get(nodeId)
+
+                    if index > 0:
+                        text = data[startIndex:index]
+                        linkText(text)
+
+                    if not isString(node):  # it's Element
+                        for child in [node] + list(node):
+                            if child.tail:
+                                if child.tail.strip():
+                                    self.__processElementText(node, child,
+                                                              False)
+                            if child.text:
+                                if child.text.strip():
+                                    self.__processElementText(child, child)
+                    else:  # it's just a string
+                        linkText(node)
+                        startIndex = phEndIndex
+                        continue
+
+                    startIndex = phEndIndex
+                    result.append(node)
+
+                else:  # wrong placeholder
+                    # Emit the prefix as plain text and resume after it.
+                    end = index + len(self.__placeholder_prefix)
+                    linkText(data[startIndex:end])
+                    startIndex = end
+            else:
+                text = data[startIndex:]
+                if isinstance(data, util.AtomicString):
+                    # We don't want to lose the AtomicString
+                    text = util.AtomicString(text)
+                linkText(text)
+                # Nothing left to scan; this ends the loop.
+                data = ""
+
+        return result
+
+    def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
+        """
+        Check if the line fits the pattern, create the necessary
+        elements, add it to stashed_nodes.
+
+        Keyword arguments:
+
+        * data: the text to be processed
+        * pattern: the pattern to be checked
+        * patternIndex: index of current pattern
+        * startIndex: string index, from which we start searching
+
+        Returns: tuple of (string with placeholders instead of ElementTree
+        elements, whether the pattern matched, index to start the next
+        search from).
+
+        """
+        match = pattern.getCompiledRegExp().match(data[startIndex:])
+        leftData = data[:startIndex]
+
+        if not match:
+            return data, False, 0
+
+        node = pattern.handleMatch(match)
+
+        if node is None:
+            # The handler declined this match: leave the data untouched and
+            # continue searching from the start of the last group.
+            lastGroupStart = match.span(len(match.groups()))[0]
+            return data, True, len(leftData) + lastGroupStart
+
+        if not isString(node):
+            if not isinstance(node.text, util.AtomicString):
+                # We need to process current node too
+                for child in [node] + list(node):
+                    if child.text:
+                        child.text = self.__handleInline(child.text,
+                                                         patternIndex + 1)
+                    if child.tail:
+                        child.tail = self.__handleInline(child.tail,
+                                                         patternIndex)
+
+        placeholder = self.__stashNode(node, pattern.type())
+
+        return "%s%s%s%s" % (leftData,
+                             match.group(1),
+                             placeholder, match.groups()[-1]), True, 0
+
+    def run(self, tree):
+        """Apply inline patterns to a parsed Markdown tree.
+
+        Iterate over ElementTree, find elements with inline tag, apply inline
+        patterns and append newly created Elements to tree.  If you don't
+        want to process your data with inline patterns, instead of normal
+        string, use subclass AtomicString:
+
+            node.text = markdown.AtomicString("This will not be processed.")
+
+        Arguments:
+
+        * tree: ElementTree object, representing Markdown tree.
+
+        Returns: ElementTree object with applied inline patterns.
+
+        """
+        self.stashed_nodes = {}
+
+        stack = [tree]
+
+        while stack:
+            currElement = stack.pop()
+            insertQueue = []
+            # Iterate over a snapshot of the children: elements inserted
+            # into currElement below are not visited by this loop.
+            for child in list(currElement):
+                if child.text and not isinstance(child.text,
+                                                 util.AtomicString):
+                    text = child.text
+                    child.text = None
+                    lst = self.__processPlaceholders(
+                        self.__handleInline(text), child)
+                    stack += lst
+                    insertQueue.append((child, lst))
+                if child.tail:
+                    tail = self.__handleInline(child.tail)
+                    # A throwaway element collects the leading text of the
+                    # processed tail.
+                    dummy = util.etree.Element('d')
+                    tailResult = self.__processPlaceholders(tail, dummy)
+                    if dummy.text:
+                        child.tail = dummy.text
+                    else:
+                        child.tail = None
+                    pos = list(currElement).index(child) + 1
+                    tailResult.reverse()
+                    for newChild in tailResult:
+                        currElement.insert(pos, newChild)
+                if len(child):
+                    stack.append(child)
+
+            for element, lst in insertQueue:
+                if self.markdown.enable_attributes:
+                    if element.text and isString(element.text):
+                        element.text = inlinepatterns.handleAttributes(
+                            element.text, element)
+                for i, newChild in enumerate(lst):
+                    if self.markdown.enable_attributes:
+                        # Processing attributes
+                        if newChild.tail and isString(newChild.tail):
+                            newChild.tail = inlinepatterns.handleAttributes(
+                                newChild.tail, element)
+                        if newChild.text and isString(newChild.text):
+                            newChild.text = inlinepatterns.handleAttributes(
+                                newChild.text, newChild)
+                    element.insert(i, newChild)
+        return tree
+
+
+class PrettifyTreeprocessor(Treeprocessor):
+    """ Add linebreaks to the html document. """
+
+    def _prettifyETree(self, elem):
+        """ Recursively add linebreaks to ElementTree children.
+
+        Block-level elements other than 'code' and 'pre' get a newline as
+        their text when they start with a block-level child, and their
+        block-level children are processed the same way. Every visited
+        element whose tail is empty or whitespace-only gets a newline tail.
+        """
+
+        newline = "\n"
+        if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
+            textIsBlank = not elem.text or not elem.text.strip()
+            if textIsBlank and len(elem) and util.isBlockLevel(elem[0].tag):
+                elem.text = newline
+            for e in elem:
+                if util.isBlockLevel(e.tag):
+                    self._prettifyETree(e)
+        # One check covers both block-level and other elements.
+        if not elem.tail or not elem.tail.strip():
+            elem.tail = newline
+
+    def run(self, root):
+        """ Add linebreaks to ElementTree root object.
+
+        Modifies the tree in place and returns None.
+        """
+
+        self._prettifyETree(root)
+        # Do <br />'s separately as they are often in the middle of
+        # inline content and missed by _prettifyETree.
+        # Element.iter() replaces getiterator(), which was removed in
+        # Python 3.9.
+        for br in root.iter('br'):
+            if not br.tail or not br.tail.strip():
+                br.tail = '\n'
+            else:
+                br.tail = '\n%s' % br.tail
+        # Clean up extra empty lines at end of code blocks.
+        for pre in root.iter('pre'):
+            if len(pre) and pre[0].tag == 'code':
+                code = pre[0]
+                # An empty code element has text of None; there is nothing
+                # to strip, and calling rstrip() on it would raise
+                # AttributeError.
+                if code.text is not None:
+                    code.text = code.text.rstrip() + '\n'
« no previous file with comments | « third_party/markdown/serializers.py ('k') | third_party/markdown/util.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698