third_party/markdown/treeprocessors.py - Issue 133433002: Docserver: Support markdown for HTML content. Request thirdparty submission review.

Side by Side Diff: third_party/markdown/treeprocessors.py

Issue 133433002: Docserver: Support markdown for HTML content. Request thirdparty submission review. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: change the version of app & cron.yaml Created 6 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 # markdown is released under the BSD license

	2 # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)

	3 # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)

	4 # Copyright 2004 Manfred Stienstra (the original version)

	5 #

	6 # All rights reserved.

	7 #

	8 # Redistribution and use in source and binary forms, with or without

	9 # modification, are permitted provided that the following conditions are met:

	10 #

	11 # * Redistributions of source code must retain the above copyright

	12 # notice, this list of conditions and the following disclaimer.

	13 # * Redistributions in binary form must reproduce the above copyright

	14 # notice, this list of conditions and the following disclaimer in the

	15 # documentation and/or other materials provided with the distribution.

	16 # * Neither the name of the <organization> nor the

	17 # names of its contributors may be used to endorse or promote products

	18 # derived from this software without specific prior written permission.

	19 #

	20 # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY

	21 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED

	22 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE

	23 # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT

	24 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

	25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

	26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

	27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

	28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

	29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

	30 # POSSIBILITY OF SUCH DAMAGE.

	31

	32

	33 from __future__ import unicode_literals

	34 from __future__ import absolute_import

	35 from . import util

	36 from . import odict

	37 from . import inlinepatterns

	38

	39

	40 def build_treeprocessors(md_instance, **kwargs):

	41 """ Build the default treeprocessors for Markdown. """

	42 treeprocessors = odict.OrderedDict()

	43 treeprocessors["inline"] = InlineProcessor(md_instance)

	44 treeprocessors["prettify"] = PrettifyTreeprocessor(md_instance)

	45 return treeprocessors

	46

	47

	48 def isString(s):

	49 """ Check if it's string """

	50 if not isinstance(s, util.AtomicString):

	51 return isinstance(s, util.string_type)

	52 return False

	53

	54

	55 class Treeprocessor(util.Processor):

	56 """

	57 Treeprocessors are run on the ElementTree object before serialization.

	58

	59 Each Treeprocessor implements a "run" method that takes a pointer to an

	60 ElementTree, modifies it as necessary and returns an ElementTree

	61 object.

	62

	63 Treeprocessors must extend markdown.Treeprocessor.

	64

	65 """

	66 def run(self, root):

	67 """

	68 Subclasses of Treeprocessor should implement a `run` method, which

	69 takes a root ElementTree. This method can return another ElementTree

	70 object, and the existing root ElementTree will be replaced, or it can

	71 modify the current tree and return None.

	72 """

	73 pass

	74

	75

	76 class InlineProcessor(Treeprocessor):

	77 """

	78 A Treeprocessor that traverses a tree, applying inline patterns.

	79 """

	80

	81 def __init__(self, md):

	82 self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX

	83 self.__placeholder_suffix = util.ETX

	84 self.__placeholder_length = 4 + len(self.__placeholder_prefix) \

	85 + len(self.__placeholder_suffix)

	86 self.__placeholder_re = util.INLINE_PLACEHOLDER_RE

	87 self.markdown = md

	88

	89 def __makePlaceholder(self, type):

	90 """ Generate a placeholder """

	91 id = "%04d" % len(self.stashed_nodes)

	92 hash = util.INLINE_PLACEHOLDER % id

	93 return hash, id

	94

	95 def __findPlaceholder(self, data, index):

	96 """

	97 Extract id from data string, start from index

	98

	99 Keyword arguments:

	100

	101 * data: string

	102 * index: index, from which we start search

	103

	104 Returns: placeholder id and string index, after the found placeholder.

	105

	106 """

	107 m = self.__placeholder_re.search(data, index)

	108 if m:

	109 return m.group(1), m.end()

	110 else:

	111 return None, index + 1

	112

	113 def __stashNode(self, node, type):

	114 """ Add node to stash """

	115 placeholder, id = self.__makePlaceholder(type)

	116 self.stashed_nodes[id] = node

	117 return placeholder

	118

	119 def __handleInline(self, data, patternIndex=0):

	120 """

	121 Process string with inline patterns and replace it

	122 with placeholders

	123

	124 Keyword arguments:

	125

	126 * data: A line of Markdown text

	127 * patternIndex: The index of the inlinePattern to start with

	128

	129 Returns: String with placeholders.

	130

	131 """

	132 if not isinstance(data, util.AtomicString):

	133 startIndex = 0

	134 while patternIndex < len(self.markdown.inlinePatterns):

	135 data, matched, startIndex = self.__applyPattern(

	136 self.markdown.inlinePatterns.value_for_index(patternIndex),

	137 data, patternIndex, startIndex)

	138 if not matched:

	139 patternIndex += 1

	140 return data

	141

	142 def __processElementText(self, node, subnode, isText=True):

	143 """

	144 Process placeholders in Element.text or Element.tail

	145 of Elements popped from self.stashed_nodes.

	146

	147 Keywords arguments:

	148

	149 * node: parent node

	150 * subnode: processing node

	151 * isText: bool variable, True - it's text, False - it's tail

	152

	153 Returns: None

	154

	155 """

	156 if isText:

	157 text = subnode.text

	158 subnode.text = None

	159 else:

	160 text = subnode.tail

	161 subnode.tail = None

	162

	163 childResult = self.__processPlaceholders(text, subnode)

	164

	165 if not isText and node is not subnode:

	166 pos = node.getchildren().index(subnode)

	167 node.remove(subnode)

	168 else:

	169 pos = 0

	170

	171 childResult.reverse()

	172 for newChild in childResult:

	173 node.insert(pos, newChild)

	174

	175 def __processPlaceholders(self, data, parent):

	176 """

	177 Process string with placeholders and generate ElementTree tree.

	178

	179 Keyword arguments:

	180

	181 * data: string with placeholders instead of ElementTree elements.

	182 * parent: Element, which contains processing inline data

	183

	184 Returns: list with ElementTree elements with applied inline patterns.

	185

	186 """

	187 def linkText(text):

	188 if text:

	189 if result:

	190 if result[-1].tail:

	191 result[-1].tail += text

	192 else:

	193 result[-1].tail = text

	194 else:

	195 if parent.text:

	196 parent.text += text

	197 else:

	198 parent.text = text

	199 result = []

	200 strartIndex = 0

	201 while data:

	202 index = data.find(self.__placeholder_prefix, strartIndex)

	203 if index != -1:

	204 id, phEndIndex = self.__findPlaceholder(data, index)

	205

	206 if id in self.stashed_nodes:

	207 node = self.stashed_nodes.get(id)

	208

	209 if index > 0:

	210 text = data[strartIndex:index]

	211 linkText(text)

	212

	213 if not isString(node): # it's Element

	214 for child in [node] + node.getchildren():

	215 if child.tail:

	216 if child.tail.strip():

	217 self.__processElementText(node, child,False)

	218 if child.text:

	219 if child.text.strip():

	220 self.__processElementText(child, child)

	221 else: # it's just a string

	222 linkText(node)

	223 strartIndex = phEndIndex

	224 continue

	225

	226 strartIndex = phEndIndex

	227 result.append(node)

	228

	229 else: # wrong placeholder

	230 end = index + len(self.__placeholder_prefix)

	231 linkText(data[strartIndex:end])

	232 strartIndex = end

	233 else:

	234 text = data[strartIndex:]

	235 if isinstance(data, util.AtomicString):

	236 # We don't want to loose the AtomicString

	237 text = util.AtomicString(text)

	238 linkText(text)

	239 data = ""

	240

	241 return result

	242

	243 def __applyPattern(self, pattern, data, patternIndex, startIndex=0):

	244 """

	245 Check if the line fits the pattern, create the necessary

	246 elements, add it to stashed_nodes.

	247

	248 Keyword arguments:

	249

	250 * data: the text to be processed

	251 * pattern: the pattern to be checked

	252 * patternIndex: index of current pattern

	253 * startIndex: string index, from which we start searching

	254

	255 Returns: String with placeholders instead of ElementTree elements.

	256

	257 """

	258 match = pattern.getCompiledRegExp().match(data[startIndex:])

	259 leftData = data[:startIndex]

	260

	261 if not match:

	262 return data, False, 0

	263

	264 node = pattern.handleMatch(match)

	265

	266 if node is None:

	267 return data, True, len(leftData)+match.span(len(match.groups()))[0]

	268

	269 if not isString(node):

	270 if not isinstance(node.text, util.AtomicString):

	271 # We need to process current node too

	272 for child in [node] + node.getchildren():

	273 if not isString(node):

	274 if child.text:

	275 child.text = self.__handleInline(child.text,

	276 patternIndex + 1)

	277 if child.tail:

	278 child.tail = self.__handleInline(child.tail,

	279 patternIndex)

	280

	281 placeholder = self.__stashNode(node, pattern.type())

	282

	283 return "%s%s%s%s" % (leftData,

	284 match.group(1),

	285 placeholder, match.groups()[-1]), True, 0

	286

	287 def run(self, tree):

	288 """Apply inline patterns to a parsed Markdown tree.

	289

	290 Iterate over ElementTree, find elements with inline tag, apply inline

	291 patterns and append newly created Elements to tree. If you don't

	292 want to process your data with inline paterns, instead of normal string,

	293 use subclass AtomicString:

	294

	295 node.text = markdown.AtomicString("This will not be processed.")

	296

	297 Arguments:

	298

	299 * tree: ElementTree object, representing Markdown tree.

	300

	301 Returns: ElementTree object with applied inline patterns.

	302

	303 """

	304 self.stashed_nodes = {}

	305

	306 stack = [tree]

	307

	308 while stack:

	309 currElement = stack.pop()

	310 insertQueue = []

	311 for child in currElement.getchildren():

	312 if child.text and not isinstance(child.text, util.AtomicString):

	313 text = child.text

	314 child.text = None

	315 lst = self.__processPlaceholders(self.__handleInline(

	316 text), child)

	317 stack += lst

	318 insertQueue.append((child, lst))

	319 if child.tail:

	320 tail = self.__handleInline(child.tail)

	321 dumby = util.etree.Element('d')

	322 tailResult = self.__processPlaceholders(tail, dumby)

	323 if dumby.text:

	324 child.tail = dumby.text

	325 else:

	326 child.tail = None

	327 pos = currElement.getchildren().index(child) + 1

	328 tailResult.reverse()

	329 for newChild in tailResult:

	330 currElement.insert(pos, newChild)

	331 if child.getchildren():

	332 stack.append(child)

	333

	334 for element, lst in insertQueue:

	335 if self.markdown.enable_attributes:

	336 if element.text and isString(element.text):

	337 element.text = \

	338 inlinepatterns.handleAttributes(element.text,

	339 element)

	340 i = 0

	341 for newChild in lst:

	342 if self.markdown.enable_attributes:

	343 # Processing attributes

	344 if newChild.tail and isString(newChild.tail):

	345 newChild.tail = \

	346 inlinepatterns.handleAttributes(newChild.tail,

	347 element)

	348 if newChild.text and isString(newChild.text):

	349 newChild.text = \

	350 inlinepatterns.handleAttributes(newChild.text,

	351 newChild)

	352 element.insert(i, newChild)

	353 i += 1

	354 return tree

	355

	356

	357 class PrettifyTreeprocessor(Treeprocessor):

	358 """ Add linebreaks to the html document. """

	359

	360 def _prettifyETree(self, elem):

	361 """ Recursively add linebreaks to ElementTree children. """

	362

	363 i = "\n"

	364 if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:

	365 if (not elem.text or not elem.text.strip()) \

	366 and len(elem) and util.isBlockLevel(elem[0].tag):

	367 elem.text = i

	368 for e in elem:

	369 if util.isBlockLevel(e.tag):

	370 self._prettifyETree(e)

	371 if not elem.tail or not elem.tail.strip():

	372 elem.tail = i

	373 if not elem.tail or not elem.tail.strip():

	374 elem.tail = i

	375

	376 def run(self, root):

	377 """ Add linebreaks to ElementTree root object. """

	378

	379 self._prettifyETree(root)

	380 # Do <br />'s seperately as they are often in the middle of

	381 # inline content and missed by _prettifyETree.

	382 brs = root.getiterator('br')

	383 for br in brs:

	384 if not br.tail or not br.tail.strip():

	385 br.tail = '\n'

	386 else:

	387 br.tail = '\n%s' % br.tail

	388 # Clean up extra empty lines at end of code blocks.

	389 pres = root.getiterator('pre')

	390 for pre in pres:

	391 if len(pre) and pre[0].tag == 'code':

	392 pre[0].text = pre[0].text.rstrip() + '\n'

OLD	NEW

« no previous file with comments | « third_party/markdown/serializers.py ('k') | third_party/markdown/util.py » ('j') | no next file with comments »