OLD | NEW |
| (Empty) |
1 from __future__ import unicode_literals | |
2 from __future__ import absolute_import | |
3 from . import util | |
4 from . import odict | |
5 from . import inlinepatterns | |
6 | |
7 | |
def build_treeprocessors(md_instance, **kwargs):
    """ Assemble the ordered collection of default treeprocessors.

    Each default processor is instantiated with `md_instance` and stored
    under its name, "inline" first and "prettify" second. Extra keyword
    arguments are accepted but not used.

    Returns: an `odict.OrderedDict` mapping processor names to instances.

    """
    registry = odict.OrderedDict()
    defaults = (
        ("inline", InlineProcessor),
        ("prettify", PrettifyTreeprocessor),
    )
    for name, processor_class in defaults:
        registry[name] = processor_class(md_instance)
    return registry
14 | |
15 | |
def isString(s):
    """ Tell whether `s` is an ordinary (non-atomic) string.

    An `util.AtomicString` is never reported as a string; any other value
    is reported as a string only if it is an instance of `util.string_type`.

    """
    if isinstance(s, util.AtomicString):
        return False
    return isinstance(s, util.string_type)
21 | |
22 | |
class Treeprocessor(util.Processor):
    """
    Base class for processors that work on the parsed ElementTree.

    Treeprocessors are run on the ElementTree object before it is
    serialized. A concrete treeprocessor subclasses this class and
    provides a `run` method that receives the root of the tree, changes
    it as needed and optionally hands back a replacement tree.

    """
    def run(self, root):
        """
        Process the tree rooted at `root`.

        Subclasses should override this method. An implementation may
        either return a new ElementTree object, which then replaces the
        existing root, or modify the given tree in place and return None.
        The base implementation does nothing.
        """
        return None  # pragma: no cover
42 | |
43 | |
class InlineProcessor(Treeprocessor):
    """
    A Treeprocessor that traverses a tree, applying inline patterns.

    Matched inline patterns are turned into nodes which are kept in
    `self.stashed_nodes` and temporarily represented in the text by
    placeholders; the placeholders are afterwards replaced by the
    stashed nodes when the tree is rebuilt.
    """

    def __init__(self, md):
        self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
        self.__placeholder_suffix = util.ETX
        self.__placeholder_length = 4 + len(self.__placeholder_prefix) \
            + len(self.__placeholder_suffix)
        self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
        self.markdown = md
        self.inlinePatterns = md.inlinePatterns
        # `run` resets this mapping on every invocation. It is created here
        # as well so that the helper methods do not fail with an
        # AttributeError when they are used before the first `run`.
        self.stashed_nodes = {}

    def __makePlaceholder(self, type):
        """
        Generate a placeholder for the next node to be stashed.

        Keyword arguments:

        * type: not used; kept so that the signature stays unchanged.

        Returns: tuple of the placeholder string and the zero-padded id
        (derived from the current number of stashed nodes).

        """
        node_id = "%04d" % len(self.stashed_nodes)
        placeholder = util.INLINE_PLACEHOLDER % node_id
        return placeholder, node_id

    def __findPlaceholder(self, data, index):
        """
        Extract id from data string, start from index

        Keyword arguments:

        * data: string
        * index: index, from which we start search

        Returns: placeholder id and string index, after the found
        placeholder. When no placeholder is found the id is None and the
        returned index is `index + 1`.

        """
        m = self.__placeholder_re.search(data, index)
        if m:
            return m.group(1), m.end()
        return None, index + 1

    def __stashNode(self, node, type):
        """
        Add node to stash.

        Keyword arguments:

        * node: the Element or string to store
        * type: passed on to the placeholder generator

        Returns: the placeholder string that stands in for the node.

        """
        placeholder, node_id = self.__makePlaceholder(type)
        self.stashed_nodes[node_id] = node
        return placeholder

    def __handleInline(self, data, patternIndex=0):
        """
        Process string with inline patterns and replace it
        with placeholders

        Keyword arguments:

        * data: A line of Markdown text
        * patternIndex: The index of the inlinePattern to start with

        Returns: String with placeholders. An AtomicString is returned
        unchanged.

        """
        if not isinstance(data, util.AtomicString):
            startIndex = 0
            while patternIndex < len(self.inlinePatterns):
                data, matched, startIndex = self.__applyPattern(
                    self.inlinePatterns.value_for_index(patternIndex),
                    data, patternIndex, startIndex)
                # The same pattern is retried until it no longer matches.
                if not matched:
                    patternIndex += 1
        return data

    def __processElementText(self, node, subnode, isText=True):
        """
        Process placeholders in Element.text or Element.tail
        of Elements popped from self.stashed_nodes.

        Keywords arguments:

        * node: parent node
        * subnode: processing node
        * isText: bool variable, True - it's text, False - it's tail

        Returns: None

        """
        if isText:
            text = subnode.text
            subnode.text = None
        else:
            text = subnode.tail
            subnode.tail = None

        childResult = self.__processPlaceholders(text, subnode, isText)

        # Elements found in a tail go right after `subnode` in `node`;
        # elements found in text become the first children of `node`.
        if not isText and node is not subnode:
            pos = list(node).index(subnode) + 1
        else:
            pos = 0

        # Inserting in reverse order at a fixed position keeps the
        # original order of the new children.
        childResult.reverse()
        for newChild in childResult:
            node.insert(pos, newChild)

    def __processPlaceholders(self, data, parent, isText=True):
        """
        Process string with placeholders and generate ElementTree tree.

        Keyword arguments:

        * data: string with placeholders instead of ElementTree elements.
        * parent: Element, which contains processing inline data
        * isText: True when `data` came from the text of `parent`, False
          when it came from its tail.

        Returns: list with ElementTree elements with applied inline patterns.

        """
        def linkText(text):
            """ Append text to the place where it belongs in the output. """
            if not text:
                return
            if result:
                # Text following an already emitted element is its tail.
                if result[-1].tail:
                    result[-1].tail += text
                else:
                    result[-1].tail = text
            elif not isText:
                if parent.tail:
                    parent.tail += text
                else:
                    parent.tail = text
            else:
                if parent.text:
                    parent.text += text
                else:
                    parent.text = text

        result = []
        startIndex = 0
        while data:
            index = data.find(self.__placeholder_prefix, startIndex)
            if index != -1:
                node_id, phEndIndex = self.__findPlaceholder(data, index)

                if node_id in self.stashed_nodes:
                    node = self.stashed_nodes[node_id]

                    if index > 0:
                        linkText(data[startIndex:index])

                    if not isString(node):  # it's Element
                        for child in [node] + list(node):
                            if child.tail and child.tail.strip():
                                self.__processElementText(
                                    node, child, False
                                )
                            if child.text and child.text.strip():
                                self.__processElementText(child, child)
                    else:  # it's just a string
                        linkText(node)
                        startIndex = phEndIndex
                        continue

                    startIndex = phEndIndex
                    result.append(node)

                else:  # wrong placeholder
                    # Keep the prefix as literal text and search on
                    # behind it.
                    end = index + len(self.__placeholder_prefix)
                    linkText(data[startIndex:end])
                    startIndex = end
            else:
                text = data[startIndex:]
                if isinstance(data, util.AtomicString):
                    # We don't want to lose the AtomicString
                    text = util.AtomicString(text)
                linkText(text)
                data = ""

        return result

    def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
        """
        Check if the line fits the pattern, create the necessary
        elements, add it to stashed_nodes.

        Keyword arguments:

        * data: the text to be processed
        * pattern: the pattern to be checked
        * patternIndex: index of current pattern
        * startIndex: string index, from which we start searching

        Returns: tuple of three items: the (possibly modified) string with
        placeholders instead of ElementTree elements, a bool telling
        whether the pattern matched, and the index from which the next
        search should start.

        """
        match = pattern.getCompiledRegExp().match(data[startIndex:])
        leftData = data[:startIndex]

        if not match:
            return data, False, 0

        node = pattern.handleMatch(match)

        if node is None:
            # The pattern matched but produced nothing: leave the data
            # alone and continue from the start of the last group.
            return data, True, len(leftData)+match.span(len(match.groups()))[0]

        if not isString(node) and \
                not isinstance(node.text, util.AtomicString):
            # We need to process current node too
            for child in [node] + list(node):
                if child.text:
                    child.text = self.__handleInline(
                        child.text, patternIndex + 1
                    )
                if child.tail:
                    child.tail = self.__handleInline(
                        child.tail, patternIndex
                    )

        placeholder = self.__stashNode(node, pattern.type())

        return "%s%s%s%s" % (leftData,
                             match.group(1),
                             placeholder, match.groups()[-1]), True, 0

    def run(self, tree):
        """Apply inline patterns to a parsed Markdown tree.

        Iterate over ElementTree, find elements with inline tag, apply inline
        patterns and append newly created Elements to tree.  If you don't
        want to process your data with inline patterns, instead of normal
        string, use subclass AtomicString:

            node.text = markdown.AtomicString("This will not be processed.")

        Arguments:

        * tree: ElementTree object, representing Markdown tree.

        Returns: ElementTree object with applied inline patterns.

        """
        self.stashed_nodes = {}

        stack = [tree]

        while stack:
            currElement = stack.pop()
            insertQueue = []
            # NOTE: elements created from a tail are inserted into
            # `currElement` while it is being iterated; they are placed
            # after the current child.
            for child in currElement:
                if child.text and not isinstance(
                    child.text, util.AtomicString
                ):
                    text = child.text
                    child.text = None
                    lst = self.__processPlaceholders(
                        self.__handleInline(text), child
                    )
                    stack.extend(lst)
                    insertQueue.append((child, lst))
                if child.tail:
                    tail = self.__handleInline(child.tail)
                    # Throw-away element that only collects the plain text
                    # which remains in the tail.
                    dummy = util.etree.Element('d')
                    child.tail = None
                    tailResult = self.__processPlaceholders(tail, dummy, False)
                    if dummy.tail:
                        child.tail = dummy.tail
                    pos = list(currElement).index(child) + 1
                    tailResult.reverse()
                    for newChild in tailResult:
                        currElement.insert(pos, newChild)
                if len(child):
                    stack.append(child)

            for element, lst in insertQueue:
                if self.markdown.enable_attributes:
                    if element.text and isString(element.text):
                        element.text = inlinepatterns.handleAttributes(
                            element.text, element
                        )
                for position, newChild in enumerate(lst):
                    if self.markdown.enable_attributes:
                        # Processing attributes
                        if newChild.tail and isString(newChild.tail):
                            newChild.tail = inlinepatterns.handleAttributes(
                                newChild.tail, element
                            )
                        if newChild.text and isString(newChild.text):
                            newChild.text = inlinepatterns.handleAttributes(
                                newChild.text, newChild
                            )
                    element.insert(position, newChild)
        return tree
334 | |
335 | |
class PrettifyTreeprocessor(Treeprocessor):
    """ Add linebreaks to the html document. """

    def _prettifyETree(self, elem):
        """ Recursively add linebreaks to ElementTree children.

        For a block level element other than `code` and `pre`, a linebreak
        is set as its text when the text is empty or whitespace and its
        first child is block level, and every block level child is
        processed recursively. Any element passed in gets a linebreak as
        its tail when the tail is empty or whitespace.
        """

        i = "\n"
        if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
            if (not elem.text or not elem.text.strip()) \
                    and len(elem) and util.isBlockLevel(elem[0].tag):
                elem.text = i
            for e in elem:
                if util.isBlockLevel(e.tag):
                    self._prettifyETree(e)
        # Applies to every element, block level or not.
        if not elem.tail or not elem.tail.strip():
            elem.tail = i

    def run(self, root):
        """ Add linebreaks to ElementTree root object.

        The tree is modified in place and nothing is returned.
        """

        self._prettifyETree(root)
        # Do <br />'s separately as they are often in the middle of
        # inline content and missed by _prettifyETree.
        # `Element.iter` is used because `Element.getiterator` was removed
        # in Python 3.9.
        for br in root.iter('br'):
            if not br.tail or not br.tail.strip():
                br.tail = '\n'
            else:
                br.tail = '\n%s' % br.tail
        # Clean up extra empty lines at end of code blocks.
        for pre in root.iter('pre'):
            if len(pre) and pre[0].tag == 'code':
                code = pre[0]
                # A code element without text has nothing to clean up;
                # calling rstrip() on None would raise AttributeError.
                if code.text is not None:
                    code.text = util.AtomicString(code.text.rstrip() + '\n')
OLD | NEW |