OLD | NEW |
| (Empty) |
1 from __future__ import unicode_literals | |
2 from __future__ import absolute_import | |
3 from . import util | |
4 from . import odict | |
5 from . import inlinepatterns | |
6 | |
7 | |
def build_treeprocessors(md_instance, **kwargs):
    """
    Build the default treeprocessors for Markdown.

    Keyword arguments:

    * md_instance: the Markdown instance handed to each treeprocessor.
    * kwargs: accepted but not used here.

    Returns: an ordered dict holding the "inline" processor followed by the
    "prettify" processor.

    """
    defaults = (
        ("inline", InlineProcessor),
        ("prettify", PrettifyTreeprocessor),
    )
    processors = odict.OrderedDict()
    # Insertion order is significant: it is the order stored in the dict.
    for key, processor_class in defaults:
        processors[key] = processor_class(md_instance)
    return processors
14 | |
15 | |
def isString(s):
    """
    Tell whether `s` is an ordinary (non-atomic) string.

    Returns: False for any `util.AtomicString` instance; otherwise True
    exactly when `s` is an instance of `util.string_type`.

    """
    # Atomic strings are deliberately reported as "not a string".
    if isinstance(s, util.AtomicString):
        return False
    return isinstance(s, util.string_type)
21 | |
22 | |
class Treeprocessor(util.Processor):
    """
    Base class for processors that work on the parsed document tree.

    Treeprocessors are run on the ElementTree object before serialization.
    Every Treeprocessor provides a "run" method which receives a pointer to
    an ElementTree, changes it as required and hands back an ElementTree
    object.

    Treeprocessors must extend markdown.Treeprocessor.

    """
    def run(self, root):
        """
        Process the tree; meant to be overridden by subclasses.

        A subclass implementation takes the root ElementTree and may either
        return a different ElementTree object, in which case the existing
        root is replaced, or modify the tree in place and return None.

        This default implementation leaves the tree untouched.
        """
        return None
42 | |
43 | |
class InlineProcessor(Treeprocessor):
    """
    A Treeprocessor that traverses a tree, applying inline patterns.

    Nodes produced by inline patterns are kept in `self.stashed_nodes` and
    represented inside the text by placeholders; the placeholders are
    replaced by the stashed nodes once all patterns have been applied.
    """

    def __init__(self, md):
        """ Remember the Markdown instance and the placeholder settings. """
        self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
        self.__placeholder_suffix = util.ETX
        # Presumably 4 is the width of the zero-padded id ("%04d") created
        # in __makePlaceholder.
        self.__placeholder_length = (4 + len(self.__placeholder_prefix)
                                     + len(self.__placeholder_suffix))
        self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
        self.markdown = md

    def __makePlaceholder(self, type):
        """
        Generate a placeholder.

        Keyword arguments:

        * type: accepted but not used.

        Returns: tuple of the placeholder string and its id. The id is the
        zero-padded count of nodes stashed so far.

        """
        node_id = "%04d" % len(self.stashed_nodes)
        placeholder = util.INLINE_PLACEHOLDER % node_id
        return placeholder, node_id

    def __findPlaceholder(self, data, index):
        """
        Extract id from data string, start from index

        Keyword arguments:

        * data: string
        * index: index, from which we start search

        Returns: placeholder id and string index, after the found placeholder.
        When nothing is found the id is None and the index is `index + 1`.

        """
        m = self.__placeholder_re.search(data, index)
        if m:
            return m.group(1), m.end()
        return None, index + 1

    def __stashNode(self, node, type):
        """
        Add node to stash

        Keyword arguments:

        * node: the element or string to store
        * type: forwarded to the placeholder generator

        Returns: the placeholder standing in for the stashed node.

        """
        placeholder, node_id = self.__makePlaceholder(type)
        self.stashed_nodes[node_id] = node
        return placeholder

    def __handleInline(self, data, patternIndex=0):
        """
        Process string with inline patterns and replace it
        with placeholders

        Keyword arguments:

        * data: A line of Markdown text
        * patternIndex: The index of the inlinePattern to start with

        Returns: String with placeholders. AtomicString input is returned
        unchanged.

        """
        if not isinstance(data, util.AtomicString):
            startIndex = 0
            while patternIndex < len(self.markdown.inlinePatterns):
                data, matched, startIndex = self.__applyPattern(
                    self.markdown.inlinePatterns.value_for_index(patternIndex),
                    data, patternIndex, startIndex)
                # The same pattern is retried until it stops matching.
                if not matched:
                    patternIndex += 1
        return data

    def __processElementText(self, node, subnode, isText=True):
        """
        Process placeholders in Element.text or Element.tail
        of Elements popped from self.stashed_nodes.

        Keywords arguments:

        * node: parent node
        * subnode: processing node
        * isText: bool variable, True - it's text, False - it's tail

        Returns: None

        """
        if isText:
            text = subnode.text
            subnode.text = None
        else:
            text = subnode.tail
            subnode.tail = None

        childResult = self.__processPlaceholders(text, subnode)

        if not isText and node is not subnode:
            # list(node) replaces Element.getchildren(), which was removed
            # in Python 3.9.
            pos = list(node).index(subnode)
            node.remove(subnode)
        else:
            pos = 0

        # Inserting in reverse at a fixed position keeps the original order.
        childResult.reverse()
        for newChild in childResult:
            node.insert(pos, newChild)

    def __processPlaceholders(self, data, parent):
        """
        Process string with placeholders and generate ElementTree tree.

        Keyword arguments:

        * data: string with placeholders instead of ElementTree elements.
        * parent: Element, which contains processing inline data

        Returns: list with ElementTree elements with applied inline patterns.

        """
        def linkText(text):
            """ Append text to the last result's tail or to parent.text. """
            if text:
                if result:
                    if result[-1].tail:
                        result[-1].tail += text
                    else:
                        result[-1].tail = text
                else:
                    if parent.text:
                        parent.text += text
                    else:
                        parent.text = text

        result = []
        startIndex = 0
        while data:
            index = data.find(self.__placeholder_prefix, startIndex)
            if index != -1:
                node_id, phEndIndex = self.__findPlaceholder(data, index)

                if node_id in self.stashed_nodes:
                    node = self.stashed_nodes[node_id]

                    if index > 0:
                        text = data[startIndex:index]
                        linkText(text)

                    if not isString(node):  # it's Element
                        # Snapshot the children: the calls below may insert
                        # into or remove from `node`.
                        for child in [node] + list(node):
                            if child.tail:
                                if child.tail.strip():
                                    self.__processElementText(node, child,
                                                              False)
                            if child.text:
                                if child.text.strip():
                                    self.__processElementText(child, child)
                    else:  # it's just a string
                        linkText(node)
                        startIndex = phEndIndex
                        continue

                    startIndex = phEndIndex
                    result.append(node)

                else:  # wrong placeholder
                    end = index + len(self.__placeholder_prefix)
                    linkText(data[startIndex:end])
                    startIndex = end
            else:
                text = data[startIndex:]
                if isinstance(data, util.AtomicString):
                    # We don't want to lose the AtomicString
                    text = util.AtomicString(text)
                linkText(text)
                data = ""

        return result

    def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
        """
        Check if the line fits the pattern, create the necessary
        elements, add it to stashed_nodes.

        Keyword arguments:

        * data: the text to be processed
        * pattern: the pattern to be checked
        * patternIndex: index of current pattern
        * startIndex: string index, from which we start searching

        Returns: tuple of (string with placeholders instead of ElementTree
        elements, whether the pattern matched, index to start the next
        search from).

        """
        match = pattern.getCompiledRegExp().match(data[startIndex:])
        leftData = data[:startIndex]

        if not match:
            return data, False, 0

        node = pattern.handleMatch(match)

        if node is None:
            # Nothing was produced for this match: leave the data alone and
            # resume searching from the start of the match's last group.
            return data, True, len(leftData)+match.span(len(match.groups()))[0]

        if not isString(node):
            if not isinstance(node.text, util.AtomicString):
                # We need to process current node too
                for child in [node] + list(node):
                    if child.text:
                        child.text = self.__handleInline(child.text,
                                                         patternIndex + 1)
                    if child.tail:
                        child.tail = self.__handleInline(child.tail,
                                                         patternIndex)

        placeholder = self.__stashNode(node, pattern.type())

        return "%s%s%s%s" % (leftData,
                             match.group(1),
                             placeholder, match.groups()[-1]), True, 0

    def run(self, tree):
        """Apply inline patterns to a parsed Markdown tree.

        Iterate over ElementTree, find elements with inline tag, apply inline
        patterns and append newly created Elements to tree.  If you don't
        want to process your data with inline patterns, instead of normal
        string, use subclass AtomicString:

            node.text = markdown.AtomicString("This will not be processed.")

        Arguments:

        * tree: ElementTree object, representing Markdown tree.

        Returns: ElementTree object with applied inline patterns.

        """
        self.stashed_nodes = {}

        stack = [tree]

        while stack:
            currElement = stack.pop()
            insertQueue = []
            # Iterate over a snapshot: tail processing below inserts new
            # siblings into currElement while we are looping.
            for child in list(currElement):
                if child.text and not isinstance(child.text,
                                                 util.AtomicString):
                    text = child.text
                    child.text = None
                    lst = self.__processPlaceholders(
                        self.__handleInline(text), child)
                    stack += lst
                    insertQueue.append((child, lst))
                if child.tail:
                    tail = self.__handleInline(child.tail)
                    # Throw-away element that collects the leading text of
                    # the processed tail.
                    dummy = util.etree.Element('d')
                    tailResult = self.__processPlaceholders(tail, dummy)
                    if dummy.text:
                        child.tail = dummy.text
                    else:
                        child.tail = None
                    pos = list(currElement).index(child) + 1
                    tailResult.reverse()
                    for newChild in tailResult:
                        currElement.insert(pos, newChild)
                if len(child):
                    stack.append(child)

            for element, lst in insertQueue:
                if self.markdown.enable_attributes:
                    if element.text and isString(element.text):
                        element.text = inlinepatterns.handleAttributes(
                            element.text, element)
                for i, newChild in enumerate(lst):
                    if self.markdown.enable_attributes:
                        # Processing attributes
                        if newChild.tail and isString(newChild.tail):
                            newChild.tail = inlinepatterns.handleAttributes(
                                newChild.tail, element)
                        if newChild.text and isString(newChild.text):
                            newChild.text = inlinepatterns.handleAttributes(
                                newChild.text, newChild)
                    element.insert(i, newChild)
        return tree
323 | |
324 | |
class PrettifyTreeprocessor(Treeprocessor):
    """ Add linebreaks to the html document. """

    def _prettifyETree(self, elem):
        """
        Recursively add linebreaks to ElementTree children.

        For a block level element other than 'code' and 'pre', a linebreak
        becomes the element's text when that text is blank and the first
        child is block level, and every block level child is processed
        recursively.  In all cases a blank tail is replaced by a linebreak.

        """
        i = "\n"
        if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
            if (not elem.text or not elem.text.strip()) \
                    and len(elem) and util.isBlockLevel(elem[0].tag):
                elem.text = i
            for e in elem:
                if util.isBlockLevel(e.tag):
                    self._prettifyETree(e)
        # A single tail check suffices for block level and other elements.
        if not elem.tail or not elem.tail.strip():
            elem.tail = i

    def run(self, root):
        """
        Add linebreaks to ElementTree root object.

        Keyword arguments:

        * root: the root element; it is modified in place.

        Returns: None

        """
        self._prettifyETree(root)
        # Do <br />'s separately as they are often in the middle of
        # inline content and missed by _prettifyETree.
        # Element.iter() replaces getiterator(), which was removed in
        # Python 3.9.
        for br in root.iter('br'):
            if not br.tail or not br.tail.strip():
                br.tail = '\n'
            else:
                br.tail = '\n%s' % br.tail
        # Clean up extra empty lines at end of code blocks.
        for pre in root.iter('pre'):
            if len(pre) and pre[0].tag == 'code':
                code = pre[0]
                # An empty <code> element has text None; there is nothing
                # to strip in that case.
                if code.text is not None:
                    code.text = code.text.rstrip() + '\n'
OLD | NEW |