Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(214)

Side by Side Diff: third_party/Python-Markdown/markdown/preprocessors.py

Issue 1389543003: Revert of Check in a simple pure-python based Markdown previewer. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@add
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 """
2 PRE-PROCESSORS
3 =============================================================================
4
5 Preprocessors work on source text before we start doing anything too
6 complicated.
7 """
8
9 from __future__ import absolute_import
10 from __future__ import unicode_literals
11 from . import util
12 from . import odict
13 import re
14
15
def build_preprocessors(md_instance, **kwargs):
    """Assemble the default, ordered preprocessor registry for Markdown.

    Whitespace normalization is always registered first and reference
    extraction last.  The HTML block preprocessor sits between them and is
    left out only when ``md_instance.safeMode`` equals ``'escape'``.

    Any extra keyword arguments are accepted and ignored.

    Returns the ``odict.OrderedDict`` mapping registration names to
    preprocessor instances bound to ``md_instance``.
    """
    # (name, class) pairs in the order they must be registered.
    enabled = [('normalize_whitespace', NormalizeWhitespace)]
    if md_instance.safeMode != 'escape':
        enabled.append(("html_block", HtmlBlockPreprocessor))
    enabled.append(("reference", ReferencePreprocessor))

    registry = odict.OrderedDict()
    for name, factory in enabled:
        registry[name] = factory(md_instance)
    return registry
24
25
class Preprocessor(util.Processor):
    """
    Base class for processors that run once the text is split into lines.

    A preprocessor exposes a ``run`` method that receives the list of lines
    making up the document, alters it as needed, and hands back either that
    same list or a replacement list.

    Preprocessors must extend markdown.Preprocessor.

    """
    def run(self, lines):
        """
        Transform the document lines; subclasses are expected to override.

        ``lines`` is the document as a list of strings split on newlines and
        the return value should be the (possibly modified) list of lines.
        This base implementation does no work and returns ``None``.

        """
        return None  # pragma: no cover
45
46
class NormalizeWhitespace(Preprocessor):
    """ Normalize whitespace for consistent parsing. """

    def run(self, lines):
        """
        Return ``lines`` re-split after whitespace clean-up.

        The lines are joined, every ``util.STX`` / ``util.ETX`` character is
        dropped, all line endings become ``\\n``, two trailing newlines are
        appended, tabs are expanded to ``self.markdown.tab_length`` columns
        and lines that consist solely of spaces are emptied.
        """
        text = '\n'.join(lines)
        for marker in (util.STX, util.ETX):
            text = text.replace(marker, "")
        # CRLF must be handled before lone CR so it is not doubled.
        text = text.replace("\r\n", "\n").replace("\r", "\n")
        text += "\n\n"
        text = text.expandtabs(self.markdown.tab_length)
        # A run of spaces directly after a newline and up to the next newline
        # is a whitespace-only line; collapse it to an empty line.
        text = re.sub(r'(?<=\n) +\n', '\n', text)
        return text.split('\n')
57
58
class HtmlBlockPreprocessor(Preprocessor):
    """Remove html blocks from the text and store them for later retrieval.

    The document is cut into blank-line separated blocks.  Blocks that open
    with a block-level HTML element (as judged by ``util.isBlockLevel``), a
    comment, or a ``<!``/``<?``/``<@``/``<%`` construct are handed to
    ``self.markdown.htmlStash`` and replaced by the placeholder it returns.
    """

    # Templates for the closing counterpart of a left tag: a regular closing
    # tag, and a bare "name>" form (e.g. "-->" for a "--" comment tag).
    right_tag_patterns = ["</%s>", "%s>"]
    # Verbose regex: one attribute in quoted, unquoted or value-less form.
    attrs_pattern = r"""
        \s+(?P<attr>[^>"'/= ]+)=(?P<q>['"])(?P<value>.*?)(?P=q)   # attr="value"
        |                                                         # OR
        \s+(?P<attr1>[^>"'/= ]+)=(?P<value1>[^> ]+)               # attr=value
        |                                                         # OR
        \s+(?P<attr2>[^>"'/= ]+)                                  # attr
        """
    left_tag_pattern = r'^\<(?P<tag>[^> ]+)(?P<attrs>(%s)*)\s*\/?\>?' % \
        attrs_pattern
    attrs_re = re.compile(attrs_pattern, re.VERBOSE)
    left_tag_re = re.compile(left_tag_pattern, re.VERBOSE)
    # When true, elements carrying a ``markdown`` attribute are stored with
    # ``store_tag`` and their content is left in the text instead of being
    # stashed as raw HTML.
    markdown_in_raw = False

    def _get_left_tag(self, block):
        """Parse the opening tag at the start of ``block``.

        Returns ``(tag, length, attrs)`` where ``length`` is the number of
        characters taken by the opening tag and ``attrs`` maps attribute
        names to values (``""`` for value-less attributes).  If the tag
        regex does not match, the text between ``<`` and the first ``>`` is
        lower-cased and used as the tag, with no attributes.
        """
        m = self.left_tag_re.match(block)
        if not m:
            tag = block[1:].split(">", 1)[0].lower()
            return tag, len(tag) + 2, {}

        attrs = {}
        raw_attrs = m.group('attrs')
        if raw_attrs:
            for ma in self.attrs_re.finditer(raw_attrs):
                if ma.group('attr'):
                    attrs[ma.group('attr').strip()] = ma.group('value') or ""
                elif ma.group('attr1'):
                    attrs[ma.group('attr1').strip()] = \
                        ma.group('value1') or ""
                elif ma.group('attr2'):
                    attrs[ma.group('attr2').strip()] = ""
        return m.group('tag'), len(m.group(0)), attrs

    def _recursive_tagfind(self, ltag, rtag, start_index, block):
        """Find the end of the ``rtag`` that balances an already-open ``ltag``.

        Searching starts at ``start_index``.  Nested ``ltag`` occurrences are
        skipped together with their own closing tags.  Returns the index just
        past the matching ``rtag``, or ``-1`` when none is found.
        """
        while True:
            i = block.find(rtag, start_index)
            if i == -1:
                return -1
            j = block.find(ltag, start_index)
            # if no ltag, or rtag found before another ltag, return index
            if j > i or j == -1:
                return i + len(rtag)
            # another ltag found before rtag, use end of ltag as starting
            # point and search again
            j = block.find('>', j)
            start_index = self._recursive_tagfind(ltag, rtag, j + 1, block)
            if start_index == -1:
                # HTML potentially malformed- ltag has no corresponding
                # rtag
                return -1

    def _get_right_tag(self, left_tag, left_index, block):
        """Locate the closing tag for ``left_tag`` inside ``block``.

        Each template in ``right_tag_patterns`` is tried in turn, searching
        from ``left_index``.  Returns ``(right_tag, end_index)``; the tag has
        its leading ``<`` and trailing ``>`` characters stripped.  When no
        template matches, the tail of the block is returned as a guess for
        the tag together with ``len(block)``.
        """
        for p in self.right_tag_patterns:
            tag = p % left_tag
            i = self._recursive_tagfind(
                "<%s" % left_tag, tag, left_index, block
            )
            if i > 2:
                return tag.lstrip("<").rstrip(">"), i
        return block.rstrip()[-left_index:-1].lower(), len(block)

    def _equal_tags(self, left_tag, right_tag):
        """Return True when ``right_tag`` is accepted as closing ``left_tag``."""
        # Slice instead of index so an empty tag yields False, not IndexError.
        if left_tag[:1] in ('?', '@', '%'):  # handle PHP, etc.
            return True
        if ("/" + left_tag) == right_tag:
            return True
        return right_tag == "--" and left_tag == "--"

    def _is_oneliner(self, tag):
        """Return True for tags that never have a closing counterpart."""
        return tag in ('hr', 'hr/')

    def _stringindex_to_listindex(self, stringindex, items):
        """
        Same effect as concatenating the strings in items,
        finding the character to which stringindex refers in that string,
        and returning the index of the item in which that character resides.

        ``items`` itself is left untouched.
        """
        # The sentinel lets an index equal to the total length resolve to
        # one-past-the-last item; it is added to a copy, not the caller's list.
        padded = list(items) + ['dummy']
        i, count = 0, 0
        while count <= stringindex:
            count += len(padded[i])
            i += 1
        return i - 1

    def _nested_markdown_in_html(self, items):
        """Find and process html child elements of the given element block.

        ``items`` is edited in place (placeholders are inserted while the
        list is being walked) and also returned.
        """
        for i, item in enumerate(items):
            if self.left_tag_re.match(item):
                joined = ''.join(items[i:])
                left_tag, left_index, attrs = self._get_left_tag(joined)
                right_tag, data_index = self._get_right_tag(
                    left_tag, left_index, joined)
                right_listindex = \
                    self._stringindex_to_listindex(data_index, items[i:]) + i
                if 'markdown' in attrs:
                    items[i] = items[i][left_index:]  # remove opening tag
                    placeholder = self.markdown.htmlStash.store_tag(
                        left_tag, attrs, i + 1, right_listindex + 1)
                    items.insert(i, placeholder)
                    if len(items) - right_listindex <= 1:  # last nest, no tail
                        right_listindex -= 1
                    items[right_listindex] = items[right_listindex][
                        :-len(right_tag) - 2]  # remove closing tag
                else:  # raw html
                    if len(items) - right_listindex <= 1:  # last element
                        right_listindex -= 1
                    offset = 1 if i == right_listindex else 0
                    placeholder = self.markdown.htmlStash.store('\n\n'.join(
                        items[i:right_listindex + offset]))
                    del items[i:right_listindex + offset]
                    items.insert(i, placeholder)
        return items

    def _store_pending_items(self, items, left_tag, left_index, right_tag,
                             attrs, new_blocks):
        """Stash the element accumulated in ``items`` and extend ``new_blocks``.

        With ``markdown_in_raw`` set and a ``markdown`` attribute present, the
        outer tags are cut off, the element is recorded with ``store_tag`` and
        its (recursively processed) content is appended; otherwise the joined
        items are stored as a single raw HTML block.
        """
        stash = self.markdown.htmlStash
        if self.markdown_in_raw and 'markdown' in attrs:
            items[0] = items[0][left_index:]
            items[-1] = items[-1][:-len(right_tag) - 2]
            if items[-1]:  # not a newline/empty string
                right_index = len(items) + 3
            else:
                right_index = len(items) + 2
            new_blocks.append(
                stash.store_tag(left_tag, attrs, 0, right_index))
            placeholderslen = len(stash.tag_data)
            new_blocks.extend(self._nested_markdown_in_html(items))
            # Every nested tag stored above shifts the outer element's end.
            nests = len(stash.tag_data) - placeholderslen
            stash.tag_data[-1 - nests]['right_index'] += nests - 2
        else:
            new_blocks.append(stash.store('\n\n'.join(items)))

    def run(self, lines):
        """Replace raw HTML blocks in ``lines`` with stash placeholders.

        ``lines`` is the document as a list of strings.  Returns a new list
        of lines in which each recognised HTML block has been swapped for the
        placeholder returned by ``self.markdown.htmlStash``.
        """
        new_blocks = []
        # NOTE: rsplit (not split) is deliberate - the two disagree on where
        # an odd run of newlines is cut.  The list is reversed so the next
        # block is taken, and leftovers pushed back, at the end in O(1).
        pending = "\n".join(lines).rsplit("\n\n")
        pending.reverse()
        items = []         # blocks of an element still waiting for its end
        left_tag = ''
        right_tag = ''
        left_index = 0
        attrs = {}
        in_tag = False     # inside an element spanning several blocks
        stash = self.markdown.htmlStash

        while pending:
            block = pending.pop()
            # Drop up to two leading newlines.
            if block.startswith("\n"):
                block = block[1:]
            if block.startswith("\n"):
                block = block[1:]

            if in_tag:
                items.append(block)
                right_tag, data_index = self._get_right_tag(left_tag, 0, block)
                if self._equal_tags(left_tag, right_tag):
                    # if find closing tag
                    if data_index < len(block):
                        # we have more text after right_tag
                        items[-1] = block[:data_index]
                        pending.append(block[data_index:])
                    in_tag = False
                    self._store_pending_items(
                        items, left_tag, left_index, right_tag, attrs,
                        new_blocks)
                    items = []
                continue

            if not (block.startswith("<") and len(block.strip()) > 1):
                new_blocks.append(block)
                continue

            if block[1:4] == "!--":
                # is a comment block
                left_tag, left_index, attrs = "--", 2, {}
            else:
                left_tag, left_index, attrs = self._get_left_tag(block)
            right_tag, data_index = self._get_right_tag(
                left_tag, left_index, block)
            # keep checking conditions below and maybe just append

            if data_index < len(block) and (
                    util.isBlockLevel(left_tag) or left_tag == '--'):
                # Text follows the closing tag: handle it as its own block.
                pending.append(block[data_index:])
                block = block[:data_index]

            if not (util.isBlockLevel(left_tag) or
                    block[1] in ["!", "?", "@", "%"]):
                new_blocks.append(block)
                continue

            if self._is_oneliner(left_tag):
                new_blocks.append(block.strip())
                continue

            tags_match = self._equal_tags(left_tag, right_tag)
            if tags_match and block.rstrip().endswith(">"):
                if self.markdown_in_raw and 'markdown' in attrs:
                    block = block[left_index:-len(right_tag) - 2]
                    new_blocks.append(
                        stash.store_tag(left_tag, attrs, 0, 2))
                    new_blocks.append(block)
                else:
                    new_blocks.append(stash.store(block.strip()))
            elif (not tags_match) and (
                    util.isBlockLevel(left_tag) or left_tag == "--"):
                # if is block level tag and is not complete
                items.append(block.strip())
                in_tag = True
            else:
                new_blocks.append(stash.store(block.strip()))

        if items:
            # Input ended inside an unclosed element: stash what was gathered.
            self._store_pending_items(
                items, left_tag, left_index, right_tag, attrs, new_blocks)
            new_blocks.append('\n')

        new_text = "\n\n".join(new_blocks)
        return new_text.split("\n")
315
316
class ReferencePreprocessor(Preprocessor):
    """ Remove reference definitions from text and store for later use. """

    # Optional title: "double quoted", 'single quoted' or (parenthesised).
    TITLE = r'[ ]*(\"(.*)\"|\'(.*)\'|\((.*)\))[ ]*'
    # Groups: 1 = reference id, 2 = link, 5/6/7 = title by quoting style.
    RE = re.compile(
        r'^[ ]{0,3}\[([^\]]*)\]:\s*([^ ]*)[ ]*(%s)?$' % TITLE, re.DOTALL
    )
    # A line holding nothing but a title; groups 2/3/4 = title text.
    TITLE_RE = re.compile(r'^%s$' % TITLE)

    def run(self, lines):
        """Strip reference definitions out of ``lines`` and record them.

        Every line matching ``RE`` is removed and stored in
        ``self.markdown.references`` as ``id -> (link, title)``, where the id
        is stripped and lower-cased and the link loses surrounding angle
        brackets.  When the definition line carries no title, the following
        line is consumed as the title if it matches ``TITLE_RE``.

        ``lines`` is read by index and is not modified.  Returns a new list
        holding the lines that are not part of a reference definition.
        """
        new_text = []
        total = len(lines)
        index = 0
        while index < total:
            line = lines[index]
            index += 1
            m = self.RE.match(line)
            if not m:
                new_text.append(line)
                continue

            ref_id = m.group(1).strip().lower()
            link = m.group(2).lstrip('<').rstrip('>')
            title = m.group(5) or m.group(6) or m.group(7)
            # Check next line for title; the bounds test keeps a definition
            # on the very last line from raising IndexError.
            if not title and index < total:
                tm = self.TITLE_RE.match(lines[index])
                if tm:
                    index += 1
                    title = tm.group(2) or tm.group(3) or tm.group(4)
            self.markdown.references[ref_id] = (link, title)

        return new_text
OLDNEW
« no previous file with comments | « third_party/Python-Markdown/markdown/postprocessors.py ('k') | third_party/Python-Markdown/markdown/serializers.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698