Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(854)

Side by Side Diff: third_party/markdown/preprocessors.py

Issue 93743005: Support markdown template for html editor (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: fix path without dir Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « third_party/markdown/postprocessors.py ('k') | third_party/markdown/serializers.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 """
2 PRE-PROCESSORS
3 =============================================================================
4
5 Preprocessors work on source text before we start doing anything too
6 complicated.
7 """
8
9 from __future__ import absolute_import
10 from __future__ import unicode_literals
11 from . import util
12 from . import odict
13 import re
14
15
def build_preprocessors(md_instance, **kwargs):
    """Assemble the default, ordered collection of preprocessors.

    The returned ordered dict maps a short name to a preprocessor instance
    bound to ``md_instance``. Entries are inserted in this order:
    ``normalize_whitespace``, ``html_block`` (left out when
    ``md_instance.safeMode`` equals ``'escape'``) and ``reference``.

    ``**kwargs`` is accepted but not used by this function.
    """
    registry = odict.OrderedDict()
    registry['normalize_whitespace'] = NormalizeWhitespace(md_instance)

    # The HTML block preprocessor is skipped entirely in 'escape' safe mode.
    escaping = md_instance.safeMode == 'escape'
    if not escaping:
        registry["html_block"] = HtmlBlockPreprocessor(md_instance)

    registry["reference"] = ReferencePreprocessor(md_instance)
    return registry
24
25
class Preprocessor(util.Processor):
    """
    Base class for processors that run once the text is split into lines.

    A preprocessor provides a ``run`` method that receives the document as
    a list of lines, changes it as needed, and hands back either that same
    list or a newly built one.

    Preprocessors must extend markdown.Preprocessor.

    """
    def run(self, lines):
        """
        Process the document and return the resulting list of lines.

        Subclasses are expected to override this method. ``lines`` is the
        document as a list of strings split on newlines; the override
        returns the (possibly modified) list. This base implementation
        does nothing and returns ``None``.

        """
        return None
45
46
class NormalizeWhitespace(Preprocessor):
    """ Normalize whitespace for consistent parsing. """

    def run(self, lines):
        """Return ``lines`` re-split after whitespace normalization.

        The lines are joined with newlines and then, in order:

        * every ``util.STX`` and ``util.ETX`` character is removed;
        * ``\\r\\n`` and bare ``\\r`` line endings become ``\\n``;
        * two newlines are appended to the end of the text;
        * tabs are expanded using ``self.markdown.tab_length``;
        * lines made up only of spaces (and preceded by a newline) are
          emptied.

        The result is split on ``\\n`` and returned as a new list.
        """
        text = '\n'.join(lines)

        # Remove the two marker characters wherever they occur in the input.
        for marker in (util.STX, util.ETX):
            text = text.replace(marker, "")

        # Unify line endings, then pad the end of the document.
        text = text.replace("\r\n", "\n")
        text = text.replace("\r", "\n")
        text += "\n\n"

        text = text.expandtabs(self.markdown.tab_length)

        # The lookbehind restricts this to space-only lines that follow a
        # newline.
        text = re.sub(r'(?<=\n) +\n', '\n', text)
        return text.split('\n')
57
58
class HtmlBlockPreprocessor(Preprocessor):
    """Remove html blocks from the text and store them for later retrieval."""

    # Templates for the closing-tag text to look for, tried in this order:
    # "</tag>" first, then "tag>" (for the comment tag "--" this is "-->").
    right_tag_patterns = ["</%s>", "%s>"]
    attrs_pattern = r"""
        \s+(?P<attr>[^>"'/= ]+)=(?P<q>['"])(?P<value>.*?)(?P=q)   # attr="value"
        |                                                         # OR
        \s+(?P<attr1>[^>"'/= ]+)=(?P<value1>[^> ]+)               # attr=value
        |                                                         # OR
        \s+(?P<attr2>[^>"'/= ]+)                                  # attr
        """
    left_tag_pattern = (r'^\<(?P<tag>[^> ]+)(?P<attrs>(%s)*)\s*\/?\>?'
                        % attrs_pattern)
    attrs_re = re.compile(attrs_pattern, re.VERBOSE)
    left_tag_re = re.compile(left_tag_pattern, re.VERBOSE)
    # When true, the content of a block whose opening tag carries a
    # ``markdown`` attribute is kept in the output instead of being stashed.
    markdown_in_raw = False
    # Matches a ``markdown`` attribute (with optional value) so that it can
    # be removed from an opening tag.
    _markdown_attr_re = re.compile(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?')

    def _get_left_tag(self, block):
        """Parse the opening tag at the start of ``block``.

        Returns a ``(tag, length, attrs)`` tuple. When ``left_tag_re``
        matches, ``tag`` is the tag name as written, ``length`` is the
        length of the matched opening-tag text and ``attrs`` maps each
        attribute name to its value ("" for attributes without a value).
        Otherwise ``tag`` is the lower-cased text between the first
        character and the first ">", ``length`` is ``len(tag) + 2`` and
        ``attrs`` is empty.
        """
        m = self.left_tag_re.match(block)
        if not m:
            tag = block[1:].split(">", 1)[0].lower()
            return tag, len(tag) + 2, {}

        attrs = {}
        raw_attrs = m.group('attrs')
        if raw_attrs:
            for ma in self.attrs_re.finditer(raw_attrs):
                if ma.group('attr'):
                    # attr="value" form
                    attrs[ma.group('attr').strip()] = ma.group('value') or ""
                elif ma.group('attr1'):
                    # attr=value form
                    attrs[ma.group('attr1').strip()] = ma.group('value1') or ""
                elif ma.group('attr2'):
                    # bare attr form
                    attrs[ma.group('attr2').strip()] = ""
        return m.group('tag'), len(m.group(0)), attrs

    def _recursive_tagfind(self, ltag, rtag, start_index, block):
        """Find the ``rtag`` that balances nested ``ltag`` occurrences.

        Searches ``block`` from ``start_index`` and returns the index just
        past the matching ``rtag``, or -1 when none is found.
        """
        while True:
            i = block.find(rtag, start_index)
            if i == -1:
                return -1
            j = block.find(ltag, start_index)
            # if no ltag, or rtag found before another ltag, return index
            if j > i or j == -1:
                return i + len(rtag)
            # another ltag found before rtag, use end of ltag as starting
            # point and search again
            j = block.find('>', j)
            start_index = self._recursive_tagfind(ltag, rtag, j + 1, block)
            if start_index == -1:
                # HTML potentially malformed- ltag has no corresponding
                # rtag
                return -1

    def _get_right_tag(self, left_tag, left_index, block):
        """Locate the closing tag for ``left_tag`` inside ``block``.

        Returns ``(right_tag, index)`` where ``right_tag`` is the closing
        tag text without its leading "<" and trailing ">" and ``index`` is
        the position just past it. When no pattern from
        ``right_tag_patterns`` is found, falls back to a lower-cased slice
        of the tail of the right-stripped block and ``len(block)``.
        """
        for p in self.right_tag_patterns:
            tag = p % left_tag
            i = self._recursive_tagfind("<%s" % left_tag, tag,
                                        left_index, block)
            if i > 2:
                return tag.lstrip("<").rstrip(">"), i
        return block.rstrip()[-left_index:-1].lower(), len(block)

    def _equal_tags(self, left_tag, right_tag):
        """Return True when ``right_tag`` is accepted as closing ``left_tag``."""
        # handle PHP, etc. (slice rather than index so "" cannot raise)
        if left_tag[:1] in ('?', '@', '%'):
            return True
        if right_tag == "/" + left_tag:
            return True
        # comment blocks use "--" for both ends
        return left_tag == "--" and right_tag == "--"

    def _is_oneliner(self, tag):
        """Return True for tags that are handled as a single block."""
        return tag in ['hr', 'hr/']

    def _stash_items(self, items, left_index, right_tag, attrs, new_blocks,
                     skip_blank_end=False):
        """Append the collected multi-block HTML ``items`` to ``new_blocks``.

        With ``markdown_in_raw`` enabled and a ``markdown`` attribute on the
        opening tag, only the opening tag (minus that attribute) and the
        closing tag are stashed and the content in between is appended
        as-is. Otherwise the items are joined with blank lines and stashed
        as one piece. ``skip_blank_end`` suppresses stashing a closing
        part that is empty or whitespace only. ``items`` is modified in
        place.
        """
        stash = self.markdown.htmlStash
        if self.markdown_in_raw and 'markdown' in attrs:
            start = self._markdown_attr_re.sub('', items[0][:left_index])
            items[0] = items[0][left_index:]
            end = items[-1][-len(right_tag)-2:]
            items[-1] = items[-1][:-len(right_tag)-2]
            new_blocks.append(stash.store(start))
            new_blocks.extend(items)
            if end.strip() or not skip_blank_end:
                new_blocks.append(stash.store(end))
        else:
            new_blocks.append(stash.store('\n\n'.join(items)))

    def run(self, lines):
        """Replace raw HTML blocks in ``lines`` with stash placeholders.

        The text is split into blocks on blank lines. Blocks that start
        with "<" are examined: block-level HTML, comments and "?", "@", "%"
        blocks are passed to ``self.markdown.htmlStash.store`` and the
        value it returns is emitted in their place; everything else is
        passed through. An HTML block may span several blank-line
        separated blocks; these are gathered until the closing tag is
        seen. Returns the resulting text as a new list of lines.
        """
        # Local import: blocks are taken from and pushed back onto the
        # front of the queue, which is O(n) per operation on a list.
        from collections import deque

        pending = deque("\n".join(lines).split("\n\n"))
        new_blocks = []
        items = []          # blocks of the HTML element currently open
        left_tag = ''
        right_tag = ''
        left_index = 0
        attrs = {}
        in_tag = False      # True while inside an unclosed HTML block

        while pending:
            block = pending.popleft()
            # Drop at most two leading newlines.
            for _ in range(2):
                if block.startswith("\n"):
                    block = block[1:]

            if in_tag:
                items.append(block)
                right_tag, data_index = self._get_right_tag(left_tag, 0,
                                                            block)
                if self._equal_tags(left_tag, right_tag):
                    # found the closing tag
                    if data_index < len(block):
                        # we have more text after right_tag
                        items[-1] = block[:data_index]
                        pending.appendleft(block[data_index:])
                    in_tag = False
                    self._stash_items(items, left_index, right_tag, attrs,
                                      new_blocks)
                    items = []
                continue

            if not (block.startswith("<") and len(block.strip()) > 1):
                new_blocks.append(block)
                continue

            if block[1] == "!":
                # is a comment block
                left_tag, left_index, attrs = "--", 2, {}
            else:
                left_tag, left_index, attrs = self._get_left_tag(block)
            right_tag, data_index = self._get_right_tag(left_tag,
                                                        left_index,
                                                        block)

            # Text following the closing tag goes back on the queue to be
            # processed as a block of its own.
            if data_index < len(block) \
                    and (util.isBlockLevel(left_tag) or left_tag == '--'):
                pending.appendleft(block[data_index:])
                block = block[:data_index]

            if not (util.isBlockLevel(left_tag)
                    or block[1] in ["!", "?", "@", "%"]):
                new_blocks.append(block)
                continue

            if self._is_oneliner(left_tag):
                new_blocks.append(block.strip())
                continue

            closed = block.rstrip().endswith(">")
            if closed and self._equal_tags(left_tag, right_tag):
                if self.markdown_in_raw and 'markdown' in attrs:
                    start = self._markdown_attr_re.sub('',
                                                       block[:left_index])
                    end = block[-len(right_tag)-2:]
                    block = block[left_index:-len(right_tag)-2]
                    new_blocks.append(self.markdown.htmlStash.store(start))
                    new_blocks.append(block)
                    new_blocks.append(self.markdown.htmlStash.store(end))
                else:
                    new_blocks.append(
                        self.markdown.htmlStash.store(block.strip()))
            # NOTE(review): the parentheses spell out how the original
            # unparenthesised "A or B and not C" is evaluated; behaviour is
            # unchanged. Confirm this grouping is the intended one.
            elif util.isBlockLevel(left_tag) \
                    or (left_tag == "--" and not closed):
                # block level tag that is not complete yet
                items.append(block.strip())
                in_tag = True
            else:
                new_blocks.append(
                    self.markdown.htmlStash.store(block.strip()))

        if items:
            # Input ended while an HTML block was still open.
            self._stash_items(items, left_index, right_tag, attrs,
                              new_blocks, skip_blank_end=True)
            new_blocks.append('\n')

        new_text = "\n\n".join(new_blocks)
        return new_text.split("\n")
270
271
class ReferencePreprocessor(Preprocessor):
    """ Remove reference definitions from text and store for later use. """

    # Optional title: "double quoted", 'single quoted' or (parenthesised).
    TITLE = r'[ ]*(\"(.*)\"|\'(.*)\'|\((.*)\))[ ]*'
    # A definition line: up to three spaces, "[id]:", the link, then an
    # optional title. Groups: 1 = id, 2 = link, 5/6/7 = title text.
    RE = re.compile(r'^[ ]{0,3}\[([^\]]*)\]:\s*([^ ]*)[ ]*(%s)?$' % TITLE,
                    re.DOTALL)
    # A line holding nothing but a title. Groups 2/3/4 = title text.
    TITLE_RE = re.compile(r'^%s$' % TITLE)

    def run(self, lines):
        """Strip reference definitions out of ``lines`` and record them.

        Each line matching ``RE`` is removed and stored in
        ``self.markdown.references`` under its stripped, lower-cased id as
        a ``(link, title)`` tuple, with surrounding "<" / ">" characters
        removed from the link. When the definition line carries no title,
        the following line is consumed as the title if it matches
        ``TITLE_RE``. All other lines are returned, in order, in a new
        list; ``lines`` itself is left untouched.
        """
        new_text = []
        total = len(lines)
        index = 0
        while index < total:
            line = lines[index]
            index += 1
            m = self.RE.match(line)
            if not m:
                new_text.append(line)
                continue

            ref_id = m.group(1).strip().lower()
            link = m.group(2).lstrip('<').rstrip('>')
            title = m.group(5) or m.group(6) or m.group(7)
            # Check next line for title; the bounds check covers a
            # definition sitting on the very last line.
            if not title and index < total:
                tm = self.TITLE_RE.match(lines[index])
                if tm:
                    index += 1
                    title = tm.group(2) or tm.group(3) or tm.group(4)
            self.markdown.references[ref_id] = (link, title)

        return new_text
OLDNEW
« no previous file with comments | « third_party/markdown/postprocessors.py ('k') | third_party/markdown/serializers.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698