third_party/Python-Markdown/markdown/preprocessors.py - Issue 1356203004: Check in a simple pure-python based Markdown previewer.

Side by Side Diff: third_party/Python-Markdown/markdown/preprocessors.py

Issue 1356203004: Check in a simple pure-python based Markdown previewer. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@add

Patch Set: fix license file Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # markdown is released under the BSD license

2 # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)

3 # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)

4 # Copyright 2004 Manfred Stienstra (the original version)

5 #

6 # All rights reserved.

7 #

8 # Redistribution and use in source and binary forms, with or without

9 # modification, are permitted provided that the following conditions are met:

10 #

11 # * Redistributions of source code must retain the above copyright

12 # notice, this list of conditions and the following disclaimer.

13 # * Redistributions in binary form must reproduce the above copyright

14 # notice, this list of conditions and the following disclaimer in the

15 # documentation and/or other materials provided with the distribution.

16 # * Neither the name of the <organization> nor the

17 # names of its contributors may be used to endorse or promote products

18 # derived from this software without specific prior written permission.

19 #

20 # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY

21 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED

22 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE

23 # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT

24 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

30 # POSSIBILITY OF SUCH DAMAGE.

31

32

33 """	1 """

34 PRE-PROCESSORS	2 PRE-PROCESSORS

35 =============================================================================	3 =============================================================================

36	4

37 Preprocessors work on source text before we start doing anything too	5 Preprocessors work on source text before we start doing anything too

38 complicated.	6 complicated.

39 """	7 """

40	8

41 from __future__ import absolute_import	9 from __future__ import absolute_import

42 from __future__ import unicode_literals	10 from __future__ import unicode_literals

43 from . import util	11 from . import util

44 from . import odict	12 from . import odict

45 import re	13 import re

46	14

47	15

48 def build_preprocessors(md_instance, **kwargs):	16 def build_preprocessors(md_instance, **kwargs):

(...skipping 17 matching lines...) Expand all Loading...
66 Preprocessors must extend markdown.Preprocessor.	34 Preprocessors must extend markdown.Preprocessor.

67	35

68 """	36 """

69 def run(self, lines):	37 def run(self, lines):

70 """	38 """

71 Each subclass of Preprocessor should override the `run` method, which	39 Each subclass of Preprocessor should override the `run` method, which

72 takes the document as a list of strings split by newlines and returns	40 takes the document as a list of strings split by newlines and returns

73 the (possibly modified) list of lines.	41 the (possibly modified) list of lines.

74	42

75 """	43 """

76 pass	44 pass # pragma: no cover

77	45

78	46

79 class NormalizeWhitespace(Preprocessor):	47 class NormalizeWhitespace(Preprocessor):

80 """ Normalize whitespace for consistant parsing. """	48 """ Normalize whitespace for consistant parsing. """

81	49

82 def run(self, lines):	50 def run(self, lines):

83 source = '\n'.join(lines)	51 source = '\n'.join(lines)

84 source = source.replace(util.STX, "").replace(util.ETX, "")	52 source = source.replace(util.STX, "").replace(util.ETX, "")

85 source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"	53 source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"

86 source = source.expandtabs(self.markdown.tab_length)	54 source = source.expandtabs(self.markdown.tab_length)

87 source = re.sub(r'(?<=\n) +\n', '\n', source)	55 source = re.sub(r'(?<=\n) +\n', '\n', source)

88 return source.split('\n')	56 return source.split('\n')

89	57

90	58

91 class HtmlBlockPreprocessor(Preprocessor):	59 class HtmlBlockPreprocessor(Preprocessor):

92 """Remove html blocks from the text and store them for later retrieval."""	60 """Remove html blocks from the text and store them for later retrieval."""

93	61

94 right_tag_patterns = ["</%s>", "%s>"]	62 right_tag_patterns = ["</%s>", "%s>"]

95 attrs_pattern = r"""	63 attrs_pattern = r"""

96 \s+(?P<attr>[^>"'/= ]+)=(?P<q>['"])(?P<value>.*?)(?P=q) # attr="value"	64 \s+(?P<attr>[^>"'/= ]+)=(?P<q>['"])(?P<value>.*?)(?P=q) # attr="value"

97 | # OR	65 | # OR

98 \s+(?P<attr1>[^>"'/= ]+)=(?P<value1>[^> ]+) # attr=value	66 \s+(?P<attr1>[^>"'/= ]+)=(?P<value1>[^> ]+) # attr=value

99 | # OR	67 | # OR

100 \s+(?P<attr2>[^>"'/= ]+) # attr	68 \s+(?P<attr2>[^>"'/= ]+) # attr

101 """	69 """

102 left_tag_pattern = r'^\<(?P<tag>[^> ]+)(?P<attrs>(%s)*)\s*\/?\>?' % attrs_pattern	70 left_tag_pattern = r'^\<(?P<tag>[^> ]+)(?P<attrs>(%s)*)\s*\/?\>?' % \

	71 attrs_pattern

103 attrs_re = re.compile(attrs_pattern, re.VERBOSE)	72 attrs_re = re.compile(attrs_pattern, re.VERBOSE)

104 left_tag_re = re.compile(left_tag_pattern, re.VERBOSE)	73 left_tag_re = re.compile(left_tag_pattern, re.VERBOSE)

105 markdown_in_raw = False	74 markdown_in_raw = False

106	75

107 def _get_left_tag(self, block):	76 def _get_left_tag(self, block):

108 m = self.left_tag_re.match(block)	77 m = self.left_tag_re.match(block)

109 if m:	78 if m:

110 tag = m.group('tag')	79 tag = m.group('tag')

111 raw_attrs = m.group('attrs')	80 raw_attrs = m.group('attrs')

112 attrs = {}	81 attrs = {}

113 if raw_attrs:	82 if raw_attrs:

114 for ma in self.attrs_re.finditer(raw_attrs):	83 for ma in self.attrs_re.finditer(raw_attrs):

115 if ma.group('attr'):	84 if ma.group('attr'):

116 if ma.group('value'):	85 if ma.group('value'):

117 attrs[ma.group('attr').strip()] = ma.group('value')	86 attrs[ma.group('attr').strip()] = ma.group('value')

118 else:	87 else:

119 attrs[ma.group('attr').strip()] = ""	88 attrs[ma.group('attr').strip()] = ""

120 elif ma.group('attr1'):	89 elif ma.group('attr1'):

121 if ma.group('value1'):	90 if ma.group('value1'):

122 attrs[ma.group('attr1').strip()] = ma.group('value1')	91 attrs[ma.group('attr1').strip()] = ma.group(

	92 'value1'

	93 )

123 else:	94 else:

124 attrs[ma.group('attr1').strip()] = ""	95 attrs[ma.group('attr1').strip()] = ""

125 elif ma.group('attr2'):	96 elif ma.group('attr2'):

126 attrs[ma.group('attr2').strip()] = ""	97 attrs[ma.group('attr2').strip()] = ""

127 return tag, len(m.group(0)), attrs	98 return tag, len(m.group(0)), attrs

128 else:	99 else:

129 tag = block[1:].split(">", 1)[0].lower()	100 tag = block[1:].split(">", 1)[0].lower()

130 return tag, len(tag)+2, {}	101 return tag, len(tag)+2, {}

131	102

132 def _recursive_tagfind(self, ltag, rtag, start_index, block):	103 def _recursive_tagfind(self, ltag, rtag, start_index, block):

133 while 1:	104 while 1:

134 i = block.find(rtag, start_index)	105 i = block.find(rtag, start_index)

135 if i == -1:	106 if i == -1:

136 return -1	107 return -1

137 j = block.find(ltag, start_index)	108 j = block.find(ltag, start_index)

138 # if no ltag, or rtag found before another ltag, return index	109 # if no ltag, or rtag found before another ltag, return index

139 if (j > i or j == -1):	110 if (j > i or j == -1):

140 return i + len(rtag)	111 return i + len(rtag)

141 # another ltag found before rtag, use end of ltag as starting	112 # another ltag found before rtag, use end of ltag as starting

142 # point and search again	113 # point and search again

143 j = block.find('>', j)	114 j = block.find('>', j)

144 start_index = self._recursive_tagfind(ltag, rtag, j + 1, block)	115 start_index = self._recursive_tagfind(ltag, rtag, j + 1, block)

145 if start_index == -1:	116 if start_index == -1:

146 # HTML potentially malformed- ltag has no corresponding	117 # HTML potentially malformed- ltag has no corresponding

147 # rtag	118 # rtag

148 return -1	119 return -1

149	120

150 def _get_right_tag(self, left_tag, left_index, block):	121 def _get_right_tag(self, left_tag, left_index, block):

151 for p in self.right_tag_patterns:	122 for p in self.right_tag_patterns:

152 tag = p % left_tag	123 tag = p % left_tag

153 i = self._recursive_tagfind("<%s" % left_tag, tag, left_index, block)	124 i = self._recursive_tagfind(

	125 "<%s" % left_tag, tag, left_index, block

	126 )

154 if i > 2:	127 if i > 2:

155 return tag.lstrip("<").rstrip(">"), i	128 return tag.lstrip("<").rstrip(">"), i

156 return block.rstrip()[-left_index:-1].lower(), len(block)	129 return block.rstrip()[-left_index:-1].lower(), len(block)

157	130

158 def _equal_tags(self, left_tag, right_tag):	131 def _equal_tags(self, left_tag, right_tag):

159 if left_tag[0] in ['?', '@', '%']: # handle PHP, etc.	132 if left_tag[0] in ['?', '@', '%']: # handle PHP, etc.

160 return True	133 return True

161 if ("/" + left_tag) == right_tag:	134 if ("/" + left_tag) == right_tag:

162 return True	135 return True

163 if (right_tag == "--" and left_tag == "--"):	136 if (right_tag == "--" and left_tag == "--"):

164 return True	137 return True

165 elif left_tag == right_tag[1:] \	138 elif left_tag == right_tag[1:] and right_tag[0] == "/":

166 and right_tag[0] == "/":

167 return True	139 return True

168 else:	140 else:

169 return False	141 return False

170	142

171 def _is_oneliner(self, tag):	143 def _is_oneliner(self, tag):

172 return (tag in ['hr', 'hr/'])	144 return (tag in ['hr', 'hr/'])

173	145

	146 def _stringindex_to_listindex(self, stringindex, items):

	147 """

	148 Same effect as concatenating the strings in items,

	149 finding the character to which stringindex refers in that string,

	150 and returning the index of the item in which that character resides.

	151 """

	152 items.append('dummy')

	153 i, count = 0, 0

	154 while count <= stringindex:

	155 count += len(items[i])

	156 i += 1

	157 return i - 1

	158

	159 def _nested_markdown_in_html(self, items):

	160 """Find and process html child elements of the given element block."""

	161 for i, item in enumerate(items):

	162 if self.left_tag_re.match(item):

	163 left_tag, left_index, attrs = \

	164 self._get_left_tag(''.join(items[i:]))

	165 right_tag, data_index = self._get_right_tag(

	166 left_tag, left_index, ''.join(items[i:]))

	167 right_listindex = \

	168 self._stringindex_to_listindex(data_index, items[i:]) + i

	169 if 'markdown' in attrs.keys():

	170 items[i] = items[i][left_index:] # remove opening tag

	171 placeholder = self.markdown.htmlStash.store_tag(

	172 left_tag, attrs, i + 1, right_listindex + 1)

	173 items.insert(i, placeholder)

	174 if len(items) - right_listindex <= 1: # last nest, no tail

	175 right_listindex -= 1

	176 items[right_listindex] = items[right_listindex][

	177 :-len(right_tag) - 2] # remove closing tag

	178 else: # raw html

	179 if len(items) - right_listindex <= 1: # last element

	180 right_listindex -= 1

	181 offset = 1 if i == right_listindex else 0

	182 placeholder = self.markdown.htmlStash.store('\n\n'.join(

	183 items[i:right_listindex + offset]))

	184 del items[i:right_listindex + offset]

	185 items.insert(i, placeholder)

	186 return items

	187

174 def run(self, lines):	188 def run(self, lines):

175 text = "\n".join(lines)	189 text = "\n".join(lines)

176 new_blocks = []	190 new_blocks = []

177 text = text.rsplit("\n\n")	191 text = text.rsplit("\n\n")

178 items = []	192 items = []

179 left_tag = ''	193 left_tag = ''

180 right_tag = ''	194 right_tag = ''

181 in_tag = False # flag	195 in_tag = False # flag

182	196

183 while text:	197 while text:

184 block = text[0]	198 block = text[0]

185 if block.startswith("\n"):	199 if block.startswith("\n"):

186 block = block[1:]	200 block = block[1:]

187 text = text[1:]	201 text = text[1:]

188	202

189 if block.startswith("\n"):	203 if block.startswith("\n"):

190 block = block[1:]	204 block = block[1:]

191	205

192 if not in_tag:	206 if not in_tag:

193 if block.startswith("<") and len(block.strip()) > 1:	207 if block.startswith("<") and len(block.strip()) > 1:

194	208

195 if block[1] == "!":	209 if block[1:4] == "!--":

196 # is a comment block	210 # is a comment block

197 left_tag, left_index, attrs = "--", 2, {}	211 left_tag, left_index, attrs = "--", 2, {}

198 else:	212 else:

199 left_tag, left_index, attrs = self._get_left_tag(block)	213 left_tag, left_index, attrs = self._get_left_tag(block)

200 right_tag, data_index = self._get_right_tag(left_tag,	214 right_tag, data_index = self._get_right_tag(left_tag,

201 left_index,	215 left_index,

202 block)	216 block)

203 # keep checking conditions below and maybe just append	217 # keep checking conditions below and maybe just append

204	218

205 if data_index < len(block) \	219 if data_index < len(block) and (util.isBlockLevel(left_tag) or left_tag == '--'):

206 and (util.isBlockLevel(left_tag)

207 or left_tag == '--'):

208 text.insert(0, block[data_index:])	220 text.insert(0, block[data_index:])

209 block = block[:data_index]	221 block = block[:data_index]

210	222

211 if not (util.isBlockLevel(left_tag) \	223 if not (util.isBlockLevel(left_tag) or block[1] in ["!", "?", "@", "%"]):

212 or block[1] in ["!", "?", "@", "%"]):

213 new_blocks.append(block)	224 new_blocks.append(block)

214 continue	225 continue

215	226

216 if self._is_oneliner(left_tag):	227 if self._is_oneliner(left_tag):

217 new_blocks.append(block.strip())	228 new_blocks.append(block.strip())

218 continue	229 continue

219	230

220 if block.rstrip().endswith(">") \	231 if block.rstrip().endswith(">") \

221 and self._equal_tags(left_tag, right_tag):	232 and self._equal_tags(left_tag, right_tag):

222 if self.markdown_in_raw and 'markdown' in attrs.keys():	233 if self.markdown_in_raw and 'markdown' in attrs.keys():

223 start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?',	234 block = block[left_index:-len(right_tag) - 2]

224 '', block[:left_index])	235 new_blocks.append(self.markdown.htmlStash.

225 end = block[-len(right_tag)-2:]	236 store_tag(left_tag, attrs, 0, 2))

226 block = block[left_index:-len(right_tag)-2]	237 new_blocks.extend([block])

227 new_blocks.append(

228 self.markdown.htmlStash.store(start))

229 new_blocks.append(block)

230 new_blocks.append(

231 self.markdown.htmlStash.store(end))

232 else:	238 else:

233 new_blocks.append(	239 new_blocks.append(

234 self.markdown.htmlStash.store(block.strip()))	240 self.markdown.htmlStash.store(block.strip()))

235 continue	241 continue

236 else:	242 else:

237 # if is block level tag and is not complete	243 # if is block level tag and is not complete

238	244 if (not self._equal_tags(left_tag, right_tag)) and \

239 if util.isBlockLevel(left_tag) or left_tag == "--" \	245 (util.isBlockLevel(left_tag) or left_tag == "--"):

240 and not block.rstrip().endswith(">"):

241 items.append(block.strip())	246 items.append(block.strip())

242 in_tag = True	247 in_tag = True

243 else:	248 else:

244 new_blocks.append(	249 new_blocks.append(

245 self.markdown.htmlStash.store(block.strip()))	250 self.markdown.htmlStash.store(block.strip())

246	251 )

247 continue	252 continue

248	253

249 new_blocks.append(block)	254 else:

	255 new_blocks.append(block)

250	256

251 else:	257 else:

252 items.append(block)	258 items.append(block)

253	259

254 right_tag, data_index = self._get_right_tag(left_tag, 0, block)	260 right_tag, data_index = self._get_right_tag(left_tag, 0, block)

255	261

256 if self._equal_tags(left_tag, right_tag):	262 if self._equal_tags(left_tag, right_tag):

257 # if find closing tag	263 # if find closing tag

258	264

259 if data_index < len(block):	265 if data_index < len(block):

260 # we have more text after right_tag	266 # we have more text after right_tag

261 items[-1] = block[:data_index]	267 items[-1] = block[:data_index]

262 text.insert(0, block[data_index:])	268 text.insert(0, block[data_index:])

263	269

264 in_tag = False	270 in_tag = False

265 if self.markdown_in_raw and 'markdown' in attrs.keys():	271 if self.markdown_in_raw and 'markdown' in attrs.keys():

266 start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?',

267 '', items[0][:left_index])

268 items[0] = items[0][left_index:]	272 items[0] = items[0][left_index:]

269 end = items[-1][-len(right_tag)-2:]	273 items[-1] = items[-1][:-len(right_tag) - 2]

270 items[-1] = items[-1][:-len(right_tag)-2]	274 if items[len(items) - 1]: # not a newline/empty string

271 new_blocks.append(	275 right_index = len(items) + 3

272 self.markdown.htmlStash.store(start))	276 else:

273 new_blocks.extend(items)	277 right_index = len(items) + 2

274 new_blocks.append(	278 new_blocks.append(self.markdown.htmlStash.store_tag(

275 self.markdown.htmlStash.store(end))	279 left_tag, attrs, 0, right_index))

	280 placeholderslen = len(self.markdown.htmlStash.tag_data)

	281 new_blocks.extend(

	282 self._nested_markdown_in_html(items))

	283 nests = len(self.markdown.htmlStash.tag_data) - \

	284 placeholderslen

	285 self.markdown.htmlStash.tag_data[-1 - nests][

	286 'right_index'] += nests - 2

276 else:	287 else:

277 new_blocks.append(	288 new_blocks.append(

278 self.markdown.htmlStash.store('\n\n'.join(items)))	289 self.markdown.htmlStash.store('\n\n'.join(items)))

279 items = []	290 items = []

280	291

281 if items:	292 if items:

282 if self.markdown_in_raw and 'markdown' in attrs.keys():	293 if self.markdown_in_raw and 'markdown' in attrs.keys():

283 start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?',

284 '', items[0][:left_index])

285 items[0] = items[0][left_index:]	294 items[0] = items[0][left_index:]

286 end = items[-1][-len(right_tag)-2:]	295 items[-1] = items[-1][:-len(right_tag) - 2]

287 items[-1] = items[-1][:-len(right_tag)-2]	296 if items[len(items) - 1]: # not a newline/empty string

	297 right_index = len(items) + 3

	298 else:

	299 right_index = len(items) + 2

288 new_blocks.append(	300 new_blocks.append(

289 self.markdown.htmlStash.store(start))	301 self.markdown.htmlStash.store_tag(

290 new_blocks.extend(items)	302 left_tag, attrs, 0, right_index))

291 if end.strip():	303 placeholderslen = len(self.markdown.htmlStash.tag_data)

292 new_blocks.append(	304 new_blocks.extend(self._nested_markdown_in_html(items))

293 self.markdown.htmlStash.store(end))	305 nests = len(self.markdown.htmlStash.tag_data) - placeholderslen

	306 self.markdown.htmlStash.tag_data[-1 - nests][

	307 'right_index'] += nests - 2

294 else:	308 else:

295 new_blocks.append(	309 new_blocks.append(

296 self.markdown.htmlStash.store('\n\n'.join(items)))	310 self.markdown.htmlStash.store('\n\n'.join(items)))

297 #new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)))

298 new_blocks.append('\n')	311 new_blocks.append('\n')

299	312

300 new_text = "\n\n".join(new_blocks)	313 new_text = "\n\n".join(new_blocks)

301 return new_text.split("\n")	314 return new_text.split("\n")

302	315

303	316

304 class ReferencePreprocessor(Preprocessor):	317 class ReferencePreprocessor(Preprocessor):

305 """ Remove reference definitions from text and store for later use. """	318 """ Remove reference definitions from text and store for later use. """

306	319

307 TITLE = r'[ ]*(\"(.*)\"|\'(.*)\'|\((.*)\))[ ]*'	320 TITLE = r'[ ]*(\"(.*)\"|\'(.*)\'|\((.*)\))[ ]*'

308 RE = re.compile(r'^[ ]{0,3}\[([^\]]*)\]:\s*([^ ]*)[ ]*(%s)?$' % TITLE, re.DOTALL)	321 RE = re.compile(

	322 r'^[ ]{0,3}\[([^\]]*)\]:\s*([^ ]*)[ ]*(%s)?$' % TITLE, re.DOTALL

	323 )

309 TITLE_RE = re.compile(r'^%s$' % TITLE)	324 TITLE_RE = re.compile(r'^%s$' % TITLE)

310	325

311 def run (self, lines):	326 def run(self, lines):

312 new_text = [];	327 new_text = []

313 while lines:	328 while lines:

314 line = lines.pop(0)	329 line = lines.pop(0)

315 m = self.RE.match(line)	330 m = self.RE.match(line)

316 if m:	331 if m:

317 id = m.group(1).strip().lower()	332 id = m.group(1).strip().lower()

318 link = m.group(2).lstrip('<').rstrip('>')	333 link = m.group(2).lstrip('<').rstrip('>')

319 t = m.group(5) or m.group(6) or m.group(7)	334 t = m.group(5) or m.group(6) or m.group(7)

320 if not t:	335 if not t:

321 # Check next line for title	336 # Check next line for title

322 tm = self.TITLE_RE.match(lines[0])	337 tm = self.TITLE_RE.match(lines[0])

323 if tm:	338 if tm:

324 lines.pop(0)	339 lines.pop(0)

325 t = tm.group(2) or tm.group(3) or tm.group(4)	340 t = tm.group(2) or tm.group(3) or tm.group(4)

326 self.markdown.references[id] = (link, t)	341 self.markdown.references[id] = (link, t)

327 else:	342 else:

328 new_text.append(line)	343 new_text.append(line)

329	344

330 return new_text #+ "\n"	345 return new_text # + "\n"

OLD	NEW

« no previous file with comments | « third_party/Python-Markdown/markdown/postprocessors.py ('k') | third_party/Python-Markdown/markdown/serializers.py » ('j') | no next file with comments »