third_party/Python-Markdown/markdown/preprocessors.py - Issue 1389543003: Revert of Check in a simple pure-python based Markdown previewer.

Side by Side Diff: third_party/Python-Markdown/markdown/preprocessors.py

Issue 1389543003: Revert of Check in a simple pure-python based Markdown previewer. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@add

Patch Set: Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 """

2 PRE-PROCESSORS

3 =============================================================================

4

5 Preprocessors work on source text before we start doing anything too

6 complicated.

7 """

8

9 from __future__ import absolute_import

10 from __future__ import unicode_literals

11 from . import util

12 from . import odict

13 import re

14

15

def build_preprocessors(md_instance, **kwargs):
    """
    Build the default set of preprocessors used by Markdown.

    Returns an ordered mapping of preprocessor name to preprocessor
    instance, each constructed with `md_instance`. Extra keyword arguments
    are accepted but not used by this function.
    """
    registry = odict.OrderedDict()
    registry['normalize_whitespace'] = NormalizeWhitespace(md_instance)
    # The html block preprocessor is left out entirely when safe mode is
    # set to 'escape'.
    escaping = md_instance.safeMode == 'escape'
    if not escaping:
        registry["html_block"] = HtmlBlockPreprocessor(md_instance)
    registry["reference"] = ReferencePreprocessor(md_instance)
    return registry

24

25

class Preprocessor(util.Processor):
    """
    Base class for preprocessors, which are run after the text has been
    broken into lines.

    A preprocessor supplies a "run" method that receives the list of lines
    of the document, changes it as needed, and hands back either that same
    list or a new one.

    Preprocessors must extend markdown.Preprocessor.

    """
    def run(self, lines):
        """
        Hook that every subclass of Preprocessor should override.

        `lines` is the document as a list of strings split by newlines; an
        override returns the (possibly modified) list of lines. This base
        implementation does nothing and returns None.

        """
        return None  # pragma: no cover

45

46

class NormalizeWhitespace(Preprocessor):
    """ Normalize whitespace for consistent parsing. """

    def run(self, lines):
        """
        Return the document lines with whitespace normalized.

        The lines are joined, every occurrence of util.STX and util.ETX is
        removed, all line endings become "\\n", two newlines are appended,
        tabs are expanded to `self.markdown.tab_length` columns and lines
        made up only of spaces are emptied. The result is split back into
        a list of lines.
        """
        text = '\n'.join(lines)
        # presumably STX/ETX delimit internal placeholders -- verify in util
        for marker in (util.STX, util.ETX):
            text = text.replace(marker, "")
        # "\r\n" must be handled before the lone "\r"
        for line_ending in ("\r\n", "\r"):
            text = text.replace(line_ending, "\n")
        text += "\n\n"
        text = text.expandtabs(self.markdown.tab_length)
        # a run of spaces directly after a newline and followed by a
        # newline is a whitespace-only line
        text = re.sub(r'(?<=\n) +\n', '\n', text)
        return text.split('\n')

57

58

class HtmlBlockPreprocessor(Preprocessor):
    """Remove html blocks from the text and store them for later retrieval."""

    # Templates, filled with the left tag name, tried in order when looking
    # for the closing tag.
    right_tag_patterns = ["</%s>", "%s>"]
    # One attribute of an opening tag (compiled with re.VERBOSE, so the
    # layout and the trailing comments are not part of the pattern).
    attrs_pattern = r"""
        \s+(?P<attr>[^>"'/= ]+)=(?P<q>['"])(?P<value>.*?)(?P=q)   # attr="value"
        |                                                         # OR
        \s+(?P<attr1>[^>"'/= ]+)=(?P<value1>[^> ]+)               # attr=value
        |                                                         # OR
        \s+(?P<attr2>[^>"'/= ]+)                                  # attr
        """
    # An opening tag: name, any number of attributes, optional whitespace,
    # optional "/" and optional ">".
    left_tag_pattern = r'^\<(?P<tag>[^> ]+)(?P<attrs>(%s)*)\s*\/?\>?' % \
        attrs_pattern
    attrs_re = re.compile(attrs_pattern, re.VERBOSE)
    left_tag_re = re.compile(left_tag_pattern, re.VERBOSE)
    # When true, elements carrying a "markdown" attribute have their tags
    # stashed separately while their content stays in the text.
    markdown_in_raw = False

    def _get_left_tag(self, block):
        """
        Parse the opening tag at the start of `block`.

        Returns a tuple `(tag, length, attrs)`: the tag name, the length of
        the opening tag text and a dict of its attributes (attributes
        without a value map to ""). If `block` does not match
        `left_tag_re`, the lowercased text between the first character and
        the first ">" is used as the tag and no attributes are returned.
        """
        m = self.left_tag_re.match(block)
        if m:
            tag = m.group('tag')
            raw_attrs = m.group('attrs')
            attrs = {}
            if raw_attrs:
                for ma in self.attrs_re.finditer(raw_attrs):
                    if ma.group('attr'):
                        attrs[ma.group('attr').strip()] = \
                            ma.group('value') or ""
                    elif ma.group('attr1'):
                        attrs[ma.group('attr1').strip()] = \
                            ma.group('value1') or ""
                    elif ma.group('attr2'):
                        attrs[ma.group('attr2').strip()] = ""
            return tag, len(m.group(0)), attrs
        else:
            tag = block[1:].split(">", 1)[0].lower()
            return tag, len(tag) + 2, {}

    def _recursive_tagfind(self, ltag, rtag, start_index, block):
        """
        Find the `rtag` that closes the element being scanned.

        Searches `block` from `start_index`, skipping over nested
        `ltag`/`rtag` pairs. Returns the index just past the matching
        `rtag`, or -1 when no matching `rtag` exists.
        """
        while True:
            i = block.find(rtag, start_index)
            if i == -1:
                return -1
            j = block.find(ltag, start_index)
            # if no ltag, or rtag found before another ltag, return index
            if (j > i or j == -1):
                return i + len(rtag)
            # another ltag found before rtag, use end of ltag as starting
            # point and search again
            j = block.find('>', j)
            start_index = self._recursive_tagfind(ltag, rtag, j + 1, block)
            if start_index == -1:
                # HTML potentially malformed- ltag has no corresponding
                # rtag
                return -1

    def _get_right_tag(self, left_tag, left_index, block):
        """
        Locate the closing tag for `left_tag` in `block`.

        Each template in `right_tag_patterns` is tried in turn, searching
        from `left_index`. Returns `(right_tag, end_index)` where
        `right_tag` is the closing tag without its angle brackets and
        `end_index` is the index just past it. When nothing is found, the
        lowercased tail of the stripped block and `len(block)` are
        returned instead.
        """
        for p in self.right_tag_patterns:
            tag = p % left_tag
            i = self._recursive_tagfind(
                "<%s" % left_tag, tag, left_index, block
            )
            if i > 2:
                return tag.lstrip("<").rstrip(">"), i
        return block.rstrip()[-left_index:-1].lower(), len(block)

    def _equal_tags(self, left_tag, right_tag):
        """Return True if `right_tag` is accepted as closing `left_tag`."""
        if left_tag[0] in ['?', '@', '%']:  # handle PHP, etc.
            return True
        if ("/" + left_tag) == right_tag:
            return True
        if (right_tag == "--" and left_tag == "--"):
            return True
        elif left_tag == right_tag[1:] and right_tag[0] == "/":
            return True
        else:
            return False

    def _is_oneliner(self, tag):
        """Return True for the tags 'hr' and 'hr/'."""
        return (tag in ['hr', 'hr/'])

    def _stringindex_to_listindex(self, stringindex, items):
        """
        Same effect as concatenating the strings in items,
        finding the character to which stringindex refers in that string,
        and returning the index of the item in which that character resides.

        Note that a 'dummy' entry is appended to `items` as a sentinel, so
        callers should pass a copy.
        """
        items.append('dummy')
        i, count = 0, 0
        while count <= stringindex:
            count += len(items[i])
            i += 1
        return i - 1

    def _nested_markdown_in_html(self, items):
        """Find and process html child elements of the given element block."""
        for i, item in enumerate(items):
            if self.left_tag_re.match(item):
                left_tag, left_index, attrs = \
                    self._get_left_tag(''.join(items[i:]))
                right_tag, data_index = self._get_right_tag(
                    left_tag, left_index, ''.join(items[i:]))
                right_listindex = \
                    self._stringindex_to_listindex(data_index, items[i:]) + i
                if 'markdown' in attrs:
                    items[i] = items[i][left_index:]  # remove opening tag
                    placeholder = self.markdown.htmlStash.store_tag(
                        left_tag, attrs, i + 1, right_listindex + 1)
                    items.insert(i, placeholder)
                    if len(items) - right_listindex <= 1:  # last nest, no tail
                        right_listindex -= 1
                    items[right_listindex] = items[right_listindex][
                        :-len(right_tag) - 2]  # remove closing tag
                else:  # raw html
                    if len(items) - right_listindex <= 1:  # last element
                        right_listindex -= 1
                    offset = 1 if i == right_listindex else 0
                    placeholder = self.markdown.htmlStash.store('\n\n'.join(
                        items[i:right_listindex + offset]))
                    del items[i:right_listindex + offset]
                    items.insert(i, placeholder)
        return items

    def _stash_element(self, items, left_tag, left_index, right_tag, attrs):
        """
        Stash an element collected over several blocks.

        Returns the list of blocks that replace `items` in the output. In
        the plain case this is a single placeholder for the whole element.
        With `markdown_in_raw` set and a "markdown" attribute present, the
        outer tags are stashed on their own, nested elements are processed
        and the remaining content is returned alongside the placeholders.
        """
        if not (self.markdown_in_raw and 'markdown' in attrs):
            return [self.markdown.htmlStash.store('\n\n'.join(items))]

        items[0] = items[0][left_index:]
        items[-1] = items[-1][:-len(right_tag) - 2]
        if items[-1]:  # not a newline/empty string
            right_index = len(items) + 3
        else:
            right_index = len(items) + 2
        blocks = [self.markdown.htmlStash.store_tag(
            left_tag, attrs, 0, right_index)]
        placeholderslen = len(self.markdown.htmlStash.tag_data)
        blocks.extend(self._nested_markdown_in_html(items))
        # number of tag entries added while processing nested elements
        nests = len(self.markdown.htmlStash.tag_data) - placeholderslen
        # the entry just before those is taken to be the outer tag stored
        # above (assumes store_tag appends to tag_data -- TODO confirm)
        self.markdown.htmlStash.tag_data[-1 - nests][
            'right_index'] += nests - 2
        return blocks

    def run(self, lines):
        """
        Replace raw html blocks in the document with stash placeholders.

        The lines are joined and split into blocks on blank lines. Blocks
        that start a block-level element, a comment or a processing
        instruction are stored in `self.markdown.htmlStash`, together with
        any following blocks up to the closing tag. Everything else is
        passed through. Returns the resulting text as a list of lines.
        """
        text = "\n".join(lines)
        new_blocks = []
        text = text.rsplit("\n\n")
        items = []
        left_tag = ''
        right_tag = ''
        in_tag = False  # flag

        while text:
            block = text[0]
            if block.startswith("\n"):
                block = block[1:]
            text = text[1:]

            if block.startswith("\n"):
                block = block[1:]

            if not in_tag:
                if block.startswith("<") and len(block.strip()) > 1:

                    if block[1:4] == "!--":
                        # is a comment block
                        left_tag, left_index, attrs = "--", 2, {}
                    else:
                        left_tag, left_index, attrs = self._get_left_tag(block)
                    right_tag, data_index = self._get_right_tag(left_tag,
                                                                left_index,
                                                                block)
                    # keep checking conditions below and maybe just append

                    if data_index < len(block) and (
                            util.isBlockLevel(left_tag) or left_tag == '--'):
                        # text follows the closing tag: push it back so it
                        # is handled as a block of its own
                        text.insert(0, block[data_index:])
                        block = block[:data_index]

                    if not (util.isBlockLevel(left_tag) or
                            block[1] in ["!", "?", "@", "%"]):
                        new_blocks.append(block)
                        continue

                    if self._is_oneliner(left_tag):
                        new_blocks.append(block.strip())
                        continue

                    if block.rstrip().endswith(">") \
                            and self._equal_tags(left_tag, right_tag):
                        if self.markdown_in_raw and 'markdown' in attrs:
                            block = block[left_index:-len(right_tag) - 2]
                            new_blocks.append(self.markdown.htmlStash.
                                              store_tag(left_tag, attrs, 0, 2))
                            new_blocks.extend([block])
                        else:
                            new_blocks.append(
                                self.markdown.htmlStash.store(block.strip()))
                        continue
                    else:
                        # if is block level tag and is not complete
                        if (not self._equal_tags(left_tag, right_tag)) and \
                           (util.isBlockLevel(left_tag) or left_tag == "--"):
                            items.append(block.strip())
                            in_tag = True
                        else:
                            new_blocks.append(
                                self.markdown.htmlStash.store(block.strip())
                            )
                        continue

                else:
                    new_blocks.append(block)

            else:
                items.append(block)

                right_tag, data_index = self._get_right_tag(left_tag, 0, block)

                if self._equal_tags(left_tag, right_tag):
                    # if find closing tag

                    if data_index < len(block):
                        # we have more text after right_tag
                        items[-1] = block[:data_index]
                        text.insert(0, block[data_index:])

                    in_tag = False
                    new_blocks.extend(self._stash_element(
                        items, left_tag, left_index, right_tag, attrs))
                    items = []

        if items:
            # the document ended while still inside an unclosed element
            new_blocks.extend(self._stash_element(
                items, left_tag, left_index, right_tag, attrs))
            new_blocks.append('\n')

        new_text = "\n\n".join(new_blocks)
        return new_text.split("\n")

315

316

class ReferencePreprocessor(Preprocessor):
    """ Remove reference definitions from text and store for later use. """

    # An optional title: "double quoted", 'single quoted' or (parenthesised),
    # with optional surrounding spaces.
    TITLE = r'[ ]*(\"(.*)\"|\'(.*)\'|\((.*)\))[ ]*'
    # A definition line: up to three leading spaces, "[id]:", the link and
    # an optional title. Groups: 1 = id, 2 = link, 5/6/7 = title text for
    # each of the three quoting styles.
    RE = re.compile(
        r'^[ ]{0,3}\[([^\]]*)\]:\s*([^ ]*)[ ]*(%s)?$' % TITLE, re.DOTALL
    )
    # A line holding nothing but a title; groups 2/3/4 = title text.
    TITLE_RE = re.compile(r'^%s$' % TITLE)

    def run(self, lines):
        """
        Strip reference definitions out of `lines`.

        Each definition is recorded in `self.markdown.references` under its
        stripped, lowercased id as a `(link, title)` tuple, with any
        surrounding angle brackets removed from the link. When the
        definition line has no title, the following line is consumed as the
        title if it consists of a title only. Lines are popped from `lines`
        as they are processed; the remaining lines are returned as a new
        list.
        """
        new_text = []
        while lines:
            line = lines.pop(0)
            m = self.RE.match(line)
            if m:
                ref_id = m.group(1).strip().lower()
                link = m.group(2).lstrip('<').rstrip('>')
                t = m.group(5) or m.group(6) or m.group(7)
                # Only look ahead when there is a next line, so that a
                # definition on the very last line does not raise.
                if not t and lines:
                    # Check next line for title
                    tm = self.TITLE_RE.match(lines[0])
                    if tm:
                        lines.pop(0)
                        t = tm.group(2) or tm.group(3) or tm.group(4)
                self.markdown.references[ref_id] = (link, t)
            else:
                new_text.append(line)

        return new_text

OLD	NEW

« no previous file with comments | « third_party/Python-Markdown/markdown/postprocessors.py ('k') | third_party/Python-Markdown/markdown/serializers.py » ('j') | no next file with comments »