trunk/src/third_party/markdown/treeprocessors.py - Issue 132753002: Revert 243980 "Docserver: Support markdown for HTML content."

Side by Side Diff: trunk/src/third_party/markdown/treeprocessors.py

Issue 132753002: Revert 243980 "Docserver: Support markdown for HTML content." (Closed) Base URL: svn://svn.chromium.org/chrome/

Patch Set: Created 6 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 from __future__ import unicode_literals

2 from __future__ import absolute_import

3 from . import util

4 from . import odict

5 from . import inlinepatterns

6

7

def build_treeprocessors(md_instance, **kwargs):
    """Return the ordered collection of default treeprocessors.

    The result maps "inline" to an ``InlineProcessor`` and "prettify" to a
    ``PrettifyTreeprocessor``, registered in that order, each constructed
    with ``md_instance``. Extra keyword arguments are accepted but unused.
    """
    default_processors = (
        ("inline", InlineProcessor),
        ("prettify", PrettifyTreeprocessor),
    )
    registry = odict.OrderedDict()
    for name, processor_cls in default_processors:
        registry[name] = processor_cls(md_instance)
    return registry

14

15

def isString(s):
    """Tell whether ``s`` is a string that is not a ``util.AtomicString``.

    Returns False for any ``util.AtomicString`` instance; otherwise returns
    whether ``s`` is an instance of ``util.string_type``.
    """
    if isinstance(s, util.AtomicString):
        return False
    return isinstance(s, util.string_type)

21

22

class Treeprocessor(util.Processor):
    """
    Base class for processors that operate on the parsed ElementTree.

    Treeprocessors are run on the ElementTree object before serialization.
    A concrete treeprocessor provides a "run" method that receives the tree,
    changes it as needed and hands back an ElementTree object.

    Treeprocessors must extend markdown.Treeprocessor.

    """
    def run(self, root):
        """
        Hook for subclasses; the base implementation does nothing.

        `root` is the root ElementTree. An override may return another
        ElementTree object, in which case the existing root is replaced, or
        it may modify the current tree in place and return None.
        """
        return None

42

43

class InlineProcessor(Treeprocessor):
    """
    A Treeprocessor that traverses a tree, applying inline patterns.

    Matched inline constructs are replaced in the text by placeholder
    strings while the produced nodes are kept in ``self.stashed_nodes``;
    the placeholders are later resolved back into elements that are
    inserted into the tree.

    Children are enumerated with ``list(element)`` / ``len(element)`` rather
    than ``Element.getchildren()``, which was removed in Python 3.9.
    """

    def __init__(self, md):
        """Store the Markdown instance and the placeholder settings.

        * md: the Markdown instance; its ``inlinePatterns`` and
          ``enable_attributes`` attributes are read while running.
        """
        self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
        self.__placeholder_suffix = util.ETX
        self.__placeholder_length = 4 + len(self.__placeholder_prefix) \
            + len(self.__placeholder_suffix)
        self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
        self.markdown = md

    def __makePlaceholder(self, type):
        """
        Generate a placeholder.

        The id is the current number of stashed nodes, zero padded to four
        digits. ``type`` is accepted but not used.

        Returns: tuple of (placeholder string, id).

        """
        node_id = "%04d" % len(self.stashed_nodes)
        placeholder = util.INLINE_PLACEHOLDER % node_id
        return placeholder, node_id

    def __findPlaceholder(self, data, index):
        """
        Extract id from data string, start from index

        Keyword arguments:

        * data: string
        * index: index, from which we start search

        Returns: placeholder id and string index, after the found placeholder.
        When nothing matches, returns ``None`` and ``index + 1``.

        """
        m = self.__placeholder_re.search(data, index)
        if m:
            return m.group(1), m.end()
        return None, index + 1

    def __stashNode(self, node, type):
        """ Add node to stash and return the placeholder standing for it. """
        placeholder, node_id = self.__makePlaceholder(type)
        self.stashed_nodes[node_id] = node
        return placeholder

    def __handleInline(self, data, patternIndex=0):
        """
        Process string with inline patterns and replace it
        with placeholders

        Keyword arguments:

        * data: A line of Markdown text
        * patternIndex: The index of the inlinePattern to start with

        Returns: String with placeholders. An AtomicString is returned
        untouched.

        """
        if not isinstance(data, util.AtomicString):
            startIndex = 0
            patterns = self.markdown.inlinePatterns
            while patternIndex < len(patterns):
                data, matched, startIndex = self.__applyPattern(
                    patterns.value_for_index(patternIndex),
                    data, patternIndex, startIndex)
                # Keep re-applying the same pattern until it stops matching.
                if not matched:
                    patternIndex += 1
        return data

    def __processElementText(self, node, subnode, isText=True):
        """
        Process placeholders in Element.text or Element.tail
        of Elements popped from self.stashed_nodes.

        Keywords arguments:

        * node: parent node
        * subnode: processing node
        * isText: bool variable, True - it's text, False - it's tail

        Returns: None

        """
        if isText:
            text = subnode.text
            subnode.text = None
        else:
            text = subnode.tail
            subnode.tail = None

        childResult = self.__processPlaceholders(text, subnode)

        if not isText and node is not subnode:
            # NOTE(review): for a tail, the subnode is removed from its
            # parent here and only the resolved placeholder nodes are
            # inserted at its former position; plain text found before the
            # first placeholder is attached to subnode.text by
            # __processPlaceholders. Behaviour kept as-is -- confirm that
            # this is intended.
            pos = list(node).index(subnode)
            node.remove(subnode)
        else:
            pos = 0

        # Inserting the reversed list at a fixed position keeps the
        # original order of the new children.
        childResult.reverse()
        for newChild in childResult:
            node.insert(pos, newChild)

    def __processPlaceholders(self, data, parent):
        """
        Process string with placeholders and generate ElementTree tree.

        Keyword arguments:

        * data: string with placeholders instead of ElementTree elements.
        * parent: Element, which contains processing inline data

        Returns: list with ElementTree elements with applied inline patterns.
        Plain text between placeholders is attached to the tail of the
        preceding result element, or to ``parent.text`` when there is none
        yet.

        """
        def linkText(text):
            if text:
                if result:
                    if result[-1].tail:
                        result[-1].tail += text
                    else:
                        result[-1].tail = text
                else:
                    if parent.text:
                        parent.text += text
                    else:
                        parent.text = text

        result = []
        startIndex = 0
        while data:
            index = data.find(self.__placeholder_prefix, startIndex)
            if index != -1:
                node_id, phEndIndex = self.__findPlaceholder(data, index)

                if node_id in self.stashed_nodes:
                    node = self.stashed_nodes.get(node_id)

                    if index > 0:
                        text = data[startIndex:index]
                        linkText(text)

                    if not isString(node):  # it's Element
                        # list(node) snapshots the children, so changes made
                        # by __processElementText do not disturb the loop.
                        for child in [node] + list(node):
                            if child.tail:
                                if child.tail.strip():
                                    self.__processElementText(node, child,
                                                              False)
                            if child.text:
                                if child.text.strip():
                                    self.__processElementText(child, child)
                    else:  # it's just a string
                        linkText(node)
                        startIndex = phEndIndex
                        continue

                    startIndex = phEndIndex
                    result.append(node)

                else:  # wrong placeholder
                    # Emit the prefix as literal text and search on after it.
                    end = index + len(self.__placeholder_prefix)
                    linkText(data[startIndex:end])
                    startIndex = end
            else:
                text = data[startIndex:]
                if isinstance(data, util.AtomicString):
                    # We don't want to lose the AtomicString
                    text = util.AtomicString(text)
                linkText(text)
                data = ""

        return result

    def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
        """
        Check if the line fits the pattern, create the necessary
        elements, add it to stashed_nodes.

        Keyword arguments:

        * data: the text to be processed
        * pattern: the pattern to be checked
        * patternIndex: index of current pattern
        * startIndex: string index, from which we start searching

        Returns: tuple of (string with placeholders instead of ElementTree
        elements, whether the pattern matched, index to start the next
        search from).

        """
        match = pattern.getCompiledRegExp().match(data[startIndex:])
        leftData = data[:startIndex]

        if not match:
            return data, False, 0

        node = pattern.handleMatch(match)

        if node is None:
            # The pattern matched but produced nothing: leave the data alone
            # and resume searching from where the last group started.
            return data, True, len(leftData)+match.span(len(match.groups()))[0]

        if not isString(node):
            if not isinstance(node.text, util.AtomicString):
                # We need to process current node too
                for child in [node] + list(node):
                    if child.text:
                        child.text = self.__handleInline(child.text,
                                                         patternIndex + 1)
                    if child.tail:
                        child.tail = self.__handleInline(child.tail,
                                                         patternIndex)

        placeholder = self.__stashNode(node, pattern.type())

        return "%s%s%s%s" % (leftData,
                             match.group(1),
                             placeholder, match.groups()[-1]), True, 0

    def run(self, tree):
        """Apply inline patterns to a parsed Markdown tree.

        Iterate over ElementTree, find elements with inline tag, apply inline
        patterns and append newly created Elements to tree.  If you don't
        want to process your data with inline patterns, instead of normal
        string, use subclass AtomicString:

            node.text = markdown.AtomicString("This will not be processed.")

        Arguments:

        * tree: ElementTree object, representing Markdown tree.

        Returns: ElementTree object with applied inline patterns.

        """
        self.stashed_nodes = {}

        stack = [tree]

        while stack:
            currElement = stack.pop()
            insertQueue = []
            # Iterate over a snapshot: tail processing below inserts new
            # siblings into currElement while the loop is running.
            for child in list(currElement):
                if child.text and not isinstance(child.text,
                                                 util.AtomicString):
                    text = child.text
                    child.text = None
                    lst = self.__processPlaceholders(
                        self.__handleInline(text), child)
                    stack += lst
                    insertQueue.append((child, lst))
                if child.tail:
                    tail = self.__handleInline(child.tail)
                    # Throw-away element that collects the leading plain
                    # text of the tail in its .text attribute.
                    dumby = util.etree.Element('d')
                    tailResult = self.__processPlaceholders(tail, dumby)
                    if dumby.text:
                        child.tail = dumby.text
                    else:
                        child.tail = None
                    pos = list(currElement).index(child) + 1
                    tailResult.reverse()
                    for newChild in tailResult:
                        currElement.insert(pos, newChild)
                if len(child):
                    stack.append(child)

            for element, lst in insertQueue:
                if self.markdown.enable_attributes:
                    if element.text and isString(element.text):
                        element.text = \
                            inlinepatterns.handleAttributes(element.text,
                                                            element)
                for i, newChild in enumerate(lst):
                    if self.markdown.enable_attributes:
                        # Processing attributes
                        if newChild.tail and isString(newChild.tail):
                            newChild.tail = \
                                inlinepatterns.handleAttributes(newChild.tail,
                                                                element)
                        if newChild.text and isString(newChild.text):
                            newChild.text = \
                                inlinepatterns.handleAttributes(newChild.text,
                                                                newChild)
                    element.insert(i, newChild)
        return tree

323

324

class PrettifyTreeprocessor(Treeprocessor):
    """ Add linebreaks to the html document. """

    def _prettifyETree(self, elem):
        """ Recursively add linebreaks to ElementTree children.

        For a block-level element other than 'code' and 'pre', the text is
        set to a newline when it is empty or whitespace and the first child
        is block-level, and every block-level child is processed
        recursively. In all cases an empty or whitespace-only tail of
        ``elem`` is replaced by a newline.
        """
        newline = "\n"
        if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
            if (not elem.text or not elem.text.strip()) \
                    and len(elem) and util.isBlockLevel(elem[0].tag):
                elem.text = newline
            for e in elem:
                if util.isBlockLevel(e.tag):
                    self._prettifyETree(e)
        # A single check suffices here; it covers block-level and
        # non-block-level elements alike.
        if not elem.tail or not elem.tail.strip():
            elem.tail = newline

    def run(self, root):
        """ Add linebreaks to ElementTree root object.

        Modifies ``root`` in place and returns None. Uses ``iter()`` instead
        of ``getiterator()``, which was removed in Python 3.9; ``iter()`` is
        available from Python 2.7 on.
        """

        self._prettifyETree(root)
        # Do <br />'s separately as they are often in the middle of
        # inline content and missed by _prettifyETree.
        for br in root.iter('br'):
            if not br.tail or not br.tail.strip():
                br.tail = '\n'
            else:
                br.tail = '\n%s' % br.tail
        # Clean up extra empty lines at end of code blocks.
        for pre in root.iter('pre'):
            # Skip a <code> child without text: there is nothing to strip,
            # and calling rstrip() on None would raise AttributeError.
            if len(pre) and pre[0].tag == 'code' \
                    and pre[0].text is not None:
                pre[0].text = pre[0].text.rstrip() + '\n'

OLD	NEW

« no previous file with comments | « trunk/src/third_party/markdown/serializers.py ('k') | trunk/src/third_party/markdown/util.py » ('j') | no next file with comments »