utils/markdown/inline_parser.dart - Issue 8953042: Move markdown library.

Side by Side Diff: utils/markdown/inline_parser.dart

Issue 8953042: Move markdown library. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Add markdown tests to dartdoc. Created 9 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.

4

/// Maintains the internal state needed to parse inline span elements in
/// markdown.
///
/// A parser is created for a single [source] string. [parse] walks that string
/// from left to right, trying each open tag's end pattern and then each entry
/// of [syntaxes] at the current position, and returns the resulting nodes.
class InlineParser {
  /// The inline syntaxes tried at each position, in priority order.
  ///
  /// The list is created on first access and then reused. Order matters: the
  /// first syntax whose pattern matches at the current position wins, so more
  /// specific patterns (for example "**") are listed before the more general
  /// ones they overlap with ("*").
  static List<InlineSyntax> get syntaxes() {
    // Lazy initialize.
    if (_syntaxes == null) {
      _syntaxes = <InlineSyntax>[
        new AutolinkSyntax(),
        new LinkSyntax(),
        // "*" surrounded by spaces is left alone.
        new TextSyntax(@' \* '),
        // "_" surrounded by spaces is left alone.
        new TextSyntax(@' _ '),
        // Leave already-encoded HTML entities alone. Ensures we don't turn
        // "&amp;" into "&amp;amp;"
        new TextSyntax(@'&[#a-zA-Z0-9]*;'),
        // Encode "&".
        new TextSyntax(@'&', sub: '&amp;'),
        // Encode "<". (Why not encode ">" too? Gruber is toying with us.)
        new TextSyntax(@'<', sub: '&lt;'),
        // Parse "**strong**" tags.
        new TagSyntax(@'\*\*', tag: 'strong'),
        // Parse "__strong__" tags.
        new TagSyntax(@'__', tag: 'strong'),
        // Parse "*emphasis*" tags.
        new TagSyntax(@'\*', tag: 'em'),
        // Parse "_emphasis_" tags.
        // TODO(rnystrom): Underscores in the middle of a word should not be
        // parsed as emphasis like_in_this.
        new TagSyntax(@'_', tag: 'em'),
        // Parse inline code within double backticks: "``code``".
        new CodeSyntax(@'``\s?((?:.|\n)*?)\s?``'),
        // Parse inline code within backticks: "`code`".
        new CodeSyntax(@'`([^`]*)`')
      ];
    }

    return _syntaxes;
  }

  /// Backing store for [syntaxes]; `null` until the getter is first read.
  static List<InlineSyntax> _syntaxes;

  /// The string of markdown being parsed.
  final String source;

  /// The markdown document this parser is parsing.
  final Document document;

  /// The current read position.
  int pos = 0;

  /// Starting position of the last unconsumed text.
  int start = 0;

  /// The currently open tags, innermost last. Index 0 holds the placeholder
  /// tag that [parse] pushes to collect the top-level results.
  final List<TagState> _stack;

  InlineParser(this.source, this.document)
    : _stack = <TagState>[];

  /// Parses [source] and returns the top-level nodes it produced.
  List<Node> parse() {
    // Make a fake top tag to hold the results.
    _stack.add(new TagState(0, 0, null));

    while (!isDone) {
      bool matched = false;

      // See if any of the current tags on the stack match. We don't allow tags
      // of the same kind to nest, so this takes priority over other possible
      // matches. Index 0 is skipped: it is the fake top tag and has no syntax.
      for (int i = _stack.length - 1; i > 0; i--) {
        if (_stack[i].tryMatch(this)) {
          matched = true;
          break;
        }
      }
      if (matched) continue;

      // See if the current text matches any defined markdown syntax.
      for (final syntax in syntaxes) {
        if (syntax.tryMatch(this)) {
          matched = true;
          break;
        }
      }
      if (matched) continue;

      // If we got here, it's just text.
      advanceBy(1);
    }

    // Unwind any unmatched tags and get the results.
    return _stack[0].close(this, null);
  }

  /// Emits the pending plain text between [start] and [pos], then marks it as
  /// written by moving [start] up to [pos].
  writeText() {
    writeTextRange(start, pos);
    start = pos;
  }

  /// Appends `source[start, end)` as text to the innermost open tag. Does
  /// nothing when the range is empty.
  writeTextRange(int start, int end) {
    if (end > start) {
      final text = source.substring(start, end);
      final nodes = _stack.last().children;

      // If the previous node is text too, just append.
      if ((nodes.length > 0) && (nodes.last() is Text)) {
        final newNode = new Text('${nodes.last().text}$text');
        nodes[nodes.length - 1] = newNode;
      } else {
        nodes.add(new Text(text));
      }
    }
  }

  /// Adds [node] as the next child of the innermost open tag.
  addNode(Node node) {
    _stack.last().children.add(node);
  }

  // TODO(rnystrom): Only need this because RegExp doesn't let you start
  // searching from a given offset.
  /// The not-yet-read remainder of [source], starting at [pos].
  String get currentSource() => source.substring(pos, source.length);

  /// Whether [pos] has reached the end of [source].
  bool get isDone() => pos == source.length;

  /// Moves [pos] forward by [length] while leaving [start] alone, so the
  /// skipped characters remain pending plain text.
  void advanceBy(int length) {
    pos += length;
  }

  /// Moves [pos] forward by [length] and resets [start] to it, so the skipped
  /// characters are dropped from the pending plain text.
  void consume(int length) {
    pos += length;
    start = pos;
  }
}

137

/// Represents one kind of markdown tag that can be parsed.
///
/// The base class owns the compiled [pattern] and the matching protocol in
/// [tryMatch]; subclasses decide what a successful match produces by
/// implementing [onMatch].
class InlineSyntax {
  /// The regular expression compiled from the string given to the constructor.
  final RegExp pattern;

  InlineSyntax(String pattern)
    : pattern = new RegExp(pattern, true);
  // TODO(rnystrom): Should use named arg for RegExp multiLine.

  /// Tries this syntax at the parser's current position.
  ///
  /// Returns `false` without touching the parser unless [pattern] matches at
  /// the very start of the remaining source. On a match, pending plain text is
  /// flushed and [onMatch] is called. The matched text is consumed only when
  /// [onMatch] returns `true`; either way this method then returns `true`.
  bool tryMatch(InlineParser parser) {
    final hit = pattern.firstMatch(parser.currentSource);

    // A match further along in the text does not count; it has to begin
    // exactly where the parser currently is.
    if ((hit == null) || (hit.start() != 0)) return false;

    // Write any existing plain text up to this point.
    parser.writeText();

    final handled = onMatch(parser, hit);
    if (handled) {
      parser.consume(hit[0].length);
    }
    return true;
  }

  /// Handles a successful [match]. The return value tells [tryMatch] whether
  /// it should consume the matched text on the subclass's behalf.
  abstract bool onMatch(InlineParser parser, Match match);
}

162

/// Matches stuff that should just be passed through as straight text.
///
/// When a substitute string is supplied, the matched text is replaced by it;
/// otherwise the matched text itself is kept.
class TextSyntax extends InlineSyntax {
  /// Replacement text for a match, or `null` to keep the matched text.
  String substitute;

  TextSyntax(String pattern, [String sub])
    : super(pattern),
      substitute = sub;

  bool onMatch(InlineParser parser, Match match) {
    if (substitute != null) {
      // Insert the substitution and let the caller consume the match.
      parser.addNode(new Text(substitute));
      return true;
    }

    // Just use the original matched text: step over it so it stays part of
    // the pending plain text, and tell the caller not to consume it.
    parser.advanceBy(match[0].length);
    return false;
  }
}

182

/// Matches autolinks like `<http://foo.com>`.
///
/// The URL must start with `http://`, `https://` or `ftp://` and runs up to
/// the next `>`. It becomes an `<a>` element whose text and `href` are both
/// the URL.
class AutolinkSyntax extends InlineSyntax {
  AutolinkSyntax()
    : super(@'<((http|https|ftp)://[^>]*)>');
  // TODO(rnystrom): Make case insensitive.

  bool onMatch(InlineParser parser, Match match) {
    // Group 1 is everything between the angle brackets.
    final url = match[1];

    final anchor = new Element.text('a', escapeHtml(url));
    // NOTE(review): unlike LinkSyntax, the href is stored without escapeHtml.
    // Confirm whether that difference is intended.
    anchor.attributes['href'] = url;
    parser.addNode(anchor);

    return true;
  }
}

199

/// Matches syntax that has a pair of tags and becomes an element, like `*` for
/// `<em>`. Allows nested tags.
///
/// Matching the opening pattern pushes a [TagState] onto the parser's stack;
/// the element itself is only created once [endPattern] is matched later on.
class TagSyntax extends InlineSyntax {
  /// Pattern that closes the tag. Same as the opening pattern unless a
  /// separate `end` pattern was given.
  final RegExp endPattern;

  /// Name of the element created when the tag is closed.
  final String tag;

  TagSyntax(String pattern, [String tag, String end = null])
    : super(pattern),
      endPattern = new RegExp((end == null) ? pattern : end, true),
      tag = tag;
    // TODO(rnystrom): Doing this.field doesn't seem to work with named args.
    // TODO(rnystrom): Should use named arg for RegExp multiLine.

  /// Opens the tag: records where the opening marker starts and ends so it can
  /// be written back as plain text if the tag never closes properly.
  bool onMatch(InlineParser parser, Match match) {
    final openStart = parser.pos;
    final openEnd = openStart + match[0].length;
    parser._stack.add(new TagState(openStart, openEnd, this));
    return true;
  }

  /// Closes the tag: wraps the children collected in [state] in a [tag]
  /// element and adds it to the parser's output.
  bool onMatchEnd(InlineParser parser, Match match, TagState state) {
    final element = new Element(tag, state.children);
    parser.addNode(element);
    return true;
  }
}

224

/// Matches inline links like `[blah] [id]` and `[blah] (url)`.
///
/// The opening pattern is a single `[`. The end pattern ([linkPattern])
/// matches the closing `]` plus whatever follows it that makes it a reference
/// or inline link.
class LinkSyntax extends TagSyntax {
  /// The regex for the end of a link needs to handle both reference style and
  /// inline styles as well as optional titles for inline links. To make that
  /// a bit more palatable, this breaks it into pieces.
  static get linkPattern() {
    final refLink = @'\s?\[([^\]]*)\]'; // "[id]" reflink id.
    final title = @'(?:[ ]*"([^"]+)"|)'; // Optional title in quotes.
    final inlineLink = '\\s?\\(([^ )]+)$title\\)'; // "(url "title")" link.

    // The groups matched by this are:
    // 1: Will be non-empty if it's either a ref or inline link. Will be empty
    //    if it's just a bare pair of square brackets with nothing after them.
    // 2: Contains the id inside [] for a reference-style link.
    // 3: Contains the URL for an inline link.
    // 4: Contains the title, if present, for an inline link.
    return '\\](?:($refLink|$inlineLink)|)';
  }

  LinkSyntax()
    : super(@'\[', end: linkPattern);

  /// Turns a closed pair of square brackets into a link node.
  ///
  /// Returns `false`, which makes the caller fall back to plain text, when
  /// the brackets are bare and no implicit resolver produces a node, or when
  /// a reference id is not found in the document's reference links.
  bool onMatchEnd(InlineParser parser, Match match, TagState state) {
    var url;
    var title;

    // If we didn't match refLink or inlineLink, then it means there was
    // nothing after the first square bracket, so it isn't a normal markdown
    // link at all. Instead, we allow users of the library to specify a special
    // resolver function ([setImplicitLinkResolver]) that may choose to handle
    // this. Otherwise, it's just treated as plain text.
    if ((match[1] == null) || (match[1] == '')) {
      if (_implicitLinkResolver == null) return false;

      // Only allow implicit links if the content is just text.
      // TODO(rnystrom): Do we want to relax this?
      if (state.children.length != 1) return false;
      if (state.children[0] is! Text) return false;

      Text link = state.children[0];

      // See if we have a resolver that will generate a link for us.
      final node = _implicitLinkResolver(link.text);
      if (node == null) return false;

      parser.addNode(node);
      return true;
    }

    if ((match[3] != null) && (match[3] != '')) {
      // Inline link like [foo](url).
      url = match[3];
      title = match[4];

      // For whatever reason, markdown allows angle-bracketed URLs here.
      if (url.startsWith('<') && url.endsWith('>')) {
        url = url.substring(1, url.length - 1);
      }
    } else {
      // Reference link like [foo] [bar].
      var id = match[2];
      if (id == '') {
        // The id is empty ("[]") so infer it from the contents.
        id = parser.source.substring(state.startPos + 1, parser.pos);
      }

      // Look up the link.
      final link = parser.document.refLinks[id];
      // If it's an unknown link just emit plaintext.
      if (link == null) return false;

      url = link.url;
      title = link.title;
    }

    final anchor = new Element('a', state.children);
    anchor.attributes['href'] = escapeHtml(url);
    if ((title != null) && (title != '')) {
      anchor.attributes['title'] = escapeHtml(title);
    }

    parser.addNode(anchor);
    return true;
  }
}

310

/// Matches backtick-enclosed inline code blocks.
///
/// The pattern's first capture group is taken as the code text.
class CodeSyntax extends InlineSyntax {
  CodeSyntax(String pattern)
    : super(pattern);

  /// Emits a `<code>` element holding the HTML-escaped contents of group 1.
  bool onMatch(InlineParser parser, Match match) {
    final escaped = escapeHtml(match[1]);
    parser.addNode(new Element.text('code', escaped));
    return true;
  }
}

321

/// Keeps track of a currently open tag while it is being parsed. The parser
/// maintains a stack of these so it can handle nested tags.
class TagState {
  /// The point in the original source where this tag started.
  int startPos;

  /// The point in the original source where open tag ended.
  int endPos;

  /// The syntax that created this node.
  final TagSyntax syntax;

  /// The nodes collected inside this tag so far. Starts out as an empty list.
  final List<Node> children;

  TagState(this.startPos, this.endPos, this.syntax)
    : children = <Node>[];

  /// Attempts to close this tag by matching the current text against its end
  /// pattern. Returns whether the end pattern matched at the parser's current
  /// position.
  bool tryMatch(InlineParser parser) {
    final endMatch = syntax.endPattern.firstMatch(parser.currentSource);

    // Only an end marker sitting exactly at the current position counts.
    if ((endMatch == null) || (endMatch.start() != 0)) return false;

    // Close the tag.
    close(parser, endMatch);
    return true;
  }

  /// Pops this tag off the stack, completes it, and adds it to the output.
  /// Will discard any unmatched tags that happen to be above it on the stack.
  /// If this is the last node in the stack, returns its children; otherwise
  /// returns `null`.
  List<Node> close(InlineParser parser, Match endMatch) {
    // If there are unclosed tags on top of this one when it's closed, that
    // means they are mismatched. Mismatched tags are treated as plain text in
    // markdown. So for each tag above this one, we write its start tag as text
    // and then add its children to this one's children.
    final stack = parser._stack;
    final index = stack.indexOf(this);
    final firstAbove = index + 1;
    final aboveCount = stack.length - index - 1;

    // Remove the unmatched children. The range is taken before anything is
    // removed, so both calls see the same stack length.
    final unmatchedTags = stack.getRange(firstAbove, aboveCount);
    stack.removeRange(firstAbove, aboveCount);

    // Flatten them out onto this tag.
    for (final unmatched in unmatchedTags) {
      // Write the start tag as text.
      parser.writeTextRange(unmatched.startPos, unmatched.endPos);

      // Bequeath its children unto this tag.
      children.addAll(unmatched.children);
    }

    // Flush pending text into this tag, then pop this off the stack.
    parser.writeText();
    stack.removeLast();

    // If the stack is empty now, this is the special "results" node.
    if (stack.length == 0) return children;

    // We are still parsing, so add this to its parent's children.
    final closed = syntax.onMatchEnd(parser, endMatch, this);
    if (closed) {
      parser.consume(endMatch[0].length);
    } else {
      // Didn't close correctly so revert to text: rewind the pending-text
      // start to where this tag opened and step over the end marker.
      parser.start = startPos;
      parser.advanceBy(endMatch[0].length);
    }

    return null;
  }
}

OLD	NEW

« no previous file with comments | « utils/markdown/html_renderer.dart ('k') | utils/markdown/lib.dart » ('j') | no next file with comments »