utils/markdown/inline_parser.dart - Issue 8725007: Lots of stuff hooking up markdown to dartdoc.

Side by Side Diff: utils/markdown/inline_parser.dart

Issue 8725007: Lots of stuff hooking up markdown to dartdoc. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Respond to awesome reviews. Created 9 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 /// Maintains the internal state needed to parse inline span elements in	5 /// Maintains the internal state needed to parse inline span elements in

6 /// markdown.	6 /// markdown.

7 class InlineParser {	7 class InlineParser {

8 static List<InlineSyntax> get syntaxes() {	8 static List<InlineSyntax> get syntaxes() {

9 // Lazy initialize.	9 // Lazy initialize.

10 if (_syntaxes == null) {	10 if (_syntaxes == null) {

11 _syntaxes = <InlineSyntax>[	11 _syntaxes = <InlineSyntax>[

12 new AutolinkSyntax(),	12 new AutolinkSyntax(),

13 new LinkSyntax(),	13 new LinkSyntax(),

14 // "*" surrounded by spaces is left alone.	14 // "*" surrounded by spaces is left alone.

15 new TextSyntax(@' \* '),	15 new TextSyntax(@' \* '),

16 // "_" surrounded by spaces is left alone.	16 // "_" surrounded by spaces is left alone.

17 new TextSyntax(@' _ '),	17 new TextSyntax(@' _ '),

18 // Leave already-encoded HTML entities alone. Ensures we don't turn	18 // Leave already-encoded HTML entities alone. Ensures we don't turn

19 // "&amp;" into "&amp;amp;"	19 // "&amp;" into "&amp;amp;"

20 new TextSyntax(@'&[#a-zA-Z0-9]*;'),	20 new TextSyntax(@'&[#a-zA-Z0-9]*;'),

21 // Encode "&".	21 // Encode "&".

22 new TextSyntax(@'&', sub: '&amp;'),	22 new TextSyntax(@'&', sub: '&amp;'),

23 // Encode "<". (Why not encode ">" too? Gruber is toying with us.)	23 // Encode "<". (Why not encode ">" too? Gruber is toying with us.)

24 new TextSyntax(@'<', sub: '&lt;'),	24 new TextSyntax(@'<', sub: '&lt;'),

25 // Parse "**strong**" tags.	25 // Parse "**strong**" tags.

26 new TagSyntax(@'\*\*', tag: 'strong'),	26 new TagSyntax(@'\*\*', tag: 'strong'),

27 // Parse "__strong__" tags.	27 // Parse "__strong__" tags.

28 new TagSyntax(@'__', tag: 'strong'),	28 new TagSyntax(@'__', tag: 'strong'),

29 // Parse "*emphasis*" tags.	29 // Parse "*emphasis*" tags.

30 new TagSyntax(@'\*', tag: 'em'),	30 new TagSyntax(@'\*', tag: 'em'),

31 // Parse "_emphasis_" tags.	31 // Parse "_emphasis_" tags.

32 // TODO(rnystrom): Underscores in the middle of a word should not be	32 // TODO(rnystrom): Underscores in the middle of a word should not be

33 // parsed as emphasis like_in_this.	33 // parsed as emphasis like_in_this.

34 new TagSyntax(@'_', tag: 'em'),	34 new TagSyntax(@'_', tag: 'em'),

35 // Parse inline code within double backticks: "``code``".	35 // Parse inline code within double backticks: "``code``".

36 new CodeSyntax(@'``[ ]?(.*?)[ ]?``'),	36 new CodeSyntax(@'``\s?((?:.|\n)*?)\s?``'),

37 // Parse inline code within backticks: "`code`".	37 // Parse inline code within backticks: "`code`".

38 new CodeSyntax(@'`([^`]*)`')	38 new CodeSyntax(@'`([^`]*)`')

39 ];	39 ];

40 }	40 }

41	41

42 return _syntaxes;	42 return _syntaxes;

43 }	43 }

44	44

45 static List<InlineSyntax> _syntaxes;	45 static List<InlineSyntax> _syntaxes;

46	46

47 /// The string of markdown being parsed.	47 /// The string of markdown being parsed.

48 final String source;	48 final String source;

49	49

50 /// The markdown document this parser is parsing.	50 /// The markdown document this parser is parsing.

51 final Document document;	51 final Document document;

52	52

53 /// The current read position.	53 /// The current read position.

54 int pos = 0;	54 int pos = 0;

55	55

56 /// Starting position of the last unconsumed text.	56 /// Starting position of the last unconsumed text.

57 int start = 0;	57 int start = 0;

58	58

59 final List<TagState> _stack;	59 final List<TagState> _stack;

60	60

61 InlineParser(this.source, this.document)	61 InlineParser(this.source, this.document)

62 : _stack = <TagState>[];	62 : _stack = <TagState>[];

63	63

64 List<Node> parse() {	64 List<Node> parse() {

65 // Make a fake top tag to hold the results.	65 // Make a fake top tag to hold the results.

66 _stack.add(new TagState(0, null));	66 _stack.add(new TagState(0, 0, null));

67	67

68 while (!isDone) {	68 while (!isDone) {

69 bool matched = false;	69 bool matched = false;

70	70

71 // See if any of the current tags on the stack match. We don't allow tags	71 // See if any of the current tags on the stack match. We don't allow tags

72 // of the same kind to nest, so this takes priority over other possible matches.	72 // of the same kind to nest, so this takes priority over other possible matches.

73 for (int i = _stack.length - 1; i > 0; i--) {	73 for (int i = _stack.length - 1; i > 0; i--) {

74 if (_stack[i].tryMatch(this)) {	74 if (_stack[i].tryMatch(this)) {

75 matched = true;	75 matched = true;

76 break;	76 break;

(...skipping 12 matching lines...) Expand all Loading...
89	89

90 // If we got here, it's just text.	90 // If we got here, it's just text.

91 advanceBy(1);	91 advanceBy(1);

92 }	92 }

93	93

94 // Unwind any unmatched tags and get the results.	94 // Unwind any unmatched tags and get the results.

95 return _stack[0].close(this, null);	95 return _stack[0].close(this, null);

96 }	96 }

97	97

98 writeText() {	98 writeText() {

99 if (pos > start) {	99 writeTextRange(start, pos);

100 final text = source.substring(start, pos);	100 start = pos;

	101 }

	102

	103 writeTextRange(int start, int end) {

	104 if (end > start) {

	105 final text = source.substring(start, end);

101 final nodes = _stack.last().children;	106 final nodes = _stack.last().children;

102	107

103 // If the previous node is text too, just append.	108 // If the previous node is text too, just append.

104 if ((nodes.length > 0) && (nodes.last() is Text)) {	109 if ((nodes.length > 0) && (nodes.last() is Text)) {

105 final newNode = new Text('${nodes.last().text}$text');	110 final newNode = new Text('${nodes.last().text}$text');

106 nodes[nodes.length - 1] = newNode;	111 nodes[nodes.length - 1] = newNode;

107 } else {	112 } else {

108 nodes.add(new Text(text));	113 nodes.add(new Text(text));

109 }	114 }

110

111 start = pos;

112 }	115 }

113 }	116 }

114	117

115 /// Removes the top tag from the stack, reverts it to plain text and adds it

116 /// to the output.

117 discardUnmatchedTag() {

118 final unfinished = _stack.removeLast();

119 start = unfinished.startPos;

120 }

121

122 addNode(Node node) {	118 addNode(Node node) {

123 _stack.last().children.add(node);	119 _stack.last().children.add(node);

124 }	120 }

125	121

126 // TODO(rnystrom): Only need this because RegExp doesn't let you start	122 // TODO(rnystrom): Only need this because RegExp doesn't let you start

127 // searching from a given offset.	123 // searching from a given offset.

128 String get currentSource() => source.substring(pos, source.length);	124 String get currentSource() => source.substring(pos, source.length);

129	125

130 bool get isDone() => pos == source.length;	126 bool get isDone() => pos == source.length;

131	127

132 void advanceBy(int length) => pos += length;	128 void advanceBy(int length) {

	129 pos += length;

	130 }

	131

133 void consume(int length) {	132 void consume(int length) {

134 pos += length;	133 pos += length;

135 start = pos;	134 start = pos;

136 }	135 }

137 }	136 }

138	137

139 /// Represents one kind of markdown tag that can be parsed.	138 /// Represents one kind of markdown tag that can be parsed.

140 class InlineSyntax {	139 class InlineSyntax {

141 final RegExp pattern;	140 final RegExp pattern;

142	141

143 InlineSyntax(String pattern)	142 InlineSyntax(String pattern)

144 : pattern = new RegExp(pattern, true);	143 : pattern = new RegExp(pattern, true);

145 // TODO(rnystrom): Should use named arg for RegExp multiLine.	144 // TODO(rnystrom): Should use named arg for RegExp multiLine.

146	145

147 bool tryMatch(InlineParser parser) {	146 bool tryMatch(InlineParser parser) {

148 final startMatch = pattern.firstMatch(parser.currentSource);	147 final startMatch = pattern.firstMatch(parser.currentSource);

149 if ((startMatch != null) && (startMatch.start() == 0)) {	148 if ((startMatch != null) && (startMatch.start() == 0)) {

150 // Write any existing plain text up to this point.	149 // Write any existing plain text up to this point.

151 parser.writeText();	150 parser.writeText();

152	151

153 if (onMatch(parser, startMatch)) {	152 if (onMatch(parser, startMatch)) {

154 parser.consume(startMatch.group(0).length);	153 parser.consume(startMatch.group(0).length);

155 }	154 }

156 return true;	155 return true;

157 }	156 }

158 return false;	157 return false;

159 }	158 }

160	159

161 abstract bool match(InlineParser parser, Match match);	160 abstract bool onMatch(InlineParser parser, Match match);

162 }	161 }

163	162

164 /// Matches stuff that should just be passed through as straight text.	163 /// Matches stuff that should just be passed through as straight text.

165 class TextSyntax extends InlineSyntax {	164 class TextSyntax extends InlineSyntax {

166 String substitute;	165 String substitute;

167 TextSyntax(String pattern, [String sub])	166 TextSyntax(String pattern, [String sub])

168 : super(pattern),	167 : super(pattern),

169 substitute = sub;	168 substitute = sub;

170	169

171 bool onMatch(InlineParser parser, Match match) {	170 bool onMatch(InlineParser parser, Match match) {

172 if (substitute == null) {	171 if (substitute == null) {

173 // Just use the original matched text.	172 // Just use the original matched text.

174 parser.advanceBy(match.group(0).length);	173 parser.advanceBy(match.group(0).length);

175 return false;	174 return false;

176 }	175 }

177	176

178 // Insert the substitution.	177 // Insert the substitution.

179 parser.addNode(new Text(substitute));	178 parser.addNode(new Text(substitute));

180 return true;	179 return true;

181 }	180 }

182 }	181 }

183	182

184 /// Matches autolinks like <http://foo.com>.	183 /// Matches autolinks like `<http://foo.com>`.

185 class AutolinkSyntax extends InlineSyntax {	184 class AutolinkSyntax extends InlineSyntax {

186 AutolinkSyntax()	185 AutolinkSyntax()

187 : super(@'<((http|https|ftp)://[^>]*)>');	186 : super(@'<((http|https|ftp)://[^>]*)>');

188 // TODO(rnystrom): Make case insensitive.	187 // TODO(rnystrom): Make case insensitive.

189	188

190 bool onMatch(InlineParser parser, Match match) {	189 bool onMatch(InlineParser parser, Match match) {

191 final url = match.group(1);	190 final url = match.group(1);

192	191

193 final anchor = new Element.text('a', escapeHtml(url));	192 final anchor = new Element.text('a', escapeHtml(url));

194 anchor.attributes['href'] = url;	193 anchor.attributes['href'] = url;

195 parser.addNode(anchor);	194 parser.addNode(anchor);

196	195

197 return true;	196 return true;

198 }	197 }

199 }	198 }

200	199

201 /// Matches syntax that has a pair of tags and becomes an element, like '*' for	200 /// Matches syntax that has a pair of tags and becomes an element, like `*` for

202 /// `<em>`. Allows nested tags.	201 /// `<em>`. Allows nested tags.

203 class TagSyntax extends InlineSyntax {	202 class TagSyntax extends InlineSyntax {

204 final RegExp endPattern;	203 final RegExp endPattern;

205 final String tag;	204 final String tag;

206	205

207 TagSyntax(String pattern, [String tag, String end = null])	206 TagSyntax(String pattern, [String tag, String end = null])

208 : super(pattern),	207 : super(pattern),

209 endPattern = new RegExp((end != null) ? end : pattern, true),	208 endPattern = new RegExp((end != null) ? end : pattern, true),

210 tag = tag;	209 tag = tag;

211 // TODO(rnystrom): Doing this.field doesn't seem to work with named args.	210 // TODO(rnystrom): Doing this.field doesn't seem to work with named args.

212 // TODO(rnystrom): Should use named arg for RegExp multiLine.	211 // TODO(rnystrom): Should use named arg for RegExp multiLine.

213	212

214 bool onMatch(InlineParser parser, Match match) {	213 bool onMatch(InlineParser parser, Match match) {

215 parser._stack.add(new TagState(parser.pos, this));	214 parser._stack.add(new TagState(parser.pos,

	215 parser.pos + match.group(0).length, this));

216 return true;	216 return true;

217 }	217 }

218	218

219 bool onMatchEnd(InlineParser parser, Match match, TagState state) {	219 bool onMatchEnd(InlineParser parser, Match match, TagState state) {

220 parser.addNode(new Element(tag, state.children));	220 parser.addNode(new Element(tag, state.children));

221 return true;	221 return true;

222 }	222 }

223 }	223 }

224	224

225 /// Matches inline links like [blah] [id] and [blah] (url).	225 /// Matches inline links like `[blah] [id]` and `[blah] (url)`.

226 class LinkSyntax extends TagSyntax {	226 class LinkSyntax extends TagSyntax {

227 /// The regex for the end of a link needs to handle both reference style and	227 /// The regex for the end of a link needs to handle both reference style and

228 /// inline styles as well as optional titles for inline links. To make that	228 /// inline styles as well as optional titles for inline links. To make that

229 /// a bit more palatable, this breaks it into pieces.	229 /// a bit more palatable, this breaks it into pieces.

230 static get linkPattern() {	230 static get linkPattern() {

231 final bracket = @'\][ \n\t]?'; // "]" with optional space after.	231 final refLink = @'\s?\[([^\]]*)\]'; // "[id]" reflink id.

232 final refLink = @'\[([^\]]*)\]'; // "[id]" reflink id.	232 final title = @'(?:[ ]*"([^"]+)"|)'; // Optional title in quotes.

233 final title = @'(?:[ ]*"([^"]+)"|)'; // Optional title in quotes.	233 final inlineLink = '\\s?\\(([^ )]+)$title\\)'; // "(url "title")" link.

234 final inlineLink = '\\(([^ )]+)$title\\)'; // "(url "title")" inline link.	234 return '\](?:($refLink|$inlineLink)|)';

235 return '$bracket(?:$refLink|$inlineLink)';	235

	236 // The groups matched by this are:

	237 // 1: Will be non-empty if it's either a ref or inline link. Will be empty

	238 // if it's just a bare pair of square brackets with nothing after them.

	239 // 2: Contains the id inside [] for a reference-style link.

	240 // 3: Contains the URL for an inline link.

	241 // 4: Contains the title, if present, for an inline link.

236 }	242 }

237	243

238 LinkSyntax()	244 LinkSyntax()

239 : super(@'\[', end: linkPattern);	245 : super(@'\[', end: linkPattern);

240	246

241 bool onMatchEnd(InlineParser parser, Match match, TagState state) {	247 bool onMatchEnd(InlineParser parser, Match match, TagState state) {

242 var url;	248 var url;

243 var title;	249 var title;

244	250

245 if (match.group(2) != '') {	251 // If we didn't match refLink or inlineLink, then it means there was

	252 // nothing after the first square bracket, so it isn't a normal markdown

	253 // link at all. Instead, we allow users of the library to specify a special

	254 // resolver function ([setImplicitLinkResolver]) that may choose to handle

	255 // this. Otherwise, it's just treated as plain text.

	256 if ((match.group(1) == null) || (match.group(1) == '')) {

	257 if (_implicitLinkResolver == null) return false;

	258

	259 // Only allow implicit links if the content is just text.

	260 // TODO(rnystrom): Do we want to relax this?

	261 if (state.children.length != 1) return false;

	262 if (state.children[0] is! Text) return false;

	263

	264 Text link = state.children[0];

	265

	266 // See if we have a resolver that will generate a link for us.

	267 final node = _implicitLinkResolver(link.text);

	268 if (node == null) return false;

	269

	270 parser.addNode(node);

	271 return true;

	272 }

	273

	274 if ((match.group(3) != null) && (match.group(3) != '')) {

246 // Inline link like [foo](url).	275 // Inline link like [foo](url).

247 url = match.group(2);	276 url = match.group(3);

248 title = match.group(3);	277 title = match.group(4);

249	278

250 // For whatever reason, markdown allows angle-bracketed URLs here.	279 // For whatever reason, markdown allows angle-bracketed URLs here.

251 if (url.startsWith('<') && url.endsWith('>')) {	280 if (url.startsWith('<') && url.endsWith('>')) {

252 url = url.substring(1, url.length - 1);	281 url = url.substring(1, url.length - 1);

253 }	282 }

254 } else {	283 } else {

255 // Reference link like [foo] [bar].	284 // Reference link like [foo] [bar].

256 var id = match.group(1);	285 var id = match.group(2);

257 if (id == '') {	286 if (id == '') {

258 // The id is empty ("[]") so infer it from the contents.	287 // The id is empty ("[]") so infer it from the contents.

259 id = parser.source.substring(state.startPos + 1, parser.pos);	288 id = parser.source.substring(state.startPos + 1, parser.pos);

260 }	289 }

261	290

262 // Look up the link.	291 // Look up the link.

263 final link = parser.document.refLinks[id];	292 final link = parser.document.refLinks[id];

264 // If it's an unknown link just emit plaintext.	293 // If it's an unknown link just emit plaintext.

265 if (link == null) return false;	294 if (link == null) return false;

266	295

(...skipping 22 matching lines...) Expand all Loading...
289 return true;	318 return true;

290 }	319 }

291 }	320 }

292	321

293 /// Keeps track of a currently open tag while it is being parsed. The parser	322 /// Keeps track of a currently open tag while it is being parsed. The parser

294 /// maintains a stack of these so it can handle nested tags.	323 /// maintains a stack of these so it can handle nested tags.

295 class TagState {	324 class TagState {

296 /// The point in the original source where this tag started.	325 /// The point in the original source where this tag started.

297 int startPos;	326 int startPos;

298	327

	328 /// The point in the original source where the open tag ended.

	329 int endPos;

	330

299 /// The syntax that created this node.	331 /// The syntax that created this node.

300 final TagSyntax syntax;	332 final TagSyntax syntax;

301	333

302 /// The children of this node. Will be `null` for text nodes.	334 /// The children of this node. Will be `null` for text nodes.

303 final List<Node> children;	335 final List<Node> children;

304	336

305 TagState(this.startPos, this.syntax)	337 TagState(this.startPos, this.endPos, this.syntax)

306 : children = <Node>[];	338 : children = <Node>[];

307	339

308 /// Attempts to close this tag by matching the current text against its end	340 /// Attempts to close this tag by matching the current text against its end

309 /// pattern.	341 /// pattern.

310 bool tryMatch(InlineParser parser) {	342 bool tryMatch(InlineParser parser) {

311 Match endMatch = syntax.endPattern.firstMatch(parser.currentSource);	343 Match endMatch = syntax.endPattern.firstMatch(parser.currentSource);

312 if ((endMatch != null) && (endMatch.start() == 0)) {	344 if ((endMatch != null) && (endMatch.start() == 0)) {

313 // Close the tag.	345 // Close the tag.

314 close(parser, endMatch);	346 close(parser, endMatch);

315 return true;	347 return true;

316 }	348 }

317	349

318 return false;	350 return false;

319 }	351 }

320	352

321 /// Pops this tag off the stack, completes it, and adds it to the output.	353 /// Pops this tag off the stack, completes it, and adds it to the output.

322 /// Will discard any unmatched tags that happen to be above it on the stack.	354 /// Will discard any unmatched tags that happen to be above it on the stack.

323 /// If this is the last node in the stack, returns its children.	355 /// If this is the last node in the stack, returns its children.

324 List<Node> close(InlineParser parser, Match endMatch) {	356 List<Node> close(InlineParser parser, Match endMatch) {

325 // Found a match. If there is anything above this tag on the stack,	357 // If there are unclosed tags on top of this one when it's closed, that

326 // discard it. For example, given '*a _b*...' when we reach the second	358 // means they are mismatched. Mismatched tags are treated as plain text in

327 // '*', '_' will be on the top of the stack. It's mismatched, so we	359 // markdown. So for each tag above this one, we write its start tag as text

328 // just treat it as text.	360 // and then add its children to this one's children.

329 while (parser._stack.last() != this) parser.discardUnmatchedTag();	361 int index = parser._stack.indexOf(this);

	362

	363 // Remove the unmatched children.

	364 final unmatchedTags = parser._stack.getRange(index + 1,

	365 parser._stack.length - index - 1);

	366 parser._stack.removeRange(index + 1, parser._stack.length - index - 1);

	367

	368 // Flatten them out onto this tag.

	369 for (final unmatched in unmatchedTags) {

	370 // Write the start tag as text.

	371 parser.writeTextRange(unmatched.startPos, unmatched.endPos);

	372

	373 // Bequeath its children unto this tag.

	374 children.addAll(unmatched.children);

	375 }

330	376

331 // Pop this off the stack.	377 // Pop this off the stack.

332 parser.writeText();	378 parser.writeText();

333 parser._stack.removeLast();	379 parser._stack.removeLast();

334	380

335 // If the stack is empty now, this is the special "results" node.	381 // If the stack is empty now, this is the special "results" node.

336 if (parser._stack.length == 0) return children;	382 if (parser._stack.length == 0) return children;

337	383

338 // We are still parsing, so add this to its parent's children.	384 // We are still parsing, so add this to its parent's children.

339 if (syntax.onMatchEnd(parser, endMatch, this)) {	385 if (syntax.onMatchEnd(parser, endMatch, this)) {

340 parser.consume(endMatch.group(0).length);	386 parser.consume(endMatch.group(0).length);

341 } else {	387 } else {

342 // Didn't close correctly so revert to text.	388 // Didn't close correctly so revert to text.

343 parser.start = startPos;	389 parser.start = startPos;

344 parser.advanceBy(endMatch.group(0).length);	390 parser.advanceBy(endMatch.group(0).length);

345 }	391 }

346	392

347 return null;	393 return null;

348 }	394 }

349 }	395 }

OLD	NEW

« no previous file with comments | « utils/markdown/block_parser.dart ('k') | utils/markdown/lib.dart » ('j') | no next file with comments »