Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(300)

Side by Side Diff: utils/markdown/inline_parser.dart

Issue 8725007: Lots of stuff hooking up markdown to dartdoc. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Respond to awesome reviews. Created 9 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « utils/markdown/block_parser.dart ('k') | utils/markdown/lib.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 /// Maintains the internal state needed to parse inline span elements in 5 /// Maintains the internal state needed to parse inline span elements in
6 /// markdown. 6 /// markdown.
7 class InlineParser { 7 class InlineParser {
8 static List<InlineSyntax> get syntaxes() { 8 static List<InlineSyntax> get syntaxes() {
9 // Lazy initialize. 9 // Lazy initialize.
10 if (_syntaxes == null) { 10 if (_syntaxes == null) {
11 _syntaxes = <InlineSyntax>[ 11 _syntaxes = <InlineSyntax>[
12 new AutolinkSyntax(), 12 new AutolinkSyntax(),
13 new LinkSyntax(), 13 new LinkSyntax(),
14 // "*" surrounded by spaces is left alone. 14 // "*" surrounded by spaces is left alone.
15 new TextSyntax(@' \* '), 15 new TextSyntax(@' \* '),
16 // "_" surrounded by spaces is left alone. 16 // "_" surrounded by spaces is left alone.
17 new TextSyntax(@' _ '), 17 new TextSyntax(@' _ '),
18 // Leave already-encoded HTML entities alone. Ensures we don't turn 18 // Leave already-encoded HTML entities alone. Ensures we don't turn
19 // "&amp;" into "&amp;amp;" 19 // "&amp;" into "&amp;amp;"
20 new TextSyntax(@'&[#a-zA-Z0-9]*;'), 20 new TextSyntax(@'&[#a-zA-Z0-9]*;'),
21 // Encode "&". 21 // Encode "&".
22 new TextSyntax(@'&', sub: '&amp;'), 22 new TextSyntax(@'&', sub: '&amp;'),
23 // Encode "<". (Why not encode ">" too? Gruber is toying with us.) 23 // Encode "<". (Why not encode ">" too? Gruber is toying with us.)
24 new TextSyntax(@'<', sub: '&lt;'), 24 new TextSyntax(@'<', sub: '&lt;'),
25 // Parse "**strong**" tags. 25 // Parse "**strong**" tags.
26 new TagSyntax(@'\*\*', tag: 'strong'), 26 new TagSyntax(@'\*\*', tag: 'strong'),
27 // Parse "__strong__" tags. 27 // Parse "__strong__" tags.
28 new TagSyntax(@'__', tag: 'strong'), 28 new TagSyntax(@'__', tag: 'strong'),
29 // Parse "*emphasis*" tags. 29 // Parse "*emphasis*" tags.
30 new TagSyntax(@'\*', tag: 'em'), 30 new TagSyntax(@'\*', tag: 'em'),
31 // Parse "_emphasis_" tags. 31 // Parse "_emphasis_" tags.
32 // TODO(rnystrom): Underscores in the middle of a word should not be 32 // TODO(rnystrom): Underscores in the middle of a word should not be
33 // parsed as emphasis like_in_this. 33 // parsed as emphasis like_in_this.
34 new TagSyntax(@'_', tag: 'em'), 34 new TagSyntax(@'_', tag: 'em'),
35 // Parse inline code within double backticks: "``code``". 35 // Parse inline code within double backticks: "``code``".
36 new CodeSyntax(@'``[ ]?(.*?)[ ]?``'), 36 new CodeSyntax(@'``\s?((?:.|\n)*?)\s?``'),
37 // Parse inline code within backticks: "`code`". 37 // Parse inline code within backticks: "`code`".
38 new CodeSyntax(@'`([^`]*)`') 38 new CodeSyntax(@'`([^`]*)`')
39 ]; 39 ];
40 } 40 }
41 41
42 return _syntaxes; 42 return _syntaxes;
43 } 43 }
44 44
45 static List<InlineSyntax> _syntaxes; 45 static List<InlineSyntax> _syntaxes;
46 46
47 /// The string of markdown being parsed. 47 /// The string of markdown being parsed.
48 final String source; 48 final String source;
49 49
50 /// The markdown document this parser is parsing. 50 /// The markdown document this parser is parsing.
51 final Document document; 51 final Document document;
52 52
53 /// The current read position. 53 /// The current read position.
54 int pos = 0; 54 int pos = 0;
55 55
56 /// Starting position of the last unconsumed text. 56 /// Starting position of the last unconsumed text.
57 int start = 0; 57 int start = 0;
58 58
59 final List<TagState> _stack; 59 final List<TagState> _stack;
60 60
61 InlineParser(this.source, this.document) 61 InlineParser(this.source, this.document)
62 : _stack = <TagState>[]; 62 : _stack = <TagState>[];
63 63
64 List<Node> parse() { 64 List<Node> parse() {
65 // Make a fake top tag to hold the results. 65 // Make a fake top tag to hold the results.
66 _stack.add(new TagState(0, null)); 66 _stack.add(new TagState(0, 0, null));
67 67
68 while (!isDone) { 68 while (!isDone) {
69 bool matched = false; 69 bool matched = false;
70 70
71 // See if any of the current tags on the stack match. We don't allow tags 71 // See if any of the current tags on the stack match. We don't allow tags
72 // of the same kind to nest, so this takes priority over other possible matches. 72 // of the same kind to nest, so this takes priority over other possible matches.
73 for (int i = _stack.length - 1; i > 0; i--) { 73 for (int i = _stack.length - 1; i > 0; i--) {
74 if (_stack[i].tryMatch(this)) { 74 if (_stack[i].tryMatch(this)) {
75 matched = true; 75 matched = true;
76 break; 76 break;
(...skipping 12 matching lines...) Expand all
89 89
90 // If we got here, it's just text. 90 // If we got here, it's just text.
91 advanceBy(1); 91 advanceBy(1);
92 } 92 }
93 93
94 // Unwind any unmatched tags and get the results. 94 // Unwind any unmatched tags and get the results.
95 return _stack[0].close(this, null); 95 return _stack[0].close(this, null);
96 } 96 }
97 97
98 writeText() { 98 writeText() {
99 if (pos > start) { 99 writeTextRange(start, pos);
100 final text = source.substring(start, pos); 100 start = pos;
101 }
102
103 writeTextRange(int start, int end) {
104 if (end > start) {
105 final text = source.substring(start, end);
101 final nodes = _stack.last().children; 106 final nodes = _stack.last().children;
102 107
103 // If the previous node is text too, just append. 108 // If the previous node is text too, just append.
104 if ((nodes.length > 0) && (nodes.last() is Text)) { 109 if ((nodes.length > 0) && (nodes.last() is Text)) {
105 final newNode = new Text('${nodes.last().text}$text'); 110 final newNode = new Text('${nodes.last().text}$text');
106 nodes[nodes.length - 1] = newNode; 111 nodes[nodes.length - 1] = newNode;
107 } else { 112 } else {
108 nodes.add(new Text(text)); 113 nodes.add(new Text(text));
109 } 114 }
110
111 start = pos;
112 } 115 }
113 } 116 }
114 117
115 /// Removes the top tag from the stack, reverts it to plain text and adds it
116 /// to the output.
117 discardUnmatchedTag() {
118 final unfinished = _stack.removeLast();
119 start = unfinished.startPos;
120 }
121
122 addNode(Node node) { 118 addNode(Node node) {
123 _stack.last().children.add(node); 119 _stack.last().children.add(node);
124 } 120 }
125 121
126 // TODO(rnystrom): Only need this because RegExp doesn't let you start 122 // TODO(rnystrom): Only need this because RegExp doesn't let you start
127 // searching from a given offset. 123 // searching from a given offset.
128 String get currentSource() => source.substring(pos, source.length); 124 String get currentSource() => source.substring(pos, source.length);
129 125
130 bool get isDone() => pos == source.length; 126 bool get isDone() => pos == source.length;
131 127
132 void advanceBy(int length) => pos += length; 128 void advanceBy(int length) {
129 pos += length;
130 }
131
133 void consume(int length) { 132 void consume(int length) {
134 pos += length; 133 pos += length;
135 start = pos; 134 start = pos;
136 } 135 }
137 } 136 }
138 137
139 /// Represents one kind of markdown tag that can be parsed. 138 /// Represents one kind of markdown tag that can be parsed.
140 class InlineSyntax { 139 class InlineSyntax {
141 final RegExp pattern; 140 final RegExp pattern;
142 141
143 InlineSyntax(String pattern) 142 InlineSyntax(String pattern)
144 : pattern = new RegExp(pattern, true); 143 : pattern = new RegExp(pattern, true);
145 // TODO(rnystrom): Should use named arg for RegExp multiLine. 144 // TODO(rnystrom): Should use named arg for RegExp multiLine.
146 145
147 bool tryMatch(InlineParser parser) { 146 bool tryMatch(InlineParser parser) {
148 final startMatch = pattern.firstMatch(parser.currentSource); 147 final startMatch = pattern.firstMatch(parser.currentSource);
149 if ((startMatch != null) && (startMatch.start() == 0)) { 148 if ((startMatch != null) && (startMatch.start() == 0)) {
150 // Write any existing plain text up to this point. 149 // Write any existing plain text up to this point.
151 parser.writeText(); 150 parser.writeText();
152 151
153 if (onMatch(parser, startMatch)) { 152 if (onMatch(parser, startMatch)) {
154 parser.consume(startMatch.group(0).length); 153 parser.consume(startMatch.group(0).length);
155 } 154 }
156 return true; 155 return true;
157 } 156 }
158 return false; 157 return false;
159 } 158 }
160 159
161 abstract bool match(InlineParser parser, Match match); 160 abstract bool onMatch(InlineParser parser, Match match);
162 } 161 }
163 162
164 /// Matches stuff that should just be passed through as straight text. 163 /// Matches stuff that should just be passed through as straight text.
165 class TextSyntax extends InlineSyntax { 164 class TextSyntax extends InlineSyntax {
166 String substitute; 165 String substitute;
167 TextSyntax(String pattern, [String sub]) 166 TextSyntax(String pattern, [String sub])
168 : super(pattern), 167 : super(pattern),
169 substitute = sub; 168 substitute = sub;
170 169
171 bool onMatch(InlineParser parser, Match match) { 170 bool onMatch(InlineParser parser, Match match) {
172 if (substitute == null) { 171 if (substitute == null) {
173 // Just use the original matched text. 172 // Just use the original matched text.
174 parser.advanceBy(match.group(0).length); 173 parser.advanceBy(match.group(0).length);
175 return false; 174 return false;
176 } 175 }
177 176
178 // Insert the substitution. 177 // Insert the substitution.
179 parser.addNode(new Text(substitute)); 178 parser.addNode(new Text(substitute));
180 return true; 179 return true;
181 } 180 }
182 } 181 }
183 182
184 /// Matches autolinks like <http://foo.com>. 183 /// Matches autolinks like `<http://foo.com>`.
185 class AutolinkSyntax extends InlineSyntax { 184 class AutolinkSyntax extends InlineSyntax {
186 AutolinkSyntax() 185 AutolinkSyntax()
187 : super(@'<((http|https|ftp)://[^>]*)>'); 186 : super(@'<((http|https|ftp)://[^>]*)>');
188 // TODO(rnystrom): Make case insensitive. 187 // TODO(rnystrom): Make case insensitive.
189 188
190 bool onMatch(InlineParser parser, Match match) { 189 bool onMatch(InlineParser parser, Match match) {
191 final url = match.group(1); 190 final url = match.group(1);
192 191
193 final anchor = new Element.text('a', escapeHtml(url)); 192 final anchor = new Element.text('a', escapeHtml(url));
194 anchor.attributes['href'] = url; 193 anchor.attributes['href'] = url;
195 parser.addNode(anchor); 194 parser.addNode(anchor);
196 195
197 return true; 196 return true;
198 } 197 }
199 } 198 }
200 199
201 /// Matches syntax that has a pair of tags and becomes an element, like '*' for 200 /// Matches syntax that has a pair of tags and becomes an element, like `*` for
202 /// `<em>`. Allows nested tags. 201 /// `<em>`. Allows nested tags.
203 class TagSyntax extends InlineSyntax { 202 class TagSyntax extends InlineSyntax {
204 final RegExp endPattern; 203 final RegExp endPattern;
205 final String tag; 204 final String tag;
206 205
207 TagSyntax(String pattern, [String tag, String end = null]) 206 TagSyntax(String pattern, [String tag, String end = null])
208 : super(pattern), 207 : super(pattern),
209 endPattern = new RegExp((end != null) ? end : pattern, true), 208 endPattern = new RegExp((end != null) ? end : pattern, true),
210 tag = tag; 209 tag = tag;
211 // TODO(rnystrom): Doing this.field doesn't seem to work with named args. 210 // TODO(rnystrom): Doing this.field doesn't seem to work with named args.
212 // TODO(rnystrom): Should use named arg for RegExp multiLine. 211 // TODO(rnystrom): Should use named arg for RegExp multiLine.
213 212
214 bool onMatch(InlineParser parser, Match match) { 213 bool onMatch(InlineParser parser, Match match) {
215 parser._stack.add(new TagState(parser.pos, this)); 214 parser._stack.add(new TagState(parser.pos,
215 parser.pos + match.group(0).length, this));
216 return true; 216 return true;
217 } 217 }
218 218
219 bool onMatchEnd(InlineParser parser, Match match, TagState state) { 219 bool onMatchEnd(InlineParser parser, Match match, TagState state) {
220 parser.addNode(new Element(tag, state.children)); 220 parser.addNode(new Element(tag, state.children));
221 return true; 221 return true;
222 } 222 }
223 } 223 }
224 224
225 /// Matches inline links like [blah] [id] and [blah] (url). 225 /// Matches inline links like `[blah] [id]` and `[blah] (url)`.
226 class LinkSyntax extends TagSyntax { 226 class LinkSyntax extends TagSyntax {
227 /// The regex for the end of a link needs to handle both reference style and 227 /// The regex for the end of a link needs to handle both reference style and
228 /// inline styles as well as optional titles for inline links. To make that 228 /// inline styles as well as optional titles for inline links. To make that
229 /// a bit more palatable, this breaks it into pieces. 229 /// a bit more palatable, this breaks it into pieces.
230 static get linkPattern() { 230 static get linkPattern() {
231 final bracket = @'\][ \n\t]?'; // "]" with optional space after. 231 final refLink = @'\s?\[([^\]]*)\]'; // "[id]" reflink id.
232 final refLink = @'\[([^\]]*)\]'; // "[id]" reflink id. 232 final title = @'(?:[ ]*"([^"]+)"|)'; // Optional title in quotes.
233 final title = @'(?:[ ]*"([^"]+)"|)'; // Optional title in quotes. 233 final inlineLink = '\\s?\\(([^ )]+)$title\\)'; // "(url "title")" link.
234 final inlineLink = '\\(([^ )]+)$title\\)'; // "(url "title")" inline link. 234 return '\](?:($refLink|$inlineLink)|)';
235 return '$bracket(?:$refLink|$inlineLink)'; 235
236 // The groups matched by this are:
237 // 1: Will be non-empty if it's either a ref or inline link. Will be empty
238 // if it's just a bare pair of square brackets with nothing after them.
239 // 2: Contains the id inside [] for a reference-style link.
240 // 3: Contains the URL for an inline link.
241 // 4: Contains the title, if present, for an inline link.
236 } 242 }
237 243
238 LinkSyntax() 244 LinkSyntax()
239 : super(@'\[', end: linkPattern); 245 : super(@'\[', end: linkPattern);
240 246
241 bool onMatchEnd(InlineParser parser, Match match, TagState state) { 247 bool onMatchEnd(InlineParser parser, Match match, TagState state) {
242 var url; 248 var url;
243 var title; 249 var title;
244 250
245 if (match.group(2) != '') { 251 // If we didn't match refLink or inlineLink, then it means there was
252 // nothing after the first pair of square brackets, so it isn't a normal markdown
253 // link at all. Instead, we allow users of the library to specify a special
254 // resolver function ([setImplicitLinkResolver]) that may choose to handle
255 // this. Otherwise, it's just treated as plain text.
256 if ((match.group(1) == null) || (match.group(1) == '')) {
257 if (_implicitLinkResolver == null) return false;
258
259 // Only allow implicit links if the content is just text.
260 // TODO(rnystrom): Do we want to relax this?
261 if (state.children.length != 1) return false;
262 if (state.children[0] is! Text) return false;
263
264 Text link = state.children[0];
265
266 // See if we have a resolver that will generate a link for us.
267 final node = _implicitLinkResolver(link.text);
268 if (node == null) return false;
269
270 parser.addNode(node);
271 return true;
272 }
273
274 if ((match.group(3) != null) && (match.group(3) != '')) {
246 // Inline link like [foo](url). 275 // Inline link like [foo](url).
247 url = match.group(2); 276 url = match.group(3);
248 title = match.group(3); 277 title = match.group(4);
249 278
250 // For whatever reason, markdown allows angle-bracketed URLs here. 279 // For whatever reason, markdown allows angle-bracketed URLs here.
251 if (url.startsWith('<') && url.endsWith('>')) { 280 if (url.startsWith('<') && url.endsWith('>')) {
252 url = url.substring(1, url.length - 1); 281 url = url.substring(1, url.length - 1);
253 } 282 }
254 } else { 283 } else {
255 // Reference link like [foo] [bar]. 284 // Reference link like [foo] [bar].
256 var id = match.group(1); 285 var id = match.group(2);
257 if (id == '') { 286 if (id == '') {
258 // The id is empty ("[]") so infer it from the contents. 287 // The id is empty ("[]") so infer it from the contents.
259 id = parser.source.substring(state.startPos + 1, parser.pos); 288 id = parser.source.substring(state.startPos + 1, parser.pos);
260 } 289 }
261 290
262 // Look up the link. 291 // Look up the link.
263 final link = parser.document.refLinks[id]; 292 final link = parser.document.refLinks[id];
264 // If it's an unknown link just emit plaintext. 293 // If it's an unknown link just emit plaintext.
265 if (link == null) return false; 294 if (link == null) return false;
266 295
(...skipping 22 matching lines...) Expand all
289 return true; 318 return true;
290 } 319 }
291 } 320 }
292 321
293 /// Keeps track of a currently open tag while it is being parsed. The parser 322 /// Keeps track of a currently open tag while it is being parsed. The parser
294 /// maintains a stack of these so it can handle nested tags. 323 /// maintains a stack of these so it can handle nested tags.
295 class TagState { 324 class TagState {
296 /// The point in the original source where this tag started. 325 /// The point in the original source where this tag started.
297 int startPos; 326 int startPos;
298 327
328 /// The point in the original source where the open tag ended.
329 int endPos;
330
299 /// The syntax that created this node. 331 /// The syntax that created this node.
300 final TagSyntax syntax; 332 final TagSyntax syntax;
301 333
302 /// The children of this node. Will be `null` for text nodes. 334 /// The children of this node. Will be `null` for text nodes.
303 final List<Node> children; 335 final List<Node> children;
304 336
305 TagState(this.startPos, this.syntax) 337 TagState(this.startPos, this.endPos, this.syntax)
306 : children = <Node>[]; 338 : children = <Node>[];
307 339
308 /// Attempts to close this tag by matching the current text against its end 340 /// Attempts to close this tag by matching the current text against its end
309 /// pattern. 341 /// pattern.
310 bool tryMatch(InlineParser parser) { 342 bool tryMatch(InlineParser parser) {
311 Match endMatch = syntax.endPattern.firstMatch(parser.currentSource); 343 Match endMatch = syntax.endPattern.firstMatch(parser.currentSource);
312 if ((endMatch != null) && (endMatch.start() == 0)) { 344 if ((endMatch != null) && (endMatch.start() == 0)) {
313 // Close the tag. 345 // Close the tag.
314 close(parser, endMatch); 346 close(parser, endMatch);
315 return true; 347 return true;
316 } 348 }
317 349
318 return false; 350 return false;
319 } 351 }
320 352
321 /// Pops this tag off the stack, completes it, and adds it to the output. 353 /// Pops this tag off the stack, completes it, and adds it to the output.
322 /// Will discard any unmatched tags that happen to be above it on the stack. 354 /// Will discard any unmatched tags that happen to be above it on the stack.
323 /// If this is the last node in the stack, returns its children. 355 /// If this is the last node in the stack, returns its children.
324 List<Node> close(InlineParser parser, Match endMatch) { 356 List<Node> close(InlineParser parser, Match endMatch) {
325 // Found a match. If there is anything above this tag on the stack, 357 // If there are unclosed tags on top of this one when it's closed, that
326 // discard it. For example, given '*a _b*...' when we reach the second 358 // means they are mismatched. Mismatched tags are treated as plain text in
327 // '*', '_' will be on the top of the stack. It's mismatched, so we 359 // markdown. So for each tag above this one, we write its start tag as text
328 // just treat it as text. 360 // and then add its children to this one's children.
329 while (parser._stack.last() != this) parser.discardUnmatchedTag(); 361 int index = parser._stack.indexOf(this);
362
363 // Remove the unmatched children.
364 final unmatchedTags = parser._stack.getRange(index + 1,
365 parser._stack.length - index - 1);
366 parser._stack.removeRange(index + 1, parser._stack.length - index - 1);
367
368 // Flatten them out onto this tag.
369 for (final unmatched in unmatchedTags) {
370 // Write the start tag as text.
371 parser.writeTextRange(unmatched.startPos, unmatched.endPos);
372
373 // Bequeath its children unto this tag.
374 children.addAll(unmatched.children);
375 }
330 376
331 // Pop this off the stack. 377 // Pop this off the stack.
332 parser.writeText(); 378 parser.writeText();
333 parser._stack.removeLast(); 379 parser._stack.removeLast();
334 380
335 // If the stack is empty now, this is the special "results" node. 381 // If the stack is empty now, this is the special "results" node.
336 if (parser._stack.length == 0) return children; 382 if (parser._stack.length == 0) return children;
337 383
338 // We are still parsing, so add this to its parent's children. 384 // We are still parsing, so add this to its parent's children.
339 if (syntax.onMatchEnd(parser, endMatch, this)) { 385 if (syntax.onMatchEnd(parser, endMatch, this)) {
340 parser.consume(endMatch.group(0).length); 386 parser.consume(endMatch.group(0).length);
341 } else { 387 } else {
342 // Didn't close correctly so revert to text. 388 // Didn't close correctly so revert to text.
343 parser.start = startPos; 389 parser.start = startPos;
344 parser.advanceBy(endMatch.group(0).length); 390 parser.advanceBy(endMatch.group(0).length);
345 } 391 }
346 392
347 return null; 393 return null;
348 } 394 }
349 } 395 }
OLDNEW
« no previous file with comments | « utils/markdown/block_parser.dart ('k') | utils/markdown/lib.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698