OLD | NEW |
| (Empty) |
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
/// Maintains the internal state needed to parse inline span elements in
/// markdown.
///
/// [parse] walks [source] one position at a time. At each position it first
/// lets every open tag on the stack try to close, then tries each entry of
/// [syntaxes] in order, and otherwise advances over the character as plain
/// text.
class InlineParser {
  /// The inline syntaxes tried at each position, in list order. Built on first
  /// access and cached in [_syntaxes].
  static List<InlineSyntax> get syntaxes() {
    // Lazy initialize.
    if (_syntaxes == null) {
      _syntaxes = <InlineSyntax>[
        new AutolinkSyntax(),
        new LinkSyntax(),
        // "*" surrounded by spaces is left alone.
        new TextSyntax(@' \* '),
        // "_" surrounded by spaces is left alone.
        new TextSyntax(@' _ '),
        // Leave already-encoded HTML entities alone. Ensures we don't turn
        // "&amp;" into "&amp;amp;"
        new TextSyntax(@'&[#a-zA-Z0-9]*;'),
        // Encode "&". The substitute must be the entity, not the bare
        // character, otherwise this rule is a no-op.
        new TextSyntax(@'&', sub: '&amp;'),
        // Encode "<". (Why not encode ">" too? Gruber is toying with us.)
        new TextSyntax(@'<', sub: '&lt;'),
        // Parse "**strong**" tags. Listed before "*" so the longer delimiter
        // gets the first chance to match.
        new TagSyntax(@'\*\*', tag: 'strong'),
        // Parse "__strong__" tags.
        new TagSyntax(@'__', tag: 'strong'),
        // Parse "*emphasis*" tags.
        new TagSyntax(@'\*', tag: 'em'),
        // Parse "_emphasis_" tags.
        // TODO(rnystrom): Underscores in the middle of a word should not be
        // parsed as emphasis like_in_this.
        new TagSyntax(@'_', tag: 'em'),
        // Parse inline code within double backticks: "``code``".
        new CodeSyntax(@'``\s?((?:.|\n)*?)\s?``'),
        // Parse inline code within backticks: "`code`".
        new CodeSyntax(@'`([^`]*)`')
      ];
    }

    return _syntaxes;
  }

  /// Backing store for [syntaxes]; `null` until first requested.
  static List<InlineSyntax> _syntaxes;

  /// The string of markdown being parsed.
  final String source;

  /// The markdown document this parser is parsing.
  final Document document;

  /// The current read position.
  int pos = 0;

  /// Starting position of the last unconsumed text.
  int start = 0;

  /// The currently open tags, innermost last. Index 0 is the fake top tag that
  /// [parse] pushes to collect the results.
  final List<TagState> _stack;

  InlineParser(this.source, this.document)
    : _stack = <TagState>[];

  /// Parses [source] and returns the resulting top-level nodes.
  List<Node> parse() {
    // Make a fake top tag to hold the results.
    _stack.add(new TagState(0, 0, null));

    while (!isDone) {
      bool matched = false;

      // See if any of the current tags on the stack match. We don't allow tags
      // of the same kind to nest, so this takes priority over other possible
      // matches. Index 0 is the fake top tag and is never tested.
      for (int i = _stack.length - 1; i > 0; i--) {
        if (_stack[i].tryMatch(this)) {
          matched = true;
          break;
        }
      }
      if (matched) continue;

      // See if the current text matches any defined markdown syntax.
      for (final syntax in syntaxes) {
        if (syntax.tryMatch(this)) {
          matched = true;
          break;
        }
      }
      if (matched) continue;

      // If we got here, it's just text.
      advanceBy(1);
    }

    // Unwind any unmatched tags and get the results.
    return _stack[0].close(this, null);
  }

  /// Writes the pending text between [start] and [pos] to the innermost open
  /// tag and marks it as written.
  writeText() {
    writeTextRange(start, pos);
    start = pos;
  }

  /// Appends `source[start, end)` as text to the innermost open tag. Does
  /// nothing when the range is empty.
  writeTextRange(int start, int end) {
    if (end > start) {
      final text = source.substring(start, end);
      final nodes = _stack.last().children;

      // If the previous node is text too, just append.
      if ((nodes.length > 0) && (nodes.last() is Text)) {
        final newNode = new Text('${nodes.last().text}$text');
        nodes[nodes.length - 1] = newNode;
      } else {
        nodes.add(new Text(text));
      }
    }
  }

  /// Adds [node] as a child of the innermost open tag.
  addNode(Node node) {
    _stack.last().children.add(node);
  }

  // TODO(rnystrom): Only need this because RegExp doesn't let you start
  // searching from a given offset.
  /// The part of [source] from [pos] to the end.
  String get currentSource() => source.substring(pos, source.length);

  /// Whether [pos] has reached the end of [source].
  bool get isDone() => pos == source.length;

  /// Moves [pos] forward by [length], leaving the skipped characters pending
  /// as unwritten text.
  void advanceBy(int length) {
    pos += length;
  }

  /// Moves [pos] forward by [length] and moves [start] up to it, so the
  /// skipped characters are not written as text.
  void consume(int length) {
    pos += length;
    start = pos;
  }
}
137 | |
/// Represents one kind of markdown tag that can be parsed.
class InlineSyntax {
  /// The expression that recognizes the start of this syntax.
  final RegExp pattern;

  // TODO(rnystrom): Should use named arg for RegExp multiLine.
  InlineSyntax(String pattern)
    : pattern = new RegExp(pattern, true);

  /// Tests this syntax against the parser's remaining source.
  ///
  /// Returns `true` when [pattern] matches exactly at the current position.
  /// In that case pending text is written first and [onMatch] is invoked; the
  /// matched text is consumed only if [onMatch] returns `true`.
  bool tryMatch(InlineParser parser) {
    final Match found = pattern.firstMatch(parser.currentSource);

    // Only a match anchored at the current position counts.
    if (found == null) return false;
    if (found.start() != 0) return false;

    // Write any existing plain text up to this point.
    parser.writeText();

    final bool shouldConsume = onMatch(parser, found);
    if (shouldConsume) {
      parser.consume(found[0].length);
    }
    return true;
  }

  /// Handles a successful [match]. Returning `true` tells [tryMatch] to
  /// consume the matched text.
  abstract bool onMatch(InlineParser parser, Match match);
}
162 | |
/// Matches stuff that should just be passed through as straight text.
class TextSyntax extends InlineSyntax {
  /// Replacement text to emit instead of the match, or `null` to keep the
  /// matched text as-is.
  String substitute;

  TextSyntax(String pattern, [String sub])
    : super(pattern),
      substitute = sub;

  bool onMatch(InlineParser parser, Match match) {
    if (substitute != null) {
      // Insert the substitution and let the caller consume the match.
      parser.addNode(new Text(substitute));
      return true;
    }

    // No substitute: step over the match so the original text stays pending
    // and is written out later as plain text.
    parser.advanceBy(match[0].length);
    return false;
  }
}
182 | |
/// Matches autolinks like `<http://foo.com>`.
class AutolinkSyntax extends InlineSyntax {
  // TODO(rnystrom): Make case insensitive.
  AutolinkSyntax()
    : super(@'<((http|https|ftp)://[^>]*)>');

  /// Emits an `a` element whose text is the HTML-escaped URL and whose `href`
  /// is the URL as captured.
  bool onMatch(InlineParser parser, Match match) {
    final href = match[1];
    final link = new Element.text('a', escapeHtml(href));
    link.attributes['href'] = href;

    parser.addNode(link);
    return true;
  }
}
199 | |
/// Matches syntax that has a pair of tags and becomes an element, like `*` for
/// `<em>`. Allows nested tags.
class TagSyntax extends InlineSyntax {
  /// Recognizes the closing delimiter. Built from the opening pattern when no
  /// explicit `end` is given.
  final RegExp endPattern;

  /// Name of the element created when the tag closes.
  final String tag;

  // TODO(rnystrom): Doing this.field doesn't seem to work with named args.
  // TODO(rnystrom): Should use named arg for RegExp multiLine.
  TagSyntax(String pattern, [String tag, String end = null])
    : super(pattern),
      endPattern = new RegExp((end == null) ? pattern : end, true),
      tag = tag;

  /// Opens the tag: pushes a new [TagState] that records where the opening
  /// delimiter starts and ends in the source.
  bool onMatch(InlineParser parser, Match match) {
    final int openStart = parser.pos;
    final int openEnd = openStart + match[0].length;
    parser._stack.add(new TagState(openStart, openEnd, this));
    return true;
  }

  /// Closes the tag: wraps the children collected in [state] in a [tag]
  /// element and adds it to the parser.
  bool onMatchEnd(InlineParser parser, Match match, TagState state) {
    final element = new Element(tag, state.children);
    parser.addNode(element);
    return true;
  }
}
224 | |
/// Matches inline links like `[blah] [id]` and `[blah] (url)`.
class LinkSyntax extends TagSyntax {
  /// The regex for the end of a link needs to handle both reference style and
  /// inline styles as well as optional titles for inline links. To make that
  /// a bit more palatable, this breaks it into pieces.
  ///
  /// The groups matched by the resulting pattern are:
  ///
  /// 1. Non-empty if it's either a ref or inline link. Empty if it's just a
  ///    bare pair of square brackets with nothing after them.
  /// 2. The id inside `[]` for a reference-style link.
  /// 3. The URL for an inline link.
  /// 4. The title, if present, for an inline link.
  static get linkPattern() {
    // "[id]" reflink id.
    final refLink = @'\s?\[([^\]]*)\]';
    // Optional title in quotes.
    final title = @'(?:[ ]*"([^"]+)"|)';
    // "(url "title")" link.
    final inlineLink = '\\s?\\(([^ )]+)$title\\)';

    return '\](?:($refLink|$inlineLink)|)';
  }

  LinkSyntax()
    : super(@'\[', end: linkPattern);

  /// Whether a capture group is missing or matched nothing.
  static bool _isBlank(String text) => (text == null) || (text == '');

  /// Builds the anchor for a closed link.
  ///
  /// Returns `false` when no link can be produced (no implicit resolver,
  /// non-text implicit content, resolver declined, or unknown reference id).
  bool onMatchEnd(InlineParser parser, Match match, TagState state) {
    // If we didn't match refLink or inlineLink, then it means there was
    // nothing after the first square bracket, so it isn't a normal markdown
    // link at all. Instead, we allow users of the library to specify a special
    // resolver function ([setImplicitLinkResolver]) that may choose to handle
    // this. Otherwise, it's just treated as plain text.
    if (_isBlank(match[1])) {
      if (_implicitLinkResolver == null) return false;

      // Only allow implicit links if the content is just text.
      // TODO(rnystrom): Do we want to relax this?
      final content = state.children;
      if (content.length != 1) return false;
      if (content[0] is! Text) return false;

      Text link = content[0];

      // See if we have a resolver that will generate a link for us.
      final resolved = _implicitLinkResolver(link.text);
      if (resolved == null) return false;

      parser.addNode(resolved);
      return true;
    }

    var url;
    var title;

    if (_isBlank(match[3])) {
      // Reference link like [foo] [bar].
      var id = match[2];
      if (id == '') {
        // The id is empty ("[]") so infer it from the contents.
        id = parser.source.substring(state.startPos + 1, parser.pos);
      }

      // Look up the link. If it's an unknown link just emit plaintext.
      final link = parser.document.refLinks[id];
      if (link == null) return false;

      url = link.url;
      title = link.title;
    } else {
      // Inline link like [foo](url).
      url = match[3];
      title = match[4];

      // For whatever reason, markdown allows angle-bracketed URLs here.
      if (url.startsWith('<') && url.endsWith('>')) {
        url = url.substring(1, url.length - 1);
      }
    }

    final anchor = new Element('a', state.children);
    anchor.attributes['href'] = escapeHtml(url);
    if (!_isBlank(title)) {
      anchor.attributes['title'] = escapeHtml(title);
    }

    parser.addNode(anchor);
    return true;
  }
}
310 | |
/// Matches backtick-enclosed inline code blocks.
class CodeSyntax extends InlineSyntax {
  CodeSyntax(String pattern)
    : super(pattern);

  /// Emits a `code` element containing the HTML-escaped first capture group.
  bool onMatch(InlineParser parser, Match match) {
    final escaped = escapeHtml(match[1]);
    parser.addNode(new Element.text('code', escaped));
    return true;
  }
}
321 | |
/// Keeps track of a currently open tag while it is being parsed. The parser
/// maintains a stack of these so it can handle nested tags.
class TagState {
  /// The point in the original source where this tag started.
  int startPos;

  /// The point in the original source where open tag ended.
  int endPos;

  /// The syntax that created this node.
  final TagSyntax syntax;

  /// The nodes collected inside this tag so far.
  final List<Node> children;

  TagState(this.startPos, this.endPos, this.syntax)
    : children = <Node>[];

  /// Attempts to close this tag by matching the current text against its end
  /// pattern.
  bool tryMatch(InlineParser parser) {
    Match endMatch = syntax.endPattern.firstMatch(parser.currentSource);

    // The end pattern has to match right at the current position.
    if (endMatch == null) return false;
    if (endMatch.start() != 0) return false;

    // Close the tag.
    close(parser, endMatch);
    return true;
  }

  /// Pops this tag off the stack, completes it, and adds it to the output.
  /// Will discard any unmatched tags that happen to be above it on the stack.
  /// If this is the last node in the stack, returns its children.
  List<Node> close(InlineParser parser, Match endMatch) {
    // If there are unclosed tags on top of this one when it's closed, that
    // means they are mismatched. Mismatched tags are treated as plain text in
    // markdown. So for each tag above this one, we write its start tag as text
    // and then add its children to this one's children.
    final stack = parser._stack;
    final int firstAbove = stack.indexOf(this) + 1;
    final int countAbove = stack.length - firstAbove;

    // Remove the unmatched children.
    final unmatchedTags = stack.getRange(firstAbove, countAbove);
    stack.removeRange(firstAbove, countAbove);

    // Flatten them out onto this tag.
    for (final unmatched in unmatchedTags) {
      // Write the start tag as text.
      parser.writeTextRange(unmatched.startPos, unmatched.endPos);

      // Bequeath its children unto this tag.
      children.addAll(unmatched.children);
    }

    // Pop this off the stack.
    parser.writeText();
    stack.removeLast();

    // If the stack is empty now, this is the special "results" node.
    if (stack.length == 0) return children;

    // We are still parsing, so add this to its parent's children.
    final bool closed = syntax.onMatchEnd(parser, endMatch, this);
    if (!closed) {
      // Didn't close correctly so revert to text.
      parser.start = startPos;
      parser.advanceBy(endMatch[0].length);
      return null;
    }

    parser.consume(endMatch[0].length);
    return null;
  }
}
OLD | NEW |