OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
/// Maintains the internal state needed to parse inline span elements in
/// markdown.
///
/// The parser scans [source] from left to right. At each position it first
/// gives the currently open tags a chance to close, then tries every entry of
/// [syntaxes] in order, and otherwise treats the character as plain text.
class InlineParser {
  /// The inline syntaxes tried at each position, in priority order: the first
  /// one whose pattern matches at the current position wins.
  ///
  /// The list is built on first access and then reused.
  static List<InlineSyntax> get syntaxes() {
    // Lazy initialize.
    if (_syntaxes == null) {
      _syntaxes = <InlineSyntax>[
        // Review (Jennifer Messerly, 2011/11/23): const ctors?
        // Reply (Bob Nystrom, 2011/11/29): see the similar comment on the
        // block parser.
        new AutolinkSyntax(),
        new LinkSyntax(),
        // "*" surrounded by spaces is left alone.
        new TextSyntax(@' \* '),
        // "_" surrounded by spaces is left alone.
        new TextSyntax(@' _ '),
        // Leave already-encoded HTML entities alone. Ensures we don't turn
        // "&amp;" into "&amp;amp;"
        new TextSyntax(@'&[#a-zA-Z0-9]*;'),
        // Encode "&".
        new TextSyntax(@'&', sub: '&amp;'),
        // Encode "<". (Why not encode ">" too? Gruber is toying with us.)
        new TextSyntax(@'<', sub: '&lt;'),
        // Parse "**strong**" tags.
        new TagSyntax(@'\*\*', tag: 'strong'),
        // Parse "__strong__" tags.
        new TagSyntax(@'__', tag: 'strong'),
        // Parse "*emphasis*" tags.
        new TagSyntax(@'\*', tag: 'em'),
        // Parse "_emphasis_" tags.
        // TODO(rnystrom): Underscores in the middle of a word should not be
        // parsed as emphasis like_in_this.
        new TagSyntax(@'_', tag: 'em'),
        // Parse inline code within double backticks: "``code``".
        new CodeSyntax(@'``[ ]?(.*?)[ ]?``'),
        // Parse inline code within backticks: "`code`".
        new CodeSyntax(@'`([^`]*)`')
      ];
    }

    return _syntaxes;
  }

  /// Backing store for [syntaxes]; `null` until the first access.
  static List<InlineSyntax> _syntaxes;

  /// The string of markdown being parsed.
  final String source;

  /// The markdown document this parser is parsing.
  final Document document;

  /// The current read position.
  int pos = 0;

  /// Starting position of the last unconsumed text.
  int start = 0;

  /// The tags that are currently open, innermost last. While [parse] runs,
  /// index 0 holds a fake tag that collects the top-level results.
  final List<TagState> _stack;

  InlineParser(this.source, this.document)
    : _stack = <TagState>[];

  /// Parses [source] and returns the resulting top-level nodes.
  List<Node> parse() {
    // Make a fake top tag to hold the results.
    _stack.add(new TagState(0, null));

    while (!isDone) {
      // See if any of the current tags on the stack match. We don't allow
      // tags of the same kind to nest, so this takes priority over other
      // possible matches.
      if (_tryCloseOpenTag()) continue;

      // See if the current text matches any defined markdown syntax.
      if (_tryMatchSyntax()) continue;

      // If we got here, it's just text.
      advanceBy(1);
    }

    // Unwind any unmatched tags and get the results.
    return _stack[0].close(this, null);
  }

  /// Lets the open tags, innermost first, try to close at the current
  /// position. Index 0 is skipped: it is the fake results tag, which has no
  /// syntax to match against.
  bool _tryCloseOpenTag() {
    for (int i = _stack.length - 1; i > 0; i--) {
      if (_stack[i].tryMatch(this)) return true;
    }
    return false;
  }

  /// Tries each of [syntaxes] in order at the current position and stops at
  /// the first one that matches.
  bool _tryMatchSyntax() {
    for (final syntax in syntaxes) {
      if (syntax.tryMatch(this)) return true;
    }
    return false;
  }

  /// Emits the pending plain text, i.e. the source between [start] and [pos],
  /// into the children of the innermost open tag, then moves [start] up to
  /// [pos]. Does nothing when there is no pending text.
  writeText() {
    if (pos <= start) return;

    final text = source.substring(start, pos);
    final nodes = _stack.last().children;

    // If the previous node is text too, just append.
    if ((nodes.length > 0) && (nodes.last() is Text)) {
      final merged = new Text('${nodes.last().text}$text');
      nodes[nodes.length - 1] = merged;
    } else {
      nodes.add(new Text(text));
    }

    start = pos;
  }

  /// Removes the top tag from the stack and rewinds [start] to where that tag
  /// began, so that the source from its opening marker onwards is pending
  /// plain text again and gets emitted by the next [writeText]. The child
  /// nodes the discarded tag had collected are not carried over.
  discardUnmatchedTag() {
    final unfinished = _stack.removeLast();
    start = unfinished.startPos;
  }

  /// Appends [node] to the children of the innermost open tag.
  addNode(Node node) {
    _stack.last().children.add(node);
  }

  // TODO(rnystrom): Only need this because RegExp doesn't let you start
  // searching from a given offset.
  // Review (Jennifer Messerly, 2011/11/23; Bob Nystrom, 2011/11/29): agreed
  // that this needs to be fixed in RegExp.
  /// The part of [source] from [pos] to the end.
  String get currentSource() => source.substring(pos, source.length);

  /// Whether [pos] has reached the end of [source].
  bool get isDone() => pos == source.length;

  /// Moves [pos] forward by [length], leaving [start] where it is so the
  /// skipped characters stay pending as plain text.
  void advanceBy(int length) => pos += length;

  /// Moves [pos] forward by [length] and sets [start] to the new position, so
  /// the skipped characters are not emitted as plain text.
  void consume(int length) {
    pos += length;
    start = pos;
  }
}
138 | |
/// Represents one kind of markdown tag that can be parsed.
class InlineSyntax {
  /// The pattern that has to match at the parser's current position for this
  /// syntax to apply.
  final RegExp pattern;

  InlineSyntax(String pattern)
    : pattern = new RegExp(pattern, true);
  // TODO(rnystrom): Should use named arg for RegExp multiLine.

  /// Tries to apply this syntax at the parser's current position.
  ///
  /// Returns `false` without touching the parser unless [pattern] matches
  /// exactly at the start of the remaining source. Otherwise flushes pending
  /// plain text, calls [onMatch], and returns `true`. The matched text is
  /// consumed only when [onMatch] returns `true`.
  bool tryMatch(InlineParser parser) {
    final startMatch = pattern.firstMatch(parser.currentSource);
    if ((startMatch == null) || (startMatch.start() != 0)) return false;

    // Write any existing plain text up to this point.
    parser.writeText();

    if (onMatch(parser, startMatch)) {
      parser.consume(startMatch.group(0).length);
    }
    return true;
  }

  /// Handles a successful [match] of [pattern]. Implementations return `true`
  /// to have [tryMatch] consume the matched text, or `false` when they have
  /// already moved the parser themselves.
  ///
  /// This is the hook [tryMatch] calls and the subclasses implement, so it is
  /// declared under that name.
  abstract bool onMatch(InlineParser parser, Match match);
}
163 | |
/// Matches stuff that should just be passed through as straight text.
class TextSyntax extends InlineSyntax {
  /// Text emitted in place of the match, or `null` to keep the matched text.
  String substitute;

  TextSyntax(String pattern, [String sub])
    : super(pattern),
      substitute = sub;

  /// With a [substitute], adds it as a text node and returns `true`. Without
  /// one, advances the parser past the match and returns `false`.
  bool onMatch(InlineParser parser, Match match) {
    if (substitute != null) {
      // Insert the substitution.
      parser.addNode(new Text(substitute));
      return true;
    }

    // Just use the original matched text.
    final matchedLength = match.group(0).length;
    parser.advanceBy(matchedLength);
    return false;
  }
}
183 | |
/// Matches autolinks like <http://foo.com>.
class AutolinkSyntax extends InlineSyntax {
  AutolinkSyntax()
    : super(@'<((http|https|ftp)://[^>]*)>');
  // TODO(rnystrom): Make case insensitive.

  /// Adds an `a` element whose text and `href` are both the matched URL.
  /// Always returns `true`.
  bool onMatch(InlineParser parser, Match match) {
    // The URL pattern accepts anything but ">", so it may contain characters
    // such as '"' or '&'. Escape it for the attribute as well as for the
    // link text, the same way LinkSyntax escapes its href.
    final escapedUrl = escapeHtml(match.group(1));

    final anchor = new Element.text('a', escapedUrl);
    anchor.attributes['href'] = escapedUrl;
    parser.addNode(anchor);

    return true;
  }
}
200 | |
/// Matches syntax that has a pair of tags and becomes an element, like '*' for
/// `<em>`. Allows nested tags.
class TagSyntax extends InlineSyntax {
  /// Pattern that closes the tag; the opening pattern when no `end` is given.
  final RegExp endPattern;

  /// Name of the element created when the tag is closed.
  final String tag;

  TagSyntax(String pattern, [String tag, String end = null])
    : super(pattern),
      endPattern = new RegExp((end == null) ? pattern : end, true),
      tag = tag;
  // TODO(rnystrom): Doing this.field doesn't seem to work with named args.
  // Review (Jennifer Messerly, 2011/11/23): what's the issue here? Can you
  // file it? Reply (Bob Nystrom, 2011/11/29): possibly an already known issue.
  // TODO(rnystrom): Should use named arg for RegExp multiLine.

  /// Opens the tag: pushes a new state, starting at the parser's current
  /// position, onto the parser's stack. Always returns `true`.
  bool onMatch(InlineParser parser, Match match) {
    final opened = new TagState(parser.pos, this);
    parser._stack.add(opened);
    return true;
  }

  /// Closes the tag: adds a [tag] element holding the children collected in
  /// [state]. Always returns `true`.
  bool onMatchEnd(InlineParser parser, Match match, TagState state) {
    final element = new Element(tag, state.children);
    parser.addNode(element);
    return true;
  }
}
224 | |
/// Matches inline links like [blah] [id] and [blah] (url).
class LinkSyntax extends TagSyntax {
  /// The regex for the end of a link needs to handle both reference style and
  /// inline styles as well as optional titles for inline links. To make that
  /// a bit more palatable, this breaks it into pieces.
  ///
  /// Capture groups: 1 is the reference id, 2 the inline URL, 3 the inline
  /// title.
  // Review (Jennifer Messerly, 2011/11/23): could this be a field?
  // Reply (Bob Nystrom, 2011/11/29): it was breaking constness when I tried
  // that.
  static get linkPattern() {
    // "]" with optional space after.
    final closeBracket = @'\][ \n\t]?';
    // "[id]" reflink id.
    final reference = @'\[([^\]]*)\]';
    // Optional title in quotes.
    final optionalTitle = @'(?:[ ]*"([^"]+)"|)';
    // "(url "title")" inline link.
    final inline = '\\(([^ )]+)${optionalTitle}\\)';
    return '${closeBracket}(?:${reference}|${inline})';
  }

  LinkSyntax()
    : super(@'\[', end: linkPattern);

  /// Turns a closed link into an `a` element and returns `true`, or returns
  /// `false` when a reference link names an id the document does not define.
  bool onMatchEnd(InlineParser parser, Match match, TagState state) {
    var url;
    var title;

    if (match.group(2) == '') {
      // Reference link like [foo] [bar].
      var id = match.group(1);
      if (id == '') {
        // The id is empty ("[]") so infer it from the contents.
        id = parser.source.substring(state.startPos + 1, parser.pos);
      }

      // Look up the link. If it's an unknown link just emit plaintext.
      final link = parser.document.refLinks[id];
      if (link == null) return false;

      url = link.url;
      title = link.title;
    } else {
      // Inline link like [foo](url).
      url = match.group(2);
      title = match.group(3);

      // For whatever reason, markdown allows angle-bracketed URLs here.
      if (url.startsWith('<') && url.endsWith('>')) {
        url = url.substring(1, url.length - 1);
      }
    }

    final anchor = new Element('a', state.children);
    anchor.attributes['href'] = escapeHtml(url);

    final hasTitle = (title != null) && (title != '');
    if (hasTitle) {
      anchor.attributes['title'] = escapeHtml(title);
    }

    parser.addNode(anchor);
    return true;
  }
}
281 | |
/// Matches backtick-enclosed inline code blocks.
class CodeSyntax extends InlineSyntax {
  CodeSyntax(String pattern)
    : super(pattern);

  /// Adds a `code` element containing the escaped text of the first capture
  /// group. Always returns `true`.
  bool onMatch(InlineParser parser, Match match) {
    final escapedCode = escapeHtml(match.group(1));
    final codeElement = new Element.text('code', escapedCode);
    parser.addNode(codeElement);
    return true;
  }
}
292 | |
/// Keeps track of a currently open tag while it is being parsed. The parser
/// maintains a stack of these so it can handle nested tags.
class TagState {
  /// The point in the original source where this tag started.
  int startPos;

  /// The syntax that created this node.
  final TagSyntax syntax;

  /// The nodes collected inside this tag so far. Starts out empty.
  final List<Node> children;

  TagState(this.startPos, this.syntax)
    : children = <Node>[];

  /// Attempts to close this tag by matching the current text against its end
  /// pattern. Returns whether the end pattern matched at the current position.
  bool tryMatch(InlineParser parser) {
    final endMatch = syntax.endPattern.firstMatch(parser.currentSource);
    if ((endMatch == null) || (endMatch.start() != 0)) return false;

    // Close the tag.
    close(parser, endMatch);
    return true;
  }

  /// Pops this tag off the stack, completes it, and adds it to the output.
  /// Will discard any unmatched tags that happen to be above it on the stack.
  /// If this is the last node in the stack, returns its children; otherwise
  /// returns `null`.
  List<Node> close(InlineParser parser, Match endMatch) {
    // Anything still above this tag on the stack is mismatched. For example,
    // given '*a _b*...' the '_' is on top of the stack when the second '*' is
    // reached, so it is discarded and treated as text.
    while (parser._stack.last() != this) {
      parser.discardUnmatchedTag();
    }

    // Flush pending text into this tag, then pop it off the stack.
    parser.writeText();
    parser._stack.removeLast();

    // If the stack is empty now, this is the special "results" node.
    if (parser._stack.length == 0) return children;

    // We are still parsing, so add this to its parent's children.
    final closed = syntax.onMatchEnd(parser, endMatch, this);
    final endLength = endMatch.group(0).length;
    if (closed) {
      parser.consume(endLength);
    } else {
      // Didn't close correctly so revert to text.
      parser.start = startPos;
      parser.advanceBy(endLength);
    }

    return null;
  }
}
OLD | NEW |