Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5)

Side by Side Diff: utils/markdown/inline_parser.dart

Issue 8680025: First pass at a markdown parser in Dart. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Review. Add missing file (oops!). Created 9 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « utils/markdown/html_renderer.dart ('k') | utils/markdown/lib.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
/// Maintains the internal state needed to parse inline span elements in
/// markdown.
class InlineParser {
  /// Shared syntax table. Stays `null` until [syntaxes] is first read.
  static List<InlineSyntax> _syntaxes;

  /// The inline syntaxes, in the order in which they are tried against the
  /// source. The list is built on first access and reused afterwards.
  static List<InlineSyntax> get syntaxes() {
    if (_syntaxes != null) return _syntaxes;

    _syntaxes = <InlineSyntax>[
      new AutolinkSyntax(),
      new LinkSyntax(),
      // "*" surrounded by spaces is left alone.
      new TextSyntax(@' \* '),
      // "_" surrounded by spaces is left alone.
      new TextSyntax(@' _ '),
      // Leave already-encoded HTML entities alone. Ensures we don't turn
      // "&amp;" into "&amp;amp;"
      new TextSyntax(@'&[#a-zA-Z0-9]*;'),
      // Encode "&".
      new TextSyntax(@'&', sub: '&amp;'),
      // Encode "<". (Why not encode ">" too? Gruber is toying with us.)
      new TextSyntax(@'<', sub: '&lt;'),
      // Parse "**strong**" tags.
      new TagSyntax(@'\*\*', tag: 'strong'),
      // Parse "__strong__" tags.
      new TagSyntax(@'__', tag: 'strong'),
      // Parse "*emphasis*" tags.
      new TagSyntax(@'\*', tag: 'em'),
      // Parse "_emphasis_" tags.
      // TODO(rnystrom): Underscores in the middle of a word should not be
      // parsed as emphasis like_in_this.
      new TagSyntax(@'_', tag: 'em'),
      // Parse inline code within double backticks: "``code``".
      new CodeSyntax(@'``[ ]?(.*?)[ ]?``'),
      // Parse inline code within backticks: "`code`".
      new CodeSyntax(@'`([^`]*)`')
    ];
    return _syntaxes;
  }

  /// The string of markdown being parsed.
  final String source;

  /// The markdown document this parser is parsing.
  final Document document;

  /// The current read position.
  int pos = 0;

  /// Starting position of the last unconsumed text.
  int start = 0;

  /// Tags that have been opened but not yet closed, innermost last.
  final List<TagState> _stack;

  InlineParser(this.source, this.document)
    : _stack = <TagState>[];

  /// Walks [source] from [pos] to its end and returns the resulting nodes.
  List<Node> parse() {
    // Make a fake top tag to hold the results.
    _stack.add(new TagState(0, null));

    while (!isDone) {
      // Closing an open tag takes priority over other possible matches,
      // because tags of the same kind are not allowed to nest.
      if (_tryCloseOpenTag()) continue;

      // Otherwise see if the current text starts any defined syntax.
      if (_tryOpenSyntax()) continue;

      // If we got here, it's just text.
      advanceBy(1);
    }

    // Unwind any unmatched tags and get the results.
    return _stack[0].close(this, null);
  }

  /// Asks each open tag, innermost first, whether the current text closes it.
  /// Returns `true` as soon as one of them matches.
  bool _tryCloseOpenTag() {
    // Index 0 is the fake results tag, which has no syntax, so it is skipped.
    for (int depth = _stack.length - 1; depth > 0; depth--) {
      if (_stack[depth].tryMatch(this)) return true;
    }
    return false;
  }

  /// Tries every entry of [syntaxes] in order against the current text.
  /// Returns `true` as soon as one of them matches.
  bool _tryOpenSyntax() {
    for (final candidate in syntaxes) {
      if (candidate.tryMatch(this)) return true;
    }
    return false;
  }

  /// Flushes the pending text between [start] and [pos] into the children of
  /// the innermost open tag. Does nothing when there is no pending text.
  writeText() {
    if (pos <= start) return;

    final pending = source.substring(start, pos);
    final siblings = _stack.last().children;

    if ((siblings.length == 0) || !(siblings.last() is Text)) {
      siblings.add(new Text(pending));
    } else {
      // The previous node is text too, so merge instead of adding a node.
      final merged = new Text('${siblings.last().text}$pending');
      siblings[siblings.length - 1] = merged;
    }

    start = pos;
  }

  /// Pops the top tag off the stack and rewinds [start] to where that tag
  /// began, so its source is treated as unconsumed plain text again.
  discardUnmatchedTag() {
    final abandoned = _stack.removeLast();
    start = abandoned.startPos;
  }

  /// Appends [node] to the children of the innermost open tag.
  addNode(Node node) {
    final siblings = _stack.last().children;
    siblings.add(node);
  }

  // TODO(rnystrom): Only need this because RegExp doesn't let you start
  // searching from a given offset.
  /// The part of [source] from [pos] to the end.
  String get currentSource() => source.substring(pos, source.length);

  /// Whether [pos] has reached the end of [source].
  bool get isDone() => pos == source.length;

  /// Moves [pos] forward by [length], leaving [start] where it is so the
  /// skipped characters remain pending text.
  void advanceBy(int length) {
    pos += length;
  }

  /// Moves [pos] forward by [length] and moves [start] up to it, so the
  /// skipped characters are not emitted as text.
  void consume(int length) {
    advanceBy(length);
    start = pos;
  }
}
138
/// Represents one kind of markdown tag that can be parsed.
class InlineSyntax {
  /// Expression that must match at the parser's current position for this
  /// syntax to apply.
  final RegExp pattern;

  InlineSyntax(String pattern)
    : pattern = new RegExp(pattern, true);
  // TODO(rnystrom): Should use named arg for RegExp multiLine.

  /// Tests [pattern] against the text at the parser's current position.
  ///
  /// Returns `false` when the pattern does not match right at that position.
  /// Otherwise flushes the parser's pending text, hands the match to
  /// [onMatch], and returns `true`.
  bool tryMatch(InlineParser parser) {
    final startMatch = pattern.firstMatch(parser.currentSource);
    // firstMatch searches the whole remainder, so only accept a hit that
    // begins exactly at the current position.
    if ((startMatch != null) && (startMatch.start() == 0)) {
      // Write any existing plain text up to this point.
      parser.writeText();

      // A true result means the matched text should be consumed here; a
      // false result means the handler moved the parser itself.
      if (onMatch(parser, startMatch)) {
        parser.consume(startMatch.group(0).length);
      }
      return true;
    }
    return false;
  }

  /// Handles a successful [match] of [pattern].
  ///
  /// This is the hook that [tryMatch] calls and that subclasses implement.
  /// Return `true` to have [tryMatch] consume the matched text, or `false`
  /// if the implementation has already moved the parser past it.
  abstract bool onMatch(InlineParser parser, Match match);
}
163
/// Matches stuff that should just be passed through as straight text.
class TextSyntax extends InlineSyntax {
  /// Replacement text to emit for a match, or `null` to keep the matched
  /// source as it is.
  String substitute;

  TextSyntax(String pattern, [String sub])
    : super(pattern),
      substitute = sub;

  /// Emits [substitute] as a text node when one is set and lets the caller
  /// consume the match. Otherwise skips over the match so that it stays part
  /// of the pending plain text, and returns `false`.
  bool onMatch(InlineParser parser, Match match) {
    if (substitute != null) {
      // Insert the substitution.
      parser.addNode(new Text(substitute));
      return true;
    }

    // Just use the original matched text.
    final matchedLength = match.group(0).length;
    parser.advanceBy(matchedLength);
    return false;
  }
}
183
/// Matches autolinks like <http://foo.com>.
class AutolinkSyntax extends InlineSyntax {
  AutolinkSyntax()
    : super(@'<((http|https|ftp)://[^>]*)>');
  // TODO(rnystrom): Make case insensitive.

  /// Adds an `<a>` element whose text and `href` are both the matched URL.
  /// Always returns `true` so the caller consumes the whole `<...>` match.
  bool onMatch(InlineParser parser, Match match) {
    // The URL pattern admits characters such as "&" and '"', so escape it
    // once and use the escaped form for both the text and the attribute,
    // the same way LinkSyntax escapes its href.
    final escapedUrl = escapeHtml(match.group(1));

    final anchor = new Element.text('a', escapedUrl);
    anchor.attributes['href'] = escapedUrl;
    parser.addNode(anchor);

    return true;
  }
}
200
/// Matches syntax that has a pair of tags and becomes an element, like '*' for
/// `<em>`. Allows nested tags.
class TagSyntax extends InlineSyntax {
  /// Expression that closes a tag opened by this syntax.
  final RegExp endPattern;

  /// Name of the element created when the tag is closed.
  final String tag;

  /// Creates a syntax opened by [pattern]. When [end] is omitted the same
  /// pattern is used for closing.
  TagSyntax(String pattern, [String tag, String end = null])
    : super(pattern),
      endPattern = new RegExp((end == null) ? pattern : end, true),
      tag = tag;
  // TODO(rnystrom): Doing this.field doesn't seem to work with named args.
  // TODO(rnystrom): Should use named arg for RegExp multiLine.

  /// Pushes a new open tag, starting at the parser's current position, onto
  /// the parser's stack. Always returns `true`.
  bool onMatch(InlineParser parser, Match match) {
    final opened = new TagState(parser.pos, this);
    parser._stack.add(opened);
    return true;
  }

  /// Wraps the children collected in [state] in a [tag] element and adds it
  /// to the parser. Always returns `true`.
  bool onMatchEnd(InlineParser parser, Match match, TagState state) {
    final element = new Element(tag, state.children);
    parser.addNode(element);
    return true;
  }
}
224
/// Matches inline links like [blah] [id] and [blah] (url).
class LinkSyntax extends TagSyntax {
  /// The regex for the end of a link needs to handle both reference style and
  /// inline styles as well as optional titles for inline links. To make that
  /// a bit more palatable, this breaks it into pieces.
  ///
  /// Capture groups: 1 is the reference id, 2 is the inline URL and 3 is the
  /// inline title.
  static get linkPattern() {
    final bracket = @'\][ \n\t]?'; // "]" with optional space after.
    final refLink = @'\[([^\]]*)\]'; // "[id]" reflink id.
    final title = @'(?:[ ]*"([^"]+)"|)'; // Optional title in quotes.
    final inlineLink = '\\(([^ )]+)$title\\)'; // "(url "title")" inline link.
    return '$bracket(?:$refLink|$inlineLink)';
  }

  LinkSyntax()
    : super(@'\[', end: linkPattern);

  /// Builds the `<a>` element for a closed link and adds it to the parser.
  ///
  /// Returns `false`, without adding anything, when the link is a reference
  /// to an id that the document does not define.
  bool onMatchEnd(InlineParser parser, Match match, TagState state) {
    var url;
    var title;

    // When the reference alternative matched, the inline URL group took no
    // part in the match. Treat a missing group the same as an empty one so a
    // reference link is never mistaken for an inline link with no URL.
    final inlineUrl = match.group(2);
    if ((inlineUrl != null) && (inlineUrl != '')) {
      // Inline link like [foo](url).
      url = inlineUrl;
      title = match.group(3);

      // For whatever reason, markdown allows angle-bracketed URLs here.
      if (url.startsWith('<') && url.endsWith('>')) {
        url = url.substring(1, url.length - 1);
      }
    } else {
      // Reference link like [foo] [bar].
      var id = match.group(1);
      if ((id == null) || (id == '')) {
        // The id is empty ("[]") so infer it from the contents.
        id = parser.source.substring(state.startPos + 1, parser.pos);
      }

      // Look up the link.
      final link = parser.document.refLinks[id];
      // If it's an unknown link just emit plaintext.
      if (link == null) return false;

      url = link.url;
      title = link.title;
    }

    final anchor = new Element('a', state.children);
    anchor.attributes['href'] = escapeHtml(url);
    if ((title != null) && (title != '')) {
      anchor.attributes['title'] = escapeHtml(title);
    }

    parser.addNode(anchor);
    return true;
  }
}
281
/// Matches backtick-enclosed inline code blocks.
class CodeSyntax extends InlineSyntax {
  CodeSyntax(String pattern)
    : super(pattern);

  /// Adds a `<code>` element holding the HTML-escaped contents of the first
  /// capture group. Always returns `true` so the caller consumes the match.
  bool onMatch(InlineParser parser, Match match) {
    final escapedCode = escapeHtml(match.group(1));
    final codeElement = new Element.text('code', escapedCode);
    parser.addNode(codeElement);
    return true;
  }
}
292
/// Keeps track of a currently open tag while it is being parsed. The parser
/// maintains a stack of these so it can handle nested tags.
class TagState {
  /// The point in the original source where this tag started.
  int startPos;

  /// The syntax that created this node.
  final TagSyntax syntax;

  /// The nodes collected inside this tag so far. Starts out empty.
  final List<Node> children;

  TagState(this.startPos, this.syntax)
    : children = <Node>[];

  /// Attempts to close this tag by matching the current text against its end
  /// pattern. Returns whether the tag was closed.
  bool tryMatch(InlineParser parser) {
    final Match endMatch = syntax.endPattern.firstMatch(parser.currentSource);

    // Only an end pattern found exactly at the current position counts.
    if ((endMatch == null) || (endMatch.start() != 0)) return false;

    // Close the tag.
    close(parser, endMatch);
    return true;
  }

  /// Pops this tag off the stack, completes it, and adds it to the output.
  /// Will discard any unmatched tags that happen to be above it on the stack.
  /// If this is the last node in the stack, returns its children; otherwise
  /// returns `null`.
  List<Node> close(InlineParser parser, Match endMatch) {
    final openTags = parser._stack;

    // Anything above this tag on the stack is mismatched. For example, given
    // '*a _b*...' when we reach the second '*', '_' will be on the top of
    // the stack, so it is discarded and treated as text.
    while (openTags.last() != this) {
      parser.discardUnmatchedTag();
    }

    // Flush pending text into this tag, then pop it off the stack.
    parser.writeText();
    openTags.removeLast();

    // If the stack is empty now, this is the special "results" node.
    if (openTags.length == 0) return children;

    // We are still parsing, so let the syntax add this to its parent.
    final closedCleanly = syntax.onMatchEnd(parser, endMatch, this);
    if (!closedCleanly) {
      // Didn't close correctly so revert to text.
      parser.start = startPos;
      parser.advanceBy(endMatch.group(0).length);
      return null;
    }

    parser.consume(endMatch.group(0).length);
    return null;
  }
}
OLDNEW
« no previous file with comments | « utils/markdown/html_renderer.dart ('k') | utils/markdown/lib.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698