OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 |
/// Maintains the internal state needed to parse inline span elements in
/// markdown.
///
/// The parser scans [source] from left to right. At each position it first
/// lets the currently open tags try to close, then tries each entry of
/// [syntaxes] in order, and otherwise treats the character as plain text.
class InlineParser {
  /// The inline syntaxes to try, in priority order. The list is created on
  /// first access and then reused.
  static List<InlineSyntax> get syntaxes() {
    // Lazy initialize.
    if (_syntaxes == null) {
      _syntaxes = <InlineSyntax>[
        new AutolinkSyntax(),
        new LinkSyntax(),
        // "*" surrounded by spaces is left alone.
        new TextSyntax(@' \* '),
        // "_" surrounded by spaces is left alone.
        new TextSyntax(@' _ '),
        // Leave already-encoded HTML entities alone. Ensures we don't turn
        // "&amp;" into "&amp;amp;"
        new TextSyntax(@'&[#a-zA-Z0-9]*;'),
        // Encode "&". The substitution must be the entity, not the raw
        // character, or the match is a no-op.
        new TextSyntax(@'&', sub: '&amp;'),
        // Encode "<". (Why not encode ">" too? Gruber is toying with us.)
        new TextSyntax(@'<', sub: '&lt;'),
        // Parse "**strong**" tags.
        new TagSyntax(@'\*\*', tag: 'strong'),
        // Parse "__strong__" tags.
        new TagSyntax(@'__', tag: 'strong'),
        // Parse "*emphasis*" tags.
        new TagSyntax(@'\*', tag: 'em'),
        // Parse "_emphasis_" tags.
        // TODO(rnystrom): Underscores in the middle of a word should not be
        // parsed as emphasis like_in_this.
        new TagSyntax(@'_', tag: 'em'),
        // Parse inline code within double backticks: "``code``".
        new CodeSyntax(@'``[ ]?(.*?)[ ]?``'),
        // Parse inline code within backticks: "`code`".
        new CodeSyntax(@'`([^`]*)`')
      ];
    }

    return _syntaxes;
  }

  /// Backing store for [syntaxes]; `null` until first requested.
  static List<InlineSyntax> _syntaxes;

  /// The string of markdown being parsed.
  final String source;

  /// The markdown document this parser is parsing.
  final Document document;

  /// The current read position.
  int pos = 0;

  /// Starting position of the last unconsumed text.
  int start = 0;

  /// The currently open tags, innermost last. Index 0 is the fake root tag
  /// that [parse] pushes to collect the top-level nodes.
  final List<TagState> _stack;

  InlineParser(this.source, this.document)
      : _stack = <TagState>[];

  /// Parses [source] and returns the resulting top-level nodes.
  List<Node> parse() {
    // Make a fake top tag to hold the results.
    _stack.add(new TagState(0, null));

    while (!isDone) {
      bool matched = false;

      // See if any of the current tags on the stack match. We don't allow tags
      // of the same kind to nest, so this takes priority over other possible
      // matches. Index 0 is skipped: it is the fake root and has no syntax.
      for (int i = _stack.length - 1; i > 0; i--) {
        if (_stack[i].tryMatch(this)) {
          matched = true;
          break;
        }
      }
      if (matched) continue;

      // See if the current text matches any defined markdown syntax.
      for (final syntax in syntaxes) {
        if (syntax.tryMatch(this)) {
          matched = true;
          break;
        }
      }
      if (matched) continue;

      // If we got here, it's just text.
      advanceBy(1);
    }

    // Unwind any unmatched tags and get the results.
    return _stack[0].close(this, null);
  }

  /// Flushes the pending plain text (the source between [start] and [pos])
  /// into the children of the tag on top of the stack, then moves [start] up
  /// to [pos]. Does nothing when there is no pending text.
  writeText() {
    if (pos > start) {
      final text = source.substring(start, pos);
      final nodes = _stack.last().children;

      // If the previous node is text too, just append.
      if ((nodes.length > 0) && (nodes.last() is Text)) {
        final newNode = new Text('${nodes.last().text}$text');
        nodes[nodes.length - 1] = newNode;
      } else {
        nodes.add(new Text(text));
      }

      start = pos;
    }
  }

  /// Removes the top tag from the stack and rewinds [start] to where that tag
  /// began, so the source it covered is emitted as plain text by the next
  /// [writeText].
  discardUnmatchedTag() {
    final unfinished = _stack.removeLast();
    start = unfinished.startPos;
  }

  /// Appends [node] to the children of the tag on top of the stack.
  addNode(Node node) {
    _stack.last().children.add(node);
  }

  // TODO(rnystrom): Only need this because RegExp doesn't let you start
  // searching from a given offset.
  /// The part of [source] from [pos] to the end.
  String get currentSource() => source.substring(pos, source.length);

  /// Whether the read position has reached the end of [source].
  bool get isDone() => pos == source.length;

  /// Moves [pos] forward by [length] while leaving [start] alone, so the
  /// skipped characters stay part of the pending plain text.
  void advanceBy(int length) => pos += length;

  /// Moves [pos] forward by [length] and resets [start] to it, so the skipped
  /// characters are not emitted as plain text.
  void consume(int length) {
    pos += length;
    start = pos;
  }
}
| 138 |
/// Represents one kind of markdown tag that can be parsed.
///
/// Subclasses supply the behavior for a successful match by implementing
/// [onMatch].
class InlineSyntax {
  /// The pattern that must match at the parser's current position.
  final RegExp pattern;

  InlineSyntax(String pattern)
      : pattern = new RegExp(pattern, true);
  // TODO(rnystrom): Should use named arg for RegExp multiLine.

  /// Tries to match [pattern] at the parser's current position.
  ///
  /// Returns `true` when the pattern matched at offset 0 of the remaining
  /// source, even if [onMatch] declined to consume the matched text, and
  /// `false` otherwise.
  bool tryMatch(InlineParser parser) {
    final startMatch = pattern.firstMatch(parser.currentSource);
    if ((startMatch != null) && (startMatch.start() == 0)) {
      // Write any existing plain text up to this point.
      parser.writeText();

      // Only skip past the matched text when the syntax asks for it; a
      // syntax that returns false has advanced the parser itself.
      if (onMatch(parser, startMatch)) {
        parser.consume(startMatch.group(0).length);
      }
      return true;
    }
    return false;
  }

  /// Handles a successful match. Returns `true` if the matched text should be
  /// consumed by [tryMatch].
  ///
  /// This is the hook [tryMatch] calls and the one every subclass implements,
  /// so it must be declared under this name.
  abstract bool onMatch(InlineParser parser, Match match);
}
| 163 |
/// Matches text that is passed through to the output as plain text, either
/// unchanged or replaced by a fixed substitute string.
class TextSyntax extends InlineSyntax {
  /// Replacement text for a match, or `null` to keep the matched source.
  String substitute;

  TextSyntax(String pattern, [String sub])
      : super(pattern),
        substitute = sub;

  /// Emits [substitute] as a text node when one is set and asks the caller to
  /// consume the match. Otherwise it advances past the match itself, leaving
  /// the text pending, and returns `false`.
  bool onMatch(InlineParser parser, Match match) {
    if (substitute != null) {
      // Insert the substitution in place of the matched source.
      final replacement = new Text(substitute);
      parser.addNode(replacement);
      return true;
    }

    // No substitute: keep the original matched text as pending plain text.
    final matchedLength = match.group(0).length;
    parser.advanceBy(matchedLength);
    return false;
  }
}
| 183 |
/// Matches angle-bracketed autolinks such as <http://foo.com> and turns them
/// into anchor elements.
class AutolinkSyntax extends InlineSyntax {
  AutolinkSyntax()
      : super(@'<((http|https|ftp)://[^>]*)>');
  // TODO(rnystrom): Make case insensitive.

  /// Adds an `a` element whose text is the HTML-escaped URL and whose `href`
  /// is the URL as written. Always asks the caller to consume the match.
  bool onMatch(InlineParser parser, Match match) {
    // Group 1 is the URL without the surrounding angle brackets.
    final target = match.group(1);
    final label = escapeHtml(target);

    final link = new Element.text('a', label);
    // NOTE(review): href is stored unescaped here, while LinkSyntax escapes
    // it; confirm which one the renderer expects.
    link.attributes['href'] = target;

    parser.addNode(link);
    return true;
  }
}
| 200 |
/// Matches syntax made of an opening and a closing delimiter that becomes an
/// element, like '*' for `<em>`. Allows nested tags.
class TagSyntax extends InlineSyntax {
  /// Pattern that closes the tag; the opening pattern when no `end` is given.
  final RegExp endPattern;

  /// Name of the element created when the tag closes.
  final String tag;

  TagSyntax(String pattern, [String tag, String end = null])
      : super(pattern),
        endPattern = new RegExp((end == null) ? pattern : end, true),
        tag = tag;
  // TODO(rnystrom): Doing this.field doesn't seem to work with named args.
  // TODO(rnystrom): Should use named arg for RegExp multiLine.

  /// Opens the tag by pushing a new state, recorded at the current position,
  /// onto the parser's stack. Always asks the caller to consume the match.
  bool onMatch(InlineParser parser, Match match) {
    final opened = new TagState(parser.pos, this);
    parser._stack.add(opened);
    return true;
  }

  /// Closes the tag by wrapping the children gathered in [state] in an
  /// element named [tag] and adding it to the parser. Always returns `true`.
  bool onMatchEnd(InlineParser parser, Match match, TagState state) {
    final element = new Element(tag, state.children);
    parser.addNode(element);
    return true;
  }
}
| 224 |
/// Matches inline links like [blah] [id] and [blah] (url).
class LinkSyntax extends TagSyntax {
  /// The regex for the end of a link needs to handle both reference style and
  /// inline styles as well as optional titles for inline links. To make that
  /// a bit more palatable, this breaks it into pieces.
  ///
  /// Capture groups in the result: 1 is the reference id, 2 the inline URL,
  /// and 3 the optional inline title.
  static get linkPattern() {
    final bracket = @'\][ \n\t]?'; // "]" with optional space after.
    final refLink = @'\[([^\]]*)\]'; // "[id]" reflink id.
    final title = @'(?:[ ]*"([^"]+)"|)'; // Optional title in quotes.
    final inlineLink = '\\(([^ )]+)$title\\)'; // "(url "title")" inline link.
    return '$bracket(?:$refLink|$inlineLink)';
  }

  LinkSyntax()
      : super(@'\[', end: linkPattern);

  /// Builds the anchor element for a closed link and adds it to the parser.
  ///
  /// Returns `false`, so the caller reverts the link to plain text, when a
  /// reference link names an id that is not in the document's `refLinks`.
  bool onMatchEnd(InlineParser parser, Match match, TagState state) {
    var url;
    var title;

    // A group that did not take part in the match may be reported as null
    // rather than '', so accept both as "no inline URL".
    final inlineUrl = match.group(2);
    if ((inlineUrl != null) && (inlineUrl != '')) {
      // Inline link like [foo](url).
      url = inlineUrl;
      title = match.group(3);

      // For whatever reason, markdown allows angle-bracketed URLs here.
      if (url.startsWith('<') && url.endsWith('>')) {
        url = url.substring(1, url.length - 1);
      }
    } else {
      // Reference link like [foo] [bar].
      var id = match.group(1);
      if ((id == null) || (id == '')) {
        // The id is empty ("[]") so infer it from the contents.
        id = parser.source.substring(state.startPos + 1, parser.pos);
      }

      // Look up the link.
      final link = parser.document.refLinks[id];
      // If it's an unknown link just emit plaintext.
      if (link == null) return false;

      url = link.url;
      title = link.title;
    }

    final anchor = new Element('a', state.children);
    anchor.attributes['href'] = escapeHtml(url);
    if ((title != null) && (title != '')) {
      anchor.attributes['title'] = escapeHtml(title);
    }

    parser.addNode(anchor);
    return true;
  }
}
| 281 |
/// Matches backtick-enclosed inline code spans.
class CodeSyntax extends InlineSyntax {
  CodeSyntax(String pattern)
      : super(pattern);

  /// Adds a `code` element containing the HTML-escaped text of capture
  /// group 1. Always asks the caller to consume the match.
  bool onMatch(InlineParser parser, Match match) {
    final escaped = escapeHtml(match.group(1));
    final code = new Element.text('code', escaped);
    parser.addNode(code);
    return true;
  }
}
| 292 |
/// Keeps track of a currently open tag while it is being parsed. The parser
/// maintains a stack of these so it can handle nested tags.
class TagState {
  /// The point in the original source where this tag started.
  int startPos;

  /// The syntax that created this node.
  final TagSyntax syntax;

  /// The child nodes gathered for this tag; starts out as an empty list.
  final List<Node> children;

  TagState(this.startPos, this.syntax)
      : children = <Node>[];

  /// Attempts to close this tag by matching the remaining source against the
  /// end pattern of [syntax]. Returns whether the tag was closed.
  bool tryMatch(InlineParser parser) {
    Match closer = syntax.endPattern.firstMatch(parser.currentSource);

    // The end pattern has to match right at the current position.
    if ((closer == null) || (closer.start() != 0)) return false;

    close(parser, closer);
    return true;
  }

  /// Pops this tag off the stack, completes it, and adds it to the output.
  /// Will discard any unmatched tags that happen to be above it on the stack.
  /// If this is the last node in the stack, returns its children; otherwise
  /// returns `null`.
  List<Node> close(InlineParser parser, Match endMatch) {
    // Anything still open above this tag is mismatched. For example, given
    // '*a _b*...' the '_' is on top of the stack when the second '*' is
    // reached, so it is dropped and treated as text.
    while (parser._stack.last() != this) {
      parser.discardUnmatchedTag();
    }

    // Flush pending text into this tag, then pop it.
    parser.writeText();
    parser._stack.removeLast();

    // An empty stack means this was the special "results" node.
    if (parser._stack.length == 0) return children;

    // Still parsing: hand the finished tag to its syntax, which adds it to
    // the parent's children.
    final closed = syntax.onMatchEnd(parser, endMatch, this);
    final endLength = endMatch.group(0).length;
    if (!closed) {
      // Didn't close correctly so revert to text.
      parser.start = startPos;
      parser.advanceBy(endLength);
    } else {
      parser.consume(endLength);
    }

    return null;
  }
}
OLD | NEW |