| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | |
| 2 // for details. All rights reserved. Use of this source code is governed by a | |
| 3 // BSD-style license that can be found in the LICENSE file. | |
| 4 | |
/// Maintains the internal state needed to parse inline span elements in
/// markdown.
class InlineParser {
  /// The inline syntaxes that [parse] tries, in list order, at each position
  /// that no open tag on the stack has claimed. The list is built on first
  /// access and then reused.
  static List<InlineSyntax> get syntaxes() {
    // Lazy initialize.
    if (_syntaxes == null) {
      _syntaxes = <InlineSyntax>[
        new AutolinkSyntax(),
        new LinkSyntax(),
        // "*" surrounded by spaces is left alone.
        new TextSyntax(@' \* '),
        // "_" surrounded by spaces is left alone.
        new TextSyntax(@' _ '),
        // Leave already-encoded HTML entities alone. Ensures we don't turn
        // "&amp;" into "&amp;amp;".
        new TextSyntax(@'&[#a-zA-Z0-9]*;'),
        // Encode "&". This must come after the entity rule above so that only
        // bare ampersands reach it.
        new TextSyntax(@'&', sub: '&amp;'),
        // Encode "<". (Why not encode ">" too? Gruber is toying with us.)
        new TextSyntax(@'<', sub: '&lt;'),
        // Parse "**strong**" tags.
        new TagSyntax(@'\*\*', tag: 'strong'),
        // Parse "__strong__" tags.
        new TagSyntax(@'__', tag: 'strong'),
        // Parse "*emphasis*" tags.
        new TagSyntax(@'\*', tag: 'em'),
        // Parse "_emphasis_" tags.
        // TODO(rnystrom): Underscores in the middle of a word should not be
        // parsed as emphasis like_in_this.
        new TagSyntax(@'_', tag: 'em'),
        // Parse inline code within double backticks: "``code``".
        new CodeSyntax(@'``\s?((?:.|\n)*?)\s?``'),
        // Parse inline code within backticks: "`code`".
        new CodeSyntax(@'`([^`]*)`')
      ];
    }

    return _syntaxes;
  }

  /// Backing storage for [syntaxes]; `null` until first requested.
  static List<InlineSyntax> _syntaxes;

  /// The string of markdown being parsed.
  final String source;

  /// The markdown document this parser is parsing.
  final Document document;

  /// The current read position.
  int pos = 0;

  /// Starting position of the last unconsumed text.
  int start = 0;

  /// The currently open tags, innermost last. Index 0 is the fake top tag
  /// that [parse] pushes to collect the results.
  final List<TagState> _stack;

  InlineParser(this.source, this.document)
    : _stack = <TagState>[];

  /// Parses [source] from the current position to its end and returns the
  /// resulting top-level nodes.
  List<Node> parse() {
    // Make a fake top tag to hold the results.
    _stack.add(new TagState(0, 0, null));

    while (!isDone) {
      bool matched = false;

      // See if any of the current tags on the stack match. We don't allow
      // tags of the same kind to nest, so this takes priority over other
      // possible matches. Index 0 is the fake top tag and is never tested.
      for (int i = _stack.length - 1; i > 0; i--) {
        if (_stack[i].tryMatch(this)) {
          matched = true;
          break;
        }
      }
      if (matched) continue;

      // See if the current text matches any defined markdown syntax.
      for (final syntax in syntaxes) {
        if (syntax.tryMatch(this)) {
          matched = true;
          break;
        }
      }
      if (matched) continue;

      // If we got here, it's just text. Move the read position only; the
      // character stays pending (between [start] and [pos]) until written.
      advanceBy(1);
    }

    // Unwind any unmatched tags and get the results.
    return _stack[0].close(this, null);
  }

  /// Writes the pending text between [start] and [pos] to the innermost open
  /// tag, then marks it as written by moving [start] up to [pos].
  writeText() {
    writeTextRange(start, pos);
    start = pos;
  }

  /// Adds the slice of [source] from [start] to [end] as text to the innermost
  /// open tag. Does nothing when the range is empty.
  writeTextRange(int start, int end) {
    if (end > start) {
      final text = source.substring(start, end);
      final nodes = _stack.last().children;

      // If the previous node is text too, just append.
      if ((nodes.length > 0) && (nodes.last() is Text)) {
        final newNode = new Text('${nodes.last().text}$text');
        nodes[nodes.length - 1] = newNode;
      } else {
        nodes.add(new Text(text));
      }
    }
  }

  /// Appends [node] to the children of the innermost open tag.
  addNode(Node node) {
    _stack.last().children.add(node);
  }

  // TODO(rnystrom): Only need this because RegExp doesn't let you start
  // searching from a given offset.
  /// The part of [source] from [pos] to the end.
  String get currentSource() => source.substring(pos, source.length);

  /// Whether the read position has reached the end of [source].
  bool get isDone() => pos == source.length;

  /// Moves the read position forward by [length], leaving the skipped
  /// characters pending as plain text.
  void advanceBy(int length) {
    pos += length;
  }

  /// Moves the read position forward by [length] and also moves [start] there,
  /// so the skipped characters are not written as plain text.
  void consume(int length) {
    pos += length;
    start = pos;
  }
}
| 137 | |
/// Represents one kind of markdown tag that can be parsed.
class InlineSyntax {
  /// The expression that recognizes the start of this syntax.
  final RegExp pattern;

  // TODO(rnystrom): Should use named arg for RegExp multiLine.
  InlineSyntax(String pattern)
    : pattern = new RegExp(pattern, true);

  /// Tests this syntax against the parser's remaining source. Returns `true`
  /// when the pattern matches exactly at the current read position, in which
  /// case [onMatch] has been invoked.
  bool tryMatch(InlineParser parser) {
    final found = pattern.firstMatch(parser.currentSource);

    // A match further along in the remaining source does not count.
    if ((found == null) || (found.start() != 0)) return false;

    // Flush any plain text accumulated before this point.
    parser.writeText();

    // A `true` result asks us to skip over the matched text; on `false` the
    // read position is left to whatever [onMatch] did with it.
    if (onMatch(parser, found)) {
      parser.consume(found[0].length);
    }
    return true;
  }

  /// Handles a successful match. Return `true` to have the matched text
  /// consumed by [tryMatch].
  abstract bool onMatch(InlineParser parser, Match match);
}
| 162 | |
/// Matches stuff that should just be passed through as straight text.
class TextSyntax extends InlineSyntax {
  /// Replacement text emitted instead of the match, or `null` to keep the
  /// matched text as it is.
  String substitute;

  TextSyntax(String pattern, [String sub])
    : super(pattern),
      substitute = sub;

  bool onMatch(InlineParser parser, Match match) {
    if (substitute != null) {
      // Emit the replacement; returning true has the original text consumed.
      parser.addNode(new Text(substitute));
      return true;
    }

    // No replacement: step over the match so it stays pending as plain text.
    parser.advanceBy(match[0].length);
    return false;
  }
}
| 182 | |
/// Matches autolinks like `<http://foo.com>`.
class AutolinkSyntax extends InlineSyntax {
  // TODO(rnystrom): Make case insensitive.
  AutolinkSyntax()
    : super(@'<((http|https|ftp)://[^>]*)>');

  /// Adds an `<a>` element whose text and `href` are both the matched URL.
  bool onMatch(InlineParser parser, Match match) {
    // The pattern only excludes ">" from the URL, so it may still contain
    // "&", "<" or quotes. Escape it for the attribute as well as for the
    // link text, the same way LinkSyntax escapes its href.
    final escapedUrl = escapeHtml(match[1]);

    final anchor = new Element.text('a', escapedUrl);
    anchor.attributes['href'] = escapedUrl;
    parser.addNode(anchor);

    return true;
  }
}
| 199 | |
/// Matches syntax that has a pair of tags and becomes an element, like `*` for
/// `<em>`. Allows nested tags.
class TagSyntax extends InlineSyntax {
  /// The expression that recognizes the closing tag. Built from the opening
  /// pattern when no separate end pattern is supplied.
  final RegExp endPattern;

  /// The name of the element created when the tag is closed.
  final String tag;

  // TODO(rnystrom): Doing this.field doesn't seem to work with named args.
  // TODO(rnystrom): Should use named arg for RegExp multiLine.
  TagSyntax(String pattern, [String tag, String end = null])
    : super(pattern),
      endPattern = new RegExp((end == null) ? pattern : end, true),
      tag = tag;

  /// Opens the tag: pushes a [TagState] recording where the opening tag text
  /// begins and ends in the source.
  bool onMatch(InlineParser parser, Match match) {
    final openStart = parser.pos;
    final openEnd = openStart + match[0].length;
    parser._stack.add(new TagState(openStart, openEnd, this));
    return true;
  }

  /// Closes the tag: wraps the children collected in [state] in a [tag]
  /// element and adds it to the parser.
  bool onMatchEnd(InlineParser parser, Match match, TagState state) {
    final element = new Element(tag, state.children);
    parser.addNode(element);
    return true;
  }
}
| 224 | |
/// Matches inline links like `[blah] [id]` and `[blah] (url)`.
class LinkSyntax extends TagSyntax {
  /// The regex for the end of a link needs to handle both reference style and
  /// inline styles as well as optional titles for inline links. To make that
  /// a bit more palatable, this breaks it into pieces.
  static get linkPattern() {
    // "[id]" reflink id.
    final refLink = @'\s?\[([^\]]*)\]';
    // Optional title in quotes.
    final title = @'(?:[ ]*"([^"]+)"|)';
    // "(url "title")" link.
    final inlineLink = '\\s?\\(([^ )]+)$title\\)';

    // The groups matched by the returned pattern are:
    // 1: Will be non-empty if it's either a ref or inline link. Will be empty
    //    if it's just a bare pair of square brackets with nothing after them.
    // 2: Contains the id inside [] for a reference-style link.
    // 3: Contains the URL for an inline link.
    // 4: Contains the title, if present, for an inline link.
    return '\](?:($refLink|$inlineLink)|)';
  }

  LinkSyntax()
    : super(@'\[', end: linkPattern);

  /// Finishes a link once its closing bracket has matched. Returns `false`
  /// when no link can be produced, so the caller reverts the span to text.
  bool onMatchEnd(InlineParser parser, Match match, TagState state) {
    final target = match[1];

    // If we didn't match refLink or inlineLink, then it means there was
    // nothing after the first square bracket, so it isn't a normal markdown
    // link at all. Instead, we allow users of the library to specify a special
    // resolver function ([setImplicitLinkResolver]) that may choose to handle
    // this. Otherwise, it's just treated as plain text.
    if ((target == null) || (target == '')) {
      if (_implicitLinkResolver == null) return false;

      // Only allow implicit links if the content is just text.
      // TODO(rnystrom): Do we want to relax this?
      final contents = state.children;
      if (contents.length != 1) return false;
      if (contents[0] is! Text) return false;

      Text linkText = contents[0];

      // See if we have a resolver that will generate a link for us.
      final resolved = _implicitLinkResolver(linkText.text);
      if (resolved == null) return false;

      parser.addNode(resolved);
      return true;
    }

    var url;
    var title;

    final inlineUrl = match[3];
    if ((inlineUrl == null) || (inlineUrl == '')) {
      // Reference link like [foo] [bar].
      var id = match[2];
      if (id == '') {
        // The id is empty ("[]") so infer it from the contents.
        id = parser.source.substring(state.startPos + 1, parser.pos);
      }

      // Look up the link. If it's an unknown link just emit plaintext.
      final reference = parser.document.refLinks[id];
      if (reference == null) return false;

      url = reference.url;
      title = reference.title;
    } else {
      // Inline link like [foo](url).
      url = inlineUrl;
      title = match[4];

      // For whatever reason, markdown allows angle-bracketed URLs here.
      if (url.startsWith('<') && url.endsWith('>')) {
        url = url.substring(1, url.length - 1);
      }
    }

    final anchor = new Element('a', state.children);
    anchor.attributes['href'] = escapeHtml(url);

    // Only attach a title attribute when one was actually given.
    final hasTitle = (title != null) && (title != '');
    if (hasTitle) {
      anchor.attributes['title'] = escapeHtml(title);
    }

    parser.addNode(anchor);
    return true;
  }
}
| 310 | |
/// Matches backtick-enclosed inline code blocks.
class CodeSyntax extends InlineSyntax {
  CodeSyntax(String pattern)
    : super(pattern);

  /// Adds a `<code>` element holding the HTML-escaped text captured by the
  /// pattern's first group.
  bool onMatch(InlineParser parser, Match match) {
    final escapedCode = escapeHtml(match[1]);
    final codeElement = new Element.text('code', escapedCode);
    parser.addNode(codeElement);
    return true;
  }
}
| 321 | |
/// Keeps track of a currently open tag while it is being parsed. The parser
/// maintains a stack of these so it can handle nested tags.
class TagState {
  /// The point in the original source where this tag started.
  int startPos;

  /// The point in the original source where open tag ended.
  int endPos;

  /// The syntax that created this node.
  final TagSyntax syntax;

  /// The nodes collected inside this tag so far. Starts out as an empty list.
  final List<Node> children;

  TagState(this.startPos, this.endPos, this.syntax)
    : children = <Node>[];

  /// Attempts to close this tag by matching the current text against its end
  /// pattern. Returns `true` if the end pattern matched at the read position
  /// and [close] was run.
  bool tryMatch(InlineParser parser) {
    Match endMatch = syntax.endPattern.firstMatch(parser.currentSource);

    // Only an end tag sitting exactly at the read position closes this tag.
    if ((endMatch == null) || (endMatch.start() != 0)) return false;

    close(parser, endMatch);
    return true;
  }

  /// Pops this tag off the stack, completes it, and adds it to the output.
  /// Will discard any unmatched tags that happen to be above it on the stack.
  /// If this is the last node in the stack, returns its children; otherwise
  /// returns `null`.
  List<Node> close(InlineParser parser, Match endMatch) {
    // If there are unclosed tags on top of this one when it's closed, that
    // means they are mismatched. Mismatched tags are treated as plain text in
    // markdown. So for each tag above this one, we write its start tag as text
    // and then add its children to this one's children.
    final stack = parser._stack;
    final firstAbove = stack.indexOf(this) + 1;
    final aboveCount = stack.length - firstAbove;

    // Take the unmatched tags off the stack, keeping a copy to flatten.
    final orphans = stack.getRange(firstAbove, aboveCount);
    stack.removeRange(firstAbove, aboveCount);

    for (final orphan in orphans) {
      // The orphan's opening tag becomes literal text...
      parser.writeTextRange(orphan.startPos, orphan.endPos);

      // ...and whatever it had collected now belongs to this tag.
      children.addAll(orphan.children);
    }

    // Flush pending text into this tag, then pop it.
    parser.writeText();
    stack.removeLast();

    // An empty stack means this was the special "results" node.
    if (stack.length == 0) return children;

    // We are still parsing, so hand this tag to its syntax to be added to the
    // parent's children.
    if (!syntax.onMatchEnd(parser, endMatch, this)) {
      // Didn't close correctly so revert to text: rewind the pending-text
      // start to where this tag opened and step over the end tag.
      parser.start = startPos;
      parser.advanceBy(endMatch[0].length);
    } else {
      parser.consume(endMatch[0].length);
    }

    return null;
  }
}
| OLD | NEW |