Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | |
| 2 // for details. All rights reserved. Use of this source code is governed by a | |
| 3 // BSD-style license that can be found in the LICENSE file. | |
| 4 | |
| 5 /// Maintains the internal state needed to parse inline span elements in | |
| 6 /// markdown. | |
| 7 class InlineParser { | |
| /// The inline syntaxes tried by [parse], in priority order: the first one | |
| /// whose pattern matches at the current position wins. The list is built on | |
| /// first access and cached in the static [_syntaxes] field. | |
| 8 static List<InlineSyntax> get syntaxes() { | |
| 9 // Lazy initialize. | |
| 10 if (_syntaxes == null) { | |
| 11 _syntaxes = <InlineSyntax>[ | |
| 12 new AutolinkSyntax(), | |
|
Jennifer Messerly
2011/11/23 22:25:41
const ctors?
Bob Nystrom
2011/11/29 02:56:29
See similar comment on block parser. Here I also r
| |
| 13 new LinkSyntax(), | |
| 14 // "*" surrounded by spaces is left alone. | |
| 15 new TextSyntax(@' \* '), | |
| 16 // "_" surrounded by spaces is left alone. | |
| 17 new TextSyntax(@' _ '), | |
| 18 // Leave already-encoded HTML entities alone. Ensures we don't turn | |
| 19 // "&amp;" into "&amp;amp;" | |
| 20 new TextSyntax(@'&[#a-zA-Z0-9]*;'), | |
| 21 // Encode "&". | |
| 22 new TextSyntax(@'&', sub: '&amp;'), | |
| 23 // Encode "<". (Why not encode ">" too? Gruber is toying with us.) | |
| 24 new TextSyntax(@'<', sub: '&lt;'), | |
| 25 // Parse "**strong**" tags. | |
| 26 new TagSyntax(@'\*\*', tag: 'strong'), | |
| 27 // Parse "__strong__" tags. | |
| 28 new TagSyntax(@'__', tag: 'strong'), | |
| 29 // Parse "*emphasis*" tags. | |
| 30 new TagSyntax(@'\*', tag: 'em'), | |
| 31 // Parse "_emphasis_" tags. | |
| 32 // TODO(rnystrom): Underscores in the middle of a word should not be | |
| 33 // parsed as emphasis like_in_this. | |
| 34 new TagSyntax(@'_', tag: 'em'), | |
| 35 // Parse inline code within double backticks: "``code``". | |
| 36 new CodeSyntax(@'``[ ]?(.*?)[ ]?``'), | |
| 37 // Parse inline code within backticks: "`code`". | |
| 38 new CodeSyntax(@'`([^`]*)`') | |
| 39 ]; | |
| 40 } | |
| 41 | |
| 42 return _syntaxes; | |
| 43 } | |
| 44 | |
| /// Cache behind the [syntaxes] getter; stays `null` until first use. | |
| 45 static List<InlineSyntax> _syntaxes; | |
| 46 | |
| 47 /// The string of markdown being parsed. | |
| 48 final String source; | |
| 49 | |
| 50 /// The markdown document this parser is parsing. | |
| 51 final Document document; | |
| 52 | |
| 53 /// The current read position, as an index into [source]. | |
| 54 int pos = 0; | |
| 55 | |
| 56 /// Index into [source] where the pending, not-yet-written plain text begins. | |
| 57 int start = 0; | |
| 58 | |
| /// The currently open tags, innermost last. [parse] puts a placeholder entry | |
| /// at the bottom to collect the top-level nodes. | |
| 59 final List<TagState> _stack; | |
| 60 | |
| /// Creates a parser for [source] that belongs to [document], starting with an | |
| /// empty tag stack. | |
| 61 InlineParser(this.source, this.document) | |
| 62 : _stack = <TagState>[]; | |
| 63 | |
| /// Parses [source] and returns the resulting top-level nodes. | |
| /// | |
| /// At each position this first tries to close one of the open tags (innermost | |
| /// first), then tries each entry of [syntaxes] in order, and otherwise steps | |
| /// over one character, leaving it pending as plain text. | |
| 64 List<Node> parse() { | |
| 65 // Make a fake top tag to hold the results. | |
| 66 _stack.add(new TagState(0, null)); | |
| 67 | |
| 68 while (!isDone) { | |
| 69 bool matched = false; | |
| 70 | |
| 71 // See if any of the current tags on the stack match. We don't allow tags | |
| 72 // of the same kind to nest, so this takes priority over other possible matches. | |
| // Index 0 is skipped: it is the fake top tag, whose syntax is null. | |
| 73 for (int i = _stack.length - 1; i > 0; i--) { | |
| 74 if (_stack[i].tryMatch(this)) { | |
| 75 matched = true; | |
| 76 break; | |
| 77 } | |
| 78 } | |
| 79 if (matched) continue; | |
| 80 | |
| 81 // See if the current text matches any defined markdown syntax. | |
| 82 for (final syntax in syntaxes) { | |
| 83 if (syntax.tryMatch(this)) { | |
| 84 matched = true; | |
| 85 break; | |
| 86 } | |
| 87 } | |
| 88 if (matched) continue; | |
| 89 | |
| 90 // If we got here, it's just text. | |
| 91 advanceBy(1); | |
| 92 } | |
| 93 | |
| 94 // Unwind any unmatched tags and get the results. | |
| 95 return _stack[0].close(this, null); | |
| 96 } | |
| 97 | |
| /// Flushes the pending plain text, i.e. [source] from [start] up to [pos], | |
| /// into the children of the tag on top of the stack and then moves [start] | |
| /// up to [pos]. Does nothing when there is no pending text. | |
| 98 writeText() { | |
| 99 if (pos > start) { | |
| 100 final text = source.substring(start, pos); | |
| 101 final nodes = _stack.last().children; | |
| 102 | |
| 103 // If the previous node is text too, just append. | |
| // The existing node is replaced with a new Text holding the joined string. | |
| 104 if ((nodes.length > 0) && (nodes.last() is Text)) { | |
| 105 final newNode = new Text('${nodes.last().text}$text'); | |
| 106 nodes[nodes.length - 1] = newNode; | |
| 107 } else { | |
| 108 nodes.add(new Text(text)); | |
| 109 } | |
| 110 | |
| 111 start = pos; | |
| 112 } | |
| 113 } | |
| 114 | |
| 115 /// Removes the top tag from the stack and rewinds [start] to the position | |
| 116 /// where that tag began, so the source from there on becomes pending plain | |
| /// text for the next [writeText]. Nothing is written here, and the nodes the | |
| /// discarded tag had already collected are dropped along with it. | |
| 117 discardUnmatchedTag() { | |
| 118 final unfinished = _stack.removeLast(); | |
| 119 start = unfinished.startPos; | |
| 120 } | |
| 121 | |
| /// Appends [node] to the children of the tag currently on top of the stack. | |
| 122 addNode(Node node) { | |
| 123 _stack.last().children.add(node); | |
| 124 } | |
| 125 | |
| 126 // TODO(rnystrom): Only need this because RegExp doesn't let you start | |
| 127 // searching from a given offset. | |
|
Jennifer Messerly
2011/11/23 22:25:41
yeah... that seriously needs to be fixed in RegExp
Bob Nystrom
2011/11/29 02:56:29
Yeah. There's a few things in RegExp that are anno
| |
| /// The unparsed tail of [source], from [pos] to the end. A new substring is | |
| /// built on every access. | |
| 128 String get currentSource() => source.substring(pos, source.length); | |
| 129 | |
| /// Whether [pos] has reached the end of [source]. | |
| 130 bool get isDone() => pos == source.length; | |
| 131 | |
| /// Moves [pos] forward by [length] without touching [start], so the skipped | |
| /// characters stay pending as plain text. | |
| 132 void advanceBy(int length) => pos += length; | |
| /// Moves [pos] forward by [length] and resets [start] to the new position, so | |
| /// the skipped characters will not be written out by [writeText]. | |
| 133 void consume(int length) { | |
| 134 pos += length; | |
| 135 start = pos; | |
| 136 } | |
| 137 } | |
| 138 | |
| 139 /// Represents one kind of markdown tag that can be parsed. | |
| /// | |
| /// Subclasses supply the behavior for a successful match by implementing | |
| /// [onMatch]. | |
| 140 class InlineSyntax { | |
| /// Pattern that must match at the very start of the remaining source. | |
| 141 final RegExp pattern; | |
| 142 | |
| /// Compiles [pattern]; the `true` argument is the RegExp multiLine flag. | |
| 143 InlineSyntax(String pattern) | |
| 144 : pattern = new RegExp(pattern, true); | |
| 145 // TODO(rnystrom): Should use named arg for RegExp multiLine. | |
| 146 | |
| /// Tries this syntax at the parser's current position. | |
| /// | |
| /// Returns `true` when [pattern] matches exactly there. In that case any | |
| /// pending plain text is flushed, [onMatch] is called, and the matched text | |
| /// is consumed only if [onMatch] returns `true`. Returns `false` when there | |
| /// is no match or the first match starts later in the source. | |
| 147 bool tryMatch(InlineParser parser) { | |
| 148 final startMatch = pattern.firstMatch(parser.currentSource); | |
| 149 if ((startMatch != null) && (startMatch.start() == 0)) { | |
| 150 // Write any existing plain text up to this point. | |
| 151 parser.writeText(); | |
| 152 | |
| 153 if (onMatch(parser, startMatch)) { | |
| 154 parser.consume(startMatch.group(0).length); | |
| 155 } | |
| 156 return true; | |
| 157 } | |
| 158 return false; | |
| 159 } | |
| 160 | |
| /// Handles a successful [match] of [pattern]. Return `true` to have | |
| /// [tryMatch] consume the matched text, or `false` if the implementation | |
| /// has already moved the parser itself. | |
| // Declared as onMatch because that is the name tryMatch calls and the name | |
| // every subclass implements. | |
| 161 abstract bool onMatch(InlineParser parser, Match match); | |
| 162 } | |
| 163 | |
| 164 /// Matches stuff that should just be passed through as straight text. | |
| 165 class TextSyntax extends InlineSyntax { | |
| /// Replacement text to emit for a match, or `null` to keep the matched | |
| /// source text as is. | |
| 166 String substitute; | |
| /// Creates a syntax for [pattern]; [sub] is the optional replacement text. | |
| 167 TextSyntax(String pattern, [String sub]) | |
| 168 : super(pattern), | |
| 169 substitute = sub; | |
| 170 | |
| /// Without a [substitute], steps over the match so it stays pending as plain | |
| /// text and returns `false` so the caller does not consume it. With one, adds | |
| /// a Text node holding [substitute] and returns `true`. | |
| 171 bool onMatch(InlineParser parser, Match match) { | |
| 172 if (substitute == null) { | |
| 173 // Just use the original matched text. | |
| 174 parser.advanceBy(match.group(0).length); | |
| 175 return false; | |
| 176 } | |
| 177 | |
| 178 // Insert the substitution. | |
| 179 parser.addNode(new Text(substitute)); | |
| 180 return true; | |
| 181 } | |
| 182 } | |
| 183 | |
| 184 /// Matches autolinks like <http://foo.com>. | |
| 185 class AutolinkSyntax extends InlineSyntax { | |
| /// The pattern accepts http, https and ftp URLs; group 1 captures the URL | |
| /// without the surrounding angle brackets. | |
| 186 AutolinkSyntax() | |
| 187 : super(@'<((http|https|ftp)://[^>]*)>'); | |
| 188 // TODO(rnystrom): Make case insensitive. | |
| 189 | |
| /// Adds an `a` element whose text is the escaped URL and whose `href` is the | |
| /// URL, then returns `true` so the whole `<...>` match is consumed. | |
| 190 bool onMatch(InlineParser parser, Match match) { | |
| 191 final url = match.group(1); | |
| 192 | |
| 193 final anchor = new Element.text('a', escapeHtml(url)); | |
| // NOTE(review): href is stored unescaped here, while LinkSyntax passes its | |
| // href through escapeHtml. Confirm which of the two is intended. | |
| 194 anchor.attributes['href'] = url; | |
| 195 parser.addNode(anchor); | |
| 196 | |
| 197 return true; | |
| 198 } | |
| 199 } | |
| 200 | |
| 201 /// Matches syntax that has a pair of tags and becomes an element, like '*' for | |
| 202 /// `<em>`. Allows nested tags. | |
| 203 class TagSyntax extends InlineSyntax { | |
| /// Pattern that closes the tag. | |
| 204 final RegExp endPattern; | |
| /// Name of the element created when the tag is closed. | |
| 205 final String tag; | |
| 206 | |
| /// [pattern] opens the tag. [end] closes it and defaults to [pattern] when it | |
| /// is not given. | |
| 207 TagSyntax(String pattern, [String tag, String end = null]) | |
| 208 : super(pattern), | |
| 209 endPattern = new RegExp((end != null) ? end : pattern, true), | |
| 210 tag = tag; | |
| 211 // TODO(rnystrom): Doing this.field doesn't seem to work with named args. | |
|
Jennifer Messerly
2011/11/23 22:25:41
what's the issue here? can you file to the issue t
Bob Nystrom
2011/11/29 02:56:29
I think this might be the same issue that Mattias
| |
| 212 // TODO(rnystrom): Should use named arg for RegExp multiLine. | |
| 213 | |
| /// Opens the tag: pushes a TagState that records the parser position of the | |
| /// opening delimiter, and returns `true` so the delimiter is consumed. | |
| 214 bool onMatch(InlineParser parser, Match match) { | |
| 215 parser._stack.add(new TagState(parser.pos, this)); | |
| 216 return true; | |
| 217 } | |
| 218 | |
| /// Closes the tag: adds an element named [tag] that wraps the children | |
| /// collected in [state]. Always returns `true`. | |
| 219 bool onMatchEnd(InlineParser parser, Match match, TagState state) { | |
| 220 parser.addNode(new Element(tag, state.children)); | |
| 221 return true; | |
| 222 } | |
| 223 } | |
| 224 | |
| 225 /// Matches inline links like [blah] [id] and [blah] (url). | |
| 226 class LinkSyntax extends TagSyntax { | |
| 227 /// The regex for the end of a link needs to handle both reference style and | |
| 228 /// inline styles as well as optional titles for inline links. To make that | |
| 229 /// a bit more palatable, this breaks it into pieces. | |
| /// | |
| /// Capture groups in the result: 1 is the reference id, 2 is the inline URL | |
| /// and 3 is the inline title. | |
| 230 static get linkPattern() { | |
|
Jennifer Messerly
2011/11/23 22:25:41
could this be a field? or does the string interp b
Bob Nystrom
2011/11/29 02:56:29
It was breaking constness when I tried that.
| |
| 231 final bracket = @'\][ \n\t]?'; // "]" with optional space after. | |
| 232 final refLink = @'\[([^\]]*)\]'; // "[id]" reflink id. | |
| 233 final title = @'(?:[ ]*"([^"]+)"|)'; // Optional title in quotes. | |
| 234 final inlineLink = '\\(([^ )]+)$title\\)'; // "(url "title")" inline link. | |
| 235 return '$bracket(?:$refLink|$inlineLink)'; | |
| 236 } | |
| 237 | |
| /// A link opens at "[" and closes at whatever [linkPattern] matches. | |
| 238 LinkSyntax() | |
| 239 : super(@'\[', end: linkPattern); | |
| 240 | |
| /// Builds the `a` element for a closed link and adds it to the parser. | |
| /// | |
| /// Returns `false` without adding anything when a reference-style link names | |
| /// an id that is not in the document's `refLinks`; the caller then reverts | |
| /// the link to plain text. | |
| 241 bool onMatchEnd(InlineParser parser, Match match, TagState state) { | |
| 242 var url; | |
| 243 var title; | |
| 244 | |
| // NOTE(review): this relies on a non-participating group being reported as | |
| // '' rather than null. Confirm for the RegExp implementation in use. | |
| 245 if (match.group(2) != '') { | |
| 246 // Inline link like [foo](url). | |
| 247 url = match.group(2); | |
| 248 title = match.group(3); | |
| 249 | |
| 250 // For whatever reason, markdown allows angle-bracketed URLs here. | |
| 251 if (url.startsWith('<') && url.endsWith('>')) { | |
| 252 url = url.substring(1, url.length - 1); | |
| 253 } | |
| 254 } else { | |
| 255 // Reference link like [foo] [bar]. | |
| 256 var id = match.group(1); | |
| 257 if (id == '') { | |
| 258 // The id is empty ("[]") so infer it from the contents. | |
| // That is the source text between the opening "[" and the current position. | |
| 259 id = parser.source.substring(state.startPos + 1, parser.pos); | |
| 260 } | |
| 261 | |
| 262 // Look up the link. | |
| 263 final link = parser.document.refLinks[id]; | |
| 264 // If it's an unknown link just emit plaintext. | |
| 265 if (link == null) return false; | |
| 266 | |
| 267 url = link.url; | |
| 268 title = link.title; | |
| 269 } | |
| 270 | |
| 271 final anchor = new Element('a', state.children); | |
| 272 anchor.attributes['href'] = escapeHtml(url); | |
| // The title attribute is only set when a non-empty title is present. | |
| 273 if ((title != null) && (title != '')) { | |
| 274 anchor.attributes['title'] = escapeHtml(title); | |
| 275 } | |
| 276 | |
| 277 parser.addNode(anchor); | |
| 278 return true; | |
| 279 } | |
| 280 } | |
| 281 | |
| 282 /// Matches backtick-enclosed inline code blocks. | |
| 283 class CodeSyntax extends InlineSyntax { | |
| /// [pattern] must capture the code text in group 1. | |
| 284 CodeSyntax(String pattern) | |
| 285 : super(pattern); | |
| 286 | |
| /// Adds a `code` element holding the escaped contents of group 1 and returns | |
| /// `true` so the whole match, backticks included, is consumed. | |
| 287 bool onMatch(InlineParser parser, Match match) { | |
| 288 parser.addNode(new Element.text('code', escapeHtml(match.group(1)))); | |
| 289 return true; | |
| 290 } | |
| 291 } | |
| 292 | |
| 293 /// Keeps track of a currently open tag while it is being parsed. The parser | |
| 294 /// maintains a stack of these so it can handle nested tags. | |
| 295 class TagState { | |
| 296 /// The point in the original source where this tag started. | |
| 297 int startPos; | |
| 298 | |
| 299 /// The syntax that created this node. | |
| 300 final TagSyntax syntax; | |
| 301 | |
| 302 /// The child nodes collected for this tag so far. Always a list; it starts out empty. | |
| 303 final List<Node> children; | |
| 304 | |
| /// Creates the state for a tag opened at [startPos] by [syntax], with no | |
| /// children yet. | |
| 305 TagState(this.startPos, this.syntax) | |
| 306 : children = <Node>[]; | |
| 307 | |
| 308 /// Attempts to close this tag by matching the current text against its end | |
| 309 /// pattern. | |
| /// | |
| /// Returns `true` and closes the tag when the end pattern matches exactly at | |
| /// the parser's current position, and `false` otherwise. | |
| 310 bool tryMatch(InlineParser parser) { | |
| 311 Match endMatch = syntax.endPattern.firstMatch(parser.currentSource); | |
| 312 if ((endMatch != null) && (endMatch.start() == 0)) { | |
| 313 // Close the tag. | |
| 314 close(parser, endMatch); | |
| 315 return true; | |
| 316 } | |
| 317 | |
| 318 return false; | |
| 319 } | |
| 320 | |
| 321 /// Pops this tag off the stack, completes it, and adds it to the output. | |
| 322 /// Will discard any unmatched tags that happen to be above it on the stack. | |
| 323 /// If this is the last node in the stack, returns its children. | |
| /// In every other case the return value is `null`. | |
| 324 List<Node> close(InlineParser parser, Match endMatch) { | |
| 325 // Found a match. If there is anything above this tag on the stack, | |
| 326 // discard it. For example, given '*a _b*...' when we reach the second | |
| 327 // '*', '_' will be on the top of the stack. It's mismatched, so we | |
| 328 // just treat it as text. | |
| 329 while (parser._stack.last() != this) parser.discardUnmatchedTag(); | |
| 330 | |
| 331 // Pop this off the stack. | |
| // Pending text is flushed first so that it lands in this tag's children. | |
| 332 parser.writeText(); | |
| 333 parser._stack.removeLast(); | |
| 334 | |
| 335 // If the stack is empty now, this is the special "results" node. | |
| 336 if (parser._stack.length == 0) return children; | |
| 337 | |
| 338 // We are still parsing, so add this to its parent's children. | |
| 339 if (syntax.onMatchEnd(parser, endMatch, this)) { | |
| 340 parser.consume(endMatch.group(0).length); | |
| 341 } else { | |
| 342 // Didn't close correctly so revert to text. | |
| 343 parser.start = startPos; | |
| 344 parser.advanceBy(endMatch.group(0).length); | |
| 345 } | |
| 346 | |
| 347 return null; | |
| 348 } | |
| 349 } | |
| OLD | NEW |