| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 library markdown.inline_parser; | 5 library markdown.inline_parser; |
| 6 | 6 |
| 7 import 'ast.dart'; | 7 import 'ast.dart'; |
| 8 import 'document.dart'; | 8 import 'document.dart'; |
| 9 import 'util.dart'; | 9 import 'util.dart'; |
| 10 | 10 |
| (...skipping 11 matching lines...) Expand all Loading... |
| 22 // Since it is purely for optimization, it can be removed for debugging. | 22 // Since it is purely for optimization, it can be removed for debugging. |
| 23 | 23 |
| 24 // TODO(amouravski): this regex will glom up any custom syntaxes unless | 24 // TODO(amouravski): this regex will glom up any custom syntaxes unless |
| 25 // they're at the beginning. | 25 // they're at the beginning. |
| 26 new TextSyntax(r'\s*[A-Za-z0-9]+'), | 26 new TextSyntax(r'\s*[A-Za-z0-9]+'), |
| 27 | 27 |
| 28 // The real syntaxes. | 28 // The real syntaxes. |
| 29 | 29 |
| 30 new AutolinkSyntax(), | 30 new AutolinkSyntax(), |
| 31 new LinkSyntax(), | 31 new LinkSyntax(), |
| 32 new ImageLinkSyntax(), |
| 32 // "*" surrounded by spaces is left alone. | 33 // "*" surrounded by spaces is left alone. |
| 33 new TextSyntax(r' \* '), | 34 new TextSyntax(r' \* '), |
| 34 // "_" surrounded by spaces is left alone. | 35 // "_" surrounded by spaces is left alone. |
| 35 new TextSyntax(r' _ '), | 36 new TextSyntax(r' _ '), |
| 36 // Leave already-encoded HTML entities alone. Ensures we don't turn | 37 // Leave already-encoded HTML entities alone. Ensures we don't turn |
| 37 // "&amp;" into "&amp;amp;" | 38 // "&amp;" into "&amp;amp;" |
| 38 new TextSyntax(r'&[#a-zA-Z0-9]*;'), | 39 new TextSyntax(r'&[#a-zA-Z0-9]*;'), |
| 39 // Encode "&". | 40 // Encode "&". |
| 40 new TextSyntax(r'&', sub: '&amp;'), | 41 new TextSyntax(r'&', sub: '&amp;'), |
| 41 // Encode "<". (Why not encode ">" too? Gruber is toying with us.) | 42 // Encode "<". (Why not encode ">" too? Gruber is toying with us.) |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 76 InlineParser(this.source, this.document) | 77 InlineParser(this.source, this.document) |
| 77 : _stack = <TagState>[] { | 78 : _stack = <TagState>[] { |
| 78 /// User specified syntaxes will be the first syntaxes to be evaluated. | 79 /// User specified syntaxes will be the first syntaxes to be evaluated. |
| 79 if (document.inlineSyntaxes != null) { | 80 if (document.inlineSyntaxes != null) { |
| 80 syntaxes = []; | 81 syntaxes = []; |
| 81 syntaxes.addAll(document.inlineSyntaxes); | 82 syntaxes.addAll(document.inlineSyntaxes); |
| 82 syntaxes.addAll(defaultSyntaxes); | 83 syntaxes.addAll(defaultSyntaxes); |
| 83 } else { | 84 } else { |
| 84 syntaxes = defaultSyntaxes; | 85 syntaxes = defaultSyntaxes; |
| 85 } | 86 } |
| 86 // Custom link resolver goes after the generic text syntax. | 87 // Custom link resolvers go after the generic text syntax. |
| 87 syntaxes.insert(1, new LinkSyntax(linkResolver: document.linkResolver)); | 88 syntaxes.insertAll(1, [ |
| 89 new LinkSyntax(linkResolver: document.linkResolver), |
| 90 new ImageLinkSyntax(linkResolver: document.linkResolver) |
| 91 ]); |
| 88 } | 92 } |
| 89 | 93 |
| 90 List<Node> parse() { | 94 List<Node> parse() { |
| 91 // Make a fake top tag to hold the results. | 95 // Make a fake top tag to hold the results. |
| 92 _stack.add(new TagState(0, 0, null)); | 96 _stack.add(new TagState(0, 0, null)); |
| 93 | 97 |
| 94 while (!isDone) { | 98 while (!isDone) { |
| 95 bool matched = false; | 99 bool matched = false; |
| 96 | 100 |
| 97 // See if any of the current tags on the stack match. We don't allow tags | 101 // See if any of the current tags on the stack match. We don't allow tags |
| (...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 207 | 211 |
| 208 /// Matches autolinks like `<http://foo.com>`. | 212 /// Matches autolinks like `<http://foo.com>`. |
| 209 class AutolinkSyntax extends InlineSyntax { | 213 class AutolinkSyntax extends InlineSyntax { |
| 210 AutolinkSyntax() | 214 AutolinkSyntax() |
| 211 : super(r'<((http|https|ftp)://[^>]*)>'); | 215 : super(r'<((http|https|ftp)://[^>]*)>'); |
| 212 // TODO(rnystrom): Make case insensitive. | 216 // TODO(rnystrom): Make case insensitive. |
| 213 | 217 |
| 214 bool onMatch(InlineParser parser, Match match) { | 218 bool onMatch(InlineParser parser, Match match) { |
| 215 final url = match[1]; | 219 final url = match[1]; |
| 216 | 220 |
| 217 final anchor = new Element.text('a', escapeHtml(url)); | 221 final anchor = new Element.text('a', escapeHtml(url)) |
| 218 anchor.attributes['href'] = url; | 222 ..attributes['href'] = url; |
| 219 parser.addNode(anchor); | 223 parser.addNode(anchor); |
| 220 | 224 |
| 221 return true; | 225 return true; |
| 222 } | 226 } |
| 223 } | 227 } |
| 224 | 228 |
| 225 /// Matches syntax that has a pair of tags and becomes an element, like `*` for | 229 /// Matches syntax that has a pair of tags and becomes an element, like `*` for |
| 226 /// `<em>`. Allows nested tags. | 230 /// `<em>`. Allows nested tags. |
| 227 class TagSyntax extends InlineSyntax { | 231 class TagSyntax extends InlineSyntax { |
| 228 final RegExp endPattern; | 232 final RegExp endPattern; |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 260 return '\](?:($refLink|$inlineLink)|)'; | 264 return '\](?:($refLink|$inlineLink)|)'; |
| 261 | 265 |
| 262 // The groups matched by this are: | 266 // The groups matched by this are: |
| 263 // 1: Will be non-empty if it's either a ref or inline link. Will be empty | 267 // 1: Will be non-empty if it's either a ref or inline link. Will be empty |
| 264 // if it's just a bare pair of square brackets with nothing after them. | 268 // if it's just a bare pair of square brackets with nothing after them. |
| 265 // 2: Contains the id inside [] for a reference-style link. | 269 // 2: Contains the id inside [] for a reference-style link. |
| 266 // 3: Contains the URL for an inline link. | 270 // 3: Contains the URL for an inline link. |
| 267 // 4: Contains the title, if present, for an inline link. | 271 // 4: Contains the title, if present, for an inline link. |
| 268 } | 272 } |
| 269 | 273 |
| 270 LinkSyntax({this.linkResolver}) | 274 LinkSyntax({this.linkResolver, String pattern: r'\['}) |
| 271 : super(r'\[', end: linkPattern); | 275 : super(pattern, end: linkPattern); |
| 272 | 276 |
| 273 bool onMatchEnd(InlineParser parser, Match match, TagState state) { | 277 Node createNode(InlineParser parser, Match match, TagState state) { |
| 274 var url; | |
| 275 var title; | |
| 276 | |
| 277 // If we didn't match refLink or inlineLink, then it means there was | 278 // If we didn't match refLink or inlineLink, then it means there was |
| 278 // nothing after the first square bracket, so it isn't a normal markdown | 279 // nothing after the first square bracket, so it isn't a normal markdown |
| 279 // link at all. Instead, we allow users of the library to specify a special | 280 // link at all. Instead, we allow users of the library to specify a special |
| 280 // resolver function ([linkResolver]) that may choose to handle | 281 // resolver function ([linkResolver]) that may choose to handle |
| 281 // this. Otherwise, it's just treated as plain text. | 282 // this. Otherwise, it's just treated as plain text. |
| 282 if ((match[1] == null) || (match[1] == '')) { | 283 if (isNullOrEmpty(match[1])) { |
| 283 if (linkResolver == null) return false; | 284 if (linkResolver == null) return null; |
| 284 | 285 |
| 285 // Only allow implicit links if the content is just text. | 286 // Only allow implicit links if the content is just text. |
| 286 // TODO(rnystrom): Do we want to relax this? | 287 // TODO(rnystrom): Do we want to relax this? |
| 287 if (state.children.any((child) => child is! Text)) return false; | 288 if (state.children.any((child) => child is! Text)) return null; |
| 288 // If there are multiple children, but they are all text, send the | 289 // If there are multiple children, but they are all text, send the |
| 289 // combined text to linkResolver. | 290 // combined text to linkResolver. |
| 290 var textToResolve = state.children.fold('', | 291 var textToResolve = state.children.fold('', |
| 291 (oldVal, child) => oldVal + child.text); | 292 (oldVal, child) => oldVal + child.text); |
| 293 |
| 292 // See if we have a resolver that will generate a link for us. | 294 // See if we have a resolver that will generate a link for us. |
| 293 final node = linkResolver(textToResolve); | 295 return linkResolver(textToResolve); |
| 294 if (node == null) return false; | 296 } else { |
| 297 Link link = getLink(parser, match, state); |
| 298 if (link == null) return null; |
| 295 | 299 |
| 296 parser.addNode(node); | 300 final Element node = new Element('a', state.children) |
| 297 return true; | 301 ..attributes["href"] = escapeHtml(link.url) |
| 302 ..attributes['title'] = escapeHtml(link.title); |
| 303 |
| 304 cleanMap(node.attributes); |
| 305 return node; |
| 298 } | 306 } |
| 307 } |
| 299 | 308 |
| 309 Link getLink(InlineParser parser, Match match, TagState state) { |
| 300 if ((match[3] != null) && (match[3] != '')) { | 310 if ((match[3] != null) && (match[3] != '')) { |
| 301 // Inline link like [foo](url). | 311 // Inline link like [foo](url). |
| 302 url = match[3]; | 312 var url = match[3]; |
| 303 title = match[4]; | 313 var title = match[4]; |
| 304 | 314 |
| 305 // For whatever reason, markdown allows angle-bracketed URLs here. | 315 // For whatever reason, markdown allows angle-bracketed URLs here. |
| 306 if (url.startsWith('<') && url.endsWith('>')) { | 316 if (url.startsWith('<') && url.endsWith('>')) { |
| 307 url = url.substring(1, url.length - 1); | 317 url = url.substring(1, url.length - 1); |
| 308 } | 318 } |
| 319 |
| 320 return new Link(null, url, title); |
| 309 } else { | 321 } else { |
| 322 var id; |
| 310 // Reference link like [foo] [bar]. | 323 // Reference link like [foo] [bar]. |
| 311 var id = match[2]; | 324 if (match[2] == '') |
| 312 if (id == '') { | |
| 313 // The id is empty ("[]") so infer it from the contents. | 325 // The id is empty ("[]") so infer it from the contents. |
| 314 id = parser.source.substring(state.startPos + 1, parser.pos); | 326 id = parser.source.substring(state.startPos + 1, parser.pos); |
| 315 } | 327 else |
| 328 id = match[2]; |
| 316 | 329 |
| 317 // References are case-insensitive. | 330 // References are case-insensitive. |
| 318 id = id.toLowerCase(); | 331 id = id.toLowerCase(); |
| 332 return parser.document.refLinks[id]; |
| 333 } |
| 334 } |
| 319 | 335 |
| 320 // Look up the link. | 336 bool onMatchEnd(InlineParser parser, Match match, TagState state) { |
| 321 final link = parser.document.refLinks[id]; | 337 Node node = createNode(parser, match, state); |
| 322 // If it's an unknown link just emit plaintext. | 338 if (node == null) return false; |
| 323 if (link == null) return false; | 339 parser.addNode(node); |
| 324 | |
| 325 url = link.url; | |
| 326 title = link.title; | |
| 327 } | |
| 328 | |
| 329 final anchor = new Element('a', state.children); | |
| 330 anchor.attributes['href'] = escapeHtml(url); | |
| 331 if ((title != null) && (title != '')) { | |
| 332 anchor.attributes['title'] = escapeHtml(title); | |
| 333 } | |
| 334 | |
| 335 parser.addNode(anchor); | |
| 336 return true; | 340 return true; |
| 337 } | 341 } |
| 338 } | 342 } |
| 339 | 343 |
| 344 /// Matches images like `![alternate text](url "optional title")` and |
| 345 /// `![alternate text][url reference]`. |
| 346 class ImageLinkSyntax extends LinkSyntax { |
| 347 Resolver linkResolver; |
| 348 ImageLinkSyntax({this.linkResolver}) |
| 349 : super(pattern: r'!\['); |
| 350 |
| 351 Node createNode(InlineParser parser, Match match, TagState state) { |
| 352 Node node = super.createNode(parser, match, state); |
| 353 if (node == null) return null; |
| 354 |
| 355 final Element imageElement = new Element.withTag("img") |
| 356 ..attributes["src"] = node.attributes["href"] |
| 357 ..attributes["title"] = node.attributes["title"] |
| 358 ..attributes["alt"] = node.children |
| 359 .map((e) => isNullOrEmpty(e) || e is! Text ? '' : e.text) |
| 360 .join(' '); |
| 361 |
| 362 cleanMap(imageElement.attributes); |
| 363 |
| 364 node.children |
| 365 ..clear() |
| 366 ..add(imageElement); |
| 367 |
| 368 return node; |
| 369 } |
| 370 } |
| 371 |
| 372 |
| 340 /// Matches backtick-enclosed inline code blocks. | 373 /// Matches backtick-enclosed inline code blocks. |
| 341 class CodeSyntax extends InlineSyntax { | 374 class CodeSyntax extends InlineSyntax { |
| 342 CodeSyntax(String pattern) | 375 CodeSyntax(String pattern) |
| 343 : super(pattern); | 376 : super(pattern); |
| 344 | 377 |
| 345 bool onMatch(InlineParser parser, Match match) { | 378 bool onMatch(InlineParser parser, Match match) { |
| 346 parser.addNode(new Element.text('code', escapeHtml(match[1]))); | 379 parser.addNode(new Element.text('code', escapeHtml(match[1]))); |
| 347 return true; | 380 return true; |
| 348 } | 381 } |
| 349 } | 382 } |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 414 parser.consume(endMatch[0].length); | 447 parser.consume(endMatch[0].length); |
| 415 } else { | 448 } else { |
| 416 // Didn't close correctly so revert to text. | 449 // Didn't close correctly so revert to text. |
| 417 parser.start = startPos; | 450 parser.start = startPos; |
| 418 parser.advanceBy(endMatch[0].length); | 451 parser.advanceBy(endMatch[0].length); |
| 419 } | 452 } |
| 420 | 453 |
| 421 return null; | 454 return null; |
| 422 } | 455 } |
| 423 } | 456 } |
| OLD | NEW |