| Index: utils/markdown/inline_parser.dart
|
| diff --git a/utils/markdown/inline_parser.dart b/utils/markdown/inline_parser.dart
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..8bf7693d81d8050abac5cec43a8cb8aecafc1010
|
| --- /dev/null
|
| +++ b/utils/markdown/inline_parser.dart
|
| @@ -0,0 +1,349 @@
|
| +// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
|
| +// for details. All rights reserved. Use of this source code is governed by a
|
| +// BSD-style license that can be found in the LICENSE file.
|
| +
|
| +/// Maintains the internal state needed to parse inline span elements in
|
| +/// markdown.
|
| +class InlineParser {
|
| +  /// The inline syntaxes tried at each position, created on first access.
|
| +  /// [parse] walks this list in order and stops at the first syntax that
|
| +  /// matches, so earlier entries take priority over later ones.
|
| +  static List<InlineSyntax> get syntaxes() {
|
| +    // Lazy initialize.
|
| +    if (_syntaxes == null) {
|
| +      _syntaxes = <InlineSyntax>[
|
| +        new AutolinkSyntax(),
|
| +        new LinkSyntax(),
|
| +        // "*" surrounded by spaces is left alone.
|
| +        new TextSyntax(@' \* '),
|
| +        // "_" surrounded by spaces is left alone.
|
| +        new TextSyntax(@' _ '),
|
| +        // Leave already-encoded HTML entities alone. Ensures we don't turn
|
| +        // "&amp;" into "&amp;amp;"
|
| +        new TextSyntax(@'&[#a-zA-Z0-9]*;'),
|
| +        // Encode "&". The substitute must be the entity itself; substituting
|
| +        // a bare "&" would leave the character unescaped in the output.
|
| +        new TextSyntax(@'&', sub: '&amp;'),
|
| +        // Encode "<". (Why not encode ">" too? Gruber is toying with us.)
|
| +        new TextSyntax(@'<', sub: '&lt;'),
|
| +        // Parse "**strong**" tags.
|
| +        new TagSyntax(@'\*\*', tag: 'strong'),
|
| +        // Parse "__strong__" tags.
|
| +        new TagSyntax(@'__', tag: 'strong'),
|
| +        // Parse "*emphasis*" tags.
|
| +        new TagSyntax(@'\*', tag: 'em'),
|
| +        // Parse "_emphasis_" tags.
|
| +        // TODO(rnystrom): Underscores in the middle of a word should not be
|
| +        // parsed as emphasis like_in_this.
|
| +        new TagSyntax(@'_', tag: 'em'),
|
| +        // Parse inline code within double backticks: "``code``".
|
| +        new CodeSyntax(@'``[ ]?(.*?)[ ]?``'),
|
| +        // Parse inline code within backticks: "`code`".
|
| +        new CodeSyntax(@'`([^`]*)`')
|
| +      ];
|
| +    }
|
| +
|
| +    return _syntaxes;
|
| +  }
|
| +
|
| +  /// Backing store for [syntaxes]; stays `null` until first requested.
|
| +  static List<InlineSyntax> _syntaxes;
|
| +
|
| +  /// The string of markdown being parsed.
|
| +  final String source;
|
| +
|
| +  /// The markdown document this parser is parsing.
|
| +  final Document document;
|
| +
|
| +  /// The current read position.
|
| +  int pos = 0;
|
| +
|
| +  /// Starting position of the last unconsumed text.
|
| +  int start = 0;
|
| +
|
| +  /// The currently open tags, innermost last. Index 0 is the fake top tag
|
| +  /// that [parse] pushes to collect the results.
|
| +  final List<TagState> _stack;
|
| +
|
| +  InlineParser(this.source, this.document)
|
| +    : _stack = <TagState>[];
|
| +
|
| +  /// Parses [source] from the current position to its end and returns the
|
| +  /// children gathered by the fake top tag.
|
| +  List<Node> parse() {
|
| +    // Make a fake top tag to hold the results.
|
| +    _stack.add(new TagState(0, null));
|
| +
|
| +    while (!isDone) {
|
| +      bool matched = false;
|
| +
|
| +      // See if any of the current tags on the stack match. We don't allow tags
|
| +      // of the same kind to nest, so this takes priority over other possible
|
| +      // matches. Index 0 is skipped: the fake top tag has no syntax to match.
|
| +      for (int i = _stack.length - 1; i > 0; i--) {
|
| +        if (_stack[i].tryMatch(this)) {
|
| +          matched = true;
|
| +          break;
|
| +        }
|
| +      }
|
| +      if (matched) continue;
|
| +
|
| +      // See if the current text matches any defined markdown syntax.
|
| +      for (final syntax in syntaxes) {
|
| +        if (syntax.tryMatch(this)) {
|
| +          matched = true;
|
| +          break;
|
| +        }
|
| +      }
|
| +      if (matched) continue;
|
| +
|
| +      // If we got here, it's just text.
|
| +      advanceBy(1);
|
| +    }
|
| +
|
| +    // Unwind any unmatched tags and get the results.
|
| +    return _stack[0].close(this, null);
|
| +  }
|
| +
|
| +  /// Emits the pending plain text between [start] and [pos] into the children
|
| +  /// of the innermost open tag, then moves [start] up to [pos]. Does nothing
|
| +  /// when there is no pending text.
|
| +  writeText() {
|
| +    if (pos > start) {
|
| +      final text = source.substring(start, pos);
|
| +      final nodes = _stack.last().children;
|
| +
|
| +      // If the previous node is text too, just append.
|
| +      if ((nodes.length > 0) && (nodes.last() is Text)) {
|
| +        final newNode = new Text('${nodes.last().text}$text');
|
| +        nodes[nodes.length - 1] = newNode;
|
| +      } else {
|
| +        nodes.add(new Text(text));
|
| +      }
|
| +
|
| +      start = pos;
|
| +    }
|
| +  }
|
| +
|
| +  /// Removes the top tag from the stack and rewinds [start] to where that tag
|
| +  /// began, so everything from there on is emitted as plain text by the next
|
| +  /// [writeText].
|
| +  discardUnmatchedTag() {
|
| +    final unfinished = _stack.removeLast();
|
| +    start = unfinished.startPos;
|
| +  }
|
| +
|
| +  /// Appends [node] to the children of the innermost open tag.
|
| +  addNode(Node node) {
|
| +    _stack.last().children.add(node);
|
| +  }
|
| +
|
| +  // TODO(rnystrom): Only need this because RegExp doesn't let you start
|
| +  // searching from a given offset.
|
| +  /// The not-yet-read tail of [source], starting at [pos].
|
| +  String get currentSource() => source.substring(pos, source.length);
|
| +
|
| +  /// Whether [pos] has reached the end of [source].
|
| +  bool get isDone() => pos == source.length;
|
| +
|
| +  /// Moves [pos] forward by [length], leaving [start] alone so the skipped
|
| +  /// characters remain pending plain text.
|
| +  void advanceBy(int length) => pos += length;
|
| +
|
| +  /// Moves [pos] forward by [length] and sets [start] to the new position, so
|
| +  /// the skipped characters are not emitted as text.
|
| +  void consume(int length) {
|
| +    pos += length;
|
| +    start = pos;
|
| +  }
|
| +}
|
| +
|
| +/// Represents one kind of markdown tag that can be parsed.
|
| +class InlineSyntax {
|
| +  /// The pattern that must match at the parser's current position.
|
| +  final RegExp pattern;
|
| +
|
| +  InlineSyntax(String pattern)
|
| +    : pattern = new RegExp(pattern, true);
|
| +  // TODO(rnystrom): Should use named arg for RegExp multiLine.
|
| +
|
| +  /// Tests [pattern] against the unread source and returns whether it matched
|
| +  /// exactly at the current position.
|
| +  ///
|
| +  /// On a match, pending plain text is flushed and [onMatch] is invoked. The
|
| +  /// matched text is consumed here only when [onMatch] returns `true`.
|
| +  bool tryMatch(InlineParser parser) {
|
| +    final startMatch = pattern.firstMatch(parser.currentSource);
|
| +    if ((startMatch != null) && (startMatch.start() == 0)) {
|
| +      // Write any existing plain text up to this point.
|
| +      parser.writeText();
|
| +
|
| +      if (onMatch(parser, startMatch)) {
|
| +        parser.consume(startMatch.group(0).length);
|
| +      }
|
| +      return true;
|
| +    }
|
| +    return false;
|
| +  }
|
| +
|
| +  /// Handles a successful [match] of [pattern]. Return `true` to have
|
| +  /// [tryMatch] consume the matched text, or `false` if the implementation
|
| +  /// has already moved the parser itself.
|
| +  ///
|
| +  /// Named `onMatch` because that is what [tryMatch] calls and what every
|
| +  /// subclass implements.
|
| +  abstract bool onMatch(InlineParser parser, Match match);
|
| +}
|
| +
|
| +/// Matches stuff that should just be passed through as straight text.
|
| +class TextSyntax extends InlineSyntax {
|
| +  /// Replacement text to emit for a match, or `null` to keep the matched
|
| +  /// text as it appears in the source.
|
| +  String substitute;
|
| +
|
| +  TextSyntax(String pattern, [String sub])
|
| +    : super(pattern),
|
| +      substitute = sub;
|
| +
|
| +  /// Emits [substitute] as a text node and returns `true`, or, when there is
|
| +  /// no substitute, steps over the match so it stays pending plain text and
|
| +  /// returns `false`.
|
| +  bool onMatch(InlineParser parser, Match match) {
|
| +    if (substitute != null) {
|
| +      // Insert the substitution; the caller consumes the matched source.
|
| +      parser.addNode(new Text(substitute));
|
| +      return true;
|
| +    }
|
| +
|
| +    // No substitute: just use the original matched text.
|
| +    parser.advanceBy(match.group(0).length);
|
| +    return false;
|
| +  }
|
| +}
|
| +
|
| +/// Matches autolinks like <http://foo.com>.
|
| +class AutolinkSyntax extends InlineSyntax {
|
| +  AutolinkSyntax()
|
| +    : super(@'<((http|https|ftp)://[^>]*)>');
|
| +  // TODO(rnystrom): Make case insensitive.
|
| +
|
| +  /// Adds an `a` element whose text is the HTML-escaped URL and whose `href`
|
| +  /// is the URL exactly as captured. Always returns `true`.
|
| +  bool onMatch(InlineParser parser, Match match) {
|
| +    // Group 1 is the URL without the surrounding angle brackets.
|
| +    final href = match.group(1);
|
| +
|
| +    final link = new Element.text('a', escapeHtml(href));
|
| +    link.attributes['href'] = href;
|
| +
|
| +    parser.addNode(link);
|
| +    return true;
|
| +  }
|
| +}
|
| +
|
| +/// Matches syntax that has a pair of tags and becomes an element, like '*' for
|
| +/// `<em>`. Allows nested tags.
|
| +class TagSyntax extends InlineSyntax {
|
| +  /// The pattern that closes a tag opened by this syntax.
|
| +  final RegExp endPattern;
|
| +
|
| +  /// The name of the element created when the tag is closed.
|
| +  final String tag;
|
| +
|
| +  /// When [end] is omitted, the opening [pattern] doubles as the closing one.
|
| +  TagSyntax(String pattern, [String tag, String end = null])
|
| +    : super(pattern),
|
| +      endPattern = new RegExp((end == null) ? pattern : end, true),
|
| +      tag = tag;
|
| +  // TODO(rnystrom): Doing this.field doesn't seem to work with named args.
|
| +  // TODO(rnystrom): Should use named arg for RegExp multiLine.
|
| +
|
| +  /// Opens a tag by pushing a new [TagState], anchored at the parser's
|
| +  /// current position, onto the parser's stack. Always returns `true`.
|
| +  bool onMatch(InlineParser parser, Match match) {
|
| +    final opened = new TagState(parser.pos, this);
|
| +    parser._stack.add(opened);
|
| +    return true;
|
| +  }
|
| +
|
| +  /// Closes a tag by wrapping the children gathered in [state] in a [tag]
|
| +  /// element and adding it to the parser. Always returns `true`.
|
| +  bool onMatchEnd(InlineParser parser, Match match, TagState state) {
|
| +    final element = new Element(tag, state.children);
|
| +    parser.addNode(element);
|
| +    return true;
|
| +  }
|
| +}
|
| +
|
| +/// Matches inline links like [blah] [id] and [blah] (url).
|
| +class LinkSyntax extends TagSyntax {
|
| +  /// The regex for the end of a link needs to handle both reference style and
|
| +  /// inline styles as well as optional titles for inline links. To make that
|
| +  /// a bit more palatable, this breaks it into pieces.
|
| +  ///
|
| +  /// Capture groups in the result: 1 is the reference id, 2 is the inline
|
| +  /// URL, and 3 is the inline title.
|
| +  static get linkPattern() {
|
| +    final bracket = @'\][ \n\t]?'; // "]" with optional space after.
|
| +    final refLink = @'\[([^\]]*)\]'; // "[id]" reflink id.
|
| +    final title = @'(?:[ ]*"([^"]+)"|)'; // Optional title in quotes.
|
| +    final inlineLink = '\\(([^ )]+)$title\\)'; // "(url "title")" inline link.
|
| +    return '$bracket(?:$refLink|$inlineLink)';
|
| +  }
|
| +
|
| +  LinkSyntax()
|
| +    : super(@'\[', end: linkPattern);
|
| +
|
| +  /// Builds the `a` element for a closed link and adds it to the parser.
|
| +  ///
|
| +  /// Returns `true` when an anchor was added, or `false` when a reference
|
| +  /// link names an id that is not in the document's `refLinks`.
|
| +  bool onMatchEnd(InlineParser parser, Match match, TagState state) {
|
| +    var url;
|
| +    var title;
|
| +
|
| +    // A capture group that took no part in the match may be reported as
|
| +    // null rather than '', so both are treated as "absent". Without the null
|
| +    // check a reference link would take the inline branch and fail on
|
| +    // url.startsWith.
|
| +    final inlineUrl = match.group(2);
|
| +    if ((inlineUrl != null) && (inlineUrl != '')) {
|
| +      // Inline link like [foo](url).
|
| +      url = inlineUrl;
|
| +      title = match.group(3);
|
| +
|
| +      // For whatever reason, markdown allows angle-bracketed URLs here.
|
| +      if (url.startsWith('<') && url.endsWith('>')) {
|
| +        url = url.substring(1, url.length - 1);
|
| +      }
|
| +    } else {
|
| +      // Reference link like [foo] [bar].
|
| +      var id = match.group(1);
|
| +      if ((id == null) || (id == '')) {
|
| +        // The id is empty ("[]") so infer it from the contents. startPos is
|
| +        // the position of the opening "[", hence the + 1.
|
| +        id = parser.source.substring(state.startPos + 1, parser.pos);
|
| +      }
|
| +
|
| +      // Look up the link.
|
| +      final link = parser.document.refLinks[id];
|
| +      // If it's an unknown link just emit plaintext.
|
| +      if (link == null) return false;
|
| +
|
| +      url = link.url;
|
| +      title = link.title;
|
| +    }
|
| +
|
| +    final anchor = new Element('a', state.children);
|
| +    anchor.attributes['href'] = escapeHtml(url);
|
| +    if ((title != null) && (title != '')) {
|
| +      anchor.attributes['title'] = escapeHtml(title);
|
| +    }
|
| +
|
| +    parser.addNode(anchor);
|
| +    return true;
|
| +  }
|
| +}
|
| +
|
| +/// Matches backtick-enclosed inline code blocks.
|
| +class CodeSyntax extends InlineSyntax {
|
| +  CodeSyntax(String pattern)
|
| +    : super(pattern);
|
| +
|
| +  /// Adds a `code` element holding the HTML-escaped text of capture group 1.
|
| +  /// Always returns `true`.
|
| +  bool onMatch(InlineParser parser, Match match) {
|
| +    final escaped = escapeHtml(match.group(1));
|
| +    final code = new Element.text('code', escaped);
|
| +    parser.addNode(code);
|
| +    return true;
|
| +  }
|
| +}
|
| +
|
| +/// Keeps track of a currently open tag while it is being parsed. The parser
|
| +/// maintains a stack of these so it can handle nested tags.
|
| +class TagState {
|
| +  /// The parser position recorded when this tag was opened.
|
| +  int startPos;
|
| +
|
| +  /// The syntax that opened this tag.
|
| +  final TagSyntax syntax;
|
| +
|
| +  /// The nodes gathered inside this tag while it is open.
|
| +  final List<Node> children;
|
| +
|
| +  TagState(this.startPos, this.syntax)
|
| +    : children = <Node>[];
|
| +
|
| +  /// Tries to close this tag. Succeeds only when the syntax's end pattern
|
| +  /// matches exactly at the parser's current position; returns whether it
|
| +  /// did.
|
| +  bool tryMatch(InlineParser parser) {
|
| +    final closer = syntax.endPattern.firstMatch(parser.currentSource);
|
| +    if ((closer == null) || (closer.start() != 0)) return false;
|
| +
|
| +    // Close the tag.
|
| +    close(parser, closer);
|
| +    return true;
|
| +  }
|
| +
|
| +  /// Takes this tag off the parser's stack and finishes it, first discarding
|
| +  /// any unmatched tags stacked above it.
|
| +  ///
|
| +  /// Returns [children] when this was the last entry on the stack, otherwise
|
| +  /// `null`.
|
| +  List<Node> close(InlineParser parser, Match endMatch) {
|
| +    final stack = parser._stack;
|
| +
|
| +    // Anything still above this tag was never closed. For example, given
|
| +    // '*a _b*...' the '_' is on top when the second '*' arrives; it is
|
| +    // mismatched, so it falls back to plain text.
|
| +    while (stack.last() != this) {
|
| +      parser.discardUnmatchedTag();
|
| +    }
|
| +
|
| +    // Flush pending text into this tag, then pop it.
|
| +    parser.writeText();
|
| +    stack.removeLast();
|
| +
|
| +    // An empty stack means this was the special "results" node.
|
| +    if (stack.length == 0) return children;
|
| +
|
| +    // Still parsing: let the syntax attach the result to the parent.
|
| +    if (!syntax.onMatchEnd(parser, endMatch, this)) {
|
| +      // Didn't close correctly so revert to text from where the tag began.
|
| +      parser.start = startPos;
|
| +      parser.advanceBy(endMatch.group(0).length);
|
| +      return null;
|
| +    }
|
| +
|
| +    parser.consume(endMatch.group(0).length);
|
| +    return null;
|
| +  }
|
| +}
|
|
|