utils/markdown/inline_parser.dart - Issue 8725007: Lots of stuff hooking up markdown to dartdoc.

Unified Diff: utils/markdown/inline_parser.dart

Issue 8725007: Lots of stuff hooking up markdown to dartdoc. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Implement removeRange() on List in frog. Created 9 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: utils/markdown/inline_parser.dart

diff --git a/utils/markdown/inline_parser.dart b/utils/markdown/inline_parser.dart

index 8bf7693d81d8050abac5cec43a8cb8aecafc1010..57dae79450107b19dc202fae6fca300436f1e581 100644

--- a/utils/markdown/inline_parser.dart

+++ b/utils/markdown/inline_parser.dart

@@ -23,17 +23,17 @@ class InlineParser {

// Encode "<". (Why not encode ">" too? Gruber is toying with us.)

new TextSyntax(@'<', sub: '&lt;'),

// Parse "**strong**" tags.

- new TagSyntax(@'\*\*', tag: 'strong'),

+ new TagSyntax(@'\*\*', tag: 'strong'),

// Parse "__strong__" tags.

- new TagSyntax(@'__', tag: 'strong'),

+ new TagSyntax(@'__', tag: 'strong'),

// Parse "*emphasis*" tags.

- new TagSyntax(@'\*', tag: 'em'),

+ new TagSyntax(@'\*', tag: 'em'),

// Parse "_emphasis_" tags.

// TODO(rnystrom): Underscores in the middle of a word should not be

// parsed as emphasis like_in_this.

- new TagSyntax(@'_', tag: 'em'),

+ new TagSyntax(@'_', tag: 'em'),

// Parse inline code within double backticks: "``code``".

- new CodeSyntax(@'``[ ]?(.*?)[ ]?``'),

+ new CodeSyntax(@'``\s?((?:.|\n)*?)\s?``'),

// Parse inline code within backticks: "`code`".

new CodeSyntax(@'`([^`]*)`')

];

@@ -63,7 +63,7 @@ class InlineParser {

List<Node> parse() {

// Make a fake top tag to hold the results.

- _stack.add(new TagState(0, null));

+ _stack.add(new TagState(0, 0, null));

while (!isDone) {

bool matched = false;

@@ -96,8 +96,13 @@ class InlineParser {

}

writeText() {

- if (pos > start) {

- final text = source.substring(start, pos);

+ writeTextRange(start, pos);

+ start = pos;

+ }

+ writeTextRange(int start, int end) {

nweiz 2011/11/28 22:50:45 Shadowing of start here is a little confusing.

Bob Nystrom 2011/11/29 02:44:08 Yeah, but I couldn't come up with a better name th

+ if (end > start) {

+ final text = source.substring(start, end);

final nodes = _stack.last().children;

// If the previous node is text too, just append.

@@ -107,18 +112,9 @@ class InlineParser {

} else {

nodes.add(new Text(text));

}

- start = pos;

}

- /// Removes the top tag from the stack, reverts it to plain text and adds it

- /// to the output.

- discardUnmatchedTag() {

- final unfinished = _stack.removeLast();

- start = unfinished.startPos;

- }

addNode(Node node) {

_stack.last().children.add(node);

}

@@ -129,7 +125,10 @@ class InlineParser {

bool get isDone() => pos == source.length;

- void advanceBy(int length) => pos += length;

+ void advanceBy(int length) {

+ pos += length;

+ }

void consume(int length) {

pos += length;

start = pos;

@@ -158,7 +157,7 @@ class InlineSyntax {

return false;

}

- abstract bool match(InlineParser parser, Match match);

+ abstract bool onMatch(InlineParser parser, Match match);

}

/// Matches stuff that should just be passed through as straight text.

@@ -181,7 +180,7 @@ class TextSyntax extends InlineSyntax {

}

-/// Matches autolinks like <http://foo.com>.

+/// Matches autolinks like `<http://foo.com>`.

class AutolinkSyntax extends InlineSyntax {

AutolinkSyntax()

: super(@'<((http|https|ftp)://[^>]*)>');

@@ -198,7 +197,7 @@ class AutolinkSyntax extends InlineSyntax {

}

-/// Matches syntax that has a pair of tags and becomes an element, like '*' for

+/// Matches syntax that has a pair of tags and becomes an element, like `*` for

/// `<em>`. Allows nested tags.

class TagSyntax extends InlineSyntax {

final RegExp endPattern;

@@ -212,7 +211,8 @@ class TagSyntax extends InlineSyntax {

// TODO(rnystrom): Should use named arg for RegExp multiLine.

bool onMatch(InlineParser parser, Match match) {

- parser._stack.add(new TagState(parser.pos, this));

+ parser._stack.add(new TagState(parser.pos,

+ parser.pos + match.group(0).length, this));

return true;

}

@@ -222,17 +222,23 @@ class TagSyntax extends InlineSyntax {

}

-/// Matches inline links like [blah] [id] and [blah] (url).

+/// Matches inline links like `[blah] [id]` and `[blah] (url)`.

class LinkSyntax extends TagSyntax {

/// The regex for the end of a link needs to handle both reference style and

/// inline styles as well as optional titles for inline links. To make that

/// a bit more palatable, this breaks it into pieces.

static get linkPattern() {

- final bracket = @'\][ \n\t]?'; // "]" with optional space after.

- final refLink = @'\[([^\]]*)\]'; // "[id]" reflink id.

- final title = @'(?:[ ]*"([^"]+)"|)'; // Optional title in quotes.

- final inlineLink = '\\(([^ )]+)$title\\)'; // "(url "title")" inline link.

- return '$bracket(?:$refLink|$inlineLink)';

+ final refLink = @'\s?\[([^\]]*)\]'; // "[id]" reflink id.

+ final title = @'(?:[ ]*"([^"]+)"|)'; // Optional title in quotes.

+ final inlineLink = '\\s?\\(([^ )]+)$title\\)'; // "(url "title")" link.

+ return '\](?:($refLink|$inlineLink)|)';

+ // The groups matched by this are:

+ // 1: Will be non-empty if it's either a ref or inline link. Will be empty

+ // if it's just a bare pair of square brackets with nothing after them.

+ // 2: Contains the id inside [] for a reference-style link.

+ // 3: Contains the URL for an inline link.

+ // 4: Contains the title, if present, for an inline link.

}

LinkSyntax()

@@ -242,10 +248,33 @@ class LinkSyntax extends TagSyntax {

var url;

var title;

- if (match.group(2) != '') {

+ // If we didn't match refLink or inlineLink, then it means there was

+ // nothing after the first square bracket, so it isn't a normal markdown

+ // link at all. Instead, we allow users of the library to specify a special

+ // resolver function ([setImplicitLinkResolver]) that may choose to handle

+ // this. Otherwise, it's just treated as plain text.

+ if ((match.group(1) == null) || (match.group(1) == '')) {

+ if (_implicitLinkResolver == null) return false;

+ // Only allow implicit links if the content is just text.

+ // TODO(rnystrom): Do we want to relax this?

+ if (state.children.length != 1) return false;

+ if (state.children[0] is! Text) return false;

+ Text link = state.children[0];

+ // See if we have a resolver that will generate a link for us.

+ final node = _implicitLinkResolver(link.text);

+ if (node == null) return false;

+ parser.addNode(node);

+ return true;

+ }

+ if ((match.group(3) != null) && (match.group(3) != '')) {

// Inline link like [foo](url).

- url = match.group(2);

- title = match.group(3);

+ url = match.group(3);

+ title = match.group(4);

// For whatever reason, markdown allows angle-bracketed URLs here.

if (url.startsWith('<') && url.endsWith('>')) {

@@ -253,7 +282,7 @@ class LinkSyntax extends TagSyntax {

}

} else {

// Reference link like [foo] [bar].

- var id = match.group(1);

+ var id = match.group(2);

if (id == '') {

// The id is empty ("[]") so infer it from the contents.

id = parser.source.substring(state.startPos + 1, parser.pos);

@@ -262,7 +291,9 @@ class LinkSyntax extends TagSyntax {

// Look up the link.

final link = parser.document.refLinks[id];

// If it's an unknown link just emit plaintext.

- if (link == null) return false;

+ if (link == null) {

nweiz 2011/11/28 22:50:45 :(

Bob Nystrom 2011/11/29 02:44:08 This is actually part of markdown. Gruber hates us

+ return false;

+ }

url = link.url;

title = link.title;

@@ -296,13 +327,16 @@ class TagState {

/// The point in the original source where this tag started.

int startPos;

+ /// The point in the original source where the open tag ended.

+ int endPos;

/// The syntax that created this node.

final TagSyntax syntax;

/// The children of this node. Will be `null` for text nodes.

final List<Node> children;

- TagState(this.startPos, this.syntax)

+ TagState(this.startPos, this.endPos, this.syntax)

: children = <Node>[];

/// Attempts to close this tag by matching the current text against its end

@@ -322,11 +356,25 @@ class TagState {

/// Will discard any unmatched tags that happen to be above it on the stack.

/// If this is the last node in the stack, returns its children.

List<Node> close(InlineParser parser, Match endMatch) {

- // Found a match. If there is anything above this tag on the stack,

- // discard it. For example, given '*a _b*...' when we reach the second

- // '*', '_' will be on the top of the stack. It's mismatched, so we

- // just treat it as text.

- while (parser._stack.last() != this) parser.discardUnmatchedTag();

+ // If there are unclosed tags on top of this one when it's closed, that

+ // means they are mismatched. Mismatched tags are treated as plain text in

+ // markdown. So for each tag above this one, we write its start tag as text

+ // and then add its children to this one's children.

+ int index = parser._stack.indexOf(this);

+ // Remove the unmatched children.

+ final unmatchedTags = parser._stack.getRange(index + 1,

+ parser._stack.length - index - 1);

+ parser._stack.removeRange(index + 1, parser._stack.length - index - 1);

+ // Flatten them out onto this tag.

+ for (final unmatched in unmatchedTags) {

+ // Write the start tag as text.

+ parser.writeTextRange(unmatched.startPos, unmatched.endPos);

+ // Bequeath its children unto this tag.

+ children.addAll(unmatched.children);

+ }

// Pop this off the stack.

parser.writeText();

« utils/markdown/block_parser.dart ('K') | « utils/markdown/block_parser.dart ('k') | utils/markdown/lib.dart » ('j') | utils/markdown/lib.dart » ('J')