Index: utils/markdown/block_parser.dart |
diff --git a/utils/markdown/block_parser.dart b/utils/markdown/block_parser.dart |
deleted file mode 100644 |
index d4a46cf9f811ccddce342324cda5baf6fe0cea61..0000000000000000000000000000000000000000 |
--- a/utils/markdown/block_parser.dart |
+++ /dev/null |
@@ -1,436 +0,0 @@ |
-// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
-// for details. All rights reserved. Use of this source code is governed by a |
-// BSD-style license that can be found in the LICENSE file. |
- |
-/// The line contains only whitespace or is empty. |
-final _RE_EMPTY = const RegExp(@'^([ \t]*)$'); |
- |
-/// A series of `=` or `-` (on the next line) define setext-style headers. |
-final _RE_SETEXT = const RegExp(@'^((=+)|(-+))$'); |
- |
-/// Leading (and trailing) `#` define atx-style headers. |
-final _RE_HEADER = const RegExp(@'^(#{1,6})(.*?)#*$'); |
- |
-/// The line starts with `>` with one optional space after. |
-final _RE_BLOCKQUOTE = const RegExp(@'^[ ]{0,3}>[ ]?(.*)$'); |
- |
-/// A line indented four spaces. Used for code blocks and lists. |
-final _RE_INDENT = const RegExp(@'^(?: |\t)(.*)$'); |
- |
-/// Three or more hyphens, asterisks or underscores by themselves. Note that |
-/// a line like `----` is valid as both HR and SETEXT. In case of a tie, |
-/// SETEXT should win. |
-final _RE_HR = const RegExp(@'^[ ]{0,3}((-+[ ]{0,2}){3,}|' + |
- @'(_+[ ]{0,2}){3,}|' + |
- @'(\*+[ ]{0,2}){3,})$'); |
- |
-/// Really hacky way to detect block-level embedded HTML. Just looks for |
-/// "<somename". |
-final _RE_HTML = const RegExp(@'^<[ ]*\w+[ >]'); |
- |
-/// A line starting with one of these markers: `-`, `*`, `+`. May have up to |
-/// three leading spaces before the marker and any number of spaces or tabs |
-/// after. |
-final _RE_UL = const RegExp(@'^[ ]{0,3}[*+-][ \t]+(.*)$'); |
- |
-/// A line starting with a number like `123.`. May have up to three leading |
-/// spaces before the marker and any number of spaces or tabs after. |
-final _RE_OL = const RegExp(@'^[ ]{0,3}\d+\.[ \t]+(.*)$'); |
- |
-/// Maintains the internal state needed to parse a series of lines into blocks |
-/// of markdown suitable for further inline parsing. |
-class BlockParser { |
- final List<String> lines; |
- |
- /// The markdown document this parser is parsing. |
- final Document document; |
- |
- /// Index of the current line. |
- int pos; |
- |
- BlockParser(this.lines, this.document) |
- : pos = 0; |
- |
- /// Gets the current line. |
- String get current() => lines[pos]; |
- |
- /// Gets the line after the current one or `null` if there is none. |
- String get next() { |
- // Don't read past the end. |
- if (pos >= lines.length - 1) return null; |
- return lines[pos + 1]; |
- } |
- |
- void advance() { |
- pos++; |
- } |
- |
- bool get isDone() => pos >= lines.length; |
- |
- /// Gets whether or not the current line matches the given pattern. |
- bool matches(RegExp regex) { |
- if (isDone) return false; |
- return regex.firstMatch(current) != null; |
- } |
- |
- /// Gets whether or not the current line matches the given pattern. |
- bool matchesNext(RegExp regex) { |
- if (next == null) return false; |
- return regex.firstMatch(next) != null; |
- } |
-} |
- |
-class BlockSyntax { |
- /// Gets the collection of built-in block parsers. To turn a series of lines |
- /// into blocks, each of these will be tried in turn. Order matters here. |
- static List<BlockSyntax> get syntaxes() { |
- // Lazy initialize. |
- if (_syntaxes == null) { |
- _syntaxes = [ |
- new EmptyBlockSyntax(), |
- new BlockHtmlSyntax(), |
- new SetextHeaderSyntax(), |
- new HeaderSyntax(), |
- new CodeBlockSyntax(), |
- new BlockquoteSyntax(), |
- new HorizontalRuleSyntax(), |
- new UnorderedListSyntax(), |
- new OrderedListSyntax(), |
- new ParagraphSyntax() |
- ]; |
- } |
- |
- return _syntaxes; |
- } |
- |
- static List<BlockSyntax> _syntaxes; |
- |
- /// Gets the regex used to identify the beginning of this block, if any. |
- RegExp get pattern() => null; |
- |
- bool get canEndBlock() => true; |
- |
- bool canParse(BlockParser parser) { |
- return pattern.firstMatch(parser.current) != null; |
- } |
- |
- abstract Node parse(BlockParser parser); |
- |
- List<String> parseChildLines(BlockParser parser) { |
- // Grab all of the lines that form the blockquote, stripping off the ">". |
- final childLines = <String>[]; |
- |
- while (!parser.isDone) { |
- final match = pattern.firstMatch(parser.current); |
- if (match == null) break; |
- childLines.add(match[1]); |
- parser.advance(); |
- } |
- |
- return childLines; |
- } |
- |
- /// Gets whether or not [parser]'s current line should end the previous block. |
- static bool isAtBlockEnd(BlockParser parser) { |
- if (parser.isDone) return true; |
- return syntaxes.some((s) => s.canParse(parser) && s.canEndBlock); |
- } |
-} |
- |
-class EmptyBlockSyntax extends BlockSyntax { |
- RegExp get pattern() => _RE_EMPTY; |
- |
- Node parse(BlockParser parser) { |
- parser.advance(); |
- |
- // Don't actually emit anything. |
- return null; |
- } |
-} |
- |
-/// Parses setext-style headers. |
-class SetextHeaderSyntax extends BlockSyntax { |
- bool canParse(BlockParser parser) { |
- // Note: matches *next* line, not the current one. We're looking for the |
- // underlining after this line. |
- return parser.matchesNext(_RE_SETEXT); |
- } |
- |
- Node parse(BlockParser parser) { |
- final match = _RE_SETEXT.firstMatch(parser.next); |
- |
- final tag = (match[1][0] == '=') ? 'h1' : 'h2'; |
- final contents = parser.document.parseInline(parser.current); |
- parser.advance(); |
- parser.advance(); |
- |
- return new Element(tag, contents); |
- } |
-} |
- |
-/// Parses atx-style headers: `## Header ##`. |
-class HeaderSyntax extends BlockSyntax { |
- RegExp get pattern() => _RE_HEADER; |
- |
- Node parse(BlockParser parser) { |
- final match = pattern.firstMatch(parser.current); |
- parser.advance(); |
- final level = match[1].length; |
- final contents = parser.document.parseInline(match[2].trim()); |
- return new Element('h$level', contents); |
- } |
-} |
- |
-/// Parses email-style blockquotes: `> quote`. |
-class BlockquoteSyntax extends BlockSyntax { |
- RegExp get pattern() => _RE_BLOCKQUOTE; |
- |
- Node parse(BlockParser parser) { |
- final childLines = parseChildLines(parser); |
- |
- // Recursively parse the contents of the blockquote. |
- final children = parser.document.parseLines(childLines); |
- |
- return new Element('blockquote', children); |
- } |
-} |
- |
-/// Parses preformatted code blocks that are indented four spaces. |
-class CodeBlockSyntax extends BlockSyntax { |
- RegExp get pattern() => _RE_INDENT; |
- |
- Node parse(BlockParser parser) { |
- final childLines = parseChildLines(parser); |
- |
- // The Markdown tests expect a trailing newline. |
- childLines.add(''); |
- |
- // Escape the code. |
- final escaped = escapeHtml(Strings.join(childLines, '\n')); |
- |
- return new Element('pre', [new Element.text('code', escaped)]); |
- } |
-} |
- |
-/// Parses horizontal rules like `---`, `_ _ _`, `* * *`, etc. |
-class HorizontalRuleSyntax extends BlockSyntax { |
- RegExp get pattern() => _RE_HR; |
- |
- Node parse(BlockParser parser) { |
- final match = pattern.firstMatch(parser.current); |
- parser.advance(); |
- return new Element.empty('hr'); |
- } |
-} |
- |
-/// Parses inline HTML at the block level. This differs from other markdown |
-/// implementations in several ways: |
-/// |
-/// 1. This one is way way WAY simpler. |
-/// 2. All HTML tags at the block level will be treated as blocks. If you |
-/// start a paragraph with `<em>`, it will not wrap it in a `<p>` for you. |
-/// As soon as it sees something like HTML, it stops mucking with it until |
-/// it hits the next block. |
-/// 3. Absolutely no HTML parsing or validation is done. We're a markdown |
-/// parser not an HTML parser! |
-class BlockHtmlSyntax extends BlockSyntax { |
- RegExp get pattern() => _RE_HTML; |
- |
- bool get canEndBlock() => false; |
- |
- Node parse(BlockParser parser) { |
- final childLines = []; |
- |
- // Eat until we hit a blank line. |
- while (!parser.isDone && !parser.matches(_RE_EMPTY)) { |
- childLines.add(parser.current); |
- parser.advance(); |
- } |
- |
- return new Text(Strings.join(childLines, '\n')); |
- } |
-} |
- |
-class ListItem { |
- bool forceBlock = false; |
- final List<String> lines; |
- |
- ListItem(this.lines); |
-} |
- |
-/// Base class for both ordered and unordered lists. |
-class ListSyntax extends BlockSyntax { |
- bool get canEndBlock() => false; |
- |
- abstract String get listTag(); |
- |
- Node parse(BlockParser parser) { |
- final items = <ListItem>[]; |
- var childLines = <String>[]; |
- |
- endItem() { |
- if (childLines.length > 0) { |
- items.add(new ListItem(childLines)); |
- childLines = <String>[]; |
- } |
- } |
- |
- var match; |
- tryMatch(RegExp pattern) { |
- match = pattern.firstMatch(parser.current); |
- return match != null; |
- } |
- |
- bool afterEmpty = false; |
- while (!parser.isDone) { |
- if (tryMatch(_RE_EMPTY)) { |
- // Add a blank line to the current list item. |
- childLines.add(''); |
- } else if (tryMatch(_RE_UL) || tryMatch(_RE_OL)) { |
- // End the current list item and start a new one. |
- endItem(); |
- childLines.add(match[1]); |
- } else if (tryMatch(_RE_INDENT)) { |
- // Strip off indent and add to current item. |
- childLines.add(match[1]); |
- } else if (isAtBlockEnd(parser)) { |
- // Done with the list. |
- break; |
- } else { |
- // Anything else is paragraph text or other stuff that can be in a list |
- // item. However, if the previous item is a blank line, this means we're |
- // done with the list and are starting a new top-level paragraph. |
- if ((childLines.length > 0) && (childLines.last() == '')) break; |
- childLines.add(parser.current); |
- } |
- parser.advance(); |
- } |
- |
- endItem(); |
- |
- // Markdown, because it hates us, specifies two kinds of list items. If you |
- // have a list like: |
- // |
- // * one |
- // * two |
- // |
- // Then it will insert the conents of the lines directly in the <li>, like: |
- // <ul> |
- // <li>one</li> |
- // <li>two</li> |
- // <ul> |
- // |
- // If, however, there are blank lines between the items, each is wrapped in |
- // paragraphs: |
- // |
- // * one |
- // |
- // * two |
- // |
- // <ul> |
- // <li><p>one</p></li> |
- // <li><p>two</p></li> |
- // <ul> |
- // |
- // In other words, sometimes we parse the contents of a list item like a |
- // block, and sometimes line an inline. The rules our parser implements are: |
- // |
- // - If it has more than one line, it's a block. |
- // - If the line matches any block parser (BLOCKQUOTE, HEADER, HR, INDENT, |
- // UL, OL) it's a block. (This is for cases like "* > quote".) |
- // - If there was a blank line between this item and the previous one, it's |
- // a block. |
- // - If there was a blank line between this item and the next one, it's a |
- // block. |
- // - Otherwise, parse it as an inline. |
- |
- // Remove any trailing empty lines and note which items are separated by |
- // empty lines. Do this before seeing which items are single-line so that |
- // trailing empty lines on the last item don't force it into being a block. |
- for (int i = 0; i < items.length; i++) { |
- for (int j = items[i].lines.length - 1; j > 0; j--) { |
- if (_RE_EMPTY.firstMatch(items[i].lines[j]) != null) { |
- // Found an empty line. Item and one after it are blocks. |
- if (i < items.length - 1) { |
- items[i].forceBlock = true; |
- items[i + 1].forceBlock = true; |
- } |
- items[i].lines.removeLast(); |
- } else { |
- break; |
- } |
- } |
- } |
- |
- // Convert the list items to Nodes. |
- final itemNodes = <Node>[]; |
- for (final item in items) { |
- bool blockItem = item.forceBlock || (item.lines.length > 1); |
- |
- // See if it matches some block parser. |
- final blocksInList = const [ |
- _RE_BLOCKQUOTE, |
- _RE_HEADER, |
- _RE_HR, |
- _RE_INDENT, |
- _RE_UL, |
- _RE_OL |
- ]; |
- |
- if (!blockItem) { |
- for (final pattern in blocksInList) { |
- if (pattern.firstMatch(item.lines[0]) != null) { |
- blockItem = true; |
- break; |
- } |
- } |
- } |
- |
- // Parse the item as a block or inline. |
- if (blockItem) { |
- // Block list item. |
- final children = parser.document.parseLines(item.lines); |
- itemNodes.add(new Element('li', children)); |
- } else { |
- // Raw list item. |
- final contents = parser.document.parseInline(item.lines[0]); |
- itemNodes.add(new Element('li', contents)); |
- } |
- } |
- |
- return new Element(listTag, itemNodes); |
- } |
-} |
- |
-/// Parses unordered lists. |
-class UnorderedListSyntax extends ListSyntax { |
- RegExp get pattern() => _RE_UL; |
- String get listTag() => 'ul'; |
-} |
- |
-/// Parses ordered lists. |
-class OrderedListSyntax extends ListSyntax { |
- RegExp get pattern() => _RE_OL; |
- String get listTag() => 'ol'; |
-} |
- |
-/// Parses paragraphs of regular text. |
-class ParagraphSyntax extends BlockSyntax { |
- bool get canEndBlock() => false; |
- |
- bool canParse(BlockParser parser) => true; |
- |
- Node parse(BlockParser parser) { |
- final childLines = []; |
- |
- // Eat until we hit something that ends a paragraph. |
- while (!isAtBlockEnd(parser)) { |
- childLines.add(parser.current); |
- parser.advance(); |
- } |
- |
- final contents = parser.document.parseInline( |
- Strings.join(childLines, '\n')); |
- return new Element('p', contents); |
- } |
-} |