Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(176)

Unified Diff: utils/markdown/block_parser.dart

Issue 8953042: Move markdown library. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Add markdown tests to dartdoc. Created 9 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « utils/markdown/ast.dart ('k') | utils/markdown/html_renderer.dart » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: utils/markdown/block_parser.dart
diff --git a/utils/markdown/block_parser.dart b/utils/markdown/block_parser.dart
deleted file mode 100644
index d4a46cf9f811ccddce342324cda5baf6fe0cea61..0000000000000000000000000000000000000000
--- a/utils/markdown/block_parser.dart
+++ /dev/null
@@ -1,436 +0,0 @@
-// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
-// for details. All rights reserved. Use of this source code is governed by a
-// BSD-style license that can be found in the LICENSE file.
-
-/// The line contains only whitespace or is empty.
-final _RE_EMPTY = const RegExp(@'^([ \t]*)$');
-
-/// A series of `=` or `-` (on the next line) define setext-style headers.
-final _RE_SETEXT = const RegExp(@'^((=+)|(-+))$');
-
-/// Leading (and trailing) `#` define atx-style headers.
-final _RE_HEADER = const RegExp(@'^(#{1,6})(.*?)#*$');
-
-/// The line starts with `>` (after up to three leading spaces) with one optional space after.
-final _RE_BLOCKQUOTE = const RegExp(@'^[ ]{0,3}>[ ]?(.*)$');
-
-/// A line indented four spaces (or one tab). Used for code blocks and lists.
-final _RE_INDENT = const RegExp(@'^(?:    |\t)(.*)$');
-
-/// Three or more hyphens, asterisks or underscores by themselves. Note that
-/// a line like `----` is valid as both HR and SETEXT. In case of a tie,
-/// SETEXT should win.
-final _RE_HR = const RegExp(@'^[ ]{0,3}((-+[ ]{0,2}){3,}|' +
- @'(_+[ ]{0,2}){3,}|' +
- @'(\*+[ ]{0,2}){3,})$');
-
-/// Really hacky way to detect block-level embedded HTML. Just looks for
-/// "<somename".
-final _RE_HTML = const RegExp(@'^<[ ]*\w+[ >]');
-
-/// A line starting with one of these markers: `-`, `*`, `+`. May have up to
-/// three leading spaces before the marker and any number of spaces or tabs
-/// after.
-final _RE_UL = const RegExp(@'^[ ]{0,3}[*+-][ \t]+(.*)$');
-
-/// A line starting with a number like `123.`. May have up to three leading
-/// spaces before the marker and any number of spaces or tabs after.
-final _RE_OL = const RegExp(@'^[ ]{0,3}\d+\.[ \t]+(.*)$');
-
-/// Maintains the internal state needed to parse a series of lines into blocks
-/// of markdown suitable for further inline parsing.
-class BlockParser {
- final List<String> lines;
-
- /// The markdown document this parser is parsing.
- final Document document;
-
- /// Index of the current line.
- int pos;
-
- BlockParser(this.lines, this.document)
- : pos = 0;
-
- /// Gets the current line.
- String get current() => lines[pos];
-
- /// Gets the line after the current one or `null` if there is none.
- String get next() {
- // Don't read past the end.
- if (pos >= lines.length - 1) return null;
- return lines[pos + 1];
- }
-
- void advance() {
- pos++;
- }
-
- bool get isDone() => pos >= lines.length;
-
- /// Gets whether or not the current line matches the given pattern.
- bool matches(RegExp regex) {
- if (isDone) return false;
- return regex.firstMatch(current) != null;
- }
-
- /// Gets whether or not the next line (if there is one) matches the given pattern.
- bool matchesNext(RegExp regex) {
- if (next == null) return false;
- return regex.firstMatch(next) != null;
- }
-}
-
-class BlockSyntax {
- /// Gets the collection of built-in block parsers. To turn a series of lines
- /// into blocks, each of these will be tried in turn. Order matters here.
- static List<BlockSyntax> get syntaxes() {
- // Lazy initialize.
- if (_syntaxes == null) {
- _syntaxes = [
- new EmptyBlockSyntax(),
- new BlockHtmlSyntax(),
- new SetextHeaderSyntax(),
- new HeaderSyntax(),
- new CodeBlockSyntax(),
- new BlockquoteSyntax(),
- new HorizontalRuleSyntax(),
- new UnorderedListSyntax(),
- new OrderedListSyntax(),
- new ParagraphSyntax()
- ];
- }
-
- return _syntaxes;
- }
-
- static List<BlockSyntax> _syntaxes;
-
- /// Gets the regex used to identify the beginning of this block, if any.
- RegExp get pattern() => null;
-
- bool get canEndBlock() => true;
-
- bool canParse(BlockParser parser) {
- return pattern.firstMatch(parser.current) != null;
- }
-
- abstract Node parse(BlockParser parser);
-
- List<String> parseChildLines(BlockParser parser) {
- // Grab all of the lines that form the block, stripping off the marker (e.g. the ">" of a blockquote).
- final childLines = <String>[];
-
- while (!parser.isDone) {
- final match = pattern.firstMatch(parser.current);
- if (match == null) break;
- childLines.add(match[1]);
- parser.advance();
- }
-
- return childLines;
- }
-
- /// Gets whether or not [parser]'s current line should end the previous block.
- static bool isAtBlockEnd(BlockParser parser) {
- if (parser.isDone) return true;
- return syntaxes.some((s) => s.canParse(parser) && s.canEndBlock);
- }
-}
-
-class EmptyBlockSyntax extends BlockSyntax {
- RegExp get pattern() => _RE_EMPTY;
-
- Node parse(BlockParser parser) {
- parser.advance();
-
- // Don't actually emit anything.
- return null;
- }
-}
-
-/// Parses setext-style headers.
-class SetextHeaderSyntax extends BlockSyntax {
- bool canParse(BlockParser parser) {
- // Note: matches *next* line, not the current one. We're looking for the
- // underlining after this line.
- return parser.matchesNext(_RE_SETEXT);
- }
-
- Node parse(BlockParser parser) {
- final match = _RE_SETEXT.firstMatch(parser.next);
-
- final tag = (match[1][0] == '=') ? 'h1' : 'h2';
- final contents = parser.document.parseInline(parser.current);
- parser.advance();
- parser.advance();
-
- return new Element(tag, contents);
- }
-}
-
-/// Parses atx-style headers: `## Header ##`.
-class HeaderSyntax extends BlockSyntax {
- RegExp get pattern() => _RE_HEADER;
-
- Node parse(BlockParser parser) {
- final match = pattern.firstMatch(parser.current);
- parser.advance();
- final level = match[1].length;
- final contents = parser.document.parseInline(match[2].trim());
- return new Element('h$level', contents);
- }
-}
-
-/// Parses email-style blockquotes: `> quote`.
-class BlockquoteSyntax extends BlockSyntax {
- RegExp get pattern() => _RE_BLOCKQUOTE;
-
- Node parse(BlockParser parser) {
- final childLines = parseChildLines(parser);
-
- // Recursively parse the contents of the blockquote.
- final children = parser.document.parseLines(childLines);
-
- return new Element('blockquote', children);
- }
-}
-
-/// Parses preformatted code blocks that are indented four spaces.
-class CodeBlockSyntax extends BlockSyntax {
- RegExp get pattern() => _RE_INDENT;
-
- Node parse(BlockParser parser) {
- final childLines = parseChildLines(parser);
-
- // The Markdown tests expect a trailing newline.
- childLines.add('');
-
- // Escape the code.
- final escaped = escapeHtml(Strings.join(childLines, '\n'));
-
- return new Element('pre', [new Element.text('code', escaped)]);
- }
-}
-
-/// Parses horizontal rules like `---`, `_ _ _`, `* * *`, etc.
-class HorizontalRuleSyntax extends BlockSyntax {
- RegExp get pattern() => _RE_HR;
-
- Node parse(BlockParser parser) {
- final match = pattern.firstMatch(parser.current);
- parser.advance();
- return new Element.empty('hr');
- }
-}
-
-/// Parses inline HTML at the block level. This differs from other markdown
-/// implementations in several ways:
-///
-/// 1. This one is way way WAY simpler.
-/// 2. All HTML tags at the block level will be treated as blocks. If you
-/// start a paragraph with `<em>`, it will not wrap it in a `<p>` for you.
-/// As soon as it sees something like HTML, it stops mucking with it until
-/// it hits the next block.
-/// 3. Absolutely no HTML parsing or validation is done. We're a markdown
-/// parser not an HTML parser!
-class BlockHtmlSyntax extends BlockSyntax {
- RegExp get pattern() => _RE_HTML;
-
- bool get canEndBlock() => false;
-
- Node parse(BlockParser parser) {
- final childLines = [];
-
- // Eat until we hit a blank line.
- while (!parser.isDone && !parser.matches(_RE_EMPTY)) {
- childLines.add(parser.current);
- parser.advance();
- }
-
- return new Text(Strings.join(childLines, '\n'));
- }
-}
-
-class ListItem {
- bool forceBlock = false;
- final List<String> lines;
-
- ListItem(this.lines);
-}
-
-/// Base class for both ordered and unordered lists.
-class ListSyntax extends BlockSyntax {
- bool get canEndBlock() => false;
-
- abstract String get listTag();
-
- Node parse(BlockParser parser) {
- final items = <ListItem>[];
- var childLines = <String>[];
-
- endItem() {
- if (childLines.length > 0) {
- items.add(new ListItem(childLines));
- childLines = <String>[];
- }
- }
-
- var match;
- tryMatch(RegExp pattern) {
- match = pattern.firstMatch(parser.current);
- return match != null;
- }
-
- bool afterEmpty = false;
- while (!parser.isDone) {
- if (tryMatch(_RE_EMPTY)) {
- // Add a blank line to the current list item.
- childLines.add('');
- } else if (tryMatch(_RE_UL) || tryMatch(_RE_OL)) {
- // End the current list item and start a new one.
- endItem();
- childLines.add(match[1]);
- } else if (tryMatch(_RE_INDENT)) {
- // Strip off indent and add to current item.
- childLines.add(match[1]);
- } else if (isAtBlockEnd(parser)) {
- // Done with the list.
- break;
- } else {
- // Anything else is paragraph text or other stuff that can be in a list
- // item. However, if the previous line is blank, this means we're
- // done with the list and are starting a new top-level paragraph.
- if ((childLines.length > 0) && (childLines.last() == '')) break;
- childLines.add(parser.current);
- }
- parser.advance();
- }
-
- endItem();
-
- // Markdown, because it hates us, specifies two kinds of list items. If you
- // have a list like:
- //
- // * one
- // * two
- //
- // Then it will insert the contents of the lines directly in the <li>, like:
- // <ul>
- // <li>one</li>
- // <li>two</li>
- // </ul>
- //
- // If, however, there are blank lines between the items, each is wrapped in
- // paragraphs:
- //
- // * one
- //
- // * two
- //
- // <ul>
- // <li><p>one</p></li>
- // <li><p>two</p></li>
- // </ul>
- //
- // In other words, sometimes we parse the contents of a list item like a
- // block, and sometimes like an inline. The rules our parser implements are:
- //
- // - If it has more than one line, it's a block.
- // - If the line matches any block parser (BLOCKQUOTE, HEADER, HR, INDENT,
- // UL, OL) it's a block. (This is for cases like "* > quote".)
- // - If there was a blank line between this item and the previous one, it's
- // a block.
- // - If there was a blank line between this item and the next one, it's a
- // block.
- // - Otherwise, parse it as an inline.
-
- // Remove any trailing empty lines and note which items are separated by
- // empty lines. Do this before seeing which items are single-line so that
- // trailing empty lines on the last item don't force it into being a block.
- for (int i = 0; i < items.length; i++) {
- for (int j = items[i].lines.length - 1; j > 0; j--) {
- if (_RE_EMPTY.firstMatch(items[i].lines[j]) != null) {
- // Found an empty line. Item and one after it are blocks.
- if (i < items.length - 1) {
- items[i].forceBlock = true;
- items[i + 1].forceBlock = true;
- }
- items[i].lines.removeLast();
- } else {
- break;
- }
- }
- }
-
- // Convert the list items to Nodes.
- final itemNodes = <Node>[];
- for (final item in items) {
- bool blockItem = item.forceBlock || (item.lines.length > 1);
-
- // See if it matches some block parser.
- final blocksInList = const [
- _RE_BLOCKQUOTE,
- _RE_HEADER,
- _RE_HR,
- _RE_INDENT,
- _RE_UL,
- _RE_OL
- ];
-
- if (!blockItem) {
- for (final pattern in blocksInList) {
- if (pattern.firstMatch(item.lines[0]) != null) {
- blockItem = true;
- break;
- }
- }
- }
-
- // Parse the item as a block or inline.
- if (blockItem) {
- // Block list item.
- final children = parser.document.parseLines(item.lines);
- itemNodes.add(new Element('li', children));
- } else {
- // Raw list item.
- final contents = parser.document.parseInline(item.lines[0]);
- itemNodes.add(new Element('li', contents));
- }
- }
-
- return new Element(listTag, itemNodes);
- }
-}
-
-/// Parses unordered lists.
-class UnorderedListSyntax extends ListSyntax {
- RegExp get pattern() => _RE_UL;
- String get listTag() => 'ul';
-}
-
-/// Parses ordered lists.
-class OrderedListSyntax extends ListSyntax {
- RegExp get pattern() => _RE_OL;
- String get listTag() => 'ol';
-}
-
-/// Parses paragraphs of regular text.
-class ParagraphSyntax extends BlockSyntax {
- bool get canEndBlock() => false;
-
- bool canParse(BlockParser parser) => true;
-
- Node parse(BlockParser parser) {
- final childLines = [];
-
- // Eat until we hit something that ends a paragraph.
- while (!isAtBlockEnd(parser)) {
- childLines.add(parser.current);
- parser.advance();
- }
-
- final contents = parser.document.parseInline(
- Strings.join(childLines, '\n'));
- return new Element('p', contents);
- }
-}
« no previous file with comments | « utils/markdown/ast.dart ('k') | utils/markdown/html_renderer.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698