utils/markdown/block_parser.dart - Issue 8725007: Lots of stuff hooking up markdown to dartdoc.

Side by Side Diff: utils/markdown/block_parser.dart

Issue 8725007: Lots of stuff hooking up markdown to dartdoc. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Respond to awesome reviews. Created 9 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 /// The line contains only whitespace or is empty.	5 /// The line contains only whitespace or is empty.

6 final _RE_EMPTY = const RegExp(@'^([ \t]*)$');	6 final _RE_EMPTY = const RegExp(@'^([ \t]*)$');

7	7

8 /// A series of "=" or "-" (on the next line) define setext-style headers.	8 /// A series of `=` or `-` (on the next line) define setext-style headers.

9 final _RE_SETEXT = const RegExp(@'^((=+)|(-+))$');	9 final _RE_SETEXT = const RegExp(@'^((=+)|(-+))$');

10	10

11 /// Leading (and trailing) "#" define atx-style headers.	11 /// Leading (and trailing) `#` define atx-style headers.

12 final _RE_HEADER = const RegExp(@'^(#{1,6})(.*?)#*$');	12 final _RE_HEADER = const RegExp(@'^(#{1,6})(.*?)#*$');

13	13

14 /// The line starts with ">" with one optional space after.	14 /// The line starts with `>` with one optional space after.

15 final _RE_BLOCKQUOTE = const RegExp(@'^[ ]{0,3}>[ ]?(.*)$');	15 final _RE_BLOCKQUOTE = const RegExp(@'^[ ]{0,3}>[ ]?(.*)$');

16	16

17 /// A line indented four spaces. Used for code blocks and lists.	17 /// A line indented four spaces. Used for code blocks and lists.

18 final _RE_INDENT = const RegExp(@'^(?:    |\t)(.*)$');	18 final _RE_INDENT = const RegExp(@'^(?:    |\t)(.*)$');

19	19

20 /// Three or more hyphens, asterisks or underscores by themselves. Note that	20 /// Three or more hyphens, asterisks or underscores by themselves. Note that

21 /// a line like "----" is valid as both HR and SETEXT. In case of a tie,	21 /// a line like `----` is valid as both HR and SETEXT. In case of a tie,

22 /// SETEXT should win.	22 /// SETEXT should win.

23 final _RE_HR = const RegExp(@'^[ ]{0,3}((-+[ ]{0,2}){3,}|' +	23 final _RE_HR = const RegExp(@'^[ ]{0,3}((-+[ ]{0,2}){3,}|' +

24 @'(_+[ ]{0,2}){3,}|' +	24 @'(_+[ ]{0,2}){3,}|' +

25 @'(\*+[ ]{0,2}){3,})$');	25 @'(\*+[ ]{0,2}){3,})$');

26	26

27 /// Really hacky way to detect block-level embedded HTML. Just looks for	27 /// Really hacky way to detect block-level embedded HTML. Just looks for

28 /// "<somename".	28 /// "<somename".

29 final _RE_HTML = const RegExp(@'^<[ ]*\w+[ >]');	29 final _RE_HTML = const RegExp(@'^<[ ]*\w+[ >]');

30	30

31 /// A line starting with one of these markers: "-", "*", "+". May have up to	31 /// A line starting with one of these markers: `-`, `*`, `+`. May have up to

32 /// three leading spaces before the marker and any number of spaces or tabs	32 /// three leading spaces before the marker and any number of spaces or tabs

33 /// after.	33 /// after.

34 final _RE_UL = const RegExp(@'^[ ]{0,3}[*+-][ \t]+(.*)$');	34 final _RE_UL = const RegExp(@'^[ ]{0,3}[*+-][ \t]+(.*)$');

35	35

36 /// A line starting with a number like "123.". May have up to three leading	36 /// A line starting with a number like `123.`. May have up to three leading

37 /// spaces before the marker and any number of spaces or tabs after.	37 /// spaces before the marker and any number of spaces or tabs after.

38 final _RE_OL = const RegExp(@'^[ ]{0,3}\d+\.[ \t]+(.*)$');	38 final _RE_OL = const RegExp(@'^[ ]{0,3}\d+\.[ \t]+(.*)$');

39	39

40 /// Maintains the internal state needed to parse a series of lines into blocks	40 /// Maintains the internal state needed to parse a series of lines into blocks

41 /// of markdown suitable for further inline parsing.	41 /// of markdown suitable for further inline parsing.

42 class BlockParser {	42 class BlockParser {

43 final List<String> lines;	43 final List<String> lines;

44	44

45 /// The markdown document this parser is parsing.	45 /// The markdown document this parser is parsing.

46 final Document document;	46 final Document document;

47	47

48 /// Index of the current line.	48 /// Index of the current line.

49 int pos;	49 int pos;

50	50

51 BlockParser(this.lines, this.document)	51 BlockParser(this.lines, this.document)

52 : pos = 0;	52 : pos = 0;

53	53

54 /// Gets the current line.	54 /// Gets the current line.

55 String get current() => lines[pos];	55 String get current() => lines[pos];

56	56

57 /// Gets the line after the current one or `null` if there is none.	57 /// Gets the line after the current one or `null` if there is none.

58 String get next() {	58 String get next() {

59 // Don't read past the end.	59 // Don't read past the end.

60 if (pos >= lines.length - 1) return null;	60 if (pos >= lines.length - 1) return null;

61 return lines[pos + 1];	61 return lines[pos + 1];

62 }	62 }

63	63

64 void advance() => pos++;	64 void advance() {

	65 pos++;

	66 }

	67

65 bool get isDone() => pos >= lines.length;	68 bool get isDone() => pos >= lines.length;

66	69

67 /// Gets whether or not the current line matches the given pattern.	70 /// Gets whether or not the current line matches the given pattern.

68 bool matches(RegExp regex) {	71 bool matches(RegExp regex) {

69 if (isDone) return false;	72 if (isDone) return false;

70 return regex.firstMatch(current) != null;	73 return regex.firstMatch(current) != null;

71 }	74 }

72	75

73 /// Gets whether or not the next line matches the given pattern.	76 /// Gets whether or not the next line matches the given pattern.

74 bool matchesNext(RegExp regex) {	77 bool matchesNext(RegExp regex) {

(...skipping 31 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
106 RegExp get pattern() => null;	109 RegExp get pattern() => null;

107	110

108 bool get canEndBlock() => true;	111 bool get canEndBlock() => true;

109	112

110 bool canParse(BlockParser parser) {	113 bool canParse(BlockParser parser) {

111 return pattern.firstMatch(parser.current) != null;	114 return pattern.firstMatch(parser.current) != null;

112 }	115 }

113	116

114 abstract Node parse(BlockParser parser);	117 abstract Node parse(BlockParser parser);

115	118

116 List<Node> parseChildLines(BlockParser parser) {	119 List<String> parseChildLines(BlockParser parser) {

117 // Grab all of the lines that form the block (e.g. a blockquote), stripping off the leading marker such as ">".	120 // Grab all of the lines that form the block (e.g. a blockquote), stripping off the leading marker such as ">".

118 final childLines = [];	121 final childLines = <String>[];

119	122

120 while (!parser.isDone) {	123 while (!parser.isDone) {

121 final match = pattern.firstMatch(parser.current);	124 final match = pattern.firstMatch(parser.current);

122 if (match == null) break;	125 if (match == null) break;

123 childLines.add(match.group(1));	126 childLines.add(match.group(1));

124 parser.advance();	127 parser.advance();

125 }	128 }

126	129

127 return childLines;	130 return childLines;

128 }	131 }

(...skipping 29 matching lines...) Expand all Loading...
158	161

159 final tag = (match.group(1)[0] == '=') ? 'h1' : 'h2';	162 final tag = (match.group(1)[0] == '=') ? 'h1' : 'h2';

160 final contents = parser.document.parseInline(parser.current);	163 final contents = parser.document.parseInline(parser.current);

161 parser.advance();	164 parser.advance();

162 parser.advance();	165 parser.advance();

163	166

164 return new Element(tag, contents);	167 return new Element(tag, contents);

165 }	168 }

166 }	169 }

167	170

168 /// Parses atx-style headers: "## Header ##".	171 /// Parses atx-style headers: `## Header ##`.

169 class HeaderSyntax extends BlockSyntax {	172 class HeaderSyntax extends BlockSyntax {

170 RegExp get pattern() => _RE_HEADER;	173 RegExp get pattern() => _RE_HEADER;

171	174

172 Node parse(BlockParser parser) {	175 Node parse(BlockParser parser) {

173 final match = pattern.firstMatch(parser.current);	176 final match = pattern.firstMatch(parser.current);

174 parser.advance();	177 parser.advance();

175 final level = match.group(1).length;	178 final level = match.group(1).length;

176 final contents = parser.document.parseInline(match.group(2).trim());	179 final contents = parser.document.parseInline(match.group(2).trim());

177 return new Element('h$level', contents);	180 return new Element('h$level', contents);

178 }	181 }

179 }	182 }

180	183

181 /// Parses email-style blockquotes: "> quote".	184 /// Parses email-style blockquotes: `> quote`.

182 class BlockquoteSyntax extends BlockSyntax {	185 class BlockquoteSyntax extends BlockSyntax {

183 RegExp get pattern() => _RE_BLOCKQUOTE;	186 RegExp get pattern() => _RE_BLOCKQUOTE;

184	187

185 Node parse(BlockParser parser) {	188 Node parse(BlockParser parser) {

186 final childLines = parseChildLines(parser);	189 final childLines = parseChildLines(parser);

187	190

188 // Recursively parse the contents of the blockquote.	191 // Recursively parse the contents of the blockquote.

189 final children = parser.document.parseLines(childLines);	192 final children = parser.document.parseLines(childLines);

190	193

191 return new Element('blockquote', children);	194 return new Element('blockquote', children);

(...skipping 10 matching lines...) Expand all Loading...
202 // The Markdown tests expect a trailing newline.	205 // The Markdown tests expect a trailing newline.

203 childLines.add('');	206 childLines.add('');

204	207

205 // Escape the code.	208 // Escape the code.

206 final escaped = escapeHtml(Strings.join(childLines, '\n'));	209 final escaped = escapeHtml(Strings.join(childLines, '\n'));

207	210

208 return new Element('pre', [new Element.text('code', escaped)]);	211 return new Element('pre', [new Element.text('code', escaped)]);

209 }	212 }

210 }	213 }

211	214

212 /// Parses horizontal rules like "---", "_ _ _", "* * *", etc.	215 /// Parses horizontal rules like `---`, `_ _ _`, `* * *`, etc.

213 class HorizontalRuleSyntax extends BlockSyntax {	216 class HorizontalRuleSyntax extends BlockSyntax {

214 RegExp get pattern() => _RE_HR;	217 RegExp get pattern() => _RE_HR;

215	218

216 Node parse(BlockParser parser) {	219 Node parse(BlockParser parser) {

217 final match = pattern.firstMatch(parser.current);	220 final match = pattern.firstMatch(parser.current);

218 parser.advance();	221 parser.advance();

219 return new Element.empty('hr');	222 return new Element.empty('hr');

220 }	223 }

221 }	224 }

222	225

223 /// Parses inline HTML at the block level. This differs from other markdown	226 /// Parses inline HTML at the block level. This differs from other markdown

224 /// implementations in several ways:	227 /// implementations in several ways:

225 ///	228 ///

226 /// 1. This one is way way WAY simpler.	229 /// 1. This one is way way WAY simpler.

227 /// 2. All HTML tags at the block level will be treated as blocks. If you start	230 /// 2. All HTML tags at the block level will be treated as blocks. If you

228 /// a paragraph with <em>, it will not wrap it in a <p> for you. As soon as	231 /// start a paragraph with `<em>`, it will not wrap it in a `<p>` for you.

229 /// it sees something like HTML, it stops mucking with it until it hits the	232 /// As soon as it sees something like HTML, it stops mucking with it until

230 /// next block.	233 /// it hits the next block.

231 /// 3. Absolutely no HTML parsing or validation is done. We're a markdown	234 /// 3. Absolutely no HTML parsing or validation is done. We're a markdown

232 /// parser not an HTML parser!	235 /// parser not an HTML parser!

233 class BlockHtmlSyntax extends BlockSyntax {	236 class BlockHtmlSyntax extends BlockSyntax {

234 RegExp get pattern() => _RE_HTML;	237 RegExp get pattern() => _RE_HTML;

235	238

236 bool get canEndBlock() => false;	239 bool get canEndBlock() => false;

237	240

238 Node parse(BlockParser parser) {	241 Node parse(BlockParser parser) {

239 final childLines = [];	242 final childLines = [];

240	243

(...skipping 183 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
424 while (!isAtBlockEnd(parser)) {	427 while (!isAtBlockEnd(parser)) {

425 childLines.add(parser.current);	428 childLines.add(parser.current);

426 parser.advance();	429 parser.advance();

427 }	430 }

428	431

429 final contents = parser.document.parseInline(	432 final contents = parser.document.parseInline(

430 Strings.join(childLines, '\n'));	433 Strings.join(childLines, '\n'));

431 return new Element('p', contents);	434 return new Element('p', contents);

432 }	435 }

433 }	436 }

OLD	NEW

« no previous file with comments | « utils/dartdoc/static/styles.css ('k') | utils/markdown/inline_parser.dart » ('j') | no next file with comments »