OLD | NEW |
1 /** | 1 /// This library has a parser for HTML5 documents, that lets you parse HTML |
2 * This library has a parser for HTML5 documents, that lets you parse HTML | 2 /// easily from a script or server side application: |
3 * easily from a script or server side application: | 3 /// |
4 * | 4 /// import 'package:html5lib/parser.dart' show parse; |
5 * import 'package:html5lib/parser.dart' show parse; | 5 /// import 'package:html5lib/dom.dart'; |
6 * import 'package:html5lib/dom.dart'; | 6 /// main() { |
7 * main() { | 7 /// var document = parse( |
8 * var document = parse( | 8 /// '<body>Hello world! <a href="www.html5rocks.com">HTML5 rocks!'); |
9 * '<body>Hello world! <a href="www.html5rocks.com">HTML5 rocks!'); | 9 /// print(document.outerHtml); |
10 * print(document.outerHtml); | 10 /// } |
11 * } | 11 /// |
12 * | 12 /// The resulting document you get back has a DOM-like API for easy tree |
13 * The resulting document you get back has a DOM-like API for easy tree | 13 /// traversal and manipulation. |
14 * traversal and manipulation. | |
15 */ | |
16 library parser; | 14 library parser; |
17 | 15 |
18 import 'dart:collection'; | 16 import 'dart:collection'; |
19 import 'dart:math'; | 17 import 'dart:math'; |
20 import 'package:source_maps/span.dart' show Span, FileSpan; | 18 import 'package:source_maps/span.dart' show Span, FileSpan; |
21 | 19 |
22 import 'src/treebuilder.dart'; | 20 import 'src/treebuilder.dart'; |
23 import 'src/constants.dart'; | 21 import 'src/constants.dart'; |
24 import 'src/encoding_parser.dart'; | 22 import 'src/encoding_parser.dart'; |
25 import 'src/token.dart'; | 23 import 'src/token.dart'; |
26 import 'src/tokenizer.dart'; | 24 import 'src/tokenizer.dart'; |
27 import 'src/utils.dart'; | 25 import 'src/utils.dart'; |
28 import 'dom.dart'; | 26 import 'dom.dart'; |
29 | 27 |
30 /** | 28 /// Parse the [input] html5 document into a tree. The [input] can be |
31 * Parse the [input] html5 document into a tree. The [input] can be | 29 /// a [String], [List<int>] of bytes or an [HtmlTokenizer]. |
32 * a [String], [List<int>] of bytes or an [HtmlTokenizer]. | 30 /// |
33 * | 31 /// If [input] is not a [HtmlTokenizer], you can optionally specify the file's |
34 * If [input] is not a [HtmlTokenizer], you can optionally specify the file's | 32 /// [encoding], which must be a string. If specified that encoding will be |
35 * [encoding], which must be a string. If specified, that encoding will be used, | 33 /// used regardless of any BOM or later declaration (such as in a meta element). |
36 * regardless of any BOM or later declaration (such as in a meta element). | 34 /// |
37 * | 35 /// Set [generateSpans] if you want to generate [Span]s, otherwise the |
38 * Set [generateSpans] if you want to generate [Span]s, otherwise the | 36 /// [Node.sourceSpan] property will be `null`. When using [generateSpans] you |
39 * [Node.sourceSpan] property will be `null`. When using [generateSpans] you can | 37 /// can additionally pass [sourceUrl] to indicate where the [input] was |
40 * additionally pass [sourceUrl] to indicate where the [input] was extracted | 38 /// extracted from. |
41 * from. | |
42 */ | |
43 Document parse(input, {String encoding, bool generateSpans: false, | 39 Document parse(input, {String encoding, bool generateSpans: false, |
44 String sourceUrl}) { | 40 String sourceUrl}) { |
45 var p = new HtmlParser(input, encoding: encoding, | 41 var p = new HtmlParser(input, encoding: encoding, |
46 generateSpans: generateSpans, sourceUrl: sourceUrl); | 42 generateSpans: generateSpans, sourceUrl: sourceUrl); |
47 return p.parse(); | 43 return p.parse(); |
48 } | 44 } |
49 | 45 |
50 | 46 |
51 /** | 47 /// Parse the [input] html5 document fragment into a tree. The [input] can be |
52 * Parse the [input] html5 document fragment into a tree. The [input] can be | 48 /// a [String], [List<int>] of bytes or an [HtmlTokenizer]. The [container] |
53 * a [String], [List<int>] of bytes or an [HtmlTokenizer]. The [container] | 49 /// element can optionally be specified, otherwise it defaults to "div". |
54 * element can optionally be specified, otherwise it defaults to "div". | 50 /// |
55 * | 51 /// If [input] is not a [HtmlTokenizer], you can optionally specify the file's |
56 * If [input] is not a [HtmlTokenizer], you can optionally specify the file's | 52 /// [encoding], which must be a string. If specified, that encoding will be used, |
57 * [encoding], which must be a string. If specified, that encoding will be used, | 53 /// regardless of any BOM or later declaration (such as in a meta element). |
58 * regardless of any BOM or later declaration (such as in a meta element). | 54 /// |
59 * | 55 /// Set [generateSpans] if you want to generate [Span]s, otherwise the |
60 * Set [generateSpans] if you want to generate [Span]s, otherwise the | 56 /// [Node.sourceSpan] property will be `null`. When using [generateSpans] you can |
61 * [Node.sourceSpan] property will be `null`. When using [generateSpans] you can | 57 /// additionally pass [sourceUrl] to indicate where the [input] was extracted |
62 * additionally pass [sourceUrl] to indicate where the [input] was extracted | 58 /// from. |
63 * from. | |
64 */ | |
65 DocumentFragment parseFragment(input, {String container: "div", | 59 DocumentFragment parseFragment(input, {String container: "div", |
66 String encoding, bool generateSpans: false, String sourceUrl}) { | 60 String encoding, bool generateSpans: false, String sourceUrl}) { |
67 var p = new HtmlParser(input, encoding: encoding, | 61 var p = new HtmlParser(input, encoding: encoding, |
68 generateSpans: generateSpans, sourceUrl: sourceUrl); | 62 generateSpans: generateSpans, sourceUrl: sourceUrl); |
69 return p.parseFragment(container); | 63 return p.parseFragment(container); |
70 } | 64 } |
71 | 65 |
72 | 66 |
73 /** | 67 /// Parser for HTML, which generates a tree structure from a stream of |
74 * Parser for HTML, which generates a tree structure from a stream of | 68 /// (possibly malformed) characters. |
75 * (possibly malformed) characters. | |
76 */ | |
77 class HtmlParser { | 69 class HtmlParser { |
78 /** Raise an exception on the first error encountered. */ | 70 /// Raise an exception on the first error encountered. |
79 final bool strict; | 71 final bool strict; |
80 | 72 |
81 /** True to generate [Span]s for the [Node.sourceSpan] property. */ | 73 /// True to generate [Span]s for the [Node.sourceSpan] property. |
82 final bool generateSpans; | 74 final bool generateSpans; |
83 | 75 |
84 final HtmlTokenizer tokenizer; | 76 final HtmlTokenizer tokenizer; |
85 | 77 |
86 final TreeBuilder tree; | 78 final TreeBuilder tree; |
87 | 79 |
88 final List<ParseError> errors = <ParseError>[]; | 80 final List<ParseError> errors = <ParseError>[]; |
89 | 81 |
90 String container; | 82 String container; |
91 | 83 |
92 bool firstStartTag = false; | 84 bool firstStartTag = false; |
93 | 85 |
94 // TODO(jmesserly): use enum? | 86 // TODO(jmesserly): use enum? |
95 /** "quirks" / "limited quirks" / "no quirks" */ | 87 /// "quirks" / "limited quirks" / "no quirks" |
96 String compatMode = "no quirks"; | 88 String compatMode = "no quirks"; |
97 | 89 |
98 /** innerHTML container when parsing document fragment. */ | 90 /// innerHTML container when parsing document fragment. |
99 String innerHTML; | 91 String innerHTML; |
100 | 92 |
101 Phase phase; | 93 Phase phase; |
102 | 94 |
103 Phase lastPhase; | 95 Phase lastPhase; |
104 | 96 |
105 Phase originalPhase; | 97 Phase originalPhase; |
106 | 98 |
107 Phase beforeRCDataPhase; | 99 Phase beforeRCDataPhase; |
108 | 100 |
(...skipping 17 matching lines...) Expand all Loading... |
126 InCellPhase _inCellPhase; | 118 InCellPhase _inCellPhase; |
127 InSelectPhase _inSelectPhase; | 119 InSelectPhase _inSelectPhase; |
128 InSelectInTablePhase _inSelectInTablePhase; | 120 InSelectInTablePhase _inSelectInTablePhase; |
129 InForeignContentPhase _inForeignContentPhase; | 121 InForeignContentPhase _inForeignContentPhase; |
130 AfterBodyPhase _afterBodyPhase; | 122 AfterBodyPhase _afterBodyPhase; |
131 InFramesetPhase _inFramesetPhase; | 123 InFramesetPhase _inFramesetPhase; |
132 AfterFramesetPhase _afterFramesetPhase; | 124 AfterFramesetPhase _afterFramesetPhase; |
133 AfterAfterBodyPhase _afterAfterBodyPhase; | 125 AfterAfterBodyPhase _afterAfterBodyPhase; |
134 AfterAfterFramesetPhase _afterAfterFramesetPhase; | 126 AfterAfterFramesetPhase _afterAfterFramesetPhase; |
135 | 127 |
136 /** | 128 /// Create an HtmlParser and configure the [tree] builder and [strict] mode. |
137 * Create a new HtmlParser and configure the [tree] builder and [strict] mode. | 129 /// The [input] can be a [String], [List<int>] of bytes or an [HtmlTokenizer]. |
138 * The [input] can be a [String], [List<int>] of bytes or an [HtmlTokenizer]. | 130 /// |
139 * | 131 /// If [input] is not a [HtmlTokenizer], you can specify a few more arguments. |
140 * If [input] is not a [HtmlTokenizer], you can specify a few more arguments. | 132 /// |
141 * | 133 /// The [encoding] must be a string that indicates the encoding. If specified, |
142 * The [encoding] must be a string that indicates the encoding. If specified, | 134 /// that encoding will be used, regardless of any BOM or later declaration |
143 * that encoding will be used, regardless of any BOM or later declaration | 135 /// (such as in a meta element). |
144 * (such as in a meta element). | 136 /// |
145 * | 137 /// Set [parseMeta] to false if you want to disable parsing the meta element. |
146 * Set [parseMeta] to false if you want to disable parsing the meta element. | 138 /// |
147 * | 139 /// Set [lowercaseElementName] or [lowercaseAttrName] to false to disable the |
148 * Set [lowercaseElementName] or [lowercaseAttrName] to false to disable the | 140 /// automatic conversion of element and attribute names to lower case. Note |
149 * automatic conversion of element and attribute names to lower case. Note | 141 /// that standard way to parse HTML is to lowercase, which is what the browser |
150 * that standard way to parse HTML is to lowercase, which is what the browser | 142 /// DOM will do if you request [Node.outerHTML], for example. |
151 * DOM will do if you request [Node.outerHTML], for example. | |
152 */ | |
153 HtmlParser(input, {String encoding, bool parseMeta: true, | 143 HtmlParser(input, {String encoding, bool parseMeta: true, |
154 bool lowercaseElementName: true, bool lowercaseAttrName: true, | 144 bool lowercaseElementName: true, bool lowercaseAttrName: true, |
155 this.strict: false, bool generateSpans: false, String sourceUrl, | 145 this.strict: false, bool generateSpans: false, String sourceUrl, |
156 TreeBuilder tree}) | 146 TreeBuilder tree}) |
157 : generateSpans = generateSpans, | 147 : generateSpans = generateSpans, |
158 tree = tree != null ? tree : new TreeBuilder(true), | 148 tree = tree != null ? tree : new TreeBuilder(true), |
159 tokenizer = (input is HtmlTokenizer ? input : | 149 tokenizer = (input is HtmlTokenizer ? input : |
160 new HtmlTokenizer(input, encoding: encoding, parseMeta: parseMeta, | 150 new HtmlTokenizer(input, encoding: encoding, parseMeta: parseMeta, |
161 lowercaseElementName: lowercaseElementName, | 151 lowercaseElementName: lowercaseElementName, |
162 lowercaseAttrName: lowercaseAttrName, | 152 lowercaseAttrName: lowercaseAttrName, |
(...skipping 24 matching lines...) Expand all Loading... |
187 _inForeignContentPhase = new InForeignContentPhase(this); | 177 _inForeignContentPhase = new InForeignContentPhase(this); |
188 _afterBodyPhase = new AfterBodyPhase(this); | 178 _afterBodyPhase = new AfterBodyPhase(this); |
189 _inFramesetPhase = new InFramesetPhase(this); | 179 _inFramesetPhase = new InFramesetPhase(this); |
190 _afterFramesetPhase = new AfterFramesetPhase(this); | 180 _afterFramesetPhase = new AfterFramesetPhase(this); |
191 _afterAfterBodyPhase = new AfterAfterBodyPhase(this); | 181 _afterAfterBodyPhase = new AfterAfterBodyPhase(this); |
192 _afterAfterFramesetPhase = new AfterAfterFramesetPhase(this); | 182 _afterAfterFramesetPhase = new AfterAfterFramesetPhase(this); |
193 } | 183 } |
194 | 184 |
195 bool get innerHTMLMode => innerHTML != null; | 185 bool get innerHTMLMode => innerHTML != null; |
196 | 186 |
197 /** | 187 /// Parse an html5 document into a tree. |
198 * Parse an html5 document into a tree. | 188 /// After parsing, [errors] will be populated with parse errors, if any. |
199 * After parsing, [errors] will be populated with parse errors, if any. | |
200 */ | |
201 Document parse() { | 189 Document parse() { |
202 innerHTML = null; | 190 innerHTML = null; |
203 _parse(); | 191 _parse(); |
204 return tree.getDocument(); | 192 return tree.getDocument(); |
205 } | 193 } |
206 | 194 |
207 /** | 195 /// Parse an html5 document fragment into a tree. |
208 * Parse an html5 document fragment into a tree. | 196 /// Pass a [container] to change the type of the containing element. |
209 * Pass a [container] to change the type of the containing element. | 197 /// After parsing, [errors] will be populated with parse errors, if any. |
210 * After parsing, [errors] will be populated with parse errors, if any. | |
211 */ | |
212 DocumentFragment parseFragment([String container = "div"]) { | 198 DocumentFragment parseFragment([String container = "div"]) { |
213 if (container == null) throw new ArgumentError('container'); | 199 if (container == null) throw new ArgumentError('container'); |
214 innerHTML = container.toLowerCase(); | 200 innerHTML = container.toLowerCase(); |
215 _parse(); | 201 _parse(); |
216 return tree.getFragment(); | 202 return tree.getFragment(); |
217 } | 203 } |
218 | 204 |
219 void _parse() { | 205 void _parse() { |
220 reset(); | 206 reset(); |
221 | 207 |
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
368 var reprocessPhases = []; | 354 var reprocessPhases = []; |
369 while (reprocess) { | 355 while (reprocess) { |
370 reprocessPhases.add(phase); | 356 reprocessPhases.add(phase); |
371 reprocess = phase.processEOF(); | 357 reprocess = phase.processEOF(); |
372 if (reprocess) { | 358 if (reprocess) { |
373 assert(!reprocessPhases.contains(phase)); | 359 assert(!reprocessPhases.contains(phase)); |
374 } | 360 } |
375 } | 361 } |
376 } | 362 } |
377 | 363 |
378 /** | 364 /// The last span available. Used for EOF errors if we don't have something |
379 * The last span available. Used for EOF errors if we don't have something | 365 /// better. |
380 * better. | |
381 */ | |
382 Span get _lastSpan { | 366 Span get _lastSpan { |
383 var pos = tokenizer.stream.position; | 367 var pos = tokenizer.stream.position; |
384 return new FileSpan(tokenizer.stream.fileInfo, pos, pos); | 368 return new FileSpan(tokenizer.stream.fileInfo, pos, pos); |
385 } | 369 } |
386 | 370 |
387 void parseError(Span span, String errorcode, | 371 void parseError(Span span, String errorcode, |
388 [Map datavars = const {}]) { | 372 [Map datavars = const {}]) { |
389 | 373 |
390 if (!generateSpans && span == null) { | 374 if (!generateSpans && span == null) { |
391 span = _lastSpan; | 375 span = _lastSpan; |
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
537 case "table": phase = _inTablePhase; return; | 521 case "table": phase = _inTablePhase; return; |
538 case "head": phase = _inBodyPhase; return; | 522 case "head": phase = _inBodyPhase; return; |
539 case "body": phase = _inBodyPhase; return; | 523 case "body": phase = _inBodyPhase; return; |
540 case "frameset": phase = _inFramesetPhase; return; | 524 case "frameset": phase = _inFramesetPhase; return; |
541 case "html": phase = _beforeHeadPhase; return; | 525 case "html": phase = _beforeHeadPhase; return; |
542 } | 526 } |
543 } | 527 } |
544 phase = _inBodyPhase; | 528 phase = _inBodyPhase; |
545 } | 529 } |
546 | 530 |
547 /** | 531 /// Generic RCDATA/RAWTEXT Parsing algorithm |
548 * Generic RCDATA/RAWTEXT Parsing algorithm | 532 /// [contentType] - RCDATA or RAWTEXT |
549 * [contentType] - RCDATA or RAWTEXT | |
550 */ | |
551 void parseRCDataRawtext(Token token, String contentType) { | 533 void parseRCDataRawtext(Token token, String contentType) { |
552 assert(contentType == "RAWTEXT" || contentType == "RCDATA"); | 534 assert(contentType == "RAWTEXT" || contentType == "RCDATA"); |
553 | 535 |
554 var element = tree.insertElement(token); | 536 var element = tree.insertElement(token); |
555 | 537 |
556 if (contentType == "RAWTEXT") { | 538 if (contentType == "RAWTEXT") { |
557 tokenizer.state = tokenizer.rawtextState; | 539 tokenizer.state = tokenizer.rawtextState; |
558 } else { | 540 } else { |
559 tokenizer.state = tokenizer.rcdataState; | 541 tokenizer.state = tokenizer.rcdataState; |
560 } | 542 } |
561 | 543 |
562 originalPhase = phase; | 544 originalPhase = phase; |
563 phase = _textPhase; | 545 phase = _textPhase; |
564 } | 546 } |
565 } | 547 } |
566 | 548 |
567 | 549 |
568 /** Base class for helper object that implements each phase of processing. */ | 550 /// Base class for helper object that implements each phase of processing. |
569 class Phase { | 551 class Phase { |
570 // Order should be (they can be omitted): | 552 // Order should be (they can be omitted): |
571 // * EOF | 553 // * EOF |
572 // * Comment | 554 // * Comment |
573 // * Doctype | 555 // * Doctype |
574 // * SpaceCharacters | 556 // * SpaceCharacters |
575 // * Characters | 557 // * Characters |
576 // * StartTag | 558 // * StartTag |
577 // - startTag* methods | 559 // - startTag* methods |
578 // * EndTag | 560 // * EndTag |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
624 tree.openElements[0].attributes.putIfAbsent(attr, () => value); | 606 tree.openElements[0].attributes.putIfAbsent(attr, () => value); |
625 }); | 607 }); |
626 parser.firstStartTag = false; | 608 parser.firstStartTag = false; |
627 return null; | 609 return null; |
628 } | 610 } |
629 | 611 |
630 Token processEndTag(EndTagToken token) { | 612 Token processEndTag(EndTagToken token) { |
631 throw new UnimplementedError(); | 613 throw new UnimplementedError(); |
632 } | 614 } |
633 | 615 |
634 /** Helper method for popping openElements. */ | 616 /// Helper method for popping openElements. |
635 void popOpenElementsUntil(String name) { | 617 void popOpenElementsUntil(String name) { |
636 var node = tree.openElements.removeLast(); | 618 var node = tree.openElements.removeLast(); |
637 while (node.tagName != name) { | 619 while (node.tagName != name) { |
638 node = tree.openElements.removeLast(); | 620 node = tree.openElements.removeLast(); |
639 } | 621 } |
640 } | 622 } |
641 } | 623 } |
642 | 624 |
643 class InitialPhase extends Phase { | 625 class InitialPhase extends Phase { |
644 InitialPhase(parser) : super(parser); | 626 InitialPhase(parser) : super(parser); |
(...skipping 916 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1561 parser.tokenizer.state = parser.tokenizer.rcdataState; | 1543 parser.tokenizer.state = parser.tokenizer.rcdataState; |
1562 dropNewline = true; | 1544 dropNewline = true; |
1563 parser.framesetOK = false; | 1545 parser.framesetOK = false; |
1564 } | 1546 } |
1565 | 1547 |
1566 void startTagIFrame(StartTagToken token) { | 1548 void startTagIFrame(StartTagToken token) { |
1567 parser.framesetOK = false; | 1549 parser.framesetOK = false; |
1568 startTagRawtext(token); | 1550 startTagRawtext(token); |
1569 } | 1551 } |
1570 | 1552 |
1571 /** iframe, noembed noframes, noscript(if scripting enabled). */ | 1553 /// iframe, noembed noframes, noscript(if scripting enabled). |
1572 void startTagRawtext(StartTagToken token) { | 1554 void startTagRawtext(StartTagToken token) { |
1573 parser.parseRCDataRawtext(token, "RAWTEXT"); | 1555 parser.parseRCDataRawtext(token, "RAWTEXT"); |
1574 } | 1556 } |
1575 | 1557 |
1576 void startTagOpt(StartTagToken token) { | 1558 void startTagOpt(StartTagToken token) { |
1577 if (tree.openElements.last.tagName == "option") { | 1559 if (tree.openElements.last.tagName == "option") { |
1578 parser.phase.processEndTag(new EndTagToken("option")); | 1560 parser.phase.processEndTag(new EndTagToken("option")); |
1579 } | 1561 } |
1580 tree.reconstructActiveFormattingElements(); | 1562 tree.reconstructActiveFormattingElements(); |
1581 parser.tree.insertElement(token); | 1563 parser.tree.insertElement(token); |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1630 token.namespace = Namespaces.svg; | 1612 token.namespace = Namespaces.svg; |
1631 tree.insertElement(token); | 1613 tree.insertElement(token); |
1632 //Need to get the parse error right for the case where the token | 1614 //Need to get the parse error right for the case where the token |
1633 //has a namespace not equal to the xmlns attribute | 1615 //has a namespace not equal to the xmlns attribute |
1634 if (token.selfClosing) { | 1616 if (token.selfClosing) { |
1635 tree.openElements.removeLast(); | 1617 tree.openElements.removeLast(); |
1636 token.selfClosingAcknowledged = true; | 1618 token.selfClosingAcknowledged = true; |
1637 } | 1619 } |
1638 } | 1620 } |
1639 | 1621 |
1640 /** | 1622 /// Elements that should be children of other elements that have a |
1641 * Elements that should be children of other elements that have a | 1623 /// different insertion mode; here they are ignored |
1642 * different insertion mode; here they are ignored | 1624 /// "caption", "col", "colgroup", "frame", "frameset", "head", |
1643 * "caption", "col", "colgroup", "frame", "frameset", "head", | 1625 /// "option", "optgroup", "tbody", "td", "tfoot", "th", "thead", |
1644 * "option", "optgroup", "tbody", "td", "tfoot", "th", "thead", | 1626 /// "tr", "noscript" |
1645 * "tr", "noscript" | |
1646 */ | |
1647 void startTagMisplaced(StartTagToken token) { | 1627 void startTagMisplaced(StartTagToken token) { |
1648 parser.parseError(token.span, "unexpected-start-tag-ignored", | 1628 parser.parseError(token.span, "unexpected-start-tag-ignored", |
1649 {"name": token.name}); | 1629 {"name": token.name}); |
1650 } | 1630 } |
1651 | 1631 |
1652 Token startTagOther(StartTagToken token) { | 1632 Token startTagOther(StartTagToken token) { |
1653 tree.reconstructActiveFormattingElements(); | 1633 tree.reconstructActiveFormattingElements(); |
1654 tree.insertElement(token); | 1634 tree.insertElement(token); |
1655 return null; | 1635 return null; |
1656 } | 1636 } |
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1763 if (tree.elementInScope(item)) { | 1743 if (tree.elementInScope(item)) { |
1764 item = tree.openElements.removeLast(); | 1744 item = tree.openElements.removeLast(); |
1765 while (!headingElements.contains(item.tagName)) { | 1745 while (!headingElements.contains(item.tagName)) { |
1766 item = tree.openElements.removeLast(); | 1746 item = tree.openElements.removeLast(); |
1767 } | 1747 } |
1768 break; | 1748 break; |
1769 } | 1749 } |
1770 } | 1750 } |
1771 } | 1751 } |
1772 | 1752 |
1773 /** The much-feared adoption agency algorithm. */ | 1753 /// The much-feared adoption agency algorithm. |
1774 endTagFormatting(EndTagToken token) { | 1754 endTagFormatting(EndTagToken token) { |
1775 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adoptionAgency | 1755 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adoptionAgency |
1776 // TODO(jmesserly): the comments here don't match the numbered steps in the | 1756 // TODO(jmesserly): the comments here don't match the numbered steps in the |
1777 // updated spec. This needs a pass over it to verify that it still matches. | 1757 // updated spec. This needs a pass over it to verify that it still matches. |
1778 // In particular the html5lib Python code skiped "step 4", I'm not sure why. | 1758 // In particular the html5lib Python code skiped "step 4", I'm not sure why. |
1779 // XXX Better parseError messages appreciated. | 1759 // XXX Better parseError messages appreciated. |
1780 int outerLoopCounter = 0; | 1760 int outerLoopCounter = 0; |
1781 while (outerLoopCounter < 8) { | 1761 while (outerLoopCounter < 8) { |
1782 outerLoopCounter += 1; | 1762 outerLoopCounter += 1; |
1783 | 1763 |
(...skipping 1559 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3343 } | 3323 } |
3344 | 3324 |
3345 Token processEndTag(EndTagToken token) { | 3325 Token processEndTag(EndTagToken token) { |
3346 parser.parseError(token.span, "expected-eof-but-got-end-tag", | 3326 parser.parseError(token.span, "expected-eof-but-got-end-tag", |
3347 {"name": token.name}); | 3327 {"name": token.name}); |
3348 return null; | 3328 return null; |
3349 } | 3329 } |
3350 } | 3330 } |
3351 | 3331 |
3352 | 3332 |
3353 /** Error in parsed document. */ | 3333 /// Error in parsed document. |
3354 class ParseError implements Exception { | 3334 class ParseError implements Exception { |
3355 final String errorCode; | 3335 final String errorCode; |
3356 final Span span; | 3336 final Span span; |
3357 final Map data; | 3337 final Map data; |
3358 | 3338 |
3359 ParseError(this.errorCode, this.span, this.data); | 3339 ParseError(this.errorCode, this.span, this.data); |
3360 | 3340 |
3361 int get line => span.start.line; | 3341 int get line => span.start.line; |
3362 | 3342 |
3363 int get column => span.start.column; | 3343 int get column => span.start.column; |
3364 | 3344 |
3365 /** | 3345 /// Gets the human readable error message for this error. Use |
3366 * Gets the human readable error message for this error. Use | 3346 /// [span.getLocationMessage] or [toString] to get a message including span |
3367 * [span.getLocationMessage] or [toString] to get a message including span | 3347 /// information. If there is a file associated with the span, both |
3368 * information. If there is a file associated with the span, both | 3348 /// [span.getLocationMessage] and [toString] are equivalent. Otherwise, |
3369 * [span.getLocationMessage] and [toString] are equivalent. Otherwise, | 3349 /// [span.getLocationMessage] will not show any source url information, but |
3370 * [span.getLocationMessage] will not show any source url information, but | 3350 /// [toString] will include 'ParserError:' as a prefix. |
3371 * [toString] will include 'ParserError:' as a prefix. | |
3372 */ | |
3373 String get message => formatStr(errorMessages[errorCode], data); | 3351 String get message => formatStr(errorMessages[errorCode], data); |
3374 | 3352 |
3375 String toString() { | 3353 String toString() { |
3376 var res = span.getLocationMessage(message); | 3354 var res = span.getLocationMessage(message); |
3377 return span.sourceUrl == null ? 'ParserError$res' : res; | 3355 return span.sourceUrl == null ? 'ParserError$res' : res; |
3378 } | 3356 } |
3379 } | 3357 } |
OLD | NEW |