Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(217)

Side by Side Diff: pkg/third_party/html5lib/lib/parser.dart

Issue 22375011: move html5lib code into dart svn repo (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: change location of html5lib to pkg/third_party/html5lib Created 7 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /**
2 * This library provides a parser for HTML5 documents that lets you parse HTML
3 * easily from a script or server-side application:
4 *
5 * import 'package:html5lib/parser.dart' show parse;
6 * import 'package:html5lib/dom.dart';
7 * main() {
8 * var document = parse(
9 * '<body>Hello world! <a href="www.html5rocks.com">HTML5 rocks!');
10 * print(document.outerHtml);
11 * }
12 *
13 * The resulting document you get back has a DOM-like API for easy tree
14 * traversal and manipulation.
15 */
16 library parser;
17
18 import 'dart:collection';
19 import 'dart:math';
20 import 'package:source_maps/span.dart' show Span, FileSpan;
21
22 import 'src/treebuilder.dart';
23 import 'src/constants.dart';
24 import 'src/encoding_parser.dart';
25 import 'src/token.dart';
26 import 'src/tokenizer.dart';
27 import 'src/utils.dart';
28 import 'dom.dart';
29 import 'dom_parsing.dart';
30
/**
 * Parses [input] as a complete HTML5 document and returns the resulting
 * [Document].
 *
 * [input] may be a [String], a [List<int>] of bytes, or an [HtmlTokenizer].
 *
 * When [input] is not an [HtmlTokenizer], [encoding] (a string) may be given to
 * force the character encoding; it then takes precedence over any BOM or later
 * declaration such as a meta element.
 *
 * Pass [generateSpans] to have [Span]s generated; otherwise
 * [Node.sourceSpan] is `null`. With [generateSpans], [sourceUrl] can be used
 * to record where [input] came from.
 */
Document parse(input, {String encoding, bool generateSpans: false,
    String sourceUrl}) =>
    new HtmlParser(input,
        encoding: encoding,
        generateSpans: generateSpans,
        sourceUrl: sourceUrl).parse();
50
51
/**
 * Parses [input] as an HTML5 document fragment and returns the resulting
 * [DocumentFragment].
 *
 * [input] may be a [String], a [List<int>] of bytes, or an [HtmlTokenizer].
 * [container] names the element the fragment is parsed inside of and defaults
 * to "div".
 *
 * When [input] is not an [HtmlTokenizer], [encoding] (a string) may be given to
 * force the character encoding; it then takes precedence over any BOM or later
 * declaration such as a meta element.
 *
 * Pass [generateSpans] to have [Span]s generated; otherwise
 * [Node.sourceSpan] is `null`. With [generateSpans], [sourceUrl] can be used
 * to record where [input] came from.
 */
DocumentFragment parseFragment(input, {String container: "div",
    String encoding, bool generateSpans: false, String sourceUrl}) =>
    new HtmlParser(input,
        encoding: encoding,
        generateSpans: generateSpans,
        sourceUrl: sourceUrl).parseFragment(container);
72
73
/**
 * Parser for HTML, which generates a tree structure from a stream of
 * (possibly malformed) characters.
 *
 * Tokens read from [tokenizer] are dispatched to the currently active [phase]
 * (or to the foreign-content phase, see [inForeignContent]). Parse errors are
 * accumulated in [errors]; when [strict] is set the first one is thrown.
 */
class HtmlParser {
  /** Raise an exception on the first error encountered. */
  final bool strict;

  /** True to generate [Span]s for the [Node.sourceSpan] property. */
  final bool generateSpans;

  /** Source of the tokens consumed by [mainLoop]. */
  final HtmlTokenizer tokenizer;

  /** Builder that receives the elements, text and comments being parsed. */
  final TreeBuilder tree;

  /** Parse errors recorded by [parseError]; cleared by [reset]. */
  final List<ParseError> errors = <ParseError>[];

  String container;

  bool firstStartTag = false;

  // TODO(jmesserly): use enum?
  /** "quirks" / "limited quirks" / "no quirks" */
  String compatMode = "no quirks";

  /** innerHTML container when parsing document fragment. */
  String innerHTML;

  /** The phase that handles the next token (unless in foreign content). */
  Phase phase;

  Phase lastPhase;

  /** Phase that was active when [parseRCDataRawtext] switched to text. */
  Phase originalPhase;

  Phase beforeRCDataPhase;

  bool framesetOK;

  // These fields hold the different phase singletons. At any given time one
  // of them will be active.
  InitialPhase _initialPhase;
  BeforeHtmlPhase _beforeHtmlPhase;
  BeforeHeadPhase _beforeHeadPhase;
  InHeadPhase _inHeadPhase;
  AfterHeadPhase _afterHeadPhase;
  InBodyPhase _inBodyPhase;
  TextPhase _textPhase;
  InTablePhase _inTablePhase;
  InTableTextPhase _inTableTextPhase;
  InCaptionPhase _inCaptionPhase;
  InColumnGroupPhase _inColumnGroupPhase;
  InTableBodyPhase _inTableBodyPhase;
  InRowPhase _inRowPhase;
  InCellPhase _inCellPhase;
  InSelectPhase _inSelectPhase;
  InSelectInTablePhase _inSelectInTablePhase;
  InForeignContentPhase _inForeignContentPhase;
  AfterBodyPhase _afterBodyPhase;
  InFramesetPhase _inFramesetPhase;
  AfterFramesetPhase _afterFramesetPhase;
  AfterAfterBodyPhase _afterAfterBodyPhase;
  AfterAfterFramesetPhase _afterAfterFramesetPhase;

  /**
   * Create a new HtmlParser and configure the [tree] builder and [strict] mode.
   * The [input] can be a [String], [List<int>] of bytes or an [HtmlTokenizer].
   *
   * If [input] is not a [HtmlTokenizer], you can specify a few more arguments.
   *
   * The [encoding] must be a string that indicates the encoding. If specified,
   * that encoding will be used, regardless of any BOM or later declaration
   * (such as in a meta element).
   *
   * Set [parseMeta] to false if you want to disable parsing the meta element.
   *
   * Set [lowercaseElementName] or [lowercaseAttrName] to false to disable the
   * automatic conversion of element and attribute names to lower case. Note
   * that the standard way to parse HTML is to lowercase, which is what the
   * browser DOM will do if you request [Node.outerHTML], for example.
   *
   * If [tree] is omitted a new [TreeBuilder] is created.
   */
  HtmlParser(input, {String encoding, bool parseMeta: true,
      bool lowercaseElementName: true, bool lowercaseAttrName: true,
      this.strict: false, bool generateSpans: false, String sourceUrl,
      TreeBuilder tree})
      : generateSpans = generateSpans,
        tree = tree != null ? tree : new TreeBuilder(true),
        tokenizer = (input is HtmlTokenizer ? input :
          new HtmlTokenizer(input, encoding: encoding, parseMeta: parseMeta,
            lowercaseElementName: lowercaseElementName,
            lowercaseAttrName: lowercaseAttrName,
            generateSpans: generateSpans, sourceUrl: sourceUrl)) {

    tokenizer.parser = this;
    _initialPhase = new InitialPhase(this);
    _beforeHtmlPhase = new BeforeHtmlPhase(this);
    _beforeHeadPhase = new BeforeHeadPhase(this);
    _inHeadPhase = new InHeadPhase(this);
    // TODO(jmesserly): html5lib did not implement the no script parsing mode
    // More information here:
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#scripting-flag
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inheadnoscript
    // "inHeadNoscript": new InHeadNoScriptPhase(this);
    _afterHeadPhase = new AfterHeadPhase(this);
    _inBodyPhase = new InBodyPhase(this);
    _textPhase = new TextPhase(this);
    _inTablePhase = new InTablePhase(this);
    _inTableTextPhase = new InTableTextPhase(this);
    _inCaptionPhase = new InCaptionPhase(this);
    _inColumnGroupPhase = new InColumnGroupPhase(this);
    _inTableBodyPhase = new InTableBodyPhase(this);
    _inRowPhase = new InRowPhase(this);
    _inCellPhase = new InCellPhase(this);
    _inSelectPhase = new InSelectPhase(this);
    _inSelectInTablePhase = new InSelectInTablePhase(this);
    _inForeignContentPhase = new InForeignContentPhase(this);
    _afterBodyPhase = new AfterBodyPhase(this);
    _inFramesetPhase = new InFramesetPhase(this);
    _afterFramesetPhase = new AfterFramesetPhase(this);
    _afterAfterBodyPhase = new AfterAfterBodyPhase(this);
    _afterAfterFramesetPhase = new AfterAfterFramesetPhase(this);
  }

  /** Whether a document fragment (as opposed to a full document) is parsed. */
  bool get innerHTMLMode => innerHTML != null;

  /**
   * Parse an html5 document into a tree.
   * After parsing, [errors] will be populated with parse errors, if any.
   */
  Document parse() {
    innerHTML = null;
    _parse();
    return tree.getDocument();
  }

  /**
   * Parse an html5 document fragment into a tree.
   * Pass a [container] to change the type of the containing element.
   * After parsing, [errors] will be populated with parse errors, if any.
   *
   * Throws an [ArgumentError] if [container] is `null`.
   */
  DocumentFragment parseFragment([String container = "div"]) {
    if (container == null) throw new ArgumentError('container');
    innerHTML = container.toLowerCase();
    _parse();
    return tree.getFragment();
  }

  /**
   * Resets the parser and runs [mainLoop], starting over from a fresh [reset]
   * each time a [ReparseException] is raised.
   */
  void _parse() {
    reset();

    while (true) {
      try {
        mainLoop();
        break;
      } on ReparseException {
        // Note: this happens if we start parsing but the character encoding
        // changes. So we should only need to restart very early in the parse.
        reset();
      }
    }
  }

  /**
   * Returns the tokenizer, tree builder and parser state to their initial
   * values and selects the starting [phase].
   */
  void reset() {
    tokenizer.reset();

    tree.reset();
    firstStartTag = false;
    errors.clear();
    // "quirks" / "limited quirks" / "no quirks"
    compatMode = "no quirks";

    if (innerHTMLMode) {
      // NOTE(review): `cdataElements` selects the RCDATA state and
      // `rcdataElements` the RAWTEXT state; the constant names look swapped
      // relative to the states they choose. Confirm against the constants
      // library before renaming anything.
      if (cdataElements.contains(innerHTML)) {
        tokenizer.state = tokenizer.rcdataState;
      } else if (rcdataElements.contains(innerHTML)) {
        tokenizer.state = tokenizer.rawtextState;
      } else if (innerHTML == 'plaintext') {
        tokenizer.state = tokenizer.plaintextState;
      } else {
        // state already is data state
        // tokenizer.state = tokenizer.dataState;
      }
      phase = _beforeHtmlPhase;
      _beforeHtmlPhase.insertHtmlElement();
      resetInsertionMode();
    } else {
      phase = _initialPhase;
    }

    lastPhase = null;
    beforeRCDataPhase = null;
    framesetOK = true;
  }

  /**
   * Whether [element] is a MathML `annotation-xml` element whose `encoding`
   * attribute is (case-insensitively) "text/html" or "application/xhtml+xml",
   * or is listed in [htmlIntegrationPointElements].
   */
  bool isHTMLIntegrationPoint(Node element) {
    if (element.tagName == "annotation-xml" &&
        element.namespace == Namespaces.mathml) {
      var enc = element.attributes["encoding"];
      if (enc != null) enc = asciiUpper2Lower(enc);
      return enc == "text/html" || enc == "application/xhtml+xml";
    } else {
      return htmlIntegrationPointElements.contains(
          new Pair(element.namespace, element.tagName));
    }
  }

  /** Whether [element] is listed in [mathmlTextIntegrationPointElements]. */
  bool isMathMLTextIntegrationPoint(Node element) {
    return mathmlTextIntegrationPointElements.contains(
        new Pair(element.namespace, element.tagName));
  }

  /**
   * Whether [token], whose kind is [type], has to be handled by the foreign
   * content phase instead of the current [phase].
   *
   * This is decided from the last element in `tree.openElements`.
   */
  bool inForeignContent(Token token, int type) {
    if (tree.openElements.isEmpty) return false;

    var node = tree.openElements.last;
    if (node.namespace == tree.defaultNamespace) return false;

    if (isMathMLTextIntegrationPoint(node)) {
      if (type == TokenKind.startTag) {
        // Cast only once the kind is known to be a start tag.
        var name = (token as StartTagToken).name;
        if (name != "mglyph" && name != "malignmark") return false;
      }
      if (type == TokenKind.characters || type == TokenKind.spaceCharacters) {
        return false;
      }
    }

    if (node.tagName == "annotation-xml" && type == TokenKind.startTag &&
        (token as StartTagToken).name == "svg") {
      return false;
    }

    if (isHTMLIntegrationPoint(node)) {
      if (type == TokenKind.startTag ||
          type == TokenKind.characters ||
          type == TokenKind.spaceCharacters) {
        return false;
      }
    }

    return true;
  }

  /**
   * Feeds every token from [tokenizer] to the appropriate phase, then lets the
   * phases process end-of-file.
   *
   * A phase handler may return a token; that token is then processed again
   * (by whatever [phase] is current at that point) until a handler returns
   * `null`.
   */
  void mainLoop() {
    while (tokenizer.moveNext()) {
      var token = tokenizer.current;
      var newToken = token;
      int type;
      while (newToken != null) {
        type = newToken.kind;

        // Note: avoid "is" test here, see http://dartbug.com/4795
        if (type == TokenKind.parseError) {
          ParseErrorToken error = newToken;
          parseError(error.span, error.data, error.messageParams);
          newToken = null;
        } else {
          Phase phase_ = phase;
          if (inForeignContent(token, type)) {
            phase_ = _inForeignContentPhase;
          }

          switch (type) {
            case TokenKind.characters:
              newToken = phase_.processCharacters(newToken);
              break;
            case TokenKind.spaceCharacters:
              newToken = phase_.processSpaceCharacters(newToken);
              break;
            case TokenKind.startTag:
              newToken = phase_.processStartTag(newToken);
              break;
            case TokenKind.endTag:
              newToken = phase_.processEndTag(newToken);
              break;
            case TokenKind.comment:
              newToken = phase_.processComment(newToken);
              break;
            case TokenKind.doctype:
              newToken = phase_.processDoctype(newToken);
              break;
          }
        }
      }

      // A self-closing start tag that no phase acknowledged is an error.
      if (token is StartTagToken) {
        if (token.selfClosing && !token.selfClosingAcknowledged) {
          parseError(token.span, "non-void-element-with-trailing-solidus",
              {"name": token.name});
        }
      }
    }

    // When the loop finishes it's EOF
    var reprocess = true;
    var reprocessPhases = [];
    while (reprocess) {
      reprocessPhases.add(phase);
      reprocess = phase.processEOF();
      if (reprocess) {
        // Guards against bouncing back to a phase that already saw EOF.
        assert(!reprocessPhases.contains(phase));
      }
    }
  }

  /**
   * The last span available. Used for EOF errors if we don't have something
   * better.
   */
  Span get _lastSpan {
    var pos = tokenizer.stream.position;
    return new FileSpan(tokenizer.stream.fileInfo, pos, pos);
  }

  /**
   * Records a [ParseError] with the given [errorcode], [span] and [datavars]
   * in [errors], and throws it when [strict] is set.
   *
   * If [span] is `null` and [generateSpans] is off, [_lastSpan] is used.
   */
  void parseError(Span span, String errorcode,
      [Map datavars = const {}]) {

    if (!generateSpans && span == null) {
      span = _lastSpan;
    }

    var err = new ParseError(errorcode, span, datavars);
    errors.add(err);
    if (strict) throw err;
  }

  /** Renames the `definitionurl` attribute of [token] to `definitionURL`. */
  void adjustMathMLAttributes(StartTagToken token) {
    var orig = token.data.remove("definitionurl");
    if (orig != null) {
      token.data["definitionURL"] = orig;
    }
  }

  /**
   * Renames the lower-cased SVG attribute names of [token] to their
   * mixed-case spelling.
   */
  void adjustSVGAttributes(StartTagToken token) {
    final replacements = const {
      "attributename":"attributeName",
      "attributetype":"attributeType",
      "basefrequency":"baseFrequency",
      "baseprofile":"baseProfile",
      "calcmode":"calcMode",
      "clippathunits":"clipPathUnits",
      "contentscripttype":"contentScriptType",
      "contentstyletype":"contentStyleType",
      "diffuseconstant":"diffuseConstant",
      "edgemode":"edgeMode",
      "externalresourcesrequired":"externalResourcesRequired",
      "filterres":"filterRes",
      "filterunits":"filterUnits",
      "glyphref":"glyphRef",
      "gradienttransform":"gradientTransform",
      "gradientunits":"gradientUnits",
      "kernelmatrix":"kernelMatrix",
      "kernelunitlength":"kernelUnitLength",
      "keypoints":"keyPoints",
      "keysplines":"keySplines",
      "keytimes":"keyTimes",
      "lengthadjust":"lengthAdjust",
      "limitingconeangle":"limitingConeAngle",
      "markerheight":"markerHeight",
      "markerunits":"markerUnits",
      "markerwidth":"markerWidth",
      "maskcontentunits":"maskContentUnits",
      "maskunits":"maskUnits",
      "numoctaves":"numOctaves",
      "pathlength":"pathLength",
      "patterncontentunits":"patternContentUnits",
      "patterntransform":"patternTransform",
      "patternunits":"patternUnits",
      "pointsatx":"pointsAtX",
      "pointsaty":"pointsAtY",
      "pointsatz":"pointsAtZ",
      "preservealpha":"preserveAlpha",
      "preserveaspectratio":"preserveAspectRatio",
      "primitiveunits":"primitiveUnits",
      "refx":"refX",
      "refy":"refY",
      "repeatcount":"repeatCount",
      "repeatdur":"repeatDur",
      "requiredextensions":"requiredExtensions",
      "requiredfeatures":"requiredFeatures",
      "specularconstant":"specularConstant",
      "specularexponent":"specularExponent",
      "spreadmethod":"spreadMethod",
      "startoffset":"startOffset",
      "stddeviation":"stdDeviation",
      "stitchtiles":"stitchTiles",
      "surfacescale":"surfaceScale",
      "systemlanguage":"systemLanguage",
      "tablevalues":"tableValues",
      "targetx":"targetX",
      "targety":"targetY",
      "textlength":"textLength",
      "viewbox":"viewBox",
      "viewtarget":"viewTarget",
      "xchannelselector":"xChannelSelector",
      "ychannelselector":"yChannelSelector",
      "zoomandpan":"zoomAndPan"
    };
    // Iterate over a copy of the keys: the map is modified inside the loop.
    for (var originalName in token.data.keys.toList()) {
      var svgName = replacements[originalName];
      if (svgName != null) {
        token.data[svgName] = token.data.remove(originalName);
      }
    }
  }

  /**
   * Replaces the `xlink:*`, `xml:*` and `xmlns` string attribute names of
   * [token] with the corresponding namespaced [AttributeName] keys.
   */
  void adjustForeignAttributes(StartTagToken token) {
    // TODO(jmesserly): I don't like mixing non-string objects with strings in
    // the Node.attributes Map. Is there another solution?
    final replacements = const {
      "xlink:actuate": const AttributeName("xlink", "actuate",
            Namespaces.xlink),
      "xlink:arcrole": const AttributeName("xlink", "arcrole",
            Namespaces.xlink),
      "xlink:href": const AttributeName("xlink", "href", Namespaces.xlink),
      "xlink:role": const AttributeName("xlink", "role", Namespaces.xlink),
      "xlink:show": const AttributeName("xlink", "show", Namespaces.xlink),
      "xlink:title": const AttributeName("xlink", "title", Namespaces.xlink),
      "xlink:type": const AttributeName("xlink", "type", Namespaces.xlink),
      "xml:base": const AttributeName("xml", "base", Namespaces.xml),
      "xml:lang": const AttributeName("xml", "lang", Namespaces.xml),
      "xml:space": const AttributeName("xml", "space", Namespaces.xml),
      "xmlns": const AttributeName(null, "xmlns", Namespaces.xmlns),
      "xmlns:xlink": const AttributeName("xmlns", "xlink", Namespaces.xmlns)
    };

    // Iterate over a copy of the keys: the map is modified inside the loop.
    for (var originalName in token.data.keys.toList()) {
      var foreignName = replacements[originalName];
      if (foreignName != null) {
        token.data[foreignName] = token.data.remove(originalName);
      }
    }
  }

  /**
   * Picks the [phase] that matches the innermost relevant open element,
   * falling back to the "in body" phase.
   */
  void resetInsertionMode() {
    // The name of this method is mostly historical. (It's also used in the
    // specification.)
    for (Node node in tree.openElements.reversed) {
      var nodeName = node.tagName;
      bool last = node == tree.openElements[0];
      if (last) {
        // The bottom of the stack stands in for the fragment's container.
        assert(innerHTMLMode);
        nodeName = innerHTML;
      }
      // Check for conditions that should only happen in the innerHTML
      // case
      switch (nodeName) {
        case "select": case "colgroup": case "head": case "html":
          assert(innerHTMLMode);
          break;
      }
      if (!last && node.namespace != tree.defaultNamespace) {
        continue;
      }
      switch (nodeName) {
        case "select": phase = _inSelectPhase; return;
        case "td": phase = _inCellPhase; return;
        case "th": phase = _inCellPhase; return;
        case "tr": phase = _inRowPhase; return;
        case "tbody": phase = _inTableBodyPhase; return;
        case "thead": phase = _inTableBodyPhase; return;
        case "tfoot": phase = _inTableBodyPhase; return;
        case "caption": phase = _inCaptionPhase; return;
        case "colgroup": phase = _inColumnGroupPhase; return;
        case "table": phase = _inTablePhase; return;
        case "head": phase = _inBodyPhase; return;
        case "body": phase = _inBodyPhase; return;
        case "frameset": phase = _inFramesetPhase; return;
        case "html": phase = _beforeHeadPhase; return;
      }
    }
    phase = _inBodyPhase;
  }

  /**
   * Generic RCDATA/RAWTEXT Parsing algorithm
   * [contentType] - RCDATA or RAWTEXT
   *
   * Inserts the element for [token], switches the tokenizer to the matching
   * state, remembers the current [phase] in [originalPhase] and makes the
   * text phase current.
   */
  void parseRCDataRawtext(Token token, String contentType) {
    assert(contentType == "RAWTEXT" || contentType == "RCDATA");

    tree.insertElement(token);

    if (contentType == "RAWTEXT") {
      tokenizer.state = tokenizer.rawtextState;
    } else {
      tokenizer.state = tokenizer.rcdataState;
    }

    originalPhase = phase;
    phase = _textPhase;
  }
}
567
568
/**
 * Base class for the helper objects that each implement one phase of
 * processing.
 *
 * Token handlers return either a token that has to be processed again, or
 * `null` once the token has been consumed.
 */
class Phase {
  // Order should be (they can be omitted):
  // * EOF
  // * Comment
  // * Doctype
  // * SpaceCharacters
  // * Characters
  // * StartTag
  //   - startTag* methods
  // * EndTag
  //   - endTag* methods

  /** The parser that owns this phase. */
  final HtmlParser parser;

  /** Shortcut for the [parser]'s tree builder. */
  final TreeBuilder tree;

  Phase(HtmlParser parser) : parser = parser, tree = parser.tree;

  /** Handles end-of-file; subclasses have to provide this. */
  bool processEOF() {
    throw new UnimplementedError();
  }

  /** Adds the comment to the last open element. */
  Token processComment(CommentToken token) {
    // For most phases the following is correct. Where it's not it will be
    // overridden.
    var currentNode = tree.openElements.last;
    tree.insertComment(token, currentNode);
    return null;
  }

  /** Reports the doctype as unexpected. */
  Token processDoctype(DoctypeToken token) {
    parser.parseError(token.span, "unexpected-doctype");
    return null;
  }

  /** Inserts the character data as text. */
  Token processCharacters(CharactersToken token) {
    tree.insertText(token.data, token.span);
    return null;
  }

  /** Inserts the whitespace as text. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    tree.insertText(token.data, token.span);
    return null;
  }

  /** Handles a start tag; subclasses have to provide this. */
  Token processStartTag(StartTagToken token) {
    throw new UnimplementedError();
  }

  /**
   * Copies the attributes of an `html` start tag onto the root element,
   * without overwriting attributes the root already has.
   */
  Token startTagHtml(StartTagToken token) {
    if (parser.firstStartTag == false && token.name == "html") {
      parser.parseError(token.span, "non-html-root");
    }
    // XXX Need a check here to see if the first start tag token emitted is
    // this token... If it's not, invoke parser.parseError().
    for (var attr in token.data.keys) {
      tree.openElements[0].attributes.putIfAbsent(attr, () => token.data[attr]);
    }
    parser.firstStartTag = false;
    return null;
  }

  /** Handles an end tag; subclasses have to provide this. */
  Token processEndTag(EndTagToken token) {
    throw new UnimplementedError();
  }

  /**
   * Helper method for popping openElements: removes elements from the end
   * until one whose tag name is [name] has been removed.
   */
  void popOpenElementsUntil(String name) {
    var popped;
    do {
      popped = tree.openElements.removeLast();
    } while (popped.tagName != name);
  }
}
638
/**
 * Phase that is active at the start of a document: it consumes the doctype
 * (if any), derives the parser's compatibility mode from it and hands over
 * to the "before html" phase.
 */
class InitialPhase extends Phase {
  InitialPhase(parser) : super(parser);

  /** Whitespace before the doctype is dropped. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    return null;
  }

  /** Comments before the doctype are attached to the document itself. */
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  /**
   * Inserts the doctype, sets `parser.compatMode` to "quirks" or
   * "limited quirks" when the doctype calls for it, and switches to the
   * "before html" phase.
   *
   * An "unknown-doctype" parse error is reported unless the doctype is
   * `html` without public id and with either no system id or
   * "about:legacy-compat".
   */
  Token processDoctype(DoctypeToken token) {
    var name = token.name;
    String publicId = token.publicId;
    var systemId = token.systemId;
    var correct = token.correct;

    if ((name != "html" || publicId != null ||
        systemId != null && systemId != "about:legacy-compat")) {
      parser.parseError(token.span, "unknown-doctype");
    }

    if (publicId == null) {
      publicId = "";
    }

    tree.insertDoctype(token);

    // The prefix tables below are all lower case.
    if (publicId != "") {
      publicId = asciiUpper2Lower(publicId);
    }

    if (!correct || token.name != "html"
        || startsWithAny(publicId, const [
          "+//silmaril//dtd html pro v0r11 19970101//",
          "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
          "-//as//dtd html 3.0 aswedit + extensions//",
          "-//ietf//dtd html 2.0 level 1//",
          "-//ietf//dtd html 2.0 level 2//",
          "-//ietf//dtd html 2.0 strict level 1//",
          "-//ietf//dtd html 2.0 strict level 2//",
          "-//ietf//dtd html 2.0 strict//",
          "-//ietf//dtd html 2.0//",
          "-//ietf//dtd html 2.1e//",
          "-//ietf//dtd html 3.0//",
          "-//ietf//dtd html 3.2 final//",
          "-//ietf//dtd html 3.2//",
          "-//ietf//dtd html 3//",
          "-//ietf//dtd html level 0//",
          "-//ietf//dtd html level 1//",
          "-//ietf//dtd html level 2//",
          "-//ietf//dtd html level 3//",
          "-//ietf//dtd html strict level 0//",
          "-//ietf//dtd html strict level 1//",
          "-//ietf//dtd html strict level 2//",
          "-//ietf//dtd html strict level 3//",
          "-//ietf//dtd html strict//",
          "-//ietf//dtd html//",
          "-//metrius//dtd metrius presentational//",
          "-//microsoft//dtd internet explorer 2.0 html strict//",
          "-//microsoft//dtd internet explorer 2.0 html//",
          "-//microsoft//dtd internet explorer 2.0 tables//",
          "-//microsoft//dtd internet explorer 3.0 html strict//",
          "-//microsoft//dtd internet explorer 3.0 html//",
          "-//microsoft//dtd internet explorer 3.0 tables//",
          "-//netscape comm. corp.//dtd html//",
          "-//netscape comm. corp.//dtd strict html//",
          "-//o'reilly and associates//dtd html 2.0//",
          "-//o'reilly and associates//dtd html extended 1.0//",
          "-//o'reilly and associates//dtd html extended relaxed 1.0//",
          // These two literals must not contain stray spaces, otherwise the
          // prefix match silently never succeeds.
          "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
          "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
          "-//spyglass//dtd html 2.0 extended//",
          "-//sq//dtd html 2.0 hotmetal + extensions//",
          "-//sun microsystems corp.//dtd hotjava html//",
          "-//sun microsystems corp.//dtd hotjava strict html//",
          "-//w3c//dtd html 3 1995-03-24//",
          "-//w3c//dtd html 3.2 draft//",
          "-//w3c//dtd html 3.2 final//",
          "-//w3c//dtd html 3.2//",
          "-//w3c//dtd html 3.2s draft//",
          "-//w3c//dtd html 4.0 frameset//",
          "-//w3c//dtd html 4.0 transitional//",
          "-//w3c//dtd html experimental 19960712//",
          "-//w3c//dtd html experimental 970421//",
          "-//w3c//dtd w3 html//",
          "-//w3o//dtd w3 html 3.0//",
          "-//webtechs//dtd mozilla html 2.0//",
          "-//webtechs//dtd mozilla html//"])
        || const ["-//w3o//dtd w3 html strict 3.0//en//",
                  "-/w3c/dtd html 4.0 transitional/en",
                  "html"].contains(publicId)
        // HTML 4.01 frameset/transitional without a system id is quirks...
        || (startsWithAny(publicId, const [
              "-//w3c//dtd html 4.01 frameset//",
              "-//w3c//dtd html 4.01 transitional//"]) && systemId == null)
        || (systemId != null && systemId.toLowerCase() ==
            "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) {

      parser.compatMode = "quirks";
    } else if (startsWithAny(publicId, const [
          "-//w3c//dtd xhtml 1.0 frameset//",
          "-//w3c//dtd xhtml 1.0 transitional//"])
        // ...but with a system id it is only limited quirks.
        || (startsWithAny(publicId, const [
              "-//w3c//dtd html 4.01 frameset//",
              "-//w3c//dtd html 4.01 transitional//"]) &&
            systemId != null)) {
      parser.compatMode = "limited quirks";
    }
    parser.phase = parser._beforeHtmlPhase;
    return null;
  }

  /**
   * Fallback when anything but a doctype shows up first: the document is in
   * quirks mode and parsing continues in the "before html" phase.
   */
  void anythingElse() {
    parser.compatMode = "quirks";
    parser.phase = parser._beforeHtmlPhase;
  }

  /** Reports the missing doctype and returns [token] for reprocessing. */
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "expected-doctype-but-got-chars");
    anythingElse();
    return token;
  }

  /** Reports the missing doctype and returns [token] for reprocessing. */
  Token processStartTag(StartTagToken token) {
    parser.parseError(token.span, "expected-doctype-but-got-start-tag",
        {"name": token.name});
    anythingElse();
    return token;
  }

  /** Reports the missing doctype and returns [token] for reprocessing. */
  Token processEndTag(EndTagToken token) {
    parser.parseError(token.span, "expected-doctype-but-got-end-tag",
        {"name": token.name});
    anythingElse();
    return token;
  }

  /** Reports the missing doctype and asks for EOF to be reprocessed. */
  bool processEOF() {
    parser.parseError(parser._lastSpan, "expected-doctype-but-got-eof");
    anythingElse();
    return true;
  }
}
780
781
/**
 * Phase that runs until the root `html` element exists. Anything other than
 * comments, whitespace and stray end tags causes the root to be inserted
 * implicitly.
 */
class BeforeHtmlPhase extends Phase {
  BeforeHtmlPhase(parser) : super(parser);

  // helper methods

  /**
   * Inserts a root element from an attribute-less `html` start tag and moves
   * the parser on to the "before head" phase.
   */
  void insertHtmlElement() {
    var rootTag = new StartTagToken("html", data: {});
    tree.insertRoot(rootTag);
    parser.phase = parser._beforeHeadPhase;
  }

  // other

  /** Inserts the root and asks for EOF to be reprocessed. */
  bool processEOF() {
    insertHtmlElement();
    return true;
  }

  /** Comments before the root are attached to the document itself. */
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  /** Whitespace before the root is dropped. */
  Token processSpaceCharacters(SpaceCharactersToken token) => null;

  /** Inserts the root and returns [token] for reprocessing. */
  Token processCharacters(CharactersToken token) {
    insertHtmlElement();
    return token;
  }

  /**
   * Inserts the root and returns [token] for reprocessing; an explicit
   * `html` tag additionally sets `parser.firstStartTag`.
   */
  Token processStartTag(StartTagToken token) {
    if (token.name == "html") parser.firstStartTag = true;
    insertHtmlElement();
    return token;
  }

  /**
   * `head`, `body`, `html` and `br` end tags insert the root and are
   * reprocessed; every other end tag is reported and dropped.
   */
  Token processEndTag(EndTagToken token) {
    var impliesRoot =
        const ["head", "body", "html", "br"].contains(token.name);
    if (!impliesRoot) {
      parser.parseError(token.span, "unexpected-end-tag-before-html",
          {"name": token.name});
      return null;
    }
    insertHtmlElement();
    return token;
  }
}
829
830
/**
 * Phase that runs between the root element and the `head` element. Most
 * tokens cause a `head` element to be inserted implicitly.
 */
class BeforeHeadPhase extends Phase {
  BeforeHeadPhase(parser) : super(parser);

  /** Routes a start tag to the matching `startTag*` handler. */
  processStartTag(StartTagToken token) {
    var tagName = token.name;
    if (tagName == 'html') return startTagHtml(token);
    if (tagName == 'head') {
      startTagHead(token);
      return null;
    }
    return startTagOther(token);
  }

  /** Routes an end tag to the matching `endTag*` handler. */
  processEndTag(EndTagToken token) {
    if (const ["head", "body", "html", "br"].contains(token.name)) {
      return endTagImplyHead(token);
    }
    endTagOther(token);
    return null;
  }

  /** Inserts an implied `head` and asks for EOF to be reprocessed. */
  bool processEOF() {
    _insertImpliedHead();
    return true;
  }

  /** Whitespace before the head is dropped. */
  Token processSpaceCharacters(SpaceCharactersToken token) => null;

  /** Inserts an implied `head` and returns [token] for reprocessing. */
  Token processCharacters(CharactersToken token) {
    _insertImpliedHead();
    return token;
  }

  /** An `html` start tag is handled by the "in body" phase. */
  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /**
   * Inserts the element for [token], records it as the tree's head pointer
   * and moves the parser on to the "in head" phase.
   */
  void startTagHead(StartTagToken token) {
    tree.insertElement(token);
    tree.headPointer = tree.openElements.last;
    parser.phase = parser._inHeadPhase;
  }

  /** Inserts an implied `head` and returns [token] for reprocessing. */
  Token startTagOther(StartTagToken token) {
    _insertImpliedHead();
    return token;
  }

  /** Inserts an implied `head` and returns [token] for reprocessing. */
  Token endTagImplyHead(EndTagToken token) {
    _insertImpliedHead();
    return token;
  }

  /** Reports the end tag as an error and drops it. */
  void endTagOther(EndTagToken token) {
    parser.parseError(token.span, "end-tag-after-implied-root",
        {"name": token.name});
  }

  /** Runs [startTagHead] with a synthesized, attribute-less `head` tag. */
  void _insertImpliedHead() {
    startTagHead(new StartTagToken("head", data: {}));
  }
}
888
889 class InHeadPhase extends Phase {
890 InHeadPhase(parser) : super(parser);
891
892 processStartTag(StartTagToken token) {
893 switch (token.name) {
894 case "html": return startTagHtml(token);
895 case "title": return startTagTitle(token);
896 case "noscript": case "noframes": case "style":
897 return startTagNoScriptNoFramesStyle(token);
898 case "script": return startTagScript(token);
899 case "base": case "basefont": case "bgsound": case "command": case "link":
900 return startTagBaseLinkCommand(token);
901 case "meta": return startTagMeta(token);
902 case "head": return startTagHead(token);
903 default: return startTagOther(token);
904 }
905 }
906
907 processEndTag(EndTagToken token) {
908 switch (token.name) {
909 case "head": return endTagHead(token);
910 case "br": case "html": case "body": return endTagHtmlBodyBr(token);
911 default: return endTagOther(token);
912 }
913 }
914
915 // the real thing
916 bool processEOF() {
917 anythingElse();
918 return true;
919 }
920
921 Token processCharacters(CharactersToken token) {
922 anythingElse();
923 return token;
924 }
925
926 Token startTagHtml(StartTagToken token) {
927 return parser._inBodyPhase.processStartTag(token);
928 }
929
930 void startTagHead(StartTagToken token) {
931 parser.parseError(token.span, "two-heads-are-not-better-than-one");
932 }
933
934 void startTagBaseLinkCommand(StartTagToken token) {
935 tree.insertElement(token);
936 tree.openElements.removeLast();
937 token.selfClosingAcknowledged = true;
938 }
939
940 void startTagMeta(StartTagToken token) {
941 tree.insertElement(token);
942 tree.openElements.removeLast();
943 token.selfClosingAcknowledged = true;
944
945 var attributes = token.data;
946 if (!parser.tokenizer.stream.charEncodingCertain) {
947 var charset = attributes["charset"];
948 var content = attributes["content"];
949 if (charset != null) {
950 parser.tokenizer.stream.changeEncoding(charset);
951 } else if (content != null) {
952 var data = new EncodingBytes(content);
953 var codec = new ContentAttrParser(data).parse();
954 parser.tokenizer.stream.changeEncoding(codec);
955 }
956 }
957 }
958
959 void startTagTitle(StartTagToken token) {
960 parser.parseRCDataRawtext(token, "RCDATA");
961 }
962
963 void startTagNoScriptNoFramesStyle(StartTagToken token) {
964 // Need to decide whether to implement the scripting-disabled case
965 parser.parseRCDataRawtext(token, "RAWTEXT");
966 }
967
968 void startTagScript(StartTagToken token) {
969 tree.insertElement(token);
970 parser.tokenizer.state = parser.tokenizer.scriptDataState;
971 parser.originalPhase = parser.phase;
972 parser.phase = parser._textPhase;
973 }
974
/**
 * Handles any start tag without a dedicated handler by running
 * [anythingElse].
 *
 * Returns the same [token] (presumably so the caller reprocesses it in the
 * new phase -- confirm against the main loop).
 */
Token startTagOther(StartTagToken token) {
  anythingElse();
  return token;
}
979
/**
 * Pops the current node, which is asserted to be the `head` element, and
 * moves the parser to the after-head phase.
 */
void endTagHead(EndTagToken token) {
  var popped = parser.tree.openElements.removeLast();
  assert(popped.tagName == "head");
  parser.phase = parser._afterHeadPhase;
}
985
/**
 * Handles the end tags routed here by running [anythingElse].
 *
 * Returns the same [token] (presumably so the caller reprocesses it in the
 * new phase -- confirm against the main loop).
 */
Token endTagHtmlBodyBr(EndTagToken token) {
  anythingElse();
  return token;
}
990
/**
 * Reports an "unexpected-end-tag" parse error naming the tag; the token is
 * otherwise ignored.
 */
void endTagOther(EndTagToken token) =>
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
994
/**
 * Fallback for this phase: behaves as if a `head` end tag had been seen by
 * calling [endTagHead] with a synthesized end tag token.
 */
void anythingElse() => endTagHead(new EndTagToken("head"));
998 }
999
1000
1001 // XXX If we implement a parser for which scripting is disabled we need to
1002 // implement this phase.
1003 //
1004 // class InHeadNoScriptPhase extends Phase {
1005
/**
 * Phase that handles tokens until a `body` or `frameset` element is opened.
 *
 * Tokens with no dedicated handling go through [anythingElse], which inserts
 * a `body` element and switches the parser to the in-body phase.
 */
class AfterHeadPhase extends Phase {
  AfterHeadPhase(parser) : super(parser);

  /** Routes a start tag to the handler for its tag name. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "body":
        return startTagBody(token);
      case "frameset":
        return startTagFrameset(token);
      case "head":
        return startTagHead(token);
      case "base":
      case "basefont":
      case "bgsound":
      case "link":
      case "meta":
      case "noframes":
      case "script":
      case "style":
      case "title":
        return startTagFromHead(token);
      default:
        return startTagOther(token);
    }
  }

  /** Routes an end tag to the handler for its tag name. */
  processEndTag(EndTagToken token) {
    var name = token.name;
    if (name == "body" || name == "html" || name == "br") {
      return endTagHtmlBodyBr(token);
    }
    return endTagOther(token);
  }

  /** Runs [anythingElse] and returns `true`. */
  bool processEOF() {
    anythingElse();
    return true;
  }

  /** Runs [anythingElse] and hands [token] back to the caller. */
  Token processCharacters(CharactersToken token) {
    anythingElse();
    return token;
  }

  /** Forwards an `html` start tag to the in-body phase. */
  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /**
   * Clears `parser.framesetOK`, inserts the `body` element and switches to
   * the in-body phase.
   */
  void startTagBody(StartTagToken token) {
    parser.framesetOK = false;
    tree.insertElement(token);
    parser.phase = parser._inBodyPhase;
  }

  /** Inserts the `frameset` element and switches to the in-frameset phase. */
  void startTagFrameset(StartTagToken token) {
    tree.insertElement(token);
    parser.phase = parser._inFramesetPhase;
  }

  /**
   * Handles a head-only start tag that arrives after the head was closed:
   * reports a parse error, temporarily pushes the head element back onto
   * the open-element stack, lets the in-head phase process the token, then
   * removes the head element from the stack again.
   */
  void startTagFromHead(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-out-of-my-head",
        {"name": token.name});
    tree.openElements.add(tree.headPointer);
    parser._inHeadPhase.processStartTag(token);

    // Search from the top of the stack for the head element pushed above.
    var open = tree.openElements;
    for (var i = open.length - 1; i >= 0; i--) {
      Node candidate = open[i];
      if (candidate.tagName == "head") {
        open.remove(candidate);
        break;
      }
    }
  }

  /** Reports a parse error for a stray `head` start tag and ignores it. */
  void startTagHead(StartTagToken token) => parser.parseError(
      token.span, "unexpected-start-tag", {"name": token.name});

  /** Runs [anythingElse] and hands [token] back to the caller. */
  Token startTagOther(StartTagToken token) {
    anythingElse();
    return token;
  }

  /** Runs [anythingElse] and hands [token] back to the caller. */
  Token endTagHtmlBodyBr(EndTagToken token) {
    anythingElse();
    return token;
  }

  /** Reports a parse error for any other end tag and ignores it. */
  void endTagOther(EndTagToken token) => parser.parseError(
      token.span, "unexpected-end-tag", {"name": token.name});

  /**
   * Fallback: inserts a `body` element with no attributes, switches to the
   * in-body phase and sets `parser.framesetOK` to true.
   */
  void anythingElse() {
    var impliedBody = new StartTagToken("body", data: {});
    tree.insertElement(impliedBody);
    parser.phase = parser._inBodyPhase;
    parser.framesetOK = true;
  }
}
1092
/** Signature of a function that takes a [Token] and returns a [Token]. */
typedef Token TokenProccessor(Token token);
1094
1095 class InBodyPhase extends Phase {
/**
 * When true, the next space-character token is routed through
 * [processSpaceCharactersDropNewline] instead of being inserted as-is.
 * Set by the `pre`/`listing` and `textarea` start tag handlers.
 */
bool dropNewline = false;

// http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
// the really-really-really-very crazy mode
InBodyPhase(parser) : super(parser);
1101
/**
 * Routes a start tag to the handler for its tag name and returns whatever
 * that handler returns.
 *
 * Every tag name appears at most once in the switch. In particular,
 * "noframes" is handled through [startTagProcessInHead]; it is deliberately
 * not repeated in the [startTagRawtext] group, where it could never match.
 */
processStartTag(StartTagToken token) {
  switch (token.name) {
    case "html":
      return startTagHtml(token);
    case "base": case "basefont": case "bgsound": case "command": case "link":
    case "meta": case "noframes": case "script": case "style": case "title":
      return startTagProcessInHead(token);
    case "body":
      return startTagBody(token);
    case "frameset":
      return startTagFrameset(token);
    case "address": case "article": case "aside": case "blockquote":
    case "center": case "details": case "dir": case "div":
    case "dl": case "fieldset": case "figcaption": case "figure":
    case "footer": case "header": case "hgroup": case "menu": case "nav":
    case "ol": case "p": case "section": case "summary": case "ul":
      return startTagCloseP(token);
    // headingElements
    case "h1": case "h2": case "h3": case "h4": case "h5": case "h6":
      return startTagHeading(token);
    case "pre": case "listing":
      return startTagPreListing(token);
    case "form":
      return startTagForm(token);
    case "li": case "dd": case "dt":
      return startTagListItem(token);
    case "plaintext":
      return startTagPlaintext(token);
    case "a":
      return startTagA(token);
    case "b": case "big": case "code": case "em": case "font": case "i":
    case "s": case "small": case "strike": case "strong": case "tt": case "u":
      return startTagFormatting(token);
    case "nobr":
      return startTagNobr(token);
    case "button":
      return startTagButton(token);
    case "applet": case "marquee": case "object":
      return startTagAppletMarqueeObject(token);
    case "xmp":
      return startTagXmp(token);
    case "table":
      return startTagTable(token);
    case "area": case "br": case "embed": case "img": case "keygen":
    case "wbr":
      return startTagVoidFormatting(token);
    case "param": case "source": case "track":
      return startTagParamSource(token);
    case "input":
      return startTagInput(token);
    case "hr":
      return startTagHr(token);
    case "image":
      return startTagImage(token);
    case "isindex":
      return startTagIsIndex(token);
    case "textarea":
      return startTagTextarea(token);
    case "iframe":
      return startTagIFrame(token);
    case "noembed": case "noscript":
      return startTagRawtext(token);
    case "select":
      return startTagSelect(token);
    case "rp": case "rt":
      return startTagRpRt(token);
    case "option": case "optgroup":
      return startTagOpt(token);
    case "math":
      return startTagMath(token);
    case "svg":
      return startTagSvg(token);
    case "caption": case "col": case "colgroup": case "frame": case "head":
    case "tbody": case "td": case "tfoot": case "th": case "thead": case "tr":
      return startTagMisplaced(token);
    default:
      return startTagOther(token);
  }
}
1179
/**
 * Routes an end tag to the handler for its tag name and returns whatever
 * that handler returns. Names without a dedicated handler go to
 * [endTagOther].
 */
processEndTag(EndTagToken token) {
  switch (token.name) {
    case "body":
      return endTagBody(token);
    case "html":
      return endTagHtml(token);
    case "p":
      return endTagP(token);
    case "br":
      return endTagBr(token);
    case "form":
      return endTagForm(token);
    case "dd":
    case "dt":
    case "li":
      return endTagListItem(token);
    // headingElements
    case "h1":
    case "h2":
    case "h3":
    case "h4":
    case "h5":
    case "h6":
      return endTagHeading(token);
    case "applet":
    case "marquee":
    case "object":
      return endTagAppletMarqueeObject(token);
    case "address": case "article": case "aside": case "blockquote":
    case "center": case "details": case "dir": case "div": case "dl":
    case "fieldset": case "figcaption": case "figure": case "footer":
    case "header": case "hgroup": case "listing": case "menu": case "nav":
    case "ol": case "pre": case "section": case "summary": case "ul":
      return endTagBlock(token);
    case "a": case "b": case "big": case "code": case "em": case "font":
    case "i": case "nobr": case "s": case "small": case "strike":
    case "strong": case "tt": case "u":
      return endTagFormatting(token);
    default:
      return endTagOther(token);
  }
}
1206
/**
 * Returns true when [node1] and [node2] have the same tag name, the same
 * namespace, the same number of attributes, and every attribute of [node1]
 * has an equal value in [node2].
 */
bool isMatchingFormattingElement(Node node1, Node node2) {
  if (node1.tagName != node2.tagName) return false;
  if (node1.namespace != node2.namespace) return false;
  if (node1.attributes.length != node2.attributes.length) return false;

  // Equal sizes plus equal values for all of node1's keys means equal maps.
  return node1.attributes.keys
      .every((key) => node1.attributes[key] == node2.attributes[key]);
}
1221
1222 // helper
/**
 * Inserts the element for [token] and appends it to the list of active
 * formatting elements.
 *
 * Before appending, entries back to the most recent [Marker] are scanned;
 * if three of them already match the new element (per
 * [isMatchingFormattingElement]), the earliest of those three is removed.
 */
void addFormattingElement(token) {
  tree.insertElement(token);
  var inserted = tree.openElements.last;

  // Collected newest-first, so the last entry is the earliest match.
  var duplicates = [];
  for (Node candidate in tree.activeFormattingElements.reversed) {
    if (candidate == Marker) break;
    if (isMatchingFormattingElement(candidate, inserted)) {
      duplicates.add(candidate);
    }
  }

  assert(duplicates.length <= 3);
  if (duplicates.length == 3) {
    tree.activeFormattingElements.remove(duplicates.last);
  }
  tree.activeFormattingElements.add(inserted);
}
1242
1243 // the real deal
/**
 * Handles end-of-file in the body.
 *
 * Walks the open elements from the innermost outwards and reports a single
 * "expected-closing-tag-but-got-eof" error at the first element whose tag is
 * not one of those allowed to remain open. Returns false to stop parsing.
 */
bool processEOF() {
  final mayStayOpen = const ["dd", "dt", "li", "p", "tbody", "td", "tfoot",
      "th", "thead", "tr", "body", "html"];
  for (Node node in tree.openElements.reversed) {
    if (mayStayOpen.contains(node.tagName)) continue;
    parser.parseError(node.sourceSpan, "expected-closing-tag-but-got-eof");
    break;
  }
  // Stop parsing
  return false;
}
1258
/**
 * Inserts the whitespace in [token], first dropping one leading newline when
 * the current node is a still-empty `pre`, `listing` or `textarea` element.
 *
 * Always resets [dropNewline] to false. Nothing is inserted if no text is
 * left after dropping the newline.
 */
void processSpaceCharactersDropNewline(StringToken token) {
  var text = token.data;
  dropNewline = false;

  if (text.startsWith("\n")) {
    var current = tree.openElements.last;
    var dropsLeadingNewline =
        const ["pre", "listing", "textarea"].contains(current.tagName);
    if (dropsLeadingNewline && !current.hasContent()) {
      text = text.substring(1);
    }
  }

  if (text.length > 0) {
    tree.reconstructActiveFormattingElements();
    tree.insertText(text, token.span);
  }
}
1276
/**
 * Inserts the text of [token] at the current position, reconstructing the
 * active formatting elements first. A token that is exactly one NUL
 * character is dropped. Non-whitespace text clears `parser.framesetOK`.
 *
 * Always returns null.
 */
Token processCharacters(CharactersToken token) {
  // The tokenizer should always emit null on its own
  if (token.data == "\u0000") return null;

  tree.reconstructActiveFormattingElements();
  tree.insertText(token.data, token.span);

  var sawRealText = parser.framesetOK && !allWhitespace(token.data);
  if (sawRealText) parser.framesetOK = false;
  return null;
}
1289
/**
 * Inserts whitespace text. While [dropNewline] is set the token is routed
 * through [processSpaceCharactersDropNewline] instead.
 *
 * Always returns null.
 */
Token processSpaceCharacters(SpaceCharactersToken token) {
  if (dropNewline) {
    processSpaceCharactersDropNewline(token);
    return null;
  }
  tree.reconstructActiveFormattingElements();
  tree.insertText(token.data, token.span);
  return null;
}
1299
/**
 * Forwards the start tag to the in-head phase's dispatch and returns its
 * result.
 */
Token startTagProcessInHead(StartTagToken token) =>
    parser._inHeadPhase.processStartTag(token);
1303
/**
 * Handles a `body` start tag seen while already in the body: always a parse
 * error.
 *
 * If the second open element is a `body`, `parser.framesetOK` is cleared and
 * any attribute of [token] the existing body lacks is copied onto it.
 * Otherwise the parser is asserted to be in innerHTML mode and the token is
 * ignored.
 */
void startTagBody(StartTagToken token) {
  parser.parseError(token.span, "unexpected-start-tag", {"name": "body"});

  var hasBodyElement = tree.openElements.length != 1 &&
      tree.openElements[1].tagName == "body";
  if (!hasBodyElement) {
    assert(parser.innerHTMLMode);
    return;
  }

  parser.framesetOK = false;
  // Existing attributes win; only missing ones are added.
  for (var attr in token.data.keys) {
    tree.openElements[1].attributes.putIfAbsent(attr, () => token.data[attr]);
  }
}
1316
/**
 * Handles a `frameset` start tag seen in the body: always a parse error.
 *
 * When the second open element is a `body` and `parser.framesetOK` is still
 * true, the body is detached from its parent, the stack is popped back to
 * `html`, the frameset is inserted and the parser moves to the in-frameset
 * phase. In every other case the token is ignored.
 */
void startTagFrameset(StartTagToken token) {
  parser.parseError(token.span, "unexpected-start-tag", {"name": "frameset"});

  if (tree.openElements.length == 1 ||
      tree.openElements[1].tagName != "body") {
    assert(parser.innerHTMLMode);
    return;
  }
  if (!parser.framesetOK) return;

  var body = tree.openElements[1];
  if (body.parent != null) {
    body.parent.nodes.remove(body);
  }
  while (tree.openElements.last.tagName != "html") {
    tree.openElements.removeLast();
  }
  tree.insertElement(token);
  parser.phase = parser._inFramesetPhase;
}
1333
/**
 * Inserts the element for [token], first closing an open `p` element if one
 * is in button scope.
 */
void startTagCloseP(StartTagToken token) {
  var pInButtonScope = tree.elementInScope("p", variant: "button");
  if (pInButtonScope) endTagP(new EndTagToken("p"));
  tree.insertElement(token);
}
1340
/**
 * Inserts a `pre` or `listing` element, first closing an open `p` in button
 * scope. Clears `parser.framesetOK` and sets [dropNewline] so the next
 * whitespace token is checked for a leading newline to drop.
 */
void startTagPreListing(StartTagToken token) {
  var pInButtonScope = tree.elementInScope("p", variant: "button");
  if (pInButtonScope) endTagP(new EndTagToken("p"));
  tree.insertElement(token);
  parser.framesetOK = false;
  dropNewline = true;
}
1349
/**
 * Inserts a `form` element and records it as the tree's form pointer.
 *
 * If a form pointer is already set, the tag is a parse error and is ignored.
 * Otherwise an open `p` in button scope is closed first.
 */
void startTagForm(StartTagToken token) {
  if (tree.formPointer != null) {
    parser.parseError(token.span, "unexpected-start-tag", {"name": "form"});
    return;
  }

  var pInButtonScope = tree.elementInScope("p", variant: "button");
  if (pInButtonScope) endTagP(new EndTagToken("p"));
  tree.insertElement(token);
  tree.formPointer = tree.openElements.last;
}
1361
/**
 * Handles `li`, `dd` and `dt` start tags.
 *
 * Clears `parser.framesetOK`, then walks the open elements from the
 * innermost outwards: the first element whose tag would be closed by this
 * one (`li` for `li`; `dt`/`dd` for `dt` and `dd`) gets an end tag processed
 * by the parser's current phase. The walk stops early at any special element
 * other than `address`, `div` or `p`. Finally an open `p` in button scope is
 * closed and the new element is inserted.
 */
void startTagListItem(StartTagToken token) {
  parser.framesetOK = false;

  final closesByTag = const {"li": const ["li"],
                             "dt": const ["dt", "dd"],
                             "dd": const ["dt", "dd"]};
  var namesToClose = closesByTag[token.name];
  for (Node openNode in tree.openElements.reversed) {
    if (namesToClose.contains(openNode.tagName)) {
      parser.phase.processEndTag(new EndTagToken(openNode.tagName));
      break;
    }
    var isBarrier = specialElements.contains(openNode.nameTuple) &&
        !const ["address", "div", "p"].contains(openNode.tagName);
    if (isBarrier) break;
  }

  var pInButtonScope = tree.elementInScope("p", variant: "button");
  if (pInButtonScope) {
    parser.phase.processEndTag(new EndTagToken("p"));
  }

  tree.insertElement(token);
}
1386
/**
 * Inserts a `plaintext` element, first closing an open `p` in button scope,
 * and switches the tokenizer to its plaintext state.
 */
void startTagPlaintext(StartTagToken token) {
  var pInButtonScope = tree.elementInScope("p", variant: "button");
  if (pInButtonScope) endTagP(new EndTagToken("p"));
  tree.insertElement(token);

  var tokenizer = parser.tokenizer;
  tokenizer.state = tokenizer.plaintextState;
}
1394
/**
 * Inserts a heading element (`h1` to `h6`), first closing an open `p` in
 * button scope.
 *
 * If the current node is itself a heading, that is a parse error and the
 * current node is popped before the new heading is inserted.
 */
void startTagHeading(StartTagToken token) {
  var pInButtonScope = tree.elementInScope("p", variant: "button");
  if (pInButtonScope) endTagP(new EndTagToken("p"));

  var nestedInHeading =
      headingElements.contains(tree.openElements.last.tagName);
  if (nestedInHeading) {
    parser.parseError(token.span, "unexpected-start-tag",
        {"name": token.name});
    tree.openElements.removeLast();
  }
  tree.insertElement(token);
}
1406
/**
 * Handles an `a` start tag.
 *
 * If an `a` element is already among the active formatting elements, that is
 * a parse error: an `a` end tag is processed via [endTagFormatting] and the
 * old element is removed from both the open elements and the active
 * formatting elements. The new element is then added as a formatting
 * element.
 */
void startTagA(StartTagToken token) {
  var activeAnchor = tree.elementInActiveFormattingElements("a");
  if (activeAnchor != null) {
    parser.parseError(token.span, "unexpected-start-tag-implies-end-tag",
        {"startName": "a", "endName": "a"});
    endTagFormatting(new EndTagToken("a"));
    // The end-tag processing may have left the old anchor behind.
    tree.openElements.remove(activeAnchor);
    tree.activeFormattingElements.remove(activeAnchor);
  }

  tree.reconstructActiveFormattingElements();
  addFormattingElement(token);
}
1419
/**
 * Reconstructs the active formatting elements, then inserts the element for
 * [token] and registers it via [addFormattingElement].
 */
void startTagFormatting(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  addFormattingElement(token);
}
1424
/**
 * Handles a `nobr` start tag.
 *
 * If a `nobr` element is already in scope, that is a parse error: a `nobr`
 * end tag is processed and the active formatting elements are reconstructed
 * again. The new element is then added as a formatting element.
 */
void startTagNobr(StartTagToken token) {
  tree.reconstructActiveFormattingElements();

  var nobrInScope = tree.elementInScope("nobr");
  if (nobrInScope) {
    parser.parseError(token.span, "unexpected-start-tag-implies-end-tag",
        {"startName": "nobr", "endName": "nobr"});
    processEndTag(new EndTagToken("nobr"));
    // XXX Need tests that trigger the following
    tree.reconstructActiveFormattingElements();
  }

  addFormattingElement(token);
}
1436
/**
 * Handles a `button` start tag.
 *
 * If a `button` element is already in scope, that is a parse error: a
 * `button` end tag is processed and [token] is returned (presumably so the
 * caller processes it again). Otherwise the element is inserted,
 * `parser.framesetOK` is cleared and null is returned explicitly.
 */
Token startTagButton(StartTagToken token) {
  if (tree.elementInScope("button")) {
    parser.parseError(token.span, "unexpected-start-tag-implies-end-tag",
        {"startName": "button", "endName": "button"});
    processEndTag(new EndTagToken("button"));
    return token;
  }

  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  parser.framesetOK = false;
  // Explicit result for the "token consumed" path; previously this branch
  // fell off the end of a function declared to return a Token.
  return null;
}
1449
/**
 * Inserts an `applet`, `marquee` or `object` element, pushes a [Marker] onto
 * the active formatting elements and clears `parser.framesetOK`.
 */
void startTagAppletMarqueeObject(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  // addFormattingElement stops scanning for matching entries at a Marker.
  tree.activeFormattingElements.add(Marker);
  parser.framesetOK = false;
}
1456
/**
 * Handles an `xmp` start tag: closes an open `p` in button scope,
 * reconstructs the active formatting elements, clears `parser.framesetOK`
 * and hands the token to the parser's generic RAWTEXT handling.
 */
void startTagXmp(StartTagToken token) {
  var pInButtonScope = tree.elementInScope("p", variant: "button");
  if (pInButtonScope) endTagP(new EndTagToken("p"));

  tree.reconstructActiveFormattingElements();
  parser.framesetOK = false;
  parser.parseRCDataRawtext(token, "RAWTEXT");
}
1465
/**
 * Inserts a `table` element, clears `parser.framesetOK` and switches to the
 * in-table phase.
 *
 * Unless the document is in quirks mode, an open `p` in button scope is
 * closed first.
 */
void startTagTable(StartTagToken token) {
  if (parser.compatMode != "quirks" &&
      tree.elementInScope("p", variant: "button")) {
    processEndTag(new EndTagToken("p"));
  }

  tree.insertElement(token);
  parser.framesetOK = false;
  parser.phase = parser._inTablePhase;
}
1476
/**
 * Inserts the element for [token] and immediately pops it again, after
 * reconstructing the active formatting elements. Acknowledges the token's
 * self-closing flag and clears `parser.framesetOK`.
 */
void startTagVoidFormatting(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  // The element takes no children here, so it does not stay open.
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
  parser.framesetOK = false;
}
1484
/**
 * Handles an `input` start tag like any other element routed through
 * [startTagVoidFormatting], except that an input whose `type` attribute is
 * "hidden" (ASCII case-insensitively) leaves `parser.framesetOK` unchanged.
 *
 * An input without a `type` attribute is never treated as hidden; the
 * attribute is checked for null before it is lower-cased.
 */
void startTagInput(StartTagToken token) {
  var savedFramesetOK = parser.framesetOK;
  startTagVoidFormatting(token);

  var type = token.data["type"];
  if (type != null && asciiUpper2Lower(type) == "hidden") {
    // input type=hidden doesn't change framesetOK
    parser.framesetOK = savedFramesetOK;
  }
}
1493
/**
 * Inserts the element for [token], immediately pops it again and
 * acknowledges the token's self-closing flag.
 */
void startTagParamSource(StartTagToken token) {
  tree.insertElement(token);
  // The element takes no children here, so it does not stay open.
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
}
1499
/**
 * Inserts an `hr` element and immediately pops it again, first closing an
 * open `p` in button scope. Acknowledges the token's self-closing flag and
 * clears `parser.framesetOK`.
 */
void startTagHr(StartTagToken token) {
  var pInButtonScope = tree.elementInScope("p", variant: "button");
  if (pInButtonScope) endTagP(new EndTagToken("p"));

  tree.insertElement(token);
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
  parser.framesetOK = false;
}
1509
/**
 * Treats an `image` start tag as `img`: reports a parse error, then
 * processes a new `img` start tag carrying the same attributes and
 * self-closing flag.
 */
void startTagImage(StartTagToken token) {
  // No really...
  parser.parseError(token.span, "unexpected-start-tag-treated-as",
      {"originalName": "image", "newName": "img"});

  var imgToken = new StartTagToken("img",
      data: token.data, selfClosing: token.selfClosing);
  processStartTag(imgToken);
}
1517
/**
 * Expands the deprecated `isindex` tag into a sequence of synthesized
 * tokens: form, hr, label, prompt text, input, end of label, hr, end of
 * form.
 *
 * Always reports a "deprecated-tag" parse error. Nothing else happens if a
 * form pointer is already set. The `action` attribute, if present, moves to
 * the form; `prompt` supplies the label text (a default sentence otherwise);
 * every other attribute is copied to the input, whose `name` is forced to
 * "isindex".
 */
void startTagIsIndex(StartTagToken token) {
  parser.parseError(token.span, "deprecated-tag", {"name": "isindex"});
  if (tree.formPointer != null) return;

  var formAttributes = {};
  var action = token.data["action"];
  if (action != null) formAttributes["action"] = action;

  processStartTag(new StartTagToken("form", data: formAttributes));
  processStartTag(new StartTagToken("hr", data: {}));
  processStartTag(new StartTagToken("label", data: {}));

  // XXX Localization ...
  var promptText = token.data["prompt"];
  if (promptText == null) {
    promptText = "This is a searchable index. Enter search keywords: ";
  }
  processCharacters(new CharactersToken(promptText));

  // Copy so the original token's attribute map is left untouched.
  var inputAttributes = new LinkedHashMap.from(token.data)
    ..remove('action')
    ..remove('prompt');
  inputAttributes["name"] = "isindex";
  processStartTag(new StartTagToken("input",
      data: inputAttributes, selfClosing: token.selfClosing));

  processEndTag(new EndTagToken("label"));
  processStartTag(new StartTagToken("hr", data: {}));
  processEndTag(new EndTagToken("form"));
}
1547
/**
 * Inserts a `textarea` element and switches the tokenizer to its RCDATA
 * state. Sets [dropNewline] so the next whitespace token is checked for a
 * leading newline to drop, and clears `parser.framesetOK`.
 */
void startTagTextarea(StartTagToken token) {
  tree.insertElement(token);

  var tokenizer = parser.tokenizer;
  tokenizer.state = tokenizer.rcdataState;

  dropNewline = true;
  parser.framesetOK = false;
}
1554
/**
 * Clears `parser.framesetOK`, then handles the `iframe` start tag via
 * [startTagRawtext].
 */
void startTagIFrame(StartTagToken token) {
  parser.framesetOK = false;
  startTagRawtext(token);
}
1559
/**
 * Hands the start tag to the parser's generic RAWTEXT handling.
 *
 * Used for iframe, noembed, noframes and noscript (if scripting enabled).
 */
void startTagRawtext(StartTagToken token) =>
    parser.parseRCDataRawtext(token, "RAWTEXT");
1564
/**
 * Inserts an `option` or `optgroup` element. If the current node is an
 * `option`, an `option` end tag is first processed by the parser's current
 * phase.
 */
void startTagOpt(StartTagToken token) {
  var currentIsOption = tree.openElements.last.tagName == "option";
  if (currentIsOption) {
    parser.phase.processEndTag(new EndTagToken("option"));
  }

  tree.reconstructActiveFormattingElements();
  parser.tree.insertElement(token);
}
1572
/**
 * Inserts a `select` element, clears `parser.framesetOK` and switches the
 * parser to the in-select-in-table phase when the current phase is one of
 * the table-related phases, or to the plain in-select phase otherwise.
 */
void startTagSelect(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  parser.framesetOK = false;

  var current = parser.phase;
  var inTableContext = parser._inTablePhase == current ||
      parser._inCaptionPhase == current ||
      parser._inColumnGroupPhase == current ||
      parser._inTableBodyPhase == current ||
      parser._inRowPhase == current ||
      parser._inCellPhase == current;

  parser.phase =
      inTableContext ? parser._inSelectInTablePhase : parser._inSelectPhase;
}
1589
/**
 * Inserts an `rp` or `rt` element.
 *
 * When a `ruby` element is in scope, implied end tags are generated first,
 * and a parse error is reported if the current node is then not `ruby`.
 */
void startTagRpRt(StartTagToken token) {
  var rubyInScope = tree.elementInScope("ruby");
  if (rubyInScope) {
    tree.generateImpliedEndTags();
    var current = tree.openElements.last;
    if (current.tagName != "ruby") {
      parser.parseError(current.sourceSpan, 'undefined-error');
    }
  }
  tree.insertElement(token);
}
1600
/**
 * Inserts a `math` element in the MathML namespace after adjusting the
 * token's MathML and foreign attributes. A self-closing token is popped
 * again immediately and its self-closing flag acknowledged.
 */
void startTagMath(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  parser.adjustMathMLAttributes(token);
  parser.adjustForeignAttributes(token);
  token.namespace = Namespaces.mathml;
  tree.insertElement(token);
  // Need to get the parse error right for the case where the token
  // has a namespace not equal to the xmlns attribute
  if (token.selfClosing) {
    tree.openElements.removeLast();
    token.selfClosingAcknowledged = true;
  }
}
1614
/**
 * Inserts an `svg` element in the SVG namespace after adjusting the token's
 * SVG and foreign attributes. A self-closing token is popped again
 * immediately and its self-closing flag acknowledged.
 */
void startTagSvg(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  parser.adjustSVGAttributes(token);
  parser.adjustForeignAttributes(token);
  token.namespace = Namespaces.svg;
  tree.insertElement(token);
  // Need to get the parse error right for the case where the token
  // has a namespace not equal to the xmlns attribute
  if (token.selfClosing) {
    tree.openElements.removeLast();
    token.selfClosingAcknowledged = true;
  }
}
1628
/**
 * Ignores a start tag that belongs under an element with a different
 * insertion mode, reporting a parse error for it.
 *
 * [processStartTag] routes these tags here: "caption", "col", "colgroup",
 * "frame", "head", "tbody", "td", "tfoot", "th", "thead" and "tr".
 */
void startTagMisplaced(StartTagToken token) => parser.parseError(
    token.span, "unexpected-start-tag-ignored", {"name": token.name});
1640
/**
 * Inserts an ordinary element for any start tag without a dedicated
 * handler, after reconstructing the active formatting elements.
 *
 * Returns null explicitly; the method is declared to return a Token but
 * previously fell off the end without a return statement.
 */
Token startTagOther(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  return null;
}
1645
/**
 * Closes the nearest `p` element in button scope.
 *
 * If there is none, an empty `p` is first opened via [startTagCloseP], a
 * parse error is reported, and the method calls itself to close it.
 * Otherwise implied end tags (other than `p`) are generated, a parse error
 * is reported if the current node is not a `p`, and the stack is popped down
 * to and including the `p`.
 */
void endTagP(EndTagToken token) {
  if (tree.elementInScope("p", variant: "button")) {
    tree.generateImpliedEndTags("p");
    var currentIsP = tree.openElements.last.tagName == "p";
    if (!currentIsP) {
      parser.parseError(token.span, "unexpected-end-tag", {"name": "p"});
    }
    popOpenElementsUntil("p");
    return;
  }

  // No p to close: synthesize one so the end tag has something to match.
  startTagCloseP(new StartTagToken("p", data: {}));
  parser.parseError(token.span, "unexpected-end-tag", {"name": "p"});
  endTagP(new EndTagToken("p"));
}
1659
/**
 * Handles a `body` end tag.
 *
 * With no `body` in scope the tag is a parse error and is ignored. Otherwise
 * the parser moves to the after-body phase. If the current node is not the
 * body, the open elements from index 2 onwards are scanned and one parse
 * error is reported for the first element whose tag is not among those
 * allowed to remain open.
 */
void endTagBody(EndTagToken token) {
  if (!tree.elementInScope("body")) {
    parser.parseError(token.span, 'undefined-error');
    return;
  }

  if (tree.openElements.last.tagName != "body") {
    final mayStayOpen = const ["dd", "dt", "li", "optgroup", "option", "p",
        "rp", "rt", "tbody", "td", "tfoot", "th", "thead", "tr", "body",
        "html"];
    for (Node openNode in slice(tree.openElements, 2)) {
      if (mayStayOpen.contains(openNode.tagName)) continue;
      // Not sure this is the correct name for the parse error
      parser.parseError(token.span, "expected-one-end-tag-but-got-another",
          {"expectedName": "body", "gotName": openNode.tagName});
      break;
    }
  }

  parser.phase = parser._afterBodyPhase;
}
1680
/**
 * Handles an `html` end tag.
 *
 * When a `body` is in scope, behaves as if a `body` end tag had been seen
 * and returns [token] (presumably so the caller processes it again in the
 * new phase). Otherwise the token is ignored and null is returned
 * explicitly.
 */
Token endTagHtml(EndTagToken token) {
  // We repeat the test for the body end tag token being ignored here
  if (tree.elementInScope("body")) {
    endTagBody(new EndTagToken("body"));
    return token;
  }
  // Previously this path fell off the end of a function declared to return
  // a Token.
  return null;
}
1688
/**
 * Handles the end tag of a block-level element.
 *
 * A `pre` end tag resets [dropNewline]. If an element with the token's name
 * is in scope, implied end tags are generated and the stack is popped down
 * to that element. A parse error is reported whenever the current node does
 * not match the token's name, whether or not the element is in scope.
 */
void endTagBlock(EndTagToken token) {
  // Put us back in the right whitespace handling mode
  if (token.name == "pre") dropNewline = false;

  var elementIsInScope = tree.elementInScope(token.name);
  if (elementIsInScope) tree.generateImpliedEndTags();

  var currentMatches = tree.openElements.last.tagName == token.name;
  if (!currentMatches) {
    parser.parseError(token.span, "end-tag-too-early", {"name": token.name});
  }

  if (elementIsInScope) popOpenElementsUntil(token.name);
}
1705
/**
 * Handles a `form` end tag.
 *
 * Always clears the tree's form pointer. If there was no form pointer, or
 * the form element is not in scope, the tag is a parse error and is
 * ignored. Otherwise implied end tags are generated, a parse error is
 * reported when the form is not the current node, and the form element is
 * removed from the stack of open elements.
 */
void endTagForm(EndTagToken token) {
  var node = tree.formPointer;
  tree.formPointer = null;
  if (node == null || !tree.elementInScope(node)) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": "form"});
  } else {
    tree.generateImpliedEndTags();
    if (tree.openElements.last != node) {
      // The reported name is "form"; the literal had been split by a stray
      // space into "for m".
      parser.parseError(token.span, "end-tag-too-early-ignored",
          {"name": "form"});
    }
    tree.openElements.remove(node);
  }
}
1719
/**
 * Handles `li`, `dd` and `dt` end tags.
 *
 * `li` is looked up using the "list" scope variant; the others use the
 * default scope. If no such element is in scope, the tag is a parse error
 * and is ignored. Otherwise implied end tags (other than the token's own)
 * are generated, a parse error is reported when the current node does not
 * match, and the stack is popped down to the matching element.
 */
void endTagListItem(EndTagToken token) {
  var variant = token.name == "li" ? "list" : null;

  if (tree.elementInScope(token.name, variant: variant)) {
    tree.generateImpliedEndTags(token.name);
    var currentMatches = tree.openElements.last.tagName == token.name;
    if (!currentMatches) {
      parser.parseError(token.span, "end-tag-too-early", {"name": token.name});
    }
    popOpenElementsUntil(token.name);
    return;
  }

  parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
}
1737
/**
 * Handles the end tag of a heading (`h1` to `h6`).
 *
 * If any heading element is in scope, implied end tags are generated. A
 * parse error is reported when the current node does not match the token's
 * name. Then, if any heading element is in scope, the stack is popped down
 * to and including the nearest heading of any level.
 */
void endTagHeading(EndTagToken token) {
  for (var name in headingElements) {
    if (tree.elementInScope(name)) {
      tree.generateImpliedEndTags();
      break;
    }
  }
  if (tree.openElements.last.tagName != token.name) {
    parser.parseError(token.span, "end-tag-too-early", {"name": token.name});
  }

  for (var name in headingElements) {
    if (tree.elementInScope(name)) {
      // A separate variable holds the popped element, so the tag-name loop
      // variable is never reassigned to a value of a different type.
      var popped = tree.openElements.removeLast();
      while (!headingElements.contains(popped.tagName)) {
        popped = tree.openElements.removeLast();
      }
      break;
    }
  }
}
1759
/**
 * The much-feared adoption agency algorithm.
 *
 * Handles the end tag of a formatting element whose start and end tags may
 * be misnested with respect to block-level ("special") elements. The outer
 * loop runs at most 8 times and the inner loop at most 3 times per outer
 * pass. The method returns early when there is no matching active formatting
 * element (or it is open but not in scope), when that element is no longer
 * open, or when no special element follows it on the stack.
 */
endTagFormatting(EndTagToken token) {
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adoptionAgency
  // TODO(jmesserly): the comments here don't match the numbered steps in the
  // updated spec. This needs a pass over it to verify that it still matches.
  // In particular the html5lib Python code skipped "step 4", I'm not sure why.
  // XXX Better parseError messages appreciated.
  int outerLoopCounter = 0;
  while (outerLoopCounter < 8) {
    outerLoopCounter += 1;

    // Step 1 paragraph 1
    var formattingElement = tree.elementInActiveFormattingElements(
        token.name);
    if (formattingElement == null ||
        (tree.openElements.contains(formattingElement) &&
         !tree.elementInScope(formattingElement.tagName))) {
      parser.parseError(token.span, "adoption-agency-1.1",
          {"name": token.name});
      return;
    // Step 1 paragraph 2
    } else if (!tree.openElements.contains(formattingElement)) {
      parser.parseError(token.span, "adoption-agency-1.2",
          {"name": token.name});
      tree.activeFormattingElements.remove(formattingElement);
      return;
    }

    // Step 1 paragraph 3
    if (formattingElement != tree.openElements.last) {
      parser.parseError(token.span, "adoption-agency-1.3",
          {"name": token.name});
    }

    // Step 2
    // Start of the adoption agency algorithm proper: the furthest block is
    // the first special element at or after the formatting element on the
    // stack of open elements.
    var afeIndex = tree.openElements.indexOf(formattingElement);
    Node furthestBlock = null;
    for (Node element in slice(tree.openElements, afeIndex)) {
      if (specialElements.contains(element.nameTuple)) {
        furthestBlock = element;
        break;
      }
    }
    // Step 3
    // No furthest block: pop down to and including the formatting element
    // and drop it from the active formatting elements.
    if (furthestBlock == null) {
      var element = tree.openElements.removeLast();
      while (element != formattingElement) {
        element = tree.openElements.removeLast();
      }
      tree.activeFormattingElements.remove(element);
      return;
    }

    // The element immediately below the formatting element on the stack.
    var commonAncestor = tree.openElements[afeIndex - 1];

    // Step 5
    // The bookmark is supposed to help us identify where to reinsert
    // nodes in step 12. We have to ensure that we reinsert nodes after
    // the node before the active formatting element. Note the bookmark
    // can move in step 6.4
    var bookmark = tree.activeFormattingElements.indexOf(formattingElement);

    // Step 6
    Node lastNode = furthestBlock;
    var node = furthestBlock;
    int innerLoopCounter = 0;

    var index = tree.openElements.indexOf(node);
    while (innerLoopCounter < 3) {
      innerLoopCounter += 1;

      // Node is element before node in open elements
      index -= 1;
      node = tree.openElements[index];
      if (!tree.activeFormattingElements.contains(node)) {
        // Not a formatting element: simply drop it from the stack.
        tree.openElements.remove(node);
        continue;
      }
      // Step 6.3
      if (node == formattingElement) {
        break;
      }
      // Step 6.4
      if (lastNode == furthestBlock) {
        bookmark = (tree.activeFormattingElements.indexOf(node) + 1);
      }
      // Step 6.5
      //cite = node.parent
      var clone = node.clone();
      // Replace node with clone
      tree.activeFormattingElements[
          tree.activeFormattingElements.indexOf(node)] = clone;
      tree.openElements[tree.openElements.indexOf(node)] = clone;
      node = clone;

      // Step 6.6
      // Remove lastNode from its parents, if any
      if (lastNode.parent != null) {
        lastNode.parent.nodes.remove(lastNode);
      }
      node.nodes.add(lastNode);
      // Step 6.7
      lastNode = node;
      // End of inner loop
    }

    // Step 7
    // Foster parent lastNode if commonAncestor is a
    // table, tbody, tfoot, thead, or tr we need to foster parent the
    // lastNode
    if (lastNode.parent != null) {
      lastNode.parent.nodes.remove(lastNode);
    }

    if (const ["table", "tbody", "tfoot", "thead", "tr"].contains(
        commonAncestor.tagName)) {
      var nodePos = tree.getTableMisnestedNodePosition();
      nodePos[0].insertBefore(lastNode, nodePos[1]);
    } else {
      commonAncestor.nodes.add(lastNode);
    }

    // Step 8
    var clone = formattingElement.clone();

    // Step 9
    furthestBlock.reparentChildren(clone);

    // Step 10
    furthestBlock.nodes.add(clone);

    // Step 11
    // min() keeps the insertion index valid after the removal just above.
    tree.activeFormattingElements.remove(formattingElement);
    tree.activeFormattingElements.insert(
        min(bookmark, tree.activeFormattingElements.length), clone);

    // Step 12
    tree.openElements.remove(formattingElement);
    tree.openElements.insert(
        tree.openElements.indexOf(furthestBlock) + 1, clone);
  }
}
1903
/**
 * Handles `applet`, `marquee` and `object` end tags.
 *
 * If the element is in scope, implied end tags are generated. A parse error
 * is reported when the current node does not match the token's name. If the
 * element is (still) in scope, the stack is popped down to it and
 * `tree.clearActiveFormattingElements()` is called.
 */
void endTagAppletMarqueeObject(EndTagToken token) {
  if (tree.elementInScope(token.name)) {
    tree.generateImpliedEndTags();
  }
  if (tree.openElements.last.tagName != token.name) {
    parser.parseError(token.span, "end-tag-too-early", {"name": token.name});
  }
  // The scope check is repeated after implied end tags have been generated.
  if (tree.elementInScope(token.name)) {
    popOpenElementsUntil(token.name);
    tree.clearActiveFormattingElements();
  }
}
1916
/**
 * Treats a `br` end tag as a `br` start tag: reports a parse error, inserts
 * an attribute-less `br` element and immediately pops it again.
 */
void endTagBr(EndTagToken token) {
  parser.parseError(token.span, "unexpected-end-tag-treated-as",
      {"originalName": "br", "newName": "br element"});
  tree.reconstructActiveFormattingElements();

  var impliedBr = new StartTagToken("br", data: {});
  tree.insertElement(impliedBr);
  tree.openElements.removeLast();
}
1924
/**
 * Handles any end tag without a dedicated handler.
 *
 * Walks the open elements from the innermost outwards. On the first element
 * whose tag matches, implied end tags (other than the token's own) are
 * generated, a parse error is reported if the current node does not match,
 * and the stack is popped down to and including that element. If a special
 * element is reached first, the tag is a parse error and is ignored.
 */
void endTagOther(EndTagToken token) {
  for (Node openNode in tree.openElements.reversed) {
    if (openNode.tagName == token.name) {
      tree.generateImpliedEndTags(token.name);
      if (tree.openElements.last.tagName != token.name) {
        parser.parseError(token.span, "unexpected-end-tag",
            {"name": token.name});
      }
      // Pop everything above the match, then the match itself.
      while (tree.openElements.removeLast() != openNode) {}
      break;
    }

    if (specialElements.contains(openNode.nameTuple)) {
      parser.parseError(token.span, "unexpected-end-tag",
          {"name": token.name});
      break;
    }
  }
}
1944 }
1945
1946
/**
 * Phase used while the content of an RCDATA/RAWTEXT/script element is being
 * read. Characters are inserted as text; an end tag or end-of-file pops the
 * current element and restores `parser.originalPhase`.
 */
class TextPhase extends Phase {
  TextPhase(parser) : super(parser);

  /**
   * Must never be reached: start tags are not expected in this phase.
   * Original note: "Tried to process start tag %s in RCDATA/RAWTEXT mode".
   */
  processStartTag(StartTagToken token) { assert(false); }

  /** Routes `script` end tags to [endTagScript], the rest to [endTagOther]. */
  processEndTag(EndTagToken token) {
    if (token.name == 'script') return endTagScript(token);
    return endTagOther(token);
  }

  /**
   * Inserts the token's text at the current position.
   *
   * Returns null explicitly; the method is declared to return a Token but
   * previously had no return statement.
   */
  Token processCharacters(CharactersToken token) {
    tree.insertText(token.data, token.span);
    return null;
  }

  /**
   * Handles end-of-file inside the element: reports a parse error naming the
   * current node, pops it and restores the original phase. Returns true.
   */
  bool processEOF() {
    var last = tree.openElements.last;
    parser.parseError(last.sourceSpan, "expected-named-closing-tag-but-got-eof",
        {'name': last.tagName});
    tree.openElements.removeLast();
    parser.phase = parser.originalPhase;
    return true;
  }

  /**
   * Pops the current node, asserted to be a `script` element, and restores
   * the original phase.
   */
  void endTagScript(EndTagToken token) {
    var node = tree.openElements.removeLast();
    assert(node.tagName == "script");
    parser.phase = parser.originalPhase;
    // The rest of this method is all stuff that only happens if
    // document.write works
  }

  /** Pops the current node and restores the original phase. */
  void endTagOther(EndTagToken token) {
    // The popped element itself is not needed.
    tree.openElements.removeLast();
    parser.phase = parser.originalPhase;
  }
}
1984
/**
 * Phase for the "in table" insertion mode.
 *
 * See http://www.whatwg.org/specs/web-apps/current-work/#in-table
 */
class InTablePhase extends Phase {
  InTablePhase(parser) : super(parser);

  /** Routes a start tag to the handler for its tag name. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "caption":
        return startTagCaption(token);
      case "colgroup":
        return startTagColgroup(token);
      case "col":
        return startTagCol(token);
      case "tbody":
      case "tfoot":
      case "thead":
        return startTagRowGroup(token);
      case "td":
      case "th":
      case "tr":
        return startTagImplyTbody(token);
      case "table":
        return startTagTable(token);
      case "style":
      case "script":
        return startTagStyleScript(token);
      case "input":
        return startTagInput(token);
      case "form":
        return startTagForm(token);
      default:
        return startTagOther(token);
    }
  }

  /** Routes an end tag to the handler for its tag name. */
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "table":
        return endTagTable(token);
      case "body":
      case "caption":
      case "col":
      case "colgroup":
      case "html":
      case "tbody":
      case "td":
      case "tfoot":
      case "th":
      case "thead":
      case "tr":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  // Helper methods.

  /**
   * "Clear the stack back to a table context": pops open elements until the
   * current node is `table` or `html`.
   */
  void clearStackToTableContext() {
    while (true) {
      var tag = tree.openElements.last.tagName;
      // Stopping at <html> is the innerHTML case.
      if (tag == "table" || tag == "html") break;
      //parser.parseError(token.span, "unexpected-implied-end-tag-in-table",
      //    {"name": tree.openElements.last.name})
      tree.openElements.removeLast();
    }
  }

  // Processing methods.

  /**
   * Reports "eof-in-table" unless the current node is `html`. Returns false
   * to stop parsing.
   */
  bool processEOF() {
    var current = tree.openElements.last;
    if (current.tagName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError(current.sourceSpan, "eof-in-table");
    }
    return false;
  }

  /**
   * Switches to the in-table-text phase, remembering the phase that was
   * active, and forwards the whitespace token to it.
   */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    var textPhase = parser._inTableTextPhase;
    textPhase.originalPhase = parser.phase;
    parser.phase = textPhase;
    textPhase.processSpaceCharacters(token);
    return null;
  }

  /**
   * Switches to the in-table-text phase, remembering the phase that was
   * active, and forwards the character token to it.
   */
  Token processCharacters(CharactersToken token) {
    var textPhase = parser._inTableTextPhase;
    textPhase.originalPhase = parser.phase;
    parser.phase = textPhase;
    textPhase.processCharacters(token);
    return null;
  }

  /**
   * Inserts text that contains at least one non-whitespace character by
   * delegating to the in-body phase with `tree.insertFromTable` switched on.
   */
  void insertText(CharactersToken token) {
    // Do the table magic!
    tree.insertFromTable = true;
    parser._inBodyPhase.processCharacters(token);
    tree.insertFromTable = false;
  }

  /** Opens a caption: adds a formatting marker and enters the caption phase. */
  void startTagCaption(StartTagToken token) {
    clearStackToTableContext();
    tree.activeFormattingElements.add(Marker);
    tree.insertElement(token);
    parser.phase = parser._inCaptionPhase;
  }

  /** Opens a colgroup and enters the column-group phase. */
  void startTagColgroup(StartTagToken token) {
    clearStackToTableContext();
    tree.insertElement(token);
    parser.phase = parser._inColumnGroupPhase;
  }

  /** Implies a `colgroup` start tag, then returns [token] unchanged. */
  Token startTagCol(StartTagToken token) {
    var impliedColgroup = new StartTagToken("colgroup", data: {});
    startTagColgroup(impliedColgroup);
    return token;
  }

  /** Opens a row group and enters the table-body phase. */
  void startTagRowGroup(StartTagToken token) {
    clearStackToTableContext();
    tree.insertElement(token);
    parser.phase = parser._inTableBodyPhase;
  }

  /** Implies a `tbody` start tag, then returns [token] unchanged. */
  Token startTagImplyTbody(StartTagToken token) {
    var impliedTbody = new StartTagToken("tbody", data: {});
    startTagRowGroup(impliedTbody);
    return token;
  }

  /**
   * Handles a nested `table` start tag by implying a `table` end tag first.
   * Returns [token] unless the parser is in innerHTML mode.
   */
  Token startTagTable(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-implies-end-tag",
        {"startName": "table", "endName": "table"});
    parser.phase.processEndTag(new EndTagToken("table"));
    return parser.innerHTMLMode ? null : token;
  }

  /** Delegates `style` and `script` start tags to the in-head phase. */
  Token startTagStyleScript(StartTagToken token) {
    return parser._inHeadPhase.processStartTag(token);
  }

  /**
   * Inserts hidden inputs directly (with a parse error) and pops them again;
   * any other input is treated like an unknown start tag.
   */
  void startTagInput(StartTagToken token) {
    if (asciiUpper2Lower(token.data["type"]) != "hidden") {
      startTagOther(token);
      return;
    }
    parser.parseError(token.span, "unexpected-hidden-input-in-table");
    tree.insertElement(token);
    // XXX associate with form
    tree.openElements.removeLast();
  }

  /**
   * Reports the misplaced form; when no form pointer is set yet, inserts the
   * form, records it as the form pointer and pops it again.
   */
  void startTagForm(StartTagToken token) {
    parser.parseError(token.span, "unexpected-form-in-table");
    if (tree.formPointer != null) return;

    tree.insertElement(token);
    tree.formPointer = tree.openElements.last;
    tree.openElements.removeLast();
  }

  /**
   * Reports the unexpected start tag and processes it with the in-body phase
   * while `tree.insertFromTable` is switched on.
   */
  void startTagOther(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-implies-table-voodoo",
        {"name": token.name});
    // Do the table magic!
    tree.insertFromTable = true;
    parser._inBodyPhase.processStartTag(token);
    tree.insertFromTable = false;
  }

  /**
   * Closes the table when one is in table scope and resets the insertion
   * mode; otherwise reports an error.
   */
  void endTagTable(EndTagToken token) {
    if (!tree.elementInScope("table", variant: "table")) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return;
    }

    tree.generateImpliedEndTags();
    var current = tree.openElements.last;
    if (current.tagName != "table") {
      parser.parseError(token.span, "end-tag-too-early-named",
          {"gotName": "table", "expectedName": current.tagName});
    }

    // Pop everything up to and including the table element.
    var popped;
    do {
      popped = tree.openElements.removeLast();
    } while (popped.tagName != "table");

    parser.resetInsertionMode();
  }

  /** Reports an end tag that is ignored in this phase. */
  void endTagIgnore(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
  }

  /**
   * Reports the unexpected end tag and processes it with the in-body phase
   * while `tree.insertFromTable` is switched on.
   */
  void endTagOther(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag-implies-table-voodoo",
        {"name": token.name});
    // Do the table magic!
    tree.insertFromTable = true;
    parser._inBodyPhase.processEndTag(token);
    tree.insertFromTable = false;
  }
}
2165
/**
 * Phase that buffers character tokens seen inside a table.
 *
 * The buffered tokens are flushed as a single run of text as soon as any
 * other kind of token (or EOF) arrives, at which point [originalPhase] is
 * made the current phase again.
 */
class InTableTextPhase extends Phase {
  /** The phase to return to once the buffered text has been flushed. */
  Phase originalPhase;

  /** Character tokens collected since the last flush. */
  List<StringToken> characterTokens;

  InTableTextPhase(parser)
      : characterTokens = <StringToken>[],
        super(parser);

  /**
   * Emits the buffered characters and empties the buffer.
   *
   * Text containing a non-whitespace character is routed through the
   * in-table phase's `insertText`; non-empty whitespace-only text is
   * inserted directly.
   */
  void flushCharacters() {
    if (characterTokens.isEmpty) return;

    var buffer = new StringBuffer();
    for (var pending in characterTokens) {
      buffer.write(pending.data);
    }
    var text = buffer.toString();

    // The span, when requested, covers the first through the last token.
    var span = parser.generateSpans
        ? new FileSpan.union(
            characterTokens.first.span, characterTokens.last.span)
        : null;

    if (allWhitespace(text)) {
      if (text.length > 0) tree.insertText(text, span);
    } else {
      var merged = new CharactersToken(text);
      merged.span = span;
      parser._inTablePhase.insertText(merged);
    }
    characterTokens = <StringToken>[];
  }

  /** Flushes, restores the original phase and returns [token]. */
  Token processComment(CommentToken token) {
    flushCharacters();
    parser.phase = originalPhase;
    return token;
  }

  /** Flushes, restores the original phase and returns true. */
  bool processEOF() {
    flushCharacters();
    parser.phase = originalPhase;
    return true;
  }

  /** Buffers [token], dropping a lone U+0000 character. */
  Token processCharacters(CharactersToken token) {
    if (token.data != "\u0000") {
      characterTokens.add(token);
    }
    return null;
  }

  /** Buffers the whitespace token. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    // Pretty sure we should never reach here.
    characterTokens.add(token);
    // XXX assert(false);
    return null;
  }

  /** Flushes, restores the original phase and returns [token]. */
  Token processStartTag(StartTagToken token) {
    flushCharacters();
    parser.phase = originalPhase;
    return token;
  }

  /** Flushes, restores the original phase and returns [token]. */
  Token processEndTag(EndTagToken token) {
    flushCharacters();
    parser.phase = originalPhase;
    return token;
  }
}
2232
2233
/**
 * Phase for the "in caption" insertion mode.
 *
 * See http://www.whatwg.org/specs/web-apps/current-work/#in-caption
 */
class InCaptionPhase extends Phase {
  InCaptionPhase(parser) : super(parser);

  /** Routes a start tag to the handler for its tag name. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "caption":
      case "col":
      case "colgroup":
      case "tbody":
      case "td":
      case "tfoot":
      case "th":
      case "thead":
      case "tr":
        return startTagTableElement(token);
      default:
        return startTagOther(token);
    }
  }

  /** Routes an end tag to the handler for its tag name. */
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "caption":
        return endTagCaption(token);
      case "table":
        return endTagTable(token);
      case "body":
      case "col":
      case "colgroup":
      case "html":
      case "tbody":
      case "td":
      case "tfoot":
      case "th":
      case "thead":
      case "tr":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  /** Whether a `caption` end tag would be ignored (none is in table scope). */
  bool ignoreEndTagCaption() {
    var captionOpen = tree.elementInScope("caption", variant: "table");
    return !captionOpen;
  }

  /** Lets the in-body phase handle EOF, then returns false. */
  bool processEOF() {
    parser._inBodyPhase.processEOF();
    return false;
  }

  /** Characters are handled exactly as in the in-body phase. */
  Token processCharacters(CharactersToken token) {
    return parser._inBodyPhase.processCharacters(token);
  }

  /**
   * Handles a table-structure start tag by implying a `caption` end tag.
   * Returns [token] when that end tag was not ignored, null otherwise.
   */
  Token startTagTableElement(StartTagToken token) {
    parser.parseError(token.span, "undefined-error");
    // XXX Have to duplicate logic here to find out if the tag is ignored.
    // The check has to happen before the implied end tag is processed.
    var captionIgnored = ignoreEndTagCaption();
    parser.phase.processEndTag(new EndTagToken("caption"));
    return captionIgnored ? null : token;
  }

  /** Any other start tag is handled as in the in-body phase. */
  Token startTagOther(StartTagToken token) {
    return parser._inBodyPhase.processStartTag(token);
  }

  /**
   * Closes the caption and returns to the in-table phase, or reports an
   * error when no caption is in table scope.
   */
  void endTagCaption(EndTagToken token) {
    if (ignoreEndTagCaption()) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return;
    }

    // AT this code is quite similar to endTagTable in "InTable"
    tree.generateImpliedEndTags();
    var currentTag = tree.openElements.last.tagName;
    if (currentTag != "caption") {
      parser.parseError(token.span, "expected-one-end-tag-but-got-another",
          {"gotName": "caption", "expectedName": currentTag});
    }

    // Pop everything up to and including the caption element.
    var popped;
    do {
      popped = tree.openElements.removeLast();
    } while (popped.tagName != "caption");

    tree.clearActiveFormattingElements();
    parser.phase = parser._inTablePhase;
  }

  /**
   * Handles a `table` end tag by implying a `caption` end tag. Returns
   * [token] when that end tag was not ignored, null otherwise.
   */
  Token endTagTable(EndTagToken token) {
    parser.parseError(token.span, "undefined-error");
    var captionIgnored = ignoreEndTagCaption();
    parser.phase.processEndTag(new EndTagToken("caption"));
    return captionIgnored ? null : token;
  }

  /** Reports an end tag that is ignored in this phase. */
  void endTagIgnore(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
  }

  /** Any other end tag is handled as in the in-body phase. */
  Token endTagOther(EndTagToken token) {
    return parser._inBodyPhase.processEndTag(token);
  }
}
2327
2328
/**
 * Phase for the "in column group" insertion mode.
 *
 * See http://www.whatwg.org/specs/web-apps/current-work/#in-column
 */
class InColumnGroupPhase extends Phase {
  InColumnGroupPhase(parser) : super(parser);

  /** Routes a start tag to the handler for its tag name. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "col":
        return startTagCol(token);
      default:
        return startTagOther(token);
    }
  }

  /** Routes an end tag to the handler for its tag name. */
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "colgroup":
        return endTagColgroup(token);
      case "col":
        return endTagCol(token);
      default:
        return endTagOther(token);
    }
  }

  /** Whether a `colgroup` end tag would be ignored (current node is `html`). */
  bool ignoreEndTagColgroup() {
    var currentTag = tree.openElements.last.tagName;
    return currentTag == "html";
  }

  /**
   * Returns false when the colgroup end tag would be ignored; otherwise
   * implies a `colgroup` end tag and returns true.
   */
  bool processEOF() {
    if (ignoreEndTagColgroup()) {
      assert(parser.innerHTMLMode);
      return false;
    }
    endTagColgroup(new EndTagToken("colgroup"));
    return true;
  }

  /** Implies a `colgroup` end tag; see [_impliedColgroupEnd]. */
  Token processCharacters(CharactersToken token) {
    return _impliedColgroupEnd(token);
  }

  /** Inserts the `col` element and pops it straight away. */
  void startTagCol(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
  }

  /** Implies a `colgroup` end tag; see [_impliedColgroupEnd]. */
  Token startTagOther(StartTagToken token) {
    return _impliedColgroupEnd(token);
  }

  /**
   * Pops the colgroup and returns to the in-table phase, or reports an error
   * when the end tag has to be ignored.
   */
  void endTagColgroup(EndTagToken token) {
    if (!ignoreEndTagColgroup()) {
      tree.openElements.removeLast();
      parser.phase = parser._inTablePhase;
      return;
    }
    // innerHTML case
    assert(parser.innerHTMLMode);
    parser.parseError(token.span, "undefined-error");
  }

  /** `col` has no end tag; report it. */
  void endTagCol(EndTagToken token) {
    parser.parseError(token.span, "no-end-tag", {"name": "col"});
  }

  /** Implies a `colgroup` end tag; see [_impliedColgroupEnd]. */
  Token endTagOther(EndTagToken token) {
    return _impliedColgroupEnd(token);
  }

  /**
   * Acts as if a `colgroup` end tag had been seen. Returns [token] when that
   * end tag was not ignored, null otherwise.
   */
  Token _impliedColgroupEnd(Token token) {
    // Must be sampled before endTagColgroup changes the stack.
    var colgroupIgnored = ignoreEndTagColgroup();
    endTagColgroup(new EndTagToken("colgroup"));
    return colgroupIgnored ? null : token;
  }
}
2402
2403
/**
 * Phase for the "in table body" insertion mode.
 *
 * See http://www.whatwg.org/specs/web-apps/current-work/#in-table0
 */
class InTableBodyPhase extends Phase {
  InTableBodyPhase(parser) : super(parser);

  /** Routes a start tag to the handler for its tag name. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "tr":
        return startTagTr(token);
      case "td":
      case "th":
        return startTagTableCell(token);
      case "caption":
      case "col":
      case "colgroup":
      case "tbody":
      case "tfoot":
      case "thead":
        return startTagTableOther(token);
      default:
        return startTagOther(token);
    }
  }

  /** Routes an end tag to the handler for its tag name. */
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "tbody":
      case "tfoot":
      case "thead":
        return endTagTableRowGroup(token);
      case "table":
        return endTagTable(token);
      case "body":
      case "caption":
      case "col":
      case "colgroup":
      case "html":
      case "td":
      case "th":
      case "tr":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  // Helper methods.

  /**
   * Pops open elements until the current node is `tbody`, `tfoot`, `thead`
   * or `html`.
   */
  void clearStackToTableBodyContext() {
    while (true) {
      var tag = tree.openElements.last.tagName;
      if (tag == "html") {
        assert(parser.innerHTMLMode);
        return;
      }
      if (tag == "tbody" || tag == "tfoot" || tag == "thead") return;
      //XXX parser.parseError(token.span, "unexpected-implied-end-tag-in-table",
      //    {"name": tree.openElements.last.name})
      tree.openElements.removeLast();
    }
  }

  // The rest.

  /** Lets the in-table phase handle EOF, then returns false. */
  bool processEOF() {
    parser._inTablePhase.processEOF();
    return false;
  }

  /** Whitespace is handled as in the in-table phase. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    return parser._inTablePhase.processSpaceCharacters(token);
  }

  /** Characters are handled as in the in-table phase. */
  Token processCharacters(CharactersToken token) {
    return parser._inTablePhase.processCharacters(token);
  }

  /** Opens a row and enters the in-row phase. */
  void startTagTr(StartTagToken token) {
    clearStackToTableBodyContext();
    tree.insertElement(token);
    parser.phase = parser._inRowPhase;
  }

  /**
   * Reports a cell that appears directly inside a table body, implies a `tr`
   * start tag and returns [token] unchanged.
   */
  Token startTagTableCell(StartTagToken token) {
    parser.parseError(token.span, "unexpected-cell-in-table-body",
        {"name": token.name});
    var impliedRow = new StartTagToken("tr", data: {});
    startTagTr(impliedRow);
    return token;
  }

  /** Table-structure start tags are handled like a `table` end tag. */
  Token startTagTableOther(token) {
    return endTagTable(token);
  }

  /** Any other start tag is handled as in the in-table phase. */
  Token startTagOther(StartTagToken token) {
    return parser._inTablePhase.processStartTag(token);
  }

  /**
   * Closes the row group named by [token] when it is in table scope and
   * returns to the in-table phase; otherwise reports an error.
   */
  void endTagTableRowGroup(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      parser.parseError(token.span, "unexpected-end-tag-in-table-body",
          {"name": token.name});
      return;
    }
    clearStackToTableBodyContext();
    tree.openElements.removeLast();
    parser.phase = parser._inTablePhase;
  }

  /**
   * Closes the current row group, if any is in table scope, and returns
   * [token]; otherwise reports an error and returns null.
   */
  Token endTagTable(TagToken token) {
    // XXX AT Any ideas on how to share this with endTagTable?
    var rowGroupOpen = tree.elementInScope("tbody", variant: "table") ||
        tree.elementInScope("thead", variant: "table") ||
        tree.elementInScope("tfoot", variant: "table");

    if (!rowGroupOpen) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return null;
    }

    clearStackToTableBodyContext();
    var currentTag = tree.openElements.last.tagName;
    endTagTableRowGroup(new EndTagToken(currentTag));
    return token;
  }

  /** Reports an end tag that is ignored in this phase. */
  void endTagIgnore(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag-in-table-body",
        {"name": token.name});
  }

  /** Any other end tag is handled as in the in-table phase. */
  Token endTagOther(EndTagToken token) {
    return parser._inTablePhase.processEndTag(token);
  }
}
2514
2515
/**
 * Phase for the "in row" insertion mode.
 *
 * See http://www.whatwg.org/specs/web-apps/current-work/#in-row
 */
class InRowPhase extends Phase {
  InRowPhase(parser) : super(parser);

  /** Routes a start tag to the handler for its tag name. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "td":
      case "th":
        return startTagTableCell(token);
      case "caption":
      case "col":
      case "colgroup":
      case "tbody":
      case "tfoot":
      case "thead":
      case "tr":
        return startTagTableOther(token);
      default:
        return startTagOther(token);
    }
  }

  /** Routes an end tag to the handler for its tag name. */
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "tr":
        return endTagTr(token);
      case "table":
        return endTagTable(token);
      case "tbody":
      case "tfoot":
      case "thead":
        return endTagTableRowGroup(token);
      case "body":
      case "caption":
      case "col":
      case "colgroup":
      case "html":
      case "td":
      case "th":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  // Helper methods (XXX unify this with other table helper methods).

  /**
   * Pops open elements until the current node is `tr` or `html`, reporting a
   * parse error for every element popped.
   */
  void clearStackToTableRowContext() {
    for (var top = tree.openElements.last;
        top.tagName != "tr" && top.tagName != "html";
        top = tree.openElements.last) {
      parser.parseError(top.sourceSpan,
          "unexpected-implied-end-tag-in-table-row", {"name": top.tagName});
      tree.openElements.removeLast();
    }
  }

  /** Whether a `tr` end tag would be ignored (no `tr` in table scope). */
  bool ignoreEndTagTr() {
    var rowOpen = tree.elementInScope("tr", variant: "table");
    return !rowOpen;
  }

  // The rest.

  /** Lets the in-table phase handle EOF, then returns false. */
  bool processEOF() {
    parser._inTablePhase.processEOF();
    return false;
  }

  /** Whitespace is handled as in the in-table phase. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    return parser._inTablePhase.processSpaceCharacters(token);
  }

  /** Characters are handled as in the in-table phase. */
  Token processCharacters(CharactersToken token) {
    return parser._inTablePhase.processCharacters(token);
  }

  /**
   * Opens a cell, enters the in-cell phase and adds a marker to the active
   * formatting elements.
   */
  void startTagTableCell(StartTagToken token) {
    clearStackToTableRowContext();
    tree.insertElement(token);
    parser.phase = parser._inCellPhase;
    tree.activeFormattingElements.add(Marker);
  }

  /**
   * Implies a `tr` end tag. Returns [token] when that end tag was not
   * ignored, null otherwise.
   */
  Token startTagTableOther(StartTagToken token) {
    // Must be sampled before endTagTr changes the stack.
    bool rowEndIgnored = ignoreEndTagTr();
    endTagTr(new EndTagToken("tr"));
    // XXX how are we sure it's always ignored in the innerHTML case?
    if (rowEndIgnored) return null;
    return token;
  }

  /** Any other start tag is handled as in the in-table phase. */
  Token startTagOther(StartTagToken token) {
    return parser._inTablePhase.processStartTag(token);
  }

  /**
   * Closes the row and returns to the table-body phase, or reports an error
   * when no `tr` is in table scope.
   */
  void endTagTr(EndTagToken token) {
    if (ignoreEndTagTr()) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return;
    }
    clearStackToTableRowContext();
    tree.openElements.removeLast();
    parser.phase = parser._inTableBodyPhase;
  }

  /**
   * Implies a `tr` end tag. Returns [token] for reprocessing when that end
   * tag was not ignored, null otherwise.
   */
  Token endTagTable(EndTagToken token) {
    var rowEndIgnored = ignoreEndTagTr();
    endTagTr(new EndTagToken("tr"));
    // Reprocess the current tag if the tr end tag was not ignored.
    // XXX how are we sure it's always ignored in the innerHTML case?
    if (rowEndIgnored) return null;
    return token;
  }

  /**
   * When the row group named by [token] is in table scope, implies a `tr`
   * end tag and returns [token]; otherwise reports an error and returns null.
   */
  Token endTagTableRowGroup(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      parser.parseError(token.span, "undefined-error");
      return null;
    }
    endTagTr(new EndTagToken("tr"));
    return token;
  }

  /** Reports an end tag that is ignored in this phase. */
  void endTagIgnore(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag-in-table-row",
        {"name": token.name});
  }

  /** Any other end tag is handled as in the in-table phase. */
  Token endTagOther(EndTagToken token) {
    return parser._inTablePhase.processEndTag(token);
  }
}
2632
/**
 * Phase for the "in cell" insertion mode.
 *
 * See http://www.whatwg.org/specs/web-apps/current-work/#in-cell
 */
class InCellPhase extends Phase {
  InCellPhase(parser) : super(parser);

  /** Routes a start tag to the handler for its tag name. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "caption":
      case "col":
      case "colgroup":
      case "tbody":
      case "td":
      case "tfoot":
      case "th":
      case "thead":
      case "tr":
        return startTagTableOther(token);
      default:
        return startTagOther(token);
    }
  }

  /** Routes an end tag to the handler for its tag name. */
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "td":
      case "th":
        return endTagTableCell(token);
      case "body":
      case "caption":
      case "col":
      case "colgroup":
      case "html":
        return endTagIgnore(token);
      case "table":
      case "tbody":
      case "tfoot":
      case "thead":
      case "tr":
        return endTagImply(token);
      default:
        return endTagOther(token);
    }
  }

  // Helper.

  /**
   * Closes the open cell: `td` is tried first, then `th`; at most one of
   * them is closed.
   */
  void closeCell() {
    for (var cellTag in const ["td", "th"]) {
      if (tree.elementInScope(cellTag, variant: "table")) {
        endTagTableCell(new EndTagToken(cellTag));
        return;
      }
    }
  }

  // The rest.

  /** Lets the in-body phase handle EOF, then returns false. */
  bool processEOF() {
    parser._inBodyPhase.processEOF();
    return false;
  }

  /** Characters are handled as in the in-body phase. */
  Token processCharacters(CharactersToken token) {
    return parser._inBodyPhase.processCharacters(token);
  }

  /**
   * Closes the open cell and returns [token] when a `td` or `th` is in table
   * scope; otherwise reports an error and returns null.
   */
  Token startTagTableOther(StartTagToken token) {
    var cellOpen = tree.elementInScope("td", variant: "table") ||
        tree.elementInScope("th", variant: "table");
    if (cellOpen) {
      closeCell();
      return token;
    }
    // innerHTML case
    assert(parser.innerHTMLMode);
    parser.parseError(token.span, "undefined-error");
    return null;
  }

  /** Any other start tag is handled as in the in-body phase. */
  Token startTagOther(StartTagToken token) {
    return parser._inBodyPhase.processStartTag(token);
  }

  /**
   * Closes the cell named by [token] when it is in table scope, clears the
   * active formatting elements and returns to the in-row phase; otherwise
   * reports an error.
   */
  void endTagTableCell(EndTagToken token) {
    var name = token.name;
    if (!tree.elementInScope(name, variant: "table")) {
      parser.parseError(token.span, "unexpected-end-tag", {"name": name});
      return;
    }

    tree.generateImpliedEndTags(name);
    if (tree.openElements.last.tagName == name) {
      tree.openElements.removeLast();
    } else {
      parser.parseError(token.span, "unexpected-cell-end-tag", {"name": name});
      popOpenElementsUntil(name);
    }
    tree.clearActiveFormattingElements();
    parser.phase = parser._inRowPhase;
  }

  /** Reports an end tag that is ignored in this phase. */
  void endTagIgnore(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
  }

  /**
   * When the element named by [token] is in table scope, closes the open
   * cell and returns [token]; otherwise reports an error and returns null.
   */
  Token endTagImply(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      // sometimes innerHTML case
      parser.parseError(token.span, "undefined-error");
      return null;
    }
    closeCell();
    return token;
  }

  /** Any other end tag is handled as in the in-body phase. */
  Token endTagOther(EndTagToken token) {
    return parser._inBodyPhase.processEndTag(token);
  }
}
2729
/**
 * Phase for the "in select" insertion mode.
 *
 * See http://www.whatwg.org/specs/web-apps/current-work/#in-select
 */
class InSelectPhase extends Phase {
  InSelectPhase(parser) : super(parser);

  /** Routes a start tag to the handler for its tag name. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "option":
        return startTagOption(token);
      case "optgroup":
        return startTagOptgroup(token);
      case "select":
        return startTagSelect(token);
      case "input":
      case "keygen":
      case "textarea":
        return startTagInput(token);
      case "script":
        return startTagScript(token);
      default:
        return startTagOther(token);
    }
  }

  /** Routes an end tag to the handler for its tag name. */
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "option":
        return endTagOption(token);
      case "optgroup":
        return endTagOptgroup(token);
      case "select":
        return endTagSelect(token);
      default:
        return endTagOther(token);
    }
  }

  /**
   * Reports "eof-in-select" unless the current node is `html`. Always
   * returns false.
   */
  bool processEOF() {
    var current = tree.openElements.last;
    if (current.tagName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError(current.sourceSpan, "eof-in-select");
    }
    return false;
  }

  /** Inserts the character data, dropping a lone U+0000 character. */
  Token processCharacters(CharactersToken token) {
    if (token.data != "\u0000") {
      tree.insertText(token.data, token.span);
    }
    return null;
  }

  /** Inserts an option, implying `</option>` when one is the current node. */
  void startTagOption(StartTagToken token) {
    // We need to imply </option> if <option> is the current node.
    _popCurrentIf("option");
    tree.insertElement(token);
  }

  /**
   * Inserts an optgroup after popping a current `option` and then a current
   * `optgroup`, when present.
   */
  void startTagOptgroup(StartTagToken token) {
    _popCurrentIf("option");
    _popCurrentIf("optgroup");
    tree.insertElement(token);
  }

  /** A nested `select` start tag is reported and treated as `</select>`. */
  void startTagSelect(StartTagToken token) {
    parser.parseError(token.span, "unexpected-select-in-select");
    endTagSelect(new EndTagToken("select"));
  }

  /**
   * Reports the misplaced control. When a `select` is in select scope it is
   * closed and [token] is returned; otherwise null is returned.
   */
  Token startTagInput(StartTagToken token) {
    parser.parseError(token.span, "unexpected-input-in-select");
    if (!tree.elementInScope("select", variant: "select")) {
      assert(parser.innerHTMLMode);
      return null;
    }
    endTagSelect(new EndTagToken("select"));
    return token;
  }

  /** `script` start tags are handled as in the in-head phase. */
  Token startTagScript(StartTagToken token) {
    return parser._inHeadPhase.processStartTag(token);
  }

  /** Any other start tag is reported and dropped. */
  Token startTagOther(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-in-select",
        {"name": token.name});
    return null;
  }

  /** Pops the current `option`, or reports an error if it is not one. */
  void endTagOption(EndTagToken token) {
    if (tree.openElements.last.tagName != "option") {
      parser.parseError(token.span, "unexpected-end-tag-in-select",
          {"name": "option"});
      return;
    }
    tree.openElements.removeLast();
  }

  /**
   * Closes the current optgroup, first popping an `option` that sits
   * directly on top of it; reports an error when no optgroup is current.
   */
  void endTagOptgroup(EndTagToken token) {
    // </optgroup> implicitly closes <option>
    var depth = tree.openElements.length;
    if (tree.openElements.last.tagName == "option" &&
        tree.openElements[depth - 2].tagName == "optgroup") {
      tree.openElements.removeLast();
    }

    // It also closes </optgroup>, but nothing else.
    if (tree.openElements.last.tagName != "optgroup") {
      parser.parseError(token.span, "unexpected-end-tag-in-select",
          {"name": "optgroup"});
      return;
    }
    tree.openElements.removeLast();
  }

  /**
   * Closes the select when one is in select scope and resets the insertion
   * mode; otherwise reports an error.
   */
  void endTagSelect(EndTagToken token) {
    if (!tree.elementInScope("select", variant: "select")) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return;
    }
    popOpenElementsUntil("select");
    parser.resetInsertionMode();
  }

  /** Any other end tag is reported and dropped. */
  void endTagOther(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag-in-select",
        {"name": token.name});
  }

  /** Pops the current node when its tag name equals [tagName]. */
  void _popCurrentIf(String tagName) {
    if (tree.openElements.last.tagName == tagName) {
      tree.openElements.removeLast();
    }
  }
}
2856
2857
/**
 * Phase used for a `select` that appears inside a table.
 *
 * Table-structure tags get special treatment; everything else is forwarded
 * to the in-select phase.
 */
class InSelectInTablePhase extends Phase {
  InSelectInTablePhase(parser) : super(parser);

  /** Routes a start tag to the handler for its tag name. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "caption":
      case "table":
      case "tbody":
      case "tfoot":
      case "thead":
      case "tr":
      case "td":
      case "th":
        return startTagTable(token);
      default:
        return startTagOther(token);
    }
  }

  /** Routes an end tag to the handler for its tag name. */
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "caption":
      case "table":
      case "tbody":
      case "tfoot":
      case "thead":
      case "tr":
      case "td":
      case "th":
        return endTagTable(token);
      default:
        return endTagOther(token);
    }
  }

  /** Lets the in-select phase handle EOF, then returns false. */
  bool processEOF() {
    parser._inSelectPhase.processEOF();
    return false;
  }

  /** Characters are handled as in the in-select phase. */
  Token processCharacters(CharactersToken token) {
    return parser._inSelectPhase.processCharacters(token);
  }

  /**
   * Reports the table-structure start tag, implies `</select>` and returns
   * [token].
   */
  Token startTagTable(StartTagToken token) {
    var details = {"name": token.name};
    parser.parseError(token.span,
        "unexpected-table-element-start-tag-in-select-in-table", details);
    endTagOther(new EndTagToken("select"));
    return token;
  }

  /** Any other start tag is handled as in the in-select phase. */
  Token startTagOther(StartTagToken token) {
    return parser._inSelectPhase.processStartTag(token);
  }

  /**
   * Reports the table-structure end tag. When the named element is in table
   * scope, implies `</select>` and returns [token]; otherwise returns null.
   */
  Token endTagTable(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError(token.span,
        "unexpected-table-element-end-tag-in-select-in-table", details);
    if (!tree.elementInScope(token.name, variant: "table")) return null;

    endTagOther(new EndTagToken("select"));
    return token;
  }

  /** Any other end tag is handled as in the in-select phase. */
  Token endTagOther(EndTagToken token) {
    return parser._inSelectPhase.processEndTag(token);
  }
}
2914
2915
2916 class InForeignContentPhase extends Phase {
2917 // TODO(jmesserly): this is sorted so we could binary search.
2918 static const breakoutElements = const [
2919 'b', 'big', 'blockquote', 'body', 'br','center', 'code', 'dd', 'div', 'dl',
2920 'dt', 'em', 'embed', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'i',
2921 'img', 'li', 'listing', 'menu', 'meta', 'nobr', 'ol', 'p', 'pre', 'ruby',
2922 's', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table', 'tt', 'u',
2923 'ul', 'var'
2924 ];
2925
2926 InForeignContentPhase(parser) : super(parser);
2927
/**
 * Rewrites `token.name` to its mixed-case SVG spelling when the current
 * (lowercase) name has one; other names are left untouched.
 */
void adjustSVGTagNames(token) {
  // Lowercase tag name -> SVG tag name with its canonical capitalization.
  final svgNames = const {
    "altglyph": "altGlyph",
    "altglyphdef": "altGlyphDef",
    "altglyphitem": "altGlyphItem",
    "animatecolor": "animateColor",
    "animatemotion": "animateMotion",
    "animatetransform": "animateTransform",
    "clippath": "clipPath",
    "feblend": "feBlend",
    "fecolormatrix": "feColorMatrix",
    "fecomponenttransfer": "feComponentTransfer",
    "fecomposite": "feComposite",
    "feconvolvematrix": "feConvolveMatrix",
    "fediffuselighting": "feDiffuseLighting",
    "fedisplacementmap": "feDisplacementMap",
    "fedistantlight": "feDistantLight",
    "feflood": "feFlood",
    "fefunca": "feFuncA",
    "fefuncb": "feFuncB",
    "fefuncg": "feFuncG",
    "fefuncr": "feFuncR",
    "fegaussianblur": "feGaussianBlur",
    "feimage": "feImage",
    "femerge": "feMerge",
    "femergenode": "feMergeNode",
    "femorphology": "feMorphology",
    "feoffset": "feOffset",
    "fepointlight": "fePointLight",
    "fespecularlighting": "feSpecularLighting",
    "fespotlight": "feSpotLight",
    "fetile": "feTile",
    "feturbulence": "feTurbulence",
    "foreignobject": "foreignObject",
    "glyphref": "glyphRef",
    "lineargradient": "linearGradient",
    "radialgradient": "radialGradient",
    "textpath": "textPath"
  };

  var adjusted = svgNames[token.name];
  if (adjusted == null) return;
  token.name = adjusted;
}
2973
/**
 * Replaces a lone U+0000 with U+FFFD; for any other data that is not all
 * whitespace, clears `parser.framesetOK`. The token is then handed to the
 * superclass implementation.
 */
Token processCharacters(CharactersToken token) {
  var isNull = token.data == "\u0000";
  if (isNull) {
    token.data = "\uFFFD";
  } else if (parser.framesetOK && !allWhitespace(token.data)) {
    parser.framesetOK = false;
  }
  super.processCharacters(token);
  return null;
}
2982
/**
 * Handles a start tag while the current node is in foreign content.
 *
 * If [token] names one of the [breakoutElements], or is a `font` tag that
 * carries a `color`, `face` or `size` attribute, a parse error is reported,
 * open elements are popped until the top of the stack is in the default
 * namespace or is an HTML / MathML text integration point, and the token is
 * returned.
 *
 * Otherwise the token's attributes (and, for SVG, its tag name) are
 * adjusted, the token takes the namespace of the current node and the
 * element is inserted; a self-closing element is popped again immediately
 * and acknowledged. No value is returned explicitly on this path.
 */
Token processStartTag(StartTagToken token) {
  var currentNode = tree.openElements.last;

  var breaksOut = breakoutElements.contains(token.name);
  if (!breaksOut && token.name == "font") {
    breaksOut = token.data.containsKey("color") ||
        token.data.containsKey("face") ||
        token.data.containsKey("size");
  }

  if (breaksOut) {
    parser.parseError(token.span,
        "unexpected-html-element-in-foreign-content", {'name': token.name});
    while (true) {
      final top = tree.openElements.last;
      if (top.namespace == tree.defaultNamespace ||
          parser.isHTMLIntegrationPoint(top) ||
          parser.isMathMLTextIntegrationPoint(top)) {
        break;
      }
      tree.openElements.removeLast();
    }
    return token;
  }

  if (currentNode.namespace == Namespaces.mathml) {
    parser.adjustMathMLAttributes(token);
  } else if (currentNode.namespace == Namespaces.svg) {
    adjustSVGTagNames(token);
    parser.adjustSVGAttributes(token);
  }
  parser.adjustForeignAttributes(token);
  token.namespace = currentNode.namespace;
  tree.insertElement(token);

  if (!token.selfClosing) return;
  // Self-closing foreign elements never stay on the stack.
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
}
3017
/**
 * Handles an end tag while the current node is in foreign content.
 *
 * Reports "unexpected-end-tag" when the current node's tag name, converted
 * to ASCII lowercase, differs from [token]'s name. It then walks down the
 * stack of open elements from the current node:
 *
 * * if a node's lowercased tag name equals the token name, every element
 *   up to and including that node is popped and `null` is returned;
 * * if a node in the default namespace is reached first, the token is
 *   handed to the current phase's `processEndTag` and its result returned;
 * * if the bottom of the stack is reached without either, `null` is
 *   returned.
 */
Token processEndTag(EndTagToken token) {
  var nodeIndex = tree.openElements.length - 1;
  var node = tree.openElements.last;
  // Element names in foreign content can be mixed case (adjustSVGTagNames
  // produces names such as "foreignObject"). Compare the lowercased form,
  // exactly as the matching test in the loop below does, so a correctly
  // closed camelCase element is not reported as an error.
  if (asciiUpper2Lower(node.tagName) != token.name) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
  }

  while (true) {
    if (asciiUpper2Lower(node.tagName) == token.name) {
      //XXX this isn't in the spec but it seems necessary
      if (parser.phase == parser._inTableTextPhase) {
        InTableTextPhase inTableText = parser.phase;
        inTableText.flushCharacters();
        parser.phase = inTableText.originalPhase;
      }
      // Pop everything above the matched node, then the node itself.
      while (tree.openElements.removeLast() != node) {
        assert(tree.openElements.length > 0);
      }
      return null;
    }

    // Bottom of the stack reached without a match: stop instead of
    // indexing the list at -1.
    if (nodeIndex == 0) return null;

    nodeIndex -= 1;
    node = tree.openElements[nodeIndex];
    if (node.namespace == tree.defaultNamespace) {
      // An HTML element sits below: let the current phase handle the tag.
      return parser.phase.processEndTag(token);
    }
  }
}
3052 }
3053
3054
/**
 * Phase that handles tokens in the "after body" state.
 *
 * Only the `html` start and end tags are handled specially. Characters and
 * every other start or end tag report a parse error, switch the parser back
 * to the in-body phase and return the token.
 */
class AfterBodyPhase extends Phase {
  AfterBodyPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    return token.name == "html" ? startTagHtml(token) : startTagOther(token);
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "html": return endTagHtml(token);
      default: return endTagOther(token);
    }
  }

  /** Always returns false (the original comment reads "Stop parsing"). */
  bool processEOF() {
    return false;
  }

  /**
   * Inserts the comment under the first entry of the open-element stack
   * (the <html> element) instead of whatever is currently open.
   */
  Token processComment(CommentToken token) {
    final htmlElement = tree.openElements[0];
    tree.insertComment(token, htmlElement);
  }

  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "unexpected-char-after-body");
    return _backToInBody(token);
  }

  /** Delegates the `html` start tag to the in-body phase. */
  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  Token startTagOther(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-after-body",
        {"name": token.name});
    return _backToInBody(token);
  }

  /**
   * Moves to the after-after-body phase, unless the parser is in innerHTML
   * mode, in which case only a parse error is reported.
   */
  void endTagHtml(Token token) {
    if (!parser.innerHTMLMode) {
      parser.phase = parser._afterAfterBodyPhase;
      return;
    }
    parser.parseError(token.span, "unexpected-end-tag-after-body-innerhtml");
  }

  Token endTagOther(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag-after-body",
        {"name": token.name});
    return _backToInBody(token);
  }

  /** Switches the parser to the in-body phase and returns [token]. */
  Token _backToInBody(Token token) {
    parser.phase = parser._inBodyPhase;
    return token;
  }
}
3109
/**
 * Phase that handles tokens in the "in frameset" state.
 *
 * `frameset` and `frame` start tags insert elements, `noframes` is delegated
 * to the in-body phase, and a `frameset` end tag pops the current node.
 * Characters and all other tags only report a parse error.
 */
class InFramesetPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
  InFramesetPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    final name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "frameset") return startTagFrameset(token);
    if (name == "frame") return startTagFrame(token);
    if (name == "noframes") return startTagNoframes(token);
    return startTagOther(token);
  }

  processEndTag(EndTagToken token) {
    if (token.name == "frameset") return endTagFrameset(token);
    return endTagOther(token);
  }

  /**
   * Reports "eof-in-frameset" unless the current node is the `html`
   * element; always returns false.
   */
  bool processEOF() {
    final current = tree.openElements.last;
    if (current.tagName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError(current.sourceSpan, "eof-in-frameset");
    }
    return false;
  }

  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "unexpected-char-in-frameset");
  }

  void startTagFrameset(StartTagToken token) {
    tree.insertElement(token);
  }

  /** Inserts the `frame` element and pops it straight away. */
  void startTagFrame(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
  }

  Token startTagNoframes(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  Token startTagOther(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-in-frameset",
        {"name": token.name});
  }

  void endTagFrameset(EndTagToken token) {
    if (tree.openElements.last.tagName != "html") {
      tree.openElements.removeLast();
    } else {
      // innerHTML case
      parser.parseError(token.span,
          "unexpected-frameset-in-frameset-innerhtml");
    }

    if (parser.innerHTMLMode) return;
    // Not in innerHTML mode: once the current node is no longer a
    // "frameset" element, switch to the after-frameset phase.
    if (tree.openElements.last.tagName != "frameset") {
      parser.phase = parser._afterFramesetPhase;
    }
  }

  void endTagOther(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag-in-frameset",
        {"name": token.name});
  }
}
3183
3184
/**
 * Phase that handles tokens in the "after frameset" state.
 *
 * A `noframes` start tag is delegated to the in-head phase and an `html`
 * end tag moves the parser to the after-after-frameset phase. Characters
 * and all other tags only report a parse error.
 */
class AfterFramesetPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#after3
  AfterFramesetPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    final name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "noframes") return startTagNoframes(token);
    return startTagOther(token);
  }

  processEndTag(EndTagToken token) {
    if (token.name == "html") return endTagHtml(token);
    return endTagOther(token);
  }

  /** Always returns false (the original comment reads "Stop parsing"). */
  bool processEOF() {
    return false;
  }

  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "unexpected-char-after-frameset");
  }

  Token startTagNoframes(StartTagToken token) =>
      parser._inHeadPhase.processStartTag(token);

  void startTagOther(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-after-frameset",
        {"name": token.name});
  }

  void endTagHtml(EndTagToken token) {
    parser.phase = parser._afterAfterFramesetPhase;
  }

  void endTagOther(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag-after-frameset",
        {"name": token.name});
  }
}
3229
3230
/**
 * Phase that handles tokens in the "after after body" state.
 *
 * Comments are attached to the document, and whitespace and the `html`
 * start tag are delegated to the in-body phase. Characters, other start
 * tags and every end tag report a parse error, switch the parser back to
 * the in-body phase and return the token.
 */
class AfterAfterBodyPhase extends Phase {
  AfterAfterBodyPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    return token.name == 'html' ? startTagHtml(token) : startTagOther(token);
  }

  /** Always returns false. */
  bool processEOF() {
    return false;
  }

  /** Inserts the comment with the document itself as its parent. */
  Token processComment(CommentToken token) {
    final document = tree.document;
    tree.insertComment(token, document);
  }

  Token processSpaceCharacters(SpaceCharactersToken token) =>
      parser._inBodyPhase.processSpaceCharacters(token);

  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "expected-eof-but-got-char");
    return _backToInBody(token);
  }

  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  Token startTagOther(StartTagToken token) {
    parser.parseError(token.span, "expected-eof-but-got-start-tag",
        {"name": token.name});
    return _backToInBody(token);
  }

  Token processEndTag(EndTagToken token) {
    parser.parseError(token.span, "expected-eof-but-got-end-tag",
        {"name": token.name});
    return _backToInBody(token);
  }

  /** Switches the parser to the in-body phase and returns [token]. */
  Token _backToInBody(Token token) {
    parser.phase = parser._inBodyPhase;
    return token;
  }
}
3273
/**
 * Phase that handles tokens in the "after after frameset" state.
 *
 * Comments are attached to the document. Whitespace and the `html` start
 * tag are delegated to the in-body phase, and `noframes` to the in-head
 * phase. Characters, other start tags and every end tag only report a
 * parse error; the phase is not changed.
 */
class AfterAfterFramesetPhase extends Phase {
  AfterAfterFramesetPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    final name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "noframes") return startTagNoFrames(token);
    return startTagOther(token);
  }

  /** Always returns false. */
  bool processEOF() {
    return false;
  }

  /** Inserts the comment with the document itself as its parent. */
  Token processComment(CommentToken token) {
    final document = tree.document;
    tree.insertComment(token, document);
  }

  Token processSpaceCharacters(SpaceCharactersToken token) =>
      parser._inBodyPhase.processSpaceCharacters(token);

  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "expected-eof-but-got-char");
  }

  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  Token startTagNoFrames(StartTagToken token) =>
      parser._inHeadPhase.processStartTag(token);

  void startTagOther(StartTagToken token) {
    parser.parseError(token.span, "expected-eof-but-got-start-tag",
        {"name": token.name});
  }

  Token processEndTag(EndTagToken token) {
    parser.parseError(token.span, "expected-eof-but-got-end-tag",
        {"name": token.name});
  }
}
3317
3318
/**
 * Error in parsed document.
 *
 * Carries the [errorCode] used to look up the message template, the [span]
 * the error refers to, and the [data] substituted into the template.
 */
class ParseError implements Exception {
  final String errorCode;
  final Span span;
  final Map data;

  ParseError(this.errorCode, this.span, this.data);

  /** Line of the start of [span]. */
  int get line {
    return span.start.line;
  }

  /** Column of the start of [span]. */
  int get column {
    return span.start.column;
  }

  /**
   * Gets the human readable error message for this error. Use
   * [span.getLocationMessage] or [toString] to get a message including span
   * information. If there is a file associated with the span, both
   * [span.getLocationMessage] and [toString] are equivalent. Otherwise,
   * [span.getLocationMessage] will not show any source url information, but
   * [toString] will include 'ParserError:' as a prefix.
   */
  String get message {
    final template = errorMessages[errorCode];
    return formatStr(template, data);
  }

  String toString() {
    var res = span.getLocationMessage(message);
    // With a source url the location message is returned as it is.
    if (span.sourceUrl != null) return res;
    return 'ParserError$res';
  }
}
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698