Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(24)

Side by Side Diff: observatory_pub_packages/html5lib/parser.dart

Issue 816693004: Add observatory_pub_packages snapshot to third_party (Closed) Base URL: http://dart.googlecode.com/svn/third_party/
Patch Set: Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /// This library has a parser for HTML5 documents, that lets you parse HTML
2 /// easily from a script or server side application:
3 ///
///     import 'package:html5lib/parser.dart' show parse;
///     import 'package:html5lib/dom.dart';
///     main() {
///       var document = parse(
///         '<body>Hello world! <a href="www.html5rocks.com">HTML5 rocks!');
///       print(document.outerHtml);
///     }
11 ///
12 /// The resulting document you get back has a DOM-like API for easy tree
13 /// traversal and manipulation.
14 library parser;
15
16 import 'dart:collection';
17 import 'dart:math';
18 import 'package:source_span/source_span.dart';
19
20 import 'src/treebuilder.dart';
21 import 'src/constants.dart';
22 import 'src/encoding_parser.dart';
23 import 'src/token.dart';
24 import 'src/tokenizer.dart';
25 import 'src/utils.dart';
26 import 'dom.dart';
27
/// Parses [input] as a complete HTML5 document and returns its [Document].
///
/// [input] may be a [String], a [List<int>] of bytes, or an [HtmlTokenizer].
///
/// When [input] is not an [HtmlTokenizer], [encoding] may name the character
/// encoding to use. If given, it takes precedence over any BOM or later
/// declaration (such as one in a meta element).
///
/// Pass [generateSpans] to have [SourceSpan]s generated; without it the
/// [Node.sourceSpan] property is `null`. Together with [generateSpans],
/// [sourceUrl] can record where [input] came from.
Document parse(input, {String encoding, bool generateSpans: false,
    String sourceUrl}) {
  return new HtmlParser(input,
      encoding: encoding,
      generateSpans: generateSpans,
      sourceUrl: sourceUrl).parse();
}
45
46
/// Parses [input] as an HTML5 document fragment and returns its
/// [DocumentFragment].
///
/// [input] may be a [String], a [List<int>] of bytes, or an [HtmlTokenizer].
/// [container] names the element the fragment is parsed inside of; it
/// defaults to "div".
///
/// When [input] is not an [HtmlTokenizer], [encoding] may name the character
/// encoding to use. If given, it takes precedence over any BOM or later
/// declaration (such as one in a meta element).
///
/// Pass [generateSpans] to have [SourceSpan]s generated; without it the
/// [Node.sourceSpan] property is `null`. Together with [generateSpans],
/// [sourceUrl] can record where [input] came from.
DocumentFragment parseFragment(input, {String container: "div",
    String encoding, bool generateSpans: false, String sourceUrl}) {
  return new HtmlParser(input,
      encoding: encoding,
      generateSpans: generateSpans,
      sourceUrl: sourceUrl).parseFragment(container);
}
65
66
/// Parser for HTML, which generates a tree structure from a stream of
/// (possibly malformed) characters.
///
/// Tokens are pulled from [tokenizer] and dispatched to the current [phase];
/// the phases populate [tree]. Problems found along the way are collected in
/// [errors] (or thrown immediately when [strict] is set).
class HtmlParser {
  /// Raise an exception on the first error encountered.
  final bool strict;

  /// True to generate [SourceSpan]s for the [Node.sourceSpan] property.
  final bool generateSpans;

  /// The token source consumed by [mainLoop].
  final HtmlTokenizer tokenizer;

  /// The builder that receives elements, text and comments from the phases.
  final TreeBuilder tree;

  /// Parse errors recorded by [parseError]. Cleared by [reset].
  final List<ParseError> errors = <ParseError>[];

  String container;

  /// Set by [BeforeHtmlPhase] when the first start tag seen is `html`, and
  /// consulted/cleared by [Phase.startTagHtml].
  bool firstStartTag = false;

  // TODO(jmesserly): use enum?
  /// "quirks" / "limited quirks" / "no quirks"
  String compatMode = "no quirks";

  /// innerHTML container when parsing document fragment.
  String innerHTML;

  /// The phase that handles the next token (unless the token is routed to the
  /// foreign-content phase by [inForeignContent]).
  Phase phase;

  Phase lastPhase;

  /// The phase that was active when [parseRCDataRawtext] (or a script start
  /// tag in the head) switched to the text phase.
  Phase originalPhase;

  Phase beforeRCDataPhase;

  bool framesetOK;

  // These fields hold the different phase singletons. At any given time one
  // of them will be active.
  InitialPhase _initialPhase;
  BeforeHtmlPhase _beforeHtmlPhase;
  BeforeHeadPhase _beforeHeadPhase;
  InHeadPhase _inHeadPhase;
  AfterHeadPhase _afterHeadPhase;
  InBodyPhase _inBodyPhase;
  TextPhase _textPhase;
  InTablePhase _inTablePhase;
  InTableTextPhase _inTableTextPhase;
  InCaptionPhase _inCaptionPhase;
  InColumnGroupPhase _inColumnGroupPhase;
  InTableBodyPhase _inTableBodyPhase;
  InRowPhase _inRowPhase;
  InCellPhase _inCellPhase;
  InSelectPhase _inSelectPhase;
  InSelectInTablePhase _inSelectInTablePhase;
  InForeignContentPhase _inForeignContentPhase;
  AfterBodyPhase _afterBodyPhase;
  InFramesetPhase _inFramesetPhase;
  AfterFramesetPhase _afterFramesetPhase;
  AfterAfterBodyPhase _afterAfterBodyPhase;
  AfterAfterFramesetPhase _afterAfterFramesetPhase;

  /// Create an HtmlParser and configure the [tree] builder and [strict] mode.
  /// The [input] can be a [String], [List<int>] of bytes or an [HtmlTokenizer].
  ///
  /// If [input] is not a [HtmlTokenizer], you can specify a few more arguments.
  ///
  /// The [encoding] must be a string that indicates the encoding. If specified,
  /// that encoding will be used, regardless of any BOM or later declaration
  /// (such as in a meta element).
  ///
  /// Set [parseMeta] to false if you want to disable parsing the meta element.
  ///
  /// Set [lowercaseElementName] or [lowercaseAttrName] to false to disable the
  /// automatic conversion of element and attribute names to lower case. Note
  /// that standard way to parse HTML is to lowercase, which is what the browser
  /// DOM will do if you request [Node.outerHTML], for example.
  HtmlParser(input, {String encoding, bool parseMeta: true,
      bool lowercaseElementName: true, bool lowercaseAttrName: true,
      this.strict: false, bool generateSpans: false, String sourceUrl,
      TreeBuilder tree})
      : generateSpans = generateSpans,
        tree = tree != null ? tree : new TreeBuilder(true),
        tokenizer = (input is HtmlTokenizer ? input :
            new HtmlTokenizer(input, encoding: encoding, parseMeta: parseMeta,
                lowercaseElementName: lowercaseElementName,
                lowercaseAttrName: lowercaseAttrName,
                generateSpans: generateSpans, sourceUrl: sourceUrl)) {

    tokenizer.parser = this;
    _initialPhase = new InitialPhase(this);
    _beforeHtmlPhase = new BeforeHtmlPhase(this);
    _beforeHeadPhase = new BeforeHeadPhase(this);
    _inHeadPhase = new InHeadPhase(this);
    // TODO(jmesserly): html5lib did not implement the no script parsing mode
    // More information here:
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#scripting-flag
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inheadnoscript
    // "inHeadNoscript": new InHeadNoScriptPhase(this);
    _afterHeadPhase = new AfterHeadPhase(this);
    _inBodyPhase = new InBodyPhase(this);
    _textPhase = new TextPhase(this);
    _inTablePhase = new InTablePhase(this);
    _inTableTextPhase = new InTableTextPhase(this);
    _inCaptionPhase = new InCaptionPhase(this);
    _inColumnGroupPhase = new InColumnGroupPhase(this);
    _inTableBodyPhase = new InTableBodyPhase(this);
    _inRowPhase = new InRowPhase(this);
    _inCellPhase = new InCellPhase(this);
    _inSelectPhase = new InSelectPhase(this);
    _inSelectInTablePhase = new InSelectInTablePhase(this);
    _inForeignContentPhase = new InForeignContentPhase(this);
    _afterBodyPhase = new AfterBodyPhase(this);
    _inFramesetPhase = new InFramesetPhase(this);
    _afterFramesetPhase = new AfterFramesetPhase(this);
    _afterAfterBodyPhase = new AfterAfterBodyPhase(this);
    _afterAfterFramesetPhase = new AfterAfterFramesetPhase(this);
  }

  /// Whether a fragment is being parsed, i.e. [innerHTML] has been set.
  bool get innerHTMLMode => innerHTML != null;

  /// Parse an html5 document into a tree.
  /// After parsing, [errors] will be populated with parse errors, if any.
  Document parse() {
    innerHTML = null;
    _parse();
    return tree.getDocument();
  }

  /// Parse an html5 document fragment into a tree.
  /// Pass a [container] to change the type of the containing element.
  /// After parsing, [errors] will be populated with parse errors, if any.
  ///
  /// Throws an [ArgumentError] if [container] is `null`.
  DocumentFragment parseFragment([String container = "div"]) {
    if (container == null) throw new ArgumentError('container');
    innerHTML = container.toLowerCase();
    _parse();
    return tree.getFragment();
  }

  /// Resets all state and runs [mainLoop], starting over from scratch each
  /// time the loop is interrupted by a [ReparseException].
  void _parse() {
    reset();

    while (true) {
      try {
        mainLoop();
        break;
      } on ReparseException {
        // Note: this happens if we start parsing but the character encoding
        // changes. So we should only need to restart very early in the parse.
        reset();
      }
    }
  }

  /// Returns the parser, tokenizer and tree builder to their starting state.
  ///
  /// In fragment mode the tokenizer state is chosen from the container name,
  /// the root `html` element is inserted and the insertion mode is derived
  /// from the container; otherwise parsing begins in the initial phase.
  void reset() {
    tokenizer.reset();

    tree.reset();
    firstStartTag = false;
    errors.clear();
    // "quirks" / "limited quirks" / "no quirks"
    compatMode = "no quirks";

    if (innerHTMLMode) {
      // NOTE(review): the set names look swapped relative to the tokenizer
      // states they select (cdataElements -> rcdataState, rcdataElements ->
      // rawtextState); presumably the naming is inherited from the constants
      // library -- confirm against src/constants.dart before "fixing" it.
      if (cdataElements.contains(innerHTML)) {
        tokenizer.state = tokenizer.rcdataState;
      } else if (rcdataElements.contains(innerHTML)) {
        tokenizer.state = tokenizer.rawtextState;
      } else if (innerHTML == 'plaintext') {
        tokenizer.state = tokenizer.plaintextState;
      } else {
        // state already is data state
        // tokenizer.state = tokenizer.dataState;
      }
      phase = _beforeHtmlPhase;
      _beforeHtmlPhase.insertHtmlElement();
      resetInsertionMode();
    } else {
      phase = _initialPhase;
    }

    lastPhase = null;
    beforeRCDataPhase = null;
    framesetOK = true;
  }

  /// Whether [element] is an HTML integration point.
  ///
  /// A MathML `annotation-xml` element qualifies when its `encoding`
  /// attribute, ASCII-lowercased, is `text/html` or `application/xhtml+xml`.
  /// Any other element qualifies when its (namespace, local name) pair is in
  /// [htmlIntegrationPointElements].
  bool isHTMLIntegrationPoint(Element element) {
    if (element.localName == "annotation-xml" &&
        element.namespaceUri == Namespaces.mathml) {
      var enc = element.attributes["encoding"];
      if (enc != null) enc = asciiUpper2Lower(enc);
      return enc == "text/html" || enc == "application/xhtml+xml";
    } else {
      return htmlIntegrationPointElements.contains(
          new Pair(element.namespaceUri, element.localName));
    }
  }

  /// Whether [element]'s (namespace, local name) pair is listed in
  /// [mathmlTextIntegrationPointElements].
  bool isMathMLTextIntegrationPoint(Element element) {
    return mathmlTextIntegrationPointElements.contains(
        new Pair(element.namespaceUri, element.localName));
  }

  /// Whether [token], whose kind is [type], must be handled by the
  /// foreign-content phase rather than the current [phase].
  ///
  /// The decision is based on the innermost open element: it is `false` when
  /// there is none or when it is in the tree's default namespace, and also
  /// for the start-tag/character exceptions at MathML text integration
  /// points, `annotation-xml` with an `svg` start tag, and HTML integration
  /// points.
  bool inForeignContent(Token token, int type) {
    if (tree.openElements.isEmpty) return false;

    var node = tree.openElements.last;
    if (node.namespaceUri == tree.defaultNamespace) return false;

    if (isMathMLTextIntegrationPoint(node)) {
      if (type == TokenKind.startTag &&
          (token as StartTagToken).name != "mglyph" &&
          (token as StartTagToken).name != "malignmark")  {
        return false;
      }
      if (type == TokenKind.characters || type == TokenKind.spaceCharacters) {
        return false;
      }
    }

    if (node.localName == "annotation-xml" && type == TokenKind.startTag &&
        (token as StartTagToken).name == "svg") {
      return false;
    }

    if (isHTMLIntegrationPoint(node)) {
      if (type == TokenKind.startTag ||
          type == TokenKind.characters ||
          type == TokenKind.spaceCharacters) {
        return false;
      }
    }

    return true;
  }

  /// Drains the tokenizer, dispatching every token to the active phase, then
  /// runs end-of-file processing.
  ///
  /// A phase handler may hand the token back to request that it be
  /// reprocessed (normally after switching [phase]); the inner loop keeps
  /// dispatching until a handler returns `null`.
  void mainLoop() {
    while (tokenizer.moveNext()) {
      var token = tokenizer.current;
      var newToken = token;
      int type;
      while (newToken != null) {
        type = newToken.kind;

        // Note: avoid "is" test here, see http://dartbug.com/4795
        if (type == TokenKind.parseError) {
          ParseErrorToken error = newToken;
          parseError(error.span, error.data, error.messageParams);
          newToken = null;
        } else {
          Phase phase_ = phase;
          if (inForeignContent(token, type)) {
            phase_ = _inForeignContentPhase;
          }

          switch (type) {
            case TokenKind.characters:
              newToken = phase_.processCharacters(newToken);
              break;
            case TokenKind.spaceCharacters:
              newToken = phase_.processSpaceCharacters(newToken);
              break;
            case TokenKind.startTag:
              newToken = phase_.processStartTag(newToken);
              break;
            case TokenKind.endTag:
              newToken = phase_.processEndTag(newToken);
              break;
            case TokenKind.comment:
              newToken = phase_.processComment(newToken);
              break;
            case TokenKind.doctype:
              newToken = phase_.processDoctype(newToken);
              break;
          }
        }
      }

      // A self-closing start tag that no handler acknowledged is an error.
      if (token is StartTagToken) {
        if (token.selfClosing && !token.selfClosingAcknowledged) {
          parseError(token.span, "non-void-element-with-trailing-solidus",
              {"name": token.name});
        }
      }
    }

    // When the loop finishes it's EOF
    var reprocess = true;
    var reprocessPhases = [];
    while (reprocess) {
      reprocessPhases.add(phase);
      reprocess = phase.processEOF();
      if (reprocess) {
        // A phase asking for EOF to be reprocessed must have moved on to a
        // phase that has not handled EOF yet, otherwise this would loop.
        assert(!reprocessPhases.contains(phase));
      }
    }
  }

  /// The last span available. Used for EOF errors if we don't have something
  /// better.
  SourceSpan get _lastSpan {
    if (tokenizer.stream.fileInfo == null) return null;
    var pos = tokenizer.stream.position;
    return tokenizer.stream.fileInfo.location(pos).pointSpan();
  }

  /// Records a [ParseError] with code [errorcode] and message parameters
  /// [datavars] in [errors], and throws it when [strict] is set.
  ///
  /// When [generateSpans] is off and no [span] was supplied, the error is
  /// located at [_lastSpan] instead.
  void parseError(SourceSpan span, String errorcode,
      [Map datavars = const {}]) {

    if (!generateSpans && span == null) {
      span = _lastSpan;
    }

    var err = new ParseError(errorcode, span, datavars);
    errors.add(err);
    if (strict) throw err;
  }

  /// Renames the lower-cased `definitionurl` attribute of [token], if
  /// present, to `definitionURL`.
  void adjustMathMLAttributes(StartTagToken token) {
    var orig = token.data.remove("definitionurl");
    if (orig != null) {
      token.data["definitionURL"] = orig;
    }
  }

  /// Restores the mixed-case spelling of the SVG attribute names on [token]
  /// that are listed in the table below.
  void adjustSVGAttributes(StartTagToken token) {
    final replacements = const {
      "attributename": "attributeName",
      "attributetype": "attributeType",
      "basefrequency": "baseFrequency",
      "baseprofile": "baseProfile",
      "calcmode": "calcMode",
      "clippathunits": "clipPathUnits",
      "contentscripttype": "contentScriptType",
      "contentstyletype": "contentStyleType",
      "diffuseconstant": "diffuseConstant",
      "edgemode": "edgeMode",
      "externalresourcesrequired": "externalResourcesRequired",
      "filterres": "filterRes",
      "filterunits": "filterUnits",
      "glyphref": "glyphRef",
      "gradienttransform": "gradientTransform",
      "gradientunits": "gradientUnits",
      "kernelmatrix": "kernelMatrix",
      "kernelunitlength": "kernelUnitLength",
      "keypoints": "keyPoints",
      "keysplines": "keySplines",
      "keytimes": "keyTimes",
      "lengthadjust": "lengthAdjust",
      "limitingconeangle": "limitingConeAngle",
      "markerheight": "markerHeight",
      "markerunits": "markerUnits",
      "markerwidth": "markerWidth",
      "maskcontentunits": "maskContentUnits",
      "maskunits": "maskUnits",
      "numoctaves": "numOctaves",
      "pathlength": "pathLength",
      "patterncontentunits": "patternContentUnits",
      "patterntransform": "patternTransform",
      "patternunits": "patternUnits",
      "pointsatx": "pointsAtX",
      "pointsaty": "pointsAtY",
      "pointsatz": "pointsAtZ",
      "preservealpha": "preserveAlpha",
      "preserveaspectratio": "preserveAspectRatio",
      "primitiveunits": "primitiveUnits",
      "refx": "refX",
      "refy": "refY",
      "repeatcount": "repeatCount",
      "repeatdur": "repeatDur",
      "requiredextensions": "requiredExtensions",
      "requiredfeatures": "requiredFeatures",
      "specularconstant": "specularConstant",
      "specularexponent": "specularExponent",
      "spreadmethod": "spreadMethod",
      "startoffset": "startOffset",
      "stddeviation": "stdDeviation",
      "stitchtiles": "stitchTiles",
      "surfacescale": "surfaceScale",
      "systemlanguage": "systemLanguage",
      "tablevalues": "tableValues",
      "targetx": "targetX",
      "targety": "targetY",
      "textlength": "textLength",
      "viewbox": "viewBox",
      "viewtarget": "viewTarget",
      "xchannelselector": "xChannelSelector",
      "ychannelselector": "yChannelSelector",
      "zoomandpan": "zoomAndPan"
    };
    // Iterate over a copy of the keys because the map is modified in the loop.
    for (var originalName in token.data.keys.toList()) {
      var svgName = replacements[originalName];
      if (svgName != null) {
        token.data[svgName] = token.data.remove(originalName);
      }
    }
  }

  /// Replaces the string keys of the `xlink:*`, `xml:*` and `xmlns*`
  /// attributes on [token] with namespaced [AttributeName] keys.
  void adjustForeignAttributes(StartTagToken token) {
    // TODO(jmesserly): I don't like mixing non-string objects with strings in
    // the Node.attributes Map. Is there another solution?
    final replacements = const {
      "xlink:actuate": const AttributeName("xlink", "actuate",
            Namespaces.xlink),
      "xlink:arcrole": const AttributeName("xlink", "arcrole",
            Namespaces.xlink),
      "xlink:href": const AttributeName("xlink", "href", Namespaces.xlink),
      "xlink:role": const AttributeName("xlink", "role", Namespaces.xlink),
      "xlink:show": const AttributeName("xlink", "show", Namespaces.xlink),
      "xlink:title": const AttributeName("xlink", "title", Namespaces.xlink),
      "xlink:type": const AttributeName("xlink", "type", Namespaces.xlink),
      "xml:base": const AttributeName("xml", "base", Namespaces.xml),
      "xml:lang": const AttributeName("xml", "lang", Namespaces.xml),
      "xml:space": const AttributeName("xml", "space", Namespaces.xml),
      "xmlns": const AttributeName(null, "xmlns", Namespaces.xmlns),
      "xmlns:xlink": const AttributeName("xmlns", "xlink", Namespaces.xmlns)
    };

    // Iterate over a copy of the keys because the map is modified in the loop.
    for (var originalName in token.data.keys.toList()) {
      var foreignName = replacements[originalName];
      if (foreignName != null) {
        token.data[foreignName] = token.data.remove(originalName);
      }
    }
  }

  /// Chooses [phase] from the stack of open elements, walking it from the
  /// innermost element outwards and falling back to the in-body phase.
  ///
  /// For the outermost element the fragment container name ([innerHTML]) is
  /// used in place of the element's own name.
  void resetInsertionMode() {
    // The name of this method is mostly historical. (It's also used in the
    // specification.)
    for (var node in tree.openElements.reversed) {
      var nodeName = node.localName;
      bool last = node == tree.openElements[0];
      if (last) {
        assert(innerHTMLMode);
        nodeName = innerHTML;
      }
      // Check for conditions that should only happen in the innerHTML
      // case
      switch (nodeName) {
        case "select": case "colgroup": case "head": case "html":
          assert(innerHTMLMode);
          break;
      }
      // Elements in a foreign namespace do not select a phase, except for the
      // outermost one, whose name was replaced by the container name above.
      if (!last && node.namespaceUri != tree.defaultNamespace) {
        continue;
      }
      switch (nodeName) {
        case "select": phase = _inSelectPhase; return;
        case "td": phase = _inCellPhase; return;
        case "th": phase = _inCellPhase; return;
        case "tr": phase = _inRowPhase; return;
        case "tbody": phase = _inTableBodyPhase; return;
        case "thead": phase = _inTableBodyPhase; return;
        case "tfoot": phase = _inTableBodyPhase; return;
        case "caption": phase = _inCaptionPhase; return;
        case "colgroup": phase = _inColumnGroupPhase; return;
        case "table": phase = _inTablePhase; return;
        case "head": phase = _inBodyPhase; return;
        case "body": phase = _inBodyPhase; return;
        case "frameset": phase = _inFramesetPhase; return;
        case "html": phase = _beforeHeadPhase; return;
      }
    }
    phase = _inBodyPhase;
  }

  /// Generic RCDATA/RAWTEXT Parsing algorithm
  /// [contentType] - RCDATA or RAWTEXT
  ///
  /// Inserts the element for [token], switches the tokenizer to the matching
  /// state, remembers the current phase in [originalPhase] and continues in
  /// the text phase.
  void parseRCDataRawtext(Token token, String contentType) {
    assert(contentType == "RAWTEXT" || contentType == "RCDATA");

    tree.insertElement(token);

    if (contentType == "RAWTEXT") {
      tokenizer.state = tokenizer.rawtextState;
    } else {
      tokenizer.state = tokenizer.rcdataState;
    }

    originalPhase = phase;
    phase = _textPhase;
  }
}
549
550
/// Common superclass of the per-insertion-mode token handlers.
///
/// Every `process*` hook returns the token that should be dispatched again,
/// or `null` once the token has been fully consumed. The hooks for EOF, start
/// tags and end tags have no default and throw [UnimplementedError] unless a
/// subclass overrides them.
class Phase {
  // Order should be (they can be omitted):
  // * EOF
  // * Comment
  // * Doctype
  // * SpaceCharacters
  // * Characters
  // * StartTag
  //   - startTag* methods
  // * EndTag
  //   - endTag* methods

  /// The parser that owns this phase.
  final HtmlParser parser;

  /// The owning parser's tree builder.
  final TreeBuilder tree;

  Phase(HtmlParser owner)
      : parser = owner,
        tree = owner.tree;

  /// Handles end of input; returns whether EOF must be processed again by
  /// whichever phase is current afterwards.
  bool processEOF() {
    throw new UnimplementedError();
  }

  /// Appends the comment to the innermost open element.
  ///
  /// This is right for most phases; the others override it.
  Token processComment(CommentToken token) {
    var innermost = tree.openElements.last;
    tree.insertComment(token, innermost);
    return null;
  }

  /// Reports a doctype as unexpected and drops it.
  Token processDoctype(DoctypeToken token) {
    parser.parseError(token.span, "unexpected-doctype");
    return null;
  }

  /// Inserts the character data as text.
  Token processCharacters(CharactersToken token) {
    tree.insertText(token.data, token.span);
    return null;
  }

  /// Inserts the whitespace as text.
  Token processSpaceCharacters(SpaceCharactersToken token) {
    tree.insertText(token.data, token.span);
    return null;
  }

  Token processStartTag(StartTagToken token) {
    throw new UnimplementedError();
  }

  /// Handles an `html` start tag by copying those of its attributes that the
  /// root element does not have yet onto the root element.
  Token startTagHtml(StartTagToken token) {
    var isUnexpectedRoot =
        token.name == "html" && parser.firstStartTag == false;
    if (isUnexpectedRoot) {
      parser.parseError(token.span, "non-html-root");
    }
    // XXX Need a check here to see if the first start tag token emitted is
    // this token... If it's not, invoke parser.parseError().
    for (var attr in token.data.keys) {
      var value = token.data[attr];
      tree.openElements[0].attributes.putIfAbsent(attr, () => value);
    }
    parser.firstStartTag = false;
    return null;
  }

  Token processEndTag(EndTagToken token) {
    throw new UnimplementedError();
  }

  /// Helper method for popping openElements.
  ///
  /// Pops until an element whose local name is [name] has been removed; that
  /// element is removed as well.
  void popOpenElementsUntil(String name) {
    var popped;
    do {
      popped = tree.openElements.removeLast();
    } while (popped.localName != name);
  }
}
625
/// The phase a document parse starts in: it waits for a doctype and derives
/// the parser's compatibility mode from it.
///
/// Any token other than whitespace, a comment or a doctype puts the parser in
/// quirks mode and is handed on to the before-html phase.
class InitialPhase extends Phase {
  InitialPhase(parser) : super(parser);

  /// Whitespace before the doctype is ignored.
  Token processSpaceCharacters(SpaceCharactersToken token) {
    return null;
  }

  /// Comments before the doctype are attached to the document itself.
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  /// Inserts the doctype, sets [HtmlParser.compatMode] to "quirks" or
  /// "limited quirks" when the doctype calls for it, and moves on to the
  /// before-html phase.
  ///
  /// The public identifier is compared ASCII-case-insensitively against the
  /// lists below.
  Token processDoctype(DoctypeToken token) {
    var name = token.name;
    String publicId = token.publicId;
    var systemId = token.systemId;
    var correct = token.correct;

    if ((name != "html" || publicId != null ||
        systemId != null && systemId != "about:legacy-compat")) {
      parser.parseError(token.span, "unknown-doctype");
    }

    if (publicId == null) {
      publicId = "";
    }

    tree.insertDoctype(token);

    if (publicId != "") {
      publicId = asciiUpper2Lower(publicId);
    }

    if (!correct || token.name != "html"
        || startsWithAny(publicId, const [
          "+//silmaril//dtd html pro v0r11 19970101//",
          "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
          "-//as//dtd html 3.0 aswedit + extensions//",
          "-//ietf//dtd html 2.0 level 1//",
          "-//ietf//dtd html 2.0 level 2//",
          "-//ietf//dtd html 2.0 strict level 1//",
          "-//ietf//dtd html 2.0 strict level 2//",
          "-//ietf//dtd html 2.0 strict//",
          "-//ietf//dtd html 2.0//",
          "-//ietf//dtd html 2.1e//",
          "-//ietf//dtd html 3.0//",
          "-//ietf//dtd html 3.2 final//",
          "-//ietf//dtd html 3.2//",
          "-//ietf//dtd html 3//",
          "-//ietf//dtd html level 0//",
          "-//ietf//dtd html level 1//",
          "-//ietf//dtd html level 2//",
          "-//ietf//dtd html level 3//",
          "-//ietf//dtd html strict level 0//",
          "-//ietf//dtd html strict level 1//",
          "-//ietf//dtd html strict level 2//",
          "-//ietf//dtd html strict level 3//",
          "-//ietf//dtd html strict//",
          "-//ietf//dtd html//",
          "-//metrius//dtd metrius presentational//",
          "-//microsoft//dtd internet explorer 2.0 html strict//",
          "-//microsoft//dtd internet explorer 2.0 html//",
          "-//microsoft//dtd internet explorer 2.0 tables//",
          "-//microsoft//dtd internet explorer 3.0 html strict//",
          "-//microsoft//dtd internet explorer 3.0 html//",
          "-//microsoft//dtd internet explorer 3.0 tables//",
          "-//netscape comm. corp.//dtd html//",
          "-//netscape comm. corp.//dtd strict html//",
          "-//o'reilly and associates//dtd html 2.0//",
          "-//o'reilly and associates//dtd html extended 1.0//",
          "-//o'reilly and associates//dtd html extended relaxed 1.0//",
          // The next two literals must stay in one piece: a stray space
          // inside them stops the prefix from ever matching.
          "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
          "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
          "-//spyglass//dtd html 2.0 extended//",
          "-//sq//dtd html 2.0 hotmetal + extensions//",
          "-//sun microsystems corp.//dtd hotjava html//",
          "-//sun microsystems corp.//dtd hotjava strict html//",
          "-//w3c//dtd html 3 1995-03-24//",
          "-//w3c//dtd html 3.2 draft//",
          "-//w3c//dtd html 3.2 final//",
          "-//w3c//dtd html 3.2//",
          "-//w3c//dtd html 3.2s draft//",
          "-//w3c//dtd html 4.0 frameset//",
          "-//w3c//dtd html 4.0 transitional//",
          "-//w3c//dtd html experimental 19960712//",
          "-//w3c//dtd html experimental 970421//",
          "-//w3c//dtd w3 html//",
          "-//w3o//dtd w3 html 3.0//",
          "-//webtechs//dtd mozilla html 2.0//",
          "-//webtechs//dtd mozilla html//"])
        || const ["-//w3o//dtd w3 html strict 3.0//en//",
                  "-/w3c/dtd html 4.0 transitional/en",
                  "html"].contains(publicId)
        || startsWithAny(publicId, const [
          "-//w3c//dtd html 4.01 frameset//",
          "-//w3c//dtd html 4.01 transitional//"]) && systemId == null
        || systemId != null && systemId.toLowerCase() ==
           "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {

      parser.compatMode = "quirks";
    } else if (startsWithAny(publicId, const [
          "-//w3c//dtd xhtml 1.0 frameset//",
          "-//w3c//dtd xhtml 1.0 transitional//"])
        || startsWithAny(publicId, const [
          "-//w3c//dtd html 4.01 frameset//",
          "-//w3c//dtd html 4.01 transitional//"]) &&
          systemId != null) {
      parser.compatMode = "limited quirks";
    }
    parser.phase = parser._beforeHtmlPhase;
    return null;
  }

  /// Shared fallback for every token that is not a doctype: switch to quirks
  /// mode and continue in the before-html phase.
  void anythingElse() {
    parser.compatMode = "quirks";
    parser.phase = parser._beforeHtmlPhase;
  }

  /// Reports the missing doctype and has [token] reprocessed by the next
  /// phase.
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "expected-doctype-but-got-chars");
    anythingElse();
    return token;
  }

  /// Reports the missing doctype and has [token] reprocessed by the next
  /// phase.
  Token processStartTag(StartTagToken token) {
    parser.parseError(token.span, "expected-doctype-but-got-start-tag",
        {"name": token.name});
    anythingElse();
    return token;
  }

  /// Reports the missing doctype and has [token] reprocessed by the next
  /// phase.
  Token processEndTag(EndTagToken token) {
    parser.parseError(token.span, "expected-doctype-but-got-end-tag",
        {"name": token.name});
    anythingElse();
    return token;
  }

  /// Reports the missing doctype and asks for EOF to be reprocessed by the
  /// next phase.
  bool processEOF() {
    parser.parseError(parser._lastSpan, "expected-doctype-but-got-eof");
    anythingElse();
    return true;
  }
}
770
771
/// The phase that creates the root `html` element.
///
/// Whitespace is dropped and comments go onto the document. Every other
/// token, except an end tag that is reported as unexpected, causes the root
/// element to be inserted and is then reprocessed by the before-head phase.
class BeforeHtmlPhase extends Phase {
  BeforeHtmlPhase(parser) : super(parser);

  // helper methods

  /// Inserts an attribute-less `html` root element and advances the parser to
  /// the before-head phase.
  void insertHtmlElement() {
    var rootTag = new StartTagToken("html", data: {});
    tree.insertRoot(rootTag);
    parser.phase = parser._beforeHeadPhase;
  }

  // other

  /// Creates the root element and asks for EOF to be reprocessed.
  bool processEOF() {
    insertHtmlElement();
    return true;
  }

  /// Comments in this phase are attached to the document itself.
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  /// Whitespace in this phase is ignored.
  Token processSpaceCharacters(SpaceCharactersToken token) => null;

  /// Creates the root element and has [token] reprocessed.
  Token processCharacters(CharactersToken token) {
    insertHtmlElement();
    return token;
  }

  /// Creates the root element and has [token] reprocessed, first noting on
  /// the parser when the tag is an explicit `html` start tag.
  Token processStartTag(StartTagToken token) {
    var isExplicitRoot = token.name == "html";
    if (isExplicitRoot) {
      parser.firstStartTag = true;
    }
    insertHtmlElement();
    return token;
  }

  /// `</head>`, `</body>`, `</html>` and `</br>` imply the root element and
  /// are reprocessed; any other end tag is reported and dropped.
  Token processEndTag(EndTagToken token) {
    if (const ["head", "body", "html", "br"].contains(token.name)) {
      insertHtmlElement();
      return token;
    }
    parser.parseError(token.span, "unexpected-end-tag-before-html",
        {"name": token.name});
    return null;
  }
}
821
822
/// The phase between the root element and the `head` element.
///
/// An explicit `head` start tag opens the head. Most other tokens first imply
/// an attribute-less `head` element and are then reprocessed by the in-head
/// phase.
class BeforeHeadPhase extends Phase {
  BeforeHeadPhase(parser) : super(parser);

  /// Routes a start tag to the `html`, `head` or fallback handler.
  processStartTag(StartTagToken token) {
    var tagName = token.name;
    if (tagName == 'html') return startTagHtml(token);
    if (tagName == 'head') {
      startTagHead(token);
      return null;
    }
    return startTagOther(token);
  }

  /// Routes an end tag: `head`, `body`, `html` and `br` imply the head, any
  /// other end tag is reported.
  processEndTag(EndTagToken token) {
    if (const ["head", "body", "html", "br"].contains(token.name)) {
      return endTagImplyHead(token);
    }
    endTagOther(token);
    return null;
  }

  /// Implies the `head` element and asks for EOF to be reprocessed.
  bool processEOF() {
    _openImpliedHead();
    return true;
  }

  /// Whitespace in this phase is ignored.
  Token processSpaceCharacters(SpaceCharactersToken token) => null;

  /// Implies the `head` element and has [token] reprocessed.
  Token processCharacters(CharactersToken token) {
    _openImpliedHead();
    return token;
  }

  /// An `html` start tag is delegated to the in-body phase.
  Token startTagHtml(StartTagToken token) {
    return parser._inBodyPhase.processStartTag(token);
  }

  /// Inserts the `head` element for [token], records it as the tree's head
  /// pointer and switches to the in-head phase.
  void startTagHead(StartTagToken token) {
    tree.insertElement(token);
    tree.headPointer = tree.openElements.last;
    parser.phase = parser._inHeadPhase;
  }

  /// Implies the `head` element and has [token] reprocessed.
  Token startTagOther(StartTagToken token) {
    _openImpliedHead();
    return token;
  }

  /// Implies the `head` element and has [token] reprocessed.
  Token endTagImplyHead(EndTagToken token) {
    _openImpliedHead();
    return token;
  }

  /// Reports an end tag that is not allowed in this phase.
  void endTagOther(EndTagToken token) {
    parser.parseError(token.span, "end-tag-after-implied-root",
        {"name": token.name});
  }

  /// Opens a `head` element from a synthesized, attribute-less start tag.
  void _openImpliedHead() {
    startTagHead(new StartTagToken("head", data: {}));
  }
}
881
882 class InHeadPhase extends Phase {
883 InHeadPhase(parser) : super(parser);
884
885 processStartTag(StartTagToken token) {
886 switch (token.name) {
887 case "html": return startTagHtml(token);
888 case "title": return startTagTitle(token);
889 case "noscript": case "noframes": case "style":
890 return startTagNoScriptNoFramesStyle(token);
891 case "script": return startTagScript(token);
892 case "base": case "basefont": case "bgsound": case "command": case "link":
893 return startTagBaseLinkCommand(token);
894 case "meta": return startTagMeta(token);
895 case "head": return startTagHead(token);
896 default: return startTagOther(token);
897 }
898 }
899
900 processEndTag(EndTagToken token) {
901 switch (token.name) {
902 case "head": return endTagHead(token);
903 case "br": case "html": case "body": return endTagHtmlBodyBr(token);
904 default: return endTagOther(token);
905 }
906 }
907
908 // the real thing
909 bool processEOF() {
910 anythingElse();
911 return true;
912 }
913
914 Token processCharacters(CharactersToken token) {
915 anythingElse();
916 return token;
917 }
918
919 Token startTagHtml(StartTagToken token) {
920 return parser._inBodyPhase.processStartTag(token);
921 }
922
923 void startTagHead(StartTagToken token) {
924 parser.parseError(token.span, "two-heads-are-not-better-than-one");
925 }
926
927 void startTagBaseLinkCommand(StartTagToken token) {
928 tree.insertElement(token);
929 tree.openElements.removeLast();
930 token.selfClosingAcknowledged = true;
931 }
932
933 void startTagMeta(StartTagToken token) {
934 tree.insertElement(token);
935 tree.openElements.removeLast();
936 token.selfClosingAcknowledged = true;
937
938 var attributes = token.data;
939 if (!parser.tokenizer.stream.charEncodingCertain) {
940 var charset = attributes["charset"];
941 var content = attributes["content"];
942 if (charset != null) {
943 parser.tokenizer.stream.changeEncoding(charset);
944 } else if (content != null) {
945 var data = new EncodingBytes(content);
946 var codec = new ContentAttrParser(data).parse();
947 parser.tokenizer.stream.changeEncoding(codec);
948 }
949 }
950 }
951
952 void startTagTitle(StartTagToken token) {
953 parser.parseRCDataRawtext(token, "RCDATA");
954 }
955
956 void startTagNoScriptNoFramesStyle(StartTagToken token) {
957 // Need to decide whether to implement the scripting-disabled case
958 parser.parseRCDataRawtext(token, "RAWTEXT");
959 }
960
/// Inserts a `script` element, puts the tokenizer into its script-data
/// state, and switches to the text phase after remembering the current phase
/// in `parser.originalPhase`.
void startTagScript(StartTagToken token) {
  tree.insertElement(token);

  var tokenizer = parser.tokenizer;
  tokenizer.state = tokenizer.scriptDataState;

  parser.originalPhase = parser.phase;
  parser.phase = parser._textPhase;
}
967
/// Handles any start tag without a dedicated handler: runs [anythingElse]
/// and hands the same [token] back to the caller.
Token startTagOther(StartTagToken token) {
  anythingElse();
  return token;
}
972
/// Pops the current node (asserted to be `head`) and moves the parser to the
/// after-head phase.
void endTagHead(EndTagToken token) {
  var popped = parser.tree.openElements.removeLast();
  assert(popped.localName == "head");
  parser.phase = parser._afterHeadPhase;
}
978
/// Handles `html`, `body` and `br` end tags: runs [anythingElse] and hands
/// the same [token] back to the caller.
Token endTagHtmlBodyBr(EndTagToken token) {
  anythingElse();
  return token;
}
983
/// Reports an "unexpected-end-tag" parse error naming the offending tag; the
/// tag is otherwise ignored.
void endTagOther(EndTagToken token) {
  var details = {"name": token.name};
  parser.parseError(token.span, "unexpected-end-tag", details);
}
987
/// Fallback for this phase: behaves as if a `</head>` end tag had been seen.
void anythingElse() {
  var impliedEndTag = new EndTagToken("head");
  endTagHead(impliedEndTag);
}
991 }
992
993
994 // XXX If we implement a parser for which scripting is disabled we need to
995 // implement this phase.
996 //
997 // class InHeadNoScriptPhase extends Phase {
998
/// Phase used once the `head` element has been closed and before a `body`
/// or `frameset` element has been opened.
class AfterHeadPhase extends Phase {
  AfterHeadPhase(parser) : super(parser);

  /// Routes a start tag to the matching `startTag*` handler and returns the
  /// handler's result.
  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "body") return startTagBody(token);
    if (name == "frameset") return startTagFrameset(token);
    if (name == "head") return startTagHead(token);
    if (const ["base", "basefont", "bgsound", "link", "meta", "noframes",
        "script", "style", "title"].contains(name)) {
      return startTagFromHead(token);
    }
    return startTagOther(token);
  }

  /// Routes an end tag to the matching `endTag*` handler and returns the
  /// handler's result.
  processEndTag(EndTagToken token) {
    var name = token.name;
    if (name == "body" || name == "html" || name == "br") {
      return endTagHtmlBodyBr(token);
    }
    return endTagOther(token);
  }

  /// Opens an implied `body` via [anythingElse] and returns `true`.
  bool processEOF() {
    anythingElse();
    return true;
  }

  /// Opens an implied `body` via [anythingElse] and hands [token] back.
  Token processCharacters(CharactersToken token) {
    anythingElse();
    return token;
  }

  /// Delegates an `html` start tag to the in-body phase.
  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// Inserts the explicit `body` element, clears `framesetOK`, and enters the
  /// in-body phase.
  void startTagBody(StartTagToken token) {
    parser.framesetOK = false;
    tree.insertElement(token);
    parser.phase = parser._inBodyPhase;
  }

  /// Inserts the `frameset` element and enters the in-frameset phase.
  void startTagFrameset(StartTagToken token) {
    tree.insertElement(token);
    parser.phase = parser._inFramesetPhase;
  }

  /// Handles a head-only element that shows up after `</head>`.
  ///
  /// Reports a parse error, temporarily pushes the head pointer back onto the
  /// stack of open elements, lets the in-head phase process the tag, and then
  /// removes the `head` entry found by scanning the stack from the top.
  void startTagFromHead(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-out-of-my-head",
        {"name": token.name});
    tree.openElements.add(tree.headPointer);
    parser._inHeadPhase.processStartTag(token);

    var stack = tree.openElements;
    for (var i = stack.length - 1; i >= 0; i--) {
      var candidate = stack[i];
      if (candidate.localName == "head") {
        stack.remove(candidate);
        break;
      }
    }
  }

  /// Reports a parse error for a stray `head` start tag; the tag is ignored.
  void startTagHead(StartTagToken token) {
    var details = {"name": token.name};
    parser.parseError(token.span, "unexpected-start-tag", details);
  }

  /// Opens an implied `body` via [anythingElse] and hands [token] back.
  Token startTagOther(StartTagToken token) {
    anythingElse();
    return token;
  }

  /// Opens an implied `body` via [anythingElse] and hands [token] back.
  Token endTagHtmlBodyBr(EndTagToken token) {
    anythingElse();
    return token;
  }

  /// Reports a parse error for any other end tag; the tag is ignored.
  void endTagOther(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError(token.span, "unexpected-end-tag", details);
  }

  /// Fallback for this phase: inserts an implied, attribute-less `body`
  /// element, enters the in-body phase, and sets `framesetOK` to true.
  void anythingElse() {
    var impliedBody = new StartTagToken("body", data: {});
    tree.insertElement(impliedBody);
    parser.phase = parser._inBodyPhase;
    parser.framesetOK = true;
  }
}
1085
/// Signature of a function that takes a [Token] and returns a [Token].
///
/// NOTE(review): the name is misspelled ("Proccessor"); it is kept as-is
/// because code outside this chunk may refer to it.
1086 typedef Token TokenProccessor(Token token);
1087
1088 class InBodyPhase extends Phase {
/// Set to true by the `pre`/`listing` and `textarea` start tag handlers.
/// While set, the next space-characters token is routed through
/// [processSpaceCharactersDropNewline], which clears the flag and may drop
/// one leading newline.
1089 bool dropNewline = false;
1090
1091 // http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
1092 // the really-really-really-very crazy mode
1093 InBodyPhase(parser) : super(parser);
1094
/// Routes a start tag seen in the in-body phase to its handler.
///
/// Returns the handler's result: a [Token] when the handler hands the token
/// back (for example [startTagButton]), otherwise `null`.
///
/// Every tag name appears in exactly one `case` group. The first matching
/// label wins, so a name repeated further down could never be reached.
processStartTag(StartTagToken token) {
  switch (token.name) {
    case "html":
      return startTagHtml(token);
    // "noframes" belongs to this group only; a second label for it in the
    // raw-text group below would be dead code.
    case "base": case "basefont": case "bgsound": case "command": case "link":
    case "meta": case "noframes": case "script": case "style": case "title":
      return startTagProcessInHead(token);
    case "body":
      return startTagBody(token);
    case "frameset":
      return startTagFrameset(token);
    case "address": case "article": case "aside": case "blockquote":
    case "center": case "details": case "dir": case "div": case "dl":
    case "fieldset": case "figcaption": case "figure": case "footer":
    case "header": case "hgroup": case "menu": case "nav": case "ol":
    case "p": case "section": case "summary": case "ul":
      return startTagCloseP(token);
    // headingElements
    case "h1": case "h2": case "h3": case "h4": case "h5": case "h6":
      return startTagHeading(token);
    case "pre": case "listing":
      return startTagPreListing(token);
    case "form":
      return startTagForm(token);
    case "li": case "dd": case "dt":
      return startTagListItem(token);
    case "plaintext":
      return startTagPlaintext(token);
    case "a":
      return startTagA(token);
    case "b": case "big": case "code": case "em": case "font": case "i":
    case "s": case "small": case "strike": case "strong": case "tt": case "u":
      return startTagFormatting(token);
    case "nobr":
      return startTagNobr(token);
    case "button":
      return startTagButton(token);
    case "applet": case "marquee": case "object":
      return startTagAppletMarqueeObject(token);
    case "xmp":
      return startTagXmp(token);
    case "table":
      return startTagTable(token);
    case "area": case "br": case "embed": case "img": case "keygen":
    case "wbr":
      return startTagVoidFormatting(token);
    case "param": case "source": case "track":
      return startTagParamSource(token);
    case "input":
      return startTagInput(token);
    case "hr":
      return startTagHr(token);
    case "image":
      return startTagImage(token);
    case "isindex":
      return startTagIsIndex(token);
    case "textarea":
      return startTagTextarea(token);
    case "iframe":
      return startTagIFrame(token);
    case "noembed": case "noscript":
      return startTagRawtext(token);
    case "select":
      return startTagSelect(token);
    case "rp": case "rt":
      return startTagRpRt(token);
    case "option": case "optgroup":
      return startTagOpt(token);
    case "math":
      return startTagMath(token);
    case "svg":
      return startTagSvg(token);
    case "caption": case "col": case "colgroup": case "frame": case "head":
    case "tbody": case "td": case "tfoot": case "th": case "thead": case "tr":
      return startTagMisplaced(token);
    default:
      return startTagOther(token);
  }
}
1172
/// Routes an end tag seen in the in-body phase to its handler and returns
/// the handler's result.
///
/// The name groups below are disjoint, so the order of the checks does not
/// affect which handler runs.
processEndTag(EndTagToken token) {
  var name = token.name;

  if (name == "body") return endTagBody(token);
  if (name == "html") return endTagHtml(token);
  if (name == "form") return endTagForm(token);
  if (name == "p") return endTagP(token);
  if (name == "br") return endTagBr(token);

  if (const ["address", "article", "aside", "blockquote", "center",
      "details", "dir", "div", "dl", "fieldset", "figcaption", "figure",
      "footer", "header", "hgroup", "listing", "menu", "nav", "ol", "pre",
      "section", "summary", "ul"].contains(name)) {
    return endTagBlock(token);
  }
  if (const ["dd", "dt", "li"].contains(name)) {
    return endTagListItem(token);
  }
  // headingElements
  if (const ["h1", "h2", "h3", "h4", "h5", "h6"].contains(name)) {
    return endTagHeading(token);
  }
  if (const ["a", "b", "big", "code", "em", "font", "i", "nobr", "s",
      "small", "strike", "strong", "tt", "u"].contains(name)) {
    return endTagFormatting(token);
  }
  if (const ["applet", "marquee", "object"].contains(name)) {
    return endTagAppletMarqueeObject(token);
  }
  return endTagOther(token);
}
1199
/// Whether [node1] and [node2] have the same local name, the same namespace
/// and the same number of attributes, with every attribute of [node1]
/// mapping to an equal value on [node2].
bool isMatchingFormattingElement(Element node1, Element node2) {
  var sameTag = node1.localName == node2.localName &&
      node1.namespaceUri == node2.namespaceUri;
  if (!sameTag) {
    return false;
  }
  if (node1.attributes.length != node2.attributes.length) {
    return false;
  }
  return node1.attributes.keys
      .every((key) => node1.attributes[key] == node2.attributes[key]);
}
1215
1216 // helper
/// Inserts the element for [token] and appends it to the list of active
/// formatting elements.
///
/// Before appending, the entries after the last [Marker] are scanned for
/// elements that match the new one (see [isMatchingFormattingElement]). When
/// three such entries exist, the one closest to the marker is removed.
void addFormattingElement(token) {
  tree.insertElement(token);
  var inserted = tree.openElements.last;

  // Walk backwards from the newest entry and stop at the first marker.
  var duplicates = tree.activeFormattingElements.reversed
      .takeWhile((node) => node != Marker)
      .where((node) => isMatchingFormattingElement(node, inserted))
      .toList();

  assert(duplicates.length <= 3);
  if (duplicates.length == 3) {
    // `last` is the oldest match because the scan ran newest-first.
    tree.activeFormattingElements.remove(duplicates.last);
  }
  tree.activeFormattingElements.add(inserted);
}
1236
1237 // the real deal
/// Handles end-of-file in the in-body phase.
///
/// Scans the open elements from the top of the stack and reports one
/// "expected-closing-tag-but-got-eof" error for the first element whose name
/// is not in the list below. Always returns `false`.
bool processEOF() {
  const mayStayOpen = const ["dd", "dt", "li", "p", "tbody", "td", "tfoot",
      "th", "thead", "tr", "body", "html"];

  for (var node in tree.openElements.reversed) {
    if (mayStayOpen.contains(node.localName)) {
      continue;
    }
    parser.parseError(node.sourceSpan, "expected-closing-tag-but-got-eof");
    break;
  }
  //Stop parsing
  return false;
}
1252
/// Inserts whitespace text, dropping one leading newline when the current
/// node is an empty `pre`, `listing` or `textarea` element.
///
/// Always clears [dropNewline]. Nothing is inserted when no text is left
/// after the newline has been dropped.
void processSpaceCharactersDropNewline(StringToken token) {
  dropNewline = false;

  var text = token.data;
  if (text.startsWith("\n")) {
    var current = tree.openElements.last;
    var dropsNewline =
        const ["pre", "listing", "textarea"].contains(current.localName);
    if (dropsNewline && !current.hasContent()) {
      text = text.substring(1);
    }
  }

  if (text.isNotEmpty) {
    tree.reconstructActiveFormattingElements();
    tree.insertText(text, token.span);
  }
}
1270
/// Inserts character data into the tree and always returns `null`.
///
/// A token consisting of a lone U+0000 is dropped. Text that is not all
/// whitespace clears `parser.framesetOK`.
Token processCharacters(CharactersToken token) {
  var text = token.data;
  if (text == "\u0000") {
    //The tokenizer should always emit null on its own
    return null;
  }

  tree.reconstructActiveFormattingElements();
  tree.insertText(text, token.span);

  if (parser.framesetOK && !allWhitespace(text)) {
    parser.framesetOK = false;
  }
  return null;
}
1283
/// Inserts whitespace text and always returns `null`.
///
/// When [dropNewline] is set the token goes through
/// [processSpaceCharactersDropNewline] instead of being inserted directly.
Token processSpaceCharacters(SpaceCharactersToken token) {
  if (dropNewline) {
    processSpaceCharactersDropNewline(token);
    return null;
  }
  tree.reconstructActiveFormattingElements();
  tree.insertText(token.data, token.span);
  return null;
}
1293
/// Delegates the start tag to the in-head phase and returns its result.
Token startTagProcessInHead(StartTagToken token) =>
    parser._inHeadPhase.processStartTag(token);
1297
/// Handles a `body` start tag seen while already in the body.
///
/// Always reports a parse error. When the second open element is a `body`,
/// `framesetOK` is cleared and attributes from [token] that the existing
/// `body` element lacks are copied onto it. Otherwise the parser is
/// asserted to be in innerHTML mode and nothing else happens.
void startTagBody(StartTagToken token) {
  parser.parseError(token.span, "unexpected-start-tag", {"name": "body"});

  var secondIsBody = tree.openElements.length != 1 &&
      tree.openElements[1].localName == "body";
  if (!secondIsBody) {
    assert(parser.innerHTMLMode);
    return;
  }

  parser.framesetOK = false;
  for (var attr in token.data.keys) {
    var value = token.data[attr];
    tree.openElements[1].attributes.putIfAbsent(attr, () => value);
  }
}
1310
/// Handles a `frameset` start tag seen in the body.
///
/// Always reports a parse error. When the second open element is a `body`
/// and `framesetOK` is still set, that `body` is detached from its parent,
/// the stack is popped back to `html`, the `frameset` is inserted and the
/// parser enters the in-frameset phase. Otherwise the tag is ignored.
void startTagFrameset(StartTagToken token) {
  parser.parseError(token.span, "unexpected-start-tag", {"name": "frameset"});

  if (tree.openElements.length == 1 ||
      tree.openElements[1].localName != "body") {
    assert(parser.innerHTMLMode);
    return;
  }
  if (!parser.framesetOK) {
    return;
  }

  var body = tree.openElements[1];
  if (body.parentNode != null) {
    body.parentNode.nodes.remove(body);
  }
  while (tree.openElements.last.localName != "html") {
    tree.openElements.removeLast();
  }
  tree.insertElement(token);
  parser.phase = parser._inFramesetPhase;
}
1327
/// Closes a `p` element that is in button scope, if any, then inserts the
/// element for [token].
void startTagCloseP(StartTagToken token) {
  var openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) {
    endTagP(new EndTagToken("p"));
  }
  tree.insertElement(token);
}
1334
/// Closes a `p` element that is in button scope, if any, inserts the
/// element, clears `framesetOK` and sets [dropNewline].
void startTagPreListing(StartTagToken token) {
  var openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) {
    endTagP(new EndTagToken("p"));
  }
  tree.insertElement(token);
  parser.framesetOK = false;
  dropNewline = true;
}
1343
/// Handles a `form` start tag.
///
/// When a form pointer is already set the tag is a parse error and is
/// ignored. Otherwise a `p` in button scope is closed, the element is
/// inserted and it becomes the new form pointer.
void startTagForm(StartTagToken token) {
  if (tree.formPointer != null) {
    parser.parseError(token.span, "unexpected-start-tag", {"name": "form"});
    return;
  }

  if (tree.elementInScope("p", variant: "button")) {
    endTagP(new EndTagToken("p"));
  }
  tree.insertElement(token);
  tree.formPointer = tree.openElements.last;
}
1355
/// Handles `li`, `dd` and `dt` start tags.
///
/// Clears `framesetOK`, then scans the open elements from the top. The first
/// element whose name is a "stop name" for this tag gets an implied end tag.
/// The scan also stops at any special element other than `address`, `div`
/// or `p`. Finally a `p` in button scope is closed and the new element is
/// inserted.
void startTagListItem(StartTagToken token) {
  parser.framesetOK = false;

  const closedBy = const {
    "li": const ["li"],
    "dt": const ["dt", "dd"],
    "dd": const ["dt", "dd"]
  };
  var stopNames = closedBy[token.name];

  for (var node in tree.openElements.reversed) {
    var name = node.localName;
    if (stopNames.contains(name)) {
      parser.phase.processEndTag(new EndTagToken(name));
      break;
    }
    var isSpecial = specialElements.contains(getElementNameTuple(node));
    if (isSpecial && !const ["address", "div", "p"].contains(name)) {
      break;
    }
  }

  if (tree.elementInScope("p", variant: "button")) {
    parser.phase.processEndTag(new EndTagToken("p"));
  }

  tree.insertElement(token);
}
1380
/// Closes a `p` element that is in button scope, if any, inserts the
/// element and puts the tokenizer into its plaintext state.
void startTagPlaintext(StartTagToken token) {
  var openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) {
    endTagP(new EndTagToken("p"));
  }
  tree.insertElement(token);

  var tokenizer = parser.tokenizer;
  tokenizer.state = tokenizer.plaintextState;
}
1388
/// Handles `h1`–`h6` start tags.
///
/// Closes a `p` in button scope; if the current node is itself a heading,
/// reports a parse error and pops it; then inserts the new heading.
void startTagHeading(StartTagToken token) {
  if (tree.elementInScope("p", variant: "button")) {
    endTagP(new EndTagToken("p"));
  }

  var current = tree.openElements.last;
  if (headingElements.contains(current.localName)) {
    var details = {"name": token.name};
    parser.parseError(token.span, "unexpected-start-tag", details);
    tree.openElements.removeLast();
  }
  tree.insertElement(token);
}
1400
/// Handles an `a` start tag.
///
/// When an `a` is already in the list of active formatting elements this is
/// a parse error: the old anchor is closed through [endTagFormatting] and
/// then removed from both the open elements and the active formatting list.
/// After that the new anchor is added as a formatting element.
void startTagA(StartTagToken token) {
  var openAnchor = tree.elementInActiveFormattingElements("a");
  if (openAnchor != null) {
    parser.parseError(token.span, "unexpected-start-tag-implies-end-tag",
        {"startName": "a", "endName": "a"});
    endTagFormatting(new EndTagToken("a"));
    // Make sure the old anchor is gone even if the end tag handling left it.
    tree.openElements.remove(openAnchor);
    tree.activeFormattingElements.remove(openAnchor);
  }
  tree.reconstructActiveFormattingElements();
  addFormattingElement(token);
}
1413
/// Reconstructs the active formatting elements and then adds the element
/// for [token] as a new formatting element.
void startTagFormatting(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  addFormattingElement(token);
}
1418
/// Handles a `nobr` start tag.
///
/// A `nobr` already in scope is a parse error and is closed first. The new
/// element is then added as a formatting element.
void startTagNobr(StartTagToken token) {
  tree.reconstructActiveFormattingElements();

  var alreadyOpen = tree.elementInScope("nobr");
  if (alreadyOpen) {
    parser.parseError(token.span, "unexpected-start-tag-implies-end-tag",
        {"startName": "nobr", "endName": "nobr"});
    processEndTag(new EndTagToken("nobr"));
    // XXX Need tests that trigger the following
    tree.reconstructActiveFormattingElements();
  }
  addFormattingElement(token);
}
1430
/// Handles a `button` start tag.
///
/// When a `button` is already in scope this is a parse error: the open
/// button is closed and [token] is handed back to the caller. Otherwise the
/// element is inserted, `framesetOK` is cleared and `null` is returned.
Token startTagButton(StartTagToken token) {
  if (!tree.elementInScope("button")) {
    tree.reconstructActiveFormattingElements();
    tree.insertElement(token);
    parser.framesetOK = false;
    return null;
  }

  parser.parseError(token.span, "unexpected-start-tag-implies-end-tag",
      {"startName": "button", "endName": "button"});
  processEndTag(new EndTagToken("button"));
  return token;
}
1444
/// Inserts the element, pushes a [Marker] onto the list of active
/// formatting elements and clears `framesetOK`.
void startTagAppletMarqueeObject(StartTagToken token) {
  tree
    ..reconstructActiveFormattingElements()
    ..insertElement(token);
  tree.activeFormattingElements.add(Marker);
  parser.framesetOK = false;
}
1451
/// Closes a `p` element that is in button scope, if any, clears
/// `framesetOK` and hands the tag to the parser's RAWTEXT handling.
void startTagXmp(StartTagToken token) {
  var openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) {
    endTagP(new EndTagToken("p"));
  }
  tree.reconstructActiveFormattingElements();
  parser.framesetOK = false;
  parser.parseRCDataRawtext(token, "RAWTEXT");
}
1460
/// Inserts a `table` element, clears `framesetOK` and enters the in-table
/// phase.
///
/// Unless the document is in quirks mode, a `p` element in button scope is
/// closed first.
void startTagTable(StartTagToken token) {
  if (parser.compatMode != "quirks" &&
      tree.elementInScope("p", variant: "button")) {
    processEndTag(new EndTagToken("p"));
  }
  tree.insertElement(token);
  parser.framesetOK = false;
  parser.phase = parser._inTablePhase;
}
1471
/// Inserts a void element: the element is inserted and popped right away,
/// the self-closing flag is acknowledged and `framesetOK` is cleared.
void startTagVoidFormatting(StartTagToken token) {
  tree
    ..reconstructActiveFormattingElements()
    ..insertElement(token)
    ..openElements.removeLast();
  token.selfClosingAcknowledged = true;
  parser.framesetOK = false;
}
1479
/// Inserts an `input` element like any other void element, but restores the
/// previous `framesetOK` value when its `type` attribute is "hidden"
/// (compared after ASCII lower-casing).
void startTagInput(StartTagToken token) {
  var previousFramesetOK = parser.framesetOK;
  startTagVoidFormatting(token);

  var type = asciiUpper2Lower(token.data["type"]);
  if (type == "hidden") {
    //input type=hidden doesn't change framesetOK
    parser.framesetOK = previousFramesetOK;
  }
}
1488
/// Inserts the element, pops it again right away and acknowledges the
/// token's self-closing flag.
void startTagParamSource(StartTagToken token) {
  tree
    ..insertElement(token)
    ..openElements.removeLast();
  token.selfClosingAcknowledged = true;
}
1494
/// Closes a `p` element that is in button scope, if any, then inserts the
/// `hr` as a void element and clears `framesetOK`.
void startTagHr(StartTagToken token) {
  var openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) {
    endTagP(new EndTagToken("p"));
  }
  tree
    ..insertElement(token)
    ..openElements.removeLast();
  token.selfClosingAcknowledged = true;
  parser.framesetOK = false;
}
1504
/// Treats an `image` start tag as `img`: reports a parse error and
/// reprocesses an `img` start tag carrying the same attributes and
/// self-closing flag.
void startTagImage(StartTagToken token) {
  // No really...
  parser.parseError(token.span, "unexpected-start-tag-treated-as",
      {"originalName": "image", "newName": "img"});

  var imgToken = new StartTagToken("img",
      data: token.data, selfClosing: token.selfClosing);
  processStartTag(imgToken);
}
1512
/// Expands the deprecated `isindex` tag into
/// `form > hr, label(prompt text, input name=isindex), hr`.
///
/// Always reports a parse error. Nothing else happens when a form pointer is
/// already set. The `action` attribute moves to the generated `form`, and
/// `prompt` supplies the label text (with an English default). All other
/// attributes go to the generated `input`.
void startTagIsIndex(StartTagToken token) {
  parser.parseError(token.span, "deprecated-tag", {"name": "isindex"});
  if (tree.formPointer != null) {
    return;
  }

  var formAttrs = {};
  var action = token.data["action"];
  if (action != null) {
    formAttrs["action"] = action;
  }
  processStartTag(new StartTagToken("form", data: formAttrs));
  processStartTag(new StartTagToken("hr", data: {}));
  processStartTag(new StartTagToken("label", data: {}));

  // XXX Localization ...
  var promptText = token.data["prompt"];
  if (promptText == null) {
    promptText = "This is a searchable index. Enter search keywords: ";
  }
  processCharacters(new CharactersToken(promptText));

  var inputAttrs = new LinkedHashMap.from(token.data)
    ..remove('action')
    ..remove('prompt');
  inputAttrs["name"] = "isindex";
  processStartTag(new StartTagToken("input",
      data: inputAttrs, selfClosing: token.selfClosing));

  processEndTag(new EndTagToken("label"));
  processStartTag(new StartTagToken("hr", data: {}));
  processEndTag(new EndTagToken("form"));
}
1542
/// Inserts a `textarea` element, puts the tokenizer into its RCDATA state,
/// sets [dropNewline] and clears `framesetOK`.
void startTagTextarea(StartTagToken token) {
  tree.insertElement(token);

  var tokenizer = parser.tokenizer;
  tokenizer.state = tokenizer.rcdataState;

  dropNewline = true;
  parser.framesetOK = false;
}
1549
/// Clears `framesetOK` and then treats the `iframe` as a raw-text element.
void startTagIFrame(StartTagToken token) {
  parser.framesetOK = false;
  startTagRawtext(token);
}
1554
/// Hands the start tag to the parser's RAWTEXT handling.
///
/// Used for iframe, noembed, noframes, noscript(if scripting enabled).
void startTagRawtext(StartTagToken token) {
  const contentType = "RAWTEXT";
  parser.parseRCDataRawtext(token, contentType);
}
1559
/// Handles `option` and `optgroup` start tags: closes the current node when
/// it is an `option`, then inserts the new element.
void startTagOpt(StartTagToken token) {
  var current = tree.openElements.last;
  if (current.localName == "option") {
    parser.phase.processEndTag(new EndTagToken("option"));
  }
  tree.reconstructActiveFormattingElements();
  parser.tree.insertElement(token);
}
1567
/// Inserts a `select` element, clears `framesetOK` and switches phase.
///
/// The next phase is "in select in table" when the parser's current phase
/// is one of the table-related phases, otherwise plain "in select".
void startTagSelect(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  parser.framesetOK = false;

  var insideTable = parser._inTablePhase == parser.phase ||
      parser._inCaptionPhase == parser.phase ||
      parser._inColumnGroupPhase == parser.phase ||
      parser._inTableBodyPhase == parser.phase ||
      parser._inRowPhase == parser.phase ||
      parser._inCellPhase == parser.phase;

  if (insideTable) {
    parser.phase = parser._inSelectInTablePhase;
  } else {
    parser.phase = parser._inSelectPhase;
  }
}
1584
/// Handles `rp` and `rt` start tags.
///
/// When a `ruby` element is in scope, implied end tags are generated and a
/// parse error is reported if the current node is then not `ruby`. The new
/// element is inserted either way.
void startTagRpRt(StartTagToken token) {
  var rubyOpen = tree.elementInScope("ruby");
  if (rubyOpen) {
    tree.generateImpliedEndTags();
    var current = tree.openElements.last;
    if (current.localName != "ruby") {
      parser.parseError(current.sourceSpan, 'undefined-error');
    }
  }
  tree.insertElement(token);
}
1595
/// Inserts a `math` element in the MathML namespace after adjusting the
/// token's MathML and foreign attributes.
///
/// A self-closing tag is popped again immediately and its flag acknowledged.
void startTagMath(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  parser
    ..adjustMathMLAttributes(token)
    ..adjustForeignAttributes(token);
  token.namespace = Namespaces.mathml;
  tree.insertElement(token);

  //Need to get the parse error right for the case where the token
  //has a namespace not equal to the xmlns attribute
  if (!token.selfClosing) {
    return;
  }
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
}
1609
/// Inserts an `svg` element in the SVG namespace after adjusting the
/// token's SVG and foreign attributes.
///
/// A self-closing tag is popped again immediately and its flag acknowledged.
void startTagSvg(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  parser
    ..adjustSVGAttributes(token)
    ..adjustForeignAttributes(token);
  token.namespace = Namespaces.svg;
  tree.insertElement(token);

  //Need to get the parse error right for the case where the token
  //has a namespace not equal to the xmlns attribute
  if (!token.selfClosing) {
    return;
  }
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
}
1623
/// Ignores a start tag for an element that belongs in a different insertion
/// mode, reporting a parse error.
///
/// [processStartTag] routes these names here: "caption", "col", "colgroup",
/// "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr".
void startTagMisplaced(StartTagToken token) {
  var details = {"name": token.name};
  parser.parseError(token.span, "unexpected-start-tag-ignored", details);
}
1633
/// Inserts an ordinary element for any start tag without a dedicated
/// handler. Always returns `null`.
Token startTagOther(StartTagToken token) {
  tree
    ..reconstructActiveFormattingElements()
    ..insertElement(token);
  return null;
}
1639
/// Handles a `</p>` end tag.
///
/// With a `p` in button scope: generates implied end tags (except for `p`),
/// reports a parse error if the current node is not `p`, and pops up to and
/// including the `p`. Without one: opens an empty `p`, reports a parse error
/// and then closes that new `p` by calling itself again.
void endTagP(EndTagToken token) {
  if (tree.elementInScope("p", variant: "button")) {
    tree.generateImpliedEndTags("p");
    if (tree.openElements.last.localName != "p") {
      parser.parseError(token.span, "unexpected-end-tag", {"name": "p"});
    }
    popOpenElementsUntil("p");
    return;
  }

  startTagCloseP(new StartTagToken("p", data: {}));
  parser.parseError(token.span, "unexpected-end-tag", {"name": "p"});
  endTagP(new EndTagToken("p"));
}
1653
/// Handles a `</body>` end tag.
///
/// Without a `body` in scope this is a parse error and the tag is ignored.
/// Otherwise, if the current node is not `body`, the open elements from
/// index 2 onwards are checked and one parse error is reported for the first
/// element that is not in the list below. The parser then moves to the
/// after-body phase.
void endTagBody(EndTagToken token) {
  if (!tree.elementInScope("body")) {
    parser.parseError(token.span, 'undefined-error');
    return;
  }

  if (tree.openElements.last.localName != "body") {
    const mayStayOpen = const ["dd", "dt", "li", "optgroup", "option", "p",
        "rp", "rt", "tbody", "td", "tfoot", "th", "thead", "tr", "body",
        "html"];
    for (Element node in slice(tree.openElements, 2)) {
      if (mayStayOpen.contains(node.localName)) {
        continue;
      }
      // Not sure this is the correct name for the parse error
      parser.parseError(token.span, "expected-one-end-tag-but-got-another",
          {"gotName": "body", "expectedName": node.localName});
      break;
    }
  }
  parser.phase = parser._afterBodyPhase;
}
1674
/// Handles a `</html>` end tag.
///
/// With a `body` in scope, acts as if `</body>` had been seen and hands
/// [token] back to the caller. Otherwise returns `null`.
Token endTagHtml(EndTagToken token) {
  //We repeat the test for the body end tag token being ignored here
  if (!tree.elementInScope("body")) {
    return null;
  }
  endTagBody(new EndTagToken("body"));
  return token;
}
1683
/// Handles the end tag of a block-level element.
///
/// When the element is in scope, implied end tags are generated and the
/// stack is popped up to and including it. A parse error is reported
/// whenever the current node (after implied end tags) is not the named
/// element, whether or not it is in scope.
void endTagBlock(EndTagToken token) {
  var name = token.name;

  //Put us back in the right whitespace handling mode
  if (name == "pre") {
    dropNewline = false;
  }

  var inScope = tree.elementInScope(name);
  if (inScope) {
    tree.generateImpliedEndTags();
  }
  if (tree.openElements.last.localName != name) {
    parser.parseError(token.span, "end-tag-too-early", {"name": name});
  }
  if (inScope) {
    popOpenElementsUntil(name);
  }
}
1700
/// Handles a `</form>` end tag.
///
/// The form pointer is always cleared. When there was no form pointer, or
/// the form element is not in scope, the tag is a parse error and is
/// ignored. Otherwise implied end tags are generated, a parse error is
/// reported if the form is not the current node, and the form element is
/// removed from the open elements.
void endTagForm(EndTagToken token) {
  var node = tree.formPointer;
  tree.formPointer = null;
  if (node == null || !tree.elementInScope(node)) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": "form"});
  } else {
    tree.generateImpliedEndTags();
    if (tree.openElements.last != node) {
      // The datum names the tag; it must be "form", not the corrupted
      // "for m", so the message matches the sibling error above.
      parser.parseError(token.span, "end-tag-too-early-ignored",
          {"name": "form"});
    }
    tree.openElements.remove(node);
  }
}
1714
/// Handles `</li>`, `</dd>` and `</dt>` end tags.
///
/// `li` is looked up in list scope, the others in the default scope. When
/// the element is not in scope the tag is a parse error and is ignored.
/// Otherwise implied end tags (except for this name) are generated, a parse
/// error is reported if the current node is a different element, and the
/// stack is popped up to and including the named element.
void endTagListItem(EndTagToken token) {
  var name = token.name;
  var variant = name == "li" ? "list" : null;

  if (!tree.elementInScope(name, variant: variant)) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": name});
    return;
  }

  tree.generateImpliedEndTags(name);
  if (tree.openElements.last.localName != name) {
    parser.parseError(token.span, "end-tag-too-early", {"name": name});
  }
  popOpenElementsUntil(name);
}
1732
/// Handles `</h1>`–`</h6>` end tags.
///
/// When any heading element is in scope, implied end tags are generated. A
/// parse error is reported if the current node is not the named heading.
/// Then, if a heading is (still) in scope, elements are popped until a
/// heading of any level has been removed.
void endTagHeading(EndTagToken token) {
  // Evaluated twice on purpose: the scope is re-checked after implied end
  // tags have been generated.
  bool headingInScope() =>
      headingElements.any((name) => tree.elementInScope(name));

  if (headingInScope()) {
    tree.generateImpliedEndTags();
  }
  if (tree.openElements.last.localName != token.name) {
    parser.parseError(token.span, "end-tag-too-early", {"name": token.name});
  }

  if (headingInScope()) {
    var popped = tree.openElements.removeLast();
    while (!headingElements.contains(popped.localName)) {
      popped = tree.openElements.removeLast();
    }
  }
}
1754
1755 /// The much-feared adoption agency algorithm.
///
/// Runs for the end tag of a formatting element; it is also called directly
/// by [startTagA]. Each pass of the outer loop (at most eight) looks up the
/// formatting element named by [token], finds the "furthest block" below it
/// on the stack of open elements, and re-parents nodes so that the
/// formatting element can be closed. The method returns as soon as there is
/// no formatting element to close or no furthest block is found.
1756 endTagFormatting(EndTagToken token) {
1757 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adoptionAgency
1758 // TODO(jmesserly): the comments here don't match the numbered steps in the
1759 // updated spec. This needs a pass over it to verify that it still matches.
1760 // In particular the html5lib Python code skipped "step 4", I'm not sure why.
1761 // XXX Better parseError messages appreciated.
1762 int outerLoopCounter = 0;
1763 while (outerLoopCounter < 8) {
1764 outerLoopCounter += 1;
1765
1766 // Step 1 paragraph 1
// No such formatting element, or it is on the stack but not in scope:
// report the error and ignore the end tag.
1767 var formattingElement = tree.elementInActiveFormattingElements(
1768 token.name);
1769 if (formattingElement == null ||
1770 (tree.openElements.contains(formattingElement) &&
1771 !tree.elementInScope(formattingElement.localName))) {
1772 parser.parseError(token.span, "adoption-agency-1.1",
1773 {"name": token.name});
1774 return;
1775 // Step 1 paragraph 2
// The element is in the active formatting list but no longer on the stack
// of open elements: just drop it from the list.
1776 } else if (!tree.openElements.contains(formattingElement)) {
1777 parser.parseError(token.span, "adoption-agency-1.2",
1778 {"name": token.name});
1779 tree.activeFormattingElements.remove(formattingElement);
1780 return;
1781 }
1782
1783 // Step 1 paragraph 3
1784 if (formattingElement != tree.openElements.last) {
1785 parser.parseError(token.span, "adoption-agency-1.3",
1786 {"name": token.name});
1787 }
1788
1789 // Step 2
1790 // Start of the adoption agency algorithm proper
// The furthest block is the first special element found when scanning the
// stack from the formatting element's position onwards.
1791 var afeIndex = tree.openElements.indexOf(formattingElement);
1792 Node furthestBlock = null;
1793 for (Node element in slice(tree.openElements, afeIndex)) {
1794 if (specialElements.contains(getElementNameTuple(element))) {
1795 furthestBlock = element;
1796 break;
1797 }
1798 }
1799 // Step 3
// No furthest block: pop up to and including the formatting element, remove
// it from the active formatting list, and finish.
1800 if (furthestBlock == null) {
1801 var element = tree.openElements.removeLast();
1802 while (element != formattingElement) {
1803 element = tree.openElements.removeLast();
1804 }
1805 tree.activeFormattingElements.remove(element);
1806 return;
1807 }
1808
// The common ancestor is the element immediately before the formatting
// element on the stack of open elements.
1809 var commonAncestor = tree.openElements[afeIndex - 1];
1810
1811 // Step 5
1812 // The bookmark is supposed to help us identify where to reinsert
1813 // nodes in step 12. We have to ensure that we reinsert nodes after
1814 // the node before the active formatting element. Note the bookmark
1815 // can move in step 7.4
1816 var bookmark = tree.activeFormattingElements.indexOf(formattingElement);
1817
1818 // Step 6
1819 Node lastNode = furthestBlock;
1820 var node = furthestBlock;
1821 int innerLoopCounter = 0;
1822
// The inner loop (at most three passes) walks up the stack from the
// furthest block towards the formatting element.
1823 var index = tree.openElements.indexOf(node);
1824 while (innerLoopCounter < 3) {
1825 innerLoopCounter += 1;
1826
1827 // Node is element before node in open elements
1828 index -= 1;
1829 node = tree.openElements[index];
// Nodes that are not active formatting elements are simply taken off the
// stack. The next pass decrements `index` again and so lands on the element
// that preceded the removed one.
1830 if (!tree.activeFormattingElements.contains(node)) {
1831 tree.openElements.remove(node);
1832 continue;
1833 }
1834 // Step 6.3
1835 if (node == formattingElement) {
1836 break;
1837 }
1838 // Step 6.4
1839 if (lastNode == furthestBlock) {
1840 bookmark = (tree.activeFormattingElements.indexOf(node) + 1);
1841 }
1842 // Step 6.5
1843 //cite = node.parent
1844 var clone = node.clone(false);
1845 // Replace node with clone
1846 tree.activeFormattingElements[
1847 tree.activeFormattingElements.indexOf(node)] = clone;
1848 tree.openElements[tree.openElements.indexOf(node)] = clone;
1849 node = clone;
1850
1851 // Step 6.6
1852 // Remove lastNode from its parents, if any
1853 if (lastNode.parentNode != null) {
1854 lastNode.parentNode.nodes.remove(lastNode);
1855 }
1856 node.nodes.add(lastNode);
1857 // Step 6.7
1858 lastNode = node;
1859 // End of inner loop
1860 }
1861
1862 // Step 7
1863 // Foster parent lastNode if commonAncestor is a
1864 // table, tbody, tfoot, thead, or tr we need to foster parent the
1865 // lastNode
1866 if (lastNode.parentNode != null) {
1867 lastNode.parentNode.nodes.remove(lastNode);
1868 }
1869
1870 if (const ["table", "tbody", "tfoot", "thead", "tr"].contains(
1871 commonAncestor.localName)) {
1872 var nodePos = tree.getTableMisnestedNodePosition();
1873 nodePos[0].insertBefore(lastNode, nodePos[1]);
1874 } else {
1875 commonAncestor.nodes.add(lastNode);
1876 }
1877
1878 // Step 8
1879 var clone = formattingElement.clone(false);
1880
1881 // Step 9
1882 furthestBlock.reparentChildren(clone);
1883
1884 // Step 10
1885 furthestBlock.nodes.add(clone);
1886
1887 // Step 11
// The bookmark is clamped to the list length because the list has just
// become one entry shorter.
1888 tree.activeFormattingElements.remove(formattingElement);
1889 tree.activeFormattingElements.insert(
1890 min(bookmark, tree.activeFormattingElements.length), clone);
1891
1892 // Step 12
// The clone goes on the stack directly after the furthest block.
1893 tree.openElements.remove(formattingElement);
1894 tree.openElements.insert(
1895 tree.openElements.indexOf(furthestBlock) + 1, clone);
1896 }
1897 }
1898
/// Handles `</applet>`, `</marquee>` and `</object>` end tags.
///
/// Implied end tags are generated when the element is in scope. A parse
/// error is reported if the current node is not the named element. If the
/// element is (still) in scope, the stack is popped up to and including it
/// and the active formatting elements are cleared.
void endTagAppletMarqueeObject(EndTagToken token) {
  var name = token.name;

  if (tree.elementInScope(name)) {
    tree.generateImpliedEndTags();
  }
  if (tree.openElements.last.localName != name) {
    parser.parseError(token.span, "end-tag-too-early", {"name": name});
  }
  // Scope is checked again on purpose, after implied end tags ran.
  if (tree.elementInScope(name)) {
    popOpenElementsUntil(name);
    tree.clearActiveFormattingElements();
  }
}
1911
/// Treats a `</br>` end tag as a `<br>` element: reports a parse error,
/// then inserts an attribute-less `br` and pops it again.
void endTagBr(EndTagToken token) {
  parser.parseError(token.span, "unexpected-end-tag-treated-as",
      {"originalName": "br", "newName": "br element"});
  tree.reconstructActiveFormattingElements();

  var brToken = new StartTagToken("br", data: {});
  tree.insertElement(brToken);
  tree.openElements.removeLast();
}
1919
/// Handles any end tag without a dedicated handler.
///
/// Scans the open elements from the top. On reaching an element with the
/// tag's name, implied end tags (except for that name) are generated, a
/// parse error is reported if the current node is a different element, and
/// the stack is popped up to and including the match. Hitting a special
/// element first is a parse error and the tag is ignored.
void endTagOther(EndTagToken token) {
  var name = token.name;

  for (var node in tree.openElements.reversed) {
    if (node.localName != name) {
      if (specialElements.contains(getElementNameTuple(node))) {
        parser.parseError(token.span, "unexpected-end-tag", {"name": name});
        break;
      }
      continue;
    }

    tree.generateImpliedEndTags(name);
    if (tree.openElements.last.localName != name) {
      parser.parseError(token.span, "unexpected-end-tag", {"name": name});
    }
    // Pop everything above the match, then the match itself.
    while (tree.openElements.removeLast() != node) {}
    break;
  }
}
1939 }
1940
1941
/// Phase that collects the text content of an element until its end tag.
///
/// The message kept next to [processStartTag] refers to this as the
/// RCDATA/RAWTEXT mode. The EOF and end-tag handlers all hand control back to
/// `parser.originalPhase`.
class TextPhase extends Phase {
  TextPhase(parser) : super(parser);

  // "Tried to process start tag %s in RCDATA/RAWTEXT mode"%token.name
  /// Start tags are not expected while in this phase; this only asserts.
  processStartTag(StartTagToken token) {
    assert(false);
  }

  /// Routes `script` end tags to [endTagScript] and every other end tag to
  /// [endTagOther].
  processEndTag(EndTagToken token) {
    if (token.name == 'script') return endTagScript(token);
    return endTagOther(token);
  }

  /// Inserts the token's text (with its span) into the tree.
  Token processCharacters(CharactersToken token) {
    tree.insertText(token.data, token.span);
    return null;
  }

  /// Reports the still-open current element as a parse error, pops it and
  /// restores the original phase.
  ///
  /// Returns `true`; presumably this asks the caller to handle EOF again in
  /// the restored phase (confirm against the parser's main loop).
  bool processEOF() {
    var last = tree.openElements.last;
    parser.parseError(last.sourceSpan, "expected-named-closing-tag-but-got-eof",
        {'name': last.localName});
    tree.openElements.removeLast();
    parser.phase = parser.originalPhase;
    return true;
  }

  /// Pops the current node (asserted to be a `script` element) and restores
  /// the original phase.
  void endTagScript(EndTagToken token) {
    var node = tree.openElements.removeLast();
    assert(node.localName == "script");
    parser.phase = parser.originalPhase;
    //The rest of this method is all stuff that only happens if
    //document.write works
  }

  /// Pops the current node and restores the original phase.
  void endTagOther(EndTagToken token) {
    // The popped element is not needed here, so it is not kept in a local.
    tree.openElements.removeLast();
    parser.phase = parser.originalPhase;
  }
}
1980
/// Tree-construction phase whose handlers implement the "in table" rules
/// referenced by the spec link below.
class InTablePhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-table
  InTablePhase(parser) : super(parser);

  /// Routes a start tag to the handler registered for its tag name.
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "caption":
        return startTagCaption(token);
      case "colgroup":
        return startTagColgroup(token);
      case "col":
        return startTagCol(token);
      case "tbody":
      case "tfoot":
      case "thead":
        return startTagRowGroup(token);
      case "td":
      case "th":
      case "tr":
        return startTagImplyTbody(token);
      case "table":
        return startTagTable(token);
      case "style":
      case "script":
        return startTagStyleScript(token);
      case "input":
        return startTagInput(token);
      case "form":
        return startTagForm(token);
      default:
        return startTagOther(token);
    }
  }

  /// Routes an end tag to the handler registered for its tag name.
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "table":
        return endTagTable(token);
      case "body":
      case "caption":
      case "col":
      case "colgroup":
      case "html":
      case "tbody":
      case "td":
      case "tfoot":
      case "th":
      case "thead":
      case "tr":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  // helper methods

  /// "clear the stack back to a table context": pops open elements until the
  /// current node is `table` or `html`.
  void clearStackToTableContext() {
    while (true) {
      var current = tree.openElements.last.localName;
      if (current == "table" || current == "html") break;
      //parser.parseError(token.span, "unexpected-implied-end-tag-in-table",
      //  {"name": tree.openElements.last.name})
      tree.openElements.removeLast();
    }
    // When the current node is <html> it's an innerHTML case
  }

  /// Makes the in-table-text phase current and records the phase that was
  /// active so the text phase can switch back to it.
  ///
  /// The previously active phase is read from the parser rather than assumed
  /// to be this object, because other table phases forward their character
  /// tokens here.
  void _switchToInTableText() {
    var resumePhase = parser.phase;
    parser.phase = parser._inTableTextPhase;
    parser._inTableTextPhase.originalPhase = resumePhase;
  }

  /// Runs [action] with `tree.insertFromTable` set, clearing the flag
  /// afterwards.
  void _insertingFromTable(action) {
    tree.insertFromTable = true;
    action();
    tree.insertFromTable = false;
  }

  // processing methods

  /// Reports "eof-in-table" unless the current node is `html`, which is
  /// asserted to be the innerHTML case. Always returns `false`.
  bool processEOF() {
    var current = tree.openElements.last;
    if (current.localName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError(current.sourceSpan, "eof-in-table");
    }
    //Stop parsing
    return false;
  }

  /// Hands whitespace to the in-table-text phase after switching to it.
  Token processSpaceCharacters(SpaceCharactersToken token) {
    _switchToInTableText();
    parser.phase.processSpaceCharacters(token);
    return null;
  }

  /// Hands character data to the in-table-text phase after switching to it.
  Token processCharacters(CharactersToken token) {
    _switchToInTableText();
    parser.phase.processCharacters(token);
    return null;
  }

  /// Inserts [token]'s text through the in-body phase while
  /// `tree.insertFromTable` is set.
  void insertText(CharactersToken token) {
    // If we get here there must be at least one non-whitespace character
    // Do the table magic!
    _insertingFromTable(() {
      parser._inBodyPhase.processCharacters(token);
    });
  }

  /// Opens a `caption`: clears to table context, pushes a formatting marker,
  /// inserts the element and moves to the in-caption phase.
  void startTagCaption(StartTagToken token) {
    clearStackToTableContext();
    tree.activeFormattingElements.add(Marker);
    tree.insertElement(token);
    parser.phase = parser._inCaptionPhase;
  }

  /// Opens a `colgroup` and moves to the in-column-group phase.
  void startTagColgroup(StartTagToken token) {
    clearStackToTableContext();
    tree.insertElement(token);
    parser.phase = parser._inColumnGroupPhase;
  }

  /// Implies a `colgroup` start tag, then returns [token] so the `col` tag is
  /// processed again.
  Token startTagCol(StartTagToken token) {
    var impliedColgroup = new StartTagToken("colgroup", data: {});
    startTagColgroup(impliedColgroup);
    return token;
  }

  /// Opens a row group (`tbody`/`tfoot`/`thead`) and moves to the
  /// in-table-body phase.
  void startTagRowGroup(StartTagToken token) {
    clearStackToTableContext();
    tree.insertElement(token);
    parser.phase = parser._inTableBodyPhase;
  }

  /// Implies a `tbody` start tag, then returns [token] so it is processed
  /// again.
  Token startTagImplyTbody(StartTagToken token) {
    var impliedTbody = new StartTagToken("tbody", data: {});
    startTagRowGroup(impliedTbody);
    return token;
  }

  /// A nested `<table>` start tag: reports it, processes a synthetic
  /// `</table>` in the current phase and, outside innerHTML mode, returns
  /// [token] for reprocessing.
  Token startTagTable(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-implies-end-tag",
        {"startName": "table", "endName": "table"});
    parser.phase.processEndTag(new EndTagToken("table"));
    return parser.innerHTMLMode ? null : token;
  }

  /// `style` and `script` are delegated to the in-head phase.
  Token startTagStyleScript(StartTagToken token) {
    return parser._inHeadPhase.processStartTag(token);
  }

  /// An `input` whose `type` is "hidden" (ASCII case-insensitively) is
  /// inserted and popped right away; any other `input` goes to
  /// [startTagOther].
  void startTagInput(StartTagToken token) {
    if (asciiUpper2Lower(token.data["type"]) != "hidden") {
      startTagOther(token);
      return;
    }
    parser.parseError(token.span, "unexpected-hidden-input-in-table");
    tree.insertElement(token);
    // XXX associate with form
    tree.openElements.removeLast();
  }

  /// A `form` in a table is always a parse error; it is inserted (and popped
  /// right away) only when no form pointer is set yet.
  void startTagForm(StartTagToken token) {
    parser.parseError(token.span, "unexpected-form-in-table");
    if (tree.formPointer != null) return;
    tree.insertElement(token);
    tree.formPointer = tree.openElements.last;
    tree.openElements.removeLast();
  }

  /// Any other start tag is reported and processed by the in-body phase with
  /// `tree.insertFromTable` set.
  void startTagOther(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-implies-table-voodoo",
        {"name": token.name});
    // Do the table magic!
    _insertingFromTable(() {
      parser._inBodyPhase.processStartTag(token);
    });
  }

  /// Closes the table when one is in table scope; otherwise reports an error
  /// (asserted to be the innerHTML case).
  void endTagTable(EndTagToken token) {
    if (!tree.elementInScope("table", variant: "table")) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return;
    }

    tree.generateImpliedEndTags();
    var current = tree.openElements.last;
    if (current.localName != "table") {
      parser.parseError(token.span, "end-tag-too-early-named",
          {"gotName": "table", "expectedName": current.localName});
    }
    while (tree.openElements.last.localName != "table") {
      tree.openElements.removeLast();
    }
    // Finally pop the table element itself.
    tree.openElements.removeLast();
    parser.resetInsertionMode();
  }

  /// End tags listed in [processEndTag] as ignorable only produce an error.
  void endTagIgnore(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
  }

  /// Any other end tag is reported and processed by the in-body phase with
  /// `tree.insertFromTable` set.
  void endTagOther(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag-implies-table-voodoo",
        {"name": token.name});
    // Do the table magic!
    _insertingFromTable(() {
      parser._inBodyPhase.processEndTag(token);
    });
  }
}
2164
/// Phase that buffers character tokens seen inside a table until a
/// non-character token (or EOF) arrives, then flushes them in one go and
/// returns to [originalPhase].
class InTableTextPhase extends Phase {
  /// The phase to switch back to once the buffered text has been flushed.
  /// Assigned from outside before tokens are forwarded to this phase.
  Phase originalPhase;

  /// Character and whitespace tokens collected since the last flush.
  List<StringToken> characterTokens;

  InTableTextPhase(parser)
      : characterTokens = <StringToken>[],
        super(parser);

  /// Emits the buffered text, if any, and empties the buffer.
  ///
  /// Text containing any non-whitespace is routed through
  /// `parser._inTablePhase.insertText`; non-empty all-whitespace text is
  /// inserted into the tree directly.
  void flushCharacters() {
    if (characterTokens.isEmpty) return;

    // TODO(sigmund,jmesserly): remove '' (dartbug.com/8480)
    var data = characterTokens.map((t) => t.data).join('');

    // Stays null unless span generation is enabled; then it covers the first
    // through the last buffered token.
    var span;
    if (parser.generateSpans) {
      span = characterTokens[0].span.expand(characterTokens.last.span);
    }

    if (!allWhitespace(data)) {
      parser._inTablePhase.insertText(new CharactersToken(data)..span = span);
    } else if (data.isNotEmpty) {
      tree.insertText(data, span);
    }
    characterTokens = <StringToken>[];
  }

  /// Shared exit path for non-character tokens: flushes the buffer, restores
  /// [originalPhase] and returns [token] unchanged to the caller.
  Token _flushAndResume(Token token) {
    flushCharacters();
    parser.phase = originalPhase;
    return token;
  }

  Token processComment(CommentToken token) => _flushAndResume(token);

  /// Flushes the buffer and restores [originalPhase]. Always returns `true`.
  bool processEOF() {
    flushCharacters();
    parser.phase = originalPhase;
    return true;
  }

  /// Buffers [token], except that a token whose data is exactly U+0000 is
  /// dropped.
  Token processCharacters(CharactersToken token) {
    if (token.data == "\u0000") {
      return null;
    }
    characterTokens.add(token);
    return null;
  }

  /// Buffers whitespace alongside other character data.
  Token processSpaceCharacters(SpaceCharactersToken token) {
    //pretty sure we should never reach here
    characterTokens.add(token);
    // XXX assert(false);
    return null;
  }

  Token processStartTag(StartTagToken token) => _flushAndResume(token);

  Token processEndTag(EndTagToken token) => _flushAndResume(token);
}
2231
2232
/// Tree-construction phase used after a `caption` element has been opened.
///
/// Most content is delegated to the in-body phase; table-structure tags close
/// the caption first.
class InCaptionPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-caption
  InCaptionPhase(parser) : super(parser);

  /// Routes a start tag to the handler registered for its tag name.
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "caption":
      case "col":
      case "colgroup":
      case "tbody":
      case "td":
      case "tfoot":
      case "th":
      case "thead":
      case "tr":
        return startTagTableElement(token);
      default:
        return startTagOther(token);
    }
  }

  /// Routes an end tag to the handler registered for its tag name.
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "caption":
        return endTagCaption(token);
      case "table":
        return endTagTable(token);
      case "body":
      case "col":
      case "colgroup":
      case "html":
      case "tbody":
      case "td":
      case "tfoot":
      case "th":
      case "thead":
      case "tr":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  /// Whether a `</caption>` would be ignored, i.e. no `caption` is in table
  /// scope.
  bool ignoreEndTagCaption() {
    var captionOpen = tree.elementInScope("caption", variant: "table");
    return !captionOpen;
  }

  /// Lets the in-body phase handle EOF; always returns `false`.
  bool processEOF() {
    parser._inBodyPhase.processEOF();
    return false;
  }

  Token processCharacters(CharactersToken token) {
    return parser._inBodyPhase.processCharacters(token);
  }

  /// Shared by [startTagTableElement] and [endTagTable]: reports an error,
  /// processes a synthetic `</caption>` in the current phase and returns
  /// [token] for reprocessing unless that end tag was going to be ignored.
  Token _closeCaptionThenReprocess(token) {
    parser.parseError(token.span, "undefined-error");
    //XXX Have to duplicate logic here to find out if the tag is ignored
    var ignored = ignoreEndTagCaption();
    parser.phase.processEndTag(new EndTagToken("caption"));
    return ignored ? null : token;
  }

  Token startTagTableElement(StartTagToken token) {
    return _closeCaptionThenReprocess(token);
  }

  Token startTagOther(StartTagToken token) {
    return parser._inBodyPhase.processStartTag(token);
  }

  /// Closes the caption and returns to the in-table phase, or reports an
  /// error when no caption is in table scope (asserted to be the innerHTML
  /// case).
  void endTagCaption(EndTagToken token) {
    if (ignoreEndTagCaption()) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return;
    }

    // AT this code is quite similar to endTagTable in "InTable"
    tree.generateImpliedEndTags();
    var currentName = tree.openElements.last.localName;
    if (currentName != "caption") {
      parser.parseError(token.span, "expected-one-end-tag-but-got-another",
          {"gotName": "caption",
           "expectedName": tree.openElements.last.localName});
    }
    while (tree.openElements.last.localName != "caption") {
      tree.openElements.removeLast();
    }
    // Pop the caption element itself.
    tree.openElements.removeLast();
    tree.clearActiveFormattingElements();
    parser.phase = parser._inTablePhase;
  }

  Token endTagTable(EndTagToken token) {
    return _closeCaptionThenReprocess(token);
  }

  /// End tags listed in [processEndTag] as ignorable only produce an error.
  void endTagIgnore(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
  }

  Token endTagOther(EndTagToken token) {
    return parser._inBodyPhase.processEndTag(token);
  }
}
2326
2327
/// Tree-construction phase used after a `colgroup` element has been opened.
///
/// Only `col` elements are accepted; anything else implies the end of the
/// column group and is then processed again.
class InColumnGroupPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-column
  InColumnGroupPhase(parser) : super(parser);

  /// Routes a start tag to the handler registered for its tag name.
  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "col") return startTagCol(token);
    return startTagOther(token);
  }

  /// Routes an end tag to the handler registered for its tag name.
  processEndTag(EndTagToken token) {
    var name = token.name;
    if (name == "colgroup") return endTagColgroup(token);
    if (name == "col") return endTagCol(token);
    return endTagOther(token);
  }

  /// Whether a `</colgroup>` would be ignored, i.e. the current node is
  /// `html`.
  bool ignoreEndTagColgroup() {
    var current = tree.openElements.last;
    return current.localName == "html";
  }

  /// At EOF, closes the column group and returns `true`, unless the end tag
  /// would be ignored (asserted to be the innerHTML case), in which case it
  /// returns `false`.
  bool processEOF() {
    if (ignoreEndTagColgroup()) {
      assert(parser.innerHTMLMode);
      return false;
    }
    endTagColgroup(new EndTagToken("colgroup"));
    return true;
  }

  /// Shared by the character, other-start-tag and other-end-tag handlers:
  /// acts as if `</colgroup>` had been seen and returns [token] for
  /// reprocessing unless that end tag was ignored.
  Token _impliedColgroupEnd(token) {
    // Must be sampled before endTagColgroup changes the stack.
    var ignored = ignoreEndTagColgroup();
    endTagColgroup(new EndTagToken("colgroup"));
    return ignored ? null : token;
  }

  Token processCharacters(CharactersToken token) => _impliedColgroupEnd(token);

  /// Inserts the `col` element and pops it again right away.
  void startTagCol(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
  }

  Token startTagOther(StartTagToken token) => _impliedColgroupEnd(token);

  /// Pops the current node and returns to the in-table phase, or reports an
  /// error when the end tag is to be ignored.
  void endTagColgroup(EndTagToken token) {
    if (!ignoreEndTagColgroup()) {
      tree.openElements.removeLast();
      parser.phase = parser._inTablePhase;
      return;
    }
    // innerHTML case
    assert(parser.innerHTMLMode);
    parser.parseError(token.span, "undefined-error");
  }

  /// `</col>` is always a parse error.
  void endTagCol(EndTagToken token) {
    parser.parseError(token.span, "no-end-tag", {"name": "col"});
  }

  Token endTagOther(EndTagToken token) => _impliedColgroupEnd(token);
}
2401
2402
/// Tree-construction phase used after a row group (`tbody`, `tfoot` or
/// `thead`) has been opened.
class InTableBodyPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-table0
  InTableBodyPhase(parser) : super(parser);

  /// Routes a start tag to the handler registered for its tag name.
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "tr":
        return startTagTr(token);
      case "td":
      case "th":
        return startTagTableCell(token);
      case "caption":
      case "col":
      case "colgroup":
      case "tbody":
      case "tfoot":
      case "thead":
        return startTagTableOther(token);
      default:
        return startTagOther(token);
    }
  }

  /// Routes an end tag to the handler registered for its tag name.
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "tbody":
      case "tfoot":
      case "thead":
        return endTagTableRowGroup(token);
      case "table":
        return endTagTable(token);
      case "body":
      case "caption":
      case "col":
      case "colgroup":
      case "html":
      case "td":
      case "th":
      case "tr":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  // helper methods

  /// Pops open elements until the current node is a row group or `html`; the
  /// latter is asserted to be the innerHTML case.
  void clearStackToTableBodyContext() {
    var stopAt = const ["tbody", "tfoot", "thead", "html"];
    while (true) {
      if (stopAt.contains(tree.openElements.last.localName)) break;
      //XXX parser.parseError(token.span, "unexpected-implied-end-tag-in-table",
      //  {"name": tree.openElements.last.name})
      tree.openElements.removeLast();
    }
    if (tree.openElements.last.localName == "html") {
      assert(parser.innerHTMLMode);
    }
  }

  // the rest

  /// Lets the in-table phase handle EOF; always returns `false`.
  bool processEOF() {
    parser._inTablePhase.processEOF();
    return false;
  }

  Token processSpaceCharacters(SpaceCharactersToken token) =>
      parser._inTablePhase.processSpaceCharacters(token);

  Token processCharacters(CharactersToken token) =>
      parser._inTablePhase.processCharacters(token);

  /// Opens a `tr` and moves to the in-row phase.
  void startTagTr(StartTagToken token) {
    clearStackToTableBodyContext();
    tree.insertElement(token);
    parser.phase = parser._inRowPhase;
  }

  /// A cell directly inside a row group: reports it, implies a `tr` start tag
  /// and returns [token] for reprocessing.
  Token startTagTableCell(StartTagToken token) {
    parser.parseError(token.span, "unexpected-cell-in-table-body",
        {"name": token.name});
    var impliedRow = new StartTagToken("tr", data: {});
    startTagTr(impliedRow);
    return token;
  }

  /// Table-structure start tags are handled exactly like `</table>`.
  Token startTagTableOther(token) => endTagTable(token);

  Token startTagOther(StartTagToken token) =>
      parser._inTablePhase.processStartTag(token);

  /// Closes the named row group when it is in table scope; otherwise reports
  /// an error.
  void endTagTableRowGroup(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      parser.parseError(token.span, "unexpected-end-tag-in-table-body",
          {"name": token.name});
      return;
    }
    clearStackToTableBodyContext();
    tree.openElements.removeLast();
    parser.phase = parser._inTablePhase;
  }

  /// Closes the open row group, if any is in table scope, and returns [token]
  /// for reprocessing; otherwise reports an error and returns `null`.
  Token endTagTable(TagToken token) {
    // XXX AT Any ideas on how to share this with endTagTable?
    var rowGroupOpen = const ["tbody", "thead", "tfoot"]
        .any((name) => tree.elementInScope(name, variant: "table"));
    if (!rowGroupOpen) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return null;
    }

    clearStackToTableBodyContext();
    // After clearing, the current node names the row group to close.
    endTagTableRowGroup(new EndTagToken(tree.openElements.last.localName));
    return token;
  }

  /// End tags listed in [processEndTag] as ignorable only produce an error.
  void endTagIgnore(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag-in-table-body",
        {"name": token.name});
  }

  Token endTagOther(EndTagToken token) =>
      parser._inTablePhase.processEndTag(token);
}
2513
2514
/// Tree-construction phase used after a `tr` element has been opened.
class InRowPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-row
  InRowPhase(parser) : super(parser);

  /// Routes a start tag to the handler registered for its tag name.
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "td":
      case "th":
        return startTagTableCell(token);
      case "caption":
      case "col":
      case "colgroup":
      case "tbody":
      case "tfoot":
      case "thead":
      case "tr":
        return startTagTableOther(token);
      default:
        return startTagOther(token);
    }
  }

  /// Routes an end tag to the handler registered for its tag name.
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "tr":
        return endTagTr(token);
      case "table":
        return endTagTable(token);
      case "tbody":
      case "tfoot":
      case "thead":
        return endTagTableRowGroup(token);
      case "body":
      case "caption":
      case "col":
      case "colgroup":
      case "html":
      case "td":
      case "th":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  // helper methods (XXX unify this with other table helper methods)

  /// Pops open elements until the current node is `tr` or `html`, reporting a
  /// parse error for each element popped.
  void clearStackToTableRowContext() {
    for (var current = tree.openElements.last;
        current.localName != "tr" && current.localName != "html";
        current = tree.openElements.last) {
      parser.parseError(current.sourceSpan,
          "unexpected-implied-end-tag-in-table-row",
          {"name": tree.openElements.last.localName});
      tree.openElements.removeLast();
    }
  }

  /// Whether a `</tr>` would be ignored, i.e. no `tr` is in table scope.
  bool ignoreEndTagTr() {
    var rowOpen = tree.elementInScope("tr", variant: "table");
    return !rowOpen;
  }

  // the rest

  /// Lets the in-table phase handle EOF; always returns `false`.
  bool processEOF() {
    parser._inTablePhase.processEOF();
    return false;
  }

  Token processSpaceCharacters(SpaceCharactersToken token) =>
      parser._inTablePhase.processSpaceCharacters(token);

  Token processCharacters(CharactersToken token) =>
      parser._inTablePhase.processCharacters(token);

  /// Opens a cell: clears to row context, inserts the element, moves to the
  /// in-cell phase and pushes a formatting marker.
  void startTagTableCell(StartTagToken token) {
    clearStackToTableRowContext();
    tree.insertElement(token);
    parser.phase = parser._inCellPhase;
    tree.activeFormattingElements.add(Marker);
  }

  /// Shared by [startTagTableOther] and [endTagTable]: acts as if `</tr>` had
  /// been seen and returns [token] for reprocessing unless that end tag was
  /// ignored.
  Token _endRowThenReprocess(token) {
    // Must be sampled before endTagTr changes the stack.
    var ignored = ignoreEndTagTr();
    endTagTr(new EndTagToken("tr"));
    // XXX how are we sure it's always ignored in the innerHTML case?
    return ignored ? null : token;
  }

  Token startTagTableOther(StartTagToken token) => _endRowThenReprocess(token);

  Token startTagOther(StartTagToken token) =>
      parser._inTablePhase.processStartTag(token);

  /// Closes the row and returns to the in-table-body phase, or reports an
  /// error when no `tr` is in table scope (asserted to be the innerHTML case).
  void endTagTr(EndTagToken token) {
    if (ignoreEndTagTr()) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return;
    }
    clearStackToTableRowContext();
    tree.openElements.removeLast();
    parser.phase = parser._inTableBodyPhase;
  }

  /// Reprocess the current tag if the tr end tag was not ignored
  Token endTagTable(EndTagToken token) => _endRowThenReprocess(token);

  /// When the named row group is in table scope, closes the row and returns
  /// [token] for reprocessing; otherwise reports an error and returns `null`.
  Token endTagTableRowGroup(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      parser.parseError(token.span, "undefined-error");
      return null;
    }
    endTagTr(new EndTagToken("tr"));
    return token;
  }

  /// End tags listed in [processEndTag] as ignorable only produce an error.
  void endTagIgnore(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag-in-table-row",
        {"name": token.name});
  }

  Token endTagOther(EndTagToken token) =>
      parser._inTablePhase.processEndTag(token);
}
2631
/// Tree-construction phase used after a `td` or `th` element has been opened.
///
/// Ordinary content is delegated to the in-body phase; table-structure tags
/// close the cell first.
class InCellPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-cell
  InCellPhase(parser) : super(parser);

  /// Routes a start tag to the handler registered for its tag name.
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "caption":
      case "col":
      case "colgroup":
      case "tbody":
      case "td":
      case "tfoot":
      case "th":
      case "thead":
      case "tr":
        return startTagTableOther(token);
      default:
        return startTagOther(token);
    }
  }

  /// Routes an end tag to the handler registered for its tag name.
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "td":
      case "th":
        return endTagTableCell(token);
      case "body":
      case "caption":
      case "col":
      case "colgroup":
      case "html":
        return endTagIgnore(token);
      case "table":
      case "tbody":
      case "tfoot":
      case "thead":
      case "tr":
        return endTagImply(token);
      default:
        return endTagOther(token);
    }
  }

  // helper

  /// Closes whichever cell is in table scope, checking `td` before `th`.
  /// Does nothing when neither is.
  void closeCell() {
    for (var cellName in const ["td", "th"]) {
      if (tree.elementInScope(cellName, variant: "table")) {
        endTagTableCell(new EndTagToken(cellName));
        return;
      }
    }
  }

  // the rest

  /// Lets the in-body phase handle EOF; always returns `false`.
  bool processEOF() {
    parser._inBodyPhase.processEOF();
    return false;
  }

  Token processCharacters(CharactersToken token) =>
      parser._inBodyPhase.processCharacters(token);

  /// Closes the open cell and returns [token] for reprocessing; when no cell
  /// is in table scope, reports an error and returns `null`.
  Token startTagTableOther(StartTagToken token) {
    var cellOpen = tree.elementInScope("td", variant: "table") ||
        tree.elementInScope("th", variant: "table");
    if (cellOpen) {
      closeCell();
      return token;
    }
    // innerHTML case
    assert(parser.innerHTMLMode);
    parser.parseError(token.span, "undefined-error");
    return null;
  }

  Token startTagOther(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// Closes the named cell and returns to the in-row phase, or reports an
  /// error when that cell is not in table scope.
  void endTagTableCell(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
      return;
    }

    tree.generateImpliedEndTags(token.name);
    if (tree.openElements.last.localName == token.name) {
      tree.openElements.removeLast();
    } else {
      // Something is still open inside the cell.
      parser.parseError(token.span, "unexpected-cell-end-tag",
          {"name": token.name});
      popOpenElementsUntil(token.name);
    }
    tree.clearActiveFormattingElements();
    parser.phase = parser._inRowPhase;
  }

  /// End tags listed in [processEndTag] as ignorable only produce an error.
  void endTagIgnore(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
  }

  /// For table-structure end tags: when the named element is in table scope,
  /// closes the cell and returns [token] for reprocessing; otherwise reports
  /// an error and returns `null`.
  Token endTagImply(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      // sometimes innerHTML case
      parser.parseError(token.span, "undefined-error");
      return null;
    }
    closeCell();
    return token;
  }

  Token endTagOther(EndTagToken token) =>
      parser._inBodyPhase.processEndTag(token);
}
2730
/// Tree-construction phase used after a `select` element has been opened.
class InSelectPhase extends Phase {
  InSelectPhase(parser) : super(parser);

  /// Routes a start tag to the handler registered for its tag name.
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "option":
        return startTagOption(token);
      case "optgroup":
        return startTagOptgroup(token);
      case "select":
        return startTagSelect(token);
      case "input":
      case "keygen":
      case "textarea":
        return startTagInput(token);
      case "script":
        return startTagScript(token);
      default:
        return startTagOther(token);
    }
  }

  /// Routes an end tag to the handler registered for its tag name.
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "option":
        return endTagOption(token);
      case "optgroup":
        return endTagOptgroup(token);
      case "select":
        return endTagSelect(token);
      default:
        return endTagOther(token);
    }
  }

  // http://www.whatwg.org/specs/web-apps/current-work/#in-select
  /// Reports "eof-in-select" unless the current node is `html`, which is
  /// asserted to be the innerHTML case. Always returns `false`.
  bool processEOF() {
    var current = tree.openElements.last;
    if (current.localName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError(current.sourceSpan, "eof-in-select");
    }
    return false;
  }

  /// Inserts the token's text, except that a token whose data is exactly
  /// U+0000 is dropped.
  Token processCharacters(CharactersToken token) {
    if (token.data != "\u0000") {
      tree.insertText(token.data, token.span);
    }
    return null;
  }

  /// Pops the current node when its local name is [name].
  void _popCurrentIfNamed(String name) {
    if (tree.openElements.last.localName == name) {
      tree.openElements.removeLast();
    }
  }

  void startTagOption(StartTagToken token) {
    // We need to imply </option> if <option> is the current node.
    _popCurrentIfNamed("option");
    tree.insertElement(token);
  }

  /// Implies `</option>` and then `</optgroup>` where they are the current
  /// node, then inserts the new `optgroup`.
  void startTagOptgroup(StartTagToken token) {
    _popCurrentIfNamed("option");
    _popCurrentIfNamed("optgroup");
    tree.insertElement(token);
  }

  /// A nested `<select>` start tag is reported and treated as `</select>`.
  void startTagSelect(StartTagToken token) {
    parser.parseError(token.span, "unexpected-select-in-select");
    endTagSelect(new EndTagToken("select"));
  }

  /// `input`, `keygen` and `textarea` close the select (when one is in select
  /// scope) and are then returned for reprocessing.
  Token startTagInput(StartTagToken token) {
    parser.parseError(token.span, "unexpected-input-in-select");
    if (!tree.elementInScope("select", variant: "select")) {
      assert(parser.innerHTMLMode);
      return null;
    }
    endTagSelect(new EndTagToken("select"));
    return token;
  }

  Token startTagScript(StartTagToken token) =>
      parser._inHeadPhase.processStartTag(token);

  /// Any other start tag is reported and dropped.
  Token startTagOther(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-in-select",
        {"name": token.name});
    return null;
  }

  /// Pops the current node if it is an `option`; otherwise reports an error.
  void endTagOption(EndTagToken token) {
    if (tree.openElements.last.localName != "option") {
      parser.parseError(token.span, "unexpected-end-tag-in-select",
          {"name": "option"});
      return;
    }
    tree.openElements.removeLast();
  }

  void endTagOptgroup(EndTagToken token) {
    // </optgroup> implicitly closes <option>
    var optionInsideOptgroup = tree.openElements.last.localName == "option" &&
        tree.openElements[tree.openElements.length - 2].localName == "optgroup";
    if (optionInsideOptgroup) {
      tree.openElements.removeLast();
    }
    // It also closes </optgroup>
    if (tree.openElements.last.localName != "optgroup") {
      parser.parseError(token.span, "unexpected-end-tag-in-select",
          {"name": "optgroup"});
      return;
    }
    // But nothing else
    tree.openElements.removeLast();
  }

  /// Closes the select and resets the insertion mode, or reports an error
  /// when no `select` is in select scope (asserted to be the innerHTML case).
  void endTagSelect(EndTagToken token) {
    if (!tree.elementInScope("select", variant: "select")) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return;
    }
    popOpenElementsUntil("select");
    parser.resetInsertionMode();
  }

  /// Any other end tag is reported and dropped.
  void endTagOther(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag-in-select",
        {"name": token.name});
  }
}
2860
2861
/// Variant of the in-select phase (per its name, for a `select` inside a
/// table): table-structure tags close the select, everything else is
/// delegated to the in-select phase.
class InSelectInTablePhase extends Phase {
  /// Tag names that are routed to [startTagTable] / [endTagTable].
  static const _tableTags = const [
    "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"
  ];

  InSelectInTablePhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    if (_tableTags.contains(token.name)) return startTagTable(token);
    return startTagOther(token);
  }

  processEndTag(EndTagToken token) {
    if (_tableTags.contains(token.name)) return endTagTable(token);
    return endTagOther(token);
  }

  /// Lets the in-select phase handle EOF; always returns `false`.
  bool processEOF() {
    parser._inSelectPhase.processEOF();
    return false;
  }

  Token processCharacters(CharactersToken token) =>
      parser._inSelectPhase.processCharacters(token);

  /// Reports the tag, processes a synthetic `</select>` and returns [token]
  /// for reprocessing.
  Token startTagTable(StartTagToken token) {
    parser.parseError(token.span,
        "unexpected-table-element-start-tag-in-select-in-table",
        {"name": token.name});
    var impliedEnd = new EndTagToken("select");
    endTagOther(impliedEnd);
    return token;
  }

  Token startTagOther(StartTagToken token) =>
      parser._inSelectPhase.processStartTag(token);

  /// Reports the tag; when the named element is in table scope, processes a
  /// synthetic `</select>` and returns [token] for reprocessing, otherwise
  /// returns `null`.
  Token endTagTable(EndTagToken token) {
    parser.parseError(token.span,
        "unexpected-table-element-end-tag-in-select-in-table",
        {"name": token.name});
    if (!tree.elementInScope(token.name, variant: "table")) return null;
    endTagOther(new EndTagToken("select"));
    return token;
  }

  Token endTagOther(EndTagToken token) =>
      parser._inSelectPhase.processEndTag(token);
}
2919
2920
2921 class InForeignContentPhase extends Phase {
// TODO(jmesserly): this is sorted so we could binary search.
/// Tag names checked by `processStartTag` below: a start tag with one of
/// these names is reported as unexpected in foreign content and handed back
/// for reprocessing.
static const breakoutElements = const [
  'b', 'big', 'blockquote', 'body', 'br',
  'center', 'code',
  'dd', 'div', 'dl', 'dt',
  'em', 'embed',
  'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'hr',
  'i', 'img',
  'li', 'listing',
  'menu', 'meta',
  'nobr',
  'ol',
  'p', 'pre',
  'ruby',
  's', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
  'table', 'tt',
  'u', 'ul',
  'var'
];

InForeignContentPhase(parser) : super(parser);
2932
/// Rewrites `token.name` to its mixed-case SVG spelling when the (lower-case)
/// name is one of the known SVG element names; other names are left alone.
void adjustSVGTagNames(token) {
  const camelCased = const {
    "altglyph": "altGlyph",
    "altglyphdef": "altGlyphDef",
    "altglyphitem": "altGlyphItem",
    "animatecolor": "animateColor",
    "animatemotion": "animateMotion",
    "animatetransform": "animateTransform",
    "clippath": "clipPath",
    "feblend": "feBlend",
    "fecolormatrix": "feColorMatrix",
    "fecomponenttransfer": "feComponentTransfer",
    "fecomposite": "feComposite",
    "feconvolvematrix": "feConvolveMatrix",
    "fediffuselighting": "feDiffuseLighting",
    "fedisplacementmap": "feDisplacementMap",
    "fedistantlight": "feDistantLight",
    "feflood": "feFlood",
    "fefunca": "feFuncA",
    "fefuncb": "feFuncB",
    "fefuncg": "feFuncG",
    "fefuncr": "feFuncR",
    "fegaussianblur": "feGaussianBlur",
    "feimage": "feImage",
    "femerge": "feMerge",
    "femergenode": "feMergeNode",
    "femorphology": "feMorphology",
    "feoffset": "feOffset",
    "fepointlight": "fePointLight",
    "fespecularlighting": "feSpecularLighting",
    "fespotlight": "feSpotLight",
    "fetile": "feTile",
    "feturbulence": "feTurbulence",
    "foreignobject": "foreignObject",
    "glyphref": "glyphRef",
    "lineargradient": "linearGradient",
    "radialgradient": "radialGradient",
    "textpath": "textPath"
  };

  var adjusted = camelCased[token.name];
  if (adjusted == null) return;
  token.name = adjusted;
}
2978
2979 Token processCharacters(CharactersToken token) {
2980 if (token.data == "\u0000") {
2981 token.data = "\uFFFD";
2982 } else if (parser.framesetOK && !allWhitespace(token.data)) {
2983 parser.framesetOK = false;
2984 }
2985 return super.processCharacters(token);
2986 }
2987
2988 Token processStartTag(StartTagToken token) {
2989 var currentNode = tree.openElements.last;
2990 if (breakoutElements.contains(token.name) ||
2991 (token.name == "font" &&
2992 (token.data.containsKey("color") ||
2993 token.data.containsKey("face") ||
2994 token.data.containsKey("size")))) {
2995
2996 parser.parseError(token.span,
2997 "unexpected-html-element-in-foreign-content", {'name': token.name});
2998 while (tree.openElements.last.namespaceUri !=
2999 tree.defaultNamespace &&
3000 !parser.isHTMLIntegrationPoint(tree.openElements.last) &&
3001 !parser.isMathMLTextIntegrationPoint(tree.openElements.last)) {
3002 tree.openElements.removeLast();
3003 }
3004 return token;
3005
3006 } else {
3007 if (currentNode.namespaceUri == Namespaces.mathml) {
3008 parser.adjustMathMLAttributes(token);
3009 } else if (currentNode.namespaceUri == Namespaces.svg) {
3010 adjustSVGTagNames(token);
3011 parser.adjustSVGAttributes(token);
3012 }
3013 parser.adjustForeignAttributes(token);
3014 token.namespace = currentNode.namespaceUri;
3015 tree.insertElement(token);
3016 if (token.selfClosing) {
3017 tree.openElements.removeLast();
3018 token.selfClosingAcknowledged = true;
3019 }
3020 return null;
3021 }
3022 }
3023
3024 Token processEndTag(EndTagToken token) {
3025 var nodeIndex = tree.openElements.length - 1;
3026 var node = tree.openElements.last;
3027 if (node.localName != token.name) {
3028 parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
3029 }
3030
3031 var newToken = null;
3032 while (true) {
3033 if (asciiUpper2Lower(node.localName) == token.name) {
3034 //XXX this isn't in the spec but it seems necessary
3035 if (parser.phase == parser._inTableTextPhase) {
3036 InTableTextPhase inTableText = parser.phase;
3037 inTableText.flushCharacters();
3038 parser.phase = inTableText.originalPhase;
3039 }
3040 while (tree.openElements.removeLast() != node) {
3041 assert(tree.openElements.length > 0);
3042 }
3043 newToken = null;
3044 break;
3045 }
3046 nodeIndex -= 1;
3047
3048 node = tree.openElements[nodeIndex];
3049 if (node.namespaceUri != tree.defaultNamespace) {
3050 continue;
3051 } else {
3052 newToken = parser.phase.processEndTag(token);
3053 break;
3054 }
3055 }
3056 return newToken;
3057 }
3058 }
3059
3060
/// Phase whose handlers, apart from `html` tags and comments, report a parse
/// error, switch the parser to its in-body phase and return the token.
class AfterBodyPhase extends Phase {
  AfterBodyPhase(parser) : super(parser);

  /// Dispatches a start tag: `html` goes to [startTagHtml], anything else to
  /// [startTagOther].
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      default:
        return startTagOther(token);
    }
  }

  /// Dispatches an end tag: `html` goes to [endTagHtml], anything else to
  /// [endTagOther].
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "html":
        return endTagHtml(token);
      default:
        return endTagOther(token);
    }
  }

  /// Always returns `false` (the original comment reads "Stop parsing").
  bool processEOF() {
    return false;
  }

  /// Inserts the comment under the first open element rather than under the
  /// current node; the original comment says that element is `<html>`.
  Token processComment(CommentToken token) {
    var firstOpenElement = tree.openElements[0];
    tree.insertComment(token, firstOpenElement);
    return null;
  }

  /// Reports "unexpected-char-after-body", switches to the in-body phase and
  /// returns [token].
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "unexpected-char-after-body");
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /// Delegates an `html` start tag to the in-body phase.
  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// Reports "unexpected-start-tag-after-body", switches to the in-body phase
  /// and returns [token].
  Token startTagOther(StartTagToken token) {
    var details = {"name": token.name};
    parser.parseError(token.span, "unexpected-start-tag-after-body", details);
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /// In innerHTML mode reports a parse error; otherwise moves the parser to
  /// its after-after-body phase.
  void endTagHtml(Token token) {
    if (parser.innerHTMLMode) {
      parser.parseError(token.span, "unexpected-end-tag-after-body-innerhtml");
      return;
    }
    parser.phase = parser._afterAfterBodyPhase;
  }

  /// Reports "unexpected-end-tag-after-body", switches to the in-body phase
  /// and returns [token].
  Token endTagOther(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError(token.span, "unexpected-end-tag-after-body", details);
    parser.phase = parser._inBodyPhase;
    return token;
  }
}
3116
/// Phase that handles `frameset`, `frame` and `noframes` tags and reports a
/// parse error for characters and for every other tag except an `html` start
/// tag.
class InFramesetPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
  InFramesetPhase(parser) : super(parser);

  /// Routes a start tag to the handler for its name.
  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "frameset") return startTagFrameset(token);
    if (name == "frame") return startTagFrame(token);
    if (name == "noframes") return startTagNoframes(token);
    return startTagOther(token);
  }

  /// Routes an end tag: `frameset` to [endTagFrameset], the rest to
  /// [endTagOther].
  processEndTag(EndTagToken token) {
    if (token.name == "frameset") return endTagFrameset(token);
    return endTagOther(token);
  }

  /// Reports "eof-in-frameset" unless the current node is `html` (in which
  /// case innerHTML mode is asserted); always returns `false`.
  bool processEOF() {
    var current = tree.openElements.last;
    if (current.localName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError(current.sourceSpan, "eof-in-frameset");
    }
    return false;
  }

  /// Reports "unexpected-char-in-frameset" and returns `null`.
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "unexpected-char-in-frameset");
    return null;
  }

  /// Inserts a nested `frameset` element.
  void startTagFrameset(StartTagToken token) {
    tree.insertElement(token);
  }

  /// Inserts a `frame` element and immediately pops it from the stack of
  /// open elements.
  void startTagFrame(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
  }

  /// Delegates a `noframes` start tag to the in-body phase.
  Token startTagNoframes(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// Reports "unexpected-start-tag-in-frameset" and returns `null`.
  Token startTagOther(StartTagToken token) {
    var details = {"name": token.name};
    parser.parseError(token.span, "unexpected-start-tag-in-frameset", details);
    return null;
  }

  /// Pops the current `frameset`, or reports a parse error when the current
  /// node is `html`, then possibly switches to the after-frameset phase.
  void endTagFrameset(EndTagToken token) {
    if (tree.openElements.last.localName != "html") {
      tree.openElements.removeLast();
    } else {
      // innerHTML case
      parser.parseError(token.span,
          "unexpected-frameset-in-frameset-innerhtml");
    }

    if (parser.innerHTMLMode) return;

    // Outside innerHTML mode, switch phases once the current node is no
    // longer a "frameset" element.
    if (tree.openElements.last.localName != "frameset") {
      parser.phase = parser._afterFramesetPhase;
    }
  }

  /// Reports "unexpected-end-tag-in-frameset".
  void endTagOther(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError(token.span, "unexpected-end-tag-in-frameset", details);
  }
}
3193
3194
/// Phase that accepts `html` and `noframes` start tags and the `html` end
/// tag, and reports a parse error for characters and every other tag.
class AfterFramesetPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#after3
  AfterFramesetPhase(parser) : super(parser);

  /// Routes a start tag to the handler for its name.
  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "noframes") return startTagNoframes(token);
    return startTagOther(token);
  }

  /// Routes an end tag: `html` to [endTagHtml], the rest to [endTagOther].
  processEndTag(EndTagToken token) {
    if (token.name == "html") return endTagHtml(token);
    return endTagOther(token);
  }

  /// Always returns `false` (the original comment reads "Stop parsing").
  bool processEOF() {
    return false;
  }

  /// Reports "unexpected-char-after-frameset" and returns `null`.
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "unexpected-char-after-frameset");
    return null;
  }

  /// Delegates a `noframes` start tag to the in-head phase.
  Token startTagNoframes(StartTagToken token) =>
      parser._inHeadPhase.processStartTag(token);

  /// Reports "unexpected-start-tag-after-frameset".
  void startTagOther(StartTagToken token) {
    var details = {"name": token.name};
    parser.parseError(
        token.span, "unexpected-start-tag-after-frameset", details);
  }

  /// Moves the parser to its after-after-frameset phase.
  void endTagHtml(EndTagToken token) {
    parser.phase = parser._afterAfterFramesetPhase;
  }

  /// Reports "unexpected-end-tag-after-frameset".
  void endTagOther(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError(token.span, "unexpected-end-tag-after-frameset", details);
  }
}
3240
3241
/// Phase in which comments are attached to the document, and characters and
/// tags other than an `html` start tag are parse errors that switch the
/// parser back to its in-body phase.
class AfterAfterBodyPhase extends Phase {
  AfterAfterBodyPhase(parser) : super(parser);

  /// Routes a start tag: `html` to [startTagHtml], the rest to
  /// [startTagOther].
  processStartTag(StartTagToken token) {
    return token.name == 'html' ? startTagHtml(token) : startTagOther(token);
  }

  /// Always returns `false`.
  bool processEOF() {
    return false;
  }

  /// Inserts the comment as a child of the document and returns `null`.
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  /// Delegates whitespace to the in-body phase.
  Token processSpaceCharacters(SpaceCharactersToken token) =>
      parser._inBodyPhase.processSpaceCharacters(token);

  /// Reports "expected-eof-but-got-char", switches to the in-body phase and
  /// returns [token].
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "expected-eof-but-got-char");
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /// Delegates an `html` start tag to the in-body phase.
  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// Reports "expected-eof-but-got-start-tag", switches to the in-body phase
  /// and returns [token].
  Token startTagOther(StartTagToken token) {
    var details = {"name": token.name};
    parser.parseError(token.span, "expected-eof-but-got-start-tag", details);
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /// Reports "expected-eof-but-got-end-tag", switches to the in-body phase
  /// and returns [token].
  Token processEndTag(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError(token.span, "expected-eof-but-got-end-tag", details);
    parser.phase = parser._inBodyPhase;
    return token;
  }
}
3285
/// Phase in which comments are attached to the document, `html` and
/// `noframes` start tags are delegated to other phases, and characters and
/// all other tags are reported as parse errors and dropped.
class AfterAfterFramesetPhase extends Phase {
  AfterAfterFramesetPhase(parser) : super(parser);

  /// Routes a start tag to the handler for its name.
  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "noframes") return startTagNoFrames(token);
    return startTagOther(token);
  }

  /// Always returns `false`.
  bool processEOF() {
    return false;
  }

  /// Inserts the comment as a child of the document and returns `null`.
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  /// Delegates whitespace to the in-body phase.
  Token processSpaceCharacters(SpaceCharactersToken token) =>
      parser._inBodyPhase.processSpaceCharacters(token);

  /// Reports "expected-eof-but-got-char" and returns `null`.
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "expected-eof-but-got-char");
    return null;
  }

  /// Delegates an `html` start tag to the in-body phase.
  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// Delegates a `noframes` start tag to the in-head phase.
  Token startTagNoFrames(StartTagToken token) =>
      parser._inHeadPhase.processStartTag(token);

  /// Reports "expected-eof-but-got-start-tag".
  void startTagOther(StartTagToken token) {
    var details = {"name": token.name};
    parser.parseError(token.span, "expected-eof-but-got-start-tag", details);
  }

  /// Reports "expected-eof-but-got-end-tag" and returns `null`.
  Token processEndTag(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError(token.span, "expected-eof-but-got-end-tag", details);
    return null;
  }
}
3332
3333
/// An error found in a parsed document.
class ParseError implements SourceSpanException {
  /// Key used to look up the message template in [errorMessages].
  final String errorCode;

  /// The source span this error is attached to.
  final SourceSpan span;

  /// Values passed to [formatStr] together with the message template.
  final Map data;

  ParseError(this.errorCode, this.span, this.data);

  /// The line of the start of [span].
  int get line {
    return span.start.line;
  }

  /// The column of the start of [span].
  int get column {
    return span.start.column;
  }

  /// The human readable message for this error, without span information.
  ///
  /// Use [toString] to get a message that also includes the span; when the
  /// span has no source url, [toString] additionally prefixes the result
  /// with 'ParserError'.
  String get message {
    var template = errorMessages[errorCode];
    return formatStr(template, data);
  }

  /// Returns [message] rendered through `span.message`, forwarding [color].
  String toString({color}) {
    var located = span.message(message, color: color);
    if (span.sourceUrl == null) {
      return 'ParserError on $located';
    }
    return 'On $located';
  }
}
3359
3360
/// Returns the (namespace, localName) pair for [e], substituting
/// [Namespaces.html] when the element has no namespace.
Pair<String, String> getElementNameTuple(Element e) {
  final elementNamespace = e.namespaceUri;
  final namespace =
      elementNamespace == null ? Namespaces.html : elementNamespace;
  return new Pair(namespace, e.localName);
}
OLDNEW
« no previous file with comments | « observatory_pub_packages/html5lib/dom_parsing.dart ('k') | observatory_pub_packages/html5lib/parser_console.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698