OLD | NEW |
| (Empty) |
1 /// This library has a parser for HTML5 documents, that lets you parse HTML | |
2 /// easily from a script or server side application: | |
3 /// | |
4 /// import 'package:html/parser.dart' show parse; | |
5 /// import 'package:html/dom.dart'; | |
6 /// main() { | |
7 /// var document = parse( | |
8 /// '<body>Hello world! <a href="www.html5rocks.com">HTML5 rocks!'); | |
9 /// print(document.outerHtml); | |
10 /// } | |
11 /// | |
12 /// The resulting document you get back has a DOM-like API for easy tree | |
13 /// traversal and manipulation. | |
14 library parser; | |
15 | |
16 import 'dart:collection'; | |
17 import 'dart:math'; | |
18 import 'package:source_span/source_span.dart'; | |
19 | |
20 import 'src/treebuilder.dart'; | |
21 import 'src/constants.dart'; | |
22 import 'src/encoding_parser.dart'; | |
23 import 'src/token.dart'; | |
24 import 'src/tokenizer.dart'; | |
25 import 'src/utils.dart'; | |
26 import 'dom.dart'; | |
27 | |
/// Parses [input] as a complete HTML5 document and returns the resulting tree.
///
/// The [input] can be a [String], a [List<int>] of bytes or an
/// [HtmlTokenizer].
///
/// If [input] is not a [HtmlTokenizer], you can optionally specify the file's
/// [encoding], which must be a string. If specified, that encoding will be
/// used regardless of any BOM or later declaration (such as in a meta
/// element).
///
/// Set [generateSpans] if you want to generate [SourceSpan]s, otherwise the
/// [Node.sourceSpan] property will be `null`. When using [generateSpans] you
/// can additionally pass [sourceUrl] to indicate where the [input] was
/// extracted from.
Document parse(input,
        {String encoding, bool generateSpans: false, String sourceUrl}) =>
    new HtmlParser(input,
        encoding: encoding,
        generateSpans: generateSpans,
        sourceUrl: sourceUrl).parse();
45 | |
/// Parses [input] as an HTML5 document fragment and returns the resulting
/// tree.
///
/// The [input] can be a [String], a [List<int>] of bytes or an
/// [HtmlTokenizer]. The [container] element can optionally be specified,
/// otherwise it defaults to "div".
///
/// If [input] is not a [HtmlTokenizer], you can optionally specify the file's
/// [encoding], which must be a string. If specified, that encoding will be
/// used, regardless of any BOM or later declaration (such as in a meta
/// element).
///
/// Set [generateSpans] if you want to generate [SourceSpan]s, otherwise the
/// [Node.sourceSpan] property will be `null`. When using [generateSpans] you
/// can additionally pass [sourceUrl] to indicate where the [input] was
/// extracted from.
DocumentFragment parseFragment(input, {String container: "div", String encoding,
        bool generateSpans: false, String sourceUrl}) =>
    new HtmlParser(input,
        encoding: encoding,
        generateSpans: generateSpans,
        sourceUrl: sourceUrl).parseFragment(container);
64 | |
/// Parser for HTML, which generates a tree structure from a stream of
/// (possibly malformed) characters.
///
/// Tokens are pulled from [tokenizer] and dispatched to the currently active
/// [Phase]; the phases build the document through [tree]. Use [parse] for a
/// whole document or [parseFragment] for a fragment.
class HtmlParser {
  /// Raise an exception on the first error encountered.
  final bool strict;

  /// True to generate [SourceSpan]s for the [Node.sourceSpan] property.
  final bool generateSpans;

  /// The source of tokens. Either the constructor's `input` itself (when it
  /// is an [HtmlTokenizer]) or a tokenizer created from that input.
  final HtmlTokenizer tokenizer;

  /// The builder the phases use to construct the tree. Defaults to
  /// `new TreeBuilder(true)` when none is passed to the constructor.
  final TreeBuilder tree;

  /// Parse errors recorded by [parseError]. Cleared by [reset].
  final List<ParseError> errors = <ParseError>[];

  // NOTE(review): not read or written anywhere in the visible part of this
  // file; [innerHTML] is what actually holds the fragment container name.
  String container;

  /// Set to true by [BeforeHtmlPhase.processStartTag] when the triggering
  /// start tag is `html`, and reset to false by [Phase.startTagHtml] and
  /// [reset].
  bool firstStartTag = false;

  // TODO(jmesserly): use enum?
  /// "quirks" / "limited quirks" / "no quirks"
  String compatMode = "no quirks";

  /// innerHTML container when parsing document fragment.
  ///
  /// `null` when parsing a whole document; see [innerHTMLMode].
  String innerHTML;

  /// The phase that [mainLoop] dispatches tokens to (unless the token is in
  /// foreign content, see [inForeignContent]).
  Phase phase;

  // Reset to null by [reset]; usage is outside the visible part of the file.
  Phase lastPhase;

  /// The phase that was active when [parseRCDataRawtext] switched to the text
  /// phase.
  Phase originalPhase;

  // Reset to null by [reset]; usage is outside the visible part of the file.
  Phase beforeRCDataPhase;

  // Set to true by [reset]; usage is outside the visible part of the file.
  bool framesetOK;

  // These fields hold the different phase singletons. At any given time one
  // of them will be active.
  InitialPhase _initialPhase;
  BeforeHtmlPhase _beforeHtmlPhase;
  BeforeHeadPhase _beforeHeadPhase;
  InHeadPhase _inHeadPhase;
  AfterHeadPhase _afterHeadPhase;
  InBodyPhase _inBodyPhase;
  TextPhase _textPhase;
  InTablePhase _inTablePhase;
  InTableTextPhase _inTableTextPhase;
  InCaptionPhase _inCaptionPhase;
  InColumnGroupPhase _inColumnGroupPhase;
  InTableBodyPhase _inTableBodyPhase;
  InRowPhase _inRowPhase;
  InCellPhase _inCellPhase;
  InSelectPhase _inSelectPhase;
  InSelectInTablePhase _inSelectInTablePhase;
  InForeignContentPhase _inForeignContentPhase;
  AfterBodyPhase _afterBodyPhase;
  InFramesetPhase _inFramesetPhase;
  AfterFramesetPhase _afterFramesetPhase;
  AfterAfterBodyPhase _afterAfterBodyPhase;
  AfterAfterFramesetPhase _afterAfterFramesetPhase;

  /// Create an HtmlParser and configure the [tree] builder and [strict] mode.
  /// The [input] can be a [String], [List<int>] of bytes or an [HtmlTokenizer].
  ///
  /// If [input] is not a [HtmlTokenizer], you can specify a few more arguments.
  ///
  /// The [encoding] must be a string that indicates the encoding. If specified,
  /// that encoding will be used, regardless of any BOM or later declaration
  /// (such as in a meta element).
  ///
  /// Set [parseMeta] to false if you want to disable parsing the meta element.
  ///
  /// Set [lowercaseElementName] or [lowercaseAttrName] to false to disable the
  /// automatic conversion of element and attribute names to lower case. Note
  /// that the standard way to parse HTML is to lowercase, which is what the
  /// browser DOM will do if you request [Node.outerHTML], for example.
  HtmlParser(input, {String encoding, bool parseMeta: true,
      bool lowercaseElementName: true, bool lowercaseAttrName: true,
      this.strict: false, bool generateSpans: false, String sourceUrl,
      TreeBuilder tree})
      : generateSpans = generateSpans,
        tree = tree != null ? tree : new TreeBuilder(true),
        tokenizer = (input is HtmlTokenizer
            ? input
            : new HtmlTokenizer(input,
                encoding: encoding,
                parseMeta: parseMeta,
                lowercaseElementName: lowercaseElementName,
                lowercaseAttrName: lowercaseAttrName,
                generateSpans: generateSpans,
                sourceUrl: sourceUrl)) {
    // Give the tokenizer a back-reference to this parser.
    tokenizer.parser = this;
    _initialPhase = new InitialPhase(this);
    _beforeHtmlPhase = new BeforeHtmlPhase(this);
    _beforeHeadPhase = new BeforeHeadPhase(this);
    _inHeadPhase = new InHeadPhase(this);
    // TODO(jmesserly): html5lib did not implement the no script parsing mode
    // More information here:
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#scripting-flag
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inheadnoscript
    // "inHeadNoscript": new InHeadNoScriptPhase(this);
    _afterHeadPhase = new AfterHeadPhase(this);
    _inBodyPhase = new InBodyPhase(this);
    _textPhase = new TextPhase(this);
    _inTablePhase = new InTablePhase(this);
    _inTableTextPhase = new InTableTextPhase(this);
    _inCaptionPhase = new InCaptionPhase(this);
    _inColumnGroupPhase = new InColumnGroupPhase(this);
    _inTableBodyPhase = new InTableBodyPhase(this);
    _inRowPhase = new InRowPhase(this);
    _inCellPhase = new InCellPhase(this);
    _inSelectPhase = new InSelectPhase(this);
    _inSelectInTablePhase = new InSelectInTablePhase(this);
    _inForeignContentPhase = new InForeignContentPhase(this);
    _afterBodyPhase = new AfterBodyPhase(this);
    _inFramesetPhase = new InFramesetPhase(this);
    _afterFramesetPhase = new AfterFramesetPhase(this);
    _afterAfterBodyPhase = new AfterAfterBodyPhase(this);
    _afterAfterFramesetPhase = new AfterAfterFramesetPhase(this);
  }

  /// Whether a document fragment is being parsed, i.e. [innerHTML] is set.
  bool get innerHTMLMode => innerHTML != null;

  /// Parse an html5 document into a tree.
  /// After parsing, [errors] will be populated with parse errors, if any.
  Document parse() {
    // Clearing innerHTML selects whole-document mode (see [innerHTMLMode]).
    innerHTML = null;
    _parse();
    return tree.getDocument();
  }

  /// Parse an html5 document fragment into a tree.
  /// Pass a [container] to change the type of the containing element.
  /// After parsing, [errors] will be populated with parse errors, if any.
  ///
  /// Throws an [ArgumentError] if [container] is `null`.
  DocumentFragment parseFragment([String container = "div"]) {
    if (container == null) throw new ArgumentError('container');
    // The container name is stored lower-cased; [reset] and
    // [resetInsertionMode] compare it against lower-case element names.
    innerHTML = container.toLowerCase();
    _parse();
    return tree.getFragment();
  }

  /// Resets all state and runs [mainLoop], starting over from scratch each
  /// time a [ReparseException] is thrown.
  void _parse() {
    reset();

    while (true) {
      try {
        mainLoop();
        break;
      } on ReparseException catch (e) {
        // Note: this happens if we start parsing but the character encoding
        // changes. So we should only need to restart very early in the parse.
        reset();
      }
    }
  }

  /// Returns the tokenizer, tree builder and parser state to their initial
  /// values and selects the starting [phase].
  ///
  /// In fragment mode ([innerHTMLMode]) this also picks the tokenizer state
  /// based on the container name, inserts the root `html` element and derives
  /// the phase through [resetInsertionMode].
  void reset() {
    tokenizer.reset();

    tree.reset();
    firstStartTag = false;
    errors.clear();
    // "quirks" / "limited quirks" / "no quirks"
    compatMode = "no quirks";

    if (innerHTMLMode) {
      // NOTE(review): `cdataElements` selects the RCDATA state and
      // `rcdataElements` selects the RAWTEXT state. The sets are declared
      // outside this file; the naming looks inverted, so confirm against
      // their definitions before "fixing" it.
      if (cdataElements.contains(innerHTML)) {
        tokenizer.state = tokenizer.rcdataState;
      } else if (rcdataElements.contains(innerHTML)) {
        tokenizer.state = tokenizer.rawtextState;
      } else if (innerHTML == 'plaintext') {
        tokenizer.state = tokenizer.plaintextState;
      } else {
        // state already is data state
        // tokenizer.state = tokenizer.dataState;
      }
      phase = _beforeHtmlPhase;
      _beforeHtmlPhase.insertHtmlElement();
      resetInsertionMode();
    } else {
      phase = _initialPhase;
    }

    lastPhase = null;
    beforeRCDataPhase = null;
    framesetOK = true;
  }

  /// Whether [element] is an HTML integration point.
  ///
  /// A MathML `annotation-xml` element qualifies when its `encoding`
  /// attribute is (ASCII case-insensitively) `text/html` or
  /// `application/xhtml+xml`; any other element qualifies when its
  /// (namespace, local name) pair is in `htmlIntegrationPointElements`.
  bool isHTMLIntegrationPoint(Element element) {
    if (element.localName == "annotation-xml" &&
        element.namespaceUri == Namespaces.mathml) {
      var enc = element.attributes["encoding"];
      if (enc != null) enc = asciiUpper2Lower(enc);
      return enc == "text/html" || enc == "application/xhtml+xml";
    } else {
      return htmlIntegrationPointElements
          .contains(new Pair(element.namespaceUri, element.localName));
    }
  }

  /// Whether [element]'s (namespace, local name) pair is in
  /// `mathmlTextIntegrationPointElements`.
  bool isMathMLTextIntegrationPoint(Element element) {
    return mathmlTextIntegrationPointElements
        .contains(new Pair(element.namespaceUri, element.localName));
  }

  /// Whether [token], whose kind is [type], must be handled by the foreign
  /// content phase instead of the current [phase].
  ///
  /// The decision is based on the last entry of `tree.openElements`.
  bool inForeignContent(Token token, int type) {
    if (tree.openElements.length == 0) return false;

    var node = tree.openElements.last;
    // Elements in the default namespace are never foreign content.
    if (node.namespaceUri == tree.defaultNamespace) return false;

    if (isMathMLTextIntegrationPoint(node)) {
      // Start tags other than mglyph/malignmark, and all character tokens,
      // are handled by the regular phase.
      if (type == TokenKind.startTag &&
          (token as StartTagToken).name != "mglyph" &&
          (token as StartTagToken).name != "malignmark") {
        return false;
      }
      if (type == TokenKind.characters || type == TokenKind.spaceCharacters) {
        return false;
      }
    }

    // An <svg> start tag inside annotation-xml is handled by the regular
    // phase.
    if (node.localName == "annotation-xml" &&
        type == TokenKind.startTag &&
        (token as StartTagToken).name == "svg") {
      return false;
    }

    if (isHTMLIntegrationPoint(node)) {
      if (type == TokenKind.startTag ||
          type == TokenKind.characters ||
          type == TokenKind.spaceCharacters) {
        return false;
      }
    }

    return true;
  }

  /// Pulls every token from [tokenizer], dispatches it to the active phase,
  /// and finally runs end-of-file processing.
  void mainLoop() {
    while (tokenizer.moveNext()) {
      var token = tokenizer.current;
      var newToken = token;
      int type;
      // A phase returns the token when it must be processed again (the phase
      // is re-read on every iteration, so it may have changed in the
      // meantime) and null once the token has been consumed.
      while (newToken != null) {
        type = newToken.kind;

        // Note: avoid "is" test here, see http://dartbug.com/4795
        if (type == TokenKind.parseError) {
          ParseErrorToken error = newToken;
          parseError(error.span, error.data, error.messageParams);
          newToken = null;
        } else {
          Phase phase_ = phase;
          if (inForeignContent(token, type)) {
            phase_ = _inForeignContentPhase;
          }

          switch (type) {
            case TokenKind.characters:
              newToken = phase_.processCharacters(newToken);
              break;
            case TokenKind.spaceCharacters:
              newToken = phase_.processSpaceCharacters(newToken);
              break;
            case TokenKind.startTag:
              newToken = phase_.processStartTag(newToken);
              break;
            case TokenKind.endTag:
              newToken = phase_.processEndTag(newToken);
              break;
            case TokenKind.comment:
              newToken = phase_.processComment(newToken);
              break;
            case TokenKind.doctype:
              newToken = phase_.processDoctype(newToken);
              break;
          }
        }
      }

      // A self-closing start tag that no phase acknowledged is an error.
      if (token is StartTagToken) {
        if (token.selfClosing && !token.selfClosingAcknowledged) {
          parseError(token.span, "non-void-element-with-trailing-solidus", {
            "name": token.name
          });
        }
      }
    }

    // When the loop finishes it's EOF
    var reprocess = true;
    var reprocessPhases = [];
    while (reprocess) {
      reprocessPhases.add(phase);
      reprocess = phase.processEOF();
      if (reprocess) {
        // A phase asking for EOF to be reprocessed must have switched to a
        // phase that has not handled EOF yet, otherwise this would loop
        // forever.
        assert(!reprocessPhases.contains(phase));
      }
    }
  }

  /// The last span available. Used for EOF errors if we don't have something
  /// better.
  ///
  /// Returns `null` when the tokenizer's stream has no `fileInfo`.
  SourceSpan get _lastSpan {
    if (tokenizer.stream.fileInfo == null) return null;
    var pos = tokenizer.stream.position;
    return tokenizer.stream.fileInfo.location(pos).pointSpan();
  }

  /// Records a [ParseError] built from [errorcode], [span] and [datavars] in
  /// [errors], and throws it when [strict] is set.
  ///
  /// When [generateSpans] is false and no [span] is given, [_lastSpan] is
  /// used instead.
  void parseError(SourceSpan span, String errorcode,
      [Map datavars = const {}]) {
    if (!generateSpans && span == null) {
      span = _lastSpan;
    }

    var err = new ParseError(errorcode, span, datavars);
    errors.add(err);
    if (strict) throw err;
  }

  /// Renames the lower-cased `definitionurl` attribute of [token] to
  /// `definitionURL`, if present.
  void adjustMathMLAttributes(StartTagToken token) {
    var orig = token.data.remove("definitionurl");
    if (orig != null) {
      token.data["definitionURL"] = orig;
    }
  }

  /// Renames lower-cased SVG attribute names on [token] to their mixed-case
  /// spelling, e.g. `viewbox` becomes `viewBox`.
  void adjustSVGAttributes(StartTagToken token) {
    final replacements = const {
      "attributename": "attributeName",
      "attributetype": "attributeType",
      "basefrequency": "baseFrequency",
      "baseprofile": "baseProfile",
      "calcmode": "calcMode",
      "clippathunits": "clipPathUnits",
      "contentscripttype": "contentScriptType",
      "contentstyletype": "contentStyleType",
      "diffuseconstant": "diffuseConstant",
      "edgemode": "edgeMode",
      "externalresourcesrequired": "externalResourcesRequired",
      "filterres": "filterRes",
      "filterunits": "filterUnits",
      "glyphref": "glyphRef",
      "gradienttransform": "gradientTransform",
      "gradientunits": "gradientUnits",
      "kernelmatrix": "kernelMatrix",
      "kernelunitlength": "kernelUnitLength",
      "keypoints": "keyPoints",
      "keysplines": "keySplines",
      "keytimes": "keyTimes",
      "lengthadjust": "lengthAdjust",
      "limitingconeangle": "limitingConeAngle",
      "markerheight": "markerHeight",
      "markerunits": "markerUnits",
      "markerwidth": "markerWidth",
      "maskcontentunits": "maskContentUnits",
      "maskunits": "maskUnits",
      "numoctaves": "numOctaves",
      "pathlength": "pathLength",
      "patterncontentunits": "patternContentUnits",
      "patterntransform": "patternTransform",
      "patternunits": "patternUnits",
      "pointsatx": "pointsAtX",
      "pointsaty": "pointsAtY",
      "pointsatz": "pointsAtZ",
      "preservealpha": "preserveAlpha",
      "preserveaspectratio": "preserveAspectRatio",
      "primitiveunits": "primitiveUnits",
      "refx": "refX",
      "refy": "refY",
      "repeatcount": "repeatCount",
      "repeatdur": "repeatDur",
      "requiredextensions": "requiredExtensions",
      "requiredfeatures": "requiredFeatures",
      "specularconstant": "specularConstant",
      "specularexponent": "specularExponent",
      "spreadmethod": "spreadMethod",
      "startoffset": "startOffset",
      "stddeviation": "stdDeviation",
      "stitchtiles": "stitchTiles",
      "surfacescale": "surfaceScale",
      "systemlanguage": "systemLanguage",
      "tablevalues": "tableValues",
      "targetx": "targetX",
      "targety": "targetY",
      "textlength": "textLength",
      "viewbox": "viewBox",
      "viewtarget": "viewTarget",
      "xchannelselector": "xChannelSelector",
      "ychannelselector": "yChannelSelector",
      "zoomandpan": "zoomAndPan"
    };
    // Iterate over a copy of the keys because the map is modified in the
    // loop body.
    for (var originalName in token.data.keys.toList()) {
      var svgName = replacements[originalName];
      if (svgName != null) {
        token.data[svgName] = token.data.remove(originalName);
      }
    }
  }

  /// Replaces the string keys of namespaced attributes on [token]
  /// (`xlink:*`, `xml:*`, `xmlns`, `xmlns:xlink`) with [AttributeName] keys
  /// carrying prefix, local name and namespace.
  void adjustForeignAttributes(StartTagToken token) {
    // TODO(jmesserly): I don't like mixing non-string objects with strings in
    // the Node.attributes Map. Is there another solution?
    final replacements = const {
      "xlink:actuate":
          const AttributeName("xlink", "actuate", Namespaces.xlink),
      "xlink:arcrole":
          const AttributeName("xlink", "arcrole", Namespaces.xlink),
      "xlink:href": const AttributeName("xlink", "href", Namespaces.xlink),
      "xlink:role": const AttributeName("xlink", "role", Namespaces.xlink),
      "xlink:show": const AttributeName("xlink", "show", Namespaces.xlink),
      "xlink:title": const AttributeName("xlink", "title", Namespaces.xlink),
      "xlink:type": const AttributeName("xlink", "type", Namespaces.xlink),
      "xml:base": const AttributeName("xml", "base", Namespaces.xml),
      "xml:lang": const AttributeName("xml", "lang", Namespaces.xml),
      "xml:space": const AttributeName("xml", "space", Namespaces.xml),
      "xmlns": const AttributeName(null, "xmlns", Namespaces.xmlns),
      "xmlns:xlink": const AttributeName("xmlns", "xlink", Namespaces.xmlns)
    };

    // Iterate over a copy of the keys because the map is modified in the
    // loop body.
    for (var originalName in token.data.keys.toList()) {
      var foreignName = replacements[originalName];
      if (foreignName != null) {
        token.data[foreignName] = token.data.remove(originalName);
      }
    }
  }

  /// Selects [phase] from the stack of open elements, walking from the most
  /// recently opened element towards the root.
  ///
  /// Falls back to the "in body" phase when no element decides the phase.
  void resetInsertionMode() {
    // The name of this method is mostly historical. (It's also used in the
    // specification.)
    for (var node in tree.openElements.reversed) {
      var nodeName = node.localName;
      bool last = node == tree.openElements[0];
      if (last) {
        // The bottom-most element stands in for the fragment's container.
        assert(innerHTMLMode);
        nodeName = innerHTML;
      }
      // Check for conditions that should only happen in the innerHTML
      // case
      switch (nodeName) {
        case "select":
        case "colgroup":
        case "head":
        case "html":
          assert(innerHTMLMode);
          break;
      }
      // Elements outside the default namespace never decide the phase,
      // except for the bottom-most one.
      if (!last && node.namespaceUri != tree.defaultNamespace) {
        continue;
      }
      switch (nodeName) {
        case "select":
          phase = _inSelectPhase;
          return;
        case "td":
          phase = _inCellPhase;
          return;
        case "th":
          phase = _inCellPhase;
          return;
        case "tr":
          phase = _inRowPhase;
          return;
        case "tbody":
          phase = _inTableBodyPhase;
          return;
        case "thead":
          phase = _inTableBodyPhase;
          return;
        case "tfoot":
          phase = _inTableBodyPhase;
          return;
        case "caption":
          phase = _inCaptionPhase;
          return;
        case "colgroup":
          phase = _inColumnGroupPhase;
          return;
        case "table":
          phase = _inTablePhase;
          return;
        case "head":
          phase = _inBodyPhase;
          return;
        case "body":
          phase = _inBodyPhase;
          return;
        case "frameset":
          phase = _inFramesetPhase;
          return;
        case "html":
          phase = _beforeHeadPhase;
          return;
      }
    }
    phase = _inBodyPhase;
  }

  /// Generic RCDATA/RAWTEXT Parsing algorithm
  /// [contentType] - RCDATA or RAWTEXT
  ///
  /// Inserts an element for [token], switches the tokenizer to the matching
  /// state, remembers the current phase in [originalPhase] and makes the text
  /// phase active.
  void parseRCDataRawtext(Token token, String contentType) {
    assert(contentType == "RAWTEXT" || contentType == "RCDATA");

    tree.insertElement(token);

    if (contentType == "RAWTEXT") {
      tokenizer.state = tokenizer.rawtextState;
    } else {
      tokenizer.state = tokenizer.rcdataState;
    }

    originalPhase = phase;
    phase = _textPhase;
  }
}
582 | |
/// Common superclass of the per-phase token handlers used by [HtmlParser].
///
/// Each `process*` method returns the token when it has to be processed
/// again, or `null` once the token has been consumed. [processEOF] returns
/// whether end-of-file handling has to be repeated.
class Phase {
  // Order should be (they can be omitted):
  // * EOF
  // * Comment
  // * Doctype
  // * SpaceCharacters
  // * Characters
  // * StartTag
  //   - startTag* methods
  // * EndTag
  //   - endTag* methods

  /// The parser this phase belongs to.
  final HtmlParser parser;

  /// The parser's tree builder, cached for convenience.
  final TreeBuilder tree;

  Phase(this.parser) : tree = parser.tree;

  /// Handles end of input. Must be overridden by concrete phases.
  bool processEOF() => throw new UnimplementedError();

  /// Appends the comment to the current (last opened) element.
  Token processComment(CommentToken token) {
    // For most phases the following is correct. Where it's not it will be
    // overridden.
    final currentNode = tree.openElements.last;
    tree.insertComment(token, currentNode);
    return null;
  }

  /// Reports a doctype at this point as a parse error and drops the token.
  Token processDoctype(DoctypeToken token) {
    parser.parseError(token.span, "unexpected-doctype");
    return null;
  }

  /// Inserts the token's text into the tree.
  Token processCharacters(CharactersToken token) {
    tree.insertText(token.data, token.span);
    return null;
  }

  /// Inserts the token's whitespace into the tree.
  Token processSpaceCharacters(SpaceCharactersToken token) {
    tree.insertText(token.data, token.span);
    return null;
  }

  /// Handles a start tag. Must be overridden by concrete phases.
  Token processStartTag(StartTagToken token) =>
      throw new UnimplementedError();

  /// Merges an `<html>` start tag into the existing root element: attributes
  /// the root does not have yet are copied over, existing ones are kept.
  Token startTagHtml(StartTagToken token) {
    final isHtmlTag = token.name == "html";
    if (parser.firstStartTag == false && isHtmlTag) {
      parser.parseError(token.span, "non-html-root");
    }
    // XXX Need a check here to see if the first start tag token emitted is
    // this token... If it's not, invoke parser.parseError().
    final root = tree.openElements[0];
    root.sourceSpan = token.span;
    for (var attr in token.data.keys) {
      root.attributes.putIfAbsent(attr, () => token.data[attr]);
    }
    parser.firstStartTag = false;
    return null;
  }

  /// Handles an end tag. Must be overridden by concrete phases.
  Token processEndTag(EndTagToken token) => throw new UnimplementedError();

  /// Helper method for popping openElements.
  ///
  /// Pops until an element whose local name equals the token's name has been
  /// removed, then records the token's span as that element's end span.
  void popOpenElementsUntil(EndTagToken token) {
    final String wanted = token.name;
    var popped;
    do {
      popped = tree.openElements.removeLast();
    } while (popped.localName != wanted);
    if (popped != null) {
      popped.endSourceSpan = token.span;
    }
  }
}
664 | |
/// The phase a whole-document parse starts in: it waits for the doctype,
/// derives the parser's compatibility mode from it and then hands over to
/// the "before html" phase.
class InitialPhase extends Phase {
  /// Lower-cased public identifier prefixes that select quirks mode.
  static const _quirksPublicIdPrefixes = const [
    "+//silmaril//dtd html pro v0r11 19970101//",
    "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
    "-//as//dtd html 3.0 aswedit + extensions//",
    "-//ietf//dtd html 2.0 level 1//",
    "-//ietf//dtd html 2.0 level 2//",
    "-//ietf//dtd html 2.0 strict level 1//",
    "-//ietf//dtd html 2.0 strict level 2//",
    "-//ietf//dtd html 2.0 strict//",
    "-//ietf//dtd html 2.0//",
    "-//ietf//dtd html 2.1e//",
    "-//ietf//dtd html 3.0//",
    "-//ietf//dtd html 3.2 final//",
    "-//ietf//dtd html 3.2//",
    "-//ietf//dtd html 3//",
    "-//ietf//dtd html level 0//",
    "-//ietf//dtd html level 1//",
    "-//ietf//dtd html level 2//",
    "-//ietf//dtd html level 3//",
    "-//ietf//dtd html strict level 0//",
    "-//ietf//dtd html strict level 1//",
    "-//ietf//dtd html strict level 2//",
    "-//ietf//dtd html strict level 3//",
    "-//ietf//dtd html strict//",
    "-//ietf//dtd html//",
    "-//metrius//dtd metrius presentational//",
    "-//microsoft//dtd internet explorer 2.0 html strict//",
    "-//microsoft//dtd internet explorer 2.0 html//",
    "-//microsoft//dtd internet explorer 2.0 tables//",
    "-//microsoft//dtd internet explorer 3.0 html strict//",
    "-//microsoft//dtd internet explorer 3.0 html//",
    "-//microsoft//dtd internet explorer 3.0 tables//",
    "-//netscape comm. corp.//dtd html//",
    "-//netscape comm. corp.//dtd strict html//",
    "-//o'reilly and associates//dtd html 2.0//",
    "-//o'reilly and associates//dtd html extended 1.0//",
    "-//o'reilly and associates//dtd html extended relaxed 1.0//",
    "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
    "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
    "-//spyglass//dtd html 2.0 extended//",
    "-//sq//dtd html 2.0 hotmetal + extensions//",
    "-//sun microsystems corp.//dtd hotjava html//",
    "-//sun microsystems corp.//dtd hotjava strict html//",
    "-//w3c//dtd html 3 1995-03-24//",
    "-//w3c//dtd html 3.2 draft//",
    "-//w3c//dtd html 3.2 final//",
    "-//w3c//dtd html 3.2//",
    "-//w3c//dtd html 3.2s draft//",
    "-//w3c//dtd html 4.0 frameset//",
    "-//w3c//dtd html 4.0 transitional//",
    "-//w3c//dtd html experimental 19960712//",
    "-//w3c//dtd html experimental 970421//",
    "-//w3c//dtd w3 html//",
    "-//w3o//dtd w3 html 3.0//",
    "-//webtechs//dtd mozilla html 2.0//",
    "-//webtechs//dtd mozilla html//"
  ];

  /// Lower-cased public identifiers that select quirks mode on exact match.
  static const _quirksPublicIds = const [
    "-//w3o//dtd w3 html strict 3.0//en//",
    "-/w3c/dtd html 4.0 transitional/en",
    "html"
  ];

  /// Prefixes that mean quirks mode without a system identifier and limited
  /// quirks mode with one.
  static const _html401PublicIdPrefixes = const [
    "-//w3c//dtd html 4.01 frameset//",
    "-//w3c//dtd html 4.01 transitional//"
  ];

  /// Prefixes that select limited quirks mode.
  static const _limitedQuirksPublicIdPrefixes = const [
    "-//w3c//dtd xhtml 1.0 frameset//",
    "-//w3c//dtd xhtml 1.0 transitional//"
  ];

  /// Lower-cased system identifier that selects quirks mode.
  static const _quirksSystemId =
      "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd";

  InitialPhase(parser) : super(parser);

  /// Whitespace before the doctype is ignored.
  Token processSpaceCharacters(SpaceCharactersToken token) => null;

  /// Comments seen in this phase become children of the document itself.
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  /// Inserts the doctype, sets [HtmlParser.compatMode] according to its
  /// identifiers and moves on to the "before html" phase.
  Token processDoctype(DoctypeToken token) {
    final doctypeName = token.name;
    String publicId = token.publicId;
    final systemId = token.systemId;
    final correct = token.correct;

    // Anything but a bare `html` doctype (optionally carrying the
    // legacy-compat system id) is reported as an unknown doctype.
    final hasForeignSystemId =
        systemId != null && systemId != "about:legacy-compat";
    if (doctypeName != "html" || publicId != null || hasForeignSystemId) {
      parser.parseError(token.span, "unknown-doctype");
    }

    if (publicId == null) publicId = "";

    tree.insertDoctype(token);

    // The tables above are lower-case, so compare a lower-cased id.
    if (publicId != "") publicId = asciiUpper2Lower(publicId);

    if (!correct ||
        token.name != "html" ||
        startsWithAny(publicId, _quirksPublicIdPrefixes) ||
        _quirksPublicIds.contains(publicId) ||
        (startsWithAny(publicId, _html401PublicIdPrefixes) &&
            systemId == null) ||
        (systemId != null && systemId.toLowerCase() == _quirksSystemId)) {
      parser.compatMode = "quirks";
    } else if (startsWithAny(publicId, _limitedQuirksPublicIdPrefixes) ||
        (startsWithAny(publicId, _html401PublicIdPrefixes) &&
            systemId != null)) {
      parser.compatMode = "limited quirks";
    }
    parser.phase = parser._beforeHtmlPhase;
    return null;
  }

  /// Shared fallback when something other than a doctype arrives first:
  /// selects quirks mode and moves on to the "before html" phase.
  void anythingElse() {
    parser
      ..compatMode = "quirks"
      ..phase = parser._beforeHtmlPhase;
  }

  /// Text where a doctype was expected: reports the error and returns the
  /// token so it is reprocessed by the next phase.
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "expected-doctype-but-got-chars");
    anythingElse();
    return token;
  }

  /// Start tag where a doctype was expected: reports the error and returns
  /// the token so it is reprocessed by the next phase.
  Token processStartTag(StartTagToken token) {
    final params = {"name": token.name};
    parser.parseError(
        token.span, "expected-doctype-but-got-start-tag", params);
    anythingElse();
    return token;
  }

  /// End tag where a doctype was expected: reports the error and returns the
  /// token so it is reprocessed by the next phase.
  Token processEndTag(EndTagToken token) {
    final params = {"name": token.name};
    parser.parseError(token.span, "expected-doctype-but-got-end-tag", params);
    anythingElse();
    return token;
  }

  /// End of input where a doctype was expected: reports the error and asks
  /// for EOF to be processed again by the next phase.
  bool processEOF() {
    parser.parseError(parser._lastSpan, "expected-doctype-but-got-eof");
    anythingElse();
    return true;
  }
}
816 | |
/// Phase that creates the root `html` element as soon as any significant
/// token arrives, then hands that token to the "before head" phase.
class BeforeHtmlPhase extends Phase {
  BeforeHtmlPhase(parser) : super(parser);

  // helper methods

  /// Inserts an attribute-less `html` root element and switches the parser
  /// to the "before head" phase.
  void insertHtmlElement() {
    final htmlTag = new StartTagToken("html", data: {});
    tree.insertRoot(htmlTag);
    parser.phase = parser._beforeHeadPhase;
  }

  // other

  /// Creates the root element and asks for EOF to be processed again.
  bool processEOF() {
    insertHtmlElement();
    return true;
  }

  /// Comments seen in this phase become children of the document itself.
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  /// Whitespace before the root element is ignored.
  Token processSpaceCharacters(SpaceCharactersToken token) => null;

  /// Creates the root element and returns the token for reprocessing.
  Token processCharacters(CharactersToken token) {
    insertHtmlElement();
    return token;
  }

  /// Creates the root element and returns the token for reprocessing,
  /// remembering whether the triggering tag was `html`.
  Token processStartTag(StartTagToken token) {
    if (token.name == "html") parser.firstStartTag = true;
    insertHtmlElement();
    return token;
  }

  /// Only `head`, `body`, `html` and `br` end tags imply the root element;
  /// every other end tag is reported as an error and dropped.
  Token processEndTag(EndTagToken token) {
    const impliesRoot = const ["head", "body", "html", "br"];
    if (impliesRoot.contains(token.name)) {
      insertHtmlElement();
      return token;
    }
    parser.parseError(
        token.span, "unexpected-end-tag-before-html", {"name": token.name});
    return null;
  }
}
869 | |
/// Phase between the root element and the `head` element: it opens `head`
/// explicitly for a `<head>` tag, or implicitly for nearly everything else.
class BeforeHeadPhase extends Phase {
  BeforeHeadPhase(parser) : super(parser);

  /// Dispatches on the tag name: `html`, `head`, or anything else.
  processStartTag(StartTagToken token) {
    final tagName = token.name;
    if (tagName == 'html') return startTagHtml(token);
    if (tagName == 'head') return startTagHead(token);
    return startTagOther(token);
  }

  /// `head`, `body`, `html` and `br` end tags imply a `head` element; every
  /// other end tag is reported as an error.
  processEndTag(EndTagToken token) {
    const impliesHead = const ["head", "body", "html", "br"];
    if (impliesHead.contains(token.name)) return endTagImplyHead(token);
    return endTagOther(token);
  }

  /// Opens an implied `head` and asks for EOF to be processed again.
  bool processEOF() {
    startTagHead(_impliedHead());
    return true;
  }

  /// Whitespace before `head` is ignored.
  Token processSpaceCharacters(SpaceCharactersToken token) => null;

  /// Opens an implied `head` and returns the token for reprocessing.
  Token processCharacters(CharactersToken token) {
    startTagHead(_impliedHead());
    return token;
  }

  /// An `<html>` start tag here is delegated to the "in body" phase.
  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// Inserts the `head` element for [token], records it as the tree's head
  /// pointer and switches the parser to the "in head" phase.
  void startTagHead(StartTagToken token) {
    tree.insertElement(token);
    tree.headPointer = tree.openElements.last;
    parser.phase = parser._inHeadPhase;
  }

  /// Opens an implied `head` and returns the token for reprocessing.
  Token startTagOther(StartTagToken token) {
    startTagHead(_impliedHead());
    return token;
  }

  /// Opens an implied `head` and returns the token for reprocessing.
  Token endTagImplyHead(EndTagToken token) {
    startTagHead(_impliedHead());
    return token;
  }

  /// Reports the unexpected end tag as a parse error.
  void endTagOther(EndTagToken token) {
    final params = {"name": token.name};
    parser.parseError(token.span, "end-tag-after-implied-root", params);
  }

  /// Builds the synthetic, attribute-less `head` start tag used whenever the
  /// element is implied rather than written in the input.
  StartTagToken _impliedHead() => new StartTagToken("head", data: {});
}
935 | |
/// Phase that is active while the `head` element is the current node.
///
/// Head-only elements are inserted here; anything that does not belong in
/// the head closes it (see [anythingElse]) and is handed back to the caller.
class InHeadPhase extends Phase {
  InHeadPhase(parser) : super(parser);

  /// Routes a start tag to the handler for its tag name.
  processStartTag(StartTagToken token) {
    final tagName = token.name;
    if (tagName == "html") return startTagHtml(token);
    if (tagName == "title") return startTagTitle(token);
    if (const ["noscript", "noframes", "style"].contains(tagName)) {
      return startTagNoScriptNoFramesStyle(token);
    }
    if (tagName == "script") return startTagScript(token);
    if (const ["base", "basefont", "bgsound", "command", "link"]
        .contains(tagName)) {
      return startTagBaseLinkCommand(token);
    }
    if (tagName == "meta") return startTagMeta(token);
    if (tagName == "head") return startTagHead(token);
    return startTagOther(token);
  }

  /// Routes an end tag to the handler for its tag name.
  processEndTag(EndTagToken token) {
    final tagName = token.name;
    if (tagName == "head") return endTagHead(token);
    if (const ["br", "html", "body"].contains(tagName)) {
      return endTagHtmlBodyBr(token);
    }
    return endTagOther(token);
  }

  /// Closes the head at end of input and returns `true`.
  bool processEOF() {
    anythingElse();
    return true;
  }

  /// Closes the head and hands the character [token] back to the caller.
  Token processCharacters(CharactersToken token) {
    anythingElse();
    return token;
  }

  /// Delegates an `html` start tag to the in-body phase.
  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// A second `head` start tag is only reported as a parse error.
  void startTagHead(StartTagToken token) {
    parser.parseError(token.span, "two-heads-are-not-better-than-one");
  }

  /// Inserts a void head element and immediately pops it again,
  /// acknowledging a self-closing flag on [token].
  void startTagBaseLinkCommand(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
    token.selfClosingAcknowledged = true;
  }

  /// Inserts a `meta` element and, while the stream's character encoding
  /// is not yet certain, asks the stream to switch to the encoding named
  /// by the `charset` attribute or, failing that, parsed out of `content`.
  void startTagMeta(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
    token.selfClosingAcknowledged = true;

    final attrs = token.data;
    if (parser.tokenizer.stream.charEncodingCertain) return;

    final charset = attrs["charset"];
    final content = attrs["content"];
    if (charset != null) {
      parser.tokenizer.stream.changeEncoding(charset);
      return;
    }
    if (content == null) return;

    final parsedCodec =
        new ContentAttrParser(new EncodingBytes(content)).parse();
    parser.tokenizer.stream.changeEncoding(parsedCodec);
  }

  /// `title` content is parsed as RCDATA.
  void startTagTitle(StartTagToken token) {
    parser.parseRCDataRawtext(token, "RCDATA");
  }

  /// `noscript`, `noframes` and `style` content is parsed as RAWTEXT.
  void startTagNoScriptNoFramesStyle(StartTagToken token) {
    // Need to decide whether to implement the scripting-disabled case
    parser.parseRCDataRawtext(token, "RAWTEXT");
  }

  /// Inserts the `script` element, puts the tokenizer into its script-data
  /// state and switches to the text phase, remembering the current phase
  /// in `parser.originalPhase`.
  void startTagScript(StartTagToken token) {
    tree.insertElement(token);
    final tokenizer = parser.tokenizer;
    tokenizer.state = tokenizer.scriptDataState;
    parser.originalPhase = parser.phase;
    parser.phase = parser._textPhase;
  }

  /// Any other start tag closes the head; [token] is handed back.
  Token startTagOther(StartTagToken token) {
    anythingElse();
    return token;
  }

  /// Pops the head element, records [token]'s span as its end span and
  /// moves the parser to the after-head phase.
  void endTagHead(EndTagToken token) {
    final head = parser.tree.openElements.removeLast();
    assert(head.localName == "head");
    head.endSourceSpan = token.span;
    parser.phase = parser._afterHeadPhase;
  }

  /// `html`, `body` and `br` end tags close the head; [token] is handed
  /// back.
  Token endTagHtmlBodyBr(EndTagToken token) {
    anythingElse();
    return token;
  }

  /// Reports an `unexpected-end-tag` parse error for [token].
  void endTagOther(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
  }

  /// Acts as if a `head` end tag had been seen.
  void anythingElse() {
    endTagHead(new EndTagToken("head"));
  }
}
1064 | |
1065 // XXX If we implement a parser for which scripting is disabled we need to | |
1066 // implement this phase. | |
1067 // | |
1068 // class InHeadNoScriptPhase extends Phase { | |
1069 | |
/// Phase that is active after the `head` element has been closed and
/// before a `body` or `frameset` has been opened.
class AfterHeadPhase extends Phase {
  AfterHeadPhase(parser) : super(parser);

  /// Routes a start tag to the handler for its tag name.
  processStartTag(StartTagToken token) {
    final tagName = token.name;
    if (tagName == "html") return startTagHtml(token);
    if (tagName == "body") return startTagBody(token);
    if (tagName == "frameset") return startTagFrameset(token);
    if (tagName == "head") return startTagHead(token);
    const headContent = const [
      "base",
      "basefont",
      "bgsound",
      "link",
      "meta",
      "noframes",
      "script",
      "style",
      "title"
    ];
    if (headContent.contains(tagName)) return startTagFromHead(token);
    return startTagOther(token);
  }

  /// Routes an end tag to the handler for its tag name.
  processEndTag(EndTagToken token) {
    if (const ["body", "html", "br"].contains(token.name)) {
      return endTagHtmlBodyBr(token);
    }
    return endTagOther(token);
  }

  /// Inserts an implied body at end of input and returns `true`.
  bool processEOF() {
    anythingElse();
    return true;
  }

  /// Inserts an implied body and hands the character [token] back.
  Token processCharacters(CharactersToken token) {
    anythingElse();
    return token;
  }

  /// Delegates an `html` start tag to the in-body phase.
  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// Inserts an explicit `body`, clears `parser.framesetOK` and enters the
  /// in-body phase.
  void startTagBody(StartTagToken token) {
    parser.framesetOK = false;
    tree.insertElement(token);
    parser.phase = parser._inBodyPhase;
  }

  /// Inserts a `frameset` and enters the in-frameset phase.
  void startTagFrameset(StartTagToken token) {
    tree.insertElement(token);
    parser.phase = parser._inFramesetPhase;
  }

  /// Handles head-only content that shows up after the head was closed.
  ///
  /// The head element is temporarily pushed back onto the stack of open
  /// elements, the token is processed by the in-head phase, and then the
  /// head element closest to the top of the stack is removed again.
  void startTagFromHead(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-out-of-my-head",
        {"name": token.name});
    tree.openElements.add(tree.headPointer);
    parser._inHeadPhase.processStartTag(token);
    for (var open in tree.openElements.reversed) {
      if (open.localName != "head") continue;
      tree.openElements.remove(open);
      break;
    }
  }

  /// A stray `head` start tag is only reported as a parse error.
  void startTagHead(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag", {"name": token.name});
  }

  /// Any other start tag implies a body; [token] is handed back.
  Token startTagOther(StartTagToken token) {
    anythingElse();
    return token;
  }

  /// `html`, `body` and `br` end tags imply a body; [token] is handed back.
  Token endTagHtmlBodyBr(EndTagToken token) {
    anythingElse();
    return token;
  }

  /// Reports an `unexpected-end-tag` parse error for [token].
  void endTagOther(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
  }

  /// Inserts an implied `body` with no attributes, enters the in-body
  /// phase and sets `parser.framesetOK` to `true`.
  void anythingElse() {
    final impliedBody = new StartTagToken("body", data: {});
    tree.insertElement(impliedBody);
    parser.phase = parser._inBodyPhase;
    parser.framesetOK = true;
  }
}
1172 | |
/// Signature of a function that receives a [Token] and returns a [Token].
///
/// NOTE(review): the name is misspelled ("Proccessor"); it is left as-is
/// here because other code may reference it by this name.
typedef Token TokenProccessor(Token token);
1174 | |
1175 class InBodyPhase extends Phase { | |
/// Whether the next whitespace token should have a leading newline
/// stripped.
///
/// Set by [startTagPreListing] and [startTagTextarea]; consulted by
/// [processSpaceCharacters] and cleared again by
/// [processSpaceCharactersDropNewline].
bool dropNewline = false;

// http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
// the really-really-really-very crazy mode
InBodyPhase(parser) : super(parser);
1181 | |
/// Routes a start tag seen in the body to the handler for its tag name.
///
/// Each tag name appears in exactly one `case`. `noframes` is handled by
/// [startTagProcessInHead]; the in-head phase parses it as RAWTEXT, which
/// is also what [startTagRawtext] would do, so it is not listed again in
/// the rawtext group. Tag names without a dedicated handler go to
/// [startTagOther].
processStartTag(StartTagToken token) {
  switch (token.name) {
    case "html":
      return startTagHtml(token);
    case "base":
    case "basefont":
    case "bgsound":
    case "command":
    case "link":
    case "meta":
    case "noframes":
    case "script":
    case "style":
    case "title":
      return startTagProcessInHead(token);
    case "body":
      return startTagBody(token);
    case "frameset":
      return startTagFrameset(token);
    case "address":
    case "article":
    case "aside":
    case "blockquote":
    case "center":
    case "details":
    case "dir":
    case "div":
    case "dl":
    case "fieldset":
    case "figcaption":
    case "figure":
    case "footer":
    case "header":
    case "hgroup":
    case "menu":
    case "nav":
    case "ol":
    case "p":
    case "section":
    case "summary":
    case "ul":
      return startTagCloseP(token);
    // headingElements
    case "h1":
    case "h2":
    case "h3":
    case "h4":
    case "h5":
    case "h6":
      return startTagHeading(token);
    case "pre":
    case "listing":
      return startTagPreListing(token);
    case "form":
      return startTagForm(token);
    case "li":
    case "dd":
    case "dt":
      return startTagListItem(token);
    case "plaintext":
      return startTagPlaintext(token);
    case "a":
      return startTagA(token);
    case "b":
    case "big":
    case "code":
    case "em":
    case "font":
    case "i":
    case "s":
    case "small":
    case "strike":
    case "strong":
    case "tt":
    case "u":
      return startTagFormatting(token);
    case "nobr":
      return startTagNobr(token);
    case "button":
      return startTagButton(token);
    case "applet":
    case "marquee":
    case "object":
      return startTagAppletMarqueeObject(token);
    case "xmp":
      return startTagXmp(token);
    case "table":
      return startTagTable(token);
    case "area":
    case "br":
    case "embed":
    case "img":
    case "keygen":
    case "wbr":
      return startTagVoidFormatting(token);
    case "param":
    case "source":
    case "track":
      return startTagParamSource(token);
    case "input":
      return startTagInput(token);
    case "hr":
      return startTagHr(token);
    case "image":
      return startTagImage(token);
    case "isindex":
      return startTagIsIndex(token);
    case "textarea":
      return startTagTextarea(token);
    case "iframe":
      return startTagIFrame(token);
    // "noframes" is intentionally absent here: it is dispatched above.
    case "noembed":
    case "noscript":
      return startTagRawtext(token);
    case "select":
      return startTagSelect(token);
    case "rp":
    case "rt":
      return startTagRpRt(token);
    case "option":
    case "optgroup":
      return startTagOpt(token);
    case "math":
      return startTagMath(token);
    case "svg":
      return startTagSvg(token);
    case "caption":
    case "col":
    case "colgroup":
    case "frame":
    case "head":
    case "tbody":
    case "td":
    case "tfoot":
    case "th":
    case "thead":
    case "tr":
      return startTagMisplaced(token);
    default:
      return startTagOther(token);
  }
}
1326 | |
/// Routes an end tag seen in the body to the handler for its tag name.
///
/// The tag-name groups below are mutually exclusive, so the order of the
/// checks does not affect which handler runs. Unlisted names go to
/// [endTagOther].
processEndTag(EndTagToken token) {
  final tagName = token.name;

  if (tagName == "body") return endTagBody(token);
  if (tagName == "html") return endTagHtml(token);

  const blockNames = const [
    "address",
    "article",
    "aside",
    "blockquote",
    "center",
    "details",
    "dir",
    "div",
    "dl",
    "fieldset",
    "figcaption",
    "figure",
    "footer",
    "header",
    "hgroup",
    "listing",
    "menu",
    "nav",
    "ol",
    "pre",
    "section",
    "summary",
    "ul"
  ];
  if (blockNames.contains(tagName)) return endTagBlock(token);

  if (tagName == "form") return endTagForm(token);
  if (tagName == "p") return endTagP(token);

  if (const ["dd", "dt", "li"].contains(tagName)) {
    return endTagListItem(token);
  }

  // headingElements
  if (const ["h1", "h2", "h3", "h4", "h5", "h6"].contains(tagName)) {
    return endTagHeading(token);
  }

  const formattingNames = const [
    "a",
    "b",
    "big",
    "code",
    "em",
    "font",
    "i",
    "nobr",
    "s",
    "small",
    "strike",
    "strong",
    "tt",
    "u"
  ];
  if (formattingNames.contains(tagName)) return endTagFormatting(token);

  if (const ["applet", "marquee", "object"].contains(tagName)) {
    return endTagAppletMarqueeObject(token);
  }

  if (tagName == "br") return endTagBr(token);
  return endTagOther(token);
}
1398 | |
/// Whether [node1] and [node2] have the same local name, namespace and
/// attribute set (same number of attributes, with every attribute of
/// [node1] having an equal value on [node2]).
bool isMatchingFormattingElement(Element node1, Element node2) {
  final sameName = node1.localName == node2.localName &&
      node1.namespaceUri == node2.namespaceUri;
  if (!sameName) return false;

  if (node1.attributes.length != node2.attributes.length) return false;

  return node1.attributes.keys
      .every((key) => node1.attributes[key] == node2.attributes[key]);
}
1414 | |
1415 // helper | |
/// Inserts the element for [token] and appends it to the list of active
/// formatting elements.
///
/// Entries after the last `Marker` that match the new element (see
/// [isMatchingFormattingElement]) are counted; when there are already
/// three, the one furthest from the end of the list is removed first.
void addFormattingElement(token) {
  tree.insertElement(token);
  final inserted = tree.openElements.last;

  // Walk backwards from the newest entry and stop at the first marker.
  final duplicates = tree.activeFormattingElements.reversed
      .takeWhile((node) => node != Marker)
      .where((node) => isMatchingFormattingElement(node, inserted))
      .toList();

  assert(duplicates.length <= 3);
  if (duplicates.length == 3) {
    tree.activeFormattingElements.remove(duplicates.last);
  }
  tree.activeFormattingElements.add(inserted);
}
1435 | |
1436 // the real deal | |
/// Handles end of input while in the body.
///
/// Scans the open elements from the most recently opened one and reports
/// `expected-closing-tag-but-got-eof` for the first element that is not
/// in the list of names allowed to remain open. Always returns `false`.
bool processEOF() {
  const mayRemainOpen = const [
    "dd",
    "dt",
    "li",
    "p",
    "tbody",
    "td",
    "tfoot",
    "th",
    "thead",
    "tr",
    "body",
    "html"
  ];
  for (var open in tree.openElements.reversed) {
    if (mayRemainOpen.contains(open.localName)) continue;
    parser.parseError(open.sourceSpan, "expected-closing-tag-but-got-eof");
    break;
  }
  //Stop parsing
  return false;
}
1460 | |
/// Inserts whitespace, first dropping one leading newline when the
/// current node is an empty `pre`, `listing` or `textarea`.
///
/// Clears [dropNewline]. Nothing is inserted if no text remains.
void processSpaceCharactersDropNewline(StringToken token) {
  // Sometimes (start of <pre>, <listing>, and <textarea> blocks) we
  // want to drop leading newlines
  dropNewline = false;
  var text = token.data;
  if (text.startsWith("\n")) {
    final current = tree.openElements.last;
    final stripsNewline =
        const ["pre", "listing", "textarea"].contains(current.localName);
    if (stripsNewline && !current.hasContent()) {
      text = text.substring(1);
    }
  }
  if (text.isEmpty) return;

  tree.reconstructActiveFormattingElements();
  tree.insertText(text, token.span);
}
1478 | |
/// Inserts character data into the current node.
///
/// A token consisting of a single NUL character is dropped. Text that is
/// not all whitespace clears `parser.framesetOK`. Always returns `null`.
Token processCharacters(CharactersToken token) {
  final text = token.data;
  //The tokenizer should always emit null on its own
  if (text == "\u0000") return null;

  tree.reconstructActiveFormattingElements();
  tree.insertText(text, token.span);

  final hasVisibleText = !allWhitespace(text);
  if (parser.framesetOK && hasVisibleText) {
    parser.framesetOK = false;
  }
  return null;
}
1491 | |
/// Inserts whitespace into the current node.
///
/// While [dropNewline] is set the work is delegated to
/// [processSpaceCharactersDropNewline]. Always returns `null`.
Token processSpaceCharacters(SpaceCharactersToken token) {
  if (dropNewline) {
    processSpaceCharactersDropNewline(token);
    return null;
  }
  tree.reconstructActiveFormattingElements();
  tree.insertText(token.data, token.span);
  return null;
}
1501 | |
/// Handles [token] using the in-head phase's start-tag dispatch and
/// returns whatever that dispatch returns.
Token startTagProcessInHead(StartTagToken token) =>
    parser._inHeadPhase.processStartTag(token);
1505 | |
/// Handles a `body` start tag seen while already in the body.
///
/// Always a parse error. When the second open element is a `body`,
/// `parser.framesetOK` is cleared and attributes from [token] that the
/// existing body does not have yet are copied onto it; otherwise the
/// token is ignored.
void startTagBody(StartTagToken token) {
  parser.parseError(token.span, "unexpected-start-tag", {"name": "body"});

  final stack = tree.openElements;
  final bodyIsOpen = stack.length != 1 && stack[1].localName == "body";
  if (!bodyIsOpen) {
    assert(parser.innerHTMLMode);
    return;
  }

  parser.framesetOK = false;
  final body = stack[1];
  for (var name in token.data.keys) {
    body.attributes.putIfAbsent(name, () => token.data[name]);
  }
}
1518 | |
/// Handles a `frameset` start tag seen while in the body.
///
/// Always a parse error. If the second open element is a `body` and
/// `parser.framesetOK` is still set, that body is detached from its
/// parent, the stack is popped back to the `html` element, the frameset
/// is inserted and the parser enters the in-frameset phase. Otherwise the
/// token is ignored.
void startTagFrameset(StartTagToken token) {
  parser.parseError(token.span, "unexpected-start-tag", {"name": "frameset"});

  final stack = tree.openElements;
  if (stack.length == 1 || stack[1].localName != "body") {
    assert(parser.innerHTMLMode);
    return;
  }
  if (!parser.framesetOK) return;

  final body = stack[1];
  if (body.parentNode != null) {
    body.parentNode.nodes.remove(body);
  }
  while (stack.last.localName != "html") {
    stack.removeLast();
  }
  tree.insertElement(token);
  parser.phase = parser._inFramesetPhase;
}
1535 | |
/// Inserts a block-level element, first closing a `p` element that is in
/// button scope.
void startTagCloseP(StartTagToken token) {
  final openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) {
    endTagP(new EndTagToken("p"));
  }
  tree.insertElement(token);
}
1542 | |
/// Inserts a `pre` or `listing` element, first closing a `p` element
/// that is in button scope.
///
/// Clears `parser.framesetOK` and sets [dropNewline] so a newline that
/// directly follows the start tag can be dropped.
void startTagPreListing(StartTagToken token) {
  final openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) {
    endTagP(new EndTagToken("p"));
  }
  tree.insertElement(token);
  parser.framesetOK = false;
  dropNewline = true;
}
1551 | |
/// Inserts a `form` element and records it as the tree's form pointer.
///
/// If a form pointer is already set the tag is reported as a parse error
/// and ignored. A `p` element in button scope is closed first.
void startTagForm(StartTagToken token) {
  if (tree.formPointer != null) {
    parser.parseError(token.span, "unexpected-start-tag", {"name": "form"});
    return;
  }

  if (tree.elementInScope("p", variant: "button")) {
    endTagP(new EndTagToken("p"));
  }
  tree.insertElement(token);
  tree.formPointer = tree.openElements.last;
}
1563 | |
/// Inserts an `li`, `dd` or `dt` element.
///
/// Walks the open elements from the most recent one: the first element
/// whose name is in the stop list for [token]'s tag gets an end tag
/// processed for it; the walk also ends at the first special element
/// other than `address`, `div` or `p`. A `p` element in button scope is
/// then closed before the new element is inserted.
void startTagListItem(StartTagToken token) {
  parser.framesetOK = false;

  const siblingsToClose = const {
    "li": const ["li"],
    "dt": const ["dt", "dd"],
    "dd": const ["dt", "dd"]
  };
  final stopNames = siblingsToClose[token.name];

  for (var open in tree.openElements.reversed) {
    final openName = open.localName;
    if (stopNames.contains(openName)) {
      parser.phase.processEndTag(new EndTagToken(openName));
      break;
    }
    final isBarrier = specialElements.contains(getElementNameTuple(open)) &&
        !const ["address", "div", "p"].contains(openName);
    if (isBarrier) break;
  }

  if (tree.elementInScope("p", variant: "button")) {
    parser.phase.processEndTag(new EndTagToken("p"));
  }

  tree.insertElement(token);
}
1590 | |
/// Inserts a `plaintext` element, first closing a `p` element in button
/// scope, and puts the tokenizer into its plaintext state.
void startTagPlaintext(StartTagToken token) {
  final openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) {
    endTagP(new EndTagToken("p"));
  }
  tree.insertElement(token);
  final tokenizer = parser.tokenizer;
  tokenizer.state = tokenizer.plaintextState;
}
1598 | |
/// Inserts a heading element (`h1`..`h6`).
///
/// A `p` element in button scope is closed first. If the current node is
/// itself a heading, that is a parse error and the open heading is
/// popped before the new one is inserted.
void startTagHeading(StartTagToken token) {
  if (tree.elementInScope("p", variant: "button")) {
    endTagP(new EndTagToken("p"));
  }

  final nestedInHeading =
      headingElements.contains(tree.openElements.last.localName);
  if (nestedInHeading) {
    parser.parseError(
        token.span, "unexpected-start-tag", {"name": token.name});
    tree.openElements.removeLast();
  }
  tree.insertElement(token);
}
1610 | |
/// Inserts an `a` element as a formatting element.
///
/// If an `a` is already in the list of active formatting elements, that
/// is a parse error: an `a` end tag is processed and the old element is
/// removed from both the open elements and the active formatting list.
void startTagA(StartTagToken token) {
  final openAnchor = tree.elementInActiveFormattingElements("a");
  if (openAnchor != null) {
    parser.parseError(token.span, "unexpected-start-tag-implies-end-tag",
        {"startName": "a", "endName": "a"});
    endTagFormatting(new EndTagToken("a"));
    tree.openElements.remove(openAnchor);
    tree.activeFormattingElements.remove(openAnchor);
  }
  tree.reconstructActiveFormattingElements();
  addFormattingElement(token);
}
1625 | |
/// Inserts a formatting element after reconstructing the active
/// formatting elements, and registers it via [addFormattingElement].
void startTagFormatting(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  addFormattingElement(token);
}
1630 | |
/// Inserts a `nobr` element as a formatting element.
///
/// A `nobr` that is already in scope is a parse error and is closed by
/// processing a `nobr` end tag first.
void startTagNobr(StartTagToken token) {
  tree.reconstructActiveFormattingElements();

  final alreadyOpen = tree.elementInScope("nobr");
  if (alreadyOpen) {
    parser.parseError(token.span, "unexpected-start-tag-implies-end-tag",
        {"startName": "nobr", "endName": "nobr"});
    processEndTag(new EndTagToken("nobr"));
    // XXX Need tests that trigger the following
    tree.reconstructActiveFormattingElements();
  }
  addFormattingElement(token);
}
1644 | |
/// Handles a `button` start tag.
///
/// With no `button` in scope the element is inserted, `parser.framesetOK`
/// is cleared and `null` is returned. Otherwise it is a parse error: a
/// `button` end tag is processed and [token] is handed back to the
/// caller.
Token startTagButton(StartTagToken token) {
  if (!tree.elementInScope("button")) {
    tree.reconstructActiveFormattingElements();
    tree.insertElement(token);
    parser.framesetOK = false;
    return null;
  }

  parser.parseError(token.span, "unexpected-start-tag-implies-end-tag",
      {"startName": "button", "endName": "button"});
  processEndTag(new EndTagToken("button"));
  return token;
}
1660 | |
/// Inserts an `applet`, `marquee` or `object` element, pushes a `Marker`
/// onto the active formatting elements and clears `parser.framesetOK`.
void startTagAppletMarqueeObject(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  tree.activeFormattingElements.add(Marker);
  parser.framesetOK = false;
}
1667 | |
/// Handles an `xmp` start tag: closes a `p` element in button scope,
/// clears `parser.framesetOK` and parses the content as RAWTEXT.
void startTagXmp(StartTagToken token) {
  final openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) {
    endTagP(new EndTagToken("p"));
  }
  tree.reconstructActiveFormattingElements();
  parser.framesetOK = false;
  parser.parseRCDataRawtext(token, "RAWTEXT");
}
1676 | |
/// Inserts a `table` element and enters the in-table phase.
///
/// Unless the document is in quirks mode, a `p` element in button scope
/// is closed first. Clears `parser.framesetOK`.
void startTagTable(StartTagToken token) {
  if (parser.compatMode != "quirks" &&
      tree.elementInScope("p", variant: "button")) {
    processEndTag(new EndTagToken("p"));
  }
  tree.insertElement(token);
  parser.framesetOK = false;
  parser.phase = parser._inTablePhase;
}
1687 | |
/// Inserts a void element (`area`, `br`, `embed`, `img`, `keygen`,
/// `wbr`, ...) and pops it again right away.
///
/// Acknowledges a self-closing flag on [token] and clears
/// `parser.framesetOK`.
void startTagVoidFormatting(StartTagToken token) {
  tree.reconstructActiveFormattingElements();

  // Void element: push and immediately pop.
  tree.insertElement(token);
  tree.openElements.removeLast();

  token.selfClosingAcknowledged = true;
  parser.framesetOK = false;
}
1695 | |
/// Inserts an `input` element like any other void element, except that
/// `type=hidden` (compared ASCII case-insensitively) leaves
/// `parser.framesetOK` at the value it had before.
void startTagInput(StartTagToken token) {
  final framesetOKBefore = parser.framesetOK;
  startTagVoidFormatting(token);

  final isHidden = asciiUpper2Lower(token.data["type"]) == "hidden";
  if (isHidden) {
    //input type=hidden doesn't change framesetOK
    parser.framesetOK = framesetOKBefore;
  }
}
1704 | |
/// Inserts a `param`, `source` or `track` element and pops it again
/// right away, acknowledging a self-closing flag on [token].
void startTagParamSource(StartTagToken token) {
  // Void element: push and immediately pop.
  tree.insertElement(token);
  tree.openElements.removeLast();

  token.selfClosingAcknowledged = true;
}
1710 | |
/// Inserts an `hr` element (and pops it again right away), first closing
/// a `p` element in button scope.
///
/// Acknowledges a self-closing flag on [token] and clears
/// `parser.framesetOK`.
void startTagHr(StartTagToken token) {
  final openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) {
    endTagP(new EndTagToken("p"));
  }

  // Void element: push and immediately pop.
  tree.insertElement(token);
  tree.openElements.removeLast();

  token.selfClosingAcknowledged = true;
  parser.framesetOK = false;
}
1720 | |
/// Treats an `image` start tag as `img`.
///
/// Reports a parse error, then processes a fresh `img` start tag that
/// carries the same attributes and self-closing flag as [token].
void startTagImage(StartTagToken token) {
  // No really...
  parser.parseError(token.span, "unexpected-start-tag-treated-as",
      {"originalName": "image", "newName": "img"});

  final imgToken = new StartTagToken("img",
      data: token.data, selfClosing: token.selfClosing);
  processStartTag(imgToken);
}
1730 | |
/// Expands the deprecated `isindex` tag into a small form.
///
/// Always a parse error. Nothing more happens when a form pointer is
/// already set. Otherwise the following token sequence is processed:
/// `<form>` (with the `action` attribute, if any), `<hr>`, `<label>`,
/// the prompt text, `<input name="isindex">` carrying the remaining
/// attributes, `</label>`, `<hr>`, `</form>`.
void startTagIsIndex(StartTagToken token) {
  parser.parseError(token.span, "deprecated-tag", {"name": "isindex"});
  if (tree.formPointer != null) return;

  var formAttrs = {};
  final action = token.data["action"];
  if (action != null) {
    formAttrs["action"] = action;
  }
  processStartTag(new StartTagToken("form", data: formAttrs));
  processStartTag(new StartTagToken("hr", data: {}));
  processStartTag(new StartTagToken("label", data: {}));

  // XXX Localization ...
  final customPrompt = token.data["prompt"];
  final prompt = customPrompt == null
      ? "This is a searchable index. Enter search keywords: "
      : customPrompt;
  processCharacters(new CharactersToken(prompt));

  // The input gets every attribute except the two consumed above.
  final inputAttrs = new LinkedHashMap.from(token.data)
    ..remove('action')
    ..remove('prompt')
    ..["name"] = "isindex";
  processStartTag(new StartTagToken("input",
      data: inputAttrs, selfClosing: token.selfClosing));

  processEndTag(new EndTagToken("label"));
  processStartTag(new StartTagToken("hr", data: {}));
  processEndTag(new EndTagToken("form"));
}
1760 | |
/// Inserts a `textarea` element and puts the tokenizer into its RCDATA
/// state.
///
/// Sets [dropNewline] so a newline directly after the start tag can be
/// dropped, and clears `parser.framesetOK`.
void startTagTextarea(StartTagToken token) {
  tree.insertElement(token);
  final tokenizer = parser.tokenizer;
  tokenizer.state = tokenizer.rcdataState;
  dropNewline = true;
  parser.framesetOK = false;
}
1767 | |
/// Handles an `iframe` start tag: clears `parser.framesetOK`, then parses
/// the content as raw text via [startTagRawtext].
void startTagIFrame(StartTagToken token) {
  parser.framesetOK = false;
  startTagRawtext(token);
}
1772 | |
/// Parses the content of [token]'s element as RAWTEXT.
///
/// Used for iframe, noembed, noframes, noscript(if scripting enabled).
void startTagRawtext(StartTagToken token) =>
    parser.parseRCDataRawtext(token, "RAWTEXT");
1777 | |
/// Inserts an `option` or `optgroup` element, first processing an
/// `option` end tag when the current node is an `option`.
void startTagOpt(StartTagToken token) {
  final currentIsOption = tree.openElements.last.localName == "option";
  if (currentIsOption) {
    parser.phase.processEndTag(new EndTagToken("option"));
  }
  tree.reconstructActiveFormattingElements();
  parser.tree.insertElement(token);
}
1785 | |
/// Inserts a `select` element and switches to the matching select phase.
///
/// Clears `parser.framesetOK`. If the parser's current phase is one of
/// the table-related phases, the in-select-in-table phase is entered;
/// otherwise the plain in-select phase.
void startTagSelect(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  parser.framesetOK = false;

  final current = parser.phase;
  final insideTable = parser._inTablePhase == current ||
      parser._inCaptionPhase == current ||
      parser._inColumnGroupPhase == current ||
      parser._inTableBodyPhase == current ||
      parser._inRowPhase == current ||
      parser._inCellPhase == current;

  parser.phase =
      insideTable ? parser._inSelectInTablePhase : parser._inSelectPhase;
}
1802 | |
/// Inserts an `rp` or `rt` element.
///
/// When a `ruby` element is in scope, implied end tags are generated
/// first; a parse error is reported if the current node is then not the
/// `ruby` element.
void startTagRpRt(StartTagToken token) {
  if (tree.elementInScope("ruby")) {
    tree.generateImpliedEndTags();
    final current = tree.openElements.last;
    final currentIsRuby = current.localName == "ruby";
    if (!currentIsRuby) {
      parser.parseError(current.sourceSpan, 'undefined-error');
    }
  }
  tree.insertElement(token);
}
1813 | |
/// Inserts a `math` element in the MathML namespace.
///
/// The token's attributes are adjusted for MathML and for foreign
/// content first. A self-closing tag is popped again immediately and the
/// flag is acknowledged.
void startTagMath(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  parser.adjustMathMLAttributes(token);
  parser.adjustForeignAttributes(token);
  token.namespace = Namespaces.mathml;
  tree.insertElement(token);
  //Need to get the parse error right for the case where the token
  //has a namespace not equal to the xmlns attribute
  if (!token.selfClosing) return;

  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
}
1827 | |
/// Inserts an `svg` element in the SVG namespace.
///
/// The token's attributes are adjusted for SVG and for foreign content
/// first. A self-closing tag is popped again immediately and the flag is
/// acknowledged.
void startTagSvg(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  parser.adjustSVGAttributes(token);
  parser.adjustForeignAttributes(token);
  token.namespace = Namespaces.svg;
  tree.insertElement(token);
  //Need to get the parse error right for the case where the token
  //has a namespace not equal to the xmlns attribute
  if (!token.selfClosing) return;

  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
}
1841 | |
/// Ignores a start tag that only makes sense in a different insertion
/// mode, reporting it as `unexpected-start-tag-ignored`.
///
/// Such elements should be children of other elements:
/// "caption", "col", "colgroup", "frame", "frameset", "head",
/// "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
/// "tr", "noscript"
void startTagMisplaced(StartTagToken token) {
  final details = {"name": token.name};
  parser.parseError(token.span, "unexpected-start-tag-ignored", details);
}
1851 | |
/// Inserts an ordinary element after reconstructing the active
/// formatting elements. Always returns `null`.
Token startTagOther(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  return null;
}
1857 | |
/// Handles a `p` end tag.
///
/// With a `p` in button scope: implied end tags (except `p`) are
/// generated, a parse error is reported if the current node is then not a
/// `p`, and the stack is popped via `popOpenElementsUntil`. Without one:
/// an empty `p` is opened, a parse error is reported, and the end tag is
/// processed again.
void endTagP(EndTagToken token) {
  if (tree.elementInScope("p", variant: "button")) {
    tree.generateImpliedEndTags("p");
    final currentIsP = tree.openElements.last.localName == "p";
    if (!currentIsP) {
      parser.parseError(token.span, "unexpected-end-tag", {"name": "p"});
    }
    popOpenElementsUntil(token);
    return;
  }

  // No <p> to close: open one so the retry below has something to pop.
  startTagCloseP(new StartTagToken("p", data: {}));
  parser.parseError(token.span, "unexpected-end-tag", {"name": "p"});
  endTagP(new EndTagToken("p"));
}
1871 | |
/// Handles a `body` end tag and moves the parser to the after-body
/// phase.
///
/// If no `body` is in scope the tag is a parse error and is ignored. If
/// the current node is the body, the token's span is recorded as its end
/// span. Otherwise the open elements from index 2 on are scanned and a
/// parse error is reported for the first one whose name is not in the
/// list of elements allowed to stay open.
void endTagBody(EndTagToken token) {
  if (!tree.elementInScope("body")) {
    parser.parseError(token.span, 'undefined-error');
    return;
  }

  if (tree.openElements.last.localName == "body") {
    tree.openElements.last.endSourceSpan = token.span;
  } else {
    const mayStayOpen = const [
      "dd",
      "dt",
      "li",
      "optgroup",
      "option",
      "p",
      "rp",
      "rt",
      "tbody",
      "td",
      "tfoot",
      "th",
      "thead",
      "tr",
      "body",
      "html"
    ];
    for (Element open in slice(tree.openElements, 2)) {
      if (mayStayOpen.contains(open.localName)) continue;
      // Not sure this is the correct name for the parse error
      parser.parseError(token.span, "expected-one-end-tag-but-got-another",
          {"gotName": "body", "expectedName": open.localName});
      break;
    }
  }
  parser.phase = parser._afterBodyPhase;
}
1909 | |
/// Handles an `</html>` end tag.
///
/// If a `body` is in scope, a synthesized `</body>` is processed first and
/// [token] is returned; otherwise `null` is returned.
Token endTagHtml(EndTagToken token) {
  // We repeat the test for the body end tag token being ignored here.
  if (!tree.elementInScope("body")) return null;
  endTagBody(new EndTagToken("body"));
  return token;
}
1918 | |
/// Handles the end tag of a block-level element named `token.name`.
///
/// The scope check is performed once, before implied end tags are generated,
/// and its result decides both whether implied end tags are generated and
/// whether the stack is popped. A parse error is reported whenever the
/// current node's name differs from the tag name.
void endTagBlock(EndTagToken token) {
  var tagName = token.name;

  // Put us back in the right whitespace handling mode.
  if (tagName == "pre") {
    dropNewline = false;
  }

  var inScope = tree.elementInScope(tagName);
  if (inScope) {
    tree.generateImpliedEndTags();
  }

  var current = tree.openElements.last;
  if (current.localName != tagName) {
    parser.parseError(token.span, "end-tag-too-early", {"name": tagName});
  }

  if (inScope) {
    popOpenElementsUntil(token);
  }
}
1935 | |
/// Handles a `</form>` end tag.
///
/// The tree's form pointer is always cleared. If it pointed at an element
/// that is in scope, implied end tags are generated, that element is removed
/// from the open elements and its `endSourceSpan` is set; otherwise a parse
/// error is reported.
void endTagForm(EndTagToken token) {
  var form = tree.formPointer;
  tree.formPointer = null;

  if (form != null && tree.elementInScope(form)) {
    tree.generateImpliedEndTags();
    if (tree.openElements.last != form) {
      parser.parseError(
          token.span, "end-tag-too-early-ignored", {"name": "form"});
    }
    tree.openElements.remove(form);
    form.endSourceSpan = token.span;
    return;
  }

  parser.parseError(token.span, "unexpected-end-tag", {"name": "form"});
}
1951 | |
/// Handles the end tag of a list-item-like element named `token.name`.
///
/// `li` is looked up with the "list" scope variant; any other name uses the
/// default scope. If the element is not in scope a parse error is reported;
/// otherwise implied end tags (except for this name) are generated, a parse
/// error is reported if the current node does not match, and the stack is
/// popped via `popOpenElementsUntil`.
void endTagListItem(EndTagToken token) {
  var tagName = token.name;
  var variant = tagName == "li" ? "list" : null;

  if (tree.elementInScope(tagName, variant: variant)) {
    tree.generateImpliedEndTags(tagName);
    if (tree.openElements.last.localName != tagName) {
      parser.parseError(token.span, "end-tag-too-early", {"name": tagName});
    }
    popOpenElementsUntil(token);
  } else {
    parser.parseError(token.span, "unexpected-end-tag", {"name": tagName});
  }
}
1970 | |
/// Handles the end tag of a heading element.
///
/// If any name in `headingElements` is in scope, implied end tags are
/// generated. A parse error is reported when the current node's name differs
/// from the tag name. Then, if any heading name is (still) in scope, open
/// elements are popped up to and including the first heading element, whose
/// `endSourceSpan` is set to the token's span.
void endTagHeading(EndTagToken token) {
  bool headingInScope() =>
      headingElements.any((name) => tree.elementInScope(name));

  if (headingInScope()) {
    tree.generateImpliedEndTags();
  }

  if (tree.openElements.last.localName != token.name) {
    parser.parseError(token.span, "end-tag-too-early", {"name": token.name});
  }

  if (headingInScope()) {
    Element popped;
    do {
      popped = tree.openElements.removeLast();
    } while (!headingElements.contains(popped.localName));
    popped.endSourceSpan = token.span;
  }
}
1995 | |
1996 /// The much-feared adoption agency algorithm. | |
///
/// Handles the end tag of a formatting element named `token.name`. Each pass
/// of the outer loop looks the name up in the active formatting elements and
/// then either reports a parse error and returns, pops the open-element stack
/// down to that element and returns, or re-parents nodes around the first
/// element at or after it on the stack whose name tuple is in
/// `specialElements` (the "furthest block"). The outer loop runs at most 8
/// times and the inner loop at most 3 times per outer pass.
1997 endTagFormatting(EndTagToken token) { | |
1998 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construc
tion.html#adoptionAgency | |
1999 // TODO(jmesserly): the comments here don't match the numbered steps in the | |
2000 // updated spec. This needs a pass over it to verify that it still matches. | |
2001 // In particular the html5lib Python code skipped "step 4", I'm not sure why. | |
2002 // XXX Better parseError messages appreciated. | |
2003 int outerLoopCounter = 0; | |
2004 while (outerLoopCounter < 8) { | |
2005 outerLoopCounter += 1; | |
2006 | |
2007 // Step 1 paragraph 1 | |
// Bail out with an error if there is no such formatting element, or if it is
// on the open-element stack but not in scope.
2008 var formattingElement = | |
2009 tree.elementInActiveFormattingElements(token.name); | |
2010 if (formattingElement == null || | |
2011 (tree.openElements.contains(formattingElement) && | |
2012 !tree.elementInScope(formattingElement.localName))) { | |
2013 parser.parseError( | |
2014 token.span, "adoption-agency-1.1", {"name": token.name}); | |
2015 return; | |
2016 // Step 1 paragraph 2 | |
// The element is in the active formatting list but no longer open: drop it
// from that list and stop.
2017 } else if (!tree.openElements.contains(formattingElement)) { | |
2018 parser.parseError( | |
2019 token.span, "adoption-agency-1.2", {"name": token.name}); | |
2020 tree.activeFormattingElements.remove(formattingElement); | |
2021 return; | |
2022 } | |
2023 | |
2024 // Step 1 paragraph 3 | |
// Not the current node: report an error but keep going.
2025 if (formattingElement != tree.openElements.last) { | |
2026 parser.parseError( | |
2027 token.span, "adoption-agency-1.3", {"name": token.name}); | |
2028 } | |
2029 | |
2030 // Step 2 | |
2031 // Start of the adoption agency algorithm proper | |
// NOTE(review): slice(tree.openElements, afeIndex) presumably yields the stack
// from afeIndex onward; confirm against the helper.
2032 var afeIndex = tree.openElements.indexOf(formattingElement); | |
2033 Node furthestBlock = null; | |
2034 for (Node element in slice(tree.openElements, afeIndex)) { | |
2035 if (specialElements.contains(getElementNameTuple(element))) { | |
2036 furthestBlock = element; | |
2037 break; | |
2038 } | |
2039 } | |
2040 // Step 3 | |
// No furthest block: pop the stack up to and including the formatting
// element, record its end span, and remove it from the active list.
2041 if (furthestBlock == null) { | |
2042 Element element = tree.openElements.removeLast(); | |
2043 while (element != formattingElement) { | |
2044 element = tree.openElements.removeLast(); | |
2045 } | |
2046 if (element != null) { | |
2047 element.endSourceSpan = token.span; | |
2048 } | |
2049 tree.activeFormattingElements.remove(element); | |
2050 return; | |
2051 } | |
2052 | |
// The element immediately below the formatting element on the stack.
2053 var commonAncestor = tree.openElements[afeIndex - 1]; | |
2054 | |
2055 // Step 5 | |
2056 // The bookmark is supposed to help us identify where to reinsert | |
2057 // nodes in step 12. We have to ensure that we reinsert nodes after | |
2058 // the node before the active formatting element. Note the bookmark | |
2059 // can move in step 7.4 | |
2060 var bookmark = tree.activeFormattingElements.indexOf(formattingElement); | |
2061 | |
2062 // Step 6 | |
2063 Node lastNode = furthestBlock; | |
2064 var node = furthestBlock; | |
2065 int innerLoopCounter = 0; | |
2066 | |
// Walk down the stack from the furthest block, one slot per iteration.
2067 var index = tree.openElements.indexOf(node); | |
2068 while (innerLoopCounter < 3) { | |
2069 innerLoopCounter += 1; | |
2070 | |
2071 // Node is element before node in open elements | |
2072 index -= 1; | |
2073 node = tree.openElements[index]; | |
// Nodes that are not active formatting elements are simply dropped from the
// open-element stack.
2074 if (!tree.activeFormattingElements.contains(node)) { | |
2075 tree.openElements.remove(node); | |
2076 continue; | |
2077 } | |
2078 // Step 6.3 | |
2079 if (node == formattingElement) { | |
2080 break; | |
2081 } | |
2082 // Step 6.4 | |
2083 if (lastNode == furthestBlock) { | |
2084 bookmark = (tree.activeFormattingElements.indexOf(node) + 1); | |
2085 } | |
2086 // Step 6.5 | |
2087 //cite = node.parent | |
// NOTE(review): clone(false) is presumably a shallow (no children) copy;
// confirm against Node.clone.
2088 var clone = node.clone(false); | |
2089 // Replace node with clone | |
2090 tree.activeFormattingElements[ | |
2091 tree.activeFormattingElements.indexOf(node)] = clone; | |
2092 tree.openElements[tree.openElements.indexOf(node)] = clone; | |
2093 node = clone; | |
2094 | |
2095 // Step 6.6 | |
2096 // Remove lastNode from its parents, if any | |
2097 if (lastNode.parentNode != null) { | |
2098 lastNode.parentNode.nodes.remove(lastNode); | |
2099 } | |
2100 node.nodes.add(lastNode); | |
2101 // Step 7.7 | |
2102 lastNode = node; | |
2103 // End of inner loop | |
2104 } | |
2105 | |
2106 // Step 7 | |
2107 // Foster parent lastNode if commonAncestor is a | |
2108 // table, tbody, tfoot, thead, or tr we need to foster parent the | |
2109 // lastNode | |
2110 if (lastNode.parentNode != null) { | |
2111 lastNode.parentNode.nodes.remove(lastNode); | |
2112 } | |
2113 | |
2114 if (const [ | |
2115 "table", | |
2116 "tbody", | |
2117 "tfoot", | |
2118 "thead", | |
2119 "tr" | |
2120 ].contains(commonAncestor.localName)) { | |
// nodePos[0] is used as the parent and nodePos[1] as the reference node for
// insertBefore.
2121 var nodePos = tree.getTableMisnestedNodePosition(); | |
2122 nodePos[0].insertBefore(lastNode, nodePos[1]); | |
2123 } else { | |
2124 commonAncestor.nodes.add(lastNode); | |
2125 } | |
2126 | |
2127 // Step 8 | |
2128 var clone = formattingElement.clone(false); | |
2129 | |
2130 // Step 9 | |
2131 furthestBlock.reparentChildren(clone); | |
2132 | |
2133 // Step 10 | |
2134 furthestBlock.nodes.add(clone); | |
2135 | |
2136 // Step 11 | |
// The bookmark is clamped to the list length because removing the formatting
// element may have shortened the list.
2137 tree.activeFormattingElements.remove(formattingElement); | |
2138 tree.activeFormattingElements.insert( | |
2139 min(bookmark, tree.activeFormattingElements.length), clone); | |
2140 | |
2141 // Step 12 | |
// The clone takes the stack slot directly after the furthest block.
2142 tree.openElements.remove(formattingElement); | |
2143 tree.openElements.insert( | |
2144 tree.openElements.indexOf(furthestBlock) + 1, clone); | |
2145 } | |
2146 } | |
2147 | |
/// Handles the end tag of an `applet`, `marquee` or `object` element (the
/// names in this method's identifier), looked up by `token.name`.
///
/// The scope check is evaluated once and reused, matching `endTagBlock`,
/// instead of walking the open-element stack a second time after implied end
/// tags have been generated. When the element is in scope, implied end tags
/// are generated, the stack is popped via `popOpenElementsUntil`, and the
/// active formatting elements are cleared. A parse error is reported whenever
/// the current node's name differs from the tag name.
void endTagAppletMarqueeObject(EndTagToken token) {
  // NOTE(review): reusing the result assumes generateImpliedEndTags() never
  // pops the target element or a scope boundary — confirm against TreeBuilder.
  var inScope = tree.elementInScope(token.name);
  if (inScope) {
    tree.generateImpliedEndTags();
  }
  if (tree.openElements.last.localName != token.name) {
    parser.parseError(token.span, "end-tag-too-early", {"name": token.name});
  }
  if (inScope) {
    popOpenElementsUntil(token);
    tree.clearActiveFormattingElements();
  }
}
2160 | |
/// Handles a `</br>` end tag by reporting a parse error and treating it as a
/// `br` start tag: the element is inserted and immediately popped from the
/// open elements.
void endTagBr(EndTagToken token) {
  var details = {"originalName": "br", "newName": "br element"};
  parser.parseError(token.span, "unexpected-end-tag-treated-as", details);

  tree.reconstructActiveFormattingElements();
  tree.insertElement(new StartTagToken("br", data: {}));
  tree.openElements.removeLast();
}
2170 | |
/// Fallback end-tag handler.
///
/// Scans the open elements from the top of the stack. On the first element
/// whose name matches the tag, implied end tags (except for that name) are
/// generated, a parse error is reported if the current node does not match,
/// the stack is popped through that element, and its `endSourceSpan` is set.
/// If an element whose name tuple is in `specialElements` is reached first, a
/// parse error is reported and the tag is ignored.
void endTagOther(EndTagToken token) {
  var tagName = token.name;
  for (var candidate in tree.openElements.reversed) {
    if (candidate.localName != tagName) {
      if (specialElements.contains(getElementNameTuple(candidate))) {
        parser.parseError(token.span, "unexpected-end-tag", {"name": tagName});
        return;
      }
      continue;
    }

    tree.generateImpliedEndTags(tagName);
    if (tree.openElements.last.localName != tagName) {
      parser.parseError(token.span, "unexpected-end-tag", {"name": tagName});
    }
    // Pop everything above the match, then the match itself.
    var popped;
    do {
      popped = tree.openElements.removeLast();
    } while (popped != candidate);
    candidate.endSourceSpan = token.span;
    return;
  }
}
2191 } | |
2192 | |
/// Phase used while collecting the text content of an element; every end tag
/// pops the current node and restores `parser.originalPhase`.
class TextPhase extends Phase {
  TextPhase(parser) : super(parser);

  /// Start tags are not expected in this phase.
  // "Tried to process start tag %s in RCDATA/RAWTEXT mode"%token.name
  processStartTag(StartTagToken token) {
    assert(false);
  }

  /// Routes `script` end tags to [endTagScript] and all others to
  /// [endTagOther].
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case 'script':
        return endTagScript(token);
      default:
        return endTagOther(token);
    }
  }

  /// Inserts the token's text into the tree. Always returns `null`.
  Token processCharacters(CharactersToken token) {
    tree.insertText(token.data, token.span);
    return null;
  }

  /// Reports the unclosed current node, pops it, restores the original phase
  /// and returns `true`.
  bool processEOF() {
    var unclosed = tree.openElements.last;
    var details = {'name': unclosed.localName};
    parser.parseError(
        unclosed.sourceSpan, "expected-named-closing-tag-but-got-eof", details);
    tree.openElements.removeLast();
    parser.phase = parser.originalPhase;
    return true;
  }

  /// Pops the current node (asserted to be a `script`) and restores the
  /// original phase.
  void endTagScript(EndTagToken token) {
    var script = tree.openElements.removeLast();
    assert(script.localName == "script");
    parser.phase = parser.originalPhase;
    // The rest of this method is all stuff that only happens if
    // document.write works
  }

  /// Pops the current node and restores the original phase.
  void endTagOther(EndTagToken token) {
    tree.openElements.removeLast();
    parser.phase = parser.originalPhase;
  }
}
2233 | |
/// Phase for the "in table" insertion mode (see the spec link below).
///
/// Handlers that delegate to the in-body phase do so with
/// `tree.insertFromTable` set; the flag is always reset in a `finally` block
/// so that it cannot stay set if the delegated handler throws.
class InTablePhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work///in-table
  InTablePhase(parser) : super(parser);

  /// Dispatches [token] to the start-tag handler for its tag name.
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "caption":
        return startTagCaption(token);
      case "colgroup":
        return startTagColgroup(token);
      case "col":
        return startTagCol(token);
      case "tbody":
      case "tfoot":
      case "thead":
        return startTagRowGroup(token);
      case "td":
      case "th":
      case "tr":
        return startTagImplyTbody(token);
      case "table":
        return startTagTable(token);
      case "style":
      case "script":
        return startTagStyleScript(token);
      case "input":
        return startTagInput(token);
      case "form":
        return startTagForm(token);
      default:
        return startTagOther(token);
    }
  }

  /// Dispatches [token] to the end-tag handler for its tag name.
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "table":
        return endTagTable(token);
      case "body":
      case "caption":
      case "col":
      case "colgroup":
      case "html":
      case "tbody":
      case "td":
      case "tfoot":
      case "th":
      case "thead":
      case "tr":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  // helper methods

  /// Pops open elements until the current node is a `table` or `html`.
  void clearStackToTableContext() {
    // "clear the stack back to a table context"
    while (tree.openElements.last.localName != "table" &&
        tree.openElements.last.localName != "html") {
      //parser.parseError(token.span, "unexpected-implied-end-tag-in-table",
      //  {"name":  tree.openElements.last.name})
      tree.openElements.removeLast();
    }
    // When the current node is <html> it's an innerHTML case
  }

  // processing methods

  /// Reports `eof-in-table` unless the current node is `html`. Returns
  /// `false`.
  bool processEOF() {
    var last = tree.openElements.last;
    if (last.localName != "html") {
      parser.parseError(last.sourceSpan, "eof-in-table");
    } else {
      assert(parser.innerHTMLMode);
    }
    //Stop parsing
    return false;
  }

  /// Switches to the in-table-text phase (remembering the current phase as
  /// its `originalPhase`) and forwards [token] to it. Always returns `null`.
  Token processSpaceCharacters(SpaceCharactersToken token) {
    var originalPhase = parser.phase;
    parser.phase = parser._inTableTextPhase;
    parser._inTableTextPhase.originalPhase = originalPhase;
    parser.phase.processSpaceCharacters(token);
    return null;
  }

  /// Switches to the in-table-text phase (remembering the current phase as
  /// its `originalPhase`) and forwards [token] to it. Always returns `null`.
  Token processCharacters(CharactersToken token) {
    var originalPhase = parser.phase;
    parser.phase = parser._inTableTextPhase;
    parser._inTableTextPhase.originalPhase = originalPhase;
    parser.phase.processCharacters(token);
    return null;
  }

  /// Processes [token] through the in-body phase with `tree.insertFromTable`
  /// set for the duration of the call.
  void insertText(CharactersToken token) {
    // If we get here there must be at least one non-whitespace character
    // Do the table magic!
    tree.insertFromTable = true;
    try {
      parser._inBodyPhase.processCharacters(token);
    } finally {
      // Reset even if the delegated handler throws.
      tree.insertFromTable = false;
    }
  }

  /// Clears the stack to a table context, adds a `Marker` to the active
  /// formatting elements, inserts the element and enters the in-caption phase.
  void startTagCaption(StartTagToken token) {
    clearStackToTableContext();
    tree.activeFormattingElements.add(Marker);
    tree.insertElement(token);
    parser.phase = parser._inCaptionPhase;
  }

  /// Clears the stack to a table context, inserts the element and enters the
  /// in-column-group phase.
  void startTagColgroup(StartTagToken token) {
    clearStackToTableContext();
    tree.insertElement(token);
    parser.phase = parser._inColumnGroupPhase;
  }

  /// Opens a synthesized `colgroup` and returns [token] for reprocessing.
  Token startTagCol(StartTagToken token) {
    startTagColgroup(new StartTagToken("colgroup", data: {}));
    return token;
  }

  /// Clears the stack to a table context, inserts the element and enters the
  /// in-table-body phase.
  void startTagRowGroup(StartTagToken token) {
    clearStackToTableContext();
    tree.insertElement(token);
    parser.phase = parser._inTableBodyPhase;
  }

  /// Opens a synthesized `tbody` and returns [token] for reprocessing.
  Token startTagImplyTbody(StartTagToken token) {
    startTagRowGroup(new StartTagToken("tbody", data: {}));
    return token;
  }

  /// Reports a parse error and processes a synthesized `</table>` through the
  /// current phase. Returns [token] for reprocessing unless the parser is in
  /// innerHTML mode, in which case `null` is returned.
  Token startTagTable(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-implies-end-tag", {
      "startName": "table",
      "endName": "table"
    });
    parser.phase.processEndTag(new EndTagToken("table"));
    if (!parser.innerHTMLMode) {
      return token;
    }
    return null;
  }

  /// Delegates `style` and `script` start tags to the in-head phase.
  Token startTagStyleScript(StartTagToken token) {
    return parser._inHeadPhase.processStartTag(token);
  }

  /// Inserts and immediately pops an `input` whose `type` attribute is
  /// "hidden" (compared after `asciiUpper2Lower`), with a parse error; any
  /// other input goes through [startTagOther].
  void startTagInput(StartTagToken token) {
    if (asciiUpper2Lower(token.data["type"]) == "hidden") {
      parser.parseError(token.span, "unexpected-hidden-input-in-table");
      tree.insertElement(token);
      // XXX associate with form
      tree.openElements.removeLast();
    } else {
      startTagOther(token);
    }
  }

  /// Reports a parse error; if no form pointer is set, inserts the form,
  /// records it as the form pointer and pops it from the open elements.
  void startTagForm(StartTagToken token) {
    parser.parseError(token.span, "unexpected-form-in-table");
    if (tree.formPointer == null) {
      tree.insertElement(token);
      tree.formPointer = tree.openElements.last;
      tree.openElements.removeLast();
    }
  }

  /// Reports a parse error and processes [token] through the in-body phase
  /// with `tree.insertFromTable` set for the duration of the call.
  void startTagOther(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-implies-table-voodoo", {
      "name": token.name
    });
    // Do the table magic!
    tree.insertFromTable = true;
    try {
      parser._inBodyPhase.processStartTag(token);
    } finally {
      // Reset even if the delegated handler throws.
      tree.insertFromTable = false;
    }
  }

  /// Closes the table: generates implied end tags, reports a parse error if
  /// the current node is not the table, pops through the `table` element,
  /// records its end span and resets the insertion mode. If no table is in
  /// table scope, only a parse error is reported.
  void endTagTable(EndTagToken token) {
    if (tree.elementInScope("table", variant: "table")) {
      tree.generateImpliedEndTags();
      var last = tree.openElements.last;
      if (last.localName != "table") {
        parser.parseError(token.span, "end-tag-too-early-named", {
          "gotName": "table",
          "expectedName": last.localName
        });
      }
      while (tree.openElements.last.localName != "table") {
        tree.openElements.removeLast();
      }
      var node = tree.openElements.removeLast();
      node.endSourceSpan = token.span;
      parser.resetInsertionMode();
    } else {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
    }
  }

  /// Reports a parse error; the end tag is otherwise ignored.
  void endTagIgnore(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
  }

  /// Reports a parse error and processes [token] through the in-body phase
  /// with `tree.insertFromTable` set for the duration of the call.
  void endTagOther(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag-implies-table-voodoo", {
      "name": token.name
    });
    // Do the table magic!
    tree.insertFromTable = true;
    try {
      parser._inBodyPhase.processEndTag(token);
    } finally {
      // Reset even if the delegated handler throws.
      tree.insertFromTable = false;
    }
  }
}
2451 | |
/// Phase that buffers character tokens seen inside a table and flushes them
/// when any other kind of token (or EOF) arrives.
class InTableTextPhase extends Phase {
  /// Phase to return to once the buffered text has been flushed.
  Phase originalPhase;

  /// Character tokens collected since the last flush.
  List<StringToken> characterTokens;

  InTableTextPhase(parser)
      : characterTokens = <StringToken>[],
        super(parser);

  /// Emits the buffered text and empties the buffer.
  ///
  /// Text that is not all whitespace goes through the in-table phase's
  /// `insertText`; non-empty all-whitespace text is inserted directly into
  /// the tree. A span covering the first through last token is attached only
  /// when `parser.generateSpans` is set.
  void flushCharacters() {
    if (characterTokens.isEmpty) return;

    // TODO(sigmund,jmesserly): remove '' (dartbug.com/8480)
    var text = characterTokens.map((t) => t.data).join('');

    var span;
    if (parser.generateSpans) {
      var first = characterTokens[0].span;
      span = first.expand(characterTokens.last.span);
    }

    if (allWhitespace(text)) {
      if (text.length > 0) {
        tree.insertText(text, span);
      }
    } else {
      parser._inTablePhase.insertText(new CharactersToken(text)..span = span);
    }
    characterTokens = <StringToken>[];
  }

  /// Flushes the buffer and hands control back to [originalPhase].
  void _flushAndRestore() {
    flushCharacters();
    parser.phase = originalPhase;
  }

  /// Flushes, restores the original phase and returns [token] unchanged.
  Token processComment(CommentToken token) {
    _flushAndRestore();
    return token;
  }

  /// Flushes, restores the original phase and returns `true`.
  bool processEOF() {
    _flushAndRestore();
    return true;
  }

  /// Buffers [token] unless its data is exactly a single NUL character.
  /// Always returns `null`.
  Token processCharacters(CharactersToken token) {
    if (token.data != "\u0000") {
      characterTokens.add(token);
    }
    return null;
  }

  /// Buffers [token]. Always returns `null`.
  Token processSpaceCharacters(SpaceCharactersToken token) {
    //pretty sure we should never reach here
    characterTokens.add(token);
    // XXX assert(false);
    return null;
  }

  /// Flushes, restores the original phase and returns [token] unchanged.
  Token processStartTag(StartTagToken token) {
    _flushAndRestore();
    return token;
  }

  /// Flushes, restores the original phase and returns [token] unchanged.
  Token processEndTag(EndTagToken token) {
    _flushAndRestore();
    return token;
  }
}
2518 | |
/// Phase for the "in caption" insertion mode (see the spec link below).
class InCaptionPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work///in-caption
  InCaptionPhase(parser) : super(parser);

  /// Dispatches [token] to the start-tag handler for its tag name.
  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    const tableTags = const [
      "caption",
      "col",
      "colgroup",
      "tbody",
      "td",
      "tfoot",
      "th",
      "thead",
      "tr"
    ];
    if (tableTags.contains(name)) return startTagTableElement(token);
    return startTagOther(token);
  }

  /// Dispatches [token] to the end-tag handler for its tag name.
  processEndTag(EndTagToken token) {
    var name = token.name;
    if (name == "caption") return endTagCaption(token);
    if (name == "table") return endTagTable(token);
    const ignored = const [
      "body",
      "col",
      "colgroup",
      "html",
      "tbody",
      "td",
      "tfoot",
      "th",
      "thead",
      "tr"
    ];
    if (ignored.contains(name)) return endTagIgnore(token);
    return endTagOther(token);
  }

  /// Whether a `</caption>` would be ignored, i.e. no `caption` is in table
  /// scope.
  bool ignoreEndTagCaption() =>
      !tree.elementInScope("caption", variant: "table");

  /// Runs the in-body EOF handling and returns `false`.
  bool processEOF() {
    parser._inBodyPhase.processEOF();
    return false;
  }

  /// Delegates character tokens to the in-body phase.
  Token processCharacters(CharactersToken token) =>
      parser._inBodyPhase.processCharacters(token);

  /// Reports `undefined-error`, processes a synthesized `</caption>` through
  /// the current phase, and returns [token] for reprocessing unless that end
  /// tag was going to be ignored.
  Token _closeCaptionThenReprocess(Token token) {
    parser.parseError(token.span, "undefined-error");
    //XXX Have to duplicate logic here to find out if the tag is ignored
    var ignored = ignoreEndTagCaption();
    parser.phase.processEndTag(new EndTagToken("caption"));
    return ignored ? null : token;
  }

  /// Handles a table-structure start tag by implicitly closing the caption.
  Token startTagTableElement(StartTagToken token) =>
      _closeCaptionThenReprocess(token);

  /// Delegates any other start tag to the in-body phase.
  Token startTagOther(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// Closes the caption: generates implied end tags, reports a parse error if
  /// the current node is not the caption, pops through the `caption` element,
  /// records its end span, clears the active formatting elements and returns
  /// to the in-table phase. If the end tag is to be ignored, only a parse
  /// error is reported.
  void endTagCaption(EndTagToken token) {
    if (ignoreEndTagCaption()) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return;
    }

    // AT this code is quite similar to endTagTable in "InTable"
    tree.generateImpliedEndTags();
    var current = tree.openElements.last;
    if (current.localName != "caption") {
      parser.parseError(token.span, "expected-one-end-tag-but-got-another", {
        "gotName": "caption",
        "expectedName": current.localName
      });
    }
    var popped = tree.openElements.removeLast();
    while (popped.localName != "caption") {
      popped = tree.openElements.removeLast();
    }
    popped.endSourceSpan = token.span;
    tree.clearActiveFormattingElements();
    parser.phase = parser._inTablePhase;
  }

  /// Handles `</table>` by implicitly closing the caption.
  Token endTagTable(EndTagToken token) => _closeCaptionThenReprocess(token);

  /// Reports a parse error; the end tag is otherwise ignored.
  void endTagIgnore(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError(token.span, "unexpected-end-tag", details);
  }

  /// Delegates any other end tag to the in-body phase.
  Token endTagOther(EndTagToken token) =>
      parser._inBodyPhase.processEndTag(token);
}
2634 | |
/// Phase for the "in column group" insertion mode (see the spec link below).
class InColumnGroupPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work///in-column
  InColumnGroupPhase(parser) : super(parser);

  /// Dispatches [token] to the start-tag handler for its tag name.
  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "col") return startTagCol(token);
    return startTagOther(token);
  }

  /// Dispatches [token] to the end-tag handler for its tag name.
  processEndTag(EndTagToken token) {
    var name = token.name;
    if (name == "colgroup") return endTagColgroup(token);
    if (name == "col") return endTagCol(token);
    return endTagOther(token);
  }

  /// Whether a `</colgroup>` would be ignored, i.e. the current node is
  /// `html`.
  bool ignoreEndTagColgroup() => tree.openElements.last.localName == "html";

  /// Returns `false` when the colgroup end tag would be ignored; otherwise
  /// closes the colgroup with a synthesized end tag and returns `true`.
  bool processEOF() {
    if (ignoreEndTagColgroup()) {
      assert(parser.innerHTMLMode);
      return false;
    }
    endTagColgroup(new EndTagToken("colgroup"));
    return true;
  }

  /// Processes a synthesized `</colgroup>` and returns [token] for
  /// reprocessing unless that end tag was ignored.
  Token _closeColgroupThenReprocess(Token token) {
    var ignored = ignoreEndTagColgroup();
    endTagColgroup(new EndTagToken("colgroup"));
    return ignored ? null : token;
  }

  /// Character data implicitly closes the colgroup.
  Token processCharacters(CharactersToken token) =>
      _closeColgroupThenReprocess(token);

  /// Inserts the `col` element and immediately pops it.
  void startTagCol(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
  }

  /// Any other start tag implicitly closes the colgroup.
  Token startTagOther(StartTagToken token) =>
      _closeColgroupThenReprocess(token);

  /// Pops the current node, records its end span and returns to the in-table
  /// phase; if the end tag is to be ignored, only a parse error is reported.
  void endTagColgroup(EndTagToken token) {
    if (!ignoreEndTagColgroup()) {
      var colgroup = tree.openElements.removeLast();
      colgroup.endSourceSpan = token.span;
      parser.phase = parser._inTablePhase;
      return;
    }
    // innerHTML case
    assert(parser.innerHTMLMode);
    parser.parseError(token.span, "undefined-error");
  }

  /// Reports that `col` has no end tag.
  void endTagCol(EndTagToken token) {
    parser.parseError(token.span, "no-end-tag", {"name": "col"});
  }

  /// Any other end tag implicitly closes the colgroup.
  Token endTagOther(EndTagToken token) => _closeColgroupThenReprocess(token);
}
2715 | |
/// Phase for the "in table body" insertion mode (see the spec link below).
class InTableBodyPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work///in-table0
  InTableBodyPhase(parser) : super(parser);

  /// Dispatches [token] to the start-tag handler for its tag name.
  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "tr") return startTagTr(token);
    if (name == "td" || name == "th") return startTagTableCell(token);
    const tableOther = const [
      "caption",
      "col",
      "colgroup",
      "tbody",
      "tfoot",
      "thead"
    ];
    if (tableOther.contains(name)) return startTagTableOther(token);
    return startTagOther(token);
  }

  /// Dispatches [token] to the end-tag handler for its tag name.
  processEndTag(EndTagToken token) {
    var name = token.name;
    if (name == "tbody" || name == "tfoot" || name == "thead") {
      return endTagTableRowGroup(token);
    }
    if (name == "table") return endTagTable(token);
    const ignored = const [
      "body",
      "caption",
      "col",
      "colgroup",
      "html",
      "td",
      "th",
      "tr"
    ];
    if (ignored.contains(name)) return endTagIgnore(token);
    return endTagOther(token);
  }

  // helper methods

  /// Pops open elements until the current node is `tbody`, `tfoot`, `thead`
  /// or `html`.
  void clearStackToTableBodyContext() {
    const stopAt = const ["tbody", "tfoot", "thead", "html"];
    var current = tree.openElements.last;
    while (!stopAt.contains(current.localName)) {
      //XXX parser.parseError(token.span, "unexpected-implied-end-tag-in-table",
      //  {"name": tree.openElements.last.name})
      tree.openElements.removeLast();
      current = tree.openElements.last;
    }
    if (current.localName == "html") {
      assert(parser.innerHTMLMode);
    }
  }

  // the rest

  /// Runs the in-table EOF handling and returns `false`.
  bool processEOF() {
    parser._inTablePhase.processEOF();
    return false;
  }

  /// Delegates whitespace tokens to the in-table phase.
  Token processSpaceCharacters(SpaceCharactersToken token) =>
      parser._inTablePhase.processSpaceCharacters(token);

  /// Delegates character tokens to the in-table phase.
  Token processCharacters(CharactersToken token) =>
      parser._inTablePhase.processCharacters(token);

  /// Clears the stack to a table-body context, inserts the row and enters the
  /// in-row phase.
  void startTagTr(StartTagToken token) {
    clearStackToTableBodyContext();
    tree.insertElement(token);
    parser.phase = parser._inRowPhase;
  }

  /// Reports a parse error, opens a synthesized `tr` and returns [token] for
  /// reprocessing.
  Token startTagTableCell(StartTagToken token) {
    var details = {"name": token.name};
    parser.parseError(token.span, "unexpected-cell-in-table-body", details);
    startTagTr(new StartTagToken("tr", data: {}));
    return token;
  }

  /// Handles table-structure start tags exactly like [endTagTable].
  Token startTagTableOther(token) {
    return endTagTable(token);
  }

  /// Delegates any other start tag to the in-table phase.
  Token startTagOther(StartTagToken token) =>
      parser._inTablePhase.processStartTag(token);

  /// Closes the row group named by the token if it is in table scope: clears
  /// the stack to a table-body context, pops the current node, records its
  /// end span and returns to the in-table phase. Otherwise reports a parse
  /// error.
  void endTagTableRowGroup(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      parser.parseError(
          token.span, "unexpected-end-tag-in-table-body", {"name": token.name});
      return;
    }
    clearStackToTableBodyContext();
    var group = tree.openElements.removeLast();
    group.endSourceSpan = token.span;
    parser.phase = parser._inTablePhase;
  }

  /// Closes the current row group when a `tbody`, `thead` or `tfoot` is in
  /// table scope and returns [token] for reprocessing; otherwise reports a
  /// parse error and returns `null`.
  Token endTagTable(TagToken token) {
    // XXX AT Any ideas on how to share this with endTagTable?
    var rowGroupOpen = const ["tbody", "thead", "tfoot"]
        .any((name) => tree.elementInScope(name, variant: "table"));
    if (!rowGroupOpen) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return null;
    }
    clearStackToTableBodyContext();
    endTagTableRowGroup(new EndTagToken(tree.openElements.last.localName));
    return token;
  }

  /// Reports a parse error; the end tag is otherwise ignored.
  void endTagIgnore(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError(token.span, "unexpected-end-tag-in-table-body", details);
  }

  /// Delegates any other end tag to the in-table phase.
  Token endTagOther(EndTagToken token) =>
      parser._inTablePhase.processEndTag(token);
}
2846 | |
2847 class InRowPhase extends Phase { | |
2848 // http://www.whatwg.org/specs/web-apps/current-work///in-row | |
2849 InRowPhase(parser) : super(parser); | |
2850 | |
/// Dispatches [token] to the start-tag handler for its tag name.
processStartTag(StartTagToken token) {
  var name = token.name;
  if (name == "html") return startTagHtml(token);
  if (name == "td" || name == "th") return startTagTableCell(token);
  const tableOther = const [
    "caption",
    "col",
    "colgroup",
    "tbody",
    "tfoot",
    "thead",
    "tr"
  ];
  if (tableOther.contains(name)) return startTagTableOther(token);
  return startTagOther(token);
}
2870 | |
/// Dispatches [token] to the end-tag handler for its tag name.
processEndTag(EndTagToken token) {
  var name = token.name;
  if (name == "tr") return endTagTr(token);
  if (name == "table") return endTagTable(token);
  if (name == "tbody" || name == "tfoot" || name == "thead") {
    return endTagTableRowGroup(token);
  }
  const ignored = const [
    "body",
    "caption",
    "col",
    "colgroup",
    "html",
    "td",
    "th"
  ];
  if (ignored.contains(name)) return endTagIgnore(token);
  return endTagOther(token);
}
2893 | |
2894 // helper methods (XXX unify this with other table helper methods) | |
2895 void clearStackToTableRowContext() { | |
2896 while (true) { | |
2897 var last = tree.openElements.last; | |
2898 if (last.localName == "tr" || last.localName == "html") break; | |
2899 | |
2900 parser.parseError(last.sourceSpan, | |
2901 "unexpected-implied-end-tag-in-table-row", { | |
2902 "name": tree.openElements.last.localName | |
2903 }); | |
2904 tree.openElements.removeLast(); | |
2905 } | |
2906 } | |
2907 | |
2908 bool ignoreEndTagTr() { | |
2909 return !tree.elementInScope("tr", variant: "table"); | |
2910 } | |
2911 | |
2912 // the rest | |
2913 bool processEOF() { | |
2914 parser._inTablePhase.processEOF(); | |
2915 return false; | |
2916 } | |
2917 | |
2918 Token processSpaceCharacters(SpaceCharactersToken token) { | |
2919 return parser._inTablePhase.processSpaceCharacters(token); | |
2920 } | |
2921 | |
2922 Token processCharacters(CharactersToken token) { | |
2923 return parser._inTablePhase.processCharacters(token); | |
2924 } | |
2925 | |
2926 void startTagTableCell(StartTagToken token) { | |
2927 clearStackToTableRowContext(); | |
2928 tree.insertElement(token); | |
2929 parser.phase = parser._inCellPhase; | |
2930 tree.activeFormattingElements.add(Marker); | |
2931 } | |
2932 | |
2933 Token startTagTableOther(StartTagToken token) { | |
2934 bool ignoreEndTag = ignoreEndTagTr(); | |
2935 endTagTr(new EndTagToken("tr")); | |
2936 // XXX how are we sure it's always ignored in the innerHTML case? | |
2937 return ignoreEndTag ? null : token; | |
2938 } | |
2939 | |
2940 Token startTagOther(StartTagToken token) { | |
2941 return parser._inTablePhase.processStartTag(token); | |
2942 } | |
2943 | |
2944 void endTagTr(EndTagToken token) { | |
2945 if (!ignoreEndTagTr()) { | |
2946 clearStackToTableRowContext(); | |
2947 var node = tree.openElements.removeLast(); | |
2948 node.endSourceSpan = token.span; | |
2949 parser.phase = parser._inTableBodyPhase; | |
2950 } else { | |
2951 // innerHTML case | |
2952 assert(parser.innerHTMLMode); | |
2953 parser.parseError(token.span, "undefined-error"); | |
2954 } | |
2955 } | |
2956 | |
2957 Token endTagTable(EndTagToken token) { | |
2958 var ignoreEndTag = ignoreEndTagTr(); | |
2959 endTagTr(new EndTagToken("tr")); | |
2960 // Reprocess the current tag if the tr end tag was not ignored | |
2961 // XXX how are we sure it's always ignored in the innerHTML case? | |
2962 return ignoreEndTag ? null : token; | |
2963 } | |
2964 | |
2965 Token endTagTableRowGroup(EndTagToken token) { | |
2966 if (tree.elementInScope(token.name, variant: "table")) { | |
2967 endTagTr(new EndTagToken("tr")); | |
2968 return token; | |
2969 } else { | |
2970 parser.parseError(token.span, "undefined-error"); | |
2971 return null; | |
2972 } | |
2973 } | |
2974 | |
2975 void endTagIgnore(EndTagToken token) { | |
2976 parser.parseError( | |
2977 token.span, "unexpected-end-tag-in-table-row", {"name": token.name}); | |
2978 } | |
2979 | |
2980 Token endTagOther(EndTagToken token) { | |
2981 return parser._inTablePhase.processEndTag(token); | |
2982 } | |
2983 } | |
2984 | |
/// Parser phase for tokens seen while a table cell (`td`/`th`) is open.
///
/// Tokens that are not table-structure related are forwarded to the
/// in-body phase.
class InCellPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-cell
  InCellPhase(parser) : super(parser);

  /// Routes a start tag to the handler that matches its name.
  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (const [
      "caption",
      "col",
      "colgroup",
      "tbody",
      "td",
      "tfoot",
      "th",
      "thead",
      "tr"
    ].contains(name)) {
      return startTagTableOther(token);
    }
    return startTagOther(token);
  }

  /// Routes an end tag to the handler that matches its name.
  processEndTag(EndTagToken token) {
    var name = token.name;
    if (name == "td" || name == "th") return endTagTableCell(token);
    if (const ["body", "caption", "col", "colgroup", "html"].contains(name)) {
      return endTagIgnore(token);
    }
    if (const ["table", "tbody", "tfoot", "thead", "tr"].contains(name)) {
      return endTagImply(token);
    }
    return endTagOther(token);
  }

  // helper

  /// Closes the open cell by synthesizing `</td>` if a `td` is in table
  /// scope, else `</th>` if a `th` is; does nothing when neither is.
  void closeCell() {
    for (var cellName in const ["td", "th"]) {
      if (tree.elementInScope(cellName, variant: "table")) {
        endTagTableCell(new EndTagToken(cellName));
        return;
      }
    }
  }

  // the rest

  /// Lets the in-body phase handle EOF; always returns `false`.
  bool processEOF() {
    parser._inBodyPhase.processEOF();
    return false;
  }

  Token processCharacters(CharactersToken token) =>
      parser._inBodyPhase.processCharacters(token);

  /// Closes the open cell and returns [token]; when no cell is in table
  /// scope, reports "undefined-error" and returns `null`.
  Token startTagTableOther(StartTagToken token) {
    var cellInScope = tree.elementInScope("td", variant: "table") ||
        tree.elementInScope("th", variant: "table");
    if (!cellInScope) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return null;
    }
    closeCell();
    return token;
  }

  Token startTagOther(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// Handles `</td>` / `</th>`.
  ///
  /// When the named cell is in table scope: generates implied end tags,
  /// pops the cell (reporting "unexpected-cell-end-tag" if it was not the
  /// current node), clears the active formatting elements and switches to
  /// the in-row phase. Otherwise only reports "unexpected-end-tag".
  void endTagTableCell(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
      return;
    }

    tree.generateImpliedEndTags(token.name);
    if (tree.openElements.last.localName == token.name) {
      var cell = tree.openElements.removeLast();
      cell.endSourceSpan = token.span;
    } else {
      parser.parseError(
          token.span, "unexpected-cell-end-tag", {"name": token.name});
      popOpenElementsUntil(token);
    }
    tree.clearActiveFormattingElements();
    parser.phase = parser._inRowPhase;
  }

  /// Reports "unexpected-end-tag" and otherwise drops [token].
  void endTagIgnore(EndTagToken token) => parser.parseError(
      token.span, "unexpected-end-tag", {"name": token.name});

  /// Handles end tags that implicitly close the cell: when the named
  /// element is in table scope the cell is closed and [token] is returned;
  /// otherwise "undefined-error" is reported and `null` is returned.
  Token endTagImply(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      // sometimes innerHTML case
      parser.parseError(token.span, "undefined-error");
      return null;
    }
    closeCell();
    return token;
  }

  Token endTagOther(EndTagToken token) =>
      parser._inBodyPhase.processEndTag(token);
}
3103 | |
/// Parser phase for tokens seen while a `select` element is open.
class InSelectPhase extends Phase {
  InSelectPhase(parser) : super(parser);

  /// Routes a start tag to the handler that matches its name.
  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "option") return startTagOption(token);
    if (name == "optgroup") return startTagOptgroup(token);
    if (name == "select") return startTagSelect(token);
    if (name == "input" || name == "keygen" || name == "textarea") {
      return startTagInput(token);
    }
    if (name == "script") return startTagScript(token);
    return startTagOther(token);
  }

  /// Routes an end tag to the handler that matches its name.
  processEndTag(EndTagToken token) {
    var name = token.name;
    if (name == "option") return endTagOption(token);
    if (name == "optgroup") return endTagOptgroup(token);
    if (name == "select") return endTagSelect(token);
    return endTagOther(token);
  }

  // http://www.whatwg.org/specs/web-apps/current-work/#in-select

  /// Reports "eof-in-select" unless the current node is `html`; always
  /// returns `false`.
  bool processEOF() {
    var current = tree.openElements.last;
    if (current.localName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError(current.sourceSpan, "eof-in-select");
    }
    return false;
  }

  /// Inserts the token's text, except that a token whose data is exactly
  /// U+0000 is dropped. Always returns `null`.
  Token processCharacters(CharactersToken token) {
    if (token.data != "\u0000") {
      tree.insertText(token.data, token.span);
    }
    return null;
  }

  /// Inserts an `option`, first popping the current node if it is itself
  /// an `option` (an implied `</option>`).
  void startTagOption(StartTagToken token) {
    var stack = tree.openElements;
    if (stack.last.localName == "option") stack.removeLast();
    tree.insertElement(token);
  }

  /// Inserts an `optgroup`, first popping a current `option` and then a
  /// current `optgroup` if present.
  void startTagOptgroup(StartTagToken token) {
    var stack = tree.openElements;
    if (stack.last.localName == "option") stack.removeLast();
    if (stack.last.localName == "optgroup") stack.removeLast();
    tree.insertElement(token);
  }

  /// A nested `<select>` is a parse error and is treated as `</select>`.
  void startTagSelect(StartTagToken token) {
    parser.parseError(token.span, "unexpected-select-in-select");
    endTagSelect(new EndTagToken("select"));
  }

  /// Handles `input`, `keygen` and `textarea`: always a parse error. When a
  /// `select` is in select scope it is closed and [token] is returned;
  /// otherwise `null` is returned.
  Token startTagInput(StartTagToken token) {
    parser.parseError(token.span, "unexpected-input-in-select");
    if (!tree.elementInScope("select", variant: "select")) {
      assert(parser.innerHTMLMode);
      return null;
    }
    endTagSelect(new EndTagToken("select"));
    return token;
  }

  Token startTagScript(StartTagToken token) =>
      parser._inHeadPhase.processStartTag(token);

  /// Reports "unexpected-start-tag-in-select" and drops [token].
  Token startTagOther(StartTagToken token) {
    parser.parseError(
        token.span, "unexpected-start-tag-in-select", {"name": token.name});
    return null;
  }

  /// Pops the current node if it is an `option`; otherwise reports
  /// "unexpected-end-tag-in-select".
  void endTagOption(EndTagToken token) {
    if (tree.openElements.last.localName != "option") {
      parser.parseError(
          token.span, "unexpected-end-tag-in-select", {"name": "option"});
      return;
    }
    var option = tree.openElements.removeLast();
    option.endSourceSpan = token.span;
  }

  /// Handles `</optgroup>`, which may also close an `option` nested
  /// directly inside the `optgroup`.
  void endTagOptgroup(EndTagToken token) {
    var stack = tree.openElements;

    // </optgroup> implicitly closes <option>
    if (stack.last.localName == "option" &&
        stack[stack.length - 2].localName == "optgroup") {
      stack.removeLast();
    }

    // It also closes </optgroup>, but nothing else.
    if (stack.last.localName != "optgroup") {
      parser.parseError(
          token.span, "unexpected-end-tag-in-select", {"name": "optgroup"});
      return;
    }
    var optgroup = stack.removeLast();
    optgroup.endSourceSpan = token.span;
  }

  /// Closes the `select` in select scope and resets the insertion mode, or
  /// reports "undefined-error" when there is none.
  void endTagSelect(EndTagToken token) {
    if (!tree.elementInScope("select", variant: "select")) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return;
    }
    popOpenElementsUntil(token);
    parser.resetInsertionMode();
  }

  /// Reports "unexpected-end-tag-in-select" and drops [token].
  void endTagOther(EndTagToken token) => parser.parseError(
      token.span, "unexpected-end-tag-in-select", {"name": token.name});
}
3248 | |
/// Parser phase for a `select` that is nested inside a table.
///
/// Table-structure tags get special handling here; everything else is
/// forwarded to the in-select phase.
class InSelectInTablePhase extends Phase {
  InSelectInTablePhase(parser) : super(parser);

  /// Sends table-structure start tags to [startTagTable] and all others to
  /// [startTagOther].
  processStartTag(StartTagToken token) {
    if (const [
      "caption",
      "table",
      "tbody",
      "tfoot",
      "thead",
      "tr",
      "td",
      "th"
    ].contains(token.name)) {
      return startTagTable(token);
    }
    return startTagOther(token);
  }

  /// Sends table-structure end tags to [endTagTable] and all others to
  /// [endTagOther].
  processEndTag(EndTagToken token) {
    if (const [
      "caption",
      "table",
      "tbody",
      "tfoot",
      "thead",
      "tr",
      "td",
      "th"
    ].contains(token.name)) {
      return endTagTable(token);
    }
    return endTagOther(token);
  }

  /// Lets the in-select phase handle EOF; always returns `false`.
  bool processEOF() {
    parser._inSelectPhase.processEOF();
    return false;
  }

  Token processCharacters(CharactersToken token) =>
      parser._inSelectPhase.processCharacters(token);

  /// Reports a parse error, processes a synthesized `</select>` through
  /// [endTagOther], and returns [token].
  Token startTagTable(StartTagToken token) {
    parser.parseError(
        token.span,
        "unexpected-table-element-start-tag-in-select-in-table",
        {"name": token.name});
    endTagOther(new EndTagToken("select"));
    return token;
  }

  Token startTagOther(StartTagToken token) =>
      parser._inSelectPhase.processStartTag(token);

  /// Reports a parse error; when the named element is in table scope, also
  /// processes a synthesized `</select>` and returns [token]. Returns
  /// `null` otherwise.
  Token endTagTable(EndTagToken token) {
    parser.parseError(
        token.span,
        "unexpected-table-element-end-tag-in-select-in-table",
        {"name": token.name});
    if (!tree.elementInScope(token.name, variant: "table")) return null;
    endTagOther(new EndTagToken("select"));
    return token;
  }

  Token endTagOther(EndTagToken token) =>
      parser._inSelectPhase.processEndTag(token);
}
3322 | |
/// Parser phase for tokens seen inside foreign content; the handlers below
/// distinguish current nodes in the MathML and SVG namespaces.
class InForeignContentPhase extends Phase {
  // TODO(jmesserly): this is sorted so we could binary search.
  /// Start-tag names that pop the parser out of foreign content.
  static const breakoutElements = const [
    'b',
    'big',
    'blockquote',
    'body',
    'br',
    'center',
    'code',
    'dd',
    'div',
    'dl',
    'dt',
    'em',
    'embed',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'head',
    'hr',
    'i',
    'img',
    'li',
    'listing',
    'menu',
    'meta',
    'nobr',
    'ol',
    'p',
    'pre',
    'ruby',
    's',
    'small',
    'span',
    'strike',
    'strong',
    'sub',
    'sup',
    'table',
    'tt',
    'u',
    'ul',
    'var'
  ];

  InForeignContentPhase(parser) : super(parser);

  /// Rewrites [token]'s lower-cased name to the mixed-case SVG spelling
  /// listed in the table below; names not in the table are left alone.
  void adjustSVGTagNames(token) {
    final replacements = const {
      "altglyph": "altGlyph",
      "altglyphdef": "altGlyphDef",
      "altglyphitem": "altGlyphItem",
      "animatecolor": "animateColor",
      "animatemotion": "animateMotion",
      "animatetransform": "animateTransform",
      "clippath": "clipPath",
      "feblend": "feBlend",
      "fecolormatrix": "feColorMatrix",
      "fecomponenttransfer": "feComponentTransfer",
      "fecomposite": "feComposite",
      "feconvolvematrix": "feConvolveMatrix",
      "fediffuselighting": "feDiffuseLighting",
      "fedisplacementmap": "feDisplacementMap",
      "fedistantlight": "feDistantLight",
      "feflood": "feFlood",
      "fefunca": "feFuncA",
      "fefuncb": "feFuncB",
      "fefuncg": "feFuncG",
      "fefuncr": "feFuncR",
      "fegaussianblur": "feGaussianBlur",
      "feimage": "feImage",
      "femerge": "feMerge",
      "femergenode": "feMergeNode",
      "femorphology": "feMorphology",
      "feoffset": "feOffset",
      "fepointlight": "fePointLight",
      "fespecularlighting": "feSpecularLighting",
      "fespotlight": "feSpotLight",
      "fetile": "feTile",
      "feturbulence": "feTurbulence",
      "foreignobject": "foreignObject",
      "glyphref": "glyphRef",
      "lineargradient": "linearGradient",
      "radialgradient": "radialGradient",
      "textpath": "textPath"
    };

    var replace = replacements[token.name];
    if (replace != null) {
      token.name = replace;
    }
  }

  /// Replaces a lone U+0000 with U+FFFD, clears `framesetOK` when
  /// non-whitespace text arrives, then defers to the base implementation.
  Token processCharacters(CharactersToken token) {
    if (token.data == "\u0000") {
      token.replaceData("\uFFFD");
    } else if (parser.framesetOK && !allWhitespace(token.data)) {
      parser.framesetOK = false;
    }
    return super.processCharacters(token);
  }

  /// Handles a start tag in foreign content.
  ///
  /// A tag in [breakoutElements], or a `font` tag carrying a `color`,
  /// `face` or `size` attribute, is a parse error: open elements are
  /// popped until the current node is in the default namespace or is an
  /// integration point, and [token] is returned. Any other tag is inserted
  /// in the current node's namespace after the relevant name/attribute
  /// adjustments, and `null` is returned.
  Token processStartTag(StartTagToken token) {
    var currentNode = tree.openElements.last;
    var breaksOut = breakoutElements.contains(token.name) ||
        (token.name == "font" &&
            (token.data.containsKey("color") ||
                token.data.containsKey("face") ||
                token.data.containsKey("size")));

    if (breaksOut) {
      parser.parseError(token.span,
          "unexpected-html-element-in-foreign-content", {'name': token.name});
      while (tree.openElements.last.namespaceUri != tree.defaultNamespace &&
          !parser.isHTMLIntegrationPoint(tree.openElements.last) &&
          !parser.isMathMLTextIntegrationPoint(tree.openElements.last)) {
        tree.openElements.removeLast();
      }
      return token;
    }

    if (currentNode.namespaceUri == Namespaces.mathml) {
      parser.adjustMathMLAttributes(token);
    } else if (currentNode.namespaceUri == Namespaces.svg) {
      adjustSVGTagNames(token);
      parser.adjustSVGAttributes(token);
    }
    parser.adjustForeignAttributes(token);
    token.namespace = currentNode.namespaceUri;
    tree.insertElement(token);
    if (token.selfClosing) {
      tree.openElements.removeLast();
      token.selfClosingAcknowledged = true;
    }
    return null;
  }

  /// Handles an end tag in foreign content.
  ///
  /// Walks the stack of open elements from the current node downwards. If
  /// an element whose lower-cased local name equals the token name is
  /// found, everything up to and including it is popped and `null` is
  /// returned. If an element in the default namespace is reached first,
  /// the token is handed to the parser's current phase and that result is
  /// returned. If the bottom of the stack is reached without either, the
  /// token is dropped and `null` is returned.
  Token processEndTag(EndTagToken token) {
    var nodeIndex = tree.openElements.length - 1;
    var node = tree.openElements.last;
    if (asciiUpper2Lower(node.localName) != token.name) {
      parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
    }

    while (true) {
      if (asciiUpper2Lower(node.localName) == token.name) {
        //XXX this isn't in the spec but it seems necessary
        if (parser.phase == parser._inTableTextPhase) {
          InTableTextPhase inTableText = parser.phase;
          inTableText.flushCharacters();
          parser.phase = inTableText.originalPhase;
        }
        while (tree.openElements.removeLast() != node) {
          assert(tree.openElements.length > 0);
        }
        return null;
      }

      // Already at the first element of the stack: stop here instead of
      // indexing below zero, which would throw a RangeError.
      if (nodeIndex == 0) return null;

      nodeIndex -= 1;
      node = tree.openElements[nodeIndex];
      if (node.namespaceUri == tree.defaultNamespace) {
        return parser.phase.processEndTag(token);
      }
    }
  }
}
3497 | |
/// Parser phase used once the body has been closed.
///
/// Most unexpected tokens are reported, the parser is switched back to the
/// in-body phase, and the token is returned to the caller.
class AfterBodyPhase extends Phase {
  AfterBodyPhase(parser) : super(parser);

  processStartTag(StartTagToken token) =>
      token.name == "html" ? startTagHtml(token) : startTagOther(token);

  processEndTag(EndTagToken token) =>
      token.name == "html" ? endTagHtml(token) : endTagOther(token);

  // Stop parsing
  bool processEOF() {
    return false;
  }

  /// Attaches the comment to the first open element rather than to the
  /// current node.
  Token processComment(CommentToken token) {
    // This is needed because data is to be appended to the <html> element
    // here and not to whatever is currently open.
    var root = tree.openElements[0];
    tree.insertComment(token, root);
    return null;
  }

  /// Reports "unexpected-char-after-body", switches to the in-body phase
  /// and returns [token].
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "unexpected-char-after-body");
    parser.phase = parser._inBodyPhase;
    return token;
  }

  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// Reports "unexpected-start-tag-after-body", switches to the in-body
  /// phase and returns [token].
  Token startTagOther(StartTagToken token) {
    parser.parseError(
        token.span, "unexpected-start-tag-after-body", {"name": token.name});
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /// Records [token]'s span as the end span of the innermost open `html`
  /// element, then either reports an error (innerHTML mode) or advances to
  /// the after-after-body phase.
  void endTagHtml(Token token) {
    var stack = tree.openElements;
    for (var i = stack.length - 1; i >= 0; i--) {
      var candidate = stack[i];
      if (candidate.localName == 'html') {
        candidate.endSourceSpan = token.span;
        break;
      }
    }

    if (!parser.innerHTMLMode) {
      parser.phase = parser._afterAfterBodyPhase;
      return;
    }
    parser.parseError(token.span, "unexpected-end-tag-after-body-innerhtml");
  }

  /// Reports "unexpected-end-tag-after-body", switches to the in-body
  /// phase and returns [token].
  Token endTagOther(EndTagToken token) {
    parser.parseError(
        token.span, "unexpected-end-tag-after-body", {"name": token.name});
    parser.phase = parser._inBodyPhase;
    return token;
  }
}
3559 | |
/// Parser phase for tokens seen while a `frameset` element is open.
class InFramesetPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
  InFramesetPhase(parser) : super(parser);

  /// Routes a start tag to the handler that matches its name.
  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "frameset") return startTagFrameset(token);
    if (name == "frame") return startTagFrame(token);
    if (name == "noframes") return startTagNoframes(token);
    return startTagOther(token);
  }

  /// Routes an end tag to the handler that matches its name.
  processEndTag(EndTagToken token) {
    if (token.name == "frameset") return endTagFrameset(token);
    return endTagOther(token);
  }

  /// Reports "eof-in-frameset" unless the current node is `html`; always
  /// returns `false`.
  bool processEOF() {
    var current = tree.openElements.last;
    if (current.localName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError(current.sourceSpan, "eof-in-frameset");
    }
    return false;
  }

  /// Reports "unexpected-char-in-frameset" and drops the characters.
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "unexpected-char-in-frameset");
    return null;
  }

  void startTagFrameset(StartTagToken token) {
    tree.insertElement(token);
  }

  /// Inserts the `frame` element and immediately pops it again.
  void startTagFrame(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
  }

  Token startTagNoframes(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// Reports "unexpected-start-tag-in-frameset" and drops [token].
  Token startTagOther(StartTagToken token) {
    parser.parseError(
        token.span, "unexpected-start-tag-in-frameset", {"name": token.name});
    return null;
  }

  /// Pops the current node (unless it is `html`, which is a parse error),
  /// then moves to the after-frameset phase when not in innerHTML mode and
  /// the new current node is not a `frameset`.
  void endTagFrameset(EndTagToken token) {
    if (tree.openElements.last.localName != "html") {
      var closed = tree.openElements.removeLast();
      closed.endSourceSpan = token.span;
    } else {
      // innerHTML case
      parser.parseError(
          token.span, "unexpected-frameset-in-frameset-innerhtml");
    }

    // If we're not in innerHTML mode and the current node is not a
    // "frameset" element (anymore) then switch.
    var leftFramesets = !parser.innerHTMLMode &&
        tree.openElements.last.localName != "frameset";
    if (leftFramesets) {
      parser.phase = parser._afterFramesetPhase;
    }
  }

  /// Reports "unexpected-end-tag-in-frameset" and drops [token].
  void endTagOther(EndTagToken token) => parser.parseError(
      token.span, "unexpected-end-tag-in-frameset", {"name": token.name});
}
3644 | |
/// Parser phase used after the outermost `frameset` has been closed.
class AfterFramesetPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#after3
  AfterFramesetPhase(parser) : super(parser);

  /// Routes a start tag to the handler that matches its name.
  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "noframes") return startTagNoframes(token);
    return startTagOther(token);
  }

  /// Routes an end tag to the handler that matches its name.
  processEndTag(EndTagToken token) {
    if (token.name == "html") return endTagHtml(token);
    return endTagOther(token);
  }

  // Stop parsing
  bool processEOF() {
    return false;
  }

  /// Reports "unexpected-char-after-frameset" and drops the characters.
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "unexpected-char-after-frameset");
    return null;
  }

  Token startTagNoframes(StartTagToken token) =>
      parser._inHeadPhase.processStartTag(token);

  /// Reports "unexpected-start-tag-after-frameset" and drops [token].
  void startTagOther(StartTagToken token) => parser.parseError(
      token.span, "unexpected-start-tag-after-frameset", {"name": token.name});

  /// Advances the parser to the after-after-frameset phase.
  void endTagHtml(EndTagToken token) {
    parser.phase = parser._afterAfterFramesetPhase;
  }

  /// Reports "unexpected-end-tag-after-frameset" and drops [token].
  void endTagOther(EndTagToken token) => parser.parseError(
      token.span, "unexpected-end-tag-after-frameset", {"name": token.name});
}
3696 | |
/// Parser phase entered from [AfterBodyPhase] once `</html>` has been seen.
///
/// Comments are attached to the document; unexpected characters and tags
/// are reported, the parser is switched back to the in-body phase, and the
/// token is returned to the caller.
class AfterAfterBodyPhase extends Phase {
  AfterAfterBodyPhase(parser) : super(parser);

  processStartTag(StartTagToken token) =>
      token.name == 'html' ? startTagHtml(token) : startTagOther(token);

  bool processEOF() {
    return false;
  }

  /// Inserts the comment as a child of the document itself.
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  Token processSpaceCharacters(SpaceCharactersToken token) =>
      parser._inBodyPhase.processSpaceCharacters(token);

  /// Reports "expected-eof-but-got-char", switches to the in-body phase
  /// and returns [token].
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "expected-eof-but-got-char");
    parser.phase = parser._inBodyPhase;
    return token;
  }

  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// Reports "expected-eof-but-got-start-tag", switches to the in-body
  /// phase and returns [token].
  Token startTagOther(StartTagToken token) {
    parser.parseError(
        token.span, "expected-eof-but-got-start-tag", {"name": token.name});
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /// Reports "expected-eof-but-got-end-tag", switches to the in-body phase
  /// and returns [token].
  Token processEndTag(EndTagToken token) {
    parser.parseError(
        token.span, "expected-eof-but-got-end-tag", {"name": token.name});
    parser.phase = parser._inBodyPhase;
    return token;
  }
}
3740 | |
/// Parser phase entered from [AfterFramesetPhase] once `</html>` has been
/// seen.
///
/// Comments are attached to the document; unexpected characters and tags
/// are reported and dropped without changing phase.
class AfterAfterFramesetPhase extends Phase {
  AfterAfterFramesetPhase(parser) : super(parser);

  /// Routes a start tag to the handler that matches its name.
  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "noframes") return startTagNoFrames(token);
    return startTagOther(token);
  }

  bool processEOF() {
    return false;
  }

  /// Inserts the comment as a child of the document itself.
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  Token processSpaceCharacters(SpaceCharactersToken token) =>
      parser._inBodyPhase.processSpaceCharacters(token);

  /// Reports "expected-eof-but-got-char" and drops the characters.
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "expected-eof-but-got-char");
    return null;
  }

  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  Token startTagNoFrames(StartTagToken token) =>
      parser._inHeadPhase.processStartTag(token);

  /// Reports "expected-eof-but-got-start-tag" and drops [token].
  void startTagOther(StartTagToken token) => parser.parseError(
      token.span, "expected-eof-but-got-start-tag", {"name": token.name});

  /// Reports "expected-eof-but-got-end-tag" and drops [token].
  Token processEndTag(EndTagToken token) {
    parser.parseError(
        token.span, "expected-eof-but-got-end-tag", {"name": token.name});
    return null;
  }
}
3790 | |
/// Error in parsed document.
class ParseError implements SourceSpanException {
  /// Key used to look up the message template in [errorMessages].
  final String errorCode;

  /// Source location the error is attached to.
  final SourceSpan span;

  /// Values passed to [formatStr] together with the message template.
  final Map data;

  ParseError(this.errorCode, this.span, this.data);

  /// Line of the start of [span].
  int get line {
    return span.start.line;
  }

  /// Column of the start of [span].
  int get column {
    return span.start.column;
  }

  /// Gets the human readable error message for this error, without any
  /// location information.
  ///
  /// Use [span.message] or [toString] to get a message that includes the
  /// span. [toString] prefixes the result of [span.message] with
  /// 'ParserError on ' when the span has no source url and with 'On '
  /// otherwise.
  String get message {
    return formatStr(errorMessages[errorCode], data);
  }

  /// Formats [message] together with its location via [span.message],
  /// forwarding [color] unchanged.
  String toString({color}) {
    var located = span.message(message, color: color);
    if (span.sourceUrl == null) {
      return 'ParserError on $located';
    }
    return 'On $located';
  }
}
3816 | |
/// Convenience function to get the pair of namespace and localName.
///
/// An element whose namespace is `null` is reported with
/// [Namespaces.html] as its namespace.
Pair<String, String> getElementNameTuple(Element e) {
  var uri = e.namespaceUri;
  return new Pair(uri == null ? Namespaces.html : uri, e.localName);
}
OLD | NEW |