OLD | NEW |
(Empty) | |
| 1 /** |
| 2 * This library provides a parser for HTML5 documents that lets you parse HTML |
| 3 * easily from a script or server-side application: |
| 4 * |
| 5 * import 'package:html5lib/parser.dart' show parse; |
| 6 * import 'package:html5lib/dom.dart'; |
| 7 * main() { |
| 8 * var document = parse( |
| 9 * '<body>Hello world! <a href="www.html5rocks.com">HTML5 rocks!'); |
| 10 * print(document.outerHtml); |
| 11 * } |
| 12 * |
| 13 * The resulting document you get back has a DOM-like API for easy tree |
| 14 * traversal and manipulation. |
| 15 */ |
| 16 library parser; |
| 17 |
| 18 import 'dart:collection'; |
| 19 import 'dart:math'; |
| 20 import 'package:source_maps/span.dart' show Span, FileSpan; |
| 21 |
| 22 import 'src/treebuilder.dart'; |
| 23 import 'src/constants.dart'; |
| 24 import 'src/encoding_parser.dart'; |
| 25 import 'src/token.dart'; |
| 26 import 'src/tokenizer.dart'; |
| 27 import 'src/utils.dart'; |
| 28 import 'dom.dart'; |
| 29 import 'dom_parsing.dart'; |
| 30 |
/**
 * Parses [input] as a complete html5 document and returns the resulting tree.
 *
 * The [input] may be a [String], a [List<int>] of bytes or an
 * [HtmlTokenizer].
 *
 * When [input] is not an [HtmlTokenizer], the optional [encoding] string names
 * the character encoding of the input. If given, it is used regardless of any
 * BOM or later declaration (such as in a meta element).
 *
 * Pass [generateSpans] to have [Span]s generated; otherwise the
 * [Node.sourceSpan] property will be `null`. Together with [generateSpans],
 * [sourceUrl] may be passed to indicate where the [input] was extracted from.
 */
Document parse(input, {String encoding, bool generateSpans: false,
    String sourceUrl}) {
  return new HtmlParser(input,
      encoding: encoding,
      generateSpans: generateSpans,
      sourceUrl: sourceUrl).parse();
}
| 50 |
| 51 |
/**
 * Parses [input] as an html5 document fragment and returns the resulting
 * tree.
 *
 * The [input] may be a [String], a [List<int>] of bytes or an
 * [HtmlTokenizer]. The [container] names the element the fragment is parsed
 * inside of; it defaults to "div".
 *
 * When [input] is not an [HtmlTokenizer], the optional [encoding] string names
 * the character encoding of the input. If given, it is used regardless of any
 * BOM or later declaration (such as in a meta element).
 *
 * Pass [generateSpans] to have [Span]s generated; otherwise the
 * [Node.sourceSpan] property will be `null`. Together with [generateSpans],
 * [sourceUrl] may be passed to indicate where the [input] was extracted from.
 */
DocumentFragment parseFragment(input, {String container: "div",
    String encoding, bool generateSpans: false, String sourceUrl}) {
  return new HtmlParser(input,
      encoding: encoding,
      generateSpans: generateSpans,
      sourceUrl: sourceUrl).parseFragment(container);
}
| 72 |
| 73 |
/**
 * Parser for HTML, which generates a tree structure from a stream of
 * (possibly malformed) characters.
 *
 * Tokens are pulled from [tokenizer] and dispatched to the currently active
 * [phase]; the phases feed the [tree] builder. Problems found along the way
 * are collected in [errors] (or thrown immediately when [strict] is set).
 */
class HtmlParser {
  /** Raise an exception on the first error encountered. */
  final bool strict;

  /** True to generate [Span]s for the [Node.sourceSpan] property. */
  final bool generateSpans;

  /** Source of the tokens consumed by [mainLoop]. */
  final HtmlTokenizer tokenizer;

  /** Builder that receives the parsed nodes and tracks the open elements. */
  final TreeBuilder tree;

  /** Parse errors recorded so far. Cleared by [reset]. */
  final List<ParseError> errors = <ParseError>[];

  // NOTE(review): not read or written anywhere in this class; the fragment
  // container is kept in [innerHTML] instead. Confirm before removing.
  String container;

  /**
   * Set by [BeforeHtmlPhase] when the first start tag seen is `html`, and
   * cleared again by [reset] and [Phase.startTagHtml].
   */
  bool firstStartTag = false;

  // TODO(jmesserly): use enum?
  /** "quirks" / "limited quirks" / "no quirks" */
  String compatMode = "no quirks";

  /** innerHTML container when parsing document fragment. */
  String innerHTML;

  /** The phase that handles the next token. */
  Phase phase;

  Phase lastPhase;

  /** The phase that was active when [parseRCDataRawtext] switched to text. */
  Phase originalPhase;

  Phase beforeRCDataPhase;

  bool framesetOK;

  // These fields hold the different phase singletons. At any given time one
  // of them will be active.
  InitialPhase _initialPhase;
  BeforeHtmlPhase _beforeHtmlPhase;
  BeforeHeadPhase _beforeHeadPhase;
  InHeadPhase _inHeadPhase;
  AfterHeadPhase _afterHeadPhase;
  InBodyPhase _inBodyPhase;
  TextPhase _textPhase;
  InTablePhase _inTablePhase;
  InTableTextPhase _inTableTextPhase;
  InCaptionPhase _inCaptionPhase;
  InColumnGroupPhase _inColumnGroupPhase;
  InTableBodyPhase _inTableBodyPhase;
  InRowPhase _inRowPhase;
  InCellPhase _inCellPhase;
  InSelectPhase _inSelectPhase;
  InSelectInTablePhase _inSelectInTablePhase;
  InForeignContentPhase _inForeignContentPhase;
  AfterBodyPhase _afterBodyPhase;
  InFramesetPhase _inFramesetPhase;
  AfterFramesetPhase _afterFramesetPhase;
  AfterAfterBodyPhase _afterAfterBodyPhase;
  AfterAfterFramesetPhase _afterAfterFramesetPhase;

  /**
   * Create a new HtmlParser and configure the [tree] builder and [strict] mode.
   * The [input] can be a [String], [List<int>] of bytes or an [HtmlTokenizer].
   * When [tree] is omitted a `new TreeBuilder(true)` is used.
   *
   * If [input] is not a [HtmlTokenizer], you can specify a few more arguments.
   *
   * The [encoding] must be a string that indicates the encoding. If specified,
   * that encoding will be used, regardless of any BOM or later declaration
   * (such as in a meta element).
   *
   * Set [parseMeta] to false if you want to disable parsing the meta element.
   *
   * Set [lowercaseElementName] or [lowercaseAttrName] to false to disable the
   * automatic conversion of element and attribute names to lower case. Note
   * that the standard way to parse HTML is to lowercase, which is what the
   * browser DOM will do if you request [Node.outerHtml], for example.
   */
  HtmlParser(input, {String encoding, bool parseMeta: true,
      bool lowercaseElementName: true, bool lowercaseAttrName: true,
      this.strict: false, bool generateSpans: false, String sourceUrl,
      TreeBuilder tree})
      : generateSpans = generateSpans,
        tree = tree != null ? tree : new TreeBuilder(true),
        tokenizer = (input is HtmlTokenizer ? input :
            new HtmlTokenizer(input, encoding: encoding, parseMeta: parseMeta,
                lowercaseElementName: lowercaseElementName,
                lowercaseAttrName: lowercaseAttrName,
                generateSpans: generateSpans, sourceUrl: sourceUrl)) {

    tokenizer.parser = this;
    _initialPhase = new InitialPhase(this);
    _beforeHtmlPhase = new BeforeHtmlPhase(this);
    _beforeHeadPhase = new BeforeHeadPhase(this);
    _inHeadPhase = new InHeadPhase(this);
    // TODO(jmesserly): html5lib did not implement the no script parsing mode
    // More information here:
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#scripting-flag
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inheadnoscript
    // "inHeadNoscript": new InHeadNoScriptPhase(this);
    _afterHeadPhase = new AfterHeadPhase(this);
    _inBodyPhase = new InBodyPhase(this);
    _textPhase = new TextPhase(this);
    _inTablePhase = new InTablePhase(this);
    _inTableTextPhase = new InTableTextPhase(this);
    _inCaptionPhase = new InCaptionPhase(this);
    _inColumnGroupPhase = new InColumnGroupPhase(this);
    _inTableBodyPhase = new InTableBodyPhase(this);
    _inRowPhase = new InRowPhase(this);
    _inCellPhase = new InCellPhase(this);
    _inSelectPhase = new InSelectPhase(this);
    _inSelectInTablePhase = new InSelectInTablePhase(this);
    _inForeignContentPhase = new InForeignContentPhase(this);
    _afterBodyPhase = new AfterBodyPhase(this);
    _inFramesetPhase = new InFramesetPhase(this);
    _afterFramesetPhase = new AfterFramesetPhase(this);
    _afterAfterBodyPhase = new AfterAfterBodyPhase(this);
    _afterAfterFramesetPhase = new AfterAfterFramesetPhase(this);
  }

  /** Whether a fragment is being parsed, i.e. [innerHTML] is set. */
  bool get innerHTMLMode => innerHTML != null;

  /**
   * Parse an html5 document into a tree.
   * After parsing, [errors] will be populated with parse errors, if any.
   */
  Document parse() {
    innerHTML = null;
    _parse();
    return tree.getDocument();
  }

  /**
   * Parse an html5 document fragment into a tree.
   * Pass a [container] to change the type of the containing element.
   * After parsing, [errors] will be populated with parse errors, if any.
   *
   * Throws an [ArgumentError] if [container] is `null`.
   */
  DocumentFragment parseFragment([String container = "div"]) {
    if (container == null) throw new ArgumentError('container');
    innerHTML = container.toLowerCase();
    _parse();
    return tree.getFragment();
  }

  /** Resets the parser and runs [mainLoop], restarting on a reparse request. */
  void _parse() {
    reset();

    while (true) {
      try {
        mainLoop();
        break;
      } on ReparseException {
        // Note: this happens if we start parsing but the character encoding
        // changes. So we should only need to restart very early in the parse.
        reset();
      }
    }
  }

  /**
   * Returns the tokenizer, tree builder and parser state to their starting
   * values and selects the first phase.
   *
   * In fragment mode the tokenizer state is chosen from the container name,
   * a root element is inserted and the insertion mode is derived from it.
   */
  void reset() {
    tokenizer.reset();

    tree.reset();
    firstStartTag = false;
    errors.clear();
    // "quirks" / "limited quirks" / "no quirks"
    compatMode = "no quirks";

    if (innerHTMLMode) {
      // NOTE(review): cdataElements selects the RCDATA state and
      // rcdataElements selects the RAWTEXT state; the constant names look
      // crossed over relative to the states -- confirm against constants.dart.
      if (cdataElements.contains(innerHTML)) {
        tokenizer.state = tokenizer.rcdataState;
      } else if (rcdataElements.contains(innerHTML)) {
        tokenizer.state = tokenizer.rawtextState;
      } else if (innerHTML == 'plaintext') {
        tokenizer.state = tokenizer.plaintextState;
      } else {
        // state already is data state
        // tokenizer.state = tokenizer.dataState;
      }
      phase = _beforeHtmlPhase;
      _beforeHtmlPhase.insertHtmlElement();
      resetInsertionMode();
    } else {
      phase = _initialPhase;
    }

    lastPhase = null;
    beforeRCDataPhase = null;
    framesetOK = true;
  }

  /**
   * Whether [element] is an HTML integration point: either a MathML
   * `annotation-xml` element whose `encoding` attribute is "text/html" or
   * "application/xhtml+xml" (compared ASCII case-insensitively), or an element
   * listed in [htmlIntegrationPointElements].
   */
  bool isHTMLIntegrationPoint(Node element) {
    if (element.tagName == "annotation-xml" &&
        element.namespace == Namespaces.mathml) {
      var enc = element.attributes["encoding"];
      if (enc != null) enc = asciiUpper2Lower(enc);
      return enc == "text/html" || enc == "application/xhtml+xml";
    } else {
      return htmlIntegrationPointElements.contains(
          new Pair(element.namespace, element.tagName));
    }
  }

  /** Whether [element] is listed in [mathmlTextIntegrationPointElements]. */
  bool isMathMLTextIntegrationPoint(Node element) {
    return mathmlTextIntegrationPointElements.contains(
        new Pair(element.namespace, element.tagName));
  }

  /**
   * Whether a [token] of kind [type] should be handled by the foreign content
   * phase, judged by the innermost open element.
   */
  bool inForeignContent(Token token, int type) {
    if (tree.openElements.isEmpty) return false;

    var node = tree.openElements.last;
    if (node.namespace == tree.defaultNamespace) return false;

    if (isMathMLTextIntegrationPoint(node)) {
      if (type == TokenKind.startTag) {
        // Cast only once the kind says this is a start tag.
        var name = (token as StartTagToken).name;
        if (name != "mglyph" && name != "malignmark") return false;
      }
      if (type == TokenKind.characters || type == TokenKind.spaceCharacters) {
        return false;
      }
    }

    if (node.tagName == "annotation-xml" && type == TokenKind.startTag &&
        (token as StartTagToken).name == "svg") {
      return false;
    }

    if (isHTMLIntegrationPoint(node)) {
      if (type == TokenKind.startTag ||
          type == TokenKind.characters ||
          type == TokenKind.spaceCharacters) {
        return false;
      }
    }

    return true;
  }

  /**
   * Drains the tokenizer, handing each token to the active phase (or to the
   * foreign content phase), then lets the phases process end-of-file.
   *
   * A phase may return a token to have it processed again, typically after
   * switching [phase]; a `null` return finishes the token.
   */
  void mainLoop() {
    while (tokenizer.moveNext()) {
      var token = tokenizer.current;
      var newToken = token;
      int type;
      while (newToken != null) {
        type = newToken.kind;

        // Note: avoid "is" test here, see http://dartbug.com/4795
        if (type == TokenKind.parseError) {
          ParseErrorToken error = newToken;
          parseError(error.span, error.data, error.messageParams);
          newToken = null;
        } else {
          Phase phase_ = phase;
          if (inForeignContent(token, type)) {
            phase_ = _inForeignContentPhase;
          }

          switch (type) {
            case TokenKind.characters:
              newToken = phase_.processCharacters(newToken);
              break;
            case TokenKind.spaceCharacters:
              newToken = phase_.processSpaceCharacters(newToken);
              break;
            case TokenKind.startTag:
              newToken = phase_.processStartTag(newToken);
              break;
            case TokenKind.endTag:
              newToken = phase_.processEndTag(newToken);
              break;
            case TokenKind.comment:
              newToken = phase_.processComment(newToken);
              break;
            case TokenKind.doctype:
              newToken = phase_.processDoctype(newToken);
              break;
          }
        }
      }

      // A self-closing start tag that no phase acknowledged is an error.
      if (token is StartTagToken) {
        if (token.selfClosing && !token.selfClosingAcknowledged) {
          parseError(token.span, "non-void-element-with-trailing-solidus",
              {"name": token.name});
        }
      }
    }

    // When the loop finishes it's EOF
    var reprocess = true;
    var reprocessPhases = [];
    while (reprocess) {
      reprocessPhases.add(phase);
      reprocess = phase.processEOF();
      if (reprocess) {
        // A phase asking for reprocessing must have moved to a new phase.
        assert(!reprocessPhases.contains(phase));
      }
    }
  }

  /**
   * The last span available. Used for EOF errors if we don't have something
   * better.
   */
  Span get _lastSpan {
    var pos = tokenizer.stream.position;
    return new FileSpan(tokenizer.stream.fileInfo, pos, pos);
  }

  /**
   * Records a [ParseError] with the given [errorcode] and [datavars] in
   * [errors], and throws it when [strict] is set.
   *
   * If [span] is `null` and [generateSpans] is off, [_lastSpan] is used.
   */
  void parseError(Span span, String errorcode,
      [Map datavars = const {}]) {

    if (!generateSpans && span == null) {
      span = _lastSpan;
    }

    var err = new ParseError(errorcode, span, datavars);
    errors.add(err);
    if (strict) throw err;
  }

  /** Renames the `definitionurl` attribute of [token] to `definitionURL`. */
  void adjustMathMLAttributes(StartTagToken token) {
    var orig = token.data.remove("definitionurl");
    if (orig != null) {
      token.data["definitionURL"] = orig;
    }
  }

  /**
   * Restores the mixed-case spelling of the SVG attribute names of [token]
   * that appear in lower case.
   */
  void adjustSVGAttributes(StartTagToken token) {
    final replacements = const {
      "attributename": "attributeName",
      "attributetype": "attributeType",
      "basefrequency": "baseFrequency",
      "baseprofile": "baseProfile",
      "calcmode": "calcMode",
      "clippathunits": "clipPathUnits",
      "contentscripttype": "contentScriptType",
      "contentstyletype": "contentStyleType",
      "diffuseconstant": "diffuseConstant",
      "edgemode": "edgeMode",
      "externalresourcesrequired": "externalResourcesRequired",
      "filterres": "filterRes",
      "filterunits": "filterUnits",
      "glyphref": "glyphRef",
      "gradienttransform": "gradientTransform",
      "gradientunits": "gradientUnits",
      "kernelmatrix": "kernelMatrix",
      "kernelunitlength": "kernelUnitLength",
      "keypoints": "keyPoints",
      "keysplines": "keySplines",
      "keytimes": "keyTimes",
      "lengthadjust": "lengthAdjust",
      "limitingconeangle": "limitingConeAngle",
      "markerheight": "markerHeight",
      "markerunits": "markerUnits",
      "markerwidth": "markerWidth",
      "maskcontentunits": "maskContentUnits",
      "maskunits": "maskUnits",
      "numoctaves": "numOctaves",
      "pathlength": "pathLength",
      "patterncontentunits": "patternContentUnits",
      "patterntransform": "patternTransform",
      "patternunits": "patternUnits",
      "pointsatx": "pointsAtX",
      "pointsaty": "pointsAtY",
      "pointsatz": "pointsAtZ",
      "preservealpha": "preserveAlpha",
      "preserveaspectratio": "preserveAspectRatio",
      "primitiveunits": "primitiveUnits",
      "refx": "refX",
      "refy": "refY",
      "repeatcount": "repeatCount",
      "repeatdur": "repeatDur",
      "requiredextensions": "requiredExtensions",
      "requiredfeatures": "requiredFeatures",
      "specularconstant": "specularConstant",
      "specularexponent": "specularExponent",
      "spreadmethod": "spreadMethod",
      "startoffset": "startOffset",
      "stddeviation": "stdDeviation",
      "stitchtiles": "stitchTiles",
      "surfacescale": "surfaceScale",
      "systemlanguage": "systemLanguage",
      "tablevalues": "tableValues",
      "targetx": "targetX",
      "targety": "targetY",
      "textlength": "textLength",
      "viewbox": "viewBox",
      "viewtarget": "viewTarget",
      "xchannelselector": "xChannelSelector",
      "ychannelselector": "yChannelSelector",
      "zoomandpan": "zoomAndPan"
    };
    // Iterate over a copy of the keys: the map is modified inside the loop.
    for (var originalName in token.data.keys.toList()) {
      var svgName = replacements[originalName];
      if (svgName != null) {
        token.data[svgName] = token.data.remove(originalName);
      }
    }
  }

  /**
   * Replaces the `xlink:`, `xml:` and `xmlns` attribute keys of [token] with
   * the corresponding namespaced [AttributeName] objects.
   */
  void adjustForeignAttributes(StartTagToken token) {
    // TODO(jmesserly): I don't like mixing non-string objects with strings in
    // the Node.attributes Map. Is there another solution?
    final replacements = const {
      "xlink:actuate": const AttributeName("xlink", "actuate",
            Namespaces.xlink),
      "xlink:arcrole": const AttributeName("xlink", "arcrole",
            Namespaces.xlink),
      "xlink:href": const AttributeName("xlink", "href", Namespaces.xlink),
      "xlink:role": const AttributeName("xlink", "role", Namespaces.xlink),
      "xlink:show": const AttributeName("xlink", "show", Namespaces.xlink),
      "xlink:title": const AttributeName("xlink", "title", Namespaces.xlink),
      "xlink:type": const AttributeName("xlink", "type", Namespaces.xlink),
      "xml:base": const AttributeName("xml", "base", Namespaces.xml),
      "xml:lang": const AttributeName("xml", "lang", Namespaces.xml),
      "xml:space": const AttributeName("xml", "space", Namespaces.xml),
      "xmlns": const AttributeName(null, "xmlns", Namespaces.xmlns),
      "xmlns:xlink": const AttributeName("xmlns", "xlink", Namespaces.xmlns)
    };

    // Iterate over a copy of the keys: the map is modified inside the loop.
    for (var originalName in token.data.keys.toList()) {
      var foreignName = replacements[originalName];
      if (foreignName != null) {
        token.data[foreignName] = token.data.remove(originalName);
      }
    }
  }

  /**
   * Selects [phase] from the stack of open elements, looking from the
   * innermost element outwards. Falls back to the in-body phase when no
   * element decides.
   */
  void resetInsertionMode() {
    // The name of this method is mostly historical. (It's also used in the
    // specification.)
    for (Node node in tree.openElements.reversed) {
      var nodeName = node.tagName;
      bool last = node == tree.openElements[0];
      if (last) {
        // The outermost element stands in for the fragment container.
        assert(innerHTMLMode);
        nodeName = innerHTML;
      }
      // Check for conditions that should only happen in the innerHTML
      // case
      switch (nodeName) {
        case "select": case "colgroup": case "head": case "html":
          assert(innerHTMLMode);
          break;
      }
      if (!last && node.namespace != tree.defaultNamespace) {
        continue;
      }
      switch (nodeName) {
        case "select": phase = _inSelectPhase; return;
        case "td": phase = _inCellPhase; return;
        case "th": phase = _inCellPhase; return;
        case "tr": phase = _inRowPhase; return;
        case "tbody": phase = _inTableBodyPhase; return;
        case "thead": phase = _inTableBodyPhase; return;
        case "tfoot": phase = _inTableBodyPhase; return;
        case "caption": phase = _inCaptionPhase; return;
        case "colgroup": phase = _inColumnGroupPhase; return;
        case "table": phase = _inTablePhase; return;
        case "head": phase = _inBodyPhase; return;
        case "body": phase = _inBodyPhase; return;
        case "frameset": phase = _inFramesetPhase; return;
        case "html": phase = _beforeHeadPhase; return;
      }
    }
    phase = _inBodyPhase;
  }

  /**
   * Generic RCDATA/RAWTEXT Parsing algorithm
   * [contentType] - RCDATA or RAWTEXT
   *
   * Inserts an element for [token], switches the tokenizer to the matching
   * state, remembers the current phase in [originalPhase] and moves to the
   * text phase.
   */
  void parseRCDataRawtext(Token token, String contentType) {
    assert(contentType == "RAWTEXT" || contentType == "RCDATA");

    tree.insertElement(token);

    if (contentType == "RAWTEXT") {
      tokenizer.state = tokenizer.rawtextState;
    } else {
      tokenizer.state = tokenizer.rcdataState;
    }

    originalPhase = phase;
    phase = _textPhase;
  }
}
| 567 |
| 568 |
/**
 * Base class for the helper objects that implement the individual phases of
 * processing.
 *
 * The `process*` methods return a token when it has to be processed again,
 * and `null` once the token is fully handled.
 */
class Phase {
  // Order should be (they can be omitted):
  // * EOF
  // * Comment
  // * Doctype
  // * SpaceCharacters
  // * Characters
  // * StartTag
  //   - startTag* methods
  // * EndTag
  //   - endTag* methods

  final HtmlParser parser;

  final TreeBuilder tree;

  Phase(HtmlParser parser)
      : parser = parser,
        tree = parser.tree;

  /** Handles end-of-file; returns true when EOF must be processed again. */
  bool processEOF() => throw new UnimplementedError();

  /** Adds the comment as a child of the innermost open element. */
  Token processComment(CommentToken token) {
    // For most phases the following is correct. Where it's not it will be
    // overridden.
    var parent = tree.openElements.last;
    tree.insertComment(token, parent);
    return null;
  }

  /** Reports the doctype as unexpected. */
  Token processDoctype(DoctypeToken token) {
    parser.parseError(token.span, "unexpected-doctype");
    return null;
  }

  /** Inserts the characters as text. */
  Token processCharacters(CharactersToken token) {
    tree.insertText(token.data, token.span);
    return null;
  }

  /** Inserts the whitespace as text. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    tree.insertText(token.data, token.span);
    return null;
  }

  Token processStartTag(StartTagToken token) =>
      throw new UnimplementedError();

  /**
   * Copies the attributes of an `html` start tag onto the root element,
   * keeping any attribute value the root already has.
   */
  Token startTagHtml(StartTagToken token) {
    if (token.name == "html" && parser.firstStartTag == false) {
      parser.parseError(token.span, "non-html-root");
    }
    // XXX Need a check here to see if the first start tag token emitted is
    // this token... If it's not, invoke parser.parseError().
    for (var attr in token.data.keys) {
      tree.openElements[0].attributes
          .putIfAbsent(attr, () => token.data[attr]);
    }
    parser.firstStartTag = false;
    return null;
  }

  Token processEndTag(EndTagToken token) => throw new UnimplementedError();

  /**
   * Helper method for popping openElements: removes elements from the end
   * until one whose tag name is [name] has been removed.
   */
  void popOpenElementsUntil(String name) {
    var popped;
    do {
      popped = tree.openElements.removeLast();
    } while (popped.tagName != name);
  }
}
| 638 |
/**
 * First phase of document parsing: expects a doctype, derives the parser's
 * compatibility mode from it, and hands over to the before-html phase.
 */
class InitialPhase extends Phase {
  InitialPhase(parser) : super(parser);

  /** Whitespace before the doctype is dropped. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    return null;
  }

  /** Comments at this point become children of the document itself. */
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  /**
   * Inserts the doctype, sets [HtmlParser.compatMode] to "quirks" or
   * "limited quirks" when the identifiers call for it, and moves on to the
   * before-html phase.
   */
  Token processDoctype(DoctypeToken token) {
    var name = token.name;
    String publicId = token.publicId;
    var systemId = token.systemId;
    var correct = token.correct;

    bool unexpectedSystemId =
        systemId != null && systemId != "about:legacy-compat";
    if (name != "html" || publicId != null || unexpectedSystemId) {
      parser.parseError(token.span, "unknown-doctype");
    }

    if (publicId == null) {
      publicId = "";
    }

    tree.insertDoctype(token);

    // Public identifiers are matched in lower case from here on.
    if (publicId != "") {
      publicId = asciiUpper2Lower(publicId);
    }

    // Public identifier prefixes that always select quirks mode.
    final quirksPrefixes = const [
      "+//silmaril//dtd html pro v0r11 19970101//",
      "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
      "-//as//dtd html 3.0 aswedit + extensions//",
      "-//ietf//dtd html 2.0 level 1//",
      "-//ietf//dtd html 2.0 level 2//",
      "-//ietf//dtd html 2.0 strict level 1//",
      "-//ietf//dtd html 2.0 strict level 2//",
      "-//ietf//dtd html 2.0 strict//",
      "-//ietf//dtd html 2.0//",
      "-//ietf//dtd html 2.1e//",
      "-//ietf//dtd html 3.0//",
      "-//ietf//dtd html 3.2 final//",
      "-//ietf//dtd html 3.2//",
      "-//ietf//dtd html 3//",
      "-//ietf//dtd html level 0//",
      "-//ietf//dtd html level 1//",
      "-//ietf//dtd html level 2//",
      "-//ietf//dtd html level 3//",
      "-//ietf//dtd html strict level 0//",
      "-//ietf//dtd html strict level 1//",
      "-//ietf//dtd html strict level 2//",
      "-//ietf//dtd html strict level 3//",
      "-//ietf//dtd html strict//",
      "-//ietf//dtd html//",
      "-//metrius//dtd metrius presentational//",
      "-//microsoft//dtd internet explorer 2.0 html strict//",
      "-//microsoft//dtd internet explorer 2.0 html//",
      "-//microsoft//dtd internet explorer 2.0 tables//",
      "-//microsoft//dtd internet explorer 3.0 html strict//",
      "-//microsoft//dtd internet explorer 3.0 html//",
      "-//microsoft//dtd internet explorer 3.0 tables//",
      "-//netscape comm. corp.//dtd html//",
      "-//netscape comm. corp.//dtd strict html//",
      "-//o'reilly and associates//dtd html 2.0//",
      "-//o'reilly and associates//dtd html extended 1.0//",
      "-//o'reilly and associates//dtd html extended relaxed 1.0//",
      "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
      "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
      "-//spyglass//dtd html 2.0 extended//",
      "-//sq//dtd html 2.0 hotmetal + extensions//",
      "-//sun microsystems corp.//dtd hotjava html//",
      "-//sun microsystems corp.//dtd hotjava strict html//",
      "-//w3c//dtd html 3 1995-03-24//",
      "-//w3c//dtd html 3.2 draft//",
      "-//w3c//dtd html 3.2 final//",
      "-//w3c//dtd html 3.2//",
      "-//w3c//dtd html 3.2s draft//",
      "-//w3c//dtd html 4.0 frameset//",
      "-//w3c//dtd html 4.0 transitional//",
      "-//w3c//dtd html experimental 19960712//",
      "-//w3c//dtd html experimental 970421//",
      "-//w3c//dtd w3 html//",
      "-//w3o//dtd w3 html 3.0//",
      "-//webtechs//dtd mozilla html 2.0//",
      "-//webtechs//dtd mozilla html//"
    ];

    // Public identifiers that select quirks mode on an exact match.
    final quirksExactIds = const [
      "-//w3o//dtd w3 html strict 3.0//en//",
      "-/w3c/dtd html 4.0 transitional/en",
      "html"
    ];

    // Quirks without a system identifier, limited quirks with one.
    final html401Prefixes = const [
      "-//w3c//dtd html 4.01 frameset//",
      "-//w3c//dtd html 4.01 transitional//"
    ];

    // Public identifier prefixes that select limited quirks mode.
    final xhtml10Prefixes = const [
      "-//w3c//dtd xhtml 1.0 frameset//",
      "-//w3c//dtd xhtml 1.0 transitional//"
    ];

    bool quirks = !correct ||
        token.name != "html" ||
        startsWithAny(publicId, quirksPrefixes) ||
        quirksExactIds.contains(publicId) ||
        (startsWithAny(publicId, html401Prefixes) && systemId == null) ||
        (systemId != null &&
            systemId.toLowerCase() ==
                "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd");

    if (quirks) {
      parser.compatMode = "quirks";
    } else if (startsWithAny(publicId, xhtml10Prefixes) ||
        (startsWithAny(publicId, html401Prefixes) && systemId != null)) {
      parser.compatMode = "limited quirks";
    }
    parser.phase = parser._beforeHtmlPhase;
    return null;
  }

  /** No doctype was found: fall back to quirks mode and move on. */
  void anythingElse() {
    parser.compatMode = "quirks";
    parser.phase = parser._beforeHtmlPhase;
  }

  // Each handler below reports the missing doctype, switches phase through
  // [anythingElse] and returns the token so the next phase processes it.

  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "expected-doctype-but-got-chars");
    anythingElse();
    return token;
  }

  Token processStartTag(StartTagToken token) {
    var params = {"name": token.name};
    parser.parseError(
        token.span, "expected-doctype-but-got-start-tag", params);
    anythingElse();
    return token;
  }

  Token processEndTag(EndTagToken token) {
    var params = {"name": token.name};
    parser.parseError(token.span, "expected-doctype-but-got-end-tag", params);
    anythingElse();
    return token;
  }

  /** Reports the missing doctype and asks for EOF to be processed again. */
  bool processEOF() {
    parser.parseError(parser._lastSpan, "expected-doctype-but-got-eof");
    anythingElse();
    return true;
  }
}
| 780 |
| 781 |
/**
 * Phase that runs until the root `html` element exists. Anything other than
 * comments, whitespace and stray end tags makes it insert an implied root and
 * pass the token on to the next phase.
 */
class BeforeHtmlPhase extends Phase {
  BeforeHtmlPhase(parser) : super(parser);

  // helper methods

  /** Inserts an `html` root element and moves to the before-head phase. */
  void insertHtmlElement() {
    var rootTag = new StartTagToken("html", data: {});
    tree.insertRoot(rootTag);
    parser.phase = parser._beforeHeadPhase;
  }

  // other

  /** Inserts the root and asks for EOF to be processed again. */
  bool processEOF() {
    insertHtmlElement();
    return true;
  }

  /** Comments at this point become children of the document itself. */
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  /** Whitespace before the root element is dropped. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    return null;
  }

  Token processCharacters(CharactersToken token) {
    insertHtmlElement();
    return token;
  }

  /** Notes whether the first start tag is `html`, then inserts the root. */
  Token processStartTag(StartTagToken token) {
    if (token.name == "html") {
      parser.firstStartTag = true;
    }
    insertHtmlElement();
    return token;
  }

  /**
   * `head`, `body`, `html` and `br` end tags imply the root element and are
   * processed again; any other end tag is reported and dropped.
   */
  Token processEndTag(EndTagToken token) {
    if (const ["head", "body", "html", "br"].contains(token.name)) {
      insertHtmlElement();
      return token;
    }
    parser.parseError(token.span, "unexpected-end-tag-before-html",
        {"name": token.name});
    return null;
  }
}
| 829 |
| 830 |
/**
 * Phase that runs between the root element and the `head` element. Most
 * tokens make it insert an implied `head` and pass the token on to the
 * in-head phase.
 */
class BeforeHeadPhase extends Phase {
  BeforeHeadPhase(parser) : super(parser);

  /** Inserts a `head` element for which no start tag was seen. */
  void _insertImpliedHead() {
    startTagHead(new StartTagToken("head", data: {}));
  }

  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == 'html') return startTagHtml(token);
    if (name == 'head') {
      startTagHead(token);
      return null;
    }
    return startTagOther(token);
  }

  processEndTag(EndTagToken token) {
    if (const ["head", "body", "html", "br"].contains(token.name)) {
      return endTagImplyHead(token);
    }
    endTagOther(token);
    return null;
  }

  /** Inserts the implied head and asks for EOF to be processed again. */
  bool processEOF() {
    _insertImpliedHead();
    return true;
  }

  /** Whitespace before the head element is dropped. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    return null;
  }

  Token processCharacters(CharactersToken token) {
    _insertImpliedHead();
    return token;
  }

  /** An `html` start tag is handled by the in-body phase. */
  Token startTagHtml(StartTagToken token) {
    return parser._inBodyPhase.processStartTag(token);
  }

  /**
   * Inserts the element for [token], records it as the tree's head pointer
   * and moves to the in-head phase.
   */
  void startTagHead(StartTagToken token) {
    tree.insertElement(token);
    tree.headPointer = tree.openElements.last;
    parser.phase = parser._inHeadPhase;
  }

  Token startTagOther(StartTagToken token) {
    _insertImpliedHead();
    return token;
  }

  Token endTagImplyHead(EndTagToken token) {
    _insertImpliedHead();
    return token;
  }

  /** Any other end tag is reported and dropped. */
  void endTagOther(EndTagToken token) {
    parser.parseError(token.span, "end-tag-after-implied-root",
        {"name": token.name});
  }
}
| 888 |
/**
 * Phase used while the `head` element is the current open element.
 *
 * Tokens that are not handled here cause the `head` to be popped (see
 * [anythingElse]) and are handed back to the caller.
 */
class InHeadPhase extends Phase {
  InHeadPhase(parser) : super(parser);

  /** Dispatches a start tag by name to the matching `startTag*` handler. */
  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "title") return startTagTitle(token);
    if (const ["noscript", "noframes", "style"].contains(name)) {
      return startTagNoScriptNoFramesStyle(token);
    }
    if (name == "script") return startTagScript(token);
    if (const ["base", "basefont", "bgsound", "command", "link"]
        .contains(name)) {
      return startTagBaseLinkCommand(token);
    }
    if (name == "meta") return startTagMeta(token);
    if (name == "head") return startTagHead(token);
    return startTagOther(token);
  }

  /** Dispatches an end tag by name to the matching `endTag*` handler. */
  processEndTag(EndTagToken token) {
    var name = token.name;
    if (name == "head") return endTagHead(token);
    if (const ["br", "html", "body"].contains(name)) {
      return endTagHtmlBodyBr(token);
    }
    return endTagOther(token);
  }

  /** Pops the `head` via [anythingElse] and returns `true`. */
  bool processEOF() {
    anythingElse();
    return true;
  }

  /** Pops the `head` via [anythingElse] and hands [token] back. */
  Token processCharacters(CharactersToken token) {
    anythingElse();
    return token;
  }

  /** Delegates `<html>` start tags to the in-body phase. */
  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /** Reports a second `<head>` start tag as a parse error. */
  void startTagHead(StartTagToken token) {
    parser.parseError(token.span, "two-heads-are-not-better-than-one");
  }

  /**
   * Inserts the element and immediately removes it from the open elements,
   * acknowledging the token's self-closing flag.
   */
  void startTagBaseLinkCommand(StartTagToken token) {
    _insertVoidElement(token);
  }

  /**
   * Inserts a `meta` element like [startTagBaseLinkCommand] does, then, if
   * the stream's encoding is not yet certain, asks the stream to change
   * encoding based on the `charset` attribute or, failing that, on whatever
   * [ContentAttrParser] extracts from the `content` attribute.
   */
  void startTagMeta(StartTagToken token) {
    _insertVoidElement(token);

    var stream = parser.tokenizer.stream;
    if (stream.charEncodingCertain) return;

    var attrs = token.data;
    var charset = attrs["charset"];
    var content = attrs["content"];
    if (charset != null) {
      stream.changeEncoding(charset);
    } else if (content != null) {
      var codec = new ContentAttrParser(new EncodingBytes(content)).parse();
      stream.changeEncoding(codec);
    }
  }

  /** Parses `title` through [parseRCDataRawtext] in "RCDATA" mode. */
  void startTagTitle(StartTagToken token) {
    parser.parseRCDataRawtext(token, "RCDATA");
  }

  /** Parses the element through [parseRCDataRawtext] in "RAWTEXT" mode. */
  void startTagNoScriptNoFramesStyle(StartTagToken token) {
    // Need to decide whether to implement the scripting-disabled case
    parser.parseRCDataRawtext(token, "RAWTEXT");
  }

  /**
   * Inserts the `script` element, puts the tokenizer in its script-data
   * state, remembers the current phase in `parser.originalPhase` and
   * switches to the text phase.
   */
  void startTagScript(StartTagToken token) {
    tree.insertElement(token);
    var tokenizer = parser.tokenizer;
    tokenizer.state = tokenizer.scriptDataState;
    parser.originalPhase = parser.phase;
    parser.phase = parser._textPhase;
  }

  /** Pops the `head` via [anythingElse] and hands [token] back. */
  Token startTagOther(StartTagToken token) {
    anythingElse();
    return token;
  }

  /**
   * Pops the last open element (asserted to be `head`) and moves the parser
   * to the after-head phase.
   */
  void endTagHead(EndTagToken token) {
    var popped = parser.tree.openElements.removeLast();
    assert(popped.tagName == "head");
    parser.phase = parser._afterHeadPhase;
  }

  /** Pops the `head` via [anythingElse] and hands [token] back. */
  Token endTagHtmlBodyBr(EndTagToken token) {
    anythingElse();
    return token;
  }

  /** Reports any other end tag as "unexpected-end-tag". */
  void endTagOther(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
  }

  /** Acts as if a `</head>` end tag had been seen. */
  void anythingElse() {
    endTagHead(new EndTagToken("head"));
  }

  /** Inserts [token]'s element, pops it again and acknowledges self-closing. */
  void _insertVoidElement(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
    token.selfClosingAcknowledged = true;
  }
}
| 999 |
| 1000 |
| 1001 // XXX If we implement a parser for which scripting is disabled we need to |
| 1002 // implement this phase. |
| 1003 // |
| 1004 // class InHeadNoScriptPhase extends Phase { |
| 1005 |
/**
 * Phase used after the `head` has been closed and before a `body` or
 * `frameset` has been opened.
 *
 * Tokens that are not handled here insert an implied `body` (see
 * [anythingElse]) and are handed back to the caller.
 */
class AfterHeadPhase extends Phase {
  AfterHeadPhase(parser) : super(parser);

  /** Dispatches a start tag by name to the matching `startTag*` handler. */
  processStartTag(StartTagToken token) {
    const headContent = const [
      "base", "basefont", "bgsound", "link", "meta",
      "noframes", "script", "style", "title"
    ];
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "body") return startTagBody(token);
    if (name == "frameset") return startTagFrameset(token);
    if (headContent.contains(name)) return startTagFromHead(token);
    if (name == "head") return startTagHead(token);
    return startTagOther(token);
  }

  /** Dispatches an end tag by name to the matching `endTag*` handler. */
  processEndTag(EndTagToken token) {
    if (const ["body", "html", "br"].contains(token.name)) {
      return endTagHtmlBodyBr(token);
    }
    return endTagOther(token);
  }

  /** Inserts an implied `body` via [anythingElse] and returns `true`. */
  bool processEOF() {
    anythingElse();
    return true;
  }

  /** Inserts an implied `body` via [anythingElse] and hands [token] back. */
  Token processCharacters(CharactersToken token) {
    anythingElse();
    return token;
  }

  /** Delegates `<html>` start tags to the in-body phase. */
  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /**
   * Clears `framesetOK`, inserts the `body` element and moves to the in-body
   * phase.
   */
  void startTagBody(StartTagToken token) {
    parser.framesetOK = false;
    tree.insertElement(token);
    parser.phase = parser._inBodyPhase;
  }

  /** Inserts the `frameset` element and moves to the in-frameset phase. */
  void startTagFrameset(StartTagToken token) {
    tree.insertElement(token);
    parser.phase = parser._inFramesetPhase;
  }

  /**
   * Reports head-only content appearing after the head, then temporarily
   * pushes the head pointer back onto the open elements, lets the in-head
   * phase process [token], and finally removes the `head` entry found
   * closest to the top of the stack.
   */
  void startTagFromHead(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-out-of-my-head",
        {"name": token.name});
    tree.openElements.add(tree.headPointer);
    parser._inHeadPhase.processStartTag(token);

    var open = tree.openElements;
    for (var i = open.length - 1; i >= 0; i--) {
      var candidate = open[i];
      if (candidate.tagName == "head") {
        open.remove(candidate);
        break;
      }
    }
  }

  /** Reports a `<head>` start tag in this phase as "unexpected-start-tag". */
  void startTagHead(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag", {"name": token.name});
  }

  /** Inserts an implied `body` via [anythingElse] and hands [token] back. */
  Token startTagOther(StartTagToken token) {
    anythingElse();
    return token;
  }

  /** Inserts an implied `body` via [anythingElse] and hands [token] back. */
  Token endTagHtmlBodyBr(EndTagToken token) {
    anythingElse();
    return token;
  }

  /** Reports any other end tag as "unexpected-end-tag". */
  void endTagOther(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
  }

  /**
   * Inserts a synthesized, attribute-less `body` element, moves to the
   * in-body phase and sets `framesetOK` to `true`.
   */
  void anythingElse() {
    tree.insertElement(new StartTagToken("body", data: {}));
    parser.phase = parser._inBodyPhase;
    parser.framesetOK = true;
  }
}
| 1092 |
/** Signature of a function that takes a [Token] and returns a [Token]. */
typedef Token TokenProccessor(Token token);
| 1094 |
| 1095 class InBodyPhase extends Phase { |
| 1096 bool dropNewline = false; |
| 1097 |
// http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
| 1099 // the really-really-really-very crazy mode |
/** Creates the phase, forwarding [parser] to the [Phase] constructor. */
InBodyPhase(parser) : super(parser);
| 1101 |
/**
 * Dispatches a start tag by name to the matching `startTag*` handler and
 * returns whatever that handler returns.
 *
 * Each tag name appears in exactly one case group. In particular `noframes`
 * is handled by [startTagProcessInHead] only, and `details` is listed once.
 */
processStartTag(StartTagToken token) {
  switch (token.name) {
    case "html":
      return startTagHtml(token);
    case "base": case "basefont": case "bgsound": case "command": case "link":
    case "meta": case "noframes": case "script": case "style": case "title":
      return startTagProcessInHead(token);
    case "body":
      return startTagBody(token);
    case "frameset":
      return startTagFrameset(token);
    case "address": case "article": case "aside": case "blockquote":
    case "center": case "details": case "dir": case "div":
    case "dl": case "fieldset": case "figcaption": case "figure":
    case "footer": case "header": case "hgroup": case "menu": case "nav":
    case "ol": case "p": case "section": case "summary": case "ul":
      return startTagCloseP(token);
    // headingElements
    case "h1": case "h2": case "h3": case "h4": case "h5": case "h6":
      return startTagHeading(token);
    case "pre": case "listing":
      return startTagPreListing(token);
    case "form":
      return startTagForm(token);
    case "li": case "dd": case "dt":
      return startTagListItem(token);
    case "plaintext":
      return startTagPlaintext(token);
    case "a":
      return startTagA(token);
    case "b": case "big": case "code": case "em": case "font": case "i":
    case "s": case "small": case "strike": case "strong": case "tt": case "u":
      return startTagFormatting(token);
    case "nobr":
      return startTagNobr(token);
    case "button":
      return startTagButton(token);
    case "applet": case "marquee": case "object":
      return startTagAppletMarqueeObject(token);
    case "xmp":
      return startTagXmp(token);
    case "table":
      return startTagTable(token);
    case "area": case "br": case "embed": case "img": case "keygen":
    case "wbr":
      return startTagVoidFormatting(token);
    case "param": case "source": case "track":
      return startTagParamSource(token);
    case "input":
      return startTagInput(token);
    case "hr":
      return startTagHr(token);
    case "image":
      return startTagImage(token);
    case "isindex":
      return startTagIsIndex(token);
    case "textarea":
      return startTagTextarea(token);
    case "iframe":
      return startTagIFrame(token);
    // "noframes" is deliberately absent here: it is already matched by the
    // in-head group above, so listing it again would be unreachable.
    case "noembed": case "noscript":
      return startTagRawtext(token);
    case "select":
      return startTagSelect(token);
    case "rp": case "rt":
      return startTagRpRt(token);
    case "option": case "optgroup":
      return startTagOpt(token);
    case "math":
      return startTagMath(token);
    case "svg":
      return startTagSvg(token);
    case "caption": case "col": case "colgroup": case "frame": case "head":
    case "tbody": case "td": case "tfoot": case "th": case "thead": case "tr":
      return startTagMisplaced(token);
    default:
      return startTagOther(token);
  }
}
| 1179 |
/**
 * Dispatches an end tag by name to the matching `endTag*` handler and
 * returns whatever that handler returns.
 */
processEndTag(EndTagToken token) {
  const blockTags = const [
    "address", "article", "aside", "blockquote", "center", "details", "dir",
    "div", "dl", "fieldset", "figcaption", "figure", "footer", "header",
    "hgroup", "listing", "menu", "nav", "ol", "pre", "section", "summary",
    "ul"
  ];
  const listItemTags = const ["dd", "dt", "li"];
  // headingElements
  const headingTags = const ["h1", "h2", "h3", "h4", "h5", "h6"];
  const formattingTags = const [
    "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
    "strike", "strong", "tt", "u"
  ];
  const markerTags = const ["applet", "marquee", "object"];

  var name = token.name;
  if (name == "body") return endTagBody(token);
  if (name == "html") return endTagHtml(token);
  if (blockTags.contains(name)) return endTagBlock(token);
  if (name == "form") return endTagForm(token);
  if (name == "p") return endTagP(token);
  if (listItemTags.contains(name)) return endTagListItem(token);
  if (headingTags.contains(name)) return endTagHeading(token);
  if (formattingTags.contains(name)) return endTagFormatting(token);
  if (markerTags.contains(name)) return endTagAppletMarqueeObject(token);
  if (name == "br") return endTagBr(token);
  return endTagOther(token);
}
| 1206 |
/**
 * Returns `true` when [node1] and [node2] have the same tag name, the same
 * namespace, the same number of attributes, and every attribute key of
 * [node1] maps to an equal value in [node2].
 */
bool isMatchingFormattingElement(Node node1, Node node2) {
  if (node1.tagName != node2.tagName) return false;
  if (node1.namespace != node2.namespace) return false;
  if (node1.attributes.length != node2.attributes.length) return false;

  return node1.attributes.keys
      .every((key) => node1.attributes[key] == node2.attributes[key]);
}
| 1221 |
/**
 * Inserts the element for [token] and appends it to the tree's active
 * formatting elements.
 *
 * Before appending, the active formatting elements are scanned from the end
 * back to the nearest [Marker]. If three matching entries (see
 * [isMatchingFormattingElement]) are found, the one reached last in that
 * backwards scan is removed.
 */
void addFormattingElement(token) {
  tree.insertElement(token);
  var inserted = tree.openElements.last;

  var duplicates = [];
  for (Node candidate in tree.activeFormattingElements.reversed) {
    if (candidate == Marker) break;
    if (isMatchingFormattingElement(candidate, inserted)) {
      duplicates.add(candidate);
    }
  }

  assert(duplicates.length <= 3);
  if (duplicates.length == 3) {
    tree.activeFormattingElements.remove(duplicates.last);
  }
  tree.activeFormattingElements.add(inserted);
}
| 1242 |
/**
 * Handles end of file. Walks the open elements from the top of the stack
 * and reports "expected-closing-tag-but-got-eof" for the first element whose
 * tag is not in the list below, then returns `false`.
 */
bool processEOF() {
  const mayStayOpen = const [
    "dd", "dt", "li", "p", "tbody", "td", "tfoot", "th", "thead", "tr",
    "body", "html"
  ];
  for (Node node in tree.openElements.reversed) {
    if (mayStayOpen.contains(node.tagName)) continue;
    parser.parseError(node.sourceSpan, "expected-closing-tag-but-got-eof");
    break;
  }
  // Stop parsing
  return false;
}
| 1258 |
/**
 * Inserts the text of a space-character token, first dropping one leading
 * newline when the current open element is a `pre`, `listing` or `textarea`
 * that has no content yet. Always clears [dropNewline].
 */
void processSpaceCharactersDropNewline(StringToken token) {
  dropNewline = false;

  var text = token.data;
  if (text.startsWith("\n")) {
    var current = tree.openElements.last;
    var swallowsNewline =
        const ["pre", "listing", "textarea"].contains(current.tagName);
    if (swallowsNewline && !current.hasContent()) {
      text = text.substring(1);
    }
  }

  // Nothing left to insert when the token was just the dropped newline.
  if (text.length == 0) return;
  tree.reconstructActiveFormattingElements();
  tree.insertText(text, token.span);
}
| 1276 |
/**
 * Inserts character data into the tree and clears `framesetOK` when the
 * data is not all whitespace. A lone NUL character is ignored. Always
 * returns `null`.
 */
Token processCharacters(CharactersToken token) {
  var text = token.data;
  // The tokenizer should always emit null on its own
  if (text == "\u0000") return null;

  tree.reconstructActiveFormattingElements();
  tree.insertText(text, token.span);
  if (parser.framesetOK && !allWhitespace(text)) {
    parser.framesetOK = false;
  }
  return null;
}
| 1289 |
/**
 * Inserts whitespace into the tree, going through
 * [processSpaceCharactersDropNewline] when [dropNewline] is set. Always
 * returns `null`.
 */
Token processSpaceCharacters(SpaceCharactersToken token) {
  if (dropNewline) {
    processSpaceCharactersDropNewline(token);
    return null;
  }
  tree.reconstructActiveFormattingElements();
  tree.insertText(token.data, token.span);
  return null;
}
| 1299 |
/** Delegates [token] to the in-head phase and returns its result. */
Token startTagProcessInHead(StartTagToken token) =>
    parser._inHeadPhase.processStartTag(token);
| 1303 |
/**
 * Handles a `<body>` start tag seen while already in the body: always a
 * parse error. When the second open element is a `body`, `framesetOK` is
 * cleared and attributes from [token] that the existing `body` lacks are
 * copied onto it; otherwise the parser is asserted to be in innerHTML mode.
 */
void startTagBody(StartTagToken token) {
  parser.parseError(token.span, "unexpected-start-tag", {"name": "body"});

  var open = tree.openElements;
  var hasBody = open.length != 1 && open[1].tagName == "body";
  if (!hasBody) {
    assert(parser.innerHTMLMode);
    return;
  }

  parser.framesetOK = false;
  for (var attr in token.data.keys) {
    tree.openElements[1].attributes.putIfAbsent(attr, () => token.data[attr]);
  }
}
| 1316 |
/**
 * Handles a `<frameset>` start tag seen in the body: always a parse error.
 * When the second open element is a `body` and `framesetOK` is still set,
 * that `body` is detached from its parent, open elements are popped down to
 * `html`, the `frameset` is inserted and the parser moves to the
 * in-frameset phase.
 */
void startTagFrameset(StartTagToken token) {
  parser.parseError(token.span, "unexpected-start-tag", {"name": "frameset"});

  var open = tree.openElements;
  if (open.length == 1 || open[1].tagName != "body") {
    assert(parser.innerHTMLMode);
    return;
  }
  if (!parser.framesetOK) return;

  var body = open[1];
  if (body.parent != null) {
    body.parent.nodes.remove(body);
  }
  while (tree.openElements.last.tagName != "html") {
    tree.openElements.removeLast();
  }
  tree.insertElement(token);
  parser.phase = parser._inFramesetPhase;
}
| 1333 |
/**
 * Inserts the element for [token], first closing a `p` element if
 * `tree.elementInScope` reports one for the "button" variant.
 */
void startTagCloseP(StartTagToken token) {
  var openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) endTagP(new EndTagToken("p"));
  tree.insertElement(token);
}
| 1340 |
/**
 * Inserts a `pre`/`listing` element after closing any `p` in button scope,
 * clears `framesetOK` and sets [dropNewline] so that the next
 * space-character token goes through newline dropping.
 */
void startTagPreListing(StartTagToken token) {
  var openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) endTagP(new EndTagToken("p"));
  tree.insertElement(token);
  parser.framesetOK = false;
  dropNewline = true;
}
| 1349 |
/**
 * Handles a `<form>` start tag. If the tree already has a form pointer the
 * tag is a parse error and is ignored; otherwise any `p` in button scope is
 * closed, the form is inserted and recorded as the form pointer.
 */
void startTagForm(StartTagToken token) {
  if (tree.formPointer != null) {
    parser.parseError(token.span, "unexpected-start-tag", {"name": "form"});
    return;
  }
  if (tree.elementInScope("p", variant: "button")) {
    endTagP(new EndTagToken("p"));
  }
  tree.insertElement(token);
  tree.formPointer = tree.openElements.last;
}
| 1361 |
/**
 * Handles `<li>`, `<dd>` and `<dt>` start tags.
 *
 * Clears `framesetOK`, then walks the open elements from the top of the
 * stack. The first element whose tag is a "stop name" for [token] gets an
 * end tag processed for it. The walk also stops at the first special
 * element other than `address`, `div` or `p`. Finally any `p` in button
 * scope is closed and the new element is inserted.
 */
void startTagListItem(StartTagToken token) {
  parser.framesetOK = false;

  const stopNamesByTag = const {
    "li": const ["li"],
    "dt": const ["dt", "dd"],
    "dd": const ["dt", "dd"]
  };
  var stopNames = stopNamesByTag[token.name];

  for (Node node in tree.openElements.reversed) {
    if (stopNames.contains(node.tagName)) {
      parser.phase.processEndTag(new EndTagToken(node.tagName));
      break;
    }
    var isBarrier = specialElements.contains(node.nameTuple) &&
        !const ["address", "div", "p"].contains(node.tagName);
    if (isBarrier) break;
  }

  if (tree.elementInScope("p", variant: "button")) {
    parser.phase.processEndTag(new EndTagToken("p"));
  }

  tree.insertElement(token);
}
| 1386 |
/**
 * Inserts a `plaintext` element after closing any `p` in button scope, then
 * switches the tokenizer to its plaintext state.
 */
void startTagPlaintext(StartTagToken token) {
  var openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) endTagP(new EndTagToken("p"));
  tree.insertElement(token);

  var tokenizer = parser.tokenizer;
  tokenizer.state = tokenizer.plaintextState;
}
| 1394 |
/**
 * Inserts a heading element after closing any `p` in button scope. If the
 * current open element is itself a heading, that is reported as a parse
 * error and the element is popped first.
 */
void startTagHeading(StartTagToken token) {
  var openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) endTagP(new EndTagToken("p"));

  var nestedInHeading =
      headingElements.contains(tree.openElements.last.tagName);
  if (nestedInHeading) {
    parser.parseError(token.span, "unexpected-start-tag",
        {"name": token.name});
    tree.openElements.removeLast();
  }
  tree.insertElement(token);
}
| 1406 |
/**
 * Handles an `<a>` start tag. If an `a` is already among the active
 * formatting elements, that is a parse error: an `</a>` end tag is
 * processed and the old element is removed from both the open elements and
 * the active formatting elements. The new `a` is then added as a formatting
 * element.
 */
void startTagA(StartTagToken token) {
  var previousAnchor = tree.elementInActiveFormattingElements("a");
  var hasPreviousAnchor = previousAnchor != null;
  if (hasPreviousAnchor) {
    parser.parseError(token.span, "unexpected-start-tag-implies-end-tag",
        {"startName": "a", "endName": "a"});
    endTagFormatting(new EndTagToken("a"));
    // `remove` is a no-op for whichever list no longer holds the element.
    tree.openElements.remove(previousAnchor);
    tree.activeFormattingElements.remove(previousAnchor);
  }
  tree.reconstructActiveFormattingElements();
  addFormattingElement(token);
}
| 1419 |
/**
 * Reconstructs the active formatting elements, then adds [token]'s element
 * via [addFormattingElement].
 */
void startTagFormatting(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  addFormattingElement(token);
}
| 1424 |
/**
 * Handles a `<nobr>` start tag. A `nobr` already in scope is a parse error
 * and is closed by processing a `</nobr>` end tag before the new element is
 * added as a formatting element.
 */
void startTagNobr(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  var alreadyOpen = tree.elementInScope("nobr");
  if (alreadyOpen) {
    parser.parseError(token.span, "unexpected-start-tag-implies-end-tag",
        {"startName": "nobr", "endName": "nobr"});
    processEndTag(new EndTagToken("nobr"));
    // XXX Need tests that trigger the following
    tree.reconstructActiveFormattingElements();
  }
  addFormattingElement(token);
}
| 1436 |
/**
 * Handles a `<button>` start tag.
 *
 * If a `button` is already in scope this is a parse error: a `</button>`
 * end tag is processed and [token] is returned. Otherwise the element is
 * inserted, `framesetOK` is cleared and `null` is returned.
 */
Token startTagButton(StartTagToken token) {
  if (tree.elementInScope("button")) {
    parser.parseError(token.span, "unexpected-start-tag-implies-end-tag",
        {"startName": "button", "endName": "button"});
    processEndTag(new EndTagToken("button"));
    return token;
  }

  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  parser.framesetOK = false;
  // Previously this path fell off the end of a `Token`-returning method;
  // the null result is now explicit.
  return null;
}
| 1449 |
/**
 * Inserts an `applet`/`marquee`/`object` element, pushes a [Marker] onto
 * the active formatting elements and clears `framesetOK`.
 */
void startTagAppletMarqueeObject(StartTagToken token) {
  tree
    ..reconstructActiveFormattingElements()
    ..insertElement(token);
  tree.activeFormattingElements.add(Marker);
  parser.framesetOK = false;
}
| 1456 |
/**
 * Handles an `<xmp>` start tag: closes any `p` in button scope,
 * reconstructs the active formatting elements, clears `framesetOK` and
 * parses the element through `parseRCDataRawtext` in "RAWTEXT" mode.
 */
void startTagXmp(StartTagToken token) {
  var openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) endTagP(new EndTagToken("p"));
  tree.reconstructActiveFormattingElements();
  parser.framesetOK = false;
  parser.parseRCDataRawtext(token, "RAWTEXT");
}
| 1465 |
/**
 * Inserts a `table` element, clears `framesetOK` and moves to the in-table
 * phase. Unless the parser's compat mode is "quirks", a `p` in button scope
 * is closed first.
 */
void startTagTable(StartTagToken token) {
  if (parser.compatMode != "quirks" &&
      tree.elementInScope("p", variant: "button")) {
    processEndTag(new EndTagToken("p"));
  }
  tree.insertElement(token);
  parser.framesetOK = false;
  parser.phase = parser._inTablePhase;
}
| 1476 |
/**
 * Inserts a void element: reconstructs the active formatting elements,
 * inserts the element and pops it straight away, acknowledges the
 * self-closing flag and clears `framesetOK`.
 */
void startTagVoidFormatting(StartTagToken token) {
  tree
    ..reconstructActiveFormattingElements()
    ..insertElement(token);
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
  parser.framesetOK = false;
}
| 1484 |
/**
 * Inserts an `input` element via [startTagVoidFormatting]. When its `type`
 * attribute lower-cases to "hidden", the previous `framesetOK` value is
 * restored.
 */
void startTagInput(StartTagToken token) {
  var framesetWasOK = parser.framesetOK;
  startTagVoidFormatting(token);

  var isHidden = asciiUpper2Lower(token.data["type"]) == "hidden";
  // input type=hidden doesn't change framesetOK
  if (isHidden) parser.framesetOK = framesetWasOK;
}
| 1493 |
/**
 * Inserts the element for [token], pops it again immediately and
 * acknowledges the token's self-closing flag.
 */
void startTagParamSource(StartTagToken token) {
  tree.insertElement(token);
  var open = tree.openElements;
  open.removeLast();
  token.selfClosingAcknowledged = true;
}
| 1499 |
/**
 * Inserts an `hr` element after closing any `p` in button scope, pops it
 * again immediately, acknowledges the self-closing flag and clears
 * `framesetOK`.
 */
void startTagHr(StartTagToken token) {
  var openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) endTagP(new EndTagToken("p"));
  tree.insertElement(token);
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
  parser.framesetOK = false;
}
| 1509 |
/**
 * Handles the non-standard `<image>` start tag: reports a parse error and
 * reprocesses it as an `img` start tag carrying the same attributes and
 * self-closing flag.
 */
void startTagImage(StartTagToken token) {
  // No really...
  parser.parseError(token.span, "unexpected-start-tag-treated-as",
      {"originalName": "image", "newName": "img"});
  var imgToken = new StartTagToken("img",
      data: token.data, selfClosing: token.selfClosing);
  processStartTag(imgToken);
}
| 1517 |
/**
 * Handles the deprecated `<isindex>` start tag by synthesizing, through
 * [processStartTag]/[processEndTag]/[processCharacters], the sequence
 * `form`, `hr`, `label`, prompt text, `input`, `/label`, `hr`, `/form`.
 *
 * The tag is always reported as a parse error and is otherwise ignored when
 * the tree already has a form pointer. The `action` attribute (if any) goes
 * on the `form`; `prompt` (or a default text) becomes the label text; all
 * remaining attributes plus `name="isindex"` go on the `input`.
 */
void startTagIsIndex(StartTagToken token) {
  parser.parseError(token.span, "deprecated-tag", {"name": "isindex"});
  if (tree.formPointer != null) return;

  var formAttrs = {};
  var action = token.data["action"];
  if (action != null) formAttrs["action"] = action;

  processStartTag(new StartTagToken("form", data: formAttrs));
  processStartTag(new StartTagToken("hr", data: {}));
  processStartTag(new StartTagToken("label", data: {}));

  // XXX Localization ...
  var customPrompt = token.data["prompt"];
  var prompt = customPrompt != null
      ? customPrompt
      : "This is a searchable index. Enter search keywords: ";
  processCharacters(new CharactersToken(prompt));

  var inputAttrs = new LinkedHashMap.from(token.data)
    ..remove('action')
    ..remove('prompt');
  inputAttrs["name"] = "isindex";
  processStartTag(new StartTagToken("input",
      data: inputAttrs, selfClosing: token.selfClosing));

  processEndTag(new EndTagToken("label"));
  processStartTag(new StartTagToken("hr", data: {}));
  processEndTag(new EndTagToken("form"));
}
| 1547 |
/**
 * Inserts a `textarea` element, switches the tokenizer to its RCDATA state,
 * sets [dropNewline] and clears `framesetOK`.
 */
void startTagTextarea(StartTagToken token) {
  tree.insertElement(token);
  var tokenizer = parser.tokenizer;
  tokenizer.state = tokenizer.rcdataState;
  dropNewline = true;
  parser.framesetOK = false;
}
| 1554 |
/** Clears `framesetOK`, then handles the tag via [startTagRawtext]. */
void startTagIFrame(StartTagToken token) {
  parser.framesetOK = false;
  startTagRawtext(token);
}
| 1559 |
/**
 * Parses the element through `parser.parseRCDataRawtext` in "RAWTEXT" mode.
 * Used for iframe, noembed and noscript (if scripting enabled).
 */
void startTagRawtext(StartTagToken token) {
  parser.parseRCDataRawtext(token, "RAWTEXT");
}
| 1564 |
/**
 * Inserts an `option`/`optgroup` element. If the current open element is an
 * `option`, an `</option>` end tag is processed by the current phase first.
 */
void startTagOpt(StartTagToken token) {
  var currentIsOption = tree.openElements.last.tagName == "option";
  if (currentIsOption) {
    parser.phase.processEndTag(new EndTagToken("option"));
  }
  tree.reconstructActiveFormattingElements();
  parser.tree.insertElement(token);
}
| 1572 |
/**
 * Inserts a `select` element and clears `framesetOK`. The parser then moves
 * to the in-select-in-table phase when its current phase is one of the
 * table-related phases listed below, and to the in-select phase otherwise.
 */
void startTagSelect(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  parser.framesetOK = false;

  var inTableContext = parser._inTablePhase == parser.phase ||
      parser._inCaptionPhase == parser.phase ||
      parser._inColumnGroupPhase == parser.phase ||
      parser._inTableBodyPhase == parser.phase ||
      parser._inRowPhase == parser.phase ||
      parser._inCellPhase == parser.phase;

  parser.phase =
      inTableContext ? parser._inSelectInTablePhase : parser._inSelectPhase;
}
| 1589 |
/**
 * Inserts an `rp`/`rt` element. When a `ruby` element is in scope, implied
 * end tags are generated first and a parse error is reported if the current
 * open element is then not `ruby`.
 */
void startTagRpRt(StartTagToken token) {
  var insideRuby = tree.elementInScope("ruby");
  if (insideRuby) {
    tree.generateImpliedEndTags();
    var current = tree.openElements.last;
    if (current.tagName != "ruby") {
      parser.parseError(current.sourceSpan, 'undefined-error');
    }
  }
  tree.insertElement(token);
}
| 1600 |
/**
 * Inserts a `math` element in the MathML namespace after adjusting its
 * MathML and foreign attributes. A self-closing token is popped again
 * immediately and its self-closing flag acknowledged.
 */
void startTagMath(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  parser
    ..adjustMathMLAttributes(token)
    ..adjustForeignAttributes(token);
  token.namespace = Namespaces.mathml;
  tree.insertElement(token);

  // Need to get the parse error right for the case where the token
  // has a namespace not equal to the xmlns attribute
  if (!token.selfClosing) return;
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
}
| 1614 |
/**
 * Inserts an `svg` element in the SVG namespace after adjusting its SVG and
 * foreign attributes. A self-closing token is popped again immediately and
 * its self-closing flag acknowledged.
 */
void startTagSvg(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  parser
    ..adjustSVGAttributes(token)
    ..adjustForeignAttributes(token);
  token.namespace = Namespaces.svg;
  tree.insertElement(token);

  // Need to get the parse error right for the case where the token
  // has a namespace not equal to the xmlns attribute
  if (!token.selfClosing) return;
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
}
| 1628 |
/**
 * Reports a start tag that belongs to a different insertion mode as
 * "unexpected-start-tag-ignored" and otherwise ignores it.
 *
 * [processStartTag] routes these tags here: "caption", "col", "colgroup",
 * "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr".
 */
void startTagMisplaced(StartTagToken token) {
  var details = {"name": token.name};
  parser.parseError(token.span, "unexpected-start-tag-ignored", details);
}
| 1640 |
/**
 * Handles any start tag without a dedicated handler: reconstructs the
 * active formatting elements and inserts the element.
 *
 * Always returns `null`. The method is declared as returning [Token] but
 * previously had no return statement, so the null result is now explicit.
 */
Token startTagOther(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  return null;
}
| 1645 |
/**
 * Handles a `</p>` end tag.
 *
 * With a `p` in button scope: generates implied end tags (excluding `p`),
 * reports a parse error if the current element is not `p`, then pops open
 * elements until `p`. Without one: opens an implied `p` via
 * [startTagCloseP], reports a parse error and processes a synthesized
 * `</p>` again.
 */
void endTagP(EndTagToken token) {
  if (tree.elementInScope("p", variant: "button")) {
    tree.generateImpliedEndTags("p");
    if (tree.openElements.last.tagName != "p") {
      parser.parseError(token.span, "unexpected-end-tag", {"name": "p"});
    }
    popOpenElementsUntil("p");
    return;
  }

  startTagCloseP(new StartTagToken("p", data: {}));
  parser.parseError(token.span, "unexpected-end-tag", {"name": "p"});
  endTagP(new EndTagToken("p"));
}
| 1659 |
/**
 * Handles a `</body>` end tag.
 *
 * Without a `body` in scope this is a parse error and nothing else happens.
 * Otherwise, if the current element is not `body`, the open elements from
 * index 2 onward are scanned and the first one whose tag is not in the list
 * below is reported. The parser then moves to the after-body phase.
 */
void endTagBody(EndTagToken token) {
  if (!tree.elementInScope("body")) {
    parser.parseError(token.span, 'undefined-error');
    return;
  }

  if (tree.openElements.last.tagName != "body") {
    const mayStayOpen = const [
      "dd", "dt", "li", "optgroup", "option", "p", "rp", "rt", "tbody", "td",
      "tfoot", "th", "thead", "tr", "body", "html"
    ];
    for (Node node in slice(tree.openElements, 2)) {
      if (mayStayOpen.contains(node.tagName)) continue;
      // Not sure this is the correct name for the parse error
      parser.parseError(token.span, "expected-one-end-tag-but-got-another",
          {"expectedName": "body", "gotName": node.tagName});
      break;
    }
  }
  parser.phase = parser._afterBodyPhase;
}
| 1680 |
/**
 * Handles an `</html>` end tag.
 *
 * When a `body` is in scope, a synthesized `</body>` is processed via
 * [endTagBody] and [token] is returned. Otherwise the tag is ignored and
 * `null` is returned; that path previously fell off the end of this
 * `Token`-returning method, so the null result is now explicit.
 */
Token endTagHtml(EndTagToken token) {
  // We repeat the test for the body end tag token being ignored here
  if (!tree.elementInScope("body")) return null;

  endTagBody(new EndTagToken("body"));
  return token;
}
| 1688 |
/**
 * Handles the end tag of a block-level element.
 *
 * Clears [dropNewline] for `</pre>`. If the element is in scope, implied
 * end tags are generated and, afterwards, open elements are popped until
 * that element. A parse error is reported whenever the current element does
 * not match the end tag's name.
 */
void endTagBlock(EndTagToken token) {
  var name = token.name;
  // Put us back in the right whitespace handling mode
  if (name == "pre") dropNewline = false;

  var inScope = tree.elementInScope(name);
  if (inScope) tree.generateImpliedEndTags();

  if (tree.openElements.last.tagName != name) {
    parser.parseError(token.span, "end-tag-too-early", {"name": token.name});
  }

  if (inScope) popOpenElementsUntil(name);
}
| 1705 |
/**
 * Handles a `</form>` end tag. The tree's form pointer is always cleared.
 *
 * If there was no form pointer, or that element is not in scope, the tag is
 * a parse error. Otherwise implied end tags are generated, a parse error is
 * reported if the form is not the current element, and the form is removed
 * from the open elements.
 */
void endTagForm(EndTagToken token) {
  var form = tree.formPointer;
  tree.formPointer = null;

  if (form == null || !tree.elementInScope(form)) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": "form"});
    return;
  }

  tree.generateImpliedEndTags();
  if (tree.openElements.last != form) {
    parser.parseError(token.span, "end-tag-too-early-ignored",
        {"name": "form"});
  }
  tree.openElements.remove(form);
}
| 1719 |
/**
 * Handles `</li>`, `</dd>` and `</dt>` end tags.
 *
 * The scope check uses the "list" variant for `li` and no variant
 * otherwise. Out of scope, the tag is a parse error. In scope, implied end
 * tags (excluding this tag) are generated, a parse error is reported if the
 * current element does not match, and open elements are popped until the
 * matching element.
 */
void endTagListItem(EndTagToken token) {
  var name = token.name;
  var variant = name == "li" ? "list" : null;

  if (!tree.elementInScope(name, variant: variant)) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
    return;
  }

  tree.generateImpliedEndTags(name);
  if (tree.openElements.last.tagName != name) {
    parser.parseError(token.span, "end-tag-too-early", {"name": token.name});
  }
  popOpenElementsUntil(name);
}
| 1737 |
/**
 * Handles the end tag of a heading element.
 *
 * If any heading element is in scope, implied end tags are generated. A
 * parse error is reported when the current element does not match the end
 * tag's name. Then, if any heading element is (still) in scope, open
 * elements are popped up to and including the first heading found.
 */
void endTagHeading(EndTagToken token) {
  for (var heading in headingElements) {
    if (tree.elementInScope(heading)) {
      tree.generateImpliedEndTags();
      break;
    }
  }

  if (tree.openElements.last.tagName != token.name) {
    parser.parseError(token.span, "end-tag-too-early", {"name": token.name});
  }

  // The scope check is repeated on purpose: it runs again after the implied
  // end tags above have been generated.
  for (var heading in headingElements) {
    if (!tree.elementInScope(heading)) continue;
    var popped = tree.openElements.removeLast();
    while (!headingElements.contains(popped.tagName)) {
      popped = tree.openElements.removeLast();
    }
    break;
  }
}
| 1759 |
/**
 * The much-feared adoption agency algorithm.
 *
 * Runs for an end tag named [token.name] that has (or should have) an entry
 * in the list of active formatting elements. Each pass either finishes
 * (possibly after reporting a parse error) or re-parents mis-nested content
 * around the "furthest block". At most eight passes are made.
 */
endTagFormatting(EndTagToken token) {
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adoptionAgency
  // TODO(jmesserly): the comments here don't match the numbered steps in the
  // updated spec. This needs a pass over it to verify that it still matches.
  // In particular the html5lib Python code skipped "step 4", I'm not sure why.
  // XXX Better parseError messages appreciated.
  for (int outerLoopCounter = 0; outerLoopCounter < 8; outerLoopCounter++) {
    // Step 1 paragraph 1
    var formattingElement =
        tree.elementInActiveFormattingElements(token.name);
    var onStack = formattingElement != null &&
        tree.openElements.contains(formattingElement);
    if (formattingElement == null ||
        (onStack && !tree.elementInScope(formattingElement.tagName))) {
      parser.parseError(token.span, "adoption-agency-1.1",
          {"name": token.name});
      return;
    }

    // Step 1 paragraph 2
    if (!onStack) {
      parser.parseError(token.span, "adoption-agency-1.2",
          {"name": token.name});
      tree.activeFormattingElements.remove(formattingElement);
      return;
    }

    // Step 1 paragraph 3
    if (formattingElement != tree.openElements.last) {
      parser.parseError(token.span, "adoption-agency-1.3",
          {"name": token.name});
    }

    // Step 2
    // Start of the adoption agency algorithm proper: the furthest block is
    // the first special element at or after the formatting element on the
    // stack of open elements.
    var afeIndex = tree.openElements.indexOf(formattingElement);
    Node furthestBlock = null;
    for (Node candidate in slice(tree.openElements, afeIndex)) {
      if (specialElements.contains(candidate.nameTuple)) {
        furthestBlock = candidate;
        break;
      }
    }

    // Step 3
    // No furthest block: pop up to and including the formatting element and
    // drop it from the active formatting elements.
    if (furthestBlock == null) {
      var popped;
      do {
        popped = tree.openElements.removeLast();
      } while (popped != formattingElement);
      tree.activeFormattingElements.remove(popped);
      return;
    }

    var commonAncestor = tree.openElements[afeIndex - 1];

    // Step 5
    // The bookmark is supposed to help us identify where to reinsert
    // nodes in step 12. We have to ensure that we reinsert nodes after
    // the node before the active formatting element. Note the bookmark
    // can move in step 7.4
    var bookmark = tree.activeFormattingElements.indexOf(formattingElement);

    // Step 6
    Node lastNode = furthestBlock;
    var node = furthestBlock;
    var index = tree.openElements.indexOf(node);
    for (int innerLoopCounter = 0; innerLoopCounter < 3; innerLoopCounter++) {
      // Node is element before node in open elements
      index -= 1;
      node = tree.openElements[index];
      if (!tree.activeFormattingElements.contains(node)) {
        tree.openElements.remove(node);
        continue;
      }

      // Step 6.3
      if (node == formattingElement) break;

      // Step 6.4
      if (lastNode == furthestBlock) {
        bookmark = tree.activeFormattingElements.indexOf(node) + 1;
      }

      // Step 6.5
      //cite = node.parent
      // Replace node with a clone of itself in both lists.
      var replacement = node.clone();
      tree.activeFormattingElements[
          tree.activeFormattingElements.indexOf(node)] = replacement;
      tree.openElements[tree.openElements.indexOf(node)] = replacement;
      node = replacement;

      // Step 6.6
      // Remove lastNode from its parents, if any
      if (lastNode.parent != null) {
        lastNode.parent.nodes.remove(lastNode);
      }
      node.nodes.add(lastNode);

      // Step 7.7
      lastNode = node;
    }

    // Step 7
    // Foster parent lastNode if commonAncestor is a
    // table, tbody, tfoot, thead, or tr we need to foster parent the
    // lastNode
    if (lastNode.parent != null) {
      lastNode.parent.nodes.remove(lastNode);
    }

    var needsFosterParent = const ["table", "tbody", "tfoot", "thead", "tr"]
        .contains(commonAncestor.tagName);
    if (needsFosterParent) {
      var nodePos = tree.getTableMisnestedNodePosition();
      nodePos[0].insertBefore(lastNode, nodePos[1]);
    } else {
      commonAncestor.nodes.add(lastNode);
    }

    // Step 8
    var formattingClone = formattingElement.clone();

    // Step 9
    furthestBlock.reparentChildren(formattingClone);

    // Step 10
    furthestBlock.nodes.add(formattingClone);

    // Step 11
    tree.activeFormattingElements.remove(formattingElement);
    tree.activeFormattingElements.insert(
        min(bookmark, tree.activeFormattingElements.length),
        formattingClone);

    // Step 12
    tree.openElements.remove(formattingElement);
    tree.openElements.insert(
        tree.openElements.indexOf(furthestBlock) + 1, formattingClone);
  }
}
| 1903 |
/**
 * Handles the end tag named by [token] for the applet/marquee/object group.
 *
 * Implied end tags are generated only when the element is in scope. A parse
 * error is reported whenever the current node does not match. If the
 * element is in scope, the stack is popped up to it and the active
 * formatting elements are cleared.
 */
void endTagAppletMarqueeObject(EndTagToken token) {
  var name = token.name;

  if (tree.elementInScope(name)) {
    tree.generateImpliedEndTags();
  }
  if (tree.openElements.last.tagName != name) {
    parser.parseError(token.span, "end-tag-too-early", {"name": name});
  }

  // Scope is checked again after the implied end tags were generated.
  if (!tree.elementInScope(name)) return;
  popOpenElementsUntil(name);
  tree.clearActiveFormattingElements();
}
| 1916 |
/**
 * Handles a `br` end tag by reporting a parse error and then inserting a
 * `br` element as though a start tag had been seen.
 */
void endTagBr(EndTagToken token) {
  var errorData = {"originalName": "br", "newName": "br element"};
  parser.parseError(token.span, "unexpected-end-tag-treated-as", errorData);

  tree.reconstructActiveFormattingElements();

  // Insert the element, then immediately take it off the stack of open
  // elements again.
  var syntheticBr = new StartTagToken("br", data: {});
  tree.insertElement(syntheticBr);
  tree.openElements.removeLast();
}
| 1924 |
/**
 * Handles any end tag without a dedicated handler.
 *
 * Walks the stack of open elements from the top. The first element whose
 * tag name matches [token] is closed, together with everything above it.
 * If a special element is reached first, the tag is reported as unexpected
 * and ignored.
 */
void endTagOther(EndTagToken token) {
  for (Node candidate in tree.openElements.reversed) {
    if (candidate.tagName != token.name) {
      if (!specialElements.contains(candidate.nameTuple)) continue;
      parser.parseError(token.span, "unexpected-end-tag",
          {"name": token.name});
      return;
    }

    tree.generateImpliedEndTags(token.name);
    if (tree.openElements.last.tagName != token.name) {
      parser.parseError(token.span, "unexpected-end-tag",
          {"name": token.name});
    }

    // Pop up to and including the matched element.
    var popped;
    do {
      popped = tree.openElements.removeLast();
    } while (popped != candidate);
    return;
  }
}
| 1944 } |
| 1945 |
| 1946 |
/**
 * Phase used while the parser is consuming raw text content (the original
 * comment mentions RCDATA/RAWTEXT mode).
 *
 * Characters are inserted as text. Any end tag pops the current node and
 * switches back to [parser.originalPhase].
 */
class TextPhase extends Phase {
  TextPhase(parser) : super(parser);

  // "Tried to process start tag %s in RCDATA/RAWTEXT mode"%token.name
  processStartTag(StartTagToken token) { assert(false); }

  processEndTag(EndTagToken token) {
    if (token.name == 'script') return endTagScript(token);
    return endTagOther(token);
  }

  /** Inserts the character data as text; always returns `null`. */
  Token processCharacters(CharactersToken token) {
    tree.insertText(token.data, token.span);
    // Made explicit: the method is declared to return a Token but used to
    // fall off the end.
    return null;
  }

  /**
   * Reports the unclosed current node, pops it, and restores the original
   * phase. Returns `true`.
   */
  bool processEOF() {
    var last = tree.openElements.last;
    parser.parseError(last.sourceSpan, "expected-named-closing-tag-but-got-eof",
        {'name': last.tagName});
    tree.openElements.removeLast();
    parser.phase = parser.originalPhase;
    return true;
  }

  /** Pops the current node, asserted to be a `script`, and restores the phase. */
  void endTagScript(EndTagToken token) {
    var node = tree.openElements.removeLast();
    assert(node.tagName == "script");
    parser.phase = parser.originalPhase;
    //The rest of this method is all stuff that only happens if
    //document.write works
  }

  /** Pops the current node and restores the original phase. */
  void endTagOther(EndTagToken token) {
    // The popped node is not needed, so it is no longer kept in a local.
    tree.openElements.removeLast();
    parser.phase = parser.originalPhase;
  }
}
| 1984 |
/**
 * Phase that processes tokens while the parser is directly inside a table.
 *
 * NOTE(review): presumably the spec's "in table" insertion mode; the original
 * link to the spec section was garbled, so confirm the anchor before citing.
 *
 * Table-structure tags are handled here. Start and end tags with no
 * dedicated handler are reported as parse errors and forwarded to the
 * in-body phase with `tree.insertFromTable` set for the duration of the call.
 */
class InTablePhase extends Phase {
  InTablePhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "caption":
        return startTagCaption(token);
      case "colgroup":
        return startTagColgroup(token);
      case "col":
        return startTagCol(token);
      case "tbody":
      case "tfoot":
      case "thead":
        return startTagRowGroup(token);
      case "td":
      case "th":
      case "tr":
        return startTagImplyTbody(token);
      case "table":
        return startTagTable(token);
      case "style":
      case "script":
        return startTagStyleScript(token);
      case "input":
        return startTagInput(token);
      case "form":
        return startTagForm(token);
      default:
        return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "table":
        return endTagTable(token);
      case "body":
      case "caption":
      case "col":
      case "colgroup":
      case "html":
      case "tbody":
      case "td":
      case "tfoot":
      case "th":
      case "thead":
      case "tr":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  // helper methods

  /**
   * "Clear the stack back to a table context": pops open elements until the
   * current node is `table` or `html`.
   */
  void clearStackToTableContext() {
    while (!const ["table", "html"].contains(
        tree.openElements.last.tagName)) {
      //parser.parseError(token.span, "unexpected-implied-end-tag-in-table",
      //  {"name": tree.openElements.last.name})
      tree.openElements.removeLast();
    }
    // When the current node is <html> it's an innerHTML case
  }

  // processing methods

  /**
   * Reports "eof-in-table" unless the current node is `html`, then returns
   * `false` (stop parsing).
   */
  bool processEOF() {
    var current = tree.openElements.last;
    if (current.tagName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError(current.sourceSpan, "eof-in-table");
    }
    //Stop parsing
    return false;
  }

  /**
   * Switches to the in-table-text phase, remembering the phase that was
   * active, and lets it process the whitespace [token].
   */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    var previousPhase = parser.phase;
    var textPhase = parser._inTableTextPhase;
    parser.phase = textPhase;
    textPhase.originalPhase = previousPhase;
    parser.phase.processSpaceCharacters(token);
    return null;
  }

  /**
   * Switches to the in-table-text phase, remembering the phase that was
   * active, and lets it process the character [token].
   */
  Token processCharacters(CharactersToken token) {
    var previousPhase = parser.phase;
    var textPhase = parser._inTableTextPhase;
    parser.phase = textPhase;
    textPhase.originalPhase = previousPhase;
    parser.phase.processCharacters(token);
    return null;
  }

  /**
   * Inserts text through the in-body phase with `tree.insertFromTable`
   * enabled for the duration of the call.
   */
  void insertText(CharactersToken token) {
    // If we get here there must be at least one non-whitespace character
    // Do the table magic!
    tree.insertFromTable = true;
    parser._inBodyPhase.processCharacters(token);
    tree.insertFromTable = false;
  }

  /**
   * Opens a caption: clears to table context, pushes a marker onto the
   * active formatting elements, inserts the element, and enters the
   * in-caption phase.
   */
  void startTagCaption(StartTagToken token) {
    clearStackToTableContext();
    tree.activeFormattingElements.add(Marker);
    tree.insertElement(token);
    parser.phase = parser._inCaptionPhase;
  }

  /** Inserts a `colgroup` and enters the in-column-group phase. */
  void startTagColgroup(StartTagToken token) {
    clearStackToTableContext();
    tree.insertElement(token);
    parser.phase = parser._inColumnGroupPhase;
  }

  /** Opens a synthetic `colgroup`, then returns [token] to the caller. */
  Token startTagCol(StartTagToken token) {
    var impliedColgroup = new StartTagToken("colgroup", data: {});
    startTagColgroup(impliedColgroup);
    return token;
  }

  /** Inserts a row group element and enters the in-table-body phase. */
  void startTagRowGroup(StartTagToken token) {
    clearStackToTableContext();
    tree.insertElement(token);
    parser.phase = parser._inTableBodyPhase;
  }

  /** Opens a synthetic `tbody`, then returns [token] to the caller. */
  Token startTagImplyTbody(StartTagToken token) {
    var impliedTbody = new StartTagToken("tbody", data: {});
    startTagRowGroup(impliedTbody);
    return token;
  }

  /**
   * A nested `table` start tag: reports a parse error, processes a synthetic
   * `table` end tag, and returns [token] unless in innerHTML mode.
   */
  Token startTagTable(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-implies-end-tag",
        {"startName": "table", "endName": "table"});
    parser.phase.processEndTag(new EndTagToken("table"));
    return parser.innerHTMLMode ? null : token;
  }

  /** Delegates `style` and `script` start tags to the in-head phase. */
  Token startTagStyleScript(StartTagToken token) =>
      parser._inHeadPhase.processStartTag(token);

  /**
   * A hidden `input` is inserted and immediately popped (with a parse
   * error); any other `input` is treated like an unrecognized start tag.
   */
  void startTagInput(StartTagToken token) {
    if (asciiUpper2Lower(token.data["type"]) != "hidden") {
      startTagOther(token);
      return;
    }
    parser.parseError(token.span, "unexpected-hidden-input-in-table");
    tree.insertElement(token);
    // XXX associate with form
    tree.openElements.removeLast();
  }

  /**
   * Reports a parse error. If no form pointer is set, inserts the form,
   * records it as the form pointer, and pops it again.
   */
  void startTagForm(StartTagToken token) {
    parser.parseError(token.span, "unexpected-form-in-table");
    if (tree.formPointer != null) return;

    tree.insertElement(token);
    tree.formPointer = tree.openElements.last;
    tree.openElements.removeLast();
  }

  /**
   * Reports a parse error and processes [token] through the in-body phase
   * with `tree.insertFromTable` enabled.
   */
  void startTagOther(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-implies-table-voodoo",
        {"name": token.name});
    // Do the table magic!
    tree.insertFromTable = true;
    parser._inBodyPhase.processStartTag(token);
    tree.insertFromTable = false;
  }

  /**
   * Closes the table when one is in table scope: pops through the `table`
   * element and resets the insertion mode. Otherwise reports a parse error.
   */
  void endTagTable(EndTagToken token) {
    if (!tree.elementInScope("table", variant: "table")) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return;
    }

    tree.generateImpliedEndTags();
    var current = tree.openElements.last;
    if (current.tagName != "table") {
      parser.parseError(token.span, "end-tag-too-early-named",
          {"gotName": "table", "expectedName": current.tagName});
    }
    while (tree.openElements.last.tagName != "table") {
      tree.openElements.removeLast();
    }
    tree.openElements.removeLast();
    parser.resetInsertionMode();
  }

  /** Reports the end tag as unexpected and otherwise ignores it. */
  void endTagIgnore(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
  }

  /**
   * Reports a parse error and processes [token] through the in-body phase
   * with `tree.insertFromTable` enabled.
   */
  void endTagOther(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag-implies-table-voodoo",
        {"name": token.name});
    // Do the table magic!
    tree.insertFromTable = true;
    parser._inBodyPhase.processEndTag(token);
    tree.insertFromTable = false;
  }
}
| 2165 |
/**
 * Phase that buffers character tokens seen inside a table.
 *
 * Tokens accumulate in [characterTokens]. On a comment, start tag, end tag,
 * or EOF the buffered text is flushed and [originalPhase] is restored as the
 * parser's phase. The tag and comment handlers then return the token they
 * were given.
 */
class InTableTextPhase extends Phase {
  Phase originalPhase;
  List<StringToken> characterTokens;

  InTableTextPhase(parser)
      : characterTokens = <StringToken>[],
        super(parser);

  /**
   * Inserts the buffered text and empties the buffer.
   *
   * Text containing any non-whitespace goes through
   * `parser._inTablePhase.insertText`; whitespace-only text is inserted
   * directly into the tree. Does nothing when no tokens are buffered.
   */
  void flushCharacters() {
    if (characterTokens.isEmpty) return;

    // TODO(sigmund,jmesserly): remove '' (dartbug.com/8480)
    var data = characterTokens.map((t) => t.data).join('');

    // The span runs from the first buffered token to the last one.
    var span = parser.generateSpans
        ? new FileSpan.union(
            characterTokens.first.span, characterTokens.last.span)
        : null;

    if (!allWhitespace(data)) {
      parser._inTablePhase.insertText(new CharactersToken(data)..span = span);
    } else if (!data.isEmpty) {
      tree.insertText(data, span);
    }
    characterTokens = <StringToken>[];
  }

  Token processComment(CommentToken token) {
    flushCharacters();
    parser.phase = originalPhase;
    return token;
  }

  bool processEOF() {
    flushCharacters();
    parser.phase = originalPhase;
    return true;
  }

  /** Buffers [token], except for a lone U+0000 which is dropped. */
  Token processCharacters(CharactersToken token) {
    if (token.data != "\u0000") {
      characterTokens.add(token);
    }
    return null;
  }

  Token processSpaceCharacters(SpaceCharactersToken token) {
    //pretty sure we should never reach here
    characterTokens.add(token);
    // XXX assert(false);
    return null;
  }

  Token processStartTag(StartTagToken token) {
    flushCharacters();
    parser.phase = originalPhase;
    return token;
  }

  Token processEndTag(EndTagToken token) {
    flushCharacters();
    parser.phase = originalPhase;
    return token;
  }
}
| 2232 |
| 2233 |
/**
 * Phase that processes tokens while inside a table caption.
 *
 * NOTE(review): presumably the spec's "in caption" insertion mode; the
 * original link to the spec section was garbled.
 *
 * Table-structure start tags and the `table` end tag first close the caption
 * by processing a synthetic `caption` end tag. Most other tokens are
 * delegated to the in-body phase.
 */
class InCaptionPhase extends Phase {
  InCaptionPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "caption":
      case "col":
      case "colgroup":
      case "tbody":
      case "td":
      case "tfoot":
      case "th":
      case "thead":
      case "tr":
        return startTagTableElement(token);
      default:
        return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "caption":
        return endTagCaption(token);
      case "table":
        return endTagTable(token);
      case "body":
      case "col":
      case "colgroup":
      case "html":
      case "tbody":
      case "td":
      case "tfoot":
      case "th":
      case "thead":
      case "tr":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  /** Whether a `caption` end tag would be ignored (none in table scope). */
  bool ignoreEndTagCaption() =>
      !tree.elementInScope("caption", variant: "table");

  bool processEOF() {
    parser._inBodyPhase.processEOF();
    return false;
  }

  Token processCharacters(CharactersToken token) =>
      parser._inBodyPhase.processCharacters(token);

  /**
   * Reports a parse error, processes a synthetic `caption` end tag, and
   * returns [token] unless that end tag was ignored.
   */
  Token startTagTableElement(StartTagToken token) {
    parser.parseError(token.span, "undefined-error");
    //XXX Have to duplicate logic here to find out if the tag is ignored
    var captionEndIgnored = ignoreEndTagCaption();
    parser.phase.processEndTag(new EndTagToken("caption"));
    return captionEndIgnored ? null : token;
  }

  Token startTagOther(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /**
   * Closes the caption: pops through the `caption` element, clears the
   * active formatting elements, and returns to the in-table phase. When no
   * caption is in table scope only a parse error is reported.
   */
  void endTagCaption(EndTagToken token) {
    if (ignoreEndTagCaption()) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return;
    }

    // AT this code is quite similar to endTagTable in "InTable"
    tree.generateImpliedEndTags();
    if (tree.openElements.last.tagName != "caption") {
      parser.parseError(token.span, "expected-one-end-tag-but-got-another",
          {"gotName": "caption",
           "expectedName": tree.openElements.last.tagName});
    }
    while (tree.openElements.last.tagName != "caption") {
      tree.openElements.removeLast();
    }
    tree.openElements.removeLast();
    tree.clearActiveFormattingElements();
    parser.phase = parser._inTablePhase;
  }

  /**
   * Reports a parse error, processes a synthetic `caption` end tag, and
   * returns [token] unless that end tag was ignored.
   */
  Token endTagTable(EndTagToken token) {
    parser.parseError(token.span, "undefined-error");
    var captionEndIgnored = ignoreEndTagCaption();
    parser.phase.processEndTag(new EndTagToken("caption"));
    return captionEndIgnored ? null : token;
  }

  void endTagIgnore(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
  }

  Token endTagOther(EndTagToken token) =>
      parser._inBodyPhase.processEndTag(token);
}
| 2327 |
| 2328 |
/**
 * Phase that processes tokens while inside a `colgroup`.
 *
 * NOTE(review): presumably the spec's "in column group" insertion mode; the
 * original link to the spec section was garbled.
 *
 * `col` start tags are inserted and immediately popped. Characters and
 * unrecognized tags close the column group first and then hand the token
 * back, unless the close was ignored.
 */
class InColumnGroupPhase extends Phase {
  InColumnGroupPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "col":
        return startTagCol(token);
      default:
        return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "colgroup":
        return endTagColgroup(token);
      case "col":
        return endTagCol(token);
      default:
        return endTagOther(token);
    }
  }

  /** Whether a `colgroup` end tag would be ignored (current node is `html`). */
  bool ignoreEndTagColgroup() => tree.openElements.last.tagName == "html";

  /**
   * Runs a synthetic `colgroup` end tag and returns [token], or `null` when
   * that end tag was ignored. The ignore check is taken before the end tag
   * runs.
   */
  Token _closeColgroupThen(Token token) {
    var colgroupEndIgnored = ignoreEndTagColgroup();
    endTagColgroup(new EndTagToken("colgroup"));
    return colgroupEndIgnored ? null : token;
  }

  bool processEOF() {
    if (ignoreEndTagColgroup()) {
      assert(parser.innerHTMLMode);
      return false;
    }
    endTagColgroup(new EndTagToken("colgroup"));
    return true;
  }

  Token processCharacters(CharactersToken token) => _closeColgroupThen(token);

  /** Inserts the `col` element and pops it straight away. */
  void startTagCol(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
  }

  Token startTagOther(StartTagToken token) => _closeColgroupThen(token);

  /**
   * Pops the current node and returns to the in-table phase, or reports a
   * parse error when the end tag is to be ignored.
   */
  void endTagColgroup(EndTagToken token) {
    if (!ignoreEndTagColgroup()) {
      tree.openElements.removeLast();
      parser.phase = parser._inTablePhase;
      return;
    }
    // innerHTML case
    assert(parser.innerHTMLMode);
    parser.parseError(token.span, "undefined-error");
  }

  void endTagCol(EndTagToken token) {
    parser.parseError(token.span, "no-end-tag", {"name": "col"});
  }

  Token endTagOther(EndTagToken token) => _closeColgroupThen(token);
}
| 2402 |
| 2403 |
/**
 * Phase that processes tokens while inside a table row group (`tbody`,
 * `thead` or `tfoot`).
 *
 * NOTE(review): presumably the spec's "in table body" insertion mode; the
 * original link to the spec section was garbled.
 *
 * Rows and cells are opened here. EOF, characters, and unrecognized tags are
 * delegated to the in-table phase.
 */
class InTableBodyPhase extends Phase {
  InTableBodyPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "tr":
        return startTagTr(token);
      case "td":
      case "th":
        return startTagTableCell(token);
      case "caption":
      case "col":
      case "colgroup":
      case "tbody":
      case "tfoot":
      case "thead":
        return startTagTableOther(token);
      default:
        return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "tbody":
      case "tfoot":
      case "thead":
        return endTagTableRowGroup(token);
      case "table":
        return endTagTable(token);
      case "body":
      case "caption":
      case "col":
      case "colgroup":
      case "html":
      case "td":
      case "th":
      case "tr":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  // helper methods

  /**
   * Pops open elements until the current node is `tbody`, `tfoot`, `thead`
   * or `html`.
   */
  void clearStackToTableBodyContext() {
    var contextTags = const ["tbody", "tfoot", "thead", "html"];
    while (true) {
      var currentName = tree.openElements.last.tagName;
      if (contextTags.contains(currentName)) {
        if (currentName == "html") {
          assert(parser.innerHTMLMode);
        }
        return;
      }
      //XXX parser.parseError(token.span, "unexpected-implied-end-tag-in-table",
      //  {"name": tree.openElements.last.name})
      tree.openElements.removeLast();
    }
  }

  // the rest

  bool processEOF() {
    parser._inTablePhase.processEOF();
    return false;
  }

  Token processSpaceCharacters(SpaceCharactersToken token) =>
      parser._inTablePhase.processSpaceCharacters(token);

  Token processCharacters(CharactersToken token) =>
      parser._inTablePhase.processCharacters(token);

  /** Inserts a `tr` and enters the in-row phase. */
  void startTagTr(StartTagToken token) {
    clearStackToTableBodyContext();
    tree.insertElement(token);
    parser.phase = parser._inRowPhase;
  }

  /**
   * A cell directly inside a row group: reports a parse error, opens a
   * synthetic `tr`, and returns [token] to the caller.
   */
  Token startTagTableCell(StartTagToken token) {
    parser.parseError(token.span, "unexpected-cell-in-table-body",
        {"name": token.name});
    var impliedRow = new StartTagToken("tr", data: {});
    startTagTr(impliedRow);
    return token;
  }

  /** Handled exactly like a `table` end tag. */
  Token startTagTableOther(token) {
    return endTagTable(token);
  }

  Token startTagOther(StartTagToken token) =>
      parser._inTablePhase.processStartTag(token);

  /**
   * Closes the named row group when it is in table scope and returns to the
   * in-table phase; otherwise reports a parse error.
   */
  void endTagTableRowGroup(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      parser.parseError(token.span, "unexpected-end-tag-in-table-body",
          {"name": token.name});
      return;
    }
    clearStackToTableBodyContext();
    tree.openElements.removeLast();
    parser.phase = parser._inTablePhase;
  }

  /**
   * Closes the current row group, if one is in table scope, and returns
   * [token]. Otherwise reports a parse error and returns `null`.
   */
  Token endTagTable(TagToken token) {
    // XXX AT Any ideas on how to share this with endTagTable?
    var rowGroupInScope = const ["tbody", "thead", "tfoot"]
        .any((name) => tree.elementInScope(name, variant: "table"));
    if (!rowGroupInScope) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return null;
    }

    clearStackToTableBodyContext();
    endTagTableRowGroup(new EndTagToken(tree.openElements.last.tagName));
    return token;
  }

  void endTagIgnore(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag-in-table-body",
        {"name": token.name});
  }

  Token endTagOther(EndTagToken token) =>
      parser._inTablePhase.processEndTag(token);
}
| 2514 |
| 2515 |
/**
 * Phase that processes tokens while inside a table row (`tr`).
 *
 * NOTE(review): presumably the spec's "in row" insertion mode; the original
 * link to the spec section was garbled.
 *
 * Cells are opened here. Tags that belong to an outer table structure close
 * the row first and hand the token back. EOF, characters, and unrecognized
 * tags are delegated to the in-table phase.
 */
class InRowPhase extends Phase {
  InRowPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "td":
      case "th":
        return startTagTableCell(token);
      case "caption":
      case "col":
      case "colgroup":
      case "tbody":
      case "tfoot":
      case "thead":
      case "tr":
        return startTagTableOther(token);
      default:
        return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "tr":
        return endTagTr(token);
      case "table":
        return endTagTable(token);
      case "tbody":
      case "tfoot":
      case "thead":
        return endTagTableRowGroup(token);
      case "body":
      case "caption":
      case "col":
      case "colgroup":
      case "html":
      case "td":
      case "th":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  // helper methods (XXX unify this with other table helper methods)

  /**
   * Pops open elements until the current node is `tr` or `html`, reporting
   * a parse error for each element popped.
   */
  void clearStackToTableRowContext() {
    for (var current = tree.openElements.last;
         current.tagName != "tr" && current.tagName != "html";
         current = tree.openElements.last) {
      parser.parseError(current.sourceSpan,
          "unexpected-implied-end-tag-in-table-row",
          {"name": current.tagName});
      tree.openElements.removeLast();
    }
  }

  /** Whether a `tr` end tag would be ignored (no `tr` in table scope). */
  bool ignoreEndTagTr() => !tree.elementInScope("tr", variant: "table");

  // the rest

  bool processEOF() {
    parser._inTablePhase.processEOF();
    return false;
  }

  Token processSpaceCharacters(SpaceCharactersToken token) =>
      parser._inTablePhase.processSpaceCharacters(token);

  Token processCharacters(CharactersToken token) =>
      parser._inTablePhase.processCharacters(token);

  /**
   * Inserts the cell, enters the in-cell phase, and pushes a marker onto
   * the active formatting elements.
   */
  void startTagTableCell(StartTagToken token) {
    clearStackToTableRowContext();
    tree.insertElement(token);
    parser.phase = parser._inCellPhase;
    tree.activeFormattingElements.add(Marker);
  }

  /**
   * Closes the row with a synthetic `tr` end tag and returns [token], or
   * `null` when that end tag was ignored.
   */
  Token startTagTableOther(StartTagToken token) {
    bool rowEndIgnored = ignoreEndTagTr();
    endTagTr(new EndTagToken("tr"));
    // XXX how are we sure it's always ignored in the innerHTML case?
    if (rowEndIgnored) return null;
    return token;
  }

  Token startTagOther(StartTagToken token) =>
      parser._inTablePhase.processStartTag(token);

  /**
   * Closes the row when a `tr` is in table scope and returns to the
   * in-table-body phase; otherwise reports a parse error.
   */
  void endTagTr(EndTagToken token) {
    if (ignoreEndTagTr()) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return;
    }
    clearStackToTableRowContext();
    tree.openElements.removeLast();
    parser.phase = parser._inTableBodyPhase;
  }

  /**
   * Closes the row with a synthetic `tr` end tag and returns [token], or
   * `null` when that end tag was ignored.
   */
  Token endTagTable(EndTagToken token) {
    var rowEndIgnored = ignoreEndTagTr();
    endTagTr(new EndTagToken("tr"));
    // Reprocess the current tag if the tr end tag was not ignored
    // XXX how are we sure it's always ignored in the innerHTML case?
    if (rowEndIgnored) return null;
    return token;
  }

  /**
   * When the named row group is in table scope, closes the row and returns
   * [token]. Otherwise reports a parse error and returns `null`.
   */
  Token endTagTableRowGroup(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      parser.parseError(token.span, "undefined-error");
      return null;
    }
    endTagTr(new EndTagToken("tr"));
    return token;
  }

  void endTagIgnore(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag-in-table-row",
        {"name": token.name});
  }

  Token endTagOther(EndTagToken token) =>
      parser._inTablePhase.processEndTag(token);
}
| 2632 |
/**
 * Tree-construction phase that is active while a table cell (`td` or `th`)
 * is being parsed.
 *
 * Tokens that are not specific to cells are delegated to the parser's
 * in-body phase. Handlers that return a token hand it back to the caller,
 * presumably for reprocessing in whatever phase is then current.
 */
class InCellPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-cell
  InCellPhase(parser) : super(parser);

  /** Routes a start tag to the handler registered for its tag name. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "caption":
      case "col":
      case "colgroup":
      case "tbody":
      case "td":
      case "tfoot":
      case "th":
      case "thead":
      case "tr":
        return startTagTableOther(token);
      default:
        return startTagOther(token);
    }
  }

  /** Routes an end tag to the handler registered for its tag name. */
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "td":
      case "th":
        return endTagTableCell(token);
      case "body":
      case "caption":
      case "col":
      case "colgroup":
      case "html":
        return endTagIgnore(token);
      case "table":
      case "tbody":
      case "tfoot":
      case "thead":
      case "tr":
        return endTagImply(token);
      default:
        return endTagOther(token);
    }
  }

  /**
   * Closes the currently open cell, if any, by processing a synthetic end
   * tag. A `td` in table scope takes precedence over a `th`.
   */
  void closeCell() {
    for (var cellName in const ["td", "th"]) {
      if (tree.elementInScope(cellName, variant: "table")) {
        endTagTableCell(new EndTagToken(cellName));
        return;
      }
    }
  }

  /** Lets the in-body phase handle EOF, then returns `false`. */
  bool processEOF() {
    parser._inBodyPhase.processEOF();
    return false;
  }

  /** Character data inside a cell is handled by the in-body phase. */
  Token processCharacters(CharactersToken token) {
    final bodyPhase = parser._inBodyPhase;
    return bodyPhase.processCharacters(token);
  }

  /**
   * Handles a table-structure start tag seen inside a cell.
   *
   * If a cell is in table scope it is closed and [token] is returned;
   * otherwise an "undefined-error" parse error is reported and `null` is
   * returned.
   */
  Token startTagTableOther(StartTagToken token) {
    var cellIsOpen = tree.elementInScope("td", variant: "table") ||
        tree.elementInScope("th", variant: "table");
    if (cellIsOpen) {
      closeCell();
      return token;
    }
    // innerHTML case
    assert(parser.innerHTMLMode);
    parser.parseError(token.span, "undefined-error");
    return null;
  }

  /** Any other start tag is handled by the in-body phase. */
  Token startTagOther(StartTagToken token) {
    final bodyPhase = parser._inBodyPhase;
    return bodyPhase.processStartTag(token);
  }

  /**
   * Handles `</td>` and `</th>`.
   *
   * When the named cell is in table scope, implied end tags are generated,
   * the cell is popped (with a parse error if it was not the current node),
   * the active formatting elements are cleared and the parser switches to
   * the in-row phase. Otherwise only a parse error is reported.
   */
  void endTagTableCell(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
      return;
    }

    tree.generateImpliedEndTags(token.name);
    if (tree.openElements.last.tagName == token.name) {
      tree.openElements.removeLast();
    } else {
      parser.parseError(token.span, "unexpected-cell-end-tag",
          {"name": token.name});
      popOpenElementsUntil(token.name);
    }
    tree.clearActiveFormattingElements();
    parser.phase = parser._inRowPhase;
  }

  /** Reports an "unexpected-end-tag" parse error and ignores [token]. */
  void endTagIgnore(EndTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "unexpected-end-tag", details);
  }

  /**
   * Handles end tags of enclosing table structure.
   *
   * If an element named [token.name] is in table scope, the open cell is
   * closed and [token] is returned; otherwise an "undefined-error" parse
   * error is reported and `null` is returned.
   */
  Token endTagImply(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      // sometimes innerHTML case
      parser.parseError(token.span, "undefined-error");
      return null;
    }
    closeCell();
    return token;
  }

  /** Any other end tag is handled by the in-body phase. */
  Token endTagOther(EndTagToken token) {
    final bodyPhase = parser._inBodyPhase;
    return bodyPhase.processEndTag(token);
  }
}
| 2729 |
/**
 * Tree-construction phase that is active while the contents of a `select`
 * element are being parsed.
 */
class InSelectPhase extends Phase {
  InSelectPhase(parser) : super(parser);

  /** Routes a start tag to the handler registered for its tag name. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "option":
        return startTagOption(token);
      case "optgroup":
        return startTagOptgroup(token);
      case "select":
        return startTagSelect(token);
      case "input":
      case "keygen":
      case "textarea":
        return startTagInput(token);
      case "script":
        return startTagScript(token);
      default:
        return startTagOther(token);
    }
  }

  /** Routes an end tag to the handler registered for its tag name. */
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "option":
        return endTagOption(token);
      case "optgroup":
        return endTagOptgroup(token);
      case "select":
        return endTagSelect(token);
      default:
        return endTagOther(token);
    }
  }

  // http://www.whatwg.org/specs/web-apps/current-work/#in-select
  /**
   * Reports "eof-in-select" unless the current node is the `html` element,
   * then returns `false`.
   */
  bool processEOF() {
    var current = tree.openElements.last;
    if (current.tagName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError(current.sourceSpan, "eof-in-select");
    }
    return false;
  }

  /** Inserts character data as text, dropping a lone U+0000 token. */
  Token processCharacters(CharactersToken token) {
    if (token.data != "\u0000") {
      tree.insertText(token.data, token.span);
    }
    return null;
  }

  /** Inserts an `option`, first popping an `option` that is the current node. */
  void startTagOption(StartTagToken token) {
    // An <option> that is still the current node gets an implied </option>.
    var stack = tree.openElements;
    if (stack.last.tagName == "option") {
      stack.removeLast();
    }
    tree.insertElement(token);
  }

  /**
   * Inserts an `optgroup`, first popping a current `option` and then a
   * current `optgroup`.
   */
  void startTagOptgroup(StartTagToken token) {
    var stack = tree.openElements;
    if (stack.last.tagName == "option") {
      stack.removeLast();
    }
    if (stack.last.tagName == "optgroup") {
      stack.removeLast();
    }
    tree.insertElement(token);
  }

  /** A nested `<select>` is a parse error and is treated like `</select>`. */
  void startTagSelect(StartTagToken token) {
    parser.parseError(token.span, "unexpected-select-in-select");
    endTagSelect(new EndTagToken("select"));
  }

  /**
   * Handles `input`, `keygen` and `textarea` start tags.
   *
   * Always reports a parse error. If a `select` is in select scope it is
   * closed and [token] is returned; otherwise `null` is returned.
   */
  Token startTagInput(StartTagToken token) {
    parser.parseError(token.span, "unexpected-input-in-select");
    if (!tree.elementInScope("select", variant: "select")) {
      assert(parser.innerHTMLMode);
      return null;
    }
    endTagSelect(new EndTagToken("select"));
    return token;
  }

  /** `script` start tags are handled by the in-head phase. */
  Token startTagScript(StartTagToken token) {
    final headPhase = parser._inHeadPhase;
    return headPhase.processStartTag(token);
  }

  /** Any other start tag is a parse error and is ignored. */
  Token startTagOther(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-in-select",
        {"name": token.name});
    return null;
  }

  /** Pops the current `option`, or reports a parse error if it is not one. */
  void endTagOption(EndTagToken token) {
    var stack = tree.openElements;
    if (stack.last.tagName != "option") {
      parser.parseError(token.span, "unexpected-end-tag-in-select",
          {"name": "option"});
      return;
    }
    stack.removeLast();
  }

  /**
   * Handles `</optgroup>`: pops a current `option` whose parent on the stack
   * is an `optgroup`, then pops the `optgroup` itself. Reports a parse error
   * if the current node is not an `optgroup` at that point.
   */
  void endTagOptgroup(EndTagToken token) {
    var stack = tree.openElements;
    // </optgroup> implicitly closes <option>
    if (stack.last.tagName == "option" &&
        stack[stack.length - 2].tagName == "optgroup") {
      stack.removeLast();
    }
    // It also closes the <optgroup> itself, but nothing else.
    if (stack.last.tagName != "optgroup") {
      parser.parseError(token.span, "unexpected-end-tag-in-select",
          {"name": "optgroup"});
      return;
    }
    stack.removeLast();
  }

  /**
   * Handles `</select>`: pops elements up to the `select` and resets the
   * insertion mode, or reports "undefined-error" when no `select` is in
   * select scope.
   */
  void endTagSelect(EndTagToken token) {
    if (!tree.elementInScope("select", variant: "select")) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return;
    }
    popOpenElementsUntil("select");
    parser.resetInsertionMode();
  }

  /** Any other end tag is a parse error and is ignored. */
  void endTagOther(EndTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "unexpected-end-tag-in-select", details);
  }
}
| 2856 |
| 2857 |
/**
 * Tree-construction phase for a `select` that is nested inside table
 * content.
 *
 * Table-related tags get special handling here; everything else is
 * delegated to the parser's in-select phase.
 */
class InSelectInTablePhase extends Phase {
  InSelectInTablePhase(parser) : super(parser);

  /** Dispatches table-related start tags; the rest go to [startTagOther]. */
  processStartTag(StartTagToken token) {
    var tableTags = const [
      "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"
    ];
    if (tableTags.contains(token.name)) {
      return startTagTable(token);
    }
    return startTagOther(token);
  }

  /** Dispatches table-related end tags; the rest go to [endTagOther]. */
  processEndTag(EndTagToken token) {
    var tableTags = const [
      "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"
    ];
    if (tableTags.contains(token.name)) {
      return endTagTable(token);
    }
    return endTagOther(token);
  }

  /** Lets the in-select phase handle EOF, then returns `false`. */
  bool processEOF() {
    parser._inSelectPhase.processEOF();
    return false;
  }

  /** Character data is handled by the in-select phase. */
  Token processCharacters(CharactersToken token) {
    final selectPhase = parser._inSelectPhase;
    return selectPhase.processCharacters(token);
  }

  /**
   * A table-related start tag is a parse error: a synthetic `select` end
   * tag is processed and [token] is returned.
   */
  Token startTagTable(StartTagToken token) {
    parser.parseError(token.span,
        "unexpected-table-element-start-tag-in-select-in-table",
        {"name": token.name});
    endTagOther(new EndTagToken("select"));
    return token;
  }

  /** Any other start tag is handled by the in-select phase. */
  Token startTagOther(StartTagToken token) {
    final selectPhase = parser._inSelectPhase;
    return selectPhase.processStartTag(token);
  }

  /**
   * A table-related end tag is always a parse error. Only when an element
   * named [token.name] is in table scope is a synthetic `select` end tag
   * processed and [token] returned; otherwise `null` is returned.
   */
  Token endTagTable(EndTagToken token) {
    parser.parseError(token.span,
        "unexpected-table-element-end-tag-in-select-in-table",
        {"name": token.name});
    if (!tree.elementInScope(token.name, variant: "table")) {
      return null;
    }
    endTagOther(new EndTagToken("select"));
    return token;
  }

  /** Any other end tag is handled by the in-select phase. */
  Token endTagOther(EndTagToken token) {
    final selectPhase = parser._inSelectPhase;
    return selectPhase.processEndTag(token);
  }
}
| 2914 |
| 2915 |
/**
 * Tree-construction phase used while the current node is in a foreign
 * (non-default, e.g. SVG or MathML) namespace.
 */
class InForeignContentPhase extends Phase {
  // TODO(jmesserly): this is sorted so we could binary search.
  /**
   * HTML start tags that break out of foreign content when encountered.
   */
  static const breakoutElements = const [
    'b', 'big', 'blockquote', 'body', 'br','center', 'code', 'dd', 'div', 'dl',
    'dt', 'em', 'embed', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'i',
    'img', 'li', 'listing', 'menu', 'meta', 'nobr', 'ol', 'p', 'pre', 'ruby',
    's', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table', 'tt', 'u',
    'ul', 'var'
  ];

  InForeignContentPhase(parser) : super(parser);

  /**
   * Rewrites an all-lowercase SVG tag name on [token] to its mixed-case
   * form (for example `foreignobject` becomes `foreignObject`). Names that
   * are not in the table are left unchanged.
   */
  void adjustSVGTagNames(token) {
    final replacements = const {
      "altglyph":"altGlyph",
      "altglyphdef":"altGlyphDef",
      "altglyphitem":"altGlyphItem",
      "animatecolor":"animateColor",
      "animatemotion":"animateMotion",
      "animatetransform":"animateTransform",
      "clippath":"clipPath",
      "feblend":"feBlend",
      "fecolormatrix":"feColorMatrix",
      "fecomponenttransfer":"feComponentTransfer",
      "fecomposite":"feComposite",
      "feconvolvematrix":"feConvolveMatrix",
      "fediffuselighting":"feDiffuseLighting",
      "fedisplacementmap":"feDisplacementMap",
      "fedistantlight":"feDistantLight",
      "feflood":"feFlood",
      "fefunca":"feFuncA",
      "fefuncb":"feFuncB",
      "fefuncg":"feFuncG",
      "fefuncr":"feFuncR",
      "fegaussianblur":"feGaussianBlur",
      "feimage":"feImage",
      "femerge":"feMerge",
      "femergenode":"feMergeNode",
      "femorphology":"feMorphology",
      "feoffset":"feOffset",
      "fepointlight":"fePointLight",
      "fespecularlighting":"feSpecularLighting",
      "fespotlight":"feSpotLight",
      "fetile":"feTile",
      "feturbulence":"feTurbulence",
      "foreignobject":"foreignObject",
      "glyphref":"glyphRef",
      "lineargradient":"linearGradient",
      "radialgradient":"radialGradient",
      "textpath":"textPath"
    };

    var replace = replacements[token.name];
    if (replace != null) {
      token.name = replace;
    }
  }

  /**
   * Replaces a lone U+0000 with U+FFFD, clears `framesetOK` when
   * non-whitespace text is seen, then defers to the base-class handling.
   *
   * As in the original implementation, the base-class result is not
   * propagated; `null` is returned.
   */
  Token processCharacters(CharactersToken token) {
    if (token.data == "\u0000") {
      token.data = "\uFFFD";
    } else if (parser.framesetOK && !allWhitespace(token.data)) {
      parser.framesetOK = false;
    }
    super.processCharacters(token);
    return null;
  }

  /**
   * Handles a start tag in foreign content.
   *
   * Breakout elements (and `font` with a `color`, `face` or `size`
   * attribute) are a parse error: foreign elements are popped until the
   * current node is in the default namespace or is an integration point,
   * and [token] is returned. Any other tag is adjusted for the current
   * node's namespace and inserted, and `null` is returned.
   */
  Token processStartTag(StartTagToken token) {
    var currentNode = tree.openElements.last;
    if (breakoutElements.contains(token.name) ||
        (token.name == "font" &&
         (token.data.containsKey("color") ||
          token.data.containsKey("face") ||
          token.data.containsKey("size")))) {

      parser.parseError(token.span,
          "unexpected-html-element-in-foreign-content", {'name': token.name});
      while (tree.openElements.last.namespace !=
           tree.defaultNamespace &&
           !parser.isHTMLIntegrationPoint(tree.openElements.last) &&
           !parser.isMathMLTextIntegrationPoint(tree.openElements.last)) {
        tree.openElements.removeLast();
      }
      return token;
    }

    if (currentNode.namespace == Namespaces.mathml) {
      parser.adjustMathMLAttributes(token);
    } else if (currentNode.namespace == Namespaces.svg) {
      adjustSVGTagNames(token);
      parser.adjustSVGAttributes(token);
    }
    parser.adjustForeignAttributes(token);
    token.namespace = currentNode.namespace;
    tree.insertElement(token);
    if (token.selfClosing) {
      tree.openElements.removeLast();
      token.selfClosingAcknowledged = true;
    }
    return null;
  }

  /**
   * Handles an end tag in foreign content.
   *
   * Walks down the stack of open elements from the current node. If a node
   * whose lowercased tag name equals [token.name] is found, elements are
   * popped up to and including it and `null` is returned. If a node in the
   * default namespace is reached first, the token is handed to the parser's
   * current phase and that phase's result is returned.
   */
  Token processEndTag(EndTagToken token) {
    var nodeIndex = tree.openElements.length - 1;
    var node = tree.openElements.last;
    // Compare the lowercased tag name, exactly as the matching loop below
    // does. Comparing the raw name would report a spurious parse error for
    // mixed-case elements such as "foreignObject" closed by their own end tag.
    if (asciiUpper2Lower(node.tagName) != token.name) {
      parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
    }

    var newToken;
    while (true) {
      if (asciiUpper2Lower(node.tagName) == token.name) {
        //XXX this isn't in the spec but it seems necessary
        if (parser.phase == parser._inTableTextPhase) {
          InTableTextPhase inTableText = parser.phase;
          inTableText.flushCharacters();
          parser.phase = inTableText.originalPhase;
        }
        // Pop everything above the matched node, and the node itself.
        while (tree.openElements.removeLast() != node) {
          assert(tree.openElements.length > 0);
        }
        newToken = null;
        break;
      }
      nodeIndex -= 1;

      node = tree.openElements[nodeIndex];
      if (node.namespace != tree.defaultNamespace) {
        continue;
      }
      // Reached an element in the default namespace: let the current phase
      // deal with the end tag.
      newToken = parser.phase.processEndTag(token);
      break;
    }
    return newToken;
  }
}
| 3053 |
| 3054 |
/**
 * Tree-construction phase entered after the `body` element has been closed.
 *
 * Most unexpected content is a parse error that switches the parser back to
 * the in-body phase and hands the token back to the caller.
 */
class AfterBodyPhase extends Phase {
  AfterBodyPhase(parser) : super(parser);

  /** Dispatches `html` to [startTagHtml] and everything else to [startTagOther]. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      default:
        return startTagOther(token);
    }
  }

  /** Dispatches `html` to [endTagHtml] and everything else to [endTagOther]. */
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "html":
        return endTagHtml(token);
      default:
        return endTagOther(token);
    }
  }

  /** Stop parsing. */
  bool processEOF() {
    return false;
  }

  /**
   * Inserts the comment under the first open element rather than the current
   * node; per the original note, that first element is `<html>`.
   */
  Token processComment(CommentToken token) {
    var firstOpenElement = tree.openElements[0];
    tree.insertComment(token, firstOpenElement);
    return null;
  }

  /** Character data is a parse error; switches to in-body and returns [token]. */
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "unexpected-char-after-body");
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /** An `html` start tag is handled by the in-body phase. */
  Token startTagHtml(StartTagToken token) {
    final bodyPhase = parser._inBodyPhase;
    return bodyPhase.processStartTag(token);
  }

  /** Any other start tag is a parse error; switches to in-body and returns [token]. */
  Token startTagOther(StartTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "unexpected-start-tag-after-body", details);
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /**
   * Handles `</html>`: a parse error in innerHTML mode, otherwise a switch
   * to the after-after-body phase.
   */
  void endTagHtml(Token token) {
    if (!parser.innerHTMLMode) {
      parser.phase = parser._afterAfterBodyPhase;
      return;
    }
    parser.parseError(token.span, "unexpected-end-tag-after-body-innerhtml");
  }

  /** Any other end tag is a parse error; switches to in-body and returns [token]. */
  Token endTagOther(EndTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "unexpected-end-tag-after-body", details);
    parser.phase = parser._inBodyPhase;
    return token;
  }
}
| 3109 |
/**
 * Tree-construction phase that is active while the contents of a `frameset`
 * element are being parsed.
 */
class InFramesetPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
  InFramesetPhase(parser) : super(parser);

  /** Routes a start tag to the handler registered for its tag name. */
  processStartTag(StartTagToken token) {
    var tagName = token.name;
    if (tagName == "html") return startTagHtml(token);
    if (tagName == "frameset") return startTagFrameset(token);
    if (tagName == "frame") return startTagFrame(token);
    if (tagName == "noframes") return startTagNoframes(token);
    return startTagOther(token);
  }

  /** Routes `frameset` to [endTagFrameset] and everything else to [endTagOther]. */
  processEndTag(EndTagToken token) {
    if (token.name == "frameset") return endTagFrameset(token);
    return endTagOther(token);
  }

  /**
   * Reports "eof-in-frameset" unless the current node is the `html` element,
   * then returns `false`.
   */
  bool processEOF() {
    var current = tree.openElements.last;
    if (current.tagName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError(current.sourceSpan, "eof-in-frameset");
    }
    return false;
  }

  /** Character data is a parse error and is dropped. */
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "unexpected-char-in-frameset");
    return null;
  }

  /** Inserts a nested `frameset` element. */
  void startTagFrameset(StartTagToken token) {
    tree.insertElement(token);
  }

  /** Inserts a `frame` element and immediately pops it off the stack. */
  void startTagFrame(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
  }

  /** `noframes` start tags are handled by the in-body phase. */
  Token startTagNoframes(StartTagToken token) {
    final bodyPhase = parser._inBodyPhase;
    return bodyPhase.processStartTag(token);
  }

  /** Any other start tag is a parse error and is ignored. */
  Token startTagOther(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-in-frameset",
        {"name": token.name});
    return null;
  }

  /**
   * Handles `</frameset>`: pops the current node unless it is `html` (which
   * is a parse error instead). Outside innerHTML mode, switches to the
   * after-frameset phase once the current node is no longer a `frameset`.
   */
  void endTagFrameset(EndTagToken token) {
    if (tree.openElements.last.tagName != "html") {
      tree.openElements.removeLast();
    } else {
      // innerHTML case
      parser.parseError(token.span,
          "unexpected-frameset-in-frameset-innerhtml");
    }

    if (parser.innerHTMLMode) return;
    // Not in innerHTML mode: leave this phase when the current node is not a
    // "frameset" element (anymore).
    if (tree.openElements.last.tagName != "frameset") {
      parser.phase = parser._afterFramesetPhase;
    }
  }

  /** Any other end tag is a parse error and is ignored. */
  void endTagOther(EndTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "unexpected-end-tag-in-frameset", details);
  }
}
| 3183 |
| 3184 |
/**
 * Tree-construction phase entered after the outermost `frameset` has been
 * closed.
 */
class AfterFramesetPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#after3
  AfterFramesetPhase(parser) : super(parser);

  /** Routes a start tag to the handler registered for its tag name. */
  processStartTag(StartTagToken token) {
    var tagName = token.name;
    if (tagName == "html") return startTagHtml(token);
    if (tagName == "noframes") return startTagNoframes(token);
    return startTagOther(token);
  }

  /** Routes `html` to [endTagHtml] and everything else to [endTagOther]. */
  processEndTag(EndTagToken token) {
    if (token.name == "html") return endTagHtml(token);
    return endTagOther(token);
  }

  /** Stop parsing. */
  bool processEOF() {
    return false;
  }

  /** Character data is a parse error and is dropped. */
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "unexpected-char-after-frameset");
    return null;
  }

  /** `noframes` start tags are handled by the in-head phase. */
  Token startTagNoframes(StartTagToken token) {
    final headPhase = parser._inHeadPhase;
    return headPhase.processStartTag(token);
  }

  /** Any other start tag is a parse error and is ignored. */
  void startTagOther(StartTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "unexpected-start-tag-after-frameset",
        details);
  }

  /** `</html>` switches the parser to the after-after-frameset phase. */
  void endTagHtml(EndTagToken token) {
    parser.phase = parser._afterAfterFramesetPhase;
  }

  /** Any other end tag is a parse error and is ignored. */
  void endTagOther(EndTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "unexpected-end-tag-after-frameset",
        details);
  }
}
| 3229 |
| 3230 |
/**
 * Tree-construction phase entered after `</html>` has been seen following
 * the body.
 *
 * Only comments and whitespace are expected here. Anything else is a parse
 * error that switches the parser back to the in-body phase and hands the
 * token back to the caller.
 */
class AfterAfterBodyPhase extends Phase {
  AfterAfterBodyPhase(parser) : super(parser);

  /** Dispatches `html` to [startTagHtml] and everything else to [startTagOther]. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case 'html':
        return startTagHtml(token);
      default:
        return startTagOther(token);
    }
  }

  /** Stop parsing. */
  bool processEOF() {
    return false;
  }

  /** Inserts the comment as a child of the document itself. */
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  /** Whitespace is handled by the in-body phase. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    final bodyPhase = parser._inBodyPhase;
    return bodyPhase.processSpaceCharacters(token);
  }

  /** Character data is a parse error; switches to in-body and returns [token]. */
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "expected-eof-but-got-char");
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /** An `html` start tag is handled by the in-body phase. */
  Token startTagHtml(StartTagToken token) {
    final bodyPhase = parser._inBodyPhase;
    return bodyPhase.processStartTag(token);
  }

  /** Any other start tag is a parse error; switches to in-body and returns [token]. */
  Token startTagOther(StartTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "expected-eof-but-got-start-tag", details);
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /** Any end tag is a parse error; switches to in-body and returns [token]. */
  Token processEndTag(EndTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "expected-eof-but-got-end-tag", details);
    parser.phase = parser._inBodyPhase;
    return token;
  }
}
| 3273 |
/**
 * Tree-construction phase entered after `</html>` has been seen following a
 * frameset.
 *
 * Only comments, whitespace, `html` and `noframes` are acted upon; anything
 * else is reported as a parse error and dropped.
 */
class AfterAfterFramesetPhase extends Phase {
  AfterAfterFramesetPhase(parser) : super(parser);

  /** Routes a start tag to the handler registered for its tag name. */
  processStartTag(StartTagToken token) {
    var tagName = token.name;
    if (tagName == "html") return startTagHtml(token);
    if (tagName == "noframes") return startTagNoFrames(token);
    return startTagOther(token);
  }

  /** Stop parsing. */
  bool processEOF() {
    return false;
  }

  /** Inserts the comment as a child of the document itself. */
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  /** Whitespace is handled by the in-body phase. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    final bodyPhase = parser._inBodyPhase;
    return bodyPhase.processSpaceCharacters(token);
  }

  /** Character data is a parse error and is dropped. */
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "expected-eof-but-got-char");
    return null;
  }

  /** An `html` start tag is handled by the in-body phase. */
  Token startTagHtml(StartTagToken token) {
    final bodyPhase = parser._inBodyPhase;
    return bodyPhase.processStartTag(token);
  }

  /** `noframes` start tags are handled by the in-head phase. */
  Token startTagNoFrames(StartTagToken token) {
    final headPhase = parser._inHeadPhase;
    return headPhase.processStartTag(token);
  }

  /** Any other start tag is a parse error and is ignored. */
  void startTagOther(StartTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "expected-eof-but-got-start-tag", details);
  }

  /** Any end tag is a parse error and is ignored. */
  Token processEndTag(EndTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "expected-eof-but-got-end-tag", details);
    return null;
  }
}
| 3317 |
| 3318 |
/**
 * An error found in a parsed document, identified by an [errorCode] and
 * located by a [span].
 */
class ParseError implements Exception {
  /** Key used to look up the message template in `errorMessages`. */
  final String errorCode;

  /** Source location the error refers to. */
  final Span span;

  /** Values substituted into the message template by [message]. */
  final Map data;

  ParseError(this.errorCode, this.span, this.data);

  /** The line of the start of [span], as reported by the span itself. */
  int get line {
    return span.start.line;
  }

  /** The column of the start of [span], as reported by the span itself. */
  int get column {
    return span.start.column;
  }

  /**
   * The human readable message for this error, without location details.
   *
   * Use [span.getLocationMessage] or [toString] for a message that includes
   * span information. When the span has a source url, the two produce the
   * same text. Otherwise [toString] additionally prepends 'ParserError' to
   * the location message.
   */
  String get message {
    var template = errorMessages[errorCode];
    return formatStr(template, data);
  }

  String toString() {
    var located = span.getLocationMessage(message);
    if (span.sourceUrl != null) {
      return located;
    }
    return 'ParserError$located';
  }
}
OLD | NEW |