OLD | NEW |
(Empty) | |
| 1 /// This library has a parser for HTML5 documents, that lets you parse HTML |
| 2 /// easily from a script or server side application: |
| 3 /// |
| 4 /// import 'package:html5lib/parser.dart' show parse; |
| 5 /// import 'package:html5lib/dom.dart'; |
| 6 /// main() { |
| 7 /// var document = parse( |
| 8 /// '<body>Hello world! <a href="www.html5rocks.com">HTML5 rocks!'); |
| 9 /// print(document.outerHtml); |
| 10 /// } |
| 11 /// |
| 12 /// The resulting document you get back has a DOM-like API for easy tree |
| 13 /// traversal and manipulation. |
| 14 library parser; |
| 15 |
| 16 import 'dart:collection'; |
| 17 import 'dart:math'; |
| 18 import 'package:source_span/source_span.dart'; |
| 19 |
| 20 import 'src/treebuilder.dart'; |
| 21 import 'src/constants.dart'; |
| 22 import 'src/encoding_parser.dart'; |
| 23 import 'src/token.dart'; |
| 24 import 'src/tokenizer.dart'; |
| 25 import 'src/utils.dart'; |
| 26 import 'dom.dart'; |
| 27 |
/// Parses the [input] HTML5 document into a tree.
///
/// The [input] can be a [String], a [List<int>] of bytes or an
/// [HtmlTokenizer].
///
/// If [input] is not an [HtmlTokenizer], you can optionally specify the
/// file's [encoding], which must be a string. If specified, that encoding is
/// used regardless of any BOM or later declaration (such as in a meta
/// element).
///
/// Set [generateSpans] if you want to generate [SourceSpan]s, otherwise the
/// [Node.sourceSpan] property will be `null`. When using [generateSpans] you
/// can additionally pass [sourceUrl] to indicate where the [input] was
/// extracted from.
Document parse(input, {String encoding, bool generateSpans: false,
    String sourceUrl}) {
  // All options are forwarded unchanged; the parser does the real work.
  return new HtmlParser(input,
      encoding: encoding,
      generateSpans: generateSpans,
      sourceUrl: sourceUrl).parse();
}
| 45 |
| 46 |
/// Parses the [input] HTML5 document fragment into a tree.
///
/// The [input] can be a [String], a [List<int>] of bytes or an
/// [HtmlTokenizer]. The [container] element can optionally be specified,
/// otherwise it defaults to "div".
///
/// If [input] is not an [HtmlTokenizer], you can optionally specify the
/// file's [encoding], which must be a string. If specified, that encoding is
/// used regardless of any BOM or later declaration (such as in a meta
/// element).
///
/// Set [generateSpans] if you want to generate [SourceSpan]s, otherwise the
/// [Node.sourceSpan] property will be `null`. When using [generateSpans] you
/// can additionally pass [sourceUrl] to indicate where the [input] was
/// extracted from.
DocumentFragment parseFragment(input, {String container: "div",
    String encoding, bool generateSpans: false, String sourceUrl}) {
  // The container is not a constructor option; it is handed to the
  // fragment entry point of the parser instead.
  return new HtmlParser(input,
      encoding: encoding,
      generateSpans: generateSpans,
      sourceUrl: sourceUrl).parseFragment(container);
}
| 65 |
| 66 |
/// Parser for HTML, which generates a tree structure from a stream of
/// (possibly malformed) characters.
///
/// The parser pulls tokens from [tokenizer] and dispatches each one to the
/// currently active [Phase], which in turn drives the [tree] builder.
class HtmlParser {
  /// Raise an exception on the first error encountered.
  final bool strict;

  /// True to generate [SourceSpan]s for the [Node.sourceSpan] property.
  final bool generateSpans;

  /// Source of the tokens consumed by [mainLoop].
  final HtmlTokenizer tokenizer;

  /// Builder that the phases use to construct the resulting tree.
  final TreeBuilder tree;

  /// Parse errors recorded by [parseError]. Cleared on every [reset].
  final List<ParseError> errors = <ParseError>[];

  // NOTE(review): this field is neither read nor written inside this class
  // (the `container` parameter of [parseFragment] shadows it). It may be used
  // elsewhere; confirm before removing.
  String container;

  /// Set by the before-html phase when the start tag that triggers creation
  /// of the root element is itself an `html` tag; cleared by [reset] and by
  /// [Phase.startTagHtml].
  bool firstStartTag = false;

  // TODO(jmesserly): use enum?
  /// "quirks" / "limited quirks" / "no quirks"
  String compatMode = "no quirks";

  /// innerHTML container when parsing document fragment.
  ///
  /// `null` when parsing a whole document, see [innerHTMLMode].
  String innerHTML;

  /// The phase that receives the next token, unless the token is routed to
  /// the foreign-content phase by [mainLoop].
  Phase phase;

  // Cleared by [reset]. NOTE(review): not otherwise touched inside this
  // class; presumably maintained by the phase objects - confirm.
  Phase lastPhase;

  /// The phase that was active when [parseRCDataRawtext] switched to the text
  /// phase.
  Phase originalPhase;

  // Cleared by [reset]. NOTE(review): not otherwise touched inside this
  // class; presumably maintained by the phase objects - confirm.
  Phase beforeRCDataPhase;

  // Set to true by [reset]. NOTE(review): presumably the spec's
  // "frameset-ok" flag, updated by the phase objects - confirm.
  bool framesetOK;

  // These fields hold the different phase singletons. At any given time one
  // of them will be active.
  InitialPhase _initialPhase;
  BeforeHtmlPhase _beforeHtmlPhase;
  BeforeHeadPhase _beforeHeadPhase;
  InHeadPhase _inHeadPhase;
  AfterHeadPhase _afterHeadPhase;
  InBodyPhase _inBodyPhase;
  TextPhase _textPhase;
  InTablePhase _inTablePhase;
  InTableTextPhase _inTableTextPhase;
  InCaptionPhase _inCaptionPhase;
  InColumnGroupPhase _inColumnGroupPhase;
  InTableBodyPhase _inTableBodyPhase;
  InRowPhase _inRowPhase;
  InCellPhase _inCellPhase;
  InSelectPhase _inSelectPhase;
  InSelectInTablePhase _inSelectInTablePhase;
  InForeignContentPhase _inForeignContentPhase;
  AfterBodyPhase _afterBodyPhase;
  InFramesetPhase _inFramesetPhase;
  AfterFramesetPhase _afterFramesetPhase;
  AfterAfterBodyPhase _afterAfterBodyPhase;
  AfterAfterFramesetPhase _afterAfterFramesetPhase;

  /// Create an HtmlParser and configure the [tree] builder and [strict] mode.
  /// The [input] can be a [String], [List<int>] of bytes or an [HtmlTokenizer].
  ///
  /// If [input] is not a [HtmlTokenizer], you can specify a few more arguments.
  ///
  /// The [encoding] must be a string that indicates the encoding. If specified,
  /// that encoding will be used, regardless of any BOM or later declaration
  /// (such as in a meta element).
  ///
  /// Set [parseMeta] to false if you want to disable parsing the meta element.
  ///
  /// Set [lowercaseElementName] or [lowercaseAttrName] to false to disable the
  /// automatic conversion of element and attribute names to lower case. Note
  /// that the standard way to parse HTML is to lowercase, which is what the
  /// browser DOM will do if you request [Node.outerHTML], for example.
  ///
  /// If [tree] is omitted, a new `TreeBuilder(true)` is used.
  HtmlParser(input, {String encoding, bool parseMeta: true,
      bool lowercaseElementName: true, bool lowercaseAttrName: true,
      this.strict: false, bool generateSpans: false, String sourceUrl,
      TreeBuilder tree})
      : generateSpans = generateSpans,
        tree = tree != null ? tree : new TreeBuilder(true),
        tokenizer = (input is HtmlTokenizer ? input :
            new HtmlTokenizer(input, encoding: encoding, parseMeta: parseMeta,
                lowercaseElementName: lowercaseElementName,
                lowercaseAttrName: lowercaseAttrName,
                generateSpans: generateSpans, sourceUrl: sourceUrl)) {

    tokenizer.parser = this;
    _initialPhase = new InitialPhase(this);
    _beforeHtmlPhase = new BeforeHtmlPhase(this);
    _beforeHeadPhase = new BeforeHeadPhase(this);
    _inHeadPhase = new InHeadPhase(this);
    // TODO(jmesserly): html5lib did not implement the no script parsing mode
    // More information here:
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#scripting-flag
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inheadnoscript
    // "inHeadNoscript": new InHeadNoScriptPhase(this);
    _afterHeadPhase = new AfterHeadPhase(this);
    _inBodyPhase = new InBodyPhase(this);
    _textPhase = new TextPhase(this);
    _inTablePhase = new InTablePhase(this);
    _inTableTextPhase = new InTableTextPhase(this);
    _inCaptionPhase = new InCaptionPhase(this);
    _inColumnGroupPhase = new InColumnGroupPhase(this);
    _inTableBodyPhase = new InTableBodyPhase(this);
    _inRowPhase = new InRowPhase(this);
    _inCellPhase = new InCellPhase(this);
    _inSelectPhase = new InSelectPhase(this);
    _inSelectInTablePhase = new InSelectInTablePhase(this);
    _inForeignContentPhase = new InForeignContentPhase(this);
    _afterBodyPhase = new AfterBodyPhase(this);
    _inFramesetPhase = new InFramesetPhase(this);
    _afterFramesetPhase = new AfterFramesetPhase(this);
    _afterAfterBodyPhase = new AfterAfterBodyPhase(this);
    _afterAfterFramesetPhase = new AfterAfterFramesetPhase(this);
  }

  /// Whether the parser is parsing a fragment (an [innerHTML] container has
  /// been set) rather than a whole document.
  bool get innerHTMLMode => innerHTML != null;

  /// Parse an html5 document into a tree.
  /// After parsing, [errors] will be populated with parse errors, if any.
  Document parse() {
    innerHTML = null;
    _parse();
    return tree.getDocument();
  }

  /// Parse an html5 document fragment into a tree.
  /// Pass a [container] to change the type of the containing element.
  /// After parsing, [errors] will be populated with parse errors, if any.
  ///
  /// Throws an [ArgumentError] if [container] is `null`.
  DocumentFragment parseFragment([String container = "div"]) {
    if (container == null) throw new ArgumentError('container');
    innerHTML = container.toLowerCase();
    _parse();
    return tree.getFragment();
  }

  /// Runs [mainLoop] to completion, starting over from a clean state each
  /// time a [ReparseException] is thrown.
  void _parse() {
    reset();

    while (true) {
      try {
        mainLoop();
        break;
      } on ReparseException {
        // Note: this happens if we start parsing but the character encoding
        // changes. So we should only need to restart very early in the parse.
        reset();
      }
    }
  }

  /// Restores the tokenizer, the tree builder and the parser state so that
  /// parsing can start (or restart) from the beginning of the input.
  ///
  /// In fragment mode the tokenizer state is chosen from the container name,
  /// a root element is inserted and the phase is derived from the container
  /// by [resetInsertionMode]. Otherwise parsing starts in the initial phase.
  void reset() {
    tokenizer.reset();

    tree.reset();
    firstStartTag = false;
    errors.clear();
    // "quirks" / "limited quirks" / "no quirks"
    compatMode = "no quirks";

    if (innerHTMLMode) {
      // NOTE(review): the set names look swapped relative to the tokenizer
      // states they select (cdataElements -> rcdataState, rcdataElements ->
      // rawtextState). The sets are defined outside this class, so check
      // their contents before changing this pairing.
      if (cdataElements.contains(innerHTML)) {
        tokenizer.state = tokenizer.rcdataState;
      } else if (rcdataElements.contains(innerHTML)) {
        tokenizer.state = tokenizer.rawtextState;
      } else if (innerHTML == 'plaintext') {
        tokenizer.state = tokenizer.plaintextState;
      } else {
        // state already is data state
        // tokenizer.state = tokenizer.dataState;
      }
      phase = _beforeHtmlPhase;
      _beforeHtmlPhase.insertHtmlElement();
      resetInsertionMode();
    } else {
      phase = _initialPhase;
    }

    lastPhase = null;
    beforeRCDataPhase = null;
    framesetOK = true;
  }

  /// Whether [element] is an HTML integration point.
  ///
  /// A MathML `annotation-xml` element qualifies when its `encoding`
  /// attribute is, ignoring ASCII case, "text/html" or
  /// "application/xhtml+xml". Any other element qualifies when its
  /// (namespace, local name) pair is in `htmlIntegrationPointElements`.
  bool isHTMLIntegrationPoint(Element element) {
    if (element.localName == "annotation-xml" &&
        element.namespaceUri == Namespaces.mathml) {
      var enc = element.attributes["encoding"];
      if (enc != null) enc = asciiUpper2Lower(enc);
      return enc == "text/html" || enc == "application/xhtml+xml";
    } else {
      return htmlIntegrationPointElements.contains(
          new Pair(element.namespaceUri, element.localName));
    }
  }

  /// Whether the (namespace, local name) pair of [element] is in
  /// `mathmlTextIntegrationPointElements`.
  bool isMathMLTextIntegrationPoint(Element element) {
    return mathmlTextIntegrationPointElements.contains(
        new Pair(element.namespaceUri, element.localName));
  }

  /// Whether [token], whose kind is [type], must be handled by the
  /// foreign-content phase instead of the current [phase].
  ///
  /// This is decided from the last entry of the open-elements stack: the
  /// result is `false` when the stack is empty, when that element is in the
  /// tree's default namespace, or when one of the integration-point
  /// exceptions below applies.
  bool inForeignContent(Token token, int type) {
    if (tree.openElements.isEmpty) return false;

    var node = tree.openElements.last;
    if (node.namespaceUri == tree.defaultNamespace) return false;

    if (isMathMLTextIntegrationPoint(node)) {
      if (type == TokenKind.startTag) {
        var tagName = (token as StartTagToken).name;
        if (tagName != "mglyph" && tagName != "malignmark") return false;
      }
      if (type == TokenKind.characters || type == TokenKind.spaceCharacters) {
        return false;
      }
    }

    if (node.localName == "annotation-xml" && type == TokenKind.startTag &&
        (token as StartTagToken).name == "svg") {
      return false;
    }

    if (isHTMLIntegrationPoint(node)) {
      if (type == TokenKind.startTag ||
          type == TokenKind.characters ||
          type == TokenKind.spaceCharacters) {
        return false;
      }
    }

    return true;
  }

  /// Pulls every token from the [tokenizer], dispatches it to the proper
  /// phase, and finally lets the phases process end-of-file.
  ///
  /// A phase method may return a token to have it processed again (by
  /// whatever phase is current at that point); returning `null` finishes the
  /// token.
  void mainLoop() {
    while (tokenizer.moveNext()) {
      var token = tokenizer.current;
      var newToken = token;
      int type;
      while (newToken != null) {
        type = newToken.kind;

        // Note: avoid "is" test here, see http://dartbug.com/4795
        if (type == TokenKind.parseError) {
          ParseErrorToken error = newToken;
          parseError(error.span, error.data, error.messageParams);
          newToken = null;
        } else {
          Phase phase_ = phase;
          if (inForeignContent(token, type)) {
            phase_ = _inForeignContentPhase;
          }

          switch (type) {
            case TokenKind.characters:
              newToken = phase_.processCharacters(newToken);
              break;
            case TokenKind.spaceCharacters:
              newToken = phase_.processSpaceCharacters(newToken);
              break;
            case TokenKind.startTag:
              newToken = phase_.processStartTag(newToken);
              break;
            case TokenKind.endTag:
              newToken = phase_.processEndTag(newToken);
              break;
            case TokenKind.comment:
              newToken = phase_.processComment(newToken);
              break;
            case TokenKind.doctype:
              newToken = phase_.processDoctype(newToken);
              break;
          }
        }
      }

      // A self-closing start tag that no phase acknowledged is an error.
      if (token is StartTagToken) {
        if (token.selfClosing && !token.selfClosingAcknowledged) {
          parseError(token.span, "non-void-element-with-trailing-solidus",
              {"name": token.name});
        }
      }
    }

    // When the loop finishes it's EOF
    var reprocess = true;
    var reprocessPhases = [];
    while (reprocess) {
      reprocessPhases.add(phase);
      reprocess = phase.processEOF();
      if (reprocess) {
        // Guards against phases bouncing EOF between each other forever.
        assert(!reprocessPhases.contains(phase));
      }
    }
  }

  /// The last span available. Used for EOF errors if we don't have something
  /// better.
  ///
  /// Returns `null` when the tokenizer's stream has no file info.
  SourceSpan get _lastSpan {
    if (tokenizer.stream.fileInfo == null) return null;
    var pos = tokenizer.stream.position;
    return tokenizer.stream.fileInfo.location(pos).pointSpan();
  }

  /// Records a [ParseError] for [errorcode] at [span], with [datavars] as the
  /// message parameters, and throws it when [strict] is set.
  ///
  /// When spans are not being generated and no [span] was supplied, the
  /// error is located at [_lastSpan] instead.
  void parseError(SourceSpan span, String errorcode,
      [Map datavars = const {}]) {

    if (!generateSpans && span == null) {
      span = _lastSpan;
    }

    var err = new ParseError(errorcode, span, datavars);
    errors.add(err);
    if (strict) throw err;
  }

  /// Renames the lower-cased `definitionurl` attribute of [token], if
  /// present, to its mixed-case form `definitionURL`.
  void adjustMathMLAttributes(StartTagToken token) {
    var orig = token.data.remove("definitionurl");
    if (orig != null) {
      token.data["definitionURL"] = orig;
    }
  }

  /// Renames the lower-cased SVG attribute names of [token] to their
  /// mixed-case forms, keeping the attribute values.
  void adjustSVGAttributes(StartTagToken token) {
    final replacements = const {
      "attributename": "attributeName",
      "attributetype": "attributeType",
      "basefrequency": "baseFrequency",
      "baseprofile": "baseProfile",
      "calcmode": "calcMode",
      "clippathunits": "clipPathUnits",
      "contentscripttype": "contentScriptType",
      "contentstyletype": "contentStyleType",
      "diffuseconstant": "diffuseConstant",
      "edgemode": "edgeMode",
      "externalresourcesrequired": "externalResourcesRequired",
      "filterres": "filterRes",
      "filterunits": "filterUnits",
      "glyphref": "glyphRef",
      "gradienttransform": "gradientTransform",
      "gradientunits": "gradientUnits",
      "kernelmatrix": "kernelMatrix",
      "kernelunitlength": "kernelUnitLength",
      "keypoints": "keyPoints",
      "keysplines": "keySplines",
      "keytimes": "keyTimes",
      "lengthadjust": "lengthAdjust",
      "limitingconeangle": "limitingConeAngle",
      "markerheight": "markerHeight",
      "markerunits": "markerUnits",
      "markerwidth": "markerWidth",
      "maskcontentunits": "maskContentUnits",
      "maskunits": "maskUnits",
      "numoctaves": "numOctaves",
      "pathlength": "pathLength",
      "patterncontentunits": "patternContentUnits",
      "patterntransform": "patternTransform",
      "patternunits": "patternUnits",
      "pointsatx": "pointsAtX",
      "pointsaty": "pointsAtY",
      "pointsatz": "pointsAtZ",
      "preservealpha": "preserveAlpha",
      "preserveaspectratio": "preserveAspectRatio",
      "primitiveunits": "primitiveUnits",
      "refx": "refX",
      "refy": "refY",
      "repeatcount": "repeatCount",
      "repeatdur": "repeatDur",
      "requiredextensions": "requiredExtensions",
      "requiredfeatures": "requiredFeatures",
      "specularconstant": "specularConstant",
      "specularexponent": "specularExponent",
      "spreadmethod": "spreadMethod",
      "startoffset": "startOffset",
      "stddeviation": "stdDeviation",
      "stitchtiles": "stitchTiles",
      "surfacescale": "surfaceScale",
      "systemlanguage": "systemLanguage",
      "tablevalues": "tableValues",
      "targetx": "targetX",
      "targety": "targetY",
      "textlength": "textLength",
      "viewbox": "viewBox",
      "viewtarget": "viewTarget",
      "xchannelselector": "xChannelSelector",
      "ychannelselector": "yChannelSelector",
      "zoomandpan": "zoomAndPan"
    };
    // Iterate over a copy of the keys: the map is modified inside the loop.
    for (var originalName in token.data.keys.toList()) {
      var svgName = replacements[originalName];
      if (svgName != null) {
        token.data[svgName] = token.data.remove(originalName);
      }
    }
  }

  /// Replaces the string keys of the known `xlink:`, `xml:` and `xmlns`
  /// attributes of [token] with namespaced [AttributeName] keys, keeping the
  /// attribute values.
  void adjustForeignAttributes(StartTagToken token) {
    // TODO(jmesserly): I don't like mixing non-string objects with strings in
    // the Node.attributes Map. Is there another solution?
    final replacements = const {
      "xlink:actuate": const AttributeName("xlink", "actuate",
            Namespaces.xlink),
      "xlink:arcrole": const AttributeName("xlink", "arcrole",
            Namespaces.xlink),
      "xlink:href": const AttributeName("xlink", "href", Namespaces.xlink),
      "xlink:role": const AttributeName("xlink", "role", Namespaces.xlink),
      "xlink:show": const AttributeName("xlink", "show", Namespaces.xlink),
      "xlink:title": const AttributeName("xlink", "title", Namespaces.xlink),
      "xlink:type": const AttributeName("xlink", "type", Namespaces.xlink),
      "xml:base": const AttributeName("xml", "base", Namespaces.xml),
      "xml:lang": const AttributeName("xml", "lang", Namespaces.xml),
      "xml:space": const AttributeName("xml", "space", Namespaces.xml),
      "xmlns": const AttributeName(null, "xmlns", Namespaces.xmlns),
      "xmlns:xlink": const AttributeName("xmlns", "xlink", Namespaces.xmlns)
    };

    // Iterate over a copy of the keys: the map is modified inside the loop.
    for (var originalName in token.data.keys.toList()) {
      var foreignName = replacements[originalName];
      if (foreignName != null) {
        token.data[foreignName] = token.data.remove(originalName);
      }
    }
  }

  /// Chooses [phase] from the stack of open elements, walking from the most
  /// recently opened element towards the root.
  ///
  /// For the bottom-most element the fragment container name ([innerHTML])
  /// is used in place of the element's own name. Falls back to the in-body
  /// phase when no element selects a phase.
  void resetInsertionMode() {
    // The name of this method is mostly historical. (It's also used in the
    // specification.)
    for (var node in tree.openElements.reversed) {
      var nodeName = node.localName;
      bool last = node == tree.openElements[0];
      if (last) {
        assert(innerHTMLMode);
        nodeName = innerHTML;
      }
      // Check for conditions that should only happen in the innerHTML
      // case
      switch (nodeName) {
        case "select": case "colgroup": case "head": case "html":
          assert(innerHTMLMode);
          break;
      }
      // Elements outside the default namespace never select a phase, except
      // for the bottom-most one, which stands in for the container.
      if (!last && node.namespaceUri != tree.defaultNamespace) {
        continue;
      }
      switch (nodeName) {
        case "select": phase = _inSelectPhase; return;
        case "td": phase = _inCellPhase; return;
        case "th": phase = _inCellPhase; return;
        case "tr": phase = _inRowPhase; return;
        case "tbody": phase = _inTableBodyPhase; return;
        case "thead": phase = _inTableBodyPhase; return;
        case "tfoot": phase = _inTableBodyPhase; return;
        case "caption": phase = _inCaptionPhase; return;
        case "colgroup": phase = _inColumnGroupPhase; return;
        case "table": phase = _inTablePhase; return;
        case "head": phase = _inBodyPhase; return;
        case "body": phase = _inBodyPhase; return;
        case "frameset": phase = _inFramesetPhase; return;
        case "html": phase = _beforeHeadPhase; return;
      }
    }
    phase = _inBodyPhase;
  }

  /// Generic RCDATA/RAWTEXT Parsing algorithm
  /// [contentType] - RCDATA or RAWTEXT
  ///
  /// Inserts an element for [token], switches the tokenizer to the matching
  /// state, remembers the current phase in [originalPhase] and makes the
  /// text phase current.
  void parseRCDataRawtext(Token token, String contentType) {
    assert(contentType == "RAWTEXT" || contentType == "RCDATA");

    tree.insertElement(token);

    if (contentType == "RAWTEXT") {
      tokenizer.state = tokenizer.rawtextState;
    } else {
      tokenizer.state = tokenizer.rcdataState;
    }

    originalPhase = phase;
    phase = _textPhase;
  }
}
| 549 |
| 550 |
/// Base class for the helper objects that implement one phase of processing
/// each.
///
/// A `process*` method returns `null` once the token has been dealt with, or
/// a token that the parser should process again.
class Phase {
  // Members of a phase should appear in this order (any may be omitted):
  // * EOF
  // * Comment
  // * Doctype
  // * SpaceCharacters
  // * Characters
  // * StartTag
  //   - startTag* methods
  // * EndTag
  //   - endTag* methods

  /// The parser that owns this phase.
  final HtmlParser parser;

  /// The tree builder of [parser], cached at construction time.
  final TreeBuilder tree;

  Phase(HtmlParser parser)
      : this.parser = parser,
        this.tree = parser.tree;

  /// Handles end-of-file. The base implementation is not usable and always
  /// throws an [UnimplementedError].
  bool processEOF() => throw new UnimplementedError();

  /// Adds the comment to the most recently opened element.
  Token processComment(CommentToken token) {
    // Correct for most phases; the others override this method.
    var currentNode = tree.openElements.last;
    tree.insertComment(token, currentNode);
    return null;
  }

  /// Reports the doctype as unexpected and drops it.
  Token processDoctype(DoctypeToken token) {
    parser.parseError(token.span, "unexpected-doctype");
    return null;
  }

  /// Inserts the token's text into the tree.
  Token processCharacters(CharactersToken token) {
    tree.insertText(token.data, token.span);
    return null;
  }

  /// Inserts the token's whitespace into the tree.
  Token processSpaceCharacters(SpaceCharactersToken token) {
    tree.insertText(token.data, token.span);
    return null;
  }

  /// Handles a start tag. The base implementation is not usable and always
  /// throws an [UnimplementedError].
  Token processStartTag(StartTagToken token) => throw new UnimplementedError();

  /// Handles an `html` start tag: every attribute of [token] that the root
  /// element does not have yet is copied onto the root element.
  Token startTagHtml(StartTagToken token) {
    var isHtmlTag = token.name == "html";
    if (isHtmlTag && parser.firstStartTag == false) {
      parser.parseError(token.span, "non-html-root");
    }
    // XXX Need a check here to see if the first start tag token emitted is
    // this token... If it's not, invoke parser.parseError().
    token.data.forEach((name, attrValue) {
      tree.openElements[0].attributes.putIfAbsent(name, () => attrValue);
    });
    parser.firstStartTag = false;
    return null;
  }

  /// Handles an end tag. The base implementation is not usable and always
  /// throws an [UnimplementedError].
  Token processEndTag(EndTagToken token) => throw new UnimplementedError();

  /// Helper method for popping openElements.
  ///
  /// Pops at least one element, and keeps popping until the element just
  /// removed has the local name [name].
  void popOpenElementsUntil(String name) {
    var popped;
    do {
      popped = tree.openElements.removeLast();
    } while (popped.localName != name);
  }
}
| 625 |
/// Phase that is active before anything has been seen: it consumes the
/// doctype (if any), derives the parser's compatibility mode from it and
/// then hands over to the before-html phase.
class InitialPhase extends Phase {
  InitialPhase(parser) : super(parser);

  /// Whitespace is dropped in this phase.
  Token processSpaceCharacters(SpaceCharactersToken token) => null;

  /// Comments are attached directly to the document.
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  /// Inserts the doctype, reports it when it is not a plain `html` doctype,
  /// and sets the parser's compatibility mode from its public and system
  /// identifiers.
  Token processDoctype(DoctypeToken token) {
    // Public identifier prefixes that trigger quirks mode.
    const quirksPublicIdPrefixes = const [
      "+//silmaril//dtd html pro v0r11 19970101//",
      "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
      "-//as//dtd html 3.0 aswedit + extensions//",
      "-//ietf//dtd html 2.0 level 1//",
      "-//ietf//dtd html 2.0 level 2//",
      "-//ietf//dtd html 2.0 strict level 1//",
      "-//ietf//dtd html 2.0 strict level 2//",
      "-//ietf//dtd html 2.0 strict//",
      "-//ietf//dtd html 2.0//",
      "-//ietf//dtd html 2.1e//",
      "-//ietf//dtd html 3.0//",
      "-//ietf//dtd html 3.2 final//",
      "-//ietf//dtd html 3.2//",
      "-//ietf//dtd html 3//",
      "-//ietf//dtd html level 0//",
      "-//ietf//dtd html level 1//",
      "-//ietf//dtd html level 2//",
      "-//ietf//dtd html level 3//",
      "-//ietf//dtd html strict level 0//",
      "-//ietf//dtd html strict level 1//",
      "-//ietf//dtd html strict level 2//",
      "-//ietf//dtd html strict level 3//",
      "-//ietf//dtd html strict//",
      "-//ietf//dtd html//",
      "-//metrius//dtd metrius presentational//",
      "-//microsoft//dtd internet explorer 2.0 html strict//",
      "-//microsoft//dtd internet explorer 2.0 html//",
      "-//microsoft//dtd internet explorer 2.0 tables//",
      "-//microsoft//dtd internet explorer 3.0 html strict//",
      "-//microsoft//dtd internet explorer 3.0 html//",
      "-//microsoft//dtd internet explorer 3.0 tables//",
      "-//netscape comm. corp.//dtd html//",
      "-//netscape comm. corp.//dtd strict html//",
      "-//o'reilly and associates//dtd html 2.0//",
      "-//o'reilly and associates//dtd html extended 1.0//",
      "-//o'reilly and associates//dtd html extended relaxed 1.0//",
      "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
      "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
      "-//spyglass//dtd html 2.0 extended//",
      "-//sq//dtd html 2.0 hotmetal + extensions//",
      "-//sun microsystems corp.//dtd hotjava html//",
      "-//sun microsystems corp.//dtd hotjava strict html//",
      "-//w3c//dtd html 3 1995-03-24//",
      "-//w3c//dtd html 3.2 draft//",
      "-//w3c//dtd html 3.2 final//",
      "-//w3c//dtd html 3.2//",
      "-//w3c//dtd html 3.2s draft//",
      "-//w3c//dtd html 4.0 frameset//",
      "-//w3c//dtd html 4.0 transitional//",
      "-//w3c//dtd html experimental 19960712//",
      "-//w3c//dtd html experimental 970421//",
      "-//w3c//dtd w3 html//",
      "-//w3o//dtd w3 html 3.0//",
      "-//webtechs//dtd mozilla html 2.0//",
      "-//webtechs//dtd mozilla html//"];

    // Public identifiers that trigger quirks mode on an exact match.
    const quirksPublicIds = const [
      "-//w3o//dtd w3 html strict 3.0//en//",
      "-/w3c/dtd html 4.0 transitional/en",
      "html"];

    // Prefixes that mean quirks without a system identifier, and limited
    // quirks with one.
    const html401PublicIdPrefixes = const [
      "-//w3c//dtd html 4.01 frameset//",
      "-//w3c//dtd html 4.01 transitional//"];

    // Prefixes that always mean limited quirks.
    const xhtml10PublicIdPrefixes = const [
      "-//w3c//dtd xhtml 1.0 frameset//",
      "-//w3c//dtd xhtml 1.0 transitional//"];

    const quirksSystemId =
        "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd";

    var doctypeName = token.name;
    String publicId = token.publicId;
    var systemId = token.systemId;
    var correct = token.correct;

    // Only `<!DOCTYPE html>`, optionally with the legacy-compat system
    // identifier, goes unreported.
    var isPlainHtmlDoctype = doctypeName == "html" &&
        publicId == null &&
        (systemId == null || systemId == "about:legacy-compat");
    if (!isPlainHtmlDoctype) {
      parser.parseError(token.span, "unknown-doctype");
    }

    tree.insertDoctype(token);

    // Normalise the public identifier for the comparisons below: a missing
    // one becomes empty, anything else is ASCII-lowercased.
    if (publicId == null) {
      publicId = "";
    } else if (publicId != "") {
      publicId = asciiUpper2Lower(publicId);
    }

    if (!correct || token.name != "html"
        || startsWithAny(publicId, quirksPublicIdPrefixes)
        || quirksPublicIds.contains(publicId)
        || startsWithAny(publicId, html401PublicIdPrefixes) && systemId == null
        || systemId != null && systemId.toLowerCase() == quirksSystemId) {

      parser.compatMode = "quirks";
    } else if (startsWithAny(publicId, xhtml10PublicIdPrefixes)
        || startsWithAny(publicId, html401PublicIdPrefixes) &&
            systemId != null) {
      parser.compatMode = "limited quirks";
    }
    parser.phase = parser._beforeHtmlPhase;
    return null;
  }

  /// Fallback when something other than a doctype arrives first: the
  /// document is put in quirks mode and the before-html phase takes over.
  void anythingElse() {
    parser.compatMode = "quirks";
    parser.phase = parser._beforeHtmlPhase;
  }

  /// Reports the missing doctype and has [token] reprocessed by the next
  /// phase.
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "expected-doctype-but-got-chars");
    anythingElse();
    return token;
  }

  /// Reports the missing doctype and has [token] reprocessed by the next
  /// phase.
  Token processStartTag(StartTagToken token) {
    var details = {"name": token.name};
    parser.parseError(token.span, "expected-doctype-but-got-start-tag",
        details);
    anythingElse();
    return token;
  }

  /// Reports the missing doctype and has [token] reprocessed by the next
  /// phase.
  Token processEndTag(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError(token.span, "expected-doctype-but-got-end-tag",
        details);
    anythingElse();
    return token;
  }

  /// Reports the missing doctype and asks for EOF to be processed again by
  /// the next phase.
  bool processEOF() {
    parser.parseError(parser._lastSpan, "expected-doctype-but-got-eof");
    anythingElse();
    return true;
  }
}
| 770 |
| 771 |
/// Phase that is active until the root element exists. Anything that is not
/// whitespace or a comment makes it create an `html` root element and pass
/// the token on to the before-head phase.
class BeforeHtmlPhase extends Phase {
  BeforeHtmlPhase(parser) : super(parser);

  // helper methods

  /// Inserts an attribute-less `html` root element and makes the before-head
  /// phase current.
  void insertHtmlElement() {
    var rootToken = new StartTagToken("html", data: {});
    tree.insertRoot(rootToken);
    parser.phase = parser._beforeHeadPhase;
  }

  // other

  /// Creates the root element and asks for EOF to be processed again.
  bool processEOF() {
    insertHtmlElement();
    return true;
  }

  /// Comments are attached directly to the document.
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  /// Whitespace is dropped in this phase.
  Token processSpaceCharacters(SpaceCharactersToken token) => null;

  /// Creates the root element and has [token] reprocessed.
  Token processCharacters(CharactersToken token) {
    insertHtmlElement();
    return token;
  }

  /// Creates the root element and has [token] reprocessed, remembering on
  /// the parser whether the triggering tag was `html` itself.
  Token processStartTag(StartTagToken token) {
    var isHtmlTag = token.name == "html";
    if (isHtmlTag) parser.firstStartTag = true;
    insertHtmlElement();
    return token;
  }

  /// Only `head`, `body`, `html` and `br` end tags create the root element
  /// (and are reprocessed); every other end tag is reported and dropped.
  Token processEndTag(EndTagToken token) {
    const rootImplyingTags = const ["head", "body", "html", "br"];
    if (rootImplyingTags.contains(token.name)) {
      insertHtmlElement();
      return token;
    }
    parser.parseError(token.span, "unexpected-end-tag-before-html",
        {"name": token.name});
    return null;
  }
}
| 821 |
| 822 |
/// Phase that is active between the root element and the `head` element.
/// Most tokens make it insert an implied `head` element and are then
/// reprocessed by the in-head phase.
class BeforeHeadPhase extends Phase {
  BeforeHeadPhase(parser) : super(parser);

  /// Dispatches on the tag name: `html`, `head`, or anything else.
  processStartTag(StartTagToken token) {
    var tagName = token.name;
    if (tagName == 'html') return startTagHtml(token);
    if (tagName == 'head') return startTagHead(token);
    return startTagOther(token);
  }

  /// Dispatches on the tag name: `head`, `body`, `html` and `br` imply a
  /// head element, every other end tag is an error.
  processEndTag(EndTagToken token) {
    const headImplyingTags = const ["head", "body", "html", "br"];
    if (headImplyingTags.contains(token.name)) {
      return endTagImplyHead(token);
    }
    return endTagOther(token);
  }

  /// Inserts an implied `head` and asks for EOF to be processed again.
  bool processEOF() {
    var impliedHead = new StartTagToken("head", data: {});
    startTagHead(impliedHead);
    return true;
  }

  /// Whitespace is dropped in this phase.
  Token processSpaceCharacters(SpaceCharactersToken token) => null;

  /// Inserts an implied `head` and has [token] reprocessed.
  Token processCharacters(CharactersToken token) {
    var impliedHead = new StartTagToken("head", data: {});
    startTagHead(impliedHead);
    return token;
  }

  /// `html` start tags are handled by the in-body phase.
  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// Inserts the element for [token], records it as the tree's head pointer
  /// and makes the in-head phase current.
  void startTagHead(StartTagToken token) {
    tree.insertElement(token);
    tree.headPointer = tree.openElements.last;
    parser.phase = parser._inHeadPhase;
  }

  /// Inserts an implied `head` and has [token] reprocessed.
  Token startTagOther(StartTagToken token) {
    var impliedHead = new StartTagToken("head", data: {});
    startTagHead(impliedHead);
    return token;
  }

  /// Inserts an implied `head` and has [token] reprocessed.
  Token endTagImplyHead(EndTagToken token) {
    var impliedHead = new StartTagToken("head", data: {});
    startTagHead(impliedHead);
    return token;
  }

  /// Reports the unexpected end tag; the token is dropped.
  void endTagOther(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError(token.span, "end-tag-after-implied-root", details);
  }
}
| 881 |
/// Phase that handles tokens while the `head` element is open.
///
/// Tokens this phase has no specific handler for pop the `head` element and
/// move the parser to `_afterHeadPhase` (see [anythingElse]).
class InHeadPhase extends Phase {
  InHeadPhase(parser) : super(parser);

  /// Routes [token] to the handler matching its tag name.
  processStartTag(StartTagToken token) {
    var tagName = token.name;
    if (tagName == "html") return startTagHtml(token);
    if (tagName == "title") return startTagTitle(token);
    if (const ["noscript", "noframes", "style"].contains(tagName)) {
      return startTagNoScriptNoFramesStyle(token);
    }
    if (tagName == "script") return startTagScript(token);
    if (const ["base", "basefont", "bgsound", "command", "link"]
        .contains(tagName)) {
      return startTagBaseLinkCommand(token);
    }
    if (tagName == "meta") return startTagMeta(token);
    if (tagName == "head") return startTagHead(token);
    return startTagOther(token);
  }

  /// Routes [token] to the handler matching its tag name.
  processEndTag(EndTagToken token) {
    var tagName = token.name;
    if (tagName == "head") return endTagHead(token);
    if (const ["br", "html", "body"].contains(tagName)) {
      return endTagHtmlBodyBr(token);
    }
    return endTagOther(token);
  }

  /// Closes the head via [anythingElse] and returns `true`.
  bool processEOF() {
    anythingElse();
    return true;
  }

  /// Closes the head via [anythingElse] and hands [token] back to the caller.
  Token processCharacters(CharactersToken token) {
    anythingElse();
    return token;
  }

  /// Delegates an `html` start tag to the in-body phase.
  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// Reports a second `head` start tag as a parse error.
  void startTagHead(StartTagToken token) {
    parser.parseError(token.span, "two-heads-are-not-better-than-one");
  }

  /// Inserts the element, pops it straight off the open-elements stack and
  /// marks the token's self-closing flag as acknowledged.
  void startTagBaseLinkCommand(StartTagToken token) {
    tree
      ..insertElement(token)
      ..openElements.removeLast();
    token.selfClosingAcknowledged = true;
  }

  /// Inserts a `meta` element like [startTagBaseLinkCommand] does and, when
  /// the input stream's encoding is not yet certain, asks the stream to change
  /// encoding based on the `charset` attribute or, failing that, the value
  /// parsed out of the `content` attribute.
  void startTagMeta(StartTagToken token) {
    tree
      ..insertElement(token)
      ..openElements.removeLast();
    token.selfClosingAcknowledged = true;

    var attrs = token.data;
    if (parser.tokenizer.stream.charEncodingCertain) return;

    var charset = attrs["charset"];
    var content = attrs["content"];
    if (charset != null) {
      parser.tokenizer.stream.changeEncoding(charset);
      return;
    }
    if (content != null) {
      var codec = new ContentAttrParser(new EncodingBytes(content)).parse();
      parser.tokenizer.stream.changeEncoding(codec);
    }
  }

  /// Parses `title` content as "RCDATA".
  void startTagTitle(StartTagToken token) {
    parser.parseRCDataRawtext(token, "RCDATA");
  }

  /// Parses the element content as "RAWTEXT".
  void startTagNoScriptNoFramesStyle(StartTagToken token) {
    // Need to decide whether to implement the scripting-disabled case
    parser.parseRCDataRawtext(token, "RAWTEXT");
  }

  /// Inserts the `script` element, puts the tokenizer into its script-data
  /// state, remembers the current phase in `parser.originalPhase` and
  /// switches to `_textPhase`.
  void startTagScript(StartTagToken token) {
    tree.insertElement(token);
    parser.tokenizer.state = parser.tokenizer.scriptDataState;
    parser
      ..originalPhase = parser.phase
      ..phase = parser._textPhase;
  }

  /// Closes the head via [anythingElse] and hands [token] back to the caller.
  Token startTagOther(StartTagToken token) {
    anythingElse();
    return token;
  }

  /// Pops the current node (asserted to be `head`) and switches the parser to
  /// `_afterHeadPhase`.
  void endTagHead(EndTagToken token) {
    var popped = parser.tree.openElements.removeLast();
    assert(popped.localName == "head");
    parser.phase = parser._afterHeadPhase;
  }

  /// Closes the head via [anythingElse] and hands [token] back to the caller.
  Token endTagHtmlBodyBr(EndTagToken token) {
    anythingElse();
    return token;
  }

  /// Reports "unexpected-end-tag" for [token].
  void endTagOther(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
  }

  /// Runs [endTagHead] with a synthesized `head` end tag.
  void anythingElse() {
    endTagHead(new EndTagToken("head"));
  }
}
| 992 |
| 993 |
| 994 // XXX If we implement a parser for which scripting is disabled we need to |
| 995 // implement this phase. |
| 996 // |
| 997 // class InHeadNoScriptPhase extends Phase { |
| 998 |
/// Phase that is active after the `head` element has been closed and before
/// a `body` or `frameset` has been inserted.
class AfterHeadPhase extends Phase {
  AfterHeadPhase(parser) : super(parser);

  /// Routes [token] to the handler matching its tag name.
  processStartTag(StartTagToken token) {
    const headContent = const [
      "base", "basefont", "bgsound", "link", "meta",
      "noframes", "script", "style", "title"
    ];
    var tagName = token.name;
    if (tagName == "html") return startTagHtml(token);
    if (tagName == "body") return startTagBody(token);
    if (tagName == "frameset") return startTagFrameset(token);
    if (headContent.contains(tagName)) return startTagFromHead(token);
    if (tagName == "head") return startTagHead(token);
    return startTagOther(token);
  }

  /// Routes [token] to the handler matching its tag name.
  processEndTag(EndTagToken token) {
    if (const ["body", "html", "br"].contains(token.name)) {
      return endTagHtmlBodyBr(token);
    }
    return endTagOther(token);
  }

  /// Inserts an implied `body` via [anythingElse] and returns `true`.
  bool processEOF() {
    anythingElse();
    return true;
  }

  /// Inserts an implied `body` and hands [token] back to the caller.
  Token processCharacters(CharactersToken token) {
    anythingElse();
    return token;
  }

  /// Delegates an `html` start tag to the in-body phase.
  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// Clears `framesetOK`, inserts the `body` element and switches the parser
  /// to `_inBodyPhase`.
  void startTagBody(StartTagToken token) {
    parser.framesetOK = false;
    tree.insertElement(token);
    parser.phase = parser._inBodyPhase;
  }

  /// Inserts the `frameset` element and switches the parser to
  /// `_inFramesetPhase`.
  void startTagFrameset(StartTagToken token) {
    tree.insertElement(token);
    parser.phase = parser._inFramesetPhase;
  }

  /// Reports the misplaced tag, temporarily pushes the head pointer back on
  /// the open-elements stack, lets the in-head phase process [token], and
  /// then removes the last-opened `head` node from the stack again.
  void startTagFromHead(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-out-of-my-head",
        {"name": token.name});
    tree.openElements.add(tree.headPointer);
    parser._inHeadPhase.processStartTag(token);

    var open = tree.openElements;
    for (var i = open.length - 1; i >= 0; i--) {
      var candidate = open[i];
      if (candidate.localName != "head") continue;
      open.remove(candidate);
      break;
    }
  }

  /// Reports a `head` start tag as "unexpected-start-tag".
  void startTagHead(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag", {"name": token.name});
  }

  /// Inserts an implied `body` and hands [token] back to the caller.
  Token startTagOther(StartTagToken token) {
    anythingElse();
    return token;
  }

  /// Inserts an implied `body` and hands [token] back to the caller.
  Token endTagHtmlBodyBr(EndTagToken token) {
    anythingElse();
    return token;
  }

  /// Reports "unexpected-end-tag" for [token].
  void endTagOther(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
  }

  /// Inserts a `body` element from a synthesized start tag, switches the
  /// parser to `_inBodyPhase` and sets `framesetOK` to `true`.
  void anythingElse() {
    tree.insertElement(new StartTagToken("body", data: {}));
    parser
      ..phase = parser._inBodyPhase
      ..framesetOK = true;
  }
}
| 1085 |
/// Signature of a function that receives a [Token] and returns a [Token].
| 1086 typedef Token TokenProccessor(Token token); |
| 1087 |
| 1088 class InBodyPhase extends Phase { |
/// Set to `true` by [startTagPreListing] and [startTagTextarea]. While set,
/// [processSpaceCharacters] routes tokens through
/// [processSpaceCharactersDropNewline], which clears the flag again;
/// [endTagBlock] also clears it for a `pre` end tag.
| 1089 bool dropNewline = false; |
| 1090 |
| 1091 // http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody |
| 1092 // the really-really-really-very crazy mode |
| 1093 InBodyPhase(parser) : super(parser); |
| 1094 |
/// Routes a start tag seen in the in-body phase to the handler for its name.
///
/// Returns whatever the selected handler returns. Names without a dedicated
/// handler fall through to [startTagOther].
///
/// Each tag name appears in exactly one case group. `noframes` is handled by
/// [startTagProcessInHead], so it is not repeated in the [startTagRawtext]
/// group, where it could never match.
processStartTag(StartTagToken token) {
  switch (token.name) {
    case "html":
      return startTagHtml(token);
    case "base": case "basefont": case "bgsound": case "command": case "link":
    case "meta": case "noframes": case "script": case "style": case "title":
      return startTagProcessInHead(token);
    case "body":
      return startTagBody(token);
    case "frameset":
      return startTagFrameset(token);
    case "address": case "article": case "aside": case "blockquote":
    case "center": case "details": case "dir": case "div":
    case "dl": case "fieldset": case "figcaption": case "figure":
    case "footer": case "header": case "hgroup": case "menu": case "nav":
    case "ol": case "p": case "section": case "summary": case "ul":
      return startTagCloseP(token);
    // headingElements
    case "h1": case "h2": case "h3": case "h4": case "h5": case "h6":
      return startTagHeading(token);
    case "pre": case "listing":
      return startTagPreListing(token);
    case "form":
      return startTagForm(token);
    case "li": case "dd": case "dt":
      return startTagListItem(token);
    case "plaintext":
      return startTagPlaintext(token);
    case "a":
      return startTagA(token);
    case "b": case "big": case "code": case "em": case "font": case "i":
    case "s": case "small": case "strike": case "strong": case "tt": case "u":
      return startTagFormatting(token);
    case "nobr":
      return startTagNobr(token);
    case "button":
      return startTagButton(token);
    case "applet": case "marquee": case "object":
      return startTagAppletMarqueeObject(token);
    case "xmp":
      return startTagXmp(token);
    case "table":
      return startTagTable(token);
    case "area": case "br": case "embed": case "img": case "keygen":
    case "wbr":
      return startTagVoidFormatting(token);
    case "param": case "source": case "track":
      return startTagParamSource(token);
    case "input":
      return startTagInput(token);
    case "hr":
      return startTagHr(token);
    case "image":
      return startTagImage(token);
    case "isindex":
      return startTagIsIndex(token);
    case "textarea":
      return startTagTextarea(token);
    case "iframe":
      return startTagIFrame(token);
    case "noembed": case "noscript":
      return startTagRawtext(token);
    case "select":
      return startTagSelect(token);
    case "rp": case "rt":
      return startTagRpRt(token);
    case "option": case "optgroup":
      return startTagOpt(token);
    case "math":
      return startTagMath(token);
    case "svg":
      return startTagSvg(token);
    case "caption": case "col": case "colgroup": case "frame": case "head":
    case "tbody": case "td": case "tfoot": case "th": case "thead": case "tr":
      return startTagMisplaced(token);
    default:
      return startTagOther(token);
  }
}
| 1172 |
/// Routes an end tag seen in the in-body phase to the handler for its name.
///
/// Returns whatever the selected handler returns. Names without a dedicated
/// handler go to [endTagOther].
processEndTag(EndTagToken token) {
  const blockNames = const [
    "address", "article", "aside", "blockquote", "center", "details", "dir",
    "div", "dl", "fieldset", "figcaption", "figure", "footer", "header",
    "hgroup", "listing", "menu", "nav", "ol", "pre", "section", "summary",
    "ul"
  ];
  const headingNames = const ["h1", "h2", "h3", "h4", "h5", "h6"];
  const formattingNames = const [
    "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
    "strike", "strong", "tt", "u"
  ];

  var tagName = token.name;
  if (tagName == "body") return endTagBody(token);
  if (tagName == "html") return endTagHtml(token);
  if (blockNames.contains(tagName)) return endTagBlock(token);
  if (tagName == "form") return endTagForm(token);
  if (tagName == "p") return endTagP(token);
  if (const ["dd", "dt", "li"].contains(tagName)) return endTagListItem(token);
  if (headingNames.contains(tagName)) return endTagHeading(token);
  if (formattingNames.contains(tagName)) return endTagFormatting(token);
  if (const ["applet", "marquee", "object"].contains(tagName)) {
    return endTagAppletMarqueeObject(token);
  }
  if (tagName == "br") return endTagBr(token);
  return endTagOther(token);
}
| 1199 |
/// Whether [node1] and [node2] have the same local name, the same namespace
/// URI, the same number of attributes, and equal values for every attribute
/// key of [node1].
bool isMatchingFormattingElement(Element node1, Element node2) {
  var sameTag = node1.localName == node2.localName &&
      node1.namespaceUri == node2.namespaceUri;
  if (!sameTag) return false;

  if (node1.attributes.length != node2.attributes.length) return false;

  return node1.attributes.keys
      .every((key) => node1.attributes[key] == node2.attributes[key]);
}
| 1215 |
/// Inserts the element for [token] and appends it to the tree's list of
/// active formatting elements.
///
/// Before appending, the list is walked from the end back to the nearest
/// `Marker`. If three entries already match the new element (per
/// [isMatchingFormattingElement]), the one found last in that walk is
/// removed.
void addFormattingElement(token) {
  tree.insertElement(token);
  var inserted = tree.openElements.last;

  var duplicates = [];
  for (Node candidate in tree.activeFormattingElements.reversed) {
    if (candidate == Marker) break;
    if (isMatchingFormattingElement(candidate, inserted)) {
      duplicates.add(candidate);
    }
  }

  assert(duplicates.length <= 3);
  if (duplicates.length == 3) {
    tree.activeFormattingElements.remove(duplicates.last);
  }
  tree.activeFormattingElements.add(inserted);
}
| 1236 |
/// Handles end-of-file in the in-body phase.
///
/// Walks the open elements from the most recent one and reports
/// "expected-closing-tag-but-got-eof" for the first element whose name is
/// not in the list below. Always returns `false`.
bool processEOF() {
  const mayStayOpen = const [
    "dd", "dt", "li", "p", "tbody", "td", "tfoot", "th", "thead", "tr",
    "body", "html"
  ];

  for (var node in tree.openElements.reversed) {
    if (mayStayOpen.contains(node.localName)) continue;
    parser.parseError(node.sourceSpan, "expected-closing-tag-but-got-eof");
    break;
  }

  // Stop parsing
  return false;
}
| 1252 |
/// Inserts the text of [token], minus one leading newline when the current
/// node is an empty `pre`, `listing` or `textarea`.
///
/// Always clears [dropNewline]. Nothing is inserted if the remaining text is
/// empty.
void processSpaceCharactersDropNewline(StringToken token) {
  var text = token.data;
  dropNewline = false;

  if (text.startsWith("\n")) {
    var current = tree.openElements.last;
    var isNewlineDropper =
        const ["pre", "listing", "textarea"].contains(current.localName);
    if (isNewlineDropper && !current.hasContent()) {
      text = text.substring(1);
    }
  }

  if (text.isEmpty) return;
  tree.reconstructActiveFormattingElements();
  tree.insertText(text, token.span);
}
| 1270 |
/// Inserts the text of [token] into the tree and returns `null`.
///
/// A token whose data is exactly U+0000 is ignored. Text that is not all
/// whitespace clears `parser.framesetOK`.
Token processCharacters(CharactersToken token) {
  // The tokenizer should always emit null on its own
  if (token.data == "\u0000") return null;

  tree.reconstructActiveFormattingElements();
  tree.insertText(token.data, token.span);

  var sawNonWhitespace = parser.framesetOK && !allWhitespace(token.data);
  if (sawNonWhitespace) parser.framesetOK = false;
  return null;
}
| 1283 |
/// Inserts whitespace from [token] and returns `null`.
///
/// While [dropNewline] is set the token goes through
/// [processSpaceCharactersDropNewline] instead of being inserted directly.
Token processSpaceCharacters(SpaceCharactersToken token) {
  if (dropNewline) {
    processSpaceCharactersDropNewline(token);
    return null;
  }

  tree.reconstructActiveFormattingElements();
  tree.insertText(token.data, token.span);
  return null;
}
| 1293 |
/// Delegates [token] to the in-head phase and returns its result.
Token startTagProcessInHead(StartTagToken token) =>
    parser._inHeadPhase.processStartTag(token);
| 1297 |
/// Handles a `body` start tag seen while already in the body.
///
/// Always reports a parse error. When the second open element is a `body`,
/// clears `framesetOK` and copies onto it every attribute of [token] that it
/// does not already have. Otherwise only asserts `parser.innerHTMLMode`.
void startTagBody(StartTagToken token) {
  parser.parseError(token.span, "unexpected-start-tag", {"name": "body"});

  var hasBody = tree.openElements.length != 1 &&
      tree.openElements[1].localName == "body";
  if (!hasBody) {
    assert(parser.innerHTMLMode);
    return;
  }

  parser.framesetOK = false;
  token.data.forEach((attr, value) {
    tree.openElements[1].attributes.putIfAbsent(attr, () => value);
  });
}
| 1310 |
/// Handles a `frameset` start tag seen while in the body.
///
/// Always reports a parse error. When the second open element is a `body`
/// and `parser.framesetOK` is set, detaches that element from its parent,
/// pops the open-elements stack back to `html`, inserts the `frameset` and
/// switches the parser to `_inFramesetPhase`.
void startTagFrameset(StartTagToken token) {
  parser.parseError(token.span, "unexpected-start-tag", {"name": "frameset"});

  var hasBody = tree.openElements.length != 1 &&
      tree.openElements[1].localName == "body";
  if (!hasBody) {
    assert(parser.innerHTMLMode);
    return;
  }
  if (!parser.framesetOK) return;

  if (tree.openElements[1].parentNode != null) {
    tree.openElements[1].parentNode.nodes.remove(tree.openElements[1]);
  }
  while (tree.openElements.last.localName != "html") {
    tree.openElements.removeLast();
  }
  tree.insertElement(token);
  parser.phase = parser._inFramesetPhase;
}
| 1327 |
/// Closes a `p` that is in scope (using the "button" scope variant), then
/// inserts the element for [token].
void startTagCloseP(StartTagToken token) {
  var paragraphOpen = tree.elementInScope("p", variant: "button");
  if (paragraphOpen) endTagP(new EndTagToken("p"));

  tree.insertElement(token);
}
| 1334 |
/// Closes a `p` that is in scope ("button" variant), inserts the element for
/// [token], clears `framesetOK` and arms [dropNewline].
void startTagPreListing(StartTagToken token) {
  var paragraphOpen = tree.elementInScope("p", variant: "button");
  if (paragraphOpen) endTagP(new EndTagToken("p"));

  tree.insertElement(token);
  parser.framesetOK = false;
  dropNewline = true;
}
| 1343 |
/// Handles a `form` start tag.
///
/// If the tree already has a form pointer the tag is reported as a parse
/// error and ignored. Otherwise a `p` in scope ("button" variant) is closed,
/// the element is inserted and becomes the tree's form pointer.
void startTagForm(StartTagToken token) {
  if (tree.formPointer != null) {
    parser.parseError(token.span, "unexpected-start-tag", {"name": "form"});
    return;
  }

  if (tree.elementInScope("p", variant: "button")) {
    endTagP(new EndTagToken("p"));
  }
  tree.insertElement(token);
  tree.formPointer = tree.openElements.last;
}
| 1355 |
/// Handles `li`, `dd` and `dt` start tags.
///
/// Clears `framesetOK`, then walks the open elements from the most recent
/// one. The first element whose name is a "stop name" for this tag gets a
/// synthesized end tag and ends the walk. The walk also ends at the first
/// special element other than `address`, `div` or `p`. Afterwards a `p` in
/// scope ("button" variant) is closed and the element for [token] inserted.
void startTagListItem(StartTagToken token) {
  parser.framesetOK = false;

  const stopNamesByTag = const {
    "li": const ["li"],
    "dt": const ["dt", "dd"],
    "dd": const ["dt", "dd"]
  };
  var stopNames = stopNamesByTag[token.name];

  for (var node in tree.openElements.reversed) {
    if (stopNames.contains(node.localName)) {
      parser.phase.processEndTag(new EndTagToken(node.localName));
      break;
    }
    var isSpecial = specialElements.contains(getElementNameTuple(node));
    if (isSpecial && !const ["address", "div", "p"].contains(node.localName)) {
      break;
    }
  }

  var paragraphOpen = tree.elementInScope("p", variant: "button");
  if (paragraphOpen) parser.phase.processEndTag(new EndTagToken("p"));

  tree.insertElement(token);
}
| 1380 |
/// Closes a `p` that is in scope ("button" variant), inserts the element for
/// [token] and puts the tokenizer into its plaintext state.
void startTagPlaintext(StartTagToken token) {
  var paragraphOpen = tree.elementInScope("p", variant: "button");
  if (paragraphOpen) endTagP(new EndTagToken("p"));

  tree.insertElement(token);
  parser.tokenizer.state = parser.tokenizer.plaintextState;
}
| 1388 |
/// Handles `h1`–`h6` start tags.
///
/// Closes a `p` that is in scope ("button" variant). If the current node is
/// itself a heading, reports a parse error and pops it. Finally inserts the
/// element for [token].
void startTagHeading(StartTagToken token) {
  var paragraphOpen = tree.elementInScope("p", variant: "button");
  if (paragraphOpen) endTagP(new EndTagToken("p"));

  var nestedInHeading =
      headingElements.contains(tree.openElements.last.localName);
  if (nestedInHeading) {
    parser.parseError(token.span, "unexpected-start-tag",
        {"name": token.name});
    tree.openElements.removeLast();
  }

  tree.insertElement(token);
}
| 1400 |
/// Handles an `a` start tag.
///
/// If an `a` is already among the active formatting elements, reports a
/// parse error, runs [endTagFormatting] for it and removes it from both the
/// open elements and the active formatting elements. Then reconstructs the
/// active formatting elements and adds the new element.
void startTagA(StartTagToken token) {
  var activeAnchor = tree.elementInActiveFormattingElements("a");
  if (activeAnchor != null) {
    parser.parseError(token.span, "unexpected-start-tag-implies-end-tag",
        {"startName": "a", "endName": "a"});
    endTagFormatting(new EndTagToken("a"));
    tree
      ..openElements.remove(activeAnchor)
      ..activeFormattingElements.remove(activeAnchor);
  }

  tree.reconstructActiveFormattingElements();
  addFormattingElement(token);
}
| 1413 |
/// Reconstructs the active formatting elements, then inserts [token] as a
/// new formatting element via [addFormattingElement].
void startTagFormatting(StartTagToken token) {
  tree.reconstructActiveFormattingElements();

  addFormattingElement(token);
}
| 1418 |
/// Handles a `nobr` start tag.
///
/// Reconstructs the active formatting elements. If a `nobr` is in scope,
/// reports a parse error, processes a synthesized `nobr` end tag and
/// reconstructs again. Finally adds the new formatting element.
void startTagNobr(StartTagToken token) {
  tree.reconstructActiveFormattingElements();

  var nobrOpen = tree.elementInScope("nobr");
  if (nobrOpen) {
    parser.parseError(token.span, "unexpected-start-tag-implies-end-tag",
        {"startName": "nobr", "endName": "nobr"});
    processEndTag(new EndTagToken("nobr"));
    // XXX Need tests that trigger the following
    tree.reconstructActiveFormattingElements();
  }

  addFormattingElement(token);
}
| 1430 |
/// Handles a `button` start tag.
///
/// When a `button` is already in scope, reports a parse error, processes a
/// synthesized `button` end tag and returns [token]. Otherwise inserts the
/// element, clears `framesetOK` and returns `null`.
Token startTagButton(StartTagToken token) {
  if (!tree.elementInScope("button")) {
    tree.reconstructActiveFormattingElements();
    tree.insertElement(token);
    parser.framesetOK = false;
    return null;
  }

  parser.parseError(token.span, "unexpected-start-tag-implies-end-tag",
      {"startName": "button", "endName": "button"});
  processEndTag(new EndTagToken("button"));
  return token;
}
| 1444 |
/// Inserts the element for [token], pushes a `Marker` onto the active
/// formatting elements and clears `framesetOK`.
void startTagAppletMarqueeObject(StartTagToken token) {
  tree
    ..reconstructActiveFormattingElements()
    ..insertElement(token)
    ..activeFormattingElements.add(Marker);
  parser.framesetOK = false;
}
| 1451 |
/// Closes a `p` that is in scope ("button" variant), reconstructs the active
/// formatting elements, clears `framesetOK` and parses the element content
/// as "RAWTEXT".
void startTagXmp(StartTagToken token) {
  var paragraphOpen = tree.elementInScope("p", variant: "button");
  if (paragraphOpen) endTagP(new EndTagToken("p"));

  tree.reconstructActiveFormattingElements();
  parser.framesetOK = false;
  parser.parseRCDataRawtext(token, "RAWTEXT");
}
| 1460 |
/// Handles a `table` start tag.
///
/// Unless `parser.compatMode` is "quirks", a `p` in scope ("button" variant)
/// is closed first. Then the element is inserted, `framesetOK` is cleared
/// and the parser switches to `_inTablePhase`.
void startTagTable(StartTagToken token) {
  var mustCloseParagraph = parser.compatMode != "quirks" &&
      tree.elementInScope("p", variant: "button");
  if (mustCloseParagraph) processEndTag(new EndTagToken("p"));

  tree.insertElement(token);
  parser.framesetOK = false;
  parser.phase = parser._inTablePhase;
}
| 1471 |
/// Inserts the element for [token] and pops it straight off the
/// open-elements stack, acknowledges the token's self-closing flag and
/// clears `framesetOK`.
void startTagVoidFormatting(StartTagToken token) {
  tree
    ..reconstructActiveFormattingElements()
    ..insertElement(token)
    ..openElements.removeLast();
  token.selfClosingAcknowledged = true;
  parser.framesetOK = false;
}
| 1479 |
/// Handles an `input` start tag like [startTagVoidFormatting], except that
/// an input whose `type` lower-cases to "hidden" leaves `framesetOK` as it
/// was before the call.
void startTagInput(StartTagToken token) {
  var framesetWasOK = parser.framesetOK;
  startTagVoidFormatting(token);

  var isHidden = asciiUpper2Lower(token.data["type"]) == "hidden";
  // input type=hidden doesn't change framesetOK
  if (isHidden) parser.framesetOK = framesetWasOK;
}
| 1488 |
/// Inserts the element for [token], pops it straight off the open-elements
/// stack and acknowledges the token's self-closing flag.
void startTagParamSource(StartTagToken token) {
  tree
    ..insertElement(token)
    ..openElements.removeLast();
  token.selfClosingAcknowledged = true;
}
| 1494 |
/// Handles an `hr` start tag.
///
/// Closes a `p` that is in scope ("button" variant), inserts the element and
/// pops it straight off the open-elements stack, acknowledges the token's
/// self-closing flag and clears `framesetOK`.
void startTagHr(StartTagToken token) {
  var paragraphOpen = tree.elementInScope("p", variant: "button");
  if (paragraphOpen) endTagP(new EndTagToken("p"));

  tree
    ..insertElement(token)
    ..openElements.removeLast();
  token.selfClosingAcknowledged = true;
  parser.framesetOK = false;
}
| 1504 |
/// Reports an `image` start tag as a parse error and reprocesses it as an
/// `img` start tag carrying the same attributes and self-closing flag.
void startTagImage(StartTagToken token) {
  // No really...
  parser.parseError(token.span, "unexpected-start-tag-treated-as",
      {"originalName": "image", "newName": "img"});

  var replacement = new StartTagToken("img",
      data: token.data, selfClosing: token.selfClosing);
  processStartTag(replacement);
}
| 1512 |
/// Expands an `isindex` start tag into a synthesized token sequence.
///
/// Always reports "deprecated-tag". If the tree already has a form pointer
/// nothing else happens. Otherwise it processes, in order: a `form` start
/// tag (carrying the `action` attribute if present), `hr`, `label`, the
/// prompt text, an `input` named "isindex" carrying the remaining
/// attributes, the `label` end tag, another `hr`, and the `form` end tag.
void startTagIsIndex(StartTagToken token) {
  parser.parseError(token.span, "deprecated-tag", {"name": "isindex"});
  if (tree.formPointer != null) return;

  var formAttrs = {};
  var action = token.data["action"];
  if (action != null) formAttrs["action"] = action;

  processStartTag(new StartTagToken("form", data: formAttrs));
  processStartTag(new StartTagToken("hr", data: {}));
  processStartTag(new StartTagToken("label", data: {}));

  // XXX Localization ...
  var promptText = token.data["prompt"];
  if (promptText == null) {
    promptText = "This is a searchable index. Enter search keywords: ";
  }
  processCharacters(new CharactersToken(promptText));

  var inputAttrs = new LinkedHashMap.from(token.data)
    ..remove('action')
    ..remove('prompt')
    ..["name"] = "isindex";
  processStartTag(new StartTagToken("input",
      data: inputAttrs, selfClosing: token.selfClosing));

  processEndTag(new EndTagToken("label"));
  processStartTag(new StartTagToken("hr", data: {}));
  processEndTag(new EndTagToken("form"));
}
| 1542 |
/// Inserts the `textarea` element, puts the tokenizer into its RCDATA state,
/// clears `framesetOK` and arms [dropNewline].
void startTagTextarea(StartTagToken token) {
  tree.insertElement(token);
  parser.tokenizer.state = parser.tokenizer.rcdataState;
  parser.framesetOK = false;
  dropNewline = true;
}
| 1549 |
/// Clears `framesetOK`, then handles the `iframe` via [startTagRawtext].
void startTagIFrame(StartTagToken token) {
  parser.framesetOK = false;

  startTagRawtext(token);
}
| 1554 |
/// Parses the content of the element opened by [token] as "RAWTEXT".
///
/// Used for iframe, noembed, noframes and noscript (if scripting enabled).
void startTagRawtext(StartTagToken token) =>
    parser.parseRCDataRawtext(token, "RAWTEXT");
| 1559 |
/// Handles `option` and `optgroup` start tags.
///
/// If the current node is an `option`, the current phase first processes a
/// synthesized `option` end tag. Then the active formatting elements are
/// reconstructed and the element for [token] is inserted.
void startTagOpt(StartTagToken token) {
  var optionOpen = tree.openElements.last.localName == "option";
  if (optionOpen) parser.phase.processEndTag(new EndTagToken("option"));

  tree.reconstructActiveFormattingElements();
  parser.tree.insertElement(token);
}
| 1567 |
/// Handles a `select` start tag.
///
/// Inserts the element and clears `framesetOK`. The parser then switches to
/// `_inSelectInTablePhase` if its current phase is one of the table-related
/// phases listed below, and to `_inSelectPhase` otherwise.
void startTagSelect(StartTagToken token) {
  tree
    ..reconstructActiveFormattingElements()
    ..insertElement(token);
  parser.framesetOK = false;

  var tablePhases = [
    parser._inTablePhase,
    parser._inCaptionPhase,
    parser._inColumnGroupPhase,
    parser._inTableBodyPhase,
    parser._inRowPhase,
    parser._inCellPhase
  ];
  var insideTable = tablePhases.contains(parser.phase);
  parser.phase =
      insideTable ? parser._inSelectInTablePhase : parser._inSelectPhase;
}
| 1584 |
/// Handles `rp` and `rt` start tags.
///
/// When a `ruby` is in scope, implied end tags are generated and a parse
/// error is reported if the current node is then not `ruby`. The element for
/// [token] is inserted in every case.
void startTagRpRt(StartTagToken token) {
  if (tree.elementInScope("ruby")) {
    tree.generateImpliedEndTags();
    var current = tree.openElements.last;
    var closedToRuby = current.localName == "ruby";
    if (!closedToRuby) {
      parser.parseError(current.sourceSpan, 'undefined-error');
    }
  }

  tree.insertElement(token);
}
| 1595 |
/// Handles a `math` start tag.
///
/// Adjusts the token's MathML and foreign attributes, assigns it the MathML
/// namespace and inserts the element. A self-closing token is popped again
/// immediately and its self-closing flag acknowledged.
void startTagMath(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  parser
    ..adjustMathMLAttributes(token)
    ..adjustForeignAttributes(token);
  token.namespace = Namespaces.mathml;
  tree.insertElement(token);

  // Need to get the parse error right for the case where the token
  // has a namespace not equal to the xmlns attribute
  if (!token.selfClosing) return;
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
}
| 1609 |
/// Handles an `svg` start tag.
///
/// Adjusts the token's SVG and foreign attributes, assigns it the SVG
/// namespace and inserts the element. A self-closing token is popped again
/// immediately and its self-closing flag acknowledged.
void startTagSvg(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  parser
    ..adjustSVGAttributes(token)
    ..adjustForeignAttributes(token);
  token.namespace = Namespaces.svg;
  tree.insertElement(token);

  // Need to get the parse error right for the case where the token
  // has a namespace not equal to the xmlns attribute
  if (!token.selfClosing) return;
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
}
| 1623 |
/// Reports [token] as "unexpected-start-tag-ignored" and does nothing else.
///
/// Meant for elements that should be children of other elements that have a
/// different insertion mode: "caption", "col", "colgroup", "frame",
/// "frameset", "head", "option", "optgroup", "tbody", "td", "tfoot", "th",
/// "thead", "tr", "noscript".
void startTagMisplaced(StartTagToken token) {
  var details = {"name": token.name};
  parser.parseError(token.span, "unexpected-start-tag-ignored", details);
}
| 1633 |
/// Reconstructs the active formatting elements, inserts the element for
/// [token] and returns `null`.
Token startTagOther(StartTagToken token) {
  tree
    ..reconstructActiveFormattingElements()
    ..insertElement(token);
  return null;
}
| 1639 |
/// Handles a `p` end tag.
///
/// With a `p` in scope ("button" variant): generates implied end tags
/// (except for `p`), reports a parse error if the current node is not `p`,
/// and pops the open elements until a `p` has been popped. Without one:
/// opens a synthesized `p`, reports a parse error, and closes it again by
/// calling itself.
void endTagP(EndTagToken token) {
  if (tree.elementInScope("p", variant: "button")) {
    tree.generateImpliedEndTags("p");
    var currentIsP = tree.openElements.last.localName == "p";
    if (!currentIsP) {
      parser.parseError(token.span, "unexpected-end-tag", {"name": "p"});
    }
    popOpenElementsUntil("p");
    return;
  }

  startTagCloseP(new StartTagToken("p", data: {}));
  parser.parseError(token.span, "unexpected-end-tag", {"name": "p"});
  endTagP(new EndTagToken("p"));
}
| 1653 |
/// Handles a `body` end tag.
///
/// Without a `body` in scope, reports a parse error and returns. Otherwise,
/// if the current node is not `body`, reports one parse error for the first
/// open element (from index 2 on) whose name is not in the list below. Then
/// switches the parser to `_afterBodyPhase`.
void endTagBody(EndTagToken token) {
  if (!tree.elementInScope("body")) {
    parser.parseError(token.span, 'undefined-error');
    return;
  }

  if (tree.openElements.last.localName != "body") {
    const mayStayOpen = const [
      "dd", "dt", "li", "optgroup", "option", "p", "rp", "rt", "tbody", "td",
      "tfoot", "th", "thead", "tr", "body", "html"
    ];
    for (Element node in slice(tree.openElements, 2)) {
      if (mayStayOpen.contains(node.localName)) continue;
      // Not sure this is the correct name for the parse error
      parser.parseError(token.span, "expected-one-end-tag-but-got-another",
          {"gotName": "body", "expectedName": node.localName});
      break;
    }
  }

  parser.phase = parser._afterBodyPhase;
}
| 1674 |
/// Handles an `html` end tag.
///
/// With a `body` in scope, runs [endTagBody] with a synthesized `body` end
/// tag and returns [token]. Otherwise returns `null`.
Token endTagHtml(EndTagToken token) {
  // We repeat the test for the body end tag token being ignored here
  if (!tree.elementInScope("body")) return null;

  endTagBody(new EndTagToken("body"));
  return token;
}
| 1683 |
/// Handles the end tag of a block-level element.
///
/// A `pre` end tag clears [dropNewline]. When the element is in scope,
/// implied end tags are generated and the open elements are popped until
/// the named element has been popped. A parse error is reported whenever the
/// current node does not match the tag name (checked after the implied end
/// tags, if any).
void endTagBlock(EndTagToken token) {
  // Put us back in the right whitespace handling mode
  if (token.name == "pre") dropNewline = false;

  var inScope = tree.elementInScope(token.name);
  if (inScope) tree.generateImpliedEndTags();

  var currentMatches = tree.openElements.last.localName == token.name;
  if (!currentMatches) {
    parser.parseError(token.span, "end-tag-too-early", {"name": token.name});
  }

  if (inScope) popOpenElementsUntil(token.name);
}
| 1700 |
/// Handles a `form` end tag.
///
/// Clears the tree's form pointer. If there was no form pointer, or that
/// element is not in scope, reports "unexpected-end-tag". Otherwise
/// generates implied end tags, reports "end-tag-too-early-ignored" if the
/// form is not the current node, and removes the form from the open
/// elements.
void endTagForm(EndTagToken token) {
  var form = tree.formPointer;
  tree.formPointer = null;

  var formInScope = form != null && tree.elementInScope(form);
  if (!formInScope) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": "form"});
    return;
  }

  tree.generateImpliedEndTags();
  if (tree.openElements.last != form) {
    parser.parseError(
        token.span, "end-tag-too-early-ignored", {"name": "form"});
  }
  tree.openElements.remove(form);
}
| 1714 |
/// Handles `li`, `dd` and `dt` end tags.
///
/// The scope check uses the "list" variant for `li` and no variant for the
/// others. Out of scope, the tag is reported as "unexpected-end-tag". In
/// scope, implied end tags (except for this tag) are generated,
/// "end-tag-too-early" is reported if the current node does not match, and
/// the open elements are popped until the named element has been popped.
void endTagListItem(EndTagToken token) {
  var variant = token.name == "li" ? "list" : null;

  if (!tree.elementInScope(token.name, variant: variant)) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
    return;
  }

  tree.generateImpliedEndTags(token.name);
  var currentMatches = tree.openElements.last.localName == token.name;
  if (!currentMatches) {
    parser.parseError(token.span, "end-tag-too-early", {"name": token.name});
  }
  popOpenElementsUntil(token.name);
}
| 1732 |
/// Handles `h1`–`h6` end tags.
///
/// If any heading element is in scope, implied end tags are generated.
/// "end-tag-too-early" is reported when the current node does not match the
/// tag name. Then, if any heading element is in scope, the open elements are
/// popped until a heading (of any level) has been popped.
void endTagHeading(EndTagToken token) {
  var headingOpen = headingElements.any((name) => tree.elementInScope(name));
  if (headingOpen) tree.generateImpliedEndTags();

  if (tree.openElements.last.localName != token.name) {
    parser.parseError(token.span, "end-tag-too-early", {"name": token.name});
  }

  // Scope is checked again, after the implied end tags, before popping.
  var stillOpen = headingElements.any((name) => tree.elementInScope(name));
  if (stillOpen) {
    var popped;
    do {
      popped = tree.openElements.removeLast();
    } while (!headingElements.contains(popped.localName));
  }
}
| 1754 |
/// The much-feared adoption agency algorithm.
///
/// Handles an end tag whose name matches an entry in the list of active
/// formatting elements. The outer loop runs at most 8 times and the inner
/// loop at most 3 times per outer iteration. Each outer iteration either
/// returns early (steps 1 and 3) or re-parents the furthest block under a
/// fresh clone of the formatting element and updates both the active
/// formatting list and the stack of open elements.
endTagFormatting(EndTagToken token) {
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adoptionAgency
  // TODO(jmesserly): the comments here don't match the numbered steps in the
  // updated spec. This needs a pass over it to verify that it still matches.
  // In particular the html5lib Python code skipped "step 4", I'm not sure why.
  // XXX Better parseError messages appreciated.
  int outerLoopCounter = 0;
  while (outerLoopCounter < 8) {
    outerLoopCounter += 1;

    // Step 1 paragraph 1
    // No matching formatting element, or it is on the stack but not in
    // scope: report the error and ignore the token.
    var formattingElement = tree.elementInActiveFormattingElements(
        token.name);
    if (formattingElement == null ||
        (tree.openElements.contains(formattingElement) &&
         !tree.elementInScope(formattingElement.localName))) {
      parser.parseError(token.span, "adoption-agency-1.1",
          {"name": token.name});
      return;
    // Step 1 paragraph 2
    // The formatting element is no longer on the stack of open elements:
    // drop it from the active formatting list and stop.
    } else if (!tree.openElements.contains(formattingElement)) {
      parser.parseError(token.span, "adoption-agency-1.2",
          {"name": token.name});
      tree.activeFormattingElements.remove(formattingElement);
      return;
    }

    // Step 1 paragraph 3
    // Not the current node: still an error, but processing continues.
    if (formattingElement != tree.openElements.last) {
      parser.parseError(token.span, "adoption-agency-1.3",
          {"name": token.name});
    }

    // Step 2
    // Start of the adoption agency algorithm proper
    // The furthest block is the first "special" element found when walking
    // the stack from the formatting element's position towards the top.
    var afeIndex = tree.openElements.indexOf(formattingElement);
    Node furthestBlock = null;
    for (Node element in slice(tree.openElements, afeIndex)) {
      if (specialElements.contains(getElementNameTuple(element))) {
        furthestBlock = element;
        break;
      }
    }
    // Step 3
    // Without a furthest block, pop up to and including the formatting
    // element, remove it from the active formatting list, and finish.
    if (furthestBlock == null) {
      var element = tree.openElements.removeLast();
      while (element != formattingElement) {
        element = tree.openElements.removeLast();
      }
      tree.activeFormattingElements.remove(element);
      return;
    }

    // The element immediately below the formatting element on the stack.
    var commonAncestor = tree.openElements[afeIndex - 1];

    // Step 5
    // The bookmark is supposed to help us identify where to reinsert
    // nodes in step 12. We have to ensure that we reinsert nodes after
    // the node before the active formatting element. Note the bookmark
    // can move in step 7.4
    var bookmark = tree.activeFormattingElements.indexOf(formattingElement);

    // Step 6
    Node lastNode = furthestBlock;
    var node = furthestBlock;
    int innerLoopCounter = 0;

    var index = tree.openElements.indexOf(node);
    while (innerLoopCounter < 3) {
      innerLoopCounter += 1;

      // Node is element before node in open elements
      index -= 1;
      node = tree.openElements[index];
      // Nodes that are not active formatting elements are simply dropped
      // from the stack; this still counts as one inner-loop iteration.
      if (!tree.activeFormattingElements.contains(node)) {
        tree.openElements.remove(node);
        continue;
      }
      // Step 6.3
      if (node == formattingElement) {
        break;
      }
      // Step 6.4
      // Only the first cloned node (while lastNode is still the furthest
      // block) moves the bookmark, to just after that node's position.
      if (lastNode == furthestBlock) {
        bookmark = (tree.activeFormattingElements.indexOf(node) + 1);
      }
      // Step 6.5
      //cite = node.parent
      var clone = node.clone(false);
      // Replace node with clone
      tree.activeFormattingElements[
          tree.activeFormattingElements.indexOf(node)] = clone;
      tree.openElements[tree.openElements.indexOf(node)] = clone;
      node = clone;

      // Step 6.6
      // Remove lastNode from its parents, if any
      if (lastNode.parentNode != null) {
        lastNode.parentNode.nodes.remove(lastNode);
      }
      node.nodes.add(lastNode);
      // Step 7.7
      // NOTE(review): step label kept from the original; see the TODO above
      // about the numbering not matching the current spec.
      lastNode = node;
      // End of inner loop
    }

    // Step 7
    // Foster parent lastNode if commonAncestor is a
    // table, tbody, tfoot, thead, or tr we need to foster parent the
    // lastNode
    if (lastNode.parentNode != null) {
      lastNode.parentNode.nodes.remove(lastNode);
    }

    if (const ["table", "tbody", "tfoot", "thead", "tr"].contains(
        commonAncestor.localName)) {
      // nodePos is a two-element list: the parent to insert into and the
      // reference node to insert before.
      var nodePos = tree.getTableMisnestedNodePosition();
      nodePos[0].insertBefore(lastNode, nodePos[1]);
    } else {
      commonAncestor.nodes.add(lastNode);
    }

    // Step 8
    var clone = formattingElement.clone(false);

    // Step 9
    // Move the furthest block's children under the new clone.
    furthestBlock.reparentChildren(clone);

    // Step 10
    furthestBlock.nodes.add(clone);

    // Step 11
    // Removing the formatting element can shorten the list, so the bookmark
    // is clamped to the list length before inserting the clone.
    tree.activeFormattingElements.remove(formattingElement);
    tree.activeFormattingElements.insert(
        min(bookmark, tree.activeFormattingElements.length), clone);

    // Step 12
    // On the stack, the clone goes immediately after the furthest block.
    tree.openElements.remove(formattingElement);
    tree.openElements.insert(
        tree.openElements.indexOf(furthestBlock) + 1, clone);
  }
}
| 1898 |
/// Handles the end tags named by this method (applet, marquee, object).
///
/// If the element is in scope, implied end tags are generated. A current node
/// with a different name is reported as "end-tag-too-early". If the element is
/// (still) in scope, the stack is popped up to it and
/// `tree.clearActiveFormattingElements()` is called.
void endTagAppletMarqueeObject(EndTagToken token) {
  final name = token.name;

  if (tree.elementInScope(name)) {
    tree.generateImpliedEndTags();
  }

  if (tree.openElements.last.localName != name) {
    parser.parseError(token.span, "end-tag-too-early", {"name": name});
  }

  // The scope check is repeated on purpose, exactly as before the restyle.
  if (!tree.elementInScope(name)) return;

  popOpenElementsUntil(name);
  tree.clearActiveFormattingElements();
}
| 1911 |
/// Handles a stray `</br>` end tag by treating it as a `<br>` start tag.
///
/// Reports "unexpected-end-tag-treated-as", reconstructs the active formatting
/// elements, inserts a synthetic attribute-less `br` element, and immediately
/// pops it from the stack of open elements.
void endTagBr(EndTagToken token) {
  parser.parseError(token.span, "unexpected-end-tag-treated-as",
      {"originalName": "br", "newName": "br element"});

  tree.reconstructActiveFormattingElements();

  final syntheticBr = new StartTagToken("br", data: {});
  tree.insertElement(syntheticBr);
  // The br element is never left open.
  tree.openElements.removeLast();
}
| 1919 |
/// Handles any end tag that has no dedicated handler in this phase.
///
/// Walks the stack of open elements from the top. On the first element whose
/// name matches [token], implied end tags (except for that name) are
/// generated, a mismatch with the current node is reported as
/// "unexpected-end-tag", and the stack is popped up to and including that
/// element. If a "special" element is met first, "unexpected-end-tag" is
/// reported and the token is ignored.
void endTagOther(EndTagToken token) {
  for (var candidate in tree.openElements.reversed) {
    if (candidate.localName == token.name) {
      tree.generateImpliedEndTags(token.name);
      if (tree.openElements.last.localName != token.name) {
        parser.parseError(token.span, "unexpected-end-tag",
            {"name": token.name});
      }
      // Pop everything above the match, then the match itself.
      var popped;
      do {
        popped = tree.openElements.removeLast();
      } while (popped != candidate);
      return;
    }

    if (specialElements.contains(getElementNameTuple(candidate))) {
      parser.parseError(token.span, "unexpected-end-tag",
          {"name": token.name});
      return;
    }
  }
}
| 1939 } |
| 1940 |
| 1941 |
/// Phase used while the parser is collecting the text content of an element;
/// the note on [processStartTag] refers to this as RCDATA/RAWTEXT mode.
///
/// Character tokens are inserted into the tree as text. Any end tag, and EOF,
/// pops the current element and switches the parser back to
/// `parser.originalPhase`.
class TextPhase extends Phase {
  TextPhase(parser) : super(parser);

  /// Start tags are not expected in this phase; this only asserts.
  // "Tried to process start tag %s in RCDATA/RAWTEXT mode"%token.name
  processStartTag(StartTagToken token) {
    assert(false);
  }

  /// Dispatches `</script>` to [endTagScript] and everything else to
  /// [endTagOther].
  processEndTag(EndTagToken token) {
    if (token.name == 'script') return endTagScript(token);
    return endTagOther(token);
  }

  /// Inserts the token's text into the tree; never asks for reprocessing.
  Token processCharacters(CharactersToken token) {
    tree.insertText(token.data, token.span);
    return null;
  }

  /// Reports the unclosed current element, pops it, restores the original
  /// phase, and returns `true`.
  bool processEOF() {
    var last = tree.openElements.last;
    parser.parseError(last.sourceSpan, "expected-named-closing-tag-but-got-eof",
        {'name': last.localName});
    tree.openElements.removeLast();
    parser.phase = parser.originalPhase;
    return true;
  }

  /// Pops the current node (asserted to be a `script` element) and restores
  /// the original phase.
  void endTagScript(EndTagToken token) {
    var node = tree.openElements.removeLast();
    assert(node.localName == "script");
    parser.phase = parser.originalPhase;
    //The rest of this method is all stuff that only happens if
    //document.write works
  }

  /// Pops the current node and restores the original phase.
  void endTagOther(EndTagToken token) {
    // The popped element is not needed, so it is not bound to a local.
    tree.openElements.removeLast();
    parser.phase = parser.originalPhase;
  }
}
| 1980 |
/// Phase implementing the "in table" insertion mode.
///
/// http://www.whatwg.org/specs/web-apps/current-work/#in-table
class InTablePhase extends Phase {
  InTablePhase(parser) : super(parser);

  /// Routes a start tag to its handler by tag name.
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "caption":
        return startTagCaption(token);
      case "colgroup":
        return startTagColgroup(token);
      case "col":
        return startTagCol(token);
      case "tbody":
      case "tfoot":
      case "thead":
        return startTagRowGroup(token);
      case "td":
      case "th":
      case "tr":
        return startTagImplyTbody(token);
      case "table":
        return startTagTable(token);
      case "style":
      case "script":
        return startTagStyleScript(token);
      case "input":
        return startTagInput(token);
      case "form":
        return startTagForm(token);
      default:
        return startTagOther(token);
    }
  }

  /// Routes an end tag to its handler by tag name.
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "table":
        return endTagTable(token);
      case "body":
      case "caption":
      case "col":
      case "colgroup":
      case "html":
      case "tbody":
      case "td":
      case "tfoot":
      case "th":
      case "thead":
      case "tr":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  // helper methods

  /// "Clear the stack back to a table context": pops open elements until the
  /// current node is a `table` or `html` element.
  void clearStackToTableContext() {
    while (true) {
      final currentName = tree.openElements.last.localName;
      if (currentName == "table" || currentName == "html") break;
      // No parse error is reported for the popped elements (the upstream
      // "unexpected-implied-end-tag-in-table" report is disabled).
      tree.openElements.removeLast();
    }
    // When the current node is <html> it's an innerHTML case
  }

  // processing methods

  /// Reports "eof-in-table" unless the current node is `html` (which is only
  /// expected in innerHTML mode). Always returns `false` to stop parsing.
  bool processEOF() {
    final current = tree.openElements.last;
    if (current.localName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError(current.sourceSpan, "eof-in-table");
    }
    //Stop parsing
    return false;
  }

  /// Switches the parser to the in-table-text phase, remembering the phase
  /// that was active, and forwards the whitespace token to it.
  Token processSpaceCharacters(SpaceCharactersToken token) {
    final tableText = parser._inTableTextPhase;
    tableText.originalPhase = parser.phase;
    parser.phase = tableText;
    tableText.processSpaceCharacters(token);
    return null;
  }

  /// Switches the parser to the in-table-text phase, remembering the phase
  /// that was active, and forwards the character token to it.
  Token processCharacters(CharactersToken token) {
    final tableText = parser._inTableTextPhase;
    tableText.originalPhase = parser.phase;
    parser.phase = tableText;
    tableText.processCharacters(token);
    return null;
  }

  /// Inserts text containing at least one non-whitespace character by
  /// processing it with the in-body phase while `tree.insertFromTable` is set.
  void insertText(CharactersToken token) {
    // If we get here there must be at least one non-whitespace character
    // Do the table magic!
    tree.insertFromTable = true;
    parser._inBodyPhase.processCharacters(token);
    tree.insertFromTable = false;
  }

  /// `<caption>`: clears to table context, pushes a [Marker] onto the active
  /// formatting elements, inserts the element and enters the caption phase.
  void startTagCaption(StartTagToken token) {
    clearStackToTableContext();
    tree.activeFormattingElements.add(Marker);
    tree.insertElement(token);
    parser.phase = parser._inCaptionPhase;
  }

  /// `<colgroup>`: clears to table context, inserts the element and enters
  /// the column-group phase.
  void startTagColgroup(StartTagToken token) {
    clearStackToTableContext();
    tree.insertElement(token);
    parser.phase = parser._inColumnGroupPhase;
  }

  /// `<col>`: implies a `<colgroup>` start tag, then returns [token] so it is
  /// reprocessed.
  Token startTagCol(StartTagToken token) {
    final impliedColgroup = new StartTagToken("colgroup", data: {});
    startTagColgroup(impliedColgroup);
    return token;
  }

  /// `<tbody>`/`<tfoot>`/`<thead>`: clears to table context, inserts the
  /// element and enters the table-body phase.
  void startTagRowGroup(StartTagToken token) {
    clearStackToTableContext();
    tree.insertElement(token);
    parser.phase = parser._inTableBodyPhase;
  }

  /// `<td>`/`<th>`/`<tr>`: implies a `<tbody>` start tag, then returns
  /// [token] so it is reprocessed.
  Token startTagImplyTbody(StartTagToken token) {
    final impliedTbody = new StartTagToken("tbody", data: {});
    startTagRowGroup(impliedTbody);
    return token;
  }

  /// Nested `<table>`: reports an error and processes an implied `</table>`.
  /// Returns [token] for reprocessing unless the parser is in innerHTML mode.
  Token startTagTable(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-implies-end-tag",
        {"startName": "table", "endName": "table"});
    final impliedEnd = new EndTagToken("table");
    parser.phase.processEndTag(impliedEnd);
    return parser.innerHTMLMode ? null : token;
  }

  /// `<style>`/`<script>`: handled by the in-head phase.
  Token startTagStyleScript(StartTagToken token) {
    return parser._inHeadPhase.processStartTag(token);
  }

  /// `<input>`: a `type=hidden` input (ASCII case-insensitive) is inserted
  /// and immediately popped, with a parse error; any other input goes through
  /// [startTagOther].
  void startTagInput(StartTagToken token) {
    if (asciiUpper2Lower(token.data["type"]) != "hidden") {
      startTagOther(token);
      return;
    }
    parser.parseError(token.span, "unexpected-hidden-input-in-table");
    tree.insertElement(token);
    // XXX associate with form
    tree.openElements.removeLast();
  }

  /// `<form>`: always a parse error. If no form pointer is set, the element
  /// is inserted, recorded as the form pointer, and popped right away.
  void startTagForm(StartTagToken token) {
    parser.parseError(token.span, "unexpected-form-in-table");
    if (tree.formPointer != null) return;

    tree.insertElement(token);
    // removeLast() yields the element that was just inserted.
    tree.formPointer = tree.openElements.removeLast();
  }

  /// Any other start tag: reports an error and processes the token with the
  /// in-body phase while `tree.insertFromTable` is set.
  void startTagOther(StartTagToken token) {
    parser.parseError(token.span, "unexpected-start-tag-implies-table-voodoo",
        {"name": token.name});
    // Do the table magic!
    tree.insertFromTable = true;
    parser._inBodyPhase.processStartTag(token);
    tree.insertFromTable = false;
  }

  /// `</table>`: when a table is in table scope, generates implied end tags,
  /// reports a misnested current node, pops up to and including the table and
  /// resets the insertion mode. Otherwise (innerHTML case) reports an error.
  void endTagTable(EndTagToken token) {
    if (!tree.elementInScope("table", variant: "table")) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return;
    }

    tree.generateImpliedEndTags();
    final current = tree.openElements.last;
    if (current.localName != "table") {
      parser.parseError(token.span, "end-tag-too-early-named",
          {"gotName": "table", "expectedName": current.localName});
    }
    // Pop elements until the table element itself has been removed.
    while (tree.openElements.removeLast().localName != "table") {}
    parser.resetInsertionMode();
  }

  /// End tags that are ignored in this phase, with a parse error.
  void endTagIgnore(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
  }

  /// Any other end tag: reports an error and processes the token with the
  /// in-body phase while `tree.insertFromTable` is set.
  void endTagOther(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag-implies-table-voodoo",
        {"name": token.name});
    // Do the table magic!
    tree.insertFromTable = true;
    parser._inBodyPhase.processEndTag(token);
    tree.insertFromTable = false;
  }
}
| 2164 |
/// Phase that buffers character tokens seen in table context.
///
/// Tokens accumulate in [characterTokens]. Any comment, start tag, end tag or
/// EOF flushes the buffer, restores [originalPhase], and hands the triggering
/// token back for reprocessing.
class InTableTextPhase extends Phase {
  /// The phase to return to once the buffered text has been flushed.
  Phase originalPhase;

  /// Character tokens collected since the last flush.
  List<StringToken> characterTokens;

  InTableTextPhase(parser)
      : characterTokens = <StringToken>[],
        super(parser);

  /// Emits the buffered text and empties the buffer.
  ///
  /// Text with any non-whitespace character goes through the in-table phase's
  /// `insertText`; non-empty all-whitespace text is inserted directly into the
  /// tree. When the parser generates spans, the span runs from the first
  /// buffered token to the last.
  void flushCharacters() {
    if (characterTokens.isEmpty) return;

    // TODO(sigmund,jmesserly): remove '' (dartbug.com/8480)
    final text = characterTokens.map((t) => t.data).join('');
    final span = parser.generateSpans
        ? characterTokens.first.span.expand(characterTokens.last.span)
        : null;

    if (!allWhitespace(text)) {
      parser._inTablePhase.insertText(new CharactersToken(text)..span = span);
    } else if (text.isNotEmpty) {
      tree.insertText(text, span);
    }
    characterTokens = <StringToken>[];
  }

  /// Flushes, restores the original phase, and returns [token] to be
  /// reprocessed.
  Token processComment(CommentToken token) {
    flushCharacters();
    parser.phase = originalPhase;
    return token;
  }

  /// Flushes, restores the original phase, and returns `true`.
  bool processEOF() {
    flushCharacters();
    parser.phase = originalPhase;
    return true;
  }

  /// Buffers [token] unless its data is exactly a single U+0000 character.
  Token processCharacters(CharactersToken token) {
    if (token.data != "\u0000") {
      characterTokens.add(token);
    }
    return null;
  }

  /// Buffers a whitespace token.
  Token processSpaceCharacters(SpaceCharactersToken token) {
    //pretty sure we should never reach here
    characterTokens.add(token);
    // XXX assert(false);
    return null;
  }

  /// Flushes, restores the original phase, and returns [token] to be
  /// reprocessed.
  Token processStartTag(StartTagToken token) {
    flushCharacters();
    parser.phase = originalPhase;
    return token;
  }

  /// Flushes, restores the original phase, and returns [token] to be
  /// reprocessed.
  Token processEndTag(EndTagToken token) {
    flushCharacters();
    parser.phase = originalPhase;
    return token;
  }
}
| 2231 |
| 2232 |
/// Phase implementing the "in caption" insertion mode.
///
/// http://www.whatwg.org/specs/web-apps/current-work/#in-caption
class InCaptionPhase extends Phase {
  InCaptionPhase(parser) : super(parser);

  /// Routes a start tag to its handler by tag name.
  processStartTag(StartTagToken token) {
    final name = token.name;
    if (name == "html") return startTagHtml(token);
    if (const ["caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
        "thead", "tr"].contains(name)) {
      return startTagTableElement(token);
    }
    return startTagOther(token);
  }

  /// Routes an end tag to its handler by tag name.
  processEndTag(EndTagToken token) {
    final name = token.name;
    if (name == "caption") return endTagCaption(token);
    if (name == "table") return endTagTable(token);
    if (const ["body", "col", "colgroup", "html", "tbody", "td", "tfoot",
        "th", "thead", "tr"].contains(name)) {
      return endTagIgnore(token);
    }
    return endTagOther(token);
  }

  /// Whether a `</caption>` would be ignored, i.e. no caption element is in
  /// table scope.
  bool ignoreEndTagCaption() =>
      !tree.elementInScope("caption", variant: "table");

  /// Delegates EOF handling to the in-body phase; always returns `false`.
  bool processEOF() {
    parser._inBodyPhase.processEOF();
    return false;
  }

  /// Characters are handled exactly as in the in-body phase.
  Token processCharacters(CharactersToken token) {
    return parser._inBodyPhase.processCharacters(token);
  }

  /// A table-structure start tag inside a caption: reports an error, implies
  /// `</caption>`, and returns [token] for reprocessing unless that implied
  /// end tag was ignored.
  Token startTagTableElement(StartTagToken token) {
    parser.parseError(token.span, "undefined-error");
    //XXX Have to duplicate logic here to find out if the tag is ignored
    final captionIgnored = ignoreEndTagCaption();
    parser.phase.processEndTag(new EndTagToken("caption"));
    return captionIgnored ? null : token;
  }

  /// Any other start tag is handled by the in-body phase.
  Token startTagOther(StartTagToken token) {
    return parser._inBodyPhase.processStartTag(token);
  }

  /// `</caption>`: when a caption is in table scope, generates implied end
  /// tags, reports a misnested current node, pops up to and including the
  /// caption, clears the active formatting elements, and returns to the
  /// in-table phase. Otherwise (innerHTML case) reports an error.
  void endTagCaption(EndTagToken token) {
    if (ignoreEndTagCaption()) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return;
    }

    // AT this code is quite similar to endTagTable in "InTable"
    tree.generateImpliedEndTags();
    final currentName = tree.openElements.last.localName;
    if (currentName != "caption") {
      parser.parseError(token.span, "expected-one-end-tag-but-got-another",
          {"gotName": "caption", "expectedName": currentName});
    }
    // Pop elements until the caption element itself has been removed.
    while (tree.openElements.removeLast().localName != "caption") {}
    tree.clearActiveFormattingElements();
    parser.phase = parser._inTablePhase;
  }

  /// `</table>` inside a caption: reports an error, implies `</caption>`, and
  /// returns [token] for reprocessing unless that implied end tag was ignored.
  Token endTagTable(EndTagToken token) {
    parser.parseError(token.span, "undefined-error");
    final captionIgnored = ignoreEndTagCaption();
    parser.phase.processEndTag(new EndTagToken("caption"));
    return captionIgnored ? null : token;
  }

  /// End tags that are ignored in this phase, with a parse error.
  void endTagIgnore(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
  }

  /// Any other end tag is handled by the in-body phase.
  Token endTagOther(EndTagToken token) {
    return parser._inBodyPhase.processEndTag(token);
  }
}
| 2326 |
| 2327 |
/// Phase implementing the "in column group" insertion mode.
///
/// http://www.whatwg.org/specs/web-apps/current-work/#in-column
class InColumnGroupPhase extends Phase {
  InColumnGroupPhase(parser) : super(parser);

  /// Routes a start tag to its handler by tag name.
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "col":
        return startTagCol(token);
      default:
        return startTagOther(token);
    }
  }

  /// Routes an end tag to its handler by tag name.
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "colgroup":
        return endTagColgroup(token);
      case "col":
        return endTagCol(token);
      default:
        return endTagOther(token);
    }
  }

  /// Whether a `</colgroup>` would be ignored, i.e. the current node is the
  /// `html` element.
  bool ignoreEndTagColgroup() => tree.openElements.last.localName == "html";

  /// At EOF: if the colgroup end tag would be ignored (innerHTML case),
  /// returns `false`; otherwise implies `</colgroup>` and returns `true`.
  bool processEOF() {
    if (ignoreEndTagColgroup()) {
      assert(parser.innerHTMLMode);
      return false;
    }
    endTagColgroup(new EndTagToken("colgroup"));
    return true;
  }

  /// Characters imply `</colgroup>`; [token] is returned for reprocessing
  /// unless the implied end tag was ignored.
  Token processCharacters(CharactersToken token) {
    final colgroupIgnored = ignoreEndTagColgroup();
    endTagColgroup(new EndTagToken("colgroup"));
    if (colgroupIgnored) return null;
    return token;
  }

  /// `<col>`: inserted and immediately popped.
  void startTagCol(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
  }

  /// Any other start tag implies `</colgroup>`; [token] is returned for
  /// reprocessing unless the implied end tag was ignored.
  Token startTagOther(StartTagToken token) {
    final colgroupIgnored = ignoreEndTagColgroup();
    endTagColgroup(new EndTagToken("colgroup"));
    if (colgroupIgnored) return null;
    return token;
  }

  /// `</colgroup>`: pops the current node and returns to the in-table phase,
  /// or reports an error when the tag must be ignored (innerHTML case).
  void endTagColgroup(EndTagToken token) {
    if (!ignoreEndTagColgroup()) {
      tree.openElements.removeLast();
      parser.phase = parser._inTablePhase;
      return;
    }
    // innerHTML case
    assert(parser.innerHTMLMode);
    parser.parseError(token.span, "undefined-error");
  }

  /// `</col>` is always a parse error.
  void endTagCol(EndTagToken token) {
    parser.parseError(token.span, "no-end-tag", {"name": "col"});
  }

  /// Any other end tag implies `</colgroup>`; [token] is returned for
  /// reprocessing unless the implied end tag was ignored.
  Token endTagOther(EndTagToken token) {
    final colgroupIgnored = ignoreEndTagColgroup();
    endTagColgroup(new EndTagToken("colgroup"));
    if (colgroupIgnored) return null;
    return token;
  }
}
| 2401 |
| 2402 |
/// Phase implementing the "in table body" insertion mode.
///
/// http://www.whatwg.org/specs/web-apps/current-work/#in-table0
class InTableBodyPhase extends Phase {
  InTableBodyPhase(parser) : super(parser);

  /// Routes a start tag to its handler by tag name.
  processStartTag(StartTagToken token) {
    final name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "tr") return startTagTr(token);
    if (name == "td" || name == "th") return startTagTableCell(token);
    if (const ["caption", "col", "colgroup", "tbody", "tfoot", "thead"]
        .contains(name)) {
      return startTagTableOther(token);
    }
    return startTagOther(token);
  }

  /// Routes an end tag to its handler by tag name.
  processEndTag(EndTagToken token) {
    final name = token.name;
    if (name == "tbody" || name == "tfoot" || name == "thead") {
      return endTagTableRowGroup(token);
    }
    if (name == "table") return endTagTable(token);
    if (const ["body", "caption", "col", "colgroup", "html", "td", "th",
        "tr"].contains(name)) {
      return endTagIgnore(token);
    }
    return endTagOther(token);
  }

  // helper methods

  /// Pops open elements until the current node is `tbody`, `tfoot`, `thead`
  /// or `html`. Ending on `html` is only expected in innerHTML mode.
  void clearStackToTableBodyContext() {
    final contextTags = const ["tbody", "tfoot", "thead", "html"];
    while (!contextTags.contains(tree.openElements.last.localName)) {
      // No parse error is reported for the popped elements (the upstream
      // "unexpected-implied-end-tag-in-table" report is disabled).
      tree.openElements.removeLast();
    }
    if (tree.openElements.last.localName == "html") {
      assert(parser.innerHTMLMode);
    }
  }

  // the rest

  /// Delegates EOF handling to the in-table phase; always returns `false`.
  bool processEOF() {
    parser._inTablePhase.processEOF();
    return false;
  }

  /// Whitespace is handled exactly as in the in-table phase.
  Token processSpaceCharacters(SpaceCharactersToken token) {
    return parser._inTablePhase.processSpaceCharacters(token);
  }

  /// Characters are handled exactly as in the in-table phase.
  Token processCharacters(CharactersToken token) {
    return parser._inTablePhase.processCharacters(token);
  }

  /// `<tr>`: clears to table-body context, inserts the row and enters the
  /// in-row phase.
  void startTagTr(StartTagToken token) {
    clearStackToTableBodyContext();
    tree.insertElement(token);
    parser.phase = parser._inRowPhase;
  }

  /// `<td>`/`<th>` directly in a row group: reports an error, implies a
  /// `<tr>`, and returns [token] for reprocessing.
  Token startTagTableCell(StartTagToken token) {
    parser.parseError(token.span, "unexpected-cell-in-table-body",
        {"name": token.name});
    final impliedRow = new StartTagToken("tr", data: {});
    startTagTr(impliedRow);
    return token;
  }

  /// Table-structure start tags close the current row group the same way
  /// `</table>` does.
  Token startTagTableOther(token) => endTagTable(token);

  /// Any other start tag is handled by the in-table phase.
  Token startTagOther(StartTagToken token) {
    return parser._inTablePhase.processStartTag(token);
  }

  /// `</tbody>`/`</tfoot>`/`</thead>`: when the named element is in table
  /// scope, clears to table-body context, pops the row group and returns to
  /// the in-table phase; otherwise reports an error.
  void endTagTableRowGroup(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      parser.parseError(token.span, "unexpected-end-tag-in-table-body",
          {"name": token.name});
      return;
    }
    clearStackToTableBodyContext();
    tree.openElements.removeLast();
    parser.phase = parser._inTablePhase;
  }

  /// Closes the open row group and returns [token] for reprocessing. When no
  /// `tbody`, `thead` or `tfoot` is in table scope (innerHTML case), reports
  /// an error and returns `null`.
  Token endTagTable(TagToken token) {
    // XXX AT Any ideas on how to share this with endTagTable?
    final rowGroupInScope = const ["tbody", "thead", "tfoot"]
        .any((name) => tree.elementInScope(name, variant: "table"));

    if (!rowGroupInScope) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return null;
    }

    clearStackToTableBodyContext();
    // The current node is now the row group; close it by its own name.
    endTagTableRowGroup(new EndTagToken(tree.openElements.last.localName));
    return token;
  }

  /// End tags that are ignored in this phase, with a parse error.
  void endTagIgnore(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag-in-table-body",
        {"name": token.name});
  }

  /// Any other end tag is handled by the in-table phase.
  Token endTagOther(EndTagToken token) {
    return parser._inTablePhase.processEndTag(token);
  }
}
| 2513 |
| 2514 |
/// Phase implementing the "in row" insertion mode.
///
/// http://www.whatwg.org/specs/web-apps/current-work/#in-row
class InRowPhase extends Phase {
  InRowPhase(parser) : super(parser);

  /// Routes a start tag to its handler by tag name.
  processStartTag(StartTagToken token) {
    final name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "td" || name == "th") return startTagTableCell(token);
    if (const ["caption", "col", "colgroup", "tbody", "tfoot", "thead",
        "tr"].contains(name)) {
      return startTagTableOther(token);
    }
    return startTagOther(token);
  }

  /// Routes an end tag to its handler by tag name.
  processEndTag(EndTagToken token) {
    final name = token.name;
    if (name == "tr") return endTagTr(token);
    if (name == "table") return endTagTable(token);
    if (name == "tbody" || name == "tfoot" || name == "thead") {
      return endTagTableRowGroup(token);
    }
    if (const ["body", "caption", "col", "colgroup", "html", "td", "th"]
        .contains(name)) {
      return endTagIgnore(token);
    }
    return endTagOther(token);
  }

  // helper methods (XXX unify this with other table helper methods)

  /// Pops open elements until the current node is `tr` or `html`, reporting
  /// "unexpected-implied-end-tag-in-table-row" for each popped element.
  void clearStackToTableRowContext() {
    for (var current = tree.openElements.last;
        current.localName != "tr" && current.localName != "html";
        current = tree.openElements.last) {
      parser.parseError(current.sourceSpan,
          "unexpected-implied-end-tag-in-table-row",
          {"name": current.localName});
      tree.openElements.removeLast();
    }
  }

  /// Whether a `</tr>` would be ignored, i.e. no `tr` is in table scope.
  bool ignoreEndTagTr() => !tree.elementInScope("tr", variant: "table");

  // the rest

  /// Delegates EOF handling to the in-table phase; always returns `false`.
  bool processEOF() {
    parser._inTablePhase.processEOF();
    return false;
  }

  /// Whitespace is handled exactly as in the in-table phase.
  Token processSpaceCharacters(SpaceCharactersToken token) {
    return parser._inTablePhase.processSpaceCharacters(token);
  }

  /// Characters are handled exactly as in the in-table phase.
  Token processCharacters(CharactersToken token) {
    return parser._inTablePhase.processCharacters(token);
  }

  /// `<td>`/`<th>`: clears to row context, inserts the cell, enters the
  /// in-cell phase, and pushes a [Marker] onto the active formatting elements.
  void startTagTableCell(StartTagToken token) {
    clearStackToTableRowContext();
    tree.insertElement(token);
    parser.phase = parser._inCellPhase;
    tree.activeFormattingElements.add(Marker);
  }

  /// Table-structure start tags imply `</tr>`; [token] is returned for
  /// reprocessing unless the implied end tag was ignored.
  Token startTagTableOther(StartTagToken token) {
    final trIgnored = ignoreEndTagTr();
    endTagTr(new EndTagToken("tr"));
    // XXX how are we sure it's always ignored in the innerHTML case?
    if (trIgnored) return null;
    return token;
  }

  /// Any other start tag is handled by the in-table phase.
  Token startTagOther(StartTagToken token) {
    return parser._inTablePhase.processStartTag(token);
  }

  /// `</tr>`: clears to row context, pops the row and returns to the
  /// table-body phase, or reports an error when the tag must be ignored
  /// (innerHTML case).
  void endTagTr(EndTagToken token) {
    if (ignoreEndTagTr()) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return;
    }
    clearStackToTableRowContext();
    tree.openElements.removeLast();
    parser.phase = parser._inTableBodyPhase;
  }

  /// `</table>` implies `</tr>`; [token] is returned for reprocessing unless
  /// the implied end tag was ignored.
  Token endTagTable(EndTagToken token) {
    final trIgnored = ignoreEndTagTr();
    endTagTr(new EndTagToken("tr"));
    // Reprocess the current tag if the tr end tag was not ignored
    // XXX how are we sure it's always ignored in the innerHTML case?
    if (trIgnored) return null;
    return token;
  }

  /// `</tbody>`/`</tfoot>`/`</thead>`: when the named element is in table
  /// scope, implies `</tr>` and returns [token] for reprocessing; otherwise
  /// reports an error and returns `null`.
  Token endTagTableRowGroup(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      parser.parseError(token.span, "undefined-error");
      return null;
    }
    endTagTr(new EndTagToken("tr"));
    return token;
  }

  /// End tags that are ignored in this phase, with a parse error.
  void endTagIgnore(EndTagToken token) {
    parser.parseError(token.span, "unexpected-end-tag-in-table-row",
        {"name": token.name});
  }

  /// Any other end tag is handled by the in-table phase.
  Token endTagOther(EndTagToken token) {
    return parser._inTablePhase.processEndTag(token);
  }
}
| 2631 |
| 2632 class InCellPhase extends Phase { |
// Implements the "in cell" insertion mode:
// http://www.whatwg.org/specs/web-apps/current-work/#in-cell
// NOTE(review): the "#" fragment was "///" in the original comment — confirm
// the anchor against the current spec.
/// Creates the phase for the given parser, which is forwarded unchanged to
/// the [Phase] constructor.
InCellPhase(parser) : super(parser);
| 2635 |
/// Routes a start tag to its handler by tag name: `html` to [startTagHtml],
/// table-structure tags to [startTagTableOther], everything else to
/// [startTagOther].
processStartTag(StartTagToken token) {
  final name = token.name;
  if (name == "html") return startTagHtml(token);
  if (const ["caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
      "thead", "tr"].contains(name)) {
    return startTagTableOther(token);
  }
  return startTagOther(token);
}
| 2645 |
| 2646 processEndTag(EndTagToken token) { |
| 2647 switch (token.name) { |
| 2648 case "td": case "th": |
| 2649 return endTagTableCell(token); |
| 2650 case "body": case "caption": case "col": case "colgroup": case "html": |
| 2651 return endTagIgnore(token); |
| 2652 case "table": case "tbody": case "tfoot": case "thead": case "tr": |
| 2653 return endTagImply(token); |
| 2654 default: return endTagOther(token); |
| 2655 } |
| 2656 } |
| 2657 |
| 2658 // helper |
| 2659 void closeCell() { |
| 2660 if (tree.elementInScope("td", variant: "table")) { |
| 2661 endTagTableCell(new EndTagToken("td")); |
| 2662 } else if (tree.elementInScope("th", variant: "table")) { |
| 2663 endTagTableCell(new EndTagToken("th")); |
| 2664 } |
| 2665 } |
| 2666 |
| 2667 // the rest |
| 2668 bool processEOF() { |
| 2669 parser._inBodyPhase.processEOF(); |
| 2670 return false; |
| 2671 } |
| 2672 |
| 2673 Token processCharacters(CharactersToken token) { |
| 2674 return parser._inBodyPhase.processCharacters(token); |
| 2675 } |
| 2676 |
| 2677 Token startTagTableOther(StartTagToken token) { |
| 2678 if (tree.elementInScope("td", variant: "table") || |
| 2679 tree.elementInScope("th", variant: "table")) { |
| 2680 closeCell(); |
| 2681 return token; |
| 2682 } else { |
| 2683 // innerHTML case |
| 2684 assert(parser.innerHTMLMode); |
| 2685 parser.parseError(token.span, "undefined-error"); |
| 2686 return null; |
| 2687 } |
| 2688 } |
| 2689 |
| 2690 Token startTagOther(StartTagToken token) { |
| 2691 return parser._inBodyPhase.processStartTag(token); |
| 2692 } |
| 2693 |
| 2694 void endTagTableCell(EndTagToken token) { |
| 2695 if (tree.elementInScope(token.name, variant: "table")) { |
| 2696 tree.generateImpliedEndTags(token.name); |
| 2697 if (tree.openElements.last.localName != token.name) { |
| 2698 parser.parseError(token.span, "unexpected-cell-end-tag", |
| 2699 {"name": token.name}); |
| 2700 popOpenElementsUntil(token.name); |
| 2701 } else { |
| 2702 tree.openElements.removeLast(); |
| 2703 } |
| 2704 tree.clearActiveFormattingElements(); |
| 2705 parser.phase = parser._inRowPhase; |
| 2706 } else { |
| 2707 parser.parseError(token.span, "unexpected-end-tag", {"name": token.name}); |
| 2708 } |
| 2709 } |
| 2710 |
| 2711 void endTagIgnore(EndTagToken token) { |
| 2712 parser.parseError(token.span, "unexpected-end-tag", {"name": token.name}); |
| 2713 } |
| 2714 |
| 2715 Token endTagImply(EndTagToken token) { |
| 2716 if (tree.elementInScope(token.name, variant: "table")) { |
| 2717 closeCell(); |
| 2718 return token; |
| 2719 } else { |
| 2720 // sometimes innerHTML case |
| 2721 parser.parseError(token.span, "undefined-error"); |
| 2722 } |
| 2723 return null; |
| 2724 } |
| 2725 |
| 2726 Token endTagOther(EndTagToken token) { |
| 2727 return parser._inBodyPhase.processEndTag(token); |
| 2728 } |
| 2729 } |
| 2730 |
/// Tree-construction phase used while a `select` element is open.
///
/// Only `option`, `optgroup` and a few special tags are acted upon; anything
/// else is reported as a parse error and dropped.
class InSelectPhase extends Phase {
  InSelectPhase(parser) : super(parser);

  /// Routes a start tag to the matching handler based on its name.
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "option":
        return startTagOption(token);
      case "optgroup":
        return startTagOptgroup(token);
      case "select":
        return startTagSelect(token);
      case "input":
      case "keygen":
      case "textarea":
        return startTagInput(token);
      case "script":
        return startTagScript(token);
      default:
        return startTagOther(token);
    }
  }

  /// Routes an end tag to the matching handler based on its name.
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "option":
        return endTagOption(token);
      case "optgroup":
        return endTagOptgroup(token);
      case "select":
        return endTagSelect(token);
      default:
        return endTagOther(token);
    }
  }

  // See the "in select" insertion mode of the HTML parsing specification.

  /// Reports "eof-in-select" unless the current node is the root `html`
  /// element. Always returns `false`.
  bool processEOF() {
    final current = tree.openElements.last;
    if (current.localName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError(current.sourceSpan, "eof-in-select");
    }
    return false;
  }

  /// Inserts the token's text, except that a token consisting of a single
  /// NUL character is dropped. Always returns `null`.
  Token processCharacters(CharactersToken token) {
    if (token.data != "\u0000") {
      tree.insertText(token.data, token.span);
    }
    return null;
  }

  /// Inserts an `option`, first popping a currently open `option`.
  void startTagOption(StartTagToken token) {
    final stack = tree.openElements;
    // We need to imply </option> if <option> is the current node.
    if (stack.last.localName == "option") {
      stack.removeLast();
    }
    tree.insertElement(token);
  }

  /// Inserts an `optgroup`, first popping a currently open `option` and then
  /// a currently open `optgroup`.
  void startTagOptgroup(StartTagToken token) {
    final stack = tree.openElements;
    if (stack.last.localName == "option") {
      stack.removeLast();
    }
    if (stack.last.localName == "optgroup") {
      stack.removeLast();
    }
    tree.insertElement(token);
  }

  /// A nested `<select>` is a parse error and is treated like `</select>`.
  void startTagSelect(StartTagToken token) {
    parser.parseError(token.span, "unexpected-select-in-select");
    endTagSelect(new EndTagToken("select"));
  }

  /// Handles `input`, `keygen` and `textarea` start tags, which are always a
  /// parse error here.
  ///
  /// When a `select` is in select scope it is closed and [token] is returned
  /// for reprocessing; otherwise `null` is returned.
  Token startTagInput(StartTagToken token) {
    parser.parseError(token.span, "unexpected-input-in-select");
    if (!tree.elementInScope("select", variant: "select")) {
      assert(parser.innerHTMLMode);
      return null;
    }

    endTagSelect(new EndTagToken("select"));
    return token;
  }

  /// `script` start tags are handled by the "in head" phase.
  Token startTagScript(StartTagToken token) =>
      parser._inHeadPhase.processStartTag(token);

  /// Any other start tag is a parse error and is ignored.
  Token startTagOther(StartTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "unexpected-start-tag-in-select", details);
    return null;
  }

  /// Pops the current node if it is an `option`; otherwise reports a parse
  /// error.
  void endTagOption(EndTagToken token) {
    final stack = tree.openElements;
    if (stack.last.localName != "option") {
      parser.parseError(
          token.span, "unexpected-end-tag-in-select", {"name": "option"});
      return;
    }
    stack.removeLast();
  }

  /// Handles `</optgroup>`.
  void endTagOptgroup(EndTagToken token) {
    final stack = tree.openElements;
    // </optgroup> implicitly closes <option>, but only when that option sits
    // directly inside an optgroup.
    if (stack.last.localName == "option" &&
        stack[stack.length - 2].localName == "optgroup") {
      stack.removeLast();
    }
    // It also closes the optgroup itself, but nothing else.
    if (stack.last.localName != "optgroup") {
      parser.parseError(
          token.span, "unexpected-end-tag-in-select", {"name": "optgroup"});
      return;
    }
    stack.removeLast();
  }

  /// Handles `</select>`: pops the stack through the `select` element and
  /// resets the insertion mode, or reports a parse error when no `select` is
  /// in select scope.
  void endTagSelect(EndTagToken token) {
    if (!tree.elementInScope("select", variant: "select")) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError(token.span, "undefined-error");
      return;
    }

    popOpenElementsUntil("select");
    parser.resetInsertionMode();
  }

  /// Any other end tag is a parse error and is ignored.
  void endTagOther(EndTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "unexpected-end-tag-in-select", details);
  }
}
| 2860 |
| 2861 |
/// Variant of the "in select" phase that additionally reacts to
/// table-structure tags; everything else is forwarded to the "in select"
/// phase.
class InSelectInTablePhase extends Phase {
  /// Tag names that get the table-specific handling, for both start and end
  /// tags.
  static const _tableTagNames = const [
    "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"
  ];

  InSelectInTablePhase(parser) : super(parser);

  /// Routes table-structure start tags to [startTagTable] and all others to
  /// [startTagOther].
  processStartTag(StartTagToken token) {
    if (_tableTagNames.contains(token.name)) {
      return startTagTable(token);
    }
    return startTagOther(token);
  }

  /// Routes table-structure end tags to [endTagTable] and all others to
  /// [endTagOther].
  processEndTag(EndTagToken token) {
    if (_tableTagNames.contains(token.name)) {
      return endTagTable(token);
    }
    return endTagOther(token);
  }

  /// Lets the "in select" phase handle end-of-file and always returns
  /// `false`.
  bool processEOF() {
    parser._inSelectPhase.processEOF();
    return false;
  }

  /// Character data is handled exactly as in the "in select" phase.
  Token processCharacters(CharactersToken token) =>
      parser._inSelectPhase.processCharacters(token);

  /// A table-structure start tag is a parse error; an implied `</select>` is
  /// processed and [token] is returned for reprocessing.
  Token startTagTable(StartTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span,
        "unexpected-table-element-start-tag-in-select-in-table", details);
    endTagOther(new EndTagToken("select"));
    return token;
  }

  /// Any other start tag is handled by the "in select" phase.
  Token startTagOther(StartTagToken token) =>
      parser._inSelectPhase.processStartTag(token);

  /// A table-structure end tag is always a parse error.
  ///
  /// When the named element is in table scope an implied `</select>` is
  /// processed and [token] is returned for reprocessing; otherwise `null` is
  /// returned.
  Token endTagTable(EndTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span,
        "unexpected-table-element-end-tag-in-select-in-table", details);
    if (!tree.elementInScope(token.name, variant: "table")) {
      return null;
    }
    endTagOther(new EndTagToken("select"));
    return token;
  }

  /// Any other end tag is handled by the "in select" phase.
  Token endTagOther(EndTagToken token) =>
      parser._inSelectPhase.processEndTag(token);
}
| 2919 |
| 2920 |
/// Phase that processes tokens while the current node is in a non-default
/// namespace (the code below special-cases MathML and SVG).
class InForeignContentPhase extends Phase {
  /// Names of start tags that force the parser out of foreign content.
  // TODO(jmesserly): this is sorted so we could binary search.
  static const breakoutElements = const [
    'b', 'big', 'blockquote', 'body', 'br','center', 'code', 'dd', 'div', 'dl',
    'dt', 'em', 'embed', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'i',
    'img', 'li', 'listing', 'menu', 'meta', 'nobr', 'ol', 'p', 'pre', 'ruby',
    's', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table', 'tt', 'u',
    'ul', 'var'
  ];

  InForeignContentPhase(parser) : super(parser);

  /// Rewrites the lower-cased name of [token] to its mixed-case SVG spelling
  /// when it is one of the known SVG element names; leaves it unchanged
  /// otherwise.
  void adjustSVGTagNames(token) {
    final replacements = const {
      "altglyph":"altGlyph",
      "altglyphdef":"altGlyphDef",
      "altglyphitem":"altGlyphItem",
      "animatecolor":"animateColor",
      "animatemotion":"animateMotion",
      "animatetransform":"animateTransform",
      "clippath":"clipPath",
      "feblend":"feBlend",
      "fecolormatrix":"feColorMatrix",
      "fecomponenttransfer":"feComponentTransfer",
      "fecomposite":"feComposite",
      "feconvolvematrix":"feConvolveMatrix",
      "fediffuselighting":"feDiffuseLighting",
      "fedisplacementmap":"feDisplacementMap",
      "fedistantlight":"feDistantLight",
      "feflood":"feFlood",
      "fefunca":"feFuncA",
      "fefuncb":"feFuncB",
      "fefuncg":"feFuncG",
      "fefuncr":"feFuncR",
      "fegaussianblur":"feGaussianBlur",
      "feimage":"feImage",
      "femerge":"feMerge",
      "femergenode":"feMergeNode",
      "femorphology":"feMorphology",
      "feoffset":"feOffset",
      "fepointlight":"fePointLight",
      "fespecularlighting":"feSpecularLighting",
      "fespotlight":"feSpotLight",
      "fetile":"feTile",
      "feturbulence":"feTurbulence",
      "foreignobject":"foreignObject",
      "glyphref":"glyphRef",
      "lineargradient":"linearGradient",
      "radialgradient":"radialGradient",
      "textpath":"textPath"
    };

    var replace = replacements[token.name];
    if (replace != null) {
      token.name = replace;
    }
  }

  /// Replaces a lone NUL character with U+FFFD, clears `framesetOK` when the
  /// data contains non-whitespace, and then defers to the base [Phase].
  Token processCharacters(CharactersToken token) {
    if (token.data == "\u0000") {
      token.data = "\uFFFD";
    } else if (parser.framesetOK && !allWhitespace(token.data)) {
      parser.framesetOK = false;
    }
    return super.processCharacters(token);
  }

  /// Handles a start tag in foreign content.
  ///
  /// Breakout elements (and `font` carrying a `color`, `face` or `size`
  /// attribute) are a parse error: foreign elements are popped until an
  /// HTML element or integration point is current, and [token] is returned
  /// for reprocessing. Any other tag is inserted as an element in the
  /// current node's namespace and `null` is returned.
  Token processStartTag(StartTagToken token) {
    var currentNode = tree.openElements.last;
    if (breakoutElements.contains(token.name) ||
        (token.name == "font" &&
         (token.data.containsKey("color") ||
          token.data.containsKey("face") ||
          token.data.containsKey("size")))) {

      parser.parseError(token.span,
          "unexpected-html-element-in-foreign-content", {'name': token.name});
      while (tree.openElements.last.namespaceUri !=
           tree.defaultNamespace &&
           !parser.isHTMLIntegrationPoint(tree.openElements.last) &&
           !parser.isMathMLTextIntegrationPoint(tree.openElements.last)) {
        tree.openElements.removeLast();
      }
      return token;

    } else {
      if (currentNode.namespaceUri == Namespaces.mathml) {
        parser.adjustMathMLAttributes(token);
      } else if (currentNode.namespaceUri == Namespaces.svg) {
        adjustSVGTagNames(token);
        parser.adjustSVGAttributes(token);
      }
      parser.adjustForeignAttributes(token);
      token.namespace = currentNode.namespaceUri;
      tree.insertElement(token);
      if (token.selfClosing) {
        tree.openElements.removeLast();
        token.selfClosingAcknowledged = true;
      }
      return null;
    }
  }

  /// Handles an end tag in foreign content.
  ///
  /// Walks the stack of open elements from the top. If a foreign element
  /// whose ASCII-lowercased name equals the token name is found, everything
  /// up to and including it is popped and `null` is returned. On reaching an
  /// element in the default namespace first, the token is handed to the
  /// parser's current phase and that phase's result is returned.
  Token processEndTag(EndTagToken token) {
    var nodeIndex = tree.openElements.length - 1;
    var node = tree.openElements.last;
    // Compare case-insensitively: element names may have been camel-cased by
    // adjustSVGTagNames (e.g. "foreignObject"), while the end tag's name is
    // compared in lower case by the loop below. A case-sensitive check here
    // would report a bogus error for a correctly matched end tag.
    if (asciiUpper2Lower(node.localName) != token.name) {
      parser.parseError(token.span, "unexpected-end-tag", {"name": token.name});
    }

    var newToken = null;
    while (true) {
      if (asciiUpper2Lower(node.localName) == token.name) {
        //XXX this isn't in the spec but it seems necessary
        if (parser.phase == parser._inTableTextPhase) {
          InTableTextPhase inTableText = parser.phase;
          inTableText.flushCharacters();
          parser.phase = inTableText.originalPhase;
        }
        // Pop until the matched node itself has been removed.
        while (tree.openElements.removeLast() != node) {
          assert(tree.openElements.length > 0);
        }
        newToken = null;
        break;
      }
      nodeIndex -= 1;

      node = tree.openElements[nodeIndex];
      if (node.namespaceUri != tree.defaultNamespace) {
        continue;
      } else {
        newToken = parser.phase.processEndTag(token);
        break;
      }
    }
    return newToken;
  }
}
| 3059 |
| 3060 |
/// Phase entered after the `body` element has been closed.
///
/// Apart from comments and `</html>`, any content is a parse error that
/// sends the parser back to the "in body" phase.
class AfterBodyPhase extends Phase {
  AfterBodyPhase(parser) : super(parser);

  /// Routes `html` start tags to [startTagHtml] and all others to
  /// [startTagOther].
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      default:
        return startTagOther(token);
    }
  }

  /// Routes `html` end tags to [endTagHtml] and all others to
  /// [endTagOther].
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "html":
        return endTagHtml(token);
      default:
        return endTagOther(token);
    }
  }

  /// Stop parsing.
  bool processEOF() {
    return false;
  }

  /// Attaches the comment to the first open element (the `<html>` element)
  /// rather than to whatever node is currently open.
  Token processComment(CommentToken token) {
    final htmlElement = tree.openElements[0];
    tree.insertComment(token, htmlElement);
    return null;
  }

  /// Character data here is a parse error; switches back to the "in body"
  /// phase and returns [token] for reprocessing.
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "unexpected-char-after-body");
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /// `html` start tags are handled by the "in body" phase.
  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// Any other start tag is a parse error; switches back to the "in body"
  /// phase and returns [token] for reprocessing.
  Token startTagOther(StartTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "unexpected-start-tag-after-body", details);
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /// Handles `</html>`: a parse error in innerHTML mode, otherwise a switch
  /// to the "after after body" phase.
  void endTagHtml(Token token) {
    if (!parser.innerHTMLMode) {
      parser.phase = parser._afterAfterBodyPhase;
      return;
    }
    parser.parseError(token.span, "unexpected-end-tag-after-body-innerhtml");
  }

  /// Any other end tag is a parse error; switches back to the "in body"
  /// phase and returns [token] for reprocessing.
  Token endTagOther(EndTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "unexpected-end-tag-after-body", details);
    parser.phase = parser._inBodyPhase;
    return token;
  }
}
| 3116 |
/// Tree-construction phase used while a `frameset` element is open.
class InFramesetPhase extends Phase {
  // See the "in frameset" insertion mode of the HTML parsing specification.
  InFramesetPhase(parser) : super(parser);

  /// Routes a start tag to the matching handler based on its name.
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "frameset":
        return startTagFrameset(token);
      case "frame":
        return startTagFrame(token);
      case "noframes":
        return startTagNoframes(token);
      default:
        return startTagOther(token);
    }
  }

  /// Routes `frameset` end tags to [endTagFrameset] and all others to
  /// [endTagOther].
  processEndTag(EndTagToken token) {
    if (token.name == "frameset") {
      return endTagFrameset(token);
    }
    return endTagOther(token);
  }

  /// Reports "eof-in-frameset" unless the current node is the root `html`
  /// element. Always returns `false`.
  bool processEOF() {
    final current = tree.openElements.last;
    if (current.localName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError(current.sourceSpan, "eof-in-frameset");
    }
    return false;
  }

  /// Character data is a parse error here and is dropped.
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "unexpected-char-in-frameset");
    return null;
  }

  /// Inserts a nested `frameset` element and leaves it open.
  void startTagFrameset(StartTagToken token) {
    tree.insertElement(token);
  }

  /// Inserts a `frame` element and immediately pops it off the stack.
  void startTagFrame(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
  }

  /// `noframes` start tags are handled by the "in body" phase.
  Token startTagNoframes(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// Any other start tag is a parse error and is ignored.
  Token startTagOther(StartTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "unexpected-start-tag-in-frameset", details);
    return null;
  }

  /// Handles `</frameset>`.
  ///
  /// Pops the current node, unless it is the root `html` element, in which
  /// case a parse error is recorded instead. Outside innerHTML mode the
  /// parser then moves to the "after frameset" phase once the current node
  /// is no longer a `frameset`.
  void endTagFrameset(EndTagToken token) {
    final stack = tree.openElements;
    if (stack.last.localName != "html") {
      stack.removeLast();
    } else {
      // innerHTML case
      parser.parseError(token.span,
          "unexpected-frameset-in-frameset-innerhtml");
    }

    if (parser.innerHTMLMode) return;
    // Not in innerHTML mode: switch once the current node is not a
    // "frameset" element (anymore).
    if (stack.last.localName != "frameset") {
      parser.phase = parser._afterFramesetPhase;
    }
  }

  /// Any other end tag is a parse error and is ignored.
  void endTagOther(EndTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "unexpected-end-tag-in-frameset", details);
  }
}
| 3193 |
| 3194 |
/// Phase entered after the outermost `frameset` element has been closed.
class AfterFramesetPhase extends Phase {
  // See the "after frameset" insertion mode of the HTML parsing
  // specification.
  AfterFramesetPhase(parser) : super(parser);

  /// Routes a start tag to the matching handler based on its name.
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "noframes":
        return startTagNoframes(token);
      default:
        return startTagOther(token);
    }
  }

  /// Routes `html` end tags to [endTagHtml] and all others to
  /// [endTagOther].
  processEndTag(EndTagToken token) {
    if (token.name == "html") {
      return endTagHtml(token);
    }
    return endTagOther(token);
  }

  /// Stop parsing.
  bool processEOF() {
    return false;
  }

  /// Character data is a parse error here and is dropped.
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "unexpected-char-after-frameset");
    return null;
  }

  /// `noframes` start tags are handled by the "in head" phase.
  Token startTagNoframes(StartTagToken token) =>
      parser._inHeadPhase.processStartTag(token);

  /// Any other start tag is a parse error and is ignored.
  void startTagOther(StartTagToken token) {
    final details = {"name": token.name};
    parser.parseError(
        token.span, "unexpected-start-tag-after-frameset", details);
  }

  /// `</html>` moves the parser to the "after after frameset" phase.
  void endTagHtml(EndTagToken token) {
    parser.phase = parser._afterAfterFramesetPhase;
  }

  /// Any other end tag is a parse error and is ignored.
  void endTagOther(EndTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "unexpected-end-tag-after-frameset", details);
  }
}
| 3240 |
| 3241 |
/// Phase entered after `</html>` has been seen following the body.
///
/// Only comments, whitespace and `html` start tags are accepted quietly;
/// anything else is a parse error that returns the parser to the "in body"
/// phase.
class AfterAfterBodyPhase extends Phase {
  AfterAfterBodyPhase(parser) : super(parser);

  /// Routes `html` start tags to [startTagHtml] and all others to
  /// [startTagOther].
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case 'html':
        return startTagHtml(token);
      default:
        return startTagOther(token);
    }
  }

  /// Stop parsing.
  bool processEOF() {
    return false;
  }

  /// Comments are attached to the document itself.
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  /// Whitespace is handled by the "in body" phase.
  Token processSpaceCharacters(SpaceCharactersToken token) =>
      parser._inBodyPhase.processSpaceCharacters(token);

  /// Character data is a parse error; switches back to the "in body" phase
  /// and returns [token] for reprocessing.
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "expected-eof-but-got-char");
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /// `html` start tags are handled by the "in body" phase.
  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// Any other start tag is a parse error; switches back to the "in body"
  /// phase and returns [token] for reprocessing.
  Token startTagOther(StartTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "expected-eof-but-got-start-tag", details);
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /// Every end tag is a parse error; switches back to the "in body" phase
  /// and returns [token] for reprocessing.
  Token processEndTag(EndTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "expected-eof-but-got-end-tag", details);
    parser.phase = parser._inBodyPhase;
    return token;
  }
}
| 3285 |
/// Phase entered after `</html>` has been seen following a frameset.
///
/// Comments, whitespace, `html` and `noframes` start tags are processed;
/// anything else is a parse error and is dropped.
class AfterAfterFramesetPhase extends Phase {
  AfterAfterFramesetPhase(parser) : super(parser);

  /// Routes a start tag to the matching handler based on its name.
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "noframes":
        return startTagNoFrames(token);
      default:
        return startTagOther(token);
    }
  }

  /// Stop parsing.
  bool processEOF() {
    return false;
  }

  /// Comments are attached to the document itself.
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  /// Whitespace is handled by the "in body" phase.
  Token processSpaceCharacters(SpaceCharactersToken token) =>
      parser._inBodyPhase.processSpaceCharacters(token);

  /// Character data is a parse error and is dropped.
  Token processCharacters(CharactersToken token) {
    parser.parseError(token.span, "expected-eof-but-got-char");
    return null;
  }

  /// `html` start tags are handled by the "in body" phase.
  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /// `noframes` start tags are handled by the "in head" phase.
  Token startTagNoFrames(StartTagToken token) =>
      parser._inHeadPhase.processStartTag(token);

  /// Any other start tag is a parse error and is ignored.
  void startTagOther(StartTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "expected-eof-but-got-start-tag", details);
  }

  /// Every end tag is a parse error and is dropped.
  Token processEndTag(EndTagToken token) {
    final details = {"name": token.name};
    parser.parseError(token.span, "expected-eof-but-got-end-tag", details);
    return null;
  }
}
| 3332 |
| 3333 |
/// Error in parsed document.
class ParseError implements SourceSpanException {
  /// Key into [errorMessages] identifying the kind of error.
  final String errorCode;

  /// Location in the input that the error refers to.
  final SourceSpan span;

  /// Values substituted into the message template for [errorCode].
  final Map data;

  ParseError(this.errorCode, this.span, this.data);

  /// Line of the start of [span].
  int get line {
    return span.start.line;
  }

  /// Column of the start of [span].
  int get column {
    return span.start.column;
  }

  /// Gets the human readable error message for this error, without any
  /// location information. Use [toString] to get a message that includes the
  /// span.
  String get message {
    final template = errorMessages[errorCode];
    return formatStr(template, data);
  }

  /// Formats [message] together with [span] via `span.message`, passing
  /// [color] through. The result is prefixed with 'ParserError on ' when the
  /// span has no source url, and with 'On ' otherwise.
  String toString({color}) {
    final located = span.message(message, color: color);
    if (span.sourceUrl == null) {
      return 'ParserError on $located';
    }
    return 'On $located';
  }
}
| 3359 |
| 3360 |
/// Convenience function to get the pair of namespace and localName.
///
/// An element without a namespace is reported as being in the HTML
/// namespace.
Pair<String, String> getElementNameTuple(Element e) {
  final declared = e.namespaceUri;
  final namespace = declared == null ? Namespaces.html : declared;
  return new Pair(namespace, e.localName);
}
OLD | NEW |