| Index: third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/html5lib/html5lib/html5parser.py
|
| diff --git a/third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/html5lib/html5lib/html5parser.py b/third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/html5lib/html5lib/html5parser.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..5b9ce7d72068040907bdf42528c765a2a1ca7553
|
| --- /dev/null
|
| +++ b/third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/html5lib/html5lib/html5parser.py
|
| @@ -0,0 +1,2723 @@
|
| +from __future__ import absolute_import, division, unicode_literals
|
| +from six import with_metaclass
|
| +
|
| +import types
|
| +
|
| +from . import inputstream
|
| +from . import tokenizer
|
| +
|
| +from . import treebuilders
|
| +from .treebuilders._base import Marker
|
| +
|
| +from . import utils
|
| +from . import constants
|
| +from .constants import spaceCharacters, asciiUpper2Lower
|
| +from .constants import specialElements
|
| +from .constants import headingElements
|
| +from .constants import cdataElements, rcdataElements
|
| +from .constants import tokenTypes, ReparseException, namespaces
|
| +from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements
|
| +from .constants import adjustForeignAttributes as adjustForeignAttributesMap
|
| +
|
| +
|
def parse(doc, treebuilder="etree", encoding=None,
          namespaceHTMLElements=True):
    """Parse a string or file-like object into a tree

    doc - the document to parse; handed unchanged to HTMLParser.parse
    treebuilder - name given to treebuilders.getTreeBuilder to pick the
      tree implementation
    encoding - forwarded to HTMLParser.parse
    namespaceHTMLElements - forwarded to the HTMLParser constructor

    Returns whatever HTMLParser.parse returns.
    """
    builderClass = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builderClass,
                        namespaceHTMLElements=namespaceHTMLElements)
    return parser.parse(doc, encoding=encoding)
|
| +
|
| +
|
def parseFragment(doc, container="div", treebuilder="etree", encoding=None,
                  namespaceHTMLElements=True):
    """Parse a string or file-like object into a tree fragment

    doc - the fragment to parse; handed unchanged to
      HTMLParser.parseFragment
    container - forwarded to HTMLParser.parseFragment
    treebuilder - name given to treebuilders.getTreeBuilder to pick the
      tree implementation
    encoding - forwarded to HTMLParser.parseFragment
    namespaceHTMLElements - forwarded to the HTMLParser constructor

    Returns whatever HTMLParser.parseFragment returns.
    """
    builderClass = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builderClass,
                        namespaceHTMLElements=namespaceHTMLElements)
    return parser.parseFragment(doc, container=container, encoding=encoding)
|
| +
|
| +
|
def method_decorator_metaclass(function):
    """Return a metaclass that applies ``function`` to every plain function
    found in the body of a class created with it.

    Entries of the class dict that are not ``types.FunctionType`` instances
    are stored back unchanged.
    """
    class Decorated(type):
        def __new__(meta, classname, bases, classDict):
            # Only existing keys are reassigned, so the dict never changes
            # size while it is being iterated.
            for memberName, member in classDict.items():
                classDict[memberName] = (
                    function(member)
                    if isinstance(member, types.FunctionType)
                    else member)
            return type.__new__(meta, classname, bases, classDict)
    return Decorated
|
| +
|
| +
|
class HTMLParser(object):
    """HTML parser. Generates a tree structure from a stream of (possibly
    malformed) HTML"""

    def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer,
                 strict=False, namespaceHTMLElements=True, debug=False):
        """
        strict - raise an exception when a parse error is encountered

        tree - a treebuilder class controlling the type of tree that will be
        returned. Built in treebuilders can be accessed through
        html5lib.treebuilders.getTreeBuilder(treeType)

        tokenizer - a class that provides a stream of tokens to the treebuilder.
        This may be replaced for e.g. a sanitizer which converts some tags to
        text

        namespaceHTMLElements - passed as the only argument when the
        treebuilder class is instantiated

        debug - passed to getPhases() when the phase objects are built
        """

        # Raise an exception on the first error encountered
        self.strict = strict

        if tree is None:
            tree = treebuilders.getTreeBuilder("etree")
        self.tree = tree(namespaceHTMLElements)
        self.tokenizer_class = tokenizer
        self.errors = []

        # One instance of every phase class, keyed by phase name
        self.phases = dict([(name, cls(self, self.tree)) for name, cls in
                            getPhases(debug).items()])

    def _parse(self, stream, innerHTML=False, container="div",
               encoding=None, parseMeta=True, useChardet=True, **kwargs):
        """Tokenize ``stream`` and run the main loop until it completes.

        A fresh tokenizer is created from ``self.tokenizer_class``; any extra
        keyword arguments are forwarded to it. Whenever the main loop raises
        ReparseException the parser state is reset and the loop is started
        again.
        """

        self.innerHTMLMode = innerHTML
        self.container = container
        self.tokenizer = self.tokenizer_class(stream, encoding=encoding,
                                              parseMeta=parseMeta,
                                              useChardet=useChardet,
                                              parser=self, **kwargs)
        self.reset()

        while True:
            try:
                self.mainLoop()
                break
            except ReparseException:
                self.reset()

    def reset(self):
        """Return the tree and all per-parse state to their starting values.

        In innerHTML (fragment) mode the tokenizer state is chosen from the
        container element name, the root html element is inserted and the
        insertion mode is recomputed; otherwise parsing starts in the
        "initial" phase.
        """
        self.tree.reset()
        self.firstStartTag = False
        self.errors = []
        self.log = []  # only used with debug mode
        # "quirks" / "limited quirks" / "no quirks"
        self.compatMode = "no quirks"

        if self.innerHTMLMode:
            self.innerHTML = self.container.lower()

            # The pairing of cdataElements with rcdataState (and of
            # rcdataElements with rawtextState) is taken over unchanged.
            if self.innerHTML in cdataElements:
                self.tokenizer.state = self.tokenizer.rcdataState
            elif self.innerHTML in rcdataElements:
                self.tokenizer.state = self.tokenizer.rawtextState
            elif self.innerHTML == 'plaintext':
                self.tokenizer.state = self.tokenizer.plaintextState
            else:
                # state already is data state
                # self.tokenizer.state = self.tokenizer.dataState
                pass
            self.phase = self.phases["beforeHtml"]
            self.phase.insertHtmlElement()
            self.resetInsertionMode()
        else:
            self.innerHTML = False
            self.phase = self.phases["initial"]

        self.lastPhase = None

        self.beforeRCDataPhase = None

        self.framesetOK = True

    @property
    def documentEncoding(self):
        """The name of the character encoding
        that was used to decode the input stream,
        or :obj:`None` if that is not determined yet.

        """
        if not hasattr(self, 'tokenizer'):
            return None
        return self.tokenizer.stream.charEncoding[0]

    def isHTMLIntegrationPoint(self, element):
        """Return whether ``element`` is an HTML integration point.

        A MathML annotation-xml element qualifies only when it has an
        "encoding" attribute whose ASCII-lowercased value is "text/html" or
        "application/xhtml+xml"; every other element is looked up in
        htmlIntegrationPointElements by (namespace, name).
        """
        if (element.name == "annotation-xml" and
                element.namespace == namespaces["mathml"]):
            return ("encoding" in element.attributes and
                    element.attributes["encoding"].translate(
                        asciiUpper2Lower) in
                    ("text/html", "application/xhtml+xml"))
        else:
            return (element.namespace, element.name) in htmlIntegrationPointElements

    def isMathMLTextIntegrationPoint(self, element):
        """Return whether ``element`` (by namespace and name) is listed in
        mathmlTextIntegrationPointElements."""
        return (element.namespace, element.name) in mathmlTextIntegrationPointElements

    def mainLoop(self):
        """Feed every token from the tokenizer to the appropriate phase.

        A phase method may return a token, in which case that token is
        processed again (possibly by a different phase). Once the tokenizer
        is exhausted, processEOF is called on the current phase until it
        stops asking for reprocessing.
        """
        CharactersToken = tokenTypes["Characters"]
        SpaceCharactersToken = tokenTypes["SpaceCharacters"]
        StartTagToken = tokenTypes["StartTag"]
        EndTagToken = tokenTypes["EndTag"]
        CommentToken = tokenTypes["Comment"]
        DoctypeToken = tokenTypes["Doctype"]
        ParseErrorToken = tokenTypes["ParseError"]

        # Start tags that stay in foreign content even at a MathML text
        # integration point; built once instead of once per token.
        mathMLTextPassThrough = frozenset(["mglyph", "malignmark"])

        for token in self.normalizedTokens():
            new_token = token
            while new_token is not None:
                currentNode = self.tree.openElements[-1] if self.tree.openElements else None
                currentNodeNamespace = currentNode.namespace if currentNode else None
                currentNodeName = currentNode.name if currentNode else None

                tokenType = new_token["type"]

                if tokenType == ParseErrorToken:
                    self.parseError(new_token["data"], new_token.get("datavars", {}))
                    new_token = None
                else:
                    # The svg-inside-annotation-xml test must only look at
                    # token["name"] for start tags: other token types reaching
                    # this point (e.g. characters) are not guaranteed to carry
                    # a "name" key.
                    if (len(self.tree.openElements) == 0 or
                        currentNodeNamespace == self.tree.defaultNamespace or
                        (self.isMathMLTextIntegrationPoint(currentNode) and
                         ((tokenType == StartTagToken and
                           token["name"] not in mathMLTextPassThrough) or
                          tokenType in (CharactersToken, SpaceCharactersToken))) or
                        (currentNodeNamespace == namespaces["mathml"] and
                         currentNodeName == "annotation-xml" and
                         tokenType == StartTagToken and
                         token["name"] == "svg") or
                        (self.isHTMLIntegrationPoint(currentNode) and
                         tokenType in (StartTagToken, CharactersToken, SpaceCharactersToken))):
                        phase = self.phase
                    else:
                        phase = self.phases["inForeignContent"]

                    if tokenType == CharactersToken:
                        new_token = phase.processCharacters(new_token)
                    elif tokenType == SpaceCharactersToken:
                        new_token = phase.processSpaceCharacters(new_token)
                    elif tokenType == StartTagToken:
                        new_token = phase.processStartTag(new_token)
                    elif tokenType == EndTagToken:
                        new_token = phase.processEndTag(new_token)
                    elif tokenType == CommentToken:
                        new_token = phase.processComment(new_token)
                    elif tokenType == DoctypeToken:
                        new_token = phase.processDoctype(new_token)

            if (tokenType == StartTagToken and token["selfClosing"]
                    and not token["selfClosingAcknowledged"]):
                self.parseError("non-void-element-with-trailing-solidus",
                                {"name": token["name"]})

        # When the loop finishes it's EOF
        reprocess = True
        phases = []
        while reprocess:
            phases.append(self.phase)
            reprocess = self.phase.processEOF()
            if reprocess:
                # A phase asking for reprocessing must have switched to a
                # phase that has not handled EOF yet.
                assert self.phase not in phases

    def normalizedTokens(self):
        """Yield each tokenizer token after passing it through
        normalizeToken."""
        for token in self.tokenizer:
            yield self.normalizeToken(token)

    def parse(self, stream, encoding=None, parseMeta=True, useChardet=True):
        """Parse a HTML document into a well-formed tree

        stream - a filelike object or string containing the HTML to be parsed

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)
        """
        self._parse(stream, innerHTML=False, encoding=encoding,
                    parseMeta=parseMeta, useChardet=useChardet)
        return self.tree.getDocument()

    def parseFragment(self, stream, container="div", encoding=None,
                      parseMeta=False, useChardet=True):
        """Parse a HTML fragment into a well-formed tree fragment

        container - name of the element we're setting the innerHTML property
        if set to None, default to 'div'

        stream - a filelike object or string containing the HTML to be parsed

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)
        """
        # NOTE(review): parseMeta and useChardet are accepted but not
        # forwarded, so _parse runs with its own defaults for both. Left
        # as-is because forwarding them would change existing behaviour.
        self._parse(stream, True, container=container, encoding=encoding)
        return self.tree.getFragment()

    def parseError(self, errorcode="XXX-undefined-error", datavars=None):
        """Record a parse error and, in strict mode, raise ParseError.

        errorcode - identifier of the error
        datavars - optional dict of values associated with the error; a new
        empty dict is used when omitted so that recorded errors never share
        one mutable default object
        """
        # XXX The idea is to make errorcode mandatory.
        if datavars is None:
            datavars = {}
        self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
        if self.strict:
            raise ParseError

    def normalizeToken(self, token):
        """ HTML5 specific normalizations to the token stream """

        if token["type"] == tokenTypes["StartTag"]:
            # Building the dict from the reversed pair list makes the first
            # occurrence of a duplicated attribute name win.
            token["data"] = dict(token["data"][::-1])

        return token

    def adjustMathMLAttributes(self, token):
        """Rename the lowercased MathML attribute "definitionurl" in the
        token's attribute dict to its mixed-case form."""
        replacements = {"definitionurl": "definitionURL"}
        for k, v in replacements.items():
            if k in token["data"]:
                token["data"][v] = token["data"][k]
                del token["data"][k]

    def adjustSVGAttributes(self, token):
        """Rename lowercased SVG attribute names in the token's attribute
        dict to their mixed-case forms."""
        replacements = {
            "attributename": "attributeName",
            "attributetype": "attributeType",
            "basefrequency": "baseFrequency",
            "baseprofile": "baseProfile",
            "calcmode": "calcMode",
            "clippathunits": "clipPathUnits",
            "contentscripttype": "contentScriptType",
            "contentstyletype": "contentStyleType",
            "diffuseconstant": "diffuseConstant",
            "edgemode": "edgeMode",
            "externalresourcesrequired": "externalResourcesRequired",
            "filterres": "filterRes",
            "filterunits": "filterUnits",
            "glyphref": "glyphRef",
            "gradienttransform": "gradientTransform",
            "gradientunits": "gradientUnits",
            "kernelmatrix": "kernelMatrix",
            "kernelunitlength": "kernelUnitLength",
            "keypoints": "keyPoints",
            "keysplines": "keySplines",
            "keytimes": "keyTimes",
            "lengthadjust": "lengthAdjust",
            "limitingconeangle": "limitingConeAngle",
            "markerheight": "markerHeight",
            "markerunits": "markerUnits",
            "markerwidth": "markerWidth",
            "maskcontentunits": "maskContentUnits",
            "maskunits": "maskUnits",
            "numoctaves": "numOctaves",
            "pathlength": "pathLength",
            "patterncontentunits": "patternContentUnits",
            "patterntransform": "patternTransform",
            "patternunits": "patternUnits",
            "pointsatx": "pointsAtX",
            "pointsaty": "pointsAtY",
            "pointsatz": "pointsAtZ",
            "preservealpha": "preserveAlpha",
            "preserveaspectratio": "preserveAspectRatio",
            "primitiveunits": "primitiveUnits",
            "refx": "refX",
            "refy": "refY",
            "repeatcount": "repeatCount",
            "repeatdur": "repeatDur",
            "requiredextensions": "requiredExtensions",
            "requiredfeatures": "requiredFeatures",
            "specularconstant": "specularConstant",
            "specularexponent": "specularExponent",
            "spreadmethod": "spreadMethod",
            "startoffset": "startOffset",
            "stddeviation": "stdDeviation",
            "stitchtiles": "stitchTiles",
            "surfacescale": "surfaceScale",
            "systemlanguage": "systemLanguage",
            "tablevalues": "tableValues",
            "targetx": "targetX",
            "targety": "targetY",
            "textlength": "textLength",
            "viewbox": "viewBox",
            "viewtarget": "viewTarget",
            "xchannelselector": "xChannelSelector",
            "ychannelselector": "yChannelSelector",
            "zoomandpan": "zoomAndPan"
        }
        # Snapshot the keys: the dict is modified inside the loop.
        for originalName in list(token["data"].keys()):
            if originalName in replacements:
                svgName = replacements[originalName]
                token["data"][svgName] = token["data"][originalName]
                del token["data"][originalName]

    def adjustForeignAttributes(self, token):
        """Re-key attributes of a foreign-content token according to the
        adjustForeignAttributes mapping from the constants module."""
        replacements = adjustForeignAttributesMap

        # Snapshot the keys (as adjustSVGAttributes does): entries are added
        # and removed inside the loop, which is not safe on a live keys view.
        for originalName in list(token["data"].keys()):
            if originalName in replacements:
                foreignName = replacements[originalName]
                token["data"][foreignName] = token["data"][originalName]
                del token["data"][originalName]

    def reparseTokenNormal(self, token):
        # NOTE(review): nothing visible in this class assigns self.parser, so
        # this call looks like it would raise AttributeError - confirm
        # whether the method is dead code before relying on it.
        self.parser.phase()

    def resetInsertionMode(self):
        """Choose self.phase from the stack of open elements.

        The stack is walked from the most recently opened element downwards;
        the first element whose name appears in the table below decides the
        phase. For the bottom-most element the innerHTML container name is
        used instead of the element's own name, and "inBody" is the fallback.
        """
        # The name of this method is mostly historical. (It's also used in the
        # specification.)
        last = False
        newModes = {
            "select": "inSelect",
            "td": "inCell",
            "th": "inCell",
            "tr": "inRow",
            "tbody": "inTableBody",
            "thead": "inTableBody",
            "tfoot": "inTableBody",
            "caption": "inCaption",
            "colgroup": "inColumnGroup",
            "table": "inTable",
            "head": "inBody",
            "body": "inBody",
            "frameset": "inFrameset",
            "html": "beforeHead"
        }
        for node in self.tree.openElements[::-1]:
            nodeName = node.name
            new_phase = None
            if node == self.tree.openElements[0]:
                assert self.innerHTML
                last = True
                nodeName = self.innerHTML
            # Check for conditions that should only happen in the innerHTML
            # case
            if nodeName in ("select", "colgroup", "head", "html"):
                assert self.innerHTML

            # Elements outside the default namespace never decide the mode
            if not last and node.namespace != self.tree.defaultNamespace:
                continue

            if nodeName in newModes:
                new_phase = self.phases[newModes[nodeName]]
                break
            elif last:
                new_phase = self.phases["inBody"]
                break

        self.phase = new_phase

    def parseRCDataRawtext(self, token, contentType):
        """Generic RCDATA/RAWTEXT Parsing algorithm
        contentType - RCDATA or RAWTEXT
        """
        assert contentType in ("RAWTEXT", "RCDATA")

        self.tree.insertElement(token)

        if contentType == "RAWTEXT":
            self.tokenizer.state = self.tokenizer.rawtextState
        else:
            self.tokenizer.state = self.tokenizer.rcdataState

        # Remember the current phase before switching to the "text" phase
        self.originalPhase = self.phase

        self.phase = self.phases["text"]
|
| +
|
| +
|
| +def getPhases(debug):
|
def log(function):
    """Logger that records which phase processes each token"""
    # Reverse lookup: numeric token type -> its name in constants.tokenTypes
    type_names = dict((number, label) for label, number in
                      constants.tokenTypes.items())

    def wrapped(self, *args, **kwargs):
        # Only "process*" methods called with at least one positional
        # argument (the token) are recorded.
        if args and function.__name__.startswith("process"):
            token = args[0]
            info = {"type": type_names[token['type']]}
            if token['type'] in constants.tagTokenTypes:
                info["name"] = token['name']

            entry = (self.parser.tokenizer.state.__name__,
                     self.parser.phase.__class__.__name__,
                     self.__class__.__name__,
                     function.__name__,
                     info)
            self.parser.log.append(entry)
        return function(self, *args, **kwargs)
    return wrapped
|
| +
|
def getMetaclass(use_metaclass, metaclass_func):
    """Return the metaclass to build phase classes with.

    When use_metaclass is true the result is a metaclass that decorates
    every method with metaclass_func; otherwise it is plain ``type``.
    """
    if not use_metaclass:
        return type
    return method_decorator_metaclass(metaclass_func)
|
| +
|
class Phase(with_metaclass(getMetaclass(debug, log))):
    """Base class for helper object that implements each phase of processing
    """

    def __init__(self, parser, tree):
        self.tree = tree
        self.parser = parser

    def processEOF(self):
        # Every concrete phase has to supply its own EOF handling
        raise NotImplementedError

    def processComment(self, token):
        # For most phases the following is correct. Where it's not it will be
        # overridden.
        currentNode = self.tree.openElements[-1]
        self.tree.insertComment(token, currentNode)

    def processDoctype(self, token):
        self.parser.parseError("unexpected-doctype")

    def processCharacters(self, token):
        text = token["data"]
        self.tree.insertText(text)

    def processSpaceCharacters(self, token):
        text = token["data"]
        self.tree.insertText(text)

    def processStartTag(self, token):
        # Dispatch on the tag name through the phase's start tag table
        handler = self.startTagHandler[token["name"]]
        return handler(token)

    def startTagHtml(self, token):
        if token["name"] == "html" and not self.parser.firstStartTag:
            self.parser.parseError("non-html-root")
        # XXX Need a check here to see if the first start tag token emitted is
        # this token... If it's not, invoke self.parser.parseError().
        # Copy over only the attributes the root element does not have yet
        for attr, value in token["data"].items():
            if attr in self.tree.openElements[0].attributes:
                continue
            self.tree.openElements[0].attributes[attr] = value
        self.parser.firstStartTag = False

    def processEndTag(self, token):
        # Dispatch on the tag name through the phase's end tag table
        handler = self.endTagHandler[token["name"]]
        return handler(token)
|
| +
|
class InitialPhase(Phase):
    """Phase in effect until a doctype (or anything that rules one out) has
    been seen; it decides the parser's compatMode."""

    def processSpaceCharacters(self, token):
        # Whitespace before the doctype is dropped
        pass

    def processComment(self, token):
        self.tree.insertComment(token, self.tree.document)

    def processDoctype(self, token):
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]
        correct = token["correct"]

        unexpectedSystemId = (systemId is not None and
                              systemId != "about:legacy-compat")
        if name != "html" or publicId is not None or unexpectedSystemId:
            self.parser.parseError("unknown-doctype")

        if publicId is None:
            publicId = ""

        self.tree.insertDoctype(token)

        if publicId != "":
            publicId = publicId.translate(asciiUpper2Lower)

        # Public identifier prefixes that force quirks mode
        quirksPrefixes = (
            "+//silmaril//dtd html pro v0r11 19970101//",
            "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
            "-//as//dtd html 3.0 aswedit + extensions//",
            "-//ietf//dtd html 2.0 level 1//",
            "-//ietf//dtd html 2.0 level 2//",
            "-//ietf//dtd html 2.0 strict level 1//",
            "-//ietf//dtd html 2.0 strict level 2//",
            "-//ietf//dtd html 2.0 strict//",
            "-//ietf//dtd html 2.0//",
            "-//ietf//dtd html 2.1e//",
            "-//ietf//dtd html 3.0//",
            "-//ietf//dtd html 3.2 final//",
            "-//ietf//dtd html 3.2//",
            "-//ietf//dtd html 3//",
            "-//ietf//dtd html level 0//",
            "-//ietf//dtd html level 1//",
            "-//ietf//dtd html level 2//",
            "-//ietf//dtd html level 3//",
            "-//ietf//dtd html strict level 0//",
            "-//ietf//dtd html strict level 1//",
            "-//ietf//dtd html strict level 2//",
            "-//ietf//dtd html strict level 3//",
            "-//ietf//dtd html strict//",
            "-//ietf//dtd html//",
            "-//metrius//dtd metrius presentational//",
            "-//microsoft//dtd internet explorer 2.0 html strict//",
            "-//microsoft//dtd internet explorer 2.0 html//",
            "-//microsoft//dtd internet explorer 2.0 tables//",
            "-//microsoft//dtd internet explorer 3.0 html strict//",
            "-//microsoft//dtd internet explorer 3.0 html//",
            "-//microsoft//dtd internet explorer 3.0 tables//",
            "-//netscape comm. corp.//dtd html//",
            "-//netscape comm. corp.//dtd strict html//",
            "-//o'reilly and associates//dtd html 2.0//",
            "-//o'reilly and associates//dtd html extended 1.0//",
            "-//o'reilly and associates//dtd html extended relaxed 1.0//",
            "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
            "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
            "-//spyglass//dtd html 2.0 extended//",
            "-//sq//dtd html 2.0 hotmetal + extensions//",
            "-//sun microsystems corp.//dtd hotjava html//",
            "-//sun microsystems corp.//dtd hotjava strict html//",
            "-//w3c//dtd html 3 1995-03-24//",
            "-//w3c//dtd html 3.2 draft//",
            "-//w3c//dtd html 3.2 final//",
            "-//w3c//dtd html 3.2//",
            "-//w3c//dtd html 3.2s draft//",
            "-//w3c//dtd html 4.0 frameset//",
            "-//w3c//dtd html 4.0 transitional//",
            "-//w3c//dtd html experimental 19960712//",
            "-//w3c//dtd html experimental 970421//",
            "-//w3c//dtd w3 html//",
            "-//w3o//dtd w3 html 3.0//",
            "-//webtechs//dtd mozilla html 2.0//",
            "-//webtechs//dtd mozilla html//")
        # Public identifiers that force quirks mode on an exact match
        quirksExact = (
            "-//w3o//dtd w3 html strict 3.0//en//",
            "-/w3c/dtd html 4.0 transitional/en",
            "html")
        # Quirks without a system identifier, limited quirks with one
        html401Prefixes = (
            "-//w3c//dtd html 4.01 frameset//",
            "-//w3c//dtd html 4.01 transitional//")
        # Always limited quirks
        xhtml10Prefixes = (
            "-//w3c//dtd xhtml 1.0 frameset//",
            "-//w3c//dtd xhtml 1.0 transitional//")

        isHtml401 = publicId.startswith(html401Prefixes)

        if (not correct or token["name"] != "html"
                or publicId.startswith(quirksPrefixes)
                or publicId in quirksExact
                or isHtml401 and systemId is None
                or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
            self.parser.compatMode = "quirks"
        elif (publicId.startswith(xhtml10Prefixes)
              or isHtml401 and systemId is not None):
            self.parser.compatMode = "limited quirks"

        self.parser.phase = self.parser.phases["beforeHtml"]

    def anythingElse(self):
        # No doctype was found: quirks mode, then move on to "beforeHtml"
        self.parser.compatMode = "quirks"
        self.parser.phase = self.parser.phases["beforeHtml"]

    def processCharacters(self, token):
        self.parser.parseError("expected-doctype-but-got-chars")
        self.anythingElse()
        # Hand the token back so the new phase processes it
        return token

    def processStartTag(self, token):
        errorVars = {"name": token["name"]}
        self.parser.parseError("expected-doctype-but-got-start-tag",
                               errorVars)
        self.anythingElse()
        return token

    def processEndTag(self, token):
        errorVars = {"name": token["name"]}
        self.parser.parseError("expected-doctype-but-got-end-tag",
                               errorVars)
        self.anythingElse()
        return token

    def processEOF(self):
        self.parser.parseError("expected-doctype-but-got-eof")
        self.anythingElse()
        # True asks the main loop to run EOF handling again in the new phase
        return True
|
| +
|
class BeforeHtmlPhase(Phase):
    """Phase that creates the root html element, implying it when the
    document does not open with an html start tag."""

    # helper methods
    def insertHtmlElement(self):
        rootToken = impliedTagToken("html", "StartTag")
        self.tree.insertRoot(rootToken)
        self.parser.phase = self.parser.phases["beforeHead"]

    # other
    def processEOF(self):
        self.insertHtmlElement()
        return True

    def processComment(self, token):
        self.tree.insertComment(token, self.tree.document)

    def processSpaceCharacters(self, token):
        # Whitespace before the root element is dropped
        pass

    def processCharacters(self, token):
        self.insertHtmlElement()
        return token

    def processStartTag(self, token):
        isHtmlTag = token["name"] == "html"
        if isHtmlTag:
            self.parser.firstStartTag = True
        self.insertHtmlElement()
        return token

    def processEndTag(self, token):
        # Only these end tags imply the root element; anything else is
        # reported and dropped.
        if token["name"] in ("head", "body", "html", "br"):
            self.insertHtmlElement()
            return token
        self.parser.parseError("unexpected-end-tag-before-html",
                               {"name": token["name"]})
|
| +
|
class BeforeHeadPhase(Phase):
    """Phase between the root html element and the head element; implies a
    head start tag for anything other than html/head start tags."""

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTagTable = [
            ("html", self.startTagHtml),
            ("head", self.startTagHead)
        ]
        self.startTagHandler = utils.MethodDispatcher(startTagTable)
        self.startTagHandler.default = self.startTagOther

        endTagTable = [
            (("head", "body", "html", "br"), self.endTagImplyHead)
        ]
        self.endTagHandler = utils.MethodDispatcher(endTagTable)
        self.endTagHandler.default = self.endTagOther

    def processEOF(self):
        impliedHead = impliedTagToken("head", "StartTag")
        self.startTagHead(impliedHead)
        return True

    def processSpaceCharacters(self, token):
        # Whitespace before the head element is dropped
        pass

    def processCharacters(self, token):
        impliedHead = impliedTagToken("head", "StartTag")
        self.startTagHead(impliedHead)
        return token

    def startTagHtml(self, token):
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagHead(self, token):
        self.tree.insertElement(token)
        # The element just inserted is now the last open element
        self.tree.headPointer = self.tree.openElements[-1]
        self.parser.phase = self.parser.phases["inHead"]

    def startTagOther(self, token):
        impliedHead = impliedTagToken("head", "StartTag")
        self.startTagHead(impliedHead)
        return token

    def endTagImplyHead(self, token):
        impliedHead = impliedTagToken("head", "StartTag")
        self.startTagHead(impliedHead)
        return token

    def endTagOther(self, token):
        errorVars = {"name": token["name"]}
        self.parser.parseError("end-tag-after-implied-root", errorVars)
|
| +
|
class InHeadPhase(Phase):
    """Phase handling tokens while the head element is open."""

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTagTable = [
            ("html", self.startTagHtml),
            ("title", self.startTagTitle),
            (("noscript", "noframes", "style"), self.startTagNoScriptNoFramesStyle),
            ("script", self.startTagScript),
            (("base", "basefont", "bgsound", "command", "link"),
             self.startTagBaseLinkCommand),
            ("meta", self.startTagMeta),
            ("head", self.startTagHead)
        ]
        self.startTagHandler = utils.MethodDispatcher(startTagTable)
        self.startTagHandler.default = self.startTagOther

        endTagTable = [
            ("head", self.endTagHead),
            (("br", "html", "body"), self.endTagHtmlBodyBr)
        ]
        self.endTagHandler = utils.MethodDispatcher(endTagTable)
        self.endTagHandler.default = self.endTagOther

    # the real thing
    def processEOF(self):
        self.anythingElse()
        return True

    def processCharacters(self, token):
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagHead(self, token):
        self.parser.parseError("two-heads-are-not-better-than-one")

    def startTagBaseLinkCommand(self, token):
        # Insert the element and immediately take it off the open stack
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagMeta(self, token):
        # Insert the element and immediately take it off the open stack
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

        attributes = token["data"]
        stream = self.parser.tokenizer.stream
        # An encoding declared in <meta> only matters while the stream's
        # current encoding is still marked "tentative".
        if stream.charEncoding[1] != "tentative":
            return

        if "charset" in attributes:
            stream.changeEncoding(attributes["charset"])
        elif ("content" in attributes and
              "http-equiv" in attributes and
              attributes["http-equiv"].lower() == "content-type"):
            # Encoding it as UTF-8 here is a hack, as really we should pass
            # the abstract Unicode string, and just use the
            # ContentAttrParser on that, but using UTF-8 allows all chars
            # to be encoded and as a ASCII-superset works.
            contentBytes = attributes["content"].encode("utf-8")
            data = inputstream.EncodingBytes(contentBytes)
            codec = inputstream.ContentAttrParser(data).parse()
            stream.changeEncoding(codec)

    def startTagTitle(self, token):
        self.parser.parseRCDataRawtext(token, "RCDATA")

    def startTagNoScriptNoFramesStyle(self, token):
        # Need to decide whether to implement the scripting-disabled case
        self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagScript(self, token):
        self.tree.insertElement(token)
        tokenizer = self.parser.tokenizer
        tokenizer.state = tokenizer.scriptDataState
        # Remember the current phase before switching to the "text" phase
        self.parser.originalPhase = self.parser.phase
        self.parser.phase = self.parser.phases["text"]

    def startTagOther(self, token):
        self.anythingElse()
        return token

    def endTagHead(self, token):
        node = self.parser.tree.openElements.pop()
        assert node.name == "head", "Expected head got %s" % node.name
        self.parser.phase = self.parser.phases["afterHead"]

    def endTagHtmlBodyBr(self, token):
        self.anythingElse()
        return token

    def endTagOther(self, token):
        errorVars = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag", errorVars)

    def anythingElse(self):
        # Act as if a </head> end tag had been seen
        self.endTagHead(impliedTagToken("head"))
|
| +
|
| + # XXX If we implement a parser for which scripting is disabled we need to
|
| + # implement this phase.
|
| + #
|
| + # class InHeadNoScriptPhase(Phase):
|
class AfterHeadPhase(Phase):
    """Phase handling tokens after the head element has been closed and
    before a body or frameset element has been opened."""

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTagTable = [
            ("html", self.startTagHtml),
            ("body", self.startTagBody),
            ("frameset", self.startTagFrameset),
            (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
              "style", "title"),
             self.startTagFromHead),
            ("head", self.startTagHead)
        ]
        self.startTagHandler = utils.MethodDispatcher(startTagTable)
        self.startTagHandler.default = self.startTagOther

        endTagTable = [(("body", "html", "br"), self.endTagHtmlBodyBr)]
        self.endTagHandler = utils.MethodDispatcher(endTagTable)
        self.endTagHandler.default = self.endTagOther

    def processEOF(self):
        self.anythingElse()
        return True

    def processCharacters(self, token):
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagBody(self, token):
        self.parser.framesetOK = False
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inBody"]

    def startTagFrameset(self, token):
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inFrameset"]

    def startTagFromHead(self, token):
        errorVars = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-out-of-my-head",
                               errorVars)
        # Temporarily reopen the head element, let the "inHead" phase handle
        # the tag, then take the head element off the stack again.
        self.tree.openElements.append(self.tree.headPointer)
        self.parser.phases["inHead"].processStartTag(token)
        for openNode in self.tree.openElements[::-1]:
            if openNode.name != "head":
                continue
            self.tree.openElements.remove(openNode)
            break

    def startTagHead(self, token):
        errorVars = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag", errorVars)

    def startTagOther(self, token):
        self.anythingElse()
        return token

    def endTagHtmlBodyBr(self, token):
        self.anythingElse()
        return token

    def endTagOther(self, token):
        errorVars = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag", errorVars)

    def anythingElse(self):
        # Imply a <body> start tag and continue in the "inBody" phase
        bodyToken = impliedTagToken("body", "StartTag")
        self.tree.insertElement(bodyToken)
        self.parser.phase = self.parser.phases["inBody"]
        self.parser.framesetOK = True
|
| +
|
| + class InBodyPhase(Phase):
|
| + # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
|
| + # the really-really-really-very crazy mode
|
def __init__(self, parser, tree):
    """Build the start-tag and end-tag dispatch tables for this phase.

    Each table maps one tag name, or a tuple of tag names, to the bound
    method that handles it; names that are not listed fall through to the
    table's ``default`` handler.
    """
    Phase.__init__(self, parser, tree)

    # Keep a ref to this for special handling of whitespace in <pre>
    self.processSpaceCharactersNonPre = self.processSpaceCharacters

    # NOTE(review): "noframes" is listed both in the startTagProcessInHead
    # entry and in the startTagRawtext entry below. Which handler wins
    # depends on how utils.MethodDispatcher treats repeated keys - confirm
    # before removing either occurrence.
    self.startTagHandler = utils.MethodDispatcher([
        ("html", self.startTagHtml),
        (("base", "basefont", "bgsound", "command", "link", "meta",
          "noframes", "script", "style", "title"),
         self.startTagProcessInHead),
        ("body", self.startTagBody),
        ("frameset", self.startTagFrameset),
        (("address", "article", "aside", "blockquote", "center", "details",
          "dir", "div", "dl", "fieldset", "figcaption", "figure",
          "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
          "section", "summary", "ul"),
         self.startTagCloseP),
        (headingElements, self.startTagHeading),
        (("pre", "listing"), self.startTagPreListing),
        ("form", self.startTagForm),
        (("li", "dd", "dt"), self.startTagListItem),
        ("plaintext", self.startTagPlaintext),
        ("a", self.startTagA),
        (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
          "strong", "tt", "u"), self.startTagFormatting),
        ("nobr", self.startTagNobr),
        ("button", self.startTagButton),
        (("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
        ("xmp", self.startTagXmp),
        ("table", self.startTagTable),
        (("area", "br", "embed", "img", "keygen", "wbr"),
         self.startTagVoidFormatting),
        (("param", "source", "track"), self.startTagParamSource),
        ("input", self.startTagInput),
        ("hr", self.startTagHr),
        ("image", self.startTagImage),
        ("isindex", self.startTagIsIndex),
        ("textarea", self.startTagTextarea),
        ("iframe", self.startTagIFrame),
        (("noembed", "noframes", "noscript"), self.startTagRawtext),
        ("select", self.startTagSelect),
        (("rp", "rt"), self.startTagRpRt),
        (("option", "optgroup"), self.startTagOpt),
        # ("math") and ("svg") are plain strings, not one-element tuples.
        (("math"), self.startTagMath),
        (("svg"), self.startTagSvg),
        (("caption", "col", "colgroup", "frame", "head",
          "tbody", "td", "tfoot", "th", "thead",
          "tr"), self.startTagMisplaced)
    ])
    self.startTagHandler.default = self.startTagOther

    self.endTagHandler = utils.MethodDispatcher([
        ("body", self.endTagBody),
        ("html", self.endTagHtml),
        (("address", "article", "aside", "blockquote", "button", "center",
          "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
          "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
          "section", "summary", "ul"), self.endTagBlock),
        ("form", self.endTagForm),
        ("p", self.endTagP),
        (("dd", "dt", "li"), self.endTagListItem),
        (headingElements, self.endTagHeading),
        (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
          "strike", "strong", "tt", "u"), self.endTagFormatting),
        (("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
        ("br", self.endTagBr),
    ])
    self.endTagHandler.default = self.endTagOther
|
| +
|
def isMatchingFormattingElement(self, node1, node2):
    """Return True when both nodes share name, namespace and attributes.

    Attributes are compared as sorted (name, value) pairs, so the order in
    which they are stored does not matter.
    """
    sameElementType = (node1.name == node2.name and
                       node1.namespace == node2.namespace)
    if not sameElementType:
        return False
    if len(node1.attributes) != len(node2.attributes):
        return False
    pairs = zip(sorted(node1.attributes.items()),
                sorted(node2.attributes.items()))
    return all(left == right for left, right in pairs)
|
| +
|
| + # helper
|
def addFormattingElement(self, token):
    """Insert the element and record it in the active formatting elements.

    Entries after the last Marker that match the new element are collected
    (newest first); when three already exist, the oldest of them is removed
    before the new element is appended.
    """
    self.tree.insertElement(token)
    element = self.tree.openElements[-1]

    duplicates = []
    for candidate in reversed(self.tree.activeFormattingElements):
        if candidate is Marker:
            break
        if self.isMatchingFormattingElement(candidate, element):
            duplicates.append(candidate)

    assert len(duplicates) <= 3
    if len(duplicates) == 3:
        # duplicates is newest-first, so the last entry is the oldest.
        oldest = duplicates[-1]
        self.tree.activeFormattingElements.remove(oldest)
    self.tree.activeFormattingElements.append(element)
|
| +
|
| + # the real deal
|
def processEOF(self):
    """Report one parse error if any open element may not be left open at EOF.

    Nothing is popped or returned; the stack is only inspected.
    """
    mayStayOpen = frozenset(("dd", "dt", "li", "p", "tbody", "td",
                             "tfoot", "th", "thead", "tr", "body",
                             "html"))
    stackTopFirst = reversed(self.tree.openElements)
    if any(node.name not in mayStayOpen for node in stackTopFirst):
        self.parser.parseError("expected-closing-tag-but-got-eof")
    # Stop parsing
|
| +
|
def processSpaceCharactersDropNewline(self, token):
    """Insert whitespace, dropping one leading newline in an empty pre-like element.

    The normal whitespace handler is restored first. A leading newline is
    removed only when the current node is pre, listing or textarea and
    reports no content yet. Nothing is inserted if no text remains.
    """
    text = token["data"]
    self.processSpaceCharacters = self.processSpaceCharactersNonPre
    if text.startswith("\n"):
        current = self.tree.openElements[-1]
        if (current.name in ("pre", "listing", "textarea") and
                not self.tree.openElements[-1].hasContent()):
            text = text[1:]
    if not text:
        return
    self.tree.reconstructActiveFormattingElements()
    self.tree.insertText(text)
|
| +
|
def processCharacters(self, token):
    """Insert character data and clear framesetOK on non-space content.

    A token whose data is exactly one NULL character is ignored. Otherwise
    the active formatting elements are reconstructed and the text is
    inserted. If framesetOK is still set and the text contains any
    character outside spaceCharacters, framesetOK becomes False.
    """
    if token["data"] == "\u0000":
        # The tokenizer should always emit null on its own
        return
    self.tree.reconstructActiveFormattingElements()
    self.tree.insertText(token["data"])
    # A generator lets any() stop at the first non-space character instead
    # of building a list covering the whole string first.
    if (self.parser.framesetOK and
            any(char not in spaceCharacters
                for char in token["data"])):
        self.parser.framesetOK = False
|
| +
|
def processSpaceCharacters(self, token):
    """Reconstruct the active formatting elements, then insert the text."""
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    tree.insertText(token["data"])
|
| +
|
def startTagProcessInHead(self, token):
    """Delegate the start tag to the "inHead" phase and relay its result."""
    inHeadPhase = self.parser.phases["inHead"]
    return inHeadPhase.processStartTag(token)
|
| +
|
def startTagBody(self, token):
    """Merge a stray <body> tag's attributes into the existing body element.

    Always reports a parse error. If the second open element is not a body
    element (or only one element is open), only the innerHTML assertion
    runs. Otherwise framesetOK is cleared and every attribute the existing
    body element lacks is copied over from the token.
    """
    self.parser.parseError("unexpected-start-tag", {"name": "body"})
    stack = self.tree.openElements
    bodyIsSecond = len(stack) != 1 and stack[1].name == "body"
    if not bodyIsSecond:
        assert self.parser.innerHTML
        return
    self.parser.framesetOK = False
    for attr, value in token["data"].items():
        existing = self.tree.openElements[1].attributes
        if attr not in existing:
            self.tree.openElements[1].attributes[attr] = value
|
| +
|
def startTagFrameset(self, token):
    """Replace the body element with a frameset when that is still allowed.

    Always reports a parse error. The token is otherwise ignored when the
    second open element is not a body element or framesetOK is false.
    Otherwise the body element is detached from its parent, the stack is
    popped back to the html element, the frameset element is inserted and
    the parser switches to the "inFrameset" phase.
    """
    self.parser.parseError("unexpected-start-tag", {"name": "frameset"})
    stack = self.tree.openElements
    if len(stack) == 1 or stack[1].name != "body":
        assert self.parser.innerHTML
        return
    if not self.parser.framesetOK:
        return
    if self.tree.openElements[1].parent:
        bodyNode = self.tree.openElements[1]
        bodyNode.parent.removeChild(bodyNode)
    while self.tree.openElements[-1].name != "html":
        self.tree.openElements.pop()
    self.tree.insertElement(token)
    self.parser.phase = self.parser.phases["inFrameset"]
|
| +
|
def startTagCloseP(self, token):
    """Close a <p> that is in button scope, then insert the element."""
    paragraphOpen = self.tree.elementInScope("p", variant="button")
    if paragraphOpen:
        self.endTagP(impliedTagToken("p"))
    self.tree.insertElement(token)
|
| +
|
def startTagPreListing(self, token):
    """Insert a pre/listing element and arm leading-newline dropping.

    A <p> in button scope is closed first. framesetOK is cleared and the
    whitespace handler is swapped for the newline-dropping variant.
    """
    paragraphOpen = self.tree.elementInScope("p", variant="button")
    if paragraphOpen:
        self.endTagP(impliedTagToken("p"))
    self.tree.insertElement(token)
    self.parser.framesetOK = False
    self.processSpaceCharacters = self.processSpaceCharactersDropNewline
|
| +
|
def startTagForm(self, token):
    """Insert a form element unless a form pointer is already set.

    With a form pointer present the tag only produces a parse error.
    Otherwise a <p> in button scope is closed, the element is inserted and
    it becomes the tree's form pointer.
    """
    if self.tree.formPointer:
        self.parser.parseError("unexpected-start-tag", {"name": "form"})
        return
    if self.tree.elementInScope("p", variant="button"):
        self.endTagP(impliedTagToken("p"))
    self.tree.insertElement(token)
    newForm = self.tree.openElements[-1]
    self.tree.formPointer = newForm
|
| +
|
def startTagListItem(self, token):
    """Insert an li/dd/dt element, implicitly closing an open sibling item.

    The stack is walked from the top: the first open element whose name is
    in the stop list for this tag gets an implied end tag, and the walk
    ends early at any special element other than address, div or p.
    A <p> in button scope is then closed before the element is inserted.
    """
    self.parser.framesetOK = False

    definitionItems = ["dt", "dd"]
    stopNames = {"li": ["li"],
                 "dt": definitionItems,
                 "dd": definitionItems}[token["name"]]
    for node in self.tree.openElements[::-1]:
        if node.name in stopNames:
            impliedEnd = impliedTagToken(node.name, "EndTag")
            self.parser.phase.processEndTag(impliedEnd)
            break
        blocksSearch = (node.nameTuple in specialElements and
                        node.name not in ("address", "div", "p"))
        if blocksSearch:
            break

    if self.tree.elementInScope("p", variant="button"):
        impliedEnd = impliedTagToken("p", "EndTag")
        self.parser.phase.processEndTag(impliedEnd)

    self.tree.insertElement(token)
|
| +
|
def startTagPlaintext(self, token):
    """Insert the element and put the tokenizer into its plaintext state.

    A <p> in button scope is closed first.
    """
    paragraphOpen = self.tree.elementInScope("p", variant="button")
    if paragraphOpen:
        self.endTagP(impliedTagToken("p"))
    self.tree.insertElement(token)
    tokenizer = self.parser.tokenizer
    tokenizer.state = tokenizer.plaintextState
|
| +
|
def startTagHeading(self, token):
    """Insert a heading element, popping a heading that is the current node.

    A <p> in button scope is closed first. If the current node is itself a
    heading element, a parse error is reported and that node is popped.
    """
    if self.tree.elementInScope("p", variant="button"):
        self.endTagP(impliedTagToken("p"))
    currentIsHeading = self.tree.openElements[-1].name in headingElements
    if currentIsHeading:
        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
        self.tree.openElements.pop()
    self.tree.insertElement(token)
|
| +
|
def startTagA(self, token):
    """Insert an <a> element after closing any still-active <a>.

    When an "a" entry is found in the active formatting elements, a parse
    error is reported, an implied end tag is run through endTagFormatting,
    and the old element is removed from both the stack of open elements
    and the active formatting list if it is still present in them.
    """
    previousAnchor = self.tree.elementInActiveFormattingElements("a")
    if previousAnchor:
        self.parser.parseError("unexpected-start-tag-implies-end-tag",
                               {"startName": "a", "endName": "a"})
        self.endTagFormatting(impliedTagToken("a"))
        stillOpen = previousAnchor in self.tree.openElements
        if stillOpen:
            self.tree.openElements.remove(previousAnchor)
        stillActive = previousAnchor in self.tree.activeFormattingElements
        if stillActive:
            self.tree.activeFormattingElements.remove(previousAnchor)
    self.tree.reconstructActiveFormattingElements()
    self.addFormattingElement(token)
|
| +
|
def startTagFormatting(self, token):
    """Reconstruct active formatting elements, then add this one to them."""
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    self.addFormattingElement(token)
|
| +
|
def startTagNobr(self, token):
    """Insert a <nobr> element, closing one that is already in scope.

    When a nobr element is in scope a parse error is reported, an implied
    end tag is processed and the active formatting elements are
    reconstructed a second time.
    """
    self.tree.reconstructActiveFormattingElements()
    nobrOpen = self.tree.elementInScope("nobr")
    if nobrOpen:
        self.parser.parseError("unexpected-start-tag-implies-end-tag",
                               {"startName": "nobr", "endName": "nobr"})
        self.processEndTag(impliedTagToken("nobr"))
        # XXX Need tests that trigger the following
        self.tree.reconstructActiveFormattingElements()
    self.addFormattingElement(token)
|
| +
|
def startTagButton(self, token):
    """Insert a button, or close an open one and hand the token back.

    With a button already in scope: reports a parse error, processes an
    implied end tag and returns the token. Otherwise the element is
    inserted, framesetOK is cleared and None is returned.
    """
    if not self.tree.elementInScope("button"):
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(token)
        self.parser.framesetOK = False
        return None
    self.parser.parseError("unexpected-start-tag-implies-end-tag",
                           {"startName": "button", "endName": "button"})
    self.processEndTag(impliedTagToken("button"))
    return token
|
| +
|
def startTagAppletMarqueeObject(self, token):
    """Insert the element and push a Marker onto the active formatting list.

    framesetOK is cleared afterwards.
    """
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    tree.insertElement(token)
    tree.activeFormattingElements.append(Marker)
    self.parser.framesetOK = False
|
| +
|
def startTagXmp(self, token):
    """Handle <xmp> by parsing its content as RAWTEXT.

    A <p> in button scope is closed first; the active formatting elements
    are reconstructed and framesetOK is cleared before delegating to the
    parser's parseRCDataRawtext.
    """
    paragraphOpen = self.tree.elementInScope("p", variant="button")
    if paragraphOpen:
        self.endTagP(impliedTagToken("p"))
    self.tree.reconstructActiveFormattingElements()
    parser = self.parser
    parser.framesetOK = False
    parser.parseRCDataRawtext(token, "RAWTEXT")
|
| +
|
def startTagTable(self, token):
    """Insert a table element and switch to the "inTable" phase.

    Unless compatMode is "quirks", a <p> in button scope is closed first.
    framesetOK is cleared.
    """
    quirks = self.parser.compatMode == "quirks"
    if not quirks and self.tree.elementInScope("p", variant="button"):
        self.processEndTag(impliedTagToken("p"))
    self.tree.insertElement(token)
    self.parser.framesetOK = False
    self.parser.phase = self.parser.phases["inTable"]
|
| +
|
def startTagVoidFormatting(self, token):
    """Insert a void element and immediately pop it off the stack.

    The token's self-closing flag is marked as acknowledged and framesetOK
    is cleared.
    """
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    tree.insertElement(token)
    tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
    self.parser.framesetOK = False
|
| +
|
def startTagInput(self, token):
    """Insert an <input>; a hidden one leaves framesetOK as it was.

    The element is handled like any other void element, after which the
    previous framesetOK value is restored when the type attribute,
    lower-cased via asciiUpper2Lower, equals "hidden".
    """
    savedFramesetOK = self.parser.framesetOK
    self.startTagVoidFormatting(token)
    attrs = token["data"]
    isHidden = ("type" in attrs and
                attrs["type"].translate(asciiUpper2Lower) == "hidden")
    if isHidden:
        # input type=hidden doesn't change framesetOK
        self.parser.framesetOK = savedFramesetOK
|
| +
|
def startTagParamSource(self, token):
    """Insert the element, pop it straight away and acknowledge self-closing."""
    tree = self.tree
    tree.insertElement(token)
    tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
|
| +
|
def startTagHr(self, token):
    """Insert an <hr>, pop it immediately and clear framesetOK.

    A <p> in button scope is closed first; the token's self-closing flag is
    acknowledged.
    """
    paragraphOpen = self.tree.elementInScope("p", variant="button")
    if paragraphOpen:
        self.endTagP(impliedTagToken("p"))
    self.tree.insertElement(token)
    self.tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
    self.parser.framesetOK = False
|
| +
|
def startTagImage(self, token):
    """Treat <image> as <img>: report a parse error and reprocess.

    The replacement token carries the original attributes and self-closing
    flag.
    """
    # No really...
    self.parser.parseError("unexpected-start-tag-treated-as",
                           {"originalName": "image", "newName": "img"})
    imgToken = impliedTagToken("img", "StartTag",
                               attributes=token["data"],
                               selfClosing=token["selfClosing"])
    self.processStartTag(imgToken)
|
| +
|
def startTagIsIndex(self, token):
    """Expand the deprecated <isindex> tag into a small form.

    Always reports a parse error and stops there when a form pointer is
    already set. Otherwise it processes, in order: a form start tag
    (carrying the "action" attribute if given), hr, label, the prompt
    text, an input named "isindex" with the remaining attributes, the
    label end tag, another hr and the form end tag.
    """
    self.parser.parseError("deprecated-tag", {"name": "isindex"})
    if self.tree.formPointer:
        return

    form_attrs = {}
    if "action" in token["data"]:
        form_attrs["action"] = token["data"]["action"]
    formToken = impliedTagToken("form", "StartTag", attributes=form_attrs)
    self.processStartTag(formToken)
    self.processStartTag(impliedTagToken("hr", "StartTag"))
    self.processStartTag(impliedTagToken("label", "StartTag"))

    # XXX Localization ...
    prompt = (token["data"]["prompt"] if "prompt" in token["data"]
              else "This is a searchable index. Enter search keywords: ")
    self.processCharacters(
        {"type": tokenTypes["Characters"], "data": prompt})

    # The input keeps every attribute except the two consumed above.
    attributes = token["data"].copy()
    for consumed in ("action", "prompt"):
        if consumed in attributes:
            del attributes[consumed]
    attributes["name"] = "isindex"
    inputToken = impliedTagToken("input", "StartTag",
                                 attributes=attributes,
                                 selfClosing=token["selfClosing"])
    self.processStartTag(inputToken)

    self.processEndTag(impliedTagToken("label"))
    self.processStartTag(impliedTagToken("hr", "StartTag"))
    self.processEndTag(impliedTagToken("form"))
|
| +
|
def startTagTextarea(self, token):
    """Insert a <textarea> and switch the tokenizer to its RCDATA state.

    The whitespace handler is swapped for the newline-dropping variant and
    framesetOK is cleared.
    """
    self.tree.insertElement(token)
    tokenizer = self.parser.tokenizer
    tokenizer.state = tokenizer.rcdataState
    self.processSpaceCharacters = self.processSpaceCharactersDropNewline
    self.parser.framesetOK = False
|
| +
|
def startTagIFrame(self, token):
    """Clear framesetOK, then handle the tag like other raw-text elements."""
    parser = self.parser
    parser.framesetOK = False
    self.startTagRawtext(token)
|
| +
|
def startTagRawtext(self, token):
    """Hand the token to the parser's parseRCDataRawtext in "RAWTEXT" mode.

    Used for iframe, noembed, noframes and noscript (if scripting enabled).
    """
    contentType = "RAWTEXT"
    self.parser.parseRCDataRawtext(token, contentType)
|
| +
|
def startTagOpt(self, token):
    """Insert an option/optgroup, closing an <option> that is the current node."""
    currentName = self.tree.openElements[-1].name
    if currentName == "option":
        self.parser.phase.processEndTag(impliedTagToken("option"))
    self.tree.reconstructActiveFormattingElements()
    # The insertion goes through the parser's tree reference, as before.
    self.parser.tree.insertElement(token)
|
| +
|
def startTagSelect(self, token):
    """Insert a <select> and pick the matching select phase.

    framesetOK is cleared. If the parser's current phase is one of the
    table-related phases, the next phase is "inSelectInTable"; otherwise
    it is "inSelect".
    """
    self.tree.reconstructActiveFormattingElements()
    self.tree.insertElement(token)
    self.parser.framesetOK = False

    phases = self.parser.phases
    tablePhaseNames = ("inTable", "inCaption", "inColumnGroup",
                       "inTableBody", "inRow", "inCell")
    tablePhases = tuple(phases[name] for name in tablePhaseNames)
    if self.parser.phase in tablePhases:
        nextPhase = "inSelectInTable"
    else:
        nextPhase = "inSelect"
    self.parser.phase = self.parser.phases[nextPhase]
|
| +
|
def startTagRpRt(self, token):
    """Insert an rp/rt element, generating implied end tags inside a ruby.

    When a ruby element is in scope, implied end tags are generated and a
    parse error is reported if the current node is then not the ruby
    element.
    """
    tree = self.tree
    if tree.elementInScope("ruby"):
        tree.generateImpliedEndTags()
        currentName = tree.openElements[-1].name
        if currentName != "ruby":
            self.parser.parseError()
    tree.insertElement(token)
|
| +
|
def startTagMath(self, token):
    """Insert a <math> element in the MathML namespace.

    Attributes are adjusted through the parser's MathML and foreign
    attribute helpers. A self-closing token is popped straight away and
    its flag acknowledged.
    """
    self.tree.reconstructActiveFormattingElements()
    parser = self.parser
    parser.adjustMathMLAttributes(token)
    parser.adjustForeignAttributes(token)
    token["namespace"] = namespaces["mathml"]
    self.tree.insertElement(token)
    # Need to get the parse error right for the case where the token
    # has a namespace not equal to the xmlns attribute
    if not token["selfClosing"]:
        return
    self.tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
|
| +
|
def startTagSvg(self, token):
    """Insert an <svg> element in the SVG namespace.

    Attributes are adjusted through the parser's SVG and foreign attribute
    helpers. A self-closing token is popped straight away and its flag
    acknowledged.
    """
    self.tree.reconstructActiveFormattingElements()
    parser = self.parser
    parser.adjustSVGAttributes(token)
    parser.adjustForeignAttributes(token)
    token["namespace"] = namespaces["svg"]
    self.tree.insertElement(token)
    # Need to get the parse error right for the case where the token
    # has a namespace not equal to the xmlns attribute
    if not token["selfClosing"]:
        return
    self.tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
|
| +
|
def startTagMisplaced(self, token):
    """Ignore a start tag that has no business appearing in this phase.

    The dispatch table routes table-structure tags and similar elements
    here; the only effect is an "unexpected-start-tag-ignored" parse
    error naming the tag.
    """
    errorData = {"name": token["name"]}
    self.parser.parseError("unexpected-start-tag-ignored", errorData)
|
| +
|
def startTagOther(self, token):
    """Default start-tag handler: reconstruct formatting, then insert."""
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    tree.insertElement(token)
|
| +
|
def endTagP(self, token):
    """Close the nearest <p> in button scope, creating one if none is open.

    With a <p> in scope: implied end tags (except p) are generated, a
    parse error is reported if the current node is not the p, and the
    stack is popped up to and including it. Without one: an implied <p>
    start tag is processed, a parse error is reported, and the end tag is
    handled again.
    """
    if self.tree.elementInScope("p", variant="button"):
        self.tree.generateImpliedEndTags("p")
        if self.tree.openElements[-1].name != "p":
            self.parser.parseError("unexpected-end-tag", {"name": "p"})
        # Pop up to and including the p element.
        while self.tree.openElements.pop().name != "p":
            pass
        return
    self.startTagCloseP(impliedTagToken("p", "StartTag"))
    self.parser.parseError("unexpected-end-tag", {"name": "p"})
    self.endTagP(impliedTagToken("p", "EndTag"))
|
| +
|
def endTagBody(self, token):
    """Handle </body>: validate the open elements and move to "afterBody".

    If no body element is in scope, a parse error is reported and the
    token is ignored. Otherwise, when the current node is not the body
    element, the open elements from index 2 onwards are scanned and one
    parse error is reported for the first element whose name is not in
    the allowed set. The parser then switches to the "afterBody" phase;
    no elements are popped here.
    """
    if not self.tree.elementInScope("body"):
        self.parser.parseError()
        return
    elif self.tree.openElements[-1].name != "body":
        # Built once per call rather than once per loop iteration.
        mayStayOpen = frozenset(("dd", "dt", "li", "optgroup",
                                 "option", "p", "rp", "rt",
                                 "tbody", "td", "tfoot",
                                 "th", "thead", "tr", "body",
                                 "html"))
        for node in self.tree.openElements[2:]:
            if node.name not in mayStayOpen:
                # Not sure this is the correct name for the parse error
                self.parser.parseError(
                    "expected-one-end-tag-but-got-another",
                    {"expectedName": "body", "gotName": node.name})
                break
    self.parser.phase = self.parser.phases["afterBody"]
|
| +
|
def endTagHtml(self, token):
    """Treat </html> as </body> followed by the same token again.

    When a body element is in scope the body end-tag handling runs and
    the token is returned; otherwise nothing happens and None is returned.
    """
    # We repeat the test for the body end tag token being ignored here
    if not self.tree.elementInScope("body"):
        return None
    self.endTagBody(impliedTagToken("body"))
    return token
|
| +
|
def endTagBlock(self, token):
    """Close a block-level element named by the end tag.

    For "pre" the normal whitespace handler is restored first. If the
    element is in scope, implied end tags are generated. A parse error is
    reported whenever the current node's name differs from the tag name.
    If the element was in scope, the stack is popped up to and including
    it.
    """
    tagName = token["name"]
    # Put us back in the right whitespace handling mode
    if tagName == "pre":
        self.processSpaceCharacters = self.processSpaceCharactersNonPre
    inScope = self.tree.elementInScope(tagName)
    if inScope:
        self.tree.generateImpliedEndTags()
    if self.tree.openElements[-1].name != tagName:
        self.parser.parseError("end-tag-too-early", {"name": tagName})
    if inScope:
        while self.tree.openElements.pop().name != tagName:
            pass
|
| +
|
def endTagForm(self, token):
    """Handle </form>: clear the form pointer and drop the form element.

    The form pointer is always reset to None. If there was no form, or it
    is not in scope, a parse error is reported. Otherwise implied end tags
    are generated, a parse error is reported when the form is not the
    current node, and the form is removed from the stack (elements above
    it stay open).
    """
    formNode = self.tree.formPointer
    self.tree.formPointer = None
    if formNode is None or not self.tree.elementInScope(formNode):
        self.parser.parseError("unexpected-end-tag",
                               {"name": "form"})
        return
    self.tree.generateImpliedEndTags()
    if self.tree.openElements[-1] != formNode:
        self.parser.parseError("end-tag-too-early-ignored",
                               {"name": "form"})
    self.tree.openElements.remove(formNode)
|
| +
|
def endTagListItem(self, token):
    """Close the nearest li/dd/dt named by the end tag.

    "li" is looked up with the "list" scope variant, other names with the
    default variant. Out of scope: a parse error and nothing else. In
    scope: implied end tags (except this name) are generated, a parse
    error is reported if the current node differs, and the stack is
    popped up to and including the item.
    """
    tagName = token["name"]
    variant = "list" if tagName == "li" else None
    if not self.tree.elementInScope(tagName, variant=variant):
        self.parser.parseError("unexpected-end-tag", {"name": tagName})
        return
    self.tree.generateImpliedEndTags(exclude=tagName)
    if self.tree.openElements[-1].name != tagName:
        self.parser.parseError(
            "end-tag-too-early",
            {"name": tagName})
    while self.tree.openElements.pop().name != tagName:
        pass
|
| +
|
def endTagHeading(self, token):
    """Close the nearest open heading, whatever its level.

    If any heading element is in scope, implied end tags are generated.
    A parse error is reported when the current node's name differs from
    the tag name. If a heading is (still) in scope, the stack is popped
    up to and including the first heading element encountered.
    """
    if any(self.tree.elementInScope(level) for level in headingElements):
        self.tree.generateImpliedEndTags()
    if self.tree.openElements[-1].name != token["name"]:
        self.parser.parseError("end-tag-too-early", {"name": token["name"]})

    if any(self.tree.elementInScope(level) for level in headingElements):
        popped = self.tree.openElements.pop()
        while popped.name not in headingElements:
            popped = self.tree.openElements.pop()
|
| +
|
def endTagFormatting(self, token):
    """The much-feared adoption agency algorithm

    Handles an end tag for a formatting element. The outer loop runs at
    most eight times. Each pass looks up the matching active formatting
    element; depending on what is found it either delegates to
    endTagOther, removes a stale entry, simply pops the stack, or
    re-parents the "furthest block" under a clone of the formatting
    element. Every early ``return`` below ends the whole algorithm.
    """
    # http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867
    # XXX Better parseError messages appreciated.

    # Step 1
    outerLoopCounter = 0

    # Step 2
    while outerLoopCounter < 8:

        # Step 3
        outerLoopCounter += 1

        # Step 4:

        # Let the formatting element be the last element in
        # the list of active formatting elements that:
        # - is between the end of the list and the last scope
        # marker in the list, if any, or the start of the list
        # otherwise, and
        # - has the same tag name as the token.
        formattingElement = self.tree.elementInActiveFormattingElements(
            token["name"])
        if (not formattingElement or
            (formattingElement in self.tree.openElements and
             not self.tree.elementInScope(formattingElement.name))):
            # If there is no such node, then abort these steps
            # and instead act as described in the "any other
            # end tag" entry below.
            self.endTagOther(token)
            return

        # Otherwise, if there is such a node, but that node is
        # not in the stack of open elements, then this is a
        # parse error; remove the element from the list, and
        # abort these steps.
        elif formattingElement not in self.tree.openElements:
            self.parser.parseError("adoption-agency-1.2", {"name": token["name"]})
            self.tree.activeFormattingElements.remove(formattingElement)
            return

        # Otherwise, if there is such a node, and that node is
        # also in the stack of open elements, but the element
        # is not in scope, then this is a parse error; ignore
        # the token, and abort these steps.
        # NOTE(review): this branch looks unreachable - the first
        # condition above already routes the "in the stack but not in
        # scope" case to endTagOther. Confirm before relying on the
        # "adoption-agency-4.4" error being emitted.
        elif not self.tree.elementInScope(formattingElement.name):
            self.parser.parseError("adoption-agency-4.4", {"name": token["name"]})
            return

        # Otherwise, there is a formatting element and that
        # element is in the stack and is in scope. If the
        # element is not the current node, this is a parse
        # error. In any case, proceed with the algorithm as
        # written in the following steps.
        else:
            if formattingElement != self.tree.openElements[-1]:
                self.parser.parseError("adoption-agency-1.3", {"name": token["name"]})

        # Step 5:

        # Let the furthest block be the topmost node in the
        # stack of open elements that is lower in the stack
        # than the formatting element, and is an element in
        # the special category. There might not be one.
        afeIndex = self.tree.openElements.index(formattingElement)
        furthestBlock = None
        # The slice starts at the formatting element itself.
        for element in self.tree.openElements[afeIndex:]:
            if element.nameTuple in specialElements:
                furthestBlock = element
                break

        # Step 6:

        # If there is no furthest block, then the UA must
        # first pop all the nodes from the bottom of the stack
        # of open elements, from the current node up to and
        # including the formatting element, then remove the
        # formatting element from the list of active
        # formatting elements, and finally abort these steps.
        if furthestBlock is None:
            element = self.tree.openElements.pop()
            while element != formattingElement:
                element = self.tree.openElements.pop()
            self.tree.activeFormattingElements.remove(element)
            return

        # Step 7
        # The common ancestor is the stack entry just before the
        # formatting element.
        commonAncestor = self.tree.openElements[afeIndex - 1]

        # Step 8:
        # The bookmark is supposed to help us identify where to reinsert
        # nodes in step 15. We have to ensure that we reinsert nodes after
        # the node before the active formatting element. Note the bookmark
        # can move in step 9.7
        bookmark = self.tree.activeFormattingElements.index(formattingElement)

        # Step 9
        lastNode = node = furthestBlock
        innerLoopCounter = 0

        index = self.tree.openElements.index(node)
        # The inner loop visits at most three stack entries above the
        # furthest block.
        while innerLoopCounter < 3:
            innerLoopCounter += 1
            # Node is element before node in open elements
            index -= 1
            node = self.tree.openElements[index]
            if node not in self.tree.activeFormattingElements:
                # Not a formatting element: drop it from the stack and
                # move on to the next entry up.
                self.tree.openElements.remove(node)
                continue
            # Step 9.6
            if node == formattingElement:
                break
            # Step 9.7
            if lastNode == furthestBlock:
                bookmark = self.tree.activeFormattingElements.index(node) + 1
            # Step 9.8
            clone = node.cloneNode()
            # Replace node with clone
            self.tree.activeFormattingElements[
                self.tree.activeFormattingElements.index(node)] = clone
            self.tree.openElements[
                self.tree.openElements.index(node)] = clone
            node = clone
            # Step 9.9
            # Remove lastNode from its parents, if any
            if lastNode.parent:
                lastNode.parent.removeChild(lastNode)
            node.appendChild(lastNode)
            # Step 9.10
            lastNode = node

        # Step 10
        # Foster parent lastNode if commonAncestor is a
        # table, tbody, tfoot, thead, or tr we need to foster
        # parent the lastNode
        if lastNode.parent:
            lastNode.parent.removeChild(lastNode)

        if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")):
            parent, insertBefore = self.tree.getTableMisnestedNodePosition()
            parent.insertBefore(lastNode, insertBefore)
        else:
            commonAncestor.appendChild(lastNode)

        # Step 11
        clone = formattingElement.cloneNode()

        # Step 12
        # Hands the furthest block's children over to the clone.
        furthestBlock.reparentChildren(clone)

        # Step 13
        furthestBlock.appendChild(clone)

        # Step 14
        # The clone takes the formatting element's place in the active
        # list, at the bookmarked position.
        self.tree.activeFormattingElements.remove(formattingElement)
        self.tree.activeFormattingElements.insert(bookmark, clone)

        # Step 15
        # On the stack the clone goes immediately after the furthest block.
        self.tree.openElements.remove(formattingElement)
        self.tree.openElements.insert(
            self.tree.openElements.index(furthestBlock) + 1, clone)
|
| +
|
def endTagAppletMarqueeObject(self, token):
    """Close an applet/marquee/object element and clear formatting to its marker.

    If the element is in scope, implied end tags are generated. A parse
    error is reported when the current node's name differs from the tag
    name. If the element is in scope, the stack is popped up to and
    including it and the active formatting elements are cleared.
    """
    tagName = token["name"]
    if self.tree.elementInScope(tagName):
        self.tree.generateImpliedEndTags()
    if self.tree.openElements[-1].name != tagName:
        self.parser.parseError("end-tag-too-early", {"name": tagName})

    # Scope is deliberately queried a second time, as before.
    if not self.tree.elementInScope(tagName):
        return
    while self.tree.openElements.pop().name != tagName:
        pass
    self.tree.clearActiveFormattingElements()
|
| +
|
def endTagBr(self, token):
    """Treat </br> as a <br> start tag: report an error and insert the element.

    The inserted element is popped straight away.
    """
    self.parser.parseError("unexpected-end-tag-treated-as",
                           {"originalName": "br", "newName": "br element"})
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    impliedBr = impliedTagToken("br", "StartTag")
    tree.insertElement(impliedBr)
    tree.openElements.pop()
|
| +
|
def endTagOther(self, token):
    """Default end-tag handler: close the nearest matching open element.

    The stack is walked from the top. At the first element with the tag's
    name, implied end tags (except that name) are generated, a parse
    error is reported if the current node differs, and the stack is
    popped up to and including that element. If a special element is met
    first, a parse error is reported and the token is ignored.
    """
    tagName = token["name"]
    for node in reversed(self.tree.openElements):
        if node.name != tagName:
            if node.nameTuple in specialElements:
                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
                break
            continue
        self.tree.generateImpliedEndTags(exclude=token["name"])
        if self.tree.openElements[-1].name != token["name"]:
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
        while self.tree.openElements.pop() != node:
            pass
        break
|
| +
|
| + class TextPhase(Phase):
|
def __init__(self, parser, tree):
    """Set up the dispatch tables: only a "script" end tag is special-cased."""
    Phase.__init__(self, parser, tree)

    startDispatch = utils.MethodDispatcher([])
    startDispatch.default = self.startTagOther
    self.startTagHandler = startDispatch

    endDispatch = utils.MethodDispatcher([
        ("script", self.endTagScript)])
    endDispatch.default = self.endTagOther
    self.endTagHandler = endDispatch
|
| +
|
def processCharacters(self, token):
    """Insert the token's text as-is."""
    text = token["data"]
    self.tree.insertText(text)
|
| +
|
def processEOF(self):
    """Handle EOF inside a text element.

    Reports a parse error naming the current node, pops it, restores the
    parser's original phase and returns True.
    """
    unclosedName = self.tree.openElements[-1].name
    self.parser.parseError("expected-named-closing-tag-but-got-eof",
                           {"name": unclosedName})
    self.tree.openElements.pop()
    parser = self.parser
    parser.phase = parser.originalPhase
    return True
|
| +
|
def startTagOther(self, token):
    """Fail an assertion: a start tag is not expected to reach this phase."""
    assert False, (
        "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name'])
|
| +
|
def endTagScript(self, token):
    """Pop the script element and return to the parser's original phase."""
    scriptNode = self.tree.openElements.pop()
    assert scriptNode.name == "script"
    parser = self.parser
    parser.phase = parser.originalPhase
    # The rest of this method is all stuff that only happens if
    # document.write works
|
| +
|
def endTagOther(self, token):
    """Pop the current node and return to the parser's original phase."""
    self.tree.openElements.pop()
    parser = self.parser
    parser.phase = parser.originalPhase
|
| +
|
| + class InTablePhase(Phase):
|
| + # http://www.whatwg.org/specs/web-apps/current-work/#in-table
|
def __init__(self, parser, tree):
    """Build the start-tag and end-tag dispatch tables for this phase."""
    Phase.__init__(self, parser, tree)

    startDispatch = utils.MethodDispatcher([
        ("html", self.startTagHtml),
        ("caption", self.startTagCaption),
        ("colgroup", self.startTagColgroup),
        ("col", self.startTagCol),
        (("tbody", "tfoot", "thead"), self.startTagRowGroup),
        (("td", "th", "tr"), self.startTagImplyTbody),
        ("table", self.startTagTable),
        (("style", "script"), self.startTagStyleScript),
        ("input", self.startTagInput),
        ("form", self.startTagForm)
    ])
    startDispatch.default = self.startTagOther
    self.startTagHandler = startDispatch

    endDispatch = utils.MethodDispatcher([
        ("table", self.endTagTable),
        (("body", "caption", "col", "colgroup", "html", "tbody", "td",
          "tfoot", "th", "thead", "tr"), self.endTagIgnore)
    ])
    endDispatch.default = self.endTagOther
    self.endTagHandler = endDispatch
|
| +
|
| + # helper methods
|
def clearStackToTableContext(self):
    """Pop open elements until the current node is a table or html element."""
    # "clear the stack back to a table context"
    stack = self.tree.openElements
    tableContext = ("table", "html")
    while stack[-1].name not in tableContext:
        stack.pop()
    # When the current node is <html> it's an innerHTML case
|
| +
|
| + # processing methods
|
def processEOF(self):
    """Report "eof-in-table" unless the current node is the html element.

    With html as the current node only the innerHTML assertion runs.
    """
    atHtml = self.tree.openElements[-1].name == "html"
    if atHtml:
        assert self.parser.innerHTML
    else:
        self.parser.parseError("eof-in-table")
    # Stop parsing
|
| +
|
def processSpaceCharacters(self, token):
    """Switch to the "inTableText" phase and let it process the whitespace.

    The phase that was current before the switch is stored on the new
    phase as ``originalPhase``.
    """
    parser = self.parser
    previousPhase = parser.phase
    parser.phase = parser.phases["inTableText"]
    parser.phase.originalPhase = previousPhase
    parser.phase.processSpaceCharacters(token)
|
| +
|
def processCharacters(self, token):
    """Switch to the "inTableText" phase and let it process the characters.

    The phase that was current before the switch is stored on the new
    phase as ``originalPhase``.
    """
    parser = self.parser
    previousPhase = parser.phase
    parser.phase = parser.phases["inTableText"]
    parser.phase.originalPhase = previousPhase
    parser.phase.processCharacters(token)
|
| +
|
def insertText(self, token):
    """Insert character data via the "inBody" phase with table handling on.

    ``tree.insertFromTable`` is set to True while the "inBody" phase
    processes the token and is reset to False afterwards, including when
    that processing raises, so the flag cannot be left switched on.
    """
    # If we get here there must be at least one non-whitespace character
    # Do the table magic!
    self.tree.insertFromTable = True
    try:
        self.parser.phases["inBody"].processCharacters(token)
    finally:
        self.tree.insertFromTable = False
|
| +
|
def startTagCaption(self, token):
    """Insert a <caption> and switch to the "inCaption" phase.

    The stack is cleared back to a table context and a Marker is pushed
    onto the active formatting elements first.
    """
    self.clearStackToTableContext()
    tree = self.tree
    tree.activeFormattingElements.append(Marker)
    tree.insertElement(token)
    self.parser.phase = self.parser.phases["inCaption"]
|
| +
|
| +        def startTagColgroup(self, token):
|
| +            """Open a <colgroup> and switch to the "inColumnGroup" phase."""
|
| +            self.clearStackToTableContext()
|
| +            self.tree.insertElement(token)
|
| +            self.parser.phase = self.parser.phases["inColumnGroup"]
|
| +
|
| +        def startTagCol(self, token):
|
| +            """Imply a <colgroup> start tag, then return the <col> token.
|
| +
|
| +            NOTE(review): the returned token is presumably reprocessed by the
|
| +            caller in the newly selected phase -- confirm against the main loop.
|
| +            """
|
| +            self.startTagColgroup(impliedTagToken("colgroup", "StartTag"))
|
| +            return token
|
| +
|
| +        def startTagRowGroup(self, token):
|
| +            """Open a row-group element and switch to the "inTableBody" phase."""
|
| +            self.clearStackToTableContext()
|
| +            self.tree.insertElement(token)
|
| +            self.parser.phase = self.parser.phases["inTableBody"]
|
| +
|
| +        def startTagImplyTbody(self, token):
|
| +            """Imply a <tbody> start tag, then return the original token.
|
| +
|
| +            NOTE(review): the returned token is presumably reprocessed by the
|
| +            caller in the "inTableBody" phase -- confirm against the main loop.
|
| +            """
|
| +            self.startTagRowGroup(impliedTagToken("tbody", "StartTag"))
|
| +            return token
|
| +
|
| +        def startTagTable(self, token):
|
| +            """Handle a nested <table> start tag as an implied </table>.
|
| +
|
| +            Reports a parse error, feeds an implied </table> to the current
|
| +            phase, and returns the token unless parsing a fragment (innerHTML),
|
| +            in which case None is returned implicitly.
|
| +            """
|
| +            self.parser.parseError("unexpected-start-tag-implies-end-tag",
|
| +                                   {"startName": "table", "endName": "table"})
|
| +            self.parser.phase.processEndTag(impliedTagToken("table"))
|
| +            if not self.parser.innerHTML:
|
| +                return token
|
| +
|
| +        def startTagStyleScript(self, token):
|
| +            """Delegate <style>/<script> start tags to the "inHead" phase."""
|
| +            return self.parser.phases["inHead"].processStartTag(token)
|
| +
|
| + def startTagInput(self, token):
|
| + if ("type" in token["data"] and
|
| + token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
|
| + self.parser.parseError("unexpected-hidden-input-in-table")
|
| + self.tree.insertElement(token)
|
| + # XXX associate with form
|
| + self.tree.openElements.pop()
|
| + else:
|
| + self.startTagOther(token)
|
| +
|
| +        def startTagForm(self, token):
|
| +            """Handle <form> inside a table (always a parse error).
|
| +
|
| +            When no form pointer is set, the element is inserted, recorded as
|
| +            the form pointer and immediately popped from the open-element stack;
|
| +            otherwise the token is dropped.
|
| +            """
|
| +            self.parser.parseError("unexpected-form-in-table")
|
| +            if self.tree.formPointer is None:
|
| +                self.tree.insertElement(token)
|
| +                self.tree.formPointer = self.tree.openElements[-1]
|
| +                self.tree.openElements.pop()
|
| +
|
| +        def startTagOther(self, token):
|
| +            """Report a parse error and process the start tag through "inBody"
|
| +            with insertFromTable set for the duration of the call."""
|
| +            self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]})
|
| +            # Do the table magic!
|
| +            self.tree.insertFromTable = True
|
| +            self.parser.phases["inBody"].processStartTag(token)
|
| +            self.tree.insertFromTable = False
|
| +
|
| + def endTagTable(self, token):
|
| + if self.tree.elementInScope("table", variant="table"):
|
| + self.tree.generateImpliedEndTags()
|
| + if self.tree.openElements[-1].name != "table":
|
| + self.parser.parseError("end-tag-too-early-named",
|
| + {"gotName": "table",
|
| + "expectedName": self.tree.openElements[-1].name})
|
| + while self.tree.openElements[-1].name != "table":
|
| + self.tree.openElements.pop()
|
| + self.tree.openElements.pop()
|
| + self.parser.resetInsertionMode()
|
| + else:
|
| + # innerHTML case
|
| + assert self.parser.innerHTML
|
| + self.parser.parseError()
|
| +
|
| +        def endTagIgnore(self, token):
|
| +            """Report an "unexpected-end-tag" parse error and drop the token."""
|
| +            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
|
| +
|
| +        def endTagOther(self, token):
|
| +            """Report a parse error and process the end tag through "inBody"
|
| +            with insertFromTable set for the duration of the call."""
|
| +            self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]})
|
| +            # Do the table magic!
|
| +            self.tree.insertFromTable = True
|
| +            self.parser.phases["inBody"].processEndTag(token)
|
| +            self.tree.insertFromTable = False
|
| +
|
| + class InTableTextPhase(Phase):
|
| + def __init__(self, parser, tree):
|
| + Phase.__init__(self, parser, tree)
|
| + self.originalPhase = None
|
| + self.characterTokens = []
|
| +
|
| + def flushCharacters(self):
|
| + data = "".join([item["data"] for item in self.characterTokens])
|
| + if any([item not in spaceCharacters for item in data]):
|
| + token = {"type": tokenTypes["Characters"], "data": data}
|
| + self.parser.phases["inTable"].insertText(token)
|
| + elif data:
|
| + self.tree.insertText(data)
|
| + self.characterTokens = []
|
| +
|
| + def processComment(self, token):
|
| + self.flushCharacters()
|
| + self.parser.phase = self.originalPhase
|
| + return token
|
| +
|
| + def processEOF(self):
|
| + self.flushCharacters()
|
| + self.parser.phase = self.originalPhase
|
| + return True
|
| +
|
| + def processCharacters(self, token):
|
| + if token["data"] == "\u0000":
|
| + return
|
| + self.characterTokens.append(token)
|
| +
|
| + def processSpaceCharacters(self, token):
|
| + # pretty sure we should never reach here
|
| + self.characterTokens.append(token)
|
| + # assert False
|
| +
|
| + def processStartTag(self, token):
|
| + self.flushCharacters()
|
| + self.parser.phase = self.originalPhase
|
| + return token
|
| +
|
| + def processEndTag(self, token):
|
| + self.flushCharacters()
|
| + self.parser.phase = self.originalPhase
|
| + return token
|
| +
|
| +    class InCaptionPhase(Phase):
|
| +        # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
|
| +        """Phase used while a <caption> is open.
|
| +
|
| +        Table-structure tags first try to close the caption with an implied
|
| +        </caption>; everything else is delegated to the "inBody" phase.
|
| +        """
|
| +        def __init__(self, parser, tree):
|
| +            Phase.__init__(self, parser, tree)
|
| +
|
| +            # Start tags not listed here fall through to startTagOther.
|
| +            self.startTagHandler = utils.MethodDispatcher([
|
| +                ("html", self.startTagHtml),
|
| +                (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
|
| +                  "thead", "tr"), self.startTagTableElement)
|
| +            ])
|
| +            self.startTagHandler.default = self.startTagOther
|
| +
|
| +            # End tags not listed here fall through to endTagOther.
|
| +            self.endTagHandler = utils.MethodDispatcher([
|
| +                ("caption", self.endTagCaption),
|
| +                ("table", self.endTagTable),
|
| +                (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
|
| +                  "thead", "tr"), self.endTagIgnore)
|
| +            ])
|
| +            self.endTagHandler.default = self.endTagOther
|
| +
|
| +        def ignoreEndTagCaption(self):
|
| +            """Return True when no <caption> is in table scope."""
|
| +            return not self.tree.elementInScope("caption", variant="table")
|
| +
|
| +        def processEOF(self):
|
| +            """Delegate end of input to the "inBody" phase."""
|
| +            self.parser.phases["inBody"].processEOF()
|
| +
|
| +        def processCharacters(self, token):
|
| +            """Delegate character tokens to the "inBody" phase."""
|
| +            return self.parser.phases["inBody"].processCharacters(token)
|
| +
|
| +        def startTagTableElement(self, token):
|
| +            """Imply </caption> for a table-structure start tag.
|
| +
|
| +            Returns the token only when the implied end tag was not ignored.
|
| +            """
|
| +            self.parser.parseError()
|
| +            # XXX Have to duplicate logic here to find out if the tag is ignored
|
| +            # The scope check must run before the implied end tag is processed.
|
| +            ignoreEndTag = self.ignoreEndTagCaption()
|
| +            self.parser.phase.processEndTag(impliedTagToken("caption"))
|
| +            if not ignoreEndTag:
|
| +                return token
|
| +
|
| +        def startTagOther(self, token):
|
| +            """Delegate any other start tag to the "inBody" phase."""
|
| +            return self.parser.phases["inBody"].processStartTag(token)
|
| +
|
| +        def endTagCaption(self, token):
|
| +            """Close the caption and return to the "inTable" phase.
|
| +
|
| +            When no caption is in table scope (innerHTML case) only a parse
|
| +            error is reported.
|
| +            """
|
| +            if not self.ignoreEndTagCaption():
|
| +                # AT this code is quite similar to endTagTable in "InTable"
|
| +                self.tree.generateImpliedEndTags()
|
| +                if self.tree.openElements[-1].name != "caption":
|
| +                    self.parser.parseError("expected-one-end-tag-but-got-another",
|
| +                                           {"gotName": "caption",
|
| +                                            "expectedName": self.tree.openElements[-1].name})
|
| +                # Pop up to and including the <caption> element.
|
| +                while self.tree.openElements[-1].name != "caption":
|
| +                    self.tree.openElements.pop()
|
| +                self.tree.openElements.pop()
|
| +                self.tree.clearActiveFormattingElements()
|
| +                self.parser.phase = self.parser.phases["inTable"]
|
| +            else:
|
| +                # innerHTML case
|
| +                assert self.parser.innerHTML
|
| +                self.parser.parseError()
|
| +
|
| +        def endTagTable(self, token):
|
| +            """Imply </caption> for </table>.
|
| +
|
| +            Returns the token only when the implied end tag was not ignored.
|
| +            """
|
| +            self.parser.parseError()
|
| +            ignoreEndTag = self.ignoreEndTagCaption()
|
| +            self.parser.phase.processEndTag(impliedTagToken("caption"))
|
| +            if not ignoreEndTag:
|
| +                return token
|
| +
|
| +        def endTagIgnore(self, token):
|
| +            """Report an "unexpected-end-tag" parse error and drop the token."""
|
| +            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
|
| +
|
| +        def endTagOther(self, token):
|
| +            """Delegate any other end tag to the "inBody" phase."""
|
| +            return self.parser.phases["inBody"].processEndTag(token)
|
| +
|
| +    class InColumnGroupPhase(Phase):
|
| +        # http://www.whatwg.org/specs/web-apps/current-work/#in-column
|
| +        """Phase used while a <colgroup> is open.
|
| +
|
| +        Apart from <html>/<col> start tags and </colgroup>/</col> end tags,
|
| +        tokens imply a </colgroup> and are returned when that end tag was
|
| +        not ignored.
|
| +        """
|
| +
|
| +        def __init__(self, parser, tree):
|
| +            Phase.__init__(self, parser, tree)
|
| +
|
| +            self.startTagHandler = utils.MethodDispatcher([
|
| +                ("html", self.startTagHtml),
|
| +                ("col", self.startTagCol)
|
| +            ])
|
| +            self.startTagHandler.default = self.startTagOther
|
| +
|
| +            self.endTagHandler = utils.MethodDispatcher([
|
| +                ("colgroup", self.endTagColgroup),
|
| +                ("col", self.endTagCol)
|
| +            ])
|
| +            self.endTagHandler.default = self.endTagOther
|
| +
|
| +        def ignoreEndTagColgroup(self):
|
| +            """Return True when the current node is <html>."""
|
| +            return self.tree.openElements[-1].name == "html"
|
| +
|
| +        def processEOF(self):
|
| +            """Handle end of input.
|
| +
|
| +            Returns None when the current node is <html> (innerHTML case);
|
| +            otherwise closes the colgroup and returns True.
|
| +            """
|
| +            if self.tree.openElements[-1].name == "html":
|
| +                assert self.parser.innerHTML
|
| +                return
|
| +            else:
|
| +                # The current node is not <html> in this branch, so this
|
| +                # evaluates to False and True is returned below.
|
| +                ignoreEndTag = self.ignoreEndTagColgroup()
|
| +                self.endTagColgroup(impliedTagToken("colgroup"))
|
| +                if not ignoreEndTag:
|
| +                    return True
|
| +
|
| +        def processCharacters(self, token):
|
| +            """Imply </colgroup>; return the token unless that tag was ignored."""
|
| +            ignoreEndTag = self.ignoreEndTagColgroup()
|
| +            self.endTagColgroup(impliedTagToken("colgroup"))
|
| +            if not ignoreEndTag:
|
| +                return token
|
| +
|
| +        def startTagCol(self, token):
|
| +            """Insert a <col> element and immediately pop it from the stack."""
|
| +            self.tree.insertElement(token)
|
| +            self.tree.openElements.pop()
|
| +
|
| +        def startTagOther(self, token):
|
| +            """Imply </colgroup>; return the token unless that tag was ignored."""
|
| +            ignoreEndTag = self.ignoreEndTagColgroup()
|
| +            self.endTagColgroup(impliedTagToken("colgroup"))
|
| +            if not ignoreEndTag:
|
| +                return token
|
| +
|
| +        def endTagColgroup(self, token):
|
| +            """Pop the current node and switch to "inTable", or report a parse
|
| +            error when the current node is <html> (innerHTML case)."""
|
| +            if self.ignoreEndTagColgroup():
|
| +                # innerHTML case
|
| +                assert self.parser.innerHTML
|
| +                self.parser.parseError()
|
| +            else:
|
| +                self.tree.openElements.pop()
|
| +                self.parser.phase = self.parser.phases["inTable"]
|
| +
|
| +        def endTagCol(self, token):
|
| +            """Report a "no-end-tag" parse error for </col>."""
|
| +            self.parser.parseError("no-end-tag", {"name": "col"})
|
| +
|
| +        def endTagOther(self, token):
|
| +            """Imply </colgroup>; return the token unless that tag was ignored."""
|
| +            ignoreEndTag = self.ignoreEndTagColgroup()
|
| +            self.endTagColgroup(impliedTagToken("colgroup"))
|
| +            if not ignoreEndTag:
|
| +                return token
|
| +
|
| +    class InTableBodyPhase(Phase):
|
| +        # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
|
| +        """Phase used while a <tbody>, <thead> or <tfoot> is open.
|
| +
|
| +        Tokens without a dedicated handler are delegated to the "inTable" phase.
|
| +        """
|
| +        def __init__(self, parser, tree):
|
| +            Phase.__init__(self, parser, tree)
|
| +            self.startTagHandler = utils.MethodDispatcher([
|
| +                ("html", self.startTagHtml),
|
| +                ("tr", self.startTagTr),
|
| +                (("td", "th"), self.startTagTableCell),
|
| +                (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
|
| +                 self.startTagTableOther)
|
| +            ])
|
| +            self.startTagHandler.default = self.startTagOther
|
| +
|
| +            self.endTagHandler = utils.MethodDispatcher([
|
| +                (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
|
| +                ("table", self.endTagTable),
|
| +                (("body", "caption", "col", "colgroup", "html", "td", "th",
|
| +                  "tr"), self.endTagIgnore)
|
| +            ])
|
| +            self.endTagHandler.default = self.endTagOther
|
| +
|
| +        # helper methods
|
| +        def clearStackToTableBodyContext(self):
|
| +            """Pop open elements until the current node is a row group or <html>."""
|
| +            while self.tree.openElements[-1].name not in ("tbody", "tfoot",
|
| +                                                          "thead", "html"):
|
| +                # self.parser.parseError("unexpected-implied-end-tag-in-table",
|
| +                #  {"name": self.tree.openElements[-1].name})
|
| +                self.tree.openElements.pop()
|
| +            if self.tree.openElements[-1].name == "html":
|
| +                assert self.parser.innerHTML
|
| +
|
| +        # the rest
|
| +        def processEOF(self):
|
| +            """Delegate end of input to the "inTable" phase."""
|
| +            self.parser.phases["inTable"].processEOF()
|
| +
|
| +        def processSpaceCharacters(self, token):
|
| +            """Delegate whitespace to the "inTable" phase."""
|
| +            return self.parser.phases["inTable"].processSpaceCharacters(token)
|
| +
|
| +        def processCharacters(self, token):
|
| +            """Delegate character tokens to the "inTable" phase."""
|
| +            return self.parser.phases["inTable"].processCharacters(token)
|
| +
|
| +        def startTagTr(self, token):
|
| +            """Open a <tr> and switch to the "inRow" phase."""
|
| +            self.clearStackToTableBodyContext()
|
| +            self.tree.insertElement(token)
|
| +            self.parser.phase = self.parser.phases["inRow"]
|
| +
|
| +        def startTagTableCell(self, token):
|
| +            """Report a parse error, imply a <tr>, and return the cell token."""
|
| +            self.parser.parseError("unexpected-cell-in-table-body",
|
| +                                   {"name": token["name"]})
|
| +            self.startTagTr(impliedTagToken("tr", "StartTag"))
|
| +            return token
|
| +
|
| +        def startTagTableOther(self, token):
|
| +            """Close the open row group for a table-structure start tag and
|
| +            return the token; same body as endTagTable."""
|
| +            # XXX AT Any ideas on how to share this with endTagTable?
|
| +            if (self.tree.elementInScope("tbody", variant="table") or
|
| +                self.tree.elementInScope("thead", variant="table") or
|
| +                    self.tree.elementInScope("tfoot", variant="table")):
|
| +                self.clearStackToTableBodyContext()
|
| +                # Close whichever row group is now the current node.
|
| +                self.endTagTableRowGroup(
|
| +                    impliedTagToken(self.tree.openElements[-1].name))
|
| +                return token
|
| +            else:
|
| +                # innerHTML case
|
| +                assert self.parser.innerHTML
|
| +                self.parser.parseError()
|
| +
|
| +        def startTagOther(self, token):
|
| +            """Delegate any other start tag to the "inTable" phase."""
|
| +            return self.parser.phases["inTable"].processStartTag(token)
|
| +
|
| +        def endTagTableRowGroup(self, token):
|
| +            """Close the named row group and switch to "inTable", or report a
|
| +            parse error when that element is not in table scope."""
|
| +            if self.tree.elementInScope(token["name"], variant="table"):
|
| +                self.clearStackToTableBodyContext()
|
| +                self.tree.openElements.pop()
|
| +                self.parser.phase = self.parser.phases["inTable"]
|
| +            else:
|
| +                self.parser.parseError("unexpected-end-tag-in-table-body",
|
| +                                       {"name": token["name"]})
|
| +
|
| +        def endTagTable(self, token):
|
| +            """Close the open row group for </table> and return the token."""
|
| +            if (self.tree.elementInScope("tbody", variant="table") or
|
| +                self.tree.elementInScope("thead", variant="table") or
|
| +                    self.tree.elementInScope("tfoot", variant="table")):
|
| +                self.clearStackToTableBodyContext()
|
| +                # Close whichever row group is now the current node.
|
| +                self.endTagTableRowGroup(
|
| +                    impliedTagToken(self.tree.openElements[-1].name))
|
| +                return token
|
| +            else:
|
| +                # innerHTML case
|
| +                assert self.parser.innerHTML
|
| +                self.parser.parseError()
|
| +
|
| +        def endTagIgnore(self, token):
|
| +            """Report a parse error and drop the end tag."""
|
| +            self.parser.parseError("unexpected-end-tag-in-table-body",
|
| +                                   {"name": token["name"]})
|
| +
|
| +        def endTagOther(self, token):
|
| +            """Delegate any other end tag to the "inTable" phase."""
|
| +            return self.parser.phases["inTable"].processEndTag(token)
|
| +
|
| +    class InRowPhase(Phase):
|
| +        # http://www.whatwg.org/specs/web-apps/current-work/#in-row
|
| +        """Phase used while a <tr> is open.
|
| +
|
| +        Tokens without a dedicated handler are delegated to the "inTable" phase.
|
| +        """
|
| +        def __init__(self, parser, tree):
|
| +            Phase.__init__(self, parser, tree)
|
| +            self.startTagHandler = utils.MethodDispatcher([
|
| +                ("html", self.startTagHtml),
|
| +                (("td", "th"), self.startTagTableCell),
|
| +                (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
|
| +                  "tr"), self.startTagTableOther)
|
| +            ])
|
| +            self.startTagHandler.default = self.startTagOther
|
| +
|
| +            self.endTagHandler = utils.MethodDispatcher([
|
| +                ("tr", self.endTagTr),
|
| +                ("table", self.endTagTable),
|
| +                (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
|
| +                (("body", "caption", "col", "colgroup", "html", "td", "th"),
|
| +                 self.endTagIgnore)
|
| +            ])
|
| +            self.endTagHandler.default = self.endTagOther
|
| +
|
| +        # helper methods (XXX unify this with other table helper methods)
|
| +        def clearStackToTableRowContext(self):
|
| +            """Pop open elements until the current node is <tr> or <html>,
|
| +            reporting a parse error for every element popped."""
|
| +            while self.tree.openElements[-1].name not in ("tr", "html"):
|
| +                self.parser.parseError("unexpected-implied-end-tag-in-table-row",
|
| +                                       {"name": self.tree.openElements[-1].name})
|
| +                self.tree.openElements.pop()
|
| +
|
| +        def ignoreEndTagTr(self):
|
| +            """Return True when no <tr> is in table scope."""
|
| +            return not self.tree.elementInScope("tr", variant="table")
|
| +
|
| +        # the rest
|
| +        def processEOF(self):
|
| +            """Delegate end of input to the "inTable" phase."""
|
| +            self.parser.phases["inTable"].processEOF()
|
| +
|
| +        def processSpaceCharacters(self, token):
|
| +            """Delegate whitespace to the "inTable" phase."""
|
| +            return self.parser.phases["inTable"].processSpaceCharacters(token)
|
| +
|
| +        def processCharacters(self, token):
|
| +            """Delegate character tokens to the "inTable" phase."""
|
| +            return self.parser.phases["inTable"].processCharacters(token)
|
| +
|
| +        def startTagTableCell(self, token):
|
| +            """Open a <td>/<th>, switch to "inCell" and push a formatting Marker."""
|
| +            self.clearStackToTableRowContext()
|
| +            self.tree.insertElement(token)
|
| +            self.parser.phase = self.parser.phases["inCell"]
|
| +            self.tree.activeFormattingElements.append(Marker)
|
| +
|
| +        def startTagTableOther(self, token):
|
| +            """Imply </tr>; return the token unless that end tag was ignored."""
|
| +            ignoreEndTag = self.ignoreEndTagTr()
|
| +            self.endTagTr(impliedTagToken("tr"))
|
| +            # XXX how are we sure it's always ignored in the innerHTML case?
|
| +            if not ignoreEndTag:
|
| +                return token
|
| +
|
| +        def startTagOther(self, token):
|
| +            """Delegate any other start tag to the "inTable" phase."""
|
| +            return self.parser.phases["inTable"].processStartTag(token)
|
| +
|
| +        def endTagTr(self, token):
|
| +            """Close the row and switch to "inTableBody", or report a parse
|
| +            error when no <tr> is in table scope (innerHTML case)."""
|
| +            if not self.ignoreEndTagTr():
|
| +                self.clearStackToTableRowContext()
|
| +                self.tree.openElements.pop()
|
| +                self.parser.phase = self.parser.phases["inTableBody"]
|
| +            else:
|
| +                # innerHTML case
|
| +                assert self.parser.innerHTML
|
| +                self.parser.parseError()
|
| +
|
| +        def endTagTable(self, token):
|
| +            """Imply </tr> for </table>; return the token unless it was ignored."""
|
| +            ignoreEndTag = self.ignoreEndTagTr()
|
| +            self.endTagTr(impliedTagToken("tr"))
|
| +            # Reprocess the current tag if the tr end tag was not ignored
|
| +            # XXX how are we sure it's always ignored in the innerHTML case?
|
| +            if not ignoreEndTag:
|
| +                return token
|
| +
|
| +        def endTagTableRowGroup(self, token):
|
| +            """Imply </tr> and return the token when the named row group is in
|
| +            table scope; otherwise report a parse error."""
|
| +            if self.tree.elementInScope(token["name"], variant="table"):
|
| +                self.endTagTr(impliedTagToken("tr"))
|
| +                return token
|
| +            else:
|
| +                self.parser.parseError()
|
| +
|
| +        def endTagIgnore(self, token):
|
| +            """Report a parse error and drop the end tag."""
|
| +            self.parser.parseError("unexpected-end-tag-in-table-row",
|
| +                                   {"name": token["name"]})
|
| +
|
| +        def endTagOther(self, token):
|
| +            """Delegate any other end tag to the "inTable" phase."""
|
| +            return self.parser.phases["inTable"].processEndTag(token)
|
| +
|
| +    class InCellPhase(Phase):
|
| +        # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
|
| +        """Phase used while a <td> or <th> is open.
|
| +
|
| +        Tokens without a dedicated handler are delegated to the "inBody" phase.
|
| +        """
|
| +        def __init__(self, parser, tree):
|
| +            Phase.__init__(self, parser, tree)
|
| +            self.startTagHandler = utils.MethodDispatcher([
|
| +                ("html", self.startTagHtml),
|
| +                (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
|
| +                  "thead", "tr"), self.startTagTableOther)
|
| +            ])
|
| +            self.startTagHandler.default = self.startTagOther
|
| +
|
| +            self.endTagHandler = utils.MethodDispatcher([
|
| +                (("td", "th"), self.endTagTableCell),
|
| +                (("body", "caption", "col", "colgroup", "html"), self.endTagIgnore),
|
| +                (("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply)
|
| +            ])
|
| +            self.endTagHandler.default = self.endTagOther
|
| +
|
| +        # helper
|
| +        def closeCell(self):
|
| +            """Close the open cell: <td> if one is in table scope, else <th>."""
|
| +            if self.tree.elementInScope("td", variant="table"):
|
| +                self.endTagTableCell(impliedTagToken("td"))
|
| +            elif self.tree.elementInScope("th", variant="table"):
|
| +                self.endTagTableCell(impliedTagToken("th"))
|
| +
|
| +        # the rest
|
| +        def processEOF(self):
|
| +            """Delegate end of input to the "inBody" phase."""
|
| +            self.parser.phases["inBody"].processEOF()
|
| +
|
| +        def processCharacters(self, token):
|
| +            """Delegate character tokens to the "inBody" phase."""
|
| +            return self.parser.phases["inBody"].processCharacters(token)
|
| +
|
| +        def startTagTableOther(self, token):
|
| +            """Close the open cell for a table-structure start tag and return
|
| +            the token; with no cell in scope (innerHTML) report a parse error."""
|
| +            if (self.tree.elementInScope("td", variant="table") or
|
| +                    self.tree.elementInScope("th", variant="table")):
|
| +                self.closeCell()
|
| +                return token
|
| +            else:
|
| +                # innerHTML case
|
| +                assert self.parser.innerHTML
|
| +                self.parser.parseError()
|
| +
|
| +        def startTagOther(self, token):
|
| +            """Delegate any other start tag to the "inBody" phase."""
|
| +            return self.parser.phases["inBody"].processStartTag(token)
|
| +
|
| +        def endTagTableCell(self, token):
|
| +            """Close the named cell and switch to the "inRow" phase.
|
| +
|
| +            Reports a parse error instead when the cell is not in table scope.
|
| +            """
|
| +            if self.tree.elementInScope(token["name"], variant="table"):
|
| +                self.tree.generateImpliedEndTags(token["name"])
|
| +                if self.tree.openElements[-1].name != token["name"]:
|
| +                    self.parser.parseError("unexpected-cell-end-tag",
|
| +                                           {"name": token["name"]})
|
| +                    # Pop up to and including the matching cell element.
|
| +                    while True:
|
| +                        node = self.tree.openElements.pop()
|
| +                        if node.name == token["name"]:
|
| +                            break
|
| +                else:
|
| +                    # The cell is the current node: just pop it.
|
| +                    self.tree.openElements.pop()
|
| +                self.tree.clearActiveFormattingElements()
|
| +                self.parser.phase = self.parser.phases["inRow"]
|
| +            else:
|
| +                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
|
| +
|
| +        def endTagIgnore(self, token):
|
| +            """Report an "unexpected-end-tag" parse error and drop the token."""
|
| +            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
|
| +
|
| +        def endTagImply(self, token):
|
| +            """Close the cell and return the token when the named element is in
|
| +            table scope; otherwise report a parse error."""
|
| +            if self.tree.elementInScope(token["name"], variant="table"):
|
| +                self.closeCell()
|
| +                return token
|
| +            else:
|
| +                # sometimes innerHTML case
|
| +                self.parser.parseError()
|
| +
|
| +        def endTagOther(self, token):
|
| +            """Delegate any other end tag to the "inBody" phase."""
|
| +            return self.parser.phases["inBody"].processEndTag(token)
|
| +
|
| +    class InSelectPhase(Phase):
|
| +        """Phase used while a <select> is open.
|
| +
|
| +        Only option/optgroup/select/script handling is implemented here;
|
| +        other start and end tags are reported as parse errors and dropped.
|
| +        """
|
| +        def __init__(self, parser, tree):
|
| +            Phase.__init__(self, parser, tree)
|
| +
|
| +            self.startTagHandler = utils.MethodDispatcher([
|
| +                ("html", self.startTagHtml),
|
| +                ("option", self.startTagOption),
|
| +                ("optgroup", self.startTagOptgroup),
|
| +                ("select", self.startTagSelect),
|
| +                (("input", "keygen", "textarea"), self.startTagInput),
|
| +                ("script", self.startTagScript)
|
| +            ])
|
| +            self.startTagHandler.default = self.startTagOther
|
| +
|
| +            self.endTagHandler = utils.MethodDispatcher([
|
| +                ("option", self.endTagOption),
|
| +                ("optgroup", self.endTagOptgroup),
|
| +                ("select", self.endTagSelect)
|
| +            ])
|
| +            self.endTagHandler.default = self.endTagOther
|
| +
|
| +        # http://www.whatwg.org/specs/web-apps/current-work/#in-select
|
| +        def processEOF(self):
|
| +            """Report "eof-in-select" unless the current node is <html>
|
| +            (which is only expected for innerHTML)."""
|
| +            if self.tree.openElements[-1].name != "html":
|
| +                self.parser.parseError("eof-in-select")
|
| +            else:
|
| +                assert self.parser.innerHTML
|
| +
|
| +        def processCharacters(self, token):
|
| +            """Insert the text; a token that is exactly U+0000 is dropped."""
|
| +            if token["data"] == "\u0000":
|
| +                return
|
| +            self.tree.insertText(token["data"])
|
| +
|
| +        def startTagOption(self, token):
|
| +            """Insert an <option>, first closing an open <option> if current."""
|
| +            # We need to imply </option> if <option> is the current node.
|
| +            if self.tree.openElements[-1].name == "option":
|
| +                self.tree.openElements.pop()
|
| +            self.tree.insertElement(token)
|
| +
|
| +        def startTagOptgroup(self, token):
|
| +            """Insert an <optgroup>, first closing a current <option> and then
|
| +            a current <optgroup>."""
|
| +            if self.tree.openElements[-1].name == "option":
|
| +                self.tree.openElements.pop()
|
| +            if self.tree.openElements[-1].name == "optgroup":
|
| +                self.tree.openElements.pop()
|
| +            self.tree.insertElement(token)
|
| +
|
| +        def startTagSelect(self, token):
|
| +            """Report a parse error and treat a nested <select> as </select>."""
|
| +            self.parser.parseError("unexpected-select-in-select")
|
| +            self.endTagSelect(impliedTagToken("select"))
|
| +
|
| +        def startTagInput(self, token):
|
| +            """Report a parse error; if a <select> is in select scope, close it
|
| +            and return the token."""
|
| +            self.parser.parseError("unexpected-input-in-select")
|
| +            if self.tree.elementInScope("select", variant="select"):
|
| +                self.endTagSelect(impliedTagToken("select"))
|
| +                return token
|
| +            else:
|
| +                assert self.parser.innerHTML
|
| +
|
| +        def startTagScript(self, token):
|
| +            """Delegate <script> to the "inHead" phase."""
|
| +            return self.parser.phases["inHead"].processStartTag(token)
|
| +
|
| +        def startTagOther(self, token):
|
| +            """Report a parse error and drop the start tag."""
|
| +            self.parser.parseError("unexpected-start-tag-in-select",
|
| +                                   {"name": token["name"]})
|
| +
|
| +        def endTagOption(self, token):
|
| +            """Pop a current <option>, otherwise report a parse error."""
|
| +            if self.tree.openElements[-1].name == "option":
|
| +                self.tree.openElements.pop()
|
| +            else:
|
| +                self.parser.parseError("unexpected-end-tag-in-select",
|
| +                                       {"name": "option"})
|
| +
|
| +        def endTagOptgroup(self, token):
|
| +            """Close a current <optgroup> (and an <option> directly inside it),
|
| +            otherwise report a parse error."""
|
| +            # </optgroup> implicitly closes <option>
|
| +            if (self.tree.openElements[-1].name == "option" and
|
| +                    self.tree.openElements[-2].name == "optgroup"):
|
| +                self.tree.openElements.pop()
|
| +            # It also closes </optgroup>
|
| +            if self.tree.openElements[-1].name == "optgroup":
|
| +                self.tree.openElements.pop()
|
| +            # But nothing else
|
| +            else:
|
| +                self.parser.parseError("unexpected-end-tag-in-select",
|
| +                                       {"name": "optgroup"})
|
| +
|
| +        def endTagSelect(self, token):
|
| +            """Pop up to and including <select> and reset the insertion mode,
|
| +            or report a parse error when no <select> is in select scope."""
|
| +            if self.tree.elementInScope("select", variant="select"):
|
| +                node = self.tree.openElements.pop()
|
| +                while node.name != "select":
|
| +                    node = self.tree.openElements.pop()
|
| +                self.parser.resetInsertionMode()
|
| +            else:
|
| +                # innerHTML case
|
| +                assert self.parser.innerHTML
|
| +                self.parser.parseError()
|
| +
|
| +        def endTagOther(self, token):
|
| +            """Report a parse error and drop the end tag."""
|
| +            self.parser.parseError("unexpected-end-tag-in-select",
|
| +                                   {"name": token["name"]})
|
| +
|
| +    class InSelectInTablePhase(Phase):
|
| +        """Phase used for a <select> that is open inside a table.
|
| +
|
| +        Table-structure tags close the select; everything else is delegated
|
| +        to the "inSelect" phase.
|
| +        """
|
| +        def __init__(self, parser, tree):
|
| +            Phase.__init__(self, parser, tree)
|
| +
|
| +            self.startTagHandler = utils.MethodDispatcher([
|
| +                (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
|
| +                 self.startTagTable)
|
| +            ])
|
| +            self.startTagHandler.default = self.startTagOther
|
| +
|
| +            self.endTagHandler = utils.MethodDispatcher([
|
| +                (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
|
| +                 self.endTagTable)
|
| +            ])
|
| +            self.endTagHandler.default = self.endTagOther
|
| +
|
| +        def processEOF(self):
|
| +            """Delegate end of input to the "inSelect" phase."""
|
| +            self.parser.phases["inSelect"].processEOF()
|
| +
|
| +        def processCharacters(self, token):
|
| +            """Delegate character tokens to the "inSelect" phase."""
|
| +            return self.parser.phases["inSelect"].processCharacters(token)
|
| +
|
| +        def startTagTable(self, token):
|
| +            """Report a parse error, process an implied </select> through
|
| +            "inSelect", and return the token."""
|
| +            self.parser.parseError("unexpected-table-element-start-tag-in-select-in-table", {"name": token["name"]})
|
| +            self.endTagOther(impliedTagToken("select"))
|
| +            return token
|
| +
|
| +        def startTagOther(self, token):
|
| +            """Delegate any other start tag to the "inSelect" phase."""
|
| +            return self.parser.phases["inSelect"].processStartTag(token)
|
| +
|
| +        def endTagTable(self, token):
|
| +            """Report a parse error; if the named element is in table scope,
|
| +            process an implied </select> and return the token."""
|
| +            self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", {"name": token["name"]})
|
| +            if self.tree.elementInScope(token["name"], variant="table"):
|
| +                self.endTagOther(impliedTagToken("select"))
|
| +                return token
|
| +
|
| +        def endTagOther(self, token):
|
| +            """Delegate any other end tag to the "inSelect" phase."""
|
| +            return self.parser.phases["inSelect"].processEndTag(token)
|
| +
|
| + class InForeignContentPhase(Phase):
|
| + breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
|
| + "center", "code", "dd", "div", "dl", "dt",
|
| + "em", "embed", "h1", "h2", "h3",
|
| + "h4", "h5", "h6", "head", "hr", "i", "img",
|
| + "li", "listing", "menu", "meta", "nobr",
|
| + "ol", "p", "pre", "ruby", "s", "small",
|
| + "span", "strong", "strike", "sub", "sup",
|
| + "table", "tt", "u", "ul", "var"])
|
| +
|
| + def __init__(self, parser, tree):
|
| + Phase.__init__(self, parser, tree)
|
| +
|
| + def adjustSVGTagNames(self, token):
|
| + replacements = {"altglyph": "altGlyph",
|
| + "altglyphdef": "altGlyphDef",
|
| + "altglyphitem": "altGlyphItem",
|
| + "animatecolor": "animateColor",
|
| + "animatemotion": "animateMotion",
|
| + "animatetransform": "animateTransform",
|
| + "clippath": "clipPath",
|
| + "feblend": "feBlend",
|
| + "fecolormatrix": "feColorMatrix",
|
| + "fecomponenttransfer": "feComponentTransfer",
|
| + "fecomposite": "feComposite",
|
| + "feconvolvematrix": "feConvolveMatrix",
|
| + "fediffuselighting": "feDiffuseLighting",
|
| + "fedisplacementmap": "feDisplacementMap",
|
| + "fedistantlight": "feDistantLight",
|
| + "feflood": "feFlood",
|
| + "fefunca": "feFuncA",
|
| + "fefuncb": "feFuncB",
|
| + "fefuncg": "feFuncG",
|
| + "fefuncr": "feFuncR",
|
| + "fegaussianblur": "feGaussianBlur",
|
| + "feimage": "feImage",
|
| + "femerge": "feMerge",
|
| + "femergenode": "feMergeNode",
|
| + "femorphology": "feMorphology",
|
| + "feoffset": "feOffset",
|
| + "fepointlight": "fePointLight",
|
| + "fespecularlighting": "feSpecularLighting",
|
| + "fespotlight": "feSpotLight",
|
| + "fetile": "feTile",
|
| + "feturbulence": "feTurbulence",
|
| + "foreignobject": "foreignObject",
|
| + "glyphref": "glyphRef",
|
| + "lineargradient": "linearGradient",
|
| + "radialgradient": "radialGradient",
|
| + "textpath": "textPath"}
|
| +
|
| + if token["name"] in replacements:
|
| + token["name"] = replacements[token["name"]]
|
| +
|
| + def processCharacters(self, token):
|
| + if token["data"] == "\u0000":
|
| + token["data"] = "\uFFFD"
|
| + elif (self.parser.framesetOK and
|
| + any(char not in spaceCharacters for char in token["data"])):
|
| + self.parser.framesetOK = False
|
| + Phase.processCharacters(self, token)
|
| +
|
| + def processStartTag(self, token):
|
| + currentNode = self.tree.openElements[-1]
|
| + if (token["name"] in self.breakoutElements or
|
| + (token["name"] == "font" and
|
| + set(token["data"].keys()) & set(["color", "face", "size"]))):
|
| + self.parser.parseError("unexpected-html-element-in-foreign-content",
|
| + {"name": token["name"]})
|
| + while (self.tree.openElements[-1].namespace !=
|
| + self.tree.defaultNamespace and
|
| + not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
|
| + not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
|
| + self.tree.openElements.pop()
|
| + return token
|
| +
|
| + else:
|
| + if currentNode.namespace == namespaces["mathml"]:
|
| + self.parser.adjustMathMLAttributes(token)
|
| + elif currentNode.namespace == namespaces["svg"]:
|
| + self.adjustSVGTagNames(token)
|
| + self.parser.adjustSVGAttributes(token)
|
| + self.parser.adjustForeignAttributes(token)
|
| + token["namespace"] = currentNode.namespace
|
| + self.tree.insertElement(token)
|
| + if token["selfClosing"]:
|
| + self.tree.openElements.pop()
|
| + token["selfClosingAcknowledged"] = True
|
| +
|
| + def processEndTag(self, token):
|
| + nodeIndex = len(self.tree.openElements) - 1
|
| + node = self.tree.openElements[-1]
|
| + if node.name != token["name"]:
|
| + self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
|
| +
|
| + while True:
|
| + if node.name.translate(asciiUpper2Lower) == token["name"]:
|
| + # XXX this isn't in the spec but it seems necessary
|
| + if self.parser.phase == self.parser.phases["inTableText"]:
|
| + self.parser.phase.flushCharacters()
|
| + self.parser.phase = self.parser.phase.originalPhase
|
| + while self.tree.openElements.pop() != node:
|
| + assert self.tree.openElements
|
| + new_token = None
|
| + break
|
| + nodeIndex -= 1
|
| +
|
| + node = self.tree.openElements[nodeIndex]
|
| + if node.namespace != self.tree.defaultNamespace:
|
| + continue
|
| + else:
|
| + new_token = self.parser.phase.processEndTag(token)
|
| + break
|
| + return new_token
|
| +
|
| +    class AfterBodyPhase(Phase):
|
| +        """Phase entered after the body has been closed.
|
| +
|
| +        Characters and unexpected tags report a parse error, switch the parser
|
| +        back to the "inBody" phase and return the token.
|
| +        """
|
| +        def __init__(self, parser, tree):
|
| +            Phase.__init__(self, parser, tree)
|
| +
|
| +            self.startTagHandler = utils.MethodDispatcher([
|
| +                ("html", self.startTagHtml)
|
| +            ])
|
| +            self.startTagHandler.default = self.startTagOther
|
| +
|
| +            self.endTagHandler = utils.MethodDispatcher([("html", self.endTagHtml)])
|
| +            self.endTagHandler.default = self.endTagOther
|
| +
|
| +        def processEOF(self):
|
| +            """Nothing to do at end of input."""
|
| +            # Stop parsing
|
| +            pass
|
| +
|
| +        def processComment(self, token):
|
| +            """Insert the comment under the first open element."""
|
| +            # This is needed because data is to be appended to the <html> element
|
| +            # here and not to whatever is currently open.
|
| +            self.tree.insertComment(token, self.tree.openElements[0])
|
| +
|
| +        def processCharacters(self, token):
|
| +            """Report a parse error, switch to "inBody" and return the token."""
|
| +            self.parser.parseError("unexpected-char-after-body")
|
| +            self.parser.phase = self.parser.phases["inBody"]
|
| +            return token
|
| +
|
| +        def startTagHtml(self, token):
|
| +            """Delegate <html> start tags to the "inBody" phase."""
|
| +            return self.parser.phases["inBody"].processStartTag(token)
|
| +
|
| +        def startTagOther(self, token):
|
| +            """Report a parse error, switch to "inBody" and return the token."""
|
| +            self.parser.parseError("unexpected-start-tag-after-body",
|
| +                                   {"name": token["name"]})
|
| +            self.parser.phase = self.parser.phases["inBody"]
|
| +            return token
|
| +
|
| +        def endTagHtml(self, name):
|
| +            """Handle </html>: a parse error for innerHTML, otherwise switch to
|
| +            the "afterAfterBody" phase. The ``name`` argument is not used."""
|
| +            if self.parser.innerHTML:
|
| +                self.parser.parseError("unexpected-end-tag-after-body-innerhtml")
|
| +            else:
|
| +                self.parser.phase = self.parser.phases["afterAfterBody"]
|
| +
|
| +        def endTagOther(self, token):
|
| +            """Report a parse error, switch to "inBody" and return the token."""
|
| +            self.parser.parseError("unexpected-end-tag-after-body",
|
| +                                   {"name": token["name"]})
|
| +            self.parser.phase = self.parser.phases["inBody"]
|
| +            return token
|
| +
|
| +    class InFramesetPhase(Phase):
|
| +        # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
|
| +        """Phase used while a <frameset> is open.
|
| +
|
| +        Characters and tags without a dedicated handler are reported as parse
|
| +        errors and dropped.
|
| +        """
|
| +        def __init__(self, parser, tree):
|
| +            Phase.__init__(self, parser, tree)
|
| +
|
| +            self.startTagHandler = utils.MethodDispatcher([
|
| +                ("html", self.startTagHtml),
|
| +                ("frameset", self.startTagFrameset),
|
| +                ("frame", self.startTagFrame),
|
| +                ("noframes", self.startTagNoframes)
|
| +            ])
|
| +            self.startTagHandler.default = self.startTagOther
|
| +
|
| +            self.endTagHandler = utils.MethodDispatcher([
|
| +                ("frameset", self.endTagFrameset)
|
| +            ])
|
| +            self.endTagHandler.default = self.endTagOther
|
| +
|
| +        def processEOF(self):
|
| +            """Report "eof-in-frameset" unless the current node is <html>
|
| +            (which is only expected for innerHTML)."""
|
| +            if self.tree.openElements[-1].name != "html":
|
| +                self.parser.parseError("eof-in-frameset")
|
| +            else:
|
| +                assert self.parser.innerHTML
|
| +
|
| +        def processCharacters(self, token):
|
| +            """Report a parse error and drop the characters."""
|
| +            self.parser.parseError("unexpected-char-in-frameset")
|
| +
|
| +        def startTagFrameset(self, token):
|
| +            """Insert a nested <frameset> element."""
|
| +            self.tree.insertElement(token)
|
| +
|
| +        def startTagFrame(self, token):
|
| +            """Insert a <frame> element and immediately pop it from the stack."""
|
| +            self.tree.insertElement(token)
|
| +            self.tree.openElements.pop()
|
| +
|
| +        def startTagNoframes(self, token):
|
| +            """Delegate <noframes> to the "inBody" phase."""
|
| +            return self.parser.phases["inBody"].processStartTag(token)
|
| +
|
| +        def startTagOther(self, token):
|
| +            """Report a parse error and drop the start tag."""
|
| +            self.parser.parseError("unexpected-start-tag-in-frameset",
|
| +                                   {"name": token["name"]})
|
| +
|
| +        def endTagFrameset(self, token):
|
| +            """Pop the current <frameset>; switch to "afterFrameset" once the
|
| +            current node is no longer a <frameset> (never for innerHTML)."""
|
| +            if self.tree.openElements[-1].name == "html":
|
| +                # innerHTML case
|
| +                self.parser.parseError("unexpected-frameset-in-frameset-innerhtml")
|
| +            else:
|
| +                self.tree.openElements.pop()
|
| +            if (not self.parser.innerHTML and
|
| +                    self.tree.openElements[-1].name != "frameset"):
|
| +                # If we're not in innerHTML mode and the current node is not a
|
| +                # "frameset" element (anymore) then switch.
|
| +                self.parser.phase = self.parser.phases["afterFrameset"]
|
| +
|
| +        def endTagOther(self, token):
|
| +            """Report a parse error and drop the end tag."""
|
| +            self.parser.parseError("unexpected-end-tag-in-frameset",
|
| +                                   {"name": token["name"]})
|
| +
|
| + class AfterFramesetPhase(Phase):
|
| + # http://www.whatwg.org/specs/web-apps/current-work/#after3
|
| + def __init__(self, parser, tree):
|
| + Phase.__init__(self, parser, tree)
|
| +
|
| + self.startTagHandler = utils.MethodDispatcher([
|
| + ("html", self.startTagHtml),
|
| + ("noframes", self.startTagNoframes)
|
| + ])
|
| + self.startTagHandler.default = self.startTagOther
|
| +
|
| + self.endTagHandler = utils.MethodDispatcher([
|
| + ("html", self.endTagHtml)
|
| + ])
|
| + self.endTagHandler.default = self.endTagOther
|
| +
|
| + def processEOF(self):
|
| + # Stop parsing
|
| + pass
|
| +
|
| + def processCharacters(self, token):
|
| + self.parser.parseError("unexpected-char-after-frameset")
|
| +
|
| + def startTagNoframes(self, token):
|
| + return self.parser.phases["inHead"].processStartTag(token)
|
| +
|
| + def startTagOther(self, token):
|
| + self.parser.parseError("unexpected-start-tag-after-frameset",
|
| + {"name": token["name"]})
|
| +
|
| + def endTagHtml(self, token):
|
| + self.parser.phase = self.parser.phases["afterAfterFrameset"]
|
| +
|
| + def endTagOther(self, token):
|
| + self.parser.parseError("unexpected-end-tag-after-frameset",
|
| + {"name": token["name"]})
|
| +
|
| + class AfterAfterBodyPhase(Phase):
|
| + def __init__(self, parser, tree):
|
| + Phase.__init__(self, parser, tree)
|
| +
|
| + self.startTagHandler = utils.MethodDispatcher([
|
| + ("html", self.startTagHtml)
|
| + ])
|
| + self.startTagHandler.default = self.startTagOther
|
| +
|
| + def processEOF(self):
|
| + pass
|
| +
|
| + def processComment(self, token):
|
| + self.tree.insertComment(token, self.tree.document)
|
| +
|
| + def processSpaceCharacters(self, token):
|
| + return self.parser.phases["inBody"].processSpaceCharacters(token)
|
| +
|
| + def processCharacters(self, token):
|
| + self.parser.parseError("expected-eof-but-got-char")
|
| + self.parser.phase = self.parser.phases["inBody"]
|
| + return token
|
| +
|
| + def startTagHtml(self, token):
|
| + return self.parser.phases["inBody"].processStartTag(token)
|
| +
|
| + def startTagOther(self, token):
|
| + self.parser.parseError("expected-eof-but-got-start-tag",
|
| + {"name": token["name"]})
|
| + self.parser.phase = self.parser.phases["inBody"]
|
| + return token
|
| +
|
| + def processEndTag(self, token):
|
| + self.parser.parseError("expected-eof-but-got-end-tag",
|
| + {"name": token["name"]})
|
| + self.parser.phase = self.parser.phases["inBody"]
|
| + return token
|
| +
|
| + class AfterAfterFramesetPhase(Phase):
|
| + def __init__(self, parser, tree):
|
| + Phase.__init__(self, parser, tree)
|
| +
|
| + self.startTagHandler = utils.MethodDispatcher([
|
| + ("html", self.startTagHtml),
|
| + ("noframes", self.startTagNoFrames)
|
| + ])
|
| + self.startTagHandler.default = self.startTagOther
|
| +
|
| + def processEOF(self):
|
| + pass
|
| +
|
| + def processComment(self, token):
|
| + self.tree.insertComment(token, self.tree.document)
|
| +
|
| + def processSpaceCharacters(self, token):
|
| + return self.parser.phases["inBody"].processSpaceCharacters(token)
|
| +
|
| + def processCharacters(self, token):
|
| + self.parser.parseError("expected-eof-but-got-char")
|
| +
|
| + def startTagHtml(self, token):
|
| + return self.parser.phases["inBody"].processStartTag(token)
|
| +
|
| + def startTagNoFrames(self, token):
|
| + return self.parser.phases["inHead"].processStartTag(token)
|
| +
|
| + def startTagOther(self, token):
|
| + self.parser.parseError("expected-eof-but-got-start-tag",
|
| + {"name": token["name"]})
|
| +
|
| + def processEndTag(self, token):
|
| + self.parser.parseError("expected-eof-but-got-end-tag",
|
| + {"name": token["name"]})
|
| +
|
| +    # Map each phase name to the class that implements it.  Handlers look
|
| +    # phases up by these names (e.g. self.parser.phases["inBody"]).
|
| +    return {
|
| +        "initial": InitialPhase,
|
| +        "beforeHtml": BeforeHtmlPhase,
|
| +        "beforeHead": BeforeHeadPhase,
|
| +        "inHead": InHeadPhase,
|
| +        # XXX "inHeadNoscript": InHeadNoScriptPhase,
|
| +        "afterHead": AfterHeadPhase,
|
| +        "inBody": InBodyPhase,
|
| +        "text": TextPhase,
|
| +        "inTable": InTablePhase,
|
| +        "inTableText": InTableTextPhase,
|
| +        "inCaption": InCaptionPhase,
|
| +        "inColumnGroup": InColumnGroupPhase,
|
| +        "inTableBody": InTableBodyPhase,
|
| +        "inRow": InRowPhase,
|
| +        "inCell": InCellPhase,
|
| +        "inSelect": InSelectPhase,
|
| +        "inSelectInTable": InSelectInTablePhase,
|
| +        "inForeignContent": InForeignContentPhase,
|
| +        "afterBody": AfterBodyPhase,
|
| +        "inFrameset": InFramesetPhase,
|
| +        "afterFrameset": AfterFramesetPhase,
|
| +        "afterAfterBody": AfterAfterBodyPhase,
|
| +        "afterAfterFrameset": AfterAfterFramesetPhase,
|
| +        # XXX after after frameset
|
| +        # NOTE(review): "afterAfterFrameset" is registered just above, so
|
| +        # the XXX marker looks stale -- confirm before removing it.
|
| +    }
|
| +
|
| +
|
| +def impliedTagToken(name, type="EndTag", attributes=None,
|
| + selfClosing=False):
|
| + if attributes is None:
|
| + attributes = {}
|
| + return {"type": tokenTypes[type], "name": name, "data": attributes,
|
| + "selfClosing": selfClosing}
|
| +
|
| +
|
| +class ParseError(Exception):
|
| + """Error in parsed document"""
|
| + pass
|
|
|