third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/html5lib/html5lib/html5parser.py - Issue 2635033005: Add html5lib to the list of modules in webkitpy/thirdparty/wpt/wpt/tools.

Unified Diff: third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/html5lib/html5lib/html5parser.py

Issue 2635033005: Add html5lib to the list of modules in webkitpy/thirdparty/wpt/wpt/tools. (Closed)

Patch Set: Update webkitpy/thirdparty/README.chromium. Created 3 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« no previous file with comments | « third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/html5lib/html5lib/filters/whitespace.py ('k') | third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/html5lib/html5lib/ihatexml.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/html5lib/html5lib/html5parser.py

diff --git a/third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/html5lib/html5lib/html5parser.py b/third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/html5lib/html5lib/html5parser.py

new file mode 100644

index 0000000000000000000000000000000000000000..5b9ce7d72068040907bdf42528c765a2a1ca7553

--- /dev/null

+++ b/third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/html5lib/html5lib/html5parser.py

@@ -0,0 +1,2723 @@

+from __future__ import absolute_import, division, unicode_literals

+from six import with_metaclass

+import types

+from . import inputstream

+from . import tokenizer

+from . import treebuilders

+from .treebuilders._base import Marker

+from . import utils

+from . import constants

+from .constants import spaceCharacters, asciiUpper2Lower

+from .constants import specialElements

+from .constants import headingElements

+from .constants import cdataElements, rcdataElements

+from .constants import tokenTypes, ReparseException, namespaces

+from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements

+from .constants import adjustForeignAttributes as adjustForeignAttributesMap

def parse(doc, treebuilder="etree", encoding=None,
          namespaceHTMLElements=True):
    """Parse a string or file-like object into a tree.

    doc - the HTML to parse; handed unchanged to HTMLParser.parse
    treebuilder - name passed to treebuilders.getTreeBuilder to pick the
    tree builder class
    encoding - forwarded to HTMLParser.parse
    namespaceHTMLElements - forwarded to the HTMLParser constructor

    Returns whatever HTMLParser.parse returns.
    """
    builder_class = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builder_class,
                        namespaceHTMLElements=namespaceHTMLElements)
    return parser.parse(doc, encoding=encoding)

def parseFragment(doc, container="div", treebuilder="etree", encoding=None,
                  namespaceHTMLElements=True):
    """Parse a string or file-like object as an HTML fragment.

    doc - the HTML to parse; handed unchanged to HTMLParser.parseFragment
    container - forwarded to HTMLParser.parseFragment
    treebuilder - name passed to treebuilders.getTreeBuilder to pick the
    tree builder class
    encoding - forwarded to HTMLParser.parseFragment
    namespaceHTMLElements - forwarded to the HTMLParser constructor

    Returns whatever HTMLParser.parseFragment returns.
    """
    builder_class = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builder_class,
                        namespaceHTMLElements=namespaceHTMLElements)
    return parser.parseFragment(doc, container=container, encoding=encoding)

def method_decorator_metaclass(function):
    """Return a metaclass that decorates every plain function of a class.

    function - a decorator; it is applied to each entry of the class
    namespace that is a types.FunctionType instance. Entries of any other
    kind are left as they are.
    """
    class Decorated(type):
        def __new__(meta, classname, bases, classDict):
            # Collect the function-valued entries first, then write the
            # decorated replacements back into the same namespace dict.
            plain_functions = [(key, member)
                               for key, member in classDict.items()
                               if isinstance(member, types.FunctionType)]
            for key, member in plain_functions:
                classDict[key] = function(member)
            return type.__new__(meta, classname, bases, classDict)

    return Decorated

class HTMLParser(object):
    """HTML parser. Generates a tree structure from a stream of (possibly
    malformed) HTML"""

    def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer,
                 strict=False, namespaceHTMLElements=True, debug=False):
        """
        strict - raise an exception when a parse error is encountered

        tree - a treebuilder class controlling the type of tree that will be
        returned. Built in treebuilders can be accessed through
        html5lib.treebuilders.getTreeBuilder(treeType)

        tokenizer - a class that provides a stream of tokens to the treebuilder.
        This may be replaced for e.g. a sanitizer which converts some tags to
        text

        namespaceHTMLElements - passed as the only argument when the tree
        class is instantiated

        debug - passed to getPhases() when the phase objects are created
        """

        # Raise an exception on the first error encountered
        self.strict = strict

        if tree is None:
            tree = treebuilders.getTreeBuilder("etree")
        self.tree = tree(namespaceHTMLElements)
        self.tokenizer_class = tokenizer
        self.errors = []

        # One phase instance per name, all sharing this parser and its tree.
        self.phases = dict((name, cls(self, self.tree)) for name, cls in
                           getPhases(debug).items())

    def _parse(self, stream, innerHTML=False, container="div",
               encoding=None, parseMeta=True, useChardet=True, **kwargs):
        """Tokenize `stream` and run the main loop until it completes.

        A fresh tokenizer is created from self.tokenizer_class on every call.
        When mainLoop raises ReparseException the parser state is reset and
        the loop is started again.
        """
        self.innerHTMLMode = innerHTML
        self.container = container
        self.tokenizer = self.tokenizer_class(stream, encoding=encoding,
                                              parseMeta=parseMeta,
                                              useChardet=useChardet,
                                              parser=self, **kwargs)
        self.reset()

        while True:
            try:
                self.mainLoop()
                break
            except ReparseException:
                self.reset()

    def reset(self):
        """Put the parser (and its tree) back into the start-of-parse state.

        In fragment mode (self.innerHTMLMode true) the tokenizer state is
        chosen from the container name, an html root is inserted and the
        insertion mode is derived from the container; otherwise parsing
        starts in the "initial" phase.
        """
        self.tree.reset()
        self.firstStartTag = False
        self.errors = []
        self.log = []  # only used with debug mode
        # "quirks" / "limited quirks" / "no quirks"
        self.compatMode = "no quirks"

        if self.innerHTMLMode:
            self.innerHTML = self.container.lower()

            # NOTE(review): cdataElements selects rcdataState and
            # rcdataElements selects rawtextState; the constant names look
            # swapped relative to the states - confirm against constants.py.
            if self.innerHTML in cdataElements:
                self.tokenizer.state = self.tokenizer.rcdataState
            elif self.innerHTML in rcdataElements:
                self.tokenizer.state = self.tokenizer.rawtextState
            elif self.innerHTML == 'plaintext':
                self.tokenizer.state = self.tokenizer.plaintextState
            else:
                # state already is data state
                # self.tokenizer.state = self.tokenizer.dataState
                pass
            self.phase = self.phases["beforeHtml"]
            self.phase.insertHtmlElement()
            self.resetInsertionMode()
        else:
            self.innerHTML = False
            self.phase = self.phases["initial"]

        self.lastPhase = None

        self.beforeRCDataPhase = None

        self.framesetOK = True

    @property
    def documentEncoding(self):
        """The name of the character encoding
        that was used to decode the input stream,
        or :obj:`None` if that is not determined yet.

        """
        if not hasattr(self, 'tokenizer'):
            return None
        return self.tokenizer.stream.charEncoding[0]

    def isHTMLIntegrationPoint(self, element):
        """Return a true value if `element` is an HTML integration point.

        A MathML annotation-xml element qualifies only when it has an
        "encoding" attribute whose ASCII-lowercased value is "text/html" or
        "application/xhtml+xml"; every other element is looked up by
        (namespace, name) in htmlIntegrationPointElements.
        """
        if (element.name == "annotation-xml" and
                element.namespace == namespaces["mathml"]):
            return ("encoding" in element.attributes and
                    element.attributes["encoding"].translate(
                        asciiUpper2Lower) in
                    ("text/html", "application/xhtml+xml"))
        else:
            return (element.namespace, element.name) in htmlIntegrationPointElements

    def isMathMLTextIntegrationPoint(self, element):
        """Return True if (namespace, name) of `element` is listed in
        mathmlTextIntegrationPointElements."""
        return (element.namespace, element.name) in mathmlTextIntegrationPointElements

    def mainLoop(self):
        """Dispatch every token to a phase, then process end-of-file.

        For each token the current phase is used when the current node is in
        the default namespace (or one of the integration-point conditions
        below holds); otherwise the "inForeignContent" phase is used. A phase
        method may return a token, in which case that token is processed
        again with whatever phase is then selected.
        """
        CharactersToken = tokenTypes["Characters"]
        SpaceCharactersToken = tokenTypes["SpaceCharacters"]
        StartTagToken = tokenTypes["StartTag"]
        EndTagToken = tokenTypes["EndTag"]
        CommentToken = tokenTypes["Comment"]
        DoctypeToken = tokenTypes["Doctype"]
        ParseErrorToken = tokenTypes["ParseError"]

        # Start tags that do NOT use the current phase at a MathML text
        # integration point; built once instead of once per loop iteration.
        mathmlTextExceptions = frozenset(["mglyph", "malignmark"])

        for token in self.normalizedTokens():
            new_token = token
            while new_token is not None:
                currentNode = self.tree.openElements[-1] if self.tree.openElements else None
                currentNodeNamespace = currentNode.namespace if currentNode else None
                currentNodeName = currentNode.name if currentNode else None

                tokenType = new_token["type"]

                if tokenType == ParseErrorToken:
                    self.parseError(new_token["data"], new_token.get("datavars", {}))
                    new_token = None
                else:
                    # The annotation-xml clause checks for a start tag before
                    # reading token["name"]: only tag tokens are looked up by
                    # name here, and the rule concerns an "svg" start tag.
                    if (len(self.tree.openElements) == 0 or
                        currentNodeNamespace == self.tree.defaultNamespace or
                        (self.isMathMLTextIntegrationPoint(currentNode) and
                         ((tokenType == StartTagToken and
                           token["name"] not in mathmlTextExceptions) or
                          tokenType in (CharactersToken, SpaceCharactersToken))) or
                        (currentNodeNamespace == namespaces["mathml"] and
                         currentNodeName == "annotation-xml" and
                         tokenType == StartTagToken and
                         token["name"] == "svg") or
                        (self.isHTMLIntegrationPoint(currentNode) and
                         tokenType in (StartTagToken, CharactersToken, SpaceCharactersToken))):
                        phase = self.phase
                    else:
                        phase = self.phases["inForeignContent"]

                    if tokenType == CharactersToken:
                        new_token = phase.processCharacters(new_token)
                    elif tokenType == SpaceCharactersToken:
                        new_token = phase.processSpaceCharacters(new_token)
                    elif tokenType == StartTagToken:
                        new_token = phase.processStartTag(new_token)
                    elif tokenType == EndTagToken:
                        new_token = phase.processEndTag(new_token)
                    elif tokenType == CommentToken:
                        new_token = phase.processComment(new_token)
                    elif tokenType == DoctypeToken:
                        new_token = phase.processDoctype(new_token)

            # A self-closing start tag that no handler acknowledged is a
            # parse error.
            if (tokenType == StartTagToken and token["selfClosing"]
                    and not token["selfClosingAcknowledged"]):
                self.parseError("non-void-element-with-trailing-solidus",
                                {"name": token["name"]})

        # When the loop finishes it's EOF
        reprocess = True
        phases = []
        while reprocess:
            phases.append(self.phase)
            reprocess = self.phase.processEOF()
            if reprocess:
                # Guards against bouncing back to a phase that already
                # handled EOF.
                assert self.phase not in phases

    def normalizedTokens(self):
        """Yield each tokenizer token after passing it through normalizeToken."""
        for token in self.tokenizer:
            yield self.normalizeToken(token)

    def parse(self, stream, encoding=None, parseMeta=True, useChardet=True):
        """Parse a HTML document into a well-formed tree

        stream - a filelike object or string containing the HTML to be parsed

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)
        """
        self._parse(stream, innerHTML=False, encoding=encoding,
                    parseMeta=parseMeta, useChardet=useChardet)
        return self.tree.getDocument()

    def parseFragment(self, stream, container="div", encoding=None,
                      parseMeta=False, useChardet=True):
        """Parse a HTML fragment into a well-formed tree fragment

        container - name of the element we're setting the innerHTML property
        if set to None, default to 'div'

        stream - a filelike object or string containing the HTML to be parsed

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        parseMeta and useChardet are accepted but are not forwarded to
        _parse by this method, so _parse's own defaults apply.
        """
        # NOTE(review): parseMeta/useChardet are ignored here; forwarding
        # them would change the effective parseMeta default from True to
        # False, so confirm the intended behaviour before wiring them up.
        self._parse(stream, True, container=container, encoding=encoding)
        return self.tree.getFragment()

    def parseError(self, errorcode="XXX-undefined-error", datavars=None):
        """Record a parse error; raise ParseError when self.strict is set.

        errorcode - identifier of the error
        datavars - optional dict of values accompanying the error; a fresh
        empty dict is used when omitted

        Each record appended to self.errors is a tuple of
        (stream position, errorcode, datavars).
        """
        # XXX The idea is to make errorcode mandatory.
        if datavars is None:
            # A new dict per call: the value is stored in self.errors, so a
            # shared default object would be aliased by every record.
            datavars = {}
        self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
        if self.strict:
            raise ParseError

    def normalizeToken(self, token):
        """ HTML5 specific normalizations to the token stream """

        if token["type"] == tokenTypes["StartTag"]:
            # Building the dict from the reversed sequence makes the first
            # occurrence of a repeated key the one that survives.
            token["data"] = dict(token["data"][::-1])

        return token

    def adjustMathMLAttributes(self, token):
        """Rename the lowercased MathML attribute in token["data"] to its
        mixed-case form, in place."""
        replacements = {"definitionurl": "definitionURL"}
        for k, v in replacements.items():
            if k in token["data"]:
                token["data"][v] = token["data"][k]
                del token["data"][k]

    def adjustSVGAttributes(self, token):
        """Rename lowercased SVG attributes in token["data"] to their
        mixed-case forms, in place."""
        replacements = {
            "attributename": "attributeName",
            "attributetype": "attributeType",
            "basefrequency": "baseFrequency",
            "baseprofile": "baseProfile",
            "calcmode": "calcMode",
            "clippathunits": "clipPathUnits",
            "contentscripttype": "contentScriptType",
            "contentstyletype": "contentStyleType",
            "diffuseconstant": "diffuseConstant",
            "edgemode": "edgeMode",
            "externalresourcesrequired": "externalResourcesRequired",
            "filterres": "filterRes",
            "filterunits": "filterUnits",
            "glyphref": "glyphRef",
            "gradienttransform": "gradientTransform",
            "gradientunits": "gradientUnits",
            "kernelmatrix": "kernelMatrix",
            "kernelunitlength": "kernelUnitLength",
            "keypoints": "keyPoints",
            "keysplines": "keySplines",
            "keytimes": "keyTimes",
            "lengthadjust": "lengthAdjust",
            "limitingconeangle": "limitingConeAngle",
            "markerheight": "markerHeight",
            "markerunits": "markerUnits",
            "markerwidth": "markerWidth",
            "maskcontentunits": "maskContentUnits",
            "maskunits": "maskUnits",
            "numoctaves": "numOctaves",
            "pathlength": "pathLength",
            "patterncontentunits": "patternContentUnits",
            "patterntransform": "patternTransform",
            "patternunits": "patternUnits",
            "pointsatx": "pointsAtX",
            "pointsaty": "pointsAtY",
            "pointsatz": "pointsAtZ",
            "preservealpha": "preserveAlpha",
            "preserveaspectratio": "preserveAspectRatio",
            "primitiveunits": "primitiveUnits",
            "refx": "refX",
            "refy": "refY",
            "repeatcount": "repeatCount",
            "repeatdur": "repeatDur",
            "requiredextensions": "requiredExtensions",
            "requiredfeatures": "requiredFeatures",
            "specularconstant": "specularConstant",
            "specularexponent": "specularExponent",
            "spreadmethod": "spreadMethod",
            "startoffset": "startOffset",
            "stddeviation": "stdDeviation",
            "stitchtiles": "stitchTiles",
            "surfacescale": "surfaceScale",
            "systemlanguage": "systemLanguage",
            "tablevalues": "tableValues",
            "targetx": "targetX",
            "targety": "targetY",
            "textlength": "textLength",
            "viewbox": "viewBox",
            "viewtarget": "viewTarget",
            "xchannelselector": "xChannelSelector",
            "ychannelselector": "yChannelSelector",
            "zoomandpan": "zoomAndPan"
        }
        # Iterate over a snapshot of the keys because entries are added and
        # removed inside the loop.
        for originalName in list(token["data"].keys()):
            if originalName in replacements:
                svgName = replacements[originalName]
                token["data"][svgName] = token["data"][originalName]
                del token["data"][originalName]

    def adjustForeignAttributes(self, token):
        """Replace keys of token["data"] that appear in
        adjustForeignAttributesMap with the mapped value, in place."""
        replacements = adjustForeignAttributesMap

        # Snapshot the keys first: the dict is modified inside the loop, and
        # iterating a live key view while inserting/deleting is not safe.
        for originalName in list(token["data"].keys()):
            if originalName in replacements:
                foreignName = replacements[originalName]
                token["data"][foreignName] = token["data"][originalName]
                del token["data"][originalName]

    def reparseTokenNormal(self, token):
        # NOTE(review): nothing in this class assigns self.parser, and
        # self.phase holds a phase instance elsewhere in this class; this
        # method looks like dead code - confirm before relying on it.
        self.parser.phase()

    def resetInsertionMode(self):
        """Set self.phase from the stack of open elements.

        Walks the open elements from the most recent to the first and picks
        the phase named in the table below for the first matching element
        name; the first (root) element is treated as if it had the name of
        the fragment container, and falls back to "inBody".
        """
        # The name of this method is mostly historical. (It's also used in the
        # specification.)
        last = False
        newModes = {
            "select": "inSelect",
            "td": "inCell",
            "th": "inCell",
            "tr": "inRow",
            "tbody": "inTableBody",
            "thead": "inTableBody",
            "tfoot": "inTableBody",
            "caption": "inCaption",
            "colgroup": "inColumnGroup",
            "table": "inTable",
            "head": "inBody",
            "body": "inBody",
            "frameset": "inFrameset",
            "html": "beforeHead"
        }
        for node in self.tree.openElements[::-1]:
            nodeName = node.name
            new_phase = None
            if node == self.tree.openElements[0]:
                assert self.innerHTML
                last = True
                nodeName = self.innerHTML
            # Check for conditions that should only happen in the innerHTML
            # case
            if nodeName in ("select", "colgroup", "head", "html"):
                assert self.innerHTML

            # Elements outside the default namespace never select a phase,
            # except for the root handled above.
            if not last and node.namespace != self.tree.defaultNamespace:
                continue

            if nodeName in newModes:
                new_phase = self.phases[newModes[nodeName]]
                break
            elif last:
                new_phase = self.phases["inBody"]
                break

        self.phase = new_phase

    def parseRCDataRawtext(self, token, contentType):
        """Generic RCDATA/RAWTEXT Parsing algorithm
        contentType - RCDATA or RAWTEXT
        """
        assert contentType in ("RAWTEXT", "RCDATA")

        self.tree.insertElement(token)

        if contentType == "RAWTEXT":
            self.tokenizer.state = self.tokenizer.rawtextState
        else:
            self.tokenizer.state = self.tokenizer.rcdataState

        # Remember the phase that was active before switching to "text".
        self.originalPhase = self.phase

        self.phase = self.phases["text"]

+def getPhases(debug):

+ def log(function):

+ """Logger that records which phase processes each token"""

+ type_names = dict((value, key) for key, value in

+ constants.tokenTypes.items())

+ def wrapped(self, *args, **kwargs):

+ if function.__name__.startswith("process") and len(args) > 0:

+ token = args[0]

+ try:

+ info = {"type": type_names[token['type']]}

+ except:

+ raise

+ if token['type'] in constants.tagTokenTypes:

+ info["name"] = token['name']

+ self.parser.log.append((self.parser.tokenizer.state.__name__,

+ self.parser.phase.__class__.__name__,

+ self.__class__.__name__,

+ function.__name__,

+ info))

+ return function(self, *args, **kwargs)

+ else:

+ return function(self, *args, **kwargs)

+ return wrapped

+ def getMetaclass(use_metaclass, metaclass_func):

+ if use_metaclass:

+ return method_decorator_metaclass(metaclass_func)

+ else:

+ return type

+ class Phase(with_metaclass(getMetaclass(debug, log))):

+ """Base class for helper object that implements each phase of processing

+ """

+ def __init__(self, parser, tree):

+ self.parser = parser

+ self.tree = tree

+ def processEOF(self):

+ raise NotImplementedError

+ def processComment(self, token):

+ # For most phases the following is correct. Where it's not it will be

+ # overridden.

+ self.tree.insertComment(token, self.tree.openElements[-1])

+ def processDoctype(self, token):

+ self.parser.parseError("unexpected-doctype")

+ def processCharacters(self, token):

+ self.tree.insertText(token["data"])

+ def processSpaceCharacters(self, token):

+ self.tree.insertText(token["data"])

+ def processStartTag(self, token):

+ return self.startTagHandler[token["name"]](token)

+ def startTagHtml(self, token):

+ if not self.parser.firstStartTag and token["name"] == "html":

+ self.parser.parseError("non-html-root")

+ # XXX Need a check here to see if the first start tag token emitted is

+ # this token... If it's not, invoke self.parser.parseError().

+ for attr, value in token["data"].items():

+ if attr not in self.tree.openElements[0].attributes:

+ self.tree.openElements[0].attributes[attr] = value

+ self.parser.firstStartTag = False

+ def processEndTag(self, token):

+ return self.endTagHandler[token["name"]](token)

+ class InitialPhase(Phase):

+ def processSpaceCharacters(self, token):

+ pass

+ def processComment(self, token):

+ self.tree.insertComment(token, self.tree.document)

+ def processDoctype(self, token):

+ name = token["name"]

+ publicId = token["publicId"]

+ systemId = token["systemId"]

+ correct = token["correct"]

+ if (name != "html" or publicId is not None or

+ systemId is not None and systemId != "about:legacy-compat"):

+ self.parser.parseError("unknown-doctype")

+ if publicId is None:

+ publicId = ""

+ self.tree.insertDoctype(token)

+ if publicId != "":

+ publicId = publicId.translate(asciiUpper2Lower)

+ if (not correct or token["name"] != "html"

+ or publicId.startswith(

+ ("+//silmaril//dtd html pro v0r11 19970101//",

+ "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",

+ "-//as//dtd html 3.0 aswedit + extensions//",

+ "-//ietf//dtd html 2.0 level 1//",

+ "-//ietf//dtd html 2.0 level 2//",

+ "-//ietf//dtd html 2.0 strict level 1//",

+ "-//ietf//dtd html 2.0 strict level 2//",

+ "-//ietf//dtd html 2.0 strict//",

+ "-//ietf//dtd html 2.0//",

+ "-//ietf//dtd html 2.1e//",

+ "-//ietf//dtd html 3.0//",

+ "-//ietf//dtd html 3.2 final//",

+ "-//ietf//dtd html 3.2//",

+ "-//ietf//dtd html 3//",

+ "-//ietf//dtd html level 0//",

+ "-//ietf//dtd html level 1//",

+ "-//ietf//dtd html level 2//",

+ "-//ietf//dtd html level 3//",

+ "-//ietf//dtd html strict level 0//",

+ "-//ietf//dtd html strict level 1//",

+ "-//ietf//dtd html strict level 2//",

+ "-//ietf//dtd html strict level 3//",

+ "-//ietf//dtd html strict//",

+ "-//ietf//dtd html//",

+ "-//metrius//dtd metrius presentational//",

+ "-//microsoft//dtd internet explorer 2.0 html strict//",

+ "-//microsoft//dtd internet explorer 2.0 html//",

+ "-//microsoft//dtd internet explorer 2.0 tables//",

+ "-//microsoft//dtd internet explorer 3.0 html strict//",

+ "-//microsoft//dtd internet explorer 3.0 html//",

+ "-//microsoft//dtd internet explorer 3.0 tables//",

+ "-//netscape comm. corp.//dtd html//",

+ "-//netscape comm. corp.//dtd strict html//",

+ "-//o'reilly and associates//dtd html 2.0//",

+ "-//o'reilly and associates//dtd html extended 1.0//",

+ "-//o'reilly and associates//dtd html extended relaxed 1.0//",

+ "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",

+ "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",

+ "-//spyglass//dtd html 2.0 extended//",

+ "-//sq//dtd html 2.0 hotmetal + extensions//",

+ "-//sun microsystems corp.//dtd hotjava html//",

+ "-//sun microsystems corp.//dtd hotjava strict html//",

+ "-//w3c//dtd html 3 1995-03-24//",

+ "-//w3c//dtd html 3.2 draft//",

+ "-//w3c//dtd html 3.2 final//",

+ "-//w3c//dtd html 3.2//",

+ "-//w3c//dtd html 3.2s draft//",

+ "-//w3c//dtd html 4.0 frameset//",

+ "-//w3c//dtd html 4.0 transitional//",

+ "-//w3c//dtd html experimental 19960712//",

+ "-//w3c//dtd html experimental 970421//",

+ "-//w3c//dtd w3 html//",

+ "-//w3o//dtd w3 html 3.0//",

+ "-//webtechs//dtd mozilla html 2.0//",

+ "-//webtechs//dtd mozilla html//"))

+ or publicId in

+ ("-//w3o//dtd w3 html strict 3.0//en//",

+ "-/w3c/dtd html 4.0 transitional/en",

+ "html")

+ or publicId.startswith(

+ ("-//w3c//dtd html 4.01 frameset//",

+ "-//w3c//dtd html 4.01 transitional//")) and

+ systemId is None

+ or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):

+ self.parser.compatMode = "quirks"

+ elif (publicId.startswith(

+ ("-//w3c//dtd xhtml 1.0 frameset//",

+ "-//w3c//dtd xhtml 1.0 transitional//"))

+ or publicId.startswith(

+ ("-//w3c//dtd html 4.01 frameset//",

+ "-//w3c//dtd html 4.01 transitional//")) and

+ systemId is not None):

+ self.parser.compatMode = "limited quirks"

+ self.parser.phase = self.parser.phases["beforeHtml"]

+ def anythingElse(self):

+ self.parser.compatMode = "quirks"

+ self.parser.phase = self.parser.phases["beforeHtml"]

+ def processCharacters(self, token):

+ self.parser.parseError("expected-doctype-but-got-chars")

+ self.anythingElse()

+ return token

+ def processStartTag(self, token):

+ self.parser.parseError("expected-doctype-but-got-start-tag",

+ {"name": token["name"]})

+ self.anythingElse()

+ return token

+ def processEndTag(self, token):

+ self.parser.parseError("expected-doctype-but-got-end-tag",

+ {"name": token["name"]})

+ self.anythingElse()

+ return token

+ def processEOF(self):

+ self.parser.parseError("expected-doctype-but-got-eof")

+ self.anythingElse()

+ return True

+ class BeforeHtmlPhase(Phase):

+ # helper methods

+ def insertHtmlElement(self):

+ self.tree.insertRoot(impliedTagToken("html", "StartTag"))

+ self.parser.phase = self.parser.phases["beforeHead"]

+ # other

+ def processEOF(self):

+ self.insertHtmlElement()

+ return True

+ def processComment(self, token):

+ self.tree.insertComment(token, self.tree.document)

+ def processSpaceCharacters(self, token):

+ pass

+ def processCharacters(self, token):

+ self.insertHtmlElement()

+ return token

+ def processStartTag(self, token):

+ if token["name"] == "html":

+ self.parser.firstStartTag = True

+ self.insertHtmlElement()

+ return token

+ def processEndTag(self, token):

+ if token["name"] not in ("head", "body", "html", "br"):

+ self.parser.parseError("unexpected-end-tag-before-html",

+ {"name": token["name"]})

+ else:

+ self.insertHtmlElement()

+ return token

+ class BeforeHeadPhase(Phase):

+ def __init__(self, parser, tree):

+ Phase.__init__(self, parser, tree)

+ self.startTagHandler = utils.MethodDispatcher([

+ ("html", self.startTagHtml),

+ ("head", self.startTagHead)

+ ])

+ self.startTagHandler.default = self.startTagOther

+ self.endTagHandler = utils.MethodDispatcher([

+ (("head", "body", "html", "br"), self.endTagImplyHead)

+ ])

+ self.endTagHandler.default = self.endTagOther

+ def processEOF(self):

+ self.startTagHead(impliedTagToken("head", "StartTag"))

+ return True

+ def processSpaceCharacters(self, token):

+ pass

+ def processCharacters(self, token):

+ self.startTagHead(impliedTagToken("head", "StartTag"))

+ return token

+ def startTagHtml(self, token):

+ return self.parser.phases["inBody"].processStartTag(token)

+ def startTagHead(self, token):

+ self.tree.insertElement(token)

+ self.tree.headPointer = self.tree.openElements[-1]

+ self.parser.phase = self.parser.phases["inHead"]

+ def startTagOther(self, token):

+ self.startTagHead(impliedTagToken("head", "StartTag"))

+ return token

+ def endTagImplyHead(self, token):

+ self.startTagHead(impliedTagToken("head", "StartTag"))

+ return token

+ def endTagOther(self, token):

+ self.parser.parseError("end-tag-after-implied-root",

+ {"name": token["name"]})

+ class InHeadPhase(Phase):

+ def __init__(self, parser, tree):

+ Phase.__init__(self, parser, tree)

+ self.startTagHandler = utils.MethodDispatcher([

+ ("html", self.startTagHtml),

+ ("title", self.startTagTitle),

+ (("noscript", "noframes", "style"), self.startTagNoScriptNoFramesStyle),

+ ("script", self.startTagScript),

+ (("base", "basefont", "bgsound", "command", "link"),

+ self.startTagBaseLinkCommand),

+ ("meta", self.startTagMeta),

+ ("head", self.startTagHead)

+ ])

+ self.startTagHandler.default = self.startTagOther

+ self. endTagHandler = utils.MethodDispatcher([

+ ("head", self.endTagHead),

+ (("br", "html", "body"), self.endTagHtmlBodyBr)

+ ])

+ self.endTagHandler.default = self.endTagOther

+ # the real thing

+ def processEOF(self):

+ self.anythingElse()

+ return True

+ def processCharacters(self, token):

+ self.anythingElse()

+ return token

+ def startTagHtml(self, token):

+ return self.parser.phases["inBody"].processStartTag(token)

+ def startTagHead(self, token):

+ self.parser.parseError("two-heads-are-not-better-than-one")

+ def startTagBaseLinkCommand(self, token):

+ self.tree.insertElement(token)

+ self.tree.openElements.pop()

+ token["selfClosingAcknowledged"] = True

+ def startTagMeta(self, token):

+ self.tree.insertElement(token)

+ self.tree.openElements.pop()

+ token["selfClosingAcknowledged"] = True

+ attributes = token["data"]

+ if self.parser.tokenizer.stream.charEncoding[1] == "tentative":

+ if "charset" in attributes:

+ self.parser.tokenizer.stream.changeEncoding(attributes["charset"])

+ elif ("content" in attributes and

+ "http-equiv" in attributes and

+ attributes["http-equiv"].lower() == "content-type"):

+ # Encoding it as UTF-8 here is a hack, as really we should pass

+ # the abstract Unicode string, and just use the

+ # ContentAttrParser on that, but using UTF-8 allows all chars

+ # to be encoded and as a ASCII-superset works.

+ data = inputstream.EncodingBytes(attributes["content"].encode("utf-8"))

+ parser = inputstream.ContentAttrParser(data)

+ codec = parser.parse()

+ self.parser.tokenizer.stream.changeEncoding(codec)

+ def startTagTitle(self, token):

+ self.parser.parseRCDataRawtext(token, "RCDATA")

+ def startTagNoScriptNoFramesStyle(self, token):

+ # Need to decide whether to implement the scripting-disabled case

+ self.parser.parseRCDataRawtext(token, "RAWTEXT")

+ def startTagScript(self, token):

+ self.tree.insertElement(token)

+ self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState

+ self.parser.originalPhase = self.parser.phase

+ self.parser.phase = self.parser.phases["text"]

+ def startTagOther(self, token):

+ self.anythingElse()

+ return token

+ def endTagHead(self, token):

+ node = self.parser.tree.openElements.pop()

+ assert node.name == "head", "Expected head got %s" % node.name

+ self.parser.phase = self.parser.phases["afterHead"]

+ def endTagHtmlBodyBr(self, token):

+ self.anythingElse()

+ return token

+ def endTagOther(self, token):

+ self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

+ def anythingElse(self):

+ self.endTagHead(impliedTagToken("head"))

+ # XXX If we implement a parser for which scripting is disabled we need to

+ # implement this phase.

+ #

+ # class InHeadNoScriptPhase(Phase):

+ class AfterHeadPhase(Phase):

+ def __init__(self, parser, tree):

+ Phase.__init__(self, parser, tree)

+ self.startTagHandler = utils.MethodDispatcher([

+ ("html", self.startTagHtml),

+ ("body", self.startTagBody),

+ ("frameset", self.startTagFrameset),

+ (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",

+ "style", "title"),

+ self.startTagFromHead),

+ ("head", self.startTagHead)

+ ])

+ self.startTagHandler.default = self.startTagOther

+ self.endTagHandler = utils.MethodDispatcher([(("body", "html", "br"),

+ self.endTagHtmlBodyBr)])

+ self.endTagHandler.default = self.endTagOther

+ def processEOF(self):

+ self.anythingElse()

+ return True

+ def processCharacters(self, token):

+ self.anythingElse()

+ return token

+ def startTagHtml(self, token):

+ return self.parser.phases["inBody"].processStartTag(token)

+ def startTagBody(self, token):

+ self.parser.framesetOK = False

+ self.tree.insertElement(token)

+ self.parser.phase = self.parser.phases["inBody"]

+ def startTagFrameset(self, token):

+ self.tree.insertElement(token)

+ self.parser.phase = self.parser.phases["inFrameset"]

+ def startTagFromHead(self, token):

+ self.parser.parseError("unexpected-start-tag-out-of-my-head",

+ {"name": token["name"]})

+ self.tree.openElements.append(self.tree.headPointer)

+ self.parser.phases["inHead"].processStartTag(token)

+ for node in self.tree.openElements[::-1]:

+ if node.name == "head":

+ self.tree.openElements.remove(node)

+ break

+ def startTagHead(self, token):

+ self.parser.parseError("unexpected-start-tag", {"name": token["name"]})

+ def startTagOther(self, token):

+ self.anythingElse()

+ return token

+ def endTagHtmlBodyBr(self, token):

+ self.anythingElse()

+ return token

+ def endTagOther(self, token):

+ self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

+ def anythingElse(self):

+ self.tree.insertElement(impliedTagToken("body", "StartTag"))

+ self.parser.phase = self.parser.phases["inBody"]

+ self.parser.framesetOK = True

+ class InBodyPhase(Phase):

+ # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody

+ # the really-really-really-very crazy mode

+ def __init__(self, parser, tree):

+ Phase.__init__(self, parser, tree)

+ # Keep a ref to this for special handling of whitespace in <pre>

+ self.processSpaceCharactersNonPre = self.processSpaceCharacters

+ self.startTagHandler = utils.MethodDispatcher([

+ ("html", self.startTagHtml),

+ (("base", "basefont", "bgsound", "command", "link", "meta",

+ "noframes", "script", "style", "title"),

+ self.startTagProcessInHead),

+ ("body", self.startTagBody),

+ ("frameset", self.startTagFrameset),

+ (("address", "article", "aside", "blockquote", "center", "details",

+ "details", "dir", "div", "dl", "fieldset", "figcaption", "figure",

+ "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",

+ "section", "summary", "ul"),

+ self.startTagCloseP),

+ (headingElements, self.startTagHeading),

+ (("pre", "listing"), self.startTagPreListing),

+ ("form", self.startTagForm),

+ (("li", "dd", "dt"), self.startTagListItem),

+ ("plaintext", self.startTagPlaintext),

+ ("a", self.startTagA),

+ (("b", "big", "code", "em", "font", "i", "s", "small", "strike",

+ "strong", "tt", "u"), self.startTagFormatting),

+ ("nobr", self.startTagNobr),

+ ("button", self.startTagButton),

+ (("applet", "marquee", "object"), self.startTagAppletMarqueeObject),

+ ("xmp", self.startTagXmp),

+ ("table", self.startTagTable),

+ (("area", "br", "embed", "img", "keygen", "wbr"),

+ self.startTagVoidFormatting),

+ (("param", "source", "track"), self.startTagParamSource),

+ ("input", self.startTagInput),

+ ("hr", self.startTagHr),

+ ("image", self.startTagImage),

+ ("isindex", self.startTagIsIndex),

+ ("textarea", self.startTagTextarea),

+ ("iframe", self.startTagIFrame),

+ (("noembed", "noframes", "noscript"), self.startTagRawtext),

+ ("select", self.startTagSelect),

+ (("rp", "rt"), self.startTagRpRt),

+ (("option", "optgroup"), self.startTagOpt),

+ (("math"), self.startTagMath),

+ (("svg"), self.startTagSvg),

+ (("caption", "col", "colgroup", "frame", "head",

+ "tbody", "td", "tfoot", "th", "thead",

+ "tr"), self.startTagMisplaced)

+ ])

+ self.startTagHandler.default = self.startTagOther

+ self.endTagHandler = utils.MethodDispatcher([

+ ("body", self.endTagBody),

+ ("html", self.endTagHtml),

+ (("address", "article", "aside", "blockquote", "button", "center",

+ "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",

+ "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",

+ "section", "summary", "ul"), self.endTagBlock),

+ ("form", self.endTagForm),

+ ("p", self.endTagP),

+ (("dd", "dt", "li"), self.endTagListItem),

+ (headingElements, self.endTagHeading),

+ (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",

+ "strike", "strong", "tt", "u"), self.endTagFormatting),

+ (("applet", "marquee", "object"), self.endTagAppletMarqueeObject),

+ ("br", self.endTagBr),

+ ])

+ self.endTagHandler.default = self.endTagOther

def isMatchingFormattingElement(self, node1, node2):
    """Return True when both nodes share name, namespace and attributes.

    Attributes are compared as sorted (name, value) pairs, so the order in
    which each node stores them is irrelevant.
    """
    sameElementType = (node1.name == node2.name and
                       node1.namespace == node2.namespace)
    if not sameElementType:
        return False
    if len(node1.attributes) != len(node2.attributes):
        return False
    firstPairs = sorted(node1.attributes.items())
    secondPairs = sorted(node2.attributes.items())
    return all(left == right
               for left, right in zip(firstPairs, secondPairs))

+ # helper

def addFormattingElement(self, token):
    """Insert the element for ``token`` and record it as an active
    formatting element.

    Entries after the last Marker are scanned newest-first; if three of
    them already match the new element, the oldest of those three is
    removed before the new element is appended.
    """
    tree = self.tree
    tree.insertElement(token)
    inserted = tree.openElements[-1]

    duplicates = []
    for candidate in reversed(tree.activeFormattingElements):
        if candidate is Marker:
            break
        if self.isMatchingFormattingElement(candidate, inserted):
            duplicates.append(candidate)

    duplicateCount = len(duplicates)
    assert duplicateCount <= 3
    if duplicateCount == 3:
        # duplicates is ordered newest-first, so [-1] is the oldest match
        tree.activeFormattingElements.remove(duplicates[-1])
    tree.activeFormattingElements.append(inserted)

+ # the real deal

def processEOF(self):
    """Emit one parse error if any open element is not allowed to remain
    open at end of file."""
    mayStayOpen = frozenset(("dd", "dt", "li", "p", "tbody", "td",
                             "tfoot", "th", "thead", "tr", "body",
                             "html"))
    openNames = (node.name for node in reversed(self.tree.openElements))
    if any(name not in mayStayOpen for name in openNames):
        self.parser.parseError("expected-closing-tag-but-got-eof")
    # Stop parsing

def processSpaceCharactersDropNewline(self, token):
    """Handle the first whitespace token after <pre>, <listing> or
    <textarea>, dropping a single leading newline.

    The regular whitespace handler is put back before anything else, so
    this method only ever handles one token per installation.
    """
    text = token["data"]
    self.processSpaceCharacters = self.processSpaceCharactersNonPre
    if text.startswith("\n"):
        current = self.tree.openElements[-1]
        if (current.name in ("pre", "listing", "textarea") and
                not current.hasContent()):
            text = text[1:]
    if not text:
        return
    self.tree.reconstructActiveFormattingElements()
    self.tree.insertText(text)

def processCharacters(self, token):
    """Insert character data and clear ``framesetOK`` on non-space text.

    A token whose data is exactly U+0000 is dropped without being inserted.
    """
    data = token["data"]
    if data == "\u0000":
        # The tokenizer should always emit null on its own
        return
    self.tree.reconstructActiveFormattingElements()
    self.tree.insertText(data)
    # A generator lets any() stop at the first non-space character instead
    # of materialising a list of booleans for the whole run of text.
    if (self.parser.framesetOK and
            any(char not in spaceCharacters for char in data)):
        self.parser.framesetOK = False

def processSpaceCharacters(self, token):
    """Insert whitespace text after reconstructing the active formatting
    elements."""
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    whitespace = token["data"]
    tree.insertText(whitespace)

def startTagProcessInHead(self, token):
    """Delegate the start tag to the "inHead" phase and return its result."""
    inHeadPhase = self.parser.phases["inHead"]
    return inHeadPhase.processStartTag(token)

def startTagBody(self, token):
    """Handle a stray <body> start tag.

    Always a parse error. When the second open element is a body, any
    attributes it does not already have are copied from the token and
    ``framesetOK`` is cleared; otherwise only the innerHTML assertion runs.
    """
    self.parser.parseError("unexpected-start-tag", {"name": "body"})
    stack = self.tree.openElements
    bodyIsOpen = len(stack) != 1 and stack[1].name == "body"
    if not bodyIsOpen:
        assert self.parser.innerHTML
        return
    self.parser.framesetOK = False
    for attrName, attrValue in token["data"].items():
        existing = self.tree.openElements[1].attributes
        if attrName not in existing:
            existing[attrName] = attrValue

def startTagFrameset(self, token):
    """Handle a <frameset> start tag seen while in the body.

    Always a parse error. If a body element is second on the stack and
    ``framesetOK`` is still set, the body is detached from its parent, the
    stack is popped back to <html>, the frameset is inserted and the parser
    switches to the "inFrameset" phase. Otherwise the token is ignored.
    """
    self.parser.parseError("unexpected-start-tag", {"name": "frameset"})
    stack = self.tree.openElements
    if len(stack) == 1 or stack[1].name != "body":
        assert self.parser.innerHTML
        return
    if not self.parser.framesetOK:
        return

    bodyParent = self.tree.openElements[1].parent
    if bodyParent:
        self.tree.openElements[1].parent.removeChild(
            self.tree.openElements[1])
    while self.tree.openElements[-1].name != "html":
        self.tree.openElements.pop()
    self.tree.insertElement(token)
    self.parser.phase = self.parser.phases["inFrameset"]

def startTagCloseP(self, token):
    """Insert the element, first closing a <p> that is in button scope."""
    paragraphOpen = self.tree.elementInScope("p", variant="button")
    if paragraphOpen:
        self.endTagP(impliedTagToken("p"))
    self.tree.insertElement(token)

def startTagPreListing(self, token):
    """Insert a <pre>/<listing> element and arm leading-newline dropping.

    An open <p> in button scope is closed first; ``framesetOK`` is cleared.
    """
    paragraphOpen = self.tree.elementInScope("p", variant="button")
    if paragraphOpen:
        self.endTagP(impliedTagToken("p"))
    self.tree.insertElement(token)
    self.parser.framesetOK = False
    # The next whitespace token goes through the newline-dropping handler.
    self.processSpaceCharacters = self.processSpaceCharactersDropNewline

def startTagForm(self, token):
    """Insert a <form> and remember it as the tree's form pointer.

    If a form pointer is already set the tag is a parse error and ignored.
    """
    if self.tree.formPointer:
        self.parser.parseError("unexpected-start-tag", {"name": "form"})
        return
    paragraphOpen = self.tree.elementInScope("p", variant="button")
    if paragraphOpen:
        self.endTagP(impliedTagToken("p"))
    self.tree.insertElement(token)
    self.tree.formPointer = self.tree.openElements[-1]

def startTagListItem(self, token):
    """Insert an <li>, <dd> or <dt>, implicitly closing an open sibling.

    The stack is walked from the current node outwards: the first open
    element whose name is a "stop name" for this tag gets an implied end
    tag; the walk also stops at any special element other than address,
    div and p. An open <p> in button scope is then closed.
    """
    self.parser.framesetOK = False

    definitionItems = ["dt", "dd"]
    closesFor = {"li": ["li"],
                 "dt": definitionItems,
                 "dd": definitionItems}
    stopNames = closesFor[token["name"]]

    for openNode in reversed(self.tree.openElements):
        if openNode.name in stopNames:
            self.parser.phase.processEndTag(
                impliedTagToken(openNode.name, "EndTag"))
        elif not (openNode.nameTuple in specialElements and
                  openNode.name not in ("address", "div", "p")):
            continue
        break

    if self.tree.elementInScope("p", variant="button"):
        self.parser.phase.processEndTag(
            impliedTagToken("p", "EndTag"))

    self.tree.insertElement(token)

def startTagPlaintext(self, token):
    """Insert <plaintext> and switch the tokenizer to its plaintext state."""
    paragraphOpen = self.tree.elementInScope("p", variant="button")
    if paragraphOpen:
        self.endTagP(impliedTagToken("p"))
    self.tree.insertElement(token)
    tokenizer = self.parser.tokenizer
    tokenizer.state = tokenizer.plaintextState

def startTagHeading(self, token):
    """Insert a heading element.

    Closes an open <p> in button scope; if the current node is itself a
    heading, that is a parse error and the old heading is popped first.
    """
    paragraphOpen = self.tree.elementInScope("p", variant="button")
    if paragraphOpen:
        self.endTagP(impliedTagToken("p"))
    currentNode = self.tree.openElements[-1]
    if currentNode.name in headingElements:
        self.parser.parseError("unexpected-start-tag",
                               {"name": token["name"]})
        self.tree.openElements.pop()
    self.tree.insertElement(token)

def startTagA(self, token):
    """Insert an <a> element as a formatting element.

    If an <a> is already in the list of active formatting elements, that
    is a parse error: an implied </a> is processed and the old element is
    removed from both the open-element stack and the active formatting
    list if it is still present in either.
    """
    staleAnchor = self.tree.elementInActiveFormattingElements("a")
    if staleAnchor:
        self.parser.parseError("unexpected-start-tag-implies-end-tag",
                               {"startName": "a", "endName": "a"})
        self.endTagFormatting(impliedTagToken("a"))
        for elementList in (self.tree.openElements,
                            self.tree.activeFormattingElements):
            if staleAnchor in elementList:
                elementList.remove(staleAnchor)
    self.tree.reconstructActiveFormattingElements()
    self.addFormattingElement(token)

def startTagFormatting(self, token):
    """Reconstruct active formatting elements, then add the new one."""
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    self.addFormattingElement(token)

def startTagNobr(self, token):
    """Insert a <nobr> formatting element.

    A <nobr> already in scope is a parse error and is closed with an
    implied end tag before the new one is added.
    """
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    nobrOpen = tree.elementInScope("nobr")
    if nobrOpen:
        self.parser.parseError("unexpected-start-tag-implies-end-tag",
                               {"startName": "nobr", "endName": "nobr"})
        self.processEndTag(impliedTagToken("nobr"))
        # XXX Need tests that trigger the following
        tree.reconstructActiveFormattingElements()
    self.addFormattingElement(token)

def startTagButton(self, token):
    """Insert a <button> element.

    If a button is already in scope, this is a parse error: an implied
    </button> is processed and the token is returned to the caller.
    Otherwise the element is inserted and ``framesetOK`` is cleared.
    """
    if self.tree.elementInScope("button"):
        self.parser.parseError("unexpected-start-tag-implies-end-tag",
                               {"startName": "button", "endName": "button"})
        self.processEndTag(impliedTagToken("button"))
        return token
    self.tree.reconstructActiveFormattingElements()
    self.tree.insertElement(token)
    self.parser.framesetOK = False
    return None

def startTagAppletMarqueeObject(self, token):
    """Insert the element and push a Marker onto the active formatting
    list; ``framesetOK`` is cleared."""
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    tree.insertElement(token)
    tree.activeFormattingElements.append(Marker)
    self.parser.framesetOK = False

def startTagXmp(self, token):
    """Handle <xmp>: close an open <p>, then parse its content as RAWTEXT."""
    paragraphOpen = self.tree.elementInScope("p", variant="button")
    if paragraphOpen:
        self.endTagP(impliedTagToken("p"))
    self.tree.reconstructActiveFormattingElements()
    parser = self.parser
    parser.framesetOK = False
    parser.parseRCDataRawtext(token, "RAWTEXT")

def startTagTable(self, token):
    """Insert a <table> and switch to the "inTable" phase.

    Outside quirks mode, an open <p> in button scope is closed first.
    """
    parser = self.parser
    if (parser.compatMode != "quirks" and
            self.tree.elementInScope("p", variant="button")):
        self.processEndTag(impliedTagToken("p"))
    self.tree.insertElement(token)
    parser.framesetOK = False
    parser.phase = parser.phases["inTable"]

def startTagVoidFormatting(self, token):
    """Insert a void element and immediately pop it off the stack.

    The token's self-closing flag is acknowledged and ``framesetOK`` is
    cleared.
    """
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    tree.insertElement(token)
    # Void elements never stay on the stack of open elements.
    tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
    self.parser.framesetOK = False

def startTagInput(self, token):
    """Insert an <input>; a hidden input leaves ``framesetOK`` untouched."""
    savedFramesetOK = self.parser.framesetOK
    self.startTagVoidFormatting(token)
    attrs = token["data"]
    isHidden = ("type" in attrs and
                attrs["type"].translate(asciiUpper2Lower) == "hidden")
    if isHidden:
        # input type=hidden doesn't change framesetOK
        self.parser.framesetOK = savedFramesetOK

def startTagParamSource(self, token):
    """Insert a <param>/<source>/<track> element and pop it right away,
    acknowledging the token's self-closing flag."""
    tree = self.tree
    tree.insertElement(token)
    tree.openElements.pop()
    token["selfClosingAcknowledged"] = True

def startTagHr(self, token):
    """Insert an <hr>, closing an open <p> in button scope first.

    The element is popped immediately, the self-closing flag is
    acknowledged and ``framesetOK`` is cleared.
    """
    paragraphOpen = self.tree.elementInScope("p", variant="button")
    if paragraphOpen:
        self.endTagP(impliedTagToken("p"))
    tree = self.tree
    tree.insertElement(token)
    tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
    self.parser.framesetOK = False

def startTagImage(self, token):
    """Treat <image> as <img>: report a parse error and reprocess the tag
    under the new name with the same attributes and self-closing flag."""
    # No really...
    self.parser.parseError("unexpected-start-tag-treated-as",
                           {"originalName": "image", "newName": "img"})
    imgToken = impliedTagToken("img", "StartTag",
                               attributes=token["data"],
                               selfClosing=token["selfClosing"])
    self.processStartTag(imgToken)

def startTagIsIndex(self, token):
    """Expand the deprecated <isindex> into form/hr/label/input markup.

    Always a parse error. Nothing more happens when a form pointer is
    already set. Otherwise the tag is replaced by the token sequence
    <form><hr><label>prompt<input name=isindex></label><hr></form>, where
    "action" goes to the form, "prompt" supplies the label text and all
    other attributes go to the input.
    """
    self.parser.parseError("deprecated-tag", {"name": "isindex"})
    if self.tree.formPointer:
        return

    sourceAttrs = token["data"]
    formAttributes = {}
    if "action" in sourceAttrs:
        formAttributes["action"] = sourceAttrs["action"]
    self.processStartTag(impliedTagToken("form", "StartTag",
                                         attributes=formAttributes))
    self.processStartTag(impliedTagToken("hr", "StartTag"))
    self.processStartTag(impliedTagToken("label", "StartTag"))

    # XXX Localization ...
    if "prompt" in token["data"]:
        labelText = token["data"]["prompt"]
    else:
        labelText = "This is a searchable index. Enter search keywords: "
    self.processCharacters(
        {"type": tokenTypes["Characters"], "data": labelText})

    # The input gets every attribute except the two consumed above.
    inputAttributes = token["data"].copy()
    for consumed in ("action", "prompt"):
        if consumed in inputAttributes:
            del inputAttributes[consumed]
    inputAttributes["name"] = "isindex"
    self.processStartTag(impliedTagToken("input", "StartTag",
                                         attributes=inputAttributes,
                                         selfClosing=token["selfClosing"]))

    self.processEndTag(impliedTagToken("label"))
    self.processStartTag(impliedTagToken("hr", "StartTag"))
    self.processEndTag(impliedTagToken("form"))

def startTagTextarea(self, token):
    """Insert a <textarea>, switch the tokenizer to its RCDATA state and
    arm leading-newline dropping; ``framesetOK`` is cleared."""
    self.tree.insertElement(token)
    tokenizer = self.parser.tokenizer
    tokenizer.state = tokenizer.rcdataState
    self.processSpaceCharacters = self.processSpaceCharactersDropNewline
    self.parser.framesetOK = False

def startTagIFrame(self, token):
    """Clear ``framesetOK`` and handle the <iframe> as a raw-text element."""
    parser = self.parser
    parser.framesetOK = False
    self.startTagRawtext(token)

def startTagRawtext(self, token):
    """Parse the element's content as RAWTEXT.

    Used for iframe, noembed, noframes and noscript (if scripting enabled).
    """
    contentModel = "RAWTEXT"
    self.parser.parseRCDataRawtext(token, contentModel)

def startTagOpt(self, token):
    """Insert an <option>/<optgroup>, first closing a current <option>."""
    currentNode = self.tree.openElements[-1]
    if currentNode.name == "option":
        self.parser.phase.processEndTag(impliedTagToken("option"))
    self.tree.reconstructActiveFormattingElements()
    # The insertion goes through the parser's tree reference, as before.
    self.parser.tree.insertElement(token)

def startTagSelect(self, token):
    """Insert a <select> and pick the matching select phase.

    When the parser's current phase is one of the table-related phases the
    next phase is "inSelectInTable"; otherwise it is "inSelect".
    """
    self.tree.reconstructActiveFormattingElements()
    self.tree.insertElement(token)
    parser = self.parser
    parser.framesetOK = False

    tablePhaseNames = ("inTable", "inCaption", "inColumnGroup",
                       "inTableBody", "inRow", "inCell")
    tablePhases = tuple(parser.phases[name] for name in tablePhaseNames)
    nextPhase = ("inSelectInTable" if parser.phase in tablePhases
                 else "inSelect")
    parser.phase = parser.phases[nextPhase]

def startTagRpRt(self, token):
    """Insert an <rp>/<rt> element.

    With a <ruby> in scope, implied end tags are generated first and a
    parse error is reported if the current node is then not the ruby.
    """
    tree = self.tree
    if tree.elementInScope("ruby"):
        tree.generateImpliedEndTags()
        currentIsRuby = tree.openElements[-1].name == "ruby"
        if not currentIsRuby:
            self.parser.parseError()
    tree.insertElement(token)

def startTagMath(self, token):
    """Insert a <math> element in the MathML namespace.

    Attributes are adjusted by the parser's MathML and foreign-attribute
    hooks before insertion; a self-closing tag is popped again and its
    flag acknowledged.
    """
    self.tree.reconstructActiveFormattingElements()
    parser = self.parser
    parser.adjustMathMLAttributes(token)
    parser.adjustForeignAttributes(token)
    token["namespace"] = namespaces["mathml"]
    self.tree.insertElement(token)
    # Need to get the parse error right for the case where the token
    # has a namespace not equal to the xmlns attribute
    if not token["selfClosing"]:
        return
    self.tree.openElements.pop()
    token["selfClosingAcknowledged"] = True

def startTagSvg(self, token):
    """Insert an <svg> element in the SVG namespace.

    Attributes are adjusted by the parser's SVG and foreign-attribute
    hooks before insertion; a self-closing tag is popped again and its
    flag acknowledged.
    """
    self.tree.reconstructActiveFormattingElements()
    parser = self.parser
    parser.adjustSVGAttributes(token)
    parser.adjustForeignAttributes(token)
    token["namespace"] = namespaces["svg"]
    self.tree.insertElement(token)
    # Need to get the parse error right for the case where the token
    # has a namespace not equal to the xmlns attribute
    if not token["selfClosing"]:
        return
    self.tree.openElements.pop()
    token["selfClosingAcknowledged"] = True

def startTagMisplaced(self, token):
    """Ignore a start tag that belongs to a different insertion mode.

    Registered in this phase's start-tag dispatcher for "caption", "col",
    "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead"
    and "tr". The only effect is a parse error naming the tag.
    """
    errorData = {"name": token["name"]}
    self.parser.parseError("unexpected-start-tag-ignored", errorData)

def startTagOther(self, token):
    """Default start-tag handler: reconstruct formatting, then insert."""
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    tree.insertElement(token)

def endTagP(self, token):
    """Close the nearest <p> in button scope.

    Without one in scope, a <p> is opened via an implied start tag, a
    parse error is reported, and the method calls itself again to close
    it. Otherwise implied end tags (except p) are generated and elements
    are popped up to and including the <p>.
    """
    if self.tree.elementInScope("p", variant="button"):
        self.tree.generateImpliedEndTags("p")
        if self.tree.openElements[-1].name != "p":
            self.parser.parseError("unexpected-end-tag", {"name": "p"})
        # Pop until the popped element is the <p> itself.
        while self.tree.openElements.pop().name != "p":
            pass
        return

    self.startTagCloseP(impliedTagToken("p", "StartTag"))
    self.parser.parseError("unexpected-end-tag", {"name": "p"})
    self.endTagP(impliedTagToken("p", "EndTag"))

def endTagBody(self, token):
    """Handle </body> and move to the "afterBody" phase.

    With no body in scope this is a parse error and the token is ignored.
    If the current node is not the body, the first open element (from the
    third stack entry on) that may not be left open triggers one parse
    error.
    """
    if not self.tree.elementInScope("body"):
        self.parser.parseError()
        return

    if self.tree.openElements[-1].name != "body":
        mayStayOpen = frozenset(("dd", "dt", "li", "optgroup",
                                 "option", "p", "rp", "rt",
                                 "tbody", "td", "tfoot",
                                 "th", "thead", "tr", "body",
                                 "html"))
        for openNode in self.tree.openElements[2:]:
            if openNode.name in mayStayOpen:
                continue
            # Not sure this is the correct name for the parse error
            self.parser.parseError(
                "expected-one-end-tag-but-got-another",
                {"expectedName": "body", "gotName": openNode.name})
            break

    self.parser.phase = self.parser.phases["afterBody"]

def endTagHtml(self, token):
    """Handle </html> by processing an implied </body> first.

    Returns the token when a body is in scope, and None otherwise.
    """
    # We repeat the test for the body end tag token being ignored here
    if not self.tree.elementInScope("body"):
        return None
    self.endTagBody(impliedTagToken("body"))
    return token

def endTagBlock(self, token):
    """Close a block-level element named by the end tag.

    If the element is in scope, implied end tags are generated and the
    stack is popped through that element. A parse error is reported
    whenever the current node's name differs from the tag name.
    """
    tagName = token["name"]
    # Put us back in the right whitespace handling mode
    if tagName == "pre":
        self.processSpaceCharacters = self.processSpaceCharactersNonPre
    inScope = self.tree.elementInScope(token["name"])
    if inScope:
        self.tree.generateImpliedEndTags()
    if self.tree.openElements[-1].name != tagName:
        self.parser.parseError("end-tag-too-early", {"name": tagName})
    if not inScope:
        return
    while self.tree.openElements.pop().name != tagName:
        pass

def endTagForm(self, token):
    """Handle </form>: clear the form pointer and drop the form element.

    A missing pointer, or a form that is not in scope, is a parse error.
    Otherwise implied end tags are generated and the form is removed from
    the stack, with a parse error if it was not the current node.
    """
    formNode = self.tree.formPointer
    self.tree.formPointer = None
    if formNode is None or not self.tree.elementInScope(formNode):
        self.parser.parseError("unexpected-end-tag",
                               {"name": "form"})
        return
    self.tree.generateImpliedEndTags()
    currentNode = self.tree.openElements[-1]
    if currentNode != formNode:
        self.parser.parseError("end-tag-too-early-ignored",
                               {"name": "form"})
    self.tree.openElements.remove(formNode)

def endTagListItem(self, token):
    """Close an <li>, <dd> or <dt>.

    <li> is looked up in "list" scope, the others in the default scope.
    An element that is not in scope is a parse error; otherwise the stack
    is popped through the matching element.
    """
    tagName = token["name"]
    scopeVariant = "list" if tagName == "li" else None
    if not self.tree.elementInScope(tagName, variant=scopeVariant):
        self.parser.parseError("unexpected-end-tag", {"name": tagName})
        return
    self.tree.generateImpliedEndTags(exclude=tagName)
    if self.tree.openElements[-1].name != tagName:
        self.parser.parseError(
            "end-tag-too-early",
            {"name": tagName})
    while self.tree.openElements.pop().name != tagName:
        pass

def endTagHeading(self, token):
    """Close the innermost open heading element.

    If any heading is in scope, implied end tags are generated. A parse
    error is reported when the current node's name differs from the tag
    name. Then, if a heading is still in scope, the stack is popped
    through the first heading element encountered.
    """
    tree = self.tree
    if any(tree.elementInScope(name) for name in headingElements):
        tree.generateImpliedEndTags()

    if tree.openElements[-1].name != token["name"]:
        self.parser.parseError("end-tag-too-early", {"name": token["name"]})

    # Scope is checked again because the stack may have changed above.
    if any(tree.elementInScope(name) for name in headingElements):
        while tree.openElements.pop().name not in headingElements:
            pass

def endTagFormatting(self, token):
    """Run the adoption agency algorithm for a formatting end tag.

    Repairs mis-nested formatting elements by cloning and re-parenting
    nodes between the formatting element and the "furthest block" below
    it on the stack of open elements. The outer loop runs at most eight
    times and the inner loop at most three times per pass.
    """
    # http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867
    # XXX Better parseError messages appreciated.
    tree = self.tree
    tagName = token["name"]

    # Steps 1-3: bounded outer loop
    for _outerPass in range(8):
        # Step 4: the last active formatting element after the last
        # marker that has the token's tag name.
        formattingElement = tree.elementInActiveFormattingElements(tagName)
        if (not formattingElement or
                (formattingElement in tree.openElements and
                 not tree.elementInScope(formattingElement.name))):
            # No usable node: act as for "any other end tag".
            self.endTagOther(token)
            return
        if formattingElement not in tree.openElements:
            # In the active list but no longer open: drop it and stop.
            self.parser.parseError("adoption-agency-1.2", {"name": tagName})
            tree.activeFormattingElements.remove(formattingElement)
            return
        if not tree.elementInScope(formattingElement.name):
            # Open but not in scope: ignore the token.
            self.parser.parseError("adoption-agency-4.4", {"name": tagName})
            return
        if formattingElement != tree.openElements[-1]:
            # In scope but not the current node: error, then carry on.
            self.parser.parseError("adoption-agency-1.3", {"name": tagName})

        # Step 5: first special element at or below the formatting
        # element on the stack, if any.
        afeIndex = tree.openElements.index(formattingElement)
        furthestBlock = next(
            (candidate for candidate in tree.openElements[afeIndex:]
             if candidate.nameTuple in specialElements),
            None)

        # Step 6: no furthest block - pop through the formatting element
        # and forget it.
        if furthestBlock is None:
            popped = tree.openElements.pop()
            while popped != formattingElement:
                popped = tree.openElements.pop()
            tree.activeFormattingElements.remove(popped)
            return

        # Step 7
        commonAncestor = tree.openElements[afeIndex - 1]

        # Step 8: the bookmark records where the clone goes back into the
        # active formatting list in step 14; it can move in step 9.7.
        bookmark = tree.activeFormattingElements.index(formattingElement)

        # Step 9
        lastNode = node = furthestBlock
        index = tree.openElements.index(node)
        for _innerPass in range(3):
            # Node is element before node in open elements
            index -= 1
            node = tree.openElements[index]
            if node not in tree.activeFormattingElements:
                tree.openElements.remove(node)
                continue
            # Step 9.6
            if node == formattingElement:
                break
            # Step 9.7
            if lastNode == furthestBlock:
                bookmark = tree.activeFormattingElements.index(node) + 1
            # Step 9.8: replace node with a clone in both lists
            clone = node.cloneNode()
            tree.activeFormattingElements[
                tree.activeFormattingElements.index(node)] = clone
            tree.openElements[
                tree.openElements.index(node)] = clone
            node = clone
            # Step 9.9: detach lastNode from its parent, if any
            if lastNode.parent:
                lastNode.parent.removeChild(lastNode)
            node.appendChild(lastNode)
            # Step 9.10
            lastNode = node

        # Step 10: foster parent lastNode when the common ancestor is a
        # table, tbody, tfoot, thead or tr.
        if lastNode.parent:
            lastNode.parent.removeChild(lastNode)

        tableParts = frozenset(("table", "tbody", "tfoot", "thead", "tr"))
        if commonAncestor.name in tableParts:
            parent, insertBefore = tree.getTableMisnestedNodePosition()
            parent.insertBefore(lastNode, insertBefore)
        else:
            commonAncestor.appendChild(lastNode)

        # Step 11
        clone = formattingElement.cloneNode()

        # Step 12
        furthestBlock.reparentChildren(clone)

        # Step 13
        furthestBlock.appendChild(clone)

        # Step 14
        tree.activeFormattingElements.remove(formattingElement)
        tree.activeFormattingElements.insert(bookmark, clone)

        # Step 15
        tree.openElements.remove(formattingElement)
        tree.openElements.insert(
            tree.openElements.index(furthestBlock) + 1, clone)

def endTagAppletMarqueeObject(self, token):
    """Close an <applet>, <marquee> or <object>.

    Scope is tested twice, as before: once to decide whether to generate
    implied end tags, and again to decide whether to pop through the
    element and clear the active formatting elements.
    """
    tagName = token["name"]
    tree = self.tree
    if tree.elementInScope(tagName):
        tree.generateImpliedEndTags()
    if tree.openElements[-1].name != tagName:
        self.parser.parseError("end-tag-too-early", {"name": tagName})

    if not tree.elementInScope(tagName):
        return
    while tree.openElements.pop().name != tagName:
        pass
    tree.clearActiveFormattingElements()

def endTagBr(self, token):
    """Treat </br> as a <br> start tag: report a parse error, insert a
    br element and pop it again."""
    self.parser.parseError("unexpected-end-tag-treated-as",
                           {"originalName": "br", "newName": "br element"})
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    brStart = impliedTagToken("br", "StartTag")
    tree.insertElement(brStart)
    tree.openElements.pop()

def endTagOther(self, token):
    """Default end-tag handler.

    Walks the stack from the current node outwards. The first element
    with the tag's name is closed: implied end tags are generated and the
    stack is popped through it, with a parse error if it was not the
    current node. Reaching a special element first is a parse error and
    the token is ignored.
    """
    tagName = token["name"]
    for openNode in reversed(self.tree.openElements[:]):
        if openNode.name != tagName:
            if openNode.nameTuple in specialElements:
                self.parser.parseError("unexpected-end-tag",
                                       {"name": tagName})
                break
            continue
        self.tree.generateImpliedEndTags(exclude=tagName)
        if self.tree.openElements[-1].name != tagName:
            self.parser.parseError("unexpected-end-tag", {"name": tagName})
        # Pop until the popped element is the matched node itself.
        while self.tree.openElements.pop() != openNode:
            pass
        break

class TextPhase(Phase):
    """Phase active while an element's RCDATA/RAWTEXT content is consumed.

    Every way of leaving this phase pops the current element and hands
    control back to ``parser.originalPhase``.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)
        startDispatch = utils.MethodDispatcher([])
        startDispatch.default = self.startTagOther
        self.startTagHandler = startDispatch

        endDispatch = utils.MethodDispatcher([
            ("script", self.endTagScript)])
        endDispatch.default = self.endTagOther
        self.endTagHandler = endDispatch

    def processCharacters(self, token):
        """Append the character data to the current element."""
        text = token["data"]
        self.tree.insertText(text)

    def processEOF(self):
        """Report the unclosed element, pop it and return True after
        restoring the original phase."""
        unclosedName = self.tree.openElements[-1].name
        self.parser.parseError("expected-named-closing-tag-but-got-eof",
                               {"name": unclosedName})
        self.tree.openElements.pop()
        self.parser.phase = self.parser.originalPhase
        return True

    def startTagOther(self, token):
        """Start tags are never expected while in this phase."""
        assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name']

    def endTagScript(self, token):
        """Pop the script element and restore the original phase."""
        closed = self.tree.openElements.pop()
        assert closed.name == "script"
        self.parser.phase = self.parser.originalPhase
        # The rest of this method is all stuff that only happens if
        # document.write works

    def endTagOther(self, token):
        """Pop the current element and restore the original phase."""
        stack = self.tree.openElements
        stack.pop()
        self.parser.phase = self.parser.originalPhase

class InTablePhase(Phase):
    """Handler for tokens seen directly inside a <table>.

    Table-structure tags move the parser into the matching table phase.
    Other tags and text are handed to the "inBody" phase with
    ``tree.insertFromTable`` set for the duration of that call.
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#in-table

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)
        self.startTagHandler = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("caption", self.startTagCaption),
            ("colgroup", self.startTagColgroup),
            ("col", self.startTagCol),
            (("tbody", "tfoot", "thead"), self.startTagRowGroup),
            (("td", "th", "tr"), self.startTagImplyTbody),
            ("table", self.startTagTable),
            (("style", "script"), self.startTagStyleScript),
            ("input", self.startTagInput),
            ("form", self.startTagForm)
        ])
        self.startTagHandler.default = self.startTagOther

        self.endTagHandler = utils.MethodDispatcher([
            ("table", self.endTagTable),
            (("body", "caption", "col", "colgroup", "html", "tbody", "td",
              "tfoot", "th", "thead", "tr"), self.endTagIgnore)
        ])
        self.endTagHandler.default = self.endTagOther

    # helper methods
    def clearStackToTableContext(self):
        """Pop open elements until the current node is <table> or <html>."""
        # "clear the stack back to a table context"
        while self.tree.openElements[-1].name not in ("table", "html"):
            # self.parser.parseError("unexpected-implied-end-tag-in-table",
            #  {"name":  self.tree.openElements[-1].name})
            self.tree.openElements.pop()
        # When the current node is <html> it's an innerHTML case

    # processing methods
    def processEOF(self):
        """Report EOF inside a table unless the current node is <html>."""
        if self.tree.openElements[-1].name != "html":
            self.parser.parseError("eof-in-table")
        else:
            assert self.parser.innerHTML
        # Stop parsing

    def processSpaceCharacters(self, token):
        """Switch to "inTableText", remembering this phase, and let it
        handle the whitespace token."""
        originalPhase = self.parser.phase
        self.parser.phase = self.parser.phases["inTableText"]
        self.parser.phase.originalPhase = originalPhase
        self.parser.phase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Switch to "inTableText", remembering this phase, and let it
        handle the character token."""
        originalPhase = self.parser.phase
        self.parser.phase = self.parser.phases["inTableText"]
        self.parser.phase.originalPhase = originalPhase
        self.parser.phase.processCharacters(token)

    def insertText(self, token):
        """Insert text via the "inBody" phase with insertFromTable set."""
        # If we get here there must be at least one non-whitespace character
        # Do the table magic!
        self.tree.insertFromTable = True
        self.parser.phases["inBody"].processCharacters(token)
        self.tree.insertFromTable = False

    def startTagCaption(self, token):
        """Insert a <caption>, push a Marker and enter "inCaption"."""
        self.clearStackToTableContext()
        self.tree.activeFormattingElements.append(Marker)
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inCaption"]

    def startTagColgroup(self, token):
        """Insert a <colgroup> and enter "inColumnGroup"."""
        self.clearStackToTableContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inColumnGroup"]

    def startTagCol(self, token):
        """Imply a <colgroup>, then return the token to the caller."""
        self.startTagColgroup(impliedTagToken("colgroup", "StartTag"))
        return token

    def startTagRowGroup(self, token):
        """Insert a <tbody>/<tfoot>/<thead> and enter "inTableBody"."""
        self.clearStackToTableContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inTableBody"]

    def startTagImplyTbody(self, token):
        """Imply a <tbody>, then return the token to the caller."""
        self.startTagRowGroup(impliedTagToken("tbody", "StartTag"))
        return token

    def startTagTable(self, token):
        """A nested <table> start tag implies </table>.

        The token is returned to the caller unless parsing innerHTML.
        """
        self.parser.parseError("unexpected-start-tag-implies-end-tag",
                               {"startName": "table", "endName": "table"})
        self.parser.phase.processEndTag(impliedTagToken("table"))
        if not self.parser.innerHTML:
            return token

    def startTagStyleScript(self, token):
        """Delegate <style>/<script> to the "inHead" phase."""
        return self.parser.phases["inHead"].processStartTag(token)

    def startTagInput(self, token):
        """Insert a hidden <input> in place (with a parse error); any
        other input goes through startTagOther."""
        if ("type" in token["data"] and
                token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
            self.parser.parseError("unexpected-hidden-input-in-table")
            self.tree.insertElement(token)
            # XXX associate with form
            self.tree.openElements.pop()
        else:
            self.startTagOther(token)

    def startTagForm(self, token):
        """A <form> here is a parse error; if no form pointer is set, the
        form is inserted, recorded as the pointer and popped again."""
        self.parser.parseError("unexpected-form-in-table")
        if self.tree.formPointer is None:
            self.tree.insertElement(token)
            self.tree.formPointer = self.tree.openElements[-1]
            self.tree.openElements.pop()

    def startTagOther(self, token):
        """Process any other start tag via "inBody" with insertFromTable
        set, after reporting a parse error."""
        self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]})
        # Do the table magic!
        self.tree.insertFromTable = True
        self.parser.phases["inBody"].processStartTag(token)
        self.tree.insertFromTable = False

    def endTagTable(self, token):
        """Close the table and reset the insertion mode.

        Elements are popped up to and including the <table>. When no
        table is in table scope (innerHTML case), the token is a parse
        error and is ignored.
        """
        if self.tree.elementInScope("table", variant="table"):
            self.tree.generateImpliedEndTags()
            if self.tree.openElements[-1].name != "table":
                self.parser.parseError("end-tag-too-early-named",
                                       {"gotName": "table",
                                        "expectedName": self.tree.openElements[-1].name})
            while self.tree.openElements[-1].name != "table":
                self.tree.openElements.pop()
            # The loop stops with <table> as the current node; the table
            # itself must also leave the stack before the mode is reset.
            self.tree.openElements.pop()
            self.parser.resetInsertionMode()
        else:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()

    def endTagIgnore(self, token):
        """Report and ignore an end tag that cannot close anything here."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def endTagOther(self, token):
        """Process any other end tag via "inBody" with insertFromTable
        set, after reporting a parse error."""
        self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]})
        # Do the table magic!
        self.tree.insertFromTable = True
        self.parser.phases["inBody"].processEndTag(token)
        self.tree.insertFromTable = False

class InTableTextPhase(Phase):
    """Buffers character tokens seen inside a table.

    Tokens are collected until a non-character token arrives; the buffer
    is then flushed and the parser returns to ``originalPhase``.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)
        self.originalPhase = None
        self.characterTokens = []

    def flushCharacters(self):
        """Emit the buffered text and empty the buffer.

        Text containing any non-space character goes through the
        "inTable" phase's insertText; pure, non-empty whitespace is
        inserted directly.
        """
        pieces = [pending["data"] for pending in self.characterTokens]
        text = "".join(pieces)
        hasNonSpace = any([char not in spaceCharacters for char in text])
        if hasNonSpace:
            merged = {"type": tokenTypes["Characters"], "data": text}
            self.parser.phases["inTable"].insertText(merged)
        elif text:
            self.tree.insertText(text)
        self.characterTokens = []

    def _flushAndLeave(self):
        """Flush buffered text and hand control back to originalPhase."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase

    def processComment(self, token):
        """Leave this phase and return the token to the caller."""
        self._flushAndLeave()
        return token

    def processEOF(self):
        """Leave this phase and return True."""
        self._flushAndLeave()
        return True

    def processCharacters(self, token):
        """Buffer the token, dropping a lone U+0000."""
        if token["data"] != "\u0000":
            self.characterTokens.append(token)

    def processSpaceCharacters(self, token):
        """Buffer the whitespace token."""
        # pretty sure we should never reach here
        self.characterTokens.append(token)
        # assert False

    def processStartTag(self, token):
        """Leave this phase and return the token to the caller."""
        self._flushAndLeave()
        return token

    def processEndTag(self, token):
        """Leave this phase and return the token to the caller."""
        self._flushAndLeave()
        return token

class InCaptionPhase(Phase):
    """Handler for tokens seen inside a table <caption>.

    Table-structure tags close the caption implicitly; everything else
    is delegated to the "inBody" phase.
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#in-caption

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        self.startTagHandler = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
              "thead", "tr"), self.startTagTableElement)
        ])
        self.startTagHandler.default = self.startTagOther

        self.endTagHandler = utils.MethodDispatcher([
            ("caption", self.endTagCaption),
            ("table", self.endTagTable),
            (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
              "thead", "tr"), self.endTagIgnore)
        ])
        self.endTagHandler.default = self.endTagOther

    def ignoreEndTagCaption(self):
        """Return True when no <caption> is in table scope."""
        return not self.tree.elementInScope("caption", variant="table")

    def processEOF(self):
        """Delegate EOF handling to the "inBody" phase."""
        self.parser.phases["inBody"].processEOF()

    def processCharacters(self, token):
        """Delegate character data to the "inBody" phase."""
        return self.parser.phases["inBody"].processCharacters(token)

    def startTagTableElement(self, token):
        """A table-structure start tag implies </caption>.

        The token is returned to the caller unless the implied end tag
        was ignored.
        """
        self.parser.parseError()
        # XXX Have to duplicate logic here to find out if the tag is ignored
        ignoreEndTag = self.ignoreEndTagCaption()
        self.parser.phase.processEndTag(impliedTagToken("caption"))
        if not ignoreEndTag:
            return token

    def startTagOther(self, token):
        """Delegate any other start tag to the "inBody" phase."""
        return self.parser.phases["inBody"].processStartTag(token)

    def endTagCaption(self, token):
        """Close the caption and return to the "inTable" phase.

        Elements are popped up to and including the <caption>, and the
        active formatting elements are cleared. With no caption in table
        scope (innerHTML case) the token is a parse error and is ignored.
        """
        if not self.ignoreEndTagCaption():
            # AT this code is quite similar to endTagTable in "InTable"
            self.tree.generateImpliedEndTags()
            if self.tree.openElements[-1].name != "caption":
                self.parser.parseError("expected-one-end-tag-but-got-another",
                                       {"gotName": "caption",
                                        "expectedName": self.tree.openElements[-1].name})
            while self.tree.openElements[-1].name != "caption":
                self.tree.openElements.pop()
            # The loop stops with <caption> as the current node; the
            # caption itself must also leave the stack.
            self.tree.openElements.pop()
            self.tree.clearActiveFormattingElements()
            self.parser.phase = self.parser.phases["inTable"]
        else:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()

    def endTagTable(self, token):
        """A </table> here implies </caption>.

        The token is returned to the caller unless the implied end tag
        was ignored.
        """
        self.parser.parseError()
        ignoreEndTag = self.ignoreEndTagCaption()
        self.parser.phase.processEndTag(impliedTagToken("caption"))
        if not ignoreEndTag:
            return token

    def endTagIgnore(self, token):
        """Report and ignore an end tag that cannot close anything here."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def endTagOther(self, token):
        """Delegate any other end tag to the "inBody" phase."""
        return self.parser.phases["inBody"].processEndTag(token)

class InColumnGroupPhase(Phase):
    """Token handlers used while a ``colgroup`` element is open.

    Apart from ``col`` start tags and ``colgroup``/``col`` end tags, every
    token implies ``</colgroup>`` and is returned unless that implied end
    tag was ignored.
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#in-column

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        self.startTagHandler = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("col", self.startTagCol)
        ])
        self.startTagHandler.default = self.startTagOther

        self.endTagHandler = utils.MethodDispatcher([
            ("colgroup", self.endTagColgroup),
            ("col", self.endTagCol)
        ])
        self.endTagHandler.default = self.endTagOther

    def ignoreEndTagColgroup(self):
        """Return True when the current node is the ``html`` element."""
        return self.tree.openElements[-1].name == "html"

    def processEOF(self):
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
            return
        else:
            ignoreEndTag = self.ignoreEndTagColgroup()
            self.endTagColgroup(impliedTagToken("colgroup"))
            if not ignoreEndTag:
                return True

    def processCharacters(self, token):
        ignoreEndTag = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        if not ignoreEndTag:
            return token

    def startTagCol(self, token):
        """Insert a ``col`` element and immediately close it."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        # The HTML spec's "in column group" rules say to acknowledge the
        # token's self-closing flag here, so that ``<col/>`` is not treated
        # as a non-void element written with a trailing solidus.
        token["selfClosingAcknowledged"] = True

    def startTagOther(self, token):
        ignoreEndTag = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        if not ignoreEndTag:
            return token

    def endTagColgroup(self, token):
        """Pop the current node and switch to "inTable", unless ignored."""
        if self.ignoreEndTagColgroup():
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
        else:
            self.tree.openElements.pop()
            self.parser.phase = self.parser.phases["inTable"]

    def endTagCol(self, token):
        self.parser.parseError("no-end-tag", {"name": "col"})

    def endTagOther(self, token):
        ignoreEndTag = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        if not ignoreEndTag:
            return token

class InTableBodyPhase(Phase):
    """Token handlers used while a ``tbody``/``thead``/``tfoot`` is open.

    Tokens without a dedicated handler are forwarded to the "inTable" phase.
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#in-table0

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTags = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("tr", self.startTagTr),
            (("td", "th"), self.startTagTableCell),
            (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
             self.startTagTableOther)
        ])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

        endTags = utils.MethodDispatcher([
            (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
            ("table", self.endTagTable),
            (("body", "caption", "col", "colgroup", "html", "td", "th",
              "tr"), self.endTagIgnore)
        ])
        endTags.default = self.endTagOther
        self.endTagHandler = endTags

    # helper methods
    def clearStackToTableBodyContext(self):
        """Pop open elements until a row group or ``html`` is current."""
        boundary = ("tbody", "tfoot", "thead", "html")
        # No parse error is reported for the popped elements (the original
        # author left an "unexpected-implied-end-tag-in-table" call disabled).
        while self.tree.openElements[-1].name not in boundary:
            self.tree.openElements.pop()
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML

    # the rest
    def processEOF(self):
        self.parser.phases["inTable"].processEOF()

    def processSpaceCharacters(self, token):
        return self.parser.phases["inTable"].processSpaceCharacters(token)

    def processCharacters(self, token):
        return self.parser.phases["inTable"].processCharacters(token)

    def startTagTr(self, token):
        """Insert the row element and switch to the "inRow" phase."""
        self.clearStackToTableBodyContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inRow"]

    def startTagTableCell(self, token):
        """Report the stray cell, imply ``<tr>`` and return the token."""
        self.parser.parseError("unexpected-cell-in-table-body",
                               {"name": token["name"]})
        impliedRow = impliedTagToken("tr", "StartTag")
        self.startTagTr(impliedRow)
        return token

    def startTagTableOther(self, token):
        """Close the open row group, then return the token."""
        # XXX AT Any ideas on how to share this with endTagTable?
        rowGroupInScope = any(
            self.tree.elementInScope(groupName, variant="table")
            for groupName in ("tbody", "thead", "tfoot"))
        if not rowGroupInScope:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.clearStackToTableBodyContext()
        currentName = self.tree.openElements[-1].name
        self.endTagTableRowGroup(impliedTagToken(currentName))
        return token

    def startTagOther(self, token):
        return self.parser.phases["inTable"].processStartTag(token)

    def endTagTableRowGroup(self, token):
        """Pop the named row group and switch back to "inTable"."""
        if not self.tree.elementInScope(token["name"], variant="table"):
            self.parser.parseError("unexpected-end-tag-in-table-body",
                                   {"name": token["name"]})
            return
        self.clearStackToTableBodyContext()
        self.tree.openElements.pop()
        self.parser.phase = self.parser.phases["inTable"]

    def endTagTable(self, token):
        """Close the open row group, then return the token."""
        rowGroupInScope = any(
            self.tree.elementInScope(groupName, variant="table")
            for groupName in ("tbody", "thead", "tfoot"))
        if not rowGroupInScope:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.clearStackToTableBodyContext()
        currentName = self.tree.openElements[-1].name
        self.endTagTableRowGroup(impliedTagToken(currentName))
        return token

    def endTagIgnore(self, token):
        self.parser.parseError("unexpected-end-tag-in-table-body",
                               {"name": token["name"]})

    def endTagOther(self, token):
        return self.parser.phases["inTable"].processEndTag(token)

class InRowPhase(Phase):
    """Token handlers used while a ``tr`` element is open.

    Tokens without a dedicated handler are forwarded to the "inTable" phase.
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#in-row

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTags = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (("td", "th"), self.startTagTableCell),
            (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
              "tr"), self.startTagTableOther)
        ])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

        endTags = utils.MethodDispatcher([
            ("tr", self.endTagTr),
            ("table", self.endTagTable),
            (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
            (("body", "caption", "col", "colgroup", "html", "td", "th"),
             self.endTagIgnore)
        ])
        endTags.default = self.endTagOther
        self.endTagHandler = endTags

    # helper methods (XXX unify this with other table helper methods)
    def clearStackToTableRowContext(self):
        """Pop open elements until ``tr`` or ``html`` is the current node.

        A parse error is reported for every element popped.
        """
        while True:
            currentName = self.tree.openElements[-1].name
            if currentName in ("tr", "html"):
                break
            self.parser.parseError("unexpected-implied-end-tag-in-table-row",
                                   {"name": currentName})
            self.tree.openElements.pop()

    def ignoreEndTagTr(self):
        """Return True when no ``tr`` element is in table scope."""
        return not self.tree.elementInScope("tr", variant="table")

    # the rest
    def processEOF(self):
        self.parser.phases["inTable"].processEOF()

    def processSpaceCharacters(self, token):
        return self.parser.phases["inTable"].processSpaceCharacters(token)

    def processCharacters(self, token):
        return self.parser.phases["inTable"].processCharacters(token)

    def startTagTableCell(self, token):
        """Insert the cell, switch to "inCell" and push a formatting marker."""
        self.clearStackToTableRowContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inCell"]
        self.tree.activeFormattingElements.append(Marker)

    def startTagTableOther(self, token):
        """Imply ``</tr>``; return the token unless that was ignored."""
        wasIgnored = self.ignoreEndTagTr()
        self.endTagTr(impliedTagToken("tr"))
        # XXX how are we sure it's always ignored in the innerHTML case?
        if wasIgnored:
            return None
        return token

    def startTagOther(self, token):
        return self.parser.phases["inTable"].processStartTag(token)

    def endTagTr(self, token):
        """Pop the open row and switch to the "inTableBody" phase."""
        if self.ignoreEndTagTr():
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        self.clearStackToTableRowContext()
        self.tree.openElements.pop()
        self.parser.phase = self.parser.phases["inTableBody"]

    def endTagTable(self, token):
        """Imply ``</tr>``; return the token unless that was ignored."""
        wasIgnored = self.ignoreEndTagTr()
        self.endTagTr(impliedTagToken("tr"))
        # Reprocess the current tag if the tr end tag was not ignored
        # XXX how are we sure it's always ignored in the innerHTML case?
        if wasIgnored:
            return None
        return token

    def endTagTableRowGroup(self, token):
        """Imply ``</tr>`` and return the token if the group is in scope."""
        if not self.tree.elementInScope(token["name"], variant="table"):
            self.parser.parseError()
            return None
        self.endTagTr(impliedTagToken("tr"))
        return token

    def endTagIgnore(self, token):
        self.parser.parseError("unexpected-end-tag-in-table-row",
                               {"name": token["name"]})

    def endTagOther(self, token):
        return self.parser.phases["inTable"].processEndTag(token)

class InCellPhase(Phase):
    """Token handlers used while a ``td`` or ``th`` element is open.

    Tokens without a dedicated handler are forwarded to the "inBody" phase.
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#in-cell

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTags = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
              "thead", "tr"), self.startTagTableOther)
        ])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

        endTags = utils.MethodDispatcher([
            (("td", "th"), self.endTagTableCell),
            (("body", "caption", "col", "colgroup", "html"), self.endTagIgnore),
            (("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply)
        ])
        endTags.default = self.endTagOther
        self.endTagHandler = endTags

    # helper
    def closeCell(self):
        """Imply the end tag of whichever cell type is in table scope.

        ``td`` is checked first; ``th`` is only considered when no ``td`` is
        in scope.
        """
        for cellName in ("td", "th"):
            if self.tree.elementInScope(cellName, variant="table"):
                self.endTagTableCell(impliedTagToken(cellName))
                break

    # the rest
    def processEOF(self):
        self.parser.phases["inBody"].processEOF()

    def processCharacters(self, token):
        return self.parser.phases["inBody"].processCharacters(token)

    def startTagTableOther(self, token):
        """Close the open cell and return the token."""
        cellInScope = (self.tree.elementInScope("td", variant="table") or
                       self.tree.elementInScope("th", variant="table"))
        if not cellInScope:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.closeCell()
        return token

    def startTagOther(self, token):
        return self.parser.phases["inBody"].processStartTag(token)

    def endTagTableCell(self, token):
        """Close the named cell and switch back to the "inRow" phase."""
        cellName = token["name"]
        if not self.tree.elementInScope(cellName, variant="table"):
            self.parser.parseError("unexpected-end-tag", {"name": cellName})
            return
        self.tree.generateImpliedEndTags(cellName)
        if self.tree.openElements[-1].name == cellName:
            self.tree.openElements.pop()
        else:
            self.parser.parseError("unexpected-cell-end-tag",
                                   {"name": cellName})
            # Pop up to and including the cell element itself.
            while self.tree.openElements.pop().name != cellName:
                pass
        self.tree.clearActiveFormattingElements()
        self.parser.phase = self.parser.phases["inRow"]

    def endTagIgnore(self, token):
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def endTagImply(self, token):
        """Close the cell and return the token if the element is in scope."""
        if not self.tree.elementInScope(token["name"], variant="table"):
            # sometimes innerHTML case
            self.parser.parseError()
            return None
        self.closeCell()
        return token

    def endTagOther(self, token):
        return self.parser.phases["inBody"].processEndTag(token)

class InSelectPhase(Phase):
    """Token handlers used while a ``select`` element is open.

    Start and end tags without a dedicated handler are reported as parse
    errors and otherwise dropped.
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#in-select

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTags = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("option", self.startTagOption),
            ("optgroup", self.startTagOptgroup),
            ("select", self.startTagSelect),
            (("input", "keygen", "textarea"), self.startTagInput),
            ("script", self.startTagScript)
        ])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

        endTags = utils.MethodDispatcher([
            ("option", self.endTagOption),
            ("optgroup", self.endTagOptgroup),
            ("select", self.endTagSelect)
        ])
        endTags.default = self.endTagOther
        self.endTagHandler = endTags

    def processEOF(self):
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
        else:
            self.parser.parseError("eof-in-select")

    def processCharacters(self, token):
        """Insert the text unless the token's data is exactly U+0000."""
        if token["data"] != "\u0000":
            self.tree.insertText(token["data"])

    def startTagOption(self, token):
        # We need to imply </option> if <option> is the current node.
        currentIsOption = self.tree.openElements[-1].name == "option"
        if currentIsOption:
            self.tree.openElements.pop()
        self.tree.insertElement(token)

    def startTagOptgroup(self, token):
        """Close an open ``option`` and then ``optgroup`` before inserting."""
        for impliedName in ("option", "optgroup"):
            if self.tree.openElements[-1].name == impliedName:
                self.tree.openElements.pop()
        self.tree.insertElement(token)

    def startTagSelect(self, token):
        """Treat a nested ``<select>`` as ``</select>``."""
        self.parser.parseError("unexpected-select-in-select")
        self.endTagSelect(impliedTagToken("select"))

    def startTagInput(self, token):
        """Close the select and return the token (parse error)."""
        self.parser.parseError("unexpected-input-in-select")
        if not self.tree.elementInScope("select", variant="select"):
            assert self.parser.innerHTML
            return None
        self.endTagSelect(impliedTagToken("select"))
        return token

    def startTagScript(self, token):
        return self.parser.phases["inHead"].processStartTag(token)

    def startTagOther(self, token):
        self.parser.parseError("unexpected-start-tag-in-select",
                               {"name": token["name"]})

    def endTagOption(self, token):
        if self.tree.openElements[-1].name != "option":
            self.parser.parseError("unexpected-end-tag-in-select",
                                   {"name": "option"})
            return
        self.tree.openElements.pop()

    def endTagOptgroup(self, token):
        # </optgroup> implicitly closes <option>
        currentName = self.tree.openElements[-1].name
        if (currentName == "option" and
                self.tree.openElements[-2].name == "optgroup"):
            self.tree.openElements.pop()
        # It also closes </optgroup>, but nothing else
        if self.tree.openElements[-1].name != "optgroup":
            self.parser.parseError("unexpected-end-tag-in-select",
                                   {"name": "optgroup"})
            return
        self.tree.openElements.pop()

    def endTagSelect(self, token):
        """Pop through the ``select`` element and reset the insertion mode."""
        if not self.tree.elementInScope("select", variant="select"):
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        while self.tree.openElements.pop().name != "select":
            pass
        self.parser.resetInsertionMode()

    def endTagOther(self, token):
        self.parser.parseError("unexpected-end-tag-in-select",
                               {"name": token["name"]})

class InSelectInTablePhase(Phase):
    """Token handlers used for a ``select`` element nested in a table.

    Table-structure tags close the select; every other token is handled by
    the "inSelect" phase.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        tableTags = ("caption", "table", "tbody", "tfoot", "thead", "tr",
                     "td", "th")

        self.startTagHandler = utils.MethodDispatcher([
            (tableTags, self.startTagTable)
        ])
        self.startTagHandler.default = self.startTagOther

        self.endTagHandler = utils.MethodDispatcher([
            (tableTags, self.endTagTable)
        ])
        self.endTagHandler.default = self.endTagOther

    def processEOF(self):
        self.parser.phases["inSelect"].processEOF()

    def processCharacters(self, token):
        return self.parser.phases["inSelect"].processCharacters(token)

    def startTagTable(self, token):
        """Report the error, imply ``</select>`` and return the token."""
        self.parser.parseError(
            "unexpected-table-element-start-tag-in-select-in-table",
            {"name": token["name"]})
        self.endTagOther(impliedTagToken("select"))
        return token

    def startTagOther(self, token):
        return self.parser.phases["inSelect"].processStartTag(token)

    def endTagTable(self, token):
        """Report the error; close the select if the element is in scope."""
        self.parser.parseError(
            "unexpected-table-element-end-tag-in-select-in-table",
            {"name": token["name"]})
        if not self.tree.elementInScope(token["name"], variant="table"):
            return None
        self.endTagOther(impliedTagToken("select"))
        return token

    def endTagOther(self, token):
        return self.parser.phases["inSelect"].processEndTag(token)

class InForeignContentPhase(Phase):
    """Token handlers for elements outside the tree's default namespace.

    Start tags are adjusted for MathML or SVG according to the namespace of
    the current node; the names in ``breakoutElements`` (and ``font`` with a
    color/face/size attribute) pop back out of the foreign content instead.
    """

    breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
                                  "center", "code", "dd", "div", "dl", "dt",
                                  "em", "embed", "h1", "h2", "h3",
                                  "h4", "h5", "h6", "head", "hr", "i", "img",
                                  "li", "listing", "menu", "meta", "nobr",
                                  "ol", "p", "pre", "ruby", "s", "small",
                                  "span", "strong", "strike", "sub", "sup",
                                  "table", "tt", "u", "ul", "var"])

    # Lower-cased SVG tag names mapped to their mixed-case spelling. Kept at
    # class level so the table is built once rather than on every start tag.
    svgTagNameReplacements = {
        "altglyph": "altGlyph",
        "altglyphdef": "altGlyphDef",
        "altglyphitem": "altGlyphItem",
        "animatecolor": "animateColor",
        "animatemotion": "animateMotion",
        "animatetransform": "animateTransform",
        "clippath": "clipPath",
        "feblend": "feBlend",
        "fecolormatrix": "feColorMatrix",
        "fecomponenttransfer": "feComponentTransfer",
        "fecomposite": "feComposite",
        "feconvolvematrix": "feConvolveMatrix",
        "fediffuselighting": "feDiffuseLighting",
        "fedisplacementmap": "feDisplacementMap",
        "fedistantlight": "feDistantLight",
        "feflood": "feFlood",
        "fefunca": "feFuncA",
        "fefuncb": "feFuncB",
        "fefuncg": "feFuncG",
        "fefuncr": "feFuncR",
        "fegaussianblur": "feGaussianBlur",
        "feimage": "feImage",
        "femerge": "feMerge",
        "femergenode": "feMergeNode",
        "femorphology": "feMorphology",
        "feoffset": "feOffset",
        "fepointlight": "fePointLight",
        "fespecularlighting": "feSpecularLighting",
        "fespotlight": "feSpotLight",
        "fetile": "feTile",
        "feturbulence": "feTurbulence",
        "foreignobject": "foreignObject",
        "glyphref": "glyphRef",
        "lineargradient": "linearGradient",
        "radialgradient": "radialGradient",
        "textpath": "textPath",
    }

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

    def adjustSVGTagNames(self, token):
        """Rewrite ``token["name"]`` in place to its mixed-case SVG form."""
        replacements = self.svgTagNameReplacements
        if token["name"] in replacements:
            token["name"] = replacements[token["name"]]

    def processCharacters(self, token):
        """Replace a lone U+0000 with U+FFFD and update ``framesetOK``."""
        if token["data"] == "\u0000":
            token["data"] = "\uFFFD"
        elif (self.parser.framesetOK and
              any(char not in spaceCharacters for char in token["data"])):
            self.parser.framesetOK = False
        Phase.processCharacters(self, token)

    def processStartTag(self, token):
        """Insert a foreign element, or break out for HTML-only tags.

        Returns the token when it breaks out of the foreign content, and
        ``None`` after inserting a foreign element.
        """
        currentNode = self.tree.openElements[-1]
        if (token["name"] in self.breakoutElements or
            (token["name"] == "font" and
             set(token["data"].keys()) & set(["color", "face", "size"]))):
            self.parser.parseError("unexpected-html-element-in-foreign-content",
                                   {"name": token["name"]})
            # Pop until the current node is in the default namespace or is
            # an HTML / MathML text integration point.
            while (self.tree.openElements[-1].namespace !=
                   self.tree.defaultNamespace and
                   not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
                   not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
                self.tree.openElements.pop()
            return token

        else:
            if currentNode.namespace == namespaces["mathml"]:
                self.parser.adjustMathMLAttributes(token)
            elif currentNode.namespace == namespaces["svg"]:
                self.adjustSVGTagNames(token)
                self.parser.adjustSVGAttributes(token)
            self.parser.adjustForeignAttributes(token)
            token["namespace"] = currentNode.namespace
            self.tree.insertElement(token)
            if token["selfClosing"]:
                self.tree.openElements.pop()
                token["selfClosingAcknowledged"] = True

    def processEndTag(self, token):
        """Close the matching foreign element or defer to the current phase.

        Walks down the stack of open elements from the current node. A node
        whose lower-cased name equals the token name is popped together with
        everything above it; on reaching a node in the default namespace the
        token is handed to the parser's current phase instead.
        """
        nodeIndex = len(self.tree.openElements) - 1
        node = self.tree.openElements[-1]
        # Compare case-insensitively, as the loop below does: node names may
        # be mixed case (e.g. "foreignObject") while the token name is
        # matched in lower case, so a plain comparison would report a parse
        # error for a correctly matching end tag.
        if node.name.translate(asciiUpper2Lower) != token["name"]:
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        while True:
            if node.name.translate(asciiUpper2Lower) == token["name"]:
                # XXX this isn't in the spec but it seems necessary
                if self.parser.phase == self.parser.phases["inTableText"]:
                    self.parser.phase.flushCharacters()
                    self.parser.phase = self.parser.phase.originalPhase
                while self.tree.openElements.pop() != node:
                    assert self.tree.openElements
                new_token = None
                break
            nodeIndex -= 1

            node = self.tree.openElements[nodeIndex]
            if node.namespace != self.tree.defaultNamespace:
                continue
            else:
                new_token = self.parser.phase.processEndTag(token)
                break
        return new_token

class AfterBodyPhase(Phase):
    """Token handlers used after the ``body`` element has been closed.

    Characters and unexpected tags raise a parse error, switch the parser
    back to the "inBody" phase and return the token.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTags = utils.MethodDispatcher([
            ("html", self.startTagHtml)
        ])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

        endTags = utils.MethodDispatcher([("html", self.endTagHtml)])
        endTags.default = self.endTagOther
        self.endTagHandler = endTags

    def processEOF(self):
        # Stop parsing
        pass

    def processComment(self, token):
        # This is needed because data is to be appended to the <html> element
        # here and not to whatever is currently open.
        htmlElement = self.tree.openElements[0]
        self.tree.insertComment(token, htmlElement)

    def processCharacters(self, token):
        self.parser.parseError("unexpected-char-after-body")
        self.parser.phase = self.parser.phases["inBody"]
        return token

    def startTagHtml(self, token):
        return self.parser.phases["inBody"].processStartTag(token)

    def startTagOther(self, token):
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-after-body", errorData)
        self.parser.phase = self.parser.phases["inBody"]
        return token

    def endTagHtml(self, name):
        """Switch to "afterAfterBody", or report an error for innerHTML."""
        if not self.parser.innerHTML:
            self.parser.phase = self.parser.phases["afterAfterBody"]
        else:
            self.parser.parseError("unexpected-end-tag-after-body-innerhtml")

    def endTagOther(self, token):
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-after-body", errorData)
        self.parser.phase = self.parser.phases["inBody"]
        return token

class InFramesetPhase(Phase):
    """Token handlers used while a ``frameset`` element is open.

    Characters and tags without a dedicated handler are reported as parse
    errors and otherwise dropped.
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        self.startTagHandler = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("frameset", self.startTagFrameset),
            ("frame", self.startTagFrame),
            ("noframes", self.startTagNoframes)
        ])
        self.startTagHandler.default = self.startTagOther

        self.endTagHandler = utils.MethodDispatcher([
            ("frameset", self.endTagFrameset)
        ])
        self.endTagHandler.default = self.endTagOther

    def processEOF(self):
        if self.tree.openElements[-1].name != "html":
            self.parser.parseError("eof-in-frameset")
        else:
            assert self.parser.innerHTML

    def processCharacters(self, token):
        self.parser.parseError("unexpected-char-in-frameset")

    def startTagFrameset(self, token):
        self.tree.insertElement(token)

    def startTagFrame(self, token):
        """Insert a ``frame`` element and immediately close it."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        # The HTML spec's "in frameset" rules say to acknowledge the token's
        # self-closing flag here, so that ``<frame/>`` is not treated as a
        # non-void element written with a trailing solidus.
        token["selfClosingAcknowledged"] = True

    def startTagNoframes(self, token):
        return self.parser.phases["inBody"].processStartTag(token)

    def startTagOther(self, token):
        self.parser.parseError("unexpected-start-tag-in-frameset",
                               {"name": token["name"]})

    def endTagFrameset(self, token):
        """Pop the current frameset; maybe switch to "afterFrameset"."""
        if self.tree.openElements[-1].name == "html":
            # innerHTML case
            self.parser.parseError("unexpected-frameset-in-frameset-innerhtml")
        else:
            self.tree.openElements.pop()
        if (not self.parser.innerHTML and
                self.tree.openElements[-1].name != "frameset"):
            # If we're not in innerHTML mode and the current node is not a
            # "frameset" element (anymore) then switch.
            self.parser.phase = self.parser.phases["afterFrameset"]

    def endTagOther(self, token):
        self.parser.parseError("unexpected-end-tag-in-frameset",
                               {"name": token["name"]})

class AfterFramesetPhase(Phase):
    """Token handlers used after the outermost ``frameset`` was closed.

    Only ``noframes`` start tags and the ``html`` end tag have an effect;
    characters and all other tags are reported as parse errors.
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#after3

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTags = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("noframes", self.startTagNoframes)
        ])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

        endTags = utils.MethodDispatcher([
            ("html", self.endTagHtml)
        ])
        endTags.default = self.endTagOther
        self.endTagHandler = endTags

    def processEOF(self):
        # Stop parsing
        pass

    def processCharacters(self, token):
        self.parser.parseError("unexpected-char-after-frameset")

    def startTagNoframes(self, token):
        headPhase = self.parser.phases["inHead"]
        return headPhase.processStartTag(token)

    def startTagOther(self, token):
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-after-frameset",
                               errorData)

    def endTagHtml(self, token):
        """Switch the parser to the "afterAfterFrameset" phase."""
        self.parser.phase = self.parser.phases["afterAfterFrameset"]

    def endTagOther(self, token):
        errorData = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-after-frameset",
                               errorData)

class AfterAfterBodyPhase(Phase):
    """Token handlers used after ``</html>`` has closed a body document.

    Comments are attached to the document; characters and tags other than
    an ``html`` start tag raise a parse error, switch the parser back to the
    "inBody" phase and return the token.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTags = utils.MethodDispatcher([
            ("html", self.startTagHtml)
        ])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

    def processEOF(self):
        pass

    def processComment(self, token):
        document = self.tree.document
        self.tree.insertComment(token, document)

    def processSpaceCharacters(self, token):
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processSpaceCharacters(token)

    def processCharacters(self, token):
        self.parser.parseError("expected-eof-but-got-char")
        self.parser.phase = self.parser.phases["inBody"]
        return token

    def startTagHtml(self, token):
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processStartTag(token)

    def startTagOther(self, token):
        errorData = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-start-tag", errorData)
        self.parser.phase = self.parser.phases["inBody"]
        return token

    def processEndTag(self, token):
        errorData = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-end-tag", errorData)
        self.parser.phase = self.parser.phases["inBody"]
        return token

class AfterAfterFramesetPhase(Phase):
    """Token handlers used after ``</html>`` has closed a frameset document.

    Comments are attached to the document; characters, end tags and start
    tags other than ``html``/``noframes`` are reported as parse errors and
    the parser stays in this phase.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startTags = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("noframes", self.startTagNoFrames)
        ])
        startTags.default = self.startTagOther
        self.startTagHandler = startTags

    def processEOF(self):
        pass

    def processComment(self, token):
        document = self.tree.document
        self.tree.insertComment(token, document)

    def processSpaceCharacters(self, token):
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processSpaceCharacters(token)

    def processCharacters(self, token):
        self.parser.parseError("expected-eof-but-got-char")

    def startTagHtml(self, token):
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processStartTag(token)

    def startTagNoFrames(self, token):
        headPhase = self.parser.phases["inHead"]
        return headPhase.processStartTag(token)

    def startTagOther(self, token):
        errorData = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-start-tag", errorData)

    def processEndTag(self, token):
        errorData = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-end-tag", errorData)

# Result of the enclosing function: a dict from phase name to the class
# implementing that phase. The keys match the strings used in the
# ``self.parser.phases[...]`` lookups of the phase classes; presumably the
# parser instantiates these classes to build that mapping -- confirm
# against the caller.
+ return {

+ "initial": InitialPhase,

+ "beforeHtml": BeforeHtmlPhase,

+ "beforeHead": BeforeHeadPhase,

+ "inHead": InHeadPhase,

# NOTE(review): the entry below is disabled, so no "inHeadNoscript" phase
# is registered.
+ # XXX "inHeadNoscript": InHeadNoScriptPhase,

+ "afterHead": AfterHeadPhase,

+ "inBody": InBodyPhase,

+ "text": TextPhase,

+ "inTable": InTablePhase,

+ "inTableText": InTableTextPhase,

+ "inCaption": InCaptionPhase,

+ "inColumnGroup": InColumnGroupPhase,

+ "inTableBody": InTableBodyPhase,

+ "inRow": InRowPhase,

+ "inCell": InCellPhase,

+ "inSelect": InSelectPhase,

+ "inSelectInTable": InSelectInTablePhase,

+ "inForeignContent": InForeignContentPhase,

+ "afterBody": AfterBodyPhase,

+ "inFrameset": InFramesetPhase,

+ "afterFrameset": AfterFramesetPhase,

+ "afterAfterBody": AfterAfterBodyPhase,

+ "afterAfterFrameset": AfterAfterFramesetPhase,

+ # XXX after after frameset

+ }

def impliedTagToken(name, type="EndTag", attributes=None,
                    selfClosing=False):
    """Build a tag token dict for a tag that did not come from the input.

    ``type`` is a key into ``tokenTypes`` (an end tag by default).
    ``attributes`` becomes the token's ``"data"`` entry; a fresh empty dict
    is used when it is ``None`` so that tokens never share one default.
    """
    tokenData = {} if attributes is None else attributes
    return {
        "type": tokenTypes[type],
        "name": name,
        "data": tokenData,
        "selfClosing": selfClosing,
    }

class ParseError(Exception):
    """Error in parsed document"""