| Index: third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/html5lib/html5lib/treebuilders/_base.py
|
| diff --git a/third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/html5lib/html5lib/treebuilders/_base.py b/third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/html5lib/html5lib/treebuilders/_base.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..8b97cc11a21a5cd41c7a1ef836a5a2ac9baf873d
|
| --- /dev/null
|
| +++ b/third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/html5lib/html5lib/treebuilders/_base.py
|
| @@ -0,0 +1,377 @@
|
| +from __future__ import absolute_import, division, unicode_literals
|
| +from six import text_type
|
| +
|
| +from ..constants import scopingElements, tableInsertModeElements, namespaces
|
| +
|
| +# The scope markers are inserted when entering object elements,
|
| +# marquees, table cells, and table captions, and are used to prevent formatting
|
| +# from "leaking" into tables, object elements, and marquees.
|
| +Marker = None
|
| +
|
| +listElementsMap = {
|
| + None: (frozenset(scopingElements), False),
|
| + "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False),
|
| + "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"),
|
| + (namespaces["html"], "ul")])), False),
|
| + "table": (frozenset([(namespaces["html"], "html"),
|
| + (namespaces["html"], "table")]), False),
|
| + "select": (frozenset([(namespaces["html"], "optgroup"),
|
| + (namespaces["html"], "option")]), True)
|
| +}
|
| +
|
| +
|
| +class Node(object):
|
| + def __init__(self, name):
|
| + """Node representing an item in the tree.
|
| + name - The tag name associated with the node
|
| + parent - The parent of the current node (or None for the document node)
|
| + value - The value of the current node (applies to text nodes and
|
| + comments
|
| + attributes - a dict holding name, value pairs for attributes of the node
|
| + childNodes - a list of child nodes of the current node. This must
|
| + include all elements but not necessarily other node types
|
| + _flags - A list of miscellaneous flags that can be set on the node
|
| + """
|
| + self.name = name
|
| + self.parent = None
|
| + self.value = None
|
| + self.attributes = {}
|
| + self.childNodes = []
|
| + self._flags = []
|
| +
|
| + def __str__(self):
|
| + attributesStr = " ".join(["%s=\"%s\"" % (name, value)
|
| + for name, value in
|
| + self.attributes.items()])
|
| + if attributesStr:
|
| + return "<%s %s>" % (self.name, attributesStr)
|
| + else:
|
| + return "<%s>" % (self.name)
|
| +
|
| + def __repr__(self):
|
| + return "<%s>" % (self.name)
|
| +
|
| + def appendChild(self, node):
|
| + """Insert node as a child of the current node
|
| + """
|
| + raise NotImplementedError
|
| +
|
| + def insertText(self, data, insertBefore=None):
|
| + """Insert data as text in the current node, positioned before the
|
| + start of node insertBefore or to the end of the node's text.
|
| + """
|
| + raise NotImplementedError
|
| +
|
| + def insertBefore(self, node, refNode):
|
| + """Insert node as a child of the current node, before refNode in the
|
| + list of child nodes. Raises ValueError if refNode is not a child of
|
| + the current node"""
|
| + raise NotImplementedError
|
| +
|
| + def removeChild(self, node):
|
| + """Remove node from the children of the current node
|
| + """
|
| + raise NotImplementedError
|
| +
|
| + def reparentChildren(self, newParent):
|
| + """Move all the children of the current node to newParent.
|
| + This is needed so that trees that don't store text as nodes move the
|
| + text in the correct way
|
| + """
|
| + # XXX - should this method be made more general?
|
| + for child in self.childNodes:
|
| + newParent.appendChild(child)
|
| + self.childNodes = []
|
| +
|
| + def cloneNode(self):
|
| + """Return a shallow copy of the current node i.e. a node with the same
|
| + name and attributes but with no parent or child nodes
|
| + """
|
| + raise NotImplementedError
|
| +
|
| + def hasContent(self):
|
| + """Return true if the node has children or text, false otherwise
|
| + """
|
| + raise NotImplementedError
|
| +
|
| +
|
| +class ActiveFormattingElements(list):
|
| + def append(self, node):
|
| + equalCount = 0
|
| + if node != Marker:
|
| + for element in self[::-1]:
|
| + if element == Marker:
|
| + break
|
| + if self.nodesEqual(element, node):
|
| + equalCount += 1
|
| + if equalCount == 3:
|
| + self.remove(element)
|
| + break
|
| + list.append(self, node)
|
| +
|
| + def nodesEqual(self, node1, node2):
|
| + if not node1.nameTuple == node2.nameTuple:
|
| + return False
|
| +
|
| + if not node1.attributes == node2.attributes:
|
| + return False
|
| +
|
| + return True
|
| +
|
| +
|
| +class TreeBuilder(object):
|
| + """Base treebuilder implementation
|
| + documentClass - the class to use for the bottommost node of a document
|
| + elementClass - the class to use for HTML Elements
|
| + commentClass - the class to use for comments
|
| + doctypeClass - the class to use for doctypes
|
| + """
|
| +
|
| + # Document class
|
| + documentClass = None
|
| +
|
| + # The class to use for creating a node
|
| + elementClass = None
|
| +
|
| + # The class to use for creating comments
|
| + commentClass = None
|
| +
|
| + # The class to use for creating doctypes
|
| + doctypeClass = None
|
| +
|
| + # Fragment class
|
| + fragmentClass = None
|
| +
|
| + def __init__(self, namespaceHTMLElements):
|
| + if namespaceHTMLElements:
|
| + self.defaultNamespace = "http://www.w3.org/1999/xhtml"
|
| + else:
|
| + self.defaultNamespace = None
|
| + self.reset()
|
| +
|
| + def reset(self):
|
| + self.openElements = []
|
| + self.activeFormattingElements = ActiveFormattingElements()
|
| +
|
| + # XXX - rename these to headElement, formElement
|
| + self.headPointer = None
|
| + self.formPointer = None
|
| +
|
| + self.insertFromTable = False
|
| +
|
| + self.document = self.documentClass()
|
| +
|
| + def elementInScope(self, target, variant=None):
|
| +
|
| + # If we pass a node in we match that. if we pass a string
|
| + # match any node with that name
|
| + exactNode = hasattr(target, "nameTuple")
|
| +
|
| + listElements, invert = listElementsMap[variant]
|
| +
|
| + for node in reversed(self.openElements):
|
| + if (node.name == target and not exactNode or
|
| + node == target and exactNode):
|
| + return True
|
| + elif (invert ^ (node.nameTuple in listElements)):
|
| + return False
|
| +
|
| + assert False # We should never reach this point
|
| +
|
| + def reconstructActiveFormattingElements(self):
|
| + # Within this algorithm the order of steps described in the
|
| + # specification is not quite the same as the order of steps in the
|
| + # code. It should still do the same though.
|
| +
|
| + # Step 1: stop the algorithm when there's nothing to do.
|
| + if not self.activeFormattingElements:
|
| + return
|
| +
|
| + # Step 2 and step 3: we start with the last element. So i is -1.
|
| + i = len(self.activeFormattingElements) - 1
|
| + entry = self.activeFormattingElements[i]
|
| + if entry == Marker or entry in self.openElements:
|
| + return
|
| +
|
| + # Step 6
|
| + while entry != Marker and entry not in self.openElements:
|
| + if i == 0:
|
| + # This will be reset to 0 below
|
| + i = -1
|
| + break
|
| + i -= 1
|
| + # Step 5: let entry be one earlier in the list.
|
| + entry = self.activeFormattingElements[i]
|
| +
|
| + while True:
|
| + # Step 7
|
| + i += 1
|
| +
|
| + # Step 8
|
| + entry = self.activeFormattingElements[i]
|
| + clone = entry.cloneNode() # Mainly to get a new copy of the attributes
|
| +
|
| + # Step 9
|
| + element = self.insertElement({"type": "StartTag",
|
| + "name": clone.name,
|
| + "namespace": clone.namespace,
|
| + "data": clone.attributes})
|
| +
|
| + # Step 10
|
| + self.activeFormattingElements[i] = element
|
| +
|
| + # Step 11
|
| + if element == self.activeFormattingElements[-1]:
|
| + break
|
| +
|
| + def clearActiveFormattingElements(self):
|
| + entry = self.activeFormattingElements.pop()
|
| + while self.activeFormattingElements and entry != Marker:
|
| + entry = self.activeFormattingElements.pop()
|
| +
|
| + def elementInActiveFormattingElements(self, name):
|
| + """Check if an element exists between the end of the active
|
| + formatting elements and the last marker. If it does, return it, else
|
| + return false"""
|
| +
|
| + for item in self.activeFormattingElements[::-1]:
|
| + # Check for Marker first because if it's a Marker it doesn't have a
|
| + # name attribute.
|
| + if item == Marker:
|
| + break
|
| + elif item.name == name:
|
| + return item
|
| + return False
|
| +
|
| + def insertRoot(self, token):
|
| + element = self.createElement(token)
|
| + self.openElements.append(element)
|
| + self.document.appendChild(element)
|
| +
|
| + def insertDoctype(self, token):
|
| + name = token["name"]
|
| + publicId = token["publicId"]
|
| + systemId = token["systemId"]
|
| +
|
| + doctype = self.doctypeClass(name, publicId, systemId)
|
| + self.document.appendChild(doctype)
|
| +
|
| + def insertComment(self, token, parent=None):
|
| + if parent is None:
|
| + parent = self.openElements[-1]
|
| + parent.appendChild(self.commentClass(token["data"]))
|
| +
|
| + def createElement(self, token):
|
| + """Create an element but don't insert it anywhere"""
|
| + name = token["name"]
|
| + namespace = token.get("namespace", self.defaultNamespace)
|
| + element = self.elementClass(name, namespace)
|
| + element.attributes = token["data"]
|
| + return element
|
| +
|
| + def _getInsertFromTable(self):
|
| + return self._insertFromTable
|
| +
|
| + def _setInsertFromTable(self, value):
|
| + """Switch the function used to insert an element from the
|
| + normal one to the misnested table one and back again"""
|
| + self._insertFromTable = value
|
| + if value:
|
| + self.insertElement = self.insertElementTable
|
| + else:
|
| + self.insertElement = self.insertElementNormal
|
| +
|
| + insertFromTable = property(_getInsertFromTable, _setInsertFromTable)
|
| +
|
| + def insertElementNormal(self, token):
|
| + name = token["name"]
|
| + assert isinstance(name, text_type), "Element %s not unicode" % name
|
| + namespace = token.get("namespace", self.defaultNamespace)
|
| + element = self.elementClass(name, namespace)
|
| + element.attributes = token["data"]
|
| + self.openElements[-1].appendChild(element)
|
| + self.openElements.append(element)
|
| + return element
|
| +
|
| + def insertElementTable(self, token):
|
| + """Create an element and insert it into the tree"""
|
| + element = self.createElement(token)
|
| + if self.openElements[-1].name not in tableInsertModeElements:
|
| + return self.insertElementNormal(token)
|
| + else:
|
| + # We should be in the InTable mode. This means we want to do
|
| + # special magic element rearranging
|
| + parent, insertBefore = self.getTableMisnestedNodePosition()
|
| + if insertBefore is None:
|
| + parent.appendChild(element)
|
| + else:
|
| + parent.insertBefore(element, insertBefore)
|
| + self.openElements.append(element)
|
| + return element
|
| +
|
| + def insertText(self, data, parent=None):
|
| + """Insert text data."""
|
| + if parent is None:
|
| + parent = self.openElements[-1]
|
| +
|
| + if (not self.insertFromTable or (self.insertFromTable and
|
| + self.openElements[-1].name
|
| + not in tableInsertModeElements)):
|
| + parent.insertText(data)
|
| + else:
|
| + # We should be in the InTable mode. This means we want to do
|
| + # special magic element rearranging
|
| + parent, insertBefore = self.getTableMisnestedNodePosition()
|
| + parent.insertText(data, insertBefore)
|
| +
|
| + def getTableMisnestedNodePosition(self):
|
| + """Get the foster parent element, and sibling to insert before
|
| + (or None) when inserting a misnested table node"""
|
| + # The foster parent element is the one which comes before the most
|
| + # recently opened table element
|
| + # XXX - this is really inelegant
|
| + lastTable = None
|
| + fosterParent = None
|
| + insertBefore = None
|
| + for elm in self.openElements[::-1]:
|
| + if elm.name == "table":
|
| + lastTable = elm
|
| + break
|
| + if lastTable:
|
| + # XXX - we should really check that this parent is actually a
|
| + # node here
|
| + if lastTable.parent:
|
| + fosterParent = lastTable.parent
|
| + insertBefore = lastTable
|
| + else:
|
| + fosterParent = self.openElements[
|
| + self.openElements.index(lastTable) - 1]
|
| + else:
|
| + fosterParent = self.openElements[0]
|
| + return fosterParent, insertBefore
|
| +
|
| + def generateImpliedEndTags(self, exclude=None):
|
| + name = self.openElements[-1].name
|
| + # XXX td, th and tr are not actually needed
|
| + if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"))
|
| + and name != exclude):
|
| + self.openElements.pop()
|
| + # XXX This is not entirely what the specification says. We should
|
| + # investigate it more closely.
|
| + self.generateImpliedEndTags(exclude)
|
| +
|
| + def getDocument(self):
|
| + "Return the final tree"
|
| + return self.document
|
| +
|
| + def getFragment(self):
|
| + "Return the final fragment"
|
| + # assert self.innerHTML
|
| + fragment = self.fragmentClass()
|
| + self.openElements[0].reparentChildren(fragment)
|
| + return fragment
|
| +
|
| + def testSerializer(self, node):
|
| + """Serialize the subtree of node in the format required by unit tests
|
| + node - the node from which to start serializing"""
|
| + raise NotImplementedError
|
|
|