Index: third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/manifest/XMLParser.py |
diff --git a/third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/manifest/XMLParser.py b/third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/manifest/XMLParser.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..5ceeb0b5f727190f3d929e1d41b920c2654b1dd7 |
--- /dev/null |
+++ b/third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/wpt/wpt/tools/manifest/XMLParser.py |
@@ -0,0 +1,126 @@ |
+from os.path import dirname, join |
+ |
+from collections import OrderedDict |
+ |
+from xml.parsers import expat |
+import xml.etree.ElementTree as etree |
+ |
+_catalog = join(dirname(__file__), "catalog") |
+ |
+def _wrap_error(e): |
+ err = etree.ParseError(e) |
+ err.code = e.code |
+ err.position = e.lineno, e.offset |
+ raise err |
+ |
+_names = {} |
+def _fixname(key): |
+ try: |
+ name = _names[key] |
+ except KeyError: |
+ name = key |
+ if "}" in name: |
+ name = "{" + name |
+ _names[key] = name |
+ return name |
+ |
+ |
+class XMLParser(object): |
+ """ |
+ An XML parser with support for XHTML DTDs and all Python-supported encodings |
+ |
+ This implements the API defined by |
+ xml.etree.ElementTree.XMLParser, but supports XHTML DTDs |
+ (therefore allowing XHTML entities) and supports all encodings |
+ Python does, rather than just those supported by expat. |
+ """ |
+ def __init__(self, encoding=None): |
+ self._parser = expat.ParserCreate(encoding, "}") |
+ self._target = etree.TreeBuilder() |
+ # parser settings |
+ self._parser.buffer_text = 1 |
+ self._parser.ordered_attributes = 1 |
+ self._parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) |
+ # parser callbacks |
+ self._parser.XmlDeclHandler = self._xml_decl |
+ self._parser.StartElementHandler = self._start |
+ self._parser.EndElementHandler = self._end |
+ self._parser.CharacterDataHandler = self._data |
+ self._parser.ExternalEntityRefHandler = self._external |
+ self._parser.SkippedEntityHandler = self._skipped |
+ # used for our horrible re-encoding hack |
+ self._fed_data = [] |
+ self._read_encoding = None |
+ |
+ def _xml_decl(self, version, encoding, standalone): |
+ self._read_encoding = encoding |
+ |
+ def _start(self, tag, attrib_in): |
+ self._fed_data = None |
+ tag = _fixname(tag) |
+ attrib = OrderedDict() |
+ if attrib_in: |
+ for i in range(0, len(attrib_in), 2): |
+ attrib[_fixname(attrib_in[i])] = attrib_in[i+1] |
+ return self._target.start(tag, attrib) |
+ |
+ def _data(self, text): |
+ return self._target.data(text) |
+ |
+ def _end(self, tag): |
+ return self._target.end(_fixname(tag)) |
+ |
+ def _external(self, context, base, systemId, publicId): |
+ if publicId in { |
+ "-//W3C//DTD XHTML 1.0 Transitional//EN", |
+ "-//W3C//DTD XHTML 1.1//EN", |
+ "-//W3C//DTD XHTML 1.0 Strict//EN", |
+ "-//W3C//DTD XHTML 1.0 Frameset//EN", |
+ "-//W3C//DTD XHTML Basic 1.0//EN", |
+ "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN", |
+ "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN", |
+ "-//W3C//DTD MathML 2.0//EN", |
+ "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" |
+ }: |
+ parser = self._parser.ExternalEntityParserCreate(context) |
+ with open(join(_catalog, "xhtml.dtd"), "rb") as fp: |
+ try: |
+ parser.ParseFile(fp) |
+ except expat.error: |
+ return False |
+ |
+ return True |
+ |
+ def _skipped(self, name, is_parameter_entity): |
+ err = expat.error("undefined entity %s: line %d, column %d" % |
+ (name, self._parser.ErrorLineNumber, |
+ self._parser.ErrorColumnNumber)) |
+ err.code = expat.errors.XML_ERROR_UNDEFINED_ENTITY |
+ err.lineno = self._parser.ErrorLineNumber |
+ err.offset = self._parser.ErrorColumnNumber |
+ raise err |
+ |
+ def feed(self, data): |
+ if self._fed_data is not None: |
+ self._fed_data.append(data) |
+ try: |
+ self._parser.Parse(data, False) |
+ except expat.error as v: |
+ _wrap_error(v) |
+ except ValueError as e: |
+ if e.args[0] == 'multi-byte encodings are not supported': |
+ assert self._read_encoding is not None |
+ xml = b"".join(self._fed_data).decode(self._read_encoding).encode("utf-8") |
+ new_parser = XMLParser("utf-8") |
+ self._parser = new_parser._parser |
+ self._target = new_parser._target |
+ self._fed_data = None |
+ self.feed(xml) |
+ |
+ def close(self): |
+ try: |
+ self._parser.Parse("", True) |
+ except expat.error as v: |
+ _wrap_error(v) |
+ tree = self._target.close() |
+ return tree |