Index: packages/html/test/data/tokenizer/entities.test |
diff --git a/packages/html/test/data/tokenizer/entities.test b/packages/html/test/data/tokenizer/entities.test |
new file mode 100644 |
index 0000000000000000000000000000000000000000..1cb17a76000143b3455398bc5ddc1e078b9b6119 |
--- /dev/null |
+++ b/packages/html/test/data/tokenizer/entities.test |
@@ -0,0 +1,283 @@ |
+{"tests": [ |
+ |
+{"description": "Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name.", |
+"input":"<h a='&noti;'>", |
+"output": ["ParseError", ["StartTag", "h", {"a": "&noti;"}]]}, |
+ |
+{"description": "Entity name followed by the equals sign in an attribute value.", |
+"input":"<h a='&lang='>", |
+"output": ["ParseError", ["StartTag", "h", {"a": "&lang="}]]}, |
+ |
+{"description": "CR as numeric entity", |
+"input":"&#013;", |
+"output": ["ParseError", ["Character", "\r"]]}, |
+ |
+{"description": "CR as hexadecimal numeric entity", |
+"input":"&#x00D;", |
+"output": ["ParseError", ["Character", "\r"]]}, |
+ |
+{"description": "Windows-1252 EURO SIGN numeric entity.", |
+"input":"&#0128;", |
+"output": ["ParseError", ["Character", "\u20AC"]]}, |
+ |
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", |
+"input":"&#0129;", |
+"output": ["ParseError", ["Character", "\u0081"]]}, |
+ |
+{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.", |
+"input":"&#0130;", |
+"output": ["ParseError", ["Character", "\u201A"]]}, |
+ |
+{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK numeric entity.", |
+"input":"&#0131;", |
+"output": ["ParseError", ["Character", "\u0192"]]}, |
+ |
+{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity.", |
+"input":"&#0132;", |
+"output": ["ParseError", ["Character", "\u201E"]]}, |
+ |
+{"description": "Windows-1252 HORIZONTAL ELLIPSIS numeric entity.", |
+"input":"&#0133;", |
+"output": ["ParseError", ["Character", "\u2026"]]}, |
+ |
+{"description": "Windows-1252 DAGGER numeric entity.", |
+"input":"&#0134;", |
+"output": ["ParseError", ["Character", "\u2020"]]}, |
+ |
+{"description": "Windows-1252 DOUBLE DAGGER numeric entity.", |
+"input":"&#0135;", |
+"output": ["ParseError", ["Character", "\u2021"]]}, |
+ |
+{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity.", |
+"input":"&#0136;", |
+"output": ["ParseError", ["Character", "\u02C6"]]}, |
+ |
+{"description": "Windows-1252 PER MILLE SIGN numeric entity.", |
+"input":"&#0137;", |
+"output": ["ParseError", ["Character", "\u2030"]]}, |
+ |
+{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity.", |
+"input":"&#0138;", |
+"output": ["ParseError", ["Character", "\u0160"]]}, |
+ |
+{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity.", |
+"input":"&#0139;", |
+"output": ["ParseError", ["Character", "\u2039"]]}, |
+ |
+{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity.", |
+"input":"&#0140;", |
+"output": ["ParseError", ["Character", "\u0152"]]}, |
+ |
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", |
+"input":"&#0141;", |
+"output": ["ParseError", ["Character", "\u008D"]]}, |
+ |
+{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.", |
+"input":"&#0142;", |
+"output": ["ParseError", ["Character", "\u017D"]]}, |
+ |
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", |
+"input":"&#0143;", |
+"output": ["ParseError", ["Character", "\u008F"]]}, |
+ |
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", |
+"input":"&#0144;", |
+"output": ["ParseError", ["Character", "\u0090"]]}, |
+ |
+{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.", |
+"input":"&#0145;", |
+"output": ["ParseError", ["Character", "\u2018"]]}, |
+ |
+{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity.", |
+"input":"&#0146;", |
+"output": ["ParseError", ["Character", "\u2019"]]}, |
+ |
+{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity.", |
+"input":"&#0147;", |
+"output": ["ParseError", ["Character", "\u201C"]]}, |
+ |
+{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity.", |
+"input":"&#0148;", |
+"output": ["ParseError", ["Character", "\u201D"]]}, |
+ |
+{"description": "Windows-1252 BULLET numeric entity.", |
+"input":"&#0149;", |
+"output": ["ParseError", ["Character", "\u2022"]]}, |
+ |
+{"description": "Windows-1252 EN DASH numeric entity.", |
+"input":"&#0150;", |
+"output": ["ParseError", ["Character", "\u2013"]]}, |
+ |
+{"description": "Windows-1252 EM DASH numeric entity.", |
+"input":"&#0151;", |
+"output": ["ParseError", ["Character", "\u2014"]]}, |
+ |
+{"description": "Windows-1252 SMALL TILDE numeric entity.", |
+"input":"&#0152;", |
+"output": ["ParseError", ["Character", "\u02DC"]]}, |
+ |
+{"description": "Windows-1252 TRADE MARK SIGN numeric entity.", |
+"input":"&#0153;", |
+"output": ["ParseError", ["Character", "\u2122"]]}, |
+ |
+{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity.", |
+"input":"&#0154;", |
+"output": ["ParseError", ["Character", "\u0161"]]}, |
+ |
+{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity.", |
+"input":"&#0155;", |
+"output": ["ParseError", ["Character", "\u203A"]]}, |
+ |
+{"description": "Windows-1252 LATIN SMALL LIGATURE OE numeric entity.", |
+"input":"&#0156;", |
+"output": ["ParseError", ["Character", "\u0153"]]}, |
+ |
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", |
+"input":"&#0157;", |
+"output": ["ParseError", ["Character", "\u009D"]]}, |
+ |
+{"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.", |
+"input":"&#x080;", |
+"output": ["ParseError", ["Character", "\u20AC"]]}, |
+ |
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", |
+"input":"&#x081;", |
+"output": ["ParseError", ["Character", "\u0081"]]}, |
+ |
+{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.", |
+"input":"&#x082;", |
+"output": ["ParseError", ["Character", "\u201A"]]}, |
+ |
+{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK hexadecimal numeric entity.", |
+"input":"&#x083;", |
+"output": ["ParseError", ["Character", "\u0192"]]}, |
+ |
+{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity.", |
+"input":"&#x084;", |
+"output": ["ParseError", ["Character", "\u201E"]]}, |
+ |
+{"description": "Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity.", |
+"input":"&#x085;", |
+"output": ["ParseError", ["Character", "\u2026"]]}, |
+ |
+{"description": "Windows-1252 DAGGER hexadecimal numeric entity.", |
+"input":"&#x086;", |
+"output": ["ParseError", ["Character", "\u2020"]]}, |
+ |
+{"description": "Windows-1252 DOUBLE DAGGER hexadecimal numeric entity.", |
+"input":"&#x087;", |
+"output": ["ParseError", ["Character", "\u2021"]]}, |
+ |
+{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity.", |
+"input":"&#x088;", |
+"output": ["ParseError", ["Character", "\u02C6"]]}, |
+ |
+{"description": "Windows-1252 PER MILLE SIGN hexadecimal numeric entity.", |
+"input":"&#x089;", |
+"output": ["ParseError", ["Character", "\u2030"]]}, |
+ |
+{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity.", |
+"input":"&#x08A;", |
+"output": ["ParseError", ["Character", "\u0160"]]}, |
+ |
+{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.", |
+"input":"&#x08B;", |
+"output": ["ParseError", ["Character", "\u2039"]]}, |
+ |
+{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity.", |
+"input":"&#x08C;", |
+"output": ["ParseError", ["Character", "\u0152"]]}, |
+ |
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", |
+"input":"&#x08D;", |
+"output": ["ParseError", ["Character", "\u008D"]]}, |
+ |
+{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.", |
+"input":"&#x08E;", |
+"output": ["ParseError", ["Character", "\u017D"]]}, |
+ |
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", |
+"input":"&#x08F;", |
+"output": ["ParseError", ["Character", "\u008F"]]}, |
+ |
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", |
+"input":"&#x090;", |
+"output": ["ParseError", ["Character", "\u0090"]]}, |
+ |
+{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.", |
+"input":"&#x091;", |
+"output": ["ParseError", ["Character", "\u2018"]]}, |
+ |
+{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity.", |
+"input":"&#x092;", |
+"output": ["ParseError", ["Character", "\u2019"]]}, |
+ |
+{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity.", |
+"input":"&#x093;", |
+"output": ["ParseError", ["Character", "\u201C"]]}, |
+ |
+{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity.", |
+"input":"&#x094;", |
+"output": ["ParseError", ["Character", "\u201D"]]}, |
+ |
+{"description": "Windows-1252 BULLET hexadecimal numeric entity.", |
+"input":"&#x095;", |
+"output": ["ParseError", ["Character", "\u2022"]]}, |
+ |
+{"description": "Windows-1252 EN DASH hexadecimal numeric entity.", |
+"input":"&#x096;", |
+"output": ["ParseError", ["Character", "\u2013"]]}, |
+ |
+{"description": "Windows-1252 EM DASH hexadecimal numeric entity.", |
+"input":"&#x097;", |
+"output": ["ParseError", ["Character", "\u2014"]]}, |
+ |
+{"description": "Windows-1252 SMALL TILDE hexadecimal numeric entity.", |
+"input":"&#x098;", |
+"output": ["ParseError", ["Character", "\u02DC"]]}, |
+ |
+{"description": "Windows-1252 TRADE MARK SIGN hexadecimal numeric entity.", |
+"input":"&#x099;", |
+"output": ["ParseError", ["Character", "\u2122"]]}, |
+ |
+{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity.", |
+"input":"&#x09A;", |
+"output": ["ParseError", ["Character", "\u0161"]]}, |
+ |
+{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.", |
+"input":"&#x09B;", |
+"output": ["ParseError", ["Character", "\u203A"]]}, |
+ |
+{"description": "Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity.", |
+"input":"&#x09C;", |
+"output": ["ParseError", ["Character", "\u0153"]]}, |
+ |
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", |
+"input":"&#x09D;", |
+"output": ["ParseError", ["Character", "\u009D"]]}, |
+ |
+{"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.", |
+"input":"&#x09E;", |
+"output": ["ParseError", ["Character", "\u017E"]]}, |
+ |
+{"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.", |
+"input":"&#x09F;", |
+"output": ["ParseError", ["Character", "\u0178"]]}, |
+ |
+{"description": "Decimal numeric entity followed by hex character a.", |
+"input":"&#97a", |
+"output": ["ParseError", ["Character", "aa"]]}, |
+ |
+{"description": "Decimal numeric entity followed by hex character A.", |
+"input":"&#97A", |
+"output": ["ParseError", ["Character", "aA"]]}, |
+ |
+{"description": "Decimal numeric entity followed by hex character f.", |
+"input":"&#97f", |
+"output": ["ParseError", ["Character", "af"]]}, |
+ |
+{"description": "Decimal numeric entity followed by hex character F.", |
+"input":"&#97F", |
+"output": ["ParseError", ["Character", "aF"]]} |
+ |
+]} |