| Index: packages/html/test/data/tokenizer/entities.test | 
| diff --git a/packages/html/test/data/tokenizer/entities.test b/packages/html/test/data/tokenizer/entities.test | 
| new file mode 100644 | 
| index 0000000000000000000000000000000000000000..1cb17a76000143b3455398bc5ddc1e078b9b6119 | 
| --- /dev/null | 
| +++ b/packages/html/test/data/tokenizer/entities.test | 
| @@ -0,0 +1,283 @@ | 
| +{"tests": [ | 
| + | 
| +{"description": "Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name.", | 
| +"input":"<h a='&noti;'>", | 
| +"output": ["ParseError", ["StartTag", "h", {"a": "&noti;"}]]}, | 
| + | 
| +{"description": "Entity name followed by the equals sign in an attribute value.", | 
| +"input":"<h a='&lang='>", | 
| +"output": ["ParseError", ["StartTag", "h", {"a": "&lang="}]]}, | 
| + | 
| +{"description": "CR as numeric entity", | 
| +"input":"&#013;", | 
| +"output": ["ParseError", ["Character", "\r"]]}, | 
| + | 
| +{"description": "CR as hexadecimal numeric entity", | 
| +"input":"&#x00D;", | 
| +"output": ["ParseError", ["Character", "\r"]]}, | 
| + | 
| +{"description": "Windows-1252 EURO SIGN numeric entity.", | 
| +"input":"&#0128;", | 
| +"output": ["ParseError", ["Character", "\u20AC"]]}, | 
| + | 
| +{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", | 
| +"input":"&#0129;", | 
| +"output": ["ParseError", ["Character", "\u0081"]]}, | 
| + | 
| +{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.", | 
| +"input":"&#0130;", | 
| +"output": ["ParseError", ["Character", "\u201A"]]}, | 
| + | 
| +{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK numeric entity.", | 
| +"input":"&#0131;", | 
| +"output": ["ParseError", ["Character", "\u0192"]]}, | 
| + | 
| +{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity.", | 
| +"input":"&#0132;", | 
| +"output": ["ParseError", ["Character", "\u201E"]]}, | 
| + | 
| +{"description": "Windows-1252 HORIZONTAL ELLIPSIS numeric entity.", | 
| +"input":"&#0133;", | 
| +"output": ["ParseError", ["Character", "\u2026"]]}, | 
| + | 
| +{"description": "Windows-1252 DAGGER numeric entity.", | 
| +"input":"&#0134;", | 
| +"output": ["ParseError", ["Character", "\u2020"]]}, | 
| + | 
| +{"description": "Windows-1252 DOUBLE DAGGER numeric entity.", | 
| +"input":"&#0135;", | 
| +"output": ["ParseError", ["Character", "\u2021"]]}, | 
| + | 
| +{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity.", | 
| +"input":"&#0136;", | 
| +"output": ["ParseError", ["Character", "\u02C6"]]}, | 
| + | 
| +{"description": "Windows-1252 PER MILLE SIGN numeric entity.", | 
| +"input":"&#0137;", | 
| +"output": ["ParseError", ["Character", "\u2030"]]}, | 
| + | 
| +{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity.", | 
| +"input":"&#0138;", | 
| +"output": ["ParseError", ["Character", "\u0160"]]}, | 
| + | 
| +{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity.", | 
| +"input":"&#0139;", | 
| +"output": ["ParseError", ["Character", "\u2039"]]}, | 
| + | 
| +{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity.", | 
| +"input":"&#0140;", | 
| +"output": ["ParseError", ["Character", "\u0152"]]}, | 
| + | 
| +{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", | 
| +"input":"&#0141;", | 
| +"output": ["ParseError", ["Character", "\u008D"]]}, | 
| + | 
| +{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.", | 
| +"input":"&#0142;", | 
| +"output": ["ParseError", ["Character", "\u017D"]]}, | 
| + | 
| +{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", | 
| +"input":"&#0143;", | 
| +"output": ["ParseError", ["Character", "\u008F"]]}, | 
| + | 
| +{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", | 
| +"input":"&#0144;", | 
| +"output": ["ParseError", ["Character", "\u0090"]]}, | 
| + | 
| +{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.", | 
| +"input":"&#0145;", | 
| +"output": ["ParseError", ["Character", "\u2018"]]}, | 
| + | 
| +{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity.", | 
| +"input":"&#0146;", | 
| +"output": ["ParseError", ["Character", "\u2019"]]}, | 
| + | 
| +{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity.", | 
| +"input":"&#0147;", | 
| +"output": ["ParseError", ["Character", "\u201C"]]}, | 
| + | 
| +{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity.", | 
| +"input":"&#0148;", | 
| +"output": ["ParseError", ["Character", "\u201D"]]}, | 
| + | 
| +{"description": "Windows-1252 BULLET numeric entity.", | 
| +"input":"&#0149;", | 
| +"output": ["ParseError", ["Character", "\u2022"]]}, | 
| + | 
| +{"description": "Windows-1252 EN DASH numeric entity.", | 
| +"input":"&#0150;", | 
| +"output": ["ParseError", ["Character", "\u2013"]]}, | 
| + | 
| +{"description": "Windows-1252 EM DASH numeric entity.", | 
| +"input":"&#0151;", | 
| +"output": ["ParseError", ["Character", "\u2014"]]}, | 
| + | 
| +{"description": "Windows-1252 SMALL TILDE numeric entity.", | 
| +"input":"&#0152;", | 
| +"output": ["ParseError", ["Character", "\u02DC"]]}, | 
| + | 
| +{"description": "Windows-1252 TRADE MARK SIGN numeric entity.", | 
| +"input":"&#0153;", | 
| +"output": ["ParseError", ["Character", "\u2122"]]}, | 
| + | 
| +{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity.", | 
| +"input":"&#0154;", | 
| +"output": ["ParseError", ["Character", "\u0161"]]}, | 
| + | 
| +{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity.", | 
| +"input":"&#0155;", | 
| +"output": ["ParseError", ["Character", "\u203A"]]}, | 
| + | 
| +{"description": "Windows-1252 LATIN SMALL LIGATURE OE numeric entity.", | 
| +"input":"&#0156;", | 
| +"output": ["ParseError", ["Character", "\u0153"]]}, | 
| + | 
| +{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", | 
| +"input":"&#0157;", | 
| +"output": ["ParseError", ["Character", "\u009D"]]}, | 
| + | 
| +{"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.", | 
| +"input":"&#x080;", | 
| +"output": ["ParseError", ["Character", "\u20AC"]]}, | 
| + | 
| +{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", | 
| +"input":"&#x081;", | 
| +"output": ["ParseError", ["Character", "\u0081"]]}, | 
| + | 
| +{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.", | 
| +"input":"&#x082;", | 
| +"output": ["ParseError", ["Character", "\u201A"]]}, | 
| + | 
| +{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK hexadecimal numeric entity.", | 
| +"input":"&#x083;", | 
| +"output": ["ParseError", ["Character", "\u0192"]]}, | 
| + | 
| +{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity.", | 
| +"input":"&#x084;", | 
| +"output": ["ParseError", ["Character", "\u201E"]]}, | 
| + | 
| +{"description": "Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity.", | 
| +"input":"&#x085;", | 
| +"output": ["ParseError", ["Character", "\u2026"]]}, | 
| + | 
| +{"description": "Windows-1252 DAGGER hexadecimal numeric entity.", | 
| +"input":"&#x086;", | 
| +"output": ["ParseError", ["Character", "\u2020"]]}, | 
| + | 
| +{"description": "Windows-1252 DOUBLE DAGGER hexadecimal numeric entity.", | 
| +"input":"&#x087;", | 
| +"output": ["ParseError", ["Character", "\u2021"]]}, | 
| + | 
| +{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity.", | 
| +"input":"&#x088;", | 
| +"output": ["ParseError", ["Character", "\u02C6"]]}, | 
| + | 
| +{"description": "Windows-1252 PER MILLE SIGN hexadecimal numeric entity.", | 
| +"input":"&#x089;", | 
| +"output": ["ParseError", ["Character", "\u2030"]]}, | 
| + | 
| +{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity.", | 
| +"input":"&#x08A;", | 
| +"output": ["ParseError", ["Character", "\u0160"]]}, | 
| + | 
| +{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.", | 
| +"input":"&#x08B;", | 
| +"output": ["ParseError", ["Character", "\u2039"]]}, | 
| + | 
| +{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity.", | 
| +"input":"&#x08C;", | 
| +"output": ["ParseError", ["Character", "\u0152"]]}, | 
| + | 
| +{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", | 
| +"input":"&#x08D;", | 
| +"output": ["ParseError", ["Character", "\u008D"]]}, | 
| + | 
| +{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.", | 
| +"input":"&#x08E;", | 
| +"output": ["ParseError", ["Character", "\u017D"]]}, | 
| + | 
| +{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", | 
| +"input":"&#x08F;", | 
| +"output": ["ParseError", ["Character", "\u008F"]]}, | 
| + | 
| +{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", | 
| +"input":"&#x090;", | 
| +"output": ["ParseError", ["Character", "\u0090"]]}, | 
| + | 
| +{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.", | 
| +"input":"&#x091;", | 
| +"output": ["ParseError", ["Character", "\u2018"]]}, | 
| + | 
| +{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity.", | 
| +"input":"&#x092;", | 
| +"output": ["ParseError", ["Character", "\u2019"]]}, | 
| + | 
| +{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity.", | 
| +"input":"&#x093;", | 
| +"output": ["ParseError", ["Character", "\u201C"]]}, | 
| + | 
| +{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity.", | 
| +"input":"&#x094;", | 
| +"output": ["ParseError", ["Character", "\u201D"]]}, | 
| + | 
| +{"description": "Windows-1252 BULLET hexadecimal numeric entity.", | 
| +"input":"&#x095;", | 
| +"output": ["ParseError", ["Character", "\u2022"]]}, | 
| + | 
| +{"description": "Windows-1252 EN DASH hexadecimal numeric entity.", | 
| +"input":"&#x096;", | 
| +"output": ["ParseError", ["Character", "\u2013"]]}, | 
| + | 
| +{"description": "Windows-1252 EM DASH hexadecimal numeric entity.", | 
| +"input":"&#x097;", | 
| +"output": ["ParseError", ["Character", "\u2014"]]}, | 
| + | 
| +{"description": "Windows-1252 SMALL TILDE hexadecimal numeric entity.", | 
| +"input":"&#x098;", | 
| +"output": ["ParseError", ["Character", "\u02DC"]]}, | 
| + | 
| +{"description": "Windows-1252 TRADE MARK SIGN hexadecimal numeric entity.", | 
| +"input":"&#x099;", | 
| +"output": ["ParseError", ["Character", "\u2122"]]}, | 
| + | 
| +{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity.", | 
| +"input":"&#x09A;", | 
| +"output": ["ParseError", ["Character", "\u0161"]]}, | 
| + | 
| +{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.", | 
| +"input":"&#x09B;", | 
| +"output": ["ParseError", ["Character", "\u203A"]]}, | 
| + | 
| +{"description": "Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity.", | 
| +"input":"&#x09C;", | 
| +"output": ["ParseError", ["Character", "\u0153"]]}, | 
| + | 
| +{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", | 
| +"input":"&#x09D;", | 
| +"output": ["ParseError", ["Character", "\u009D"]]}, | 
| + | 
| +{"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.", | 
| +"input":"&#x09E;", | 
| +"output": ["ParseError", ["Character", "\u017E"]]}, | 
| + | 
| +{"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.", | 
| +"input":"&#x09F;", | 
| +"output": ["ParseError", ["Character", "\u0178"]]}, | 
| + | 
| +{"description": "Decimal numeric entity followed by hex character a.", | 
| +"input":"&#97a", | 
| +"output": ["ParseError", ["Character", "aa"]]}, | 
| + | 
| +{"description": "Decimal numeric entity followed by hex character A.", | 
| +"input":"&#97A", | 
| +"output": ["ParseError", ["Character", "aA"]]}, | 
| + | 
| +{"description": "Decimal numeric entity followed by hex character f.", | 
| +"input":"&#97f", | 
| +"output": ["ParseError", ["Character", "af"]]}, | 
| + | 
| +{"description": "Decimal numeric entity followed by hex character F.", | 
| +"input":"&#97F", | 
| +"output": ["ParseError", ["Character", "aF"]]} | 
| + | 
| +]} | 
|  |