Index: packages/html/test/data/tokenizer/test1.test |
diff --git a/packages/html/test/data/tokenizer/test1.test b/packages/html/test/data/tokenizer/test1.test |
new file mode 100644 |
index 0000000000000000000000000000000000000000..5de66f54ed255daebb1fd551d03e7ed47c351267 |
--- /dev/null |
+++ b/packages/html/test/data/tokenizer/test1.test |
@@ -0,0 +1,196 @@ |
+{"tests": [ |
+ |
+{"description":"Correct Doctype lowercase", |
+"input":"<!DOCTYPE html>", |
+"output":[["DOCTYPE", "html", null, null, true]]}, |
+ |
+{"description":"Correct Doctype uppercase", |
+"input":"<!DOCTYPE HTML>", |
+"output":[["DOCTYPE", "html", null, null, true]]}, |
+ |
+{"description":"Correct Doctype mixed case", |
+"input":"<!DOCTYPE HtMl>", |
+"output":[["DOCTYPE", "html", null, null, true]]}, |
+ |
+{"description":"Correct Doctype case with EOF", |
+"input":"<!DOCTYPE HtMl", |
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]}, |
+ |
+{"description":"Truncated doctype start", |
+"input":"<!DOC>", |
+"output":["ParseError", ["Comment", "DOC"]]}, |
+ |
+{"description":"Doctype in error", |
+"input":"<!DOCTYPE foo>", |
+"output":[["DOCTYPE", "foo", null, null, true]]}, |
+ |
+{"description":"Single Start Tag", |
+"input":"<h>", |
+"output":[["StartTag", "h", {}]]}, |
+ |
+{"description":"Empty end tag", |
+"input":"</>", |
+"output":["ParseError"]}, |
+ |
+{"description":"Empty start tag", |
+"input":"<>", |
+"output":["ParseError", ["Character", "<>"]]}, |
+ |
+{"description":"Start Tag w/attribute", |
+"input":"<h a='b'>", |
+"output":[["StartTag", "h", {"a":"b"}]]}, |
+ |
+{"description":"Start Tag w/attribute no quotes", |
+"input":"<h a=b>", |
+"output":[["StartTag", "h", {"a":"b"}]]}, |
+ |
+{"description":"Start/End Tag", |
+"input":"<h></h>", |
+"output":[["StartTag", "h", {}], ["EndTag", "h"]]}, |
+ |
+{"description":"Two unclosed start tags", |
+"input":"<p>One<p>Two", |
+"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]}, |
+ |
+{"description":"End Tag w/attribute", |
+"input":"<h></h a='b'>", |
+"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]}, |
+ |
+{"description":"Multiple atts", |
+"input":"<h a='b' c='d'>", |
+"output":[["StartTag", "h", {"a":"b", "c":"d"}]]}, |
+ |
+{"description":"Multiple atts no space", |
+"input":"<h a='b'c='d'>", |
+"output":["ParseError", ["StartTag", "h", {"a":"b", "c":"d"}]]}, |
+ |
+{"description":"Repeated attr", |
+ "input":"<h a='b' a='d'>", |
+ "output":["ParseError", ["StartTag", "h", {"a":"b"}]]}, |
+ |
+{"description":"Simple comment", |
+ "input":"<!--comment-->", |
+ "output":[["Comment", "comment"]]}, |
+ |
+{"description":"Comment, Central dash no space", |
+ "input":"<!----->", |
+ "output":["ParseError", ["Comment", "-"]]}, |
+ |
+{"description":"Comment, two central dashes", |
+"input":"<!-- --comment -->", |
+"output":["ParseError", ["Comment", " --comment "]]}, |
+ |
+{"description":"Unfinished comment", |
+"input":"<!--comment", |
+"output":["ParseError", ["Comment", "comment"]]}, |
+ |
+{"description":"Start of a comment", |
+"input":"<!-", |
+"output":["ParseError", ["Comment", "-"]]}, |
+ |
+{"description":"Short comment", |
+ "input":"<!-->", |
+ "output":["ParseError", ["Comment", ""]]}, |
+ |
+{"description":"Short comment two", |
+ "input":"<!--->", |
+ "output":["ParseError", ["Comment", ""]]}, |
+ |
+{"description":"Short comment three", |
+ "input":"<!---->", |
+ "output":[["Comment", ""]]}, |
+ |
+ |
+{"description":"Ampersand EOF", |
+"input":"&", |
+"output":[["Character", "&"]]}, |
+ |
+{"description":"Ampersand ampersand EOF", |
+"input":"&&", |
+"output":[["Character", "&&"]]}, |
+ |
+{"description":"Ampersand space EOF", |
+"input":"& ", |
+"output":[["Character", "& "]]}, |
+ |
+{"description":"Unfinished entity", |
+"input":"&f", |
+"output":["ParseError", ["Character", "&f"]]}, |
+ |
+{"description":"Ampersand, number sign", |
+"input":"&#", |
+"output":["ParseError", ["Character", "&#"]]}, |
+ |
+{"description":"Unfinished numeric entity", |
+"input":"&#x", |
+"output":["ParseError", ["Character", "&#x"]]}, |
+ |
+{"description":"Entity with trailing semicolon (1)", |
+"input":"I'm ¬it", |
+"output":[["Character","I'm \u00ACit"]]}, |
+ |
+{"description":"Entity with trailing semicolon (2)", |
+"input":"I'm ∉", |
+"output":[["Character","I'm \u2209"]]}, |
+ |
+{"description":"Entity without trailing semicolon (1)", |
+"input":"I'm ¬it", |
+"output":[["Character","I'm "], "ParseError", ["Character", "\u00ACit"]]}, |
+ |
+{"description":"Entity without trailing semicolon (2)", |
+"input":"I'm ¬in", |
+"output":[["Character","I'm "], "ParseError", ["Character", "\u00ACin"]]}, |
+ |
+{"description":"Partial entity match at end of file", |
+"input":"I'm &no", |
+"output":[["Character","I'm "], "ParseError", ["Character", "&no"]]}, |
+ |
+{"description":"Non-ASCII character reference name", |
+"input":"&\u00AC;", |
+"output":["ParseError", ["Character", "&\u00AC;"]]}, |
+ |
+{"description":"ASCII decimal entity", |
+"input":"$", |
+"output":[["Character","$"]]}, |
+ |
+{"description":"ASCII hexadecimal entity", |
+"input":"?", |
+"output":[["Character","?"]]}, |
+ |
+{"description":"Hexadecimal entity in attribute", |
+"input":"<h a='?'></h>", |
+"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]}, |
+ |
+{"description":"Entity in attribute without semicolon ending in x", |
+"input":"<h a='¬x'>", |
+"output":["ParseError", ["StartTag", "h", {"a":"¬x"}]]}, |
+ |
+{"description":"Entity in attribute without semicolon ending in 1", |
+"input":"<h a='¬1'>", |
+"output":["ParseError", ["StartTag", "h", {"a":"¬1"}]]}, |
+ |
+{"description":"Entity in attribute without semicolon ending in i", |
+"input":"<h a='¬i'>", |
+"output":["ParseError", ["StartTag", "h", {"a":"¬i"}]]}, |
+ |
+{"description":"Entity in attribute without semicolon", |
+"input":"<h a='©'>", |
+"output":["ParseError", ["StartTag", "h", {"a":"\u00A9"}]]}, |
+ |
+{"description":"Unquoted attribute ending in ampersand", |
+"input":"<s o=& t>", |
+"output":[["StartTag","s",{"o":"&","t":""}]]}, |
+ |
+{"description":"Unquoted attribute at end of tag with final character of &, with tag followed by characters", |
+"input":"<a a=a&>foo", |
+"output":[["StartTag", "a", {"a":"a&"}], ["Character", "foo"]]}, |
+ |
+{"description":"plaintext element", |
+ "input":"<plaintext>foobar", |
+ "output":[["StartTag","plaintext",{}], ["Character","foobar"]]}, |
+ |
+{"description":"Open angled bracket in unquoted attribute value state", |
+ "input":"<a a=f<>", |
+ "output":["ParseError", ["StartTag", "a", {"a":"f<"}]]} |
+ |
+]} |