OLD | NEW |
(Empty) | |
| 1 library tokenizer_test; |
| 2 |
| 3 // Note: mirrors used to match the getattr usage in the original test |
| 4 import 'dart:async'; |
| 5 import 'dart:io'; |
| 6 import 'dart:json' as json; |
| 7 import 'dart:mirrors'; |
| 8 import 'dart:utf'; |
| 9 import 'package:path/path.dart' as pathos; |
| 10 import 'package:unittest/unittest.dart'; |
| 11 import 'package:html5lib/src/char_encodings.dart'; |
| 12 import 'package:html5lib/src/constants.dart' as constants; |
| 13 import 'package:html5lib/src/token.dart'; |
| 14 import 'package:html5lib/src/tokenizer.dart'; |
| 15 import 'package:html5lib/src/utils.dart'; |
| 16 import 'support.dart'; |
| 17 |
/// Drives an [HtmlTokenizer] over a test input string and records every
/// emitted token in the list-based format used by the html5lib tokenizer
/// test files (e.g. `["StartTag", name, attrs, selfClosing]`).
class TokenizerTestParser {
  String _state;
  var _lastStartTag;
  List outputTokens;

  TokenizerTestParser(String initialState, [lastStartTag])
      : _state = initialState,
        _lastStartTag = lastStartTag;

  List parse(String str) {
    // Feed bytes (not a string) so the tokenizer gets the chance to detect
    // and strip a byte-order mark.
    var bytes = codepointsToUtf8(toCodepoints(str));
    var tokenizer = new HtmlTokenizer(bytes, encoding: 'utf-8');
    outputTokens = [];

    // A state method can't be torn off by name, so build a closure that
    // dispatches to the named state through mirrors on every call.
    var tokenizerMirror = reflect(tokenizer);
    tokenizer.state = () => deprecatedFutureValue(
        tokenizerMirror.invokeAsync(new Symbol(_state), const [])).reflectee;

    if (_lastStartTag != null) {
      tokenizer.currentToken = new StartTagToken(_lastStartTag);
    }

    while (tokenizer.moveNext()) {
      var token = tokenizer.current;
      var kind = token.kind;
      if (kind == TokenKind.characters) {
        processCharacters(token);
      } else if (kind == TokenKind.spaceCharacters) {
        processSpaceCharacters(token);
      } else if (kind == TokenKind.startTag) {
        processStartTag(token);
      } else if (kind == TokenKind.endTag) {
        processEndTag(token);
      } else if (kind == TokenKind.comment) {
        processComment(token);
      } else if (kind == TokenKind.doctype) {
        processDoctype(token);
      } else if (kind == TokenKind.parseError) {
        processParseError(token);
      }
    }

    return outputTokens;
  }

  void processDoctype(DoctypeToken token) {
    outputTokens.add(
        ["DOCTYPE", token.name, token.publicId, token.systemId, token.correct]);
  }

  void processStartTag(StartTagToken token) {
    outputTokens.add(["StartTag", token.name, token.data, token.selfClosing]);
  }

  void processEndTag(EndTagToken token) {
    outputTokens.add(["EndTag", token.name, token.selfClosing]);
  }

  void processComment(StringToken token) {
    outputTokens.add(["Comment", token.data]);
  }

  // Whitespace is recorded exactly like any other character data.
  void processSpaceCharacters(StringToken token) {
    processCharacters(token);
  }

  void processCharacters(StringToken token) {
    outputTokens.add(["Character", token.data]);
  }

  void processEOF(token) {
  }

  void processParseError(StringToken token) {
    // TODO(jmesserly): when debugging test failures it can be useful to add
    // logging here like `print('ParseError $token');`. It would be nice to
    // use the actual logging library.
    outputTokens.add(["ParseError", token.data]);
  }
}
| 108 |
/// Merges every run of adjacent "Character" tokens into a single token.
///
/// Tokens are either lists like `["Character", data]` or the bare string
/// "ParseError"; both respond to `indexOf`, which is how non-character
/// entries are filtered out.
List concatenateCharacterTokens(List tokens) {
  // True only for ["Character", ...] list tokens. The plain "ParseError"
  // string fails the first check (its indexOf("ParseError") is 0, not -1).
  isCharacter(token) =>
      token.indexOf("ParseError") == -1 && token[0] == "Character";

  var result = [];
  for (var token in tokens) {
    if (isCharacter(token) && result.length > 0 && isCharacter(result.last)) {
      // Append this token's text onto the previous character token.
      result.last[1] = '${result.last[1]}${token[1]}';
    } else {
      result.add(token);
    }
  }
  return result;
}
| 127 |
/// Collapses every `["ParseError", ...]` entry to the bare string
/// "ParseError", mutating [tokens] in place and returning it.
List normalizeTokens(List tokens) {
  // TODO: convert tests to reflect arrays
  for (var i = 0; i < tokens.length; i++) {
    if (tokens[i][0] == 'ParseError') {
      tokens[i] = 'ParseError';
    }
  }
  return tokens;
}
| 138 |
| 139 |
/// Checks that [receivedTokens] matches [expectedTokens].
///
/// If [ignoreErrorOrder] is true we don't test the relative positions of
/// parse errors and non parse errors. If [ignoreErrors] is true, parse
/// errors are not compared at all. [message] is attached to any failure.
void expectTokensMatch(List expectedTokens, List receivedTokens,
    bool ignoreErrorOrder, [bool ignoreErrors = false, String message]) {

  // Self-closing flags are only compared when at least one expected token
  // actually carries one (4-element StartTag or 3-element EndTag).
  var checkSelfClosing = expectedTokens.any((token) =>
      token[0] == "StartTag" && token.length == 4 ||
      token[0] == "EndTag" && token.length == 3);

  if (!checkSelfClosing) {
    // Drop the trailing selfClosing flag the parser always records.
    for (var token in receivedTokens) {
      if (token[0] == "StartTag" || token[0] == "EndTag") {
        token.removeLast();
      }
    }
  }

  if (!ignoreErrorOrder && !ignoreErrors) {
    expect(receivedTokens, equals(expectedTokens), reason: message);
    return;
  }

  // Sort the tokens into two groups and compare them independently:
  // non-parse errors first, then (optionally) the parse errors.
  nonError(t) => t != "ParseError";
  expect(receivedTokens.where(nonError), equals(expectedTokens.where(nonError)),
      reason: message);
  if (!ignoreErrors) {
    isError(t) => t == "ParseError";
    expect(receivedTokens.where(isError), equals(expectedTokens.where(isError)),
        reason: message);
  }
}
| 181 |
/// Runs one tokenizer test case described by [testInfo] and compares the
/// tokens it produces against the case's expected output.
void runTokenizerTest(Map testInfo) {
  // XXX - move this out into the setup function
  // concatenate all consecutive character tokens into a single token
  if (testInfo.containsKey('doubleEscaped')) {
    testInfo = unescape(testInfo);
  }

  var expected = concatenateCharacterTokens(testInfo['output']);
  testInfo.putIfAbsent('lastStartTag', () => null);

  var parser = new TokenizerTestParser(
      testInfo['initialState'], testInfo['lastStartTag']);
  var received = normalizeTokens(
      concatenateCharacterTokens(parser.parse(testInfo['input'])));

  var errorMsg = ["\n\nInitial state:",
      testInfo['initialState'],
      "\nInput:", testInfo['input'],
      "\nExpected:", expected,
      "\nreceived:", received].map((s) => '$s').join('\n');

  var ignoreErrorOrder = testInfo['ignoreErrorOrder'];
  if (ignoreErrorOrder == null) ignoreErrorOrder = false;

  // Errors themselves are ignored here; only their ordering may matter.
  expectTokensMatch(expected, received, ignoreErrorOrder, true, errorMsg);
}
| 208 |
/// Decodes the double-escaped strings in [testInfo] (input, token data, and
/// attribute names/values), mutating and returning the map.
Map unescape(Map testInfo) {
  // TODO(sigmundch,jmesserly): we currently use json.parse to unescape the
  // unicode characters in the string, we should use a decoding that works with
  // any control characters.
  decode(inp) => inp == '\u0000' ? inp : json.parse('"$inp"');

  testInfo["input"] = decode(testInfo["input"]);
  for (var token in testInfo["output"]) {
    if (token == "ParseError") {
      continue;
    }
    token[1] = decode(token[1]);
    if (token.length > 2) {
      var attrs = token[2];
      // Fix: iterate over a snapshot of the keys. The previous code walked
      // token[2] directly while removing and re-inserting entries, which
      // mutates the collection during iteration (and treated the attribute
      // map as if it were an iterable of [key, value] pairs).
      for (var key in attrs.keys.toList()) {
        var value = attrs.remove(key);
        attrs[decode(key)] = decode(value);
      }
    }
  }
  return testInfo;
}
| 233 |
| 234 |
/// Converts a space/punctuation separated name such as "Data state" into
/// lowerCamelCase ("dataState").
///
/// The input is lowercased first; each non-word separator is removed and the
/// following word is capitalized.
String camelCase(String s) {
  s = s.toLowerCase();
  var result = new StringBuffer();
  for (var match in new RegExp(r"\W+(\w)(\w+)").allMatches(s)) {
    // The text before the first separator is kept as-is (already lowercase).
    if (result.isEmpty) result.write(s.substring(0, match.start));
    result.write('${match[1]}'.toUpperCase());
    result.write(match[2]);
  }
  // Fix: an input with no separators never matches the regexp, and the old
  // code returned "" in that case. Fall back to the lowercased input.
  return result.isEmpty ? s : result.toString();
}
| 245 |
/// Loads every tokenizer `.test` data file and registers one unittest case
/// per (test, initial state) combination.
void main() {
  for (var path in getDataFiles('tokenizer')) {
    if (!path.endsWith('.test')) continue;

    var tests = json.parse(new File(path).readAsStringSync());
    var testList = tests['tests'];
    if (testList == null) continue;
    var testName = pathos.basenameWithoutExtension(path);

    group(testName, () {
      for (final testInfo in testList) {
        // Default to the tokenizer's normal starting state.
        testInfo.putIfAbsent("initialStates", () => ["Data state"]);
        for (var initialState in testInfo["initialStates"]) {
          test(testInfo["description"], () {
            testInfo["initialState"] = camelCase(initialState);
            runTokenizerTest(testInfo);
          });
        }
      }
    });
  }
}
OLD | NEW |