Chromium Code Reviews

Unified Diff: pkg/third_party/html5lib/test/tokenizer_test.dart

Issue 22375011: move html5lib code into dart svn repo (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: change location of html5lib to pkg/third_party/html5lib Created 7 years, 4 months ago
Index: pkg/third_party/html5lib/test/tokenizer_test.dart
diff --git a/pkg/third_party/html5lib/test/tokenizer_test.dart b/pkg/third_party/html5lib/test/tokenizer_test.dart
new file mode 100644
index 0000000000000000000000000000000000000000..fc98012ef4f4e45c1e199c5c4c3d0b43d08e7c48
--- /dev/null
+++ b/pkg/third_party/html5lib/test/tokenizer_test.dart
@@ -0,0 +1,270 @@
+library tokenizer_test;
+
+// Note: mirrors are used to match the getattr usage in the original Python test.
+import 'dart:async';
+import 'dart:io';
+import 'dart:json' as json;
+import 'dart:mirrors';
+import 'dart:utf';
+import 'package:path/path.dart' as pathos;
+import 'package:unittest/unittest.dart';
+import 'package:html5lib/src/char_encodings.dart';
+import 'package:html5lib/src/constants.dart' as constants;
+import 'package:html5lib/src/token.dart';
+import 'package:html5lib/src/tokenizer.dart';
+import 'package:html5lib/src/utils.dart';
+import 'support.dart';
+
+class TokenizerTestParser {
+ String _state;
+ var _lastStartTag;
+ List outputTokens;
+
+ TokenizerTestParser(String initialState, [lastStartTag])
+ : _state = initialState,
+ _lastStartTag = lastStartTag;
+
+ List parse(String str) {
+ // Note: we need to pass bytes to the tokenizer if we want it to handle BOM.
+ var bytes = codepointsToUtf8(toCodepoints(str));
+ var tokenizer = new HtmlTokenizer(bytes, encoding: 'utf-8');
+ outputTokens = [];
+
+ // Note: we can't get a closure of the state method. However, we can
+ // create a new closure to invoke it via mirrors.
+ var mtok = reflect(tokenizer);
+ tokenizer.state = () => deprecatedFutureValue(
+ mtok.invokeAsync(new Symbol(_state), const [])).reflectee;
+
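+ // Seeding currentToken with the last start tag lets states that depend on
+ // an "appropriate end tag" (e.g. RCDATA) behave as the tests expect.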
+ if (_lastStartTag != null) {
+ tokenizer.currentToken = new StartTagToken(_lastStartTag);
+ }
+
+ while (tokenizer.moveNext()) {
+ var token = tokenizer.current;
+ switch (token.kind) {
+ case TokenKind.characters:
+ processCharacters(token);
+ break;
+ case TokenKind.spaceCharacters:
+ processSpaceCharacters(token);
+ break;
+ case TokenKind.startTag:
+ processStartTag(token);
+ break;
+ case TokenKind.endTag:
+ processEndTag(token);
+ break;
+ case TokenKind.comment:
+ processComment(token);
+ break;
+ case TokenKind.doctype:
+ processDoctype(token);
+ break;
+ case TokenKind.parseError:
+ processParseError(token);
+ break;
+ }
+ }
+
+ return outputTokens;
+ }
+
+ void processDoctype(DoctypeToken token) {
+ outputTokens.add(["DOCTYPE", token.name, token.publicId,
+ token.systemId, token.correct]);
+ }
+
+ void processStartTag(StartTagToken token) {
+ outputTokens.add(["StartTag", token.name, token.data, token.selfClosing]);
+ }
+
+ void processEndTag(EndTagToken token) {
+ outputTokens.add(["EndTag", token.name, token.selfClosing]);
+ }
+
+ void processComment(StringToken token) {
+ outputTokens.add(["Comment", token.data]);
+ }
+
+ void processSpaceCharacters(StringToken token) {
+ processCharacters(token);
+ }
+
+ void processCharacters(StringToken token) {
+ outputTokens.add(["Character", token.data]);
+ }
+
+ void processEOF(token) {
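+ // The expected output in the test files does not include an EOF token,
+ // so there is nothing to record here.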
+ }
+
+ void processParseError(StringToken token) {
+ // TODO(jmesserly): when debugging test failures it can be useful to add
+ // logging here like `print('ParseError $token');`. It would be nice to
+ // use the actual logging library.
+ outputTokens.add(["ParseError", token.data]);
+ }
+}
+
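+/**
+ * Merges adjacent "Character" tokens into one, matching what the expected
+ * output in the test files assumes, e.g. ["Character", "a"] followed by
+ * ["Character", "b"] becomes ["Character", "ab"].
+ */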
+List concatenateCharacterTokens(List tokens) {
+ var outputTokens = [];
+ for (var token in tokens) {
+ if (token.indexOf("ParseError") == -1 && token[0] == "Character") {
+ if (outputTokens.length > 0 &&
+ outputTokens.last.indexOf("ParseError") == -1 &&
+ outputTokens.last[0] == "Character") {
+
+ outputTokens.last[1] = '${outputTokens.last[1]}${token[1]}';
+ } else {
+ outputTokens.add(token);
+ }
+ } else {
+ outputTokens.add(token);
+ }
+ }
+ return outputTokens;
+}
+
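+/**
+ * The test files represent a parse error as the bare string "ParseError",
+ * whereas the parser above emits ["ParseError", data]; collapse the latter
+ * so the two forms compare equal.
+ */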
+List normalizeTokens(List tokens) {
+ // TODO: convert tests to reflect arrays
+ for (int i = 0; i < tokens.length; i++) {
+ var token = tokens[i];
+ if (token[0] == 'ParseError') {
+ tokens[i] = token[0];
+ }
+ }
+ return tokens;
+}
+
+
+/**
+ * Tests whether the received tokens match the expected tokens.
+ *
+ * If the ignoreErrorOrder flag is set to true, we don't test the relative
+ * positions of parse errors and non-parse errors.
+ */
+void expectTokensMatch(List expectedTokens, List receivedTokens,
+ bool ignoreErrorOrder, [bool ignoreErrors = false, String message]) {
+
+ var checkSelfClosing = false;
+ for (var token in expectedTokens) {
+ if (token[0] == "StartTag" && token.length == 4
+ || token[0] == "EndTag" && token.length == 3) {
+ checkSelfClosing = true;
+ break;
+ }
+ }
+
+ if (!checkSelfClosing) {
+ for (var token in receivedTokens) {
+ if (token[0] == "StartTag" || token[0] == "EndTag") {
+ token.removeLast();
+ }
+ }
+ }
+
+ if (!ignoreErrorOrder && !ignoreErrors) {
+ expect(receivedTokens, equals(expectedTokens), reason: message);
+ } else {
+ // Split the tokens into two groups: non-parse errors and parse errors.
+ var expectedNonErrors = expectedTokens.where((t) => t != "ParseError");
+ var receivedNonErrors = receivedTokens.where((t) => t != "ParseError");
+
+ expect(receivedNonErrors, equals(expectedNonErrors), reason: message);
+ if (!ignoreErrors) {
+ var expectedParseErrors = expectedTokens.where((t) => t == "ParseError");
+ var receivedParseErrors = receivedTokens.where((t) => t == "ParseError");
+ expect(receivedParseErrors, equals(expectedParseErrors), reason: message);
+ }
+ }
+}
+
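+/**
+ * Runs a single test case. The testInfo map comes straight from the JSON
+ * test file and carries at least 'input', 'output', and 'initialState',
+ * plus optional 'lastStartTag', 'ignoreErrorOrder', and 'doubleEscaped'
+ * entries.
+ */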
+void runTokenizerTest(Map testInfo) {
+ // XXX - move this out into the setup function
+ // concatenate all consecutive character tokens into a single token
+ if (testInfo.containsKey('doubleEscaped')) {
+ testInfo = unescape(testInfo);
+ }
+
+ var expected = concatenateCharacterTokens(testInfo['output']);
+ if (!testInfo.containsKey('lastStartTag')) {
+ testInfo['lastStartTag'] = null;
+ }
+ var parser = new TokenizerTestParser(testInfo['initialState'],
+ testInfo['lastStartTag']);
+ var tokens = parser.parse(testInfo['input']);
+ tokens = concatenateCharacterTokens(tokens);
+ var received = normalizeTokens(tokens);
+ var errorMsg = ["\n\nInitial state:",
+ testInfo['initialState'],
+ "\nInput:", testInfo['input'],
+ "\nExpected:", expected,
+ "\nreceived:", tokens].map((s) => '$s').join('\n');
+ var ignoreErrorOrder = testInfo['ignoreErrorOrder'];
+ if (ignoreErrorOrder == null) ignoreErrorOrder = false;
+
+ expectTokensMatch(expected, received, ignoreErrorOrder, true, errorMsg);
+}
+
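+/**
+ * Tests marked 'doubleEscaped' store their input and output with an extra
+ * level of JSON string escaping (e.g. "\\u0041" for "A"); decode both
+ * before running the test.
+ */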
+Map unescape(Map testInfo) {
+ // TODO(sigmundch,jmesserly): we currently use json.parse to unescape the
+ // unicode characters in the string; we should use a decoding that works
+ // with any control characters.
+ decode(inp) => inp == '\u0000' ? inp : json.parse('"$inp"');
+
+ testInfo["input"] = decode(testInfo["input"]);
+ for (var token in testInfo["output"]) {
+ if (token == "ParseError") {
+ continue;
+ } else {
+ token[1] = decode(token[1]);
+ if (token.length > 2) {
+ // token[2] holds the attribute map; rebuild it rather than removing
+ // and re-adding keys while iterating over it.
+ var decoded = {};
+ token[2].forEach((key, value) {
+ decoded[decode(key)] = decode(value);
+ });
+ token[2] = decoded;
+ }
+ }
+ }
+ return testInfo;
+}
+
+
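+/**
+ * Converts a state name from the test file into the corresponding tokenizer
+ * method name, e.g. camelCase('RCDATA state') returns 'rcdataState'.
+ */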
+String camelCase(String s) {
+ s = s.toLowerCase();
+ var result = new StringBuffer();
+ for (var match in new RegExp(r"\W+(\w)(\w+)").allMatches(s)) {
+ if (result.length == 0) result.write(s.substring(0, match.start));
+ result.write(match.group(1).toUpperCase());
+ result.write(match.group(2));
+ }
+ return result.toString();
+}
+
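+/**
+ * Loads every '.test' file from the tokenizer data directory and registers
+ * one test group per file, running each test once for every initial state
+ * it requests (defaulting to the data state).
+ */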
+void main() {
+ for (var path in getDataFiles('tokenizer')) {
+ if (!path.endsWith('.test')) continue;
+
+ var text = new File(path).readAsStringSync();
+ var tests = json.parse(text);
+ var testName = pathos.basenameWithoutExtension(path);
+ var testList = tests['tests'];
+ if (testList == null) continue;
+
+ group(testName, () {
+ for (int index = 0; index < testList.length; index++) {
+ final testInfo = testList[index];
+
+ testInfo.putIfAbsent("initialStates", () => ["Data state"]);
+ for (var initialState in testInfo["initialStates"]) {
+ test(testInfo["description"], () {
+ testInfo["initialState"] = camelCase(initialState);
+ runTokenizerTest(testInfo);
+ });
+ }
+ }
+ });
+ }
+}
