Chromium Code Reviews

Unified Diff: pkg/third_party/html5lib/test/tokenizer_test.dart

Issue 22375011: move html5lib code into dart svn repo (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: change location of html5lib to pkg/third_party/html5lib Created 7 years, 4 months ago
Index: pkg/third_party/html5lib/test/tokenizer_test.dart
diff --git a/pkg/third_party/html5lib/test/tokenizer_test.dart b/pkg/third_party/html5lib/test/tokenizer_test.dart
new file mode 100644
index 0000000000000000000000000000000000000000..fc98012ef4f4e45c1e199c5c4c3d0b43d08e7c48
--- /dev/null
+++ b/pkg/third_party/html5lib/test/tokenizer_test.dart
@@ -0,0 +1,270 @@
+library tokenizer_test;
+
+// Note: mirrors are used to match the getattr usage in the original Python test.
+import 'dart:async';
+import 'dart:io';
+import 'dart:json' as json;
+import 'dart:mirrors';
+import 'dart:utf';
+import 'package:path/path.dart' as pathos;
+import 'package:unittest/unittest.dart';
+import 'package:html5lib/src/char_encodings.dart';
+import 'package:html5lib/src/constants.dart' as constants;
+import 'package:html5lib/src/token.dart';
+import 'package:html5lib/src/tokenizer.dart';
+import 'package:html5lib/src/utils.dart';
+import 'support.dart';
+
+class TokenizerTestParser {
+ String _state;
+ var _lastStartTag;
+ List outputTokens;
+
+ TokenizerTestParser(String initialState, [lastStartTag])
+ : _state = initialState,
+ _lastStartTag = lastStartTag;
+
+ List parse(String str) {
+ // Note: we need to pass bytes to the tokenizer if we want it to handle BOM.
+ var bytes = codepointsToUtf8(toCodepoints(str));
+ var tokenizer = new HtmlTokenizer(bytes, encoding: 'utf-8');
+ outputTokens = [];
+
+ // Note: we can't get a closure of the state method. However, we can
+ // create a new closure to invoke it via mirrors.
+ var mtok = reflect(tokenizer);
+ tokenizer.state = () => deprecatedFutureValue(
+ mtok.invokeAsync(new Symbol(_state), const [])).reflectee;
+
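+ // Seeding currentToken with the last start tag lets states that depend on
+ // an "appropriate end tag" (e.g. RCDATA) behave as the tests expect.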
+ if (_lastStartTag != null) {
+ tokenizer.currentToken = new StartTagToken(_lastStartTag);
+ }
+
+ while (tokenizer.moveNext()) {
+ var token = tokenizer.current;
+ switch (token.kind) {
+ case TokenKind.characters:
+ processCharacters(token);
+ break;
+ case TokenKind.spaceCharacters:
+ processSpaceCharacters(token);
+ break;
+ case TokenKind.startTag:
+ processStartTag(token);
+ break;
+ case TokenKind.endTag:
+ processEndTag(token);
+ break;
+ case TokenKind.comment:
+ processComment(token);
+ break;
+ case TokenKind.doctype:
+ processDoctype(token);
+ break;
+ case TokenKind.parseError:
+ processParseError(token);
+ break;
+ }
+ }
+
+ return outputTokens;
+ }
+
+ void processDoctype(DoctypeToken token) {
+ outputTokens.add(["DOCTYPE", token.name, token.publicId,
+ token.systemId, token.correct]);
+ }
+
+ void processStartTag(StartTagToken token) {
+ outputTokens.add(["StartTag", token.name, token.data, token.selfClosing]);
+ }
+
+ void processEndTag(EndTagToken token) {
+ outputTokens.add(["EndTag", token.name, token.selfClosing]);
+ }
+
+ void processComment(StringToken token) {
+ outputTokens.add(["Comment", token.data]);
+ }
+
+ void processSpaceCharacters(StringToken token) {
+ processCharacters(token);
+ }
+
+ void processCharacters(StringToken token) {
+ outputTokens.add(["Character", token.data]);
+ }
+
+ void processEOF(token) {
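+ // The expected output in the test files does not include an EOF token,
+ // so there is nothing to record here.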
+ }
+
+ void processParseError(StringToken token) {
+ // TODO(jmesserly): when debugging test failures it can be useful to add
+ // logging here like `print('ParseError $token');`. It would be nice to
+ // use the actual logging library.
+ outputTokens.add(["ParseError", token.data]);
+ }
+}
+
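+/**
+ * Merges adjacent "Character" tokens into one, matching what the expected
+ * output in the test files assumes, e.g. ["Character", "a"] followed by
+ * ["Character", "b"] becomes ["Character", "ab"].
+ */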
+List concatenateCharacterTokens(List tokens) {
+ var outputTokens = [];
+ for (var token in tokens) {
+ if (token.indexOf("ParseError") == -1 && token[0] == "Character") {
+ if (outputTokens.length > 0 &&
+ outputTokens.last.indexOf("ParseError") == -1 &&
+ outputTokens.last[0] == "Character") {
+
+ outputTokens.last[1] = '${outputTokens.last[1]}${token[1]}';
+ } else {
+ outputTokens.add(token);
+ }
+ } else {
+ outputTokens.add(token);
+ }
+ }
+ return outputTokens;
+}
+
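+/**
+ * The test files represent a parse error as the bare string "ParseError",
+ * whereas the parser above emits ["ParseError", data]; collapse the latter
+ * so the two forms compare equal.
+ */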
+List normalizeTokens(List tokens) {
+ // TODO: convert tests to reflect arrays
+ for (int i = 0; i < tokens.length; i++) {
+ var token = tokens[i];
+ if (token[0] == 'ParseError') {
+ tokens[i] = token[0];
+ }
+ }
+ return tokens;
+}
+
+
+/**
+ * Tests whether the received tokens match the expected tokens.
+ *
+ * If the ignoreErrorOrder flag is set to true, we don't test the relative
+ * positions of parse errors and non-parse errors.
+ */
+void expectTokensMatch(List expectedTokens, List receivedTokens,
+ bool ignoreErrorOrder, [bool ignoreErrors = false, String message]) {
+
+ var checkSelfClosing = false;
+ for (var token in expectedTokens) {
+ if (token[0] == "StartTag" && token.length == 4
+ || token[0] == "EndTag" && token.length == 3) {
+ checkSelfClosing = true;
+ break;
+ }
+ }
+
+ if (!checkSelfClosing) {
+ for (var token in receivedTokens) {
+ if (token[0] == "StartTag" || token[0] == "EndTag") {
+ token.removeLast();
+ }
+ }
+ }
+
+ if (!ignoreErrorOrder && !ignoreErrors) {
+ expect(receivedTokens, equals(expectedTokens), reason: message);
+ } else {
+ // Split the tokens into two groups: non-parse errors and parse errors.
+ var expectedNonErrors = expectedTokens.where((t) => t != "ParseError");
+ var receivedNonErrors = receivedTokens.where((t) => t != "ParseError");
+
+ expect(receivedNonErrors, equals(expectedNonErrors), reason: message);
+ if (!ignoreErrors) {
+ var expectedParseErrors = expectedTokens.where((t) => t == "ParseError");
+ var receivedParseErrors = receivedTokens.where((t) => t == "ParseError");
+ expect(receivedParseErrors, equals(expectedParseErrors), reason: message);
+ }
+ }
+}
+
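+/**
+ * Runs a single test case. The testInfo map comes straight from the JSON
+ * test file and carries at least 'input', 'output', and 'initialState',
+ * plus optional 'lastStartTag', 'ignoreErrorOrder', and 'doubleEscaped'
+ * entries.
+ */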
+void runTokenizerTest(Map testInfo) {
+ // XXX - move this out into the setup function
+ // concatenate all consecutive character tokens into a single token
+ if (testInfo.containsKey('doubleEscaped')) {
+ testInfo = unescape(testInfo);
+ }
+
+ var expected = concatenateCharacterTokens(testInfo['output']);
+ if (!testInfo.containsKey('lastStartTag')) {
+ testInfo['lastStartTag'] = null;
+ }
+ var parser = new TokenizerTestParser(testInfo['initialState'],
+ testInfo['lastStartTag']);
+ var tokens = parser.parse(testInfo['input']);
+ tokens = concatenateCharacterTokens(tokens);
+ var received = normalizeTokens(tokens);
+ var errorMsg = ["\n\nInitial state:",
+ testInfo['initialState'],
+ "\nInput:", testInfo['input'],
+ "\nExpected:", expected,
+ "\nreceived:", tokens].map((s) => '$s').join('\n');
+ var ignoreErrorOrder = testInfo['ignoreErrorOrder'];
+ if (ignoreErrorOrder == null) ignoreErrorOrder = false;
+
+ expectTokensMatch(expected, received, ignoreErrorOrder, true, errorMsg);
+}
+
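+/**
+ * Tests marked 'doubleEscaped' store their input and output with an extra
+ * level of JSON string escaping (e.g. "\\u0041" for "A"); decode both
+ * before running the test.
+ */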
+Map unescape(Map testInfo) {
+ // TODO(sigmundch,jmesserly): we currently use json.parse to unescape the
+ // unicode characters in the string; we should use a decoding that works
+ // with any control characters.
+ decode(inp) => inp == '\u0000' ? inp : json.parse('"$inp"');
+
+ testInfo["input"] = decode(testInfo["input"]);
+ for (var token in testInfo["output"]) {
+ if (token == "ParseError") {
+ continue;
+ } else {
+ token[1] = decode(token[1]);
+ if (token.length > 2) {
+ // token[2] holds the attribute map; rebuild it rather than removing
+ // and re-adding keys while iterating over it.
+ var decoded = {};
+ token[2].forEach((key, value) {
+ decoded[decode(key)] = decode(value);
+ });
+ token[2] = decoded;
+ }
+ }
+ }
+ return testInfo;
+}
+
+
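+/**
+ * Converts a state name from the test file into the corresponding tokenizer
+ * method name, e.g. camelCase('RCDATA state') returns 'rcdataState'.
+ */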
+String camelCase(String s) {
+ s = s.toLowerCase();
+ var result = new StringBuffer();
+ for (var match in new RegExp(r"\W+(\w)(\w+)").allMatches(s)) {
+ if (result.length == 0) result.write(s.substring(0, match.start));
+ result.write(match.group(1).toUpperCase());
+ result.write(match.group(2));
+ }
+ return result.toString();
+}
+
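+/**
+ * Loads every '.test' file from the tokenizer data directory and registers
+ * one test group per file, running each test once for every initial state
+ * it requests (defaulting to the data state).
+ */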
+void main() {
+ for (var path in getDataFiles('tokenizer')) {
+ if (!path.endsWith('.test')) continue;
+
+ var text = new File(path).readAsStringSync();
+ var tests = json.parse(text);
+ var testName = pathos.basenameWithoutExtension(path);
+ var testList = tests['tests'];
+ if (testList == null) continue;
+
+ group(testName, () {
+ for (int index = 0; index < testList.length; index++) {
+ final testInfo = testList[index];
+
+ testInfo.putIfAbsent("initialStates", () => ["Data state"]);
+ for (var initialState in testInfo["initialStates"]) {
+ test(testInfo["description"], () {
+ testInfo["initialState"] = camelCase(initialState);
+ runTokenizerTest(testInfo);
+ });
+ }
+ }
+ });
+ }
+}
