Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(69)

Side by Side Diff: pkg/third_party/html5lib/test/tokenizer_test.dart

Issue 22375011: move html5lib code into dart svn repo (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: change location of html5lib to pkg/third_party/html5lib Created 7 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 library tokenizer_test;
2
3 // Note: mirrors used to match the getattr usage in the original test
4 import 'dart:async';
5 import 'dart:io';
6 import 'dart:json' as json;
7 import 'dart:mirrors';
8 import 'dart:utf';
9 import 'package:path/path.dart' as pathos;
10 import 'package:unittest/unittest.dart';
11 import 'package:html5lib/src/char_encodings.dart';
12 import 'package:html5lib/src/constants.dart' as constants;
13 import 'package:html5lib/src/token.dart';
14 import 'package:html5lib/src/tokenizer.dart';
15 import 'package:html5lib/src/utils.dart';
16 import 'support.dart';
17
/**
 * Runs [HtmlTokenizer] over a test input and converts the token stream into
 * the list-of-lists representation used by the html5lib tokenizer test
 * fixtures (e.g. `["StartTag", name, attrs, selfClosing]`).
 */
class TokenizerTestParser {
  // Name of the tokenizer state method to start from, e.g. "dataState".
  String _state;
  // Tag name of the last start tag, if the test fixture specifies one
  // (needed by states like RCDATA that depend on the enclosing element).
  var _lastStartTag;
  // Accumulated token descriptions produced by the process* methods below.
  List outputTokens;

  TokenizerTestParser(String initialState, [lastStartTag])
      : _state = initialState,
        _lastStartTag = lastStartTag;

  /**
   * Tokenizes [str] and returns the recorded list of token descriptions.
   */
  List parse(String str) {
    // Note: we need to pass bytes to the tokenizer if we want it to handle BOM.
    var bytes = codepointsToUtf8(toCodepoints(str));
    var tokenizer = new HtmlTokenizer(bytes, encoding: 'utf-8');
    outputTokens = [];

    // Note: we can't get a closure of the state method. However, we can
    // create a new closure to invoke it via mirrors.
    var mtok = reflect(tokenizer);
    tokenizer.state = () => deprecatedFutureValue(
        mtok.invokeAsync(new Symbol(_state), const [])).reflectee;

    if (_lastStartTag != null) {
      tokenizer.currentToken = new StartTagToken(_lastStartTag);
    }

    // Pull tokens one at a time and dispatch on kind; each handler appends
    // one list-shaped entry to [outputTokens].
    while (tokenizer.moveNext()) {
      var token = tokenizer.current;
      switch (token.kind) {
        case TokenKind.characters:
          processCharacters(token);
          break;
        case TokenKind.spaceCharacters:
          processSpaceCharacters(token);
          break;
        case TokenKind.startTag:
          processStartTag(token);
          break;
        case TokenKind.endTag:
          processEndTag(token);
          break;
        case TokenKind.comment:
          processComment(token);
          break;
        case TokenKind.doctype:
          processDoctype(token);
          break;
        case TokenKind.parseError:
          processParseError(token);
          break;
      }
    }

    return outputTokens;
  }

  void processDoctype(DoctypeToken token) {
    outputTokens.add(["DOCTYPE", token.name, token.publicId,
        token.systemId, token.correct]);
  }

  void processStartTag(StartTagToken token) {
    outputTokens.add(["StartTag", token.name, token.data, token.selfClosing]);
  }

  void processEndTag(EndTagToken token) {
    outputTokens.add(["EndTag", token.name, token.selfClosing]);
  }

  void processComment(StringToken token) {
    outputTokens.add(["Comment", token.data]);
  }

  // Space characters are recorded exactly like ordinary characters; the
  // fixtures do not distinguish them.
  void processSpaceCharacters(StringToken token) {
    processCharacters(token);
  }

  void processCharacters(StringToken token) {
    outputTokens.add(["Character", token.data]);
  }

  // EOF produces no output entry.
  void processEOF(token) {
  }

  void processParseError(StringToken token) {
    // TODO(jmesserly): when debugging test failures it can be useful to add
    // logging here like `print('ParseError $token');`. It would be nice to
    // use the actual logging library.
    outputTokens.add(["ParseError", token.data]);
  }
}
108
/**
 * Merges each run of adjacent "Character" tokens into a single token,
 * leaving all other entries (including "ParseError" strings) untouched.
 *
 * The previous character token is extended in place, so input token lists
 * may be mutated.
 */
List concatenateCharacterTokens(List tokens) {
  var result = [];
  // A token is a mergeable character token when it is not a parse error
  // (checked first: plain "ParseError" strings do not support `[0]`).
  isChar(t) => t.indexOf("ParseError") == -1 && t[0] == "Character";
  for (var token in tokens) {
    if (isChar(token) && result.length > 0 && isChar(result.last)) {
      // Fold this token's text into the preceding character token.
      result.last[1] = '${result.last[1]}${token[1]}';
    } else {
      result.add(token);
    }
  }
  return result;
}
127
/**
 * Collapses each `["ParseError", ...]` entry to the bare string
 * "ParseError", mutating [tokens] in place and returning it.
 */
List normalizeTokens(List tokens) {
  // TODO: convert tests to reflect arrays
  for (var i = 0; i < tokens.length; i++) {
    if (tokens[i][0] == 'ParseError') {
      tokens[i] = tokens[i][0];
    }
  }
  return tokens;
}
138
139
/**
 * Asserts that [receivedTokens] matches [expectedTokens].
 *
 * If the ignoreErrorOrder flag is set to true we don't test the relative
 * positions of parse errors and non parse errors. If [ignoreErrors] is true,
 * parse errors are not compared at all.
 */
void expectTokensMatch(List expectedTokens, List receivedTokens,
    bool ignoreErrorOrder, [bool ignoreErrors = false, String message]) {

  // Only compare the self-closing flag when the expectations carry it
  // (a 4-element StartTag or a 3-element EndTag).
  var checkSelfClosing = expectedTokens.any((token) =>
      token[0] == "StartTag" && token.length == 4
      || token[0] == "EndTag" && token.length == 3);

  if (!checkSelfClosing) {
    // Drop the trailing selfClosing flag from received tags so shapes match.
    for (var token in receivedTokens) {
      if (token[0] == "StartTag" || token[0] == "EndTag") {
        token.removeLast();
      }
    }
  }

  if (!ignoreErrorOrder && !ignoreErrors) {
    expect(receivedTokens, equals(expectedTokens), reason: message);
    return;
  }

  // Sort the tokens into two groups; non-parse errors and parse errors
  isError(t) => t == "ParseError";
  expect(receivedTokens.where((t) => !isError(t)),
      equals(expectedTokens.where((t) => !isError(t))),
      reason: message);
  if (!ignoreErrors) {
    expect(receivedTokens.where(isError),
        equals(expectedTokens.where(isError)),
        reason: message);
  }
}
181
/**
 * Runs a single tokenizer fixture described by [testInfo] and checks the
 * produced tokens against the fixture's expected output.
 */
void runTokenizerTest(Map testInfo) {
  // XXX - move this out into the setup function
  // concatenate all consecutive character tokens into a single token
  if (testInfo.containsKey('doubleEscaped')) {
    testInfo = unescape(testInfo);
  }

  var expected = concatenateCharacterTokens(testInfo['output']);
  testInfo.putIfAbsent('lastStartTag', () => null);

  var parser = new TokenizerTestParser(testInfo['initialState'],
      testInfo['lastStartTag']);
  var received = normalizeTokens(
      concatenateCharacterTokens(parser.parse(testInfo['input'])));

  var errorMsg = ["\n\nInitial state:",
      testInfo['initialState'],
      "\nInput:", testInfo['input'],
      "\nExpected:", expected,
      "\nreceived:", received].map((s) => '$s').join('\n');

  // JSON fixtures may omit the flag; absent means false.
  var ignoreErrorOrder = testInfo['ignoreErrorOrder'] == true;

  expectTokensMatch(expected, received, ignoreErrorOrder, true, errorMsg);
}
208
/**
 * Decodes the double-escaped strings in [testInfo] in place and returns it.
 *
 * Fixtures marked `doubleEscaped` store their input, token text, and
 * attribute names/values with JSON-style `\\uXXXX` escapes that must be
 * decoded before use.
 */
Map unescape(Map testInfo) {
  // TODO(sigmundch,jmesserly): we currently use json.parse to unescape the
  // unicode characters in the string, we should use a decoding that works with
  // any control characters.
  decode(inp) => inp == '\u0000' ? inp : json.parse('"$inp"');

  testInfo["input"] = decode(testInfo["input"]);
  for (var token in testInfo["output"]) {
    if (token == "ParseError") {
      continue;
    } else {
      token[1] = decode(token[1]);
      if (token.length > 2) {
        // Attributes come from json.parse as a Map. Snapshot the keys first:
        // a Map is not directly iterable, and removing/inserting entries
        // while iterating would throw ConcurrentModificationError. (The old
        // code iterated `token[2]` itself and mutated it mid-loop.)
        var attrs = token[2];
        for (var key in attrs.keys.toList()) {
          var value = attrs.remove(key);
          attrs[decode(key)] = decode(value);
        }
      }
    }
  }
  return testInfo;
}
233
234
/**
 * Converts a space-separated name such as "Data state" to camelCase
 * ("dataState"), matching tokenizer state method names.
 *
 * Returns the lowercased input unchanged when it is a single word (the old
 * behavior returned the empty string in that case, losing the name).
 */
String camelCase(String s) {
  s = s.toLowerCase();
  var result = new StringBuffer();
  // Each \W+ boundary starts a new capitalized word; the text before the
  // first boundary is emitted as-is (lowercase).
  for (var match in new RegExp(r"\W+(\w)(\w+)").allMatches(s)) {
    if (result.length == 0) result.write(s.substring(0, match.start));
    result.write(match.group(1).toUpperCase());
    result.write(match.group(2));
  }
  // No boundary at all means a one-word name: return it rather than "".
  if (result.length == 0) return s;
  return result.toString();
}
245
/**
 * Loads every tokenizer fixture file and registers one unittest test per
 * (test case, initial state) pair.
 */
void main() {
  for (var path in getDataFiles('tokenizer')) {
    if (!path.endsWith('.test')) continue;

    var tests = json.parse(new File(path).readAsStringSync());
    var testList = tests['tests'];
    if (testList == null) continue;
    var testName = pathos.basenameWithoutExtension(path);

    group(testName, () {
      for (int i = 0; i < testList.length; i++) {
        final testInfo = testList[i];

        // Fixtures without explicit initial states default to the data state.
        testInfo.putIfAbsent("initialStates", () => ["Data state"]);
        for (final initialState in testInfo["initialStates"]) {
          test(testInfo["description"], () {
            testInfo["initialState"] = camelCase(initialState);
            runTokenizerTest(testInfo);
          });
        }
      }
    });
  }
}
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698