Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(23)

Side by Side Diff: packages/html/test/tokenizer_test.dart

Issue 1400473008: Roll Observatory packages and add a roll script (Closed) Base URL: git@github.com:dart-lang/observatory_pub_packages.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « packages/html/test/support.dart ('k') | packages/initialize/.gitignore » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 library tokenizer_test;
2
3 // Note: mirrors used to match the getattr usage in the original test
4 import 'dart:convert';
5 import 'dart:io';
6 import 'dart:mirrors';
7 import 'package:path/path.dart' as pathos;
8 import 'package:unittest/unittest.dart';
9 import 'package:html/src/char_encodings.dart';
10 import 'package:html/src/token.dart';
11 import 'package:html/src/tokenizer.dart';
12 import 'package:utf/utf.dart';
13 import 'support.dart';
14
/// Runs [HtmlTokenizer] over one test input and records every emitted token
/// in the list-based format used by the html5lib tokenizer test data
/// (e.g. `["StartTag", name, attrs, selfClosing]`).
class TokenizerTestParser {
  // Name of the tokenizer state method to start in (e.g. "dataState");
  // invoked via mirrors in [parse].
  String _state;
  // Optional tag name used to seed the tokenizer's current token, for
  // states that depend on the last start tag seen. May be null.
  var _lastStartTag;
  // Accumulates the converted tokens produced by the last call to [parse].
  List outputTokens;

  TokenizerTestParser(String initialState, [lastStartTag])
      : _state = initialState,
        _lastStartTag = lastStartTag;

  /// Tokenizes [str] and returns the collected tokens as JSON-style lists.
  List parse(String str) {
    // Note: we need to pass bytes to the tokenizer if we want it to handle BOM.
    var bytes = codepointsToUtf8(toCodepoints(str));
    var tokenizer = new HtmlTokenizer(bytes, encoding: 'utf-8');
    outputTokens = [];

    // Note: we can't get a closure of the state method. However, we can
    // create a new closure to invoke it via mirrors.
    var mtok = reflect(tokenizer);
    tokenizer.state = () => mtok.invoke(new Symbol(_state), const []).reflectee;

    if (_lastStartTag != null) {
      tokenizer.currentToken = new StartTagToken(_lastStartTag);
    }

    // Drain the tokenizer, dispatching each token by kind. Kinds without a
    // case here are silently dropped (there is no default branch).
    while (tokenizer.moveNext()) {
      var token = tokenizer.current;
      switch (token.kind) {
        case TokenKind.characters:
          processCharacters(token);
          break;
        case TokenKind.spaceCharacters:
          processSpaceCharacters(token);
          break;
        case TokenKind.startTag:
          processStartTag(token);
          break;
        case TokenKind.endTag:
          processEndTag(token);
          break;
        case TokenKind.comment:
          processComment(token);
          break;
        case TokenKind.doctype:
          processDoctype(token);
          break;
        case TokenKind.parseError:
          processParseError(token);
          break;
      }
    }

    return outputTokens;
  }

  void processDoctype(DoctypeToken token) {
    outputTokens.add(
        ["DOCTYPE", token.name, token.publicId, token.systemId, token.correct]);
  }

  void processStartTag(StartTagToken token) {
    outputTokens.add(["StartTag", token.name, token.data, token.selfClosing]);
  }

  void processEndTag(EndTagToken token) {
    outputTokens.add(["EndTag", token.name, token.selfClosing]);
  }

  void processComment(StringToken token) {
    outputTokens.add(["Comment", token.data]);
  }

  // Space characters are recorded exactly like ordinary characters.
  void processSpaceCharacters(StringToken token) {
    processCharacters(token);
  }

  void processCharacters(StringToken token) {
    outputTokens.add(["Character", token.data]);
  }

  // Intentionally a no-op: end-of-file produces no output token.
  void processEOF(token) {}

  void processParseError(StringToken token) {
    // TODO(jmesserly): when debugging test failures it can be useful to add
    // logging here like `print('ParseError $token');`. It would be nice to
    // use the actual logging library.
    outputTokens.add(["ParseError", token.data]);
  }
}
103
/// Merges runs of adjacent "Character" tokens into a single token, leaving
/// every other token (including bare "ParseError" strings) untouched.
List concatenateCharacterTokens(List tokens) {
  // A token counts as a plain character token when it is not a ParseError
  // entry and its first element is "Character". Note: `indexOf` is used
  // deliberately so this works both on list tokens (element search) and on
  // bare "ParseError" strings (substring search).
  isCharacter(token) =>
      token.indexOf("ParseError") == -1 && token[0] == "Character";

  var result = [];
  for (var token in tokens) {
    if (isCharacter(token) && result.length > 0 && isCharacter(result.last)) {
      // Fold this token's text into the previous character token.
      result.last[1] = '${result.last[1]}${token[1]}';
    } else {
      result.add(token);
    }
  }
  return result;
}
121
/// Collapses every `["ParseError", data]` entry into the bare string
/// "ParseError", in place, so comparisons ignore the error details.
List normalizeTokens(List tokens) {
  // TODO: convert tests to reflect arrays
  for (var i = 0; i < tokens.length; i++) {
    if (tokens[i][0] == 'ParseError') {
      tokens[i] = tokens[i][0];
    }
  }
  return tokens;
}
132
/// Test whether the test has passed or failed.
///
/// If the ignoreErrorOrder flag is set to true we don't test the relative
/// positions of parse errors and non parse errors.
void expectTokensMatch(
    List expectedTokens, List receivedTokens, bool ignoreErrorOrder,
    [bool ignoreErrors = false, String message]) {
  // Self-closing flags are only compared when the expected data carries them
  // (a 4-element StartTag or a 3-element EndTag).
  var checkSelfClosing = expectedTokens.any((token) =>
      token[0] == "StartTag" && token.length == 4 ||
      token[0] == "EndTag" && token.length == 3);

  if (!checkSelfClosing) {
    // Drop the trailing self-closing flag from received tags so their shape
    // matches the expected tokens.
    for (var token in receivedTokens) {
      if (token[0] == "StartTag" || token[0] == "EndTag") {
        token.removeLast();
      }
    }
  }

  if (!ignoreErrorOrder && !ignoreErrors) {
    expect(receivedTokens, equals(expectedTokens), reason: message);
    return;
  }

  // Sort the tokens into two groups; non-parse errors and parse errors.
  isParseError(t) => t == "ParseError";

  expect(receivedTokens.where((t) => !isParseError(t)),
      equals(expectedTokens.where((t) => !isParseError(t))),
      reason: message);
  if (!ignoreErrors) {
    expect(receivedTokens.where(isParseError),
        equals(expectedTokens.where(isParseError)),
        reason: message);
  }
}
172
/// Runs the single tokenizer test case described by [testInfo] and checks
/// its output tokens against the expected tokens.
void runTokenizerTest(Map testInfo) {
  // XXX - move this out into the setup function
  // concatenate all consecutive character tokens into a single token
  if (testInfo.containsKey('doubleEscaped')) {
    testInfo = unescape(testInfo);
  }

  var expected = concatenateCharacterTokens(testInfo['output']);
  // Ensure the key exists so the parser constructor sees an explicit null.
  testInfo.putIfAbsent('lastStartTag', () => null);

  var parser = new TokenizerTestParser(
      testInfo['initialState'], testInfo['lastStartTag']);
  var tokens = concatenateCharacterTokens(parser.parse(testInfo['input']));
  var received = normalizeTokens(tokens);

  // Build a readable failure message showing state, input, and both sides.
  var errorMsg = [
    "\n\nInitial state:",
    testInfo['initialState'],
    "\nInput:",
    testInfo['input'],
    "\nExpected:",
    expected,
    "\nreceived:",
    tokens
  ].map((s) => '$s').join('\n');

  var ignoreErrorOrder = testInfo['ignoreErrorOrder'];
  if (ignoreErrorOrder == null) ignoreErrorOrder = false;

  expectTokensMatch(expected, received, ignoreErrorOrder, true, errorMsg);
}
204
/// Decodes the `\uXXXX`-escaped strings of a "doubleEscaped" test case in
/// place and returns the (mutated) [testInfo] map.
Map unescape(Map testInfo) {
  // TODO(sigmundch,jmesserly): we currently use JSON.decode to unescape the
  // unicode characters in the string, we should use a decoding that works with
  // any control characters.
  // U+0000 cannot appear inside a JSON string literal, so it is passed
  // through unchanged.
  decode(inp) => inp == '\u0000' ? inp : JSON.decode('"$inp"');

  testInfo["input"] = decode(testInfo["input"]);
  for (var token in testInfo["output"]) {
    if (token == "ParseError") {
      continue;
    } else {
      // token is a list like [type, name, attrs?, ...]; decode the name.
      token[1] = decode(token[1]);
      if (token.length > 2) {
        // NOTE(review): this loop mutates token[2] while iterating it, a
        // concurrent-modification hazard. It also iterates token[2] as
        // [key, value] pairs but then indexes it by string key, which are
        // contradictory shapes — presumably this branch is not exercised by
        // the current doubleEscaped test data. TODO: confirm the actual
        // shape of token[2] and rewrite this safely.
        for (var pair in token[2]) {
          var key = pair[0];
          var value = pair[1];
          token[2].remove(key);
          token[2][decode(key)] = decode(value);
        }
      }
    }
  }
  return testInfo;
}
229
/// Converts a test state name like "Data state" to the camelCase method
/// name used by the tokenizer, e.g. "dataState".
///
/// A single-word input (no word break) is returned lowercased as-is,
/// rather than falling through to an empty result.
String camelCase(String s) {
  s = s.toLowerCase();
  var result = new StringBuffer();
  for (var match in new RegExp(r"\W+(\w)(\w+)").allMatches(s)) {
    // On the first match, copy everything before the first word break.
    if (result.length == 0) result.write(s.substring(0, match.start));
    // Capitalize the first letter of each subsequent word.
    result.write(match.group(1).toUpperCase());
    result.write(match.group(2));
  }
  // Fix: if the regex never matched (no word break in the input), the old
  // code returned "" — return the lowercased input instead.
  if (result.length == 0) return s;
  return result.toString();
}
240
/// Loads every tokenizer `.test` JSON file and registers one unittest case
/// per (test, initial state) combination.
void main() {
  for (var path in getDataFiles('tokenizer')) {
    if (!path.endsWith('.test')) continue;

    var tests = JSON.decode(new File(path).readAsStringSync());
    var testList = tests['tests'];
    if (testList == null) continue;

    group(pathos.basenameWithoutExtension(path), () {
      for (var index = 0; index < testList.length; index++) {
        var testInfo = testList[index];
        // Default to the tokenizer's initial "Data state" when the test
        // does not specify any states.
        testInfo.putIfAbsent("initialStates", () => ["Data state"]);
        for (var initialState in testInfo["initialStates"]) {
          test(testInfo["description"], () {
            // Set the state at run time so each registered test sees its
            // own captured initialState.
            testInfo["initialState"] = camelCase(initialState);
            runTokenizerTest(testInfo);
          });
        }
      }
    });
  }
}
OLDNEW
« no previous file with comments | « packages/html/test/support.dart ('k') | packages/initialize/.gitignore » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698