Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(202)

Side by Side Diff: utils/tests/string_encoding/utf8_test.dart

Issue 68563004: Move unicode tests to utf package. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Simplify test. Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 #!/usr/bin/env dart
2 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
3 // for details. All rights reserved. Use of this source code is governed by a
4 // BSD-style license that can be found in the LICENSE file.
5
6 library utf8_tests;
7 import 'dunit.dart';
8 import '../../../lib/convert/convert.dart';
9
/// Entry point: creates a [TestSuite], registers a [Utf8Tests] instance with
/// it, and then runs the suite.
void main() {
  new TestSuite()
    ..registerTestClass(new Utf8Tests())
    ..run();
}
15
16 class Utf8Tests extends TestClass {
17 static const String testEnglishPhrase =
18 "The quick brown fox jumps over the lazy dog.";
19
20 static const List<int> testEnglishUtf8 = const<int> [
21 0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63,
22 0x6b, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20,
23 0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70,
24 0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74,
25 0x68, 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20,
26 0x64, 0x6f, 0x67, 0x2e];
27
28 static const String testDanishPhrase = "Quizdeltagerne spiste jordbær med " +
29 "fløde mens cirkusklovnen Wolther spillede på xylofon.";
30
31 static const List<int> testDanishUtf8 = const<int>[
32 0x51, 0x75, 0x69, 0x7a, 0x64, 0x65, 0x6c, 0x74,
33 0x61, 0x67, 0x65, 0x72, 0x6e, 0x65, 0x20, 0x73,
34 0x70, 0x69, 0x73, 0x74, 0x65, 0x20, 0x6a, 0x6f,
35 0x72, 0x64, 0x62, 0xc3, 0xa6, 0x72, 0x20, 0x6d,
36 0x65, 0x64, 0x20, 0x66, 0x6c, 0xc3, 0xb8, 0x64,
37 0x65, 0x20, 0x6d, 0x65, 0x6e, 0x73, 0x20, 0x63,
38 0x69, 0x72, 0x6b, 0x75, 0x73, 0x6b, 0x6c, 0x6f,
39 0x76, 0x6e, 0x65, 0x6e, 0x20, 0x57, 0x6f, 0x6c,
40 0x74, 0x68, 0x65, 0x72, 0x20, 0x73, 0x70, 0x69,
41 0x6c, 0x6c, 0x65, 0x64, 0x65, 0x20, 0x70, 0xc3,
42 0xa5, 0x20, 0x78, 0x79, 0x6c, 0x6f, 0x66, 0x6f,
43 0x6e, 0x2e];
44
45 // unusual formatting due to strange editor interaction w/ text direction.
46 static const String
47 testHebrewPhrase = "דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה";
48
49 static const List<int> testHebrewUtf8 = const<int>[
50 0xd7, 0x93, 0xd7, 0x92, 0x20, 0xd7, 0xa1, 0xd7,
51 0xa7, 0xd7, 0xa8, 0xd7, 0x9f, 0x20, 0xd7, 0xa9,
52 0xd7, 0x98, 0x20, 0xd7, 0x91, 0xd7, 0x99, 0xd7,
53 0x9d, 0x20, 0xd7, 0x9e, 0xd7, 0x90, 0xd7, 0x95,
54 0xd7, 0x9b, 0xd7, 0x96, 0xd7, 0x91, 0x20, 0xd7,
55 0x95, 0xd7, 0x9c, 0xd7, 0xa4, 0xd7, 0xaa, 0xd7,
56 0xa2, 0x20, 0xd7, 0x9e, 0xd7, 0xa6, 0xd7, 0x90,
57 0x20, 0xd7, 0x9c, 0xd7, 0x95, 0x20, 0xd7, 0x97,
58 0xd7, 0x91, 0xd7, 0xa8, 0xd7, 0x94, 0x20, 0xd7,
59 0x90, 0xd7, 0x99, 0xd7, 0x9a, 0x20, 0xd7, 0x94,
60 0xd7, 0xa7, 0xd7, 0x9c, 0xd7, 0x99, 0xd7, 0x98,
61 0xd7, 0x94];
62
63 static const String testRussianPhrase = "Съешь же ещё этих мягких " +
64 "французских булок да выпей чаю";
65
66 static const List<int> testRussianUtf8 = const<int>[
67 0xd0, 0xa1, 0xd1, 0x8a, 0xd0, 0xb5, 0xd1, 0x88,
68 0xd1, 0x8c, 0x20, 0xd0, 0xb6, 0xd0, 0xb5, 0x20,
69 0xd0, 0xb5, 0xd1, 0x89, 0xd1, 0x91, 0x20, 0xd1,
70 0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0xd1, 0x85, 0x20,
71 0xd0, 0xbc, 0xd1, 0x8f, 0xd0, 0xb3, 0xd0, 0xba,
72 0xd0, 0xb8, 0xd1, 0x85, 0x20, 0xd1, 0x84, 0xd1,
73 0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd1, 0x86, 0xd1,
74 0x83, 0xd0, 0xb7, 0xd1, 0x81, 0xd0, 0xba, 0xd0,
75 0xb8, 0xd1, 0x85, 0x20, 0xd0, 0xb1, 0xd1, 0x83,
76 0xd0, 0xbb, 0xd0, 0xbe, 0xd0, 0xba, 0x20, 0xd0,
77 0xb4, 0xd0, 0xb0, 0x20, 0xd0, 0xb2, 0xd1, 0x8b,
78 0xd0, 0xbf, 0xd0, 0xb5, 0xd0, 0xb9, 0x20, 0xd1,
79 0x87, 0xd0, 0xb0, 0xd1, 0x8e];
80
81 static const String testGreekPhrase = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ " +
82 "στὸ χρυσαφὶ ξέφωτο";
83
84 static const List<int> testGreekUtf8 = const<int>[
85 0xce, 0x93, 0xce, 0xb1, 0xce, 0xb6, 0xce, 0xad,
86 0xce, 0xb5, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce,
87 0xb1, 0xe1, 0xbd, 0xb6, 0x20, 0xce, 0xbc, 0xcf,
88 0x85, 0xcf, 0x81, 0xcf, 0x84, 0xce, 0xb9, 0xe1,
89 0xbd, 0xb2, 0xcf, 0x82, 0x20, 0xce, 0xb4, 0xe1,
90 0xbd, 0xb2, 0xce, 0xbd, 0x20, 0xce, 0xb8, 0xe1,
91 0xbd, 0xb0, 0x20, 0xce, 0xb2, 0xcf, 0x81, 0xe1,
92 0xbf, 0xb6, 0x20, 0xcf, 0x80, 0xce, 0xb9, 0xe1,
93 0xbd, 0xb0, 0x20, 0xcf, 0x83, 0xcf, 0x84, 0xe1,
94 0xbd, 0xb8, 0x20, 0xcf, 0x87, 0xcf, 0x81, 0xcf,
95 0x85, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x86, 0xe1,
96 0xbd, 0xb6, 0x20, 0xce, 0xbe, 0xce, 0xad, 0xcf,
97 0x86, 0xcf, 0x89, 0xcf, 0x84, 0xce, 0xbf];
98
99 static const String testKatakanaPhrase = """
100 イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム
101 ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン""";
102
103 static const List<int> testKatakanaUtf8 = const<int>[
104 0xe3, 0x82, 0xa4, 0xe3, 0x83, 0xad, 0xe3, 0x83,
105 0x8f, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0x9b, 0xe3,
106 0x83, 0x98, 0xe3, 0x83, 0x88, 0x20, 0xe3, 0x83,
107 0x81, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0x8c, 0xe3,
108 0x83, 0xab, 0xe3, 0x83, 0xb2, 0x20, 0xe3, 0x83,
109 0xaf, 0xe3, 0x82, 0xab, 0xe3, 0x83, 0xa8, 0xe3,
110 0x82, 0xbf, 0xe3, 0x83, 0xac, 0xe3, 0x82, 0xbd,
111 0x20, 0xe3, 0x83, 0x84, 0xe3, 0x83, 0x8d, 0xe3,
112 0x83, 0x8a, 0xe3, 0x83, 0xa9, 0xe3, 0x83, 0xa0,
113 0x0a, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3,
114 0x83, 0x8e, 0xe3, 0x82, 0xaa, 0xe3, 0x82, 0xaf,
115 0xe3, 0x83, 0xa4, 0xe3, 0x83, 0x9e, 0x20, 0xe3,
116 0x82, 0xb1, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xb3,
117 0xe3, 0x82, 0xa8, 0xe3, 0x83, 0x86, 0x20, 0xe3,
118 0x82, 0xa2, 0xe3, 0x82, 0xb5, 0xe3, 0x82, 0xad,
119 0xe3, 0x83, 0xa6, 0xe3, 0x83, 0xa1, 0xe3, 0x83,
120 0x9f, 0xe3, 0x82, 0xb7, 0x20, 0xe3, 0x83, 0xb1,
121 0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa2, 0xe3, 0x82,
122 0xbb, 0xe3, 0x82, 0xb9, 0xe3, 0x83, 0xb3];
123
/// Registers every test method of this class with [suite], each under its
/// "Utf8Tests."-qualified name.
void registerTests(TestSuite suite) {
  // A map literal iterates in insertion order, so the tests are registered
  // in the order they are listed here.
  final tests = {
    "Utf8Tests.testUtf8bytesToCodepoints": testUtf8bytesToCodepoints,
    "Utf8Tests.testUtf8BytesToString": testUtf8BytesToString,
    "Utf8Tests.testEncodeToUtf8": testEncodeToUtf8,
    "Utf8Tests.testIterableMethods": testIterableMethods
  };
  tests.forEach((name, body) {
    register(name, body, suite);
  });
}
131
/// Encodes each sample phrase with [encodeUtf8] and compares the result with
/// the corresponding hand-written UTF-8 byte table.
void testEncodeToUtf8() {
  // Asserts that [phrase] encodes to exactly [utf8Bytes].
  void expectEncoding(List<int> utf8Bytes, String phrase, String reason) {
    Expect.listEquals(utf8Bytes, encodeUtf8(phrase), reason);
  }

  expectEncoding(testEnglishUtf8, testEnglishPhrase, "english to utf8");
  expectEncoding(testDanishUtf8, testDanishPhrase, "encode danish to utf8");
  expectEncoding(testHebrewUtf8, testHebrewPhrase, "Hebrew to utf8");
  expectEncoding(testRussianUtf8, testRussianPhrase, "Russian to utf8");
  expectEncoding(testGreekUtf8, testGreekPhrase, "Greek to utf8");
  expectEncoding(testKatakanaUtf8, testKatakanaPhrase, "Katakana to utf8");
}
151
/// Encodes [text] as a list of UTF-8 bytes.
List<int> encodeUtf8(String text) => UTF8.encode(text);

/// Decodes the UTF-8 [bytes] into a string.
///
/// Uses the codec's default (strict) mode; the tests only pass well-formed
/// input through this helper.
String decodeUtf8(List<int> bytes) => UTF8.decode(bytes);

/// Decodes the UTF-8 [bytes] and returns the resulting Unicode code points.
///
/// The name is spelled `utf8ToCodepoints` (lower-case "p") to match the call
/// sites in `testUtf8bytesToCodepoints`. Decoding is lenient
/// (`allowMalformed: true`) because those tests feed deliberately broken
/// sequences and expect U+FFFD back; strict decoding would throw a
/// `FormatException` instead.
///
/// NOTE(review): how many replacement characters the codec emits for a given
/// malformed sequence is decided by `UTF8.decode` - confirm it matches the
/// expectations in the tests.
List<int> utf8ToCodepoints(List<int> bytes) =>
    UTF8.decode(bytes, allowMalformed: true).runes.toList();
155
/// Checks `utf8ToCodepoints` against well-formed boundary sequences and a
/// series of malformed inputs: stray continuation bytes, truncated
/// sequences, impossible bytes, overlong forms and encoded surrogates.
///
/// Every malformed case expects UNICODE_REPLACEMENT_CHARACTER_CODEPOINT in
/// place of the bad sequence.
void testUtf8bytesToCodepoints() {
  final int replacement = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT;

  // Asserts that [bytes] decode to exactly the code points in [expected].
  void check(List expected, List bytes, String reason) {
    Expect.listEquals(expected, utf8ToCodepoints(bytes), reason);
  }

  // Asserts that [bytes] decode to [count] replacement characters.
  void checkReplaced(int count, List bytes, String reason) {
    check(new List<int>.filled(count, replacement), bytes, reason);
  }

  // Follows every lead byte in [first, end) with a space (0x20) and expects
  // one replacement character per lead byte.
  void checkLeadBytesBeforeSpace(int first, int end, String reason) {
    final List<int> input = <int>[];
    for (int lead = first; lead < end; lead++) {
      input.addAll([lead, 0x20]);
    }
    checkReplaced(end - first, input, reason);
  }

  check([954, 972, 963, 956, 949],
      [0xce, 0xba, 0xcf, 0x8c, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5], "κόσμε");

  // boundary conditions: First possible sequence of a certain length
  check([], [], "no input");
  check([0x0], [0x0], "0");
  check([0x80], [0xc2, 0x80], "80");
  check([0x800], [0xe0, 0xa0, 0x80], "800");
  check([0x10000], [0xf0, 0x90, 0x80, 0x80], "10000");
  checkReplaced(1, [0xf8, 0x88, 0x80, 0x80, 0x80], "200000");
  checkReplaced(1, [0xfc, 0x84, 0x80, 0x80, 0x80, 0x80], "4000000");

  // boundary conditions: Last possible sequence of a certain length
  check([0x7f], [0x7f], "7f");
  check([0x7ff], [0xdf, 0xbf], "7ff");
  check([0xffff], [0xef, 0xbf, 0xbf], "ffff");
  checkReplaced(1, [0xf7, 0xbf, 0xbf, 0xbf], "1fffff");
  checkReplaced(1, [0xfb, 0xbf, 0xbf, 0xbf, 0xbf], "3ffffff");
  // The label used to repeat "4000000" (the first 6-byte value); the last
  // 6-byte form fd bf bf bf bf bf stands for 0x7fffffff.
  checkReplaced(1, [0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf], "7fffffff");

  // other boundary conditions
  check([0xd7ff], [0xed, 0x9f, 0xbf], "d7ff");
  check([0xe000], [0xee, 0x80, 0x80], "e000");
  checkReplaced(1, [0xef, 0xbf, 0xbd], "fffd");
  check([0x10ffff], [0xf4, 0x8f, 0xbf, 0xbf], "10ffff");
  checkReplaced(1, [0xf4, 0x90, 0x80, 0x80], "110000");

  // unexpected continuation bytes
  checkReplaced(1, [0x80], "80 => replacement character");
  checkReplaced(1, [0xbf], "bf => replacement character");

  final List<int> allContinuationBytes = <int>[];
  for (int i = 0x80; i < 0xc0; i++) {
    allContinuationBytes.add(i);
  }
  checkReplaced(allContinuationBytes.length, allContinuationBytes,
      "80 - bf => replacement character x 64");

  // Lead bytes of 2- to 6-byte sequences, each followed by a space instead
  // of a continuation byte.
  // NOTE(review): the first label mentions "+ space" in the result, but the
  // expectation (as in the other four cases) holds replacement characters
  // only - confirm which is intended.
  checkLeadBytesBeforeSpace(0xc0, 0xe0,
      "c0 - df + space => replacement character + space x 32");
  checkLeadBytesBeforeSpace(0xe0, 0xf0,
      "e0 - ef + space => replacement character x 16");
  checkLeadBytesBeforeSpace(0xf0, 0xf8,
      "f0 - f7 + space => replacement character x 8");
  checkLeadBytesBeforeSpace(0xf8, 0xfc,
      "f8 - fb + space => replacement character x 4");
  checkLeadBytesBeforeSpace(0xfc, 0xfe,
      "fc - fd + space => replacement character x 2");

  // Sequences with last continuation byte missing
  checkReplaced(1, [0xc2], "2-byte sequence with last byte missing");
  checkReplaced(1, [0xe0, 0x80], "3-byte sequence with last byte missing");
  checkReplaced(1, [0xf0, 0x80, 0x80],
      "4-byte sequence with last byte missing");
  checkReplaced(1, [0xf8, 0x88, 0x80, 0x80],
      "5-byte sequence with last byte missing");
  checkReplaced(1, [0xfc, 0x80, 0x80, 0x80, 0x80],
      "6-byte sequence with last byte missing");

  checkReplaced(1, [0xdf], "2-byte sequence with last byte missing (hi)");
  checkReplaced(1, [0xef, 0xbf],
      "3-byte sequence with last byte missing (hi)");
  checkReplaced(1, [0xf7, 0xbf, 0xbf],
      "4-byte sequence with last byte missing (hi)");
  checkReplaced(1, [0xfb, 0xbf, 0xbf, 0xbf],
      "5-byte sequence with last byte missing (hi)");
  checkReplaced(1, [0xfd, 0xbf, 0xbf, 0xbf, 0xbf],
      "6-byte sequence with last byte missing (hi)");

  // Concatenation of incomplete sequences
  checkReplaced(
      10,
      [ 0xc2,
        0xe0, 0x80,
        0xf0, 0x80, 0x80,
        0xf8, 0x88, 0x80, 0x80,
        0xfc, 0x80, 0x80, 0x80, 0x80,
        0xdf,
        0xef, 0xbf,
        0xf7, 0xbf, 0xbf,
        0xfb, 0xbf, 0xbf, 0xbf,
        0xfd, 0xbf, 0xbf, 0xbf, 0xbf ],
      "Concatenation of incomplete sequences");

  // Impossible bytes
  checkReplaced(1, [0xfe], "fe");
  checkReplaced(1, [0xff], "ff");
  checkReplaced(4, [0xfe, 0xfe, 0xff, 0xff], "fe fe ff ff");

  // Overlong sequences
  checkReplaced(1, [0xc0, 0xaf], "c0 af");
  checkReplaced(1, [0xe0, 0x80, 0xaf], "e0 80 af");
  checkReplaced(1, [0xf0, 0x80, 0x80, 0xaf], "f0 80 80 af");
  checkReplaced(1, [0xf8, 0x80, 0x80, 0x80, 0xaf], "f8 80 80 80 af");
  checkReplaced(1, [0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf],
      "fc 80 80 80 80 af");

  checkReplaced(1, [0xc1, 0xbf], "c1 bf");
  checkReplaced(1, [0xe0, 0x9f, 0xbf], "e0 9f bf");
  checkReplaced(1, [0xf0, 0x8f, 0xbf, 0xbf], "f0 8f bf bf");
  checkReplaced(1, [0xf8, 0x87, 0xbf, 0xbf, 0xbf], "f8 87 bf bf bf");
  checkReplaced(1, [0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf],
      "fc 83 bf bf bf bf");

  checkReplaced(1, [0xc0, 0x80], "c0 80");
  checkReplaced(1, [0xe0, 0x80, 0x80], "e0 80 80");
  checkReplaced(1, [0xf0, 0x80, 0x80, 0x80], "f0 80 80 80");
  checkReplaced(1, [0xf8, 0x80, 0x80, 0x80, 0x80], "f8 80 80 80 80");
  checkReplaced(1, [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80],
      "fc 80 80 80 80 80");

  // Illegal code positions
  checkReplaced(1, [0xed, 0xa0, 0x80], "U+D800");
  checkReplaced(1, [0xed, 0xad, 0xbf], "U+DB7F");
  checkReplaced(1, [0xed, 0xae, 0x80], "U+DB80");
  checkReplaced(1, [0xed, 0xaf, 0xbf], "U+DBFF");
  checkReplaced(1, [0xed, 0xb0, 0x80], "U+DC00");
  checkReplaced(1, [0xed, 0xbe, 0x80], "U+DF80");
  checkReplaced(1, [0xed, 0xbf, 0xbf], "U+DFFF");

  // Paired UTF-16 surrogates
  checkReplaced(2, [0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80], "U+D800 U+DC00");
  checkReplaced(2, [0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf], "U+D800 U+DFFF");
  checkReplaced(2, [0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80], "U+DB7F U+DC00");
  checkReplaced(2, [0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf], "U+DB7F U+DFFF");
  checkReplaced(2, [0xed, 0xae, 0x80, 0xed, 0xb0, 0x80], "U+DB80 U+DC00");
  checkReplaced(2, [0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf], "U+DB80 U+DFFF");
  checkReplaced(2, [0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80], "U+DBFF U+DC00");
  checkReplaced(2, [0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf], "U+DBFF U+DFFF");

  // Other illegal code positions (???)
  check([0xfffe], [0xef, 0xbf, 0xbe], "U+FFFE");
  check([0xffff], [0xef, 0xbf, 0xbf], "U+FFFF");
}
442
/// Decodes each hand-written UTF-8 byte table with [decodeUtf8] and compares
/// the result with the corresponding sample phrase.
void testUtf8BytesToString() {
  // Asserts that [utf8Bytes] decode to exactly [phrase].
  void expectDecoding(String phrase, List<int> utf8Bytes, String language) {
    Expect.stringEquals(phrase, decodeUtf8(utf8Bytes), language);
  }

  expectDecoding(testEnglishPhrase, testEnglishUtf8, "English");
  expectDecoding(testDanishPhrase, testDanishUtf8, "Danish");
  expectDecoding(testHebrewPhrase, testHebrewUtf8, "Hebrew");
  expectDecoding(testRussianPhrase, testRussianUtf8, "Russian");
  expectDecoding(testGreekPhrase, testGreekUtf8, "Greek");
  expectDecoding(testKatakanaPhrase, testKatakanaUtf8, "Katakana");
}
462
/// Exercises the decoder returned by `decodeUtf8AsIterable` through the
/// Iterable interface: reads `first`, then drains it into a list and
/// rebuilds the phrase from the collected values.
void testIterableMethods() {
  final IterableUtf8Decoder ascii = decodeUtf8AsIterable(testEnglishUtf8);
  // The English phrase starts with 'T' (0x54), which is also the first byte
  // of its UTF-8 table, so the first decoded value equals the first byte.
  Expect.equals(testEnglishUtf8[0], ascii.first);
  // Collect the whole decoded sequence and turn it back into a string.
  final List<int> asciiCodes = new List<int>.from(ascii);
  Expect.stringEquals(testEnglishPhrase,
      new String.fromCharCodes(asciiCodes));

  final IterableUtf8Decoder kana = decodeUtf8AsIterable(testKatakanaUtf8);
  // For multi-byte input, compare against the phrase's first code unit.
  Expect.equals(testKatakanaPhrase.codeUnits[0], kana.first);
  // Collect the whole decoded sequence and turn it back into a string.
  final List<int> kanaCodes = new List<int>.from(kana);
  Expect.stringEquals(testKatakanaPhrase,
      new String.fromCharCodes(kanaCodes));
}
478 }
OLDNEW
« pkg/utf/test/utf32_test.dart ('K') | « utils/tests/string_encoding/utf8_benchmarks.dart ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698