Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(667)

Side by Side Diff: pkg/utf/test/utf82_test.dart

Issue 68563004: Move unicode tests to utf package. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Simplify test. Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 import 'package:expect/expect.dart';
6 import 'package:utf/utf.dart';
7
// Test fixtures. Each sample phrase below is paired with a byte table; the
// tests in this file expect encodeUtf8(phrase) to equal that table and
// decodeUtf8(table) to equal the phrase.

// English sample: every byte in the matching table is below 0x80, i.e. one
// byte per character.
8 const String testEnglishPhrase =
9 "The quick brown fox jumps over the lazy dog.";
10
// Expected UTF-8 bytes for testEnglishPhrase.
11 const List<int> testEnglishUtf8 = const<int> [
12 0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63,
13 0x6b, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20,
14 0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70,
15 0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74,
16 0x68, 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20,
17 0x64, 0x6f, 0x67, 0x2e];
18
// Danish sample: mostly ASCII plus the letters æ, ø and å, written as two
// adjacent string literals.
19 const String testDanishPhrase = "Quizdeltagerne spiste jordbær med "
20 "fløde mens cirkusklovnen Wolther spillede på xylofon.";
21
// Expected UTF-8 bytes for testDanishPhrase; the three non-ASCII letters show
// up as the two-byte sequences c3 a6, c3 b8 and c3 a5.
22 const List<int> testDanishUtf8 = const<int>[
23 0x51, 0x75, 0x69, 0x7a, 0x64, 0x65, 0x6c, 0x74,
24 0x61, 0x67, 0x65, 0x72, 0x6e, 0x65, 0x20, 0x73,
25 0x70, 0x69, 0x73, 0x74, 0x65, 0x20, 0x6a, 0x6f,
26 0x72, 0x64, 0x62, 0xc3, 0xa6, 0x72, 0x20, 0x6d,
27 0x65, 0x64, 0x20, 0x66, 0x6c, 0xc3, 0xb8, 0x64,
28 0x65, 0x20, 0x6d, 0x65, 0x6e, 0x73, 0x20, 0x63,
29 0x69, 0x72, 0x6b, 0x75, 0x73, 0x6b, 0x6c, 0x6f,
30 0x76, 0x6e, 0x65, 0x6e, 0x20, 0x57, 0x6f, 0x6c,
31 0x74, 0x68, 0x65, 0x72, 0x20, 0x73, 0x70, 0x69,
32 0x6c, 0x6c, 0x65, 0x64, 0x65, 0x20, 0x70, 0xc3,
33 0xa5, 0x20, 0x78, 0x79, 0x6c, 0x6f, 0x66, 0x6f,
34 0x6e, 0x2e];
35
// Hebrew sample (right-to-left text). The declaration is deliberately split
// after `const String`, as the original note below explains.
36 // unusual formatting due to strange editor interaction w/ text direction.
37 const String
38 testHebrewPhrase = "דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה";
39
// Expected UTF-8 bytes for testHebrewPhrase: two-byte sequences led by 0xd7,
// separated by 0x20 for the spaces.
40 const List<int> testHebrewUtf8 = const<int>[
41 0xd7, 0x93, 0xd7, 0x92, 0x20, 0xd7, 0xa1, 0xd7,
42 0xa7, 0xd7, 0xa8, 0xd7, 0x9f, 0x20, 0xd7, 0xa9,
43 0xd7, 0x98, 0x20, 0xd7, 0x91, 0xd7, 0x99, 0xd7,
44 0x9d, 0x20, 0xd7, 0x9e, 0xd7, 0x90, 0xd7, 0x95,
45 0xd7, 0x9b, 0xd7, 0x96, 0xd7, 0x91, 0x20, 0xd7,
46 0x95, 0xd7, 0x9c, 0xd7, 0xa4, 0xd7, 0xaa, 0xd7,
47 0xa2, 0x20, 0xd7, 0x9e, 0xd7, 0xa6, 0xd7, 0x90,
48 0x20, 0xd7, 0x9c, 0xd7, 0x95, 0x20, 0xd7, 0x97,
49 0xd7, 0x91, 0xd7, 0xa8, 0xd7, 0x94, 0x20, 0xd7,
50 0x90, 0xd7, 0x99, 0xd7, 0x9a, 0x20, 0xd7, 0x94,
51 0xd7, 0xa7, 0xd7, 0x9c, 0xd7, 0x99, 0xd7, 0x98,
52 0xd7, 0x94];
53
// Russian (Cyrillic) sample, written as two adjacent string literals.
54 const String testRussianPhrase = "Съешь же ещё этих мягких "
55 "французских булок да выпей чаю";
56
// Expected UTF-8 bytes for testRussianPhrase: two-byte sequences led by 0xd0
// or 0xd1, with 0x20 for the spaces.
57 const List<int> testRussianUtf8 = const<int>[
58 0xd0, 0xa1, 0xd1, 0x8a, 0xd0, 0xb5, 0xd1, 0x88,
59 0xd1, 0x8c, 0x20, 0xd0, 0xb6, 0xd0, 0xb5, 0x20,
60 0xd0, 0xb5, 0xd1, 0x89, 0xd1, 0x91, 0x20, 0xd1,
61 0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0xd1, 0x85, 0x20,
62 0xd0, 0xbc, 0xd1, 0x8f, 0xd0, 0xb3, 0xd0, 0xba,
63 0xd0, 0xb8, 0xd1, 0x85, 0x20, 0xd1, 0x84, 0xd1,
64 0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd1, 0x86, 0xd1,
65 0x83, 0xd0, 0xb7, 0xd1, 0x81, 0xd0, 0xba, 0xd0,
66 0xb8, 0xd1, 0x85, 0x20, 0xd0, 0xb1, 0xd1, 0x83,
67 0xd0, 0xbb, 0xd0, 0xbe, 0xd0, 0xba, 0x20, 0xd0,
68 0xb4, 0xd0, 0xb0, 0x20, 0xd0, 0xb2, 0xd1, 0x8b,
69 0xd0, 0xbf, 0xd0, 0xb5, 0xd0, 0xb9, 0x20, 0xd1,
70 0x87, 0xd0, 0xb0, 0xd1, 0x8e];
71
// Greek sample including accented letters, written as two adjacent string
// literals.
72 const String testGreekPhrase = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ "
73 "στὸ χρυσαφὶ ξέφωτο";
74
// Expected UTF-8 bytes for testGreekPhrase: a mix of two-byte sequences (led
// by 0xce / 0xcf) and three-byte sequences (led by 0xe1).
75 const List<int> testGreekUtf8 = const<int>[
76 0xce, 0x93, 0xce, 0xb1, 0xce, 0xb6, 0xce, 0xad,
77 0xce, 0xb5, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce,
78 0xb1, 0xe1, 0xbd, 0xb6, 0x20, 0xce, 0xbc, 0xcf,
79 0x85, 0xcf, 0x81, 0xcf, 0x84, 0xce, 0xb9, 0xe1,
80 0xbd, 0xb2, 0xcf, 0x82, 0x20, 0xce, 0xb4, 0xe1,
81 0xbd, 0xb2, 0xce, 0xbd, 0x20, 0xce, 0xb8, 0xe1,
82 0xbd, 0xb0, 0x20, 0xce, 0xb2, 0xcf, 0x81, 0xe1,
83 0xbf, 0xb6, 0x20, 0xcf, 0x80, 0xce, 0xb9, 0xe1,
84 0xbd, 0xb0, 0x20, 0xcf, 0x83, 0xcf, 0x84, 0xe1,
85 0xbd, 0xb8, 0x20, 0xcf, 0x87, 0xcf, 0x81, 0xcf,
86 0x85, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x86, 0xe1,
87 0xbd, 0xb6, 0x20, 0xce, 0xbe, 0xce, 0xad, 0xcf,
88 0x86, 0xcf, 0x89, 0xcf, 0x84, 0xce, 0xbf];
89
// Katakana sample held in a triple-quoted, two-line string literal.
90 const String testKatakanaPhrase = """
91 イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム
92 ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン""";
93
// Expected UTF-8 bytes for testKatakanaPhrase: three-byte sequences led by
// 0xe3, 0x20 for the spaces, and a single 0x0a for the line break between the
// two lines of the literal (the table does not start with 0x0a).
94 const List<int> testKatakanaUtf8 = const<int>[
95 0xe3, 0x82, 0xa4, 0xe3, 0x83, 0xad, 0xe3, 0x83,
96 0x8f, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0x9b, 0xe3,
97 0x83, 0x98, 0xe3, 0x83, 0x88, 0x20, 0xe3, 0x83,
98 0x81, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0x8c, 0xe3,
99 0x83, 0xab, 0xe3, 0x83, 0xb2, 0x20, 0xe3, 0x83,
100 0xaf, 0xe3, 0x82, 0xab, 0xe3, 0x83, 0xa8, 0xe3,
101 0x82, 0xbf, 0xe3, 0x83, 0xac, 0xe3, 0x82, 0xbd,
102 0x20, 0xe3, 0x83, 0x84, 0xe3, 0x83, 0x8d, 0xe3,
103 0x83, 0x8a, 0xe3, 0x83, 0xa9, 0xe3, 0x83, 0xa0,
104 0x0a, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3,
105 0x83, 0x8e, 0xe3, 0x82, 0xaa, 0xe3, 0x82, 0xaf,
106 0xe3, 0x83, 0xa4, 0xe3, 0x83, 0x9e, 0x20, 0xe3,
107 0x82, 0xb1, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xb3,
108 0xe3, 0x82, 0xa8, 0xe3, 0x83, 0x86, 0x20, 0xe3,
109 0x82, 0xa2, 0xe3, 0x82, 0xb5, 0xe3, 0x82, 0xad,
110 0xe3, 0x83, 0xa6, 0xe3, 0x83, 0xa1, 0xe3, 0x83,
111 0x9f, 0xe3, 0x82, 0xb7, 0x20, 0xe3, 0x83, 0xb1,
112 0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa2, 0xe3, 0x82,
113 0xbb, 0xe3, 0x82, 0xb9, 0xe3, 0x83, 0xb3];
114
/// Entry point: runs each UTF-8 test group once, in a fixed order.
///
/// A failing expectation throws, so groups listed after a failing one are
/// not reached.
void main() {
  var testGroups = [
    testUtf8bytesToCodepoints,
    testUtf8BytesToString,
    testEncodeToUtf8,
    testIterableMethods,
  ];
  for (var runGroup in testGroups) {
    runGroup();
  }
}
121
/// Verifies that [encodeUtf8] turns every sample phrase into its reference
/// byte table.
void testEncodeToUtf8() {
  // Encodes [phrase] and compares the result with [expectedBytes]; [reason]
  // is the label reported when the comparison fails.
  void checkEncoding(String phrase, List<int> expectedBytes, String reason) {
    var encoded = encodeUtf8(phrase);
    Expect.listEquals(expectedBytes, encoded, reason);
  }

  checkEncoding(testEnglishPhrase, testEnglishUtf8, "english to utf8");
  checkEncoding(testDanishPhrase, testDanishUtf8, "encode danish to utf8");
  checkEncoding(testHebrewPhrase, testHebrewUtf8, "Hebrew to utf8");
  checkEncoding(testRussianPhrase, testRussianUtf8, "Russian to utf8");
  checkEncoding(testGreekPhrase, testGreekUtf8, "Greek to utf8");
  checkEncoding(testKatakanaPhrase, testKatakanaUtf8, "Katakana to utf8");
}
141
/// Checks [utf8ToCodepoints] against well-formed and malformed UTF-8 input.
///
/// Well-formed sequences are expected to decode to their code points. Every
/// truncated, overlong, out-of-range, surrogate or otherwise invalid sequence
/// listed here is expected to decode to a single
/// [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT].
void testUtf8bytesToCodepoints() {
  _expectCodepoints([954, 972, 963, 956, 949],
      [0xce, 0xba, 0xcf, 0x8c, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5], "κόσμε");

  // boundary conditions: First possible sequence of a certain length
  _expectCodepoints(<int>[], <int>[], "no input");
  _expectCodepoints([0x0], [0x0], "0");
  _expectCodepoints([0x80], [0xc2, 0x80], "80");
  _expectCodepoints([0x800], [0xe0, 0xa0, 0x80], "800");
  _expectCodepoints([0x10000], [0xf0, 0x90, 0x80, 0x80], "10000");
  // 5- and 6-byte forms are rejected.
  _expectReplacements([0xf8, 0x88, 0x80, 0x80, 0x80], "200000");
  _expectReplacements([0xfc, 0x84, 0x80, 0x80, 0x80, 0x80], "4000000");

  // boundary conditions: Last possible sequence of a certain length
  _expectCodepoints([0x7f], [0x7f], "7f");
  _expectCodepoints([0x7ff], [0xdf, 0xbf], "7ff");
  _expectCodepoints([0xffff], [0xef, 0xbf, 0xbf], "ffff");
  _expectReplacements([0xf7, 0xbf, 0xbf, 0xbf], "1fffff");
  _expectReplacements([0xfb, 0xbf, 0xbf, 0xbf, 0xbf], "3ffffff");
  // Label fixed: this is the LAST 6-byte pattern (0x7fffffff); it used to
  // repeat the "4000000" label of the first 6-byte pattern above.
  _expectReplacements([0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf], "7fffffff");

  // other boundary conditions
  _expectCodepoints([0xd7ff], [0xed, 0x9f, 0xbf], "d7ff");
  _expectCodepoints([0xe000], [0xee, 0x80, 0x80], "e000");
  // ef bf bd is the regular encoding of the replacement character itself.
  _expectCodepoints([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      [0xef, 0xbf, 0xbd], "fffd");
  _expectCodepoints([0x10ffff], [0xf4, 0x8f, 0xbf, 0xbf], "10ffff");
  _expectReplacements([0xf4, 0x90, 0x80, 0x80], "110000");

  // unexpected continuation bytes
  _expectReplacements([0x80], "80 => replacement character");
  _expectReplacements([0xbf], "bf => replacement character");

  List<int> allContinuationBytes = <int>[];
  for (int i = 0x80; i < 0xc0; i++) {
    allContinuationBytes.add(i);
  }
  _expectReplacements(allContinuationBytes,
      "80 - bf => replacement character x 64", 64);

  // Lead bytes of 2- to 6-byte sequences, each followed by a space instead
  // of a continuation byte. The expected output holds one replacement
  // character per lead byte and no space, so the first label no longer
  // claims "+ space" on the output side.
  _expectLeadBytesReplaced(0xc0, 0xe0,
      "c0 - df + space => replacement character x 32");
  _expectLeadBytesReplaced(0xe0, 0xf0,
      "e0 - ef + space => replacement character x 16");
  _expectLeadBytesReplaced(0xf0, 0xf8,
      "f0 - f7 + space => replacement character x 8");
  _expectLeadBytesReplaced(0xf8, 0xfc,
      "f8 - fb + space => replacement character x 4");
  _expectLeadBytesReplaced(0xfc, 0xfe,
      "fc - fd + space => replacement character x 2");

  // Sequences with last continuation byte missing
  _expectReplacements([0xc2], "2-byte sequence with last byte missing");
  _expectReplacements([0xe0, 0x80], "3-byte sequence with last byte missing");
  _expectReplacements([0xf0, 0x80, 0x80],
      "4-byte sequence with last byte missing");
  _expectReplacements([0xf8, 0x88, 0x80, 0x80],
      "5-byte sequence with last byte missing");
  _expectReplacements([0xfc, 0x80, 0x80, 0x80, 0x80],
      "6-byte sequence with last byte missing");

  _expectReplacements([0xdf], "2-byte sequence with last byte missing (hi)");
  _expectReplacements([0xef, 0xbf],
      "3-byte sequence with last byte missing (hi)");
  _expectReplacements([0xf7, 0xbf, 0xbf],
      "4-byte sequence with last byte missing (hi)");
  _expectReplacements([0xfb, 0xbf, 0xbf, 0xbf],
      "5-byte sequence with last byte missing (hi)");
  _expectReplacements([0xfd, 0xbf, 0xbf, 0xbf, 0xbf],
      "6-byte sequence with last byte missing (hi)");

  // Concatenation of incomplete sequences: the ten truncated sequences from
  // the two groups above, back to back, one replacement character each.
  _expectReplacements(
      [ 0xc2,
        0xe0, 0x80,
        0xf0, 0x80, 0x80,
        0xf8, 0x88, 0x80, 0x80,
        0xfc, 0x80, 0x80, 0x80, 0x80,
        0xdf,
        0xef, 0xbf,
        0xf7, 0xbf, 0xbf,
        0xfb, 0xbf, 0xbf, 0xbf,
        0xfd, 0xbf, 0xbf, 0xbf, 0xbf ],
      "Concatenation of incomplete sequences", 10);

  // Impossible bytes
  _expectReplacements([0xfe], "fe");
  _expectReplacements([0xff], "ff");
  _expectReplacements([0xfe, 0xfe, 0xff, 0xff], "fe fe ff ff", 4);

  // Overlong sequences
  _expectReplacements([0xc0, 0xaf], "c0 af");
  _expectReplacements([0xe0, 0x80, 0xaf], "e0 80 af");
  _expectReplacements([0xf0, 0x80, 0x80, 0xaf], "f0 80 80 af");
  _expectReplacements([0xf8, 0x80, 0x80, 0x80, 0xaf], "f8 80 80 80 af");
  _expectReplacements([0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf],
      "fc 80 80 80 80 af");

  _expectReplacements([0xc1, 0xbf], "c1 bf");
  _expectReplacements([0xe0, 0x9f, 0xbf], "e0 9f bf");
  _expectReplacements([0xf0, 0x8f, 0xbf, 0xbf], "f0 8f bf bf");
  _expectReplacements([0xf8, 0x87, 0xbf, 0xbf, 0xbf], "f8 87 bf bf bf");
  _expectReplacements([0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf],
      "fc 83 bf bf bf bf");

  _expectReplacements([0xc0, 0x80], "c0 80");
  _expectReplacements([0xe0, 0x80, 0x80], "e0 80 80");
  _expectReplacements([0xf0, 0x80, 0x80, 0x80], "f0 80 80 80");
  _expectReplacements([0xf8, 0x80, 0x80, 0x80, 0x80], "f8 80 80 80 80");
  _expectReplacements([0xfc, 0x80, 0x80, 0x80, 0x80, 0x80],
      "fc 80 80 80 80 80");

  // Illegal code positions
  _expectReplacements([0xed, 0xa0, 0x80], "U+D800");
  _expectReplacements([0xed, 0xad, 0xbf], "U+DB7F");
  _expectReplacements([0xed, 0xae, 0x80], "U+DB80");
  _expectReplacements([0xed, 0xaf, 0xbf], "U+DBFF");
  _expectReplacements([0xed, 0xb0, 0x80], "U+DC00");
  _expectReplacements([0xed, 0xbe, 0x80], "U+DF80");
  _expectReplacements([0xed, 0xbf, 0xbf], "U+DFFF");

  // Paired UTF-16 surrogates: each half is replaced on its own.
  _expectReplacements([0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80],
      "U+D800 U+DC00", 2);
  _expectReplacements([0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf],
      "U+D800 U+DFFF", 2);
  _expectReplacements([0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80],
      "U+DB7F U+DC00", 2);
  _expectReplacements([0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf],
      "U+DB7F U+DFFF", 2);
  _expectReplacements([0xed, 0xae, 0x80, 0xed, 0xb0, 0x80],
      "U+DB80 U+DC00", 2);
  _expectReplacements([0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf],
      "U+DB80 U+DFFF", 2);
  _expectReplacements([0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80],
      "U+DBFF U+DC00", 2);
  _expectReplacements([0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf],
      "U+DBFF U+DFFF", 2);

  // Other illegal code positions (???)
  // These are asserted to pass through unchanged rather than be replaced.
  _expectCodepoints([0xfffe], [0xef, 0xbf, 0xbe], "U+FFFE");
  _expectCodepoints([0xffff], [0xef, 0xbf, 0xbf], "U+FFFF");
}

/// Asserts that decoding [bytes] with [utf8ToCodepoints] yields exactly
/// [codepoints]; [reason] labels the failure.
void _expectCodepoints(List<int> codepoints, List<int> bytes, String reason) {
  Expect.listEquals(codepoints, utf8ToCodepoints(bytes), reason);
}

/// Asserts that decoding [bytes] yields [count] replacement characters and
/// nothing else; [reason] labels the failure.
void _expectReplacements(List<int> bytes, String reason, [int count = 1]) {
  _expectCodepoints(
      new List<int>.filled(count, UNICODE_REPLACEMENT_CHARACTER_CODEPOINT),
      bytes,
      reason);
}

/// Feeds every lead byte in the half-open range [firstLead, endLead), each
/// followed by a space (0x20), through the decoder in one call and asserts
/// that the result is one replacement character per lead byte.
void _expectLeadBytesReplaced(int firstLead, int endLead, String reason) {
  List<int> bytes = <int>[];
  for (int lead = firstLead; lead < endLead; lead++) {
    bytes.addAll([lead, 0x20]);
  }
  _expectReplacements(bytes, reason, endLead - firstLead);
}
428
/// Verifies that [decodeUtf8] turns every reference byte table back into its
/// sample phrase.
void testUtf8BytesToString() {
  // Decodes [utf8Bytes] and compares the result with [expectedPhrase];
  // [reason] is the label reported when the comparison fails.
  void checkDecoding(List<int> utf8Bytes, String expectedPhrase,
      String reason) {
    var decoded = decodeUtf8(utf8Bytes);
    Expect.stringEquals(expectedPhrase, decoded, reason);
  }

  checkDecoding(testEnglishUtf8, testEnglishPhrase, "English");
  checkDecoding(testDanishUtf8, testDanishPhrase, "Danish");
  checkDecoding(testHebrewUtf8, testHebrewPhrase, "Hebrew");
  checkDecoding(testRussianUtf8, testRussianPhrase, "Russian");
  checkDecoding(testGreekUtf8, testGreekPhrase, "Greek");
  checkDecoding(testKatakanaUtf8, testKatakanaPhrase, "Katakana");
}
448
/// Exercises the iterable returned by [decodeUtf8AsIterable].
///
/// For an ASCII phrase and for a multi-byte (katakana) phrase it checks that
/// `first` yields the first character of the expected phrase, and that
/// iterating the whole decoder reproduces the phrase.
void testIterableMethods() {
  IterableUtf8Decoder englishDecoder = decodeUtf8AsIterable(testEnglishUtf8);
  // get the first character
  // Compared against the expected phrase, not the raw input byte: the two
  // only coincide for single-byte (ASCII) input, and this keeps the check
  // in line with the katakana one below.
  Expect.equals(testEnglishPhrase.codeUnits[0], englishDecoder.first,
      "English: first character");
  // get the whole translation using the Iterable interface
  Expect.stringEquals(testEnglishPhrase,
      new String.fromCharCodes(new List<int>.from(englishDecoder)),
      "English: full iteration");

  IterableUtf8Decoder kataDecoder = decodeUtf8AsIterable(testKatakanaUtf8);
  // get the first character
  Expect.equals(testKatakanaPhrase.codeUnits[0], kataDecoder.first,
      "Katakana: first character");
  // get the whole translation using the Iterable interface
  Expect.stringEquals(testKatakanaPhrase,
      new String.fromCharCodes(new List<int>.from(kataDecoder)),
      "Katakana: full iteration");
}
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698