Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(51)

Side by Side Diff: packages/utf/test/utf82_test.dart

Issue 1400473008: Roll Observatory packages and add a roll script (Closed) Base URL: git@github.com:dart-lang/observatory_pub_packages.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « packages/utf/test/utf32_test.dart ('k') | packages/utf/test/utf8_test.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 library utf.utf82_test;
6
7 import 'package:expect/expect.dart';
8 import 'package:utf/utf.dart';
9
/// ASCII-only fixture: every character encodes to exactly one UTF-8 byte.
const String testEnglishPhrase =
    "The quick brown fox jumps over the lazy dog.";

/// UTF-8 encoding of [testEnglishPhrase]; each row is annotated with the
/// text it spells.
const List<int> testEnglishUtf8 = const <int>[
  0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, // "The quic"
  0x6b, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20, // "k brown "
  0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70, // "fox jump"
  0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74, // "s over t"
  0x68, 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20, // "he lazy "
  0x64, 0x6f, 0x67, 0x2e // "dog."
];
20
/// Mostly-ASCII fixture with three Latin-1 letters (æ, ø, å) that each need
/// a two-byte UTF-8 sequence.
const String testDanishPhrase = "Quizdeltagerne spiste jordbær med "
    "fløde mens cirkusklovnen Wolther spillede på xylofon.";

/// UTF-8 encoding of [testDanishPhrase]. The non-ASCII letters appear as
/// `c3 a6` (æ), `c3 b8` (ø) and `c3 a5` (å).
const List<int> testDanishUtf8 = const <int>[
  0x51, 0x75, 0x69, 0x7a, 0x64, 0x65, 0x6c, 0x74,
  0x61, 0x67, 0x65, 0x72, 0x6e, 0x65, 0x20, 0x73,
  0x70, 0x69, 0x73, 0x74, 0x65, 0x20, 0x6a, 0x6f,
  0x72, 0x64, 0x62, 0xc3, 0xa6, 0x72, 0x20, 0x6d,
  0x65, 0x64, 0x20, 0x66, 0x6c, 0xc3, 0xb8, 0x64,
  0x65, 0x20, 0x6d, 0x65, 0x6e, 0x73, 0x20, 0x63,
  0x69, 0x72, 0x6b, 0x75, 0x73, 0x6b, 0x6c, 0x6f,
  0x76, 0x6e, 0x65, 0x6e, 0x20, 0x57, 0x6f, 0x6c,
  0x74, 0x68, 0x65, 0x72, 0x20, 0x73, 0x70, 0x69,
  0x6c, 0x6c, 0x65, 0x64, 0x65, 0x20, 0x70, 0xc3,
  0xa5, 0x20, 0x78, 0x79, 0x6c, 0x6f, 0x66, 0x6f,
  0x6e, 0x2e
];
37
/// Right-to-left fixture; every letter is a two-byte sequence led by `d7`.
///
/// The declaration is deliberately split so that the literal sits on a line
/// of its own: editors tend to mishandle a line that mixes left-to-right
/// code with right-to-left text.
const String
    testHebrewPhrase = "דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה";

/// UTF-8 encoding of [testHebrewPhrase].
const List<int> testHebrewUtf8 = const <int>[
  0xd7, 0x93, 0xd7, 0x92, 0x20, 0xd7, 0xa1, 0xd7,
  0xa7, 0xd7, 0xa8, 0xd7, 0x9f, 0x20, 0xd7, 0xa9,
  0xd7, 0x98, 0x20, 0xd7, 0x91, 0xd7, 0x99, 0xd7,
  0x9d, 0x20, 0xd7, 0x9e, 0xd7, 0x90, 0xd7, 0x95,
  0xd7, 0x9b, 0xd7, 0x96, 0xd7, 0x91, 0x20, 0xd7,
  0x95, 0xd7, 0x9c, 0xd7, 0xa4, 0xd7, 0xaa, 0xd7,
  0xa2, 0x20, 0xd7, 0x9e, 0xd7, 0xa6, 0xd7, 0x90,
  0x20, 0xd7, 0x9c, 0xd7, 0x95, 0x20, 0xd7, 0x97,
  0xd7, 0x91, 0xd7, 0xa8, 0xd7, 0x94, 0x20, 0xd7,
  0x90, 0xd7, 0x99, 0xd7, 0x9a, 0x20, 0xd7, 0x94,
  0xd7, 0xa7, 0xd7, 0x9c, 0xd7, 0x99, 0xd7, 0x98,
  0xd7, 0x94
];
55
/// Cyrillic fixture; every letter is a two-byte sequence led by `d0` or
/// `d1`, with ASCII spaces in between.
const String testRussianPhrase = "Съешь же ещё этих мягких "
    "французских булок да выпей чаю";

/// UTF-8 encoding of [testRussianPhrase].
const List<int> testRussianUtf8 = const <int>[
  0xd0, 0xa1, 0xd1, 0x8a, 0xd0, 0xb5, 0xd1, 0x88,
  0xd1, 0x8c, 0x20, 0xd0, 0xb6, 0xd0, 0xb5, 0x20,
  0xd0, 0xb5, 0xd1, 0x89, 0xd1, 0x91, 0x20, 0xd1,
  0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0xd1, 0x85, 0x20,
  0xd0, 0xbc, 0xd1, 0x8f, 0xd0, 0xb3, 0xd0, 0xba,
  0xd0, 0xb8, 0xd1, 0x85, 0x20, 0xd1, 0x84, 0xd1,
  0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd1, 0x86, 0xd1,
  0x83, 0xd0, 0xb7, 0xd1, 0x81, 0xd0, 0xba, 0xd0,
  0xb8, 0xd1, 0x85, 0x20, 0xd0, 0xb1, 0xd1, 0x83,
  0xd0, 0xbb, 0xd0, 0xbe, 0xd0, 0xba, 0x20, 0xd0,
  0xb4, 0xd0, 0xb0, 0x20, 0xd0, 0xb2, 0xd1, 0x8b,
  0xd0, 0xbf, 0xd0, 0xb5, 0xd0, 0xb9, 0x20, 0xd1,
  0x87, 0xd0, 0xb0, 0xd1, 0x8e
];
73
/// Polytonic Greek fixture mixing two-byte sequences (lead `ce`/`cf`) with
/// three-byte sequences (lead `e1`) for the accented letters.
const String testGreekPhrase = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ "
    "στὸ χρυσαφὶ ξέφωτο";

/// UTF-8 encoding of [testGreekPhrase].
const List<int> testGreekUtf8 = const <int>[
  0xce, 0x93, 0xce, 0xb1, 0xce, 0xb6, 0xce, 0xad,
  0xce, 0xb5, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce,
  0xb1, 0xe1, 0xbd, 0xb6, 0x20, 0xce, 0xbc, 0xcf,
  0x85, 0xcf, 0x81, 0xcf, 0x84, 0xce, 0xb9, 0xe1,
  0xbd, 0xb2, 0xcf, 0x82, 0x20, 0xce, 0xb4, 0xe1,
  0xbd, 0xb2, 0xce, 0xbd, 0x20, 0xce, 0xb8, 0xe1,
  0xbd, 0xb0, 0x20, 0xce, 0xb2, 0xcf, 0x81, 0xe1,
  0xbf, 0xb6, 0x20, 0xcf, 0x80, 0xce, 0xb9, 0xe1,
  0xbd, 0xb0, 0x20, 0xcf, 0x83, 0xcf, 0x84, 0xe1,
  0xbd, 0xb8, 0x20, 0xcf, 0x87, 0xcf, 0x81, 0xcf,
  0x85, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x86, 0xe1,
  0xbd, 0xb6, 0x20, 0xce, 0xbe, 0xce, 0xad, 0xcf,
  0x86, 0xcf, 0x89, 0xcf, 0x84, 0xce, 0xbf
];
91
/// Two-line Katakana fixture; every kana is a three-byte sequence led by
/// `e3`, and the two lines are separated by a single line feed (`0a`).
///
/// Written as adjacent literals with an explicit `\n` so the value does not
/// depend on how the source file itself is indented or line-wrapped.
const String testKatakanaPhrase =
    "イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム\n"
    "ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン";

/// UTF-8 encoding of [testKatakanaPhrase].
const List<int> testKatakanaUtf8 = const <int>[
  0xe3, 0x82, 0xa4, 0xe3, 0x83, 0xad, 0xe3, 0x83,
  0x8f, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0x9b, 0xe3,
  0x83, 0x98, 0xe3, 0x83, 0x88, 0x20, 0xe3, 0x83,
  0x81, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0x8c, 0xe3,
  0x83, 0xab, 0xe3, 0x83, 0xb2, 0x20, 0xe3, 0x83,
  0xaf, 0xe3, 0x82, 0xab, 0xe3, 0x83, 0xa8, 0xe3,
  0x82, 0xbf, 0xe3, 0x83, 0xac, 0xe3, 0x82, 0xbd,
  0x20, 0xe3, 0x83, 0x84, 0xe3, 0x83, 0x8d, 0xe3,
  0x83, 0x8a, 0xe3, 0x83, 0xa9, 0xe3, 0x83, 0xa0,
  0x0a, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3,
  0x83, 0x8e, 0xe3, 0x82, 0xaa, 0xe3, 0x82, 0xaf,
  0xe3, 0x83, 0xa4, 0xe3, 0x83, 0x9e, 0x20, 0xe3,
  0x82, 0xb1, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xb3,
  0xe3, 0x82, 0xa8, 0xe3, 0x83, 0x86, 0x20, 0xe3,
  0x82, 0xa2, 0xe3, 0x82, 0xb5, 0xe3, 0x82, 0xad,
  0xe3, 0x83, 0xa6, 0xe3, 0x83, 0xa1, 0xe3, 0x83,
  0x9f, 0xe3, 0x82, 0xb7, 0x20, 0xe3, 0x83, 0xb1,
  0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa2, 0xe3, 0x82,
  0xbb, 0xe3, 0x82, 0xb9, 0xe3, 0x83, 0xb3
];
116
/// Entry point: runs every test group in this file, one after another.
///
/// The groups run in the listed order, so a failure in an earlier group
/// prevents the later ones from running.
void main() {
  var suites = <Function>[
    testUtf8bytesToCodepoints,
    testUtf8BytesToString,
    testEncodeToUtf8,
    testIterableMethods
  ];
  for (var runSuite in suites) {
    runSuite();
  }
}
123
/// Checks that [encodeUtf8] turns each fixture phrase into exactly the
/// bytes listed in the matching `test*Utf8` table.
void testEncodeToUtf8() {
  // Encodes [phrase] and compares the result with the expected byte table;
  // [reason] is the message reported on mismatch.
  void expectEncodes(List<int> bytes, String phrase, String reason) {
    Expect.listEquals(bytes, encodeUtf8(phrase), reason);
  }

  expectEncodes(testEnglishUtf8, testEnglishPhrase, "english to utf8");
  expectEncodes(testDanishUtf8, testDanishPhrase, "encode danish to utf8");
  expectEncodes(testHebrewUtf8, testHebrewPhrase, "Hebrew to utf8");
  expectEncodes(testRussianUtf8, testRussianPhrase, "Russian to utf8");
  expectEncodes(testGreekUtf8, testGreekPhrase, "Greek to utf8");
  expectEncodes(testKatakanaUtf8, testKatakanaPhrase, "Katakana to utf8");
}
143
/// Exercises [utf8ToCodepoints] with well-formed input, length-boundary
/// sequences and a catalogue of malformed input.
///
/// Every malformed case is expected to decode to one
/// [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT] per bad sequence; the failure
/// message of each assertion names the code point or the raw bytes under
/// test.
void testUtf8bytesToCodepoints() {
  // Asserts that [bytes] decodes to exactly [count] replacement characters
  // and nothing else.
  void expectReplaced(List<int> bytes, String reason, [int count = 1]) {
    Expect.listEquals(
        new List<int>.filled(count, UNICODE_REPLACEMENT_CHARACTER_CODEPOINT),
        utf8ToCodepoints(bytes),
        reason);
  }

  // Feeds every lead byte in [first, end), each followed by a space (0x20)
  // instead of a continuation byte, and expects one replacement character
  // per lead byte. No space is expected in the output.
  void expectLeadBytesReplaced(int first, int end, String reason) {
    var bytes = <int>[];
    for (int lead = first; lead < end; lead++) {
      bytes.addAll([lead, 0x20]);
    }
    expectReplaced(bytes, reason, end - first);
  }

  Expect.listEquals(
      [954, 972, 963, 956, 949],
      utf8ToCodepoints(
          [0xce, 0xba, 0xcf, 0x8c, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5]),
      "κόσμε");

  // boundary conditions: First possible sequence of a certain length
  Expect.listEquals([], utf8ToCodepoints([]), "no input");
  Expect.listEquals([0x0], utf8ToCodepoints([0x0]), "0");
  Expect.listEquals([0x80], utf8ToCodepoints([0xc2, 0x80]), "80");
  Expect.listEquals([0x800], utf8ToCodepoints([0xe0, 0xa0, 0x80]), "800");
  Expect.listEquals(
      [0x10000], utf8ToCodepoints([0xf0, 0x90, 0x80, 0x80]), "10000");
  expectReplaced([0xf8, 0x88, 0x80, 0x80, 0x80], "200000");
  expectReplaced([0xfc, 0x84, 0x80, 0x80, 0x80, 0x80], "4000000");

  // boundary conditions: Last possible sequence of a certain length
  Expect.listEquals([0x7f], utf8ToCodepoints([0x7f]), "7f");
  Expect.listEquals([0x7ff], utf8ToCodepoints([0xdf, 0xbf]), "7ff");
  Expect.listEquals([0xffff], utf8ToCodepoints([0xef, 0xbf, 0xbf]), "ffff");
  expectReplaced([0xf7, 0xbf, 0xbf, 0xbf], "1fffff");
  expectReplaced([0xfb, 0xbf, 0xbf, 0xbf, 0xbf], "3ffffff");
  // fd bf bf bf bf bf carries 1 + 5 * 6 = 31 payload bits, all set, i.e.
  // 0x7fffffff (the label used to repeat the "4000000" of the first
  // six-byte sequence above).
  expectReplaced([0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf], "7fffffff");

  // other boundary conditions
  Expect.listEquals([0xd7ff], utf8ToCodepoints([0xed, 0x9f, 0xbf]), "d7ff");
  Expect.listEquals([0xe000], utf8ToCodepoints([0xee, 0x80, 0x80]), "e000");
  // ef bf bd is the encoding of U+FFFD itself, so the expected value is the
  // replacement character.
  Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
      utf8ToCodepoints([0xef, 0xbf, 0xbd]), "fffd");
  Expect.listEquals(
      [0x10ffff], utf8ToCodepoints([0xf4, 0x8f, 0xbf, 0xbf]), "10ffff");
  expectReplaced([0xf4, 0x90, 0x80, 0x80], "110000");

  // unexpected continuation bytes
  expectReplaced([0x80], "80 => replacement character");
  expectReplaced([0xbf], "bf => replacement character");

  var allContinuationBytes = <int>[];
  for (int i = 0x80; i < 0xc0; i++) {
    allContinuationBytes.add(i);
  }
  expectReplaced(allContinuationBytes,
      "80 - bf => replacement character x 64", allContinuationBytes.length);

  // lead bytes followed by a space instead of a continuation byte
  expectLeadBytesReplaced(
      0xc0, 0xe0, "c0 - df + space => replacement character x 32");
  expectLeadBytesReplaced(
      0xe0, 0xf0, "e0 - ef + space => replacement character x 16");
  expectLeadBytesReplaced(
      0xf0, 0xf8, "f0 - f7 + space => replacement character x 8");
  expectLeadBytesReplaced(
      0xf8, 0xfc, "f8 - fb + space => replacement character x 4");
  expectLeadBytesReplaced(
      0xfc, 0xfe, "fc - fd + space => replacement character x 2");

  // Sequences with last continuation byte missing
  expectReplaced([0xc2], "2-byte sequence with last byte missing");
  expectReplaced([0xe0, 0x80], "3-byte sequence with last byte missing");
  expectReplaced(
      [0xf0, 0x80, 0x80], "4-byte sequence with last byte missing");
  expectReplaced(
      [0xf8, 0x88, 0x80, 0x80], "5-byte sequence with last byte missing");
  expectReplaced([0xfc, 0x80, 0x80, 0x80, 0x80],
      "6-byte sequence with last byte missing");

  expectReplaced([0xdf], "2-byte sequence with last byte missing (hi)");
  expectReplaced(
      [0xef, 0xbf], "3-byte sequence with last byte missing (hi)");
  expectReplaced(
      [0xf7, 0xbf, 0xbf], "4-byte sequence with last byte missing (hi)");
  expectReplaced([0xfb, 0xbf, 0xbf, 0xbf],
      "5-byte sequence with last byte missing (hi)");
  expectReplaced([0xfd, 0xbf, 0xbf, 0xbf, 0xbf],
      "6-byte sequence with last byte missing (hi)");

  // Concatenation of incomplete sequences: the ten truncated sequences
  // above, back to back, must yield one replacement character each.
  expectReplaced([
    0xc2,
    0xe0, 0x80,
    0xf0, 0x80, 0x80,
    0xf8, 0x88, 0x80, 0x80,
    0xfc, 0x80, 0x80, 0x80, 0x80,
    0xdf,
    0xef, 0xbf,
    0xf7, 0xbf, 0xbf,
    0xfb, 0xbf, 0xbf, 0xbf,
    0xfd, 0xbf, 0xbf, 0xbf, 0xbf
  ], "Concatenation of incomplete sequences", 10);

  // Impossible bytes
  expectReplaced([0xfe], "fe");
  expectReplaced([0xff], "ff");
  expectReplaced([0xfe, 0xfe, 0xff, 0xff], "fe fe ff ff", 4);

  // Overlong sequences
  expectReplaced([0xc0, 0xaf], "c0 af");
  expectReplaced([0xe0, 0x80, 0xaf], "e0 80 af");
  expectReplaced([0xf0, 0x80, 0x80, 0xaf], "f0 80 80 af");
  expectReplaced([0xf8, 0x80, 0x80, 0x80, 0xaf], "f8 80 80 80 af");
  expectReplaced([0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf], "fc 80 80 80 80 af");

  expectReplaced([0xc1, 0xbf], "c1 bf");
  expectReplaced([0xe0, 0x9f, 0xbf], "e0 9f bf");
  expectReplaced([0xf0, 0x8f, 0xbf, 0xbf], "f0 8f bf bf");
  expectReplaced([0xf8, 0x87, 0xbf, 0xbf, 0xbf], "f8 87 bf bf bf");
  expectReplaced([0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf], "fc 83 bf bf bf bf");

  expectReplaced([0xc0, 0x80], "c0 80");
  expectReplaced([0xe0, 0x80, 0x80], "e0 80 80");
  expectReplaced([0xf0, 0x80, 0x80, 0x80], "f0 80 80 80");
  expectReplaced([0xf8, 0x80, 0x80, 0x80, 0x80], "f8 80 80 80 80");
  expectReplaced([0xfc, 0x80, 0x80, 0x80, 0x80, 0x80], "fc 80 80 80 80 80");

  // Illegal code positions (single UTF-16 surrogates)
  expectReplaced([0xed, 0xa0, 0x80], "U+D800");
  expectReplaced([0xed, 0xad, 0xbf], "U+DB7F");
  expectReplaced([0xed, 0xae, 0x80], "U+DB80");
  expectReplaced([0xed, 0xaf, 0xbf], "U+DBFF");
  expectReplaced([0xed, 0xb0, 0x80], "U+DC00");
  expectReplaced([0xed, 0xbe, 0x80], "U+DF80");
  expectReplaced([0xed, 0xbf, 0xbf], "U+DFFF");

  // Paired UTF-16 surrogates: each half is replaced on its own.
  expectReplaced([0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80], "U+D800 U+DC00", 2);
  expectReplaced([0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf], "U+D800 U+DFFF", 2);
  expectReplaced([0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80], "U+DB7F U+DC00", 2);
  expectReplaced([0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf], "U+DB7F U+DFFF", 2);
  expectReplaced([0xed, 0xae, 0x80, 0xed, 0xb0, 0x80], "U+DB80 U+DC00", 2);
  expectReplaced([0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf], "U+DB80 U+DFFF", 2);
  expectReplaced([0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80], "U+DBFF U+DC00", 2);
  expectReplaced([0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf], "U+DBFF U+DFFF", 2);

  // U+FFFE and U+FFFF: these assertions pin that the decoder passes both
  // through unchanged rather than replacing them.
  Expect.listEquals(
      [0xfffe], utf8ToCodepoints([0xef, 0xbf, 0xbe]), "U+FFFE");
  Expect.listEquals(
      [0xffff], utf8ToCodepoints([0xef, 0xbf, 0xbf]), "U+FFFF");
}
430
/// Checks that [decodeUtf8] turns each `test*Utf8` byte table back into the
/// fixture phrase it was derived from.
void testUtf8BytesToString() {
  // Decodes [bytes] and compares the result with [phrase]; [reason] is the
  // message reported on mismatch.
  void expectDecodes(String phrase, List<int> bytes, String reason) {
    Expect.stringEquals(phrase, decodeUtf8(bytes), reason);
  }

  expectDecodes(testEnglishPhrase, testEnglishUtf8, "English");
  expectDecodes(testDanishPhrase, testDanishUtf8, "Danish");
  expectDecodes(testHebrewPhrase, testHebrewUtf8, "Hebrew");
  expectDecodes(testRussianPhrase, testRussianUtf8, "Russian");
  expectDecodes(testGreekPhrase, testGreekUtf8, "Greek");
  expectDecodes(testKatakanaPhrase, testKatakanaUtf8, "Katakana");
}
450
/// Checks the decoder returned by [decodeUtf8AsIterable] through the
/// [Iterable] interface: `first` must be the first character of the phrase,
/// and iterating the whole decoder must reproduce the phrase.
///
/// Each decoder is consumed twice (once by `first`, once by `List.from`),
/// so this also relies on the iterable being re-iterable.
void testIterableMethods() {
  IterableUtf8Decoder englishDecoder = decodeUtf8AsIterable(testEnglishUtf8);
  // Compare against the phrase's first code unit, as the Katakana check
  // below does, rather than against the first raw UTF-8 byte: the two only
  // coincide here because the English phrase is pure ASCII.
  Expect.equals(testEnglishPhrase.codeUnits[0], englishDecoder.first);
  // get the whole translation using the Iterable interface
  Expect.stringEquals(testEnglishPhrase,
      new String.fromCharCodes(new List<int>.from(englishDecoder)));

  IterableUtf8Decoder kataDecoder = decodeUtf8AsIterable(testKatakanaUtf8);
  // get the first character
  Expect.equals(testKatakanaPhrase.codeUnits[0], kataDecoder.first);
  // get the whole translation using the Iterable interface
  Expect.stringEquals(testKatakanaPhrase,
      new String.fromCharCodes(new List<int>.from(kataDecoder)));
}
OLDNEW
« no previous file with comments | « packages/utf/test/utf32_test.dart ('k') | packages/utf/test/utf8_test.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698