Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(225)

Side by Side Diff: test/codegen/lib/convert/utf84_test.dart

Issue 1965563003: Update dart:convert and dart:core Uri. (Closed) Base URL: https://github.com/dart-lang/dev_compiler.git@master
Patch Set: Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 import 'package:expect/expect.dart';
6 import 'dart:convert';
7
/// English sample sentence. It is pure ASCII, so every character corresponds
/// to exactly one byte in [testEnglishUtf8].
8 const String testEnglishPhrase =
9 "The quick brown fox jumps over the lazy dog.";
10
/// UTF-8 bytes that the tests expect [testEnglishPhrase] to encode to and
/// decode from.
11 const List<int> testEnglishUtf8 = const<int> [
12 0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63,
13 0x6b, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20,
14 0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70,
15 0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74,
16 0x68, 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20,
17 0x64, 0x6f, 0x67, 0x2e];
18
/// Danish sample sentence, written as two adjacent string literals. It
/// contains the non-ASCII letters æ, ø and å.
19 const String testDanishPhrase = "Quizdeltagerne spiste jordbær med "
20 "fløde mens cirkusklovnen Wolther spillede på xylofon.";
21
/// UTF-8 bytes that the tests expect for [testDanishPhrase]. The three
/// non-ASCII letters appear as the two-byte sequences 0xc3 0xa6 (æ),
/// 0xc3 0xb8 (ø) and 0xc3 0xa5 (å); everything else is one byte per character.
22 const List<int> testDanishUtf8 = const<int>[
23 0x51, 0x75, 0x69, 0x7a, 0x64, 0x65, 0x6c, 0x74,
24 0x61, 0x67, 0x65, 0x72, 0x6e, 0x65, 0x20, 0x73,
25 0x70, 0x69, 0x73, 0x74, 0x65, 0x20, 0x6a, 0x6f,
26 0x72, 0x64, 0x62, 0xc3, 0xa6, 0x72, 0x20, 0x6d,
27 0x65, 0x64, 0x20, 0x66, 0x6c, 0xc3, 0xb8, 0x64,
28 0x65, 0x20, 0x6d, 0x65, 0x6e, 0x73, 0x20, 0x63,
29 0x69, 0x72, 0x6b, 0x75, 0x73, 0x6b, 0x6c, 0x6f,
30 0x76, 0x6e, 0x65, 0x6e, 0x20, 0x57, 0x6f, 0x6c,
31 0x74, 0x68, 0x65, 0x72, 0x20, 0x73, 0x70, 0x69,
32 0x6c, 0x6c, 0x65, 0x64, 0x65, 0x20, 0x70, 0xc3,
33 0xa5, 0x20, 0x78, 0x79, 0x6c, 0x6f, 0x66, 0x6f,
34 0x6e, 0x2e];
35
/// Hebrew sample sentence (a right-to-left script).
///
/// The declaration is split so that the name and the literal share a line;
/// the original author notes this works around an editor problem with text
/// direction.
36 // unusual formatting due to strange editor interaction w/ text direction.
37 const String
38 testHebrewPhrase = "דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה";
39
/// UTF-8 bytes that the tests expect for [testHebrewPhrase]. Each Hebrew
/// letter is a two-byte sequence starting with 0xd7; 0x20 bytes are the
/// spaces between words.
40 const List<int> testHebrewUtf8 = const<int>[
41 0xd7, 0x93, 0xd7, 0x92, 0x20, 0xd7, 0xa1, 0xd7,
42 0xa7, 0xd7, 0xa8, 0xd7, 0x9f, 0x20, 0xd7, 0xa9,
43 0xd7, 0x98, 0x20, 0xd7, 0x91, 0xd7, 0x99, 0xd7,
44 0x9d, 0x20, 0xd7, 0x9e, 0xd7, 0x90, 0xd7, 0x95,
45 0xd7, 0x9b, 0xd7, 0x96, 0xd7, 0x91, 0x20, 0xd7,
46 0x95, 0xd7, 0x9c, 0xd7, 0xa4, 0xd7, 0xaa, 0xd7,
47 0xa2, 0x20, 0xd7, 0x9e, 0xd7, 0xa6, 0xd7, 0x90,
48 0x20, 0xd7, 0x9c, 0xd7, 0x95, 0x20, 0xd7, 0x97,
49 0xd7, 0x91, 0xd7, 0xa8, 0xd7, 0x94, 0x20, 0xd7,
50 0x90, 0xd7, 0x99, 0xd7, 0x9a, 0x20, 0xd7, 0x94,
51 0xd7, 0xa7, 0xd7, 0x9c, 0xd7, 0x99, 0xd7, 0x98,
52 0xd7, 0x94];
53
/// Russian (Cyrillic) sample sentence, written as two adjacent string
/// literals.
54 const String testRussianPhrase = "Съешь же ещё этих мягких "
55 "французских булок да выпей чаю";
56
/// UTF-8 bytes that the tests expect for [testRussianPhrase]. Each Cyrillic
/// letter is a two-byte sequence starting with 0xd0 or 0xd1; 0x20 bytes are
/// the spaces between words.
57 const List<int> testRussianUtf8 = const<int>[
58 0xd0, 0xa1, 0xd1, 0x8a, 0xd0, 0xb5, 0xd1, 0x88,
59 0xd1, 0x8c, 0x20, 0xd0, 0xb6, 0xd0, 0xb5, 0x20,
60 0xd0, 0xb5, 0xd1, 0x89, 0xd1, 0x91, 0x20, 0xd1,
61 0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0xd1, 0x85, 0x20,
62 0xd0, 0xbc, 0xd1, 0x8f, 0xd0, 0xb3, 0xd0, 0xba,
63 0xd0, 0xb8, 0xd1, 0x85, 0x20, 0xd1, 0x84, 0xd1,
64 0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd1, 0x86, 0xd1,
65 0x83, 0xd0, 0xb7, 0xd1, 0x81, 0xd0, 0xba, 0xd0,
66 0xb8, 0xd1, 0x85, 0x20, 0xd0, 0xb1, 0xd1, 0x83,
67 0xd0, 0xbb, 0xd0, 0xbe, 0xd0, 0xba, 0x20, 0xd0,
68 0xb4, 0xd0, 0xb0, 0x20, 0xd0, 0xb2, 0xd1, 0x8b,
69 0xd0, 0xbf, 0xd0, 0xb5, 0xd0, 0xb9, 0x20, 0xd1,
70 0x87, 0xd0, 0xb0, 0xd1, 0x8e];
71
/// Greek sample sentence, written as two adjacent string literals. It mixes
/// plain Greek letters with accented ones such as ὶ, ὲ, ὰ, ῶ and ὸ.
72 const String testGreekPhrase = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ "
73 "στὸ χρυσαφὶ ξέφωτο";
74
/// UTF-8 bytes that the tests expect for [testGreekPhrase]. The data mixes
/// two-byte sequences (lead byte 0xce or 0xcf) with three-byte sequences
/// (lead byte 0xe1), so it exercises both lengths in one string.
75 const List<int> testGreekUtf8 = const<int>[
76 0xce, 0x93, 0xce, 0xb1, 0xce, 0xb6, 0xce, 0xad,
77 0xce, 0xb5, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce,
78 0xb1, 0xe1, 0xbd, 0xb6, 0x20, 0xce, 0xbc, 0xcf,
79 0x85, 0xcf, 0x81, 0xcf, 0x84, 0xce, 0xb9, 0xe1,
80 0xbd, 0xb2, 0xcf, 0x82, 0x20, 0xce, 0xb4, 0xe1,
81 0xbd, 0xb2, 0xce, 0xbd, 0x20, 0xce, 0xb8, 0xe1,
82 0xbd, 0xb0, 0x20, 0xce, 0xb2, 0xcf, 0x81, 0xe1,
83 0xbf, 0xb6, 0x20, 0xcf, 0x80, 0xce, 0xb9, 0xe1,
84 0xbd, 0xb0, 0x20, 0xcf, 0x83, 0xcf, 0x84, 0xe1,
85 0xbd, 0xb8, 0x20, 0xcf, 0x87, 0xcf, 0x81, 0xcf,
86 0x85, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x86, 0xe1,
87 0xbd, 0xb6, 0x20, 0xce, 0xbe, 0xce, 0xad, 0xcf,
88 0x86, 0xcf, 0x89, 0xcf, 0x84, 0xce, 0xbf];
89
/// Japanese katakana sample text, written as two adjacent string literals
/// with ASCII spaces between the word groups.
90 const String testKatakanaPhrase = "イロハニホヘト チリヌルヲ ワカヨタレソ "
91 "ツネナラム ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン";
92
/// UTF-8 bytes that the tests expect for [testKatakanaPhrase]. Every katakana
/// character is a three-byte sequence starting with 0xe3; 0x20 bytes are the
/// separating spaces.
93 const List<int> testKatakanaUtf8 = const<int>[
94 0xe3, 0x82, 0xa4, 0xe3, 0x83, 0xad, 0xe3, 0x83,
95 0x8f, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0x9b, 0xe3,
96 0x83, 0x98, 0xe3, 0x83, 0x88, 0x20, 0xe3, 0x83,
97 0x81, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0x8c, 0xe3,
98 0x83, 0xab, 0xe3, 0x83, 0xb2, 0x20, 0xe3, 0x83,
99 0xaf, 0xe3, 0x82, 0xab, 0xe3, 0x83, 0xa8, 0xe3,
100 0x82, 0xbf, 0xe3, 0x83, 0xac, 0xe3, 0x82, 0xbd,
101 0x20, 0xe3, 0x83, 0x84, 0xe3, 0x83, 0x8d, 0xe3,
102 0x83, 0x8a, 0xe3, 0x83, 0xa9, 0xe3, 0x83, 0xa0,
103 0x20, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3,
104 0x83, 0x8e, 0xe3, 0x82, 0xaa, 0xe3, 0x82, 0xaf,
105 0xe3, 0x83, 0xa4, 0xe3, 0x83, 0x9e, 0x20, 0xe3,
106 0x82, 0xb1, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xb3,
107 0xe3, 0x82, 0xa8, 0xe3, 0x83, 0x86, 0x20, 0xe3,
108 0x82, 0xa2, 0xe3, 0x82, 0xb5, 0xe3, 0x82, 0xad,
109 0xe3, 0x83, 0xa6, 0xe3, 0x83, 0xa1, 0xe3, 0x83,
110 0x9f, 0xe3, 0x82, 0xb7, 0x20, 0xe3, 0x83, 0xb1,
111 0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa2, 0xe3, 0x82,
112 0xbb, 0xe3, 0x82, 0xb9, 0xe3, 0x83, 0xb3];
113
/// Test entry point: runs the rune-decoding checks, then the string-decoding
/// checks, then the encoding checks.
void main() {
  // Listed in execution order; a failure in one stops the ones after it.
  var suites = [
    testUtf8bytesToCodepoints,
    testUtf8BytesToString,
    testEncodeToUtf8,
  ];
  for (var runSuite in suites) {
    runSuite();
  }
}
119
/// Encodes [str] with the [UTF8] codec and returns the resulting bytes.
List<int> encodeUtf8(String str) {
  return UTF8.encode(str);
}
/// Decodes [codeUnits] as UTF-8 with `allowMalformed: true` and returns the
/// runes of the decoded string as a list.
List<int> utf8ToRunes(List<int> codeUnits) {
  var decoded = UTF8.decode(codeUnits, allowMalformed: true);
  return decoded.runes.toList();
}
/// Decodes [codeUnits] as UTF-8 using the [UTF8] codec's default settings
/// (no `allowMalformed` override) and returns the resulting string.
String decodeUtf8(List<int> codeUnits) {
  return UTF8.decode(codeUnits);
}
125
/// Checks that each sample phrase encodes to its expected UTF-8 byte list.
void testEncodeToUtf8() {
  // Shared assertion: [phrase] must encode to exactly [wantBytes]; [reason]
  // is the message reported on mismatch.
  void check(String phrase, List<int> wantBytes, String reason) {
    Expect.listEquals(wantBytes, encodeUtf8(phrase), reason);
  }

  check(testEnglishPhrase, testEnglishUtf8, "english to utf8");
  check(testDanishPhrase, testDanishUtf8, "encode danish to utf8");
  check(testHebrewPhrase, testHebrewUtf8, "Hebrew to utf8");
  check(testRussianPhrase, testRussianUtf8, "Russian to utf8");
  check(testGreekPhrase, testGreekUtf8, "Greek to utf8");
  check(testKatakanaPhrase, testKatakanaUtf8, "Katakana to utf8");
}
145
/// Checks the runes produced by [utf8ToRunes] for well-formed input, for
/// boundary values of each sequence length, and for many kinds of malformed
/// input.
///
/// For malformed input the expectations are expressed as a number of
/// [UNICODE_REPLACEMENT_CHARACTER_RUNE] values. The third argument of every
/// check is only the message reported when that check fails.
void testUtf8bytesToCodepoints() {
  Expect.listEquals(
      [954, 972, 963, 956, 949],
      utf8ToRunes(
          [0xce, 0xba, 0xcf, 0x8c, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5]),
      "κόσμε");

  // boundary conditions: First possible sequence of a certain length
  Expect.listEquals([], utf8ToRunes([]), "no input");
  Expect.listEquals([0x0], utf8ToRunes([0x0]), "0");
  Expect.listEquals([0x80], utf8ToRunes([0xc2, 0x80]), "80");
  Expect.listEquals([0x800], utf8ToRunes([0xe0, 0xa0, 0x80]), "800");
  Expect.listEquals(
      [0x10000], utf8ToRunes([0xf0, 0x90, 0x80, 0x80]), "10000");
  // 5- and 6-byte forms are expected to give one replacement per input byte.
  _expectReplacements(5, [0xf8, 0x88, 0x80, 0x80, 0x80], "200000");
  _expectReplacements(6, [0xfc, 0x84, 0x80, 0x80, 0x80, 0x80], "4000000");

  // boundary conditions: Last possible sequence of a certain length
  Expect.listEquals([0x7f], utf8ToRunes([0x7f]), "7f");
  Expect.listEquals([0x7ff], utf8ToRunes([0xdf, 0xbf]), "7ff");
  Expect.listEquals([0xffff], utf8ToRunes([0xef, 0xbf, 0xbf]), "ffff");
  _expectReplacements(4, [0xf7, 0xbf, 0xbf, 0xbf], "1fffff");
  _expectReplacements(5, [0xfb, 0xbf, 0xbf, 0xbf, 0xbf], "3ffffff");
  // Labels name the value the bytes would encode; for the largest 6-byte
  // form that is 0x7fffffff (not 0x4000000, which is the smallest one).
  _expectReplacements(6, [0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf], "7fffffff");

  // other boundary conditions
  Expect.listEquals([0xd7ff], utf8ToRunes([0xed, 0x9f, 0xbf]), "d7ff");
  Expect.listEquals([0xe000], utf8ToRunes([0xee, 0x80, 0x80]), "e000");
  // A correctly encoded U+FFFD decodes to the replacement rune itself.
  _expectReplacements(1, [0xef, 0xbf, 0xbd], "fffd");
  Expect.listEquals(
      [0x10ffff], utf8ToRunes([0xf4, 0x8f, 0xbf, 0xbf]), "10ffff");
  _expectReplacements(1, [0xf4, 0x90, 0x80, 0x80], "110000");

  // unexpected continuation bytes
  _expectReplacements(1, [0x80], "80 => replacement character");
  _expectReplacements(1, [0xbf], "bf => replacement character");

  // Every continuation byte 0x80..0xbf in a row: one replacement per byte.
  var allContinuationBytes = <int>[];
  for (var byte = 0x80; byte < 0xc0; byte++) {
    allContinuationBytes.add(byte);
  }
  _expectReplacements(allContinuationBytes.length, allContinuationBytes,
      "80 - bf => replacement character x 64");

  // Every possible lead byte, each followed by a space instead of the
  // continuation byte(s) it announces.
  _expectLeadBytesBeforeSpaceReplaced(0xc0, 0xe0,
      "c0 - df + space => replacement character + space x 32");
  _expectLeadBytesBeforeSpaceReplaced(0xe0, 0xf0,
      "e0 - ef + space => replacement character x 16");
  _expectLeadBytesBeforeSpaceReplaced(0xf0, 0xf8,
      "f0 - f7 + space => replacement character x 8");
  _expectLeadBytesBeforeSpaceReplaced(0xf8, 0xfc,
      "f8 - fb + space => replacement character x 4");
  _expectLeadBytesBeforeSpaceReplaced(0xfc, 0xfe,
      "fc - fd + space => replacement character x 2");

  // Sequences with last continuation byte missing
  _expectReplacements(1, [0xc2], "2-byte sequence with last byte missing");
  _expectReplacements(
      1, [0xe0, 0x80], "3-byte sequence with last byte missing");
  _expectReplacements(
      1, [0xf0, 0x80, 0x80], "4-byte sequence with last byte missing");
  _expectReplacements(
      4, [0xf8, 0x88, 0x80, 0x80], "5-byte sequence with last byte missing");
  _expectReplacements(5, [0xfc, 0x80, 0x80, 0x80, 0x80],
      "6-byte sequence with last byte missing");

  _expectReplacements(
      1, [0xdf], "2-byte sequence with last byte missing (hi)");
  _expectReplacements(
      1, [0xef, 0xbf], "3-byte sequence with last byte missing (hi)");
  _expectReplacements(
      3, [0xf7, 0xbf, 0xbf], "4-byte sequence with last byte missing (hi)");
  _expectReplacements(4, [0xfb, 0xbf, 0xbf, 0xbf],
      "5-byte sequence with last byte missing (hi)");
  _expectReplacements(5, [0xfd, 0xbf, 0xbf, 0xbf, 0xbf],
      "6-byte sequence with last byte missing (hi)");

  // Concatenation of incomplete sequences: the ten truncated inputs above,
  // back to back. Expected count is the sum of the individual counts
  // (1 + 1 + 1 + 4 + 5 + 1 + 1 + 3 + 4 + 5 = 26).
  _expectReplacements(
      26,
      [
        0xc2,
        0xe0, 0x80,
        0xf0, 0x80, 0x80,
        0xf8, 0x88, 0x80, 0x80,
        0xfc, 0x80, 0x80, 0x80, 0x80,
        0xdf,
        0xef, 0xbf,
        0xf7, 0xbf, 0xbf,
        0xfb, 0xbf, 0xbf, 0xbf,
        0xfd, 0xbf, 0xbf, 0xbf, 0xbf
      ],
      "Concatenation of incomplete sequences");

  // Impossible bytes
  _expectReplacements(1, [0xfe], "fe");
  _expectReplacements(1, [0xff], "ff");
  _expectReplacements(4, [0xfe, 0xfe, 0xff, 0xff], "fe fe ff ff");

  // Overlong sequences
  _expectReplacements(1, [0xc0, 0xaf], "c0 af");
  _expectReplacements(1, [0xe0, 0x80, 0xaf], "e0 80 af");
  _expectReplacements(1, [0xf0, 0x80, 0x80, 0xaf], "f0 80 80 af");
  _expectReplacements(5, [0xf8, 0x80, 0x80, 0x80, 0xaf], "f8 80 80 80 af");
  _expectReplacements(
      6, [0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf], "fc 80 80 80 80 af");

  _expectReplacements(1, [0xc1, 0xbf], "c1 bf");
  _expectReplacements(1, [0xe0, 0x9f, 0xbf], "e0 9f bf");
  _expectReplacements(1, [0xf0, 0x8f, 0xbf, 0xbf], "f0 8f bf bf");
  _expectReplacements(5, [0xf8, 0x87, 0xbf, 0xbf, 0xbf], "f8 87 bf bf bf");
  _expectReplacements(
      6, [0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf], "fc 83 bf bf bf bf");

  _expectReplacements(1, [0xc0, 0x80], "c0 80");
  _expectReplacements(1, [0xe0, 0x80, 0x80], "e0 80 80");
  _expectReplacements(1, [0xf0, 0x80, 0x80, 0x80], "f0 80 80 80");
  _expectReplacements(5, [0xf8, 0x80, 0x80, 0x80, 0x80], "f8 80 80 80 80");
  _expectReplacements(
      6, [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80], "fc 80 80 80 80 80");

  // Other illegal code positions (???)
  Expect.listEquals([0xfffe], utf8ToRunes([0xef, 0xbf, 0xbe]), "U+FFFE");
  Expect.listEquals([0xffff], utf8ToRunes([0xef, 0xbf, 0xbf]), "U+FFFF");
}

/// Asserts that [utf8ToRunes] turns [bytes] into exactly [count] copies of
/// [UNICODE_REPLACEMENT_CHARACTER_RUNE]; [reason] is the failure message.
void _expectReplacements(int count, List<int> bytes, String reason) {
  Expect.listEquals(
      new List<int>.filled(count, UNICODE_REPLACEMENT_CHARACTER_RUNE),
      utf8ToRunes(bytes),
      reason);
}

/// Builds an input in which every byte from [firstLead] up to (but not
/// including) [endLead] is followed by a space (0x20), and asserts that
/// [utf8ToRunes] yields a replacement rune followed by a space for each pair.
/// [reason] is the failure message.
void _expectLeadBytesBeforeSpaceReplaced(
    int firstLead, int endLead, String reason) {
  var input = <int>[];
  var expected = <int>[];
  for (var lead = firstLead; lead < endLead; lead++) {
    input.addAll([lead, 0x20]);
    expected.addAll([UNICODE_REPLACEMENT_CHARACTER_RUNE, 0x20]);
  }
  Expect.listEquals(expected, utf8ToRunes(input), reason);
}
421
/// Checks that each expected UTF-8 byte list decodes back to its sample
/// phrase.
void testUtf8BytesToString() {
  // Shared assertion: [bytes] must decode to [phrase]; [label] is the message
  // reported on mismatch.
  void expectDecodes(List<int> bytes, String phrase, String label) =>
      Expect.stringEquals(phrase, decodeUtf8(bytes), label);

  expectDecodes(testEnglishUtf8, testEnglishPhrase, "English");
  expectDecodes(testDanishUtf8, testDanishPhrase, "Danish");
  expectDecodes(testHebrewUtf8, testHebrewPhrase, "Hebrew");
  expectDecodes(testRussianUtf8, testRussianPhrase, "Russian");
  expectDecodes(testGreekUtf8, testGreekPhrase, "Greek");
  expectDecodes(testKatakanaUtf8, testKatakanaPhrase, "Katakana");
}
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698