OLD | NEW |
| (Empty) |
1 #!/usr/bin/env dart | |
2 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
3 // for details. All rights reserved. Use of this source code is governed by a | |
4 // BSD-style license that can be found in the LICENSE file. | |
5 | |
6 library utf8_tests; | |
7 import 'dunit.dart'; | |
8 import '../../../lib/convert/convert.dart'; | |
9 | |
/// Entry point: registers a [Utf8Tests] instance with a fresh [TestSuite]
/// and runs the suite.
void main() {
  final TestSuite runner = new TestSuite();
  final Utf8Tests cases = new Utf8Tests();
  runner.registerTestClass(cases);
  runner.run();
}
15 | |
/// UTF-8 encode/decode tests built on the `UTF8` codec.
///
/// Each `test*Phrase` constant is paired with a `test*Utf8` constant holding
/// the UTF-8 bytes of that phrase; the tests convert in both directions and
/// compare against the paired constant.
///
/// NOTE(review): `UNICODE_REPLACEMENT_CHARACTER_CODEPOINT`,
/// `IterableUtf8Decoder` and `decodeUtf8AsIterable` are referenced below but
/// are not declared in this file - confirm that an import provides them.
class Utf8Tests extends TestClass {
  static const String testEnglishPhrase =
      "The quick brown fox jumps over the lazy dog.";

  static const List<int> testEnglishUtf8 = const <int>[
    0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63,
    0x6b, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20,
    0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70,
    0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74,
    0x68, 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20,
    0x64, 0x6f, 0x67, 0x2e];

  static const String testDanishPhrase = "Quizdeltagerne spiste jordbær med "
      "fløde mens cirkusklovnen Wolther spillede på xylofon.";

  static const List<int> testDanishUtf8 = const <int>[
    0x51, 0x75, 0x69, 0x7a, 0x64, 0x65, 0x6c, 0x74,
    0x61, 0x67, 0x65, 0x72, 0x6e, 0x65, 0x20, 0x73,
    0x70, 0x69, 0x73, 0x74, 0x65, 0x20, 0x6a, 0x6f,
    0x72, 0x64, 0x62, 0xc3, 0xa6, 0x72, 0x20, 0x6d,
    0x65, 0x64, 0x20, 0x66, 0x6c, 0xc3, 0xb8, 0x64,
    0x65, 0x20, 0x6d, 0x65, 0x6e, 0x73, 0x20, 0x63,
    0x69, 0x72, 0x6b, 0x75, 0x73, 0x6b, 0x6c, 0x6f,
    0x76, 0x6e, 0x65, 0x6e, 0x20, 0x57, 0x6f, 0x6c,
    0x74, 0x68, 0x65, 0x72, 0x20, 0x73, 0x70, 0x69,
    0x6c, 0x6c, 0x65, 0x64, 0x65, 0x20, 0x70, 0xc3,
    0xa5, 0x20, 0x78, 0x79, 0x6c, 0x6f, 0x66, 0x6f,
    0x6e, 0x2e];

  // unusual formatting due to strange editor interaction w/ text direction.
  static const String
      testHebrewPhrase = "דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה";

  static const List<int> testHebrewUtf8 = const <int>[
    0xd7, 0x93, 0xd7, 0x92, 0x20, 0xd7, 0xa1, 0xd7,
    0xa7, 0xd7, 0xa8, 0xd7, 0x9f, 0x20, 0xd7, 0xa9,
    0xd7, 0x98, 0x20, 0xd7, 0x91, 0xd7, 0x99, 0xd7,
    0x9d, 0x20, 0xd7, 0x9e, 0xd7, 0x90, 0xd7, 0x95,
    0xd7, 0x9b, 0xd7, 0x96, 0xd7, 0x91, 0x20, 0xd7,
    0x95, 0xd7, 0x9c, 0xd7, 0xa4, 0xd7, 0xaa, 0xd7,
    0xa2, 0x20, 0xd7, 0x9e, 0xd7, 0xa6, 0xd7, 0x90,
    0x20, 0xd7, 0x9c, 0xd7, 0x95, 0x20, 0xd7, 0x97,
    0xd7, 0x91, 0xd7, 0xa8, 0xd7, 0x94, 0x20, 0xd7,
    0x90, 0xd7, 0x99, 0xd7, 0x9a, 0x20, 0xd7, 0x94,
    0xd7, 0xa7, 0xd7, 0x9c, 0xd7, 0x99, 0xd7, 0x98,
    0xd7, 0x94];

  static const String testRussianPhrase = "Съешь же ещё этих мягких "
      "французских булок да выпей чаю";

  static const List<int> testRussianUtf8 = const <int>[
    0xd0, 0xa1, 0xd1, 0x8a, 0xd0, 0xb5, 0xd1, 0x88,
    0xd1, 0x8c, 0x20, 0xd0, 0xb6, 0xd0, 0xb5, 0x20,
    0xd0, 0xb5, 0xd1, 0x89, 0xd1, 0x91, 0x20, 0xd1,
    0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0xd1, 0x85, 0x20,
    0xd0, 0xbc, 0xd1, 0x8f, 0xd0, 0xb3, 0xd0, 0xba,
    0xd0, 0xb8, 0xd1, 0x85, 0x20, 0xd1, 0x84, 0xd1,
    0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd1, 0x86, 0xd1,
    0x83, 0xd0, 0xb7, 0xd1, 0x81, 0xd0, 0xba, 0xd0,
    0xb8, 0xd1, 0x85, 0x20, 0xd0, 0xb1, 0xd1, 0x83,
    0xd0, 0xbb, 0xd0, 0xbe, 0xd0, 0xba, 0x20, 0xd0,
    0xb4, 0xd0, 0xb0, 0x20, 0xd0, 0xb2, 0xd1, 0x8b,
    0xd0, 0xbf, 0xd0, 0xb5, 0xd0, 0xb9, 0x20, 0xd1,
    0x87, 0xd0, 0xb0, 0xd1, 0x8e];

  static const String testGreekPhrase = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ "
      "στὸ χρυσαφὶ ξέφωτο";

  static const List<int> testGreekUtf8 = const <int>[
    0xce, 0x93, 0xce, 0xb1, 0xce, 0xb6, 0xce, 0xad,
    0xce, 0xb5, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce,
    0xb1, 0xe1, 0xbd, 0xb6, 0x20, 0xce, 0xbc, 0xcf,
    0x85, 0xcf, 0x81, 0xcf, 0x84, 0xce, 0xb9, 0xe1,
    0xbd, 0xb2, 0xcf, 0x82, 0x20, 0xce, 0xb4, 0xe1,
    0xbd, 0xb2, 0xce, 0xbd, 0x20, 0xce, 0xb8, 0xe1,
    0xbd, 0xb0, 0x20, 0xce, 0xb2, 0xcf, 0x81, 0xe1,
    0xbf, 0xb6, 0x20, 0xcf, 0x80, 0xce, 0xb9, 0xe1,
    0xbd, 0xb0, 0x20, 0xcf, 0x83, 0xcf, 0x84, 0xe1,
    0xbd, 0xb8, 0x20, 0xcf, 0x87, 0xcf, 0x81, 0xcf,
    0x85, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x86, 0xe1,
    0xbd, 0xb6, 0x20, 0xce, 0xbe, 0xce, 0xad, 0xcf,
    0x86, 0xcf, 0x89, 0xcf, 0x84, 0xce, 0xbf];

  // The continuation lines of this literal must stay at column 0: any leading
  // whitespace would become part of the string and no longer match
  // testKatakanaUtf8, which has no bytes between 0x0a and the next character.
  static const String testKatakanaPhrase = """
イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム
ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン""";

  static const List<int> testKatakanaUtf8 = const <int>[
    0xe3, 0x82, 0xa4, 0xe3, 0x83, 0xad, 0xe3, 0x83,
    0x8f, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0x9b, 0xe3,
    0x83, 0x98, 0xe3, 0x83, 0x88, 0x20, 0xe3, 0x83,
    0x81, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0x8c, 0xe3,
    0x83, 0xab, 0xe3, 0x83, 0xb2, 0x20, 0xe3, 0x83,
    0xaf, 0xe3, 0x82, 0xab, 0xe3, 0x83, 0xa8, 0xe3,
    0x82, 0xbf, 0xe3, 0x83, 0xac, 0xe3, 0x82, 0xbd,
    0x20, 0xe3, 0x83, 0x84, 0xe3, 0x83, 0x8d, 0xe3,
    0x83, 0x8a, 0xe3, 0x83, 0xa9, 0xe3, 0x83, 0xa0,
    0x0a, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3,
    0x83, 0x8e, 0xe3, 0x82, 0xaa, 0xe3, 0x82, 0xaf,
    0xe3, 0x83, 0xa4, 0xe3, 0x83, 0x9e, 0x20, 0xe3,
    0x82, 0xb1, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xb3,
    0xe3, 0x82, 0xa8, 0xe3, 0x83, 0x86, 0x20, 0xe3,
    0x82, 0xa2, 0xe3, 0x82, 0xb5, 0xe3, 0x82, 0xad,
    0xe3, 0x83, 0xa6, 0xe3, 0x83, 0xa1, 0xe3, 0x83,
    0x9f, 0xe3, 0x82, 0xb7, 0x20, 0xe3, 0x83, 0xb1,
    0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa2, 0xe3, 0x82,
    0xbb, 0xe3, 0x82, 0xb9, 0xe3, 0x83, 0xb3];

  /// Registers the four test methods of this class with [suite].
  void registerTests(TestSuite suite) {
    register("Utf8Tests.testUtf8bytesToCodepoints", testUtf8bytesToCodepoints,
        suite);
    register("Utf8Tests.testUtf8BytesToString", testUtf8BytesToString, suite);
    register("Utf8Tests.testEncodeToUtf8", testEncodeToUtf8, suite);
    register("Utf8Tests.testIterableMethods", testIterableMethods, suite);
  }

  /// Checks that encoding each phrase yields its paired byte table.
  void testEncodeToUtf8() {
    Expect.listEquals(testEnglishUtf8, encodeUtf8(testEnglishPhrase),
        "english to utf8");

    Expect.listEquals(testDanishUtf8, encodeUtf8(testDanishPhrase),
        "encode danish to utf8");

    Expect.listEquals(testHebrewUtf8, encodeUtf8(testHebrewPhrase),
        "Hebrew to utf8");

    Expect.listEquals(testRussianUtf8, encodeUtf8(testRussianPhrase),
        "Russian to utf8");

    Expect.listEquals(testGreekUtf8, encodeUtf8(testGreekPhrase),
        "Greek to utf8");

    Expect.listEquals(testKatakanaUtf8, encodeUtf8(testKatakanaPhrase),
        "Katakana to utf8");
  }

  /// Encodes [text] to UTF-8 bytes with `UTF8.encode`.
  List<int> encodeUtf8(String text) => UTF8.encode(text);

  /// Decodes [bytes] to a string with `UTF8.decode` (default, strict mode).
  String decodeUtf8(List bytes) => UTF8.decode(bytes);

  /// Decodes [bytes] and returns the code points (runes) of the result.
  ///
  /// Decoding uses `allowMalformed: true` because the callers in this class
  /// pass invalid sequences on purpose and expect U+FFFD in the output rather
  /// than an exception.
  List<int> utf8ToCodepoints(List bytes) =>
      UTF8.decode(bytes, allowMalformed: true).runes.toList();

  /// Alternate spelling of [utf8ToCodepoints], kept so that code using the
  /// `utf8ToCodePoints` name keeps working.
  List<int> utf8ToCodePoints(List bytes) => utf8ToCodepoints(bytes);

  /// Asserts that decoding [bytes] yields exactly [count] replacement
  /// characters and nothing else.
  void _expectReplaced(List<int> bytes, String reason, [int count = 1]) {
    Expect.listEquals(
        new List<int>.filled(count, UNICODE_REPLACEMENT_CHARACTER_CODEPOINT),
        utf8ToCodepoints(bytes), reason);
  }

  /// Feeds every lead byte in the half-open range [first]..[limit], each
  /// followed by a space (0x20), to the decoder and asserts that the result
  /// is one replacement character per lead byte.
  void _expectLeadBytesReplaced(int first, int limit, String reason) {
    List<int> input = <int>[];
    for (int lead = first; lead < limit; lead++) {
      input.addAll([lead, 0x20]);
    }
    _expectReplaced(input, reason, limit - first);
  }

  /// Exercises [utf8ToCodepoints] on valid boundary values and on a range of
  /// malformed inputs.
  ///
  /// NOTE(review): how many U+FFFD characters a decoder emits for one
  /// malformed sequence is decoder-specific - confirm the expected counts
  /// below against `UTF8.decode(..., allowMalformed: true)`.
  void testUtf8bytesToCodepoints() {
    Expect.listEquals([954, 972, 963, 956, 949],
        utf8ToCodepoints([0xce, 0xba, 0xcf, 0x8c, 0xcf,
                          0x83, 0xce, 0xbc, 0xce, 0xb5]), "κόσμε");

    // boundary conditions: First possible sequence of a certain length
    Expect.listEquals([], utf8ToCodepoints([]), "no input");
    Expect.listEquals([0x0], utf8ToCodepoints([0x0]), "0");
    Expect.listEquals([0x80], utf8ToCodepoints([0xc2, 0x80]), "80");
    Expect.listEquals([0x800],
        utf8ToCodepoints([0xe0, 0xa0, 0x80]), "800");
    Expect.listEquals([0x10000],
        utf8ToCodepoints([0xf0, 0x90, 0x80, 0x80]), "10000");
    _expectReplaced([0xf8, 0x88, 0x80, 0x80, 0x80], "200000");
    _expectReplaced([0xfc, 0x84, 0x80, 0x80, 0x80, 0x80], "4000000");

    // boundary conditions: Last possible sequence of a certain length
    Expect.listEquals([0x7f], utf8ToCodepoints([0x7f]), "7f");
    Expect.listEquals([0x7ff], utf8ToCodepoints([0xdf, 0xbf]), "7ff");
    Expect.listEquals([0xffff],
        utf8ToCodepoints([0xef, 0xbf, 0xbf]), "ffff");
    _expectReplaced([0xf7, 0xbf, 0xbf, 0xbf], "1fffff");
    _expectReplaced([0xfb, 0xbf, 0xbf, 0xbf, 0xbf], "3ffffff");
    // fd bf bf bf bf bf is the largest 6-byte pattern, i.e. 7fffffff.
    _expectReplaced([0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf], "7fffffff");

    // other boundary conditions
    Expect.listEquals([0xd7ff],
        utf8ToCodepoints([0xed, 0x9f, 0xbf]), "d7ff");
    Expect.listEquals([0xe000],
        utf8ToCodepoints([0xee, 0x80, 0x80]), "e000");
    // ef bf bd is the regular encoding of U+FFFD itself.
    Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
        utf8ToCodepoints([0xef, 0xbf, 0xbd]), "fffd");
    Expect.listEquals([0x10ffff],
        utf8ToCodepoints([0xf4, 0x8f, 0xbf, 0xbf]), "10ffff");
    _expectReplaced([0xf4, 0x90, 0x80, 0x80], "110000");

    // unexpected continuation bytes
    _expectReplaced([0x80], "80 => replacement character");
    _expectReplaced([0xbf], "bf => replacement character");

    List<int> allContinuationBytes = <int>[];
    for (int i = 0x80; i < 0xc0; i++) {
      allContinuationBytes.add(i);
    }
    _expectReplaced(allContinuationBytes,
        "80 - bf => replacement character x 64",
        allContinuationBytes.length);

    // lead bytes of 2- to 6-byte sequences, each followed by a space
    _expectLeadBytesReplaced(0xc0, 0xe0,
        "c0 - df + space => replacement character + space x 32");
    _expectLeadBytesReplaced(0xe0, 0xf0,
        "e0 - ef + space => replacement character x 16");
    _expectLeadBytesReplaced(0xf0, 0xf8,
        "f0 - f7 + space => replacement character x 8");
    _expectLeadBytesReplaced(0xf8, 0xfc,
        "f8 - fb + space => replacement character x 4");
    _expectLeadBytesReplaced(0xfc, 0xfe,
        "fc - fd + space => replacement character x 2");

    // Sequences with last continuation byte missing
    _expectReplaced([0xc2],
        "2-byte sequence with last byte missing");
    _expectReplaced([0xe0, 0x80],
        "3-byte sequence with last byte missing");
    _expectReplaced([0xf0, 0x80, 0x80],
        "4-byte sequence with last byte missing");
    _expectReplaced([0xf8, 0x88, 0x80, 0x80],
        "5-byte sequence with last byte missing");
    _expectReplaced([0xfc, 0x80, 0x80, 0x80, 0x80],
        "6-byte sequence with last byte missing");

    _expectReplaced([0xdf],
        "2-byte sequence with last byte missing (hi)");
    _expectReplaced([0xef, 0xbf],
        "3-byte sequence with last byte missing (hi)");
    _expectReplaced([0xf7, 0xbf, 0xbf],
        "4-byte sequence with last byte missing (hi)");
    _expectReplaced([0xfb, 0xbf, 0xbf, 0xbf],
        "5-byte sequence with last byte missing (hi)");
    _expectReplaced([0xfd, 0xbf, 0xbf, 0xbf, 0xbf],
        "6-byte sequence with last byte missing (hi)");

    // Concatenation of incomplete sequences: one replacement per sequence.
    _expectReplaced(
        [ 0xc2,
          0xe0, 0x80,
          0xf0, 0x80, 0x80,
          0xf8, 0x88, 0x80, 0x80,
          0xfc, 0x80, 0x80, 0x80, 0x80,
          0xdf,
          0xef, 0xbf,
          0xf7, 0xbf, 0xbf,
          0xfb, 0xbf, 0xbf, 0xbf,
          0xfd, 0xbf, 0xbf, 0xbf, 0xbf ],
        "Concatenation of incomplete sequences", 10);

    // Impossible bytes
    _expectReplaced([0xfe], "fe");
    _expectReplaced([0xff], "ff");
    _expectReplaced([0xfe, 0xfe, 0xff, 0xff], "fe fe ff ff", 4);

    // Overlong sequences
    _expectReplaced([0xc0, 0xaf], "c0 af");
    _expectReplaced([0xe0, 0x80, 0xaf], "e0 80 af");
    _expectReplaced([0xf0, 0x80, 0x80, 0xaf], "f0 80 80 af");
    _expectReplaced([0xf8, 0x80, 0x80, 0x80, 0xaf], "f8 80 80 80 af");
    _expectReplaced([0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf],
        "fc 80 80 80 80 af");

    _expectReplaced([0xc1, 0xbf], "c1 bf");
    _expectReplaced([0xe0, 0x9f, 0xbf], "e0 9f bf");
    _expectReplaced([0xf0, 0x8f, 0xbf, 0xbf], "f0 8f bf bf");
    _expectReplaced([0xf8, 0x87, 0xbf, 0xbf, 0xbf], "f8 87 bf bf bf");
    _expectReplaced([0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf],
        "fc 83 bf bf bf bf");

    _expectReplaced([0xc0, 0x80], "c0 80");
    _expectReplaced([0xe0, 0x80, 0x80], "e0 80 80");
    _expectReplaced([0xf0, 0x80, 0x80, 0x80], "f0 80 80 80");
    _expectReplaced([0xf8, 0x80, 0x80, 0x80, 0x80], "f8 80 80 80 80");
    _expectReplaced([0xfc, 0x80, 0x80, 0x80, 0x80, 0x80],
        "fc 80 80 80 80 80");

    // Illegal code positions
    _expectReplaced([0xed, 0xa0, 0x80], "U+D800");
    _expectReplaced([0xed, 0xad, 0xbf], "U+DB7F");
    _expectReplaced([0xed, 0xae, 0x80], "U+DB80");
    _expectReplaced([0xed, 0xaf, 0xbf], "U+DBFF");
    _expectReplaced([0xed, 0xb0, 0x80], "U+DC00");
    _expectReplaced([0xed, 0xbe, 0x80], "U+DF80");
    _expectReplaced([0xed, 0xbf, 0xbf], "U+DFFF");

    // Paired UTF-16 surrogates: one replacement per encoded surrogate.
    _expectReplaced([0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80],
        "U+D800 U+DC00", 2);
    _expectReplaced([0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf],
        "U+D800 U+DFFF", 2);
    _expectReplaced([0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80],
        "U+DB7F U+DC00", 2);
    _expectReplaced([0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf],
        "U+DB7F U+DFFF", 2);
    _expectReplaced([0xed, 0xae, 0x80, 0xed, 0xb0, 0x80],
        "U+DB80 U+DC00", 2);
    _expectReplaced([0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf],
        "U+DB80 U+DFFF", 2);
    _expectReplaced([0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80],
        "U+DBFF U+DC00", 2);
    _expectReplaced([0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf],
        "U+DBFF U+DFFF", 2);

    // Other illegal code positions (???)
    Expect.listEquals([0xfffe], utf8ToCodepoints([0xef, 0xbf, 0xbe]),
        "U+FFFE");
    Expect.listEquals([0xffff], utf8ToCodepoints([0xef, 0xbf, 0xbf]),
        "U+FFFF");
  }

  /// Checks that decoding each byte table yields its paired phrase.
  void testUtf8BytesToString() {
    Expect.stringEquals(testEnglishPhrase,
        decodeUtf8(testEnglishUtf8), "English");

    Expect.stringEquals(testDanishPhrase,
        decodeUtf8(testDanishUtf8), "Danish");

    Expect.stringEquals(testHebrewPhrase,
        decodeUtf8(testHebrewUtf8), "Hebrew");

    Expect.stringEquals(testRussianPhrase,
        decodeUtf8(testRussianUtf8), "Russian");

    Expect.stringEquals(testGreekPhrase,
        decodeUtf8(testGreekUtf8), "Greek");

    Expect.stringEquals(testKatakanaPhrase,
        decodeUtf8(testKatakanaUtf8), "Katakana");
  }

  /// Checks `first` and full iteration on the decoders returned by
  /// `decodeUtf8AsIterable` for the English and Katakana byte tables.
  void testIterableMethods() {
    IterableUtf8Decoder englishDecoder = decodeUtf8AsIterable(testEnglishUtf8);
    // get the first character
    Expect.equals(testEnglishUtf8[0], englishDecoder.first);
    // get the whole translation using the Iterable interface
    Expect.stringEquals(testEnglishPhrase,
        new String.fromCharCodes(new List<int>.from(englishDecoder)));

    IterableUtf8Decoder kataDecoder = decodeUtf8AsIterable(testKatakanaUtf8);
    // get the first character
    Expect.equals(testKatakanaPhrase.codeUnits[0], kataDecoder.first);
    // get the whole translation using the Iterable interface
    Expect.stringEquals(testKatakanaPhrase,
        new String.fromCharCodes(new List<int>.from(kataDecoder)));
  }
}
OLD | NEW |