OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 import 'package:expect/expect.dart'; | 5 import 'package:expect/expect.dart'; |
6 import 'dart:convert'; | 6 import 'dart:convert'; |
7 | 7 |
8 const String testEnglishPhrase = | 8 const String testEnglishPhrase = "The quick brown fox jumps over the lazy dog."; |
9 "The quick brown fox jumps over the lazy dog."; | 9 |
10 | 10 const List<int> testEnglishUtf8 = const <int>[ |
11 const List<int> testEnglishUtf8 = const<int> [ | 11 0x54, |
12 0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, | 12 0x68, |
13 0x6b, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20, | 13 0x65, |
14 0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70, | 14 0x20, |
15 0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74, | 15 0x71, |
16 0x68, 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20, | 16 0x75, |
17 0x64, 0x6f, 0x67, 0x2e]; | 17 0x69, |
| 18 0x63, |
| 19 0x6b, |
| 20 0x20, |
| 21 0x62, |
| 22 0x72, |
| 23 0x6f, |
| 24 0x77, |
| 25 0x6e, |
| 26 0x20, |
| 27 0x66, |
| 28 0x6f, |
| 29 0x78, |
| 30 0x20, |
| 31 0x6a, |
| 32 0x75, |
| 33 0x6d, |
| 34 0x70, |
| 35 0x73, |
| 36 0x20, |
| 37 0x6f, |
| 38 0x76, |
| 39 0x65, |
| 40 0x72, |
| 41 0x20, |
| 42 0x74, |
| 43 0x68, |
| 44 0x65, |
| 45 0x20, |
| 46 0x6c, |
| 47 0x61, |
| 48 0x7a, |
| 49 0x79, |
| 50 0x20, |
| 51 0x64, |
| 52 0x6f, |
| 53 0x67, |
| 54 0x2e |
| 55 ]; |
18 | 56 |
19 const String testDanishPhrase = "Quizdeltagerne spiste jordbær med " | 57 const String testDanishPhrase = "Quizdeltagerne spiste jordbær med " |
20 "fløde mens cirkusklovnen Wolther spillede på xylofon."; | 58 "fløde mens cirkusklovnen Wolther spillede på xylofon."; |
21 | 59 |
22 const List<int> testDanishUtf8 = const<int>[ | 60 const List<int> testDanishUtf8 = const <int>[ |
23 0x51, 0x75, 0x69, 0x7a, 0x64, 0x65, 0x6c, 0x74, | 61 0x51, |
24 0x61, 0x67, 0x65, 0x72, 0x6e, 0x65, 0x20, 0x73, | 62 0x75, |
25 0x70, 0x69, 0x73, 0x74, 0x65, 0x20, 0x6a, 0x6f, | 63 0x69, |
26 0x72, 0x64, 0x62, 0xc3, 0xa6, 0x72, 0x20, 0x6d, | 64 0x7a, |
27 0x65, 0x64, 0x20, 0x66, 0x6c, 0xc3, 0xb8, 0x64, | 65 0x64, |
28 0x65, 0x20, 0x6d, 0x65, 0x6e, 0x73, 0x20, 0x63, | 66 0x65, |
29 0x69, 0x72, 0x6b, 0x75, 0x73, 0x6b, 0x6c, 0x6f, | 67 0x6c, |
30 0x76, 0x6e, 0x65, 0x6e, 0x20, 0x57, 0x6f, 0x6c, | 68 0x74, |
31 0x74, 0x68, 0x65, 0x72, 0x20, 0x73, 0x70, 0x69, | 69 0x61, |
32 0x6c, 0x6c, 0x65, 0x64, 0x65, 0x20, 0x70, 0xc3, | 70 0x67, |
33 0xa5, 0x20, 0x78, 0x79, 0x6c, 0x6f, 0x66, 0x6f, | 71 0x65, |
34 0x6e, 0x2e]; | 72 0x72, |
| 73 0x6e, |
| 74 0x65, |
| 75 0x20, |
| 76 0x73, |
| 77 0x70, |
| 78 0x69, |
| 79 0x73, |
| 80 0x74, |
| 81 0x65, |
| 82 0x20, |
| 83 0x6a, |
| 84 0x6f, |
| 85 0x72, |
| 86 0x64, |
| 87 0x62, |
| 88 0xc3, |
| 89 0xa6, |
| 90 0x72, |
| 91 0x20, |
| 92 0x6d, |
| 93 0x65, |
| 94 0x64, |
| 95 0x20, |
| 96 0x66, |
| 97 0x6c, |
| 98 0xc3, |
| 99 0xb8, |
| 100 0x64, |
| 101 0x65, |
| 102 0x20, |
| 103 0x6d, |
| 104 0x65, |
| 105 0x6e, |
| 106 0x73, |
| 107 0x20, |
| 108 0x63, |
| 109 0x69, |
| 110 0x72, |
| 111 0x6b, |
| 112 0x75, |
| 113 0x73, |
| 114 0x6b, |
| 115 0x6c, |
| 116 0x6f, |
| 117 0x76, |
| 118 0x6e, |
| 119 0x65, |
| 120 0x6e, |
| 121 0x20, |
| 122 0x57, |
| 123 0x6f, |
| 124 0x6c, |
| 125 0x74, |
| 126 0x68, |
| 127 0x65, |
| 128 0x72, |
| 129 0x20, |
| 130 0x73, |
| 131 0x70, |
| 132 0x69, |
| 133 0x6c, |
| 134 0x6c, |
| 135 0x65, |
| 136 0x64, |
| 137 0x65, |
| 138 0x20, |
| 139 0x70, |
| 140 0xc3, |
| 141 0xa5, |
| 142 0x20, |
| 143 0x78, |
| 144 0x79, |
| 145 0x6c, |
| 146 0x6f, |
| 147 0x66, |
| 148 0x6f, |
| 149 0x6e, |
| 150 0x2e |
| 151 ]; |
35 | 152 |
36 // unusual formatting due to strange editor interaction w/ text direction. | 153 // unusual formatting due to strange editor interaction w/ text direction. |
37 const String | 154 const String testHebrewPhrase = |
38 testHebrewPhrase = "דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה"; | 155 "דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה"; |
39 | 156 |
40 const List<int> testHebrewUtf8 = const<int>[ | 157 const List<int> testHebrewUtf8 = const <int>[ |
41 0xd7, 0x93, 0xd7, 0x92, 0x20, 0xd7, 0xa1, 0xd7, | 158 0xd7, |
42 0xa7, 0xd7, 0xa8, 0xd7, 0x9f, 0x20, 0xd7, 0xa9, | 159 0x93, |
43 0xd7, 0x98, 0x20, 0xd7, 0x91, 0xd7, 0x99, 0xd7, | 160 0xd7, |
44 0x9d, 0x20, 0xd7, 0x9e, 0xd7, 0x90, 0xd7, 0x95, | 161 0x92, |
45 0xd7, 0x9b, 0xd7, 0x96, 0xd7, 0x91, 0x20, 0xd7, | 162 0x20, |
46 0x95, 0xd7, 0x9c, 0xd7, 0xa4, 0xd7, 0xaa, 0xd7, | 163 0xd7, |
47 0xa2, 0x20, 0xd7, 0x9e, 0xd7, 0xa6, 0xd7, 0x90, | 164 0xa1, |
48 0x20, 0xd7, 0x9c, 0xd7, 0x95, 0x20, 0xd7, 0x97, | 165 0xd7, |
49 0xd7, 0x91, 0xd7, 0xa8, 0xd7, 0x94, 0x20, 0xd7, | 166 0xa7, |
50 0x90, 0xd7, 0x99, 0xd7, 0x9a, 0x20, 0xd7, 0x94, | 167 0xd7, |
51 0xd7, 0xa7, 0xd7, 0x9c, 0xd7, 0x99, 0xd7, 0x98, | 168 0xa8, |
52 0xd7, 0x94]; | 169 0xd7, |
| 170 0x9f, |
| 171 0x20, |
| 172 0xd7, |
| 173 0xa9, |
| 174 0xd7, |
| 175 0x98, |
| 176 0x20, |
| 177 0xd7, |
| 178 0x91, |
| 179 0xd7, |
| 180 0x99, |
| 181 0xd7, |
| 182 0x9d, |
| 183 0x20, |
| 184 0xd7, |
| 185 0x9e, |
| 186 0xd7, |
| 187 0x90, |
| 188 0xd7, |
| 189 0x95, |
| 190 0xd7, |
| 191 0x9b, |
| 192 0xd7, |
| 193 0x96, |
| 194 0xd7, |
| 195 0x91, |
| 196 0x20, |
| 197 0xd7, |
| 198 0x95, |
| 199 0xd7, |
| 200 0x9c, |
| 201 0xd7, |
| 202 0xa4, |
| 203 0xd7, |
| 204 0xaa, |
| 205 0xd7, |
| 206 0xa2, |
| 207 0x20, |
| 208 0xd7, |
| 209 0x9e, |
| 210 0xd7, |
| 211 0xa6, |
| 212 0xd7, |
| 213 0x90, |
| 214 0x20, |
| 215 0xd7, |
| 216 0x9c, |
| 217 0xd7, |
| 218 0x95, |
| 219 0x20, |
| 220 0xd7, |
| 221 0x97, |
| 222 0xd7, |
| 223 0x91, |
| 224 0xd7, |
| 225 0xa8, |
| 226 0xd7, |
| 227 0x94, |
| 228 0x20, |
| 229 0xd7, |
| 230 0x90, |
| 231 0xd7, |
| 232 0x99, |
| 233 0xd7, |
| 234 0x9a, |
| 235 0x20, |
| 236 0xd7, |
| 237 0x94, |
| 238 0xd7, |
| 239 0xa7, |
| 240 0xd7, |
| 241 0x9c, |
| 242 0xd7, |
| 243 0x99, |
| 244 0xd7, |
| 245 0x98, |
| 246 0xd7, |
| 247 0x94 |
| 248 ]; |
53 | 249 |
54 const String testRussianPhrase = "Съешь же ещё этих мягких " | 250 const String testRussianPhrase = "Съешь же ещё этих мягких " |
55 "французских булок да выпей чаю"; | 251 "французских булок да выпей чаю"; |
56 | 252 |
57 const List<int> testRussianUtf8 = const<int>[ | 253 const List<int> testRussianUtf8 = const <int>[ |
58 0xd0, 0xa1, 0xd1, 0x8a, 0xd0, 0xb5, 0xd1, 0x88, | 254 0xd0, |
59 0xd1, 0x8c, 0x20, 0xd0, 0xb6, 0xd0, 0xb5, 0x20, | 255 0xa1, |
60 0xd0, 0xb5, 0xd1, 0x89, 0xd1, 0x91, 0x20, 0xd1, | 256 0xd1, |
61 0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0xd1, 0x85, 0x20, | 257 0x8a, |
62 0xd0, 0xbc, 0xd1, 0x8f, 0xd0, 0xb3, 0xd0, 0xba, | 258 0xd0, |
63 0xd0, 0xb8, 0xd1, 0x85, 0x20, 0xd1, 0x84, 0xd1, | 259 0xb5, |
64 0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd1, 0x86, 0xd1, | 260 0xd1, |
65 0x83, 0xd0, 0xb7, 0xd1, 0x81, 0xd0, 0xba, 0xd0, | 261 0x88, |
66 0xb8, 0xd1, 0x85, 0x20, 0xd0, 0xb1, 0xd1, 0x83, | 262 0xd1, |
67 0xd0, 0xbb, 0xd0, 0xbe, 0xd0, 0xba, 0x20, 0xd0, | 263 0x8c, |
68 0xb4, 0xd0, 0xb0, 0x20, 0xd0, 0xb2, 0xd1, 0x8b, | 264 0x20, |
69 0xd0, 0xbf, 0xd0, 0xb5, 0xd0, 0xb9, 0x20, 0xd1, | 265 0xd0, |
70 0x87, 0xd0, 0xb0, 0xd1, 0x8e]; | 266 0xb6, |
| 267 0xd0, |
| 268 0xb5, |
| 269 0x20, |
| 270 0xd0, |
| 271 0xb5, |
| 272 0xd1, |
| 273 0x89, |
| 274 0xd1, |
| 275 0x91, |
| 276 0x20, |
| 277 0xd1, |
| 278 0x8d, |
| 279 0xd1, |
| 280 0x82, |
| 281 0xd0, |
| 282 0xb8, |
| 283 0xd1, |
| 284 0x85, |
| 285 0x20, |
| 286 0xd0, |
| 287 0xbc, |
| 288 0xd1, |
| 289 0x8f, |
| 290 0xd0, |
| 291 0xb3, |
| 292 0xd0, |
| 293 0xba, |
| 294 0xd0, |
| 295 0xb8, |
| 296 0xd1, |
| 297 0x85, |
| 298 0x20, |
| 299 0xd1, |
| 300 0x84, |
| 301 0xd1, |
| 302 0x80, |
| 303 0xd0, |
| 304 0xb0, |
| 305 0xd0, |
| 306 0xbd, |
| 307 0xd1, |
| 308 0x86, |
| 309 0xd1, |
| 310 0x83, |
| 311 0xd0, |
| 312 0xb7, |
| 313 0xd1, |
| 314 0x81, |
| 315 0xd0, |
| 316 0xba, |
| 317 0xd0, |
| 318 0xb8, |
| 319 0xd1, |
| 320 0x85, |
| 321 0x20, |
| 322 0xd0, |
| 323 0xb1, |
| 324 0xd1, |
| 325 0x83, |
| 326 0xd0, |
| 327 0xbb, |
| 328 0xd0, |
| 329 0xbe, |
| 330 0xd0, |
| 331 0xba, |
| 332 0x20, |
| 333 0xd0, |
| 334 0xb4, |
| 335 0xd0, |
| 336 0xb0, |
| 337 0x20, |
| 338 0xd0, |
| 339 0xb2, |
| 340 0xd1, |
| 341 0x8b, |
| 342 0xd0, |
| 343 0xbf, |
| 344 0xd0, |
| 345 0xb5, |
| 346 0xd0, |
| 347 0xb9, |
| 348 0x20, |
| 349 0xd1, |
| 350 0x87, |
| 351 0xd0, |
| 352 0xb0, |
| 353 0xd1, |
| 354 0x8e |
| 355 ]; |
71 | 356 |
72 const String testGreekPhrase = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ " | 357 const String testGreekPhrase = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ " |
73 "στὸ χρυσαφὶ ξέφωτο"; | 358 "στὸ χρυσαφὶ ξέφωτο"; |
74 | 359 |
75 const List<int> testGreekUtf8 = const<int>[ | 360 const List<int> testGreekUtf8 = const <int>[ |
76 0xce, 0x93, 0xce, 0xb1, 0xce, 0xb6, 0xce, 0xad, | 361 0xce, |
77 0xce, 0xb5, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce, | 362 0x93, |
78 0xb1, 0xe1, 0xbd, 0xb6, 0x20, 0xce, 0xbc, 0xcf, | 363 0xce, |
79 0x85, 0xcf, 0x81, 0xcf, 0x84, 0xce, 0xb9, 0xe1, | 364 0xb1, |
80 0xbd, 0xb2, 0xcf, 0x82, 0x20, 0xce, 0xb4, 0xe1, | 365 0xce, |
81 0xbd, 0xb2, 0xce, 0xbd, 0x20, 0xce, 0xb8, 0xe1, | 366 0xb6, |
82 0xbd, 0xb0, 0x20, 0xce, 0xb2, 0xcf, 0x81, 0xe1, | 367 0xce, |
83 0xbf, 0xb6, 0x20, 0xcf, 0x80, 0xce, 0xb9, 0xe1, | 368 0xad, |
84 0xbd, 0xb0, 0x20, 0xcf, 0x83, 0xcf, 0x84, 0xe1, | 369 0xce, |
85 0xbd, 0xb8, 0x20, 0xcf, 0x87, 0xcf, 0x81, 0xcf, | 370 0xb5, |
86 0x85, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x86, 0xe1, | 371 0xcf, |
87 0xbd, 0xb6, 0x20, 0xce, 0xbe, 0xce, 0xad, 0xcf, | 372 0x82, |
88 0x86, 0xcf, 0x89, 0xcf, 0x84, 0xce, 0xbf]; | 373 0x20, |
| 374 0xce, |
| 375 0xba, |
| 376 0xce, |
| 377 0xb1, |
| 378 0xe1, |
| 379 0xbd, |
| 380 0xb6, |
| 381 0x20, |
| 382 0xce, |
| 383 0xbc, |
| 384 0xcf, |
| 385 0x85, |
| 386 0xcf, |
| 387 0x81, |
| 388 0xcf, |
| 389 0x84, |
| 390 0xce, |
| 391 0xb9, |
| 392 0xe1, |
| 393 0xbd, |
| 394 0xb2, |
| 395 0xcf, |
| 396 0x82, |
| 397 0x20, |
| 398 0xce, |
| 399 0xb4, |
| 400 0xe1, |
| 401 0xbd, |
| 402 0xb2, |
| 403 0xce, |
| 404 0xbd, |
| 405 0x20, |
| 406 0xce, |
| 407 0xb8, |
| 408 0xe1, |
| 409 0xbd, |
| 410 0xb0, |
| 411 0x20, |
| 412 0xce, |
| 413 0xb2, |
| 414 0xcf, |
| 415 0x81, |
| 416 0xe1, |
| 417 0xbf, |
| 418 0xb6, |
| 419 0x20, |
| 420 0xcf, |
| 421 0x80, |
| 422 0xce, |
| 423 0xb9, |
| 424 0xe1, |
| 425 0xbd, |
| 426 0xb0, |
| 427 0x20, |
| 428 0xcf, |
| 429 0x83, |
| 430 0xcf, |
| 431 0x84, |
| 432 0xe1, |
| 433 0xbd, |
| 434 0xb8, |
| 435 0x20, |
| 436 0xcf, |
| 437 0x87, |
| 438 0xcf, |
| 439 0x81, |
| 440 0xcf, |
| 441 0x85, |
| 442 0xcf, |
| 443 0x83, |
| 444 0xce, |
| 445 0xb1, |
| 446 0xcf, |
| 447 0x86, |
| 448 0xe1, |
| 449 0xbd, |
| 450 0xb6, |
| 451 0x20, |
| 452 0xce, |
| 453 0xbe, |
| 454 0xce, |
| 455 0xad, |
| 456 0xcf, |
| 457 0x86, |
| 458 0xcf, |
| 459 0x89, |
| 460 0xcf, |
| 461 0x84, |
| 462 0xce, |
| 463 0xbf |
| 464 ]; |
89 | 465 |
90 const String testKatakanaPhrase = "イロハニホヘト チリヌルヲ ワカヨタレソ " | 466 const String testKatakanaPhrase = "イロハニホヘト チリヌルヲ ワカヨタレソ " |
91 "ツネナラム ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン"; | 467 "ツネナラム ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン"; |
92 | 468 |
93 const List<int> testKatakanaUtf8 = const<int>[ | 469 const List<int> testKatakanaUtf8 = const <int>[ |
94 0xe3, 0x82, 0xa4, 0xe3, 0x83, 0xad, 0xe3, 0x83, | 470 0xe3, |
95 0x8f, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0x9b, 0xe3, | 471 0x82, |
96 0x83, 0x98, 0xe3, 0x83, 0x88, 0x20, 0xe3, 0x83, | 472 0xa4, |
97 0x81, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0x8c, 0xe3, | 473 0xe3, |
98 0x83, 0xab, 0xe3, 0x83, 0xb2, 0x20, 0xe3, 0x83, | 474 0x83, |
99 0xaf, 0xe3, 0x82, 0xab, 0xe3, 0x83, 0xa8, 0xe3, | 475 0xad, |
100 0x82, 0xbf, 0xe3, 0x83, 0xac, 0xe3, 0x82, 0xbd, | 476 0xe3, |
101 0x20, 0xe3, 0x83, 0x84, 0xe3, 0x83, 0x8d, 0xe3, | 477 0x83, |
102 0x83, 0x8a, 0xe3, 0x83, 0xa9, 0xe3, 0x83, 0xa0, | 478 0x8f, |
103 0x20, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3, | 479 0xe3, |
104 0x83, 0x8e, 0xe3, 0x82, 0xaa, 0xe3, 0x82, 0xaf, | 480 0x83, |
105 0xe3, 0x83, 0xa4, 0xe3, 0x83, 0x9e, 0x20, 0xe3, | 481 0x8b, |
106 0x82, 0xb1, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xb3, | 482 0xe3, |
107 0xe3, 0x82, 0xa8, 0xe3, 0x83, 0x86, 0x20, 0xe3, | 483 0x83, |
108 0x82, 0xa2, 0xe3, 0x82, 0xb5, 0xe3, 0x82, 0xad, | 484 0x9b, |
109 0xe3, 0x83, 0xa6, 0xe3, 0x83, 0xa1, 0xe3, 0x83, | 485 0xe3, |
110 0x9f, 0xe3, 0x82, 0xb7, 0x20, 0xe3, 0x83, 0xb1, | 486 0x83, |
111 0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa2, 0xe3, 0x82, | 487 0x98, |
112 0xbb, 0xe3, 0x82, 0xb9, 0xe3, 0x83, 0xb3]; | 488 0xe3, |
| 489 0x83, |
| 490 0x88, |
| 491 0x20, |
| 492 0xe3, |
| 493 0x83, |
| 494 0x81, |
| 495 0xe3, |
| 496 0x83, |
| 497 0xaa, |
| 498 0xe3, |
| 499 0x83, |
| 500 0x8c, |
| 501 0xe3, |
| 502 0x83, |
| 503 0xab, |
| 504 0xe3, |
| 505 0x83, |
| 506 0xb2, |
| 507 0x20, |
| 508 0xe3, |
| 509 0x83, |
| 510 0xaf, |
| 511 0xe3, |
| 512 0x82, |
| 513 0xab, |
| 514 0xe3, |
| 515 0x83, |
| 516 0xa8, |
| 517 0xe3, |
| 518 0x82, |
| 519 0xbf, |
| 520 0xe3, |
| 521 0x83, |
| 522 0xac, |
| 523 0xe3, |
| 524 0x82, |
| 525 0xbd, |
| 526 0x20, |
| 527 0xe3, |
| 528 0x83, |
| 529 0x84, |
| 530 0xe3, |
| 531 0x83, |
| 532 0x8d, |
| 533 0xe3, |
| 534 0x83, |
| 535 0x8a, |
| 536 0xe3, |
| 537 0x83, |
| 538 0xa9, |
| 539 0xe3, |
| 540 0x83, |
| 541 0xa0, |
| 542 0x20, |
| 543 0xe3, |
| 544 0x82, |
| 545 0xa6, |
| 546 0xe3, |
| 547 0x83, |
| 548 0xb0, |
| 549 0xe3, |
| 550 0x83, |
| 551 0x8e, |
| 552 0xe3, |
| 553 0x82, |
| 554 0xaa, |
| 555 0xe3, |
| 556 0x82, |
| 557 0xaf, |
| 558 0xe3, |
| 559 0x83, |
| 560 0xa4, |
| 561 0xe3, |
| 562 0x83, |
| 563 0x9e, |
| 564 0x20, |
| 565 0xe3, |
| 566 0x82, |
| 567 0xb1, |
| 568 0xe3, |
| 569 0x83, |
| 570 0x95, |
| 571 0xe3, |
| 572 0x82, |
| 573 0xb3, |
| 574 0xe3, |
| 575 0x82, |
| 576 0xa8, |
| 577 0xe3, |
| 578 0x83, |
| 579 0x86, |
| 580 0x20, |
| 581 0xe3, |
| 582 0x82, |
| 583 0xa2, |
| 584 0xe3, |
| 585 0x82, |
| 586 0xb5, |
| 587 0xe3, |
| 588 0x82, |
| 589 0xad, |
| 590 0xe3, |
| 591 0x83, |
| 592 0xa6, |
| 593 0xe3, |
| 594 0x83, |
| 595 0xa1, |
| 596 0xe3, |
| 597 0x83, |
| 598 0x9f, |
| 599 0xe3, |
| 600 0x82, |
| 601 0xb7, |
| 602 0x20, |
| 603 0xe3, |
| 604 0x83, |
| 605 0xb1, |
| 606 0xe3, |
| 607 0x83, |
| 608 0x92, |
| 609 0xe3, |
| 610 0x83, |
| 611 0xa2, |
| 612 0xe3, |
| 613 0x82, |
| 614 0xbb, |
| 615 0xe3, |
| 616 0x82, |
| 617 0xb9, |
| 618 0xe3, |
| 619 0x83, |
| 620 0xb3 |
| 621 ]; |
113 | 622 |
114 void main() { | 623 void main() { |
115 testUtf8bytesToCodepoints(); | 624 testUtf8bytesToCodepoints(); |
116 testUtf8BytesToString(); | 625 testUtf8BytesToString(); |
117 testEncodeToUtf8(); | 626 testEncodeToUtf8(); |
118 } | 627 } |
119 | 628 |
120 List<int> encodeUtf8(String str) => UTF8.encode(str); | 629 List<int> encodeUtf8(String str) => UTF8.encode(str); |
121 List<int> utf8ToRunes(List<int> codeUnits) { | 630 List<int> utf8ToRunes(List<int> codeUnits) { |
122 return UTF8.decode(codeUnits, allowMalformed: true).runes.toList(); | 631 return UTF8.decode(codeUnits, allowMalformed: true).runes.toList(); |
123 } | 632 } |
| 633 |
124 String decodeUtf8(List<int> codeUnits) => UTF8.decode(codeUnits); | 634 String decodeUtf8(List<int> codeUnits) => UTF8.decode(codeUnits); |
125 | 635 |
126 void testEncodeToUtf8() { | 636 void testEncodeToUtf8() { |
127 Expect.listEquals(testEnglishUtf8, encodeUtf8(testEnglishPhrase), | 637 Expect.listEquals( |
128 "english to utf8"); | 638 testEnglishUtf8, encodeUtf8(testEnglishPhrase), "english to utf8"); |
129 | 639 |
130 Expect.listEquals(testDanishUtf8, encodeUtf8(testDanishPhrase), | 640 Expect.listEquals( |
131 "encode danish to utf8"); | 641 testDanishUtf8, encodeUtf8(testDanishPhrase), "encode danish to utf8"); |
132 | 642 |
133 Expect.listEquals(testHebrewUtf8, encodeUtf8(testHebrewPhrase), | 643 Expect.listEquals( |
134 "Hebrew to utf8"); | 644 testHebrewUtf8, encodeUtf8(testHebrewPhrase), "Hebrew to utf8"); |
135 | 645 |
136 Expect.listEquals(testRussianUtf8, encodeUtf8(testRussianPhrase), | 646 Expect.listEquals( |
137 "Russian to utf8"); | 647 testRussianUtf8, encodeUtf8(testRussianPhrase), "Russian to utf8"); |
138 | 648 |
139 Expect.listEquals(testGreekUtf8, encodeUtf8(testGreekPhrase), | 649 Expect.listEquals( |
140 "Greek to utf8"); | 650 testGreekUtf8, encodeUtf8(testGreekPhrase), "Greek to utf8"); |
141 | 651 |
142 Expect.listEquals(testKatakanaUtf8, encodeUtf8(testKatakanaPhrase), | 652 Expect.listEquals( |
143 "Katakana to utf8"); | 653 testKatakanaUtf8, encodeUtf8(testKatakanaPhrase), "Katakana to utf8"); |
144 } | 654 } |
145 | 655 |
146 void testUtf8bytesToCodepoints() { | 656 void testUtf8bytesToCodepoints() { |
147 Expect.listEquals([954, 972, 963, 956, 949], | 657 Expect.listEquals( |
148 utf8ToRunes([0xce, 0xba, 0xcf, 0x8c, 0xcf, | 658 [954, 972, 963, 956, 949], |
149 0x83, 0xce, 0xbc, 0xce, 0xb5]), "κόσμε"); | 659 utf8ToRunes([0xce, 0xba, 0xcf, 0x8c, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5]), |
| 660 "κόσμε"); |
150 | 661 |
151 // boundary conditions: First possible sequence of a certain length | 662 // boundary conditions: First possible sequence of a certain length |
152 Expect.listEquals([], utf8ToRunes([]), "no input"); | 663 Expect.listEquals([], utf8ToRunes([]), "no input"); |
153 Expect.listEquals([0x0], utf8ToRunes([0x0]), "0"); | 664 Expect.listEquals([0x0], utf8ToRunes([0x0]), "0"); |
154 Expect.listEquals([0x80], utf8ToRunes([0xc2, 0x80]), "80"); | 665 Expect.listEquals([0x80], utf8ToRunes([0xc2, 0x80]), "80"); |
155 Expect.listEquals([0x800], | 666 Expect.listEquals([0x800], utf8ToRunes([0xe0, 0xa0, 0x80]), "800"); |
156 utf8ToRunes([0xe0, 0xa0, 0x80]), "800"); | 667 Expect.listEquals([0x10000], utf8ToRunes([0xf0, 0x90, 0x80, 0x80]), "10000"); |
157 Expect.listEquals([0x10000], | 668 Expect.listEquals([ |
158 utf8ToRunes([0xf0, 0x90, 0x80, 0x80]), "10000"); | 669 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
159 Expect.listEquals( | 670 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
160 [UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 671 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
161 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 672 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
162 UNICODE_REPLACEMENT_CHARACTER_RUNE], | 673 UNICODE_REPLACEMENT_CHARACTER_RUNE |
163 utf8ToRunes([0xf8, 0x88, 0x80, 0x80, 0x80]), "200000"); | 674 ], utf8ToRunes([0xf8, 0x88, 0x80, 0x80, 0x80]), "200000"); |
164 Expect.listEquals( | 675 Expect.listEquals([ |
165 [UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 676 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
166 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 677 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
167 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE], | 678 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
168 utf8ToRunes([0xfc, 0x84, 0x80, 0x80, 0x80, 0x80]), | 679 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
169 "4000000"); | 680 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
| 681 UNICODE_REPLACEMENT_CHARACTER_RUNE |
| 682 ], utf8ToRunes([0xfc, 0x84, 0x80, 0x80, 0x80, 0x80]), "4000000"); |
170 | 683 |
171 // boundary conditions: Last possible sequence of a certain length | 684 // boundary conditions: Last possible sequence of a certain length |
172 Expect.listEquals([0x7f], utf8ToRunes([0x7f]), "7f"); | 685 Expect.listEquals([0x7f], utf8ToRunes([0x7f]), "7f"); |
173 Expect.listEquals([0x7ff], utf8ToRunes([0xdf, 0xbf]), "7ff"); | 686 Expect.listEquals([0x7ff], utf8ToRunes([0xdf, 0xbf]), "7ff"); |
174 Expect.listEquals([0xffff], | 687 Expect.listEquals([0xffff], utf8ToRunes([0xef, 0xbf, 0xbf]), "ffff"); |
175 utf8ToRunes([0xef, 0xbf, 0xbf]), "ffff"); | 688 Expect.listEquals([ |
176 Expect.listEquals( | 689 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
177 [UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 690 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
178 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE], | 691 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
179 utf8ToRunes([0xf7, 0xbf, 0xbf, 0xbf]), "1fffff"); | 692 UNICODE_REPLACEMENT_CHARACTER_RUNE |
180 Expect.listEquals( | 693 ], utf8ToRunes([0xf7, 0xbf, 0xbf, 0xbf]), "1fffff"); |
181 [UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 694 Expect.listEquals([ |
182 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 695 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
183 UNICODE_REPLACEMENT_CHARACTER_RUNE], | 696 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
184 utf8ToRunes([0xfb, 0xbf, 0xbf, 0xbf, 0xbf]), "3ffffff"); | 697 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
185 Expect.listEquals( | 698 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
186 [UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 699 UNICODE_REPLACEMENT_CHARACTER_RUNE |
187 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 700 ], utf8ToRunes([0xfb, 0xbf, 0xbf, 0xbf, 0xbf]), "3ffffff"); |
188 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE], | 701 Expect.listEquals([ |
189 utf8ToRunes([0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf]), | 702 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
190 "4000000"); | 703 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
| 704 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
| 705 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
| 706 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
| 707 UNICODE_REPLACEMENT_CHARACTER_RUNE |
| 708 ], utf8ToRunes([0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf]), "4000000"); |
191 | 709 |
192 // other boundary conditions | 710 // other boundary conditions |
193 Expect.listEquals([0xd7ff], | 711 Expect.listEquals([0xd7ff], utf8ToRunes([0xed, 0x9f, 0xbf]), "d7ff"); |
194 utf8ToRunes([0xed, 0x9f, 0xbf]), "d7ff"); | 712 Expect.listEquals([0xe000], utf8ToRunes([0xee, 0x80, 0x80]), "e000"); |
195 Expect.listEquals([0xe000], | |
196 utf8ToRunes([0xee, 0x80, 0x80]), "e000"); | |
197 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 713 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], |
198 utf8ToRunes([0xef, 0xbf, 0xbd]), "fffd"); | 714 utf8ToRunes([0xef, 0xbf, 0xbd]), "fffd"); |
199 Expect.listEquals([0x10ffff], | 715 Expect |
200 utf8ToRunes([0xf4, 0x8f, 0xbf, 0xbf]), "10ffff"); | 716 .listEquals([0x10ffff], utf8ToRunes([0xf4, 0x8f, 0xbf, 0xbf]), "10ffff"); |
201 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 717 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], |
202 utf8ToRunes([0xf4, 0x90, 0x80, 0x80]), "110000"); | 718 utf8ToRunes([0xf4, 0x90, 0x80, 0x80]), "110000"); |
203 | 719 |
204 // unexpected continuation bytes | 720 // unexpected continuation bytes |
205 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 721 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], utf8ToRunes([0x80]), |
206 utf8ToRunes([0x80]), "80 => replacement character"); | 722 "80 => replacement character"); |
207 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 723 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], utf8ToRunes([0xbf]), |
208 utf8ToRunes([0xbf]), "bf => replacement character"); | 724 "bf => replacement character"); |
209 | 725 |
210 List<int> allContinuationBytes = <int>[]; | 726 List<int> allContinuationBytes = <int>[]; |
211 List<int> matchingReplacementChars = <int>[]; | 727 List<int> matchingReplacementChars = <int>[]; |
212 for (int i = 0x80; i < 0xc0; i++) { | 728 for (int i = 0x80; i < 0xc0; i++) { |
213 allContinuationBytes.add(i); | 729 allContinuationBytes.add(i); |
214 matchingReplacementChars.add(UNICODE_REPLACEMENT_CHARACTER_RUNE); | 730 matchingReplacementChars.add(UNICODE_REPLACEMENT_CHARACTER_RUNE); |
215 } | 731 } |
216 Expect.listEquals(matchingReplacementChars, | 732 Expect.listEquals(matchingReplacementChars, utf8ToRunes(allContinuationBytes), |
217 utf8ToRunes(allContinuationBytes), | |
218 "80 - bf => replacement character x 64"); | 733 "80 - bf => replacement character x 64"); |
219 | 734 |
220 List<int> allFirstTwoByteSeq = <int>[]; | 735 List<int> allFirstTwoByteSeq = <int>[]; |
221 matchingReplacementChars = <int>[]; | 736 matchingReplacementChars = <int>[]; |
222 for (int i = 0xc0; i < 0xe0; i++) { | 737 for (int i = 0xc0; i < 0xe0; i++) { |
223 allFirstTwoByteSeq.addAll([i, 0x20]); | 738 allFirstTwoByteSeq.addAll([i, 0x20]); |
224 matchingReplacementChars.addAll( | 739 matchingReplacementChars.addAll([UNICODE_REPLACEMENT_CHARACTER_RUNE, 0x20]); |
225 [UNICODE_REPLACEMENT_CHARACTER_RUNE, 0x20]); | 740 } |
226 } | 741 Expect.listEquals(matchingReplacementChars, utf8ToRunes(allFirstTwoByteSeq), |
227 Expect.listEquals(matchingReplacementChars, | |
228 utf8ToRunes(allFirstTwoByteSeq), | |
229 "c0 - df + space => replacement character + space x 32"); | 742 "c0 - df + space => replacement character + space x 32"); |
230 | 743 |
231 List<int> allFirstThreeByteSeq = <int>[]; | 744 List<int> allFirstThreeByteSeq = <int>[]; |
232 matchingReplacementChars = <int>[]; | 745 matchingReplacementChars = <int>[]; |
233 for (int i = 0xe0; i < 0xf0; i++) { | 746 for (int i = 0xe0; i < 0xf0; i++) { |
234 allFirstThreeByteSeq.addAll([i, 0x20]); | 747 allFirstThreeByteSeq.addAll([i, 0x20]); |
235 matchingReplacementChars.addAll( | 748 matchingReplacementChars.addAll([UNICODE_REPLACEMENT_CHARACTER_RUNE, 0x20]); |
236 [UNICODE_REPLACEMENT_CHARACTER_RUNE, 0x20]); | 749 } |
237 } | 750 Expect.listEquals(matchingReplacementChars, utf8ToRunes(allFirstThreeByteSeq), |
238 Expect.listEquals(matchingReplacementChars, | |
239 utf8ToRunes(allFirstThreeByteSeq), | |
240 "e0 - ef + space => replacement character x 16"); | 751 "e0 - ef + space => replacement character x 16"); |
241 | 752 |
242 List<int> allFirstFourByteSeq = <int>[]; | 753 List<int> allFirstFourByteSeq = <int>[]; |
243 matchingReplacementChars = <int>[]; | 754 matchingReplacementChars = <int>[]; |
244 for (int i = 0xf0; i < 0xf8; i++) { | 755 for (int i = 0xf0; i < 0xf8; i++) { |
245 allFirstFourByteSeq.addAll([i, 0x20]); | 756 allFirstFourByteSeq.addAll([i, 0x20]); |
246 matchingReplacementChars.addAll( | 757 matchingReplacementChars.addAll([UNICODE_REPLACEMENT_CHARACTER_RUNE, 0x20]); |
247 [UNICODE_REPLACEMENT_CHARACTER_RUNE, 0x20]); | 758 } |
248 } | 759 Expect.listEquals(matchingReplacementChars, utf8ToRunes(allFirstFourByteSeq), |
249 Expect.listEquals(matchingReplacementChars, | |
250 utf8ToRunes(allFirstFourByteSeq), | |
251 "f0 - f7 + space => replacement character x 8"); | 760 "f0 - f7 + space => replacement character x 8"); |
252 | 761 |
253 List<int> allFirstFiveByteSeq = <int>[]; | 762 List<int> allFirstFiveByteSeq = <int>[]; |
254 matchingReplacementChars = <int>[]; | 763 matchingReplacementChars = <int>[]; |
255 for (int i = 0xf8; i < 0xfc; i++) { | 764 for (int i = 0xf8; i < 0xfc; i++) { |
256 allFirstFiveByteSeq.addAll([i, 0x20]); | 765 allFirstFiveByteSeq.addAll([i, 0x20]); |
257 matchingReplacementChars.addAll( | 766 matchingReplacementChars.addAll([UNICODE_REPLACEMENT_CHARACTER_RUNE, 0x20]); |
258 [UNICODE_REPLACEMENT_CHARACTER_RUNE, 0x20]); | 767 } |
259 } | 768 Expect.listEquals(matchingReplacementChars, utf8ToRunes(allFirstFiveByteSeq), |
260 Expect.listEquals(matchingReplacementChars, | |
261 utf8ToRunes(allFirstFiveByteSeq), | |
262 "f8 - fb + space => replacement character x 4"); | 769 "f8 - fb + space => replacement character x 4"); |
263 | 770 |
264 List<int> allFirstSixByteSeq = <int>[]; | 771 List<int> allFirstSixByteSeq = <int>[]; |
265 matchingReplacementChars = <int>[]; | 772 matchingReplacementChars = <int>[]; |
266 for (int i = 0xfc; i < 0xfe; i++) { | 773 for (int i = 0xfc; i < 0xfe; i++) { |
267 allFirstSixByteSeq.addAll([i, 0x20]); | 774 allFirstSixByteSeq.addAll([i, 0x20]); |
268 matchingReplacementChars.addAll( | 775 matchingReplacementChars.addAll([UNICODE_REPLACEMENT_CHARACTER_RUNE, 0x20]); |
269 [UNICODE_REPLACEMENT_CHARACTER_RUNE, 0x20]); | 776 } |
270 } | 777 Expect.listEquals(matchingReplacementChars, utf8ToRunes(allFirstSixByteSeq), |
271 Expect.listEquals(matchingReplacementChars, | |
272 utf8ToRunes(allFirstSixByteSeq), | |
273 "fc - fd + space => replacement character x 2"); | 778 "fc - fd + space => replacement character x 2"); |
274 | 779 |
275 // Sequences with last continuation byte missing | 780 // Sequences with last continuation byte missing |
276 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 781 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], utf8ToRunes([0xc2]), |
277 utf8ToRunes([0xc2]), | |
278 "2-byte sequence with last byte missing"); | 782 "2-byte sequence with last byte missing"); |
279 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 783 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], |
280 utf8ToRunes([0xe0, 0x80]), | 784 utf8ToRunes([0xe0, 0x80]), "3-byte sequence with last byte missing"); |
281 "3-byte sequence with last byte missing"); | 785 Expect.listEquals( |
282 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 786 [UNICODE_REPLACEMENT_CHARACTER_RUNE], |
283 utf8ToRunes([0xf0, 0x80, 0x80]), | 787 utf8ToRunes([0xf0, 0x80, 0x80]), |
284 "4-byte sequence with last byte missing"); | 788 "4-byte sequence with last byte missing"); |
285 Expect.listEquals( | 789 Expect.listEquals([ |
286 [UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 790 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
287 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE], | 791 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
288 utf8ToRunes([0xf8, 0x88, 0x80, 0x80]), | 792 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
| 793 UNICODE_REPLACEMENT_CHARACTER_RUNE |
| 794 ], utf8ToRunes([0xf8, 0x88, 0x80, 0x80]), |
289 "5-byte sequence with last byte missing"); | 795 "5-byte sequence with last byte missing"); |
290 Expect.listEquals( | 796 Expect.listEquals([ |
291 [UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 797 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
292 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 798 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
293 UNICODE_REPLACEMENT_CHARACTER_RUNE], | 799 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
294 utf8ToRunes([0xfc, 0x80, 0x80, 0x80, 0x80]), | 800 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
| 801 UNICODE_REPLACEMENT_CHARACTER_RUNE |
| 802 ], utf8ToRunes([0xfc, 0x80, 0x80, 0x80, 0x80]), |
295 "6-byte sequence with last byte missing"); | 803 "6-byte sequence with last byte missing"); |
296 | 804 |
297 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 805 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], utf8ToRunes([0xdf]), |
298 utf8ToRunes([0xdf]), | |
299 "2-byte sequence with last byte missing (hi)"); | 806 "2-byte sequence with last byte missing (hi)"); |
300 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 807 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], |
301 utf8ToRunes([0xef, 0xbf]), | 808 utf8ToRunes([0xef, 0xbf]), "3-byte sequence with last byte missing (hi)"); |
302 "3-byte sequence with last byte missing (hi)"); | 809 Expect.listEquals([ |
303 Expect.listEquals( | 810 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
304 [UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 811 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
305 UNICODE_REPLACEMENT_CHARACTER_RUNE], | 812 UNICODE_REPLACEMENT_CHARACTER_RUNE |
306 utf8ToRunes([0xf7, 0xbf, 0xbf]), | 813 ], utf8ToRunes([0xf7, 0xbf, 0xbf]), |
307 "4-byte sequence with last byte missing (hi)"); | 814 "4-byte sequence with last byte missing (hi)"); |
308 Expect.listEquals( | 815 Expect.listEquals([ |
309 [UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 816 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
310 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE], | 817 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
311 utf8ToRunes([0xfb, 0xbf, 0xbf, 0xbf]), | 818 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
| 819 UNICODE_REPLACEMENT_CHARACTER_RUNE |
| 820 ], utf8ToRunes([0xfb, 0xbf, 0xbf, 0xbf]), |
312 "5-byte sequence with last byte missing (hi)"); | 821 "5-byte sequence with last byte missing (hi)"); |
313 Expect.listEquals( | 822 Expect.listEquals([ |
314 [UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 823 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
315 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 824 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
316 UNICODE_REPLACEMENT_CHARACTER_RUNE], | 825 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
317 utf8ToRunes([0xfd, 0xbf, 0xbf, 0xbf, 0xbf]), | 826 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
| 827 UNICODE_REPLACEMENT_CHARACTER_RUNE |
| 828 ], utf8ToRunes([0xfd, 0xbf, 0xbf, 0xbf, 0xbf]), |
318 "6-byte sequence with last byte missing (hi)"); | 829 "6-byte sequence with last byte missing (hi)"); |
319 | 830 |
320 // Concatenation of incomplete sequences | 831 // Concatenation of incomplete sequences |
321 Expect.listEquals( | 832 Expect.listEquals( |
322 [UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 833 [ |
323 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 834 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
324 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 835 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
325 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 836 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
326 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 837 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
327 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 838 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
328 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 839 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
329 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 840 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
330 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 841 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
331 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 842 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
332 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 843 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
333 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 844 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
334 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE], | 845 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
335 utf8ToRunes( | 846 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
336 [ 0xc2, | 847 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
337 0xe0, 0x80, | 848 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
338 0xf0, 0x80, 0x80, | 849 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
339 0xf8, 0x88, 0x80, 0x80, | 850 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
340 0xfc, 0x80, 0x80, 0x80, 0x80, | 851 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
341 0xdf, | 852 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
342 0xef, 0xbf, | 853 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
343 0xf7, 0xbf, 0xbf, | 854 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
344 0xfb, 0xbf, 0xbf, 0xbf, | 855 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
345 0xfd, 0xbf, 0xbf, 0xbf, 0xbf ]), | 856 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
346 "Concatenation of incomplete sequences"); | 857 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
| 858 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
| 859 UNICODE_REPLACEMENT_CHARACTER_RUNE |
| 860 ], |
| 861 utf8ToRunes([ |
| 862 0xc2, |
| 863 0xe0, |
| 864 0x80, |
| 865 0xf0, |
| 866 0x80, |
| 867 0x80, |
| 868 0xf8, |
| 869 0x88, |
| 870 0x80, |
| 871 0x80, |
| 872 0xfc, |
| 873 0x80, |
| 874 0x80, |
| 875 0x80, |
| 876 0x80, |
| 877 0xdf, |
| 878 0xef, |
| 879 0xbf, |
| 880 0xf7, |
| 881 0xbf, |
| 882 0xbf, |
| 883 0xfb, |
| 884 0xbf, |
| 885 0xbf, |
| 886 0xbf, |
| 887 0xfd, |
| 888 0xbf, |
| 889 0xbf, |
| 890 0xbf, |
| 891 0xbf |
| 892 ]), |
| 893 "Concatenation of incomplete sequences"); |
347 | 894 |
348 // Impossible bytes | 895 // Impossible bytes |
349 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 896 Expect.listEquals( |
350 utf8ToRunes([0xfe]), "fe"); | 897 [UNICODE_REPLACEMENT_CHARACTER_RUNE], utf8ToRunes([0xfe]), "fe"); |
351 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 898 Expect.listEquals( |
352 utf8ToRunes([0xff]), "ff"); | 899 [UNICODE_REPLACEMENT_CHARACTER_RUNE], utf8ToRunes([0xff]), "ff"); |
353 Expect.listEquals([ | 900 Expect.listEquals([ |
354 UNICODE_REPLACEMENT_CHARACTER_RUNE, | 901 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
355 UNICODE_REPLACEMENT_CHARACTER_RUNE, | 902 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
356 UNICODE_REPLACEMENT_CHARACTER_RUNE, | 903 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
357 UNICODE_REPLACEMENT_CHARACTER_RUNE], | 904 UNICODE_REPLACEMENT_CHARACTER_RUNE |
358 utf8ToRunes([0xfe, 0xfe, 0xff, 0xff]), "fe fe ff ff"); | 905 ], utf8ToRunes([0xfe, 0xfe, 0xff, 0xff]), "fe fe ff ff"); |
359 | 906 |
360 // Overlong sequences | 907 // Overlong sequences |
361 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 908 Expect.listEquals( |
362 utf8ToRunes([0xc0, 0xaf]), "c0 af"); | 909 [UNICODE_REPLACEMENT_CHARACTER_RUNE], utf8ToRunes([0xc0, 0xaf]), "c0 af"); |
363 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 910 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], |
364 utf8ToRunes([0xe0, 0x80, 0xaf]), "e0 80 af"); | 911 utf8ToRunes([0xe0, 0x80, 0xaf]), "e0 80 af"); |
365 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 912 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], |
366 utf8ToRunes([0xf0, 0x80, 0x80, 0xaf]), "f0 80 80 af"); | 913 utf8ToRunes([0xf0, 0x80, 0x80, 0xaf]), "f0 80 80 af"); |
367 Expect.listEquals( | 914 Expect.listEquals([ |
368 [UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 915 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
369 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 916 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
370 UNICODE_REPLACEMENT_CHARACTER_RUNE], | 917 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
371 utf8ToRunes([0xf8, 0x80, 0x80, 0x80, 0xaf]), "f8 80 80 80 af"); | 918 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
372 Expect.listEquals( | 919 UNICODE_REPLACEMENT_CHARACTER_RUNE |
373 [UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 920 ], utf8ToRunes([0xf8, 0x80, 0x80, 0x80, 0xaf]), "f8 80 80 80 af"); |
374 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 921 Expect.listEquals([ |
375 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE], | 922 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
376 utf8ToRunes([0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf]), | 923 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
377 "fc 80 80 80 80 af"); | 924 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
378 | 925 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
379 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 926 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
380 utf8ToRunes([0xc1, 0xbf]), "c1 bf"); | 927 UNICODE_REPLACEMENT_CHARACTER_RUNE |
| 928 ], utf8ToRunes([0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf]), "fc 80 80 80 80 af"); |
| 929 |
| 930 Expect.listEquals( |
| 931 [UNICODE_REPLACEMENT_CHARACTER_RUNE], utf8ToRunes([0xc1, 0xbf]), "c1 bf"); |
381 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 932 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], |
382 utf8ToRunes([0xe0, 0x9f, 0xbf]), "e0 9f bf"); | 933 utf8ToRunes([0xe0, 0x9f, 0xbf]), "e0 9f bf"); |
383 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 934 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], |
384 utf8ToRunes([0xf0, 0x8f, 0xbf, 0xbf]), "f0 8f bf bf"); | 935 utf8ToRunes([0xf0, 0x8f, 0xbf, 0xbf]), "f0 8f bf bf"); |
385 Expect.listEquals( | 936 Expect.listEquals([ |
386 [UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 937 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
387 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 938 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
388 UNICODE_REPLACEMENT_CHARACTER_RUNE], | 939 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
389 utf8ToRunes([0xf8, 0x87, 0xbf, 0xbf, 0xbf]), "f8 87 bf bf bf"); | 940 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
390 Expect.listEquals( | 941 UNICODE_REPLACEMENT_CHARACTER_RUNE |
391 [UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 942 ], utf8ToRunes([0xf8, 0x87, 0xbf, 0xbf, 0xbf]), "f8 87 bf bf bf"); |
392 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 943 Expect.listEquals([ |
393 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE], | 944 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
394 utf8ToRunes([0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf]), | 945 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
395 "fc 83 bf bf bf bf"); | 946 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
396 | 947 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
397 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 948 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
398 utf8ToRunes([0xc0, 0x80]), "c0 80"); | 949 UNICODE_REPLACEMENT_CHARACTER_RUNE |
| 950 ], utf8ToRunes([0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf]), "fc 83 bf bf bf bf"); |
| 951 |
| 952 Expect.listEquals( |
| 953 [UNICODE_REPLACEMENT_CHARACTER_RUNE], utf8ToRunes([0xc0, 0x80]), "c0 80"); |
399 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 954 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], |
400 utf8ToRunes([0xe0, 0x80, 0x80]), "e0 80 80"); | 955 utf8ToRunes([0xe0, 0x80, 0x80]), "e0 80 80"); |
401 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], | 956 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_RUNE], |
402 utf8ToRunes([0xf0, 0x80, 0x80, 0x80]), "f0 80 80 80"); | 957 utf8ToRunes([0xf0, 0x80, 0x80, 0x80]), "f0 80 80 80"); |
403 Expect.listEquals( | 958 Expect.listEquals([ |
404 [UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 959 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
405 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 960 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
406 UNICODE_REPLACEMENT_CHARACTER_RUNE], | 961 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
407 utf8ToRunes([0xf8, 0x80, 0x80, 0x80, 0x80]), "f8 80 80 80 80"); | 962 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
408 Expect.listEquals( | 963 UNICODE_REPLACEMENT_CHARACTER_RUNE |
409 [UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 964 ], utf8ToRunes([0xf8, 0x80, 0x80, 0x80, 0x80]), "f8 80 80 80 80"); |
410 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE, | 965 Expect.listEquals([ |
411 UNICODE_REPLACEMENT_CHARACTER_RUNE, UNICODE_REPLACEMENT_CHARACTER_RUNE], | 966 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
412 utf8ToRunes([0xfc, 0x80, 0x80, 0x80, 0x80, 0x80]), | 967 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
413 "fc 80 80 80 80 80"); | 968 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
| 969 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
| 970 UNICODE_REPLACEMENT_CHARACTER_RUNE, |
| 971 UNICODE_REPLACEMENT_CHARACTER_RUNE |
| 972 ], utf8ToRunes([0xfc, 0x80, 0x80, 0x80, 0x80, 0x80]), "fc 80 80 80 80 80"); |
414 | 973 |
415 // Other illegal code positions (???) | 974 // Other illegal code positions (???) |
416 Expect.listEquals([0xfffe], utf8ToRunes([0xef, 0xbf, 0xbe]), | 975 Expect.listEquals([0xfffe], utf8ToRunes([0xef, 0xbf, 0xbe]), "U+FFFE"); |
417 "U+FFFE"); | 976 Expect.listEquals([0xffff], utf8ToRunes([0xef, 0xbf, 0xbf]), "U+FFFF"); |
418 Expect.listEquals([0xffff], utf8ToRunes([0xef, 0xbf, 0xbf]), | |
419 "U+FFFF"); | |
420 } | 977 } |
421 | 978 |
422 void testUtf8BytesToString() { | 979 void testUtf8BytesToString() { |
423 Expect.stringEquals(testEnglishPhrase, | 980 Expect.stringEquals( |
424 decodeUtf8(testEnglishUtf8), "English"); | 981 testEnglishPhrase, decodeUtf8(testEnglishUtf8), "English"); |
425 | 982 |
426 Expect.stringEquals(testDanishPhrase, | 983 Expect.stringEquals(testDanishPhrase, decodeUtf8(testDanishUtf8), "Danish"); |
427 decodeUtf8(testDanishUtf8), "Danish"); | 984 |
428 | 985 Expect.stringEquals(testHebrewPhrase, decodeUtf8(testHebrewUtf8), "Hebrew"); |
429 Expect.stringEquals(testHebrewPhrase, | 986 |
430 decodeUtf8(testHebrewUtf8), "Hebrew"); | 987 Expect.stringEquals( |
431 | 988 testRussianPhrase, decodeUtf8(testRussianUtf8), "Russian"); |
432 Expect.stringEquals(testRussianPhrase, | 989 |
433 decodeUtf8(testRussianUtf8), "Russian"); | 990 Expect.stringEquals(testGreekPhrase, decodeUtf8(testGreekUtf8), "Greek"); |
434 | 991 |
435 Expect.stringEquals(testGreekPhrase, | 992 Expect.stringEquals( |
436 decodeUtf8(testGreekUtf8), "Greek"); | 993 testKatakanaPhrase, decodeUtf8(testKatakanaUtf8), "Katakana"); |
437 | |
438 Expect.stringEquals(testKatakanaPhrase, | |
439 decodeUtf8(testKatakanaUtf8), "Katakana"); | |
440 } | 994 } |
OLD | NEW |