OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 library utf.utf82_test; | 5 library utf.utf82_test; |
6 | 6 |
7 import 'package:expect/expect.dart'; | 7 import 'package:test/test.dart'; |
8 import 'package:utf/utf.dart'; | 8 import 'package:utf/utf.dart'; |
9 | 9 |
10 const String testEnglishPhrase = | 10 import 'expect.dart' as Expect; |
11 "The quick brown fox jumps over the lazy dog."; | |
12 | 11 |
13 const List<int> testEnglishUtf8 = const<int> [ | 12 const String testEnglishPhrase = "The quick brown fox jumps over the lazy dog."; |
14 0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, | 13 |
15 0x6b, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20, | 14 const List<int> testEnglishUtf8 = const <int>[ |
16 0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70, | 15 0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, // 8 |
17 0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74, | 16 0x6b, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20, |
18 0x68, 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20, | 17 0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70, |
19 0x64, 0x6f, 0x67, 0x2e]; | 18 0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74, |
| 19 0x68, 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20, |
| 20 0x64, 0x6f, 0x67, 0x2e |
| 21 ]; |
20 | 22 |
21 const String testDanishPhrase = "Quizdeltagerne spiste jordbær med " | 23 const String testDanishPhrase = "Quizdeltagerne spiste jordbær med " |
22 "fløde mens cirkusklovnen Wolther spillede på xylofon."; | 24 "fløde mens cirkusklovnen Wolther spillede på xylofon."; |
23 | 25 |
24 const List<int> testDanishUtf8 = const<int>[ | 26 const List<int> testDanishUtf8 = const <int>[ |
25 0x51, 0x75, 0x69, 0x7a, 0x64, 0x65, 0x6c, 0x74, | 27 0x51, 0x75, 0x69, 0x7a, 0x64, 0x65, 0x6c, 0x74, // 8 |
26 0x61, 0x67, 0x65, 0x72, 0x6e, 0x65, 0x20, 0x73, | 28 0x61, 0x67, 0x65, 0x72, 0x6e, 0x65, 0x20, 0x73, |
27 0x70, 0x69, 0x73, 0x74, 0x65, 0x20, 0x6a, 0x6f, | 29 0x70, 0x69, 0x73, 0x74, 0x65, 0x20, 0x6a, 0x6f, |
28 0x72, 0x64, 0x62, 0xc3, 0xa6, 0x72, 0x20, 0x6d, | 30 0x72, 0x64, 0x62, 0xc3, 0xa6, 0x72, 0x20, 0x6d, |
29 0x65, 0x64, 0x20, 0x66, 0x6c, 0xc3, 0xb8, 0x64, | 31 0x65, 0x64, 0x20, 0x66, 0x6c, 0xc3, 0xb8, 0x64, |
30 0x65, 0x20, 0x6d, 0x65, 0x6e, 0x73, 0x20, 0x63, | 32 0x65, 0x20, 0x6d, 0x65, 0x6e, 0x73, 0x20, 0x63, |
31 0x69, 0x72, 0x6b, 0x75, 0x73, 0x6b, 0x6c, 0x6f, | 33 0x69, 0x72, 0x6b, 0x75, 0x73, 0x6b, 0x6c, 0x6f, |
32 0x76, 0x6e, 0x65, 0x6e, 0x20, 0x57, 0x6f, 0x6c, | 34 0x76, 0x6e, 0x65, 0x6e, 0x20, 0x57, 0x6f, 0x6c, |
33 0x74, 0x68, 0x65, 0x72, 0x20, 0x73, 0x70, 0x69, | 35 0x74, 0x68, 0x65, 0x72, 0x20, 0x73, 0x70, 0x69, |
34 0x6c, 0x6c, 0x65, 0x64, 0x65, 0x20, 0x70, 0xc3, | 36 0x6c, 0x6c, 0x65, 0x64, 0x65, 0x20, 0x70, 0xc3, |
35 0xa5, 0x20, 0x78, 0x79, 0x6c, 0x6f, 0x66, 0x6f, | 37 0xa5, 0x20, 0x78, 0x79, 0x6c, 0x6f, 0x66, 0x6f, |
36 0x6e, 0x2e]; | 38 0x6e, 0x2e |
| 39 ]; |
37 | 40 |
38 // unusual formatting due to strange editor interaction w/ text direction. | 41 // Unusual formatting due to a strange editor interaction with text direction. |
39 const String | 42 const String testHebrewPhrase = |
40 testHebrewPhrase = "דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה"; | 43 "דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה"; |
41 | 44 |
42 const List<int> testHebrewUtf8 = const<int>[ | 45 const List<int> testHebrewUtf8 = const <int>[ |
43 0xd7, 0x93, 0xd7, 0x92, 0x20, 0xd7, 0xa1, 0xd7, | 46 0xd7, 0x93, 0xd7, 0x92, 0x20, 0xd7, 0xa1, 0xd7, // 8 |
44 0xa7, 0xd7, 0xa8, 0xd7, 0x9f, 0x20, 0xd7, 0xa9, | 47 0xa7, 0xd7, 0xa8, 0xd7, 0x9f, 0x20, 0xd7, 0xa9, |
45 0xd7, 0x98, 0x20, 0xd7, 0x91, 0xd7, 0x99, 0xd7, | 48 0xd7, 0x98, 0x20, 0xd7, 0x91, 0xd7, 0x99, 0xd7, |
46 0x9d, 0x20, 0xd7, 0x9e, 0xd7, 0x90, 0xd7, 0x95, | 49 0x9d, 0x20, 0xd7, 0x9e, 0xd7, 0x90, 0xd7, 0x95, |
47 0xd7, 0x9b, 0xd7, 0x96, 0xd7, 0x91, 0x20, 0xd7, | 50 0xd7, 0x9b, 0xd7, 0x96, 0xd7, 0x91, 0x20, 0xd7, |
48 0x95, 0xd7, 0x9c, 0xd7, 0xa4, 0xd7, 0xaa, 0xd7, | 51 0x95, 0xd7, 0x9c, 0xd7, 0xa4, 0xd7, 0xaa, 0xd7, |
49 0xa2, 0x20, 0xd7, 0x9e, 0xd7, 0xa6, 0xd7, 0x90, | 52 0xa2, 0x20, 0xd7, 0x9e, 0xd7, 0xa6, 0xd7, 0x90, |
50 0x20, 0xd7, 0x9c, 0xd7, 0x95, 0x20, 0xd7, 0x97, | 53 0x20, 0xd7, 0x9c, 0xd7, 0x95, 0x20, 0xd7, 0x97, |
51 0xd7, 0x91, 0xd7, 0xa8, 0xd7, 0x94, 0x20, 0xd7, | 54 0xd7, 0x91, 0xd7, 0xa8, 0xd7, 0x94, 0x20, 0xd7, |
52 0x90, 0xd7, 0x99, 0xd7, 0x9a, 0x20, 0xd7, 0x94, | 55 0x90, 0xd7, 0x99, 0xd7, 0x9a, 0x20, 0xd7, 0x94, |
53 0xd7, 0xa7, 0xd7, 0x9c, 0xd7, 0x99, 0xd7, 0x98, | 56 0xd7, 0xa7, 0xd7, 0x9c, 0xd7, 0x99, 0xd7, 0x98, |
54 0xd7, 0x94]; | 57 0xd7, 0x94 |
| 58 ]; |
55 | 59 |
56 const String testRussianPhrase = "Съешь же ещё этих мягких " | 60 const String testRussianPhrase = "Съешь же ещё этих мягких " |
57 "французских булок да выпей чаю"; | 61 "французских булок да выпей чаю"; |
58 | 62 |
59 const List<int> testRussianUtf8 = const<int>[ | 63 const List<int> testRussianUtf8 = const <int>[ |
60 0xd0, 0xa1, 0xd1, 0x8a, 0xd0, 0xb5, 0xd1, 0x88, | 64 0xd0, 0xa1, 0xd1, 0x8a, 0xd0, 0xb5, 0xd1, 0x88, // 8 |
61 0xd1, 0x8c, 0x20, 0xd0, 0xb6, 0xd0, 0xb5, 0x20, | 65 0xd1, 0x8c, 0x20, 0xd0, 0xb6, 0xd0, 0xb5, 0x20, |
62 0xd0, 0xb5, 0xd1, 0x89, 0xd1, 0x91, 0x20, 0xd1, | 66 0xd0, 0xb5, 0xd1, 0x89, 0xd1, 0x91, 0x20, 0xd1, |
63 0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0xd1, 0x85, 0x20, | 67 0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0xd1, 0x85, 0x20, |
64 0xd0, 0xbc, 0xd1, 0x8f, 0xd0, 0xb3, 0xd0, 0xba, | 68 0xd0, 0xbc, 0xd1, 0x8f, 0xd0, 0xb3, 0xd0, 0xba, |
65 0xd0, 0xb8, 0xd1, 0x85, 0x20, 0xd1, 0x84, 0xd1, | 69 0xd0, 0xb8, 0xd1, 0x85, 0x20, 0xd1, 0x84, 0xd1, |
66 0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd1, 0x86, 0xd1, | 70 0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd1, 0x86, 0xd1, |
67 0x83, 0xd0, 0xb7, 0xd1, 0x81, 0xd0, 0xba, 0xd0, | 71 0x83, 0xd0, 0xb7, 0xd1, 0x81, 0xd0, 0xba, 0xd0, |
68 0xb8, 0xd1, 0x85, 0x20, 0xd0, 0xb1, 0xd1, 0x83, | 72 0xb8, 0xd1, 0x85, 0x20, 0xd0, 0xb1, 0xd1, 0x83, |
69 0xd0, 0xbb, 0xd0, 0xbe, 0xd0, 0xba, 0x20, 0xd0, | 73 0xd0, 0xbb, 0xd0, 0xbe, 0xd0, 0xba, 0x20, 0xd0, |
70 0xb4, 0xd0, 0xb0, 0x20, 0xd0, 0xb2, 0xd1, 0x8b, | 74 0xb4, 0xd0, 0xb0, 0x20, 0xd0, 0xb2, 0xd1, 0x8b, |
71 0xd0, 0xbf, 0xd0, 0xb5, 0xd0, 0xb9, 0x20, 0xd1, | 75 0xd0, 0xbf, 0xd0, 0xb5, 0xd0, 0xb9, 0x20, 0xd1, |
72 0x87, 0xd0, 0xb0, 0xd1, 0x8e]; | 76 0x87, 0xd0, 0xb0, 0xd1, 0x8e |
| 77 ]; |
73 | 78 |
74 const String testGreekPhrase = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ " | 79 const String testGreekPhrase = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ " |
75 "στὸ χρυσαφὶ ξέφωτο"; | 80 "στὸ χρυσαφὶ ξέφωτο"; |
76 | 81 |
77 const List<int> testGreekUtf8 = const<int>[ | 82 const List<int> testGreekUtf8 = const <int>[ |
78 0xce, 0x93, 0xce, 0xb1, 0xce, 0xb6, 0xce, 0xad, | 83 0xce, 0x93, 0xce, 0xb1, 0xce, 0xb6, 0xce, 0xad, // 8 |
79 0xce, 0xb5, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce, | 84 0xce, 0xb5, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce, |
80 0xb1, 0xe1, 0xbd, 0xb6, 0x20, 0xce, 0xbc, 0xcf, | 85 0xb1, 0xe1, 0xbd, 0xb6, 0x20, 0xce, 0xbc, 0xcf, |
81 0x85, 0xcf, 0x81, 0xcf, 0x84, 0xce, 0xb9, 0xe1, | 86 0x85, 0xcf, 0x81, 0xcf, 0x84, 0xce, 0xb9, 0xe1, |
82 0xbd, 0xb2, 0xcf, 0x82, 0x20, 0xce, 0xb4, 0xe1, | 87 0xbd, 0xb2, 0xcf, 0x82, 0x20, 0xce, 0xb4, 0xe1, |
83 0xbd, 0xb2, 0xce, 0xbd, 0x20, 0xce, 0xb8, 0xe1, | 88 0xbd, 0xb2, 0xce, 0xbd, 0x20, 0xce, 0xb8, 0xe1, |
84 0xbd, 0xb0, 0x20, 0xce, 0xb2, 0xcf, 0x81, 0xe1, | 89 0xbd, 0xb0, 0x20, 0xce, 0xb2, 0xcf, 0x81, 0xe1, |
85 0xbf, 0xb6, 0x20, 0xcf, 0x80, 0xce, 0xb9, 0xe1, | 90 0xbf, 0xb6, 0x20, 0xcf, 0x80, 0xce, 0xb9, 0xe1, |
86 0xbd, 0xb0, 0x20, 0xcf, 0x83, 0xcf, 0x84, 0xe1, | 91 0xbd, 0xb0, 0x20, 0xcf, 0x83, 0xcf, 0x84, 0xe1, |
87 0xbd, 0xb8, 0x20, 0xcf, 0x87, 0xcf, 0x81, 0xcf, | 92 0xbd, 0xb8, 0x20, 0xcf, 0x87, 0xcf, 0x81, 0xcf, |
88 0x85, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x86, 0xe1, | 93 0x85, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x86, 0xe1, |
89 0xbd, 0xb6, 0x20, 0xce, 0xbe, 0xce, 0xad, 0xcf, | 94 0xbd, 0xb6, 0x20, 0xce, 0xbe, 0xce, 0xad, 0xcf, |
90 0x86, 0xcf, 0x89, 0xcf, 0x84, 0xce, 0xbf]; | 95 0x86, 0xcf, 0x89, 0xcf, 0x84, 0xce, 0xbf |
| 96 ]; |
91 | 97 |
92 const String testKatakanaPhrase = """ | 98 const String testKatakanaPhrase = """ |
93 イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム | 99 イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム |
94 ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン"""; | 100 ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン"""; |
95 | 101 |
96 const List<int> testKatakanaUtf8 = const<int>[ | 102 const List<int> testKatakanaUtf8 = const <int>[ |
97 0xe3, 0x82, 0xa4, 0xe3, 0x83, 0xad, 0xe3, 0x83, | 103 0xe3, 0x82, 0xa4, 0xe3, 0x83, 0xad, 0xe3, 0x83, // 8 |
98 0x8f, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0x9b, 0xe3, | 104 0x8f, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0x9b, 0xe3, |
99 0x83, 0x98, 0xe3, 0x83, 0x88, 0x20, 0xe3, 0x83, | 105 0x83, 0x98, 0xe3, 0x83, 0x88, 0x20, 0xe3, 0x83, |
100 0x81, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0x8c, 0xe3, | 106 0x81, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0x8c, 0xe3, |
101 0x83, 0xab, 0xe3, 0x83, 0xb2, 0x20, 0xe3, 0x83, | 107 0x83, 0xab, 0xe3, 0x83, 0xb2, 0x20, 0xe3, 0x83, |
102 0xaf, 0xe3, 0x82, 0xab, 0xe3, 0x83, 0xa8, 0xe3, | 108 0xaf, 0xe3, 0x82, 0xab, 0xe3, 0x83, 0xa8, 0xe3, |
103 0x82, 0xbf, 0xe3, 0x83, 0xac, 0xe3, 0x82, 0xbd, | 109 0x82, 0xbf, 0xe3, 0x83, 0xac, 0xe3, 0x82, 0xbd, |
104 0x20, 0xe3, 0x83, 0x84, 0xe3, 0x83, 0x8d, 0xe3, | 110 0x20, 0xe3, 0x83, 0x84, 0xe3, 0x83, 0x8d, 0xe3, |
105 0x83, 0x8a, 0xe3, 0x83, 0xa9, 0xe3, 0x83, 0xa0, | 111 0x83, 0x8a, 0xe3, 0x83, 0xa9, 0xe3, 0x83, 0xa0, |
106 0x0a, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3, | 112 0x0a, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3, |
107 0x83, 0x8e, 0xe3, 0x82, 0xaa, 0xe3, 0x82, 0xaf, | 113 0x83, 0x8e, 0xe3, 0x82, 0xaa, 0xe3, 0x82, 0xaf, |
108 0xe3, 0x83, 0xa4, 0xe3, 0x83, 0x9e, 0x20, 0xe3, | 114 0xe3, 0x83, 0xa4, 0xe3, 0x83, 0x9e, 0x20, 0xe3, |
109 0x82, 0xb1, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xb3, | 115 0x82, 0xb1, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xb3, |
110 0xe3, 0x82, 0xa8, 0xe3, 0x83, 0x86, 0x20, 0xe3, | 116 0xe3, 0x82, 0xa8, 0xe3, 0x83, 0x86, 0x20, 0xe3, |
111 0x82, 0xa2, 0xe3, 0x82, 0xb5, 0xe3, 0x82, 0xad, | 117 0x82, 0xa2, 0xe3, 0x82, 0xb5, 0xe3, 0x82, 0xad, |
112 0xe3, 0x83, 0xa6, 0xe3, 0x83, 0xa1, 0xe3, 0x83, | 118 0xe3, 0x83, 0xa6, 0xe3, 0x83, 0xa1, 0xe3, 0x83, |
113 0x9f, 0xe3, 0x82, 0xb7, 0x20, 0xe3, 0x83, 0xb1, | 119 0x9f, 0xe3, 0x82, 0xb7, 0x20, 0xe3, 0x83, 0xb1, |
114 0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa2, 0xe3, 0x82, | 120 0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa2, 0xe3, 0x82, |
115 0xbb, 0xe3, 0x82, 0xb9, 0xe3, 0x83, 0xb3]; | 121 0xbb, 0xe3, 0x82, 0xb9, 0xe3, 0x83, 0xb3 |
| 122 ]; |
116 | 123 |
117 void main() { | 124 void main() { |
118 testUtf8bytesToCodepoints(); | 125 test('utf8 bytes to codepoints', testUtf8bytesToCodepoints); |
119 testUtf8BytesToString(); | 126 test('utf8 bytes to string', testUtf8BytesToString); |
120 testEncodeToUtf8(); | 127 test('encode to utf8', testEncodeToUtf8); |
121 testIterableMethods(); | 128 test('iterable methods', testIterableMethods); |
122 } | 129 } |
123 | 130 |
124 void testEncodeToUtf8() { | 131 void testEncodeToUtf8() { |
125 Expect.listEquals(testEnglishUtf8, encodeUtf8(testEnglishPhrase), | 132 Expect.listEquals( |
126 "english to utf8"); | 133 testEnglishUtf8, encodeUtf8(testEnglishPhrase), "english to utf8"); |
127 | 134 |
128 Expect.listEquals(testDanishUtf8, encodeUtf8(testDanishPhrase), | 135 Expect.listEquals( |
129 "encode danish to utf8"); | 136 testDanishUtf8, encodeUtf8(testDanishPhrase), "encode danish to utf8"); |
130 | 137 |
131 Expect.listEquals(testHebrewUtf8, encodeUtf8(testHebrewPhrase), | 138 Expect.listEquals( |
132 "Hebrew to utf8"); | 139 testHebrewUtf8, encodeUtf8(testHebrewPhrase), "Hebrew to utf8"); |
133 | 140 |
134 Expect.listEquals(testRussianUtf8, encodeUtf8(testRussianPhrase), | 141 Expect.listEquals( |
135 "Russian to utf8"); | 142 testRussianUtf8, encodeUtf8(testRussianPhrase), "Russian to utf8"); |
136 | 143 |
137 Expect.listEquals(testGreekUtf8, encodeUtf8(testGreekPhrase), | 144 Expect.listEquals( |
138 "Greek to utf8"); | 145 testGreekUtf8, encodeUtf8(testGreekPhrase), "Greek to utf8"); |
139 | 146 |
140 Expect.listEquals(testKatakanaUtf8, encodeUtf8(testKatakanaPhrase), | 147 Expect.listEquals( |
141 "Katakana to utf8"); | 148 testKatakanaUtf8, encodeUtf8(testKatakanaPhrase), "Katakana to utf8"); |
142 } | 149 } |
143 | 150 |
144 void testUtf8bytesToCodepoints() { | 151 void testUtf8bytesToCodepoints() { |
145 Expect.listEquals([954, 972, 963, 956, 949], | 152 Expect.listEquals( |
146 utf8ToCodepoints([0xce, 0xba, 0xcf, 0x8c, 0xcf, | 153 [954, 972, 963, 956, 949], |
147 0x83, 0xce, 0xbc, 0xce, 0xb5]), "κόσμε"); | 154 utf8ToCodepoints( |
| 155 [0xce, 0xba, 0xcf, 0x8c, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5]), |
| 156 "κόσμε"); |
148 | 157 |
149 // boundary conditions: First possible sequence of a certain length | 158 // boundary conditions: First possible sequence of a certain length |
150 Expect.listEquals([], utf8ToCodepoints([]), "no input"); | 159 Expect.listEquals([], utf8ToCodepoints([]), "no input"); |
151 Expect.listEquals([0x0], utf8ToCodepoints([0x0]), "0"); | 160 Expect.listEquals([0x0], utf8ToCodepoints([0x0]), "0"); |
152 Expect.listEquals([0x80], utf8ToCodepoints([0xc2, 0x80]), "80"); | 161 Expect.listEquals([0x80], utf8ToCodepoints([0xc2, 0x80]), "80"); |
153 Expect.listEquals([0x800], | 162 Expect.listEquals([0x800], utf8ToCodepoints([0xe0, 0xa0, 0x80]), "800"); |
154 utf8ToCodepoints([0xe0, 0xa0, 0x80]), "800"); | 163 Expect.listEquals( |
155 Expect.listEquals([0x10000], | 164 [0x10000], utf8ToCodepoints([0xf0, 0x90, 0x80, 0x80]), "10000"); |
156 utf8ToCodepoints([0xf0, 0x90, 0x80, 0x80]), "10000"); | |
157 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 165 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
158 utf8ToCodepoints([0xf8, 0x88, 0x80, 0x80, 0x80]), "200000"); | 166 utf8ToCodepoints([0xf8, 0x88, 0x80, 0x80, 0x80]), "200000"); |
159 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 167 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
160 utf8ToCodepoints([0xfc, 0x84, 0x80, 0x80, 0x80, 0x80]), | 168 utf8ToCodepoints([0xfc, 0x84, 0x80, 0x80, 0x80, 0x80]), "4000000"); |
161 "4000000"); | |
162 | 169 |
163 // boundary conditions: Last possible sequence of a certain length | 170 // boundary conditions: Last possible sequence of a certain length |
164 Expect.listEquals([0x7f], utf8ToCodepoints([0x7f]), "7f"); | 171 Expect.listEquals([0x7f], utf8ToCodepoints([0x7f]), "7f"); |
165 Expect.listEquals([0x7ff], utf8ToCodepoints([0xdf, 0xbf]), "7ff"); | 172 Expect.listEquals([0x7ff], utf8ToCodepoints([0xdf, 0xbf]), "7ff"); |
166 Expect.listEquals([0xffff], | 173 Expect.listEquals([0xffff], utf8ToCodepoints([0xef, 0xbf, 0xbf]), "ffff"); |
167 utf8ToCodepoints([0xef, 0xbf, 0xbf]), "ffff"); | |
168 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 174 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
169 utf8ToCodepoints([0xf7, 0xbf, 0xbf, 0xbf]), "1fffff"); | 175 utf8ToCodepoints([0xf7, 0xbf, 0xbf, 0xbf]), "1fffff"); |
170 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 176 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
171 utf8ToCodepoints([0xfb, 0xbf, 0xbf, 0xbf, 0xbf]), "3ffffff"); | 177 utf8ToCodepoints([0xfb, 0xbf, 0xbf, 0xbf, 0xbf]), "3ffffff"); |
172 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 178 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
173 utf8ToCodepoints([0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf]), | 179 utf8ToCodepoints([0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf]), "7fffffff"); |
174 "4000000"); | |
175 | 180 |
176 // other boundary conditions | 181 // other boundary conditions |
177 Expect.listEquals([0xd7ff], | 182 Expect.listEquals([0xd7ff], utf8ToCodepoints([0xed, 0x9f, 0xbf]), "d7ff"); |
178 utf8ToCodepoints([0xed, 0x9f, 0xbf]), "d7ff"); | 183 Expect.listEquals([0xe000], utf8ToCodepoints([0xee, 0x80, 0x80]), "e000"); |
179 Expect.listEquals([0xe000], | |
180 utf8ToCodepoints([0xee, 0x80, 0x80]), "e000"); | |
181 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 184 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
182 utf8ToCodepoints([0xef, 0xbf, 0xbd]), "fffd"); | 185 utf8ToCodepoints([0xef, 0xbf, 0xbd]), "fffd"); |
183 Expect.listEquals([0x10ffff], | 186 Expect.listEquals( |
184 utf8ToCodepoints([0xf4, 0x8f, 0xbf, 0xbf]), "10ffff"); | 187 [0x10ffff], utf8ToCodepoints([0xf4, 0x8f, 0xbf, 0xbf]), "10ffff"); |
185 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 188 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
186 utf8ToCodepoints([0xf4, 0x90, 0x80, 0x80]), "110000"); | 189 utf8ToCodepoints([0xf4, 0x90, 0x80, 0x80]), "110000"); |
187 | 190 |
188 // unexpected continuation bytes | 191 // unexpected continuation bytes |
189 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 192 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
190 utf8ToCodepoints([0x80]), "80 => replacement character"); | 193 utf8ToCodepoints([0x80]), "80 => replacement character"); |
191 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 194 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
192 utf8ToCodepoints([0xbf]), "bf => replacement character"); | 195 utf8ToCodepoints([0xbf]), "bf => replacement character"); |
193 | 196 |
194 List<int> allContinuationBytes = <int>[]; | 197 List<int> allContinuationBytes = <int>[]; |
195 List<int> matchingReplacementChars = <int>[]; | 198 List<int> matchingReplacementChars = <int>[]; |
196 for (int i = 0x80; i < 0xc0; i++) { | 199 for (int i = 0x80; i < 0xc0; i++) { |
197 allContinuationBytes.add(i); | 200 allContinuationBytes.add(i); |
198 matchingReplacementChars.add(UNICODE_REPLACEMENT_CHARACTER_CODEPOINT); | 201 matchingReplacementChars.add(UNICODE_REPLACEMENT_CHARACTER_CODEPOINT); |
199 } | 202 } |
200 Expect.listEquals(matchingReplacementChars, | 203 Expect.listEquals( |
| 204 matchingReplacementChars, |
201 utf8ToCodepoints(allContinuationBytes), | 205 utf8ToCodepoints(allContinuationBytes), |
202 "80 - bf => replacement character x 64"); | 206 "80 - bf => replacement character x 64"); |
203 | 207 |
204 List<int> allFirstTwoByteSeq = <int>[]; | 208 List<int> allFirstTwoByteSeq = <int>[]; |
205 matchingReplacementChars = <int>[]; | 209 matchingReplacementChars = <int>[]; |
206 for (int i = 0xc0; i < 0xe0; i++) { | 210 for (int i = 0xc0; i < 0xe0; i++) { |
207 allFirstTwoByteSeq.addAll([i, 0x20]); | 211 allFirstTwoByteSeq.addAll([i, 0x20]); |
208 matchingReplacementChars.addAll( | 212 matchingReplacementChars.addAll([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]); |
209 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]); | |
210 } | 213 } |
211 Expect.listEquals(matchingReplacementChars, | 214 Expect.listEquals( |
| 215 matchingReplacementChars, |
212 utf8ToCodepoints(allFirstTwoByteSeq), | 216 utf8ToCodepoints(allFirstTwoByteSeq), |
213 "c0 - df + space => replacement character + space x 32"); | 217 "c0 - df + space => replacement character x 32"); |
214 | 218 |
215 List<int> allFirstThreeByteSeq = <int>[]; | 219 List<int> allFirstThreeByteSeq = <int>[]; |
216 matchingReplacementChars = <int>[]; | 220 matchingReplacementChars = <int>[]; |
217 for (int i = 0xe0; i < 0xf0; i++) { | 221 for (int i = 0xe0; i < 0xf0; i++) { |
218 allFirstThreeByteSeq.addAll([i, 0x20]); | 222 allFirstThreeByteSeq.addAll([i, 0x20]); |
219 matchingReplacementChars.addAll( | 223 matchingReplacementChars.addAll([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]); |
220 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]); | |
221 } | 224 } |
222 Expect.listEquals(matchingReplacementChars, | 225 Expect.listEquals( |
| 226 matchingReplacementChars, |
223 utf8ToCodepoints(allFirstThreeByteSeq), | 227 utf8ToCodepoints(allFirstThreeByteSeq), |
224 "e0 - ef + space => replacement character x 16"); | 228 "e0 - ef + space => replacement character x 16"); |
225 | 229 |
226 List<int> allFirstFourByteSeq = <int>[]; | 230 List<int> allFirstFourByteSeq = <int>[]; |
227 matchingReplacementChars = <int>[]; | 231 matchingReplacementChars = <int>[]; |
228 for (int i = 0xf0; i < 0xf8; i++) { | 232 for (int i = 0xf0; i < 0xf8; i++) { |
229 allFirstFourByteSeq.addAll([i, 0x20]); | 233 allFirstFourByteSeq.addAll([i, 0x20]); |
230 matchingReplacementChars.addAll( | 234 matchingReplacementChars.addAll([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]); |
231 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]); | |
232 } | 235 } |
233 Expect.listEquals(matchingReplacementChars, | 236 Expect.listEquals( |
| 237 matchingReplacementChars, |
234 utf8ToCodepoints(allFirstFourByteSeq), | 238 utf8ToCodepoints(allFirstFourByteSeq), |
235 "f0 - f7 + space => replacement character x 8"); | 239 "f0 - f7 + space => replacement character x 8"); |
236 | 240 |
237 List<int> allFirstFiveByteSeq = <int>[]; | 241 List<int> allFirstFiveByteSeq = <int>[]; |
238 matchingReplacementChars = <int>[]; | 242 matchingReplacementChars = <int>[]; |
239 for (int i = 0xf8; i < 0xfc; i++) { | 243 for (int i = 0xf8; i < 0xfc; i++) { |
240 allFirstFiveByteSeq.addAll([i, 0x20]); | 244 allFirstFiveByteSeq.addAll([i, 0x20]); |
241 matchingReplacementChars.addAll( | 245 matchingReplacementChars.addAll([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]); |
242 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]); | |
243 } | 246 } |
244 Expect.listEquals(matchingReplacementChars, | 247 Expect.listEquals( |
| 248 matchingReplacementChars, |
245 utf8ToCodepoints(allFirstFiveByteSeq), | 249 utf8ToCodepoints(allFirstFiveByteSeq), |
246 "f8 - fb + space => replacement character x 4"); | 250 "f8 - fb + space => replacement character x 4"); |
247 | 251 |
248 List<int> allFirstSixByteSeq = <int>[]; | 252 List<int> allFirstSixByteSeq = <int>[]; |
249 matchingReplacementChars = <int>[]; | 253 matchingReplacementChars = <int>[]; |
250 for (int i = 0xfc; i < 0xfe; i++) { | 254 for (int i = 0xfc; i < 0xfe; i++) { |
251 allFirstSixByteSeq.addAll([i, 0x20]); | 255 allFirstSixByteSeq.addAll([i, 0x20]); |
252 matchingReplacementChars.addAll( | 256 matchingReplacementChars.addAll([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]); |
253 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]); | |
254 } | 257 } |
255 Expect.listEquals(matchingReplacementChars, | 258 Expect.listEquals( |
| 259 matchingReplacementChars, |
256 utf8ToCodepoints(allFirstSixByteSeq), | 260 utf8ToCodepoints(allFirstSixByteSeq), |
257 "fc - fd + space => replacement character x 2"); | 261 "fc - fd + space => replacement character x 2"); |
258 | 262 |
259 // Sequences with last continuation byte missing | 263 // Sequences with last continuation byte missing |
260 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 264 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
261 utf8ToCodepoints([0xc2]), | 265 utf8ToCodepoints([0xc2]), "2-byte sequence with last byte missing"); |
262 "2-byte sequence with last byte missing"); | |
263 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 266 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
264 utf8ToCodepoints([0xe0, 0x80]), | 267 utf8ToCodepoints([0xe0, 0x80]), "3-byte sequence with last byte missing"); |
265 "3-byte sequence with last byte missing"); | 268 Expect.listEquals( |
266 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 269 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
267 utf8ToCodepoints([0xf0, 0x80, 0x80]), | 270 utf8ToCodepoints([0xf0, 0x80, 0x80]), |
268 "4-byte sequence with last byte missing"); | 271 "4-byte sequence with last byte missing"); |
269 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 272 Expect.listEquals( |
| 273 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
270 utf8ToCodepoints([0xf8, 0x88, 0x80, 0x80]), | 274 utf8ToCodepoints([0xf8, 0x88, 0x80, 0x80]), |
271 "5-byte sequence with last byte missing"); | 275 "5-byte sequence with last byte missing"); |
272 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 276 Expect.listEquals( |
| 277 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
273 utf8ToCodepoints([0xfc, 0x80, 0x80, 0x80, 0x80]), | 278 utf8ToCodepoints([0xfc, 0x80, 0x80, 0x80, 0x80]), |
274 "6-byte sequence with last byte missing"); | 279 "6-byte sequence with last byte missing"); |
275 | 280 |
276 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 281 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
277 utf8ToCodepoints([0xdf]), | 282 utf8ToCodepoints([0xdf]), "2-byte sequence with last byte missing (hi)"); |
278 "2-byte sequence with last byte missing (hi)"); | 283 Expect.listEquals( |
279 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 284 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
280 utf8ToCodepoints([0xef, 0xbf]), | 285 utf8ToCodepoints([0xef, 0xbf]), |
281 "3-byte sequence with last byte missing (hi)"); | 286 "3-byte sequence with last byte missing (hi)"); |
282 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 287 Expect.listEquals( |
| 288 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
283 utf8ToCodepoints([0xf7, 0xbf, 0xbf]), | 289 utf8ToCodepoints([0xf7, 0xbf, 0xbf]), |
284 "4-byte sequence with last byte missing (hi)"); | 290 "4-byte sequence with last byte missing (hi)"); |
285 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 291 Expect.listEquals( |
| 292 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
286 utf8ToCodepoints([0xfb, 0xbf, 0xbf, 0xbf]), | 293 utf8ToCodepoints([0xfb, 0xbf, 0xbf, 0xbf]), |
287 "5-byte sequence with last byte missing (hi)"); | 294 "5-byte sequence with last byte missing (hi)"); |
288 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 295 Expect.listEquals( |
| 296 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
289 utf8ToCodepoints([0xfd, 0xbf, 0xbf, 0xbf, 0xbf]), | 297 utf8ToCodepoints([0xfd, 0xbf, 0xbf, 0xbf, 0xbf]), |
290 "6-byte sequence with last byte missing (hi)"); | 298 "6-byte sequence with last byte missing (hi)"); |
291 | 299 |
292 // Concatenation of incomplete sequences | 300 // Concatenation of incomplete sequences |
293 Expect.listEquals( | 301 Expect.listEquals( |
294 [ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 302 [ |
295 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 303 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
296 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 304 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
297 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 305 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
298 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 306 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
299 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 307 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
300 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 308 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
301 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 309 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
302 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 310 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
303 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT ], | 311 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
304 utf8ToCodepoints( | 312 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT |
305 [ 0xc2, | 313 ], |
306 0xe0, 0x80, | 314 utf8ToCodepoints([ |
307 0xf0, 0x80, 0x80, | 315 0xc2, // 1 |
308 0xf8, 0x88, 0x80, 0x80, | 316 0xe0, 0x80, |
309 0xfc, 0x80, 0x80, 0x80, 0x80, | 317 0xf0, 0x80, 0x80, |
310 0xdf, | 318 0xf8, 0x88, 0x80, 0x80, |
311 0xef, 0xbf, | 319 0xfc, 0x80, 0x80, 0x80, 0x80, |
312 0xf7, 0xbf, 0xbf, | 320 0xdf, |
313 0xfb, 0xbf, 0xbf, 0xbf, | 321 0xef, 0xbf, |
314 0xfd, 0xbf, 0xbf, 0xbf, 0xbf ]), | 322 0xf7, 0xbf, 0xbf, |
315 "Concatenation of incomplete sequences"); | 323 0xfb, 0xbf, 0xbf, 0xbf, |
| 324 0xfd, 0xbf, 0xbf, 0xbf, 0xbf |
| 325 ]), |
| 326 "Concatenation of incomplete sequences"); |
316 | 327 |
317 // Impossible bytes | 328 // Impossible bytes |
318 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 329 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
319 utf8ToCodepoints([0xfe]), "fe"); | 330 utf8ToCodepoints([0xfe]), "fe"); |
320 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 331 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
321 utf8ToCodepoints([0xff]), "ff"); | 332 utf8ToCodepoints([0xff]), "ff"); |
322 Expect.listEquals([ | 333 Expect.listEquals([ |
323 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 334 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
324 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 335 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
325 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 336 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
326 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 337 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT |
327 utf8ToCodepoints([0xfe, 0xfe, 0xff, 0xff]), "fe fe ff ff"); | 338 ], utf8ToCodepoints([0xfe, 0xfe, 0xff, 0xff]), "fe fe ff ff"); |
328 | 339 |
329 // Overlong sequences | 340 // Overlong sequences |
330 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 341 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
331 utf8ToCodepoints([0xc0, 0xaf]), "c0 af"); | 342 utf8ToCodepoints([0xc0, 0xaf]), "c0 af"); |
332 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 343 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
333 utf8ToCodepoints([0xe0, 0x80, 0xaf]), "e0 80 af"); | 344 utf8ToCodepoints([0xe0, 0x80, 0xaf]), "e0 80 af"); |
334 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 345 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
335 utf8ToCodepoints([0xf0, 0x80, 0x80, 0xaf]), "f0 80 80 af"); | 346 utf8ToCodepoints([0xf0, 0x80, 0x80, 0xaf]), "f0 80 80 af"); |
336 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 347 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
337 utf8ToCodepoints([0xf8, 0x80, 0x80, 0x80, 0xaf]), "f8 80 80 80 af"); | 348 utf8ToCodepoints([0xf8, 0x80, 0x80, 0x80, 0xaf]), "f8 80 80 80 af"); |
338 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 349 Expect.listEquals( |
| 350 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
339 utf8ToCodepoints([0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf]), | 351 utf8ToCodepoints([0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf]), |
340 "fc 80 80 80 80 af"); | 352 "fc 80 80 80 80 af"); |
341 | 353 |
342 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 354 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
343 utf8ToCodepoints([0xc1, 0xbf]), "c1 bf"); | 355 utf8ToCodepoints([0xc1, 0xbf]), "c1 bf"); |
344 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 356 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
345 utf8ToCodepoints([0xe0, 0x9f, 0xbf]), "e0 9f bf"); | 357 utf8ToCodepoints([0xe0, 0x9f, 0xbf]), "e0 9f bf"); |
346 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 358 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
347 utf8ToCodepoints([0xf0, 0x8f, 0xbf, 0xbf]), "f0 8f bf bf"); | 359 utf8ToCodepoints([0xf0, 0x8f, 0xbf, 0xbf]), "f0 8f bf bf"); |
348 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 360 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
349 utf8ToCodepoints([0xf8, 0x87, 0xbf, 0xbf, 0xbf]), "f8 87 bf bf bf"); | 361 utf8ToCodepoints([0xf8, 0x87, 0xbf, 0xbf, 0xbf]), "f8 87 bf bf bf"); |
350 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 362 Expect.listEquals( |
| 363 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
351 utf8ToCodepoints([0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf]), | 364 utf8ToCodepoints([0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf]), |
352 "fc 83 bf bf bf bf"); | 365 "fc 83 bf bf bf bf"); |
353 | 366 |
354 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 367 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
355 utf8ToCodepoints([0xc0, 0x80]), "c0 80"); | 368 utf8ToCodepoints([0xc0, 0x80]), "c0 80"); |
356 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 369 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
357 utf8ToCodepoints([0xe0, 0x80, 0x80]), "e0 80 80"); | 370 utf8ToCodepoints([0xe0, 0x80, 0x80]), "e0 80 80"); |
358 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 371 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
359 utf8ToCodepoints([0xf0, 0x80, 0x80, 0x80]), "f0 80 80 80"); | 372 utf8ToCodepoints([0xf0, 0x80, 0x80, 0x80]), "f0 80 80 80"); |
360 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 373 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
361 utf8ToCodepoints([0xf8, 0x80, 0x80, 0x80, 0x80]), "f8 80 80 80 80"); | 374 utf8ToCodepoints([0xf8, 0x80, 0x80, 0x80, 0x80]), "f8 80 80 80 80"); |
362 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 375 Expect.listEquals( |
| 376 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
363 utf8ToCodepoints([0xfc, 0x80, 0x80, 0x80, 0x80, 0x80]), | 377 utf8ToCodepoints([0xfc, 0x80, 0x80, 0x80, 0x80, 0x80]), |
364 "fc 80 80 80 80 80"); | 378 "fc 80 80 80 80 80"); |
365 | 379 |
366 // Illegal code positions | 380 // Illegal code positions |
367 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 381 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
368 utf8ToCodepoints([0xed, 0xa0, 0x80]), "U+D800"); | 382 utf8ToCodepoints([0xed, 0xa0, 0x80]), "U+D800"); |
369 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 383 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
370 utf8ToCodepoints([0xed, 0xad, 0xbf]), "U+DB7F"); | 384 utf8ToCodepoints([0xed, 0xad, 0xbf]), "U+DB7F"); |
371 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 385 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
372 utf8ToCodepoints([0xed, 0xae, 0x80]), "U+DB80"); | 386 utf8ToCodepoints([0xed, 0xae, 0x80]), "U+DB80"); |
373 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 387 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
374 utf8ToCodepoints([0xed, 0xaf, 0xbf]), "U+DBFF"); | 388 utf8ToCodepoints([0xed, 0xaf, 0xbf]), "U+DBFF"); |
375 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 389 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
376 utf8ToCodepoints([0xed, 0xb0, 0x80]), "U+DC00"); | 390 utf8ToCodepoints([0xed, 0xb0, 0x80]), "U+DC00"); |
377 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 391 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
378 utf8ToCodepoints([0xed, 0xbe, 0x80]), "U+DF80"); | 392 utf8ToCodepoints([0xed, 0xbe, 0x80]), "U+DF80"); |
379 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 393 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], |
380 utf8ToCodepoints([0xed, 0xbf, 0xbf]), "U+DFFF"); | 394 utf8ToCodepoints([0xed, 0xbf, 0xbf]), "U+DFFF"); |
381 | 395 |
382 // Paired UTF-16 surrogates | 396 // Paired UTF-16 surrogates |
383 Expect.listEquals([ | 397 Expect.listEquals([ |
384 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 398 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
385 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 399 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT |
386 utf8ToCodepoints([0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80]), | 400 ], utf8ToCodepoints([0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80]), "U+D800 U+DC00"); |
387 "U+D800 U+DC00"); | |
388 Expect.listEquals([ | 401 Expect.listEquals([ |
389 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 402 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
390 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 403 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT |
391 utf8ToCodepoints([0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf]), | 404 ], utf8ToCodepoints([0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf]), "U+D800 U+DFFF"); |
392 "U+D800 U+DFFF"); | |
393 Expect.listEquals([ | 405 Expect.listEquals([ |
394 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 406 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
395 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 407 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT |
396 utf8ToCodepoints([0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80]), | 408 ], utf8ToCodepoints([0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80]), "U+DB7F U+DC00"); |
397 "U+DB7F U+DC00"); | |
398 Expect.listEquals([ | 409 Expect.listEquals([ |
399 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 410 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
400 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 411 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT |
401 utf8ToCodepoints([0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf]), | 412 ], utf8ToCodepoints([0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf]), "U+DB7F U+DFFF"); |
402 "U+DB7F U+DFFF"); | |
403 Expect.listEquals([ | 413 Expect.listEquals([ |
404 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 414 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
405 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 415 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT |
406 utf8ToCodepoints([0xed, 0xae, 0x80, 0xed, 0xb0, 0x80]), | 416 ], utf8ToCodepoints([0xed, 0xae, 0x80, 0xed, 0xb0, 0x80]), "U+DB80 U+DC00"); |
407 "U+DB80 U+DC00"); | |
408 Expect.listEquals([ | 417 Expect.listEquals([ |
409 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 418 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
410 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 419 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT |
411 utf8ToCodepoints([0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf]), | 420 ], utf8ToCodepoints([0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf]), "U+DB80 U+DFFF"); |
412 "U+DB80 U+DFFF"); | |
413 Expect.listEquals([ | 421 Expect.listEquals([ |
414 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 422 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
415 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 423 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT |
416 utf8ToCodepoints([0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80]), | 424 ], utf8ToCodepoints([0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80]), "U+DBFF U+DC00"); |
417 "U+DBFF U+DC00"); | |
418 Expect.listEquals([ | 425 Expect.listEquals([ |
419 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 426 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, |
420 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 427 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT |
421 utf8ToCodepoints([0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf]), | 428 ], utf8ToCodepoints([0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf]), "U+DBFF U+DFFF"); |
422 "U+DBFF U+DFFF"); | |
423 | 429 |
424 // Other illegal code positions (???) | 430 // Other illegal code positions (???) |
425 Expect.listEquals([0xfffe], utf8ToCodepoints([0xef, 0xbf, 0xbe]), | 431 Expect.listEquals([0xfffe], utf8ToCodepoints([0xef, 0xbf, 0xbe]), "U+FFFE"); |
426 "U+FFFE"); | 432 Expect.listEquals([0xffff], utf8ToCodepoints([0xef, 0xbf, 0xbf]), "U+FFFF"); |
427 Expect.listEquals([0xffff], utf8ToCodepoints([0xef, 0xbf, 0xbf]), | |
428 "U+FFFF"); | |
429 } | 433 } |
430 | 434 |
431 void testUtf8BytesToString() { | 435 void testUtf8BytesToString() { |
432 Expect.stringEquals(testEnglishPhrase, | 436 Expect.stringEquals( |
433 decodeUtf8(testEnglishUtf8), "English"); | 437 testEnglishPhrase, decodeUtf8(testEnglishUtf8), "English"); |
434 | 438 |
435 Expect.stringEquals(testDanishPhrase, | 439 Expect.stringEquals(testDanishPhrase, decodeUtf8(testDanishUtf8), "Danish"); |
436 decodeUtf8(testDanishUtf8), "Danish"); | |
437 | 440 |
438 Expect.stringEquals(testHebrewPhrase, | 441 Expect.stringEquals(testHebrewPhrase, decodeUtf8(testHebrewUtf8), "Hebrew"); |
439 decodeUtf8(testHebrewUtf8), "Hebrew"); | |
440 | 442 |
441 Expect.stringEquals(testRussianPhrase, | 443 Expect.stringEquals( |
442 decodeUtf8(testRussianUtf8), "Russian"); | 444 testRussianPhrase, decodeUtf8(testRussianUtf8), "Russian"); |
443 | 445 |
444 Expect.stringEquals(testGreekPhrase, | 446 Expect.stringEquals(testGreekPhrase, decodeUtf8(testGreekUtf8), "Greek"); |
445 decodeUtf8(testGreekUtf8), "Greek"); | |
446 | 447 |
447 Expect.stringEquals(testKatakanaPhrase, | 448 Expect.stringEquals( |
448 decodeUtf8(testKatakanaUtf8), "Katakana"); | 449 testKatakanaPhrase, decodeUtf8(testKatakanaUtf8), "Katakana"); |
449 } | 450 } |
450 | 451 |
451 void testIterableMethods() { | 452 void testIterableMethods() { |
452 IterableUtf8Decoder englishDecoder = decodeUtf8AsIterable(testEnglishUtf8); | 453 IterableUtf8Decoder englishDecoder = decodeUtf8AsIterable(testEnglishUtf8); |
453 // get the first character | 454 // get the first character |
454 Expect.equals(testEnglishUtf8[0], englishDecoder.first); | 455 Expect.equals(testEnglishUtf8[0], englishDecoder.first); |
455 // get the whole translation using the Iterable interface | 456 // get the whole translation using the Iterable interface |
456 Expect.stringEquals(testEnglishPhrase, | 457 Expect.stringEquals(testEnglishPhrase, |
457 new String.fromCharCodes(new List<int>.from(englishDecoder))); | 458 new String.fromCharCodes(new List<int>.from(englishDecoder))); |
458 | 459 |
459 IterableUtf8Decoder kataDecoder = decodeUtf8AsIterable(testKatakanaUtf8); | 460 IterableUtf8Decoder kataDecoder = decodeUtf8AsIterable(testKatakanaUtf8); |
460 // get the first character | 461 // get the first character |
461 Expect.equals(testKatakanaPhrase.codeUnits[0], kataDecoder.first); | 462 Expect.equals(testKatakanaPhrase.codeUnits[0], kataDecoder.first); |
462 // get the whole translation using the Iterable interface | 463 // get the whole translation using the Iterable interface |
463 Expect.stringEquals(testKatakanaPhrase, | 464 Expect.stringEquals(testKatakanaPhrase, |
464 new String.fromCharCodes(new List<int>.from(kataDecoder))); | 465 new String.fromCharCodes(new List<int>.from(kataDecoder))); |
465 } | 466 } |
OLD | NEW |