OLD | NEW |
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #include "vm/globals.h" | 5 #include "vm/globals.h" |
6 #include "vm/unicode.h" | 6 #include "vm/unicode.h" |
7 #include "vm/unit_test.h" | 7 #include "vm/unit_test.h" |
8 | 8 |
9 namespace dart { | 9 namespace dart { |
10 | 10 |
11 TEST_CASE(Utf8Decode) { | 11 TEST_CASE(Utf8Decode) { |
12 // Examples from the Unicode specification, chapter 3 | 12 // Examples from the Unicode specification, chapter 3 |
13 { | 13 { |
14 const char* src = "\x41\xC3\xB1\x42"; | 14 const char* src = "\x41\xC3\xB1\x42"; |
15 int32_t expected[] = { 0x41, 0xF1, 0x42 }; | 15 int32_t expected[] = {0x41, 0xF1, 0x42}; |
16 int32_t dst[ARRAY_SIZE(expected)]; | 16 int32_t dst[ARRAY_SIZE(expected)]; |
17 memset(dst, 0, sizeof(dst)); | 17 memset(dst, 0, sizeof(dst)); |
18 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 18 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
19 EXPECT(is_valid); | 19 EXPECT(is_valid); |
20 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 20 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
21 } | 21 } |
22 | 22 |
23 { | 23 { |
24 const char* src = "\x4D"; | 24 const char* src = "\x4D"; |
25 int32_t expected[] = { 0x4D }; | 25 int32_t expected[] = {0x4D}; |
26 int32_t dst[ARRAY_SIZE(expected)]; | 26 int32_t dst[ARRAY_SIZE(expected)]; |
27 memset(dst, 0, sizeof(dst)); | 27 memset(dst, 0, sizeof(dst)); |
28 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 28 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
29 EXPECT(is_valid); | 29 EXPECT(is_valid); |
30 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 30 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
31 } | 31 } |
32 | 32 |
33 { | 33 { |
34 const char* src = "\xD0\xB0"; | 34 const char* src = "\xD0\xB0"; |
35 int32_t expected[] = { 0x430 }; | 35 int32_t expected[] = {0x430}; |
36 int32_t dst[ARRAY_SIZE(expected)]; | 36 int32_t dst[ARRAY_SIZE(expected)]; |
37 memset(dst, 0, sizeof(dst)); | 37 memset(dst, 0, sizeof(dst)); |
38 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 38 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
39 EXPECT(is_valid); | 39 EXPECT(is_valid); |
40 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 40 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
41 } | 41 } |
42 | 42 |
43 { | 43 { |
44 const char* src = "\xE4\xBA\x8C"; | 44 const char* src = "\xE4\xBA\x8C"; |
45 int32_t expected[] = { 0x4E8C }; | 45 int32_t expected[] = {0x4E8C}; |
46 int32_t dst[ARRAY_SIZE(expected)]; | 46 int32_t dst[ARRAY_SIZE(expected)]; |
47 memset(dst, 0, sizeof(dst)); | 47 memset(dst, 0, sizeof(dst)); |
48 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 48 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
49 EXPECT(is_valid); | 49 EXPECT(is_valid); |
50 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 50 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
51 } | 51 } |
52 | 52 |
53 { | 53 { |
54 const char* src = "\xF0\x90\x8C\x82"; | 54 const char* src = "\xF0\x90\x8C\x82"; |
55 int32_t expected[] = { 0x10302 }; | 55 int32_t expected[] = {0x10302}; |
56 int32_t dst[ARRAY_SIZE(expected)]; | 56 int32_t dst[ARRAY_SIZE(expected)]; |
57 memset(dst, 0, sizeof(dst)); | 57 memset(dst, 0, sizeof(dst)); |
58 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 58 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
59 EXPECT(is_valid); | 59 EXPECT(is_valid); |
60 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 60 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
61 } | 61 } |
62 | 62 |
63 { | 63 { |
64 const char* src = "\x4D\xD0\xB0\xE4\xBA\x8C\xF0\x90\x8C\x82"; | 64 const char* src = "\x4D\xD0\xB0\xE4\xBA\x8C\xF0\x90\x8C\x82"; |
65 int32_t expected[] = { 0x4D, 0x430, 0x4E8C, 0x10302 }; | 65 int32_t expected[] = {0x4D, 0x430, 0x4E8C, 0x10302}; |
66 int32_t dst[ARRAY_SIZE(expected)]; | 66 int32_t dst[ARRAY_SIZE(expected)]; |
67 memset(dst, 0, sizeof(dst)); | 67 memset(dst, 0, sizeof(dst)); |
68 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 68 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
69 EXPECT(is_valid); | 69 EXPECT(is_valid); |
70 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 70 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
71 } | 71 } |
72 | 72 |
73 // Mixture of non-ASCII and ASCII characters | 73 // Mixture of non-ASCII and ASCII characters |
74 { | 74 { |
75 const char* src = "\xD7\x92\xD7\x9C\xD7\xA2\xD7\x93" | 75 const char* src = |
76 "\x20" | 76 "\xD7\x92\xD7\x9C\xD7\xA2\xD7\x93" |
77 "\xD7\x91\xD7\xA8\xD7\x9B\xD7\x94"; | 77 "\x20" |
78 int32_t expected[] = { 0x5D2, 0x5DC, 0x5E2, 0x5D3, | 78 "\xD7\x91\xD7\xA8\xD7\x9B\xD7\x94"; |
79 0x20, | 79 int32_t expected[] = {0x5D2, 0x5DC, 0x5E2, 0x5D3, 0x20, |
80 0x5D1, 0x5E8, 0x5DB, 0x5D4 }; | 80 0x5D1, 0x5E8, 0x5DB, 0x5D4}; |
81 int32_t dst[ARRAY_SIZE(expected)]; | 81 int32_t dst[ARRAY_SIZE(expected)]; |
82 memset(dst, 0, sizeof(dst)); | 82 memset(dst, 0, sizeof(dst)); |
83 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 83 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
84 EXPECT(is_valid); | 84 EXPECT(is_valid); |
85 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 85 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
86 } | 86 } |
87 | 87 |
88 // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt | 88 // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt |
89 | 89 |
90 // 1 - Some correct UTF-8 text | 90 // 1 - Some correct UTF-8 text |
91 { | 91 { |
92 const char* src = "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5"; | 92 const char* src = "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5"; |
93 int32_t expected[] = { 0x3BA, 0x1F79, 0x3C3, 0x3BC, 0x3B5 }; | 93 int32_t expected[] = {0x3BA, 0x1F79, 0x3C3, 0x3BC, 0x3B5}; |
94 int32_t dst[ARRAY_SIZE(expected)]; | 94 int32_t dst[ARRAY_SIZE(expected)]; |
95 memset(dst, 0, sizeof(dst)); | 95 memset(dst, 0, sizeof(dst)); |
96 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 96 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
97 EXPECT(is_valid); | 97 EXPECT(is_valid); |
98 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 98 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
99 } | 99 } |
100 | 100 |
101 // 2 - Boundary condition test cases | 101 // 2 - Boundary condition test cases |
102 | 102 |
103 // 2.1 - First possible sequence of a certain length | 103 // 2.1 - First possible sequence of a certain length |
104 | 104 |
105 // 2.1.1 - 1 byte (U-00000000): "\x00" | 105 // 2.1.1 - 1 byte (U-00000000): "\x00" |
106 { | 106 { |
107 const char* src = "\x00"; | 107 const char* src = "\x00"; |
108 int32_t expected[] = { 0x0 }; | 108 int32_t expected[] = {0x0}; |
109 int32_t dst[ARRAY_SIZE(expected)]; | 109 int32_t dst[ARRAY_SIZE(expected)]; |
110 memset(dst, 0xFF, sizeof(dst)); | 110 memset(dst, 0xFF, sizeof(dst)); |
111 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 111 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
112 EXPECT(is_valid); | 112 EXPECT(is_valid); |
113 EXPECT(memcmp(expected, dst, sizeof(expected))); | 113 EXPECT(memcmp(expected, dst, sizeof(expected))); |
114 } | 114 } |
115 | 115 |
116 // 2.1.2 - 2 bytes (U-00000080): "\xC2\x80" | 116 // 2.1.2 - 2 bytes (U-00000080): "\xC2\x80" |
117 { | 117 { |
118 const char* src = "\xC2\x80"; | 118 const char* src = "\xC2\x80"; |
119 int32_t expected[] = { 0x80 }; | 119 int32_t expected[] = {0x80}; |
120 int32_t dst[ARRAY_SIZE(expected)]; | 120 int32_t dst[ARRAY_SIZE(expected)]; |
121 memset(dst, 0, sizeof(dst)); | 121 memset(dst, 0, sizeof(dst)); |
122 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 122 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
123 EXPECT(is_valid); | 123 EXPECT(is_valid); |
124 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 124 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
125 } | 125 } |
126 | 126 |
127 // 2.1.3 - 3 bytes (U-00000800): "\xE0\xA0\x80" | 127 // 2.1.3 - 3 bytes (U-00000800): "\xE0\xA0\x80" |
128 { | 128 { |
129 const char* src = "\xE0\xA0\x80"; | 129 const char* src = "\xE0\xA0\x80"; |
130 int32_t expected[] = { 0x800 }; | 130 int32_t expected[] = {0x800}; |
131 int32_t dst[ARRAY_SIZE(expected)]; | 131 int32_t dst[ARRAY_SIZE(expected)]; |
132 memset(dst, 0, sizeof(dst)); | 132 memset(dst, 0, sizeof(dst)); |
133 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 133 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
134 EXPECT(is_valid); | 134 EXPECT(is_valid); |
135 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 135 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
136 } | 136 } |
137 | 137 |
138 // 2.1.4 - 4 bytes (U-00010000): "\xF0\x90\x80\x80" | 138 // 2.1.4 - 4 bytes (U-00010000): "\xF0\x90\x80\x80" |
139 { | 139 { |
140 const char* src = "\xF0\x90\x80\x80"; | 140 const char* src = "\xF0\x90\x80\x80"; |
141 int32_t expected[] = { 0x10000 }; | 141 int32_t expected[] = {0x10000}; |
142 int32_t dst[ARRAY_SIZE(expected)]; | 142 int32_t dst[ARRAY_SIZE(expected)]; |
143 memset(dst, 0, sizeof(dst)); | 143 memset(dst, 0, sizeof(dst)); |
144 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 144 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
145 EXPECT(is_valid); | 145 EXPECT(is_valid); |
146 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 146 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
147 } | 147 } |
148 | 148 |
149 // 2.1.5 - 5 bytes (U-00200000): "\xF8\x88\x80\x80\x80" | 149 // 2.1.5 - 5 bytes (U-00200000): "\xF8\x88\x80\x80\x80" |
150 { | 150 { |
151 const char* src = "\xF8\x88\x80\x80\x80"; | 151 const char* src = "\xF8\x88\x80\x80\x80"; |
152 int32_t expected[] = { 0x200000 }; | 152 int32_t expected[] = {0x200000}; |
153 int32_t dst[ARRAY_SIZE(expected)]; | 153 int32_t dst[ARRAY_SIZE(expected)]; |
154 memset(dst, 0, sizeof(dst)); | 154 memset(dst, 0, sizeof(dst)); |
155 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 155 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
156 EXPECT(!is_valid); | 156 EXPECT(!is_valid); |
157 EXPECT(memcmp(expected, dst, sizeof(expected))); | 157 EXPECT(memcmp(expected, dst, sizeof(expected))); |
158 } | 158 } |
159 | 159 |
160 // 2.1.6 - 6 bytes (U-04000000): "\xFC\x84\x80\x80\x80\x80" | 160 // 2.1.6 - 6 bytes (U-04000000): "\xFC\x84\x80\x80\x80\x80" |
161 { | 161 { |
162 const char* src = "\xFC\x84\x80\x80\x80\x80"; | 162 const char* src = "\xFC\x84\x80\x80\x80\x80"; |
163 int32_t expected[] = { 0x400000 }; | 163 int32_t expected[] = {0x400000}; |
164 int32_t dst[ARRAY_SIZE(expected)]; | 164 int32_t dst[ARRAY_SIZE(expected)]; |
165 memset(dst, 0, sizeof(dst)); | 165 memset(dst, 0, sizeof(dst)); |
166 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 166 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
167 EXPECT(!is_valid); | 167 EXPECT(!is_valid); |
168 EXPECT(memcmp(expected, dst, sizeof(expected))); | 168 EXPECT(memcmp(expected, dst, sizeof(expected))); |
169 } | 169 } |
170 | 170 |
171 // 2.2 - Last possible sequence of a certain length | 171 // 2.2 - Last possible sequence of a certain length |
172 | 172 |
173 // 2.2.1 - 1 byte (U-0000007F): "\x7F" | 173 // 2.2.1 - 1 byte (U-0000007F): "\x7F" |
174 { | 174 { |
175 const char* src = "\x7F"; | 175 const char* src = "\x7F"; |
176 int32_t expected[] = { 0x7F }; | 176 int32_t expected[] = {0x7F}; |
177 int32_t dst[ARRAY_SIZE(expected)]; | 177 int32_t dst[ARRAY_SIZE(expected)]; |
178 memset(dst, 0, sizeof(dst)); | 178 memset(dst, 0, sizeof(dst)); |
179 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 179 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
180 EXPECT(is_valid); | 180 EXPECT(is_valid); |
181 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 181 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
182 } | 182 } |
183 | 183 |
184 // 2.2.2 - 2 bytes (U-000007FF): "\xDF\xBF" | 184 // 2.2.2 - 2 bytes (U-000007FF): "\xDF\xBF" |
185 { | 185 { |
186 const char* src = "\xDF\xBF"; | 186 const char* src = "\xDF\xBF"; |
187 int32_t expected[] = { 0x7FF }; | 187 int32_t expected[] = {0x7FF}; |
188 int32_t dst[ARRAY_SIZE(expected)]; | 188 int32_t dst[ARRAY_SIZE(expected)]; |
189 memset(dst, 0, sizeof(dst)); | 189 memset(dst, 0, sizeof(dst)); |
190 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 190 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
191 EXPECT(is_valid); | 191 EXPECT(is_valid); |
192 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 192 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
193 } | 193 } |
194 | 194 |
195 // 2.2.3 - 3 bytes (U-0000FFFF): "\xEF\xBF\xBF" | 195 // 2.2.3 - 3 bytes (U-0000FFFF): "\xEF\xBF\xBF" |
196 { | 196 { |
197 const char* src = "\xEF\xBF\xBF"; | 197 const char* src = "\xEF\xBF\xBF"; |
198 int32_t expected[] = { 0xFFFF }; | 198 int32_t expected[] = {0xFFFF}; |
199 int32_t dst[ARRAY_SIZE(expected)]; | 199 int32_t dst[ARRAY_SIZE(expected)]; |
200 memset(dst, 0, sizeof(dst)); | 200 memset(dst, 0, sizeof(dst)); |
201 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 201 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
202 EXPECT(is_valid); | 202 EXPECT(is_valid); |
203 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 203 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
204 } | 204 } |
205 | 205 |
206 // 2.2.4 - 4 bytes (U-001FFFFF): "\xF7\xBF\xBF\xBF" | 206 // 2.2.4 - 4 bytes (U-001FFFFF): "\xF7\xBF\xBF\xBF" |
207 { | 207 { |
208 const char* src = "\xF7\xBF\xBF\xBF"; | 208 const char* src = "\xF7\xBF\xBF\xBF"; |
209 int32_t expected[] = { 0x1FFFF }; | 209 int32_t expected[] = {0x1FFFF}; |
210 int32_t dst[ARRAY_SIZE(expected)]; | 210 int32_t dst[ARRAY_SIZE(expected)]; |
211 memset(dst, 0, sizeof(dst)); | 211 memset(dst, 0, sizeof(dst)); |
212 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 212 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
213 EXPECT(!is_valid); | 213 EXPECT(!is_valid); |
214 EXPECT(memcmp(expected, dst, sizeof(expected))); | 214 EXPECT(memcmp(expected, dst, sizeof(expected))); |
215 } | 215 } |
216 | 216 |
217 // 2.2.5 - 5 bytes (U-03FFFFFF): "\xFB\xBF\xBF\xBF\xBF" | 217 // 2.2.5 - 5 bytes (U-03FFFFFF): "\xFB\xBF\xBF\xBF\xBF" |
218 { | 218 { |
219 const char* src = "\xFB\xBF\xBF\xBF\xBF"; | 219 const char* src = "\xFB\xBF\xBF\xBF\xBF"; |
220 int32_t expected[] = { 0x3FFFFFF }; | 220 int32_t expected[] = {0x3FFFFFF}; |
221 int32_t dst[ARRAY_SIZE(expected)]; | 221 int32_t dst[ARRAY_SIZE(expected)]; |
222 memset(dst, 0, sizeof(dst)); | 222 memset(dst, 0, sizeof(dst)); |
223 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 223 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
224 EXPECT(!is_valid); | 224 EXPECT(!is_valid); |
225 EXPECT(memcmp(expected, dst, sizeof(expected))); | 225 EXPECT(memcmp(expected, dst, sizeof(expected))); |
226 } | 226 } |
227 | 227 |
228 // 2.2.6 - 6 bytes (U-7FFFFFFF): "\xFD\xBF\xBF\xBF\xBF\xBF" | 228 // 2.2.6 - 6 bytes (U-7FFFFFFF): "\xFD\xBF\xBF\xBF\xBF\xBF" |
229 { | 229 { |
230 const char* src = "\xFD\xBF\xBF\xBF\xBF\xBF"; | 230 const char* src = "\xFD\xBF\xBF\xBF\xBF\xBF"; |
231 int32_t expected[] = { 0x7FFFFFF }; | 231 int32_t expected[] = {0x7FFFFFF}; |
232 int32_t dst[ARRAY_SIZE(expected)]; | 232 int32_t dst[ARRAY_SIZE(expected)]; |
233 memset(dst, 0, sizeof(dst)); | 233 memset(dst, 0, sizeof(dst)); |
234 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 234 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
235 EXPECT(!is_valid); | 235 EXPECT(!is_valid); |
236 EXPECT(memcmp(expected, dst, sizeof(expected))); | 236 EXPECT(memcmp(expected, dst, sizeof(expected))); |
237 } | 237 } |
238 | 238 |
239 // 2.3 - Other boundary conditions | 239 // 2.3 - Other boundary conditions |
240 | 240 |
241 // 2.3.1 - U-0000D7FF = ed 9f bf = "\xED\x9F\xBF" | 241 // 2.3.1 - U-0000D7FF = ed 9f bf = "\xED\x9F\xBF" |
242 { | 242 { |
243 const char* src = "\xED\x9F\xBF"; | 243 const char* src = "\xED\x9F\xBF"; |
244 int32_t expected[] = { 0xD7FF }; | 244 int32_t expected[] = {0xD7FF}; |
245 int32_t dst[ARRAY_SIZE(expected)]; | 245 int32_t dst[ARRAY_SIZE(expected)]; |
246 memset(dst, 0, sizeof(dst)); | 246 memset(dst, 0, sizeof(dst)); |
247 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 247 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
248 EXPECT(is_valid); | 248 EXPECT(is_valid); |
249 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 249 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
250 } | 250 } |
251 | 251 |
252 // 2.3.2 - U-0000E000 = ee 80 80 = "\xEE\x80\x80" | 252 // 2.3.2 - U-0000E000 = ee 80 80 = "\xEE\x80\x80" |
253 { | 253 { |
254 const char* src = "\xEE\x80\x80"; | 254 const char* src = "\xEE\x80\x80"; |
255 int32_t expected[] = { 0xE000 }; | 255 int32_t expected[] = {0xE000}; |
256 int32_t dst[ARRAY_SIZE(expected)]; | 256 int32_t dst[ARRAY_SIZE(expected)]; |
257 memset(dst, 0, sizeof(dst)); | 257 memset(dst, 0, sizeof(dst)); |
258 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 258 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
259 EXPECT(is_valid); | 259 EXPECT(is_valid); |
260 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 260 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
261 } | 261 } |
262 | 262 |
263 // 2.3.3 - U-0000FFFD = ef bf bd = "\xEF\xBF\xBD" | 263 // 2.3.3 - U-0000FFFD = ef bf bd = "\xEF\xBF\xBD" |
264 { | 264 { |
265 const char* src = "\xEF\xBF\xBD"; | 265 const char* src = "\xEF\xBF\xBD"; |
266 int32_t expected[] = { 0xFFFD }; | 266 int32_t expected[] = {0xFFFD}; |
267 int32_t dst[ARRAY_SIZE(expected)]; | 267 int32_t dst[ARRAY_SIZE(expected)]; |
268 memset(dst, 0, sizeof(dst)); | 268 memset(dst, 0, sizeof(dst)); |
269 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 269 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
270 EXPECT(is_valid); | 270 EXPECT(is_valid); |
271 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 271 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
272 } | 272 } |
273 | 273 |
274 // 2.3.4 - U-0010FFFF = f4 8f bf bf = "\xF4\x8F\xBF\xBF" | 274 // 2.3.4 - U-0010FFFF = f4 8f bf bf = "\xF4\x8F\xBF\xBF" |
275 { | 275 { |
276 const char* src = "\xF4\x8F\xBF\xBF"; | 276 const char* src = "\xF4\x8F\xBF\xBF"; |
277 int32_t expected[] = { 0x10FFFF }; | 277 int32_t expected[] = {0x10FFFF}; |
278 int32_t dst[ARRAY_SIZE(expected)]; | 278 int32_t dst[ARRAY_SIZE(expected)]; |
279 memset(dst, 0, sizeof(dst)); | 279 memset(dst, 0, sizeof(dst)); |
280 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 280 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
281 EXPECT(is_valid); | 281 EXPECT(is_valid); |
282 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 282 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
283 } | 283 } |
284 | 284 |
285 // 2.3.5 - U-00110000 = f4 90 80 80 = "\xF4\x90\x80\x80" | 285 // 2.3.5 - U-00110000 = f4 90 80 80 = "\xF4\x90\x80\x80" |
286 { | 286 { |
287 const char* src = "\xF4\x90\x80\x80"; | 287 const char* src = "\xF4\x90\x80\x80"; |
288 int32_t expected[] = { 0x110000 }; | 288 int32_t expected[] = {0x110000}; |
289 int32_t dst[ARRAY_SIZE(expected)]; | 289 int32_t dst[ARRAY_SIZE(expected)]; |
290 memset(dst, 0, sizeof(dst)); | 290 memset(dst, 0, sizeof(dst)); |
291 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 291 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
292 EXPECT(!is_valid); | 292 EXPECT(!is_valid); |
293 EXPECT(memcmp(expected, dst, sizeof(expected))); | 293 EXPECT(memcmp(expected, dst, sizeof(expected))); |
294 } | 294 } |
295 | 295 |
296 // 3 - Malformed sequences | 296 // 3 - Malformed sequences |
297 | 297 |
298 // 3.1 - Unexpected continuation bytes | 298 // 3.1 - Unexpected continuation bytes |
299 | 299 |
300 // 3.1.1 - First continuation byte 0x80: "\x80" | 300 // 3.1.1 - First continuation byte 0x80: "\x80" |
301 { | 301 { |
302 const char* src = "\x80"; | 302 const char* src = "\x80"; |
303 int32_t expected[] = { 0x80 }; | 303 int32_t expected[] = {0x80}; |
304 int32_t dst[ARRAY_SIZE(expected)]; | 304 int32_t dst[ARRAY_SIZE(expected)]; |
305 memset(dst, 0, sizeof(dst)); | 305 memset(dst, 0, sizeof(dst)); |
306 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 306 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
307 EXPECT(!is_valid); | 307 EXPECT(!is_valid); |
308 EXPECT(memcmp(expected, dst, sizeof(expected))); | 308 EXPECT(memcmp(expected, dst, sizeof(expected))); |
309 } | 309 } |
310 | 310 |
311 // 3.1.2 - Last continuation byte 0xbf: "\xBF" | 311 // 3.1.2 - Last continuation byte 0xbf: "\xBF" |
312 { | 312 { |
313 const char* src = "\xBF"; | 313 const char* src = "\xBF"; |
314 int32_t expected[] = { 0xBF }; | 314 int32_t expected[] = {0xBF}; |
315 int32_t dst[ARRAY_SIZE(expected)]; | 315 int32_t dst[ARRAY_SIZE(expected)]; |
316 memset(dst, 0, sizeof(dst)); | 316 memset(dst, 0, sizeof(dst)); |
317 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 317 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
318 EXPECT(!is_valid); | 318 EXPECT(!is_valid); |
319 EXPECT(memcmp(expected, dst, sizeof(expected))); | 319 EXPECT(memcmp(expected, dst, sizeof(expected))); |
320 } | 320 } |
321 | 321 |
322 // 3.1.3 - 2 continuation bytes: "\x80\xBF" | 322 // 3.1.3 - 2 continuation bytes: "\x80\xBF" |
323 { | 323 { |
324 const char* src = "\x80\xBF"; | 324 const char* src = "\x80\xBF"; |
325 int32_t expected[] = { 0x80, 0xBF }; | 325 int32_t expected[] = {0x80, 0xBF}; |
326 int32_t dst[ARRAY_SIZE(expected)]; | 326 int32_t dst[ARRAY_SIZE(expected)]; |
327 memset(dst, 0, sizeof(dst)); | 327 memset(dst, 0, sizeof(dst)); |
328 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 328 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
329 EXPECT(!is_valid); | 329 EXPECT(!is_valid); |
330 EXPECT(memcmp(expected, dst, sizeof(expected))); | 330 EXPECT(memcmp(expected, dst, sizeof(expected))); |
331 } | 331 } |
332 | 332 |
333 // 3.1.4 - 3 continuation bytes: "\x80\xBF\x80" | 333 // 3.1.4 - 3 continuation bytes: "\x80\xBF\x80" |
334 { | 334 { |
335 const char* src = "\x80\xBF\x80"; | 335 const char* src = "\x80\xBF\x80"; |
336 int32_t expected[] = { 0x80, 0xBF, 0x80 }; | 336 int32_t expected[] = {0x80, 0xBF, 0x80}; |
337 int32_t dst[ARRAY_SIZE(expected)]; | 337 int32_t dst[ARRAY_SIZE(expected)]; |
338 memset(dst, 0, sizeof(dst)); | 338 memset(dst, 0, sizeof(dst)); |
339 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 339 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
340 EXPECT(!is_valid); | 340 EXPECT(!is_valid); |
341 EXPECT(memcmp(expected, dst, sizeof(expected))); | 341 EXPECT(memcmp(expected, dst, sizeof(expected))); |
342 } | 342 } |
343 | 343 |
344 // 3.1.5 - 4 continuation bytes: "\x80\xBF\x80\xBF" | 344 // 3.1.5 - 4 continuation bytes: "\x80\xBF\x80\xBF" |
345 { | 345 { |
346 const char* src = "\x80\xBF\x80\xBF"; | 346 const char* src = "\x80\xBF\x80\xBF"; |
347 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF }; | 347 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF}; |
348 int32_t dst[ARRAY_SIZE(expected)]; | 348 int32_t dst[ARRAY_SIZE(expected)]; |
349 memset(dst, 0, sizeof(dst)); | 349 memset(dst, 0, sizeof(dst)); |
350 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 350 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
351 EXPECT(!is_valid); | 351 EXPECT(!is_valid); |
352 EXPECT(memcmp(expected, dst, sizeof(expected))); | 352 EXPECT(memcmp(expected, dst, sizeof(expected))); |
353 } | 353 } |
354 | 354 |
355 // 3.1.6 - 5 continuation bytes: "\x80\xBF\x80\xBF\x80" | 355 // 3.1.6 - 5 continuation bytes: "\x80\xBF\x80\xBF\x80" |
356 { | 356 { |
357 const char* src = "\x80\xBF\x80\xBF\x80"; | 357 const char* src = "\x80\xBF\x80\xBF\x80"; |
358 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80 }; | 358 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF, 0x80}; |
359 int32_t dst[ARRAY_SIZE(expected)]; | 359 int32_t dst[ARRAY_SIZE(expected)]; |
360 memset(dst, 0, sizeof(dst)); | 360 memset(dst, 0, sizeof(dst)); |
361 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 361 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
362 EXPECT(!is_valid); | 362 EXPECT(!is_valid); |
363 EXPECT(memcmp(expected, dst, sizeof(expected))); | 363 EXPECT(memcmp(expected, dst, sizeof(expected))); |
364 } | 364 } |
365 | 365 |
366 // 3.1.7 - 6 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF" | 366 // 3.1.7 - 6 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF" |
367 { | 367 { |
368 const char* src = "\x80\xBF\x80\xBF\x80\xBF"; | 368 const char* src = "\x80\xBF\x80\xBF\x80\xBF"; |
369 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF }; | 369 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}; |
370 int32_t dst[ARRAY_SIZE(expected)]; | 370 int32_t dst[ARRAY_SIZE(expected)]; |
371 memset(dst, 0, sizeof(dst)); | 371 memset(dst, 0, sizeof(dst)); |
372 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 372 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
373 EXPECT(!is_valid); | 373 EXPECT(!is_valid); |
374 EXPECT(memcmp(expected, dst, sizeof(expected))); | 374 EXPECT(memcmp(expected, dst, sizeof(expected))); |
375 } | 375 } |
376 | 376 |
377 // 3.1.8 - 7 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF\x80" | 377 // 3.1.8 - 7 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF\x80" |
378 { | 378 { |
379 const char* src = "\x80\xBF\x80\xBF\x80\xBF\x80"; | 379 const char* src = "\x80\xBF\x80\xBF\x80\xBF\x80"; |
380 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 }; | 380 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80}; |
381 int32_t dst[ARRAY_SIZE(expected)]; | 381 int32_t dst[ARRAY_SIZE(expected)]; |
382 memset(dst, 0, sizeof(dst)); | 382 memset(dst, 0, sizeof(dst)); |
383 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 383 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
384 EXPECT(!is_valid); | 384 EXPECT(!is_valid); |
385 EXPECT(memcmp(expected, dst, sizeof(expected))); | 385 EXPECT(memcmp(expected, dst, sizeof(expected))); |
386 } | 386 } |
387 | 387 |
388 // 3.1.9 - Sequence of all 64 possible continuation bytes (0x80-0xbf): | 388 // 3.1.9 - Sequence of all 64 possible continuation bytes (0x80-0xbf): |
389 { | 389 { |
390 const char* src = "\x80\x81\x82\x83\x84\x85\x86\x87" | 390 const char* src = |
391 "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" | 391 "\x80\x81\x82\x83\x84\x85\x86\x87" |
392 "\x90\x91\x92\x93\x94\x95\x96\x97" | 392 "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" |
393 "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F" | 393 "\x90\x91\x92\x93\x94\x95\x96\x97" |
394 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7" | 394 "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F" |
395 "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" | 395 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7" |
396 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7" | 396 "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" |
397 "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"; | 397 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7" |
398 int32_t expected[] = { 0x0 }; | 398 "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"; |
| 399 int32_t expected[] = {0x0}; |
399 int32_t dst[ARRAY_SIZE(expected)]; | 400 int32_t dst[ARRAY_SIZE(expected)]; |
400 for (size_t i = 0; i < strlen(src); ++i) { | 401 for (size_t i = 0; i < strlen(src); ++i) { |
401 memset(dst, 0xFF, sizeof(dst)); | 402 memset(dst, 0xFF, sizeof(dst)); |
402 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); | 403 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
403 EXPECT(!is_valid); | 404 EXPECT(!is_valid); |
404 EXPECT(memcmp(expected, dst, sizeof(expected))); | 405 EXPECT(memcmp(expected, dst, sizeof(expected))); |
405 } | 406 } |
406 } | 407 } |
407 | 408 |
408 // 3.2 - Lonely start character | 409 // 3.2 - Lonely start character |
409 | 410 |
410 // 3.2.1 - All 32 first bytes of 2-byte sequences (0xc0-0xdf), each | 411 // 3.2.1 - All 32 first bytes of 2-byte sequences (0xc0-0xdf), each |
411 // followed by a space character: | 412 // followed by a space character: |
412 { | 413 { |
413 const char* src = "\xC0\x20\xC1\x20\xC2\x20\xC3\x20" | 414 const char* src = |
414 "\xC4\x20\xC5\x20\xC6\x20\xC7\x20" | 415 "\xC0\x20\xC1\x20\xC2\x20\xC3\x20" |
415 "\xC8\x20\xC9\x20\xCA\x20\xCB\x20" | 416 "\xC4\x20\xC5\x20\xC6\x20\xC7\x20" |
416 "\xCC\x20\xCD\x20\xCE\x20\xCF\x20" | 417 "\xC8\x20\xC9\x20\xCA\x20\xCB\x20" |
417 "\xD0\x20\xD1\x20\xD2\x20\xD3\x20" | 418 "\xCC\x20\xCD\x20\xCE\x20\xCF\x20" |
418 "\xD4\x20\xD5\x20\xD6\x20\xD7\x20" | 419 "\xD0\x20\xD1\x20\xD2\x20\xD3\x20" |
419 "\xD8\x20\xD9\x20\xDA\x20\xDB\x20" | 420 "\xD4\x20\xD5\x20\xD6\x20\xD7\x20" |
420 "\xDC\x20\xDD\x20\xDE\x20\xDF\x20"; | 421 "\xD8\x20\xD9\x20\xDA\x20\xDB\x20" |
421 int32_t expected[] = { 0x0 }; | 422 "\xDC\x20\xDD\x20\xDE\x20\xDF\x20"; |
| 423 int32_t expected[] = {0x0}; |
422 int32_t dst[ARRAY_SIZE(expected)]; | 424 int32_t dst[ARRAY_SIZE(expected)]; |
423 for (size_t i = 0; i < strlen(src); i += 2) { | 425 for (size_t i = 0; i < strlen(src); i += 2) { |
424 memset(dst, 0xFF, sizeof(dst)); | 426 memset(dst, 0xFF, sizeof(dst)); |
425 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); | 427 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
426 EXPECT(!is_valid); | 428 EXPECT(!is_valid); |
427 EXPECT(memcmp(expected, dst, sizeof(expected))); | 429 EXPECT(memcmp(expected, dst, sizeof(expected))); |
428 } | 430 } |
429 } | 431 } |
430 | 432 |
431 // 3.2.2 - All 16 first bytes of 3-byte sequences (0xe0-0xef), each | 433 // 3.2.2 - All 16 first bytes of 3-byte sequences (0xe0-0xef), each |
432 // followed by a space character: | 434 // followed by a space character: |
433 { | 435 { |
434 const char* src = "\xE0\x20\xE1\x20\xE2\x20\xE3\x20" | 436 const char* src = |
435 "\xE4\x20\xE5\x20\xE6\x20\xE7\x20" | 437 "\xE0\x20\xE1\x20\xE2\x20\xE3\x20" |
436 "\xE8\x20\xE9\x20\xEA\x20\xEB\x20" | 438 "\xE4\x20\xE5\x20\xE6\x20\xE7\x20" |
437 "\xEC\x20\xED\x20\xEE\x20\xEF\x20"; | 439 "\xE8\x20\xE9\x20\xEA\x20\xEB\x20" |
438 int32_t expected[] = { 0x0 }; | 440 "\xEC\x20\xED\x20\xEE\x20\xEF\x20"; |
| 441 int32_t expected[] = {0x0}; |
439 int32_t dst[ARRAY_SIZE(expected)]; | 442 int32_t dst[ARRAY_SIZE(expected)]; |
440 for (size_t i = 0; i < strlen(src); i += 2) { | 443 for (size_t i = 0; i < strlen(src); i += 2) { |
441 memset(dst, 0xFF, sizeof(dst)); | 444 memset(dst, 0xFF, sizeof(dst)); |
442 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); | 445 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
443 EXPECT(!is_valid); | 446 EXPECT(!is_valid); |
444 EXPECT(memcmp(expected, dst, sizeof(expected))); | 447 EXPECT(memcmp(expected, dst, sizeof(expected))); |
445 } | 448 } |
446 } | 449 } |
447 | 450 |
448 // 3.2.3 - All 8 first bytes of 4-byte sequences (0xf0-0xf7), each | 451 // 3.2.3 - All 8 first bytes of 4-byte sequences (0xf0-0xf7), each |
449 // followed by a space character: | 452 // followed by a space character: |
450 { | 453 { |
451 const char* src = "\xF0\x20\xF1\x20\xF2\x20\xF3\x20" | 454 const char* src = |
452 "\xF4\x20\xF5\x20\xF6\x20\xF7\x20"; | 455 "\xF0\x20\xF1\x20\xF2\x20\xF3\x20" |
453 int32_t expected[] = { 0x0 }; | 456 "\xF4\x20\xF5\x20\xF6\x20\xF7\x20"; |
| 457 int32_t expected[] = {0x0}; |
454 int32_t dst[ARRAY_SIZE(expected)]; | 458 int32_t dst[ARRAY_SIZE(expected)]; |
455 for (size_t i = 0; i < strlen(src); i += 2) { | 459 for (size_t i = 0; i < strlen(src); i += 2) { |
456 memset(dst, 0xFF, sizeof(dst)); | 460 memset(dst, 0xFF, sizeof(dst)); |
457 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); | 461 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
458 EXPECT(!is_valid); | 462 EXPECT(!is_valid); |
459 EXPECT(memcmp(expected, dst, sizeof(expected))); | 463 EXPECT(memcmp(expected, dst, sizeof(expected))); |
460 } | 464 } |
461 } | 465 } |
462 | 466 |
463 // 3.2.4 - All 4 first bytes of 5-byte sequences (0xf8-0xfb), each | 467 // 3.2.4 - All 4 first bytes of 5-byte sequences (0xf8-0xfb), each |
464 // followed by a space character: | 468 // followed by a space character: |
465 { | 469 { |
466 const char* src = "\xF8\x20\xF9\x20\xFA\x20\xFB\x20"; | 470 const char* src = "\xF8\x20\xF9\x20\xFA\x20\xFB\x20"; |
467 int32_t expected[] = { 0x0 }; | 471 int32_t expected[] = {0x0}; |
468 int32_t dst[ARRAY_SIZE(expected)]; | 472 int32_t dst[ARRAY_SIZE(expected)]; |
469 for (size_t i = 0; i < strlen(src); i += 2) { | 473 for (size_t i = 0; i < strlen(src); i += 2) { |
470 memset(dst, 0xFF, sizeof(dst)); | 474 memset(dst, 0xFF, sizeof(dst)); |
471 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); | 475 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
472 EXPECT(!is_valid); | 476 EXPECT(!is_valid); |
473 EXPECT(memcmp(expected, dst, sizeof(expected))); | 477 EXPECT(memcmp(expected, dst, sizeof(expected))); |
474 } | 478 } |
475 } | 479 } |
476 | 480 |
477 // 3.2.5 - All 2 first bytes of 6-byte sequences (0xfc-0xfd), each | 481 // 3.2.5 - All 2 first bytes of 6-byte sequences (0xfc-0xfd), each |
478 // followed by a space character: | 482 // followed by a space character: |
479 { | 483 { |
480 const char* src = "\xFC\x20\xFD\x20"; | 484 const char* src = "\xFC\x20\xFD\x20"; |
481 int32_t expected[] = { 0x0 }; | 485 int32_t expected[] = {0x0}; |
482 int32_t dst[ARRAY_SIZE(expected)]; | 486 int32_t dst[ARRAY_SIZE(expected)]; |
483 for (size_t i = 0; i < strlen(src); i += 2) { | 487 for (size_t i = 0; i < strlen(src); i += 2) { |
484 memset(dst, 0xFF, sizeof(dst)); | 488 memset(dst, 0xFF, sizeof(dst)); |
485 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); | 489 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
486 EXPECT(!is_valid); | 490 EXPECT(!is_valid); |
487 EXPECT(memcmp(expected, dst, sizeof(expected))); | 491 EXPECT(memcmp(expected, dst, sizeof(expected))); |
488 } | 492 } |
489 } | 493 } |
490 | 494 |
491 // 3.3 - Sequences with last continuation byte missing | 495 // 3.3 - Sequences with last continuation byte missing |
492 | 496 |
493 // 3.3.1 - 2-byte sequence with last byte missing (U+0000): "\xC0" | 497 // 3.3.1 - 2-byte sequence with last byte missing (U+0000): "\xC0" |
494 { | 498 { |
495 const char* src = "\xC0"; | 499 const char* src = "\xC0"; |
496 int32_t expected[] = { 0x0 }; | 500 int32_t expected[] = {0x0}; |
497 int32_t dst[ARRAY_SIZE(expected)]; | 501 int32_t dst[ARRAY_SIZE(expected)]; |
498 memset(dst, 0xFF, sizeof(dst)); | 502 memset(dst, 0xFF, sizeof(dst)); |
499 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 503 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
500 EXPECT(!is_valid); | 504 EXPECT(!is_valid); |
501 EXPECT(memcmp(expected, dst, sizeof(expected))); | 505 EXPECT(memcmp(expected, dst, sizeof(expected))); |
502 } | 506 } |
503 | 507 |
504 // 3.3.2 - 3-byte sequence with last byte missing (U+0000): "\xE0\x80" | 508 // 3.3.2 - 3-byte sequence with last byte missing (U+0000): "\xE0\x80" |
505 { | 509 { |
506 const char* src = "\xE0\x80"; | 510 const char* src = "\xE0\x80"; |
507 int32_t expected[] = { 0x0 }; | 511 int32_t expected[] = {0x0}; |
508 int32_t dst[ARRAY_SIZE(expected)]; | 512 int32_t dst[ARRAY_SIZE(expected)]; |
509 memset(dst, 0xFF, sizeof(dst)); | 513 memset(dst, 0xFF, sizeof(dst)); |
510 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 514 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
511 EXPECT(!is_valid); | 515 EXPECT(!is_valid); |
512 EXPECT(memcmp(expected, dst, sizeof(expected))); | 516 EXPECT(memcmp(expected, dst, sizeof(expected))); |
513 } | 517 } |
514 | 518 |
515 // 3.3.3 - 4-byte sequence with last byte missing (U+0000): "\xF0\x80\x80" | 519 // 3.3.3 - 4-byte sequence with last byte missing (U+0000): "\xF0\x80\x80" |
516 { | 520 { |
517 const char* src = "\xF0\x80\x80"; | 521 const char* src = "\xF0\x80\x80"; |
518 int32_t expected[] = { 0x0 }; | 522 int32_t expected[] = {0x0}; |
519 int32_t dst[ARRAY_SIZE(expected)]; | 523 int32_t dst[ARRAY_SIZE(expected)]; |
520 memset(dst, 0xFF, sizeof(dst)); | 524 memset(dst, 0xFF, sizeof(dst)); |
521 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 525 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
522 EXPECT(!is_valid); | 526 EXPECT(!is_valid); |
523 EXPECT(memcmp(expected, dst, sizeof(expected))); | 527 EXPECT(memcmp(expected, dst, sizeof(expected))); |
524 } | 528 } |
525 | 529 |
526 // 3.3.4 - 5-byte sequence with last byte missing (U+0000): "\xF8\x80\x80\x80" | 530 // 3.3.4 - 5-byte sequence with last byte missing (U+0000): "\xF8\x80\x80\x80" |
527 { | 531 { |
528 const char* src = "\xF8\x80\x80\x80"; | 532 const char* src = "\xF8\x80\x80\x80"; |
529 int32_t expected[] = { 0x0 }; | 533 int32_t expected[] = {0x0}; |
530 int32_t dst[ARRAY_SIZE(expected)]; | 534 int32_t dst[ARRAY_SIZE(expected)]; |
531 memset(dst, 0xFF, sizeof(dst)); | 535 memset(dst, 0xFF, sizeof(dst)); |
532 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 536 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
533 EXPECT(!is_valid); | 537 EXPECT(!is_valid); |
534 EXPECT(memcmp(expected, dst, sizeof(expected))); | 538 EXPECT(memcmp(expected, dst, sizeof(expected))); |
535 } | 539 } |
536 | 540 |
537 // 3.3.5 - 6-byte sequence with last byte missing (U+0000): | 541 // 3.3.5 - 6-byte sequence with last byte missing (U+0000): |
538 // "\xFC\x80\x80\x80\x80" | 542 // "\xFC\x80\x80\x80\x80" |
539 { | 543 { |
540 const char* src = "\xFC\x80\x80\x80\x80"; | 544 const char* src = "\xFC\x80\x80\x80\x80"; |
541 int32_t expected[] = { 0x0 }; | 545 int32_t expected[] = {0x0}; |
542 int32_t dst[ARRAY_SIZE(expected)]; | 546 int32_t dst[ARRAY_SIZE(expected)]; |
543 memset(dst, 0xFF, sizeof(dst)); | 547 memset(dst, 0xFF, sizeof(dst)); |
544 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 548 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
545 EXPECT(!is_valid); | 549 EXPECT(!is_valid); |
546 EXPECT(memcmp(expected, dst, sizeof(expected))); | 550 EXPECT(memcmp(expected, dst, sizeof(expected))); |
547 } | 551 } |
548 | 552 |
549 // 3.3.6 - 2-byte sequence with last byte missing (U-000007FF): "\xDF" | 553 // 3.3.6 - 2-byte sequence with last byte missing (U-000007FF): "\xDF" |
550 { | 554 { |
551 const char* src = "\xDF"; | 555 const char* src = "\xDF"; |
552 int32_t expected[] = { 0x0 }; | 556 int32_t expected[] = {0x0}; |
553 int32_t dst[ARRAY_SIZE(expected)]; | 557 int32_t dst[ARRAY_SIZE(expected)]; |
554 memset(dst, 0xFF, sizeof(dst)); | 558 memset(dst, 0xFF, sizeof(dst)); |
555 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 559 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
556 EXPECT(!is_valid); | 560 EXPECT(!is_valid); |
557 EXPECT(memcmp(expected, dst, sizeof(expected))); | 561 EXPECT(memcmp(expected, dst, sizeof(expected))); |
558 } | 562 } |
559 | 563 |
560 // 3.3.7 - 3-byte sequence with last byte missing (U-0000FFFF): "\xEF\xBF" | 564 // 3.3.7 - 3-byte sequence with last byte missing (U-0000FFFF): "\xEF\xBF" |
561 { | 565 { |
562 const char* src = "\xEF\xBF"; | 566 const char* src = "\xEF\xBF"; |
563 int32_t expected[] = { 0x0 }; | 567 int32_t expected[] = {0x0}; |
564 int32_t dst[ARRAY_SIZE(expected)]; | 568 int32_t dst[ARRAY_SIZE(expected)]; |
565 memset(dst, 0xFF, sizeof(dst)); | 569 memset(dst, 0xFF, sizeof(dst)); |
566 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 570 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
567 EXPECT(!is_valid); | 571 EXPECT(!is_valid); |
568 EXPECT(memcmp(expected, dst, sizeof(expected))); | 572 EXPECT(memcmp(expected, dst, sizeof(expected))); |
569 } | 573 } |
570 | 574 |
571 // 3.3.8 - 4-byte sequence with last byte missing (U-001FFFFF): "\xF7\xBF\xBF" | 575 // 3.3.8 - 4-byte sequence with last byte missing (U-001FFFFF): "\xF7\xBF\xBF" |
572 { | 576 { |
573 const char* src = "\xF7\xBF\xBF"; | 577 const char* src = "\xF7\xBF\xBF"; |
574 int32_t expected[] = { 0x0 }; | 578 int32_t expected[] = {0x0}; |
575 int32_t dst[ARRAY_SIZE(expected)]; | 579 int32_t dst[ARRAY_SIZE(expected)]; |
576 memset(dst, 0xFF, sizeof(dst)); | 580 memset(dst, 0xFF, sizeof(dst)); |
577 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 581 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
578 EXPECT(!is_valid); | 582 EXPECT(!is_valid); |
579 EXPECT(memcmp(expected, dst, sizeof(expected))); | 583 EXPECT(memcmp(expected, dst, sizeof(expected))); |
580 } | 584 } |
581 | 585 |
582 // 3.3.9 - 5-byte sequence with last byte missing (U-03FFFFFF): | 586 // 3.3.9 - 5-byte sequence with last byte missing (U-03FFFFFF): |
583 // "\xFB\xBF\xBF\xBF" | 587 // "\xFB\xBF\xBF\xBF" |
584 { | 588 { |
585 const char* src = "\xFB\xBF\xBF\xBF"; | 589 const char* src = "\xFB\xBF\xBF\xBF"; |
586 int32_t expected[] = { 0x0 }; | 590 int32_t expected[] = {0x0}; |
587 int32_t dst[ARRAY_SIZE(expected)]; | 591 int32_t dst[ARRAY_SIZE(expected)]; |
588 memset(dst, 0xFF, sizeof(dst)); | 592 memset(dst, 0xFF, sizeof(dst)); |
589 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 593 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
590 EXPECT(!is_valid); | 594 EXPECT(!is_valid); |
591 EXPECT(memcmp(expected, dst, sizeof(expected))); | 595 EXPECT(memcmp(expected, dst, sizeof(expected))); |
592 } | 596 } |
593 | 597 |
594 // 3.3.10 - 6-byte sequence with last byte missing (U-7FFFFFFF): | 598 // 3.3.10 - 6-byte sequence with last byte missing (U-7FFFFFFF): |
595 // "\xFD\xBF\xBF\xBF\xBF" | 599 // "\xFD\xBF\xBF\xBF\xBF" |
596 { | 600 { |
597 const char* src = "\xFD\xBF\xBF\xBF\xBF"; | 601 const char* src = "\xFD\xBF\xBF\xBF\xBF"; |
598 int32_t expected[] = { 0x0 }; | 602 int32_t expected[] = {0x0}; |
599 int32_t dst[ARRAY_SIZE(expected)]; | 603 int32_t dst[ARRAY_SIZE(expected)]; |
600 memset(dst, 0xFF, sizeof(dst)); | 604 memset(dst, 0xFF, sizeof(dst)); |
601 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 605 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
602 EXPECT(!is_valid); | 606 EXPECT(!is_valid); |
603 EXPECT(memcmp(expected, dst, sizeof(expected))); | 607 EXPECT(memcmp(expected, dst, sizeof(expected))); |
604 } | 608 } |
605 | 609 |
606 // 3.4 - Concatenation of incomplete sequences | 610 // 3.4 - Concatenation of incomplete sequences |
607 { | 611 { |
608 const char* src = "\xC0\xE0\x80\xF0\x80\x80" | 612 const char* src = |
609 "\xF8\x80\x80\x80\xFC\x80" | 613 "\xC0\xE0\x80\xF0\x80\x80" |
610 "\x80\x80\x80\xDF\xEF\xBF" | 614 "\xF8\x80\x80\x80\xFC\x80" |
611 "\xF7\xBF\xBF\xFB\xBF\xBF" | 615 "\x80\x80\x80\xDF\xEF\xBF" |
612 "\xBF\xFD\xBF\xBF\xBF\xBF"; | 616 "\xF7\xBF\xBF\xFB\xBF\xBF" |
613 int32_t expected[] = { 0x0 }; | 617 "\xBF\xFD\xBF\xBF\xBF\xBF"; |
| 618 int32_t expected[] = {0x0}; |
614 int32_t dst[ARRAY_SIZE(expected)]; | 619 int32_t dst[ARRAY_SIZE(expected)]; |
615 for (size_t i = 0; i < strlen(src); ++i) { | 620 for (size_t i = 0; i < strlen(src); ++i) { |
616 for (size_t j = 1; j < (strlen(src) - i); ++j) { | 621 for (size_t j = 1; j < (strlen(src) - i); ++j) { |
617 memset(dst, 0xFF, sizeof(dst)); | 622 memset(dst, 0xFF, sizeof(dst)); |
618 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], | 623 bool is_valid = |
619 dst, ARRAY_SIZE(dst)); | 624 Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
620 EXPECT(!is_valid); | 625 EXPECT(!is_valid); |
621 EXPECT(memcmp(expected, dst, sizeof(expected))); | 626 EXPECT(memcmp(expected, dst, sizeof(expected))); |
622 } | 627 } |
623 } | 628 } |
624 } | 629 } |
625 | 630 |
626 // 3.5 - Impossible bytes | 631 // 3.5 - Impossible bytes |
627 | 632 |
628 // 3.5.1 - fe = "\xFE" | 633 // 3.5.1 - fe = "\xFE" |
629 { | 634 { |
630 const char* src = "\xFE"; | 635 const char* src = "\xFE"; |
631 int32_t expected[] = { 0xFE }; | 636 int32_t expected[] = {0xFE}; |
632 int32_t dst[ARRAY_SIZE(expected)]; | 637 int32_t dst[ARRAY_SIZE(expected)]; |
633 memset(dst, 0, sizeof(dst)); | 638 memset(dst, 0, sizeof(dst)); |
634 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 639 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
635 EXPECT(!is_valid); | 640 EXPECT(!is_valid); |
636 EXPECT(memcmp(expected, dst, sizeof(expected))); | 641 EXPECT(memcmp(expected, dst, sizeof(expected))); |
637 } | 642 } |
638 | 643 |
639 // 3.5.2 - ff = "\xFF" | 644 // 3.5.2 - ff = "\xFF" |
640 { | 645 { |
641 const char* src = "\xFF"; | 646 const char* src = "\xFF"; |
642 int32_t expected[] = { 0xFF }; | 647 int32_t expected[] = {0xFF}; |
643 int32_t dst[ARRAY_SIZE(expected)]; | 648 int32_t dst[ARRAY_SIZE(expected)]; |
644 memset(dst, 0, sizeof(dst)); | 649 memset(dst, 0, sizeof(dst)); |
645 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 650 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
646 EXPECT(!is_valid); | 651 EXPECT(!is_valid); |
647 EXPECT(memcmp(expected, dst, sizeof(expected))); | 652 EXPECT(memcmp(expected, dst, sizeof(expected))); |
648 } | 653 } |
649 | 654 |
650 // 3.5.3 - fe fe ff ff = "\xFE\xFE\xFF\xFF" | 655 // 3.5.3 - fe fe ff ff = "\xFE\xFE\xFF\xFF" |
651 { | 656 { |
652 const char* src = "\xFE\xFE\xFF\xFF"; | 657 const char* src = "\xFE\xFE\xFF\xFF"; |
653 int32_t expected[] = { 0xFF }; | 658 int32_t expected[] = {0xFF}; |
654 int32_t dst[ARRAY_SIZE(expected)]; | 659 int32_t dst[ARRAY_SIZE(expected)]; |
655 memset(dst, 0, sizeof(dst)); | 660 memset(dst, 0, sizeof(dst)); |
656 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 661 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
657 EXPECT(!is_valid); | 662 EXPECT(!is_valid); |
658 EXPECT(memcmp(expected, dst, sizeof(expected))); | 663 EXPECT(memcmp(expected, dst, sizeof(expected))); |
659 } | 664 } |
660 | 665 |
661 // 4 - Overlong sequences | 666 // 4 - Overlong sequences |
662 | 667 |
663 // 4.1 - Examples of an overlong ASCII character | 668 // 4.1 - Examples of an overlong ASCII character |
664 | 669 |
665 // 4.1.1 - U+002F = c0 af = "\xC0\xAF" | 670 // 4.1.1 - U+002F = c0 af = "\xC0\xAF" |
666 { | 671 { |
667 const char* src = "\xC0\xAF"; | 672 const char* src = "\xC0\xAF"; |
668 int32_t expected[] = { 0x2F }; | 673 int32_t expected[] = {0x2F}; |
669 int32_t dst[ARRAY_SIZE(expected)]; | 674 int32_t dst[ARRAY_SIZE(expected)]; |
670 memset(dst, 0, sizeof(dst)); | 675 memset(dst, 0, sizeof(dst)); |
671 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 676 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
672 EXPECT(!is_valid); | 677 EXPECT(!is_valid); |
673 EXPECT(memcmp(expected, dst, sizeof(expected))); | 678 EXPECT(memcmp(expected, dst, sizeof(expected))); |
674 } | 679 } |
675 | 680 |
676 // 4.1.2 - U+002F = e0 80 af = "\xE0\x80\xAF" | 681 // 4.1.2 - U+002F = e0 80 af = "\xE0\x80\xAF" |
677 { | 682 { |
678 const char* src = "\xE0\x80\xAF"; | 683 const char* src = "\xE0\x80\xAF"; |
679 int32_t expected[] = { 0x2F }; | 684 int32_t expected[] = {0x2F}; |
680 int32_t dst[ARRAY_SIZE(expected)]; | 685 int32_t dst[ARRAY_SIZE(expected)]; |
681 memset(dst, 0, sizeof(dst)); | 686 memset(dst, 0, sizeof(dst)); |
682 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 687 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
683 EXPECT(!is_valid); | 688 EXPECT(!is_valid); |
684 EXPECT(memcmp(expected, dst, sizeof(expected))); | 689 EXPECT(memcmp(expected, dst, sizeof(expected))); |
685 } | 690 } |
686 | 691 |
687 // 4.1.3 - U+002F = f0 80 80 af = "\xF0\x80\x80\xAF" | 692 // 4.1.3 - U+002F = f0 80 80 af = "\xF0\x80\x80\xAF" |
688 { | 693 { |
689 const char* src = "\xF0\x80\x80\xAF"; | 694 const char* src = "\xF0\x80\x80\xAF"; |
690 int32_t expected[] = { 0x2F }; | 695 int32_t expected[] = {0x2F}; |
691 int32_t dst[ARRAY_SIZE(expected)]; | 696 int32_t dst[ARRAY_SIZE(expected)]; |
692 memset(dst, 0, sizeof(dst)); | 697 memset(dst, 0, sizeof(dst)); |
693 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 698 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
694 EXPECT(!is_valid); | 699 EXPECT(!is_valid); |
695 EXPECT(memcmp(expected, dst, sizeof(expected))); | 700 EXPECT(memcmp(expected, dst, sizeof(expected))); |
696 } | 701 } |
697 | 702 |
698 // 4.1.4 - U+002F = f8 80 80 80 af = "\xF8\x80\x80\x80\xAF" | 703 // 4.1.4 - U+002F = f8 80 80 80 af = "\xF8\x80\x80\x80\xAF" |
699 { | 704 { |
700 const char* src = "\xF8\x80\x80\x80\xAF"; | 705 const char* src = "\xF8\x80\x80\x80\xAF"; |
701 int32_t expected[] = { 0x2F }; | 706 int32_t expected[] = {0x2F}; |
702 int32_t dst[ARRAY_SIZE(expected)]; | 707 int32_t dst[ARRAY_SIZE(expected)]; |
703 memset(dst, 0, sizeof(dst)); | 708 memset(dst, 0, sizeof(dst)); |
704 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 709 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
705 EXPECT(!is_valid); | 710 EXPECT(!is_valid); |
706 EXPECT(memcmp(expected, dst, sizeof(expected))); | 711 EXPECT(memcmp(expected, dst, sizeof(expected))); |
707 } | 712 } |
708 | 713 |
709 // 4.1.5 - U+002F = fc 80 80 80 80 af = "\xFC\x80\x80\x80\x80\xAF" | 714 // 4.1.5 - U+002F = fc 80 80 80 80 af = "\xFC\x80\x80\x80\x80\xAF" |
710 { | 715 { |
711 const char* src = "\xFC\x80\x80\x80\x80\xAF"; | 716 const char* src = "\xFC\x80\x80\x80\x80\xAF"; |
712 int32_t expected[] = { 0x2F }; | 717 int32_t expected[] = {0x2F}; |
713 int32_t dst[ARRAY_SIZE(expected)]; | 718 int32_t dst[ARRAY_SIZE(expected)]; |
714 memset(dst, 0, sizeof(dst)); | 719 memset(dst, 0, sizeof(dst)); |
715 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 720 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
716 EXPECT(!is_valid); | 721 EXPECT(!is_valid); |
717 EXPECT(memcmp(expected, dst, sizeof(expected))); | 722 EXPECT(memcmp(expected, dst, sizeof(expected))); |
718 } | 723 } |
719 | 724 |
720 // 4.2 Maximum overlong sequences | 725 // 4.2 Maximum overlong sequences |
721 | 726 |
722 // 4.2.1 - U-0000007F = c1 bf = "\xC1\xBF" | 727 // 4.2.1 - U-0000007F = c1 bf = "\xC1\xBF" |
723 { | 728 { |
724 const char* src = "\xC1\xBF"; | 729 const char* src = "\xC1\xBF"; |
725 int32_t expected[] = { 0x7F }; | 730 int32_t expected[] = {0x7F}; |
726 int32_t dst[ARRAY_SIZE(expected)]; | 731 int32_t dst[ARRAY_SIZE(expected)]; |
727 memset(dst, 0, sizeof(dst)); | 732 memset(dst, 0, sizeof(dst)); |
728 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 733 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
729 EXPECT(!is_valid); | 734 EXPECT(!is_valid); |
730 EXPECT(memcmp(expected, dst, sizeof(expected))); | 735 EXPECT(memcmp(expected, dst, sizeof(expected))); |
731 } | 736 } |
732 | 737 |
733 // 4.2.2 U+000007FF = e0 9f bf = "\xE0\x9F\xBF" | 738 // 4.2.2 U+000007FF = e0 9f bf = "\xE0\x9F\xBF" |
734 { | 739 { |
735 const char* src = "\xE0\x9F\xBF"; | 740 const char* src = "\xE0\x9F\xBF"; |
736 int32_t expected[] = { 0x7FF }; | 741 int32_t expected[] = {0x7FF}; |
737 int32_t dst[ARRAY_SIZE(expected)]; | 742 int32_t dst[ARRAY_SIZE(expected)]; |
738 memset(dst, 0, sizeof(dst)); | 743 memset(dst, 0, sizeof(dst)); |
739 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 744 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
740 EXPECT(!is_valid); | 745 EXPECT(!is_valid); |
741 EXPECT(memcmp(expected, dst, sizeof(expected))); | 746 EXPECT(memcmp(expected, dst, sizeof(expected))); |
742 } | 747 } |
743 | 748 |
744 // 4.2.3 - U+0000FFFF = f0 8f bf bf = "\xF0\x8F\xBF\xBF" | 749 // 4.2.3 - U+0000FFFF = f0 8f bf bf = "\xF0\x8F\xBF\xBF" |
745 { | 750 { |
746 const char* src = "\xF0\x8F\xBF\xBF"; | 751 const char* src = "\xF0\x8F\xBF\xBF"; |
747 int32_t expected[] = { 0xFFFF }; | 752 int32_t expected[] = {0xFFFF}; |
748 int32_t dst[ARRAY_SIZE(expected)]; | 753 int32_t dst[ARRAY_SIZE(expected)]; |
749 memset(dst, 0, sizeof(dst)); | 754 memset(dst, 0, sizeof(dst)); |
750 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 755 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
751 EXPECT(!is_valid); | 756 EXPECT(!is_valid); |
752 EXPECT(memcmp(expected, dst, sizeof(expected))); | 757 EXPECT(memcmp(expected, dst, sizeof(expected))); |
753 } | 758 } |
754 | 759 |
755 // 4.2.4 U-001FFFFF = f8 87 bf bf bf = "\xF8\x87\xBF\xBF\xBF" | 760 // 4.2.4 U-001FFFFF = f8 87 bf bf bf = "\xF8\x87\xBF\xBF\xBF" |
756 { | 761 { |
757 const char* src = "\xF8\x87\xBF\xBF\xBF"; | 762 const char* src = "\xF8\x87\xBF\xBF\xBF"; |
758 int32_t expected[] = { 0x1FFFFF }; | 763 int32_t expected[] = {0x1FFFFF}; |
759 int32_t dst[ARRAY_SIZE(expected)]; | 764 int32_t dst[ARRAY_SIZE(expected)]; |
760 memset(dst, 0, sizeof(dst)); | 765 memset(dst, 0, sizeof(dst)); |
761 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 766 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
762 EXPECT(!is_valid); | 767 EXPECT(!is_valid); |
763 EXPECT(memcmp(expected, dst, sizeof(expected))); | 768 EXPECT(memcmp(expected, dst, sizeof(expected))); |
764 } | 769 } |
765 | 770 |
766 // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "\xFC\x83\xBF\xBF\xBF\xBF" | 771 // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "\xFC\x83\xBF\xBF\xBF\xBF" |
767 { | 772 { |
768 const char* src = "\xFC\x83\xBF\xBF\xBF\xBF"; | 773 const char* src = "\xFC\x83\xBF\xBF\xBF\xBF"; |
769 int32_t expected[] = { 0x3FFFFFF }; | 774 int32_t expected[] = {0x3FFFFFF}; |
770 int32_t dst[ARRAY_SIZE(expected)]; | 775 int32_t dst[ARRAY_SIZE(expected)]; |
771 memset(dst, 0, sizeof(dst)); | 776 memset(dst, 0, sizeof(dst)); |
772 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 777 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
773 EXPECT(!is_valid); | 778 EXPECT(!is_valid); |
774 EXPECT(memcmp(expected, dst, sizeof(expected))); | 779 EXPECT(memcmp(expected, dst, sizeof(expected))); |
775 } | 780 } |
776 | 781 |
777 // 4.3 - Overlong representation of the NUL character | 782 // 4.3 - Overlong representation of the NUL character |
778 | 783 |
779 // 4.3.1 - U+0000 = "\xC0\x80" | 784 // 4.3.1 - U+0000 = "\xC0\x80" |
780 { | 785 { |
781 const char* src = "\xC0\x80"; | 786 const char* src = "\xC0\x80"; |
782 int32_t expected[] = { 0x0 }; | 787 int32_t expected[] = {0x0}; |
783 int32_t dst[ARRAY_SIZE(expected)]; | 788 int32_t dst[ARRAY_SIZE(expected)]; |
784 memset(dst, 0xFF, sizeof(dst)); | 789 memset(dst, 0xFF, sizeof(dst)); |
785 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 790 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
786 EXPECT(!is_valid); | 791 EXPECT(!is_valid); |
787 EXPECT(memcmp(expected, dst, sizeof(expected))); | 792 EXPECT(memcmp(expected, dst, sizeof(expected))); |
788 } | 793 } |
789 | 794 |
790 // 4.3.2 U+0000 = e0 80 80 = "\xE0\x80\x80" | 795 // 4.3.2 U+0000 = e0 80 80 = "\xE0\x80\x80" |
791 { | 796 { |
792 const char* src = "\xE0\x80\x80"; | 797 const char* src = "\xE0\x80\x80"; |
793 int32_t expected[] = { 0x0 }; | 798 int32_t expected[] = {0x0}; |
794 int32_t dst[ARRAY_SIZE(expected)]; | 799 int32_t dst[ARRAY_SIZE(expected)]; |
795 memset(dst, 0xFF, sizeof(dst)); | 800 memset(dst, 0xFF, sizeof(dst)); |
796 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 801 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
797 EXPECT(!is_valid); | 802 EXPECT(!is_valid); |
798 EXPECT(memcmp(expected, dst, sizeof(expected))); | 803 EXPECT(memcmp(expected, dst, sizeof(expected))); |
799 } | 804 } |
800 | 805 |
801 // 4.3.3 U+0000 = f0 80 80 80 = "\xF0\x80\x80\x80" | 806 // 4.3.3 U+0000 = f0 80 80 80 = "\xF0\x80\x80\x80" |
802 { | 807 { |
803 const char* src = "\xF0\x80\x80\x80"; | 808 const char* src = "\xF0\x80\x80\x80"; |
804 int32_t expected[] = { 0x0 }; | 809 int32_t expected[] = {0x0}; |
805 int32_t dst[ARRAY_SIZE(expected)]; | 810 int32_t dst[ARRAY_SIZE(expected)]; |
806 memset(dst, 0xFF, sizeof(dst)); | 811 memset(dst, 0xFF, sizeof(dst)); |
807 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 812 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
808 EXPECT(!is_valid); | 813 EXPECT(!is_valid); |
809 EXPECT(memcmp(expected, dst, sizeof(expected))); | 814 EXPECT(memcmp(expected, dst, sizeof(expected))); |
810 } | 815 } |
811 | 816 |
812 // 4.3.4 U+0000 = f8 80 80 80 80 = "\xF8\x80\x80\x80\x80" | 817 // 4.3.4 U+0000 = f8 80 80 80 80 = "\xF8\x80\x80\x80\x80" |
813 { | 818 { |
814 const char* src = "\xF8\x80\x80\x80\x80"; | 819 const char* src = "\xF8\x80\x80\x80\x80"; |
815 int32_t expected[] = { 0x0 }; | 820 int32_t expected[] = {0x0}; |
816 int32_t dst[ARRAY_SIZE(expected)]; | 821 int32_t dst[ARRAY_SIZE(expected)]; |
817 memset(dst, 0xFF, sizeof(dst)); | 822 memset(dst, 0xFF, sizeof(dst)); |
818 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 823 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
819 EXPECT(!is_valid); | 824 EXPECT(!is_valid); |
820 EXPECT(memcmp(expected, dst, sizeof(expected))); | 825 EXPECT(memcmp(expected, dst, sizeof(expected))); |
821 } | 826 } |
822 | 827 |
823 // 4.3.5 U+0000 = fc 80 80 80 80 80 = "\xFC\x80\x80\x80\x80\x80" | 828 // 4.3.5 U+0000 = fc 80 80 80 80 80 = "\xFC\x80\x80\x80\x80\x80" |
824 { | 829 { |
825 const char* src = "\xFC\x80\x80\x80\x80\x80"; | 830 const char* src = "\xFC\x80\x80\x80\x80\x80"; |
826 int32_t expected[] = { 0x0 }; | 831 int32_t expected[] = {0x0}; |
827 int32_t dst[ARRAY_SIZE(expected)]; | 832 int32_t dst[ARRAY_SIZE(expected)]; |
828 memset(dst, 0xFF, sizeof(dst)); | 833 memset(dst, 0xFF, sizeof(dst)); |
829 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 834 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
830 EXPECT(!is_valid); | 835 EXPECT(!is_valid); |
831 EXPECT(memcmp(expected, dst, sizeof(expected))); | 836 EXPECT(memcmp(expected, dst, sizeof(expected))); |
832 } | 837 } |
833 | 838 |
834 // 5.1 - Single UTF-16 surrogates | 839 // 5.1 - Single UTF-16 surrogates |
835 // UTF-8 suggests single surrogates are invalid, but both JS and | 840 // UTF-8 suggests single surrogates are invalid, but both JS and |
836 // Dart allow them and make use of them. | 841 // Dart allow them and make use of them. |
837 | 842 |
838 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80" | 843 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80" |
839 { | 844 { |
840 const char* src = "\xED\xA0\x80"; | 845 const char* src = "\xED\xA0\x80"; |
841 int32_t expected[] = { 0xD800 }; | 846 int32_t expected[] = {0xD800}; |
842 int32_t dst[ARRAY_SIZE(expected)]; | 847 int32_t dst[ARRAY_SIZE(expected)]; |
843 memset(dst, 0, sizeof(dst)); | 848 memset(dst, 0, sizeof(dst)); |
844 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 849 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
845 EXPECT(is_valid); | 850 EXPECT(is_valid); |
846 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 851 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
847 } | 852 } |
848 | 853 |
849 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF" | 854 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF" |
850 { | 855 { |
851 const char* src = "\xED\xAD\xBF"; | 856 const char* src = "\xED\xAD\xBF"; |
852 int32_t expected[] = { 0xDB7F }; | 857 int32_t expected[] = {0xDB7F}; |
853 int32_t dst[ARRAY_SIZE(expected)]; | 858 int32_t dst[ARRAY_SIZE(expected)]; |
854 memset(dst, 0, sizeof(dst)); | 859 memset(dst, 0, sizeof(dst)); |
855 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 860 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
856 EXPECT(is_valid); | 861 EXPECT(is_valid); |
857 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 862 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
858 } | 863 } |
859 | 864 |
860 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80" | 865 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80" |
861 { | 866 { |
862 const char* src = "\xED\xAE\x80"; | 867 const char* src = "\xED\xAE\x80"; |
863 int32_t expected[] = { 0xDB80 }; | 868 int32_t expected[] = {0xDB80}; |
864 int32_t dst[ARRAY_SIZE(expected)]; | 869 int32_t dst[ARRAY_SIZE(expected)]; |
865 memset(dst, 0, sizeof(dst)); | 870 memset(dst, 0, sizeof(dst)); |
866 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 871 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
867 EXPECT(is_valid); | 872 EXPECT(is_valid); |
868 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 873 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
869 } | 874 } |
870 | 875 |
871 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF" | 876 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF" |
872 { | 877 { |
873 const char* src = "\xED\xAF\xBF"; | 878 const char* src = "\xED\xAF\xBF"; |
874 int32_t expected[] = { 0xDBFF }; | 879 int32_t expected[] = {0xDBFF}; |
875 int32_t dst[ARRAY_SIZE(expected)]; | 880 int32_t dst[ARRAY_SIZE(expected)]; |
876 memset(dst, 0, sizeof(dst)); | 881 memset(dst, 0, sizeof(dst)); |
877 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 882 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
878 EXPECT(is_valid); | 883 EXPECT(is_valid); |
879 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 884 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
880 } | 885 } |
881 | 886 |
882 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80" | 887 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80" |
883 { | 888 { |
884 const char* src = "\xED\xB0\x80"; | 889 const char* src = "\xED\xB0\x80"; |
885 int32_t expected[] = { 0xDC00 }; | 890 int32_t expected[] = {0xDC00}; |
886 int32_t dst[ARRAY_SIZE(expected)]; | 891 int32_t dst[ARRAY_SIZE(expected)]; |
887 memset(dst, 0, sizeof(dst)); | 892 memset(dst, 0, sizeof(dst)); |
888 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 893 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
889 EXPECT(is_valid); | 894 EXPECT(is_valid); |
890 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 895 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
891 } | 896 } |
892 | 897 |
893 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80" | 898 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80" |
894 { | 899 { |
895 const char* src = "\xED\xBE\x80"; | 900 const char* src = "\xED\xBE\x80"; |
896 int32_t expected[] = { 0xDF80 }; | 901 int32_t expected[] = {0xDF80}; |
897 int32_t dst[ARRAY_SIZE(expected)]; | 902 int32_t dst[ARRAY_SIZE(expected)]; |
898 memset(dst, 0, sizeof(dst)); | 903 memset(dst, 0, sizeof(dst)); |
899 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 904 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
900 EXPECT(is_valid); | 905 EXPECT(is_valid); |
901 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 906 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
902 } | 907 } |
903 | 908 |
904 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF" | 909 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF" |
905 { | 910 { |
906 const char* src = "\xED\xBF\xBF"; | 911 const char* src = "\xED\xBF\xBF"; |
907 int32_t expected[] = { 0xDFFF }; | 912 int32_t expected[] = {0xDFFF}; |
908 int32_t dst[ARRAY_SIZE(expected)]; | 913 int32_t dst[ARRAY_SIZE(expected)]; |
909 memset(dst, 0, sizeof(dst)); | 914 memset(dst, 0, sizeof(dst)); |
910 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 915 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
911 EXPECT(is_valid); | 916 EXPECT(is_valid); |
912 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 917 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
913 } | 918 } |
914 | 919 |
915 // 5.2 Paired UTF-16 surrogates | 920 // 5.2 Paired UTF-16 surrogates |
916 // Also not a valid string, but accepted in Dart, even if it doesn't make | 921 // Also not a valid string, but accepted in Dart, even if it doesn't make |
917 // sense. e.g. | 922 // sense. e.g. |
918 // var s = new String.fromCharCodes([0xd800, 0xDC00]); | 923 // var s = new String.fromCharCodes([0xd800, 0xDC00]); |
919 // print(s.runes); // (65536) (0x10000) | 924 // print(s.runes); // (65536) (0x10000) |
920 // print(s.codeUnits); // [55296, 56320] | 925 // print(s.codeUnits); // [55296, 56320] |
921 | 926 |
922 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80" | 927 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80" |
923 { | 928 { |
924 const char* src = "\xED\xA0\x80\xED\xB0\x80"; | 929 const char* src = "\xED\xA0\x80\xED\xB0\x80"; |
925 int32_t expected[] = { 0xD800, 0xDC00 }; | 930 int32_t expected[] = {0xD800, 0xDC00}; |
926 int32_t dst[ARRAY_SIZE(expected)]; | 931 int32_t dst[ARRAY_SIZE(expected)]; |
927 memset(dst, 0, sizeof(dst)); | 932 memset(dst, 0, sizeof(dst)); |
928 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 933 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
929 EXPECT(is_valid); | 934 EXPECT(is_valid); |
930 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 935 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
931 } | 936 } |
932 | 937 |
933 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF" | 938 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF" |
934 { | 939 { |
935 const char* src = "\xED\xA0\x80\xED\xBF\xBF"; | 940 const char* src = "\xED\xA0\x80\xED\xBF\xBF"; |
936 int32_t expected[] = { 0xD800, 0xDFFF }; | 941 int32_t expected[] = {0xD800, 0xDFFF}; |
937 int32_t dst[ARRAY_SIZE(expected)]; | 942 int32_t dst[ARRAY_SIZE(expected)]; |
938 memset(dst, 0, sizeof(dst)); | 943 memset(dst, 0, sizeof(dst)); |
939 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 944 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
940 EXPECT(is_valid); | 945 EXPECT(is_valid); |
941 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 946 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
942 } | 947 } |
943 | 948 |
944 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80" | 949 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80" |
945 { | 950 { |
946 const char* src = "\xED\xAD\xBF\xED\xB0\x80"; | 951 const char* src = "\xED\xAD\xBF\xED\xB0\x80"; |
947 int32_t expected[] = { 0xDB7F, 0xDC00 }; | 952 int32_t expected[] = {0xDB7F, 0xDC00}; |
948 int32_t dst[ARRAY_SIZE(expected)]; | 953 int32_t dst[ARRAY_SIZE(expected)]; |
949 memset(dst, 0, sizeof(dst)); | 954 memset(dst, 0, sizeof(dst)); |
950 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 955 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
951 EXPECT(is_valid); | 956 EXPECT(is_valid); |
952 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 957 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
953 } | 958 } |
954 | 959 |
955 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF" | 960 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF" |
956 { | 961 { |
957 const char* src = "\xED\xAD\xBF\xED\xBF\xBF"; | 962 const char* src = "\xED\xAD\xBF\xED\xBF\xBF"; |
958 int32_t expected[] = { 0xDB7F, 0xDFFF }; | 963 int32_t expected[] = {0xDB7F, 0xDFFF}; |
959 int32_t dst[ARRAY_SIZE(expected)]; | 964 int32_t dst[ARRAY_SIZE(expected)]; |
960 memset(dst, 0, sizeof(dst)); | 965 memset(dst, 0, sizeof(dst)); |
961 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 966 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
962 EXPECT(is_valid); | 967 EXPECT(is_valid); |
963 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 968 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
964 } | 969 } |
965 | 970 |
966 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80" | 971 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80" |
967 { | 972 { |
968 const char* src = "\xED\xAE\x80\xED\xB0\x80"; | 973 const char* src = "\xED\xAE\x80\xED\xB0\x80"; |
969 int32_t expected[] = { 0xDB80, 0xDC00 }; | 974 int32_t expected[] = {0xDB80, 0xDC00}; |
970 int32_t dst[ARRAY_SIZE(expected)]; | 975 int32_t dst[ARRAY_SIZE(expected)]; |
971 memset(dst, 0, sizeof(dst)); | 976 memset(dst, 0, sizeof(dst)); |
972 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 977 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
973 EXPECT(is_valid); | 978 EXPECT(is_valid); |
974 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 979 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
975 } | 980 } |
976 | 981 |
977 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF" | 982 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF" |
978 { | 983 { |
979 const char* src = "\xED\xAE\x80\xED\xBF\xBF"; | 984 const char* src = "\xED\xAE\x80\xED\xBF\xBF"; |
980 int32_t expected[] = { 0xDB80, 0xDFFF }; | 985 int32_t expected[] = {0xDB80, 0xDFFF}; |
981 int32_t dst[ARRAY_SIZE(expected)]; | 986 int32_t dst[ARRAY_SIZE(expected)]; |
982 memset(dst, 0, sizeof(dst)); | 987 memset(dst, 0, sizeof(dst)); |
983 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 988 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
984 EXPECT(is_valid); | 989 EXPECT(is_valid); |
985 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 990 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
986 } | 991 } |
987 | 992 |
988 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80" | 993 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80" |
989 { | 994 { |
990 const char* src = "\xED\xAF\xBF\xED\xB0\x80"; | 995 const char* src = "\xED\xAF\xBF\xED\xB0\x80"; |
991 int32_t expected[] = { 0xDBFF, 0xDC00 }; | 996 int32_t expected[] = {0xDBFF, 0xDC00}; |
992 int32_t dst[ARRAY_SIZE(expected)]; | 997 int32_t dst[ARRAY_SIZE(expected)]; |
993 memset(dst, 0, sizeof(dst)); | 998 memset(dst, 0, sizeof(dst)); |
994 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 999 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
995 EXPECT(is_valid); | 1000 EXPECT(is_valid); |
996 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 1001 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
997 } | 1002 } |
998 | 1003 |
999 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF" | 1004 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF" |
1000 { | 1005 { |
1001 const char* src = "\xED\xAF\xBF\xED\xBF\xBF"; | 1006 const char* src = "\xED\xAF\xBF\xED\xBF\xBF"; |
1002 int32_t expected[] = { 0xDBFF, 0xDFFF }; | 1007 int32_t expected[] = {0xDBFF, 0xDFFF}; |
1003 int32_t dst[ARRAY_SIZE(expected)]; | 1008 int32_t dst[ARRAY_SIZE(expected)]; |
1004 memset(dst, 0, sizeof(dst)); | 1009 memset(dst, 0, sizeof(dst)); |
1005 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 1010 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
1006 EXPECT(is_valid); | 1011 EXPECT(is_valid); |
1007 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 1012 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
1008 } | 1013 } |
1009 | 1014 |
1010 // 5.3 - Other illegal code positions | 1015 // 5.3 - Other illegal code positions |
1011 | 1016 |
1012 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE" | 1017 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE" |
1013 { | 1018 { |
1014 const char* src = "\xEF\xBF\xBE"; | 1019 const char* src = "\xEF\xBF\xBE"; |
1015 int32_t expected[] = { 0xFFFE }; | 1020 int32_t expected[] = {0xFFFE}; |
1016 int32_t dst[ARRAY_SIZE(expected)]; | 1021 int32_t dst[ARRAY_SIZE(expected)]; |
1017 memset(dst, 0, sizeof(dst)); | 1022 memset(dst, 0, sizeof(dst)); |
1018 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 1023 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
1019 EXPECT(is_valid); | 1024 EXPECT(is_valid); |
1020 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 1025 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
1021 } | 1026 } |
1022 | 1027 |
1023 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF" | 1028 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF" |
1024 { | 1029 { |
1025 const char* src = "\xEF\xBF\xBF"; | 1030 const char* src = "\xEF\xBF\xBF"; |
1026 int32_t expected[] = { 0xFFFF }; | 1031 int32_t expected[] = {0xFFFF}; |
1027 int32_t dst[ARRAY_SIZE(expected)]; | 1032 int32_t dst[ARRAY_SIZE(expected)]; |
1028 memset(dst, 0, sizeof(dst)); | 1033 memset(dst, 0, sizeof(dst)); |
1029 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 1034 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
1030 EXPECT(is_valid); | 1035 EXPECT(is_valid); |
1031 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 1036 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
1032 } | 1037 } |
1033 } | 1038 } |
1034 | 1039 |
1035 } // namespace dart | 1040 } // namespace dart |
OLD | NEW |