OLD | NEW |
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #include "vm/globals.h" | 5 #include "vm/globals.h" |
6 #include "vm/unicode.h" | 6 #include "vm/unicode.h" |
7 #include "vm/unit_test.h" | 7 #include "vm/unit_test.h" |
8 | 8 |
9 namespace dart { | 9 namespace dart { |
10 | 10 |
11 TEST_CASE(Utf8Decode) { | 11 TEST_CASE(Utf8Decode) { |
12 // Examples from the Unicode specification, chapter 3 | 12 // Examples from the Unicode specification, chapter 3 |
13 { | 13 { |
14 const char* src = "\x41\xC3\xB1\x42"; | 14 const char* src = "\x41\xC3\xB1\x42"; |
15 uint32_t expected[] = { 0x41, 0xF1, 0x42 }; | 15 int32_t expected[] = { 0x41, 0xF1, 0x42 }; |
16 uint32_t dst[ARRAY_SIZE(expected)]; | 16 int32_t dst[ARRAY_SIZE(expected)]; |
17 memset(dst, 0, sizeof(dst)); | 17 memset(dst, 0, sizeof(dst)); |
18 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 18 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
19 EXPECT(is_valid); | 19 EXPECT(is_valid); |
20 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 20 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
21 } | 21 } |
22 | 22 |
23 { | 23 { |
24 const char* src = "\x4D"; | 24 const char* src = "\x4D"; |
25 uint32_t expected[] = { 0x4D }; | 25 int32_t expected[] = { 0x4D }; |
26 uint32_t dst[ARRAY_SIZE(expected)]; | 26 int32_t dst[ARRAY_SIZE(expected)]; |
27 memset(dst, 0, sizeof(dst)); | 27 memset(dst, 0, sizeof(dst)); |
28 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 28 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
29 EXPECT(is_valid); | 29 EXPECT(is_valid); |
30 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 30 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
31 } | 31 } |
32 | 32 |
33 { | 33 { |
34 const char* src = "\xD0\xB0"; | 34 const char* src = "\xD0\xB0"; |
35 uint32_t expected[] = { 0x430 }; | 35 int32_t expected[] = { 0x430 }; |
36 uint32_t dst[ARRAY_SIZE(expected)]; | 36 int32_t dst[ARRAY_SIZE(expected)]; |
37 memset(dst, 0, sizeof(dst)); | 37 memset(dst, 0, sizeof(dst)); |
38 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 38 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
39 EXPECT(is_valid); | 39 EXPECT(is_valid); |
40 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 40 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
41 } | 41 } |
42 | 42 |
43 { | 43 { |
44 const char* src = "\xE4\xBA\x8C"; | 44 const char* src = "\xE4\xBA\x8C"; |
45 uint32_t expected[] = { 0x4E8C }; | 45 int32_t expected[] = { 0x4E8C }; |
46 uint32_t dst[ARRAY_SIZE(expected)]; | 46 int32_t dst[ARRAY_SIZE(expected)]; |
47 memset(dst, 0, sizeof(dst)); | 47 memset(dst, 0, sizeof(dst)); |
48 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 48 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
49 EXPECT(is_valid); | 49 EXPECT(is_valid); |
50 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 50 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
51 } | 51 } |
52 | 52 |
53 { | 53 { |
54 const char* src = "\xF0\x90\x8C\x82"; | 54 const char* src = "\xF0\x90\x8C\x82"; |
55 uint32_t expected[] = { 0x10302 }; | 55 int32_t expected[] = { 0x10302 }; |
56 uint32_t dst[ARRAY_SIZE(expected)]; | 56 int32_t dst[ARRAY_SIZE(expected)]; |
57 memset(dst, 0, sizeof(dst)); | 57 memset(dst, 0, sizeof(dst)); |
58 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 58 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
59 EXPECT(is_valid); | 59 EXPECT(is_valid); |
60 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 60 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
61 } | 61 } |
62 | 62 |
63 { | 63 { |
64 const char* src = "\x4D\xD0\xB0\xE4\xBA\x8C\xF0\x90\x8C\x82"; | 64 const char* src = "\x4D\xD0\xB0\xE4\xBA\x8C\xF0\x90\x8C\x82"; |
65 uint32_t expected[] = { 0x4D, 0x430, 0x4E8C, 0x10302 }; | 65 int32_t expected[] = { 0x4D, 0x430, 0x4E8C, 0x10302 }; |
66 uint32_t dst[ARRAY_SIZE(expected)]; | 66 int32_t dst[ARRAY_SIZE(expected)]; |
67 memset(dst, 0, sizeof(dst)); | 67 memset(dst, 0, sizeof(dst)); |
68 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 68 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
69 EXPECT(is_valid); | 69 EXPECT(is_valid); |
70 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 70 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
71 } | 71 } |
72 | 72 |
73 // Mixture of non-ASCII and ASCII characters | 73 // Mixture of non-ASCII and ASCII characters |
74 { | 74 { |
75 const char* src = "\xD7\x92\xD7\x9C\xD7\xA2\xD7\x93" | 75 const char* src = "\xD7\x92\xD7\x9C\xD7\xA2\xD7\x93" |
76 "\x20" | 76 "\x20" |
77 "\xD7\x91\xD7\xA8\xD7\x9B\xD7\x94"; | 77 "\xD7\x91\xD7\xA8\xD7\x9B\xD7\x94"; |
78 uint32_t expected[] = { 0x5D2, 0x5DC, 0x5E2, 0x5D3, | 78 int32_t expected[] = { 0x5D2, 0x5DC, 0x5E2, 0x5D3, |
79 0x20, | 79 0x20, |
80 0x5D1, 0x5E8, 0x5DB, 0x5D4 }; | 80 0x5D1, 0x5E8, 0x5DB, 0x5D4 }; |
81 uint32_t dst[ARRAY_SIZE(expected)]; | 81 int32_t dst[ARRAY_SIZE(expected)]; |
82 memset(dst, 0, sizeof(dst)); | 82 memset(dst, 0, sizeof(dst)); |
83 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 83 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
84 EXPECT(is_valid); | 84 EXPECT(is_valid); |
85 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 85 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
86 } | 86 } |
87 | 87 |
88 // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt | 88 // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt |
89 | 89 |
90 // 1 - Some correct UTF-8 text | 90 // 1 - Some correct UTF-8 text |
91 { | 91 { |
92 const char* src = "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5"; | 92 const char* src = "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5"; |
93 uint32_t expected[] = { 0x3BA, 0x1F79, 0x3C3, 0x3BC, 0x3B5 }; | 93 int32_t expected[] = { 0x3BA, 0x1F79, 0x3C3, 0x3BC, 0x3B5 }; |
94 uint32_t dst[ARRAY_SIZE(expected)]; | 94 int32_t dst[ARRAY_SIZE(expected)]; |
95 memset(dst, 0, sizeof(dst)); | 95 memset(dst, 0, sizeof(dst)); |
96 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 96 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
97 EXPECT(is_valid); | 97 EXPECT(is_valid); |
98 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 98 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
99 } | 99 } |
100 | 100 |
101 // 2 - Boundary condition test cases | 101 // 2 - Boundary condition test cases |
102 | 102 |
103 // 2.1 - First possible sequence of a certain length | 103 // 2.1 - First possible sequence of a certain length |
104 | 104 |
105 // 2.1.1 - 1 byte (U-00000000): "\x00" | 105 // 2.1.1 - 1 byte (U-00000000): "\x00" |
106 { | 106 { |
107 const char* src = "\x00"; | 107 const char* src = "\x00"; |
108 uint32_t expected[] = { 0x0 }; | 108 int32_t expected[] = { 0x0 }; |
109 uint32_t dst[ARRAY_SIZE(expected)]; | 109 int32_t dst[ARRAY_SIZE(expected)]; |
110 memset(dst, 0xFF, sizeof(dst)); | 110 memset(dst, 0xFF, sizeof(dst)); |
111 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 111 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
112 EXPECT(is_valid); | 112 EXPECT(is_valid); |
113 EXPECT(memcmp(expected, dst, sizeof(expected))); | 113 EXPECT(memcmp(expected, dst, sizeof(expected))); |
114 } | 114 } |
115 | 115 |
116 // 2.1.2 - 2 bytes (U-00000080): "\xC2\x80" | 116 // 2.1.2 - 2 bytes (U-00000080): "\xC2\x80" |
117 { | 117 { |
118 const char* src = "\xC2\x80"; | 118 const char* src = "\xC2\x80"; |
119 uint32_t expected[] = { 0x80 }; | 119 int32_t expected[] = { 0x80 }; |
120 uint32_t dst[ARRAY_SIZE(expected)]; | 120 int32_t dst[ARRAY_SIZE(expected)]; |
121 memset(dst, 0, sizeof(dst)); | 121 memset(dst, 0, sizeof(dst)); |
122 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 122 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
123 EXPECT(is_valid); | 123 EXPECT(is_valid); |
124 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 124 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
125 } | 125 } |
126 | 126 |
127 // 2.1.3 - 3 bytes (U-00000800): "\xE0\xA0\x80" | 127 // 2.1.3 - 3 bytes (U-00000800): "\xE0\xA0\x80" |
128 { | 128 { |
129 const char* src = "\xE0\xA0\x80"; | 129 const char* src = "\xE0\xA0\x80"; |
130 uint32_t expected[] = { 0x800 }; | 130 int32_t expected[] = { 0x800 }; |
131 uint32_t dst[ARRAY_SIZE(expected)]; | 131 int32_t dst[ARRAY_SIZE(expected)]; |
132 memset(dst, 0, sizeof(dst)); | 132 memset(dst, 0, sizeof(dst)); |
133 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 133 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
134 EXPECT(is_valid); | 134 EXPECT(is_valid); |
135 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 135 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
136 } | 136 } |
137 | 137 |
138 // 2.1.4 - 4 bytes (U-00010000): "\xF0\x90\x80\x80" | 138 // 2.1.4 - 4 bytes (U-00010000): "\xF0\x90\x80\x80" |
139 { | 139 { |
140 const char* src = "\xF0\x90\x80\x80"; | 140 const char* src = "\xF0\x90\x80\x80"; |
141 uint32_t expected[] = { 0x10000 }; | 141 int32_t expected[] = { 0x10000 }; |
142 uint32_t dst[ARRAY_SIZE(expected)]; | 142 int32_t dst[ARRAY_SIZE(expected)]; |
143 memset(dst, 0, sizeof(dst)); | 143 memset(dst, 0, sizeof(dst)); |
144 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 144 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
145 EXPECT(is_valid); | 145 EXPECT(is_valid); |
146 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 146 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
147 } | 147 } |
148 | 148 |
149 // 2.1.5 - 5 bytes (U-00200000): "\xF8\x88\x80\x80\x80" | 149 // 2.1.5 - 5 bytes (U-00200000): "\xF8\x88\x80\x80\x80" |
150 { | 150 { |
151 const char* src = "\xF8\x88\x80\x80\x80"; | 151 const char* src = "\xF8\x88\x80\x80\x80"; |
152 uint32_t expected[] = { 0x200000 }; | 152 int32_t expected[] = { 0x200000 }; |
153 uint32_t dst[ARRAY_SIZE(expected)]; | 153 int32_t dst[ARRAY_SIZE(expected)]; |
154 memset(dst, 0, sizeof(dst)); | 154 memset(dst, 0, sizeof(dst)); |
155 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 155 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
156 EXPECT(!is_valid); | 156 EXPECT(!is_valid); |
157 EXPECT(memcmp(expected, dst, sizeof(expected))); | 157 EXPECT(memcmp(expected, dst, sizeof(expected))); |
158 } | 158 } |
159 | 159 |
160 // 2.1.6 - 6 bytes (U-04000000): "\xFC\x84\x80\x80\x80\x80" | 160 // 2.1.6 - 6 bytes (U-04000000): "\xFC\x84\x80\x80\x80\x80" |
161 { | 161 { |
162 const char* src = "\xFC\x84\x80\x80\x80\x80"; | 162 const char* src = "\xFC\x84\x80\x80\x80\x80"; |
163 uint32_t expected[] = { 0x400000 }; | 163 int32_t expected[] = { 0x400000 }; |
164 uint32_t dst[ARRAY_SIZE(expected)]; | 164 int32_t dst[ARRAY_SIZE(expected)]; |
165 memset(dst, 0, sizeof(dst)); | 165 memset(dst, 0, sizeof(dst)); |
166 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 166 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
167 EXPECT(!is_valid); | 167 EXPECT(!is_valid); |
168 EXPECT(memcmp(expected, dst, sizeof(expected))); | 168 EXPECT(memcmp(expected, dst, sizeof(expected))); |
169 } | 169 } |
170 | 170 |
171 // 2.2 - Last possible sequence of a certain length | 171 // 2.2 - Last possible sequence of a certain length |
172 | 172 |
173 // 2.2.1 - 1 byte (U-0000007F): "\x7F" | 173 // 2.2.1 - 1 byte (U-0000007F): "\x7F" |
174 { | 174 { |
175 const char* src = "\x7F"; | 175 const char* src = "\x7F"; |
176 uint32_t expected[] = { 0x7F }; | 176 int32_t expected[] = { 0x7F }; |
177 uint32_t dst[ARRAY_SIZE(expected)]; | 177 int32_t dst[ARRAY_SIZE(expected)]; |
178 memset(dst, 0, sizeof(dst)); | 178 memset(dst, 0, sizeof(dst)); |
179 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 179 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
180 EXPECT(is_valid); | 180 EXPECT(is_valid); |
181 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 181 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
182 } | 182 } |
183 | 183 |
184 // 2.2.2 - 2 bytes (U-000007FF): "\xDF\xBF" | 184 // 2.2.2 - 2 bytes (U-000007FF): "\xDF\xBF" |
185 { | 185 { |
186 const char* src = "\xDF\xBF"; | 186 const char* src = "\xDF\xBF"; |
187 uint32_t expected[] = { 0x7FF }; | 187 int32_t expected[] = { 0x7FF }; |
188 uint32_t dst[ARRAY_SIZE(expected)]; | 188 int32_t dst[ARRAY_SIZE(expected)]; |
189 memset(dst, 0, sizeof(dst)); | 189 memset(dst, 0, sizeof(dst)); |
190 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 190 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
191 EXPECT(is_valid); | 191 EXPECT(is_valid); |
192 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 192 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
193 } | 193 } |
194 | 194 |
195 // 2.2.3 - 3 bytes (U-0000FFFF): "\xEF\xBF\xBF" | 195 // 2.2.3 - 3 bytes (U-0000FFFF): "\xEF\xBF\xBF" |
196 { | 196 { |
197 const char* src = "\xEF\xBF\xBF"; | 197 const char* src = "\xEF\xBF\xBF"; |
198 uint32_t expected[] = { 0xFFFF }; | 198 int32_t expected[] = { 0xFFFF }; |
199 uint32_t dst[ARRAY_SIZE(expected)]; | 199 int32_t dst[ARRAY_SIZE(expected)]; |
200 memset(dst, 0, sizeof(dst)); | 200 memset(dst, 0, sizeof(dst)); |
201 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 201 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
202 EXPECT(is_valid); | 202 EXPECT(is_valid); |
203 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 203 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
204 } | 204 } |
205 | 205 |
206 // 2.2.4 - 4 bytes (U-001FFFFF): "\xF7\xBF\xBF\xBF" | 206 // 2.2.4 - 4 bytes (U-001FFFFF): "\xF7\xBF\xBF\xBF" |
207 { | 207 { |
208 const char* src = "\xF7\xBF\xBF\xBF"; | 208 const char* src = "\xF7\xBF\xBF\xBF"; |
209 uint32_t expected[] = { 0x1FFFF }; | 209 int32_t expected[] = { 0x1FFFF }; |
210 uint32_t dst[ARRAY_SIZE(expected)]; | 210 int32_t dst[ARRAY_SIZE(expected)]; |
211 memset(dst, 0, sizeof(dst)); | 211 memset(dst, 0, sizeof(dst)); |
212 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 212 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
213 EXPECT(!is_valid); | 213 EXPECT(!is_valid); |
214 EXPECT(memcmp(expected, dst, sizeof(expected))); | 214 EXPECT(memcmp(expected, dst, sizeof(expected))); |
215 } | 215 } |
216 | 216 |
217 // 2.2.5 - 5 bytes (U-03FFFFFF): "\xFB\xBF\xBF\xBF\xBF" | 217 // 2.2.5 - 5 bytes (U-03FFFFFF): "\xFB\xBF\xBF\xBF\xBF" |
218 { | 218 { |
219 const char* src = "\xFB\xBF\xBF\xBF\xBF"; | 219 const char* src = "\xFB\xBF\xBF\xBF\xBF"; |
220 uint32_t expected[] = { 0x3FFFFFF }; | 220 int32_t expected[] = { 0x3FFFFFF }; |
221 uint32_t dst[ARRAY_SIZE(expected)]; | 221 int32_t dst[ARRAY_SIZE(expected)]; |
222 memset(dst, 0, sizeof(dst)); | 222 memset(dst, 0, sizeof(dst)); |
223 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 223 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
224 EXPECT(!is_valid); | 224 EXPECT(!is_valid); |
225 EXPECT(memcmp(expected, dst, sizeof(expected))); | 225 EXPECT(memcmp(expected, dst, sizeof(expected))); |
226 } | 226 } |
227 | 227 |
228 // 2.2.6 - 6 bytes (U-7FFFFFFF): "\xFD\xBF\xBF\xBF\xBF\xBF" | 228 // 2.2.6 - 6 bytes (U-7FFFFFFF): "\xFD\xBF\xBF\xBF\xBF\xBF" |
229 { | 229 { |
230 const char* src = "\xFD\xBF\xBF\xBF\xBF\xBF"; | 230 const char* src = "\xFD\xBF\xBF\xBF\xBF\xBF"; |
231 uint32_t expected[] = { 0x7FFFFFF }; | 231 int32_t expected[] = { 0x7FFFFFF }; |
232 uint32_t dst[ARRAY_SIZE(expected)]; | 232 int32_t dst[ARRAY_SIZE(expected)]; |
233 memset(dst, 0, sizeof(dst)); | 233 memset(dst, 0, sizeof(dst)); |
234 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 234 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
235 EXPECT(!is_valid); | 235 EXPECT(!is_valid); |
236 EXPECT(memcmp(expected, dst, sizeof(expected))); | 236 EXPECT(memcmp(expected, dst, sizeof(expected))); |
237 } | 237 } |
238 | 238 |
239 // 2.3 - Other boundary conditions | 239 // 2.3 - Other boundary conditions |
240 | 240 |
241 // 2.3.1 - U-0000D7FF = ed 9f bf = "\xED\x9F\xBF" | 241 // 2.3.1 - U-0000D7FF = ed 9f bf = "\xED\x9F\xBF" |
242 { | 242 { |
243 const char* src = "\xED\x9F\xBF"; | 243 const char* src = "\xED\x9F\xBF"; |
244 uint32_t expected[] = { 0xD7FF }; | 244 int32_t expected[] = { 0xD7FF }; |
245 uint32_t dst[ARRAY_SIZE(expected)]; | 245 int32_t dst[ARRAY_SIZE(expected)]; |
246 memset(dst, 0, sizeof(dst)); | 246 memset(dst, 0, sizeof(dst)); |
247 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 247 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
248 EXPECT(is_valid); | 248 EXPECT(is_valid); |
249 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 249 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
250 } | 250 } |
251 | 251 |
252 // 2.3.2 - U-0000E000 = ee 80 80 = "\xEE\x80\x80" | 252 // 2.3.2 - U-0000E000 = ee 80 80 = "\xEE\x80\x80" |
253 { | 253 { |
254 const char* src = "\xEE\x80\x80"; | 254 const char* src = "\xEE\x80\x80"; |
255 uint32_t expected[] = { 0xE000 }; | 255 int32_t expected[] = { 0xE000 }; |
256 uint32_t dst[ARRAY_SIZE(expected)]; | 256 int32_t dst[ARRAY_SIZE(expected)]; |
257 memset(dst, 0, sizeof(dst)); | 257 memset(dst, 0, sizeof(dst)); |
258 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 258 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
259 EXPECT(is_valid); | 259 EXPECT(is_valid); |
260 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 260 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
261 } | 261 } |
262 | 262 |
263 // 2.3.3 - U-0000FFFD = ef bf bd = "\xEF\xBF\xBD" | 263 // 2.3.3 - U-0000FFFD = ef bf bd = "\xEF\xBF\xBD" |
264 { | 264 { |
265 const char* src = "\xEF\xBF\xBD"; | 265 const char* src = "\xEF\xBF\xBD"; |
266 uint32_t expected[] = { 0xFFFD }; | 266 int32_t expected[] = { 0xFFFD }; |
267 uint32_t dst[ARRAY_SIZE(expected)]; | 267 int32_t dst[ARRAY_SIZE(expected)]; |
268 memset(dst, 0, sizeof(dst)); | 268 memset(dst, 0, sizeof(dst)); |
269 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 269 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
270 EXPECT(is_valid); | 270 EXPECT(is_valid); |
271 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 271 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
272 } | 272 } |
273 | 273 |
274 // 2.3.4 - U-0010FFFF = f4 8f bf bf = "\xF4\x8F\xBF\xBF" | 274 // 2.3.4 - U-0010FFFF = f4 8f bf bf = "\xF4\x8F\xBF\xBF" |
275 { | 275 { |
276 const char* src = "\xF4\x8F\xBF\xBF"; | 276 const char* src = "\xF4\x8F\xBF\xBF"; |
277 uint32_t expected[] = { 0x10FFFF }; | 277 int32_t expected[] = { 0x10FFFF }; |
278 uint32_t dst[ARRAY_SIZE(expected)]; | 278 int32_t dst[ARRAY_SIZE(expected)]; |
279 memset(dst, 0, sizeof(dst)); | 279 memset(dst, 0, sizeof(dst)); |
280 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 280 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
281 EXPECT(is_valid); | 281 EXPECT(is_valid); |
282 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 282 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
283 } | 283 } |
284 | 284 |
285 // 2.3.5 - U-00110000 = f4 90 80 80 = "\xF4\x90\x80\x80" | 285 // 2.3.5 - U-00110000 = f4 90 80 80 = "\xF4\x90\x80\x80" |
286 { | 286 { |
287 const char* src = "\xF4\x90\x80\x80"; | 287 const char* src = "\xF4\x90\x80\x80"; |
288 uint32_t expected[] = { 0x110000 }; | 288 int32_t expected[] = { 0x110000 }; |
289 uint32_t dst[ARRAY_SIZE(expected)]; | 289 int32_t dst[ARRAY_SIZE(expected)]; |
290 memset(dst, 0, sizeof(dst)); | 290 memset(dst, 0, sizeof(dst)); |
291 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 291 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
292 EXPECT(!is_valid); | 292 EXPECT(!is_valid); |
293 EXPECT(memcmp(expected, dst, sizeof(expected))); | 293 EXPECT(memcmp(expected, dst, sizeof(expected))); |
294 } | 294 } |
295 | 295 |
296 // 3 - Malformed sequences | 296 // 3 - Malformed sequences |
297 | 297 |
298 // 3.1 - Unexpected continuation bytes | 298 // 3.1 - Unexpected continuation bytes |
299 | 299 |
300 // 3.1.1 - First continuation byte 0x80: "\x80" | 300 // 3.1.1 - First continuation byte 0x80: "\x80" |
301 { | 301 { |
302 const char* src = "\x80"; | 302 const char* src = "\x80"; |
303 uint32_t expected[] = { 0x80 }; | 303 int32_t expected[] = { 0x80 }; |
304 uint32_t dst[ARRAY_SIZE(expected)]; | 304 int32_t dst[ARRAY_SIZE(expected)]; |
305 memset(dst, 0, sizeof(dst)); | 305 memset(dst, 0, sizeof(dst)); |
306 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 306 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
307 EXPECT(!is_valid); | 307 EXPECT(!is_valid); |
308 EXPECT(memcmp(expected, dst, sizeof(expected))); | 308 EXPECT(memcmp(expected, dst, sizeof(expected))); |
309 } | 309 } |
310 | 310 |
311 // 3.1.2 - Last continuation byte 0xbf: "\xBF" | 311 // 3.1.2 - Last continuation byte 0xbf: "\xBF" |
312 { | 312 { |
313 const char* src = "\xBF"; | 313 const char* src = "\xBF"; |
314 uint32_t expected[] = { 0xBF }; | 314 int32_t expected[] = { 0xBF }; |
315 uint32_t dst[ARRAY_SIZE(expected)]; | 315 int32_t dst[ARRAY_SIZE(expected)]; |
316 memset(dst, 0, sizeof(dst)); | 316 memset(dst, 0, sizeof(dst)); |
317 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 317 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
318 EXPECT(!is_valid); | 318 EXPECT(!is_valid); |
319 EXPECT(memcmp(expected, dst, sizeof(expected))); | 319 EXPECT(memcmp(expected, dst, sizeof(expected))); |
320 } | 320 } |
321 | 321 |
322 // 3.1.3 - 2 continuation bytes: "\x80\xBF" | 322 // 3.1.3 - 2 continuation bytes: "\x80\xBF" |
323 { | 323 { |
324 const char* src = "\x80\xBF"; | 324 const char* src = "\x80\xBF"; |
325 uint32_t expected[] = { 0x80, 0xBF }; | 325 int32_t expected[] = { 0x80, 0xBF }; |
326 uint32_t dst[ARRAY_SIZE(expected)]; | 326 int32_t dst[ARRAY_SIZE(expected)]; |
327 memset(dst, 0, sizeof(dst)); | 327 memset(dst, 0, sizeof(dst)); |
328 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 328 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
329 EXPECT(!is_valid); | 329 EXPECT(!is_valid); |
330 EXPECT(memcmp(expected, dst, sizeof(expected))); | 330 EXPECT(memcmp(expected, dst, sizeof(expected))); |
331 } | 331 } |
332 | 332 |
333 // 3.1.4 - 3 continuation bytes: "\x80\xBF\x80" | 333 // 3.1.4 - 3 continuation bytes: "\x80\xBF\x80" |
334 { | 334 { |
335 const char* src = "\x80\xBF\x80"; | 335 const char* src = "\x80\xBF\x80"; |
336 uint32_t expected[] = { 0x80, 0xBF, 0x80 }; | 336 int32_t expected[] = { 0x80, 0xBF, 0x80 }; |
337 uint32_t dst[ARRAY_SIZE(expected)]; | 337 int32_t dst[ARRAY_SIZE(expected)]; |
338 memset(dst, 0, sizeof(dst)); | 338 memset(dst, 0, sizeof(dst)); |
339 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 339 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
340 EXPECT(!is_valid); | 340 EXPECT(!is_valid); |
341 EXPECT(memcmp(expected, dst, sizeof(expected))); | 341 EXPECT(memcmp(expected, dst, sizeof(expected))); |
342 } | 342 } |
343 | 343 |
344 // 3.1.5 - 4 continuation bytes: "\x80\xBF\x80\xBF" | 344 // 3.1.5 - 4 continuation bytes: "\x80\xBF\x80\xBF" |
345 { | 345 { |
346 const char* src = "\x80\xBF\x80\xBF"; | 346 const char* src = "\x80\xBF\x80\xBF"; |
347 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF }; | 347 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF }; |
348 uint32_t dst[ARRAY_SIZE(expected)]; | 348 int32_t dst[ARRAY_SIZE(expected)]; |
349 memset(dst, 0, sizeof(dst)); | 349 memset(dst, 0, sizeof(dst)); |
350 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 350 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
351 EXPECT(!is_valid); | 351 EXPECT(!is_valid); |
352 EXPECT(memcmp(expected, dst, sizeof(expected))); | 352 EXPECT(memcmp(expected, dst, sizeof(expected))); |
353 } | 353 } |
354 | 354 |
355 // 3.1.6 - 5 continuation bytes: "\x80\xBF\x80\xBF\x80" | 355 // 3.1.6 - 5 continuation bytes: "\x80\xBF\x80\xBF\x80" |
356 { | 356 { |
357 const char* src = "\x80\xBF\x80\xBF\x80"; | 357 const char* src = "\x80\xBF\x80\xBF\x80"; |
358 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80 }; | 358 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80 }; |
359 uint32_t dst[ARRAY_SIZE(expected)]; | 359 int32_t dst[ARRAY_SIZE(expected)]; |
360 memset(dst, 0, sizeof(dst)); | 360 memset(dst, 0, sizeof(dst)); |
361 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 361 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
362 EXPECT(!is_valid); | 362 EXPECT(!is_valid); |
363 EXPECT(memcmp(expected, dst, sizeof(expected))); | 363 EXPECT(memcmp(expected, dst, sizeof(expected))); |
364 } | 364 } |
365 | 365 |
366 // 3.1.7 - 6 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF" | 366 // 3.1.7 - 6 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF" |
367 { | 367 { |
368 const char* src = "\x80\xBF\x80\xBF\x80\xBF"; | 368 const char* src = "\x80\xBF\x80\xBF\x80\xBF"; |
369 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF }; | 369 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF }; |
370 uint32_t dst[ARRAY_SIZE(expected)]; | 370 int32_t dst[ARRAY_SIZE(expected)]; |
371 memset(dst, 0, sizeof(dst)); | 371 memset(dst, 0, sizeof(dst)); |
372 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 372 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
373 EXPECT(!is_valid); | 373 EXPECT(!is_valid); |
374 EXPECT(memcmp(expected, dst, sizeof(expected))); | 374 EXPECT(memcmp(expected, dst, sizeof(expected))); |
375 } | 375 } |
376 | 376 |
377 // 3.1.8 - 7 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF\x80" | 377 // 3.1.8 - 7 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF\x80" |
378 { | 378 { |
379 const char* src = "\x80\xBF\x80\xBF\x80\xBF\x80"; | 379 const char* src = "\x80\xBF\x80\xBF\x80\xBF\x80"; |
380 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 }; | 380 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 }; |
381 uint32_t dst[ARRAY_SIZE(expected)]; | 381 int32_t dst[ARRAY_SIZE(expected)]; |
382 memset(dst, 0, sizeof(dst)); | 382 memset(dst, 0, sizeof(dst)); |
383 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 383 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
384 EXPECT(!is_valid); | 384 EXPECT(!is_valid); |
385 EXPECT(memcmp(expected, dst, sizeof(expected))); | 385 EXPECT(memcmp(expected, dst, sizeof(expected))); |
386 } | 386 } |
387 | 387 |
388 // 3.1.9 - Sequence of all 64 possible continuation bytes (0x80-0xbf): | 388 // 3.1.9 - Sequence of all 64 possible continuation bytes (0x80-0xbf): |
389 { | 389 { |
390 const char* src = "\x80\x81\x82\x83\x84\x85\x86\x87" | 390 const char* src = "\x80\x81\x82\x83\x84\x85\x86\x87" |
391 "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" | 391 "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" |
392 "\x90\x91\x92\x93\x94\x95\x96\x97" | 392 "\x90\x91\x92\x93\x94\x95\x96\x97" |
393 "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F" | 393 "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F" |
394 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7" | 394 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7" |
395 "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" | 395 "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" |
396 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7" | 396 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7" |
397 "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"; | 397 "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"; |
398 uint32_t expected[] = { 0x0 }; | 398 int32_t expected[] = { 0x0 }; |
399 uint32_t dst[ARRAY_SIZE(expected)]; | 399 int32_t dst[ARRAY_SIZE(expected)]; |
400 for (size_t i = 0; i < strlen(src); ++i) { | 400 for (size_t i = 0; i < strlen(src); ++i) { |
401 memset(dst, 0xFF, sizeof(dst)); | 401 memset(dst, 0xFF, sizeof(dst)); |
402 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); | 402 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
403 EXPECT(!is_valid); | 403 EXPECT(!is_valid); |
404 EXPECT(memcmp(expected, dst, sizeof(expected))); | 404 EXPECT(memcmp(expected, dst, sizeof(expected))); |
405 } | 405 } |
406 } | 406 } |
407 | 407 |
408 // 3.2 - Lonely start character | 408 // 3.2 - Lonely start character |
409 | 409 |
410 // 3.2.1 - All 32 first bytes of 2-byte sequences (0xc0-0xdf), each | 410 // 3.2.1 - All 32 first bytes of 2-byte sequences (0xc0-0xdf), each |
411 // followed by a space character: | 411 // followed by a space character: |
412 { | 412 { |
413 const char* src = "\xC0\x20\xC1\x20\xC2\x20\xC3\x20" | 413 const char* src = "\xC0\x20\xC1\x20\xC2\x20\xC3\x20" |
414 "\xC4\x20\xC5\x20\xC6\x20\xC7\x20" | 414 "\xC4\x20\xC5\x20\xC6\x20\xC7\x20" |
415 "\xC8\x20\xC9\x20\xCA\x20\xCB\x20" | 415 "\xC8\x20\xC9\x20\xCA\x20\xCB\x20" |
416 "\xCC\x20\xCD\x20\xCE\x20\xCF\x20" | 416 "\xCC\x20\xCD\x20\xCE\x20\xCF\x20" |
417 "\xD0\x20\xD1\x20\xD2\x20\xD3\x20" | 417 "\xD0\x20\xD1\x20\xD2\x20\xD3\x20" |
418 "\xD4\x20\xD5\x20\xD6\x20\xD7\x20" | 418 "\xD4\x20\xD5\x20\xD6\x20\xD7\x20" |
419 "\xD8\x20\xD9\x20\xDA\x20\xDB\x20" | 419 "\xD8\x20\xD9\x20\xDA\x20\xDB\x20" |
420 "\xDC\x20\xDD\x20\xDE\x20\xDF\x20"; | 420 "\xDC\x20\xDD\x20\xDE\x20\xDF\x20"; |
421 uint32_t expected[] = { 0x0 }; | 421 int32_t expected[] = { 0x0 }; |
422 uint32_t dst[ARRAY_SIZE(expected)]; | 422 int32_t dst[ARRAY_SIZE(expected)]; |
423 for (size_t i = 0; i < strlen(src); i += 2) { | 423 for (size_t i = 0; i < strlen(src); i += 2) { |
424 memset(dst, 0xFF, sizeof(dst)); | 424 memset(dst, 0xFF, sizeof(dst)); |
425 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); | 425 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
426 EXPECT(!is_valid); | 426 EXPECT(!is_valid); |
427 EXPECT(memcmp(expected, dst, sizeof(expected))); | 427 EXPECT(memcmp(expected, dst, sizeof(expected))); |
428 } | 428 } |
429 } | 429 } |
430 | 430 |
431 // 3.2.2 - All 16 first bytes of 3-byte sequences (0xe0-0xef), each | 431 // 3.2.2 - All 16 first bytes of 3-byte sequences (0xe0-0xef), each |
432 // followed by a space character: | 432 // followed by a space character: |
433 { | 433 { |
434 const char* src = "\xE0\x20\xE1\x20\xE2\x20\xE3\x20" | 434 const char* src = "\xE0\x20\xE1\x20\xE2\x20\xE3\x20" |
435 "\xE4\x20\xE5\x20\xE6\x20\xE7\x20" | 435 "\xE4\x20\xE5\x20\xE6\x20\xE7\x20" |
436 "\xE8\x20\xE9\x20\xEA\x20\xEB\x20" | 436 "\xE8\x20\xE9\x20\xEA\x20\xEB\x20" |
437 "\xEC\x20\xED\x20\xEE\x20\xEF\x20"; | 437 "\xEC\x20\xED\x20\xEE\x20\xEF\x20"; |
438 uint32_t expected[] = { 0x0 }; | 438 int32_t expected[] = { 0x0 }; |
439 uint32_t dst[ARRAY_SIZE(expected)]; | 439 int32_t dst[ARRAY_SIZE(expected)]; |
440 for (size_t i = 0; i < strlen(src); i += 2) { | 440 for (size_t i = 0; i < strlen(src); i += 2) { |
441 memset(dst, 0xFF, sizeof(dst)); | 441 memset(dst, 0xFF, sizeof(dst)); |
442 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); | 442 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
443 EXPECT(!is_valid); | 443 EXPECT(!is_valid); |
444 EXPECT(memcmp(expected, dst, sizeof(expected))); | 444 EXPECT(memcmp(expected, dst, sizeof(expected))); |
445 } | 445 } |
446 } | 446 } |
447 | 447 |
448 // 3.2.3 - All 8 first bytes of 4-byte sequences (0xf0-0xf7), each | 448 // 3.2.3 - All 8 first bytes of 4-byte sequences (0xf0-0xf7), each |
449 // followed by a space character: | 449 // followed by a space character: |
450 { | 450 { |
451 const char* src = "\xF0\x20\xF1\x20\xF2\x20\xF3\x20" | 451 const char* src = "\xF0\x20\xF1\x20\xF2\x20\xF3\x20" |
452 "\xF4\x20\xF5\x20\xF6\x20\xF7\x20"; | 452 "\xF4\x20\xF5\x20\xF6\x20\xF7\x20"; |
453 uint32_t expected[] = { 0x0 }; | 453 int32_t expected[] = { 0x0 }; |
454 uint32_t dst[ARRAY_SIZE(expected)]; | 454 int32_t dst[ARRAY_SIZE(expected)]; |
455 for (size_t i = 0; i < strlen(src); i += 2) { | 455 for (size_t i = 0; i < strlen(src); i += 2) { |
456 memset(dst, 0xFF, sizeof(dst)); | 456 memset(dst, 0xFF, sizeof(dst)); |
457 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); | 457 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
458 EXPECT(!is_valid); | 458 EXPECT(!is_valid); |
459 EXPECT(memcmp(expected, dst, sizeof(expected))); | 459 EXPECT(memcmp(expected, dst, sizeof(expected))); |
460 } | 460 } |
461 } | 461 } |
462 | 462 |
463 // 3.2.4 - All 4 first bytes of 5-byte sequences (0xf8-0xfb), each | 463 // 3.2.4 - All 4 first bytes of 5-byte sequences (0xf8-0xfb), each |
464 // followed by a space character: | 464 // followed by a space character: |
465 { | 465 { |
466 const char* src = "\xF8\x20\xF9\x20\xFA\x20\xFB\x20"; | 466 const char* src = "\xF8\x20\xF9\x20\xFA\x20\xFB\x20"; |
467 uint32_t expected[] = { 0x0 }; | 467 int32_t expected[] = { 0x0 }; |
468 uint32_t dst[ARRAY_SIZE(expected)]; | 468 int32_t dst[ARRAY_SIZE(expected)]; |
469 for (size_t i = 0; i < strlen(src); i += 2) { | 469 for (size_t i = 0; i < strlen(src); i += 2) { |
470 memset(dst, 0xFF, sizeof(dst)); | 470 memset(dst, 0xFF, sizeof(dst)); |
471 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); | 471 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
472 EXPECT(!is_valid); | 472 EXPECT(!is_valid); |
473 EXPECT(memcmp(expected, dst, sizeof(expected))); | 473 EXPECT(memcmp(expected, dst, sizeof(expected))); |
474 } | 474 } |
475 } | 475 } |
476 | 476 |
477 // 3.2.5 - All 2 first bytes of 6-byte sequences (0xfc-0xfd), each | 477 // 3.2.5 - All 2 first bytes of 6-byte sequences (0xfc-0xfd), each |
478 // followed by a space character: | 478 // followed by a space character: |
479 { | 479 { |
480 const char* src = "\xFC\x20\xFD\x20"; | 480 const char* src = "\xFC\x20\xFD\x20"; |
481 uint32_t expected[] = { 0x0 }; | 481 int32_t expected[] = { 0x0 }; |
482 uint32_t dst[ARRAY_SIZE(expected)]; | 482 int32_t dst[ARRAY_SIZE(expected)]; |
483 for (size_t i = 0; i < strlen(src); i += 2) { | 483 for (size_t i = 0; i < strlen(src); i += 2) { |
484 memset(dst, 0xFF, sizeof(dst)); | 484 memset(dst, 0xFF, sizeof(dst)); |
485 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); | 485 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
486 EXPECT(!is_valid); | 486 EXPECT(!is_valid); |
487 EXPECT(memcmp(expected, dst, sizeof(expected))); | 487 EXPECT(memcmp(expected, dst, sizeof(expected))); |
488 } | 488 } |
489 } | 489 } |
490 | 490 |
491 // 3.3 - Sequences with last continuation byte missing | 491 // 3.3 - Sequences with last continuation byte missing |
492 | 492 |
493 // 3.3.1 - 2-byte sequence with last byte missing (U+0000): "\xC0" | 493 // 3.3.1 - 2-byte sequence with last byte missing (U+0000): "\xC0" |
494 { | 494 { |
495 const char* src = "\xC0"; | 495 const char* src = "\xC0"; |
496 uint32_t expected[] = { 0x0 }; | 496 int32_t expected[] = { 0x0 }; |
497 uint32_t dst[ARRAY_SIZE(expected)]; | 497 int32_t dst[ARRAY_SIZE(expected)]; |
498 memset(dst, 0xFF, sizeof(dst)); | 498 memset(dst, 0xFF, sizeof(dst)); |
499 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 499 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
500 EXPECT(!is_valid); | 500 EXPECT(!is_valid); |
501 EXPECT(memcmp(expected, dst, sizeof(expected))); | 501 EXPECT(memcmp(expected, dst, sizeof(expected))); |
502 } | 502 } |
503 | 503 |
504 // 3.3.2 - 3-byte sequence with last byte missing (U+0000): "\xE0\x80" | 504 // 3.3.2 - 3-byte sequence with last byte missing (U+0000): "\xE0\x80" |
505 { | 505 { |
506 const char* src = "\xE0\x80"; | 506 const char* src = "\xE0\x80"; |
507 uint32_t expected[] = { 0x0 }; | 507 int32_t expected[] = { 0x0 }; |
508 uint32_t dst[ARRAY_SIZE(expected)]; | 508 int32_t dst[ARRAY_SIZE(expected)]; |
509 memset(dst, 0xFF, sizeof(dst)); | 509 memset(dst, 0xFF, sizeof(dst)); |
510 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 510 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
511 EXPECT(!is_valid); | 511 EXPECT(!is_valid); |
512 EXPECT(memcmp(expected, dst, sizeof(expected))); | 512 EXPECT(memcmp(expected, dst, sizeof(expected))); |
513 } | 513 } |
514 | 514 |
515 // 3.3.3 - 4-byte sequence with last byte missing (U+0000): "\xF0\x80\x80" | 515 // 3.3.3 - 4-byte sequence with last byte missing (U+0000): "\xF0\x80\x80" |
516 { | 516 { |
517 const char* src = "\xF0\x80\x80"; | 517 const char* src = "\xF0\x80\x80"; |
518 uint32_t expected[] = { 0x0 }; | 518 int32_t expected[] = { 0x0 }; |
519 uint32_t dst[ARRAY_SIZE(expected)]; | 519 int32_t dst[ARRAY_SIZE(expected)]; |
520 memset(dst, 0xFF, sizeof(dst)); | 520 memset(dst, 0xFF, sizeof(dst)); |
521 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 521 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
522 EXPECT(!is_valid); | 522 EXPECT(!is_valid); |
523 EXPECT(memcmp(expected, dst, sizeof(expected))); | 523 EXPECT(memcmp(expected, dst, sizeof(expected))); |
524 } | 524 } |
525 | 525 |
526 // 3.3.4 - 5-byte sequence with last byte missing (U+0000): "\xF8\x80\x80\x80" | 526 // 3.3.4 - 5-byte sequence with last byte missing (U+0000): "\xF8\x80\x80\x80" |
527 { | 527 { |
528 const char* src = "\xF8\x80\x80\x80"; | 528 const char* src = "\xF8\x80\x80\x80"; |
529 uint32_t expected[] = { 0x0 }; | 529 int32_t expected[] = { 0x0 }; |
530 uint32_t dst[ARRAY_SIZE(expected)]; | 530 int32_t dst[ARRAY_SIZE(expected)]; |
531 memset(dst, 0xFF, sizeof(dst)); | 531 memset(dst, 0xFF, sizeof(dst)); |
532 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 532 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
533 EXPECT(!is_valid); | 533 EXPECT(!is_valid); |
534 EXPECT(memcmp(expected, dst, sizeof(expected))); | 534 EXPECT(memcmp(expected, dst, sizeof(expected))); |
535 } | 535 } |
536 | 536 |
537 // 3.3.5 - 6-byte sequence with last byte missing (U+0000): | 537 // 3.3.5 - 6-byte sequence with last byte missing (U+0000): |
538 // "\xFC\x80\x80\x80\x80" | 538 // "\xFC\x80\x80\x80\x80" |
539 { | 539 { |
540 const char* src = "\xFC\x80\x80\x80\x80"; | 540 const char* src = "\xFC\x80\x80\x80\x80"; |
541 uint32_t expected[] = { 0x0 }; | 541 int32_t expected[] = { 0x0 }; |
542 uint32_t dst[ARRAY_SIZE(expected)]; | 542 int32_t dst[ARRAY_SIZE(expected)]; |
543 memset(dst, 0xFF, sizeof(dst)); | 543 memset(dst, 0xFF, sizeof(dst)); |
544 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 544 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
545 EXPECT(!is_valid); | 545 EXPECT(!is_valid); |
546 EXPECT(memcmp(expected, dst, sizeof(expected))); | 546 EXPECT(memcmp(expected, dst, sizeof(expected))); |
547 } | 547 } |
548 | 548 |
549 // 3.3.6 - 2-byte sequence with last byte missing (U-000007FF): "\xDF" | 549 // 3.3.6 - 2-byte sequence with last byte missing (U-000007FF): "\xDF" |
550 { | 550 { |
551 const char* src = "\xDF"; | 551 const char* src = "\xDF"; |
552 uint32_t expected[] = { 0x0 }; | 552 int32_t expected[] = { 0x0 }; |
553 uint32_t dst[ARRAY_SIZE(expected)]; | 553 int32_t dst[ARRAY_SIZE(expected)]; |
554 memset(dst, 0xFF, sizeof(dst)); | 554 memset(dst, 0xFF, sizeof(dst)); |
555 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 555 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
556 EXPECT(!is_valid); | 556 EXPECT(!is_valid); |
557 EXPECT(memcmp(expected, dst, sizeof(expected))); | 557 EXPECT(memcmp(expected, dst, sizeof(expected))); |
558 } | 558 } |
559 | 559 |
560 // 3.3.7 - 3-byte sequence with last byte missing (U-0000FFFF): "\xEF\xBF" | 560 // 3.3.7 - 3-byte sequence with last byte missing (U-0000FFFF): "\xEF\xBF" |
561 { | 561 { |
562 const char* src = "\xEF\xBF"; | 562 const char* src = "\xEF\xBF"; |
563 uint32_t expected[] = { 0x0 }; | 563 int32_t expected[] = { 0x0 }; |
564 uint32_t dst[ARRAY_SIZE(expected)]; | 564 int32_t dst[ARRAY_SIZE(expected)]; |
565 memset(dst, 0xFF, sizeof(dst)); | 565 memset(dst, 0xFF, sizeof(dst)); |
566 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 566 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
567 EXPECT(!is_valid); | 567 EXPECT(!is_valid); |
568 EXPECT(memcmp(expected, dst, sizeof(expected))); | 568 EXPECT(memcmp(expected, dst, sizeof(expected))); |
569 } | 569 } |
570 | 570 |
571 // 3.3.8 - 4-byte sequence with last byte missing (U-001FFFFF): "\xF7\xBF\xBF" | 571 // 3.3.8 - 4-byte sequence with last byte missing (U-001FFFFF): "\xF7\xBF\xBF" |
572 { | 572 { |
573 const char* src = "\xF7\xBF\xBF"; | 573 const char* src = "\xF7\xBF\xBF"; |
574 uint32_t expected[] = { 0x0 }; | 574 int32_t expected[] = { 0x0 }; |
575 uint32_t dst[ARRAY_SIZE(expected)]; | 575 int32_t dst[ARRAY_SIZE(expected)]; |
576 memset(dst, 0xFF, sizeof(dst)); | 576 memset(dst, 0xFF, sizeof(dst)); |
577 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 577 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
578 EXPECT(!is_valid); | 578 EXPECT(!is_valid); |
579 EXPECT(memcmp(expected, dst, sizeof(expected))); | 579 EXPECT(memcmp(expected, dst, sizeof(expected))); |
580 } | 580 } |
581 | 581 |
582 // 3.3.9 - 5-byte sequence with last byte missing (U-03FFFFFF): | 582 // 3.3.9 - 5-byte sequence with last byte missing (U-03FFFFFF): |
583 // "\xFB\xBF\xBF\xBF" | 583 // "\xFB\xBF\xBF\xBF" |
584 { | 584 { |
585 const char* src = "\xFB\xBF\xBF\xBF"; | 585 const char* src = "\xFB\xBF\xBF\xBF"; |
586 uint32_t expected[] = { 0x0 }; | 586 int32_t expected[] = { 0x0 }; |
587 uint32_t dst[ARRAY_SIZE(expected)]; | 587 int32_t dst[ARRAY_SIZE(expected)]; |
588 memset(dst, 0xFF, sizeof(dst)); | 588 memset(dst, 0xFF, sizeof(dst)); |
589 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 589 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
590 EXPECT(!is_valid); | 590 EXPECT(!is_valid); |
591 EXPECT(memcmp(expected, dst, sizeof(expected))); | 591 EXPECT(memcmp(expected, dst, sizeof(expected))); |
592 } | 592 } |
593 | 593 |
594 // 3.3.10 - 6-byte sequence with last byte missing (U-7FFFFFFF): | 594 // 3.3.10 - 6-byte sequence with last byte missing (U-7FFFFFFF): |
595 // "\xFD\xBF\xBF\xBF\xBF" | 595 // "\xFD\xBF\xBF\xBF\xBF" |
596 { | 596 { |
597 const char* src = "\xFD\xBF\xBF\xBF\xBF"; | 597 const char* src = "\xFD\xBF\xBF\xBF\xBF"; |
598 uint32_t expected[] = { 0x0 }; | 598 int32_t expected[] = { 0x0 }; |
599 uint32_t dst[ARRAY_SIZE(expected)]; | 599 int32_t dst[ARRAY_SIZE(expected)]; |
600 memset(dst, 0xFF, sizeof(dst)); | 600 memset(dst, 0xFF, sizeof(dst)); |
601 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 601 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
602 EXPECT(!is_valid); | 602 EXPECT(!is_valid); |
603 EXPECT(memcmp(expected, dst, sizeof(expected))); | 603 EXPECT(memcmp(expected, dst, sizeof(expected))); |
604 } | 604 } |
605 | 605 |
606 // 3.4 - Concatenation of incomplete sequences | 606 // 3.4 - Concatenation of incomplete sequences |
607 { | 607 { |
608 const char* src = "\xC0\xE0\x80\xF0\x80\x80" | 608 const char* src = "\xC0\xE0\x80\xF0\x80\x80" |
609 "\xF8\x80\x80\x80\xFC\x80" | 609 "\xF8\x80\x80\x80\xFC\x80" |
610 "\x80\x80\x80\xDF\xEF\xBF" | 610 "\x80\x80\x80\xDF\xEF\xBF" |
611 "\xF7\xBF\xBF\xFB\xBF\xBF" | 611 "\xF7\xBF\xBF\xFB\xBF\xBF" |
612 "\xBF\xFD\xBF\xBF\xBF\xBF"; | 612 "\xBF\xFD\xBF\xBF\xBF\xBF"; |
613 uint32_t expected[] = { 0x0 }; | 613 int32_t expected[] = { 0x0 }; |
614 uint32_t dst[ARRAY_SIZE(expected)]; | 614 int32_t dst[ARRAY_SIZE(expected)]; |
615 for (size_t i = 0; i < strlen(src); ++i) { | 615 for (size_t i = 0; i < strlen(src); ++i) { |
616 for (size_t j = 1; j < (strlen(src) - i); ++j) { | 616 for (size_t j = 1; j < (strlen(src) - i); ++j) { |
617 memset(dst, 0xFF, sizeof(dst)); | 617 memset(dst, 0xFF, sizeof(dst)); |
618 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], | 618 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], |
619 dst, ARRAY_SIZE(dst)); | 619 dst, ARRAY_SIZE(dst)); |
620 EXPECT(!is_valid); | 620 EXPECT(!is_valid); |
621 EXPECT(memcmp(expected, dst, sizeof(expected))); | 621 EXPECT(memcmp(expected, dst, sizeof(expected))); |
622 } | 622 } |
623 } | 623 } |
624 } | 624 } |
625 | 625 |
626 // 3.5 - Impossible bytes | 626 // 3.5 - Impossible bytes |
627 | 627 |
628 // 3.5.1 - fe = "\xFE" | 628 // 3.5.1 - fe = "\xFE" |
629 { | 629 { |
630 const char* src = "\xFE"; | 630 const char* src = "\xFE"; |
631 uint32_t expected[] = { 0xFE }; | 631 int32_t expected[] = { 0xFE }; |
632 uint32_t dst[ARRAY_SIZE(expected)]; | 632 int32_t dst[ARRAY_SIZE(expected)]; |
633 memset(dst, 0, sizeof(dst)); | 633 memset(dst, 0, sizeof(dst)); |
634 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 634 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
635 EXPECT(!is_valid); | 635 EXPECT(!is_valid); |
636 EXPECT(memcmp(expected, dst, sizeof(expected))); | 636 EXPECT(memcmp(expected, dst, sizeof(expected))); |
637 } | 637 } |
638 | 638 |
639 // 3.5.2 - ff = "\xFF" | 639 // 3.5.2 - ff = "\xFF" |
640 { | 640 { |
641 const char* src = "\xFF"; | 641 const char* src = "\xFF"; |
642 uint32_t expected[] = { 0xFF }; | 642 int32_t expected[] = { 0xFF }; |
643 uint32_t dst[ARRAY_SIZE(expected)]; | 643 int32_t dst[ARRAY_SIZE(expected)]; |
644 memset(dst, 0, sizeof(dst)); | 644 memset(dst, 0, sizeof(dst)); |
645 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 645 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
646 EXPECT(!is_valid); | 646 EXPECT(!is_valid); |
647 EXPECT(memcmp(expected, dst, sizeof(expected))); | 647 EXPECT(memcmp(expected, dst, sizeof(expected))); |
648 } | 648 } |
649 | 649 |
650 // 3.5.3 - fe fe ff ff = "\xFE\xFE\xFF\xFF" | 650 // 3.5.3 - fe fe ff ff = "\xFE\xFE\xFF\xFF" |
651 { | 651 { |
652 const char* src = "\xFE\xFE\xFF\xFF"; | 652 const char* src = "\xFE\xFE\xFF\xFF"; |
653 uint32_t expected[] = { 0xFF }; | 653 int32_t expected[] = { 0xFF }; |
654 uint32_t dst[ARRAY_SIZE(expected)]; | 654 int32_t dst[ARRAY_SIZE(expected)]; |
655 memset(dst, 0, sizeof(dst)); | 655 memset(dst, 0, sizeof(dst)); |
656 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 656 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
657 EXPECT(!is_valid); | 657 EXPECT(!is_valid); |
658 EXPECT(memcmp(expected, dst, sizeof(expected))); | 658 EXPECT(memcmp(expected, dst, sizeof(expected))); |
659 } | 659 } |
660 | 660 |
661 // 4 - Overlong sequences | 661 // 4 - Overlong sequences |
662 | 662 |
663 // 4.1 - Examples of an overlong ASCII character | 663 // 4.1 - Examples of an overlong ASCII character |
664 | 664 |
665 // 4.1.1 - U+002F = c0 af = "\xC0\xAF" | 665 // 4.1.1 - U+002F = c0 af = "\xC0\xAF" |
666 { | 666 { |
667 const char* src = "\xC0\xAF"; | 667 const char* src = "\xC0\xAF"; |
668 uint32_t expected[] = { 0x2F }; | 668 int32_t expected[] = { 0x2F }; |
669 uint32_t dst[ARRAY_SIZE(expected)]; | 669 int32_t dst[ARRAY_SIZE(expected)]; |
670 memset(dst, 0, sizeof(dst)); | 670 memset(dst, 0, sizeof(dst)); |
671 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 671 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
672 EXPECT(!is_valid); | 672 EXPECT(!is_valid); |
673 EXPECT(memcmp(expected, dst, sizeof(expected))); | 673 EXPECT(memcmp(expected, dst, sizeof(expected))); |
674 } | 674 } |
675 | 675 |
676 // 4.1.2 - U+002F = e0 80 af = "\xE0\x80\xAF" | 676 // 4.1.2 - U+002F = e0 80 af = "\xE0\x80\xAF" |
677 { | 677 { |
678 const char* src = "\xE0\x80\xAF"; | 678 const char* src = "\xE0\x80\xAF"; |
679 uint32_t expected[] = { 0x2F }; | 679 int32_t expected[] = { 0x2F }; |
680 uint32_t dst[ARRAY_SIZE(expected)]; | 680 int32_t dst[ARRAY_SIZE(expected)]; |
681 memset(dst, 0, sizeof(dst)); | 681 memset(dst, 0, sizeof(dst)); |
682 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 682 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
683 EXPECT(!is_valid); | 683 EXPECT(!is_valid); |
684 EXPECT(memcmp(expected, dst, sizeof(expected))); | 684 EXPECT(memcmp(expected, dst, sizeof(expected))); |
685 } | 685 } |
686 | 686 |
687 // 4.1.3 - U+002F = f0 80 80 af = "\xF0\x80\x80\xAF" | 687 // 4.1.3 - U+002F = f0 80 80 af = "\xF0\x80\x80\xAF" |
688 { | 688 { |
689 const char* src = "\xF0\x80\x80\xAF"; | 689 const char* src = "\xF0\x80\x80\xAF"; |
690 uint32_t expected[] = { 0x2F }; | 690 int32_t expected[] = { 0x2F }; |
691 uint32_t dst[ARRAY_SIZE(expected)]; | 691 int32_t dst[ARRAY_SIZE(expected)]; |
692 memset(dst, 0, sizeof(dst)); | 692 memset(dst, 0, sizeof(dst)); |
693 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 693 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
694 EXPECT(!is_valid); | 694 EXPECT(!is_valid); |
695 EXPECT(memcmp(expected, dst, sizeof(expected))); | 695 EXPECT(memcmp(expected, dst, sizeof(expected))); |
696 } | 696 } |
697 | 697 |
698 // 4.1.4 - U+002F = f8 80 80 80 af = "\xF8\x80\x80\x80\xAF" | 698 // 4.1.4 - U+002F = f8 80 80 80 af = "\xF8\x80\x80\x80\xAF" |
699 { | 699 { |
700 const char* src = "\xF8\x80\x80\x80\xAF"; | 700 const char* src = "\xF8\x80\x80\x80\xAF"; |
701 uint32_t expected[] = { 0x2F }; | 701 int32_t expected[] = { 0x2F }; |
702 uint32_t dst[ARRAY_SIZE(expected)]; | 702 int32_t dst[ARRAY_SIZE(expected)]; |
703 memset(dst, 0, sizeof(dst)); | 703 memset(dst, 0, sizeof(dst)); |
704 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 704 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
705 EXPECT(!is_valid); | 705 EXPECT(!is_valid); |
706 EXPECT(memcmp(expected, dst, sizeof(expected))); | 706 EXPECT(memcmp(expected, dst, sizeof(expected))); |
707 } | 707 } |
708 | 708 |
709 // 4.1.5 - U+002F = fc 80 80 80 80 af = "\xFC\x80\x80\x80\x80\xAF" | 709 // 4.1.5 - U+002F = fc 80 80 80 80 af = "\xFC\x80\x80\x80\x80\xAF" |
710 { | 710 { |
711 const char* src = "\xFC\x80\x80\x80\x80\xAF"; | 711 const char* src = "\xFC\x80\x80\x80\x80\xAF"; |
712 uint32_t expected[] = { 0x2F }; | 712 int32_t expected[] = { 0x2F }; |
713 uint32_t dst[ARRAY_SIZE(expected)]; | 713 int32_t dst[ARRAY_SIZE(expected)]; |
714 memset(dst, 0, sizeof(dst)); | 714 memset(dst, 0, sizeof(dst)); |
715 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 715 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
716 EXPECT(!is_valid); | 716 EXPECT(!is_valid); |
717 EXPECT(memcmp(expected, dst, sizeof(expected))); | 717 EXPECT(memcmp(expected, dst, sizeof(expected))); |
718 } | 718 } |
719 | 719 |
720 // 4.2 Maximum overlong sequences | 720 // 4.2 Maximum overlong sequences |
721 | 721 |
722 // 4.2.1 - U-0000007F = c1 bf = "\xC1\xBF" | 722 // 4.2.1 - U-0000007F = c1 bf = "\xC1\xBF" |
723 { | 723 { |
724 const char* src = "\xC1\xBF"; | 724 const char* src = "\xC1\xBF"; |
725 uint32_t expected[] = { 0x7F }; | 725 int32_t expected[] = { 0x7F }; |
726 uint32_t dst[ARRAY_SIZE(expected)]; | 726 int32_t dst[ARRAY_SIZE(expected)]; |
727 memset(dst, 0, sizeof(dst)); | 727 memset(dst, 0, sizeof(dst)); |
728 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 728 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
729 EXPECT(!is_valid); | 729 EXPECT(!is_valid); |
730 EXPECT(memcmp(expected, dst, sizeof(expected))); | 730 EXPECT(memcmp(expected, dst, sizeof(expected))); |
731 } | 731 } |
732 | 732 |
733 // 4.2.2 U+000007FF = e0 9f bf = "\xE0\x9F\xBF" | 733 // 4.2.2 U+000007FF = e0 9f bf = "\xE0\x9F\xBF" |
734 { | 734 { |
735 const char* src = "\xE0\x9F\xBF"; | 735 const char* src = "\xE0\x9F\xBF"; |
736 uint32_t expected[] = { 0x7FF }; | 736 int32_t expected[] = { 0x7FF }; |
737 uint32_t dst[ARRAY_SIZE(expected)]; | 737 int32_t dst[ARRAY_SIZE(expected)]; |
738 memset(dst, 0, sizeof(dst)); | 738 memset(dst, 0, sizeof(dst)); |
739 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 739 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
740 EXPECT(!is_valid); | 740 EXPECT(!is_valid); |
741 EXPECT(memcmp(expected, dst, sizeof(expected))); | 741 EXPECT(memcmp(expected, dst, sizeof(expected))); |
742 } | 742 } |
743 | 743 |
744 // 4.2.3 - U+0000FFFF = f0 8f bf bf = "\xF0\x8F\xBF\xBF" | 744 // 4.2.3 - U+0000FFFF = f0 8f bf bf = "\xF0\x8F\xBF\xBF" |
745 { | 745 { |
746 const char* src = "\xF0\x8F\xBF\xBF"; | 746 const char* src = "\xF0\x8F\xBF\xBF"; |
747 uint32_t expected[] = { 0xFFFF }; | 747 int32_t expected[] = { 0xFFFF }; |
748 uint32_t dst[ARRAY_SIZE(expected)]; | 748 int32_t dst[ARRAY_SIZE(expected)]; |
749 memset(dst, 0, sizeof(dst)); | 749 memset(dst, 0, sizeof(dst)); |
750 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 750 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
751 EXPECT(!is_valid); | 751 EXPECT(!is_valid); |
752 EXPECT(memcmp(expected, dst, sizeof(expected))); | 752 EXPECT(memcmp(expected, dst, sizeof(expected))); |
753 } | 753 } |
754 | 754 |
755 // 4.2.4 U-001FFFFF = f8 87 bf bf bf = "\xF8\x87\xBF\xBF\xBF" | 755 // 4.2.4 U-001FFFFF = f8 87 bf bf bf = "\xF8\x87\xBF\xBF\xBF" |
756 { | 756 { |
757 const char* src = "\xF8\x87\xBF\xBF\xBF"; | 757 const char* src = "\xF8\x87\xBF\xBF\xBF"; |
758 uint32_t expected[] = { 0x1FFFFF }; | 758 int32_t expected[] = { 0x1FFFFF }; |
759 uint32_t dst[ARRAY_SIZE(expected)]; | 759 int32_t dst[ARRAY_SIZE(expected)]; |
760 memset(dst, 0, sizeof(dst)); | 760 memset(dst, 0, sizeof(dst)); |
761 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 761 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
762 EXPECT(!is_valid); | 762 EXPECT(!is_valid); |
763 EXPECT(memcmp(expected, dst, sizeof(expected))); | 763 EXPECT(memcmp(expected, dst, sizeof(expected))); |
764 } | 764 } |
765 | 765 |
766 // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "\xFC\x83\xBF\xBF\xBF\xBF" | 766 // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "\xFC\x83\xBF\xBF\xBF\xBF" |
767 { | 767 { |
768 const char* src = "\xFC\x83\xBF\xBF\xBF\xBF"; | 768 const char* src = "\xFC\x83\xBF\xBF\xBF\xBF"; |
769 uint32_t expected[] = { 0x3FFFFFF }; | 769 int32_t expected[] = { 0x3FFFFFF }; |
770 uint32_t dst[ARRAY_SIZE(expected)]; | 770 int32_t dst[ARRAY_SIZE(expected)]; |
771 memset(dst, 0, sizeof(dst)); | 771 memset(dst, 0, sizeof(dst)); |
772 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 772 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
773 EXPECT(!is_valid); | 773 EXPECT(!is_valid); |
774 EXPECT(memcmp(expected, dst, sizeof(expected))); | 774 EXPECT(memcmp(expected, dst, sizeof(expected))); |
775 } | 775 } |
776 | 776 |
777 // 4.3 - Overlong representation of the NUL character | 777 // 4.3 - Overlong representation of the NUL character |
778 | 778 |
779 // 4.3.1 - U+0000 = "\xC0\x80" | 779 // 4.3.1 - U+0000 = "\xC0\x80" |
780 { | 780 { |
781 const char* src = "\xC0\x80"; | 781 const char* src = "\xC0\x80"; |
782 uint32_t expected[] = { 0x0 }; | 782 int32_t expected[] = { 0x0 }; |
783 uint32_t dst[ARRAY_SIZE(expected)]; | 783 int32_t dst[ARRAY_SIZE(expected)]; |
784 memset(dst, 0xFF, sizeof(dst)); | 784 memset(dst, 0xFF, sizeof(dst)); |
785 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 785 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
786 EXPECT(!is_valid); | 786 EXPECT(!is_valid); |
787 EXPECT(memcmp(expected, dst, sizeof(expected))); | 787 EXPECT(memcmp(expected, dst, sizeof(expected))); |
788 } | 788 } |
789 | 789 |
790 // 4.3.2 U+0000 = e0 80 80 = "\xE0\x80\x80" | 790 // 4.3.2 U+0000 = e0 80 80 = "\xE0\x80\x80" |
791 { | 791 { |
792 const char* src = "\xE0\x80\x80"; | 792 const char* src = "\xE0\x80\x80"; |
793 uint32_t expected[] = { 0x0 }; | 793 int32_t expected[] = { 0x0 }; |
794 uint32_t dst[ARRAY_SIZE(expected)]; | 794 int32_t dst[ARRAY_SIZE(expected)]; |
795 memset(dst, 0xFF, sizeof(dst)); | 795 memset(dst, 0xFF, sizeof(dst)); |
796 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 796 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
797 EXPECT(!is_valid); | 797 EXPECT(!is_valid); |
798 EXPECT(memcmp(expected, dst, sizeof(expected))); | 798 EXPECT(memcmp(expected, dst, sizeof(expected))); |
799 } | 799 } |
800 | 800 |
801 // 4.3.3 U+0000 = f0 80 80 80 = "\xF0\x80\x80\x80" | 801 // 4.3.3 U+0000 = f0 80 80 80 = "\xF0\x80\x80\x80" |
802 { | 802 { |
803 const char* src = "\xF0\x80\x80\x80"; | 803 const char* src = "\xF0\x80\x80\x80"; |
804 uint32_t expected[] = { 0x0 }; | 804 int32_t expected[] = { 0x0 }; |
805 uint32_t dst[ARRAY_SIZE(expected)]; | 805 int32_t dst[ARRAY_SIZE(expected)]; |
806 memset(dst, 0xFF, sizeof(dst)); | 806 memset(dst, 0xFF, sizeof(dst)); |
807 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 807 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
808 EXPECT(!is_valid); | 808 EXPECT(!is_valid); |
809 EXPECT(memcmp(expected, dst, sizeof(expected))); | 809 EXPECT(memcmp(expected, dst, sizeof(expected))); |
810 } | 810 } |
811 | 811 |
812 // 4.3.4 U+0000 = f8 80 80 80 80 = "\xF8\x80\x80\x80\x80" | 812 // 4.3.4 U+0000 = f8 80 80 80 80 = "\xF8\x80\x80\x80\x80" |
813 { | 813 { |
814 const char* src = "\xF8\x80\x80\x80\x80"; | 814 const char* src = "\xF8\x80\x80\x80\x80"; |
815 uint32_t expected[] = { 0x0 }; | 815 int32_t expected[] = { 0x0 }; |
816 uint32_t dst[ARRAY_SIZE(expected)]; | 816 int32_t dst[ARRAY_SIZE(expected)]; |
817 memset(dst, 0xFF, sizeof(dst)); | 817 memset(dst, 0xFF, sizeof(dst)); |
818 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 818 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
819 EXPECT(!is_valid); | 819 EXPECT(!is_valid); |
820 EXPECT(memcmp(expected, dst, sizeof(expected))); | 820 EXPECT(memcmp(expected, dst, sizeof(expected))); |
821 } | 821 } |
822 | 822 |
823 // 4.3.5 U+0000 = fc 80 80 80 80 80 = "\xFC\x80\x80\x80\x80\x80" | 823 // 4.3.5 U+0000 = fc 80 80 80 80 80 = "\xFC\x80\x80\x80\x80\x80" |
824 { | 824 { |
825 const char* src = "\xFC\x80\x80\x80\x80\x80"; | 825 const char* src = "\xFC\x80\x80\x80\x80\x80"; |
826 uint32_t expected[] = { 0x0 }; | 826 int32_t expected[] = { 0x0 }; |
827 uint32_t dst[ARRAY_SIZE(expected)]; | 827 int32_t dst[ARRAY_SIZE(expected)]; |
828 memset(dst, 0xFF, sizeof(dst)); | 828 memset(dst, 0xFF, sizeof(dst)); |
829 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 829 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
830 EXPECT(!is_valid); | 830 EXPECT(!is_valid); |
831 EXPECT(memcmp(expected, dst, sizeof(expected))); | 831 EXPECT(memcmp(expected, dst, sizeof(expected))); |
832 } | 832 } |
833 | 833 |
834 // 5.1 - Single UTF-16 surrogates | 834 // 5.1 - Single UTF-16 surrogates |
835 | 835 |
836 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80" | 836 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80" |
837 { | 837 { |
838 const char* src = "\xED\xA0\x80"; | 838 const char* src = "\xED\xA0\x80"; |
839 uint32_t expected[] = { 0xD800 }; | 839 int32_t expected[] = { 0xD800 }; |
840 uint32_t dst[ARRAY_SIZE(expected)]; | 840 int32_t dst[ARRAY_SIZE(expected)]; |
841 memset(dst, 0, sizeof(dst)); | 841 memset(dst, 0, sizeof(dst)); |
842 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 842 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
843 EXPECT(!is_valid); | 843 EXPECT(!is_valid); |
844 EXPECT(memcmp(expected, dst, sizeof(expected))); | 844 EXPECT(memcmp(expected, dst, sizeof(expected))); |
845 } | 845 } |
846 | 846 |
847 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF" | 847 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF" |
848 { | 848 { |
849 const char* src = "\xED\xAD\xBF"; | 849 const char* src = "\xED\xAD\xBF"; |
850 uint32_t expected[] = { 0xDB7F }; | 850 int32_t expected[] = { 0xDB7F }; |
851 uint32_t dst[ARRAY_SIZE(expected)]; | 851 int32_t dst[ARRAY_SIZE(expected)]; |
852 memset(dst, 0, sizeof(dst)); | 852 memset(dst, 0, sizeof(dst)); |
853 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 853 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
854 EXPECT(!is_valid); | 854 EXPECT(!is_valid); |
855 EXPECT(memcmp(expected, dst, sizeof(expected))); | 855 EXPECT(memcmp(expected, dst, sizeof(expected))); |
856 } | 856 } |
857 | 857 |
858 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80" | 858 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80" |
859 { | 859 { |
860 const char* src = "\xED\xAE\x80"; | 860 const char* src = "\xED\xAE\x80"; |
861 uint32_t expected[] = { 0xDB80 }; | 861 int32_t expected[] = { 0xDB80 }; |
862 uint32_t dst[ARRAY_SIZE(expected)]; | 862 int32_t dst[ARRAY_SIZE(expected)]; |
863 memset(dst, 0, sizeof(dst)); | 863 memset(dst, 0, sizeof(dst)); |
864 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 864 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
865 EXPECT(!is_valid); | 865 EXPECT(!is_valid); |
866 EXPECT(memcmp(expected, dst, sizeof(expected))); | 866 EXPECT(memcmp(expected, dst, sizeof(expected))); |
867 } | 867 } |
868 | 868 |
869 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF" | 869 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF" |
870 { | 870 { |
871 const char* src = "\xED\xAF\xBF"; | 871 const char* src = "\xED\xAF\xBF"; |
872 uint32_t expected[] = { 0xDBFF }; | 872 int32_t expected[] = { 0xDBFF }; |
873 uint32_t dst[ARRAY_SIZE(expected)]; | 873 int32_t dst[ARRAY_SIZE(expected)]; |
874 memset(dst, 0, sizeof(dst)); | 874 memset(dst, 0, sizeof(dst)); |
875 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 875 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
876 EXPECT(!is_valid); | 876 EXPECT(!is_valid); |
877 EXPECT(memcmp(expected, dst, sizeof(expected))); | 877 EXPECT(memcmp(expected, dst, sizeof(expected))); |
878 } | 878 } |
879 | 879 |
880 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80" | 880 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80" |
881 { | 881 { |
882 const char* src = "\xED\xB0\x80"; | 882 const char* src = "\xED\xB0\x80"; |
883 uint32_t expected[] = { 0xDC00 }; | 883 int32_t expected[] = { 0xDC00 }; |
884 uint32_t dst[ARRAY_SIZE(expected)]; | 884 int32_t dst[ARRAY_SIZE(expected)]; |
885 memset(dst, 0, sizeof(dst)); | 885 memset(dst, 0, sizeof(dst)); |
886 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 886 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
887 EXPECT(!is_valid); | 887 EXPECT(!is_valid); |
888 EXPECT(memcmp(expected, dst, sizeof(expected))); | 888 EXPECT(memcmp(expected, dst, sizeof(expected))); |
889 } | 889 } |
890 | 890 |
891 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80" | 891 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80" |
892 { | 892 { |
893 const char* src = "\xED\xBE\x80"; | 893 const char* src = "\xED\xBE\x80"; |
894 uint32_t expected[] = { 0xDF80 }; | 894 int32_t expected[] = { 0xDF80 }; |
895 uint32_t dst[ARRAY_SIZE(expected)]; | 895 int32_t dst[ARRAY_SIZE(expected)]; |
896 memset(dst, 0, sizeof(dst)); | 896 memset(dst, 0, sizeof(dst)); |
897 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 897 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
898 EXPECT(!is_valid); | 898 EXPECT(!is_valid); |
899 EXPECT(memcmp(expected, dst, sizeof(expected))); | 899 EXPECT(memcmp(expected, dst, sizeof(expected))); |
900 } | 900 } |
901 | 901 |
902 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF" | 902 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF" |
903 { | 903 { |
904 const char* src = "\xED\xBF\xBF"; | 904 const char* src = "\xED\xBF\xBF"; |
905 uint32_t expected[] = { 0xDFFF }; | 905 int32_t expected[] = { 0xDFFF }; |
906 uint32_t dst[ARRAY_SIZE(expected)]; | 906 int32_t dst[ARRAY_SIZE(expected)]; |
907 memset(dst, 0, sizeof(dst)); | 907 memset(dst, 0, sizeof(dst)); |
908 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 908 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
909 EXPECT(!is_valid); | 909 EXPECT(!is_valid); |
910 EXPECT(memcmp(expected, dst, sizeof(expected))); | 910 EXPECT(memcmp(expected, dst, sizeof(expected))); |
911 } | 911 } |
912 | 912 |
913 // 5.2 Paired UTF-16 surrogates | 913 // 5.2 Paired UTF-16 surrogates |
914 | 914 |
915 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80" | 915 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80" |
916 { | 916 { |
917 const char* src = "\xED\xA0\x80\xED\xB0\x80"; | 917 const char* src = "\xED\xA0\x80\xED\xB0\x80"; |
918 uint32_t expected[] = { 0xD800, 0xDC00 }; | 918 int32_t expected[] = { 0xD800, 0xDC00 }; |
919 uint32_t dst[ARRAY_SIZE(expected)]; | 919 int32_t dst[ARRAY_SIZE(expected)]; |
920 memset(dst, 0, sizeof(dst)); | 920 memset(dst, 0, sizeof(dst)); |
921 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 921 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
922 EXPECT(!is_valid); | 922 EXPECT(!is_valid); |
923 EXPECT(memcmp(expected, dst, sizeof(expected))); | 923 EXPECT(memcmp(expected, dst, sizeof(expected))); |
924 } | 924 } |
925 | 925 |
926 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF" | 926 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF" |
927 { | 927 { |
928 const char* src = "\xED\xA0\x80\xED\xBF\xBF"; | 928 const char* src = "\xED\xA0\x80\xED\xBF\xBF"; |
929 uint32_t expected[] = { 0xD800, 0xDFFF }; | 929 int32_t expected[] = { 0xD800, 0xDFFF }; |
930 uint32_t dst[ARRAY_SIZE(expected)]; | 930 int32_t dst[ARRAY_SIZE(expected)]; |
931 memset(dst, 0, sizeof(dst)); | 931 memset(dst, 0, sizeof(dst)); |
932 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 932 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
933 EXPECT(!is_valid); | 933 EXPECT(!is_valid); |
934 EXPECT(memcmp(expected, dst, sizeof(expected))); | 934 EXPECT(memcmp(expected, dst, sizeof(expected))); |
935 } | 935 } |
936 | 936 |
937 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80" | 937 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80" |
938 { | 938 { |
939 const char* src = "\xED\xAD\xBF\xED\xB0\x80"; | 939 const char* src = "\xED\xAD\xBF\xED\xB0\x80"; |
940 uint32_t expected[] = { 0xDB7F, 0xDC00 }; | 940 int32_t expected[] = { 0xDB7F, 0xDC00 }; |
941 uint32_t dst[ARRAY_SIZE(expected)]; | 941 int32_t dst[ARRAY_SIZE(expected)]; |
942 memset(dst, 0, sizeof(dst)); | 942 memset(dst, 0, sizeof(dst)); |
943 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 943 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
944 EXPECT(!is_valid); | 944 EXPECT(!is_valid); |
945 EXPECT(memcmp(expected, dst, sizeof(expected))); | 945 EXPECT(memcmp(expected, dst, sizeof(expected))); |
946 } | 946 } |
947 | 947 |
948 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF" | 948 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF" |
949 { | 949 { |
950 const char* src = "\xED\xAD\xBF\xED\xBF\xBF"; | 950 const char* src = "\xED\xAD\xBF\xED\xBF\xBF"; |
951 uint32_t expected[] = { 0xDB7F, 0xDFFF }; | 951 int32_t expected[] = { 0xDB7F, 0xDFFF }; |
952 uint32_t dst[ARRAY_SIZE(expected)]; | 952 int32_t dst[ARRAY_SIZE(expected)]; |
953 memset(dst, 0, sizeof(dst)); | 953 memset(dst, 0, sizeof(dst)); |
954 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 954 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
955 EXPECT(!is_valid); | 955 EXPECT(!is_valid); |
956 EXPECT(memcmp(expected, dst, sizeof(expected))); | 956 EXPECT(memcmp(expected, dst, sizeof(expected))); |
957 } | 957 } |
958 | 958 |
959 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80" | 959 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80" |
960 { | 960 { |
961 const char* src = "\xED\xAE\x80\xED\xB0\x80"; | 961 const char* src = "\xED\xAE\x80\xED\xB0\x80"; |
962 uint32_t expected[] = { 0xDB80, 0xDC00 }; | 962 int32_t expected[] = { 0xDB80, 0xDC00 }; |
963 uint32_t dst[ARRAY_SIZE(expected)]; | 963 int32_t dst[ARRAY_SIZE(expected)]; |
964 memset(dst, 0, sizeof(dst)); | 964 memset(dst, 0, sizeof(dst)); |
965 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 965 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
966 EXPECT(!is_valid); | 966 EXPECT(!is_valid); |
967 EXPECT(memcmp(expected, dst, sizeof(expected))); | 967 EXPECT(memcmp(expected, dst, sizeof(expected))); |
968 } | 968 } |
969 | 969 |
970 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF" | 970 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF" |
971 { | 971 { |
972 const char* src = "\xED\xAE\x80\xED\xBF\xBF"; | 972 const char* src = "\xED\xAE\x80\xED\xBF\xBF"; |
973 uint32_t expected[] = { 0xDB80, 0xDFFF }; | 973 int32_t expected[] = { 0xDB80, 0xDFFF }; |
974 uint32_t dst[ARRAY_SIZE(expected)]; | 974 int32_t dst[ARRAY_SIZE(expected)]; |
975 memset(dst, 0, sizeof(dst)); | 975 memset(dst, 0, sizeof(dst)); |
976 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 976 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
977 EXPECT(!is_valid); | 977 EXPECT(!is_valid); |
978 EXPECT(memcmp(expected, dst, sizeof(expected))); | 978 EXPECT(memcmp(expected, dst, sizeof(expected))); |
979 } | 979 } |
980 | 980 |
981 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80" | 981 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80" |
982 { | 982 { |
983 const char* src = "\xED\xAF\xBF\xED\xB0\x80"; | 983 const char* src = "\xED\xAF\xBF\xED\xB0\x80"; |
984 uint32_t expected[] = { 0xDBFF, 0xDC00 }; | 984 int32_t expected[] = { 0xDBFF, 0xDC00 }; |
985 uint32_t dst[ARRAY_SIZE(expected)]; | 985 int32_t dst[ARRAY_SIZE(expected)]; |
986 memset(dst, 0, sizeof(dst)); | 986 memset(dst, 0, sizeof(dst)); |
987 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 987 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
988 EXPECT(!is_valid); | 988 EXPECT(!is_valid); |
989 EXPECT(memcmp(expected, dst, sizeof(expected))); | 989 EXPECT(memcmp(expected, dst, sizeof(expected))); |
990 } | 990 } |
991 | 991 |
992 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF" | 992 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF" |
993 { | 993 { |
994 const char* src = "\xED\xAF\xBF\xED\xBF\xBF"; | 994 const char* src = "\xED\xAF\xBF\xED\xBF\xBF"; |
995 uint32_t expected[] = { 0xDBFF, 0xDFFF }; | 995 int32_t expected[] = { 0xDBFF, 0xDFFF }; |
996 uint32_t dst[ARRAY_SIZE(expected)]; | 996 int32_t dst[ARRAY_SIZE(expected)]; |
997 memset(dst, 0, sizeof(dst)); | 997 memset(dst, 0, sizeof(dst)); |
998 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 998 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
999 EXPECT(!is_valid); | 999 EXPECT(!is_valid); |
1000 EXPECT(memcmp(expected, dst, sizeof(expected))); | 1000 EXPECT(memcmp(expected, dst, sizeof(expected))); |
1001 } | 1001 } |
1002 | 1002 |
1003 // 5.3 - Other illegal code positions | 1003 // 5.3 - Other illegal code positions |
1004 | 1004 |
1005 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE" | 1005 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE" |
1006 { | 1006 { |
1007 const char* src = "\xEF\xBF\xBE"; | 1007 const char* src = "\xEF\xBF\xBE"; |
1008 uint32_t expected[] = { 0xFFFE }; | 1008 int32_t expected[] = { 0xFFFE }; |
1009 uint32_t dst[ARRAY_SIZE(expected)]; | 1009 int32_t dst[ARRAY_SIZE(expected)]; |
1010 memset(dst, 0, sizeof(dst)); | 1010 memset(dst, 0, sizeof(dst)); |
1011 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 1011 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
1012 EXPECT(is_valid); | 1012 EXPECT(is_valid); |
1013 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 1013 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
1014 } | 1014 } |
1015 | 1015 |
1016 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF" | 1016 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF" |
1017 { | 1017 { |
1018 const char* src = "\xEF\xBF\xBF"; | 1018 const char* src = "\xEF\xBF\xBF"; |
1019 uint32_t expected[] = { 0xFFFF }; | 1019 int32_t expected[] = { 0xFFFF }; |
1020 uint32_t dst[ARRAY_SIZE(expected)]; | 1020 int32_t dst[ARRAY_SIZE(expected)]; |
1021 memset(dst, 0, sizeof(dst)); | 1021 memset(dst, 0, sizeof(dst)); |
1022 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 1022 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
1023 EXPECT(is_valid); | 1023 EXPECT(is_valid); |
1024 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 1024 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
1025 } | 1025 } |
1026 } | 1026 } |
1027 | 1027 |
1028 } // namespace dart | 1028 } // namespace dart |
OLD | NEW |