OLD | NEW |
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #include "vm/globals.h" | 5 #include "vm/globals.h" |
6 #include "vm/unicode.h" | 6 #include "vm/unicode.h" |
7 #include "vm/unit_test.h" | 7 #include "vm/unit_test.h" |
8 | 8 |
9 namespace dart { | 9 namespace dart { |
10 | 10 |
11 TEST_CASE(Utf8Decode) { | 11 TEST_CASE(Utf8Decode) { |
12 // Examples from the Unicode specification, chapter 3 | 12 // Examples from the Unicode specification, chapter 3 |
13 { | 13 { |
14 const char* src = "\x41\xC3\xB1\x42"; | 14 const char* src = "\x41\xC3\xB1\x42"; |
15 uint32_t expected[] = { 0x41, 0xF1, 0x42 }; | 15 uint32_t expected[] = { 0x41, 0xF1, 0x42 }; |
16 uint32_t dst[ARRAY_SIZE(expected)]; | 16 uint32_t dst[ARRAY_SIZE(expected)]; |
17 memset(dst, 0, sizeof(dst)); | 17 memset(dst, 0, sizeof(dst)); |
18 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 18 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
19 EXPECT(is_valid); | 19 EXPECT(is_valid); |
20 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 20 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
21 } | 21 } |
22 | 22 |
23 { | 23 { |
24 const char* src = "\x4D"; | 24 const char* src = "\x4D"; |
25 uint32_t expected[] = { 0x4D }; | 25 uint32_t expected[] = { 0x4D }; |
26 uint32_t dst[ARRAY_SIZE(expected)]; | 26 uint32_t dst[ARRAY_SIZE(expected)]; |
27 memset(dst, 0, sizeof(dst)); | 27 memset(dst, 0, sizeof(dst)); |
28 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 28 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
29 EXPECT(is_valid); | 29 EXPECT(is_valid); |
30 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 30 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
31 } | 31 } |
32 | 32 |
33 { | 33 { |
34 const char* src = "\xD0\xB0"; | 34 const char* src = "\xD0\xB0"; |
35 uint32_t expected[] = { 0x430 }; | 35 uint32_t expected[] = { 0x430 }; |
36 uint32_t dst[ARRAY_SIZE(expected)]; | 36 uint32_t dst[ARRAY_SIZE(expected)]; |
37 memset(dst, 0, sizeof(dst)); | 37 memset(dst, 0, sizeof(dst)); |
38 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 38 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
39 EXPECT(is_valid); | 39 EXPECT(is_valid); |
40 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 40 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
41 } | 41 } |
42 | 42 |
43 { | 43 { |
44 const char* src = "\xE4\xBA\x8C"; | 44 const char* src = "\xE4\xBA\x8C"; |
45 uint32_t expected[] = { 0x4E8C }; | 45 uint32_t expected[] = { 0x4E8C }; |
46 uint32_t dst[ARRAY_SIZE(expected)]; | 46 uint32_t dst[ARRAY_SIZE(expected)]; |
47 memset(dst, 0, sizeof(dst)); | 47 memset(dst, 0, sizeof(dst)); |
48 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 48 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
49 EXPECT(is_valid); | 49 EXPECT(is_valid); |
50 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 50 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
51 } | 51 } |
52 | 52 |
53 { | 53 { |
54 const char* src = "\xF0\x90\x8C\x82"; | 54 const char* src = "\xF0\x90\x8C\x82"; |
55 uint32_t expected[] = { 0x10302 }; | 55 uint32_t expected[] = { 0x10302 }; |
56 uint32_t dst[ARRAY_SIZE(expected)]; | 56 uint32_t dst[ARRAY_SIZE(expected)]; |
57 memset(dst, 0, sizeof(dst)); | 57 memset(dst, 0, sizeof(dst)); |
58 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 58 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
59 EXPECT(is_valid); | 59 EXPECT(is_valid); |
60 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 60 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
61 } | 61 } |
62 | 62 |
63 { | 63 { |
64 const char* src = "\x4D\xD0\xB0\xE4\xBA\x8C\xF0\x90\x8C\x82"; | 64 const char* src = "\x4D\xD0\xB0\xE4\xBA\x8C\xF0\x90\x8C\x82"; |
65 uint32_t expected[] = { 0x4D, 0x430, 0x4E8C, 0x10302 }; | 65 uint32_t expected[] = { 0x4D, 0x430, 0x4E8C, 0x10302 }; |
66 uint32_t dst[ARRAY_SIZE(expected)]; | 66 uint32_t dst[ARRAY_SIZE(expected)]; |
67 memset(dst, 0, sizeof(dst)); | 67 memset(dst, 0, sizeof(dst)); |
68 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 68 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
69 EXPECT(is_valid); | 69 EXPECT(is_valid); |
70 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 70 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
71 } | 71 } |
72 | 72 |
73 // Mixture of non-ASCII and ASCII characters | 73 // Mixture of non-ASCII and ASCII characters |
74 { | 74 { |
75 const char* src = "\xD7\x92\xD7\x9C\xD7\xA2\xD7\x93" | 75 const char* src = "\xD7\x92\xD7\x9C\xD7\xA2\xD7\x93" |
76 "\x20" | 76 "\x20" |
77 "\xD7\x91\xD7\xA8\xD7\x9B\xD7\x94"; | 77 "\xD7\x91\xD7\xA8\xD7\x9B\xD7\x94"; |
78 uint32_t expected[] = { 0x5D2, 0x5DC, 0x5E2, 0x5D3, | 78 uint32_t expected[] = { 0x5D2, 0x5DC, 0x5E2, 0x5D3, |
79 0x20, | 79 0x20, |
80 0x5D1, 0x5E8, 0x5DB, 0x5D4 }; | 80 0x5D1, 0x5E8, 0x5DB, 0x5D4 }; |
81 uint32_t dst[ARRAY_SIZE(expected)]; | 81 uint32_t dst[ARRAY_SIZE(expected)]; |
82 memset(dst, 0, sizeof(dst)); | 82 memset(dst, 0, sizeof(dst)); |
83 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 83 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
84 EXPECT(is_valid); | 84 EXPECT(is_valid); |
85 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 85 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
86 } | 86 } |
87 | 87 |
88 // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt | 88 // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt |
89 | 89 |
90 // 1 - Some correct UTF-8 text | 90 // 1 - Some correct UTF-8 text |
91 { | 91 { |
92 const char* src = "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5"; | 92 const char* src = "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5"; |
93 uint32_t expected[] = { 0x3BA, 0x1F79, 0x3C3, 0x3BC, 0x3B5 }; | 93 uint32_t expected[] = { 0x3BA, 0x1F79, 0x3C3, 0x3BC, 0x3B5 }; |
94 uint32_t dst[ARRAY_SIZE(expected)]; | 94 uint32_t dst[ARRAY_SIZE(expected)]; |
95 memset(dst, 0, sizeof(dst)); | 95 memset(dst, 0, sizeof(dst)); |
96 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 96 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
97 EXPECT(is_valid); | 97 EXPECT(is_valid); |
98 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 98 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
99 } | 99 } |
100 | 100 |
101 // 2 - Boundary condition test cases | 101 // 2 - Boundary condition test cases |
102 | 102 |
103 // 2.1 - First possible sequence of a certain length | 103 // 2.1 - First possible sequence of a certain length |
104 | 104 |
105 // 2.1.1 - 1 byte (U-00000000): "\x00" | 105 // 2.1.1 - 1 byte (U-00000000): "\x00" |
106 { | 106 { |
107 const char* src = "\x00"; | 107 const char* src = "\x00"; |
108 uint32_t expected[] = { 0x0 }; | 108 uint32_t expected[] = { 0x0 }; |
109 uint32_t dst[ARRAY_SIZE(expected)]; | 109 uint32_t dst[ARRAY_SIZE(expected)]; |
110 memset(dst, 0xFF, sizeof(dst)); | 110 memset(dst, 0xFF, sizeof(dst)); |
111 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 111 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
112 EXPECT(is_valid); | 112 EXPECT(is_valid); |
113 EXPECT(memcmp(expected, dst, sizeof(expected))); | 113 EXPECT(memcmp(expected, dst, sizeof(expected))); |
114 } | 114 } |
115 | 115 |
116 // 2.1.2 - 2 bytes (U-00000080): "\xC2\x80" | 116 // 2.1.2 - 2 bytes (U-00000080): "\xC2\x80" |
117 { | 117 { |
118 const char* src = "\xC2\x80"; | 118 const char* src = "\xC2\x80"; |
119 uint32_t expected[] = { 0x80 }; | 119 uint32_t expected[] = { 0x80 }; |
120 uint32_t dst[ARRAY_SIZE(expected)]; | 120 uint32_t dst[ARRAY_SIZE(expected)]; |
121 memset(dst, 0, sizeof(dst)); | 121 memset(dst, 0, sizeof(dst)); |
122 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 122 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
123 EXPECT(is_valid); | 123 EXPECT(is_valid); |
124 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 124 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
125 } | 125 } |
126 | 126 |
127 // 2.1.3 - 3 bytes (U-00000800): "\xE0\xA0\x80" | 127 // 2.1.3 - 3 bytes (U-00000800): "\xE0\xA0\x80" |
128 { | 128 { |
129 const char* src = "\xE0\xA0\x80"; | 129 const char* src = "\xE0\xA0\x80"; |
130 uint32_t expected[] = { 0x800 }; | 130 uint32_t expected[] = { 0x800 }; |
131 uint32_t dst[ARRAY_SIZE(expected)]; | 131 uint32_t dst[ARRAY_SIZE(expected)]; |
132 memset(dst, 0, sizeof(dst)); | 132 memset(dst, 0, sizeof(dst)); |
133 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 133 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
134 EXPECT(is_valid); | 134 EXPECT(is_valid); |
135 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 135 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
136 } | 136 } |
137 | 137 |
138 // 2.1.4 - 4 bytes (U-00010000): "\xF0\x90\x80\x80" | 138 // 2.1.4 - 4 bytes (U-00010000): "\xF0\x90\x80\x80" |
139 { | 139 { |
140 const char* src = "\xF0\x90\x80\x80"; | 140 const char* src = "\xF0\x90\x80\x80"; |
141 uint32_t expected[] = { 0x10000 }; | 141 uint32_t expected[] = { 0x10000 }; |
142 uint32_t dst[ARRAY_SIZE(expected)]; | 142 uint32_t dst[ARRAY_SIZE(expected)]; |
143 memset(dst, 0, sizeof(dst)); | 143 memset(dst, 0, sizeof(dst)); |
144 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 144 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
145 EXPECT(is_valid); | 145 EXPECT(is_valid); |
146 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 146 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
147 } | 147 } |
148 | 148 |
149 // 2.1.5 - 5 bytes (U-00200000): "\xF8\x88\x80\x80\x80" | 149 // 2.1.5 - 5 bytes (U-00200000): "\xF8\x88\x80\x80\x80" |
150 { | 150 { |
151 const char* src = "\xF8\x88\x80\x80\x80"; | 151 const char* src = "\xF8\x88\x80\x80\x80"; |
152 uint32_t expected[] = { 0x200000 }; | 152 uint32_t expected[] = { 0x200000 }; |
153 uint32_t dst[ARRAY_SIZE(expected)]; | 153 uint32_t dst[ARRAY_SIZE(expected)]; |
154 memset(dst, 0, sizeof(dst)); | 154 memset(dst, 0, sizeof(dst)); |
155 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 155 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
156 EXPECT(!is_valid); | 156 EXPECT(!is_valid); |
157 EXPECT(memcmp(expected, dst, sizeof(expected))); | 157 EXPECT(memcmp(expected, dst, sizeof(expected))); |
158 } | 158 } |
159 | 159 |
160 // 2.1.6 - 6 bytes (U-04000000): "\xFC\x84\x80\x80\x80\x80" | 160 // 2.1.6 - 6 bytes (U-04000000): "\xFC\x84\x80\x80\x80\x80" |
161 { | 161 { |
162 const char* src = "\xFC\x84\x80\x80\x80\x80"; | 162 const char* src = "\xFC\x84\x80\x80\x80\x80"; |
163 uint32_t expected[] = { 0x400000 }; | 163 uint32_t expected[] = { 0x4000000 };  // U-04000000; must not show up in dst |
164 uint32_t dst[ARRAY_SIZE(expected)]; | 164 uint32_t dst[ARRAY_SIZE(expected)]; |
165 memset(dst, 0, sizeof(dst)); | 165 memset(dst, 0, sizeof(dst)); |
166 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 166 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
167 EXPECT(!is_valid); | 167 EXPECT(!is_valid); |
168 EXPECT(memcmp(expected, dst, sizeof(expected))); | 168 EXPECT(memcmp(expected, dst, sizeof(expected))); |
169 } | 169 } |
170 | 170 |
171 // 2.2 - Last possible sequence of a certain length | 171 // 2.2 - Last possible sequence of a certain length |
172 | 172 |
173 // 2.2.1 - 1 byte (U-0000007F): "\x7F" | 173 // 2.2.1 - 1 byte (U-0000007F): "\x7F" |
174 { | 174 { |
175 const char* src = "\x7F"; | 175 const char* src = "\x7F"; |
176 uint32_t expected[] = { 0x7F }; | 176 uint32_t expected[] = { 0x7F }; |
177 uint32_t dst[ARRAY_SIZE(expected)]; | 177 uint32_t dst[ARRAY_SIZE(expected)]; |
178 memset(dst, 0, sizeof(dst)); | 178 memset(dst, 0, sizeof(dst)); |
179 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 179 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
180 EXPECT(is_valid); | 180 EXPECT(is_valid); |
181 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 181 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
182 } | 182 } |
183 | 183 |
184 // 2.2.2 - 2 bytes (U-000007FF): "\xDF\xBF" | 184 // 2.2.2 - 2 bytes (U-000007FF): "\xDF\xBF" |
185 { | 185 { |
186 const char* src = "\xDF\xBF"; | 186 const char* src = "\xDF\xBF"; |
187 uint32_t expected[] = { 0x7FF }; | 187 uint32_t expected[] = { 0x7FF }; |
188 uint32_t dst[ARRAY_SIZE(expected)]; | 188 uint32_t dst[ARRAY_SIZE(expected)]; |
189 memset(dst, 0, sizeof(dst)); | 189 memset(dst, 0, sizeof(dst)); |
190 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 190 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
191 EXPECT(is_valid); | 191 EXPECT(is_valid); |
192 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 192 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
193 } | 193 } |
194 | 194 |
195 // 2.2.3 - 3 bytes (U-0000FFFF): "\xEF\xBF\xBF" | 195 // 2.2.3 - 3 bytes (U-0000FFFF): "\xEF\xBF\xBF" |
196 { | 196 { |
197 const char* src = "\xEF\xBF\xBF"; | 197 const char* src = "\xEF\xBF\xBF"; |
198 uint32_t expected[] = { 0xFFFF }; | 198 uint32_t expected[] = { 0xFFFF }; |
199 uint32_t dst[ARRAY_SIZE(expected)]; | 199 uint32_t dst[ARRAY_SIZE(expected)]; |
200 memset(dst, 0, sizeof(dst)); | 200 memset(dst, 0, sizeof(dst)); |
201 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 201 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
202 EXPECT(is_valid); | 202 EXPECT(is_valid); |
203 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 203 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
204 } | 204 } |
205 | 205 |
206 // 2.2.4 - 4 bytes (U-001FFFFF): "\xF7\xBF\xBF\xBF" | 206 // 2.2.4 - 4 bytes (U-001FFFFF): "\xF7\xBF\xBF\xBF" |
207 { | 207 { |
208 const char* src = "\xF7\xBF\xBF\xBF"; | 208 const char* src = "\xF7\xBF\xBF\xBF"; |
209 uint32_t expected[] = { 0x1FFFF }; | 209 uint32_t expected[] = { 0x1FFFFF };  // U-001FFFFF; must not show up in dst |
210 uint32_t dst[ARRAY_SIZE(expected)]; | 210 uint32_t dst[ARRAY_SIZE(expected)]; |
211 memset(dst, 0, sizeof(dst)); | 211 memset(dst, 0, sizeof(dst)); |
212 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 212 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
213 EXPECT(!is_valid); | 213 EXPECT(!is_valid); |
214 EXPECT(memcmp(expected, dst, sizeof(expected))); | 214 EXPECT(memcmp(expected, dst, sizeof(expected))); |
215 } | 215 } |
216 | 216 |
217 // 2.2.5 - 5 bytes (U-03FFFFFF): "\xFB\xBF\xBF\xBF\xBF" | 217 // 2.2.5 - 5 bytes (U-03FFFFFF): "\xFB\xBF\xBF\xBF\xBF" |
218 { | 218 { |
219 const char* src = "\xFB\xBF\xBF\xBF\xBF"; | 219 const char* src = "\xFB\xBF\xBF\xBF\xBF"; |
220 uint32_t expected[] = { 0x3FFFFFF }; | 220 uint32_t expected[] = { 0x3FFFFFF }; |
221 uint32_t dst[ARRAY_SIZE(expected)]; | 221 uint32_t dst[ARRAY_SIZE(expected)]; |
222 memset(dst, 0, sizeof(dst)); | 222 memset(dst, 0, sizeof(dst)); |
223 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 223 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
224 EXPECT(!is_valid); | 224 EXPECT(!is_valid); |
225 EXPECT(memcmp(expected, dst, sizeof(expected))); | 225 EXPECT(memcmp(expected, dst, sizeof(expected))); |
226 } | 226 } |
227 | 227 |
228 // 2.2.6 - 6 bytes (U-7FFFFFFF): "\xFD\xBF\xBF\xBF\xBF\xBF" | 228 // 2.2.6 - 6 bytes (U-7FFFFFFF): "\xFD\xBF\xBF\xBF\xBF\xBF" |
229 { | 229 { |
230 const char* src = "\xFD\xBF\xBF\xBF\xBF\xBF"; | 230 const char* src = "\xFD\xBF\xBF\xBF\xBF\xBF"; |
231 uint32_t expected[] = { 0x7FFFFFF }; | 231 uint32_t expected[] = { 0x7FFFFFFF };  // U-7FFFFFFF; must not show up in dst |
232 uint32_t dst[ARRAY_SIZE(expected)]; | 232 uint32_t dst[ARRAY_SIZE(expected)]; |
233 memset(dst, 0, sizeof(dst)); | 233 memset(dst, 0, sizeof(dst)); |
234 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 234 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
235 EXPECT(!is_valid); | 235 EXPECT(!is_valid); |
236 EXPECT(memcmp(expected, dst, sizeof(expected))); | 236 EXPECT(memcmp(expected, dst, sizeof(expected))); |
237 } | 237 } |
238 | 238 |
239 // 2.3 - Other boundary conditions | 239 // 2.3 - Other boundary conditions |
240 | 240 |
241 // 2.3.1 - U-0000D7FF = ed 9f bf = "\xED\x9F\xBF" | 241 // 2.3.1 - U-0000D7FF = ed 9f bf = "\xED\x9F\xBF" |
242 { | 242 { |
243 const char* src = "\xED\x9F\xBF"; | 243 const char* src = "\xED\x9F\xBF"; |
244 uint32_t expected[] = { 0xD7FF }; | 244 uint32_t expected[] = { 0xD7FF }; |
245 uint32_t dst[ARRAY_SIZE(expected)]; | 245 uint32_t dst[ARRAY_SIZE(expected)]; |
246 memset(dst, 0, sizeof(dst)); | 246 memset(dst, 0, sizeof(dst)); |
247 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 247 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
248 EXPECT(is_valid); | 248 EXPECT(is_valid); |
249 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 249 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
250 } | 250 } |
251 | 251 |
252 // 2.3.2 - U-0000E000 = ee 80 80 = "\xEE\x80\x80" | 252 // 2.3.2 - U-0000E000 = ee 80 80 = "\xEE\x80\x80" |
253 { | 253 { |
254 const char* src = "\xEE\x80\x80"; | 254 const char* src = "\xEE\x80\x80"; |
255 uint32_t expected[] = { 0xE000 }; | 255 uint32_t expected[] = { 0xE000 }; |
256 uint32_t dst[ARRAY_SIZE(expected)]; | 256 uint32_t dst[ARRAY_SIZE(expected)]; |
257 memset(dst, 0, sizeof(dst)); | 257 memset(dst, 0, sizeof(dst)); |
258 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 258 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
259 EXPECT(is_valid); | 259 EXPECT(is_valid); |
260 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 260 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
261 } | 261 } |
262 | 262 |
263 // 2.3.3 - U-0000FFFD = ef bf bd = "\xEF\xBF\xBD" | 263 // 2.3.3 - U-0000FFFD = ef bf bd = "\xEF\xBF\xBD" |
264 { | 264 { |
265 const char* src = "\xEF\xBF\xBD"; | 265 const char* src = "\xEF\xBF\xBD"; |
266 uint32_t expected[] = { 0xFFFD }; | 266 uint32_t expected[] = { 0xFFFD }; |
267 uint32_t dst[ARRAY_SIZE(expected)]; | 267 uint32_t dst[ARRAY_SIZE(expected)]; |
268 memset(dst, 0, sizeof(dst)); | 268 memset(dst, 0, sizeof(dst)); |
269 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 269 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
270 EXPECT(is_valid); | 270 EXPECT(is_valid); |
271 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 271 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
272 } | 272 } |
273 | 273 |
274 // 2.3.4 - U-0010FFFF = f4 8f bf bf = "\xF4\x8F\xBF\xBF" | 274 // 2.3.4 - U-0010FFFF = f4 8f bf bf = "\xF4\x8F\xBF\xBF" |
275 { | 275 { |
276 const char* src = "\xF4\x8F\xBF\xBF"; | 276 const char* src = "\xF4\x8F\xBF\xBF"; |
277 uint32_t expected[] = { 0x10FFFF }; | 277 uint32_t expected[] = { 0x10FFFF }; |
278 uint32_t dst[ARRAY_SIZE(expected)]; | 278 uint32_t dst[ARRAY_SIZE(expected)]; |
279 memset(dst, 0, sizeof(dst)); | 279 memset(dst, 0, sizeof(dst)); |
280 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 280 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
281 EXPECT(is_valid); | 281 EXPECT(is_valid); |
282 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 282 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
283 } | 283 } |
284 | 284 |
285 // 2.3.5 - U-00110000 = f4 90 80 80 = "\xF4\x90\x80\x80" | 285 // 2.3.5 - U-00110000 = f4 90 80 80 = "\xF4\x90\x80\x80" |
286 { | 286 { |
287 const char* src = "\xF4\x90\x80\x80"; | 287 const char* src = "\xF4\x90\x80\x80"; |
288 uint32_t expected[] = { 0x110000 }; | 288 uint32_t expected[] = { 0x110000 }; |
289 uint32_t dst[ARRAY_SIZE(expected)]; | 289 uint32_t dst[ARRAY_SIZE(expected)]; |
290 memset(dst, 0, sizeof(dst)); | 290 memset(dst, 0, sizeof(dst)); |
291 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 291 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
292 EXPECT(!is_valid); | 292 EXPECT(!is_valid); |
293 EXPECT(memcmp(expected, dst, sizeof(expected))); | 293 EXPECT(memcmp(expected, dst, sizeof(expected))); |
294 } | 294 } |
295 | 295 |
296 // 3 - Malformed sequences | 296 // 3 - Malformed sequences |
297 | 297 |
298 // 3.1 - Unexpected continuation bytes | 298 // 3.1 - Unexpected continuation bytes |
299 | 299 |
300 // 3.1.1 - First continuation byte 0x80: "\x80" | 300 // 3.1.1 - First continuation byte 0x80: "\x80" |
301 { | 301 { |
302 const char* src = "\x80"; | 302 const char* src = "\x80"; |
303 uint32_t expected[] = { 0x80 }; | 303 uint32_t expected[] = { 0x80 }; |
304 uint32_t dst[ARRAY_SIZE(expected)]; | 304 uint32_t dst[ARRAY_SIZE(expected)]; |
305 memset(dst, 0, sizeof(dst)); | 305 memset(dst, 0, sizeof(dst)); |
306 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 306 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
307 EXPECT(!is_valid); | 307 EXPECT(!is_valid); |
308 EXPECT(memcmp(expected, dst, sizeof(expected))); | 308 EXPECT(memcmp(expected, dst, sizeof(expected))); |
309 } | 309 } |
310 | 310 |
311 // 3.1.2 - Last continuation byte 0xbf: "\xBF" | 311 // 3.1.2 - Last continuation byte 0xbf: "\xBF" |
312 { | 312 { |
313 const char* src = "\xBF"; | 313 const char* src = "\xBF"; |
314 uint32_t expected[] = { 0xBF }; | 314 uint32_t expected[] = { 0xBF }; |
315 uint32_t dst[ARRAY_SIZE(expected)]; | 315 uint32_t dst[ARRAY_SIZE(expected)]; |
316 memset(dst, 0, sizeof(dst)); | 316 memset(dst, 0, sizeof(dst)); |
317 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 317 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
318 EXPECT(!is_valid); | 318 EXPECT(!is_valid); |
319 EXPECT(memcmp(expected, dst, sizeof(expected))); | 319 EXPECT(memcmp(expected, dst, sizeof(expected))); |
320 } | 320 } |
321 | 321 |
322 // 3.1.3 - 2 continuation bytes: "\x80\xBF" | 322 // 3.1.3 - 2 continuation bytes: "\x80\xBF" |
323 { | 323 { |
324 const char* src = "\x80\xBF"; | 324 const char* src = "\x80\xBF"; |
325 uint32_t expected[] = { 0x80, 0xBF }; | 325 uint32_t expected[] = { 0x80, 0xBF }; |
326 uint32_t dst[ARRAY_SIZE(expected)]; | 326 uint32_t dst[ARRAY_SIZE(expected)]; |
327 memset(dst, 0, sizeof(dst)); | 327 memset(dst, 0, sizeof(dst)); |
328 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 328 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
329 EXPECT(!is_valid); | 329 EXPECT(!is_valid); |
330 EXPECT(memcmp(expected, dst, sizeof(expected))); | 330 EXPECT(memcmp(expected, dst, sizeof(expected))); |
331 } | 331 } |
332 | 332 |
333 // 3.1.4 - 3 continuation bytes: "\x80\xBF\x80" | 333 // 3.1.4 - 3 continuation bytes: "\x80\xBF\x80" |
334 { | 334 { |
335 const char* src = "\x80\xBF\x80"; | 335 const char* src = "\x80\xBF\x80"; |
336 uint32_t expected[] = { 0x80, 0xBF, 0x80 }; | 336 uint32_t expected[] = { 0x80, 0xBF, 0x80 }; |
337 uint32_t dst[ARRAY_SIZE(expected)]; | 337 uint32_t dst[ARRAY_SIZE(expected)]; |
338 memset(dst, 0, sizeof(dst)); | 338 memset(dst, 0, sizeof(dst)); |
339 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 339 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
340 EXPECT(!is_valid); | 340 EXPECT(!is_valid); |
341 EXPECT(memcmp(expected, dst, sizeof(expected))); | 341 EXPECT(memcmp(expected, dst, sizeof(expected))); |
342 } | 342 } |
343 | 343 |
344 // 3.1.5 - 4 continuation bytes: "\x80\xBF\x80\xBF" | 344 // 3.1.5 - 4 continuation bytes: "\x80\xBF\x80\xBF" |
345 { | 345 { |
346 const char* src = "\x80\xBF\x80\xBF"; | 346 const char* src = "\x80\xBF\x80\xBF"; |
347 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF }; | 347 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF }; |
348 uint32_t dst[ARRAY_SIZE(expected)]; | 348 uint32_t dst[ARRAY_SIZE(expected)]; |
349 memset(dst, 0, sizeof(dst)); | 349 memset(dst, 0, sizeof(dst)); |
350 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 350 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
351 EXPECT(!is_valid); | 351 EXPECT(!is_valid); |
352 EXPECT(memcmp(expected, dst, sizeof(expected))); | 352 EXPECT(memcmp(expected, dst, sizeof(expected))); |
353 } | 353 } |
354 | 354 |
355 // 3.1.6 - 5 continuation bytes: "\x80\xBF\x80\xBF\x80" | 355 // 3.1.6 - 5 continuation bytes: "\x80\xBF\x80\xBF\x80" |
356 { | 356 { |
357 const char* src = "\x80\xBF\x80\xBF\x80"; | 357 const char* src = "\x80\xBF\x80\xBF\x80"; |
358 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80 }; | 358 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80 }; |
359 uint32_t dst[ARRAY_SIZE(expected)]; | 359 uint32_t dst[ARRAY_SIZE(expected)]; |
360 memset(dst, 0, sizeof(dst)); | 360 memset(dst, 0, sizeof(dst)); |
361 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 361 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
362 EXPECT(!is_valid); | 362 EXPECT(!is_valid); |
363 EXPECT(memcmp(expected, dst, sizeof(expected))); | 363 EXPECT(memcmp(expected, dst, sizeof(expected))); |
364 } | 364 } |
365 | 365 |
366 // 3.1.7 - 6 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF" | 366 // 3.1.7 - 6 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF" |
367 { | 367 { |
368 const char* src = "\x80\xBF\x80\xBF\x80\xBF"; | 368 const char* src = "\x80\xBF\x80\xBF\x80\xBF"; |
369 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF }; | 369 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF }; |
370 uint32_t dst[ARRAY_SIZE(expected)]; | 370 uint32_t dst[ARRAY_SIZE(expected)]; |
371 memset(dst, 0, sizeof(dst)); | 371 memset(dst, 0, sizeof(dst)); |
372 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 372 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
373 EXPECT(!is_valid); | 373 EXPECT(!is_valid); |
374 EXPECT(memcmp(expected, dst, sizeof(expected))); | 374 EXPECT(memcmp(expected, dst, sizeof(expected))); |
375 } | 375 } |
376 | 376 |
377 // 3.1.8 - 7 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF\x80" | 377 // 3.1.8 - 7 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF\x80" |
378 { | 378 { |
379 const char* src = "\x80\xBF\x80\xBF\x80\xBF\x80"; | 379 const char* src = "\x80\xBF\x80\xBF\x80\xBF\x80"; |
380 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 }; | 380 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 }; |
381 uint32_t dst[ARRAY_SIZE(expected)]; | 381 uint32_t dst[ARRAY_SIZE(expected)]; |
382 memset(dst, 0, sizeof(dst)); | 382 memset(dst, 0, sizeof(dst)); |
383 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 383 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
384 EXPECT(!is_valid); | 384 EXPECT(!is_valid); |
385 EXPECT(memcmp(expected, dst, sizeof(expected))); | 385 EXPECT(memcmp(expected, dst, sizeof(expected))); |
386 } | 386 } |
387 | 387 |
388 // 3.1.9 - Sequence of all 64 possible continuation bytes (0x80-0xbf): | 388 // 3.1.9 - Sequence of all 64 possible continuation bytes (0x80-0xbf): |
389 { | 389 { |
390 const char* src = "\x80\x81\x82\x83\x84\x85\x86\x87" | 390 const char* src = "\x80\x81\x82\x83\x84\x85\x86\x87" |
391 "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" | 391 "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" |
392 "\x90\x91\x92\x93\x94\x95\x96\x97" | 392 "\x90\x91\x92\x93\x94\x95\x96\x97" |
393 "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F" | 393 "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F" |
394 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7" | 394 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7" |
395 "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" | 395 "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" |
396 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7" | 396 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7" |
397 "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"; | 397 "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"; |
398 uint32_t expected[] = { 0x0 }; | 398 uint32_t expected[] = { 0x0 }; |
399 uint32_t dst[ARRAY_SIZE(expected)]; | 399 uint32_t dst[ARRAY_SIZE(expected)]; |
400 for (size_t i = 0; i < strlen(src); ++i) { | 400 for (size_t i = 0; i < strlen(src); ++i) { |
401 memset(dst, 0xFF, sizeof(dst)); | 401 memset(dst, 0xFF, sizeof(dst)); |
402 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); | 402 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
403 EXPECT(!is_valid); | 403 EXPECT(!is_valid); |
404 EXPECT(memcmp(expected, dst, sizeof(expected))); | 404 EXPECT(memcmp(expected, dst, sizeof(expected))); |
405 } | 405 } |
406 } | 406 } |
407 | 407 |
408 // 3.2 - Lonely start character | 408 // 3.2 - Lonely start character |
409 | 409 |
410 // 3.2.1 - All 32 first bytes of 2-byte sequences (0xc0-0xdf), each | 410 // 3.2.1 - All 32 first bytes of 2-byte sequences (0xc0-0xdf), each |
411 // followed by a space character: | 411 // followed by a space character: |
412 { | 412 { |
413 const char* src = "\xC0\x20\xC1\x20\xC2\x20\xC3\x20" | 413 const char* src = "\xC0\x20\xC1\x20\xC2\x20\xC3\x20" |
414 "\xC4\x20\xC5\x20\xC6\x20\xC7\x20" | 414 "\xC4\x20\xC5\x20\xC6\x20\xC7\x20" |
415 "\xC8\x20\xC9\x20\xCA\x20\xCB\x20" | 415 "\xC8\x20\xC9\x20\xCA\x20\xCB\x20" |
416 "\xCC\x20\xCD\x20\xCE\x20\xCF\x20" | 416 "\xCC\x20\xCD\x20\xCE\x20\xCF\x20" |
417 "\xD0\x20\xD1\x20\xD2\x20\xD3\x20" | 417 "\xD0\x20\xD1\x20\xD2\x20\xD3\x20" |
418 "\xD4\x20\xD5\x20\xD6\x20\xD7\x20" | 418 "\xD4\x20\xD5\x20\xD6\x20\xD7\x20" |
419 "\xD8\x20\xD9\x20\xDA\x20\xDB\x20" | 419 "\xD8\x20\xD9\x20\xDA\x20\xDB\x20" |
420 "\xDC\x20\xDD\x20\xDE\x20\xDF\x20"; | 420 "\xDC\x20\xDD\x20\xDE\x20\xDF\x20"; |
421 uint32_t expected[] = { 0x0 }; | 421 uint32_t expected[] = { 0x0 }; |
422 uint32_t dst[ARRAY_SIZE(expected)]; | 422 uint32_t dst[ARRAY_SIZE(expected)]; |
423 for (size_t i = 0; i < strlen(src); i += 2) { | 423 for (size_t i = 0; i < strlen(src); i += 2) { |
424 memset(dst, 0xFF, sizeof(dst)); | 424 memset(dst, 0xFF, sizeof(dst)); |
425 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); | 425 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
426 EXPECT(!is_valid); | 426 EXPECT(!is_valid); |
427 EXPECT(memcmp(expected, dst, sizeof(expected))); | 427 EXPECT(memcmp(expected, dst, sizeof(expected))); |
428 } | 428 } |
429 } | 429 } |
430 | 430 |
431 // 3.2.2 - All 16 first bytes of 3-byte sequences (0xe0-0xef), each | 431 // 3.2.2 - All 16 first bytes of 3-byte sequences (0xe0-0xef), each |
432 // followed by a space character: | 432 // followed by a space character: |
433 { | 433 { |
434 const char* src = "\xE0\x20\xE1\x20\xE2\x20\xE3\x20" | 434 const char* src = "\xE0\x20\xE1\x20\xE2\x20\xE3\x20" |
435 "\xE4\x20\xE5\x20\xE6\x20\xE7\x20" | 435 "\xE4\x20\xE5\x20\xE6\x20\xE7\x20" |
436 "\xE8\x20\xE9\x20\xEA\x20\xEB\x20" | 436 "\xE8\x20\xE9\x20\xEA\x20\xEB\x20" |
437 "\xEC\x20\xED\x20\xEE\x20\xEF\x20"; | 437 "\xEC\x20\xED\x20\xEE\x20\xEF\x20"; |
438 uint32_t expected[] = { 0x0 }; | 438 uint32_t expected[] = { 0x0 }; |
439 uint32_t dst[ARRAY_SIZE(expected)]; | 439 uint32_t dst[ARRAY_SIZE(expected)]; |
440 for (size_t i = 0; i < strlen(src); i += 2) { | 440 for (size_t i = 0; i < strlen(src); i += 2) { |
441 memset(dst, 0xFF, sizeof(dst)); | 441 memset(dst, 0xFF, sizeof(dst)); |
442 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); | 442 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
443 EXPECT(!is_valid); | 443 EXPECT(!is_valid); |
444 EXPECT(memcmp(expected, dst, sizeof(expected))); | 444 EXPECT(memcmp(expected, dst, sizeof(expected))); |
445 } | 445 } |
446 } | 446 } |
447 | 447 |
448 // 3.2.3 - All 8 first bytes of 4-byte sequences (0xf0-0xf7), each | 448 // 3.2.3 - All 8 first bytes of 4-byte sequences (0xf0-0xf7), each |
449 // followed by a space character: | 449 // followed by a space character: |
450 { | 450 { |
451 const char* src = "\xF0\x20\xF1\x20\xF2\x20\xF3\x20" | 451 const char* src = "\xF0\x20\xF1\x20\xF2\x20\xF3\x20" |
452 "\xF4\x20\xF5\x20\xF6\x20\xF7\x20"; | 452 "\xF4\x20\xF5\x20\xF6\x20\xF7\x20"; |
453 uint32_t expected[] = { 0x0 }; | 453 uint32_t expected[] = { 0x0 }; |
454 uint32_t dst[ARRAY_SIZE(expected)]; | 454 uint32_t dst[ARRAY_SIZE(expected)]; |
455 for (size_t i = 0; i < strlen(src); i += 2) { | 455 for (size_t i = 0; i < strlen(src); i += 2) { |
456 memset(dst, 0xFF, sizeof(dst)); | 456 memset(dst, 0xFF, sizeof(dst)); |
457 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); | 457 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
458 EXPECT(!is_valid); | 458 EXPECT(!is_valid); |
459 EXPECT(memcmp(expected, dst, sizeof(expected))); | 459 EXPECT(memcmp(expected, dst, sizeof(expected))); |
460 } | 460 } |
461 } | 461 } |
462 | 462 |
463 // 3.2.4 - All 4 first bytes of 5-byte sequences (0xf8-0xfb), each | 463 // 3.2.4 - All 4 first bytes of 5-byte sequences (0xf8-0xfb), each |
464 // followed by a space character: | 464 // followed by a space character: |
465 { | 465 { |
466 const char* src = "\xF8\x20\xF9\x20\xFA\x20\xFB\x20"; | 466 const char* src = "\xF8\x20\xF9\x20\xFA\x20\xFB\x20"; |
467 uint32_t expected[] = { 0x0 }; | 467 uint32_t expected[] = { 0x0 }; |
468 uint32_t dst[ARRAY_SIZE(expected)]; | 468 uint32_t dst[ARRAY_SIZE(expected)]; |
469 for (size_t i = 0; i < strlen(src); i += 2) { | 469 for (size_t i = 0; i < strlen(src); i += 2) { |
470 memset(dst, 0xFF, sizeof(dst)); | 470 memset(dst, 0xFF, sizeof(dst)); |
471 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); | 471 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
472 EXPECT(!is_valid); | 472 EXPECT(!is_valid); |
473 EXPECT(memcmp(expected, dst, sizeof(expected))); | 473 EXPECT(memcmp(expected, dst, sizeof(expected))); |
474 } | 474 } |
475 } | 475 } |
476 | 476 |
477 // 3.2.5 - All 2 first bytes of 6-byte sequences (0xfc-0xfd), each | 477 // 3.2.5 - All 2 first bytes of 6-byte sequences (0xfc-0xfd), each |
478 // followed by a space character: | 478 // followed by a space character: |
479 { | 479 { |
480 const char* src = "\xFC\x20\xFD\x20"; | 480 const char* src = "\xFC\x20\xFD\x20"; |
481 uint32_t expected[] = { 0x0 }; | 481 uint32_t expected[] = { 0x0 }; |
482 uint32_t dst[ARRAY_SIZE(expected)]; | 482 uint32_t dst[ARRAY_SIZE(expected)]; |
483 for (size_t i = 0; i < strlen(src); i += 2) { | 483 for (size_t i = 0; i < strlen(src); i += 2) { |
484 memset(dst, 0xFF, sizeof(dst)); | 484 memset(dst, 0xFF, sizeof(dst)); |
485 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); | 485 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
486 EXPECT(!is_valid); | 486 EXPECT(!is_valid); |
487 EXPECT(memcmp(expected, dst, sizeof(expected))); | 487 EXPECT(memcmp(expected, dst, sizeof(expected))); |
488 } | 488 } |
489 } | 489 } |
490 | 490 |
491 // 3.3 - Sequences with last continuation byte missing | 491 // 3.3 - Sequences with last continuation byte missing |
492 | 492 |
493 // 3.3.1 - 2-byte sequence with last byte missing (U+0000): "\xC0" | 493 // 3.3.1 - 2-byte sequence with last byte missing (U+0000): "\xC0" |
494 { | 494 { |
495 const char* src = "\xC0"; | 495 const char* src = "\xC0"; |
496 uint32_t expected[] = { 0x0 }; | 496 uint32_t expected[] = { 0x0 }; |
497 uint32_t dst[ARRAY_SIZE(expected)]; | 497 uint32_t dst[ARRAY_SIZE(expected)]; |
498 memset(dst, 0xFF, sizeof(dst)); | 498 memset(dst, 0xFF, sizeof(dst)); |
499 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 499 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
500 EXPECT(!is_valid); | 500 EXPECT(!is_valid); |
501 EXPECT(memcmp(expected, dst, sizeof(expected))); | 501 EXPECT(memcmp(expected, dst, sizeof(expected))); |
502 } | 502 } |
503 | 503 |
504 // 3.3.2 - 3-byte sequence with last byte missing (U+0000): "\xE0\x80" | 504 // 3.3.2 - 3-byte sequence with last byte missing (U+0000): "\xE0\x80" |
505 { | 505 { |
506 const char* src = "\xE0\x80"; | 506 const char* src = "\xE0\x80"; |
507 uint32_t expected[] = { 0x0 }; | 507 uint32_t expected[] = { 0x0 }; |
508 uint32_t dst[ARRAY_SIZE(expected)]; | 508 uint32_t dst[ARRAY_SIZE(expected)]; |
509 memset(dst, 0xFF, sizeof(dst)); | 509 memset(dst, 0xFF, sizeof(dst)); |
510 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 510 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
511 EXPECT(!is_valid); | 511 EXPECT(!is_valid); |
512 EXPECT(memcmp(expected, dst, sizeof(expected))); | 512 EXPECT(memcmp(expected, dst, sizeof(expected))); |
513 } | 513 } |
514 | 514 |
515 // 3.3.3 - 4-byte sequence with last byte missing (U+0000): "\xF0\x80\x80" | 515 // 3.3.3 - 4-byte sequence with last byte missing (U+0000): "\xF0\x80\x80" |
516 { | 516 { |
517 const char* src = "\xF0\x80\x80"; | 517 const char* src = "\xF0\x80\x80"; |
518 uint32_t expected[] = { 0x0 }; | 518 uint32_t expected[] = { 0x0 }; |
519 uint32_t dst[ARRAY_SIZE(expected)]; | 519 uint32_t dst[ARRAY_SIZE(expected)]; |
520 memset(dst, 0xFF, sizeof(dst)); | 520 memset(dst, 0xFF, sizeof(dst)); |
521 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 521 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
522 EXPECT(!is_valid); | 522 EXPECT(!is_valid); |
523 EXPECT(memcmp(expected, dst, sizeof(expected))); | 523 EXPECT(memcmp(expected, dst, sizeof(expected))); |
524 } | 524 } |
525 | 525 |
526 // 3.3.4 - 5-byte sequence with last byte missing (U+0000): "\xF8\x80\x80\x80" | 526 // 3.3.4 - 5-byte sequence with last byte missing (U+0000): "\xF8\x80\x80\x80" |
527 { | 527 { |
528 const char* src = "\xF8\x80\x80\x80"; | 528 const char* src = "\xF8\x80\x80\x80"; |
529 uint32_t expected[] = { 0x0 }; | 529 uint32_t expected[] = { 0x0 }; |
530 uint32_t dst[ARRAY_SIZE(expected)]; | 530 uint32_t dst[ARRAY_SIZE(expected)]; |
531 memset(dst, 0xFF, sizeof(dst)); | 531 memset(dst, 0xFF, sizeof(dst)); |
532 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 532 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
533 EXPECT(!is_valid); | 533 EXPECT(!is_valid); |
534 EXPECT(memcmp(expected, dst, sizeof(expected))); | 534 EXPECT(memcmp(expected, dst, sizeof(expected))); |
535 } | 535 } |
536 | 536 |
537 // 3.3.5 - 6-byte sequence with last byte missing (U+0000): | 537 // 3.3.5 - 6-byte sequence with last byte missing (U+0000): |
538 // "\xFC\x80\x80\x80\x80" | 538 // "\xFC\x80\x80\x80\x80" |
539 { | 539 { |
540 const char* src = "\xFC\x80\x80\x80\x80"; | 540 const char* src = "\xFC\x80\x80\x80\x80"; |
541 uint32_t expected[] = { 0x0 }; | 541 uint32_t expected[] = { 0x0 }; |
542 uint32_t dst[ARRAY_SIZE(expected)]; | 542 uint32_t dst[ARRAY_SIZE(expected)]; |
543 memset(dst, 0xFF, sizeof(dst)); | 543 memset(dst, 0xFF, sizeof(dst)); |
544 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 544 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
545 EXPECT(!is_valid); | 545 EXPECT(!is_valid); |
546 EXPECT(memcmp(expected, dst, sizeof(expected))); | 546 EXPECT(memcmp(expected, dst, sizeof(expected))); |
547 } | 547 } |
548 | 548 |
549 // 3.3.6 - 2-byte sequence with last byte missing (U-000007FF): "\xDF" | 549 // 3.3.6 - 2-byte sequence with last byte missing (U-000007FF): "\xDF" |
550 { | 550 { |
551 const char* src = "\xDF"; | 551 const char* src = "\xDF"; |
552 uint32_t expected[] = { 0x0 }; | 552 uint32_t expected[] = { 0x0 }; |
553 uint32_t dst[ARRAY_SIZE(expected)]; | 553 uint32_t dst[ARRAY_SIZE(expected)]; |
554 memset(dst, 0xFF, sizeof(dst)); | 554 memset(dst, 0xFF, sizeof(dst)); |
555 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 555 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
556 EXPECT(!is_valid); | 556 EXPECT(!is_valid); |
557 EXPECT(memcmp(expected, dst, sizeof(expected))); | 557 EXPECT(memcmp(expected, dst, sizeof(expected))); |
558 } | 558 } |
559 | 559 |
560 // 3.3.7 - 3-byte sequence with last byte missing (U-0000FFFF): "\xEF\xBF" | 560 // 3.3.7 - 3-byte sequence with last byte missing (U-0000FFFF): "\xEF\xBF" |
561 { | 561 { |
562 const char* src = "\xEF\xBF"; | 562 const char* src = "\xEF\xBF"; |
563 uint32_t expected[] = { 0x0 }; | 563 uint32_t expected[] = { 0x0 }; |
564 uint32_t dst[ARRAY_SIZE(expected)]; | 564 uint32_t dst[ARRAY_SIZE(expected)]; |
565 memset(dst, 0xFF, sizeof(dst)); | 565 memset(dst, 0xFF, sizeof(dst)); |
566 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 566 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
567 EXPECT(!is_valid); | 567 EXPECT(!is_valid); |
568 EXPECT(memcmp(expected, dst, sizeof(expected))); | 568 EXPECT(memcmp(expected, dst, sizeof(expected))); |
569 } | 569 } |
570 | 570 |
571 // 3.3.8 - 4-byte sequence with last byte missing (U-001FFFFF): "\xF7\xBF\xBF" | 571 // 3.3.8 - 4-byte sequence with last byte missing (U-001FFFFF): "\xF7\xBF\xBF" |
572 { | 572 { |
573 const char* src = "\xF7\xBF\xBF"; | 573 const char* src = "\xF7\xBF\xBF"; |
574 uint32_t expected[] = { 0x0 }; | 574 uint32_t expected[] = { 0x0 }; |
575 uint32_t dst[ARRAY_SIZE(expected)]; | 575 uint32_t dst[ARRAY_SIZE(expected)]; |
576 memset(dst, 0xFF, sizeof(dst)); | 576 memset(dst, 0xFF, sizeof(dst)); |
577 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 577 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
578 EXPECT(!is_valid); | 578 EXPECT(!is_valid); |
579 EXPECT(memcmp(expected, dst, sizeof(expected))); | 579 EXPECT(memcmp(expected, dst, sizeof(expected))); |
580 } | 580 } |
581 | 581 |
582 // 3.3.9 - 5-byte sequence with last byte missing (U-03FFFFFF): | 582 // 3.3.9 - 5-byte sequence with last byte missing (U-03FFFFFF): |
583 // "\xFB\xBF\xBF\xBF" | 583 // "\xFB\xBF\xBF\xBF" |
584 { | 584 { |
585 const char* src = "\xFB\xBF\xBF\xBF"; | 585 const char* src = "\xFB\xBF\xBF\xBF"; |
586 uint32_t expected[] = { 0x0 }; | 586 uint32_t expected[] = { 0x0 }; |
587 uint32_t dst[ARRAY_SIZE(expected)]; | 587 uint32_t dst[ARRAY_SIZE(expected)]; |
588 memset(dst, 0xFF, sizeof(dst)); | 588 memset(dst, 0xFF, sizeof(dst)); |
589 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 589 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
590 EXPECT(!is_valid); | 590 EXPECT(!is_valid); |
591 EXPECT(memcmp(expected, dst, sizeof(expected))); | 591 EXPECT(memcmp(expected, dst, sizeof(expected))); |
592 } | 592 } |
593 | 593 |
594 // 3.3.10 - 6-byte sequence with last byte missing (U-7FFFFFFF): | 594 // 3.3.10 - 6-byte sequence with last byte missing (U-7FFFFFFF): |
595 // "\xFD\xBF\xBF\xBF\xBF" | 595 // "\xFD\xBF\xBF\xBF\xBF" |
596 { | 596 { |
597 const char* src = "\xFD\xBF\xBF\xBF\xBF"; | 597 const char* src = "\xFD\xBF\xBF\xBF\xBF"; |
598 uint32_t expected[] = { 0x0 }; | 598 uint32_t expected[] = { 0x0 }; |
599 uint32_t dst[ARRAY_SIZE(expected)]; | 599 uint32_t dst[ARRAY_SIZE(expected)]; |
600 memset(dst, 0xFF, sizeof(dst)); | 600 memset(dst, 0xFF, sizeof(dst)); |
601 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 601 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
602 EXPECT(!is_valid); | 602 EXPECT(!is_valid); |
603 EXPECT(memcmp(expected, dst, sizeof(expected))); | 603 EXPECT(memcmp(expected, dst, sizeof(expected))); |
604 } | 604 } |
605 | 605 |
606 // 3.4 - Concatenation of incomplete sequences | 606 // 3.4 - Concatenation of incomplete sequences |
607 { | 607 { |
608 const char* src = "\xC0\xE0\x80\xF0\x80\x80" | 608 const char* src = "\xC0\xE0\x80\xF0\x80\x80" |
609 "\xF8\x80\x80\x80\xFC\x80" | 609 "\xF8\x80\x80\x80\xFC\x80" |
610 "\x80\x80\x80\xDF\xEF\xBF" | 610 "\x80\x80\x80\xDF\xEF\xBF" |
611 "\xF7\xBF\xBF\xFB\xBF\xBF" | 611 "\xF7\xBF\xBF\xFB\xBF\xBF" |
612 "\xBF\xFD\xBF\xBF\xBF\xBF"; | 612 "\xBF\xFD\xBF\xBF\xBF\xBF"; |
613 uint32_t expected[] = { 0x0 }; | 613 uint32_t expected[] = { 0x0 }; |
614 uint32_t dst[ARRAY_SIZE(expected)]; | 614 uint32_t dst[ARRAY_SIZE(expected)]; |
615 for (size_t i = 0; i < strlen(src); ++i) { | 615 for (size_t i = 0; i < strlen(src); ++i) { |
616 for (size_t j = 1; j < (strlen(src) - i); ++j) { | 616 for (size_t j = 1; j < (strlen(src) - i); ++j) { |
617 memset(dst, 0xFF, sizeof(dst)); | 617 memset(dst, 0xFF, sizeof(dst)); |
618 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); | 618 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], |
| 619 dst, ARRAY_SIZE(dst)); |
619 EXPECT(!is_valid); | 620 EXPECT(!is_valid); |
620 EXPECT(memcmp(expected, dst, sizeof(expected))); | 621 EXPECT(memcmp(expected, dst, sizeof(expected))); |
621 } | 622 } |
622 } | 623 } |
623 } | 624 } |
624 | 625 |
625 // 3.5 - Impossible bytes | 626 // 3.5 - Impossible bytes |
626 | 627 |
627 // 3.5.1 - fe = "\xFE" | 628 // 3.5.1 - fe = "\xFE" |
628 { | 629 { |
629 const char* src = "\xFE"; | 630 const char* src = "\xFE"; |
630 uint32_t expected[] = { 0xFE }; | 631 uint32_t expected[] = { 0xFE }; |
631 uint32_t dst[ARRAY_SIZE(expected)]; | 632 uint32_t dst[ARRAY_SIZE(expected)]; |
632 memset(dst, 0, sizeof(dst)); | 633 memset(dst, 0, sizeof(dst)); |
633 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 634 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
634 EXPECT(!is_valid); | 635 EXPECT(!is_valid); |
635 EXPECT(memcmp(expected, dst, sizeof(expected))); | 636 EXPECT(memcmp(expected, dst, sizeof(expected))); |
636 } | 637 } |
637 | 638 |
638 // 3.5.2 - ff = "\xFF" | 639 // 3.5.2 - ff = "\xFF" |
639 { | 640 { |
640 const char* src = "\xFF"; | 641 const char* src = "\xFF"; |
641 uint32_t expected[] = { 0xFF }; | 642 uint32_t expected[] = { 0xFF }; |
642 uint32_t dst[ARRAY_SIZE(expected)]; | 643 uint32_t dst[ARRAY_SIZE(expected)]; |
643 memset(dst, 0, sizeof(dst)); | 644 memset(dst, 0, sizeof(dst)); |
644 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 645 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
645 EXPECT(!is_valid); | 646 EXPECT(!is_valid); |
646 EXPECT(memcmp(expected, dst, sizeof(expected))); | 647 EXPECT(memcmp(expected, dst, sizeof(expected))); |
647 } | 648 } |
648 | 649 |
649 // 3.5.3 - fe fe ff ff = "\xFE\xFE\xFF\xFF" | 650 // 3.5.3 - fe fe ff ff = "\xFE\xFE\xFF\xFF" |
650 { | 651 { |
651 const char* src = "\xFE\xFE\xFF\xFF"; | 652 const char* src = "\xFE\xFE\xFF\xFF"; |
652 uint32_t expected[] = { 0xFF }; | 653 uint32_t expected[] = { 0xFF }; |
653 uint32_t dst[ARRAY_SIZE(expected)]; | 654 uint32_t dst[ARRAY_SIZE(expected)]; |
654 memset(dst, 0, sizeof(dst)); | 655 memset(dst, 0, sizeof(dst)); |
655 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 656 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
656 EXPECT(!is_valid); | 657 EXPECT(!is_valid); |
657 EXPECT(memcmp(expected, dst, sizeof(expected))); | 658 EXPECT(memcmp(expected, dst, sizeof(expected))); |
658 } | 659 } |
659 | 660 |
660 // 4 - Overlong sequences | 661 // 4 - Overlong sequences |
661 | 662 |
662 // 4.1 - Examples of an overlong ASCII character | 663 // 4.1 - Examples of an overlong ASCII character |
663 | 664 |
664 // 4.1.1 - U+002F = c0 af = "\xC0\xAF" | 665 // 4.1.1 - U+002F = c0 af = "\xC0\xAF" |
665 { | 666 { |
666 const char* src = "\xC0\xAF"; | 667 const char* src = "\xC0\xAF"; |
667 uint32_t expected[] = { 0x2F }; | 668 uint32_t expected[] = { 0x2F }; |
668 uint32_t dst[ARRAY_SIZE(expected)]; | 669 uint32_t dst[ARRAY_SIZE(expected)]; |
669 memset(dst, 0, sizeof(dst)); | 670 memset(dst, 0, sizeof(dst)); |
670 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 671 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
671 EXPECT(!is_valid); | 672 EXPECT(!is_valid); |
672 EXPECT(memcmp(expected, dst, sizeof(expected))); | 673 EXPECT(memcmp(expected, dst, sizeof(expected))); |
673 } | 674 } |
674 | 675 |
675 // 4.1.2 - U+002F = e0 80 af = "\xE0\x80\xAF" | 676 // 4.1.2 - U+002F = e0 80 af = "\xE0\x80\xAF" |
676 { | 677 { |
677 const char* src = "\xE0\x80\xAF"; | 678 const char* src = "\xE0\x80\xAF"; |
678 uint32_t expected[] = { 0x2F }; | 679 uint32_t expected[] = { 0x2F }; |
679 uint32_t dst[ARRAY_SIZE(expected)]; | 680 uint32_t dst[ARRAY_SIZE(expected)]; |
680 memset(dst, 0, sizeof(dst)); | 681 memset(dst, 0, sizeof(dst)); |
681 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 682 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
682 EXPECT(!is_valid); | 683 EXPECT(!is_valid); |
683 EXPECT(memcmp(expected, dst, sizeof(expected))); | 684 EXPECT(memcmp(expected, dst, sizeof(expected))); |
684 } | 685 } |
685 | 686 |
686 // 4.1.3 - U+002F = f0 80 80 af = "\xF0\x80\x80\xAF" | 687 // 4.1.3 - U+002F = f0 80 80 af = "\xF0\x80\x80\xAF" |
687 { | 688 { |
688 const char* src = "\xF0\x80\x80\xAF"; | 689 const char* src = "\xF0\x80\x80\xAF"; |
689 uint32_t expected[] = { 0x2F }; | 690 uint32_t expected[] = { 0x2F }; |
690 uint32_t dst[ARRAY_SIZE(expected)]; | 691 uint32_t dst[ARRAY_SIZE(expected)]; |
691 memset(dst, 0, sizeof(dst)); | 692 memset(dst, 0, sizeof(dst)); |
692 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 693 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
693 EXPECT(!is_valid); | 694 EXPECT(!is_valid); |
694 EXPECT(memcmp(expected, dst, sizeof(expected))); | 695 EXPECT(memcmp(expected, dst, sizeof(expected))); |
695 } | 696 } |
696 | 697 |
697 // 4.1.4 - U+002F = f8 80 80 80 af = "\xF8\x80\x80\x80\xAF" | 698 // 4.1.4 - U+002F = f8 80 80 80 af = "\xF8\x80\x80\x80\xAF" |
698 { | 699 { |
699 const char* src = "\xF8\x80\x80\x80\xAF"; | 700 const char* src = "\xF8\x80\x80\x80\xAF"; |
700 uint32_t expected[] = { 0x2F }; | 701 uint32_t expected[] = { 0x2F }; |
701 uint32_t dst[ARRAY_SIZE(expected)]; | 702 uint32_t dst[ARRAY_SIZE(expected)]; |
702 memset(dst, 0, sizeof(dst)); | 703 memset(dst, 0, sizeof(dst)); |
703 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 704 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
704 EXPECT(!is_valid); | 705 EXPECT(!is_valid); |
705 EXPECT(memcmp(expected, dst, sizeof(expected))); | 706 EXPECT(memcmp(expected, dst, sizeof(expected))); |
706 } | 707 } |
707 | 708 |
708 // 4.1.5 - U+002F = fc 80 80 80 80 af = "\xFC\x80\x80\x80\x80\xAF" | 709 // 4.1.5 - U+002F = fc 80 80 80 80 af = "\xFC\x80\x80\x80\x80\xAF" |
709 { | 710 { |
710 const char* src = "\xFC\x80\x80\x80\x80\xAF"; | 711 const char* src = "\xFC\x80\x80\x80\x80\xAF"; |
711 uint32_t expected[] = { 0x2F }; | 712 uint32_t expected[] = { 0x2F }; |
712 uint32_t dst[ARRAY_SIZE(expected)]; | 713 uint32_t dst[ARRAY_SIZE(expected)]; |
713 memset(dst, 0, sizeof(dst)); | 714 memset(dst, 0, sizeof(dst)); |
714 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 715 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
715 EXPECT(!is_valid); | 716 EXPECT(!is_valid); |
716 EXPECT(memcmp(expected, dst, sizeof(expected))); | 717 EXPECT(memcmp(expected, dst, sizeof(expected))); |
717 } | 718 } |
718 | 719 |
719 // 4.2 Maximum overlong sequences | 720 // 4.2 Maximum overlong sequences |
720 | 721 |
721 // 4.2.1 - U-0000007F = c1 bf = "\xC1\xBF" | 722 // 4.2.1 - U-0000007F = c1 bf = "\xC1\xBF" |
722 { | 723 { |
723 const char* src = "\xC1\xBF"; | 724 const char* src = "\xC1\xBF"; |
724 uint32_t expected[] = { 0x7F }; | 725 uint32_t expected[] = { 0x7F }; |
725 uint32_t dst[ARRAY_SIZE(expected)]; | 726 uint32_t dst[ARRAY_SIZE(expected)]; |
726 memset(dst, 0, sizeof(dst)); | 727 memset(dst, 0, sizeof(dst)); |
727 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 728 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
728 EXPECT(!is_valid); | 729 EXPECT(!is_valid); |
729 EXPECT(memcmp(expected, dst, sizeof(expected))); | 730 EXPECT(memcmp(expected, dst, sizeof(expected))); |
730 } | 731 } |
731 | 732 |
732 // 4.2.2 U+000007FF = e0 9f bf = "\xE0\x9F\xBF" | 733 // 4.2.2 U+000007FF = e0 9f bf = "\xE0\x9F\xBF" |
733 { | 734 { |
734 const char* src = "\xE0\x9F\xBF"; | 735 const char* src = "\xE0\x9F\xBF"; |
735 uint32_t expected[] = { 0x7FF }; | 736 uint32_t expected[] = { 0x7FF }; |
736 uint32_t dst[ARRAY_SIZE(expected)]; | 737 uint32_t dst[ARRAY_SIZE(expected)]; |
737 memset(dst, 0, sizeof(dst)); | 738 memset(dst, 0, sizeof(dst)); |
738 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 739 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
739 EXPECT(!is_valid); | 740 EXPECT(!is_valid); |
740 EXPECT(memcmp(expected, dst, sizeof(expected))); | 741 EXPECT(memcmp(expected, dst, sizeof(expected))); |
741 } | 742 } |
742 | 743 |
743 // 4.2.3 - U+0000FFFF = f0 8f bf bf = "\xF0\x8F\xBF\xBF" | 744 // 4.2.3 - U+0000FFFF = f0 8f bf bf = "\xF0\x8F\xBF\xBF" |
744 { | 745 { |
745 const char* src = "\xF0\x8F\xBF\xBF"; | 746 const char* src = "\xF0\x8F\xBF\xBF"; |
746 uint32_t expected[] = { 0xFFFF }; | 747 uint32_t expected[] = { 0xFFFF }; |
747 uint32_t dst[ARRAY_SIZE(expected)]; | 748 uint32_t dst[ARRAY_SIZE(expected)]; |
748 memset(dst, 0, sizeof(dst)); | 749 memset(dst, 0, sizeof(dst)); |
749 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 750 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
750 EXPECT(!is_valid); | 751 EXPECT(!is_valid); |
751 EXPECT(memcmp(expected, dst, sizeof(expected))); | 752 EXPECT(memcmp(expected, dst, sizeof(expected))); |
752 } | 753 } |
753 | 754 |
754 // 4.2.4 U-001FFFFF = f8 87 bf bf bf = "\xF8\x87\xBF\xBF\xBF" | 755 // 4.2.4 U-001FFFFF = f8 87 bf bf bf = "\xF8\x87\xBF\xBF\xBF" |
755 { | 756 { |
756 const char* src = "\xF8\x87\xBF\xBF\xBF"; | 757 const char* src = "\xF8\x87\xBF\xBF\xBF"; |
757 uint32_t expected[] = { 0x1FFFFF }; | 758 uint32_t expected[] = { 0x1FFFFF }; |
758 uint32_t dst[ARRAY_SIZE(expected)]; | 759 uint32_t dst[ARRAY_SIZE(expected)]; |
759 memset(dst, 0, sizeof(dst)); | 760 memset(dst, 0, sizeof(dst)); |
760 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 761 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
761 EXPECT(!is_valid); | 762 EXPECT(!is_valid); |
762 EXPECT(memcmp(expected, dst, sizeof(expected))); | 763 EXPECT(memcmp(expected, dst, sizeof(expected))); |
763 } | 764 } |
764 | 765 |
765 // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "\xFC\x83\xBF\xBF\xBF\xBF" | 766 // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "\xFC\x83\xBF\xBF\xBF\xBF" |
766 { | 767 { |
767 const char* src = "\xFC\x83\xBF\xBF\xBF\xBF"; | 768 const char* src = "\xFC\x83\xBF\xBF\xBF\xBF"; |
768 uint32_t expected[] = { 0x3FFFFFF }; | 769 uint32_t expected[] = { 0x3FFFFFF }; |
769 uint32_t dst[ARRAY_SIZE(expected)]; | 770 uint32_t dst[ARRAY_SIZE(expected)]; |
770 memset(dst, 0, sizeof(dst)); | 771 memset(dst, 0, sizeof(dst)); |
771 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 772 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
772 EXPECT(!is_valid); | 773 EXPECT(!is_valid); |
773 EXPECT(memcmp(expected, dst, sizeof(expected))); | 774 EXPECT(memcmp(expected, dst, sizeof(expected))); |
774 } | 775 } |
775 | 776 |
776 // 4.3 - Overlong representation of the NUL character | 777 // 4.3 - Overlong representation of the NUL character |
777 | 778 |
778 // 4.3.1 - U+0000 = "\xC0\x80" | 779 // 4.3.1 - U+0000 = "\xC0\x80" |
779 { | 780 { |
780 const char* src = "\xC0\x80"; | 781 const char* src = "\xC0\x80"; |
781 uint32_t expected[] = { 0x0 }; | 782 uint32_t expected[] = { 0x0 }; |
782 uint32_t dst[ARRAY_SIZE(expected)]; | 783 uint32_t dst[ARRAY_SIZE(expected)]; |
783 memset(dst, 0xFF, sizeof(dst)); | 784 memset(dst, 0xFF, sizeof(dst)); |
784 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 785 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
785 EXPECT(!is_valid); | 786 EXPECT(!is_valid); |
786 EXPECT(memcmp(expected, dst, sizeof(expected))); | 787 EXPECT(memcmp(expected, dst, sizeof(expected))); |
787 } | 788 } |
788 | 789 |
789 // 4.3.2 U+0000 = e0 80 80 = "\xE0\x80\x80" | 790 // 4.3.2 U+0000 = e0 80 80 = "\xE0\x80\x80" |
790 { | 791 { |
791 const char* src = "\xE0\x80\x80"; | 792 const char* src = "\xE0\x80\x80"; |
792 uint32_t expected[] = { 0x0 }; | 793 uint32_t expected[] = { 0x0 }; |
793 uint32_t dst[ARRAY_SIZE(expected)]; | 794 uint32_t dst[ARRAY_SIZE(expected)]; |
794 memset(dst, 0xFF, sizeof(dst)); | 795 memset(dst, 0xFF, sizeof(dst)); |
795 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 796 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
796 EXPECT(!is_valid); | 797 EXPECT(!is_valid); |
797 EXPECT(memcmp(expected, dst, sizeof(expected))); | 798 EXPECT(memcmp(expected, dst, sizeof(expected))); |
798 } | 799 } |
799 | 800 |
800 // 4.3.3 U+0000 = f0 80 80 80 = "\xF0\x80\x80\x80" | 801 // 4.3.3 U+0000 = f0 80 80 80 = "\xF0\x80\x80\x80" |
801 { | 802 { |
802 const char* src = "\xF0\x80\x80\x80"; | 803 const char* src = "\xF0\x80\x80\x80"; |
803 uint32_t expected[] = { 0x0 }; | 804 uint32_t expected[] = { 0x0 }; |
804 uint32_t dst[ARRAY_SIZE(expected)]; | 805 uint32_t dst[ARRAY_SIZE(expected)]; |
805 memset(dst, 0xFF, sizeof(dst)); | 806 memset(dst, 0xFF, sizeof(dst)); |
806 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 807 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
807 EXPECT(!is_valid); | 808 EXPECT(!is_valid); |
808 EXPECT(memcmp(expected, dst, sizeof(expected))); | 809 EXPECT(memcmp(expected, dst, sizeof(expected))); |
809 } | 810 } |
810 | 811 |
811 // 4.3.4 U+0000 = f8 80 80 80 80 = "\xF8\x80\x80\x80\x80" | 812 // 4.3.4 U+0000 = f8 80 80 80 80 = "\xF8\x80\x80\x80\x80" |
812 { | 813 { |
813 const char* src = "\xF8\x80\x80\x80\x80"; | 814 const char* src = "\xF8\x80\x80\x80\x80"; |
814 uint32_t expected[] = { 0x0 }; | 815 uint32_t expected[] = { 0x0 }; |
815 uint32_t dst[ARRAY_SIZE(expected)]; | 816 uint32_t dst[ARRAY_SIZE(expected)]; |
816 memset(dst, 0xFF, sizeof(dst)); | 817 memset(dst, 0xFF, sizeof(dst)); |
817 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 818 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
818 EXPECT(!is_valid); | 819 EXPECT(!is_valid); |
819 EXPECT(memcmp(expected, dst, sizeof(expected))); | 820 EXPECT(memcmp(expected, dst, sizeof(expected))); |
820 } | 821 } |
821 | 822 |
822 // 4.3.5 U+0000 = fc 80 80 80 80 80 = "\xFC\x80\x80\x80\x80\x80" | 823 // 4.3.5 U+0000 = fc 80 80 80 80 80 = "\xFC\x80\x80\x80\x80\x80" |
823 { | 824 { |
824 const char* src = "\xFC\x80\x80\x80\x80\x80"; | 825 const char* src = "\xFC\x80\x80\x80\x80\x80"; |
825 uint32_t expected[] = { 0x0 }; | 826 uint32_t expected[] = { 0x0 }; |
826 uint32_t dst[ARRAY_SIZE(expected)]; | 827 uint32_t dst[ARRAY_SIZE(expected)]; |
827 memset(dst, 0xFF, sizeof(dst)); | 828 memset(dst, 0xFF, sizeof(dst)); |
828 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 829 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
829 EXPECT(!is_valid); | 830 EXPECT(!is_valid); |
830 EXPECT(memcmp(expected, dst, sizeof(expected))); | 831 EXPECT(memcmp(expected, dst, sizeof(expected))); |
831 } | 832 } |
832 | 833 |
833 // 5.1 - Single UTF-16 surrogates | 834 // 5.1 - Single UTF-16 surrogates |
834 | 835 |
835 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80" | 836 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80" |
836 { | 837 { |
837 const char* src = "\xED\xA0\x80"; | 838 const char* src = "\xED\xA0\x80"; |
838 uint32_t expected[] = { 0xD800 }; | 839 uint32_t expected[] = { 0xD800 }; |
839 uint32_t dst[ARRAY_SIZE(expected)]; | 840 uint32_t dst[ARRAY_SIZE(expected)]; |
840 memset(dst, 0, sizeof(dst)); | 841 memset(dst, 0, sizeof(dst)); |
841 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 842 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
842 EXPECT(!is_valid); | 843 EXPECT(!is_valid); |
843 EXPECT(memcmp(expected, dst, sizeof(expected))); | 844 EXPECT(memcmp(expected, dst, sizeof(expected))); |
844 } | 845 } |
845 | 846 |
846 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF" | 847 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF" |
847 { | 848 { |
848 const char* src = "\xED\xAD\xBF"; | 849 const char* src = "\xED\xAD\xBF"; |
849 uint32_t expected[] = { 0xDB7F }; | 850 uint32_t expected[] = { 0xDB7F }; |
850 uint32_t dst[ARRAY_SIZE(expected)]; | 851 uint32_t dst[ARRAY_SIZE(expected)]; |
851 memset(dst, 0, sizeof(dst)); | 852 memset(dst, 0, sizeof(dst)); |
852 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 853 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
853 EXPECT(!is_valid); | 854 EXPECT(!is_valid); |
854 EXPECT(memcmp(expected, dst, sizeof(expected))); | 855 EXPECT(memcmp(expected, dst, sizeof(expected))); |
855 } | 856 } |
856 | 857 |
857 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80" | 858 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80" |
858 { | 859 { |
859 const char* src = "\xED\xAE\x80"; | 860 const char* src = "\xED\xAE\x80"; |
860 uint32_t expected[] = { 0xDB80 }; | 861 uint32_t expected[] = { 0xDB80 }; |
861 uint32_t dst[ARRAY_SIZE(expected)]; | 862 uint32_t dst[ARRAY_SIZE(expected)]; |
862 memset(dst, 0, sizeof(dst)); | 863 memset(dst, 0, sizeof(dst)); |
863 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 864 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
864 EXPECT(!is_valid); | 865 EXPECT(!is_valid); |
865 EXPECT(memcmp(expected, dst, sizeof(expected))); | 866 EXPECT(memcmp(expected, dst, sizeof(expected))); |
866 } | 867 } |
867 | 868 |
868 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF" | 869 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF" |
869 { | 870 { |
870 const char* src = "\xED\xAF\xBF"; | 871 const char* src = "\xED\xAF\xBF"; |
871 uint32_t expected[] = { 0xDBFF }; | 872 uint32_t expected[] = { 0xDBFF }; |
872 uint32_t dst[ARRAY_SIZE(expected)]; | 873 uint32_t dst[ARRAY_SIZE(expected)]; |
873 memset(dst, 0, sizeof(dst)); | 874 memset(dst, 0, sizeof(dst)); |
874 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 875 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
875 EXPECT(!is_valid); | 876 EXPECT(!is_valid); |
876 EXPECT(memcmp(expected, dst, sizeof(expected))); | 877 EXPECT(memcmp(expected, dst, sizeof(expected))); |
877 } | 878 } |
878 | 879 |
879 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80" | 880 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80" |
880 { | 881 { |
881 const char* src = "\xED\xB0\x80"; | 882 const char* src = "\xED\xB0\x80"; |
882 uint32_t expected[] = { 0xDC00 }; | 883 uint32_t expected[] = { 0xDC00 }; |
883 uint32_t dst[ARRAY_SIZE(expected)]; | 884 uint32_t dst[ARRAY_SIZE(expected)]; |
884 memset(dst, 0, sizeof(dst)); | 885 memset(dst, 0, sizeof(dst)); |
885 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 886 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
886 EXPECT(!is_valid); | 887 EXPECT(!is_valid); |
887 EXPECT(memcmp(expected, dst, sizeof(expected))); | 888 EXPECT(memcmp(expected, dst, sizeof(expected))); |
888 } | 889 } |
889 | 890 |
890 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80" | 891 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80" |
891 { | 892 { |
892 const char* src = "\xED\xBE\x80"; | 893 const char* src = "\xED\xBE\x80"; |
893 uint32_t expected[] = { 0xDF80 }; | 894 uint32_t expected[] = { 0xDF80 }; |
894 uint32_t dst[ARRAY_SIZE(expected)]; | 895 uint32_t dst[ARRAY_SIZE(expected)]; |
895 memset(dst, 0, sizeof(dst)); | 896 memset(dst, 0, sizeof(dst)); |
896 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 897 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
897 EXPECT(!is_valid); | 898 EXPECT(!is_valid); |
898 EXPECT(memcmp(expected, dst, sizeof(expected))); | 899 EXPECT(memcmp(expected, dst, sizeof(expected))); |
899 } | 900 } |
900 | 901 |
901 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF" | 902 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF" |
902 { | 903 { |
903 const char* src = "\xED\xBF\xBF"; | 904 const char* src = "\xED\xBF\xBF"; |
904 uint32_t expected[] = { 0xDFFF }; | 905 uint32_t expected[] = { 0xDFFF }; |
905 uint32_t dst[ARRAY_SIZE(expected)]; | 906 uint32_t dst[ARRAY_SIZE(expected)]; |
906 memset(dst, 0, sizeof(dst)); | 907 memset(dst, 0, sizeof(dst)); |
907 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 908 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
908 EXPECT(!is_valid); | 909 EXPECT(!is_valid); |
909 EXPECT(memcmp(expected, dst, sizeof(expected))); | 910 EXPECT(memcmp(expected, dst, sizeof(expected))); |
910 } | 911 } |
911 | 912 |
912 // 5.2 Paired UTF-16 surrogates | 913 // 5.2 Paired UTF-16 surrogates |
913 | 914 |
914 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80" | 915 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80" |
915 { | 916 { |
916 const char* src = "\xED\xA0\x80\xED\xB0\x80"; | 917 const char* src = "\xED\xA0\x80\xED\xB0\x80"; |
917 uint32_t expected[] = { 0xD800, 0xDC00 }; | 918 uint32_t expected[] = { 0xD800, 0xDC00 }; |
918 uint32_t dst[ARRAY_SIZE(expected)]; | 919 uint32_t dst[ARRAY_SIZE(expected)]; |
919 memset(dst, 0, sizeof(dst)); | 920 memset(dst, 0, sizeof(dst)); |
920 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 921 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
921 EXPECT(!is_valid); | 922 EXPECT(!is_valid); |
922 EXPECT(memcmp(expected, dst, sizeof(expected))); | 923 EXPECT(memcmp(expected, dst, sizeof(expected))); |
923 } | 924 } |
924 | 925 |
925 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF" | 926 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF" |
926 { | 927 { |
927 const char* src = "\xED\xA0\x80\xED\xBF\xBF"; | 928 const char* src = "\xED\xA0\x80\xED\xBF\xBF"; |
928 uint32_t expected[] = { 0xD800, 0xDFFF }; | 929 uint32_t expected[] = { 0xD800, 0xDFFF }; |
929 uint32_t dst[ARRAY_SIZE(expected)]; | 930 uint32_t dst[ARRAY_SIZE(expected)]; |
930 memset(dst, 0, sizeof(dst)); | 931 memset(dst, 0, sizeof(dst)); |
931 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 932 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
932 EXPECT(!is_valid); | 933 EXPECT(!is_valid); |
933 EXPECT(memcmp(expected, dst, sizeof(expected))); | 934 EXPECT(memcmp(expected, dst, sizeof(expected))); |
934 } | 935 } |
935 | 936 |
936 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80" | 937 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80" |
937 { | 938 { |
938 const char* src = "\xED\xAD\xBF\xED\xB0\x80"; | 939 const char* src = "\xED\xAD\xBF\xED\xB0\x80"; |
939 uint32_t expected[] = { 0xDB7F, 0xDC00 }; | 940 uint32_t expected[] = { 0xDB7F, 0xDC00 }; |
940 uint32_t dst[ARRAY_SIZE(expected)]; | 941 uint32_t dst[ARRAY_SIZE(expected)]; |
941 memset(dst, 0, sizeof(dst)); | 942 memset(dst, 0, sizeof(dst)); |
942 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 943 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
943 EXPECT(!is_valid); | 944 EXPECT(!is_valid); |
944 EXPECT(memcmp(expected, dst, sizeof(expected))); | 945 EXPECT(memcmp(expected, dst, sizeof(expected))); |
945 } | 946 } |
946 | 947 |
947 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF" | 948 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF" |
948 { | 949 { |
949 const char* src = "\xED\xAD\xBF\xED\xBF\xBF"; | 950 const char* src = "\xED\xAD\xBF\xED\xBF\xBF"; |
950 uint32_t expected[] = { 0xDB7F, 0xDFFF }; | 951 uint32_t expected[] = { 0xDB7F, 0xDFFF }; |
951 uint32_t dst[ARRAY_SIZE(expected)]; | 952 uint32_t dst[ARRAY_SIZE(expected)]; |
952 memset(dst, 0, sizeof(dst)); | 953 memset(dst, 0, sizeof(dst)); |
953 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 954 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
954 EXPECT(!is_valid); | 955 EXPECT(!is_valid); |
955 EXPECT(memcmp(expected, dst, sizeof(expected))); | 956 EXPECT(memcmp(expected, dst, sizeof(expected))); |
956 } | 957 } |
957 | 958 |
958 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80" | 959 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80" |
959 { | 960 { |
960 const char* src = "\xED\xAE\x80\xED\xB0\x80"; | 961 const char* src = "\xED\xAE\x80\xED\xB0\x80"; |
961 uint32_t expected[] = { 0xDB80, 0xDC00 }; | 962 uint32_t expected[] = { 0xDB80, 0xDC00 }; |
962 uint32_t dst[ARRAY_SIZE(expected)]; | 963 uint32_t dst[ARRAY_SIZE(expected)]; |
963 memset(dst, 0, sizeof(dst)); | 964 memset(dst, 0, sizeof(dst)); |
964 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 965 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
965 EXPECT(!is_valid); | 966 EXPECT(!is_valid); |
966 EXPECT(memcmp(expected, dst, sizeof(expected))); | 967 EXPECT(memcmp(expected, dst, sizeof(expected))); |
967 } | 968 } |
968 | 969 |
969 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF" | 970 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF" |
970 { | 971 { |
971 const char* src = "\xED\xAE\x80\xED\xBF\xBF"; | 972 const char* src = "\xED\xAE\x80\xED\xBF\xBF"; |
972 uint32_t expected[] = { 0xDB80, 0xDFFF }; | 973 uint32_t expected[] = { 0xDB80, 0xDFFF }; |
973 uint32_t dst[ARRAY_SIZE(expected)]; | 974 uint32_t dst[ARRAY_SIZE(expected)]; |
974 memset(dst, 0, sizeof(dst)); | 975 memset(dst, 0, sizeof(dst)); |
975 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 976 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
976 EXPECT(!is_valid); | 977 EXPECT(!is_valid); |
977 EXPECT(memcmp(expected, dst, sizeof(expected))); | 978 EXPECT(memcmp(expected, dst, sizeof(expected))); |
978 } | 979 } |
979 | 980 |
980 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80" | 981 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80" |
981 { | 982 { |
982 const char* src = "\xED\xAF\xBF\xED\xB0\x80"; | 983 const char* src = "\xED\xAF\xBF\xED\xB0\x80"; |
983 uint32_t expected[] = { 0xDBFF, 0xDC00 }; | 984 uint32_t expected[] = { 0xDBFF, 0xDC00 }; |
984 uint32_t dst[ARRAY_SIZE(expected)]; | 985 uint32_t dst[ARRAY_SIZE(expected)]; |
985 memset(dst, 0, sizeof(dst)); | 986 memset(dst, 0, sizeof(dst)); |
986 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 987 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
987 EXPECT(!is_valid); | 988 EXPECT(!is_valid); |
988 EXPECT(memcmp(expected, dst, sizeof(expected))); | 989 EXPECT(memcmp(expected, dst, sizeof(expected))); |
989 } | 990 } |
990 | 991 |
991 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF" | 992 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF" |
992 { | 993 { |
993 const char* src = "\xED\xAF\xBF\xED\xBF\xBF"; | 994 const char* src = "\xED\xAF\xBF\xED\xBF\xBF"; |
994 uint32_t expected[] = { 0xDBFF, 0xDFFF }; | 995 uint32_t expected[] = { 0xDBFF, 0xDFFF }; |
995 uint32_t dst[ARRAY_SIZE(expected)]; | 996 uint32_t dst[ARRAY_SIZE(expected)]; |
996 memset(dst, 0, sizeof(dst)); | 997 memset(dst, 0, sizeof(dst)); |
997 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 998 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
998 EXPECT(!is_valid); | 999 EXPECT(!is_valid); |
999 EXPECT(memcmp(expected, dst, sizeof(expected))); | 1000 EXPECT(memcmp(expected, dst, sizeof(expected))); |
1000 } | 1001 } |
1001 | 1002 |
1002 // 5.3 - Other illegal code positions | 1003 // 5.3 - Other illegal code positions |
1003 | 1004 |
1004 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE" | 1005 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE" |
1005 { | 1006 { |
1006 const char* src = "\xEF\xBF\xBE"; | 1007 const char* src = "\xEF\xBF\xBE"; |
1007 uint32_t expected[] = { 0xFFFE }; | 1008 uint32_t expected[] = { 0xFFFE }; |
1008 uint32_t dst[ARRAY_SIZE(expected)]; | 1009 uint32_t dst[ARRAY_SIZE(expected)]; |
1009 memset(dst, 0, sizeof(dst)); | 1010 memset(dst, 0, sizeof(dst)); |
1010 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 1011 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
1011 EXPECT(is_valid); | 1012 EXPECT(is_valid); |
1012 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 1013 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
1013 } | 1014 } |
1014 | 1015 |
1015 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF" | 1016 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF" |
1016 { | 1017 { |
1017 const char* src = "\xEF\xBF\xBF"; | 1018 const char* src = "\xEF\xBF\xBF"; |
1018 uint32_t expected[] = { 0xFFFF }; | 1019 uint32_t expected[] = { 0xFFFF }; |
1019 uint32_t dst[ARRAY_SIZE(expected)]; | 1020 uint32_t dst[ARRAY_SIZE(expected)]; |
1020 memset(dst, 0, sizeof(dst)); | 1021 memset(dst, 0, sizeof(dst)); |
1021 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 1022 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
1022 EXPECT(is_valid); | 1023 EXPECT(is_valid); |
1023 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 1024 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
1024 } | 1025 } |
1025 } | 1026 } |
1026 | 1027 |
1027 } // namespace dart | 1028 } // namespace dart |
OLD | NEW |