Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(329)

Side by Side Diff: runtime/vm/unicode_test.cc

Issue 11318018: - Represent strings internally in UTF-16 format, this makes it (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/
Patch Set: Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/globals.h" 5 #include "vm/globals.h"
6 #include "vm/unicode.h" 6 #include "vm/unicode.h"
7 #include "vm/unit_test.h" 7 #include "vm/unit_test.h"
8 8
9 namespace dart { 9 namespace dart {
10 10
11 TEST_CASE(Utf8Decode) { 11 TEST_CASE(Utf8Decode) {
12 // Examples from the Unicode specification, chapter 3 12 // Examples from the Unicode specification, chapter 3
13 { 13 {
14 const char* src = "\x41\xC3\xB1\x42"; 14 const char* src = "\x41\xC3\xB1\x42";
15 uint32_t expected[] = { 0x41, 0xF1, 0x42 }; 15 uint32_t expected[] = { 0x41, 0xF1, 0x42 };
16 uint32_t dst[ARRAY_SIZE(expected)]; 16 uint32_t dst[ARRAY_SIZE(expected)];
17 memset(dst, 0, sizeof(dst)); 17 memset(dst, 0, sizeof(dst));
18 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 18 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
19 EXPECT(is_valid); 19 EXPECT(is_valid);
20 EXPECT(!memcmp(expected, dst, sizeof(expected))); 20 EXPECT(!memcmp(expected, dst, sizeof(expected)));
21 } 21 }
22 22
23 { 23 {
24 const char* src = "\x4D"; 24 const char* src = "\x4D";
25 uint32_t expected[] = { 0x4D }; 25 uint32_t expected[] = { 0x4D };
26 uint32_t dst[ARRAY_SIZE(expected)]; 26 uint32_t dst[ARRAY_SIZE(expected)];
27 memset(dst, 0, sizeof(dst)); 27 memset(dst, 0, sizeof(dst));
28 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 28 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
29 EXPECT(is_valid); 29 EXPECT(is_valid);
30 EXPECT(!memcmp(expected, dst, sizeof(expected))); 30 EXPECT(!memcmp(expected, dst, sizeof(expected)));
31 } 31 }
32 32
33 { 33 {
34 const char* src = "\xD0\xB0"; 34 const char* src = "\xD0\xB0";
35 uint32_t expected[] = { 0x430 }; 35 uint32_t expected[] = { 0x430 };
36 uint32_t dst[ARRAY_SIZE(expected)]; 36 uint32_t dst[ARRAY_SIZE(expected)];
37 memset(dst, 0, sizeof(dst)); 37 memset(dst, 0, sizeof(dst));
38 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 38 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
39 EXPECT(is_valid); 39 EXPECT(is_valid);
40 EXPECT(!memcmp(expected, dst, sizeof(expected))); 40 EXPECT(!memcmp(expected, dst, sizeof(expected)));
41 } 41 }
42 42
43 { 43 {
44 const char* src = "\xE4\xBA\x8C"; 44 const char* src = "\xE4\xBA\x8C";
45 uint32_t expected[] = { 0x4E8C }; 45 uint32_t expected[] = { 0x4E8C };
46 uint32_t dst[ARRAY_SIZE(expected)]; 46 uint32_t dst[ARRAY_SIZE(expected)];
47 memset(dst, 0, sizeof(dst)); 47 memset(dst, 0, sizeof(dst));
48 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 48 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
49 EXPECT(is_valid); 49 EXPECT(is_valid);
50 EXPECT(!memcmp(expected, dst, sizeof(expected))); 50 EXPECT(!memcmp(expected, dst, sizeof(expected)));
51 } 51 }
52 52
53 { 53 {
54 const char* src = "\xF0\x90\x8C\x82"; 54 const char* src = "\xF0\x90\x8C\x82";
55 uint32_t expected[] = { 0x10302 }; 55 uint32_t expected[] = { 0x10302 };
56 uint32_t dst[ARRAY_SIZE(expected)]; 56 uint32_t dst[ARRAY_SIZE(expected)];
57 memset(dst, 0, sizeof(dst)); 57 memset(dst, 0, sizeof(dst));
58 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 58 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
59 EXPECT(is_valid); 59 EXPECT(is_valid);
60 EXPECT(!memcmp(expected, dst, sizeof(expected))); 60 EXPECT(!memcmp(expected, dst, sizeof(expected)));
61 } 61 }
62 62
63 { 63 {
64 const char* src = "\x4D\xD0\xB0\xE4\xBA\x8C\xF0\x90\x8C\x82"; 64 const char* src = "\x4D\xD0\xB0\xE4\xBA\x8C\xF0\x90\x8C\x82";
65 uint32_t expected[] = { 0x4D, 0x430, 0x4E8C, 0x10302 }; 65 uint32_t expected[] = { 0x4D, 0x430, 0x4E8C, 0x10302 };
66 uint32_t dst[ARRAY_SIZE(expected)]; 66 uint32_t dst[ARRAY_SIZE(expected)];
67 memset(dst, 0, sizeof(dst)); 67 memset(dst, 0, sizeof(dst));
68 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 68 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
69 EXPECT(is_valid); 69 EXPECT(is_valid);
70 EXPECT(!memcmp(expected, dst, sizeof(expected))); 70 EXPECT(!memcmp(expected, dst, sizeof(expected)));
71 } 71 }
72 72
73 // Mixture of non-ASCII and ASCII characters 73 // Mixture of non-ASCII and ASCII characters
74 { 74 {
75 const char* src = "\xD7\x92\xD7\x9C\xD7\xA2\xD7\x93" 75 const char* src = "\xD7\x92\xD7\x9C\xD7\xA2\xD7\x93"
76 "\x20" 76 "\x20"
77 "\xD7\x91\xD7\xA8\xD7\x9B\xD7\x94"; 77 "\xD7\x91\xD7\xA8\xD7\x9B\xD7\x94";
78 uint32_t expected[] = { 0x5D2, 0x5DC, 0x5E2, 0x5D3, 78 uint32_t expected[] = { 0x5D2, 0x5DC, 0x5E2, 0x5D3,
79 0x20, 79 0x20,
80 0x5D1, 0x5E8, 0x5DB, 0x5D4 }; 80 0x5D1, 0x5E8, 0x5DB, 0x5D4 };
81 uint32_t dst[ARRAY_SIZE(expected)]; 81 uint32_t dst[ARRAY_SIZE(expected)];
82 memset(dst, 0, sizeof(dst)); 82 memset(dst, 0, sizeof(dst));
83 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 83 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
84 EXPECT(is_valid); 84 EXPECT(is_valid);
85 EXPECT(!memcmp(expected, dst, sizeof(expected))); 85 EXPECT(!memcmp(expected, dst, sizeof(expected)));
86 } 86 }
87 87
88 // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt 88 // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
89 89
90 // 1 - Some correct UTF-8 text 90 // 1 - Some correct UTF-8 text
91 { 91 {
92 const char* src = "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5"; 92 const char* src = "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5";
93 uint32_t expected[] = { 0x3BA, 0x1F79, 0x3C3, 0x3BC, 0x3B5 }; 93 uint32_t expected[] = { 0x3BA, 0x1F79, 0x3C3, 0x3BC, 0x3B5 };
94 uint32_t dst[ARRAY_SIZE(expected)]; 94 uint32_t dst[ARRAY_SIZE(expected)];
95 memset(dst, 0, sizeof(dst)); 95 memset(dst, 0, sizeof(dst));
96 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 96 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
97 EXPECT(is_valid); 97 EXPECT(is_valid);
98 EXPECT(!memcmp(expected, dst, sizeof(expected))); 98 EXPECT(!memcmp(expected, dst, sizeof(expected)));
99 } 99 }
100 100
101 // 2 - Boundary condition test cases 101 // 2 - Boundary condition test cases
102 102
103 // 2.1 - First possible sequence of a certain length 103 // 2.1 - First possible sequence of a certain length
104 104
105 // 2.1.1 - 1 byte (U-00000000): "\x00" 105 // 2.1.1 - 1 byte (U-00000000): "\x00"
106 { 106 {
107 const char* src = "\x00"; 107 const char* src = "\x00";
108 uint32_t expected[] = { 0x0 }; 108 uint32_t expected[] = { 0x0 };
109 uint32_t dst[ARRAY_SIZE(expected)]; 109 uint32_t dst[ARRAY_SIZE(expected)];
110 memset(dst, 0xFF, sizeof(dst)); 110 memset(dst, 0xFF, sizeof(dst));
111 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 111 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
112 EXPECT(is_valid); 112 EXPECT(is_valid);
113 EXPECT(memcmp(expected, dst, sizeof(expected))); 113 EXPECT(memcmp(expected, dst, sizeof(expected)));
114 } 114 }
115 115
116 // 2.1.2 - 2 bytes (U-00000080): "\xC2\x80" 116 // 2.1.2 - 2 bytes (U-00000080): "\xC2\x80"
117 { 117 {
118 const char* src = "\xC2\x80"; 118 const char* src = "\xC2\x80";
119 uint32_t expected[] = { 0x80 }; 119 uint32_t expected[] = { 0x80 };
120 uint32_t dst[ARRAY_SIZE(expected)]; 120 uint32_t dst[ARRAY_SIZE(expected)];
121 memset(dst, 0, sizeof(dst)); 121 memset(dst, 0, sizeof(dst));
122 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 122 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
123 EXPECT(is_valid); 123 EXPECT(is_valid);
124 EXPECT(!memcmp(expected, dst, sizeof(expected))); 124 EXPECT(!memcmp(expected, dst, sizeof(expected)));
125 } 125 }
126 126
127 // 2.1.3 - 3 bytes (U-00000800): "\xE0\xA0\x80" 127 // 2.1.3 - 3 bytes (U-00000800): "\xE0\xA0\x80"
128 { 128 {
129 const char* src = "\xE0\xA0\x80"; 129 const char* src = "\xE0\xA0\x80";
130 uint32_t expected[] = { 0x800 }; 130 uint32_t expected[] = { 0x800 };
131 uint32_t dst[ARRAY_SIZE(expected)]; 131 uint32_t dst[ARRAY_SIZE(expected)];
132 memset(dst, 0, sizeof(dst)); 132 memset(dst, 0, sizeof(dst));
133 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 133 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
134 EXPECT(is_valid); 134 EXPECT(is_valid);
135 EXPECT(!memcmp(expected, dst, sizeof(expected))); 135 EXPECT(!memcmp(expected, dst, sizeof(expected)));
136 } 136 }
137 137
138 // 2.1.4 - 4 bytes (U-00010000): "\xF0\x90\x80\x80" 138 // 2.1.4 - 4 bytes (U-00010000): "\xF0\x90\x80\x80"
139 { 139 {
140 const char* src = "\xF0\x90\x80\x80"; 140 const char* src = "\xF0\x90\x80\x80";
141 uint32_t expected[] = { 0x10000 }; 141 uint32_t expected[] = { 0x10000 };
142 uint32_t dst[ARRAY_SIZE(expected)]; 142 uint32_t dst[ARRAY_SIZE(expected)];
143 memset(dst, 0, sizeof(dst)); 143 memset(dst, 0, sizeof(dst));
144 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 144 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
145 EXPECT(is_valid); 145 EXPECT(is_valid);
146 EXPECT(!memcmp(expected, dst, sizeof(expected))); 146 EXPECT(!memcmp(expected, dst, sizeof(expected)));
147 } 147 }
148 148
149 // 2.1.5 - 5 bytes (U-00200000): "\xF8\x88\x80\x80\x80" 149 // 2.1.5 - 5 bytes (U-00200000): "\xF8\x88\x80\x80\x80"
150 { 150 {
151 const char* src = "\xF8\x88\x80\x80\x80"; 151 const char* src = "\xF8\x88\x80\x80\x80";
152 uint32_t expected[] = { 0x200000 }; 152 uint32_t expected[] = { 0x200000 };
153 uint32_t dst[ARRAY_SIZE(expected)]; 153 uint32_t dst[ARRAY_SIZE(expected)];
154 memset(dst, 0, sizeof(dst)); 154 memset(dst, 0, sizeof(dst));
155 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 155 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
156 EXPECT(!is_valid); 156 EXPECT(!is_valid);
157 EXPECT(memcmp(expected, dst, sizeof(expected))); 157 EXPECT(memcmp(expected, dst, sizeof(expected)));
158 } 158 }
159 159
160 // 2.1.6 - 6 bytes (U-04000000): "\xFC\x84\x80\x80\x80\x80" 160 // 2.1.6 - 6 bytes (U-04000000): "\xFC\x84\x80\x80\x80\x80"
161 { 161 {
162 const char* src = "\xFC\x84\x80\x80\x80\x80"; 162 const char* src = "\xFC\x84\x80\x80\x80\x80";
163 uint32_t expected[] = { 0x400000 }; 163 uint32_t expected[] = { 0x400000 };
164 uint32_t dst[ARRAY_SIZE(expected)]; 164 uint32_t dst[ARRAY_SIZE(expected)];
165 memset(dst, 0, sizeof(dst)); 165 memset(dst, 0, sizeof(dst));
166 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 166 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
167 EXPECT(!is_valid); 167 EXPECT(!is_valid);
168 EXPECT(memcmp(expected, dst, sizeof(expected))); 168 EXPECT(memcmp(expected, dst, sizeof(expected)));
169 } 169 }
170 170
171 // 2.2 - Last possible sequence of a certain length 171 // 2.2 - Last possible sequence of a certain length
172 172
173 // 2.2.1 - 1 byte (U-0000007F): "\x7F" 173 // 2.2.1 - 1 byte (U-0000007F): "\x7F"
174 { 174 {
175 const char* src = "\x7F"; 175 const char* src = "\x7F";
176 uint32_t expected[] = { 0x7F }; 176 uint32_t expected[] = { 0x7F };
177 uint32_t dst[ARRAY_SIZE(expected)]; 177 uint32_t dst[ARRAY_SIZE(expected)];
178 memset(dst, 0, sizeof(dst)); 178 memset(dst, 0, sizeof(dst));
179 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 179 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
180 EXPECT(is_valid); 180 EXPECT(is_valid);
181 EXPECT(!memcmp(expected, dst, sizeof(expected))); 181 EXPECT(!memcmp(expected, dst, sizeof(expected)));
182 } 182 }
183 183
184 // 2.2.2 - 2 bytes (U-000007FF): "\xDF\xBF" 184 // 2.2.2 - 2 bytes (U-000007FF): "\xDF\xBF"
185 { 185 {
186 const char* src = "\xDF\xBF"; 186 const char* src = "\xDF\xBF";
187 uint32_t expected[] = { 0x7FF }; 187 uint32_t expected[] = { 0x7FF };
188 uint32_t dst[ARRAY_SIZE(expected)]; 188 uint32_t dst[ARRAY_SIZE(expected)];
189 memset(dst, 0, sizeof(dst)); 189 memset(dst, 0, sizeof(dst));
190 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 190 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
191 EXPECT(is_valid); 191 EXPECT(is_valid);
192 EXPECT(!memcmp(expected, dst, sizeof(expected))); 192 EXPECT(!memcmp(expected, dst, sizeof(expected)));
193 } 193 }
194 194
195 // 2.2.3 - 3 bytes (U-0000FFFF): "\xEF\xBF\xBF" 195 // 2.2.3 - 3 bytes (U-0000FFFF): "\xEF\xBF\xBF"
196 { 196 {
197 const char* src = "\xEF\xBF\xBF"; 197 const char* src = "\xEF\xBF\xBF";
198 uint32_t expected[] = { 0xFFFF }; 198 uint32_t expected[] = { 0xFFFF };
199 uint32_t dst[ARRAY_SIZE(expected)]; 199 uint32_t dst[ARRAY_SIZE(expected)];
200 memset(dst, 0, sizeof(dst)); 200 memset(dst, 0, sizeof(dst));
201 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 201 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
202 EXPECT(is_valid); 202 EXPECT(is_valid);
203 EXPECT(!memcmp(expected, dst, sizeof(expected))); 203 EXPECT(!memcmp(expected, dst, sizeof(expected)));
204 } 204 }
205 205
206 // 2.2.4 - 4 bytes (U-001FFFFF): "\xF7\xBF\xBF\xBF" 206 // 2.2.4 - 4 bytes (U-001FFFFF): "\xF7\xBF\xBF\xBF"
207 { 207 {
208 const char* src = "\xF7\xBF\xBF\xBF"; 208 const char* src = "\xF7\xBF\xBF\xBF";
209 uint32_t expected[] = { 0x1FFFF }; 209 uint32_t expected[] = { 0x1FFFF };
210 uint32_t dst[ARRAY_SIZE(expected)]; 210 uint32_t dst[ARRAY_SIZE(expected)];
211 memset(dst, 0, sizeof(dst)); 211 memset(dst, 0, sizeof(dst));
212 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 212 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
213 EXPECT(!is_valid); 213 EXPECT(!is_valid);
214 EXPECT(memcmp(expected, dst, sizeof(expected))); 214 EXPECT(memcmp(expected, dst, sizeof(expected)));
215 } 215 }
216 216
217 // 2.2.5 - 5 bytes (U-03FFFFFF): "\xFB\xBF\xBF\xBF\xBF" 217 // 2.2.5 - 5 bytes (U-03FFFFFF): "\xFB\xBF\xBF\xBF\xBF"
218 { 218 {
219 const char* src = "\xFB\xBF\xBF\xBF\xBF"; 219 const char* src = "\xFB\xBF\xBF\xBF\xBF";
220 uint32_t expected[] = { 0x3FFFFFF }; 220 uint32_t expected[] = { 0x3FFFFFF };
221 uint32_t dst[ARRAY_SIZE(expected)]; 221 uint32_t dst[ARRAY_SIZE(expected)];
222 memset(dst, 0, sizeof(dst)); 222 memset(dst, 0, sizeof(dst));
223 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 223 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
224 EXPECT(!is_valid); 224 EXPECT(!is_valid);
225 EXPECT(memcmp(expected, dst, sizeof(expected))); 225 EXPECT(memcmp(expected, dst, sizeof(expected)));
226 } 226 }
227 227
228 // 2.2.6 - 6 bytes (U-7FFFFFFF): "\xFD\xBF\xBF\xBF\xBF\xBF" 228 // 2.2.6 - 6 bytes (U-7FFFFFFF): "\xFD\xBF\xBF\xBF\xBF\xBF"
229 { 229 {
230 const char* src = "\xFD\xBF\xBF\xBF\xBF\xBF"; 230 const char* src = "\xFD\xBF\xBF\xBF\xBF\xBF";
231 uint32_t expected[] = { 0x7FFFFFF }; 231 uint32_t expected[] = { 0x7FFFFFF };
232 uint32_t dst[ARRAY_SIZE(expected)]; 232 uint32_t dst[ARRAY_SIZE(expected)];
233 memset(dst, 0, sizeof(dst)); 233 memset(dst, 0, sizeof(dst));
234 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 234 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
235 EXPECT(!is_valid); 235 EXPECT(!is_valid);
236 EXPECT(memcmp(expected, dst, sizeof(expected))); 236 EXPECT(memcmp(expected, dst, sizeof(expected)));
237 } 237 }
238 238
239 // 2.3 - Other boundary conditions 239 // 2.3 - Other boundary conditions
240 240
241 // 2.3.1 - U-0000D7FF = ed 9f bf = "\xED\x9F\xBF" 241 // 2.3.1 - U-0000D7FF = ed 9f bf = "\xED\x9F\xBF"
242 { 242 {
243 const char* src = "\xED\x9F\xBF"; 243 const char* src = "\xED\x9F\xBF";
244 uint32_t expected[] = { 0xD7FF }; 244 uint32_t expected[] = { 0xD7FF };
245 uint32_t dst[ARRAY_SIZE(expected)]; 245 uint32_t dst[ARRAY_SIZE(expected)];
246 memset(dst, 0, sizeof(dst)); 246 memset(dst, 0, sizeof(dst));
247 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 247 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
248 EXPECT(is_valid); 248 EXPECT(is_valid);
249 EXPECT(!memcmp(expected, dst, sizeof(expected))); 249 EXPECT(!memcmp(expected, dst, sizeof(expected)));
250 } 250 }
251 251
252 // 2.3.2 - U-0000E000 = ee 80 80 = "\xEE\x80\x80" 252 // 2.3.2 - U-0000E000 = ee 80 80 = "\xEE\x80\x80"
253 { 253 {
254 const char* src = "\xEE\x80\x80"; 254 const char* src = "\xEE\x80\x80";
255 uint32_t expected[] = { 0xE000 }; 255 uint32_t expected[] = { 0xE000 };
256 uint32_t dst[ARRAY_SIZE(expected)]; 256 uint32_t dst[ARRAY_SIZE(expected)];
257 memset(dst, 0, sizeof(dst)); 257 memset(dst, 0, sizeof(dst));
258 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 258 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
259 EXPECT(is_valid); 259 EXPECT(is_valid);
260 EXPECT(!memcmp(expected, dst, sizeof(expected))); 260 EXPECT(!memcmp(expected, dst, sizeof(expected)));
261 } 261 }
262 262
263 // 2.3.3 - U-0000FFFD = ef bf bd = "\xEF\xBF\xBD" 263 // 2.3.3 - U-0000FFFD = ef bf bd = "\xEF\xBF\xBD"
264 { 264 {
265 const char* src = "\xEF\xBF\xBD"; 265 const char* src = "\xEF\xBF\xBD";
266 uint32_t expected[] = { 0xFFFD }; 266 uint32_t expected[] = { 0xFFFD };
267 uint32_t dst[ARRAY_SIZE(expected)]; 267 uint32_t dst[ARRAY_SIZE(expected)];
268 memset(dst, 0, sizeof(dst)); 268 memset(dst, 0, sizeof(dst));
269 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 269 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
270 EXPECT(is_valid); 270 EXPECT(is_valid);
271 EXPECT(!memcmp(expected, dst, sizeof(expected))); 271 EXPECT(!memcmp(expected, dst, sizeof(expected)));
272 } 272 }
273 273
274 // 2.3.4 - U-0010FFFF = f4 8f bf bf = "\xF4\x8F\xBF\xBF" 274 // 2.3.4 - U-0010FFFF = f4 8f bf bf = "\xF4\x8F\xBF\xBF"
275 { 275 {
276 const char* src = "\xF4\x8F\xBF\xBF"; 276 const char* src = "\xF4\x8F\xBF\xBF";
277 uint32_t expected[] = { 0x10FFFF }; 277 uint32_t expected[] = { 0x10FFFF };
278 uint32_t dst[ARRAY_SIZE(expected)]; 278 uint32_t dst[ARRAY_SIZE(expected)];
279 memset(dst, 0, sizeof(dst)); 279 memset(dst, 0, sizeof(dst));
280 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 280 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
281 EXPECT(is_valid); 281 EXPECT(is_valid);
282 EXPECT(!memcmp(expected, dst, sizeof(expected))); 282 EXPECT(!memcmp(expected, dst, sizeof(expected)));
283 } 283 }
284 284
285 // 2.3.5 - U-00110000 = f4 90 80 80 = "\xF4\x90\x80\x80" 285 // 2.3.5 - U-00110000 = f4 90 80 80 = "\xF4\x90\x80\x80"
286 { 286 {
287 const char* src = "\xF4\x90\x80\x80"; 287 const char* src = "\xF4\x90\x80\x80";
288 uint32_t expected[] = { 0x110000 }; 288 uint32_t expected[] = { 0x110000 };
289 uint32_t dst[ARRAY_SIZE(expected)]; 289 uint32_t dst[ARRAY_SIZE(expected)];
290 memset(dst, 0, sizeof(dst)); 290 memset(dst, 0, sizeof(dst));
291 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 291 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
292 EXPECT(!is_valid); 292 EXPECT(!is_valid);
293 EXPECT(memcmp(expected, dst, sizeof(expected))); 293 EXPECT(memcmp(expected, dst, sizeof(expected)));
294 } 294 }
295 295
296 // 3 - Malformed sequences 296 // 3 - Malformed sequences
297 297
298 // 3.1 - Unexpected continuation bytes 298 // 3.1 - Unexpected continuation bytes
299 299
300 // 3.1.1 - First continuation byte 0x80: "\x80" 300 // 3.1.1 - First continuation byte 0x80: "\x80"
301 { 301 {
302 const char* src = "\x80"; 302 const char* src = "\x80";
303 uint32_t expected[] = { 0x80 }; 303 uint32_t expected[] = { 0x80 };
304 uint32_t dst[ARRAY_SIZE(expected)]; 304 uint32_t dst[ARRAY_SIZE(expected)];
305 memset(dst, 0, sizeof(dst)); 305 memset(dst, 0, sizeof(dst));
306 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 306 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
307 EXPECT(!is_valid); 307 EXPECT(!is_valid);
308 EXPECT(memcmp(expected, dst, sizeof(expected))); 308 EXPECT(memcmp(expected, dst, sizeof(expected)));
309 } 309 }
310 310
311 // 3.1.2 - Last continuation byte 0xbf: "\xBF" 311 // 3.1.2 - Last continuation byte 0xbf: "\xBF"
312 { 312 {
313 const char* src = "\xBF"; 313 const char* src = "\xBF";
314 uint32_t expected[] = { 0xBF }; 314 uint32_t expected[] = { 0xBF };
315 uint32_t dst[ARRAY_SIZE(expected)]; 315 uint32_t dst[ARRAY_SIZE(expected)];
316 memset(dst, 0, sizeof(dst)); 316 memset(dst, 0, sizeof(dst));
317 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 317 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
318 EXPECT(!is_valid); 318 EXPECT(!is_valid);
319 EXPECT(memcmp(expected, dst, sizeof(expected))); 319 EXPECT(memcmp(expected, dst, sizeof(expected)));
320 } 320 }
321 321
322 // 3.1.3 - 2 continuation bytes: "\x80\xBF" 322 // 3.1.3 - 2 continuation bytes: "\x80\xBF"
323 { 323 {
324 const char* src = "\x80\xBF"; 324 const char* src = "\x80\xBF";
325 uint32_t expected[] = { 0x80, 0xBF }; 325 uint32_t expected[] = { 0x80, 0xBF };
326 uint32_t dst[ARRAY_SIZE(expected)]; 326 uint32_t dst[ARRAY_SIZE(expected)];
327 memset(dst, 0, sizeof(dst)); 327 memset(dst, 0, sizeof(dst));
328 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 328 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
329 EXPECT(!is_valid); 329 EXPECT(!is_valid);
330 EXPECT(memcmp(expected, dst, sizeof(expected))); 330 EXPECT(memcmp(expected, dst, sizeof(expected)));
331 } 331 }
332 332
333 // 3.1.4 - 3 continuation bytes: "\x80\xBF\x80" 333 // 3.1.4 - 3 continuation bytes: "\x80\xBF\x80"
334 { 334 {
335 const char* src = "\x80\xBF\x80"; 335 const char* src = "\x80\xBF\x80";
336 uint32_t expected[] = { 0x80, 0xBF, 0x80 }; 336 uint32_t expected[] = { 0x80, 0xBF, 0x80 };
337 uint32_t dst[ARRAY_SIZE(expected)]; 337 uint32_t dst[ARRAY_SIZE(expected)];
338 memset(dst, 0, sizeof(dst)); 338 memset(dst, 0, sizeof(dst));
339 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 339 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
340 EXPECT(!is_valid); 340 EXPECT(!is_valid);
341 EXPECT(memcmp(expected, dst, sizeof(expected))); 341 EXPECT(memcmp(expected, dst, sizeof(expected)));
342 } 342 }
343 343
344 // 3.1.5 - 4 continuation bytes: "\x80\xBF\x80\xBF" 344 // 3.1.5 - 4 continuation bytes: "\x80\xBF\x80\xBF"
345 { 345 {
346 const char* src = "\x80\xBF\x80\xBF"; 346 const char* src = "\x80\xBF\x80\xBF";
347 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF }; 347 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF };
348 uint32_t dst[ARRAY_SIZE(expected)]; 348 uint32_t dst[ARRAY_SIZE(expected)];
349 memset(dst, 0, sizeof(dst)); 349 memset(dst, 0, sizeof(dst));
350 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 350 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
351 EXPECT(!is_valid); 351 EXPECT(!is_valid);
352 EXPECT(memcmp(expected, dst, sizeof(expected))); 352 EXPECT(memcmp(expected, dst, sizeof(expected)));
353 } 353 }
354 354
355 // 3.1.6 - 5 continuation bytes: "\x80\xBF\x80\xBF\x80" 355 // 3.1.6 - 5 continuation bytes: "\x80\xBF\x80\xBF\x80"
356 { 356 {
357 const char* src = "\x80\xBF\x80\xBF\x80"; 357 const char* src = "\x80\xBF\x80\xBF\x80";
358 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80 }; 358 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80 };
359 uint32_t dst[ARRAY_SIZE(expected)]; 359 uint32_t dst[ARRAY_SIZE(expected)];
360 memset(dst, 0, sizeof(dst)); 360 memset(dst, 0, sizeof(dst));
361 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 361 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
362 EXPECT(!is_valid); 362 EXPECT(!is_valid);
363 EXPECT(memcmp(expected, dst, sizeof(expected))); 363 EXPECT(memcmp(expected, dst, sizeof(expected)));
364 } 364 }
365 365
366 // 3.1.7 - 6 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF" 366 // 3.1.7 - 6 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF"
367 { 367 {
368 const char* src = "\x80\xBF\x80\xBF\x80\xBF"; 368 const char* src = "\x80\xBF\x80\xBF\x80\xBF";
369 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF }; 369 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF };
370 uint32_t dst[ARRAY_SIZE(expected)]; 370 uint32_t dst[ARRAY_SIZE(expected)];
371 memset(dst, 0, sizeof(dst)); 371 memset(dst, 0, sizeof(dst));
372 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 372 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
373 EXPECT(!is_valid); 373 EXPECT(!is_valid);
374 EXPECT(memcmp(expected, dst, sizeof(expected))); 374 EXPECT(memcmp(expected, dst, sizeof(expected)));
375 } 375 }
376 376
377 // 3.1.8 - 7 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF\x80" 377 // 3.1.8 - 7 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF\x80"
378 { 378 {
379 const char* src = "\x80\xBF\x80\xBF\x80\xBF\x80"; 379 const char* src = "\x80\xBF\x80\xBF\x80\xBF\x80";
380 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 }; 380 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 };
381 uint32_t dst[ARRAY_SIZE(expected)]; 381 uint32_t dst[ARRAY_SIZE(expected)];
382 memset(dst, 0, sizeof(dst)); 382 memset(dst, 0, sizeof(dst));
383 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 383 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
384 EXPECT(!is_valid); 384 EXPECT(!is_valid);
385 EXPECT(memcmp(expected, dst, sizeof(expected))); 385 EXPECT(memcmp(expected, dst, sizeof(expected)));
386 } 386 }
387 387
388 // 3.1.9 - Sequence of all 64 possible continuation bytes (0x80-0xbf): 388 // 3.1.9 - Sequence of all 64 possible continuation bytes (0x80-0xbf):
389 { 389 {
390 const char* src = "\x80\x81\x82\x83\x84\x85\x86\x87" 390 const char* src = "\x80\x81\x82\x83\x84\x85\x86\x87"
391 "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" 391 "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
392 "\x90\x91\x92\x93\x94\x95\x96\x97" 392 "\x90\x91\x92\x93\x94\x95\x96\x97"
393 "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F" 393 "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
394 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7" 394 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7"
395 "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" 395 "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
396 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7" 396 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7"
397 "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"; 397 "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF";
398 uint32_t expected[] = { 0x0 }; 398 uint32_t expected[] = { 0x0 };
399 uint32_t dst[ARRAY_SIZE(expected)]; 399 uint32_t dst[ARRAY_SIZE(expected)];
400 for (size_t i = 0; i < strlen(src); ++i) { 400 for (size_t i = 0; i < strlen(src); ++i) {
401 memset(dst, 0xFF, sizeof(dst)); 401 memset(dst, 0xFF, sizeof(dst));
402 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); 402 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
403 EXPECT(!is_valid); 403 EXPECT(!is_valid);
404 EXPECT(memcmp(expected, dst, sizeof(expected))); 404 EXPECT(memcmp(expected, dst, sizeof(expected)));
405 } 405 }
406 } 406 }
407 407
408 // 3.2 - Lonely start character 408 // 3.2 - Lonely start character
409 409
410 // 3.2.1 - All 32 first bytes of 2-byte sequences (0xc0-0xdf), each 410 // 3.2.1 - All 32 first bytes of 2-byte sequences (0xc0-0xdf), each
411 // followed by a space character: 411 // followed by a space character:
412 { 412 {
413 const char* src = "\xC0\x20\xC1\x20\xC2\x20\xC3\x20" 413 const char* src = "\xC0\x20\xC1\x20\xC2\x20\xC3\x20"
414 "\xC4\x20\xC5\x20\xC6\x20\xC7\x20" 414 "\xC4\x20\xC5\x20\xC6\x20\xC7\x20"
415 "\xC8\x20\xC9\x20\xCA\x20\xCB\x20" 415 "\xC8\x20\xC9\x20\xCA\x20\xCB\x20"
416 "\xCC\x20\xCD\x20\xCE\x20\xCF\x20" 416 "\xCC\x20\xCD\x20\xCE\x20\xCF\x20"
417 "\xD0\x20\xD1\x20\xD2\x20\xD3\x20" 417 "\xD0\x20\xD1\x20\xD2\x20\xD3\x20"
418 "\xD4\x20\xD5\x20\xD6\x20\xD7\x20" 418 "\xD4\x20\xD5\x20\xD6\x20\xD7\x20"
419 "\xD8\x20\xD9\x20\xDA\x20\xDB\x20" 419 "\xD8\x20\xD9\x20\xDA\x20\xDB\x20"
420 "\xDC\x20\xDD\x20\xDE\x20\xDF\x20"; 420 "\xDC\x20\xDD\x20\xDE\x20\xDF\x20";
421 uint32_t expected[] = { 0x0 }; 421 uint32_t expected[] = { 0x0 };
422 uint32_t dst[ARRAY_SIZE(expected)]; 422 uint32_t dst[ARRAY_SIZE(expected)];
423 for (size_t i = 0; i < strlen(src); i += 2) { 423 for (size_t i = 0; i < strlen(src); i += 2) {
424 memset(dst, 0xFF, sizeof(dst)); 424 memset(dst, 0xFF, sizeof(dst));
425 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); 425 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
426 EXPECT(!is_valid); 426 EXPECT(!is_valid);
427 EXPECT(memcmp(expected, dst, sizeof(expected))); 427 EXPECT(memcmp(expected, dst, sizeof(expected)));
428 } 428 }
429 } 429 }
430 430
431 // 3.2.2 - All 16 first bytes of 3-byte sequences (0xe0-0xef), each 431 // 3.2.2 - All 16 first bytes of 3-byte sequences (0xe0-0xef), each
432 // followed by a space character: 432 // followed by a space character:
433 { 433 {
434 const char* src = "\xE0\x20\xE1\x20\xE2\x20\xE3\x20" 434 const char* src = "\xE0\x20\xE1\x20\xE2\x20\xE3\x20"
435 "\xE4\x20\xE5\x20\xE6\x20\xE7\x20" 435 "\xE4\x20\xE5\x20\xE6\x20\xE7\x20"
436 "\xE8\x20\xE9\x20\xEA\x20\xEB\x20" 436 "\xE8\x20\xE9\x20\xEA\x20\xEB\x20"
437 "\xEC\x20\xED\x20\xEE\x20\xEF\x20"; 437 "\xEC\x20\xED\x20\xEE\x20\xEF\x20";
438 uint32_t expected[] = { 0x0 }; 438 uint32_t expected[] = { 0x0 };
439 uint32_t dst[ARRAY_SIZE(expected)]; 439 uint32_t dst[ARRAY_SIZE(expected)];
440 for (size_t i = 0; i < strlen(src); i += 2) { 440 for (size_t i = 0; i < strlen(src); i += 2) {
441 memset(dst, 0xFF, sizeof(dst)); 441 memset(dst, 0xFF, sizeof(dst));
442 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); 442 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
443 EXPECT(!is_valid); 443 EXPECT(!is_valid);
444 EXPECT(memcmp(expected, dst, sizeof(expected))); 444 EXPECT(memcmp(expected, dst, sizeof(expected)));
445 } 445 }
446 } 446 }
447 447
448 // 3.2.3 - All 8 first bytes of 4-byte sequences (0xf0-0xf7), each 448 // 3.2.3 - All 8 first bytes of 4-byte sequences (0xf0-0xf7), each
449 // followed by a space character: 449 // followed by a space character:
450 { 450 {
451 const char* src = "\xF0\x20\xF1\x20\xF2\x20\xF3\x20" 451 const char* src = "\xF0\x20\xF1\x20\xF2\x20\xF3\x20"
452 "\xF4\x20\xF5\x20\xF6\x20\xF7\x20"; 452 "\xF4\x20\xF5\x20\xF6\x20\xF7\x20";
453 uint32_t expected[] = { 0x0 }; 453 uint32_t expected[] = { 0x0 };
454 uint32_t dst[ARRAY_SIZE(expected)]; 454 uint32_t dst[ARRAY_SIZE(expected)];
455 for (size_t i = 0; i < strlen(src); i += 2) { 455 for (size_t i = 0; i < strlen(src); i += 2) {
456 memset(dst, 0xFF, sizeof(dst)); 456 memset(dst, 0xFF, sizeof(dst));
457 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); 457 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
458 EXPECT(!is_valid); 458 EXPECT(!is_valid);
459 EXPECT(memcmp(expected, dst, sizeof(expected))); 459 EXPECT(memcmp(expected, dst, sizeof(expected)));
460 } 460 }
461 } 461 }
462 462
463 // 3.2.4 - All 4 first bytes of 5-byte sequences (0xf8-0xfb), each 463 // 3.2.4 - All 4 first bytes of 5-byte sequences (0xf8-0xfb), each
464 // followed by a space character: 464 // followed by a space character:
465 { 465 {
466 const char* src = "\xF8\x20\xF9\x20\xFA\x20\xFB\x20"; 466 const char* src = "\xF8\x20\xF9\x20\xFA\x20\xFB\x20";
467 uint32_t expected[] = { 0x0 }; 467 uint32_t expected[] = { 0x0 };
468 uint32_t dst[ARRAY_SIZE(expected)]; 468 uint32_t dst[ARRAY_SIZE(expected)];
469 for (size_t i = 0; i < strlen(src); i += 2) { 469 for (size_t i = 0; i < strlen(src); i += 2) {
470 memset(dst, 0xFF, sizeof(dst)); 470 memset(dst, 0xFF, sizeof(dst));
471 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); 471 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
472 EXPECT(!is_valid); 472 EXPECT(!is_valid);
473 EXPECT(memcmp(expected, dst, sizeof(expected))); 473 EXPECT(memcmp(expected, dst, sizeof(expected)));
474 } 474 }
475 } 475 }
476 476
477 // 3.2.5 - All 2 first bytes of 6-byte sequences (0xfc-0xfd), each 477 // 3.2.5 - All 2 first bytes of 6-byte sequences (0xfc-0xfd), each
478 // followed by a space character: 478 // followed by a space character:
479 { 479 {
480 const char* src = "\xFC\x20\xFD\x20"; 480 const char* src = "\xFC\x20\xFD\x20";
481 uint32_t expected[] = { 0x0 }; 481 uint32_t expected[] = { 0x0 };
482 uint32_t dst[ARRAY_SIZE(expected)]; 482 uint32_t dst[ARRAY_SIZE(expected)];
483 for (size_t i = 0; i < strlen(src); i += 2) { 483 for (size_t i = 0; i < strlen(src); i += 2) {
484 memset(dst, 0xFF, sizeof(dst)); 484 memset(dst, 0xFF, sizeof(dst));
485 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); 485 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
486 EXPECT(!is_valid); 486 EXPECT(!is_valid);
487 EXPECT(memcmp(expected, dst, sizeof(expected))); 487 EXPECT(memcmp(expected, dst, sizeof(expected)));
488 } 488 }
489 } 489 }
490 490
491 // 3.3 - Sequences with last continuation byte missing 491 // 3.3 - Sequences with last continuation byte missing
492 492
493 // 3.3.1 - 2-byte sequence with last byte missing (U+0000): "\xC0" 493 // 3.3.1 - 2-byte sequence with last byte missing (U+0000): "\xC0"
494 { 494 {
495 const char* src = "\xC0"; 495 const char* src = "\xC0";
496 uint32_t expected[] = { 0x0 }; 496 uint32_t expected[] = { 0x0 };
497 uint32_t dst[ARRAY_SIZE(expected)]; 497 uint32_t dst[ARRAY_SIZE(expected)];
498 memset(dst, 0xFF, sizeof(dst)); 498 memset(dst, 0xFF, sizeof(dst));
499 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 499 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
500 EXPECT(!is_valid); 500 EXPECT(!is_valid);
501 EXPECT(memcmp(expected, dst, sizeof(expected))); 501 EXPECT(memcmp(expected, dst, sizeof(expected)));
502 } 502 }
503 503
504 // 3.3.2 - 3-byte sequence with last byte missing (U+0000): "\xE0\x80" 504 // 3.3.2 - 3-byte sequence with last byte missing (U+0000): "\xE0\x80"
505 { 505 {
506 const char* src = "\xE0\x80"; 506 const char* src = "\xE0\x80";
507 uint32_t expected[] = { 0x0 }; 507 uint32_t expected[] = { 0x0 };
508 uint32_t dst[ARRAY_SIZE(expected)]; 508 uint32_t dst[ARRAY_SIZE(expected)];
509 memset(dst, 0xFF, sizeof(dst)); 509 memset(dst, 0xFF, sizeof(dst));
510 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 510 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
511 EXPECT(!is_valid); 511 EXPECT(!is_valid);
512 EXPECT(memcmp(expected, dst, sizeof(expected))); 512 EXPECT(memcmp(expected, dst, sizeof(expected)));
513 } 513 }
514 514
515 // 3.3.3 - 4-byte sequence with last byte missing (U+0000): "\xF0\x80\x80" 515 // 3.3.3 - 4-byte sequence with last byte missing (U+0000): "\xF0\x80\x80"
516 { 516 {
517 const char* src = "\xF0\x80\x80"; 517 const char* src = "\xF0\x80\x80";
518 uint32_t expected[] = { 0x0 }; 518 uint32_t expected[] = { 0x0 };
519 uint32_t dst[ARRAY_SIZE(expected)]; 519 uint32_t dst[ARRAY_SIZE(expected)];
520 memset(dst, 0xFF, sizeof(dst)); 520 memset(dst, 0xFF, sizeof(dst));
521 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 521 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
522 EXPECT(!is_valid); 522 EXPECT(!is_valid);
523 EXPECT(memcmp(expected, dst, sizeof(expected))); 523 EXPECT(memcmp(expected, dst, sizeof(expected)));
524 } 524 }
525 525
526 // 3.3.4 - 5-byte sequence with last byte missing (U+0000): "\xF8\x80\x80\x80" 526 // 3.3.4 - 5-byte sequence with last byte missing (U+0000): "\xF8\x80\x80\x80"
527 { 527 {
528 const char* src = "\xF8\x80\x80\x80"; 528 const char* src = "\xF8\x80\x80\x80";
529 uint32_t expected[] = { 0x0 }; 529 uint32_t expected[] = { 0x0 };
530 uint32_t dst[ARRAY_SIZE(expected)]; 530 uint32_t dst[ARRAY_SIZE(expected)];
531 memset(dst, 0xFF, sizeof(dst)); 531 memset(dst, 0xFF, sizeof(dst));
532 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 532 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
533 EXPECT(!is_valid); 533 EXPECT(!is_valid);
534 EXPECT(memcmp(expected, dst, sizeof(expected))); 534 EXPECT(memcmp(expected, dst, sizeof(expected)));
535 } 535 }
536 536
537 // 3.3.5 - 6-byte sequence with last byte missing (U+0000): 537 // 3.3.5 - 6-byte sequence with last byte missing (U+0000):
538 // "\xFC\x80\x80\x80\x80" 538 // "\xFC\x80\x80\x80\x80"
539 { 539 {
540 const char* src = "\xFC\x80\x80\x80\x80"; 540 const char* src = "\xFC\x80\x80\x80\x80";
541 uint32_t expected[] = { 0x0 }; 541 uint32_t expected[] = { 0x0 };
542 uint32_t dst[ARRAY_SIZE(expected)]; 542 uint32_t dst[ARRAY_SIZE(expected)];
543 memset(dst, 0xFF, sizeof(dst)); 543 memset(dst, 0xFF, sizeof(dst));
544 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 544 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
545 EXPECT(!is_valid); 545 EXPECT(!is_valid);
546 EXPECT(memcmp(expected, dst, sizeof(expected))); 546 EXPECT(memcmp(expected, dst, sizeof(expected)));
547 } 547 }
548 548
549 // 3.3.6 - 2-byte sequence with last byte missing (U-000007FF): "\xDF" 549 // 3.3.6 - 2-byte sequence with last byte missing (U-000007FF): "\xDF"
550 { 550 {
551 const char* src = "\xDF"; 551 const char* src = "\xDF";
552 uint32_t expected[] = { 0x0 }; 552 uint32_t expected[] = { 0x0 };
553 uint32_t dst[ARRAY_SIZE(expected)]; 553 uint32_t dst[ARRAY_SIZE(expected)];
554 memset(dst, 0xFF, sizeof(dst)); 554 memset(dst, 0xFF, sizeof(dst));
555 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 555 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
556 EXPECT(!is_valid); 556 EXPECT(!is_valid);
557 EXPECT(memcmp(expected, dst, sizeof(expected))); 557 EXPECT(memcmp(expected, dst, sizeof(expected)));
558 } 558 }
559 559
560 // 3.3.7 - 3-byte sequence with last byte missing (U-0000FFFF): "\xEF\xBF" 560 // 3.3.7 - 3-byte sequence with last byte missing (U-0000FFFF): "\xEF\xBF"
561 { 561 {
562 const char* src = "\xEF\xBF"; 562 const char* src = "\xEF\xBF";
563 uint32_t expected[] = { 0x0 }; 563 uint32_t expected[] = { 0x0 };
564 uint32_t dst[ARRAY_SIZE(expected)]; 564 uint32_t dst[ARRAY_SIZE(expected)];
565 memset(dst, 0xFF, sizeof(dst)); 565 memset(dst, 0xFF, sizeof(dst));
566 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 566 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
567 EXPECT(!is_valid); 567 EXPECT(!is_valid);
568 EXPECT(memcmp(expected, dst, sizeof(expected))); 568 EXPECT(memcmp(expected, dst, sizeof(expected)));
569 } 569 }
570 570
571 // 3.3.8 - 4-byte sequence with last byte missing (U-001FFFFF): "\xF7\xBF\xBF" 571 // 3.3.8 - 4-byte sequence with last byte missing (U-001FFFFF): "\xF7\xBF\xBF"
572 { 572 {
573 const char* src = "\xF7\xBF\xBF"; 573 const char* src = "\xF7\xBF\xBF";
574 uint32_t expected[] = { 0x0 }; 574 uint32_t expected[] = { 0x0 };
575 uint32_t dst[ARRAY_SIZE(expected)]; 575 uint32_t dst[ARRAY_SIZE(expected)];
576 memset(dst, 0xFF, sizeof(dst)); 576 memset(dst, 0xFF, sizeof(dst));
577 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 577 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
578 EXPECT(!is_valid); 578 EXPECT(!is_valid);
579 EXPECT(memcmp(expected, dst, sizeof(expected))); 579 EXPECT(memcmp(expected, dst, sizeof(expected)));
580 } 580 }
581 581
582 // 3.3.9 - 5-byte sequence with last byte missing (U-03FFFFFF): 582 // 3.3.9 - 5-byte sequence with last byte missing (U-03FFFFFF):
583 // "\xFB\xBF\xBF\xBF" 583 // "\xFB\xBF\xBF\xBF"
584 { 584 {
585 const char* src = "\xFB\xBF\xBF\xBF"; 585 const char* src = "\xFB\xBF\xBF\xBF";
586 uint32_t expected[] = { 0x0 }; 586 uint32_t expected[] = { 0x0 };
587 uint32_t dst[ARRAY_SIZE(expected)]; 587 uint32_t dst[ARRAY_SIZE(expected)];
588 memset(dst, 0xFF, sizeof(dst)); 588 memset(dst, 0xFF, sizeof(dst));
589 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 589 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
590 EXPECT(!is_valid); 590 EXPECT(!is_valid);
591 EXPECT(memcmp(expected, dst, sizeof(expected))); 591 EXPECT(memcmp(expected, dst, sizeof(expected)));
592 } 592 }
593 593
594 // 3.3.10 - 6-byte sequence with last byte missing (U-7FFFFFFF): 594 // 3.3.10 - 6-byte sequence with last byte missing (U-7FFFFFFF):
595 // "\xFD\xBF\xBF\xBF\xBF" 595 // "\xFD\xBF\xBF\xBF\xBF"
596 { 596 {
597 const char* src = "\xFD\xBF\xBF\xBF\xBF"; 597 const char* src = "\xFD\xBF\xBF\xBF\xBF";
598 uint32_t expected[] = { 0x0 }; 598 uint32_t expected[] = { 0x0 };
599 uint32_t dst[ARRAY_SIZE(expected)]; 599 uint32_t dst[ARRAY_SIZE(expected)];
600 memset(dst, 0xFF, sizeof(dst)); 600 memset(dst, 0xFF, sizeof(dst));
601 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 601 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
602 EXPECT(!is_valid); 602 EXPECT(!is_valid);
603 EXPECT(memcmp(expected, dst, sizeof(expected))); 603 EXPECT(memcmp(expected, dst, sizeof(expected)));
604 } 604 }
605 605
606 // 3.4 - Concatenation of incomplete sequences 606 // 3.4 - Concatenation of incomplete sequences
607 { 607 {
608 const char* src = "\xC0\xE0\x80\xF0\x80\x80" 608 const char* src = "\xC0\xE0\x80\xF0\x80\x80"
609 "\xF8\x80\x80\x80\xFC\x80" 609 "\xF8\x80\x80\x80\xFC\x80"
610 "\x80\x80\x80\xDF\xEF\xBF" 610 "\x80\x80\x80\xDF\xEF\xBF"
611 "\xF7\xBF\xBF\xFB\xBF\xBF" 611 "\xF7\xBF\xBF\xFB\xBF\xBF"
612 "\xBF\xFD\xBF\xBF\xBF\xBF"; 612 "\xBF\xFD\xBF\xBF\xBF\xBF";
613 uint32_t expected[] = { 0x0 }; 613 uint32_t expected[] = { 0x0 };
614 uint32_t dst[ARRAY_SIZE(expected)]; 614 uint32_t dst[ARRAY_SIZE(expected)];
615 for (size_t i = 0; i < strlen(src); ++i) { 615 for (size_t i = 0; i < strlen(src); ++i) {
616 for (size_t j = 1; j < (strlen(src) - i); ++j) { 616 for (size_t j = 1; j < (strlen(src) - i); ++j) {
617 memset(dst, 0xFF, sizeof(dst)); 617 memset(dst, 0xFF, sizeof(dst));
618 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); 618 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i],
619 dst, ARRAY_SIZE(dst));
619 EXPECT(!is_valid); 620 EXPECT(!is_valid);
620 EXPECT(memcmp(expected, dst, sizeof(expected))); 621 EXPECT(memcmp(expected, dst, sizeof(expected)));
621 } 622 }
622 } 623 }
623 } 624 }
624 625
625 // 3.5 - Impossible bytes 626 // 3.5 - Impossible bytes
626 627
627 // 3.5.1 - fe = "\xFE" 628 // 3.5.1 - fe = "\xFE"
628 { 629 {
629 const char* src = "\xFE"; 630 const char* src = "\xFE";
630 uint32_t expected[] = { 0xFE }; 631 uint32_t expected[] = { 0xFE };
631 uint32_t dst[ARRAY_SIZE(expected)]; 632 uint32_t dst[ARRAY_SIZE(expected)];
632 memset(dst, 0, sizeof(dst)); 633 memset(dst, 0, sizeof(dst));
633 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 634 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
634 EXPECT(!is_valid); 635 EXPECT(!is_valid);
635 EXPECT(memcmp(expected, dst, sizeof(expected))); 636 EXPECT(memcmp(expected, dst, sizeof(expected)));
636 } 637 }
637 638
638 // 3.5.2 - ff = "\xFF" 639 // 3.5.2 - ff = "\xFF"
639 { 640 {
640 const char* src = "\xFF"; 641 const char* src = "\xFF";
641 uint32_t expected[] = { 0xFF }; 642 uint32_t expected[] = { 0xFF };
642 uint32_t dst[ARRAY_SIZE(expected)]; 643 uint32_t dst[ARRAY_SIZE(expected)];
643 memset(dst, 0, sizeof(dst)); 644 memset(dst, 0, sizeof(dst));
644 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 645 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
645 EXPECT(!is_valid); 646 EXPECT(!is_valid);
646 EXPECT(memcmp(expected, dst, sizeof(expected))); 647 EXPECT(memcmp(expected, dst, sizeof(expected)));
647 } 648 }
648 649
649 // 3.5.3 - fe fe ff ff = "\xFE\xFE\xFF\xFF" 650 // 3.5.3 - fe fe ff ff = "\xFE\xFE\xFF\xFF"
650 { 651 {
651 const char* src = "\xFE\xFE\xFF\xFF"; 652 const char* src = "\xFE\xFE\xFF\xFF";
652 uint32_t expected[] = { 0xFF }; 653 uint32_t expected[] = { 0xFF };
653 uint32_t dst[ARRAY_SIZE(expected)]; 654 uint32_t dst[ARRAY_SIZE(expected)];
654 memset(dst, 0, sizeof(dst)); 655 memset(dst, 0, sizeof(dst));
655 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 656 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
656 EXPECT(!is_valid); 657 EXPECT(!is_valid);
657 EXPECT(memcmp(expected, dst, sizeof(expected))); 658 EXPECT(memcmp(expected, dst, sizeof(expected)));
658 } 659 }
659 660
660 // 4 - Overlong sequences 661 // 4 - Overlong sequences
661 662
662 // 4.1 - Examples of an overlong ASCII character 663 // 4.1 - Examples of an overlong ASCII character
663 664
664 // 4.1.1 - U+002F = c0 af = "\xC0\xAF" 665 // 4.1.1 - U+002F = c0 af = "\xC0\xAF"
665 { 666 {
666 const char* src = "\xC0\xAF"; 667 const char* src = "\xC0\xAF";
667 uint32_t expected[] = { 0x2F }; 668 uint32_t expected[] = { 0x2F };
668 uint32_t dst[ARRAY_SIZE(expected)]; 669 uint32_t dst[ARRAY_SIZE(expected)];
669 memset(dst, 0, sizeof(dst)); 670 memset(dst, 0, sizeof(dst));
670 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 671 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
671 EXPECT(!is_valid); 672 EXPECT(!is_valid);
672 EXPECT(memcmp(expected, dst, sizeof(expected))); 673 EXPECT(memcmp(expected, dst, sizeof(expected)));
673 } 674 }
674 675
675 // 4.1.2 - U+002F = e0 80 af = "\xE0\x80\xAF" 676 // 4.1.2 - U+002F = e0 80 af = "\xE0\x80\xAF"
676 { 677 {
677 const char* src = "\xE0\x80\xAF"; 678 const char* src = "\xE0\x80\xAF";
678 uint32_t expected[] = { 0x2F }; 679 uint32_t expected[] = { 0x2F };
679 uint32_t dst[ARRAY_SIZE(expected)]; 680 uint32_t dst[ARRAY_SIZE(expected)];
680 memset(dst, 0, sizeof(dst)); 681 memset(dst, 0, sizeof(dst));
681 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 682 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
682 EXPECT(!is_valid); 683 EXPECT(!is_valid);
683 EXPECT(memcmp(expected, dst, sizeof(expected))); 684 EXPECT(memcmp(expected, dst, sizeof(expected)));
684 } 685 }
685 686
686 // 4.1.3 - U+002F = f0 80 80 af = "\xF0\x80\x80\xAF" 687 // 4.1.3 - U+002F = f0 80 80 af = "\xF0\x80\x80\xAF"
687 { 688 {
688 const char* src = "\xF0\x80\x80\xAF"; 689 const char* src = "\xF0\x80\x80\xAF";
689 uint32_t expected[] = { 0x2F }; 690 uint32_t expected[] = { 0x2F };
690 uint32_t dst[ARRAY_SIZE(expected)]; 691 uint32_t dst[ARRAY_SIZE(expected)];
691 memset(dst, 0, sizeof(dst)); 692 memset(dst, 0, sizeof(dst));
692 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 693 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
693 EXPECT(!is_valid); 694 EXPECT(!is_valid);
694 EXPECT(memcmp(expected, dst, sizeof(expected))); 695 EXPECT(memcmp(expected, dst, sizeof(expected)));
695 } 696 }
696 697
697 // 4.1.4 - U+002F = f8 80 80 80 af = "\xF8\x80\x80\x80\xAF" 698 // 4.1.4 - U+002F = f8 80 80 80 af = "\xF8\x80\x80\x80\xAF"
698 { 699 {
699 const char* src = "\xF8\x80\x80\x80\xAF"; 700 const char* src = "\xF8\x80\x80\x80\xAF";
700 uint32_t expected[] = { 0x2F }; 701 uint32_t expected[] = { 0x2F };
701 uint32_t dst[ARRAY_SIZE(expected)]; 702 uint32_t dst[ARRAY_SIZE(expected)];
702 memset(dst, 0, sizeof(dst)); 703 memset(dst, 0, sizeof(dst));
703 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 704 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
704 EXPECT(!is_valid); 705 EXPECT(!is_valid);
705 EXPECT(memcmp(expected, dst, sizeof(expected))); 706 EXPECT(memcmp(expected, dst, sizeof(expected)));
706 } 707 }
707 708
708 // 4.1.5 - U+002F = fc 80 80 80 80 af = "\xFC\x80\x80\x80\x80\xAF" 709 // 4.1.5 - U+002F = fc 80 80 80 80 af = "\xFC\x80\x80\x80\x80\xAF"
709 { 710 {
710 const char* src = "\xFC\x80\x80\x80\x80\xAF"; 711 const char* src = "\xFC\x80\x80\x80\x80\xAF";
711 uint32_t expected[] = { 0x2F }; 712 uint32_t expected[] = { 0x2F };
712 uint32_t dst[ARRAY_SIZE(expected)]; 713 uint32_t dst[ARRAY_SIZE(expected)];
713 memset(dst, 0, sizeof(dst)); 714 memset(dst, 0, sizeof(dst));
714 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 715 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
715 EXPECT(!is_valid); 716 EXPECT(!is_valid);
716 EXPECT(memcmp(expected, dst, sizeof(expected))); 717 EXPECT(memcmp(expected, dst, sizeof(expected)));
717 } 718 }
718 719
719 // 4.2 Maximum overlong sequences 720 // 4.2 Maximum overlong sequences
720 721
721 // 4.2.1 - U-0000007F = c1 bf = "\xC1\xBF" 722 // 4.2.1 - U-0000007F = c1 bf = "\xC1\xBF"
722 { 723 {
723 const char* src = "\xC1\xBF"; 724 const char* src = "\xC1\xBF";
724 uint32_t expected[] = { 0x7F }; 725 uint32_t expected[] = { 0x7F };
725 uint32_t dst[ARRAY_SIZE(expected)]; 726 uint32_t dst[ARRAY_SIZE(expected)];
726 memset(dst, 0, sizeof(dst)); 727 memset(dst, 0, sizeof(dst));
727 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 728 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
728 EXPECT(!is_valid); 729 EXPECT(!is_valid);
729 EXPECT(memcmp(expected, dst, sizeof(expected))); 730 EXPECT(memcmp(expected, dst, sizeof(expected)));
730 } 731 }
731 732
732 // 4.2.2 U+000007FF = e0 9f bf = "\xE0\x9F\xBF" 733 // 4.2.2 U+000007FF = e0 9f bf = "\xE0\x9F\xBF"
733 { 734 {
734 const char* src = "\xE0\x9F\xBF"; 735 const char* src = "\xE0\x9F\xBF";
735 uint32_t expected[] = { 0x7FF }; 736 uint32_t expected[] = { 0x7FF };
736 uint32_t dst[ARRAY_SIZE(expected)]; 737 uint32_t dst[ARRAY_SIZE(expected)];
737 memset(dst, 0, sizeof(dst)); 738 memset(dst, 0, sizeof(dst));
738 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 739 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
739 EXPECT(!is_valid); 740 EXPECT(!is_valid);
740 EXPECT(memcmp(expected, dst, sizeof(expected))); 741 EXPECT(memcmp(expected, dst, sizeof(expected)));
741 } 742 }
742 743
743 // 4.2.3 - U+0000FFFF = f0 8f bf bf = "\xF0\x8F\xBF\xBF" 744 // 4.2.3 - U+0000FFFF = f0 8f bf bf = "\xF0\x8F\xBF\xBF"
744 { 745 {
745 const char* src = "\xF0\x8F\xBF\xBF"; 746 const char* src = "\xF0\x8F\xBF\xBF";
746 uint32_t expected[] = { 0xFFFF }; 747 uint32_t expected[] = { 0xFFFF };
747 uint32_t dst[ARRAY_SIZE(expected)]; 748 uint32_t dst[ARRAY_SIZE(expected)];
748 memset(dst, 0, sizeof(dst)); 749 memset(dst, 0, sizeof(dst));
749 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 750 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
750 EXPECT(!is_valid); 751 EXPECT(!is_valid);
751 EXPECT(memcmp(expected, dst, sizeof(expected))); 752 EXPECT(memcmp(expected, dst, sizeof(expected)));
752 } 753 }
753 754
754 // 4.2.4 U-001FFFFF = f8 87 bf bf bf = "\xF8\x87\xBF\xBF\xBF" 755 // 4.2.4 U-001FFFFF = f8 87 bf bf bf = "\xF8\x87\xBF\xBF\xBF"
755 { 756 {
756 const char* src = "\xF8\x87\xBF\xBF\xBF"; 757 const char* src = "\xF8\x87\xBF\xBF\xBF";
757 uint32_t expected[] = { 0x1FFFFF }; 758 uint32_t expected[] = { 0x1FFFFF };
758 uint32_t dst[ARRAY_SIZE(expected)]; 759 uint32_t dst[ARRAY_SIZE(expected)];
759 memset(dst, 0, sizeof(dst)); 760 memset(dst, 0, sizeof(dst));
760 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 761 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
761 EXPECT(!is_valid); 762 EXPECT(!is_valid);
762 EXPECT(memcmp(expected, dst, sizeof(expected))); 763 EXPECT(memcmp(expected, dst, sizeof(expected)));
763 } 764 }
764 765
765 // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "\xFC\x83\xBF\xBF\xBF\xBF" 766 // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "\xFC\x83\xBF\xBF\xBF\xBF"
766 { 767 {
767 const char* src = "\xFC\x83\xBF\xBF\xBF\xBF"; 768 const char* src = "\xFC\x83\xBF\xBF\xBF\xBF";
768 uint32_t expected[] = { 0x3FFFFFF }; 769 uint32_t expected[] = { 0x3FFFFFF };
769 uint32_t dst[ARRAY_SIZE(expected)]; 770 uint32_t dst[ARRAY_SIZE(expected)];
770 memset(dst, 0, sizeof(dst)); 771 memset(dst, 0, sizeof(dst));
771 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 772 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
772 EXPECT(!is_valid); 773 EXPECT(!is_valid);
773 EXPECT(memcmp(expected, dst, sizeof(expected))); 774 EXPECT(memcmp(expected, dst, sizeof(expected)));
774 } 775 }
775 776
776 // 4.3 - Overlong representation of the NUL character 777 // 4.3 - Overlong representation of the NUL character
777 778
778 // 4.3.1 - U+0000 = "\xC0\x80" 779 // 4.3.1 - U+0000 = "\xC0\x80"
779 { 780 {
780 const char* src = "\xC0\x80"; 781 const char* src = "\xC0\x80";
781 uint32_t expected[] = { 0x0 }; 782 uint32_t expected[] = { 0x0 };
782 uint32_t dst[ARRAY_SIZE(expected)]; 783 uint32_t dst[ARRAY_SIZE(expected)];
783 memset(dst, 0xFF, sizeof(dst)); 784 memset(dst, 0xFF, sizeof(dst));
784 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 785 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
785 EXPECT(!is_valid); 786 EXPECT(!is_valid);
786 EXPECT(memcmp(expected, dst, sizeof(expected))); 787 EXPECT(memcmp(expected, dst, sizeof(expected)));
787 } 788 }
788 789
789 // 4.3.2 U+0000 = e0 80 80 = "\xE0\x80\x80" 790 // 4.3.2 U+0000 = e0 80 80 = "\xE0\x80\x80"
790 { 791 {
791 const char* src = "\xE0\x80\x80"; 792 const char* src = "\xE0\x80\x80";
792 uint32_t expected[] = { 0x0 }; 793 uint32_t expected[] = { 0x0 };
793 uint32_t dst[ARRAY_SIZE(expected)]; 794 uint32_t dst[ARRAY_SIZE(expected)];
794 memset(dst, 0xFF, sizeof(dst)); 795 memset(dst, 0xFF, sizeof(dst));
795 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 796 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
796 EXPECT(!is_valid); 797 EXPECT(!is_valid);
797 EXPECT(memcmp(expected, dst, sizeof(expected))); 798 EXPECT(memcmp(expected, dst, sizeof(expected)));
798 } 799 }
799 800
800 // 4.3.3 U+0000 = f0 80 80 80 = "\xF0\x80\x80\x80" 801 // 4.3.3 U+0000 = f0 80 80 80 = "\xF0\x80\x80\x80"
801 { 802 {
802 const char* src = "\xF0\x80\x80\x80"; 803 const char* src = "\xF0\x80\x80\x80";
803 uint32_t expected[] = { 0x0 }; 804 uint32_t expected[] = { 0x0 };
804 uint32_t dst[ARRAY_SIZE(expected)]; 805 uint32_t dst[ARRAY_SIZE(expected)];
805 memset(dst, 0xFF, sizeof(dst)); 806 memset(dst, 0xFF, sizeof(dst));
806 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 807 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
807 EXPECT(!is_valid); 808 EXPECT(!is_valid);
808 EXPECT(memcmp(expected, dst, sizeof(expected))); 809 EXPECT(memcmp(expected, dst, sizeof(expected)));
809 } 810 }
810 811
811 // 4.3.4 U+0000 = f8 80 80 80 80 = "\xF8\x80\x80\x80\x80" 812 // 4.3.4 U+0000 = f8 80 80 80 80 = "\xF8\x80\x80\x80\x80"
812 { 813 {
813 const char* src = "\xF8\x80\x80\x80\x80"; 814 const char* src = "\xF8\x80\x80\x80\x80";
814 uint32_t expected[] = { 0x0 }; 815 uint32_t expected[] = { 0x0 };
815 uint32_t dst[ARRAY_SIZE(expected)]; 816 uint32_t dst[ARRAY_SIZE(expected)];
816 memset(dst, 0xFF, sizeof(dst)); 817 memset(dst, 0xFF, sizeof(dst));
817 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 818 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
818 EXPECT(!is_valid); 819 EXPECT(!is_valid);
819 EXPECT(memcmp(expected, dst, sizeof(expected))); 820 EXPECT(memcmp(expected, dst, sizeof(expected)));
820 } 821 }
821 822
822 // 4.3.5 U+0000 = fc 80 80 80 80 80 = "\xFC\x80\x80\x80\x80\x80" 823 // 4.3.5 U+0000 = fc 80 80 80 80 80 = "\xFC\x80\x80\x80\x80\x80"
823 { 824 {
824 const char* src = "\xFC\x80\x80\x80\x80\x80"; 825 const char* src = "\xFC\x80\x80\x80\x80\x80";
825 uint32_t expected[] = { 0x0 }; 826 uint32_t expected[] = { 0x0 };
826 uint32_t dst[ARRAY_SIZE(expected)]; 827 uint32_t dst[ARRAY_SIZE(expected)];
827 memset(dst, 0xFF, sizeof(dst)); 828 memset(dst, 0xFF, sizeof(dst));
828 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 829 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
829 EXPECT(!is_valid); 830 EXPECT(!is_valid);
830 EXPECT(memcmp(expected, dst, sizeof(expected))); 831 EXPECT(memcmp(expected, dst, sizeof(expected)));
831 } 832 }
832 833
833 // 5.1 - Single UTF-16 surrogates 834 // 5.1 - Single UTF-16 surrogates
834 835
835 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80" 836 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80"
836 { 837 {
837 const char* src = "\xED\xA0\x80"; 838 const char* src = "\xED\xA0\x80";
838 uint32_t expected[] = { 0xD800 }; 839 uint32_t expected[] = { 0xD800 };
839 uint32_t dst[ARRAY_SIZE(expected)]; 840 uint32_t dst[ARRAY_SIZE(expected)];
840 memset(dst, 0, sizeof(dst)); 841 memset(dst, 0, sizeof(dst));
841 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 842 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
842 EXPECT(!is_valid); 843 EXPECT(!is_valid);
843 EXPECT(memcmp(expected, dst, sizeof(expected))); 844 EXPECT(memcmp(expected, dst, sizeof(expected)));
844 } 845 }
845 846
846 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF" 847 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF"
847 { 848 {
848 const char* src = "\xED\xAD\xBF"; 849 const char* src = "\xED\xAD\xBF";
849 uint32_t expected[] = { 0xDB7F }; 850 uint32_t expected[] = { 0xDB7F };
850 uint32_t dst[ARRAY_SIZE(expected)]; 851 uint32_t dst[ARRAY_SIZE(expected)];
851 memset(dst, 0, sizeof(dst)); 852 memset(dst, 0, sizeof(dst));
852 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 853 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
853 EXPECT(!is_valid); 854 EXPECT(!is_valid);
854 EXPECT(memcmp(expected, dst, sizeof(expected))); 855 EXPECT(memcmp(expected, dst, sizeof(expected)));
855 } 856 }
856 857
857 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80" 858 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80"
858 { 859 {
859 const char* src = "\xED\xAE\x80"; 860 const char* src = "\xED\xAE\x80";
860 uint32_t expected[] = { 0xDB80 }; 861 uint32_t expected[] = { 0xDB80 };
861 uint32_t dst[ARRAY_SIZE(expected)]; 862 uint32_t dst[ARRAY_SIZE(expected)];
862 memset(dst, 0, sizeof(dst)); 863 memset(dst, 0, sizeof(dst));
863 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 864 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
864 EXPECT(!is_valid); 865 EXPECT(!is_valid);
865 EXPECT(memcmp(expected, dst, sizeof(expected))); 866 EXPECT(memcmp(expected, dst, sizeof(expected)));
866 } 867 }
867 868
868 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF" 869 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF"
869 { 870 {
870 const char* src = "\xED\xAF\xBF"; 871 const char* src = "\xED\xAF\xBF";
871 uint32_t expected[] = { 0xDBFF }; 872 uint32_t expected[] = { 0xDBFF };
872 uint32_t dst[ARRAY_SIZE(expected)]; 873 uint32_t dst[ARRAY_SIZE(expected)];
873 memset(dst, 0, sizeof(dst)); 874 memset(dst, 0, sizeof(dst));
874 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 875 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
875 EXPECT(!is_valid); 876 EXPECT(!is_valid);
876 EXPECT(memcmp(expected, dst, sizeof(expected))); 877 EXPECT(memcmp(expected, dst, sizeof(expected)));
877 } 878 }
878 879
879 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80" 880 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80"
880 { 881 {
881 const char* src = "\xED\xB0\x80"; 882 const char* src = "\xED\xB0\x80";
882 uint32_t expected[] = { 0xDC00 }; 883 uint32_t expected[] = { 0xDC00 };
883 uint32_t dst[ARRAY_SIZE(expected)]; 884 uint32_t dst[ARRAY_SIZE(expected)];
884 memset(dst, 0, sizeof(dst)); 885 memset(dst, 0, sizeof(dst));
885 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 886 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
886 EXPECT(!is_valid); 887 EXPECT(!is_valid);
887 EXPECT(memcmp(expected, dst, sizeof(expected))); 888 EXPECT(memcmp(expected, dst, sizeof(expected)));
888 } 889 }
889 890
890 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80" 891 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80"
891 { 892 {
892 const char* src = "\xED\xBE\x80"; 893 const char* src = "\xED\xBE\x80";
893 uint32_t expected[] = { 0xDF80 }; 894 uint32_t expected[] = { 0xDF80 };
894 uint32_t dst[ARRAY_SIZE(expected)]; 895 uint32_t dst[ARRAY_SIZE(expected)];
895 memset(dst, 0, sizeof(dst)); 896 memset(dst, 0, sizeof(dst));
896 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 897 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
897 EXPECT(!is_valid); 898 EXPECT(!is_valid);
898 EXPECT(memcmp(expected, dst, sizeof(expected))); 899 EXPECT(memcmp(expected, dst, sizeof(expected)));
899 } 900 }
900 901
901 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF" 902 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF"
902 { 903 {
903 const char* src = "\xED\xBF\xBF"; 904 const char* src = "\xED\xBF\xBF";
904 uint32_t expected[] = { 0xDFFF }; 905 uint32_t expected[] = { 0xDFFF };
905 uint32_t dst[ARRAY_SIZE(expected)]; 906 uint32_t dst[ARRAY_SIZE(expected)];
906 memset(dst, 0, sizeof(dst)); 907 memset(dst, 0, sizeof(dst));
907 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 908 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
908 EXPECT(!is_valid); 909 EXPECT(!is_valid);
909 EXPECT(memcmp(expected, dst, sizeof(expected))); 910 EXPECT(memcmp(expected, dst, sizeof(expected)));
910 } 911 }
911 912
912 // 5.2 Paired UTF-16 surrogates 913 // 5.2 Paired UTF-16 surrogates
913 914
914 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80" 915 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80"
915 { 916 {
916 const char* src = "\xED\xA0\x80\xED\xB0\x80"; 917 const char* src = "\xED\xA0\x80\xED\xB0\x80";
917 uint32_t expected[] = { 0xD800, 0xDC00 }; 918 uint32_t expected[] = { 0xD800, 0xDC00 };
918 uint32_t dst[ARRAY_SIZE(expected)]; 919 uint32_t dst[ARRAY_SIZE(expected)];
919 memset(dst, 0, sizeof(dst)); 920 memset(dst, 0, sizeof(dst));
920 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 921 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
921 EXPECT(!is_valid); 922 EXPECT(!is_valid);
922 EXPECT(memcmp(expected, dst, sizeof(expected))); 923 EXPECT(memcmp(expected, dst, sizeof(expected)));
923 } 924 }
924 925
925 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF" 926 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF"
926 { 927 {
927 const char* src = "\xED\xA0\x80\xED\xBF\xBF"; 928 const char* src = "\xED\xA0\x80\xED\xBF\xBF";
928 uint32_t expected[] = { 0xD800, 0xDFFF }; 929 uint32_t expected[] = { 0xD800, 0xDFFF };
929 uint32_t dst[ARRAY_SIZE(expected)]; 930 uint32_t dst[ARRAY_SIZE(expected)];
930 memset(dst, 0, sizeof(dst)); 931 memset(dst, 0, sizeof(dst));
931 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 932 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
932 EXPECT(!is_valid); 933 EXPECT(!is_valid);
933 EXPECT(memcmp(expected, dst, sizeof(expected))); 934 EXPECT(memcmp(expected, dst, sizeof(expected)));
934 } 935 }
935 936
936 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80" 937 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80"
937 { 938 {
938 const char* src = "\xED\xAD\xBF\xED\xB0\x80"; 939 const char* src = "\xED\xAD\xBF\xED\xB0\x80";
939 uint32_t expected[] = { 0xDB7F, 0xDC00 }; 940 uint32_t expected[] = { 0xDB7F, 0xDC00 };
940 uint32_t dst[ARRAY_SIZE(expected)]; 941 uint32_t dst[ARRAY_SIZE(expected)];
941 memset(dst, 0, sizeof(dst)); 942 memset(dst, 0, sizeof(dst));
942 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 943 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
943 EXPECT(!is_valid); 944 EXPECT(!is_valid);
944 EXPECT(memcmp(expected, dst, sizeof(expected))); 945 EXPECT(memcmp(expected, dst, sizeof(expected)));
945 } 946 }
946 947
947 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF" 948 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF"
948 { 949 {
949 const char* src = "\xED\xAD\xBF\xED\xBF\xBF"; 950 const char* src = "\xED\xAD\xBF\xED\xBF\xBF";
950 uint32_t expected[] = { 0xDB7F, 0xDFFF }; 951 uint32_t expected[] = { 0xDB7F, 0xDFFF };
951 uint32_t dst[ARRAY_SIZE(expected)]; 952 uint32_t dst[ARRAY_SIZE(expected)];
952 memset(dst, 0, sizeof(dst)); 953 memset(dst, 0, sizeof(dst));
953 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 954 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
954 EXPECT(!is_valid); 955 EXPECT(!is_valid);
955 EXPECT(memcmp(expected, dst, sizeof(expected))); 956 EXPECT(memcmp(expected, dst, sizeof(expected)));
956 } 957 }
957 958
958 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80" 959 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80"
959 { 960 {
960 const char* src = "\xED\xAE\x80\xED\xB0\x80"; 961 const char* src = "\xED\xAE\x80\xED\xB0\x80";
961 uint32_t expected[] = { 0xDB80, 0xDC00 }; 962 uint32_t expected[] = { 0xDB80, 0xDC00 };
962 uint32_t dst[ARRAY_SIZE(expected)]; 963 uint32_t dst[ARRAY_SIZE(expected)];
963 memset(dst, 0, sizeof(dst)); 964 memset(dst, 0, sizeof(dst));
964 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 965 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
965 EXPECT(!is_valid); 966 EXPECT(!is_valid);
966 EXPECT(memcmp(expected, dst, sizeof(expected))); 967 EXPECT(memcmp(expected, dst, sizeof(expected)));
967 } 968 }
968 969
969 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF" 970 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF"
970 { 971 {
971 const char* src = "\xED\xAE\x80\xED\xBF\xBF"; 972 const char* src = "\xED\xAE\x80\xED\xBF\xBF";
972 uint32_t expected[] = { 0xDB80, 0xDFFF }; 973 uint32_t expected[] = { 0xDB80, 0xDFFF };
973 uint32_t dst[ARRAY_SIZE(expected)]; 974 uint32_t dst[ARRAY_SIZE(expected)];
974 memset(dst, 0, sizeof(dst)); 975 memset(dst, 0, sizeof(dst));
975 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 976 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
976 EXPECT(!is_valid); 977 EXPECT(!is_valid);
977 EXPECT(memcmp(expected, dst, sizeof(expected))); 978 EXPECT(memcmp(expected, dst, sizeof(expected)));
978 } 979 }
979 980
980 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80" 981 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80"
981 { 982 {
982 const char* src = "\xED\xAF\xBF\xED\xB0\x80"; 983 const char* src = "\xED\xAF\xBF\xED\xB0\x80";
983 uint32_t expected[] = { 0xDBFF, 0xDC00 }; 984 uint32_t expected[] = { 0xDBFF, 0xDC00 };
984 uint32_t dst[ARRAY_SIZE(expected)]; 985 uint32_t dst[ARRAY_SIZE(expected)];
985 memset(dst, 0, sizeof(dst)); 986 memset(dst, 0, sizeof(dst));
986 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 987 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
987 EXPECT(!is_valid); 988 EXPECT(!is_valid);
988 EXPECT(memcmp(expected, dst, sizeof(expected))); 989 EXPECT(memcmp(expected, dst, sizeof(expected)));
989 } 990 }
990 991
991 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF" 992 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF"
992 { 993 {
993 const char* src = "\xED\xAF\xBF\xED\xBF\xBF"; 994 const char* src = "\xED\xAF\xBF\xED\xBF\xBF";
994 uint32_t expected[] = { 0xDBFF, 0xDFFF }; 995 uint32_t expected[] = { 0xDBFF, 0xDFFF };
995 uint32_t dst[ARRAY_SIZE(expected)]; 996 uint32_t dst[ARRAY_SIZE(expected)];
996 memset(dst, 0, sizeof(dst)); 997 memset(dst, 0, sizeof(dst));
997 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 998 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
998 EXPECT(!is_valid); 999 EXPECT(!is_valid);
999 EXPECT(memcmp(expected, dst, sizeof(expected))); 1000 EXPECT(memcmp(expected, dst, sizeof(expected)));
1000 } 1001 }
1001 1002
1002 // 5.3 - Other illegal code positions 1003 // 5.3 - Other illegal code positions
1003 1004
1004 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE" 1005 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE"
1005 { 1006 {
1006 const char* src = "\xEF\xBF\xBE"; 1007 const char* src = "\xEF\xBF\xBE";
1007 uint32_t expected[] = { 0xFFFE }; 1008 uint32_t expected[] = { 0xFFFE };
1008 uint32_t dst[ARRAY_SIZE(expected)]; 1009 uint32_t dst[ARRAY_SIZE(expected)];
1009 memset(dst, 0, sizeof(dst)); 1010 memset(dst, 0, sizeof(dst));
1010 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 1011 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
1011 EXPECT(is_valid); 1012 EXPECT(is_valid);
1012 EXPECT(!memcmp(expected, dst, sizeof(expected))); 1013 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1013 } 1014 }
1014 1015
1015 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF" 1016 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF"
1016 { 1017 {
1017 const char* src = "\xEF\xBF\xBF"; 1018 const char* src = "\xEF\xBF\xBF";
1018 uint32_t expected[] = { 0xFFFF }; 1019 uint32_t expected[] = { 0xFFFF };
1019 uint32_t dst[ARRAY_SIZE(expected)]; 1020 uint32_t dst[ARRAY_SIZE(expected)];
1020 memset(dst, 0, sizeof(dst)); 1021 memset(dst, 0, sizeof(dst));
1021 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); 1022 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
1022 EXPECT(is_valid); 1023 EXPECT(is_valid);
1023 EXPECT(!memcmp(expected, dst, sizeof(expected))); 1024 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1024 } 1025 }
1025 } 1026 }
1026 1027
1027 } // namespace dart 1028 } // namespace dart
OLDNEW
« runtime/vm/dart_api_impl.cc ('K') | « runtime/vm/unicode.cc ('k') | runtime/vm/unit_test.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698