Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(543)

Side by Side Diff: runtime/vm/unicode_test.cc

Issue 11419086: Use a signed 32-bit integer for representing code points. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: add missing files Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/unicode.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/globals.h" 5 #include "vm/globals.h"
6 #include "vm/unicode.h" 6 #include "vm/unicode.h"
7 #include "vm/unit_test.h" 7 #include "vm/unit_test.h"
8 8
9 namespace dart { 9 namespace dart {
10 10
11 TEST_CASE(Utf8Decode) { 11 TEST_CASE(Utf8Decode) {
12 // Examples from the Unicode specification, chapter 3 12 // Examples from the Unicode specification, chapter 3
13 { 13 {
14 const char* src = "\x41\xC3\xB1\x42"; 14 const char* src = "\x41\xC3\xB1\x42";
15 uint32_t expected[] = { 0x41, 0xF1, 0x42 }; 15 int32_t expected[] = { 0x41, 0xF1, 0x42 };
16 uint32_t dst[ARRAY_SIZE(expected)]; 16 int32_t dst[ARRAY_SIZE(expected)];
17 memset(dst, 0, sizeof(dst)); 17 memset(dst, 0, sizeof(dst));
18 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 18 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
19 EXPECT(is_valid); 19 EXPECT(is_valid);
20 EXPECT(!memcmp(expected, dst, sizeof(expected))); 20 EXPECT(!memcmp(expected, dst, sizeof(expected)));
21 } 21 }
22 22
23 { 23 {
24 const char* src = "\x4D"; 24 const char* src = "\x4D";
25 uint32_t expected[] = { 0x4D }; 25 int32_t expected[] = { 0x4D };
26 uint32_t dst[ARRAY_SIZE(expected)]; 26 int32_t dst[ARRAY_SIZE(expected)];
27 memset(dst, 0, sizeof(dst)); 27 memset(dst, 0, sizeof(dst));
28 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 28 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
29 EXPECT(is_valid); 29 EXPECT(is_valid);
30 EXPECT(!memcmp(expected, dst, sizeof(expected))); 30 EXPECT(!memcmp(expected, dst, sizeof(expected)));
31 } 31 }
32 32
33 { 33 {
34 const char* src = "\xD0\xB0"; 34 const char* src = "\xD0\xB0";
35 uint32_t expected[] = { 0x430 }; 35 int32_t expected[] = { 0x430 };
36 uint32_t dst[ARRAY_SIZE(expected)]; 36 int32_t dst[ARRAY_SIZE(expected)];
37 memset(dst, 0, sizeof(dst)); 37 memset(dst, 0, sizeof(dst));
38 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 38 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
39 EXPECT(is_valid); 39 EXPECT(is_valid);
40 EXPECT(!memcmp(expected, dst, sizeof(expected))); 40 EXPECT(!memcmp(expected, dst, sizeof(expected)));
41 } 41 }
42 42
43 { 43 {
44 const char* src = "\xE4\xBA\x8C"; 44 const char* src = "\xE4\xBA\x8C";
45 uint32_t expected[] = { 0x4E8C }; 45 int32_t expected[] = { 0x4E8C };
46 uint32_t dst[ARRAY_SIZE(expected)]; 46 int32_t dst[ARRAY_SIZE(expected)];
47 memset(dst, 0, sizeof(dst)); 47 memset(dst, 0, sizeof(dst));
48 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 48 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
49 EXPECT(is_valid); 49 EXPECT(is_valid);
50 EXPECT(!memcmp(expected, dst, sizeof(expected))); 50 EXPECT(!memcmp(expected, dst, sizeof(expected)));
51 } 51 }
52 52
53 { 53 {
54 const char* src = "\xF0\x90\x8C\x82"; 54 const char* src = "\xF0\x90\x8C\x82";
55 uint32_t expected[] = { 0x10302 }; 55 int32_t expected[] = { 0x10302 };
56 uint32_t dst[ARRAY_SIZE(expected)]; 56 int32_t dst[ARRAY_SIZE(expected)];
57 memset(dst, 0, sizeof(dst)); 57 memset(dst, 0, sizeof(dst));
58 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 58 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
59 EXPECT(is_valid); 59 EXPECT(is_valid);
60 EXPECT(!memcmp(expected, dst, sizeof(expected))); 60 EXPECT(!memcmp(expected, dst, sizeof(expected)));
61 } 61 }
62 62
63 { 63 {
64 const char* src = "\x4D\xD0\xB0\xE4\xBA\x8C\xF0\x90\x8C\x82"; 64 const char* src = "\x4D\xD0\xB0\xE4\xBA\x8C\xF0\x90\x8C\x82";
65 uint32_t expected[] = { 0x4D, 0x430, 0x4E8C, 0x10302 }; 65 int32_t expected[] = { 0x4D, 0x430, 0x4E8C, 0x10302 };
66 uint32_t dst[ARRAY_SIZE(expected)]; 66 int32_t dst[ARRAY_SIZE(expected)];
67 memset(dst, 0, sizeof(dst)); 67 memset(dst, 0, sizeof(dst));
68 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 68 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
69 EXPECT(is_valid); 69 EXPECT(is_valid);
70 EXPECT(!memcmp(expected, dst, sizeof(expected))); 70 EXPECT(!memcmp(expected, dst, sizeof(expected)));
71 } 71 }
72 72
73 // Mixture of non-ASCII and ASCII characters 73 // Mixture of non-ASCII and ASCII characters
74 { 74 {
75 const char* src = "\xD7\x92\xD7\x9C\xD7\xA2\xD7\x93" 75 const char* src = "\xD7\x92\xD7\x9C\xD7\xA2\xD7\x93"
76 "\x20" 76 "\x20"
77 "\xD7\x91\xD7\xA8\xD7\x9B\xD7\x94"; 77 "\xD7\x91\xD7\xA8\xD7\x9B\xD7\x94";
78 uint32_t expected[] = { 0x5D2, 0x5DC, 0x5E2, 0x5D3, 78 int32_t expected[] = { 0x5D2, 0x5DC, 0x5E2, 0x5D3,
79 0x20, 79 0x20,
80 0x5D1, 0x5E8, 0x5DB, 0x5D4 }; 80 0x5D1, 0x5E8, 0x5DB, 0x5D4 };
81 uint32_t dst[ARRAY_SIZE(expected)]; 81 int32_t dst[ARRAY_SIZE(expected)];
82 memset(dst, 0, sizeof(dst)); 82 memset(dst, 0, sizeof(dst));
83 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 83 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
84 EXPECT(is_valid); 84 EXPECT(is_valid);
85 EXPECT(!memcmp(expected, dst, sizeof(expected))); 85 EXPECT(!memcmp(expected, dst, sizeof(expected)));
86 } 86 }
87 87
88 // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt 88 // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
89 89
90 // 1 - Some correct UTF-8 text 90 // 1 - Some correct UTF-8 text
91 { 91 {
92 const char* src = "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5"; 92 const char* src = "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5";
93 uint32_t expected[] = { 0x3BA, 0x1F79, 0x3C3, 0x3BC, 0x3B5 }; 93 int32_t expected[] = { 0x3BA, 0x1F79, 0x3C3, 0x3BC, 0x3B5 };
94 uint32_t dst[ARRAY_SIZE(expected)]; 94 int32_t dst[ARRAY_SIZE(expected)];
95 memset(dst, 0, sizeof(dst)); 95 memset(dst, 0, sizeof(dst));
96 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 96 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
97 EXPECT(is_valid); 97 EXPECT(is_valid);
98 EXPECT(!memcmp(expected, dst, sizeof(expected))); 98 EXPECT(!memcmp(expected, dst, sizeof(expected)));
99 } 99 }
100 100
101 // 2 - Boundary condition test cases 101 // 2 - Boundary condition test cases
102 102
103 // 2.1 - First possible sequence of a certain length 103 // 2.1 - First possible sequence of a certain length
104 104
105 // 2.1.1 - 1 byte (U-00000000): "\x00" 105 // 2.1.1 - 1 byte (U-00000000): "\x00"
106 { 106 {
107 const char* src = "\x00"; 107 const char* src = "\x00";
108 uint32_t expected[] = { 0x0 }; 108 int32_t expected[] = { 0x0 };
109 uint32_t dst[ARRAY_SIZE(expected)]; 109 int32_t dst[ARRAY_SIZE(expected)];
110 memset(dst, 0xFF, sizeof(dst)); 110 memset(dst, 0xFF, sizeof(dst));
111 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 111 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
112 EXPECT(is_valid); 112 EXPECT(is_valid);
113 EXPECT(memcmp(expected, dst, sizeof(expected))); 113 EXPECT(memcmp(expected, dst, sizeof(expected)));
114 } 114 }
115 115
116 // 2.1.2 - 2 bytes (U-00000080): "\xC2\x80" 116 // 2.1.2 - 2 bytes (U-00000080): "\xC2\x80"
117 { 117 {
118 const char* src = "\xC2\x80"; 118 const char* src = "\xC2\x80";
119 uint32_t expected[] = { 0x80 }; 119 int32_t expected[] = { 0x80 };
120 uint32_t dst[ARRAY_SIZE(expected)]; 120 int32_t dst[ARRAY_SIZE(expected)];
121 memset(dst, 0, sizeof(dst)); 121 memset(dst, 0, sizeof(dst));
122 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 122 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
123 EXPECT(is_valid); 123 EXPECT(is_valid);
124 EXPECT(!memcmp(expected, dst, sizeof(expected))); 124 EXPECT(!memcmp(expected, dst, sizeof(expected)));
125 } 125 }
126 126
127 // 2.1.3 - 3 bytes (U-00000800): "\xE0\xA0\x80" 127 // 2.1.3 - 3 bytes (U-00000800): "\xE0\xA0\x80"
128 { 128 {
129 const char* src = "\xE0\xA0\x80"; 129 const char* src = "\xE0\xA0\x80";
130 uint32_t expected[] = { 0x800 }; 130 int32_t expected[] = { 0x800 };
131 uint32_t dst[ARRAY_SIZE(expected)]; 131 int32_t dst[ARRAY_SIZE(expected)];
132 memset(dst, 0, sizeof(dst)); 132 memset(dst, 0, sizeof(dst));
133 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 133 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
134 EXPECT(is_valid); 134 EXPECT(is_valid);
135 EXPECT(!memcmp(expected, dst, sizeof(expected))); 135 EXPECT(!memcmp(expected, dst, sizeof(expected)));
136 } 136 }
137 137
138 // 2.1.4 - 4 bytes (U-00010000): "\xF0\x90\x80\x80" 138 // 2.1.4 - 4 bytes (U-00010000): "\xF0\x90\x80\x80"
139 { 139 {
140 const char* src = "\xF0\x90\x80\x80"; 140 const char* src = "\xF0\x90\x80\x80";
141 uint32_t expected[] = { 0x10000 }; 141 int32_t expected[] = { 0x10000 };
142 uint32_t dst[ARRAY_SIZE(expected)]; 142 int32_t dst[ARRAY_SIZE(expected)];
143 memset(dst, 0, sizeof(dst)); 143 memset(dst, 0, sizeof(dst));
144 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 144 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
145 EXPECT(is_valid); 145 EXPECT(is_valid);
146 EXPECT(!memcmp(expected, dst, sizeof(expected))); 146 EXPECT(!memcmp(expected, dst, sizeof(expected)));
147 } 147 }
148 148
149 // 2.1.5 - 5 bytes (U-00200000): "\xF8\x88\x80\x80\x80" 149 // 2.1.5 - 5 bytes (U-00200000): "\xF8\x88\x80\x80\x80"
150 { 150 {
151 const char* src = "\xF8\x88\x80\x80\x80"; 151 const char* src = "\xF8\x88\x80\x80\x80";
152 uint32_t expected[] = { 0x200000 }; 152 int32_t expected[] = { 0x200000 };
153 uint32_t dst[ARRAY_SIZE(expected)]; 153 int32_t dst[ARRAY_SIZE(expected)];
154 memset(dst, 0, sizeof(dst)); 154 memset(dst, 0, sizeof(dst));
155 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 155 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
156 EXPECT(!is_valid); 156 EXPECT(!is_valid);
157 EXPECT(memcmp(expected, dst, sizeof(expected))); 157 EXPECT(memcmp(expected, dst, sizeof(expected)));
158 } 158 }
159 159
160 // 2.1.6 - 6 bytes (U-04000000): "\xFC\x84\x80\x80\x80\x80" 160 // 2.1.6 - 6 bytes (U-04000000): "\xFC\x84\x80\x80\x80\x80"
161 { 161 {
162 const char* src = "\xFC\x84\x80\x80\x80\x80"; 162 const char* src = "\xFC\x84\x80\x80\x80\x80";
163 uint32_t expected[] = { 0x400000 }; 163 int32_t expected[] = { 0x400000 };
164 uint32_t dst[ARRAY_SIZE(expected)]; 164 int32_t dst[ARRAY_SIZE(expected)];
165 memset(dst, 0, sizeof(dst)); 165 memset(dst, 0, sizeof(dst));
166 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 166 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
167 EXPECT(!is_valid); 167 EXPECT(!is_valid);
168 EXPECT(memcmp(expected, dst, sizeof(expected))); 168 EXPECT(memcmp(expected, dst, sizeof(expected)));
169 } 169 }
170 170
171 // 2.2 - Last possible sequence of a certain length 171 // 2.2 - Last possible sequence of a certain length
172 172
173 // 2.2.1 - 1 byte (U-0000007F): "\x7F" 173 // 2.2.1 - 1 byte (U-0000007F): "\x7F"
174 { 174 {
175 const char* src = "\x7F"; 175 const char* src = "\x7F";
176 uint32_t expected[] = { 0x7F }; 176 int32_t expected[] = { 0x7F };
177 uint32_t dst[ARRAY_SIZE(expected)]; 177 int32_t dst[ARRAY_SIZE(expected)];
178 memset(dst, 0, sizeof(dst)); 178 memset(dst, 0, sizeof(dst));
179 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 179 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
180 EXPECT(is_valid); 180 EXPECT(is_valid);
181 EXPECT(!memcmp(expected, dst, sizeof(expected))); 181 EXPECT(!memcmp(expected, dst, sizeof(expected)));
182 } 182 }
183 183
184 // 2.2.2 - 2 bytes (U-000007FF): "\xDF\xBF" 184 // 2.2.2 - 2 bytes (U-000007FF): "\xDF\xBF"
185 { 185 {
186 const char* src = "\xDF\xBF"; 186 const char* src = "\xDF\xBF";
187 uint32_t expected[] = { 0x7FF }; 187 int32_t expected[] = { 0x7FF };
188 uint32_t dst[ARRAY_SIZE(expected)]; 188 int32_t dst[ARRAY_SIZE(expected)];
189 memset(dst, 0, sizeof(dst)); 189 memset(dst, 0, sizeof(dst));
190 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 190 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
191 EXPECT(is_valid); 191 EXPECT(is_valid);
192 EXPECT(!memcmp(expected, dst, sizeof(expected))); 192 EXPECT(!memcmp(expected, dst, sizeof(expected)));
193 } 193 }
194 194
195 // 2.2.3 - 3 bytes (U-0000FFFF): "\xEF\xBF\xBF" 195 // 2.2.3 - 3 bytes (U-0000FFFF): "\xEF\xBF\xBF"
196 { 196 {
197 const char* src = "\xEF\xBF\xBF"; 197 const char* src = "\xEF\xBF\xBF";
198 uint32_t expected[] = { 0xFFFF }; 198 int32_t expected[] = { 0xFFFF };
199 uint32_t dst[ARRAY_SIZE(expected)]; 199 int32_t dst[ARRAY_SIZE(expected)];
200 memset(dst, 0, sizeof(dst)); 200 memset(dst, 0, sizeof(dst));
201 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 201 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
202 EXPECT(is_valid); 202 EXPECT(is_valid);
203 EXPECT(!memcmp(expected, dst, sizeof(expected))); 203 EXPECT(!memcmp(expected, dst, sizeof(expected)));
204 } 204 }
205 205
206 // 2.2.4 - 4 bytes (U-001FFFFF): "\xF7\xBF\xBF\xBF" 206 // 2.2.4 - 4 bytes (U-001FFFFF): "\xF7\xBF\xBF\xBF"
207 { 207 {
208 const char* src = "\xF7\xBF\xBF\xBF"; 208 const char* src = "\xF7\xBF\xBF\xBF";
209 uint32_t expected[] = { 0x1FFFF }; 209 int32_t expected[] = { 0x1FFFF };
210 uint32_t dst[ARRAY_SIZE(expected)]; 210 int32_t dst[ARRAY_SIZE(expected)];
211 memset(dst, 0, sizeof(dst)); 211 memset(dst, 0, sizeof(dst));
212 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 212 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
213 EXPECT(!is_valid); 213 EXPECT(!is_valid);
214 EXPECT(memcmp(expected, dst, sizeof(expected))); 214 EXPECT(memcmp(expected, dst, sizeof(expected)));
215 } 215 }
216 216
217 // 2.2.5 - 5 bytes (U-03FFFFFF): "\xFB\xBF\xBF\xBF\xBF" 217 // 2.2.5 - 5 bytes (U-03FFFFFF): "\xFB\xBF\xBF\xBF\xBF"
218 { 218 {
219 const char* src = "\xFB\xBF\xBF\xBF\xBF"; 219 const char* src = "\xFB\xBF\xBF\xBF\xBF";
220 uint32_t expected[] = { 0x3FFFFFF }; 220 int32_t expected[] = { 0x3FFFFFF };
221 uint32_t dst[ARRAY_SIZE(expected)]; 221 int32_t dst[ARRAY_SIZE(expected)];
222 memset(dst, 0, sizeof(dst)); 222 memset(dst, 0, sizeof(dst));
223 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 223 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
224 EXPECT(!is_valid); 224 EXPECT(!is_valid);
225 EXPECT(memcmp(expected, dst, sizeof(expected))); 225 EXPECT(memcmp(expected, dst, sizeof(expected)));
226 } 226 }
227 227
228 // 2.2.6 - 6 bytes (U-7FFFFFFF): "\xFD\xBF\xBF\xBF\xBF\xBF" 228 // 2.2.6 - 6 bytes (U-7FFFFFFF): "\xFD\xBF\xBF\xBF\xBF\xBF"
229 { 229 {
230 const char* src = "\xFD\xBF\xBF\xBF\xBF\xBF"; 230 const char* src = "\xFD\xBF\xBF\xBF\xBF\xBF";
231 uint32_t expected[] = { 0x7FFFFFF }; 231 int32_t expected[] = { 0x7FFFFFF };
232 uint32_t dst[ARRAY_SIZE(expected)]; 232 int32_t dst[ARRAY_SIZE(expected)];
233 memset(dst, 0, sizeof(dst)); 233 memset(dst, 0, sizeof(dst));
234 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 234 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
235 EXPECT(!is_valid); 235 EXPECT(!is_valid);
236 EXPECT(memcmp(expected, dst, sizeof(expected))); 236 EXPECT(memcmp(expected, dst, sizeof(expected)));
237 } 237 }
238 238
239 // 2.3 - Other boundary conditions 239 // 2.3 - Other boundary conditions
240 240
241 // 2.3.1 - U-0000D7FF = ed 9f bf = "\xED\x9F\xBF" 241 // 2.3.1 - U-0000D7FF = ed 9f bf = "\xED\x9F\xBF"
242 { 242 {
243 const char* src = "\xED\x9F\xBF"; 243 const char* src = "\xED\x9F\xBF";
244 uint32_t expected[] = { 0xD7FF }; 244 int32_t expected[] = { 0xD7FF };
245 uint32_t dst[ARRAY_SIZE(expected)]; 245 int32_t dst[ARRAY_SIZE(expected)];
246 memset(dst, 0, sizeof(dst)); 246 memset(dst, 0, sizeof(dst));
247 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 247 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
248 EXPECT(is_valid); 248 EXPECT(is_valid);
249 EXPECT(!memcmp(expected, dst, sizeof(expected))); 249 EXPECT(!memcmp(expected, dst, sizeof(expected)));
250 } 250 }
251 251
252 // 2.3.2 - U-0000E000 = ee 80 80 = "\xEE\x80\x80" 252 // 2.3.2 - U-0000E000 = ee 80 80 = "\xEE\x80\x80"
253 { 253 {
254 const char* src = "\xEE\x80\x80"; 254 const char* src = "\xEE\x80\x80";
255 uint32_t expected[] = { 0xE000 }; 255 int32_t expected[] = { 0xE000 };
256 uint32_t dst[ARRAY_SIZE(expected)]; 256 int32_t dst[ARRAY_SIZE(expected)];
257 memset(dst, 0, sizeof(dst)); 257 memset(dst, 0, sizeof(dst));
258 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 258 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
259 EXPECT(is_valid); 259 EXPECT(is_valid);
260 EXPECT(!memcmp(expected, dst, sizeof(expected))); 260 EXPECT(!memcmp(expected, dst, sizeof(expected)));
261 } 261 }
262 262
263 // 2.3.3 - U-0000FFFD = ef bf bd = "\xEF\xBF\xBD" 263 // 2.3.3 - U-0000FFFD = ef bf bd = "\xEF\xBF\xBD"
264 { 264 {
265 const char* src = "\xEF\xBF\xBD"; 265 const char* src = "\xEF\xBF\xBD";
266 uint32_t expected[] = { 0xFFFD }; 266 int32_t expected[] = { 0xFFFD };
267 uint32_t dst[ARRAY_SIZE(expected)]; 267 int32_t dst[ARRAY_SIZE(expected)];
268 memset(dst, 0, sizeof(dst)); 268 memset(dst, 0, sizeof(dst));
269 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 269 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
270 EXPECT(is_valid); 270 EXPECT(is_valid);
271 EXPECT(!memcmp(expected, dst, sizeof(expected))); 271 EXPECT(!memcmp(expected, dst, sizeof(expected)));
272 } 272 }
273 273
274 // 2.3.4 - U-0010FFFF = f4 8f bf bf = "\xF4\x8F\xBF\xBF" 274 // 2.3.4 - U-0010FFFF = f4 8f bf bf = "\xF4\x8F\xBF\xBF"
275 { 275 {
276 const char* src = "\xF4\x8F\xBF\xBF"; 276 const char* src = "\xF4\x8F\xBF\xBF";
277 uint32_t expected[] = { 0x10FFFF }; 277 int32_t expected[] = { 0x10FFFF };
278 uint32_t dst[ARRAY_SIZE(expected)]; 278 int32_t dst[ARRAY_SIZE(expected)];
279 memset(dst, 0, sizeof(dst)); 279 memset(dst, 0, sizeof(dst));
280 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 280 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
281 EXPECT(is_valid); 281 EXPECT(is_valid);
282 EXPECT(!memcmp(expected, dst, sizeof(expected))); 282 EXPECT(!memcmp(expected, dst, sizeof(expected)));
283 } 283 }
284 284
285 // 2.3.5 - U-00110000 = f4 90 80 80 = "\xF4\x90\x80\x80" 285 // 2.3.5 - U-00110000 = f4 90 80 80 = "\xF4\x90\x80\x80"
286 { 286 {
287 const char* src = "\xF4\x90\x80\x80"; 287 const char* src = "\xF4\x90\x80\x80";
288 uint32_t expected[] = { 0x110000 }; 288 int32_t expected[] = { 0x110000 };
289 uint32_t dst[ARRAY_SIZE(expected)]; 289 int32_t dst[ARRAY_SIZE(expected)];
290 memset(dst, 0, sizeof(dst)); 290 memset(dst, 0, sizeof(dst));
291 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 291 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
292 EXPECT(!is_valid); 292 EXPECT(!is_valid);
293 EXPECT(memcmp(expected, dst, sizeof(expected))); 293 EXPECT(memcmp(expected, dst, sizeof(expected)));
294 } 294 }
295 295
296 // 3 - Malformed sequences 296 // 3 - Malformed sequences
297 297
298 // 3.1 - Unexpected continuation bytes 298 // 3.1 - Unexpected continuation bytes
299 299
300 // 3.1.1 - First continuation byte 0x80: "\x80" 300 // 3.1.1 - First continuation byte 0x80: "\x80"
301 { 301 {
302 const char* src = "\x80"; 302 const char* src = "\x80";
303 uint32_t expected[] = { 0x80 }; 303 int32_t expected[] = { 0x80 };
304 uint32_t dst[ARRAY_SIZE(expected)]; 304 int32_t dst[ARRAY_SIZE(expected)];
305 memset(dst, 0, sizeof(dst)); 305 memset(dst, 0, sizeof(dst));
306 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 306 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
307 EXPECT(!is_valid); 307 EXPECT(!is_valid);
308 EXPECT(memcmp(expected, dst, sizeof(expected))); 308 EXPECT(memcmp(expected, dst, sizeof(expected)));
309 } 309 }
310 310
311 // 3.1.2 - Last continuation byte 0xbf: "\xBF" 311 // 3.1.2 - Last continuation byte 0xbf: "\xBF"
312 { 312 {
313 const char* src = "\xBF"; 313 const char* src = "\xBF";
314 uint32_t expected[] = { 0xBF }; 314 int32_t expected[] = { 0xBF };
315 uint32_t dst[ARRAY_SIZE(expected)]; 315 int32_t dst[ARRAY_SIZE(expected)];
316 memset(dst, 0, sizeof(dst)); 316 memset(dst, 0, sizeof(dst));
317 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 317 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
318 EXPECT(!is_valid); 318 EXPECT(!is_valid);
319 EXPECT(memcmp(expected, dst, sizeof(expected))); 319 EXPECT(memcmp(expected, dst, sizeof(expected)));
320 } 320 }
321 321
322 // 3.1.3 - 2 continuation bytes: "\x80\xBF" 322 // 3.1.3 - 2 continuation bytes: "\x80\xBF"
323 { 323 {
324 const char* src = "\x80\xBF"; 324 const char* src = "\x80\xBF";
325 uint32_t expected[] = { 0x80, 0xBF }; 325 int32_t expected[] = { 0x80, 0xBF };
326 uint32_t dst[ARRAY_SIZE(expected)]; 326 int32_t dst[ARRAY_SIZE(expected)];
327 memset(dst, 0, sizeof(dst)); 327 memset(dst, 0, sizeof(dst));
328 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 328 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
329 EXPECT(!is_valid); 329 EXPECT(!is_valid);
330 EXPECT(memcmp(expected, dst, sizeof(expected))); 330 EXPECT(memcmp(expected, dst, sizeof(expected)));
331 } 331 }
332 332
333 // 3.1.4 - 3 continuation bytes: "\x80\xBF\x80" 333 // 3.1.4 - 3 continuation bytes: "\x80\xBF\x80"
334 { 334 {
335 const char* src = "\x80\xBF\x80"; 335 const char* src = "\x80\xBF\x80";
336 uint32_t expected[] = { 0x80, 0xBF, 0x80 }; 336 int32_t expected[] = { 0x80, 0xBF, 0x80 };
337 uint32_t dst[ARRAY_SIZE(expected)]; 337 int32_t dst[ARRAY_SIZE(expected)];
338 memset(dst, 0, sizeof(dst)); 338 memset(dst, 0, sizeof(dst));
339 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 339 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
340 EXPECT(!is_valid); 340 EXPECT(!is_valid);
341 EXPECT(memcmp(expected, dst, sizeof(expected))); 341 EXPECT(memcmp(expected, dst, sizeof(expected)));
342 } 342 }
343 343
344 // 3.1.5 - 4 continuation bytes: "\x80\xBF\x80\xBF" 344 // 3.1.5 - 4 continuation bytes: "\x80\xBF\x80\xBF"
345 { 345 {
346 const char* src = "\x80\xBF\x80\xBF"; 346 const char* src = "\x80\xBF\x80\xBF";
347 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF }; 347 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF };
348 uint32_t dst[ARRAY_SIZE(expected)]; 348 int32_t dst[ARRAY_SIZE(expected)];
349 memset(dst, 0, sizeof(dst)); 349 memset(dst, 0, sizeof(dst));
350 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 350 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
351 EXPECT(!is_valid); 351 EXPECT(!is_valid);
352 EXPECT(memcmp(expected, dst, sizeof(expected))); 352 EXPECT(memcmp(expected, dst, sizeof(expected)));
353 } 353 }
354 354
355 // 3.1.6 - 5 continuation bytes: "\x80\xBF\x80\xBF\x80" 355 // 3.1.6 - 5 continuation bytes: "\x80\xBF\x80\xBF\x80"
356 { 356 {
357 const char* src = "\x80\xBF\x80\xBF\x80"; 357 const char* src = "\x80\xBF\x80\xBF\x80";
358 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80 }; 358 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80 };
359 uint32_t dst[ARRAY_SIZE(expected)]; 359 int32_t dst[ARRAY_SIZE(expected)];
360 memset(dst, 0, sizeof(dst)); 360 memset(dst, 0, sizeof(dst));
361 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 361 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
362 EXPECT(!is_valid); 362 EXPECT(!is_valid);
363 EXPECT(memcmp(expected, dst, sizeof(expected))); 363 EXPECT(memcmp(expected, dst, sizeof(expected)));
364 } 364 }
365 365
366 // 3.1.7 - 6 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF" 366 // 3.1.7 - 6 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF"
367 { 367 {
368 const char* src = "\x80\xBF\x80\xBF\x80\xBF"; 368 const char* src = "\x80\xBF\x80\xBF\x80\xBF";
369 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF }; 369 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF };
370 uint32_t dst[ARRAY_SIZE(expected)]; 370 int32_t dst[ARRAY_SIZE(expected)];
371 memset(dst, 0, sizeof(dst)); 371 memset(dst, 0, sizeof(dst));
372 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 372 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
373 EXPECT(!is_valid); 373 EXPECT(!is_valid);
374 EXPECT(memcmp(expected, dst, sizeof(expected))); 374 EXPECT(memcmp(expected, dst, sizeof(expected)));
375 } 375 }
376 376
377 // 3.1.8 - 7 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF\x80" 377 // 3.1.8 - 7 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF\x80"
378 { 378 {
379 const char* src = "\x80\xBF\x80\xBF\x80\xBF\x80"; 379 const char* src = "\x80\xBF\x80\xBF\x80\xBF\x80";
380 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 }; 380 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 };
381 uint32_t dst[ARRAY_SIZE(expected)]; 381 int32_t dst[ARRAY_SIZE(expected)];
382 memset(dst, 0, sizeof(dst)); 382 memset(dst, 0, sizeof(dst));
383 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 383 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
384 EXPECT(!is_valid); 384 EXPECT(!is_valid);
385 EXPECT(memcmp(expected, dst, sizeof(expected))); 385 EXPECT(memcmp(expected, dst, sizeof(expected)));
386 } 386 }
387 387
388 // 3.1.9 - Sequence of all 64 possible continuation bytes (0x80-0xbf): 388 // 3.1.9 - Sequence of all 64 possible continuation bytes (0x80-0xbf):
389 { 389 {
390 const char* src = "\x80\x81\x82\x83\x84\x85\x86\x87" 390 const char* src = "\x80\x81\x82\x83\x84\x85\x86\x87"
391 "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" 391 "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
392 "\x90\x91\x92\x93\x94\x95\x96\x97" 392 "\x90\x91\x92\x93\x94\x95\x96\x97"
393 "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F" 393 "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
394 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7" 394 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7"
395 "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" 395 "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
396 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7" 396 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7"
397 "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"; 397 "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF";
398 uint32_t expected[] = { 0x0 }; 398 int32_t expected[] = { 0x0 };
399 uint32_t dst[ARRAY_SIZE(expected)]; 399 int32_t dst[ARRAY_SIZE(expected)];
400 for (size_t i = 0; i < strlen(src); ++i) { 400 for (size_t i = 0; i < strlen(src); ++i) {
401 memset(dst, 0xFF, sizeof(dst)); 401 memset(dst, 0xFF, sizeof(dst));
402 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); 402 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
403 EXPECT(!is_valid); 403 EXPECT(!is_valid);
404 EXPECT(memcmp(expected, dst, sizeof(expected))); 404 EXPECT(memcmp(expected, dst, sizeof(expected)));
405 } 405 }
406 } 406 }
407 407
408 // 3.2 - Lonely start character 408 // 3.2 - Lonely start character
409 409
410 // 3.2.1 - All 32 first bytes of 2-byte sequences (0xc0-0xdf), each 410 // 3.2.1 - All 32 first bytes of 2-byte sequences (0xc0-0xdf), each
411 // followed by a space character: 411 // followed by a space character:
412 { 412 {
413 const char* src = "\xC0\x20\xC1\x20\xC2\x20\xC3\x20" 413 const char* src = "\xC0\x20\xC1\x20\xC2\x20\xC3\x20"
414 "\xC4\x20\xC5\x20\xC6\x20\xC7\x20" 414 "\xC4\x20\xC5\x20\xC6\x20\xC7\x20"
415 "\xC8\x20\xC9\x20\xCA\x20\xCB\x20" 415 "\xC8\x20\xC9\x20\xCA\x20\xCB\x20"
416 "\xCC\x20\xCD\x20\xCE\x20\xCF\x20" 416 "\xCC\x20\xCD\x20\xCE\x20\xCF\x20"
417 "\xD0\x20\xD1\x20\xD2\x20\xD3\x20" 417 "\xD0\x20\xD1\x20\xD2\x20\xD3\x20"
418 "\xD4\x20\xD5\x20\xD6\x20\xD7\x20" 418 "\xD4\x20\xD5\x20\xD6\x20\xD7\x20"
419 "\xD8\x20\xD9\x20\xDA\x20\xDB\x20" 419 "\xD8\x20\xD9\x20\xDA\x20\xDB\x20"
420 "\xDC\x20\xDD\x20\xDE\x20\xDF\x20"; 420 "\xDC\x20\xDD\x20\xDE\x20\xDF\x20";
421 uint32_t expected[] = { 0x0 }; 421 int32_t expected[] = { 0x0 };
422 uint32_t dst[ARRAY_SIZE(expected)]; 422 int32_t dst[ARRAY_SIZE(expected)];
423 for (size_t i = 0; i < strlen(src); i += 2) { 423 for (size_t i = 0; i < strlen(src); i += 2) {
424 memset(dst, 0xFF, sizeof(dst)); 424 memset(dst, 0xFF, sizeof(dst));
425 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); 425 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
426 EXPECT(!is_valid); 426 EXPECT(!is_valid);
427 EXPECT(memcmp(expected, dst, sizeof(expected))); 427 EXPECT(memcmp(expected, dst, sizeof(expected)));
428 } 428 }
429 } 429 }
430 430
431 // 3.2.2 - All 16 first bytes of 3-byte sequences (0xe0-0xef), each 431 // 3.2.2 - All 16 first bytes of 3-byte sequences (0xe0-0xef), each
432 // followed by a space character: 432 // followed by a space character:
433 { 433 {
434 const char* src = "\xE0\x20\xE1\x20\xE2\x20\xE3\x20" 434 const char* src = "\xE0\x20\xE1\x20\xE2\x20\xE3\x20"
435 "\xE4\x20\xE5\x20\xE6\x20\xE7\x20" 435 "\xE4\x20\xE5\x20\xE6\x20\xE7\x20"
436 "\xE8\x20\xE9\x20\xEA\x20\xEB\x20" 436 "\xE8\x20\xE9\x20\xEA\x20\xEB\x20"
437 "\xEC\x20\xED\x20\xEE\x20\xEF\x20"; 437 "\xEC\x20\xED\x20\xEE\x20\xEF\x20";
438 uint32_t expected[] = { 0x0 }; 438 int32_t expected[] = { 0x0 };
439 uint32_t dst[ARRAY_SIZE(expected)]; 439 int32_t dst[ARRAY_SIZE(expected)];
440 for (size_t i = 0; i < strlen(src); i += 2) { 440 for (size_t i = 0; i < strlen(src); i += 2) {
441 memset(dst, 0xFF, sizeof(dst)); 441 memset(dst, 0xFF, sizeof(dst));
442 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); 442 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
443 EXPECT(!is_valid); 443 EXPECT(!is_valid);
444 EXPECT(memcmp(expected, dst, sizeof(expected))); 444 EXPECT(memcmp(expected, dst, sizeof(expected)));
445 } 445 }
446 } 446 }
447 447
448 // 3.2.3 - All 8 first bytes of 4-byte sequences (0xf0-0xf7), each 448 // 3.2.3 - All 8 first bytes of 4-byte sequences (0xf0-0xf7), each
449 // followed by a space character: 449 // followed by a space character:
450 { 450 {
451 const char* src = "\xF0\x20\xF1\x20\xF2\x20\xF3\x20" 451 const char* src = "\xF0\x20\xF1\x20\xF2\x20\xF3\x20"
452 "\xF4\x20\xF5\x20\xF6\x20\xF7\x20"; 452 "\xF4\x20\xF5\x20\xF6\x20\xF7\x20";
453 uint32_t expected[] = { 0x0 }; 453 int32_t expected[] = { 0x0 };
454 uint32_t dst[ARRAY_SIZE(expected)]; 454 int32_t dst[ARRAY_SIZE(expected)];
455 for (size_t i = 0; i < strlen(src); i += 2) { 455 for (size_t i = 0; i < strlen(src); i += 2) {
456 memset(dst, 0xFF, sizeof(dst)); 456 memset(dst, 0xFF, sizeof(dst));
457 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); 457 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
458 EXPECT(!is_valid); 458 EXPECT(!is_valid);
459 EXPECT(memcmp(expected, dst, sizeof(expected))); 459 EXPECT(memcmp(expected, dst, sizeof(expected)));
460 } 460 }
461 } 461 }
462 462
463 // 3.2.4 - All 4 first bytes of 5-byte sequences (0xf8-0xfb), each 463 // 3.2.4 - All 4 first bytes of 5-byte sequences (0xf8-0xfb), each
464 // followed by a space character: 464 // followed by a space character:
465 { 465 {
466 const char* src = "\xF8\x20\xF9\x20\xFA\x20\xFB\x20"; 466 const char* src = "\xF8\x20\xF9\x20\xFA\x20\xFB\x20";
467 uint32_t expected[] = { 0x0 }; 467 int32_t expected[] = { 0x0 };
468 uint32_t dst[ARRAY_SIZE(expected)]; 468 int32_t dst[ARRAY_SIZE(expected)];
469 for (size_t i = 0; i < strlen(src); i += 2) { 469 for (size_t i = 0; i < strlen(src); i += 2) {
470 memset(dst, 0xFF, sizeof(dst)); 470 memset(dst, 0xFF, sizeof(dst));
471 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); 471 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
472 EXPECT(!is_valid); 472 EXPECT(!is_valid);
473 EXPECT(memcmp(expected, dst, sizeof(expected))); 473 EXPECT(memcmp(expected, dst, sizeof(expected)));
474 } 474 }
475 } 475 }
476 476
477 // 3.2.5 - All 2 first bytes of 6-byte sequences (0xfc-0xfd), each 477 // 3.2.5 - All 2 first bytes of 6-byte sequences (0xfc-0xfd), each
478 // followed by a space character: 478 // followed by a space character:
479 { 479 {
480 const char* src = "\xFC\x20\xFD\x20"; 480 const char* src = "\xFC\x20\xFD\x20";
481 uint32_t expected[] = { 0x0 }; 481 int32_t expected[] = { 0x0 };
482 uint32_t dst[ARRAY_SIZE(expected)]; 482 int32_t dst[ARRAY_SIZE(expected)];
483 for (size_t i = 0; i < strlen(src); i += 2) { 483 for (size_t i = 0; i < strlen(src); i += 2) {
484 memset(dst, 0xFF, sizeof(dst)); 484 memset(dst, 0xFF, sizeof(dst));
485 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); 485 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
486 EXPECT(!is_valid); 486 EXPECT(!is_valid);
487 EXPECT(memcmp(expected, dst, sizeof(expected))); 487 EXPECT(memcmp(expected, dst, sizeof(expected)));
488 } 488 }
489 } 489 }
490 490
491 // 3.3 - Sequences with last continuation byte missing 491 // 3.3 - Sequences with last continuation byte missing
492 492
493 // 3.3.1 - 2-byte sequence with last byte missing (U+0000): "\xC0" 493 // 3.3.1 - 2-byte sequence with last byte missing (U+0000): "\xC0"
494 { 494 {
495 const char* src = "\xC0"; 495 const char* src = "\xC0";
496 uint32_t expected[] = { 0x0 }; 496 int32_t expected[] = { 0x0 };
497 uint32_t dst[ARRAY_SIZE(expected)]; 497 int32_t dst[ARRAY_SIZE(expected)];
498 memset(dst, 0xFF, sizeof(dst)); 498 memset(dst, 0xFF, sizeof(dst));
499 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 499 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
500 EXPECT(!is_valid); 500 EXPECT(!is_valid);
501 EXPECT(memcmp(expected, dst, sizeof(expected))); 501 EXPECT(memcmp(expected, dst, sizeof(expected)));
502 } 502 }
503 503
504 // 3.3.2 - 3-byte sequence with last byte missing (U+0000): "\xE0\x80" 504 // 3.3.2 - 3-byte sequence with last byte missing (U+0000): "\xE0\x80"
505 { 505 {
506 const char* src = "\xE0\x80"; 506 const char* src = "\xE0\x80";
507 uint32_t expected[] = { 0x0 }; 507 int32_t expected[] = { 0x0 };
508 uint32_t dst[ARRAY_SIZE(expected)]; 508 int32_t dst[ARRAY_SIZE(expected)];
509 memset(dst, 0xFF, sizeof(dst)); 509 memset(dst, 0xFF, sizeof(dst));
510 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 510 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
511 EXPECT(!is_valid); 511 EXPECT(!is_valid);
512 EXPECT(memcmp(expected, dst, sizeof(expected))); 512 EXPECT(memcmp(expected, dst, sizeof(expected)));
513 } 513 }
514 514
515 // 3.3.3 - 4-byte sequence with last byte missing (U+0000): "\xF0\x80\x80" 515 // 3.3.3 - 4-byte sequence with last byte missing (U+0000): "\xF0\x80\x80"
516 { 516 {
517 const char* src = "\xF0\x80\x80"; 517 const char* src = "\xF0\x80\x80";
518 uint32_t expected[] = { 0x0 }; 518 int32_t expected[] = { 0x0 };
519 uint32_t dst[ARRAY_SIZE(expected)]; 519 int32_t dst[ARRAY_SIZE(expected)];
520 memset(dst, 0xFF, sizeof(dst)); 520 memset(dst, 0xFF, sizeof(dst));
521 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 521 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
522 EXPECT(!is_valid); 522 EXPECT(!is_valid);
523 EXPECT(memcmp(expected, dst, sizeof(expected))); 523 EXPECT(memcmp(expected, dst, sizeof(expected)));
524 } 524 }
525 525
526 // 3.3.4 - 5-byte sequence with last byte missing (U+0000): "\xF8\x80\x80\x80" 526 // 3.3.4 - 5-byte sequence with last byte missing (U+0000): "\xF8\x80\x80\x80"
527 { 527 {
528 const char* src = "\xF8\x80\x80\x80"; 528 const char* src = "\xF8\x80\x80\x80";
529 uint32_t expected[] = { 0x0 }; 529 int32_t expected[] = { 0x0 };
530 uint32_t dst[ARRAY_SIZE(expected)]; 530 int32_t dst[ARRAY_SIZE(expected)];
531 memset(dst, 0xFF, sizeof(dst)); 531 memset(dst, 0xFF, sizeof(dst));
532 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 532 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
533 EXPECT(!is_valid); 533 EXPECT(!is_valid);
534 EXPECT(memcmp(expected, dst, sizeof(expected))); 534 EXPECT(memcmp(expected, dst, sizeof(expected)));
535 } 535 }
536 536
537 // 3.3.5 - 6-byte sequence with last byte missing (U+0000): 537 // 3.3.5 - 6-byte sequence with last byte missing (U+0000):
538 // "\xFC\x80\x80\x80\x80" 538 // "\xFC\x80\x80\x80\x80"
539 { 539 {
540 const char* src = "\xFC\x80\x80\x80\x80"; 540 const char* src = "\xFC\x80\x80\x80\x80";
541 uint32_t expected[] = { 0x0 }; 541 int32_t expected[] = { 0x0 };
542 uint32_t dst[ARRAY_SIZE(expected)]; 542 int32_t dst[ARRAY_SIZE(expected)];
543 memset(dst, 0xFF, sizeof(dst)); 543 memset(dst, 0xFF, sizeof(dst));
544 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 544 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
545 EXPECT(!is_valid); 545 EXPECT(!is_valid);
546 EXPECT(memcmp(expected, dst, sizeof(expected))); 546 EXPECT(memcmp(expected, dst, sizeof(expected)));
547 } 547 }
548 548
549 // 3.3.6 - 2-byte sequence with last byte missing (U-000007FF): "\xDF" 549 // 3.3.6 - 2-byte sequence with last byte missing (U-000007FF): "\xDF"
550 { 550 {
551 const char* src = "\xDF"; 551 const char* src = "\xDF";
552 uint32_t expected[] = { 0x0 }; 552 int32_t expected[] = { 0x0 };
553 uint32_t dst[ARRAY_SIZE(expected)]; 553 int32_t dst[ARRAY_SIZE(expected)];
554 memset(dst, 0xFF, sizeof(dst)); 554 memset(dst, 0xFF, sizeof(dst));
555 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 555 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
556 EXPECT(!is_valid); 556 EXPECT(!is_valid);
557 EXPECT(memcmp(expected, dst, sizeof(expected))); 557 EXPECT(memcmp(expected, dst, sizeof(expected)));
558 } 558 }
559 559
560 // 3.3.7 - 3-byte sequence with last byte missing (U-0000FFFF): "\xEF\xBF" 560 // 3.3.7 - 3-byte sequence with last byte missing (U-0000FFFF): "\xEF\xBF"
561 { 561 {
562 const char* src = "\xEF\xBF"; 562 const char* src = "\xEF\xBF";
563 uint32_t expected[] = { 0x0 }; 563 int32_t expected[] = { 0x0 };
564 uint32_t dst[ARRAY_SIZE(expected)]; 564 int32_t dst[ARRAY_SIZE(expected)];
565 memset(dst, 0xFF, sizeof(dst)); 565 memset(dst, 0xFF, sizeof(dst));
566 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 566 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
567 EXPECT(!is_valid); 567 EXPECT(!is_valid);
568 EXPECT(memcmp(expected, dst, sizeof(expected))); 568 EXPECT(memcmp(expected, dst, sizeof(expected)));
569 } 569 }
570 570
571 // 3.3.8 - 4-byte sequence with last byte missing (U-001FFFFF): "\xF7\xBF\xBF" 571 // 3.3.8 - 4-byte sequence with last byte missing (U-001FFFFF): "\xF7\xBF\xBF"
572 { 572 {
573 const char* src = "\xF7\xBF\xBF"; 573 const char* src = "\xF7\xBF\xBF";
574 uint32_t expected[] = { 0x0 }; 574 int32_t expected[] = { 0x0 };
575 uint32_t dst[ARRAY_SIZE(expected)]; 575 int32_t dst[ARRAY_SIZE(expected)];
576 memset(dst, 0xFF, sizeof(dst)); 576 memset(dst, 0xFF, sizeof(dst));
577 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 577 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
578 EXPECT(!is_valid); 578 EXPECT(!is_valid);
579 EXPECT(memcmp(expected, dst, sizeof(expected))); 579 EXPECT(memcmp(expected, dst, sizeof(expected)));
580 } 580 }
581 581
582 // 3.3.9 - 5-byte sequence with last byte missing (U-03FFFFFF): 582 // 3.3.9 - 5-byte sequence with last byte missing (U-03FFFFFF):
583 // "\xFB\xBF\xBF\xBF" 583 // "\xFB\xBF\xBF\xBF"
584 { 584 {
585 const char* src = "\xFB\xBF\xBF\xBF"; 585 const char* src = "\xFB\xBF\xBF\xBF";
586 uint32_t expected[] = { 0x0 }; 586 int32_t expected[] = { 0x0 };
587 uint32_t dst[ARRAY_SIZE(expected)]; 587 int32_t dst[ARRAY_SIZE(expected)];
588 memset(dst, 0xFF, sizeof(dst)); 588 memset(dst, 0xFF, sizeof(dst));
589 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 589 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
590 EXPECT(!is_valid); 590 EXPECT(!is_valid);
591 EXPECT(memcmp(expected, dst, sizeof(expected))); 591 EXPECT(memcmp(expected, dst, sizeof(expected)));
592 } 592 }
593 593
594 // 3.3.10 - 6-byte sequence with last byte missing (U-7FFFFFFF): 594 // 3.3.10 - 6-byte sequence with last byte missing (U-7FFFFFFF):
595 // "\xFD\xBF\xBF\xBF\xBF" 595 // "\xFD\xBF\xBF\xBF\xBF"
596 { 596 {
597 const char* src = "\xFD\xBF\xBF\xBF\xBF"; 597 const char* src = "\xFD\xBF\xBF\xBF\xBF";
598 uint32_t expected[] = { 0x0 }; 598 int32_t expected[] = { 0x0 };
599 uint32_t dst[ARRAY_SIZE(expected)]; 599 int32_t dst[ARRAY_SIZE(expected)];
600 memset(dst, 0xFF, sizeof(dst)); 600 memset(dst, 0xFF, sizeof(dst));
601 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 601 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
602 EXPECT(!is_valid); 602 EXPECT(!is_valid);
603 EXPECT(memcmp(expected, dst, sizeof(expected))); 603 EXPECT(memcmp(expected, dst, sizeof(expected)));
604 } 604 }
605 605
606 // 3.4 - Concatenation of incomplete sequences 606 // 3.4 - Concatenation of incomplete sequences
607 { 607 {
608 const char* src = "\xC0\xE0\x80\xF0\x80\x80" 608 const char* src = "\xC0\xE0\x80\xF0\x80\x80"
609 "\xF8\x80\x80\x80\xFC\x80" 609 "\xF8\x80\x80\x80\xFC\x80"
610 "\x80\x80\x80\xDF\xEF\xBF" 610 "\x80\x80\x80\xDF\xEF\xBF"
611 "\xF7\xBF\xBF\xFB\xBF\xBF" 611 "\xF7\xBF\xBF\xFB\xBF\xBF"
612 "\xBF\xFD\xBF\xBF\xBF\xBF"; 612 "\xBF\xFD\xBF\xBF\xBF\xBF";
613 uint32_t expected[] = { 0x0 }; 613 int32_t expected[] = { 0x0 };
614 uint32_t dst[ARRAY_SIZE(expected)]; 614 int32_t dst[ARRAY_SIZE(expected)];
615 for (size_t i = 0; i < strlen(src); ++i) { 615 for (size_t i = 0; i < strlen(src); ++i) {
616 for (size_t j = 1; j < (strlen(src) - i); ++j) { 616 for (size_t j = 1; j < (strlen(src) - i); ++j) {
617 memset(dst, 0xFF, sizeof(dst)); 617 memset(dst, 0xFF, sizeof(dst));
618 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], 618 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i],
619 dst, ARRAY_SIZE(dst)); 619 dst, ARRAY_SIZE(dst));
620 EXPECT(!is_valid); 620 EXPECT(!is_valid);
621 EXPECT(memcmp(expected, dst, sizeof(expected))); 621 EXPECT(memcmp(expected, dst, sizeof(expected)));
622 } 622 }
623 } 623 }
624 } 624 }
625 625
626 // 3.5 - Impossible bytes 626 // 3.5 - Impossible bytes
627 627
628 // 3.5.1 - fe = "\xFE" 628 // 3.5.1 - fe = "\xFE"
629 { 629 {
630 const char* src = "\xFE"; 630 const char* src = "\xFE";
631 uint32_t expected[] = { 0xFE }; 631 int32_t expected[] = { 0xFE };
632 uint32_t dst[ARRAY_SIZE(expected)]; 632 int32_t dst[ARRAY_SIZE(expected)];
633 memset(dst, 0, sizeof(dst)); 633 memset(dst, 0, sizeof(dst));
634 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 634 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
635 EXPECT(!is_valid); 635 EXPECT(!is_valid);
636 EXPECT(memcmp(expected, dst, sizeof(expected))); 636 EXPECT(memcmp(expected, dst, sizeof(expected)));
637 } 637 }
638 638
639 // 3.5.2 - ff = "\xFF" 639 // 3.5.2 - ff = "\xFF"
640 { 640 {
641 const char* src = "\xFF"; 641 const char* src = "\xFF";
642 uint32_t expected[] = { 0xFF }; 642 int32_t expected[] = { 0xFF };
643 uint32_t dst[ARRAY_SIZE(expected)]; 643 int32_t dst[ARRAY_SIZE(expected)];
644 memset(dst, 0, sizeof(dst)); 644 memset(dst, 0, sizeof(dst));
645 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 645 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
646 EXPECT(!is_valid); 646 EXPECT(!is_valid);
647 EXPECT(memcmp(expected, dst, sizeof(expected))); 647 EXPECT(memcmp(expected, dst, sizeof(expected)));
648 } 648 }
649 649
650 // 3.5.3 - fe fe ff ff = "\xFE\xFE\xFF\xFF" 650 // 3.5.3 - fe fe ff ff = "\xFE\xFE\xFF\xFF"
651 { 651 {
652 const char* src = "\xFE\xFE\xFF\xFF"; 652 const char* src = "\xFE\xFE\xFF\xFF";
653 uint32_t expected[] = { 0xFF }; 653 int32_t expected[] = { 0xFF };
654 uint32_t dst[ARRAY_SIZE(expected)]; 654 int32_t dst[ARRAY_SIZE(expected)];
655 memset(dst, 0, sizeof(dst)); 655 memset(dst, 0, sizeof(dst));
656 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 656 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
657 EXPECT(!is_valid); 657 EXPECT(!is_valid);
658 EXPECT(memcmp(expected, dst, sizeof(expected))); 658 EXPECT(memcmp(expected, dst, sizeof(expected)));
659 } 659 }
660 660
661 // 4 - Overlong sequences 661 // 4 - Overlong sequences
662 662
663 // 4.1 - Examples of an overlong ASCII character 663 // 4.1 - Examples of an overlong ASCII character
664 664
665 // 4.1.1 - U+002F = c0 af = "\xC0\xAF" 665 // 4.1.1 - U+002F = c0 af = "\xC0\xAF"
666 { 666 {
667 const char* src = "\xC0\xAF"; 667 const char* src = "\xC0\xAF";
668 uint32_t expected[] = { 0x2F }; 668 int32_t expected[] = { 0x2F };
669 uint32_t dst[ARRAY_SIZE(expected)]; 669 int32_t dst[ARRAY_SIZE(expected)];
670 memset(dst, 0, sizeof(dst)); 670 memset(dst, 0, sizeof(dst));
671 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 671 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
672 EXPECT(!is_valid); 672 EXPECT(!is_valid);
673 EXPECT(memcmp(expected, dst, sizeof(expected))); 673 EXPECT(memcmp(expected, dst, sizeof(expected)));
674 } 674 }
675 675
676 // 4.1.2 - U+002F = e0 80 af = "\xE0\x80\xAF" 676 // 4.1.2 - U+002F = e0 80 af = "\xE0\x80\xAF"
677 { 677 {
678 const char* src = "\xE0\x80\xAF"; 678 const char* src = "\xE0\x80\xAF";
679 uint32_t expected[] = { 0x2F }; 679 int32_t expected[] = { 0x2F };
680 uint32_t dst[ARRAY_SIZE(expected)]; 680 int32_t dst[ARRAY_SIZE(expected)];
681 memset(dst, 0, sizeof(dst)); 681 memset(dst, 0, sizeof(dst));
682 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 682 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
683 EXPECT(!is_valid); 683 EXPECT(!is_valid);
684 EXPECT(memcmp(expected, dst, sizeof(expected))); 684 EXPECT(memcmp(expected, dst, sizeof(expected)));
685 } 685 }
686 686
687 // 4.1.3 - U+002F = f0 80 80 af = "\xF0\x80\x80\xAF" 687 // 4.1.3 - U+002F = f0 80 80 af = "\xF0\x80\x80\xAF"
688 { 688 {
689 const char* src = "\xF0\x80\x80\xAF"; 689 const char* src = "\xF0\x80\x80\xAF";
690 uint32_t expected[] = { 0x2F }; 690 int32_t expected[] = { 0x2F };
691 uint32_t dst[ARRAY_SIZE(expected)]; 691 int32_t dst[ARRAY_SIZE(expected)];
692 memset(dst, 0, sizeof(dst)); 692 memset(dst, 0, sizeof(dst));
693 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 693 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
694 EXPECT(!is_valid); 694 EXPECT(!is_valid);
695 EXPECT(memcmp(expected, dst, sizeof(expected))); 695 EXPECT(memcmp(expected, dst, sizeof(expected)));
696 } 696 }
697 697
698 // 4.1.4 - U+002F = f8 80 80 80 af = "\xF8\x80\x80\x80\xAF" 698 // 4.1.4 - U+002F = f8 80 80 80 af = "\xF8\x80\x80\x80\xAF"
699 { 699 {
700 const char* src = "\xF8\x80\x80\x80\xAF"; 700 const char* src = "\xF8\x80\x80\x80\xAF";
701 uint32_t expected[] = { 0x2F }; 701 int32_t expected[] = { 0x2F };
702 uint32_t dst[ARRAY_SIZE(expected)]; 702 int32_t dst[ARRAY_SIZE(expected)];
703 memset(dst, 0, sizeof(dst)); 703 memset(dst, 0, sizeof(dst));
704 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 704 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
705 EXPECT(!is_valid); 705 EXPECT(!is_valid);
706 EXPECT(memcmp(expected, dst, sizeof(expected))); 706 EXPECT(memcmp(expected, dst, sizeof(expected)));
707 } 707 }
708 708
709 // 4.1.5 - U+002F = fc 80 80 80 80 af = "\xFC\x80\x80\x80\x80\xAF" 709 // 4.1.5 - U+002F = fc 80 80 80 80 af = "\xFC\x80\x80\x80\x80\xAF"
710 { 710 {
711 const char* src = "\xFC\x80\x80\x80\x80\xAF"; 711 const char* src = "\xFC\x80\x80\x80\x80\xAF";
712 uint32_t expected[] = { 0x2F }; 712 int32_t expected[] = { 0x2F };
713 uint32_t dst[ARRAY_SIZE(expected)]; 713 int32_t dst[ARRAY_SIZE(expected)];
714 memset(dst, 0, sizeof(dst)); 714 memset(dst, 0, sizeof(dst));
715 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 715 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
716 EXPECT(!is_valid); 716 EXPECT(!is_valid);
717 EXPECT(memcmp(expected, dst, sizeof(expected))); 717 EXPECT(memcmp(expected, dst, sizeof(expected)));
718 } 718 }
719 719
720 // 4.2 Maximum overlong sequences 720 // 4.2 Maximum overlong sequences
721 721
722 // 4.2.1 - U-0000007F = c1 bf = "\xC1\xBF" 722 // 4.2.1 - U-0000007F = c1 bf = "\xC1\xBF"
723 { 723 {
724 const char* src = "\xC1\xBF"; 724 const char* src = "\xC1\xBF";
725 uint32_t expected[] = { 0x7F }; 725 int32_t expected[] = { 0x7F };
726 uint32_t dst[ARRAY_SIZE(expected)]; 726 int32_t dst[ARRAY_SIZE(expected)];
727 memset(dst, 0, sizeof(dst)); 727 memset(dst, 0, sizeof(dst));
728 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 728 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
729 EXPECT(!is_valid); 729 EXPECT(!is_valid);
730 EXPECT(memcmp(expected, dst, sizeof(expected))); 730 EXPECT(memcmp(expected, dst, sizeof(expected)));
731 } 731 }
732 732
733 // 4.2.2 U+000007FF = e0 9f bf = "\xE0\x9F\xBF" 733 // 4.2.2 U+000007FF = e0 9f bf = "\xE0\x9F\xBF"
734 { 734 {
735 const char* src = "\xE0\x9F\xBF"; 735 const char* src = "\xE0\x9F\xBF";
736 uint32_t expected[] = { 0x7FF }; 736 int32_t expected[] = { 0x7FF };
737 uint32_t dst[ARRAY_SIZE(expected)]; 737 int32_t dst[ARRAY_SIZE(expected)];
738 memset(dst, 0, sizeof(dst)); 738 memset(dst, 0, sizeof(dst));
739 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 739 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
740 EXPECT(!is_valid); 740 EXPECT(!is_valid);
741 EXPECT(memcmp(expected, dst, sizeof(expected))); 741 EXPECT(memcmp(expected, dst, sizeof(expected)));
742 } 742 }
743 743
744 // 4.2.3 - U+0000FFFF = f0 8f bf bf = "\xF0\x8F\xBF\xBF" 744 // 4.2.3 - U+0000FFFF = f0 8f bf bf = "\xF0\x8F\xBF\xBF"
745 { 745 {
746 const char* src = "\xF0\x8F\xBF\xBF"; 746 const char* src = "\xF0\x8F\xBF\xBF";
747 uint32_t expected[] = { 0xFFFF }; 747 int32_t expected[] = { 0xFFFF };
748 uint32_t dst[ARRAY_SIZE(expected)]; 748 int32_t dst[ARRAY_SIZE(expected)];
749 memset(dst, 0, sizeof(dst)); 749 memset(dst, 0, sizeof(dst));
750 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 750 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
751 EXPECT(!is_valid); 751 EXPECT(!is_valid);
752 EXPECT(memcmp(expected, dst, sizeof(expected))); 752 EXPECT(memcmp(expected, dst, sizeof(expected)));
753 } 753 }
754 754
755 // 4.2.4 U-001FFFFF = f8 87 bf bf bf = "\xF8\x87\xBF\xBF\xBF" 755 // 4.2.4 U-001FFFFF = f8 87 bf bf bf = "\xF8\x87\xBF\xBF\xBF"
756 { 756 {
757 const char* src = "\xF8\x87\xBF\xBF\xBF"; 757 const char* src = "\xF8\x87\xBF\xBF\xBF";
758 uint32_t expected[] = { 0x1FFFFF }; 758 int32_t expected[] = { 0x1FFFFF };
759 uint32_t dst[ARRAY_SIZE(expected)]; 759 int32_t dst[ARRAY_SIZE(expected)];
760 memset(dst, 0, sizeof(dst)); 760 memset(dst, 0, sizeof(dst));
761 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 761 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
762 EXPECT(!is_valid); 762 EXPECT(!is_valid);
763 EXPECT(memcmp(expected, dst, sizeof(expected))); 763 EXPECT(memcmp(expected, dst, sizeof(expected)));
764 } 764 }
765 765
766 // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "\xFC\x83\xBF\xBF\xBF\xBF" 766 // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "\xFC\x83\xBF\xBF\xBF\xBF"
767 { 767 {
768 const char* src = "\xFC\x83\xBF\xBF\xBF\xBF"; 768 const char* src = "\xFC\x83\xBF\xBF\xBF\xBF";
769 uint32_t expected[] = { 0x3FFFFFF }; 769 int32_t expected[] = { 0x3FFFFFF };
770 uint32_t dst[ARRAY_SIZE(expected)]; 770 int32_t dst[ARRAY_SIZE(expected)];
771 memset(dst, 0, sizeof(dst)); 771 memset(dst, 0, sizeof(dst));
772 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 772 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
773 EXPECT(!is_valid); 773 EXPECT(!is_valid);
774 EXPECT(memcmp(expected, dst, sizeof(expected))); 774 EXPECT(memcmp(expected, dst, sizeof(expected)));
775 } 775 }
776 776
777 // 4.3 - Overlong representation of the NUL character 777 // 4.3 - Overlong representation of the NUL character
778 778
779 // 4.3.1 - U+0000 = "\xC0\x80" 779 // 4.3.1 - U+0000 = "\xC0\x80"
780 { 780 {
781 const char* src = "\xC0\x80"; 781 const char* src = "\xC0\x80";
782 uint32_t expected[] = { 0x0 }; 782 int32_t expected[] = { 0x0 };
783 uint32_t dst[ARRAY_SIZE(expected)]; 783 int32_t dst[ARRAY_SIZE(expected)];
784 memset(dst, 0xFF, sizeof(dst)); 784 memset(dst, 0xFF, sizeof(dst));
785 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 785 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
786 EXPECT(!is_valid); 786 EXPECT(!is_valid);
787 EXPECT(memcmp(expected, dst, sizeof(expected))); 787 EXPECT(memcmp(expected, dst, sizeof(expected)));
788 } 788 }
789 789
790 // 4.3.2 U+0000 = e0 80 80 = "\xE0\x80\x80" 790 // 4.3.2 U+0000 = e0 80 80 = "\xE0\x80\x80"
791 { 791 {
792 const char* src = "\xE0\x80\x80"; 792 const char* src = "\xE0\x80\x80";
793 uint32_t expected[] = { 0x0 }; 793 int32_t expected[] = { 0x0 };
794 uint32_t dst[ARRAY_SIZE(expected)]; 794 int32_t dst[ARRAY_SIZE(expected)];
795 memset(dst, 0xFF, sizeof(dst)); 795 memset(dst, 0xFF, sizeof(dst));
796 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 796 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
797 EXPECT(!is_valid); 797 EXPECT(!is_valid);
798 EXPECT(memcmp(expected, dst, sizeof(expected))); 798 EXPECT(memcmp(expected, dst, sizeof(expected)));
799 } 799 }
800 800
801 // 4.3.3 U+0000 = f0 80 80 80 = "\xF0\x80\x80\x80" 801 // 4.3.3 U+0000 = f0 80 80 80 = "\xF0\x80\x80\x80"
802 { 802 {
803 const char* src = "\xF0\x80\x80\x80"; 803 const char* src = "\xF0\x80\x80\x80";
804 uint32_t expected[] = { 0x0 }; 804 int32_t expected[] = { 0x0 };
805 uint32_t dst[ARRAY_SIZE(expected)]; 805 int32_t dst[ARRAY_SIZE(expected)];
806 memset(dst, 0xFF, sizeof(dst)); 806 memset(dst, 0xFF, sizeof(dst));
807 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 807 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
808 EXPECT(!is_valid); 808 EXPECT(!is_valid);
809 EXPECT(memcmp(expected, dst, sizeof(expected))); 809 EXPECT(memcmp(expected, dst, sizeof(expected)));
810 } 810 }
811 811
812 // 4.3.4 U+0000 = f8 80 80 80 80 = "\xF8\x80\x80\x80\x80" 812 // 4.3.4 U+0000 = f8 80 80 80 80 = "\xF8\x80\x80\x80\x80"
813 { 813 {
814 const char* src = "\xF8\x80\x80\x80\x80"; 814 const char* src = "\xF8\x80\x80\x80\x80";
815 uint32_t expected[] = { 0x0 }; 815 int32_t expected[] = { 0x0 };
816 uint32_t dst[ARRAY_SIZE(expected)]; 816 int32_t dst[ARRAY_SIZE(expected)];
817 memset(dst, 0xFF, sizeof(dst)); 817 memset(dst, 0xFF, sizeof(dst));
818 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 818 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
819 EXPECT(!is_valid); 819 EXPECT(!is_valid);
820 EXPECT(memcmp(expected, dst, sizeof(expected))); 820 EXPECT(memcmp(expected, dst, sizeof(expected)));
821 } 821 }
822 822
823 // 4.3.5 U+0000 = fc 80 80 80 80 80 = "\xFC\x80\x80\x80\x80\x80" 823 // 4.3.5 U+0000 = fc 80 80 80 80 80 = "\xFC\x80\x80\x80\x80\x80"
824 { 824 {
825 const char* src = "\xFC\x80\x80\x80\x80\x80"; 825 const char* src = "\xFC\x80\x80\x80\x80\x80";
826 uint32_t expected[] = { 0x0 }; 826 int32_t expected[] = { 0x0 };
827 uint32_t dst[ARRAY_SIZE(expected)]; 827 int32_t dst[ARRAY_SIZE(expected)];
828 memset(dst, 0xFF, sizeof(dst)); 828 memset(dst, 0xFF, sizeof(dst));
829 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 829 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
830 EXPECT(!is_valid); 830 EXPECT(!is_valid);
831 EXPECT(memcmp(expected, dst, sizeof(expected))); 831 EXPECT(memcmp(expected, dst, sizeof(expected)));
832 } 832 }
833 833
834 // 5.1 - Single UTF-16 surrogates 834 // 5.1 - Single UTF-16 surrogates
835 835
836 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80" 836 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80"
837 { 837 {
838 const char* src = "\xED\xA0\x80"; 838 const char* src = "\xED\xA0\x80";
839 uint32_t expected[] = { 0xD800 }; 839 int32_t expected[] = { 0xD800 };
840 uint32_t dst[ARRAY_SIZE(expected)]; 840 int32_t dst[ARRAY_SIZE(expected)];
841 memset(dst, 0, sizeof(dst)); 841 memset(dst, 0, sizeof(dst));
842 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 842 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
843 EXPECT(!is_valid); 843 EXPECT(!is_valid);
844 EXPECT(memcmp(expected, dst, sizeof(expected))); 844 EXPECT(memcmp(expected, dst, sizeof(expected)));
845 } 845 }
846 846
847 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF" 847 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF"
848 { 848 {
849 const char* src = "\xED\xAD\xBF"; 849 const char* src = "\xED\xAD\xBF";
850 uint32_t expected[] = { 0xDB7F }; 850 int32_t expected[] = { 0xDB7F };
851 uint32_t dst[ARRAY_SIZE(expected)]; 851 int32_t dst[ARRAY_SIZE(expected)];
852 memset(dst, 0, sizeof(dst)); 852 memset(dst, 0, sizeof(dst));
853 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 853 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
854 EXPECT(!is_valid); 854 EXPECT(!is_valid);
855 EXPECT(memcmp(expected, dst, sizeof(expected))); 855 EXPECT(memcmp(expected, dst, sizeof(expected)));
856 } 856 }
857 857
858 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80" 858 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80"
859 { 859 {
860 const char* src = "\xED\xAE\x80"; 860 const char* src = "\xED\xAE\x80";
861 uint32_t expected[] = { 0xDB80 }; 861 int32_t expected[] = { 0xDB80 };
862 uint32_t dst[ARRAY_SIZE(expected)]; 862 int32_t dst[ARRAY_SIZE(expected)];
863 memset(dst, 0, sizeof(dst)); 863 memset(dst, 0, sizeof(dst));
864 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 864 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
865 EXPECT(!is_valid); 865 EXPECT(!is_valid);
866 EXPECT(memcmp(expected, dst, sizeof(expected))); 866 EXPECT(memcmp(expected, dst, sizeof(expected)));
867 } 867 }
868 868
869 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF" 869 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF"
870 { 870 {
871 const char* src = "\xED\xAF\xBF"; 871 const char* src = "\xED\xAF\xBF";
872 uint32_t expected[] = { 0xDBFF }; 872 int32_t expected[] = { 0xDBFF };
873 uint32_t dst[ARRAY_SIZE(expected)]; 873 int32_t dst[ARRAY_SIZE(expected)];
874 memset(dst, 0, sizeof(dst)); 874 memset(dst, 0, sizeof(dst));
875 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 875 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
876 EXPECT(!is_valid); 876 EXPECT(!is_valid);
877 EXPECT(memcmp(expected, dst, sizeof(expected))); 877 EXPECT(memcmp(expected, dst, sizeof(expected)));
878 } 878 }
879 879
880 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80" 880 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80"
881 { 881 {
882 const char* src = "\xED\xB0\x80"; 882 const char* src = "\xED\xB0\x80";
883 uint32_t expected[] = { 0xDC00 }; 883 int32_t expected[] = { 0xDC00 };
884 uint32_t dst[ARRAY_SIZE(expected)]; 884 int32_t dst[ARRAY_SIZE(expected)];
885 memset(dst, 0, sizeof(dst)); 885 memset(dst, 0, sizeof(dst));
886 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 886 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
887 EXPECT(!is_valid); 887 EXPECT(!is_valid);
888 EXPECT(memcmp(expected, dst, sizeof(expected))); 888 EXPECT(memcmp(expected, dst, sizeof(expected)));
889 } 889 }
890 890
891 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80" 891 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80"
892 { 892 {
893 const char* src = "\xED\xBE\x80"; 893 const char* src = "\xED\xBE\x80";
894 uint32_t expected[] = { 0xDF80 }; 894 int32_t expected[] = { 0xDF80 };
895 uint32_t dst[ARRAY_SIZE(expected)]; 895 int32_t dst[ARRAY_SIZE(expected)];
896 memset(dst, 0, sizeof(dst)); 896 memset(dst, 0, sizeof(dst));
897 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 897 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
898 EXPECT(!is_valid); 898 EXPECT(!is_valid);
899 EXPECT(memcmp(expected, dst, sizeof(expected))); 899 EXPECT(memcmp(expected, dst, sizeof(expected)));
900 } 900 }
901 901
902 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF" 902 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF"
903 { 903 {
904 const char* src = "\xED\xBF\xBF"; 904 const char* src = "\xED\xBF\xBF";
905 uint32_t expected[] = { 0xDFFF }; 905 int32_t expected[] = { 0xDFFF };
906 uint32_t dst[ARRAY_SIZE(expected)]; 906 int32_t dst[ARRAY_SIZE(expected)];
907 memset(dst, 0, sizeof(dst)); 907 memset(dst, 0, sizeof(dst));
908 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 908 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
909 EXPECT(!is_valid); 909 EXPECT(!is_valid);
910 EXPECT(memcmp(expected, dst, sizeof(expected))); 910 EXPECT(memcmp(expected, dst, sizeof(expected)));
911 } 911 }
912 912
913 // 5.2 Paired UTF-16 surrogates 913 // 5.2 Paired UTF-16 surrogates
914 914
915 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80" 915 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80"
916 { 916 {
917 const char* src = "\xED\xA0\x80\xED\xB0\x80"; 917 const char* src = "\xED\xA0\x80\xED\xB0\x80";
918 uint32_t expected[] = { 0xD800, 0xDC00 }; 918 int32_t expected[] = { 0xD800, 0xDC00 };
919 uint32_t dst[ARRAY_SIZE(expected)]; 919 int32_t dst[ARRAY_SIZE(expected)];
920 memset(dst, 0, sizeof(dst)); 920 memset(dst, 0, sizeof(dst));
921 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 921 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
922 EXPECT(!is_valid); 922 EXPECT(!is_valid);
923 EXPECT(memcmp(expected, dst, sizeof(expected))); 923 EXPECT(memcmp(expected, dst, sizeof(expected)));
924 } 924 }
925 925
926 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF" 926 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF"
927 { 927 {
928 const char* src = "\xED\xA0\x80\xED\xBF\xBF"; 928 const char* src = "\xED\xA0\x80\xED\xBF\xBF";
929 uint32_t expected[] = { 0xD800, 0xDFFF }; 929 int32_t expected[] = { 0xD800, 0xDFFF };
930 uint32_t dst[ARRAY_SIZE(expected)]; 930 int32_t dst[ARRAY_SIZE(expected)];
931 memset(dst, 0, sizeof(dst)); 931 memset(dst, 0, sizeof(dst));
932 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 932 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
933 EXPECT(!is_valid); 933 EXPECT(!is_valid);
934 EXPECT(memcmp(expected, dst, sizeof(expected))); 934 EXPECT(memcmp(expected, dst, sizeof(expected)));
935 } 935 }
936 936
937 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80" 937 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80"
938 { 938 {
939 const char* src = "\xED\xAD\xBF\xED\xB0\x80"; 939 const char* src = "\xED\xAD\xBF\xED\xB0\x80";
940 uint32_t expected[] = { 0xDB7F, 0xDC00 }; 940 int32_t expected[] = { 0xDB7F, 0xDC00 };
941 uint32_t dst[ARRAY_SIZE(expected)]; 941 int32_t dst[ARRAY_SIZE(expected)];
942 memset(dst, 0, sizeof(dst)); 942 memset(dst, 0, sizeof(dst));
943 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 943 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
944 EXPECT(!is_valid); 944 EXPECT(!is_valid);
945 EXPECT(memcmp(expected, dst, sizeof(expected))); 945 EXPECT(memcmp(expected, dst, sizeof(expected)));
946 } 946 }
947 947
948 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF" 948 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF"
949 { 949 {
950 const char* src = "\xED\xAD\xBF\xED\xBF\xBF"; 950 const char* src = "\xED\xAD\xBF\xED\xBF\xBF";
951 uint32_t expected[] = { 0xDB7F, 0xDFFF }; 951 int32_t expected[] = { 0xDB7F, 0xDFFF };
952 uint32_t dst[ARRAY_SIZE(expected)]; 952 int32_t dst[ARRAY_SIZE(expected)];
953 memset(dst, 0, sizeof(dst)); 953 memset(dst, 0, sizeof(dst));
954 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 954 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
955 EXPECT(!is_valid); 955 EXPECT(!is_valid);
956 EXPECT(memcmp(expected, dst, sizeof(expected))); 956 EXPECT(memcmp(expected, dst, sizeof(expected)));
957 } 957 }
958 958
959 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80" 959 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80"
960 { 960 {
961 const char* src = "\xED\xAE\x80\xED\xB0\x80"; 961 const char* src = "\xED\xAE\x80\xED\xB0\x80";
962 uint32_t expected[] = { 0xDB80, 0xDC00 }; 962 int32_t expected[] = { 0xDB80, 0xDC00 };
963 uint32_t dst[ARRAY_SIZE(expected)]; 963 int32_t dst[ARRAY_SIZE(expected)];
964 memset(dst, 0, sizeof(dst)); 964 memset(dst, 0, sizeof(dst));
965 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 965 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
966 EXPECT(!is_valid); 966 EXPECT(!is_valid);
967 EXPECT(memcmp(expected, dst, sizeof(expected))); 967 EXPECT(memcmp(expected, dst, sizeof(expected)));
968 } 968 }
969 969
970 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF" 970 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF"
971 { 971 {
972 const char* src = "\xED\xAE\x80\xED\xBF\xBF"; 972 const char* src = "\xED\xAE\x80\xED\xBF\xBF";
973 uint32_t expected[] = { 0xDB80, 0xDFFF }; 973 int32_t expected[] = { 0xDB80, 0xDFFF };
974 uint32_t dst[ARRAY_SIZE(expected)]; 974 int32_t dst[ARRAY_SIZE(expected)];
975 memset(dst, 0, sizeof(dst)); 975 memset(dst, 0, sizeof(dst));
976 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 976 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
977 EXPECT(!is_valid); 977 EXPECT(!is_valid);
978 EXPECT(memcmp(expected, dst, sizeof(expected))); 978 EXPECT(memcmp(expected, dst, sizeof(expected)));
979 } 979 }
980 980
981 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80" 981 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80"
982 { 982 {
983 const char* src = "\xED\xAF\xBF\xED\xB0\x80"; 983 const char* src = "\xED\xAF\xBF\xED\xB0\x80";
984 uint32_t expected[] = { 0xDBFF, 0xDC00 }; 984 int32_t expected[] = { 0xDBFF, 0xDC00 };
985 uint32_t dst[ARRAY_SIZE(expected)]; 985 int32_t dst[ARRAY_SIZE(expected)];
986 memset(dst, 0, sizeof(dst)); 986 memset(dst, 0, sizeof(dst));
987 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 987 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
988 EXPECT(!is_valid); 988 EXPECT(!is_valid);
989 EXPECT(memcmp(expected, dst, sizeof(expected))); 989 EXPECT(memcmp(expected, dst, sizeof(expected)));
990 } 990 }
991 991
992 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF" 992 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF"
993 { 993 {
994 const char* src = "\xED\xAF\xBF\xED\xBF\xBF"; 994 const char* src = "\xED\xAF\xBF\xED\xBF\xBF";
995 uint32_t expected[] = { 0xDBFF, 0xDFFF }; 995 int32_t expected[] = { 0xDBFF, 0xDFFF };
996 uint32_t dst[ARRAY_SIZE(expected)]; 996 int32_t dst[ARRAY_SIZE(expected)];
997 memset(dst, 0, sizeof(dst)); 997 memset(dst, 0, sizeof(dst));
998 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 998 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
999 EXPECT(!is_valid); 999 EXPECT(!is_valid);
1000 EXPECT(memcmp(expected, dst, sizeof(expected))); 1000 EXPECT(memcmp(expected, dst, sizeof(expected)));
1001 } 1001 }
1002 1002
1003 // 5.3 - Other illegal code positions 1003 // 5.3 - Other illegal code positions
1004 1004
1005 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE" 1005 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE"
1006 { 1006 {
1007 const char* src = "\xEF\xBF\xBE"; 1007 const char* src = "\xEF\xBF\xBE";
1008 uint32_t expected[] = { 0xFFFE }; 1008 int32_t expected[] = { 0xFFFE };
1009 uint32_t dst[ARRAY_SIZE(expected)]; 1009 int32_t dst[ARRAY_SIZE(expected)];
1010 memset(dst, 0, sizeof(dst)); 1010 memset(dst, 0, sizeof(dst));
1011 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 1011 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
1012 EXPECT(is_valid); 1012 EXPECT(is_valid);
1013 EXPECT(!memcmp(expected, dst, sizeof(expected))); 1013 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1014 } 1014 }
1015 1015
1016 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF" 1016 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF"
1017 { 1017 {
1018 const char* src = "\xEF\xBF\xBF"; 1018 const char* src = "\xEF\xBF\xBF";
1019 uint32_t expected[] = { 0xFFFF }; 1019 int32_t expected[] = { 0xFFFF };
1020 uint32_t dst[ARRAY_SIZE(expected)]; 1020 int32_t dst[ARRAY_SIZE(expected)];
1021 memset(dst, 0, sizeof(dst)); 1021 memset(dst, 0, sizeof(dst));
1022 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 1022 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
1023 EXPECT(is_valid); 1023 EXPECT(is_valid);
1024 EXPECT(!memcmp(expected, dst, sizeof(expected))); 1024 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1025 } 1025 }
1026 } 1026 }
1027 1027
1028 } // namespace dart 1028 } // namespace dart
OLDNEW
« no previous file with comments | « runtime/vm/unicode.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698