Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(83)

Side by Side Diff: runtime/vm/unicode_test.cc

Issue 2481873005: clang-format runtime/vm (Closed)
Patch Set: Merge Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « runtime/vm/unicode_data.cc ('k') | runtime/vm/unit_test.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/globals.h" 5 #include "vm/globals.h"
6 #include "vm/unicode.h" 6 #include "vm/unicode.h"
7 #include "vm/unit_test.h" 7 #include "vm/unit_test.h"
8 8
9 namespace dart { 9 namespace dart {
10 10
11 TEST_CASE(Utf8Decode) { 11 TEST_CASE(Utf8Decode) {
12 // Examples from the Unicode specification, chapter 3 12 // Examples from the Unicode specification, chapter 3
13 { 13 {
14 const char* src = "\x41\xC3\xB1\x42"; 14 const char* src = "\x41\xC3\xB1\x42";
15 int32_t expected[] = { 0x41, 0xF1, 0x42 }; 15 int32_t expected[] = {0x41, 0xF1, 0x42};
16 int32_t dst[ARRAY_SIZE(expected)]; 16 int32_t dst[ARRAY_SIZE(expected)];
17 memset(dst, 0, sizeof(dst)); 17 memset(dst, 0, sizeof(dst));
18 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 18 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
19 EXPECT(is_valid); 19 EXPECT(is_valid);
20 EXPECT(!memcmp(expected, dst, sizeof(expected))); 20 EXPECT(!memcmp(expected, dst, sizeof(expected)));
21 } 21 }
22 22
23 { 23 {
24 const char* src = "\x4D"; 24 const char* src = "\x4D";
25 int32_t expected[] = { 0x4D }; 25 int32_t expected[] = {0x4D};
26 int32_t dst[ARRAY_SIZE(expected)]; 26 int32_t dst[ARRAY_SIZE(expected)];
27 memset(dst, 0, sizeof(dst)); 27 memset(dst, 0, sizeof(dst));
28 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 28 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
29 EXPECT(is_valid); 29 EXPECT(is_valid);
30 EXPECT(!memcmp(expected, dst, sizeof(expected))); 30 EXPECT(!memcmp(expected, dst, sizeof(expected)));
31 } 31 }
32 32
33 { 33 {
34 const char* src = "\xD0\xB0"; 34 const char* src = "\xD0\xB0";
35 int32_t expected[] = { 0x430 }; 35 int32_t expected[] = {0x430};
36 int32_t dst[ARRAY_SIZE(expected)]; 36 int32_t dst[ARRAY_SIZE(expected)];
37 memset(dst, 0, sizeof(dst)); 37 memset(dst, 0, sizeof(dst));
38 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 38 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
39 EXPECT(is_valid); 39 EXPECT(is_valid);
40 EXPECT(!memcmp(expected, dst, sizeof(expected))); 40 EXPECT(!memcmp(expected, dst, sizeof(expected)));
41 } 41 }
42 42
43 { 43 {
44 const char* src = "\xE4\xBA\x8C"; 44 const char* src = "\xE4\xBA\x8C";
45 int32_t expected[] = { 0x4E8C }; 45 int32_t expected[] = {0x4E8C};
46 int32_t dst[ARRAY_SIZE(expected)]; 46 int32_t dst[ARRAY_SIZE(expected)];
47 memset(dst, 0, sizeof(dst)); 47 memset(dst, 0, sizeof(dst));
48 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 48 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
49 EXPECT(is_valid); 49 EXPECT(is_valid);
50 EXPECT(!memcmp(expected, dst, sizeof(expected))); 50 EXPECT(!memcmp(expected, dst, sizeof(expected)));
51 } 51 }
52 52
53 { 53 {
54 const char* src = "\xF0\x90\x8C\x82"; 54 const char* src = "\xF0\x90\x8C\x82";
55 int32_t expected[] = { 0x10302 }; 55 int32_t expected[] = {0x10302};
56 int32_t dst[ARRAY_SIZE(expected)]; 56 int32_t dst[ARRAY_SIZE(expected)];
57 memset(dst, 0, sizeof(dst)); 57 memset(dst, 0, sizeof(dst));
58 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 58 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
59 EXPECT(is_valid); 59 EXPECT(is_valid);
60 EXPECT(!memcmp(expected, dst, sizeof(expected))); 60 EXPECT(!memcmp(expected, dst, sizeof(expected)));
61 } 61 }
62 62
63 { 63 {
64 const char* src = "\x4D\xD0\xB0\xE4\xBA\x8C\xF0\x90\x8C\x82"; 64 const char* src = "\x4D\xD0\xB0\xE4\xBA\x8C\xF0\x90\x8C\x82";
65 int32_t expected[] = { 0x4D, 0x430, 0x4E8C, 0x10302 }; 65 int32_t expected[] = {0x4D, 0x430, 0x4E8C, 0x10302};
66 int32_t dst[ARRAY_SIZE(expected)]; 66 int32_t dst[ARRAY_SIZE(expected)];
67 memset(dst, 0, sizeof(dst)); 67 memset(dst, 0, sizeof(dst));
68 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 68 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
69 EXPECT(is_valid); 69 EXPECT(is_valid);
70 EXPECT(!memcmp(expected, dst, sizeof(expected))); 70 EXPECT(!memcmp(expected, dst, sizeof(expected)));
71 } 71 }
72 72
73 // Mixture of non-ASCII and ASCII characters 73 // Mixture of non-ASCII and ASCII characters
74 { 74 {
75 const char* src = "\xD7\x92\xD7\x9C\xD7\xA2\xD7\x93" 75 const char* src =
76 "\x20" 76 "\xD7\x92\xD7\x9C\xD7\xA2\xD7\x93"
77 "\xD7\x91\xD7\xA8\xD7\x9B\xD7\x94"; 77 "\x20"
78 int32_t expected[] = { 0x5D2, 0x5DC, 0x5E2, 0x5D3, 78 "\xD7\x91\xD7\xA8\xD7\x9B\xD7\x94";
79 0x20, 79 int32_t expected[] = {0x5D2, 0x5DC, 0x5E2, 0x5D3, 0x20,
80 0x5D1, 0x5E8, 0x5DB, 0x5D4 }; 80 0x5D1, 0x5E8, 0x5DB, 0x5D4};
81 int32_t dst[ARRAY_SIZE(expected)]; 81 int32_t dst[ARRAY_SIZE(expected)];
82 memset(dst, 0, sizeof(dst)); 82 memset(dst, 0, sizeof(dst));
83 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 83 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
84 EXPECT(is_valid); 84 EXPECT(is_valid);
85 EXPECT(!memcmp(expected, dst, sizeof(expected))); 85 EXPECT(!memcmp(expected, dst, sizeof(expected)));
86 } 86 }
87 87
88 // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt 88 // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
89 89
90 // 1 - Some correct UTF-8 text 90 // 1 - Some correct UTF-8 text
91 { 91 {
92 const char* src = "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5"; 92 const char* src = "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5";
93 int32_t expected[] = { 0x3BA, 0x1F79, 0x3C3, 0x3BC, 0x3B5 }; 93 int32_t expected[] = {0x3BA, 0x1F79, 0x3C3, 0x3BC, 0x3B5};
94 int32_t dst[ARRAY_SIZE(expected)]; 94 int32_t dst[ARRAY_SIZE(expected)];
95 memset(dst, 0, sizeof(dst)); 95 memset(dst, 0, sizeof(dst));
96 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 96 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
97 EXPECT(is_valid); 97 EXPECT(is_valid);
98 EXPECT(!memcmp(expected, dst, sizeof(expected))); 98 EXPECT(!memcmp(expected, dst, sizeof(expected)));
99 } 99 }
100 100
101 // 2 - Boundary condition test cases 101 // 2 - Boundary condition test cases
102 102
103 // 2.1 - First possible sequence of a certain length 103 // 2.1 - First possible sequence of a certain length
104 104
105 // 2.1.1 - 1 byte (U-00000000): "\x00" 105 // 2.1.1 - 1 byte (U-00000000): "\x00"
106 { 106 {
107 const char* src = "\x00"; 107 const char* src = "\x00";
108 int32_t expected[] = { 0x0 }; 108 int32_t expected[] = {0x0};
109 int32_t dst[ARRAY_SIZE(expected)]; 109 int32_t dst[ARRAY_SIZE(expected)];
110 memset(dst, 0xFF, sizeof(dst)); 110 memset(dst, 0xFF, sizeof(dst));
111 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 111 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
112 EXPECT(is_valid); 112 EXPECT(is_valid);
113 EXPECT(memcmp(expected, dst, sizeof(expected))); 113 EXPECT(memcmp(expected, dst, sizeof(expected)));
114 } 114 }
115 115
116 // 2.1.2 - 2 bytes (U-00000080): "\xC2\x80" 116 // 2.1.2 - 2 bytes (U-00000080): "\xC2\x80"
117 { 117 {
118 const char* src = "\xC2\x80"; 118 const char* src = "\xC2\x80";
119 int32_t expected[] = { 0x80 }; 119 int32_t expected[] = {0x80};
120 int32_t dst[ARRAY_SIZE(expected)]; 120 int32_t dst[ARRAY_SIZE(expected)];
121 memset(dst, 0, sizeof(dst)); 121 memset(dst, 0, sizeof(dst));
122 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 122 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
123 EXPECT(is_valid); 123 EXPECT(is_valid);
124 EXPECT(!memcmp(expected, dst, sizeof(expected))); 124 EXPECT(!memcmp(expected, dst, sizeof(expected)));
125 } 125 }
126 126
127 // 2.1.3 - 3 bytes (U-00000800): "\xE0\xA0\x80" 127 // 2.1.3 - 3 bytes (U-00000800): "\xE0\xA0\x80"
128 { 128 {
129 const char* src = "\xE0\xA0\x80"; 129 const char* src = "\xE0\xA0\x80";
130 int32_t expected[] = { 0x800 }; 130 int32_t expected[] = {0x800};
131 int32_t dst[ARRAY_SIZE(expected)]; 131 int32_t dst[ARRAY_SIZE(expected)];
132 memset(dst, 0, sizeof(dst)); 132 memset(dst, 0, sizeof(dst));
133 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 133 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
134 EXPECT(is_valid); 134 EXPECT(is_valid);
135 EXPECT(!memcmp(expected, dst, sizeof(expected))); 135 EXPECT(!memcmp(expected, dst, sizeof(expected)));
136 } 136 }
137 137
138 // 2.1.4 - 4 bytes (U-00010000): "\xF0\x90\x80\x80" 138 // 2.1.4 - 4 bytes (U-00010000): "\xF0\x90\x80\x80"
139 { 139 {
140 const char* src = "\xF0\x90\x80\x80"; 140 const char* src = "\xF0\x90\x80\x80";
141 int32_t expected[] = { 0x10000 }; 141 int32_t expected[] = {0x10000};
142 int32_t dst[ARRAY_SIZE(expected)]; 142 int32_t dst[ARRAY_SIZE(expected)];
143 memset(dst, 0, sizeof(dst)); 143 memset(dst, 0, sizeof(dst));
144 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 144 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
145 EXPECT(is_valid); 145 EXPECT(is_valid);
146 EXPECT(!memcmp(expected, dst, sizeof(expected))); 146 EXPECT(!memcmp(expected, dst, sizeof(expected)));
147 } 147 }
148 148
149 // 2.1.5 - 5 bytes (U-00200000): "\xF8\x88\x80\x80\x80" 149 // 2.1.5 - 5 bytes (U-00200000): "\xF8\x88\x80\x80\x80"
150 { 150 {
151 const char* src = "\xF8\x88\x80\x80\x80"; 151 const char* src = "\xF8\x88\x80\x80\x80";
152 int32_t expected[] = { 0x200000 }; 152 int32_t expected[] = {0x200000};
153 int32_t dst[ARRAY_SIZE(expected)]; 153 int32_t dst[ARRAY_SIZE(expected)];
154 memset(dst, 0, sizeof(dst)); 154 memset(dst, 0, sizeof(dst));
155 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 155 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
156 EXPECT(!is_valid); 156 EXPECT(!is_valid);
157 EXPECT(memcmp(expected, dst, sizeof(expected))); 157 EXPECT(memcmp(expected, dst, sizeof(expected)));
158 } 158 }
159 159
160 // 2.1.6 - 6 bytes (U-04000000): "\xFC\x84\x80\x80\x80\x80" 160 // 2.1.6 - 6 bytes (U-04000000): "\xFC\x84\x80\x80\x80\x80"
161 { 161 {
162 const char* src = "\xFC\x84\x80\x80\x80\x80"; 162 const char* src = "\xFC\x84\x80\x80\x80\x80";
163 int32_t expected[] = { 0x400000 }; 163 int32_t expected[] = {0x400000};
164 int32_t dst[ARRAY_SIZE(expected)]; 164 int32_t dst[ARRAY_SIZE(expected)];
165 memset(dst, 0, sizeof(dst)); 165 memset(dst, 0, sizeof(dst));
166 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 166 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
167 EXPECT(!is_valid); 167 EXPECT(!is_valid);
168 EXPECT(memcmp(expected, dst, sizeof(expected))); 168 EXPECT(memcmp(expected, dst, sizeof(expected)));
169 } 169 }
170 170
171 // 2.2 - Last possible sequence of a certain length 171 // 2.2 - Last possible sequence of a certain length
172 172
173 // 2.2.1 - 1 byte (U-0000007F): "\x7F" 173 // 2.2.1 - 1 byte (U-0000007F): "\x7F"
174 { 174 {
175 const char* src = "\x7F"; 175 const char* src = "\x7F";
176 int32_t expected[] = { 0x7F }; 176 int32_t expected[] = {0x7F};
177 int32_t dst[ARRAY_SIZE(expected)]; 177 int32_t dst[ARRAY_SIZE(expected)];
178 memset(dst, 0, sizeof(dst)); 178 memset(dst, 0, sizeof(dst));
179 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 179 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
180 EXPECT(is_valid); 180 EXPECT(is_valid);
181 EXPECT(!memcmp(expected, dst, sizeof(expected))); 181 EXPECT(!memcmp(expected, dst, sizeof(expected)));
182 } 182 }
183 183
184 // 2.2.2 - 2 bytes (U-000007FF): "\xDF\xBF" 184 // 2.2.2 - 2 bytes (U-000007FF): "\xDF\xBF"
185 { 185 {
186 const char* src = "\xDF\xBF"; 186 const char* src = "\xDF\xBF";
187 int32_t expected[] = { 0x7FF }; 187 int32_t expected[] = {0x7FF};
188 int32_t dst[ARRAY_SIZE(expected)]; 188 int32_t dst[ARRAY_SIZE(expected)];
189 memset(dst, 0, sizeof(dst)); 189 memset(dst, 0, sizeof(dst));
190 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 190 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
191 EXPECT(is_valid); 191 EXPECT(is_valid);
192 EXPECT(!memcmp(expected, dst, sizeof(expected))); 192 EXPECT(!memcmp(expected, dst, sizeof(expected)));
193 } 193 }
194 194
195 // 2.2.3 - 3 bytes (U-0000FFFF): "\xEF\xBF\xBF" 195 // 2.2.3 - 3 bytes (U-0000FFFF): "\xEF\xBF\xBF"
196 { 196 {
197 const char* src = "\xEF\xBF\xBF"; 197 const char* src = "\xEF\xBF\xBF";
198 int32_t expected[] = { 0xFFFF }; 198 int32_t expected[] = {0xFFFF};
199 int32_t dst[ARRAY_SIZE(expected)]; 199 int32_t dst[ARRAY_SIZE(expected)];
200 memset(dst, 0, sizeof(dst)); 200 memset(dst, 0, sizeof(dst));
201 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 201 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
202 EXPECT(is_valid); 202 EXPECT(is_valid);
203 EXPECT(!memcmp(expected, dst, sizeof(expected))); 203 EXPECT(!memcmp(expected, dst, sizeof(expected)));
204 } 204 }
205 205
206 // 2.2.4 - 4 bytes (U-001FFFFF): "\xF7\xBF\xBF\xBF" 206 // 2.2.4 - 4 bytes (U-001FFFFF): "\xF7\xBF\xBF\xBF"
207 { 207 {
208 const char* src = "\xF7\xBF\xBF\xBF"; 208 const char* src = "\xF7\xBF\xBF\xBF";
209 int32_t expected[] = { 0x1FFFF }; 209 int32_t expected[] = {0x1FFFF};
210 int32_t dst[ARRAY_SIZE(expected)]; 210 int32_t dst[ARRAY_SIZE(expected)];
211 memset(dst, 0, sizeof(dst)); 211 memset(dst, 0, sizeof(dst));
212 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 212 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
213 EXPECT(!is_valid); 213 EXPECT(!is_valid);
214 EXPECT(memcmp(expected, dst, sizeof(expected))); 214 EXPECT(memcmp(expected, dst, sizeof(expected)));
215 } 215 }
216 216
217 // 2.2.5 - 5 bytes (U-03FFFFFF): "\xFB\xBF\xBF\xBF\xBF" 217 // 2.2.5 - 5 bytes (U-03FFFFFF): "\xFB\xBF\xBF\xBF\xBF"
218 { 218 {
219 const char* src = "\xFB\xBF\xBF\xBF\xBF"; 219 const char* src = "\xFB\xBF\xBF\xBF\xBF";
220 int32_t expected[] = { 0x3FFFFFF }; 220 int32_t expected[] = {0x3FFFFFF};
221 int32_t dst[ARRAY_SIZE(expected)]; 221 int32_t dst[ARRAY_SIZE(expected)];
222 memset(dst, 0, sizeof(dst)); 222 memset(dst, 0, sizeof(dst));
223 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 223 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
224 EXPECT(!is_valid); 224 EXPECT(!is_valid);
225 EXPECT(memcmp(expected, dst, sizeof(expected))); 225 EXPECT(memcmp(expected, dst, sizeof(expected)));
226 } 226 }
227 227
228 // 2.2.6 - 6 bytes (U-7FFFFFFF): "\xFD\xBF\xBF\xBF\xBF\xBF" 228 // 2.2.6 - 6 bytes (U-7FFFFFFF): "\xFD\xBF\xBF\xBF\xBF\xBF"
229 { 229 {
230 const char* src = "\xFD\xBF\xBF\xBF\xBF\xBF"; 230 const char* src = "\xFD\xBF\xBF\xBF\xBF\xBF";
231 int32_t expected[] = { 0x7FFFFFF }; 231 int32_t expected[] = {0x7FFFFFF};
232 int32_t dst[ARRAY_SIZE(expected)]; 232 int32_t dst[ARRAY_SIZE(expected)];
233 memset(dst, 0, sizeof(dst)); 233 memset(dst, 0, sizeof(dst));
234 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 234 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
235 EXPECT(!is_valid); 235 EXPECT(!is_valid);
236 EXPECT(memcmp(expected, dst, sizeof(expected))); 236 EXPECT(memcmp(expected, dst, sizeof(expected)));
237 } 237 }
238 238
239 // 2.3 - Other boundary conditions 239 // 2.3 - Other boundary conditions
240 240
241 // 2.3.1 - U-0000D7FF = ed 9f bf = "\xED\x9F\xBF" 241 // 2.3.1 - U-0000D7FF = ed 9f bf = "\xED\x9F\xBF"
242 { 242 {
243 const char* src = "\xED\x9F\xBF"; 243 const char* src = "\xED\x9F\xBF";
244 int32_t expected[] = { 0xD7FF }; 244 int32_t expected[] = {0xD7FF};
245 int32_t dst[ARRAY_SIZE(expected)]; 245 int32_t dst[ARRAY_SIZE(expected)];
246 memset(dst, 0, sizeof(dst)); 246 memset(dst, 0, sizeof(dst));
247 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 247 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
248 EXPECT(is_valid); 248 EXPECT(is_valid);
249 EXPECT(!memcmp(expected, dst, sizeof(expected))); 249 EXPECT(!memcmp(expected, dst, sizeof(expected)));
250 } 250 }
251 251
252 // 2.3.2 - U-0000E000 = ee 80 80 = "\xEE\x80\x80" 252 // 2.3.2 - U-0000E000 = ee 80 80 = "\xEE\x80\x80"
253 { 253 {
254 const char* src = "\xEE\x80\x80"; 254 const char* src = "\xEE\x80\x80";
255 int32_t expected[] = { 0xE000 }; 255 int32_t expected[] = {0xE000};
256 int32_t dst[ARRAY_SIZE(expected)]; 256 int32_t dst[ARRAY_SIZE(expected)];
257 memset(dst, 0, sizeof(dst)); 257 memset(dst, 0, sizeof(dst));
258 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 258 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
259 EXPECT(is_valid); 259 EXPECT(is_valid);
260 EXPECT(!memcmp(expected, dst, sizeof(expected))); 260 EXPECT(!memcmp(expected, dst, sizeof(expected)));
261 } 261 }
262 262
263 // 2.3.3 - U-0000FFFD = ef bf bd = "\xEF\xBF\xBD" 263 // 2.3.3 - U-0000FFFD = ef bf bd = "\xEF\xBF\xBD"
264 { 264 {
265 const char* src = "\xEF\xBF\xBD"; 265 const char* src = "\xEF\xBF\xBD";
266 int32_t expected[] = { 0xFFFD }; 266 int32_t expected[] = {0xFFFD};
267 int32_t dst[ARRAY_SIZE(expected)]; 267 int32_t dst[ARRAY_SIZE(expected)];
268 memset(dst, 0, sizeof(dst)); 268 memset(dst, 0, sizeof(dst));
269 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 269 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
270 EXPECT(is_valid); 270 EXPECT(is_valid);
271 EXPECT(!memcmp(expected, dst, sizeof(expected))); 271 EXPECT(!memcmp(expected, dst, sizeof(expected)));
272 } 272 }
273 273
274 // 2.3.4 - U-0010FFFF = f4 8f bf bf = "\xF4\x8F\xBF\xBF" 274 // 2.3.4 - U-0010FFFF = f4 8f bf bf = "\xF4\x8F\xBF\xBF"
275 { 275 {
276 const char* src = "\xF4\x8F\xBF\xBF"; 276 const char* src = "\xF4\x8F\xBF\xBF";
277 int32_t expected[] = { 0x10FFFF }; 277 int32_t expected[] = {0x10FFFF};
278 int32_t dst[ARRAY_SIZE(expected)]; 278 int32_t dst[ARRAY_SIZE(expected)];
279 memset(dst, 0, sizeof(dst)); 279 memset(dst, 0, sizeof(dst));
280 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 280 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
281 EXPECT(is_valid); 281 EXPECT(is_valid);
282 EXPECT(!memcmp(expected, dst, sizeof(expected))); 282 EXPECT(!memcmp(expected, dst, sizeof(expected)));
283 } 283 }
284 284
285 // 2.3.5 - U-00110000 = f4 90 80 80 = "\xF4\x90\x80\x80" 285 // 2.3.5 - U-00110000 = f4 90 80 80 = "\xF4\x90\x80\x80"
286 { 286 {
287 const char* src = "\xF4\x90\x80\x80"; 287 const char* src = "\xF4\x90\x80\x80";
288 int32_t expected[] = { 0x110000 }; 288 int32_t expected[] = {0x110000};
289 int32_t dst[ARRAY_SIZE(expected)]; 289 int32_t dst[ARRAY_SIZE(expected)];
290 memset(dst, 0, sizeof(dst)); 290 memset(dst, 0, sizeof(dst));
291 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 291 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
292 EXPECT(!is_valid); 292 EXPECT(!is_valid);
293 EXPECT(memcmp(expected, dst, sizeof(expected))); 293 EXPECT(memcmp(expected, dst, sizeof(expected)));
294 } 294 }
295 295
296 // 3 - Malformed sequences 296 // 3 - Malformed sequences
297 297
298 // 3.1 - Unexpected continuation bytes 298 // 3.1 - Unexpected continuation bytes
299 299
300 // 3.1.1 - First continuation byte 0x80: "\x80" 300 // 3.1.1 - First continuation byte 0x80: "\x80"
301 { 301 {
302 const char* src = "\x80"; 302 const char* src = "\x80";
303 int32_t expected[] = { 0x80 }; 303 int32_t expected[] = {0x80};
304 int32_t dst[ARRAY_SIZE(expected)]; 304 int32_t dst[ARRAY_SIZE(expected)];
305 memset(dst, 0, sizeof(dst)); 305 memset(dst, 0, sizeof(dst));
306 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 306 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
307 EXPECT(!is_valid); 307 EXPECT(!is_valid);
308 EXPECT(memcmp(expected, dst, sizeof(expected))); 308 EXPECT(memcmp(expected, dst, sizeof(expected)));
309 } 309 }
310 310
311 // 3.1.2 - Last continuation byte 0xbf: "\xBF" 311 // 3.1.2 - Last continuation byte 0xbf: "\xBF"
312 { 312 {
313 const char* src = "\xBF"; 313 const char* src = "\xBF";
314 int32_t expected[] = { 0xBF }; 314 int32_t expected[] = {0xBF};
315 int32_t dst[ARRAY_SIZE(expected)]; 315 int32_t dst[ARRAY_SIZE(expected)];
316 memset(dst, 0, sizeof(dst)); 316 memset(dst, 0, sizeof(dst));
317 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 317 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
318 EXPECT(!is_valid); 318 EXPECT(!is_valid);
319 EXPECT(memcmp(expected, dst, sizeof(expected))); 319 EXPECT(memcmp(expected, dst, sizeof(expected)));
320 } 320 }
321 321
322 // 3.1.3 - 2 continuation bytes: "\x80\xBF" 322 // 3.1.3 - 2 continuation bytes: "\x80\xBF"
323 { 323 {
324 const char* src = "\x80\xBF"; 324 const char* src = "\x80\xBF";
325 int32_t expected[] = { 0x80, 0xBF }; 325 int32_t expected[] = {0x80, 0xBF};
326 int32_t dst[ARRAY_SIZE(expected)]; 326 int32_t dst[ARRAY_SIZE(expected)];
327 memset(dst, 0, sizeof(dst)); 327 memset(dst, 0, sizeof(dst));
328 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 328 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
329 EXPECT(!is_valid); 329 EXPECT(!is_valid);
330 EXPECT(memcmp(expected, dst, sizeof(expected))); 330 EXPECT(memcmp(expected, dst, sizeof(expected)));
331 } 331 }
332 332
333 // 3.1.4 - 3 continuation bytes: "\x80\xBF\x80" 333 // 3.1.4 - 3 continuation bytes: "\x80\xBF\x80"
334 { 334 {
335 const char* src = "\x80\xBF\x80"; 335 const char* src = "\x80\xBF\x80";
336 int32_t expected[] = { 0x80, 0xBF, 0x80 }; 336 int32_t expected[] = {0x80, 0xBF, 0x80};
337 int32_t dst[ARRAY_SIZE(expected)]; 337 int32_t dst[ARRAY_SIZE(expected)];
338 memset(dst, 0, sizeof(dst)); 338 memset(dst, 0, sizeof(dst));
339 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 339 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
340 EXPECT(!is_valid); 340 EXPECT(!is_valid);
341 EXPECT(memcmp(expected, dst, sizeof(expected))); 341 EXPECT(memcmp(expected, dst, sizeof(expected)));
342 } 342 }
343 343
344 // 3.1.5 - 4 continuation bytes: "\x80\xBF\x80\xBF" 344 // 3.1.5 - 4 continuation bytes: "\x80\xBF\x80\xBF"
345 { 345 {
346 const char* src = "\x80\xBF\x80\xBF"; 346 const char* src = "\x80\xBF\x80\xBF";
347 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF }; 347 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF};
348 int32_t dst[ARRAY_SIZE(expected)]; 348 int32_t dst[ARRAY_SIZE(expected)];
349 memset(dst, 0, sizeof(dst)); 349 memset(dst, 0, sizeof(dst));
350 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 350 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
351 EXPECT(!is_valid); 351 EXPECT(!is_valid);
352 EXPECT(memcmp(expected, dst, sizeof(expected))); 352 EXPECT(memcmp(expected, dst, sizeof(expected)));
353 } 353 }
354 354
355 // 3.1.6 - 5 continuation bytes: "\x80\xBF\x80\xBF\x80" 355 // 3.1.6 - 5 continuation bytes: "\x80\xBF\x80\xBF\x80"
356 { 356 {
357 const char* src = "\x80\xBF\x80\xBF\x80"; 357 const char* src = "\x80\xBF\x80\xBF\x80";
358 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80 }; 358 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF, 0x80};
359 int32_t dst[ARRAY_SIZE(expected)]; 359 int32_t dst[ARRAY_SIZE(expected)];
360 memset(dst, 0, sizeof(dst)); 360 memset(dst, 0, sizeof(dst));
361 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 361 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
362 EXPECT(!is_valid); 362 EXPECT(!is_valid);
363 EXPECT(memcmp(expected, dst, sizeof(expected))); 363 EXPECT(memcmp(expected, dst, sizeof(expected)));
364 } 364 }
365 365
366 // 3.1.7 - 6 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF" 366 // 3.1.7 - 6 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF"
367 { 367 {
368 const char* src = "\x80\xBF\x80\xBF\x80\xBF"; 368 const char* src = "\x80\xBF\x80\xBF\x80\xBF";
369 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF }; 369 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF};
370 int32_t dst[ARRAY_SIZE(expected)]; 370 int32_t dst[ARRAY_SIZE(expected)];
371 memset(dst, 0, sizeof(dst)); 371 memset(dst, 0, sizeof(dst));
372 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 372 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
373 EXPECT(!is_valid); 373 EXPECT(!is_valid);
374 EXPECT(memcmp(expected, dst, sizeof(expected))); 374 EXPECT(memcmp(expected, dst, sizeof(expected)));
375 } 375 }
376 376
377 // 3.1.8 - 7 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF\x80" 377 // 3.1.8 - 7 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF\x80"
378 { 378 {
379 const char* src = "\x80\xBF\x80\xBF\x80\xBF\x80"; 379 const char* src = "\x80\xBF\x80\xBF\x80\xBF\x80";
380 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 }; 380 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80};
381 int32_t dst[ARRAY_SIZE(expected)]; 381 int32_t dst[ARRAY_SIZE(expected)];
382 memset(dst, 0, sizeof(dst)); 382 memset(dst, 0, sizeof(dst));
383 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 383 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
384 EXPECT(!is_valid); 384 EXPECT(!is_valid);
385 EXPECT(memcmp(expected, dst, sizeof(expected))); 385 EXPECT(memcmp(expected, dst, sizeof(expected)));
386 } 386 }
387 387
388 // 3.1.9 - Sequence of all 64 possible continuation bytes (0x80-0xbf): 388 // 3.1.9 - Sequence of all 64 possible continuation bytes (0x80-0xbf):
389 { 389 {
390 const char* src = "\x80\x81\x82\x83\x84\x85\x86\x87" 390 const char* src =
391 "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" 391 "\x80\x81\x82\x83\x84\x85\x86\x87"
392 "\x90\x91\x92\x93\x94\x95\x96\x97" 392 "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
393 "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F" 393 "\x90\x91\x92\x93\x94\x95\x96\x97"
394 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7" 394 "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
395 "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" 395 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7"
396 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7" 396 "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
397 "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"; 397 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7"
398 int32_t expected[] = { 0x0 }; 398 "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF";
399 int32_t expected[] = {0x0};
399 int32_t dst[ARRAY_SIZE(expected)]; 400 int32_t dst[ARRAY_SIZE(expected)];
400 for (size_t i = 0; i < strlen(src); ++i) { 401 for (size_t i = 0; i < strlen(src); ++i) {
401 memset(dst, 0xFF, sizeof(dst)); 402 memset(dst, 0xFF, sizeof(dst));
402 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); 403 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
403 EXPECT(!is_valid); 404 EXPECT(!is_valid);
404 EXPECT(memcmp(expected, dst, sizeof(expected))); 405 EXPECT(memcmp(expected, dst, sizeof(expected)));
405 } 406 }
406 } 407 }
407 408
408 // 3.2 - Lonely start character 409 // 3.2 - Lonely start character
409 410
410 // 3.2.1 - All 32 first bytes of 2-byte sequences (0xc0-0xdf), each 411 // 3.2.1 - All 32 first bytes of 2-byte sequences (0xc0-0xdf), each
411 // followed by a space character: 412 // followed by a space character:
412 { 413 {
413 const char* src = "\xC0\x20\xC1\x20\xC2\x20\xC3\x20" 414 const char* src =
414 "\xC4\x20\xC5\x20\xC6\x20\xC7\x20" 415 "\xC0\x20\xC1\x20\xC2\x20\xC3\x20"
415 "\xC8\x20\xC9\x20\xCA\x20\xCB\x20" 416 "\xC4\x20\xC5\x20\xC6\x20\xC7\x20"
416 "\xCC\x20\xCD\x20\xCE\x20\xCF\x20" 417 "\xC8\x20\xC9\x20\xCA\x20\xCB\x20"
417 "\xD0\x20\xD1\x20\xD2\x20\xD3\x20" 418 "\xCC\x20\xCD\x20\xCE\x20\xCF\x20"
418 "\xD4\x20\xD5\x20\xD6\x20\xD7\x20" 419 "\xD0\x20\xD1\x20\xD2\x20\xD3\x20"
419 "\xD8\x20\xD9\x20\xDA\x20\xDB\x20" 420 "\xD4\x20\xD5\x20\xD6\x20\xD7\x20"
420 "\xDC\x20\xDD\x20\xDE\x20\xDF\x20"; 421 "\xD8\x20\xD9\x20\xDA\x20\xDB\x20"
421 int32_t expected[] = { 0x0 }; 422 "\xDC\x20\xDD\x20\xDE\x20\xDF\x20";
423 int32_t expected[] = {0x0};
422 int32_t dst[ARRAY_SIZE(expected)]; 424 int32_t dst[ARRAY_SIZE(expected)];
423 for (size_t i = 0; i < strlen(src); i += 2) { 425 for (size_t i = 0; i < strlen(src); i += 2) {
424 memset(dst, 0xFF, sizeof(dst)); 426 memset(dst, 0xFF, sizeof(dst));
425 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); 427 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
426 EXPECT(!is_valid); 428 EXPECT(!is_valid);
427 EXPECT(memcmp(expected, dst, sizeof(expected))); 429 EXPECT(memcmp(expected, dst, sizeof(expected)));
428 } 430 }
429 } 431 }
430 432
431 // 3.2.2 - All 16 first bytes of 3-byte sequences (0xe0-0xef), each 433 // 3.2.2 - All 16 first bytes of 3-byte sequences (0xe0-0xef), each
432 // followed by a space character: 434 // followed by a space character:
433 { 435 {
434 const char* src = "\xE0\x20\xE1\x20\xE2\x20\xE3\x20" 436 const char* src =
435 "\xE4\x20\xE5\x20\xE6\x20\xE7\x20" 437 "\xE0\x20\xE1\x20\xE2\x20\xE3\x20"
436 "\xE8\x20\xE9\x20\xEA\x20\xEB\x20" 438 "\xE4\x20\xE5\x20\xE6\x20\xE7\x20"
437 "\xEC\x20\xED\x20\xEE\x20\xEF\x20"; 439 "\xE8\x20\xE9\x20\xEA\x20\xEB\x20"
438 int32_t expected[] = { 0x0 }; 440 "\xEC\x20\xED\x20\xEE\x20\xEF\x20";
441 int32_t expected[] = {0x0};
439 int32_t dst[ARRAY_SIZE(expected)]; 442 int32_t dst[ARRAY_SIZE(expected)];
440 for (size_t i = 0; i < strlen(src); i += 2) { 443 for (size_t i = 0; i < strlen(src); i += 2) {
441 memset(dst, 0xFF, sizeof(dst)); 444 memset(dst, 0xFF, sizeof(dst));
442 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); 445 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
443 EXPECT(!is_valid); 446 EXPECT(!is_valid);
444 EXPECT(memcmp(expected, dst, sizeof(expected))); 447 EXPECT(memcmp(expected, dst, sizeof(expected)));
445 } 448 }
446 } 449 }
447 450
448 // 3.2.3 - All 8 first bytes of 4-byte sequences (0xf0-0xf7), each 451 // 3.2.3 - All 8 first bytes of 4-byte sequences (0xf0-0xf7), each
449 // followed by a space character: 452 // followed by a space character:
450 { 453 {
451 const char* src = "\xF0\x20\xF1\x20\xF2\x20\xF3\x20" 454 const char* src =
452 "\xF4\x20\xF5\x20\xF6\x20\xF7\x20"; 455 "\xF0\x20\xF1\x20\xF2\x20\xF3\x20"
453 int32_t expected[] = { 0x0 }; 456 "\xF4\x20\xF5\x20\xF6\x20\xF7\x20";
457 int32_t expected[] = {0x0};
454 int32_t dst[ARRAY_SIZE(expected)]; 458 int32_t dst[ARRAY_SIZE(expected)];
455 for (size_t i = 0; i < strlen(src); i += 2) { 459 for (size_t i = 0; i < strlen(src); i += 2) {
456 memset(dst, 0xFF, sizeof(dst)); 460 memset(dst, 0xFF, sizeof(dst));
457 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); 461 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
458 EXPECT(!is_valid); 462 EXPECT(!is_valid);
459 EXPECT(memcmp(expected, dst, sizeof(expected))); 463 EXPECT(memcmp(expected, dst, sizeof(expected)));
460 } 464 }
461 } 465 }
462 466
463 // 3.2.4 - All 4 first bytes of 5-byte sequences (0xf8-0xfb), each 467 // 3.2.4 - All 4 first bytes of 5-byte sequences (0xf8-0xfb), each
464 // followed by a space character: 468 // followed by a space character:
465 { 469 {
466 const char* src = "\xF8\x20\xF9\x20\xFA\x20\xFB\x20"; 470 const char* src = "\xF8\x20\xF9\x20\xFA\x20\xFB\x20";
467 int32_t expected[] = { 0x0 }; 471 int32_t expected[] = {0x0};
468 int32_t dst[ARRAY_SIZE(expected)]; 472 int32_t dst[ARRAY_SIZE(expected)];
469 for (size_t i = 0; i < strlen(src); i += 2) { 473 for (size_t i = 0; i < strlen(src); i += 2) {
470 memset(dst, 0xFF, sizeof(dst)); 474 memset(dst, 0xFF, sizeof(dst));
471 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); 475 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
472 EXPECT(!is_valid); 476 EXPECT(!is_valid);
473 EXPECT(memcmp(expected, dst, sizeof(expected))); 477 EXPECT(memcmp(expected, dst, sizeof(expected)));
474 } 478 }
475 } 479 }
476 480
477 // 3.2.5 - All 2 first bytes of 6-byte sequences (0xfc-0xfd), each 481 // 3.2.5 - All 2 first bytes of 6-byte sequences (0xfc-0xfd), each
478 // followed by a space character: 482 // followed by a space character:
479 { 483 {
480 const char* src = "\xFC\x20\xFD\x20"; 484 const char* src = "\xFC\x20\xFD\x20";
481 int32_t expected[] = { 0x0 }; 485 int32_t expected[] = {0x0};
482 int32_t dst[ARRAY_SIZE(expected)]; 486 int32_t dst[ARRAY_SIZE(expected)];
483 for (size_t i = 0; i < strlen(src); i += 2) { 487 for (size_t i = 0; i < strlen(src); i += 2) {
484 memset(dst, 0xFF, sizeof(dst)); 488 memset(dst, 0xFF, sizeof(dst));
485 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); 489 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
486 EXPECT(!is_valid); 490 EXPECT(!is_valid);
487 EXPECT(memcmp(expected, dst, sizeof(expected))); 491 EXPECT(memcmp(expected, dst, sizeof(expected)));
488 } 492 }
489 } 493 }
490 494
491 // 3.3 - Sequences with last continuation byte missing 495 // 3.3 - Sequences with last continuation byte missing
492 496
493 // 3.3.1 - 2-byte sequence with last byte missing (U+0000): "\xC0" 497 // 3.3.1 - 2-byte sequence with last byte missing (U+0000): "\xC0"
494 { 498 {
495 const char* src = "\xC0"; 499 const char* src = "\xC0";
496 int32_t expected[] = { 0x0 }; 500 int32_t expected[] = {0x0};
497 int32_t dst[ARRAY_SIZE(expected)]; 501 int32_t dst[ARRAY_SIZE(expected)];
498 memset(dst, 0xFF, sizeof(dst)); 502 memset(dst, 0xFF, sizeof(dst));
499 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 503 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
500 EXPECT(!is_valid); 504 EXPECT(!is_valid);
501 EXPECT(memcmp(expected, dst, sizeof(expected))); 505 EXPECT(memcmp(expected, dst, sizeof(expected)));
502 } 506 }
503 507
504 // 3.3.2 - 3-byte sequence with last byte missing (U+0000): "\xE0\x80" 508 // 3.3.2 - 3-byte sequence with last byte missing (U+0000): "\xE0\x80"
505 { 509 {
506 const char* src = "\xE0\x80"; 510 const char* src = "\xE0\x80";
507 int32_t expected[] = { 0x0 }; 511 int32_t expected[] = {0x0};
508 int32_t dst[ARRAY_SIZE(expected)]; 512 int32_t dst[ARRAY_SIZE(expected)];
509 memset(dst, 0xFF, sizeof(dst)); 513 memset(dst, 0xFF, sizeof(dst));
510 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 514 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
511 EXPECT(!is_valid); 515 EXPECT(!is_valid);
512 EXPECT(memcmp(expected, dst, sizeof(expected))); 516 EXPECT(memcmp(expected, dst, sizeof(expected)));
513 } 517 }
514 518
515 // 3.3.3 - 4-byte sequence with last byte missing (U+0000): "\xF0\x80\x80" 519 // 3.3.3 - 4-byte sequence with last byte missing (U+0000): "\xF0\x80\x80"
516 { 520 {
517 const char* src = "\xF0\x80\x80"; 521 const char* src = "\xF0\x80\x80";
518 int32_t expected[] = { 0x0 }; 522 int32_t expected[] = {0x0};
519 int32_t dst[ARRAY_SIZE(expected)]; 523 int32_t dst[ARRAY_SIZE(expected)];
520 memset(dst, 0xFF, sizeof(dst)); 524 memset(dst, 0xFF, sizeof(dst));
521 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 525 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
522 EXPECT(!is_valid); 526 EXPECT(!is_valid);
523 EXPECT(memcmp(expected, dst, sizeof(expected))); 527 EXPECT(memcmp(expected, dst, sizeof(expected)));
524 } 528 }
525 529
526 // 3.3.4 - 5-byte sequence with last byte missing (U+0000): "\xF8\x80\x80\x80" 530 // 3.3.4 - 5-byte sequence with last byte missing (U+0000): "\xF8\x80\x80\x80"
527 { 531 {
528 const char* src = "\xF8\x80\x80\x80"; 532 const char* src = "\xF8\x80\x80\x80";
529 int32_t expected[] = { 0x0 }; 533 int32_t expected[] = {0x0};
530 int32_t dst[ARRAY_SIZE(expected)]; 534 int32_t dst[ARRAY_SIZE(expected)];
531 memset(dst, 0xFF, sizeof(dst)); 535 memset(dst, 0xFF, sizeof(dst));
532 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 536 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
533 EXPECT(!is_valid); 537 EXPECT(!is_valid);
534 EXPECT(memcmp(expected, dst, sizeof(expected))); 538 EXPECT(memcmp(expected, dst, sizeof(expected)));
535 } 539 }
536 540
537 // 3.3.5 - 6-byte sequence with last byte missing (U+0000): 541 // 3.3.5 - 6-byte sequence with last byte missing (U+0000):
538 // "\xFC\x80\x80\x80\x80" 542 // "\xFC\x80\x80\x80\x80"
539 { 543 {
540 const char* src = "\xFC\x80\x80\x80\x80"; 544 const char* src = "\xFC\x80\x80\x80\x80";
541 int32_t expected[] = { 0x0 }; 545 int32_t expected[] = {0x0};
542 int32_t dst[ARRAY_SIZE(expected)]; 546 int32_t dst[ARRAY_SIZE(expected)];
543 memset(dst, 0xFF, sizeof(dst)); 547 memset(dst, 0xFF, sizeof(dst));
544 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 548 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
545 EXPECT(!is_valid); 549 EXPECT(!is_valid);
546 EXPECT(memcmp(expected, dst, sizeof(expected))); 550 EXPECT(memcmp(expected, dst, sizeof(expected)));
547 } 551 }
548 552
549 // 3.3.6 - 2-byte sequence with last byte missing (U-000007FF): "\xDF" 553 // 3.3.6 - 2-byte sequence with last byte missing (U-000007FF): "\xDF"
550 { 554 {
551 const char* src = "\xDF"; 555 const char* src = "\xDF";
552 int32_t expected[] = { 0x0 }; 556 int32_t expected[] = {0x0};
553 int32_t dst[ARRAY_SIZE(expected)]; 557 int32_t dst[ARRAY_SIZE(expected)];
554 memset(dst, 0xFF, sizeof(dst)); 558 memset(dst, 0xFF, sizeof(dst));
555 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 559 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
556 EXPECT(!is_valid); 560 EXPECT(!is_valid);
557 EXPECT(memcmp(expected, dst, sizeof(expected))); 561 EXPECT(memcmp(expected, dst, sizeof(expected)));
558 } 562 }
559 563
560 // 3.3.7 - 3-byte sequence with last byte missing (U-0000FFFF): "\xEF\xBF" 564 // 3.3.7 - 3-byte sequence with last byte missing (U-0000FFFF): "\xEF\xBF"
561 { 565 {
562 const char* src = "\xEF\xBF"; 566 const char* src = "\xEF\xBF";
563 int32_t expected[] = { 0x0 }; 567 int32_t expected[] = {0x0};
564 int32_t dst[ARRAY_SIZE(expected)]; 568 int32_t dst[ARRAY_SIZE(expected)];
565 memset(dst, 0xFF, sizeof(dst)); 569 memset(dst, 0xFF, sizeof(dst));
566 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 570 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
567 EXPECT(!is_valid); 571 EXPECT(!is_valid);
568 EXPECT(memcmp(expected, dst, sizeof(expected))); 572 EXPECT(memcmp(expected, dst, sizeof(expected)));
569 } 573 }
570 574
571 // 3.3.8 - 4-byte sequence with last byte missing (U-001FFFFF): "\xF7\xBF\xBF" 575 // 3.3.8 - 4-byte sequence with last byte missing (U-001FFFFF): "\xF7\xBF\xBF"
572 { 576 {
573 const char* src = "\xF7\xBF\xBF"; 577 const char* src = "\xF7\xBF\xBF";
574 int32_t expected[] = { 0x0 }; 578 int32_t expected[] = {0x0};
575 int32_t dst[ARRAY_SIZE(expected)]; 579 int32_t dst[ARRAY_SIZE(expected)];
576 memset(dst, 0xFF, sizeof(dst)); 580 memset(dst, 0xFF, sizeof(dst));
577 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 581 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
578 EXPECT(!is_valid); 582 EXPECT(!is_valid);
579 EXPECT(memcmp(expected, dst, sizeof(expected))); 583 EXPECT(memcmp(expected, dst, sizeof(expected)));
580 } 584 }
581 585
582 // 3.3.9 - 5-byte sequence with last byte missing (U-03FFFFFF): 586 // 3.3.9 - 5-byte sequence with last byte missing (U-03FFFFFF):
583 // "\xFB\xBF\xBF\xBF" 587 // "\xFB\xBF\xBF\xBF"
584 { 588 {
585 const char* src = "\xFB\xBF\xBF\xBF"; 589 const char* src = "\xFB\xBF\xBF\xBF";
586 int32_t expected[] = { 0x0 }; 590 int32_t expected[] = {0x0};
587 int32_t dst[ARRAY_SIZE(expected)]; 591 int32_t dst[ARRAY_SIZE(expected)];
588 memset(dst, 0xFF, sizeof(dst)); 592 memset(dst, 0xFF, sizeof(dst));
589 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 593 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
590 EXPECT(!is_valid); 594 EXPECT(!is_valid);
591 EXPECT(memcmp(expected, dst, sizeof(expected))); 595 EXPECT(memcmp(expected, dst, sizeof(expected)));
592 } 596 }
593 597
594 // 3.3.10 - 6-byte sequence with last byte missing (U-7FFFFFFF): 598 // 3.3.10 - 6-byte sequence with last byte missing (U-7FFFFFFF):
595 // "\xFD\xBF\xBF\xBF\xBF" 599 // "\xFD\xBF\xBF\xBF\xBF"
596 { 600 {
597 const char* src = "\xFD\xBF\xBF\xBF\xBF"; 601 const char* src = "\xFD\xBF\xBF\xBF\xBF";
598 int32_t expected[] = { 0x0 }; 602 int32_t expected[] = {0x0};
599 int32_t dst[ARRAY_SIZE(expected)]; 603 int32_t dst[ARRAY_SIZE(expected)];
600 memset(dst, 0xFF, sizeof(dst)); 604 memset(dst, 0xFF, sizeof(dst));
601 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 605 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
602 EXPECT(!is_valid); 606 EXPECT(!is_valid);
603 EXPECT(memcmp(expected, dst, sizeof(expected))); 607 EXPECT(memcmp(expected, dst, sizeof(expected)));
604 } 608 }
605 609
606 // 3.4 - Concatenation of incomplete sequences 610 // 3.4 - Concatenation of incomplete sequences
607 { 611 {
608 const char* src = "\xC0\xE0\x80\xF0\x80\x80" 612 const char* src =
609 "\xF8\x80\x80\x80\xFC\x80" 613 "\xC0\xE0\x80\xF0\x80\x80"
610 "\x80\x80\x80\xDF\xEF\xBF" 614 "\xF8\x80\x80\x80\xFC\x80"
611 "\xF7\xBF\xBF\xFB\xBF\xBF" 615 "\x80\x80\x80\xDF\xEF\xBF"
612 "\xBF\xFD\xBF\xBF\xBF\xBF"; 616 "\xF7\xBF\xBF\xFB\xBF\xBF"
613 int32_t expected[] = { 0x0 }; 617 "\xBF\xFD\xBF\xBF\xBF\xBF";
618 int32_t expected[] = {0x0};
614 int32_t dst[ARRAY_SIZE(expected)]; 619 int32_t dst[ARRAY_SIZE(expected)];
615 for (size_t i = 0; i < strlen(src); ++i) { 620 for (size_t i = 0; i < strlen(src); ++i) {
616 for (size_t j = 1; j < (strlen(src) - i); ++j) { 621 for (size_t j = 1; j < (strlen(src) - i); ++j) {
617 memset(dst, 0xFF, sizeof(dst)); 622 memset(dst, 0xFF, sizeof(dst));
618 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], 623 bool is_valid =
619 dst, ARRAY_SIZE(dst)); 624 Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
620 EXPECT(!is_valid); 625 EXPECT(!is_valid);
621 EXPECT(memcmp(expected, dst, sizeof(expected))); 626 EXPECT(memcmp(expected, dst, sizeof(expected)));
622 } 627 }
623 } 628 }
624 } 629 }
625 630
626 // 3.5 - Impossible bytes 631 // 3.5 - Impossible bytes
627 632
628 // 3.5.1 - fe = "\xFE" 633 // 3.5.1 - fe = "\xFE"
629 { 634 {
630 const char* src = "\xFE"; 635 const char* src = "\xFE";
631 int32_t expected[] = { 0xFE }; 636 int32_t expected[] = {0xFE};
632 int32_t dst[ARRAY_SIZE(expected)]; 637 int32_t dst[ARRAY_SIZE(expected)];
633 memset(dst, 0, sizeof(dst)); 638 memset(dst, 0, sizeof(dst));
634 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 639 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
635 EXPECT(!is_valid); 640 EXPECT(!is_valid);
636 EXPECT(memcmp(expected, dst, sizeof(expected))); 641 EXPECT(memcmp(expected, dst, sizeof(expected)));
637 } 642 }
638 643
639 // 3.5.2 - ff = "\xFF" 644 // 3.5.2 - ff = "\xFF"
640 { 645 {
641 const char* src = "\xFF"; 646 const char* src = "\xFF";
642 int32_t expected[] = { 0xFF }; 647 int32_t expected[] = {0xFF};
643 int32_t dst[ARRAY_SIZE(expected)]; 648 int32_t dst[ARRAY_SIZE(expected)];
644 memset(dst, 0, sizeof(dst)); 649 memset(dst, 0, sizeof(dst));
645 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 650 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
646 EXPECT(!is_valid); 651 EXPECT(!is_valid);
647 EXPECT(memcmp(expected, dst, sizeof(expected))); 652 EXPECT(memcmp(expected, dst, sizeof(expected)));
648 } 653 }
649 654
650 // 3.5.3 - fe fe ff ff = "\xFE\xFE\xFF\xFF" 655 // 3.5.3 - fe fe ff ff = "\xFE\xFE\xFF\xFF"
651 { 656 {
652 const char* src = "\xFE\xFE\xFF\xFF"; 657 const char* src = "\xFE\xFE\xFF\xFF";
653 int32_t expected[] = { 0xFF }; 658 int32_t expected[] = {0xFF};
654 int32_t dst[ARRAY_SIZE(expected)]; 659 int32_t dst[ARRAY_SIZE(expected)];
655 memset(dst, 0, sizeof(dst)); 660 memset(dst, 0, sizeof(dst));
656 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 661 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
657 EXPECT(!is_valid); 662 EXPECT(!is_valid);
658 EXPECT(memcmp(expected, dst, sizeof(expected))); 663 EXPECT(memcmp(expected, dst, sizeof(expected)));
659 } 664 }
660 665
661 // 4 - Overlong sequences 666 // 4 - Overlong sequences
662 667
663 // 4.1 - Examples of an overlong ASCII character 668 // 4.1 - Examples of an overlong ASCII character
664 669
665 // 4.1.1 - U+002F = c0 af = "\xC0\xAF" 670 // 4.1.1 - U+002F = c0 af = "\xC0\xAF"
666 { 671 {
667 const char* src = "\xC0\xAF"; 672 const char* src = "\xC0\xAF";
668 int32_t expected[] = { 0x2F }; 673 int32_t expected[] = {0x2F};
669 int32_t dst[ARRAY_SIZE(expected)]; 674 int32_t dst[ARRAY_SIZE(expected)];
670 memset(dst, 0, sizeof(dst)); 675 memset(dst, 0, sizeof(dst));
671 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 676 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
672 EXPECT(!is_valid); 677 EXPECT(!is_valid);
673 EXPECT(memcmp(expected, dst, sizeof(expected))); 678 EXPECT(memcmp(expected, dst, sizeof(expected)));
674 } 679 }
675 680
676 // 4.1.2 - U+002F = e0 80 af = "\xE0\x80\xAF" 681 // 4.1.2 - U+002F = e0 80 af = "\xE0\x80\xAF"
677 { 682 {
678 const char* src = "\xE0\x80\xAF"; 683 const char* src = "\xE0\x80\xAF";
679 int32_t expected[] = { 0x2F }; 684 int32_t expected[] = {0x2F};
680 int32_t dst[ARRAY_SIZE(expected)]; 685 int32_t dst[ARRAY_SIZE(expected)];
681 memset(dst, 0, sizeof(dst)); 686 memset(dst, 0, sizeof(dst));
682 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 687 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
683 EXPECT(!is_valid); 688 EXPECT(!is_valid);
684 EXPECT(memcmp(expected, dst, sizeof(expected))); 689 EXPECT(memcmp(expected, dst, sizeof(expected)));
685 } 690 }
686 691
687 // 4.1.3 - U+002F = f0 80 80 af = "\xF0\x80\x80\xAF" 692 // 4.1.3 - U+002F = f0 80 80 af = "\xF0\x80\x80\xAF"
688 { 693 {
689 const char* src = "\xF0\x80\x80\xAF"; 694 const char* src = "\xF0\x80\x80\xAF";
690 int32_t expected[] = { 0x2F }; 695 int32_t expected[] = {0x2F};
691 int32_t dst[ARRAY_SIZE(expected)]; 696 int32_t dst[ARRAY_SIZE(expected)];
692 memset(dst, 0, sizeof(dst)); 697 memset(dst, 0, sizeof(dst));
693 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 698 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
694 EXPECT(!is_valid); 699 EXPECT(!is_valid);
695 EXPECT(memcmp(expected, dst, sizeof(expected))); 700 EXPECT(memcmp(expected, dst, sizeof(expected)));
696 } 701 }
697 702
698 // 4.1.4 - U+002F = f8 80 80 80 af = "\xF8\x80\x80\x80\xAF" 703 // 4.1.4 - U+002F = f8 80 80 80 af = "\xF8\x80\x80\x80\xAF"
699 { 704 {
700 const char* src = "\xF8\x80\x80\x80\xAF"; 705 const char* src = "\xF8\x80\x80\x80\xAF";
701 int32_t expected[] = { 0x2F }; 706 int32_t expected[] = {0x2F};
702 int32_t dst[ARRAY_SIZE(expected)]; 707 int32_t dst[ARRAY_SIZE(expected)];
703 memset(dst, 0, sizeof(dst)); 708 memset(dst, 0, sizeof(dst));
704 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 709 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
705 EXPECT(!is_valid); 710 EXPECT(!is_valid);
706 EXPECT(memcmp(expected, dst, sizeof(expected))); 711 EXPECT(memcmp(expected, dst, sizeof(expected)));
707 } 712 }
708 713
709 // 4.1.5 - U+002F = fc 80 80 80 80 af = "\xFC\x80\x80\x80\x80\xAF" 714 // 4.1.5 - U+002F = fc 80 80 80 80 af = "\xFC\x80\x80\x80\x80\xAF"
710 { 715 {
711 const char* src = "\xFC\x80\x80\x80\x80\xAF"; 716 const char* src = "\xFC\x80\x80\x80\x80\xAF";
712 int32_t expected[] = { 0x2F }; 717 int32_t expected[] = {0x2F};
713 int32_t dst[ARRAY_SIZE(expected)]; 718 int32_t dst[ARRAY_SIZE(expected)];
714 memset(dst, 0, sizeof(dst)); 719 memset(dst, 0, sizeof(dst));
715 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 720 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
716 EXPECT(!is_valid); 721 EXPECT(!is_valid);
717 EXPECT(memcmp(expected, dst, sizeof(expected))); 722 EXPECT(memcmp(expected, dst, sizeof(expected)));
718 } 723 }
719 724
720 // 4.2 Maximum overlong sequences 725 // 4.2 Maximum overlong sequences
721 726
722 // 4.2.1 - U-0000007F = c1 bf = "\xC1\xBF" 727 // 4.2.1 - U-0000007F = c1 bf = "\xC1\xBF"
723 { 728 {
724 const char* src = "\xC1\xBF"; 729 const char* src = "\xC1\xBF";
725 int32_t expected[] = { 0x7F }; 730 int32_t expected[] = {0x7F};
726 int32_t dst[ARRAY_SIZE(expected)]; 731 int32_t dst[ARRAY_SIZE(expected)];
727 memset(dst, 0, sizeof(dst)); 732 memset(dst, 0, sizeof(dst));
728 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 733 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
729 EXPECT(!is_valid); 734 EXPECT(!is_valid);
730 EXPECT(memcmp(expected, dst, sizeof(expected))); 735 EXPECT(memcmp(expected, dst, sizeof(expected)));
731 } 736 }
732 737
733 // 4.2.2 U+000007FF = e0 9f bf = "\xE0\x9F\xBF" 738 // 4.2.2 U+000007FF = e0 9f bf = "\xE0\x9F\xBF"
734 { 739 {
735 const char* src = "\xE0\x9F\xBF"; 740 const char* src = "\xE0\x9F\xBF";
736 int32_t expected[] = { 0x7FF }; 741 int32_t expected[] = {0x7FF};
737 int32_t dst[ARRAY_SIZE(expected)]; 742 int32_t dst[ARRAY_SIZE(expected)];
738 memset(dst, 0, sizeof(dst)); 743 memset(dst, 0, sizeof(dst));
739 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 744 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
740 EXPECT(!is_valid); 745 EXPECT(!is_valid);
741 EXPECT(memcmp(expected, dst, sizeof(expected))); 746 EXPECT(memcmp(expected, dst, sizeof(expected)));
742 } 747 }
743 748
744 // 4.2.3 - U+0000FFFF = f0 8f bf bf = "\xF0\x8F\xBF\xBF" 749 // 4.2.3 - U+0000FFFF = f0 8f bf bf = "\xF0\x8F\xBF\xBF"
745 { 750 {
746 const char* src = "\xF0\x8F\xBF\xBF"; 751 const char* src = "\xF0\x8F\xBF\xBF";
747 int32_t expected[] = { 0xFFFF }; 752 int32_t expected[] = {0xFFFF};
748 int32_t dst[ARRAY_SIZE(expected)]; 753 int32_t dst[ARRAY_SIZE(expected)];
749 memset(dst, 0, sizeof(dst)); 754 memset(dst, 0, sizeof(dst));
750 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 755 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
751 EXPECT(!is_valid); 756 EXPECT(!is_valid);
752 EXPECT(memcmp(expected, dst, sizeof(expected))); 757 EXPECT(memcmp(expected, dst, sizeof(expected)));
753 } 758 }
754 759
755 // 4.2.4 U-001FFFFF = f8 87 bf bf bf = "\xF8\x87\xBF\xBF\xBF" 760 // 4.2.4 U-001FFFFF = f8 87 bf bf bf = "\xF8\x87\xBF\xBF\xBF"
756 { 761 {
757 const char* src = "\xF8\x87\xBF\xBF\xBF"; 762 const char* src = "\xF8\x87\xBF\xBF\xBF";
758 int32_t expected[] = { 0x1FFFFF }; 763 int32_t expected[] = {0x1FFFFF};
759 int32_t dst[ARRAY_SIZE(expected)]; 764 int32_t dst[ARRAY_SIZE(expected)];
760 memset(dst, 0, sizeof(dst)); 765 memset(dst, 0, sizeof(dst));
761 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 766 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
762 EXPECT(!is_valid); 767 EXPECT(!is_valid);
763 EXPECT(memcmp(expected, dst, sizeof(expected))); 768 EXPECT(memcmp(expected, dst, sizeof(expected)));
764 } 769 }
765 770
766 // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "\xFC\x83\xBF\xBF\xBF\xBF" 771 // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "\xFC\x83\xBF\xBF\xBF\xBF"
767 { 772 {
768 const char* src = "\xFC\x83\xBF\xBF\xBF\xBF"; 773 const char* src = "\xFC\x83\xBF\xBF\xBF\xBF";
769 int32_t expected[] = { 0x3FFFFFF }; 774 int32_t expected[] = {0x3FFFFFF};
770 int32_t dst[ARRAY_SIZE(expected)]; 775 int32_t dst[ARRAY_SIZE(expected)];
771 memset(dst, 0, sizeof(dst)); 776 memset(dst, 0, sizeof(dst));
772 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 777 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
773 EXPECT(!is_valid); 778 EXPECT(!is_valid);
774 EXPECT(memcmp(expected, dst, sizeof(expected))); 779 EXPECT(memcmp(expected, dst, sizeof(expected)));
775 } 780 }
776 781
777 // 4.3 - Overlong representation of the NUL character 782 // 4.3 - Overlong representation of the NUL character
778 783
779 // 4.3.1 - U+0000 = "\xC0\x80" 784 // 4.3.1 - U+0000 = "\xC0\x80"
780 { 785 {
781 const char* src = "\xC0\x80"; 786 const char* src = "\xC0\x80";
782 int32_t expected[] = { 0x0 }; 787 int32_t expected[] = {0x0};
783 int32_t dst[ARRAY_SIZE(expected)]; 788 int32_t dst[ARRAY_SIZE(expected)];
784 memset(dst, 0xFF, sizeof(dst)); 789 memset(dst, 0xFF, sizeof(dst));
785 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 790 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
786 EXPECT(!is_valid); 791 EXPECT(!is_valid);
787 EXPECT(memcmp(expected, dst, sizeof(expected))); 792 EXPECT(memcmp(expected, dst, sizeof(expected)));
788 } 793 }
789 794
790 // 4.3.2 U+0000 = e0 80 80 = "\xE0\x80\x80" 795 // 4.3.2 U+0000 = e0 80 80 = "\xE0\x80\x80"
791 { 796 {
792 const char* src = "\xE0\x80\x80"; 797 const char* src = "\xE0\x80\x80";
793 int32_t expected[] = { 0x0 }; 798 int32_t expected[] = {0x0};
794 int32_t dst[ARRAY_SIZE(expected)]; 799 int32_t dst[ARRAY_SIZE(expected)];
795 memset(dst, 0xFF, sizeof(dst)); 800 memset(dst, 0xFF, sizeof(dst));
796 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 801 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
797 EXPECT(!is_valid); 802 EXPECT(!is_valid);
798 EXPECT(memcmp(expected, dst, sizeof(expected))); 803 EXPECT(memcmp(expected, dst, sizeof(expected)));
799 } 804 }
800 805
801 // 4.3.3 U+0000 = f0 80 80 80 = "\xF0\x80\x80\x80" 806 // 4.3.3 U+0000 = f0 80 80 80 = "\xF0\x80\x80\x80"
802 { 807 {
803 const char* src = "\xF0\x80\x80\x80"; 808 const char* src = "\xF0\x80\x80\x80";
804 int32_t expected[] = { 0x0 }; 809 int32_t expected[] = {0x0};
805 int32_t dst[ARRAY_SIZE(expected)]; 810 int32_t dst[ARRAY_SIZE(expected)];
806 memset(dst, 0xFF, sizeof(dst)); 811 memset(dst, 0xFF, sizeof(dst));
807 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 812 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
808 EXPECT(!is_valid); 813 EXPECT(!is_valid);
809 EXPECT(memcmp(expected, dst, sizeof(expected))); 814 EXPECT(memcmp(expected, dst, sizeof(expected)));
810 } 815 }
811 816
812 // 4.3.4 U+0000 = f8 80 80 80 80 = "\xF8\x80\x80\x80\x80" 817 // 4.3.4 U+0000 = f8 80 80 80 80 = "\xF8\x80\x80\x80\x80"
813 { 818 {
814 const char* src = "\xF8\x80\x80\x80\x80"; 819 const char* src = "\xF8\x80\x80\x80\x80";
815 int32_t expected[] = { 0x0 }; 820 int32_t expected[] = {0x0};
816 int32_t dst[ARRAY_SIZE(expected)]; 821 int32_t dst[ARRAY_SIZE(expected)];
817 memset(dst, 0xFF, sizeof(dst)); 822 memset(dst, 0xFF, sizeof(dst));
818 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 823 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
819 EXPECT(!is_valid); 824 EXPECT(!is_valid);
820 EXPECT(memcmp(expected, dst, sizeof(expected))); 825 EXPECT(memcmp(expected, dst, sizeof(expected)));
821 } 826 }
822 827
823 // 4.3.5 U+0000 = fc 80 80 80 80 80 = "\xFC\x80\x80\x80\x80\x80" 828 // 4.3.5 U+0000 = fc 80 80 80 80 80 = "\xFC\x80\x80\x80\x80\x80"
824 { 829 {
825 const char* src = "\xFC\x80\x80\x80\x80\x80"; 830 const char* src = "\xFC\x80\x80\x80\x80\x80";
826 int32_t expected[] = { 0x0 }; 831 int32_t expected[] = {0x0};
827 int32_t dst[ARRAY_SIZE(expected)]; 832 int32_t dst[ARRAY_SIZE(expected)];
828 memset(dst, 0xFF, sizeof(dst)); 833 memset(dst, 0xFF, sizeof(dst));
829 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 834 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
830 EXPECT(!is_valid); 835 EXPECT(!is_valid);
831 EXPECT(memcmp(expected, dst, sizeof(expected))); 836 EXPECT(memcmp(expected, dst, sizeof(expected)));
832 } 837 }
833 838
834 // 5.1 - Single UTF-16 surrogates 839 // 5.1 - Single UTF-16 surrogates
835 // UTF-8 suggests single surrogates are invalid, but both JS and 840 // UTF-8 suggests single surrogates are invalid, but both JS and
836 // Dart allow them and make use of them. 841 // Dart allow them and make use of them.
837 842
838 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80" 843 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80"
839 { 844 {
840 const char* src = "\xED\xA0\x80"; 845 const char* src = "\xED\xA0\x80";
841 int32_t expected[] = { 0xD800 }; 846 int32_t expected[] = {0xD800};
842 int32_t dst[ARRAY_SIZE(expected)]; 847 int32_t dst[ARRAY_SIZE(expected)];
843 memset(dst, 0, sizeof(dst)); 848 memset(dst, 0, sizeof(dst));
844 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 849 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
845 EXPECT(is_valid); 850 EXPECT(is_valid);
846 EXPECT(!memcmp(expected, dst, sizeof(expected))); 851 EXPECT(!memcmp(expected, dst, sizeof(expected)));
847 } 852 }
848 853
849 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF" 854 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF"
850 { 855 {
851 const char* src = "\xED\xAD\xBF"; 856 const char* src = "\xED\xAD\xBF";
852 int32_t expected[] = { 0xDB7F }; 857 int32_t expected[] = {0xDB7F};
853 int32_t dst[ARRAY_SIZE(expected)]; 858 int32_t dst[ARRAY_SIZE(expected)];
854 memset(dst, 0, sizeof(dst)); 859 memset(dst, 0, sizeof(dst));
855 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 860 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
856 EXPECT(is_valid); 861 EXPECT(is_valid);
857 EXPECT(!memcmp(expected, dst, sizeof(expected))); 862 EXPECT(!memcmp(expected, dst, sizeof(expected)));
858 } 863 }
859 864
860 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80" 865 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80"
861 { 866 {
862 const char* src = "\xED\xAE\x80"; 867 const char* src = "\xED\xAE\x80";
863 int32_t expected[] = { 0xDB80 }; 868 int32_t expected[] = {0xDB80};
864 int32_t dst[ARRAY_SIZE(expected)]; 869 int32_t dst[ARRAY_SIZE(expected)];
865 memset(dst, 0, sizeof(dst)); 870 memset(dst, 0, sizeof(dst));
866 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 871 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
867 EXPECT(is_valid); 872 EXPECT(is_valid);
868 EXPECT(!memcmp(expected, dst, sizeof(expected))); 873 EXPECT(!memcmp(expected, dst, sizeof(expected)));
869 } 874 }
870 875
871 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF" 876 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF"
872 { 877 {
873 const char* src = "\xED\xAF\xBF"; 878 const char* src = "\xED\xAF\xBF";
874 int32_t expected[] = { 0xDBFF }; 879 int32_t expected[] = {0xDBFF};
875 int32_t dst[ARRAY_SIZE(expected)]; 880 int32_t dst[ARRAY_SIZE(expected)];
876 memset(dst, 0, sizeof(dst)); 881 memset(dst, 0, sizeof(dst));
877 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 882 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
878 EXPECT(is_valid); 883 EXPECT(is_valid);
879 EXPECT(!memcmp(expected, dst, sizeof(expected))); 884 EXPECT(!memcmp(expected, dst, sizeof(expected)));
880 } 885 }
881 886
882 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80" 887 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80"
883 { 888 {
884 const char* src = "\xED\xB0\x80"; 889 const char* src = "\xED\xB0\x80";
885 int32_t expected[] = { 0xDC00 }; 890 int32_t expected[] = {0xDC00};
886 int32_t dst[ARRAY_SIZE(expected)]; 891 int32_t dst[ARRAY_SIZE(expected)];
887 memset(dst, 0, sizeof(dst)); 892 memset(dst, 0, sizeof(dst));
888 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 893 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
889 EXPECT(is_valid); 894 EXPECT(is_valid);
890 EXPECT(!memcmp(expected, dst, sizeof(expected))); 895 EXPECT(!memcmp(expected, dst, sizeof(expected)));
891 } 896 }
892 897
893 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80" 898 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80"
894 { 899 {
895 const char* src = "\xED\xBE\x80"; 900 const char* src = "\xED\xBE\x80";
896 int32_t expected[] = { 0xDF80 }; 901 int32_t expected[] = {0xDF80};
897 int32_t dst[ARRAY_SIZE(expected)]; 902 int32_t dst[ARRAY_SIZE(expected)];
898 memset(dst, 0, sizeof(dst)); 903 memset(dst, 0, sizeof(dst));
899 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 904 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
900 EXPECT(is_valid); 905 EXPECT(is_valid);
901 EXPECT(!memcmp(expected, dst, sizeof(expected))); 906 EXPECT(!memcmp(expected, dst, sizeof(expected)));
902 } 907 }
903 908
904 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF" 909 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF"
905 { 910 {
906 const char* src = "\xED\xBF\xBF"; 911 const char* src = "\xED\xBF\xBF";
907 int32_t expected[] = { 0xDFFF }; 912 int32_t expected[] = {0xDFFF};
908 int32_t dst[ARRAY_SIZE(expected)]; 913 int32_t dst[ARRAY_SIZE(expected)];
909 memset(dst, 0, sizeof(dst)); 914 memset(dst, 0, sizeof(dst));
910 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 915 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
911 EXPECT(is_valid); 916 EXPECT(is_valid);
912 EXPECT(!memcmp(expected, dst, sizeof(expected))); 917 EXPECT(!memcmp(expected, dst, sizeof(expected)));
913 } 918 }
914 919
915 // 5.2 Paired UTF-16 surrogates 920 // 5.2 Paired UTF-16 surrogates
916 // Also not a valid string, but accepted in Dart, even if it doesn't make 921 // Also not a valid string, but accepted in Dart, even if it doesn't make
917 // sense. e.g. 922 // sense. e.g.
918 // var s = new String.fromCharCodes([0xd800, 0xDC00]); 923 // var s = new String.fromCharCodes([0xd800, 0xDC00]);
919 // print(s.runes); // (65536) (0x10000) 924 // print(s.runes); // (65536) (0x10000)
920 // print(s.codeUnits); // [55296, 56320] 925 // print(s.codeUnits); // [55296, 56320]
921 926
922 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80" 927 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80"
923 { 928 {
924 const char* src = "\xED\xA0\x80\xED\xB0\x80"; 929 const char* src = "\xED\xA0\x80\xED\xB0\x80";
925 int32_t expected[] = { 0xD800, 0xDC00 }; 930 int32_t expected[] = {0xD800, 0xDC00};
926 int32_t dst[ARRAY_SIZE(expected)]; 931 int32_t dst[ARRAY_SIZE(expected)];
927 memset(dst, 0, sizeof(dst)); 932 memset(dst, 0, sizeof(dst));
928 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 933 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
929 EXPECT(is_valid); 934 EXPECT(is_valid);
930 EXPECT(!memcmp(expected, dst, sizeof(expected))); 935 EXPECT(!memcmp(expected, dst, sizeof(expected)));
931 } 936 }
932 937
933 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF" 938 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF"
934 { 939 {
935 const char* src = "\xED\xA0\x80\xED\xBF\xBF"; 940 const char* src = "\xED\xA0\x80\xED\xBF\xBF";
936 int32_t expected[] = { 0xD800, 0xDFFF }; 941 int32_t expected[] = {0xD800, 0xDFFF};
937 int32_t dst[ARRAY_SIZE(expected)]; 942 int32_t dst[ARRAY_SIZE(expected)];
938 memset(dst, 0, sizeof(dst)); 943 memset(dst, 0, sizeof(dst));
939 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 944 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
940 EXPECT(is_valid); 945 EXPECT(is_valid);
941 EXPECT(!memcmp(expected, dst, sizeof(expected))); 946 EXPECT(!memcmp(expected, dst, sizeof(expected)));
942 } 947 }
943 948
944 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80" 949 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80"
945 { 950 {
946 const char* src = "\xED\xAD\xBF\xED\xB0\x80"; 951 const char* src = "\xED\xAD\xBF\xED\xB0\x80";
947 int32_t expected[] = { 0xDB7F, 0xDC00 }; 952 int32_t expected[] = {0xDB7F, 0xDC00};
948 int32_t dst[ARRAY_SIZE(expected)]; 953 int32_t dst[ARRAY_SIZE(expected)];
949 memset(dst, 0, sizeof(dst)); 954 memset(dst, 0, sizeof(dst));
950 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 955 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
951 EXPECT(is_valid); 956 EXPECT(is_valid);
952 EXPECT(!memcmp(expected, dst, sizeof(expected))); 957 EXPECT(!memcmp(expected, dst, sizeof(expected)));
953 } 958 }
954 959
955 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF" 960 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF"
956 { 961 {
957 const char* src = "\xED\xAD\xBF\xED\xBF\xBF"; 962 const char* src = "\xED\xAD\xBF\xED\xBF\xBF";
958 int32_t expected[] = { 0xDB7F, 0xDFFF }; 963 int32_t expected[] = {0xDB7F, 0xDFFF};
959 int32_t dst[ARRAY_SIZE(expected)]; 964 int32_t dst[ARRAY_SIZE(expected)];
960 memset(dst, 0, sizeof(dst)); 965 memset(dst, 0, sizeof(dst));
961 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 966 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
962 EXPECT(is_valid); 967 EXPECT(is_valid);
963 EXPECT(!memcmp(expected, dst, sizeof(expected))); 968 EXPECT(!memcmp(expected, dst, sizeof(expected)));
964 } 969 }
965 970
966 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80" 971 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80"
967 { 972 {
968 const char* src = "\xED\xAE\x80\xED\xB0\x80"; 973 const char* src = "\xED\xAE\x80\xED\xB0\x80";
969 int32_t expected[] = { 0xDB80, 0xDC00 }; 974 int32_t expected[] = {0xDB80, 0xDC00};
970 int32_t dst[ARRAY_SIZE(expected)]; 975 int32_t dst[ARRAY_SIZE(expected)];
971 memset(dst, 0, sizeof(dst)); 976 memset(dst, 0, sizeof(dst));
972 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 977 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
973 EXPECT(is_valid); 978 EXPECT(is_valid);
974 EXPECT(!memcmp(expected, dst, sizeof(expected))); 979 EXPECT(!memcmp(expected, dst, sizeof(expected)));
975 } 980 }
976 981
977 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF" 982 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF"
978 { 983 {
979 const char* src = "\xED\xAE\x80\xED\xBF\xBF"; 984 const char* src = "\xED\xAE\x80\xED\xBF\xBF";
980 int32_t expected[] = { 0xDB80, 0xDFFF }; 985 int32_t expected[] = {0xDB80, 0xDFFF};
981 int32_t dst[ARRAY_SIZE(expected)]; 986 int32_t dst[ARRAY_SIZE(expected)];
982 memset(dst, 0, sizeof(dst)); 987 memset(dst, 0, sizeof(dst));
983 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 988 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
984 EXPECT(is_valid); 989 EXPECT(is_valid);
985 EXPECT(!memcmp(expected, dst, sizeof(expected))); 990 EXPECT(!memcmp(expected, dst, sizeof(expected)));
986 } 991 }
987 992
988 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80" 993 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80"
989 { 994 {
990 const char* src = "\xED\xAF\xBF\xED\xB0\x80"; 995 const char* src = "\xED\xAF\xBF\xED\xB0\x80";
991 int32_t expected[] = { 0xDBFF, 0xDC00 }; 996 int32_t expected[] = {0xDBFF, 0xDC00};
992 int32_t dst[ARRAY_SIZE(expected)]; 997 int32_t dst[ARRAY_SIZE(expected)];
993 memset(dst, 0, sizeof(dst)); 998 memset(dst, 0, sizeof(dst));
994 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 999 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
995 EXPECT(is_valid); 1000 EXPECT(is_valid);
996 EXPECT(!memcmp(expected, dst, sizeof(expected))); 1001 EXPECT(!memcmp(expected, dst, sizeof(expected)));
997 } 1002 }
998 1003
999 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF" 1004 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF"
1000 { 1005 {
1001 const char* src = "\xED\xAF\xBF\xED\xBF\xBF"; 1006 const char* src = "\xED\xAF\xBF\xED\xBF\xBF";
1002 int32_t expected[] = { 0xDBFF, 0xDFFF }; 1007 int32_t expected[] = {0xDBFF, 0xDFFF};
1003 int32_t dst[ARRAY_SIZE(expected)]; 1008 int32_t dst[ARRAY_SIZE(expected)];
1004 memset(dst, 0, sizeof(dst)); 1009 memset(dst, 0, sizeof(dst));
1005 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 1010 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
1006 EXPECT(is_valid); 1011 EXPECT(is_valid);
1007 EXPECT(!memcmp(expected, dst, sizeof(expected))); 1012 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1008 } 1013 }
1009 1014
1010 // 5.3 - Other illegal code positions 1015 // 5.3 - Other illegal code positions
1011 1016
1012 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE" 1017 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE"
1013 { 1018 {
1014 const char* src = "\xEF\xBF\xBE"; 1019 const char* src = "\xEF\xBF\xBE";
1015 int32_t expected[] = { 0xFFFE }; 1020 int32_t expected[] = {0xFFFE};
1016 int32_t dst[ARRAY_SIZE(expected)]; 1021 int32_t dst[ARRAY_SIZE(expected)];
1017 memset(dst, 0, sizeof(dst)); 1022 memset(dst, 0, sizeof(dst));
1018 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 1023 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
1019 EXPECT(is_valid); 1024 EXPECT(is_valid);
1020 EXPECT(!memcmp(expected, dst, sizeof(expected))); 1025 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1021 } 1026 }
1022 1027
1023 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF" 1028 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF"
1024 { 1029 {
1025 const char* src = "\xEF\xBF\xBF"; 1030 const char* src = "\xEF\xBF\xBF";
1026 int32_t expected[] = { 0xFFFF }; 1031 int32_t expected[] = {0xFFFF};
1027 int32_t dst[ARRAY_SIZE(expected)]; 1032 int32_t dst[ARRAY_SIZE(expected)];
1028 memset(dst, 0, sizeof(dst)); 1033 memset(dst, 0, sizeof(dst));
1029 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 1034 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
1030 EXPECT(is_valid); 1035 EXPECT(is_valid);
1031 EXPECT(!memcmp(expected, dst, sizeof(expected))); 1036 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1032 } 1037 }
1033 } 1038 }
1034 1039
1035 } // namespace dart 1040 } // namespace dart
OLDNEW
« no previous file with comments | « runtime/vm/unicode_data.cc ('k') | runtime/vm/unit_test.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698