OLD | NEW |
| (Empty) |
1 /* This Source Code Form is subject to the terms of the Mozilla Public | |
2 * License, v. 2.0. If a copy of the MPL was not distributed with this | |
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | |
4 | |
5 #ifdef DEBUG | |
6 static const char CVS_ID[] = "@(#) $RCSfile: utf8.c,v $ $Revision: 1.14 $ $Date:
2012/04/25 14:50:16 $"; | |
7 #endif /* DEBUG */ | |
8 | |
9 #include "seccomon.h" | |
10 #include "secport.h" | |
11 | |
12 #ifdef TEST_UTF8 | |
13 #include <assert.h> | |
14 #undef PORT_Assert | |
15 #define PORT_Assert assert | |
16 #endif | |
17 | |
18 /* | |
19 * From RFC 2044: | |
20 * | |
21 * UCS-4 range (hex.) UTF-8 octet sequence (binary) | |
22 * 0000 0000-0000 007F 0xxxxxxx | |
23 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx | |
24 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx | |
25 * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | |
26 * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx | |
27 * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx | |
28 */ | |
29 | |
30 /* | |
31 * From http://www.imc.org/draft-hoffman-utf16 | |
32 * | |
33 * For U on [0x00010000,0x0010FFFF]: Let U' = U - 0x00010000 | |
34 * | |
35 * U' = yyyyyyyyyyxxxxxxxxxx | |
36 * W1 = 110110yyyyyyyyyy | |
37 * W2 = 110111xxxxxxxxxx | |
38 */ | |
39 | |
40 /* | |
41 * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit | |
42 * character values. If you wish to use this code for working with | |
43 * host byte order values, define the following: | |
44 * | |
45 * #if IS_BIG_ENDIAN | |
46 * #define L_0 0 | |
47 * #define L_1 1 | |
48 * #define L_2 2 | |
49 * #define L_3 3 | |
50 * #define H_0 0 | |
51 * #define H_1 1 | |
52 * #else / * not everyone has elif * / | |
53 * #if IS_LITTLE_ENDIAN | |
54 * #define L_0 3 | |
55 * #define L_1 2 | |
56 * #define L_2 1 | |
57 * #define L_3 0 | |
58 * #define H_0 1 | |
59 * #define H_1 0 | |
60 * #else | |
61 * #error "PDP and NUXI support deferred" | |
62 * #endif / * IS_LITTLE_ENDIAN * / | |
63 * #endif / * IS_BIG_ENDIAN * / | |
64 */ | |
65 | |
/*
 * Byte positions used to index into a 4-byte character value (L_0..L_3)
 * and a 2-byte character value (H_0..H_1).  With the values below,
 * position 0 is the most significant byte, i.e. network byte order.
 */
66 #define L_0 0 | |
67 #define L_1 1 | |
68 #define L_2 2 | |
69 #define L_3 3 | |
70 #define H_0 0 | |
71 #define H_1 1 | |
72 | |
/* Sentinel returned by sec_port_read_utf8 when the input is not valid UTF-8. */
73 #define BAD_UTF8 ((PRUint32)-1) | |
74 | |
75 /* | |
76 * Parse a single UTF-8 character per the spec. in section 3.9 (D36) | |
77 * of Unicode 4.0.0. | |
78 * | |
79 * Parameters: | |
80 * index - Points to the byte offset in inBuf of character to read. On success, | |
81 * updated to the offset of the following character. | |
82 * inBuf - Input buffer, UTF-8 encoded | |
83 * inbufLen - Length of input buffer, in bytes. | |
84 * | |
85 * Returns: | |
86 * Success - The UCS4 encoded character | |
87 * Failure - BAD_UTF8 | |
88 */ | |
89 static PRUint32 | |
90 sec_port_read_utf8(unsigned int *index, unsigned char *inBuf, unsigned int inBuf
Len) | |
91 { | |
92 PRUint32 result; | |
93 unsigned int i = *index; | |
94 int bytes_left; | |
95 PRUint32 min_value; | |
96 | |
97 PORT_Assert(i < inBufLen); | |
98 | |
99 if ( (inBuf[i] & 0x80) == 0x00 ) { | |
100 result = inBuf[i++]; | |
101 bytes_left = 0; | |
102 min_value = 0; | |
103 } else if ( (inBuf[i] & 0xE0) == 0xC0 ) { | |
104 result = inBuf[i++] & 0x1F; | |
105 bytes_left = 1; | |
106 min_value = 0x80; | |
107 } else if ( (inBuf[i] & 0xF0) == 0xE0) { | |
108 result = inBuf[i++] & 0x0F; | |
109 bytes_left = 2; | |
110 min_value = 0x800; | |
111 } else if ( (inBuf[i] & 0xF8) == 0xF0) { | |
112 result = inBuf[i++] & 0x07; | |
113 bytes_left = 3; | |
114 min_value = 0x10000; | |
115 } else { | |
116 return BAD_UTF8; | |
117 } | |
118 | |
119 while (bytes_left--) { | |
120 if (i >= inBufLen || (inBuf[i] & 0xC0) != 0x80) return BAD_UTF8; | |
121 result = (result << 6) | (inBuf[i++] & 0x3F); | |
122 } | |
123 | |
124 /* Check for overlong sequences, surrogates, and outside unicode range */ | |
125 if (result < min_value || (result & 0xFFFFF800) == 0xD800 || result > 0x10FFFF
) { | |
126 return BAD_UTF8; | |
127 } | |
128 | |
129 *index = i; | |
130 return result; | |
131 } | |
132 | |
133 PRBool | |
134 sec_port_ucs4_utf8_conversion_function | |
135 ( | |
136 PRBool toUnicode, | |
137 unsigned char *inBuf, | |
138 unsigned int inBufLen, | |
139 unsigned char *outBuf, | |
140 unsigned int maxOutBufLen, | |
141 unsigned int *outBufLen | |
142 ) | |
143 { | |
144 PORT_Assert((unsigned int *)NULL != outBufLen); | |
145 | |
146 if( toUnicode ) { | |
147 unsigned int i, len = 0; | |
148 | |
149 for( i = 0; i < inBufLen; ) { | |
150 if( (inBuf[i] & 0x80) == 0x00 ) i += 1; | |
151 else if( (inBuf[i] & 0xE0) == 0xC0 ) i += 2; | |
152 else if( (inBuf[i] & 0xF0) == 0xE0 ) i += 3; | |
153 else if( (inBuf[i] & 0xF8) == 0xF0 ) i += 4; | |
154 else return PR_FALSE; | |
155 | |
156 len += 4; | |
157 } | |
158 | |
159 if( len > maxOutBufLen ) { | |
160 *outBufLen = len; | |
161 return PR_FALSE; | |
162 } | |
163 | |
164 len = 0; | |
165 | |
166 for( i = 0; i < inBufLen; ) { | |
167 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen); | |
168 | |
169 if (ucs4 == BAD_UTF8) return PR_FALSE; | |
170 | |
171 outBuf[len+L_0] = 0x00; | |
172 outBuf[len+L_1] = (unsigned char)(ucs4 >> 16); | |
173 outBuf[len+L_2] = (unsigned char)(ucs4 >> 8); | |
174 outBuf[len+L_3] = (unsigned char)ucs4; | |
175 | |
176 len += 4; | |
177 } | |
178 | |
179 *outBufLen = len; | |
180 return PR_TRUE; | |
181 } else { | |
182 unsigned int i, len = 0; | |
183 PORT_Assert((inBufLen % 4) == 0); | |
184 if ((inBufLen % 4) != 0) { | |
185 *outBufLen = 0; | |
186 return PR_FALSE; | |
187 } | |
188 | |
189 for( i = 0; i < inBufLen; i += 4 ) { | |
190 if( (inBuf[i+L_0] > 0x00) || (inBuf[i+L_1] > 0x10) ) { | |
191 *outBufLen = 0; | |
192 return PR_FALSE; | |
193 } else if( inBuf[i+L_1] >= 0x01 ) len += 4; | |
194 else if( inBuf[i+L_2] >= 0x08 ) len += 3; | |
195 else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) len += 2; | |
196 else len += 1; | |
197 } | |
198 | |
199 if( len > maxOutBufLen ) { | |
200 *outBufLen = len; | |
201 return PR_FALSE; | |
202 } | |
203 | |
204 len = 0; | |
205 | |
206 for( i = 0; i < inBufLen; i += 4 ) { | |
207 if( inBuf[i+L_1] >= 0x01 ) { | |
208 /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ | |
209 /* 00000000 000abcde fghijklm nopqrstu -> | |
210 11110abc 10defghi 10jklmno 10pqrstu */ | |
211 | |
212 outBuf[len+0] = 0xF0 | ((inBuf[i+L_1] & 0x1C) >> 2); | |
213 outBuf[len+1] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4) | |
214 | ((inBuf[i+L_2] & 0xF0) >> 4); | |
215 outBuf[len+2] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2) | |
216 | ((inBuf[i+L_3] & 0xC0) >> 6); | |
217 outBuf[len+3] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); | |
218 | |
219 len += 4; | |
220 } else if( inBuf[i+L_2] >= 0x08 ) { | |
221 /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ | |
222 /* 00000000 00000000 abcdefgh ijklmnop -> | |
223 1110abcd 10efghij 10klmnop */ | |
224 | |
225 outBuf[len+0] = 0xE0 | ((inBuf[i+L_2] & 0xF0) >> 4); | |
226 outBuf[len+1] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2) | |
227 | ((inBuf[i+L_3] & 0xC0) >> 6); | |
228 outBuf[len+2] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); | |
229 | |
230 len += 3; | |
231 } else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) { | |
232 /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */ | |
233 /* 00000000 00000000 00000abc defghijk -> | |
234 110abcde 10fghijk */ | |
235 | |
236 outBuf[len+0] = 0xC0 | ((inBuf[i+L_2] & 0x07) << 2) | |
237 | ((inBuf[i+L_3] & 0xC0) >> 6); | |
238 outBuf[len+1] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); | |
239 | |
240 len += 2; | |
241 } else { | |
242 /* 0000 0000-0000 007F -> 0xxxxxx */ | |
243 /* 00000000 00000000 00000000 0abcdefg -> | |
244 0abcdefg */ | |
245 | |
246 outBuf[len+0] = (inBuf[i+L_3] & 0x7F); | |
247 | |
248 len += 1; | |
249 } | |
250 } | |
251 | |
252 *outBufLen = len; | |
253 return PR_TRUE; | |
254 } | |
255 } | |
256 | |
257 PRBool | |
258 sec_port_ucs2_utf8_conversion_function | |
259 ( | |
260 PRBool toUnicode, | |
261 unsigned char *inBuf, | |
262 unsigned int inBufLen, | |
263 unsigned char *outBuf, | |
264 unsigned int maxOutBufLen, | |
265 unsigned int *outBufLen | |
266 ) | |
267 { | |
268 PORT_Assert((unsigned int *)NULL != outBufLen); | |
269 | |
270 if( toUnicode ) { | |
271 unsigned int i, len = 0; | |
272 | |
273 for( i = 0; i < inBufLen; ) { | |
274 if( (inBuf[i] & 0x80) == 0x00 ) { | |
275 i += 1; | |
276 len += 2; | |
277 } else if( (inBuf[i] & 0xE0) == 0xC0 ) { | |
278 i += 2; | |
279 len += 2; | |
280 } else if( (inBuf[i] & 0xF0) == 0xE0 ) { | |
281 i += 3; | |
282 len += 2; | |
283 } else if( (inBuf[i] & 0xF8) == 0xF0 ) { | |
284 i += 4; | |
285 len += 4; | |
286 } else return PR_FALSE; | |
287 } | |
288 | |
289 if( len > maxOutBufLen ) { | |
290 *outBufLen = len; | |
291 return PR_FALSE; | |
292 } | |
293 | |
294 len = 0; | |
295 | |
296 for( i = 0; i < inBufLen; ) { | |
297 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen); | |
298 | |
299 if (ucs4 == BAD_UTF8) return PR_FALSE; | |
300 | |
301 if( ucs4 < 0x10000) { | |
302 outBuf[len+H_0] = (unsigned char)(ucs4 >> 8); | |
303 outBuf[len+H_1] = (unsigned char)ucs4; | |
304 len += 2; | |
305 } else { | |
306 ucs4 -= 0x10000; | |
307 outBuf[len+0+H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3)); | |
308 outBuf[len+0+H_1] = (unsigned char)(ucs4 >> 10); | |
309 outBuf[len+2+H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3)); | |
310 outBuf[len+2+H_1] = (unsigned char)ucs4; | |
311 len += 4; | |
312 } | |
313 } | |
314 | |
315 *outBufLen = len; | |
316 return PR_TRUE; | |
317 } else { | |
318 unsigned int i, len = 0; | |
319 PORT_Assert((inBufLen % 2) == 0); | |
320 if ((inBufLen % 2) != 0) { | |
321 *outBufLen = 0; | |
322 return PR_FALSE; | |
323 } | |
324 | |
325 for( i = 0; i < inBufLen; i += 2 ) { | |
326 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_0] & 0x80) == 0x00) ) len += 1; | |
327 else if( inBuf[i+H_0] < 0x08 ) len += 2; | |
328 else if( ((inBuf[i+0+H_0] & 0xDC) == 0xD8) ) { | |
329 if( ((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2) ) { | |
330 i += 2; | |
331 len += 4; | |
332 } else { | |
333 return PR_FALSE; | |
334 } | |
335 } | |
336 else len += 3; | |
337 } | |
338 | |
339 if( len > maxOutBufLen ) { | |
340 *outBufLen = len; | |
341 return PR_FALSE; | |
342 } | |
343 | |
344 len = 0; | |
345 | |
346 for( i = 0; i < inBufLen; i += 2 ) { | |
347 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) { | |
348 /* 0000-007F -> 0xxxxxx */ | |
349 /* 00000000 0abcdefg -> 0abcdefg */ | |
350 | |
351 outBuf[len] = inBuf[i+H_1] & 0x7F; | |
352 | |
353 len += 1; | |
354 } else if( inBuf[i+H_0] < 0x08 ) { | |
355 /* 0080-07FF -> 110xxxxx 10xxxxxx */ | |
356 /* 00000abc defghijk -> 110abcde 10fghijk */ | |
357 | |
358 outBuf[len+0] = 0xC0 | ((inBuf[i+H_0] & 0x07) << 2) | |
359 | ((inBuf[i+H_1] & 0xC0) >> 6); | |
360 outBuf[len+1] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0); | |
361 | |
362 len += 2; | |
363 } else if( (inBuf[i+H_0] & 0xDC) == 0xD8 ) { | |
364 int abcde, BCDE; | |
365 | |
366 PORT_Assert(((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2)); | |
367 | |
368 /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ | |
369 /* 110110BC DEfghijk 110111lm nopqrstu -> | |
370 { Let abcde = BCDE + 1 } | |
371 11110abc 10defghi 10jklmno 10pqrstu */ | |
372 | |
373 BCDE = ((inBuf[i+H_0] & 0x03) << 2) | ((inBuf[i+H_1] & 0xC0) >> 6); | |
374 abcde = BCDE + 1; | |
375 | |
376 outBuf[len+0] = 0xF0 | ((abcde & 0x1C) >> 2); | |
377 outBuf[len+1] = 0x80 | ((abcde & 0x03) << 4) | |
378 | ((inBuf[i+0+H_1] & 0x3C) >> 2); | |
379 outBuf[len+2] = 0x80 | ((inBuf[i+0+H_1] & 0x03) << 4) | |
380 | ((inBuf[i+2+H_0] & 0x03) << 2) | |
381 | ((inBuf[i+2+H_1] & 0xC0) >> 6); | |
382 outBuf[len+3] = 0x80 | ((inBuf[i+2+H_1] & 0x3F) >> 0); | |
383 | |
384 i += 2; | |
385 len += 4; | |
386 } else { | |
387 /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ | |
388 /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */ | |
389 | |
390 outBuf[len+0] = 0xE0 | ((inBuf[i+H_0] & 0xF0) >> 4); | |
391 outBuf[len+1] = 0x80 | ((inBuf[i+H_0] & 0x0F) << 2) | |
392 | ((inBuf[i+H_1] & 0xC0) >> 6); | |
393 outBuf[len+2] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0); | |
394 | |
395 len += 3; | |
396 } | |
397 } | |
398 | |
399 *outBufLen = len; | |
400 return PR_TRUE; | |
401 } | |
402 } | |
403 | |
404 PRBool | |
405 sec_port_iso88591_utf8_conversion_function | |
406 ( | |
407 const unsigned char *inBuf, | |
408 unsigned int inBufLen, | |
409 unsigned char *outBuf, | |
410 unsigned int maxOutBufLen, | |
411 unsigned int *outBufLen | |
412 ) | |
413 { | |
414 unsigned int i, len = 0; | |
415 | |
416 PORT_Assert((unsigned int *)NULL != outBufLen); | |
417 | |
418 for( i = 0; i < inBufLen; i++) { | |
419 if( (inBuf[i] & 0x80) == 0x00 ) len += 1; | |
420 else len += 2; | |
421 } | |
422 | |
423 if( len > maxOutBufLen ) { | |
424 *outBufLen = len; | |
425 return PR_FALSE; | |
426 } | |
427 | |
428 len = 0; | |
429 | |
430 for( i = 0; i < inBufLen; i++) { | |
431 if( (inBuf[i] & 0x80) == 0x00 ) { | |
432 /* 00-7F -> 0xxxxxxx */ | |
433 /* 0abcdefg -> 0abcdefg */ | |
434 | |
435 outBuf[len] = inBuf[i]; | |
436 len += 1; | |
437 } else { | |
438 /* 80-FF <- 110xxxxx 10xxxxxx */ | |
439 /* 00000000 abcdefgh -> 110000ab 10cdefgh */ | |
440 | |
441 outBuf[len+0] = 0xC0 | ((inBuf[i] & 0xC0) >> 6); | |
442 outBuf[len+1] = 0x80 | ((inBuf[i] & 0x3F) >> 0); | |
443 | |
444 len += 2; | |
445 } | |
446 } | |
447 | |
448 *outBufLen = len; | |
449 return PR_TRUE; | |
450 } | |
451 | |
452 #ifdef TEST_UTF8 | |
453 | |
454 #include <stdio.h> | |
455 #include <string.h> | |
456 #include <stdlib.h> | |
457 #include <netinet/in.h> /* for htonl and htons */ | |
458 | |
459 /* | |
460 * UCS-4 vectors | |
461 */ | |
462 | |
/* One UCS-4 test vector, as populated by the ucs4[] table in this file. */
463 struct ucs4 { | |
/* Character value. */
464 PRUint32 c; | |
/* UTF-8 byte string paired with c in the table (presumably the expected encoding; usage is outside this view). */
465 char *utf8; | |
466 }; | |
467 | |
468 /* | |
469 * UCS-2 vectors | |
470 */ | |
471 | |
/* One UCS-2 test vector, as populated by the ucs2[] table in this file. */
472 struct ucs2 { | |
/* 16-bit character value. */
473 PRUint16 c; | |
/* UTF-8 byte string paired with c in the table (presumably the expected encoding; usage is outside this view). */
474 char *utf8; | |
475 }; | |
476 | |
477 /* | |
478 * UTF-16 vectors | |
479 */ | |
480 | |
/* One UTF-16 test vector, as populated by the utf16[] table in this file. */
481 struct utf16 { | |
/* Character value (the table entries start at 0x00010000). */
482 PRUint32 c; | |
/* The two 16-bit units paired with c in the table: a D8xx unit followed by a DCxx..DFxx unit. */
483 PRUint16 w[2]; | |
484 }; | |
485 | |
486 | |
487 /* | |
488 * UCS-4 vectors | |
489 */ | |
490 | |
491 struct ucs4 ucs4[] = { | |
492 { 0x00000001, "\x01" }, | |
493 { 0x00000002, "\x02" }, | |
494 { 0x00000003, "\x03" }, | |
495 { 0x00000004, "\x04" }, | |
496 { 0x00000007, "\x07" }, | |
497 { 0x00000008, "\x08" }, | |
498 { 0x0000000F, "\x0F" }, | |
499 { 0x00000010, "\x10" }, | |
500 { 0x0000001F, "\x1F" }, | |
501 { 0x00000020, "\x20" }, | |
502 { 0x0000003F, "\x3F" }, | |
503 { 0x00000040, "\x40" }, | |
504 { 0x0000007F, "\x7F" }, | |
505 | |
506 { 0x00000080, "\xC2\x80" }, | |
507 { 0x00000081, "\xC2\x81" }, | |
508 { 0x00000082, "\xC2\x82" }, | |
509 { 0x00000084, "\xC2\x84" }, | |
510 { 0x00000088, "\xC2\x88" }, | |
511 { 0x00000090, "\xC2\x90" }, | |
512 { 0x000000A0, "\xC2\xA0" }, | |
513 { 0x000000C0, "\xC3\x80" }, | |
514 { 0x000000FF, "\xC3\xBF" }, | |
515 { 0x00000100, "\xC4\x80" }, | |
516 { 0x00000101, "\xC4\x81" }, | |
517 { 0x00000102, "\xC4\x82" }, | |
518 { 0x00000104, "\xC4\x84" }, | |
519 { 0x00000108, "\xC4\x88" }, | |
520 { 0x00000110, "\xC4\x90" }, | |
521 { 0x00000120, "\xC4\xA0" }, | |
522 { 0x00000140, "\xC5\x80" }, | |
523 { 0x00000180, "\xC6\x80" }, | |
524 { 0x000001FF, "\xC7\xBF" }, | |
525 { 0x00000200, "\xC8\x80" }, | |
526 { 0x00000201, "\xC8\x81" }, | |
527 { 0x00000202, "\xC8\x82" }, | |
528 { 0x00000204, "\xC8\x84" }, | |
529 { 0x00000208, "\xC8\x88" }, | |
530 { 0x00000210, "\xC8\x90" }, | |
531 { 0x00000220, "\xC8\xA0" }, | |
532 { 0x00000240, "\xC9\x80" }, | |
533 { 0x00000280, "\xCA\x80" }, | |
534 { 0x00000300, "\xCC\x80" }, | |
535 { 0x000003FF, "\xCF\xBF" }, | |
536 { 0x00000400, "\xD0\x80" }, | |
537 { 0x00000401, "\xD0\x81" }, | |
538 { 0x00000402, "\xD0\x82" }, | |
539 { 0x00000404, "\xD0\x84" }, | |
540 { 0x00000408, "\xD0\x88" }, | |
541 { 0x00000410, "\xD0\x90" }, | |
542 { 0x00000420, "\xD0\xA0" }, | |
543 { 0x00000440, "\xD1\x80" }, | |
544 { 0x00000480, "\xD2\x80" }, | |
545 { 0x00000500, "\xD4\x80" }, | |
546 { 0x00000600, "\xD8\x80" }, | |
547 { 0x000007FF, "\xDF\xBF" }, | |
548 | |
549 { 0x00000800, "\xE0\xA0\x80" }, | |
550 { 0x00000801, "\xE0\xA0\x81" }, | |
551 { 0x00000802, "\xE0\xA0\x82" }, | |
552 { 0x00000804, "\xE0\xA0\x84" }, | |
553 { 0x00000808, "\xE0\xA0\x88" }, | |
554 { 0x00000810, "\xE0\xA0\x90" }, | |
555 { 0x00000820, "\xE0\xA0\xA0" }, | |
556 { 0x00000840, "\xE0\xA1\x80" }, | |
557 { 0x00000880, "\xE0\xA2\x80" }, | |
558 { 0x00000900, "\xE0\xA4\x80" }, | |
559 { 0x00000A00, "\xE0\xA8\x80" }, | |
560 { 0x00000C00, "\xE0\xB0\x80" }, | |
561 { 0x00000FFF, "\xE0\xBF\xBF" }, | |
562 { 0x00001000, "\xE1\x80\x80" }, | |
563 { 0x00001001, "\xE1\x80\x81" }, | |
564 { 0x00001002, "\xE1\x80\x82" }, | |
565 { 0x00001004, "\xE1\x80\x84" }, | |
566 { 0x00001008, "\xE1\x80\x88" }, | |
567 { 0x00001010, "\xE1\x80\x90" }, | |
568 { 0x00001020, "\xE1\x80\xA0" }, | |
569 { 0x00001040, "\xE1\x81\x80" }, | |
570 { 0x00001080, "\xE1\x82\x80" }, | |
571 { 0x00001100, "\xE1\x84\x80" }, | |
572 { 0x00001200, "\xE1\x88\x80" }, | |
573 { 0x00001400, "\xE1\x90\x80" }, | |
574 { 0x00001800, "\xE1\xA0\x80" }, | |
575 { 0x00001FFF, "\xE1\xBF\xBF" }, | |
576 { 0x00002000, "\xE2\x80\x80" }, | |
577 { 0x00002001, "\xE2\x80\x81" }, | |
578 { 0x00002002, "\xE2\x80\x82" }, | |
579 { 0x00002004, "\xE2\x80\x84" }, | |
580 { 0x00002008, "\xE2\x80\x88" }, | |
581 { 0x00002010, "\xE2\x80\x90" }, | |
582 { 0x00002020, "\xE2\x80\xA0" }, | |
583 { 0x00002040, "\xE2\x81\x80" }, | |
584 { 0x00002080, "\xE2\x82\x80" }, | |
585 { 0x00002100, "\xE2\x84\x80" }, | |
586 { 0x00002200, "\xE2\x88\x80" }, | |
587 { 0x00002400, "\xE2\x90\x80" }, | |
588 { 0x00002800, "\xE2\xA0\x80" }, | |
589 { 0x00003000, "\xE3\x80\x80" }, | |
590 { 0x00003FFF, "\xE3\xBF\xBF" }, | |
591 { 0x00004000, "\xE4\x80\x80" }, | |
592 { 0x00004001, "\xE4\x80\x81" }, | |
593 { 0x00004002, "\xE4\x80\x82" }, | |
594 { 0x00004004, "\xE4\x80\x84" }, | |
595 { 0x00004008, "\xE4\x80\x88" }, | |
596 { 0x00004010, "\xE4\x80\x90" }, | |
597 { 0x00004020, "\xE4\x80\xA0" }, | |
598 { 0x00004040, "\xE4\x81\x80" }, | |
599 { 0x00004080, "\xE4\x82\x80" }, | |
600 { 0x00004100, "\xE4\x84\x80" }, | |
601 { 0x00004200, "\xE4\x88\x80" }, | |
602 { 0x00004400, "\xE4\x90\x80" }, | |
603 { 0x00004800, "\xE4\xA0\x80" }, | |
604 { 0x00005000, "\xE5\x80\x80" }, | |
605 { 0x00006000, "\xE6\x80\x80" }, | |
606 { 0x00007FFF, "\xE7\xBF\xBF" }, | |
607 { 0x00008000, "\xE8\x80\x80" }, | |
608 { 0x00008001, "\xE8\x80\x81" }, | |
609 { 0x00008002, "\xE8\x80\x82" }, | |
610 { 0x00008004, "\xE8\x80\x84" }, | |
611 { 0x00008008, "\xE8\x80\x88" }, | |
612 { 0x00008010, "\xE8\x80\x90" }, | |
613 { 0x00008020, "\xE8\x80\xA0" }, | |
614 { 0x00008040, "\xE8\x81\x80" }, | |
615 { 0x00008080, "\xE8\x82\x80" }, | |
616 { 0x00008100, "\xE8\x84\x80" }, | |
617 { 0x00008200, "\xE8\x88\x80" }, | |
618 { 0x00008400, "\xE8\x90\x80" }, | |
619 { 0x00008800, "\xE8\xA0\x80" }, | |
620 { 0x00009000, "\xE9\x80\x80" }, | |
621 { 0x0000A000, "\xEA\x80\x80" }, | |
622 { 0x0000C000, "\xEC\x80\x80" }, | |
623 { 0x0000FFFF, "\xEF\xBF\xBF" }, | |
624 | |
625 { 0x00010000, "\xF0\x90\x80\x80" }, | |
626 { 0x00010001, "\xF0\x90\x80\x81" }, | |
627 { 0x00010002, "\xF0\x90\x80\x82" }, | |
628 { 0x00010004, "\xF0\x90\x80\x84" }, | |
629 { 0x00010008, "\xF0\x90\x80\x88" }, | |
630 { 0x00010010, "\xF0\x90\x80\x90" }, | |
631 { 0x00010020, "\xF0\x90\x80\xA0" }, | |
632 { 0x00010040, "\xF0\x90\x81\x80" }, | |
633 { 0x00010080, "\xF0\x90\x82\x80" }, | |
634 { 0x00010100, "\xF0\x90\x84\x80" }, | |
635 { 0x00010200, "\xF0\x90\x88\x80" }, | |
636 { 0x00010400, "\xF0\x90\x90\x80" }, | |
637 { 0x00010800, "\xF0\x90\xA0\x80" }, | |
638 { 0x00011000, "\xF0\x91\x80\x80" }, | |
639 { 0x00012000, "\xF0\x92\x80\x80" }, | |
640 { 0x00014000, "\xF0\x94\x80\x80" }, | |
641 { 0x00018000, "\xF0\x98\x80\x80" }, | |
642 { 0x0001FFFF, "\xF0\x9F\xBF\xBF" }, | |
643 { 0x00020000, "\xF0\xA0\x80\x80" }, | |
644 { 0x00020001, "\xF0\xA0\x80\x81" }, | |
645 { 0x00020002, "\xF0\xA0\x80\x82" }, | |
646 { 0x00020004, "\xF0\xA0\x80\x84" }, | |
647 { 0x00020008, "\xF0\xA0\x80\x88" }, | |
648 { 0x00020010, "\xF0\xA0\x80\x90" }, | |
649 { 0x00020020, "\xF0\xA0\x80\xA0" }, | |
650 { 0x00020040, "\xF0\xA0\x81\x80" }, | |
651 { 0x00020080, "\xF0\xA0\x82\x80" }, | |
652 { 0x00020100, "\xF0\xA0\x84\x80" }, | |
653 { 0x00020200, "\xF0\xA0\x88\x80" }, | |
654 { 0x00020400, "\xF0\xA0\x90\x80" }, | |
655 { 0x00020800, "\xF0\xA0\xA0\x80" }, | |
656 { 0x00021000, "\xF0\xA1\x80\x80" }, | |
657 { 0x00022000, "\xF0\xA2\x80\x80" }, | |
658 { 0x00024000, "\xF0\xA4\x80\x80" }, | |
659 { 0x00028000, "\xF0\xA8\x80\x80" }, | |
660 { 0x00030000, "\xF0\xB0\x80\x80" }, | |
661 { 0x0003FFFF, "\xF0\xBF\xBF\xBF" }, | |
662 { 0x00040000, "\xF1\x80\x80\x80" }, | |
663 { 0x00040001, "\xF1\x80\x80\x81" }, | |
664 { 0x00040002, "\xF1\x80\x80\x82" }, | |
665 { 0x00040004, "\xF1\x80\x80\x84" }, | |
666 { 0x00040008, "\xF1\x80\x80\x88" }, | |
667 { 0x00040010, "\xF1\x80\x80\x90" }, | |
668 { 0x00040020, "\xF1\x80\x80\xA0" }, | |
669 { 0x00040040, "\xF1\x80\x81\x80" }, | |
670 { 0x00040080, "\xF1\x80\x82\x80" }, | |
671 { 0x00040100, "\xF1\x80\x84\x80" }, | |
672 { 0x00040200, "\xF1\x80\x88\x80" }, | |
673 { 0x00040400, "\xF1\x80\x90\x80" }, | |
674 { 0x00040800, "\xF1\x80\xA0\x80" }, | |
675 { 0x00041000, "\xF1\x81\x80\x80" }, | |
676 { 0x00042000, "\xF1\x82\x80\x80" }, | |
677 { 0x00044000, "\xF1\x84\x80\x80" }, | |
678 { 0x00048000, "\xF1\x88\x80\x80" }, | |
679 { 0x00050000, "\xF1\x90\x80\x80" }, | |
680 { 0x00060000, "\xF1\xA0\x80\x80" }, | |
681 { 0x0007FFFF, "\xF1\xBF\xBF\xBF" }, | |
682 { 0x00080000, "\xF2\x80\x80\x80" }, | |
683 { 0x00080001, "\xF2\x80\x80\x81" }, | |
684 { 0x00080002, "\xF2\x80\x80\x82" }, | |
685 { 0x00080004, "\xF2\x80\x80\x84" }, | |
686 { 0x00080008, "\xF2\x80\x80\x88" }, | |
687 { 0x00080010, "\xF2\x80\x80\x90" }, | |
688 { 0x00080020, "\xF2\x80\x80\xA0" }, | |
689 { 0x00080040, "\xF2\x80\x81\x80" }, | |
690 { 0x00080080, "\xF2\x80\x82\x80" }, | |
691 { 0x00080100, "\xF2\x80\x84\x80" }, | |
692 { 0x00080200, "\xF2\x80\x88\x80" }, | |
693 { 0x00080400, "\xF2\x80\x90\x80" }, | |
694 { 0x00080800, "\xF2\x80\xA0\x80" }, | |
695 { 0x00081000, "\xF2\x81\x80\x80" }, | |
696 { 0x00082000, "\xF2\x82\x80\x80" }, | |
697 { 0x00084000, "\xF2\x84\x80\x80" }, | |
698 { 0x00088000, "\xF2\x88\x80\x80" }, | |
699 { 0x00090000, "\xF2\x90\x80\x80" }, | |
700 { 0x000A0000, "\xF2\xA0\x80\x80" }, | |
701 { 0x000C0000, "\xF3\x80\x80\x80" }, | |
702 { 0x000FFFFF, "\xF3\xBF\xBF\xBF" }, | |
703 { 0x00100000, "\xF4\x80\x80\x80" }, | |
704 { 0x00100001, "\xF4\x80\x80\x81" }, | |
705 { 0x00100002, "\xF4\x80\x80\x82" }, | |
706 { 0x00100004, "\xF4\x80\x80\x84" }, | |
707 { 0x00100008, "\xF4\x80\x80\x88" }, | |
708 { 0x00100010, "\xF4\x80\x80\x90" }, | |
709 { 0x00100020, "\xF4\x80\x80\xA0" }, | |
710 { 0x00100040, "\xF4\x80\x81\x80" }, | |
711 { 0x00100080, "\xF4\x80\x82\x80" }, | |
712 { 0x00100100, "\xF4\x80\x84\x80" }, | |
713 { 0x00100200, "\xF4\x80\x88\x80" }, | |
714 { 0x00100400, "\xF4\x80\x90\x80" }, | |
715 { 0x00100800, "\xF4\x80\xA0\x80" }, | |
716 { 0x00101000, "\xF4\x81\x80\x80" }, | |
717 { 0x00102000, "\xF4\x82\x80\x80" }, | |
718 { 0x00104000, "\xF4\x84\x80\x80" }, | |
719 { 0x00108000, "\xF4\x88\x80\x80" }, | |
720 { 0x0010FFFF, "\xF4\x8F\xBF\xBF" }, | |
721 }; | |
722 | |
723 /* | |
724 * UCS-2 vectors | |
725 */ | |
726 | |
727 struct ucs2 ucs2[] = { | |
728 { 0x0001, "\x01" }, | |
729 { 0x0002, "\x02" }, | |
730 { 0x0003, "\x03" }, | |
731 { 0x0004, "\x04" }, | |
732 { 0x0007, "\x07" }, | |
733 { 0x0008, "\x08" }, | |
734 { 0x000F, "\x0F" }, | |
735 { 0x0010, "\x10" }, | |
736 { 0x001F, "\x1F" }, | |
737 { 0x0020, "\x20" }, | |
738 { 0x003F, "\x3F" }, | |
739 { 0x0040, "\x40" }, | |
740 { 0x007F, "\x7F" }, | |
741 | |
742 { 0x0080, "\xC2\x80" }, | |
743 { 0x0081, "\xC2\x81" }, | |
744 { 0x0082, "\xC2\x82" }, | |
745 { 0x0084, "\xC2\x84" }, | |
746 { 0x0088, "\xC2\x88" }, | |
747 { 0x0090, "\xC2\x90" }, | |
748 { 0x00A0, "\xC2\xA0" }, | |
749 { 0x00C0, "\xC3\x80" }, | |
750 { 0x00FF, "\xC3\xBF" }, | |
751 { 0x0100, "\xC4\x80" }, | |
752 { 0x0101, "\xC4\x81" }, | |
753 { 0x0102, "\xC4\x82" }, | |
754 { 0x0104, "\xC4\x84" }, | |
755 { 0x0108, "\xC4\x88" }, | |
756 { 0x0110, "\xC4\x90" }, | |
757 { 0x0120, "\xC4\xA0" }, | |
758 { 0x0140, "\xC5\x80" }, | |
759 { 0x0180, "\xC6\x80" }, | |
760 { 0x01FF, "\xC7\xBF" }, | |
761 { 0x0200, "\xC8\x80" }, | |
762 { 0x0201, "\xC8\x81" }, | |
763 { 0x0202, "\xC8\x82" }, | |
764 { 0x0204, "\xC8\x84" }, | |
765 { 0x0208, "\xC8\x88" }, | |
766 { 0x0210, "\xC8\x90" }, | |
767 { 0x0220, "\xC8\xA0" }, | |
768 { 0x0240, "\xC9\x80" }, | |
769 { 0x0280, "\xCA\x80" }, | |
770 { 0x0300, "\xCC\x80" }, | |
771 { 0x03FF, "\xCF\xBF" }, | |
772 { 0x0400, "\xD0\x80" }, | |
773 { 0x0401, "\xD0\x81" }, | |
774 { 0x0402, "\xD0\x82" }, | |
775 { 0x0404, "\xD0\x84" }, | |
776 { 0x0408, "\xD0\x88" }, | |
777 { 0x0410, "\xD0\x90" }, | |
778 { 0x0420, "\xD0\xA0" }, | |
779 { 0x0440, "\xD1\x80" }, | |
780 { 0x0480, "\xD2\x80" }, | |
781 { 0x0500, "\xD4\x80" }, | |
782 { 0x0600, "\xD8\x80" }, | |
783 { 0x07FF, "\xDF\xBF" }, | |
784 | |
785 { 0x0800, "\xE0\xA0\x80" }, | |
786 { 0x0801, "\xE0\xA0\x81" }, | |
787 { 0x0802, "\xE0\xA0\x82" }, | |
788 { 0x0804, "\xE0\xA0\x84" }, | |
789 { 0x0808, "\xE0\xA0\x88" }, | |
790 { 0x0810, "\xE0\xA0\x90" }, | |
791 { 0x0820, "\xE0\xA0\xA0" }, | |
792 { 0x0840, "\xE0\xA1\x80" }, | |
793 { 0x0880, "\xE0\xA2\x80" }, | |
794 { 0x0900, "\xE0\xA4\x80" }, | |
795 { 0x0A00, "\xE0\xA8\x80" }, | |
796 { 0x0C00, "\xE0\xB0\x80" }, | |
797 { 0x0FFF, "\xE0\xBF\xBF" }, | |
798 { 0x1000, "\xE1\x80\x80" }, | |
799 { 0x1001, "\xE1\x80\x81" }, | |
800 { 0x1002, "\xE1\x80\x82" }, | |
801 { 0x1004, "\xE1\x80\x84" }, | |
802 { 0x1008, "\xE1\x80\x88" }, | |
803 { 0x1010, "\xE1\x80\x90" }, | |
804 { 0x1020, "\xE1\x80\xA0" }, | |
805 { 0x1040, "\xE1\x81\x80" }, | |
806 { 0x1080, "\xE1\x82\x80" }, | |
807 { 0x1100, "\xE1\x84\x80" }, | |
808 { 0x1200, "\xE1\x88\x80" }, | |
809 { 0x1400, "\xE1\x90\x80" }, | |
810 { 0x1800, "\xE1\xA0\x80" }, | |
811 { 0x1FFF, "\xE1\xBF\xBF" }, | |
812 { 0x2000, "\xE2\x80\x80" }, | |
813 { 0x2001, "\xE2\x80\x81" }, | |
814 { 0x2002, "\xE2\x80\x82" }, | |
815 { 0x2004, "\xE2\x80\x84" }, | |
816 { 0x2008, "\xE2\x80\x88" }, | |
817 { 0x2010, "\xE2\x80\x90" }, | |
818 { 0x2020, "\xE2\x80\xA0" }, | |
819 { 0x2040, "\xE2\x81\x80" }, | |
820 { 0x2080, "\xE2\x82\x80" }, | |
821 { 0x2100, "\xE2\x84\x80" }, | |
822 { 0x2200, "\xE2\x88\x80" }, | |
823 { 0x2400, "\xE2\x90\x80" }, | |
824 { 0x2800, "\xE2\xA0\x80" }, | |
825 { 0x3000, "\xE3\x80\x80" }, | |
826 { 0x3FFF, "\xE3\xBF\xBF" }, | |
827 { 0x4000, "\xE4\x80\x80" }, | |
828 { 0x4001, "\xE4\x80\x81" }, | |
829 { 0x4002, "\xE4\x80\x82" }, | |
830 { 0x4004, "\xE4\x80\x84" }, | |
831 { 0x4008, "\xE4\x80\x88" }, | |
832 { 0x4010, "\xE4\x80\x90" }, | |
833 { 0x4020, "\xE4\x80\xA0" }, | |
834 { 0x4040, "\xE4\x81\x80" }, | |
835 { 0x4080, "\xE4\x82\x80" }, | |
836 { 0x4100, "\xE4\x84\x80" }, | |
837 { 0x4200, "\xE4\x88\x80" }, | |
838 { 0x4400, "\xE4\x90\x80" }, | |
839 { 0x4800, "\xE4\xA0\x80" }, | |
840 { 0x5000, "\xE5\x80\x80" }, | |
841 { 0x6000, "\xE6\x80\x80" }, | |
842 { 0x7FFF, "\xE7\xBF\xBF" }, | |
843 { 0x8000, "\xE8\x80\x80" }, | |
844 { 0x8001, "\xE8\x80\x81" }, | |
845 { 0x8002, "\xE8\x80\x82" }, | |
846 { 0x8004, "\xE8\x80\x84" }, | |
847 { 0x8008, "\xE8\x80\x88" }, | |
848 { 0x8010, "\xE8\x80\x90" }, | |
849 { 0x8020, "\xE8\x80\xA0" }, | |
850 { 0x8040, "\xE8\x81\x80" }, | |
851 { 0x8080, "\xE8\x82\x80" }, | |
852 { 0x8100, "\xE8\x84\x80" }, | |
853 { 0x8200, "\xE8\x88\x80" }, | |
854 { 0x8400, "\xE8\x90\x80" }, | |
855 { 0x8800, "\xE8\xA0\x80" }, | |
856 { 0x9000, "\xE9\x80\x80" }, | |
857 { 0xA000, "\xEA\x80\x80" }, | |
858 { 0xC000, "\xEC\x80\x80" }, | |
859 { 0xFFFF, "\xEF\xBF\xBF" } | |
860 | |
861 }; | |
862 | |
863 /* | |
864 * UTF-16 vectors | |
865 */ | |
866 | |
867 struct utf16 utf16[] = { | |
868 { 0x00010000, { 0xD800, 0xDC00 } }, | |
869 { 0x00010001, { 0xD800, 0xDC01 } }, | |
870 { 0x00010002, { 0xD800, 0xDC02 } }, | |
871 { 0x00010003, { 0xD800, 0xDC03 } }, | |
872 { 0x00010004, { 0xD800, 0xDC04 } }, | |
873 { 0x00010007, { 0xD800, 0xDC07 } }, | |
874 { 0x00010008, { 0xD800, 0xDC08 } }, | |
875 { 0x0001000F, { 0xD800, 0xDC0F } }, | |
876 { 0x00010010, { 0xD800, 0xDC10 } }, | |
877 { 0x0001001F, { 0xD800, 0xDC1F } }, | |
878 { 0x00010020, { 0xD800, 0xDC20 } }, | |
879 { 0x0001003F, { 0xD800, 0xDC3F } }, | |
880 { 0x00010040, { 0xD800, 0xDC40 } }, | |
881 { 0x0001007F, { 0xD800, 0xDC7F } }, | |
882 { 0x00010080, { 0xD800, 0xDC80 } }, | |
883 { 0x00010081, { 0xD800, 0xDC81 } }, | |
884 { 0x00010082, { 0xD800, 0xDC82 } }, | |
885 { 0x00010084, { 0xD800, 0xDC84 } }, | |
886 { 0x00010088, { 0xD800, 0xDC88 } }, | |
887 { 0x00010090, { 0xD800, 0xDC90 } }, | |
888 { 0x000100A0, { 0xD800, 0xDCA0 } }, | |
889 { 0x000100C0, { 0xD800, 0xDCC0 } }, | |
890 { 0x000100FF, { 0xD800, 0xDCFF } }, | |
891 { 0x00010100, { 0xD800, 0xDD00 } }, | |
892 { 0x00010101, { 0xD800, 0xDD01 } }, | |
893 { 0x00010102, { 0xD800, 0xDD02 } }, | |
894 { 0x00010104, { 0xD800, 0xDD04 } }, | |
895 { 0x00010108, { 0xD800, 0xDD08 } }, | |
896 { 0x00010110, { 0xD800, 0xDD10 } }, | |
897 { 0x00010120, { 0xD800, 0xDD20 } }, | |
898 { 0x00010140, { 0xD800, 0xDD40 } }, | |
899 { 0x00010180, { 0xD800, 0xDD80 } }, | |
900 { 0x000101FF, { 0xD800, 0xDDFF } }, | |
901 { 0x00010200, { 0xD800, 0xDE00 } }, | |
902 { 0x00010201, { 0xD800, 0xDE01 } }, | |
903 { 0x00010202, { 0xD800, 0xDE02 } }, | |
904 { 0x00010204, { 0xD800, 0xDE04 } }, | |
905 { 0x00010208, { 0xD800, 0xDE08 } }, | |
906 { 0x00010210, { 0xD800, 0xDE10 } }, | |
907 { 0x00010220, { 0xD800, 0xDE20 } }, | |
908 { 0x00010240, { 0xD800, 0xDE40 } }, | |
909 { 0x00010280, { 0xD800, 0xDE80 } }, | |
910 { 0x00010300, { 0xD800, 0xDF00 } }, | |
911 { 0x000103FF, { 0xD800, 0xDFFF } }, | |
912 { 0x00010400, { 0xD801, 0xDC00 } }, | |
913 { 0x00010401, { 0xD801, 0xDC01 } }, | |
914 { 0x00010402, { 0xD801, 0xDC02 } }, | |
915 { 0x00010404, { 0xD801, 0xDC04 } }, | |
916 { 0x00010408, { 0xD801, 0xDC08 } }, | |
917 { 0x00010410, { 0xD801, 0xDC10 } }, | |
918 { 0x00010420, { 0xD801, 0xDC20 } }, | |
919 { 0x00010440, { 0xD801, 0xDC40 } }, | |
920 { 0x00010480, { 0xD801, 0xDC80 } }, | |
921 { 0x00010500, { 0xD801, 0xDD00 } }, | |
922 { 0x00010600, { 0xD801, 0xDE00 } }, | |
923 { 0x000107FF, { 0xD801, 0xDFFF } }, | |
924 { 0x00010800, { 0xD802, 0xDC00 } }, | |
925 { 0x00010801, { 0xD802, 0xDC01 } }, | |
926 { 0x00010802, { 0xD802, 0xDC02 } }, | |
927 { 0x00010804, { 0xD802, 0xDC04 } }, | |
928 { 0x00010808, { 0xD802, 0xDC08 } }, | |
929 { 0x00010810, { 0xD802, 0xDC10 } }, | |
930 { 0x00010820, { 0xD802, 0xDC20 } }, | |
931 { 0x00010840, { 0xD802, 0xDC40 } }, | |
932 { 0x00010880, { 0xD802, 0xDC80 } }, | |
933 { 0x00010900, { 0xD802, 0xDD00 } }, | |
934 { 0x00010A00, { 0xD802, 0xDE00 } }, | |
935 { 0x00010C00, { 0xD803, 0xDC00 } }, | |
936 { 0x00010FFF, { 0xD803, 0xDFFF } }, | |
937 { 0x00011000, { 0xD804, 0xDC00 } }, | |
938 { 0x00011001, { 0xD804, 0xDC01 } }, | |
939 { 0x00011002, { 0xD804, 0xDC02 } }, | |
940 { 0x00011004, { 0xD804, 0xDC04 } }, | |
941 { 0x00011008, { 0xD804, 0xDC08 } }, | |
942 { 0x00011010, { 0xD804, 0xDC10 } }, | |
943 { 0x00011020, { 0xD804, 0xDC20 } }, | |
944 { 0x00011040, { 0xD804, 0xDC40 } }, | |
945 { 0x00011080, { 0xD804, 0xDC80 } }, | |
946 { 0x00011100, { 0xD804, 0xDD00 } }, | |
947 { 0x00011200, { 0xD804, 0xDE00 } }, | |
948 { 0x00011400, { 0xD805, 0xDC00 } }, | |
949 { 0x00011800, { 0xD806, 0xDC00 } }, | |
950 { 0x00011FFF, { 0xD807, 0xDFFF } }, | |
951 { 0x00012000, { 0xD808, 0xDC00 } }, | |
952 { 0x00012001, { 0xD808, 0xDC01 } }, | |
953 { 0x00012002, { 0xD808, 0xDC02 } }, | |
954 { 0x00012004, { 0xD808, 0xDC04 } }, | |
955 { 0x00012008, { 0xD808, 0xDC08 } }, | |
956 { 0x00012010, { 0xD808, 0xDC10 } }, | |
957 { 0x00012020, { 0xD808, 0xDC20 } }, | |
958 { 0x00012040, { 0xD808, 0xDC40 } }, | |
959 { 0x00012080, { 0xD808, 0xDC80 } }, | |
960 { 0x00012100, { 0xD808, 0xDD00 } }, | |
961 { 0x00012200, { 0xD808, 0xDE00 } }, | |
962 { 0x00012400, { 0xD809, 0xDC00 } }, | |
963 { 0x00012800, { 0xD80A, 0xDC00 } }, | |
964 { 0x00013000, { 0xD80C, 0xDC00 } }, | |
965 { 0x00013FFF, { 0xD80F, 0xDFFF } }, | |
966 { 0x00014000, { 0xD810, 0xDC00 } }, | |
967 { 0x00014001, { 0xD810, 0xDC01 } }, | |
968 { 0x00014002, { 0xD810, 0xDC02 } }, | |
969 { 0x00014004, { 0xD810, 0xDC04 } }, | |
970 { 0x00014008, { 0xD810, 0xDC08 } }, | |
971 { 0x00014010, { 0xD810, 0xDC10 } }, | |
972 { 0x00014020, { 0xD810, 0xDC20 } }, | |
973 { 0x00014040, { 0xD810, 0xDC40 } }, | |
974 { 0x00014080, { 0xD810, 0xDC80 } }, | |
975 { 0x00014100, { 0xD810, 0xDD00 } }, | |
976 { 0x00014200, { 0xD810, 0xDE00 } }, | |
977 { 0x00014400, { 0xD811, 0xDC00 } }, | |
978 { 0x00014800, { 0xD812, 0xDC00 } }, | |
979 { 0x00015000, { 0xD814, 0xDC00 } }, | |
980 { 0x00016000, { 0xD818, 0xDC00 } }, | |
981 { 0x00017FFF, { 0xD81F, 0xDFFF } }, | |
982 { 0x00018000, { 0xD820, 0xDC00 } }, | |
983 { 0x00018001, { 0xD820, 0xDC01 } }, | |
984 { 0x00018002, { 0xD820, 0xDC02 } }, | |
985 { 0x00018004, { 0xD820, 0xDC04 } }, | |
986 { 0x00018008, { 0xD820, 0xDC08 } }, | |
987 { 0x00018010, { 0xD820, 0xDC10 } }, | |
988 { 0x00018020, { 0xD820, 0xDC20 } }, | |
989 { 0x00018040, { 0xD820, 0xDC40 } }, | |
990 { 0x00018080, { 0xD820, 0xDC80 } }, | |
991 { 0x00018100, { 0xD820, 0xDD00 } }, | |
992 { 0x00018200, { 0xD820, 0xDE00 } }, | |
993 { 0x00018400, { 0xD821, 0xDC00 } }, | |
994 { 0x00018800, { 0xD822, 0xDC00 } }, | |
995 { 0x00019000, { 0xD824, 0xDC00 } }, | |
996 { 0x0001A000, { 0xD828, 0xDC00 } }, | |
997 { 0x0001C000, { 0xD830, 0xDC00 } }, | |
998 { 0x0001FFFF, { 0xD83F, 0xDFFF } }, | |
999 { 0x00020000, { 0xD840, 0xDC00 } }, | |
1000 { 0x00020001, { 0xD840, 0xDC01 } }, | |
1001 { 0x00020002, { 0xD840, 0xDC02 } }, | |
1002 { 0x00020004, { 0xD840, 0xDC04 } }, | |
1003 { 0x00020008, { 0xD840, 0xDC08 } }, | |
1004 { 0x00020010, { 0xD840, 0xDC10 } }, | |
1005 { 0x00020020, { 0xD840, 0xDC20 } }, | |
1006 { 0x00020040, { 0xD840, 0xDC40 } }, | |
1007 { 0x00020080, { 0xD840, 0xDC80 } }, | |
1008 { 0x00020100, { 0xD840, 0xDD00 } }, | |
1009 { 0x00020200, { 0xD840, 0xDE00 } }, | |
1010 { 0x00020400, { 0xD841, 0xDC00 } }, | |
1011 { 0x00020800, { 0xD842, 0xDC00 } }, | |
1012 { 0x00021000, { 0xD844, 0xDC00 } }, | |
1013 { 0x00022000, { 0xD848, 0xDC00 } }, | |
1014 { 0x00024000, { 0xD850, 0xDC00 } }, | |
1015 { 0x00028000, { 0xD860, 0xDC00 } }, | |
1016 { 0x0002FFFF, { 0xD87F, 0xDFFF } }, | |
1017 { 0x00030000, { 0xD880, 0xDC00 } }, | |
1018 { 0x00030001, { 0xD880, 0xDC01 } }, | |
1019 { 0x00030002, { 0xD880, 0xDC02 } }, | |
1020 { 0x00030004, { 0xD880, 0xDC04 } }, | |
1021 { 0x00030008, { 0xD880, 0xDC08 } }, | |
1022 { 0x00030010, { 0xD880, 0xDC10 } }, | |
1023 { 0x00030020, { 0xD880, 0xDC20 } }, | |
1024 { 0x00030040, { 0xD880, 0xDC40 } }, | |
1025 { 0x00030080, { 0xD880, 0xDC80 } }, | |
1026 { 0x00030100, { 0xD880, 0xDD00 } }, | |
1027 { 0x00030200, { 0xD880, 0xDE00 } }, | |
1028 { 0x00030400, { 0xD881, 0xDC00 } }, | |
1029 { 0x00030800, { 0xD882, 0xDC00 } }, | |
1030 { 0x00031000, { 0xD884, 0xDC00 } }, | |
1031 { 0x00032000, { 0xD888, 0xDC00 } }, | |
1032 { 0x00034000, { 0xD890, 0xDC00 } }, | |
1033 { 0x00038000, { 0xD8A0, 0xDC00 } }, | |
1034 { 0x0003FFFF, { 0xD8BF, 0xDFFF } }, | |
1035 { 0x00040000, { 0xD8C0, 0xDC00 } }, | |
1036 { 0x00040001, { 0xD8C0, 0xDC01 } }, | |
1037 { 0x00040002, { 0xD8C0, 0xDC02 } }, | |
1038 { 0x00040004, { 0xD8C0, 0xDC04 } }, | |
1039 { 0x00040008, { 0xD8C0, 0xDC08 } }, | |
1040 { 0x00040010, { 0xD8C0, 0xDC10 } }, | |
1041 { 0x00040020, { 0xD8C0, 0xDC20 } }, | |
1042 { 0x00040040, { 0xD8C0, 0xDC40 } }, | |
1043 { 0x00040080, { 0xD8C0, 0xDC80 } }, | |
1044 { 0x00040100, { 0xD8C0, 0xDD00 } }, | |
1045 { 0x00040200, { 0xD8C0, 0xDE00 } }, | |
1046 { 0x00040400, { 0xD8C1, 0xDC00 } }, | |
1047 { 0x00040800, { 0xD8C2, 0xDC00 } }, | |
1048 { 0x00041000, { 0xD8C4, 0xDC00 } }, | |
1049 { 0x00042000, { 0xD8C8, 0xDC00 } }, | |
1050 { 0x00044000, { 0xD8D0, 0xDC00 } }, | |
1051 { 0x00048000, { 0xD8E0, 0xDC00 } }, | |
1052 { 0x0004FFFF, { 0xD8FF, 0xDFFF } }, | |
1053 { 0x00050000, { 0xD900, 0xDC00 } }, | |
1054 { 0x00050001, { 0xD900, 0xDC01 } }, | |
1055 { 0x00050002, { 0xD900, 0xDC02 } }, | |
1056 { 0x00050004, { 0xD900, 0xDC04 } }, | |
1057 { 0x00050008, { 0xD900, 0xDC08 } }, | |
1058 { 0x00050010, { 0xD900, 0xDC10 } }, | |
1059 { 0x00050020, { 0xD900, 0xDC20 } }, | |
1060 { 0x00050040, { 0xD900, 0xDC40 } }, | |
1061 { 0x00050080, { 0xD900, 0xDC80 } }, | |
1062 { 0x00050100, { 0xD900, 0xDD00 } }, | |
1063 { 0x00050200, { 0xD900, 0xDE00 } }, | |
1064 { 0x00050400, { 0xD901, 0xDC00 } }, | |
1065 { 0x00050800, { 0xD902, 0xDC00 } }, | |
1066 { 0x00051000, { 0xD904, 0xDC00 } }, | |
1067 { 0x00052000, { 0xD908, 0xDC00 } }, | |
1068 { 0x00054000, { 0xD910, 0xDC00 } }, | |
1069 { 0x00058000, { 0xD920, 0xDC00 } }, | |
1070 { 0x00060000, { 0xD940, 0xDC00 } }, | |
1071 { 0x00070000, { 0xD980, 0xDC00 } }, | |
1072 { 0x0007FFFF, { 0xD9BF, 0xDFFF } }, | |
1073 { 0x00080000, { 0xD9C0, 0xDC00 } }, | |
1074 { 0x00080001, { 0xD9C0, 0xDC01 } }, | |
1075 { 0x00080002, { 0xD9C0, 0xDC02 } }, | |
1076 { 0x00080004, { 0xD9C0, 0xDC04 } }, | |
1077 { 0x00080008, { 0xD9C0, 0xDC08 } }, | |
1078 { 0x00080010, { 0xD9C0, 0xDC10 } }, | |
1079 { 0x00080020, { 0xD9C0, 0xDC20 } }, | |
1080 { 0x00080040, { 0xD9C0, 0xDC40 } }, | |
1081 { 0x00080080, { 0xD9C0, 0xDC80 } }, | |
1082 { 0x00080100, { 0xD9C0, 0xDD00 } }, | |
1083 { 0x00080200, { 0xD9C0, 0xDE00 } }, | |
1084 { 0x00080400, { 0xD9C1, 0xDC00 } }, | |
1085 { 0x00080800, { 0xD9C2, 0xDC00 } }, | |
1086 { 0x00081000, { 0xD9C4, 0xDC00 } }, | |
1087 { 0x00082000, { 0xD9C8, 0xDC00 } }, | |
1088 { 0x00084000, { 0xD9D0, 0xDC00 } }, | |
1089 { 0x00088000, { 0xD9E0, 0xDC00 } }, | |
1090 { 0x0008FFFF, { 0xD9FF, 0xDFFF } }, | |
1091 { 0x00090000, { 0xDA00, 0xDC00 } }, | |
1092 { 0x00090001, { 0xDA00, 0xDC01 } }, | |
1093 { 0x00090002, { 0xDA00, 0xDC02 } }, | |
1094 { 0x00090004, { 0xDA00, 0xDC04 } }, | |
1095 { 0x00090008, { 0xDA00, 0xDC08 } }, | |
1096 { 0x00090010, { 0xDA00, 0xDC10 } }, | |
1097 { 0x00090020, { 0xDA00, 0xDC20 } }, | |
1098 { 0x00090040, { 0xDA00, 0xDC40 } }, | |
1099 { 0x00090080, { 0xDA00, 0xDC80 } }, | |
1100 { 0x00090100, { 0xDA00, 0xDD00 } }, | |
1101 { 0x00090200, { 0xDA00, 0xDE00 } }, | |
1102 { 0x00090400, { 0xDA01, 0xDC00 } }, | |
1103 { 0x00090800, { 0xDA02, 0xDC00 } }, | |
1104 { 0x00091000, { 0xDA04, 0xDC00 } }, | |
1105 { 0x00092000, { 0xDA08, 0xDC00 } }, | |
1106 { 0x00094000, { 0xDA10, 0xDC00 } }, | |
1107 { 0x00098000, { 0xDA20, 0xDC00 } }, | |
1108 { 0x000A0000, { 0xDA40, 0xDC00 } }, | |
1109 { 0x000B0000, { 0xDA80, 0xDC00 } }, | |
1110 { 0x000C0000, { 0xDAC0, 0xDC00 } }, | |
1111 { 0x000D0000, { 0xDB00, 0xDC00 } }, | |
1112 { 0x000FFFFF, { 0xDBBF, 0xDFFF } }, | |
1113 { 0x0010FFFF, { 0xDBFF, 0xDFFF } } | |
1114 | |
1115 }; | |
1116 | |
/*
 * Byte strings that are not valid UTF-8.  test_utf8_bad_chars() expects
 * both the UCS-2 and the UCS-4 decoder to reject every entry.
 */
char *utf8_bad[] = {
    /* two-byte forms of values below 0x80 (overlong) */
    "\xC0\x80", "\xC1\xBF",

    /* three-byte forms of values below 0x800 (overlong) */
    "\xE0\x80\x80", "\xE0\x9F\xBF",

    /* four-byte forms of values below 0x10000 (overlong) */
    "\xF0\x80\x80\x80", "\xF0\x8F\xBF\xBF",

    /* four-byte forms of values above 0x10FFFF */
    "\xF4\x90\x80\x80", "\xF7\xBF\xBF\xBF",

    /* five-byte forms (lead bytes F8..FB) */
    "\xF8\x80\x80\x80\x80", "\xF8\x88\x80\x80\x80",
    "\xF8\x92\x80\x80\x80", "\xF8\x9F\xBF\xBF\xBF",
    "\xF8\xA0\x80\x80\x80", "\xF8\xA8\x80\x80\x80",
    "\xF8\xB0\x80\x80\x80", "\xF8\xBF\xBF\xBF\xBF",
    "\xF9\x80\x80\x80\x88", "\xF9\x84\x80\x80\x80",
    "\xF9\xBF\xBF\xBF\xBF",
    "\xFA\x80\x80\x80\x80", "\xFA\x90\x80\x80\x80",
    "\xFB\xBF\xBF\xBF\xBF",

    /* six-byte forms (lead bytes FC..FD) */
    "\xFC\x84\x80\x80\x80\x81", "\xFC\x85\x80\x80\x80\x80",
    "\xFC\x86\x80\x80\x80\x80", "\xFC\x87\xBF\xBF\xBF\xBF",
    "\xFC\x88\xA0\x80\x80\x80", "\xFC\x89\x80\x80\x80\x80",
    "\xFC\x8A\x80\x80\x80\x80", "\xFC\x90\x80\x80\x80\x82",
    "\xFD\x80\x80\x80\x80\x80", "\xFD\xBF\xBF\xBF\xBF\xBF",

    /* continuation byte with no lead byte */
    "\x80",

    /* lead byte with no continuation byte */
    "\xC3",

    /* lead byte followed by another lead byte */
    "\xC3\xC3\x80",

    /* three-byte encodings of surrogate values (0xD800, 0xDFC0, 0xDFFF) */
    "\xED\xA0\x80", "\xED\xBF\x80", "\xED\xBF\xBF",

    /* encoded surrogate 0xD800 followed by a valid three-byte sequence */
    "\xED\xA0\x80\xE0\xBF\xBF",
};
1159 | |
/*
 * dump_utf8
 *
 * Writes to stdout: the label `word` followed by a space, then every byte
 * of the NUL-terminated buffer `utf8` as two lowercase hex digits followed
 * by a space, and finally the string `end`.
 */
static void
dump_utf8(char *word, unsigned char *utf8, char *end)
{
    const unsigned char *cursor = utf8;

    fprintf(stdout, "%s ", word);

    while (*cursor) {
        fprintf(stdout, "%02.2x ", (unsigned int)*cursor);
        cursor++;
    }

    fprintf(stdout, "%s", end);
}
1174 | |
1175 static PRBool | |
1176 test_ucs4_chars | |
1177 ( | |
1178 void | |
1179 ) | |
1180 { | |
1181 PRBool rv = PR_TRUE; | |
1182 int i; | |
1183 | |
1184 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { | |
1185 struct ucs4 *e = &ucs4[i]; | |
1186 PRBool result; | |
1187 unsigned char utf8[8]; | |
1188 unsigned int len = 0; | |
1189 PRUint32 back = 0; | |
1190 | |
1191 (void)memset(utf8, 0, sizeof(utf8)); | |
1192 | |
1193 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, | |
1194 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len); | |
1195 | |
1196 if( !result ) { | |
1197 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8\n", e->c); | |
1198 rv = PR_FALSE; | |
1199 continue; | |
1200 } | |
1201 | |
1202 if( (len >= sizeof(utf8)) || | |
1203 (strlen(e->utf8) != len) || | |
1204 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) { | |
1205 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8: ", e->c); | |
1206 dump_utf8("expected", e->utf8, ", "); | |
1207 dump_utf8("received", utf8, "\n"); | |
1208 rv = PR_FALSE; | |
1209 continue; | |
1210 } | |
1211 | |
1212 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
1213 utf8, len, (unsigned char *)&back, sizeof(back), &len); | |
1214 | |
1215 if( !result ) { | |
1216 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4\n"); | |
1217 rv = PR_FALSE; | |
1218 continue; | |
1219 } | |
1220 | |
1221 if( (sizeof(back) != len) || (e->c != back) ) { | |
1222 dump_utf8("Wrong conversion of UTF-8", utf8, " to UCS-4:"); | |
1223 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back); | |
1224 rv = PR_FALSE; | |
1225 continue; | |
1226 } | |
1227 } | |
1228 | |
1229 return rv; | |
1230 } | |
1231 | |
1232 static PRBool | |
1233 test_ucs2_chars | |
1234 ( | |
1235 void | |
1236 ) | |
1237 { | |
1238 PRBool rv = PR_TRUE; | |
1239 int i; | |
1240 | |
1241 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
1242 struct ucs2 *e = &ucs2[i]; | |
1243 PRBool result; | |
1244 unsigned char utf8[8]; | |
1245 unsigned int len = 0; | |
1246 PRUint16 back = 0; | |
1247 | |
1248 (void)memset(utf8, 0, sizeof(utf8)); | |
1249 | |
1250 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
1251 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len); | |
1252 | |
1253 if( !result ) { | |
1254 fprintf(stdout, "Failed to convert UCS-2 0x%04.4x to UTF-8\n", e->c); | |
1255 rv = PR_FALSE; | |
1256 continue; | |
1257 } | |
1258 | |
1259 if( (len >= sizeof(utf8)) || | |
1260 (strlen(e->utf8) != len) || | |
1261 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) { | |
1262 fprintf(stdout, "Wrong conversion of UCS-2 0x%04.4x to UTF-8: ", e->c); | |
1263 dump_utf8("expected", e->utf8, ", "); | |
1264 dump_utf8("received", utf8, "\n"); | |
1265 rv = PR_FALSE; | |
1266 continue; | |
1267 } | |
1268 | |
1269 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
1270 utf8, len, (unsigned char *)&back, sizeof(back), &len); | |
1271 | |
1272 if( !result ) { | |
1273 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-2\n"); | |
1274 rv = PR_FALSE; | |
1275 continue; | |
1276 } | |
1277 | |
1278 if( (sizeof(back) != len) || (e->c != back) ) { | |
1279 dump_utf8("Wrong conversion of UTF-8", utf8, "to UCS-2:"); | |
1280 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back); | |
1281 rv = PR_FALSE; | |
1282 continue; | |
1283 } | |
1284 } | |
1285 | |
1286 return rv; | |
1287 } | |
1288 | |
1289 static PRBool | |
1290 test_utf16_chars | |
1291 ( | |
1292 void | |
1293 ) | |
1294 { | |
1295 PRBool rv = PR_TRUE; | |
1296 int i; | |
1297 | |
1298 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) { | |
1299 struct utf16 *e = &utf16[i]; | |
1300 PRBool result; | |
1301 unsigned char utf8[8]; | |
1302 unsigned int len = 0; | |
1303 PRUint32 back32 = 0; | |
1304 PRUint16 back[2]; | |
1305 | |
1306 (void)memset(utf8, 0, sizeof(utf8)); | |
1307 | |
1308 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
1309 (unsigned char *)&e->w[0], sizeof(e->w), utf8, sizeof(utf8), &len); | |
1310 | |
1311 if( !result ) { | |
1312 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8\n", | |
1313 e->w[0], e->w[1]); | |
1314 rv = PR_FALSE; | |
1315 continue; | |
1316 } | |
1317 | |
1318 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
1319 utf8, len, (unsigned char *)&back32, sizeof(back32), &len); | |
1320 | |
1321 if( 4 != len ) { | |
1322 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8: " | |
1323 "unexpected len %d\n", e->w[0], e->w[1], len); | |
1324 rv = PR_FALSE; | |
1325 continue; | |
1326 } | |
1327 | |
1328 utf8[len] = '\0'; /* null-terminate for printing */ | |
1329 | |
1330 if( !result ) { | |
1331 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4 (utf-16 test)\n"); | |
1332 rv = PR_FALSE; | |
1333 continue; | |
1334 } | |
1335 | |
1336 if( (sizeof(back32) != len) || (e->c != back32) ) { | |
1337 fprintf(stdout, "Wrong conversion of UTF-16 0x%04.4x 0x%04.4x ", | |
1338 e->w[0], e->w[1]); | |
1339 dump_utf8("to UTF-8", utf8, "and then to UCS-4: "); | |
1340 if( sizeof(back32) != len ) { | |
1341 fprintf(stdout, "len is %d\n", len); | |
1342 } else { | |
1343 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back32); | |
1344 } | |
1345 rv = PR_FALSE; | |
1346 continue; | |
1347 } | |
1348 | |
1349 (void)memset(utf8, 0, sizeof(utf8)); | |
1350 back[0] = back[1] = 0; | |
1351 | |
1352 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, | |
1353 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len); | |
1354 | |
1355 if( !result ) { | |
1356 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8 (utf-16 test)\n
", | |
1357 e->c); | |
1358 rv = PR_FALSE; | |
1359 continue; | |
1360 } | |
1361 | |
1362 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
1363 utf8, len, (unsigned char *)&back[0], sizeof(back), &len); | |
1364 | |
1365 if( 4 != len ) { | |
1366 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8: " | |
1367 "unexpected len %d\n", e->c, len); | |
1368 rv = PR_FALSE; | |
1369 continue; | |
1370 } | |
1371 | |
1372 utf8[len] = '\0'; /* null-terminate for printing */ | |
1373 | |
1374 if( !result ) { | |
1375 dump_utf8("Failed to convert UTF-8", utf8, "to UTF-16\n"); | |
1376 rv = PR_FALSE; | |
1377 continue; | |
1378 } | |
1379 | |
1380 if( (sizeof(back) != len) || (e->w[0] != back[0]) || (e->w[1] != back[1]) )
{ | |
1381 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8", e->c); | |
1382 dump_utf8("", utf8, "and then to UTF-16:"); | |
1383 if( sizeof(back) != len ) { | |
1384 fprintf(stdout, "len is %d\n", len); | |
1385 } else { | |
1386 fprintf(stdout, "expected 0x%04.4x 0x%04.4x, received 0x%04.4x 0x%04.4xx
\n", | |
1387 e->w[0], e->w[1], back[0], back[1]); | |
1388 } | |
1389 rv = PR_FALSE; | |
1390 continue; | |
1391 } | |
1392 } | |
1393 | |
1394 return rv; | |
1395 } | |
1396 | |
1397 static PRBool | |
1398 test_utf8_bad_chars | |
1399 ( | |
1400 void | |
1401 ) | |
1402 { | |
1403 PRBool rv = PR_TRUE; | |
1404 int i; | |
1405 | |
1406 for( i = 0; i < sizeof(utf8_bad)/sizeof(utf8_bad[0]); i++ ) { | |
1407 PRBool result; | |
1408 unsigned char destbuf[30]; | |
1409 unsigned int len = 0; | |
1410 | |
1411 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
1412 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf
), &len); | |
1413 | |
1414 if( result ) { | |
1415 dump_utf8("Failed to detect bad UTF-8 string converting to UCS2: ", utf8_b
ad[i], "\n"); | |
1416 rv = PR_FALSE; | |
1417 continue; | |
1418 } | |
1419 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
1420 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf
), &len); | |
1421 | |
1422 if( result ) { | |
1423 dump_utf8("Failed to detect bad UTF-8 string converting to UCS4: ", utf8_b
ad[i], "\n"); | |
1424 rv = PR_FALSE; | |
1425 continue; | |
1426 } | |
1427 | |
1428 } | |
1429 | |
1430 return rv; | |
1431 } | |
1432 | |
1433 static PRBool | |
1434 test_iso88591_chars | |
1435 ( | |
1436 void | |
1437 ) | |
1438 { | |
1439 PRBool rv = PR_TRUE; | |
1440 int i; | |
1441 | |
1442 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
1443 struct ucs2 *e = &ucs2[i]; | |
1444 PRBool result; | |
1445 unsigned char iso88591; | |
1446 unsigned char utf8[3]; | |
1447 unsigned int len = 0; | |
1448 | |
1449 if (ntohs(e->c) > 0xFF) continue; | |
1450 | |
1451 (void)memset(utf8, 0, sizeof(utf8)); | |
1452 iso88591 = ntohs(e->c); | |
1453 | |
1454 result = sec_port_iso88591_utf8_conversion_function(&iso88591, | |
1455 1, utf8, sizeof(utf8), &len); | |
1456 | |
1457 if( !result ) { | |
1458 fprintf(stdout, "Failed to convert ISO-8859-1 0x%02.2x to UTF-8\n", iso885
91); | |
1459 rv = PR_FALSE; | |
1460 continue; | |
1461 } | |
1462 | |
1463 if( (len >= sizeof(utf8)) || | |
1464 (strlen(e->utf8) != len) || | |
1465 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) { | |
1466 fprintf(stdout, "Wrong conversion of ISO-8859-1 0x%02.2x to UTF-8: ", iso8
8591); | |
1467 dump_utf8("expected", e->utf8, ", "); | |
1468 dump_utf8("received", utf8, "\n"); | |
1469 rv = PR_FALSE; | |
1470 continue; | |
1471 } | |
1472 | |
1473 } | |
1474 | |
1475 return rv; | |
1476 } | |
1477 | |
1478 static PRBool | |
1479 test_zeroes | |
1480 ( | |
1481 void | |
1482 ) | |
1483 { | |
1484 PRBool rv = PR_TRUE; | |
1485 PRBool result; | |
1486 PRUint32 lzero = 0; | |
1487 PRUint16 szero = 0; | |
1488 unsigned char utf8[8]; | |
1489 unsigned int len = 0; | |
1490 PRUint32 lback = 1; | |
1491 PRUint16 sback = 1; | |
1492 | |
1493 (void)memset(utf8, 1, sizeof(utf8)); | |
1494 | |
1495 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, | |
1496 (unsigned char *)&lzero, sizeof(lzero), utf8, sizeof(utf8), &len); | |
1497 | |
1498 if( !result ) { | |
1499 fprintf(stdout, "Failed to convert UCS-4 0x00000000 to UTF-8\n"); | |
1500 rv = PR_FALSE; | |
1501 } else if( 1 != len ) { | |
1502 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: len = %d\n", len); | |
1503 rv = PR_FALSE; | |
1504 } else if( '\0' != *utf8 ) { | |
1505 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: expected 00 ," | |
1506 "received %02.2x\n", (unsigned int)*utf8); | |
1507 rv = PR_FALSE; | |
1508 } | |
1509 | |
1510 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
1511 "", 1, (unsigned char *)&lback, sizeof(lback), &len); | |
1512 | |
1513 if( !result ) { | |
1514 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-4\n"); | |
1515 rv = PR_FALSE; | |
1516 } else if( 4 != len ) { | |
1517 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: len = %d\n", len); | |
1518 rv = PR_FALSE; | |
1519 } else if( 0 != lback ) { | |
1520 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: " | |
1521 "expected 0x00000000, received 0x%08.8x\n", lback); | |
1522 rv = PR_FALSE; | |
1523 } | |
1524 | |
1525 (void)memset(utf8, 1, sizeof(utf8)); | |
1526 | |
1527 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
1528 (unsigned char *)&szero, sizeof(szero), utf8, sizeof(utf8), &len); | |
1529 | |
1530 if( !result ) { | |
1531 fprintf(stdout, "Failed to convert UCS-2 0x0000 to UTF-8\n"); | |
1532 rv = PR_FALSE; | |
1533 } else if( 1 != len ) { | |
1534 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: len = %d\n", len); | |
1535 rv = PR_FALSE; | |
1536 } else if( '\0' != *utf8 ) { | |
1537 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: expected 00 ," | |
1538 "received %02.2x\n", (unsigned int)*utf8); | |
1539 rv = PR_FALSE; | |
1540 } | |
1541 | |
1542 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
1543 "", 1, (unsigned char *)&sback, sizeof(sback), &len); | |
1544 | |
1545 if( !result ) { | |
1546 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-2\n"); | |
1547 rv = PR_FALSE; | |
1548 } else if( 2 != len ) { | |
1549 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: len = %d\n", len); | |
1550 rv = PR_FALSE; | |
1551 } else if( 0 != sback ) { | |
1552 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: " | |
1553 "expected 0x0000, received 0x%04.4x\n", sback); | |
1554 rv = PR_FALSE; | |
1555 } | |
1556 | |
1557 return rv; | |
1558 } | |
1559 | |
1560 static PRBool | |
1561 test_multichars | |
1562 ( | |
1563 void | |
1564 ) | |
1565 { | |
1566 int i; | |
1567 unsigned int len, lenout; | |
1568 PRUint32 *ucs4s; | |
1569 char *ucs4_utf8; | |
1570 PRUint16 *ucs2s; | |
1571 char *ucs2_utf8; | |
1572 void *tmp; | |
1573 PRBool result; | |
1574 | |
1575 ucs4s = (PRUint32 *)calloc(sizeof(ucs4)/sizeof(ucs4[0]), sizeof(PRUint32)); | |
1576 ucs2s = (PRUint16 *)calloc(sizeof(ucs2)/sizeof(ucs2[0]), sizeof(PRUint16)); | |
1577 | |
1578 if( ((PRUint32 *)NULL == ucs4s) || ((PRUint16 *)NULL == ucs2s) ) { | |
1579 fprintf(stderr, "out of memory\n"); | |
1580 exit(1); | |
1581 } | |
1582 | |
1583 len = 0; | |
1584 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { | |
1585 ucs4s[i] = ucs4[i].c; | |
1586 len += strlen(ucs4[i].utf8); | |
1587 } | |
1588 | |
1589 ucs4_utf8 = (char *)malloc(len); | |
1590 | |
1591 len = 0; | |
1592 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
1593 ucs2s[i] = ucs2[i].c; | |
1594 len += strlen(ucs2[i].utf8); | |
1595 } | |
1596 | |
1597 ucs2_utf8 = (char *)malloc(len); | |
1598 | |
1599 if( ((char *)NULL == ucs4_utf8) || ((char *)NULL == ucs2_utf8) ) { | |
1600 fprintf(stderr, "out of memory\n"); | |
1601 exit(1); | |
1602 } | |
1603 | |
1604 *ucs4_utf8 = '\0'; | |
1605 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { | |
1606 strcat(ucs4_utf8, ucs4[i].utf8); | |
1607 } | |
1608 | |
1609 *ucs2_utf8 = '\0'; | |
1610 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
1611 strcat(ucs2_utf8, ucs2[i].utf8); | |
1612 } | |
1613 | |
1614 /* UTF-8 -> UCS-4 */ | |
1615 len = sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32); | |
1616 tmp = calloc(len, 1); | |
1617 if( (void *)NULL == tmp ) { | |
1618 fprintf(stderr, "out of memory\n"); | |
1619 exit(1); | |
1620 } | |
1621 | |
1622 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
1623 ucs4_utf8, strlen(ucs4_utf8), tmp, len, &lenout); | |
1624 if( !result ) { | |
1625 fprintf(stdout, "Failed to convert much UTF-8 to UCS-4\n"); | |
1626 goto done; | |
1627 } | |
1628 | |
1629 if( lenout != len ) { | |
1630 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-4\n"); | |
1631 goto loser; | |
1632 } | |
1633 | |
1634 if( 0 != memcmp(ucs4s, tmp, len) ) { | |
1635 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-4\n"); | |
1636 goto loser; | |
1637 } | |
1638 | |
1639 free(tmp); tmp = (void *)NULL; | |
1640 | |
1641 /* UCS-4 -> UTF-8 */ | |
1642 len = strlen(ucs4_utf8); | |
1643 tmp = calloc(len, 1); | |
1644 if( (void *)NULL == tmp ) { | |
1645 fprintf(stderr, "out of memory\n"); | |
1646 exit(1); | |
1647 } | |
1648 | |
1649 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, | |
1650 (unsigned char *)ucs4s, sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32), | |
1651 tmp, len, &lenout); | |
1652 if( !result ) { | |
1653 fprintf(stdout, "Failed to convert much UCS-4 to UTF-8\n"); | |
1654 goto done; | |
1655 } | |
1656 | |
1657 if( lenout != len ) { | |
1658 fprintf(stdout, "Unexpected length converting much UCS-4 to UTF-8\n"); | |
1659 goto loser; | |
1660 } | |
1661 | |
1662 if( 0 != strncmp(ucs4_utf8, tmp, len) ) { | |
1663 fprintf(stdout, "Wrong conversion of much UCS-4 to UTF-8\n"); | |
1664 goto loser; | |
1665 } | |
1666 | |
1667 free(tmp); tmp = (void *)NULL; | |
1668 | |
1669 /* UTF-8 -> UCS-2 */ | |
1670 len = sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16); | |
1671 tmp = calloc(len, 1); | |
1672 if( (void *)NULL == tmp ) { | |
1673 fprintf(stderr, "out of memory\n"); | |
1674 exit(1); | |
1675 } | |
1676 | |
1677 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
1678 ucs2_utf8, strlen(ucs2_utf8), tmp, len, &lenout); | |
1679 if( !result ) { | |
1680 fprintf(stdout, "Failed to convert much UTF-8 to UCS-2\n"); | |
1681 goto done; | |
1682 } | |
1683 | |
1684 if( lenout != len ) { | |
1685 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-2\n"); | |
1686 goto loser; | |
1687 } | |
1688 | |
1689 if( 0 != memcmp(ucs2s, tmp, len) ) { | |
1690 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-2\n"); | |
1691 goto loser; | |
1692 } | |
1693 | |
1694 free(tmp); tmp = (void *)NULL; | |
1695 | |
1696 /* UCS-2 -> UTF-8 */ | |
1697 len = strlen(ucs2_utf8); | |
1698 tmp = calloc(len, 1); | |
1699 if( (void *)NULL == tmp ) { | |
1700 fprintf(stderr, "out of memory\n"); | |
1701 exit(1); | |
1702 } | |
1703 | |
1704 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
1705 (unsigned char *)ucs2s, sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16), | |
1706 tmp, len, &lenout); | |
1707 if( !result ) { | |
1708 fprintf(stdout, "Failed to convert much UCS-2 to UTF-8\n"); | |
1709 goto done; | |
1710 } | |
1711 | |
1712 if( lenout != len ) { | |
1713 fprintf(stdout, "Unexpected length converting much UCS-2 to UTF-8\n"); | |
1714 goto loser; | |
1715 } | |
1716 | |
1717 if( 0 != strncmp(ucs2_utf8, tmp, len) ) { | |
1718 fprintf(stdout, "Wrong conversion of much UCS-2 to UTF-8\n"); | |
1719 goto loser; | |
1720 } | |
1721 | |
1722 /* implement UTF16 */ | |
1723 | |
1724 result = PR_TRUE; | |
1725 goto done; | |
1726 | |
1727 loser: | |
1728 result = PR_FALSE; | |
1729 done: | |
1730 free(ucs4s); | |
1731 free(ucs4_utf8); | |
1732 free(ucs2s); | |
1733 free(ucs2_utf8); | |
1734 if( (void *)NULL != tmp ) free(tmp); | |
1735 return result; | |
1736 } | |
1737 | |
1738 void | |
1739 byte_order | |
1740 ( | |
1741 void | |
1742 ) | |
1743 { | |
1744 /* | |
1745 * The implementation (now) expects the 16- and 32-bit characters | |
1746 * to be in network byte order, not host byte order. Therefore I | |
1747 * have to byteswap all those test vectors above. hton[ls] may be | |
1748 * functions, so I have to do this dynamically. If you want to | |
1749 * use this code to do host byte order conversions, just remove | |
1750 * the call in main() to this function. | |
1751 */ | |
1752 | |
1753 int i; | |
1754 | |
1755 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { | |
1756 struct ucs4 *e = &ucs4[i]; | |
1757 e->c = htonl(e->c); | |
1758 } | |
1759 | |
1760 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
1761 struct ucs2 *e = &ucs2[i]; | |
1762 e->c = htons(e->c); | |
1763 } | |
1764 | |
1765 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) { | |
1766 struct utf16 *e = &utf16[i]; | |
1767 e->c = htonl(e->c); | |
1768 e->w[0] = htons(e->w[0]); | |
1769 e->w[1] = htons(e->w[1]); | |
1770 } | |
1771 | |
1772 return; | |
1773 } | |
1774 | |
1775 int | |
1776 main | |
1777 ( | |
1778 int argc, | |
1779 char *argv[] | |
1780 ) | |
1781 { | |
1782 byte_order(); | |
1783 | |
1784 if( test_ucs4_chars() && | |
1785 test_ucs2_chars() && | |
1786 test_utf16_chars() && | |
1787 test_utf8_bad_chars() && | |
1788 test_iso88591_chars() && | |
1789 test_zeroes() && | |
1790 test_multichars() && | |
1791 PR_TRUE ) { | |
1792 fprintf(stderr, "PASS\n"); | |
1793 return 1; | |
1794 } else { | |
1795 fprintf(stderr, "FAIL\n"); | |
1796 return 0; | |
1797 } | |
1798 } | |
1799 | |
1800 #endif /* TEST_UTF8 */ | |
OLD | NEW |