OLD | NEW |
| (Empty) |
1 /* This Source Code Form is subject to the terms of the Mozilla Public | |
2 * License, v. 2.0. If a copy of the MPL was not distributed with this | |
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | |
4 | |
5 #include "seccomon.h" | |
6 #include "secport.h" | |
7 | |
8 #ifdef TEST_UTF8 | |
9 #include <assert.h> | |
10 #undef PORT_Assert | |
11 #define PORT_Assert assert | |
12 #endif | |
13 | |
14 /* | |
15 * From RFC 2044: | |
16 * | |
17 * UCS-4 range (hex.) UTF-8 octet sequence (binary) | |
18 * 0000 0000-0000 007F 0xxxxxxx | |
19 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx | |
20 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx | |
21 * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | |
22 * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx | |
23 * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx | |
24 */ | |
25 | |
26 /* | |
27 * From http://www.imc.org/draft-hoffman-utf16 | |
28 * | |
29 * For U on [0x00010000,0x0010FFFF]: Let U' = U - 0x00010000 | |
30 * | |
31 * U' = yyyyyyyyyyxxxxxxxxxx | |
32 * W1 = 110110yyyyyyyyyy | |
33 * W2 = 110111xxxxxxxxxx | |
34 */ | |
35 | |
36 /* | |
37 * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit | |
38 * character values. If you wish to use this code for working with | |
39 * host byte order values, define the following: | |
40 * | |
41 * #if IS_BIG_ENDIAN | |
42 * #define L_0 0 | |
43 * #define L_1 1 | |
44 * #define L_2 2 | |
45 * #define L_3 3 | |
46 * #define H_0 0 | |
47 * #define H_1 1 | |
48 * #else / * not everyone has elif * / | |
49 * #if IS_LITTLE_ENDIAN | |
50 * #define L_0 3 | |
51 * #define L_1 2 | |
52 * #define L_2 1 | |
53 * #define L_3 0 | |
54 * #define H_0 1 | |
55 * #define H_1 0 | |
56 * #else | |
57 * #error "PDP and NUXI support deferred" | |
58 * #endif / * IS_LITTLE_ENDIAN * / | |
59 * #endif / * IS_BIG_ENDIAN * / | |
60 */ | |
61 | |
/*
 * Byte offsets of the individual octets of a 32-bit (L_n) or 16-bit (H_n)
 * character inside a buffer.  The values below select network byte order:
 * offset 0 holds the most significant octet.
 */
#define L_0 0
#define L_1 1
#define L_2 2
#define L_3 3
#define H_0 0
#define H_1 1

/* Value returned by sec_port_read_utf8 when the input cannot be decoded. */
#define BAD_UTF8 ((PRUint32)-1)
70 | |
71 /* | |
72 * Parse a single UTF-8 character per the spec. in section 3.9 (D36) | |
73 * of Unicode 4.0.0. | |
74 * | |
75 * Parameters: | |
76 * index - Points to the byte offset in inBuf of character to read. On success, | |
77 * updated to the offset of the following character. | |
78 * inBuf - Input buffer, UTF-8 encoded | |
79 * inbufLen - Length of input buffer, in bytes. | |
80 * | |
81 * Returns: | |
82 * Success - The UCS4 encoded character | |
83 * Failure - BAD_UTF8 | |
84 */ | |
85 static PRUint32 | |
86 sec_port_read_utf8(unsigned int *index, unsigned char *inBuf, unsigned int inBuf
Len) | |
87 { | |
88 PRUint32 result; | |
89 unsigned int i = *index; | |
90 int bytes_left; | |
91 PRUint32 min_value; | |
92 | |
93 PORT_Assert(i < inBufLen); | |
94 | |
95 if ( (inBuf[i] & 0x80) == 0x00 ) { | |
96 result = inBuf[i++]; | |
97 bytes_left = 0; | |
98 min_value = 0; | |
99 } else if ( (inBuf[i] & 0xE0) == 0xC0 ) { | |
100 result = inBuf[i++] & 0x1F; | |
101 bytes_left = 1; | |
102 min_value = 0x80; | |
103 } else if ( (inBuf[i] & 0xF0) == 0xE0) { | |
104 result = inBuf[i++] & 0x0F; | |
105 bytes_left = 2; | |
106 min_value = 0x800; | |
107 } else if ( (inBuf[i] & 0xF8) == 0xF0) { | |
108 result = inBuf[i++] & 0x07; | |
109 bytes_left = 3; | |
110 min_value = 0x10000; | |
111 } else { | |
112 return BAD_UTF8; | |
113 } | |
114 | |
115 while (bytes_left--) { | |
116 if (i >= inBufLen || (inBuf[i] & 0xC0) != 0x80) return BAD_UTF8; | |
117 result = (result << 6) | (inBuf[i++] & 0x3F); | |
118 } | |
119 | |
120 /* Check for overlong sequences, surrogates, and outside unicode range */ | |
121 if (result < min_value || (result & 0xFFFFF800) == 0xD800 || result > 0x10FFFF
) { | |
122 return BAD_UTF8; | |
123 } | |
124 | |
125 *index = i; | |
126 return result; | |
127 } | |
128 | |
129 PRBool | |
130 sec_port_ucs4_utf8_conversion_function | |
131 ( | |
132 PRBool toUnicode, | |
133 unsigned char *inBuf, | |
134 unsigned int inBufLen, | |
135 unsigned char *outBuf, | |
136 unsigned int maxOutBufLen, | |
137 unsigned int *outBufLen | |
138 ) | |
139 { | |
140 PORT_Assert((unsigned int *)NULL != outBufLen); | |
141 | |
142 if( toUnicode ) { | |
143 unsigned int i, len = 0; | |
144 | |
145 for( i = 0; i < inBufLen; ) { | |
146 if( (inBuf[i] & 0x80) == 0x00 ) i += 1; | |
147 else if( (inBuf[i] & 0xE0) == 0xC0 ) i += 2; | |
148 else if( (inBuf[i] & 0xF0) == 0xE0 ) i += 3; | |
149 else if( (inBuf[i] & 0xF8) == 0xF0 ) i += 4; | |
150 else return PR_FALSE; | |
151 | |
152 len += 4; | |
153 } | |
154 | |
155 if( len > maxOutBufLen ) { | |
156 *outBufLen = len; | |
157 return PR_FALSE; | |
158 } | |
159 | |
160 len = 0; | |
161 | |
162 for( i = 0; i < inBufLen; ) { | |
163 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen); | |
164 | |
165 if (ucs4 == BAD_UTF8) return PR_FALSE; | |
166 | |
167 outBuf[len+L_0] = 0x00; | |
168 outBuf[len+L_1] = (unsigned char)(ucs4 >> 16); | |
169 outBuf[len+L_2] = (unsigned char)(ucs4 >> 8); | |
170 outBuf[len+L_3] = (unsigned char)ucs4; | |
171 | |
172 len += 4; | |
173 } | |
174 | |
175 *outBufLen = len; | |
176 return PR_TRUE; | |
177 } else { | |
178 unsigned int i, len = 0; | |
179 PORT_Assert((inBufLen % 4) == 0); | |
180 if ((inBufLen % 4) != 0) { | |
181 *outBufLen = 0; | |
182 return PR_FALSE; | |
183 } | |
184 | |
185 for( i = 0; i < inBufLen; i += 4 ) { | |
186 if( (inBuf[i+L_0] > 0x00) || (inBuf[i+L_1] > 0x10) ) { | |
187 *outBufLen = 0; | |
188 return PR_FALSE; | |
189 } else if( inBuf[i+L_1] >= 0x01 ) len += 4; | |
190 else if( inBuf[i+L_2] >= 0x08 ) len += 3; | |
191 else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) len += 2; | |
192 else len += 1; | |
193 } | |
194 | |
195 if( len > maxOutBufLen ) { | |
196 *outBufLen = len; | |
197 return PR_FALSE; | |
198 } | |
199 | |
200 len = 0; | |
201 | |
202 for( i = 0; i < inBufLen; i += 4 ) { | |
203 if( inBuf[i+L_1] >= 0x01 ) { | |
204 /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ | |
205 /* 00000000 000abcde fghijklm nopqrstu -> | |
206 11110abc 10defghi 10jklmno 10pqrstu */ | |
207 | |
208 outBuf[len+0] = 0xF0 | ((inBuf[i+L_1] & 0x1C) >> 2); | |
209 outBuf[len+1] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4) | |
210 | ((inBuf[i+L_2] & 0xF0) >> 4); | |
211 outBuf[len+2] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2) | |
212 | ((inBuf[i+L_3] & 0xC0) >> 6); | |
213 outBuf[len+3] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); | |
214 | |
215 len += 4; | |
216 } else if( inBuf[i+L_2] >= 0x08 ) { | |
217 /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ | |
218 /* 00000000 00000000 abcdefgh ijklmnop -> | |
219 1110abcd 10efghij 10klmnop */ | |
220 | |
221 outBuf[len+0] = 0xE0 | ((inBuf[i+L_2] & 0xF0) >> 4); | |
222 outBuf[len+1] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2) | |
223 | ((inBuf[i+L_3] & 0xC0) >> 6); | |
224 outBuf[len+2] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); | |
225 | |
226 len += 3; | |
227 } else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) { | |
228 /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */ | |
229 /* 00000000 00000000 00000abc defghijk -> | |
230 110abcde 10fghijk */ | |
231 | |
232 outBuf[len+0] = 0xC0 | ((inBuf[i+L_2] & 0x07) << 2) | |
233 | ((inBuf[i+L_3] & 0xC0) >> 6); | |
234 outBuf[len+1] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); | |
235 | |
236 len += 2; | |
237 } else { | |
238 /* 0000 0000-0000 007F -> 0xxxxxx */ | |
239 /* 00000000 00000000 00000000 0abcdefg -> | |
240 0abcdefg */ | |
241 | |
242 outBuf[len+0] = (inBuf[i+L_3] & 0x7F); | |
243 | |
244 len += 1; | |
245 } | |
246 } | |
247 | |
248 *outBufLen = len; | |
249 return PR_TRUE; | |
250 } | |
251 } | |
252 | |
253 PRBool | |
254 sec_port_ucs2_utf8_conversion_function | |
255 ( | |
256 PRBool toUnicode, | |
257 unsigned char *inBuf, | |
258 unsigned int inBufLen, | |
259 unsigned char *outBuf, | |
260 unsigned int maxOutBufLen, | |
261 unsigned int *outBufLen | |
262 ) | |
263 { | |
264 PORT_Assert((unsigned int *)NULL != outBufLen); | |
265 | |
266 if( toUnicode ) { | |
267 unsigned int i, len = 0; | |
268 | |
269 for( i = 0; i < inBufLen; ) { | |
270 if( (inBuf[i] & 0x80) == 0x00 ) { | |
271 i += 1; | |
272 len += 2; | |
273 } else if( (inBuf[i] & 0xE0) == 0xC0 ) { | |
274 i += 2; | |
275 len += 2; | |
276 } else if( (inBuf[i] & 0xF0) == 0xE0 ) { | |
277 i += 3; | |
278 len += 2; | |
279 } else if( (inBuf[i] & 0xF8) == 0xF0 ) { | |
280 i += 4; | |
281 len += 4; | |
282 } else return PR_FALSE; | |
283 } | |
284 | |
285 if( len > maxOutBufLen ) { | |
286 *outBufLen = len; | |
287 return PR_FALSE; | |
288 } | |
289 | |
290 len = 0; | |
291 | |
292 for( i = 0; i < inBufLen; ) { | |
293 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen); | |
294 | |
295 if (ucs4 == BAD_UTF8) return PR_FALSE; | |
296 | |
297 if( ucs4 < 0x10000) { | |
298 outBuf[len+H_0] = (unsigned char)(ucs4 >> 8); | |
299 outBuf[len+H_1] = (unsigned char)ucs4; | |
300 len += 2; | |
301 } else { | |
302 ucs4 -= 0x10000; | |
303 outBuf[len+0+H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3)); | |
304 outBuf[len+0+H_1] = (unsigned char)(ucs4 >> 10); | |
305 outBuf[len+2+H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3)); | |
306 outBuf[len+2+H_1] = (unsigned char)ucs4; | |
307 len += 4; | |
308 } | |
309 } | |
310 | |
311 *outBufLen = len; | |
312 return PR_TRUE; | |
313 } else { | |
314 unsigned int i, len = 0; | |
315 PORT_Assert((inBufLen % 2) == 0); | |
316 if ((inBufLen % 2) != 0) { | |
317 *outBufLen = 0; | |
318 return PR_FALSE; | |
319 } | |
320 | |
321 for( i = 0; i < inBufLen; i += 2 ) { | |
322 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) len += 1; | |
323 else if( inBuf[i+H_0] < 0x08 ) len += 2; | |
324 else if( ((inBuf[i+0+H_0] & 0xFC) == 0xD8) ) { | |
325 if( ((inBufLen - i) > 2) && ((inBuf[i+2+H_0] & 0xFC) == 0xDC) ) { | |
326 i += 2; | |
327 len += 4; | |
328 } else { | |
329 return PR_FALSE; | |
330 } | |
331 } | |
332 else len += 3; | |
333 } | |
334 | |
335 if( len > maxOutBufLen ) { | |
336 *outBufLen = len; | |
337 return PR_FALSE; | |
338 } | |
339 | |
340 len = 0; | |
341 | |
342 for( i = 0; i < inBufLen; i += 2 ) { | |
343 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) { | |
344 /* 0000-007F -> 0xxxxxx */ | |
345 /* 00000000 0abcdefg -> 0abcdefg */ | |
346 | |
347 outBuf[len] = inBuf[i+H_1] & 0x7F; | |
348 | |
349 len += 1; | |
350 } else if( inBuf[i+H_0] < 0x08 ) { | |
351 /* 0080-07FF -> 110xxxxx 10xxxxxx */ | |
352 /* 00000abc defghijk -> 110abcde 10fghijk */ | |
353 | |
354 outBuf[len+0] = 0xC0 | ((inBuf[i+H_0] & 0x07) << 2) | |
355 | ((inBuf[i+H_1] & 0xC0) >> 6); | |
356 outBuf[len+1] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0); | |
357 | |
358 len += 2; | |
359 } else if( (inBuf[i+H_0] & 0xFC) == 0xD8 ) { | |
360 int abcde, BCDE; | |
361 | |
362 PORT_Assert(((inBufLen - i) > 2) && ((inBuf[i+2+H_0] & 0xFC) == 0xDC) ); | |
363 | |
364 /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ | |
365 /* 110110BC DEfghijk 110111lm nopqrstu -> | |
366 { Let abcde = BCDE + 1 } | |
367 11110abc 10defghi 10jklmno 10pqrstu */ | |
368 | |
369 BCDE = ((inBuf[i+H_0] & 0x03) << 2) | ((inBuf[i+H_1] & 0xC0) >> 6); | |
370 abcde = BCDE + 1; | |
371 | |
372 outBuf[len+0] = 0xF0 | ((abcde & 0x1C) >> 2); | |
373 outBuf[len+1] = 0x80 | ((abcde & 0x03) << 4) | |
374 | ((inBuf[i+0+H_1] & 0x3C) >> 2); | |
375 outBuf[len+2] = 0x80 | ((inBuf[i+0+H_1] & 0x03) << 4) | |
376 | ((inBuf[i+2+H_0] & 0x03) << 2) | |
377 | ((inBuf[i+2+H_1] & 0xC0) >> 6); | |
378 outBuf[len+3] = 0x80 | ((inBuf[i+2+H_1] & 0x3F) >> 0); | |
379 | |
380 i += 2; | |
381 len += 4; | |
382 } else { | |
383 /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ | |
384 /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */ | |
385 | |
386 outBuf[len+0] = 0xE0 | ((inBuf[i+H_0] & 0xF0) >> 4); | |
387 outBuf[len+1] = 0x80 | ((inBuf[i+H_0] & 0x0F) << 2) | |
388 | ((inBuf[i+H_1] & 0xC0) >> 6); | |
389 outBuf[len+2] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0); | |
390 | |
391 len += 3; | |
392 } | |
393 } | |
394 | |
395 *outBufLen = len; | |
396 return PR_TRUE; | |
397 } | |
398 } | |
399 | |
400 PRBool | |
401 sec_port_iso88591_utf8_conversion_function | |
402 ( | |
403 const unsigned char *inBuf, | |
404 unsigned int inBufLen, | |
405 unsigned char *outBuf, | |
406 unsigned int maxOutBufLen, | |
407 unsigned int *outBufLen | |
408 ) | |
409 { | |
410 unsigned int i, len = 0; | |
411 | |
412 PORT_Assert((unsigned int *)NULL != outBufLen); | |
413 | |
414 for( i = 0; i < inBufLen; i++) { | |
415 if( (inBuf[i] & 0x80) == 0x00 ) len += 1; | |
416 else len += 2; | |
417 } | |
418 | |
419 if( len > maxOutBufLen ) { | |
420 *outBufLen = len; | |
421 return PR_FALSE; | |
422 } | |
423 | |
424 len = 0; | |
425 | |
426 for( i = 0; i < inBufLen; i++) { | |
427 if( (inBuf[i] & 0x80) == 0x00 ) { | |
428 /* 00-7F -> 0xxxxxxx */ | |
429 /* 0abcdefg -> 0abcdefg */ | |
430 | |
431 outBuf[len] = inBuf[i]; | |
432 len += 1; | |
433 } else { | |
434 /* 80-FF <- 110xxxxx 10xxxxxx */ | |
435 /* 00000000 abcdefgh -> 110000ab 10cdefgh */ | |
436 | |
437 outBuf[len+0] = 0xC0 | ((inBuf[i] & 0xC0) >> 6); | |
438 outBuf[len+1] = 0x80 | ((inBuf[i] & 0x3F) >> 0); | |
439 | |
440 len += 2; | |
441 } | |
442 } | |
443 | |
444 *outBufLen = len; | |
445 return PR_TRUE; | |
446 } | |
447 | |
448 #ifdef TEST_UTF8 | |
449 | |
450 #include <stdio.h> | |
451 #include <string.h> | |
452 #include <stdlib.h> | |
453 #include <netinet/in.h> /* for htonl and htons */ | |
454 | |
455 /* | |
456 * UCS-4 vectors | |
457 */ | |
458 | |
459 struct ucs4 { | |
460 PRUint32 c; | |
461 char *utf8; | |
462 }; | |
463 | |
464 /* | |
465 * UCS-2 vectors | |
466 */ | |
467 | |
468 struct ucs2 { | |
469 PRUint16 c; | |
470 char *utf8; | |
471 }; | |
472 | |
473 /* | |
474 * UTF-16 vectors | |
475 */ | |
476 | |
477 struct utf16 { | |
478 PRUint32 c; | |
479 PRUint16 w[2]; | |
480 }; | |
481 | |
482 | |
483 /* | |
484 * UCS-4 vectors | |
485 */ | |
486 | |
487 struct ucs4 ucs4[] = { | |
488 { 0x00000001, "\x01" }, | |
489 { 0x00000002, "\x02" }, | |
490 { 0x00000003, "\x03" }, | |
491 { 0x00000004, "\x04" }, | |
492 { 0x00000007, "\x07" }, | |
493 { 0x00000008, "\x08" }, | |
494 { 0x0000000F, "\x0F" }, | |
495 { 0x00000010, "\x10" }, | |
496 { 0x0000001F, "\x1F" }, | |
497 { 0x00000020, "\x20" }, | |
498 { 0x0000003F, "\x3F" }, | |
499 { 0x00000040, "\x40" }, | |
500 { 0x0000007F, "\x7F" }, | |
501 | |
502 { 0x00000080, "\xC2\x80" }, | |
503 { 0x00000081, "\xC2\x81" }, | |
504 { 0x00000082, "\xC2\x82" }, | |
505 { 0x00000084, "\xC2\x84" }, | |
506 { 0x00000088, "\xC2\x88" }, | |
507 { 0x00000090, "\xC2\x90" }, | |
508 { 0x000000A0, "\xC2\xA0" }, | |
509 { 0x000000C0, "\xC3\x80" }, | |
510 { 0x000000FF, "\xC3\xBF" }, | |
511 { 0x00000100, "\xC4\x80" }, | |
512 { 0x00000101, "\xC4\x81" }, | |
513 { 0x00000102, "\xC4\x82" }, | |
514 { 0x00000104, "\xC4\x84" }, | |
515 { 0x00000108, "\xC4\x88" }, | |
516 { 0x00000110, "\xC4\x90" }, | |
517 { 0x00000120, "\xC4\xA0" }, | |
518 { 0x00000140, "\xC5\x80" }, | |
519 { 0x00000180, "\xC6\x80" }, | |
520 { 0x000001FF, "\xC7\xBF" }, | |
521 { 0x00000200, "\xC8\x80" }, | |
522 { 0x00000201, "\xC8\x81" }, | |
523 { 0x00000202, "\xC8\x82" }, | |
524 { 0x00000204, "\xC8\x84" }, | |
525 { 0x00000208, "\xC8\x88" }, | |
526 { 0x00000210, "\xC8\x90" }, | |
527 { 0x00000220, "\xC8\xA0" }, | |
528 { 0x00000240, "\xC9\x80" }, | |
529 { 0x00000280, "\xCA\x80" }, | |
530 { 0x00000300, "\xCC\x80" }, | |
531 { 0x000003FF, "\xCF\xBF" }, | |
532 { 0x00000400, "\xD0\x80" }, | |
533 { 0x00000401, "\xD0\x81" }, | |
534 { 0x00000402, "\xD0\x82" }, | |
535 { 0x00000404, "\xD0\x84" }, | |
536 { 0x00000408, "\xD0\x88" }, | |
537 { 0x00000410, "\xD0\x90" }, | |
538 { 0x00000420, "\xD0\xA0" }, | |
539 { 0x00000440, "\xD1\x80" }, | |
540 { 0x00000480, "\xD2\x80" }, | |
541 { 0x00000500, "\xD4\x80" }, | |
542 { 0x00000600, "\xD8\x80" }, | |
543 { 0x000007FF, "\xDF\xBF" }, | |
544 | |
545 { 0x00000800, "\xE0\xA0\x80" }, | |
546 { 0x00000801, "\xE0\xA0\x81" }, | |
547 { 0x00000802, "\xE0\xA0\x82" }, | |
548 { 0x00000804, "\xE0\xA0\x84" }, | |
549 { 0x00000808, "\xE0\xA0\x88" }, | |
550 { 0x00000810, "\xE0\xA0\x90" }, | |
551 { 0x00000820, "\xE0\xA0\xA0" }, | |
552 { 0x00000840, "\xE0\xA1\x80" }, | |
553 { 0x00000880, "\xE0\xA2\x80" }, | |
554 { 0x00000900, "\xE0\xA4\x80" }, | |
555 { 0x00000A00, "\xE0\xA8\x80" }, | |
556 { 0x00000C00, "\xE0\xB0\x80" }, | |
557 { 0x00000FFF, "\xE0\xBF\xBF" }, | |
558 { 0x00001000, "\xE1\x80\x80" }, | |
559 { 0x00001001, "\xE1\x80\x81" }, | |
560 { 0x00001002, "\xE1\x80\x82" }, | |
561 { 0x00001004, "\xE1\x80\x84" }, | |
562 { 0x00001008, "\xE1\x80\x88" }, | |
563 { 0x00001010, "\xE1\x80\x90" }, | |
564 { 0x00001020, "\xE1\x80\xA0" }, | |
565 { 0x00001040, "\xE1\x81\x80" }, | |
566 { 0x00001080, "\xE1\x82\x80" }, | |
567 { 0x00001100, "\xE1\x84\x80" }, | |
568 { 0x00001200, "\xE1\x88\x80" }, | |
569 { 0x00001400, "\xE1\x90\x80" }, | |
570 { 0x00001800, "\xE1\xA0\x80" }, | |
571 { 0x00001FFF, "\xE1\xBF\xBF" }, | |
572 { 0x00002000, "\xE2\x80\x80" }, | |
573 { 0x00002001, "\xE2\x80\x81" }, | |
574 { 0x00002002, "\xE2\x80\x82" }, | |
575 { 0x00002004, "\xE2\x80\x84" }, | |
576 { 0x00002008, "\xE2\x80\x88" }, | |
577 { 0x00002010, "\xE2\x80\x90" }, | |
578 { 0x00002020, "\xE2\x80\xA0" }, | |
579 { 0x00002040, "\xE2\x81\x80" }, | |
580 { 0x00002080, "\xE2\x82\x80" }, | |
581 { 0x00002100, "\xE2\x84\x80" }, | |
582 { 0x00002200, "\xE2\x88\x80" }, | |
583 { 0x00002400, "\xE2\x90\x80" }, | |
584 { 0x00002800, "\xE2\xA0\x80" }, | |
585 { 0x00003000, "\xE3\x80\x80" }, | |
586 { 0x00003FFF, "\xE3\xBF\xBF" }, | |
587 { 0x00004000, "\xE4\x80\x80" }, | |
588 { 0x00004001, "\xE4\x80\x81" }, | |
589 { 0x00004002, "\xE4\x80\x82" }, | |
590 { 0x00004004, "\xE4\x80\x84" }, | |
591 { 0x00004008, "\xE4\x80\x88" }, | |
592 { 0x00004010, "\xE4\x80\x90" }, | |
593 { 0x00004020, "\xE4\x80\xA0" }, | |
594 { 0x00004040, "\xE4\x81\x80" }, | |
595 { 0x00004080, "\xE4\x82\x80" }, | |
596 { 0x00004100, "\xE4\x84\x80" }, | |
597 { 0x00004200, "\xE4\x88\x80" }, | |
598 { 0x00004400, "\xE4\x90\x80" }, | |
599 { 0x00004800, "\xE4\xA0\x80" }, | |
600 { 0x00005000, "\xE5\x80\x80" }, | |
601 { 0x00006000, "\xE6\x80\x80" }, | |
602 { 0x00007FFF, "\xE7\xBF\xBF" }, | |
603 { 0x00008000, "\xE8\x80\x80" }, | |
604 { 0x00008001, "\xE8\x80\x81" }, | |
605 { 0x00008002, "\xE8\x80\x82" }, | |
606 { 0x00008004, "\xE8\x80\x84" }, | |
607 { 0x00008008, "\xE8\x80\x88" }, | |
608 { 0x00008010, "\xE8\x80\x90" }, | |
609 { 0x00008020, "\xE8\x80\xA0" }, | |
610 { 0x00008040, "\xE8\x81\x80" }, | |
611 { 0x00008080, "\xE8\x82\x80" }, | |
612 { 0x00008100, "\xE8\x84\x80" }, | |
613 { 0x00008200, "\xE8\x88\x80" }, | |
614 { 0x00008400, "\xE8\x90\x80" }, | |
615 { 0x00008800, "\xE8\xA0\x80" }, | |
616 { 0x00009000, "\xE9\x80\x80" }, | |
617 { 0x0000A000, "\xEA\x80\x80" }, | |
618 { 0x0000C000, "\xEC\x80\x80" }, | |
619 { 0x0000FFFF, "\xEF\xBF\xBF" }, | |
620 | |
621 { 0x00010000, "\xF0\x90\x80\x80" }, | |
622 { 0x00010001, "\xF0\x90\x80\x81" }, | |
623 { 0x00010002, "\xF0\x90\x80\x82" }, | |
624 { 0x00010004, "\xF0\x90\x80\x84" }, | |
625 { 0x00010008, "\xF0\x90\x80\x88" }, | |
626 { 0x00010010, "\xF0\x90\x80\x90" }, | |
627 { 0x00010020, "\xF0\x90\x80\xA0" }, | |
628 { 0x00010040, "\xF0\x90\x81\x80" }, | |
629 { 0x00010080, "\xF0\x90\x82\x80" }, | |
630 { 0x00010100, "\xF0\x90\x84\x80" }, | |
631 { 0x00010200, "\xF0\x90\x88\x80" }, | |
632 { 0x00010400, "\xF0\x90\x90\x80" }, | |
633 { 0x00010800, "\xF0\x90\xA0\x80" }, | |
634 { 0x00011000, "\xF0\x91\x80\x80" }, | |
635 { 0x00012000, "\xF0\x92\x80\x80" }, | |
636 { 0x00014000, "\xF0\x94\x80\x80" }, | |
637 { 0x00018000, "\xF0\x98\x80\x80" }, | |
638 { 0x0001FFFF, "\xF0\x9F\xBF\xBF" }, | |
639 { 0x00020000, "\xF0\xA0\x80\x80" }, | |
640 { 0x00020001, "\xF0\xA0\x80\x81" }, | |
641 { 0x00020002, "\xF0\xA0\x80\x82" }, | |
642 { 0x00020004, "\xF0\xA0\x80\x84" }, | |
643 { 0x00020008, "\xF0\xA0\x80\x88" }, | |
644 { 0x00020010, "\xF0\xA0\x80\x90" }, | |
645 { 0x00020020, "\xF0\xA0\x80\xA0" }, | |
646 { 0x00020040, "\xF0\xA0\x81\x80" }, | |
647 { 0x00020080, "\xF0\xA0\x82\x80" }, | |
648 { 0x00020100, "\xF0\xA0\x84\x80" }, | |
649 { 0x00020200, "\xF0\xA0\x88\x80" }, | |
650 { 0x00020400, "\xF0\xA0\x90\x80" }, | |
651 { 0x00020800, "\xF0\xA0\xA0\x80" }, | |
652 { 0x00021000, "\xF0\xA1\x80\x80" }, | |
653 { 0x00022000, "\xF0\xA2\x80\x80" }, | |
654 { 0x00024000, "\xF0\xA4\x80\x80" }, | |
655 { 0x00028000, "\xF0\xA8\x80\x80" }, | |
656 { 0x00030000, "\xF0\xB0\x80\x80" }, | |
657 { 0x0003FFFF, "\xF0\xBF\xBF\xBF" }, | |
658 { 0x00040000, "\xF1\x80\x80\x80" }, | |
659 { 0x00040001, "\xF1\x80\x80\x81" }, | |
660 { 0x00040002, "\xF1\x80\x80\x82" }, | |
661 { 0x00040004, "\xF1\x80\x80\x84" }, | |
662 { 0x00040008, "\xF1\x80\x80\x88" }, | |
663 { 0x00040010, "\xF1\x80\x80\x90" }, | |
664 { 0x00040020, "\xF1\x80\x80\xA0" }, | |
665 { 0x00040040, "\xF1\x80\x81\x80" }, | |
666 { 0x00040080, "\xF1\x80\x82\x80" }, | |
667 { 0x00040100, "\xF1\x80\x84\x80" }, | |
668 { 0x00040200, "\xF1\x80\x88\x80" }, | |
669 { 0x00040400, "\xF1\x80\x90\x80" }, | |
670 { 0x00040800, "\xF1\x80\xA0\x80" }, | |
671 { 0x00041000, "\xF1\x81\x80\x80" }, | |
672 { 0x00042000, "\xF1\x82\x80\x80" }, | |
673 { 0x00044000, "\xF1\x84\x80\x80" }, | |
674 { 0x00048000, "\xF1\x88\x80\x80" }, | |
675 { 0x00050000, "\xF1\x90\x80\x80" }, | |
676 { 0x00060000, "\xF1\xA0\x80\x80" }, | |
677 { 0x0007FFFF, "\xF1\xBF\xBF\xBF" }, | |
678 { 0x00080000, "\xF2\x80\x80\x80" }, | |
679 { 0x00080001, "\xF2\x80\x80\x81" }, | |
680 { 0x00080002, "\xF2\x80\x80\x82" }, | |
681 { 0x00080004, "\xF2\x80\x80\x84" }, | |
682 { 0x00080008, "\xF2\x80\x80\x88" }, | |
683 { 0x00080010, "\xF2\x80\x80\x90" }, | |
684 { 0x00080020, "\xF2\x80\x80\xA0" }, | |
685 { 0x00080040, "\xF2\x80\x81\x80" }, | |
686 { 0x00080080, "\xF2\x80\x82\x80" }, | |
687 { 0x00080100, "\xF2\x80\x84\x80" }, | |
688 { 0x00080200, "\xF2\x80\x88\x80" }, | |
689 { 0x00080400, "\xF2\x80\x90\x80" }, | |
690 { 0x00080800, "\xF2\x80\xA0\x80" }, | |
691 { 0x00081000, "\xF2\x81\x80\x80" }, | |
692 { 0x00082000, "\xF2\x82\x80\x80" }, | |
693 { 0x00084000, "\xF2\x84\x80\x80" }, | |
694 { 0x00088000, "\xF2\x88\x80\x80" }, | |
695 { 0x00090000, "\xF2\x90\x80\x80" }, | |
696 { 0x000A0000, "\xF2\xA0\x80\x80" }, | |
697 { 0x000C0000, "\xF3\x80\x80\x80" }, | |
698 { 0x000FFFFF, "\xF3\xBF\xBF\xBF" }, | |
699 { 0x00100000, "\xF4\x80\x80\x80" }, | |
700 { 0x00100001, "\xF4\x80\x80\x81" }, | |
701 { 0x00100002, "\xF4\x80\x80\x82" }, | |
702 { 0x00100004, "\xF4\x80\x80\x84" }, | |
703 { 0x00100008, "\xF4\x80\x80\x88" }, | |
704 { 0x00100010, "\xF4\x80\x80\x90" }, | |
705 { 0x00100020, "\xF4\x80\x80\xA0" }, | |
706 { 0x00100040, "\xF4\x80\x81\x80" }, | |
707 { 0x00100080, "\xF4\x80\x82\x80" }, | |
708 { 0x00100100, "\xF4\x80\x84\x80" }, | |
709 { 0x00100200, "\xF4\x80\x88\x80" }, | |
710 { 0x00100400, "\xF4\x80\x90\x80" }, | |
711 { 0x00100800, "\xF4\x80\xA0\x80" }, | |
712 { 0x00101000, "\xF4\x81\x80\x80" }, | |
713 { 0x00102000, "\xF4\x82\x80\x80" }, | |
714 { 0x00104000, "\xF4\x84\x80\x80" }, | |
715 { 0x00108000, "\xF4\x88\x80\x80" }, | |
716 { 0x0010FFFF, "\xF4\x8F\xBF\xBF" }, | |
717 }; | |
718 | |
719 /* | |
720 * UCS-2 vectors | |
721 */ | |
722 | |
723 struct ucs2 ucs2[] = { | |
724 { 0x0001, "\x01" }, | |
725 { 0x0002, "\x02" }, | |
726 { 0x0003, "\x03" }, | |
727 { 0x0004, "\x04" }, | |
728 { 0x0007, "\x07" }, | |
729 { 0x0008, "\x08" }, | |
730 { 0x000F, "\x0F" }, | |
731 { 0x0010, "\x10" }, | |
732 { 0x001F, "\x1F" }, | |
733 { 0x0020, "\x20" }, | |
734 { 0x003F, "\x3F" }, | |
735 { 0x0040, "\x40" }, | |
736 { 0x007F, "\x7F" }, | |
737 | |
738 { 0x0080, "\xC2\x80" }, | |
739 { 0x0081, "\xC2\x81" }, | |
740 { 0x0082, "\xC2\x82" }, | |
741 { 0x0084, "\xC2\x84" }, | |
742 { 0x0088, "\xC2\x88" }, | |
743 { 0x0090, "\xC2\x90" }, | |
744 { 0x00A0, "\xC2\xA0" }, | |
745 { 0x00C0, "\xC3\x80" }, | |
746 { 0x00FF, "\xC3\xBF" }, | |
747 { 0x0100, "\xC4\x80" }, | |
748 { 0x0101, "\xC4\x81" }, | |
749 { 0x0102, "\xC4\x82" }, | |
750 { 0x0104, "\xC4\x84" }, | |
751 { 0x0108, "\xC4\x88" }, | |
752 { 0x0110, "\xC4\x90" }, | |
753 { 0x0120, "\xC4\xA0" }, | |
754 { 0x0140, "\xC5\x80" }, | |
755 { 0x0180, "\xC6\x80" }, | |
756 { 0x01FF, "\xC7\xBF" }, | |
757 { 0x0200, "\xC8\x80" }, | |
758 { 0x0201, "\xC8\x81" }, | |
759 { 0x0202, "\xC8\x82" }, | |
760 { 0x0204, "\xC8\x84" }, | |
761 { 0x0208, "\xC8\x88" }, | |
762 { 0x0210, "\xC8\x90" }, | |
763 { 0x0220, "\xC8\xA0" }, | |
764 { 0x0240, "\xC9\x80" }, | |
765 { 0x0280, "\xCA\x80" }, | |
766 { 0x0300, "\xCC\x80" }, | |
767 { 0x03FF, "\xCF\xBF" }, | |
768 { 0x0400, "\xD0\x80" }, | |
769 { 0x0401, "\xD0\x81" }, | |
770 { 0x0402, "\xD0\x82" }, | |
771 { 0x0404, "\xD0\x84" }, | |
772 { 0x0408, "\xD0\x88" }, | |
773 { 0x0410, "\xD0\x90" }, | |
774 { 0x0420, "\xD0\xA0" }, | |
775 { 0x0440, "\xD1\x80" }, | |
776 { 0x0480, "\xD2\x80" }, | |
777 { 0x0500, "\xD4\x80" }, | |
778 { 0x0600, "\xD8\x80" }, | |
779 { 0x07FF, "\xDF\xBF" }, | |
780 | |
781 { 0x0800, "\xE0\xA0\x80" }, | |
782 { 0x0801, "\xE0\xA0\x81" }, | |
783 { 0x0802, "\xE0\xA0\x82" }, | |
784 { 0x0804, "\xE0\xA0\x84" }, | |
785 { 0x0808, "\xE0\xA0\x88" }, | |
786 { 0x0810, "\xE0\xA0\x90" }, | |
787 { 0x0820, "\xE0\xA0\xA0" }, | |
788 { 0x0840, "\xE0\xA1\x80" }, | |
789 { 0x0880, "\xE0\xA2\x80" }, | |
790 { 0x0900, "\xE0\xA4\x80" }, | |
791 { 0x0A00, "\xE0\xA8\x80" }, | |
792 { 0x0C00, "\xE0\xB0\x80" }, | |
793 { 0x0FFF, "\xE0\xBF\xBF" }, | |
794 { 0x1000, "\xE1\x80\x80" }, | |
795 { 0x1001, "\xE1\x80\x81" }, | |
796 { 0x1002, "\xE1\x80\x82" }, | |
797 { 0x1004, "\xE1\x80\x84" }, | |
798 { 0x1008, "\xE1\x80\x88" }, | |
799 { 0x1010, "\xE1\x80\x90" }, | |
800 { 0x1020, "\xE1\x80\xA0" }, | |
801 { 0x1040, "\xE1\x81\x80" }, | |
802 { 0x1080, "\xE1\x82\x80" }, | |
803 { 0x1100, "\xE1\x84\x80" }, | |
804 { 0x1200, "\xE1\x88\x80" }, | |
805 { 0x1400, "\xE1\x90\x80" }, | |
806 { 0x1800, "\xE1\xA0\x80" }, | |
807 { 0x1FFF, "\xE1\xBF\xBF" }, | |
808 { 0x2000, "\xE2\x80\x80" }, | |
809 { 0x2001, "\xE2\x80\x81" }, | |
810 { 0x2002, "\xE2\x80\x82" }, | |
811 { 0x2004, "\xE2\x80\x84" }, | |
812 { 0x2008, "\xE2\x80\x88" }, | |
813 { 0x2010, "\xE2\x80\x90" }, | |
814 { 0x2020, "\xE2\x80\xA0" }, | |
815 { 0x2040, "\xE2\x81\x80" }, | |
816 { 0x2080, "\xE2\x82\x80" }, | |
817 { 0x2100, "\xE2\x84\x80" }, | |
818 { 0x2200, "\xE2\x88\x80" }, | |
819 { 0x2400, "\xE2\x90\x80" }, | |
820 { 0x2800, "\xE2\xA0\x80" }, | |
821 { 0x3000, "\xE3\x80\x80" }, | |
822 { 0x3FFF, "\xE3\xBF\xBF" }, | |
823 { 0x4000, "\xE4\x80\x80" }, | |
824 { 0x4001, "\xE4\x80\x81" }, | |
825 { 0x4002, "\xE4\x80\x82" }, | |
826 { 0x4004, "\xE4\x80\x84" }, | |
827 { 0x4008, "\xE4\x80\x88" }, | |
828 { 0x4010, "\xE4\x80\x90" }, | |
829 { 0x4020, "\xE4\x80\xA0" }, | |
830 { 0x4040, "\xE4\x81\x80" }, | |
831 { 0x4080, "\xE4\x82\x80" }, | |
832 { 0x4100, "\xE4\x84\x80" }, | |
833 { 0x4200, "\xE4\x88\x80" }, | |
834 { 0x4400, "\xE4\x90\x80" }, | |
835 { 0x4800, "\xE4\xA0\x80" }, | |
836 { 0x5000, "\xE5\x80\x80" }, | |
837 { 0x6000, "\xE6\x80\x80" }, | |
838 { 0x7FFF, "\xE7\xBF\xBF" }, | |
839 { 0x8000, "\xE8\x80\x80" }, | |
840 { 0x8001, "\xE8\x80\x81" }, | |
841 { 0x8002, "\xE8\x80\x82" }, | |
842 { 0x8004, "\xE8\x80\x84" }, | |
843 { 0x8008, "\xE8\x80\x88" }, | |
844 { 0x8010, "\xE8\x80\x90" }, | |
845 { 0x8020, "\xE8\x80\xA0" }, | |
846 { 0x8040, "\xE8\x81\x80" }, | |
847 { 0x8080, "\xE8\x82\x80" }, | |
848 { 0x8100, "\xE8\x84\x80" }, | |
849 { 0x8200, "\xE8\x88\x80" }, | |
850 { 0x8400, "\xE8\x90\x80" }, | |
851 { 0x8800, "\xE8\xA0\x80" }, | |
852 { 0x9000, "\xE9\x80\x80" }, | |
853 { 0xA000, "\xEA\x80\x80" }, | |
854 { 0xC000, "\xEC\x80\x80" }, | |
855 { 0xFB01, "\xEF\xAC\x81" }, | |
856 { 0xFFFF, "\xEF\xBF\xBF" } | |
857 | |
858 }; | |
859 | |
860 /* | |
861 * UTF-16 vectors | |
862 */ | |
863 | |
864 struct utf16 utf16[] = { | |
865 { 0x00010000, { 0xD800, 0xDC00 } }, | |
866 { 0x00010001, { 0xD800, 0xDC01 } }, | |
867 { 0x00010002, { 0xD800, 0xDC02 } }, | |
868 { 0x00010003, { 0xD800, 0xDC03 } }, | |
869 { 0x00010004, { 0xD800, 0xDC04 } }, | |
870 { 0x00010007, { 0xD800, 0xDC07 } }, | |
871 { 0x00010008, { 0xD800, 0xDC08 } }, | |
872 { 0x0001000F, { 0xD800, 0xDC0F } }, | |
873 { 0x00010010, { 0xD800, 0xDC10 } }, | |
874 { 0x0001001F, { 0xD800, 0xDC1F } }, | |
875 { 0x00010020, { 0xD800, 0xDC20 } }, | |
876 { 0x0001003F, { 0xD800, 0xDC3F } }, | |
877 { 0x00010040, { 0xD800, 0xDC40 } }, | |
878 { 0x0001007F, { 0xD800, 0xDC7F } }, | |
879 { 0x00010080, { 0xD800, 0xDC80 } }, | |
880 { 0x00010081, { 0xD800, 0xDC81 } }, | |
881 { 0x00010082, { 0xD800, 0xDC82 } }, | |
882 { 0x00010084, { 0xD800, 0xDC84 } }, | |
883 { 0x00010088, { 0xD800, 0xDC88 } }, | |
884 { 0x00010090, { 0xD800, 0xDC90 } }, | |
885 { 0x000100A0, { 0xD800, 0xDCA0 } }, | |
886 { 0x000100C0, { 0xD800, 0xDCC0 } }, | |
887 { 0x000100FF, { 0xD800, 0xDCFF } }, | |
888 { 0x00010100, { 0xD800, 0xDD00 } }, | |
889 { 0x00010101, { 0xD800, 0xDD01 } }, | |
890 { 0x00010102, { 0xD800, 0xDD02 } }, | |
891 { 0x00010104, { 0xD800, 0xDD04 } }, | |
892 { 0x00010108, { 0xD800, 0xDD08 } }, | |
893 { 0x00010110, { 0xD800, 0xDD10 } }, | |
894 { 0x00010120, { 0xD800, 0xDD20 } }, | |
895 { 0x00010140, { 0xD800, 0xDD40 } }, | |
896 { 0x00010180, { 0xD800, 0xDD80 } }, | |
897 { 0x000101FF, { 0xD800, 0xDDFF } }, | |
898 { 0x00010200, { 0xD800, 0xDE00 } }, | |
899 { 0x00010201, { 0xD800, 0xDE01 } }, | |
900 { 0x00010202, { 0xD800, 0xDE02 } }, | |
901 { 0x00010204, { 0xD800, 0xDE04 } }, | |
902 { 0x00010208, { 0xD800, 0xDE08 } }, | |
903 { 0x00010210, { 0xD800, 0xDE10 } }, | |
904 { 0x00010220, { 0xD800, 0xDE20 } }, | |
905 { 0x00010240, { 0xD800, 0xDE40 } }, | |
906 { 0x00010280, { 0xD800, 0xDE80 } }, | |
907 { 0x00010300, { 0xD800, 0xDF00 } }, | |
908 { 0x000103FF, { 0xD800, 0xDFFF } }, | |
909 { 0x00010400, { 0xD801, 0xDC00 } }, | |
910 { 0x00010401, { 0xD801, 0xDC01 } }, | |
911 { 0x00010402, { 0xD801, 0xDC02 } }, | |
912 { 0x00010404, { 0xD801, 0xDC04 } }, | |
913 { 0x00010408, { 0xD801, 0xDC08 } }, | |
914 { 0x00010410, { 0xD801, 0xDC10 } }, | |
915 { 0x00010420, { 0xD801, 0xDC20 } }, | |
916 { 0x00010440, { 0xD801, 0xDC40 } }, | |
917 { 0x00010480, { 0xD801, 0xDC80 } }, | |
918 { 0x00010500, { 0xD801, 0xDD00 } }, | |
919 { 0x00010600, { 0xD801, 0xDE00 } }, | |
920 { 0x000107FF, { 0xD801, 0xDFFF } }, | |
921 { 0x00010800, { 0xD802, 0xDC00 } }, | |
922 { 0x00010801, { 0xD802, 0xDC01 } }, | |
923 { 0x00010802, { 0xD802, 0xDC02 } }, | |
924 { 0x00010804, { 0xD802, 0xDC04 } }, | |
925 { 0x00010808, { 0xD802, 0xDC08 } }, | |
926 { 0x00010810, { 0xD802, 0xDC10 } }, | |
927 { 0x00010820, { 0xD802, 0xDC20 } }, | |
928 { 0x00010840, { 0xD802, 0xDC40 } }, | |
929 { 0x00010880, { 0xD802, 0xDC80 } }, | |
930 { 0x00010900, { 0xD802, 0xDD00 } }, | |
931 { 0x00010A00, { 0xD802, 0xDE00 } }, | |
932 { 0x00010C00, { 0xD803, 0xDC00 } }, | |
933 { 0x00010FFF, { 0xD803, 0xDFFF } }, | |
934 { 0x00011000, { 0xD804, 0xDC00 } }, | |
935 { 0x00011001, { 0xD804, 0xDC01 } }, | |
936 { 0x00011002, { 0xD804, 0xDC02 } }, | |
937 { 0x00011004, { 0xD804, 0xDC04 } }, | |
938 { 0x00011008, { 0xD804, 0xDC08 } }, | |
939 { 0x00011010, { 0xD804, 0xDC10 } }, | |
940 { 0x00011020, { 0xD804, 0xDC20 } }, | |
941 { 0x00011040, { 0xD804, 0xDC40 } }, | |
942 { 0x00011080, { 0xD804, 0xDC80 } }, | |
943 { 0x00011100, { 0xD804, 0xDD00 } }, | |
944 { 0x00011200, { 0xD804, 0xDE00 } }, | |
945 { 0x00011400, { 0xD805, 0xDC00 } }, | |
946 { 0x00011800, { 0xD806, 0xDC00 } }, | |
947 { 0x00011FFF, { 0xD807, 0xDFFF } }, | |
948 { 0x00012000, { 0xD808, 0xDC00 } }, | |
949 { 0x00012001, { 0xD808, 0xDC01 } }, | |
950 { 0x00012002, { 0xD808, 0xDC02 } }, | |
951 { 0x00012004, { 0xD808, 0xDC04 } }, | |
952 { 0x00012008, { 0xD808, 0xDC08 } }, | |
953 { 0x00012010, { 0xD808, 0xDC10 } }, | |
954 { 0x00012020, { 0xD808, 0xDC20 } }, | |
955 { 0x00012040, { 0xD808, 0xDC40 } }, | |
956 { 0x00012080, { 0xD808, 0xDC80 } }, | |
957 { 0x00012100, { 0xD808, 0xDD00 } }, | |
958 { 0x00012200, { 0xD808, 0xDE00 } }, | |
959 { 0x00012400, { 0xD809, 0xDC00 } }, | |
960 { 0x00012800, { 0xD80A, 0xDC00 } }, | |
961 { 0x00013000, { 0xD80C, 0xDC00 } }, | |
962 { 0x00013FFF, { 0xD80F, 0xDFFF } }, | |
963 { 0x00014000, { 0xD810, 0xDC00 } }, | |
964 { 0x00014001, { 0xD810, 0xDC01 } }, | |
965 { 0x00014002, { 0xD810, 0xDC02 } }, | |
966 { 0x00014004, { 0xD810, 0xDC04 } }, | |
967 { 0x00014008, { 0xD810, 0xDC08 } }, | |
968 { 0x00014010, { 0xD810, 0xDC10 } }, | |
969 { 0x00014020, { 0xD810, 0xDC20 } }, | |
970 { 0x00014040, { 0xD810, 0xDC40 } }, | |
971 { 0x00014080, { 0xD810, 0xDC80 } }, | |
972 { 0x00014100, { 0xD810, 0xDD00 } }, | |
973 { 0x00014200, { 0xD810, 0xDE00 } }, | |
974 { 0x00014400, { 0xD811, 0xDC00 } }, | |
975 { 0x00014800, { 0xD812, 0xDC00 } }, | |
976 { 0x00015000, { 0xD814, 0xDC00 } }, | |
977 { 0x00016000, { 0xD818, 0xDC00 } }, | |
978 { 0x00017FFF, { 0xD81F, 0xDFFF } }, | |
979 { 0x00018000, { 0xD820, 0xDC00 } }, | |
980 { 0x00018001, { 0xD820, 0xDC01 } }, | |
981 { 0x00018002, { 0xD820, 0xDC02 } }, | |
982 { 0x00018004, { 0xD820, 0xDC04 } }, | |
983 { 0x00018008, { 0xD820, 0xDC08 } }, | |
984 { 0x00018010, { 0xD820, 0xDC10 } }, | |
985 { 0x00018020, { 0xD820, 0xDC20 } }, | |
986 { 0x00018040, { 0xD820, 0xDC40 } }, | |
987 { 0x00018080, { 0xD820, 0xDC80 } }, | |
988 { 0x00018100, { 0xD820, 0xDD00 } }, | |
989 { 0x00018200, { 0xD820, 0xDE00 } }, | |
990 { 0x00018400, { 0xD821, 0xDC00 } }, | |
991 { 0x00018800, { 0xD822, 0xDC00 } }, | |
992 { 0x00019000, { 0xD824, 0xDC00 } }, | |
993 { 0x0001A000, { 0xD828, 0xDC00 } }, | |
994 { 0x0001C000, { 0xD830, 0xDC00 } }, | |
995 { 0x0001FFFF, { 0xD83F, 0xDFFF } }, | |
996 { 0x00020000, { 0xD840, 0xDC00 } }, | |
997 { 0x00020001, { 0xD840, 0xDC01 } }, | |
998 { 0x00020002, { 0xD840, 0xDC02 } }, | |
999 { 0x00020004, { 0xD840, 0xDC04 } }, | |
1000 { 0x00020008, { 0xD840, 0xDC08 } }, | |
1001 { 0x00020010, { 0xD840, 0xDC10 } }, | |
1002 { 0x00020020, { 0xD840, 0xDC20 } }, | |
1003 { 0x00020040, { 0xD840, 0xDC40 } }, | |
1004 { 0x00020080, { 0xD840, 0xDC80 } }, | |
1005 { 0x00020100, { 0xD840, 0xDD00 } }, | |
1006 { 0x00020200, { 0xD840, 0xDE00 } }, | |
1007 { 0x00020400, { 0xD841, 0xDC00 } }, | |
1008 { 0x00020800, { 0xD842, 0xDC00 } }, | |
1009 { 0x00021000, { 0xD844, 0xDC00 } }, | |
1010 { 0x00022000, { 0xD848, 0xDC00 } }, | |
1011 { 0x00024000, { 0xD850, 0xDC00 } }, | |
1012 { 0x00028000, { 0xD860, 0xDC00 } }, | |
1013 { 0x0002FFFF, { 0xD87F, 0xDFFF } }, | |
1014 { 0x00030000, { 0xD880, 0xDC00 } }, | |
1015 { 0x00030001, { 0xD880, 0xDC01 } }, | |
1016 { 0x00030002, { 0xD880, 0xDC02 } }, | |
1017 { 0x00030004, { 0xD880, 0xDC04 } }, | |
1018 { 0x00030008, { 0xD880, 0xDC08 } }, | |
1019 { 0x00030010, { 0xD880, 0xDC10 } }, | |
1020 { 0x00030020, { 0xD880, 0xDC20 } }, | |
1021 { 0x00030040, { 0xD880, 0xDC40 } }, | |
1022 { 0x00030080, { 0xD880, 0xDC80 } }, | |
1023 { 0x00030100, { 0xD880, 0xDD00 } }, | |
1024 { 0x00030200, { 0xD880, 0xDE00 } }, | |
1025 { 0x00030400, { 0xD881, 0xDC00 } }, | |
1026 { 0x00030800, { 0xD882, 0xDC00 } }, | |
1027 { 0x00031000, { 0xD884, 0xDC00 } }, | |
1028 { 0x00032000, { 0xD888, 0xDC00 } }, | |
1029 { 0x00034000, { 0xD890, 0xDC00 } }, | |
1030 { 0x00038000, { 0xD8A0, 0xDC00 } }, | |
1031 { 0x0003FFFF, { 0xD8BF, 0xDFFF } }, | |
1032 { 0x00040000, { 0xD8C0, 0xDC00 } }, | |
1033 { 0x00040001, { 0xD8C0, 0xDC01 } }, | |
1034 { 0x00040002, { 0xD8C0, 0xDC02 } }, | |
1035 { 0x00040004, { 0xD8C0, 0xDC04 } }, | |
1036 { 0x00040008, { 0xD8C0, 0xDC08 } }, | |
1037 { 0x00040010, { 0xD8C0, 0xDC10 } }, | |
1038 { 0x00040020, { 0xD8C0, 0xDC20 } }, | |
1039 { 0x00040040, { 0xD8C0, 0xDC40 } }, | |
1040 { 0x00040080, { 0xD8C0, 0xDC80 } }, | |
1041 { 0x00040100, { 0xD8C0, 0xDD00 } }, | |
1042 { 0x00040200, { 0xD8C0, 0xDE00 } }, | |
1043 { 0x00040400, { 0xD8C1, 0xDC00 } }, | |
1044 { 0x00040800, { 0xD8C2, 0xDC00 } }, | |
1045 { 0x00041000, { 0xD8C4, 0xDC00 } }, | |
1046 { 0x00042000, { 0xD8C8, 0xDC00 } }, | |
1047 { 0x00044000, { 0xD8D0, 0xDC00 } }, | |
1048 { 0x00048000, { 0xD8E0, 0xDC00 } }, | |
1049 { 0x0004FFFF, { 0xD8FF, 0xDFFF } }, | |
1050 { 0x00050000, { 0xD900, 0xDC00 } }, | |
1051 { 0x00050001, { 0xD900, 0xDC01 } }, | |
1052 { 0x00050002, { 0xD900, 0xDC02 } }, | |
1053 { 0x00050004, { 0xD900, 0xDC04 } }, | |
1054 { 0x00050008, { 0xD900, 0xDC08 } }, | |
1055 { 0x00050010, { 0xD900, 0xDC10 } }, | |
1056 { 0x00050020, { 0xD900, 0xDC20 } }, | |
1057 { 0x00050040, { 0xD900, 0xDC40 } }, | |
1058 { 0x00050080, { 0xD900, 0xDC80 } }, | |
1059 { 0x00050100, { 0xD900, 0xDD00 } }, | |
1060 { 0x00050200, { 0xD900, 0xDE00 } }, | |
1061 { 0x00050400, { 0xD901, 0xDC00 } }, | |
1062 { 0x00050800, { 0xD902, 0xDC00 } }, | |
1063 { 0x00051000, { 0xD904, 0xDC00 } }, | |
1064 { 0x00052000, { 0xD908, 0xDC00 } }, | |
1065 { 0x00054000, { 0xD910, 0xDC00 } }, | |
1066 { 0x00058000, { 0xD920, 0xDC00 } }, | |
1067 { 0x00060000, { 0xD940, 0xDC00 } }, | |
1068 { 0x00070000, { 0xD980, 0xDC00 } }, | |
1069 { 0x0007FFFF, { 0xD9BF, 0xDFFF } }, | |
1070 { 0x00080000, { 0xD9C0, 0xDC00 } }, | |
1071 { 0x00080001, { 0xD9C0, 0xDC01 } }, | |
1072 { 0x00080002, { 0xD9C0, 0xDC02 } }, | |
1073 { 0x00080004, { 0xD9C0, 0xDC04 } }, | |
1074 { 0x00080008, { 0xD9C0, 0xDC08 } }, | |
1075 { 0x00080010, { 0xD9C0, 0xDC10 } }, | |
1076 { 0x00080020, { 0xD9C0, 0xDC20 } }, | |
1077 { 0x00080040, { 0xD9C0, 0xDC40 } }, | |
1078 { 0x00080080, { 0xD9C0, 0xDC80 } }, | |
1079 { 0x00080100, { 0xD9C0, 0xDD00 } }, | |
1080 { 0x00080200, { 0xD9C0, 0xDE00 } }, | |
1081 { 0x00080400, { 0xD9C1, 0xDC00 } }, | |
1082 { 0x00080800, { 0xD9C2, 0xDC00 } }, | |
1083 { 0x00081000, { 0xD9C4, 0xDC00 } }, | |
1084 { 0x00082000, { 0xD9C8, 0xDC00 } }, | |
1085 { 0x00084000, { 0xD9D0, 0xDC00 } }, | |
1086 { 0x00088000, { 0xD9E0, 0xDC00 } }, | |
1087 { 0x0008FFFF, { 0xD9FF, 0xDFFF } }, | |
1088 { 0x00090000, { 0xDA00, 0xDC00 } }, | |
1089 { 0x00090001, { 0xDA00, 0xDC01 } }, | |
1090 { 0x00090002, { 0xDA00, 0xDC02 } }, | |
1091 { 0x00090004, { 0xDA00, 0xDC04 } }, | |
1092 { 0x00090008, { 0xDA00, 0xDC08 } }, | |
1093 { 0x00090010, { 0xDA00, 0xDC10 } }, | |
1094 { 0x00090020, { 0xDA00, 0xDC20 } }, | |
1095 { 0x00090040, { 0xDA00, 0xDC40 } }, | |
1096 { 0x00090080, { 0xDA00, 0xDC80 } }, | |
1097 { 0x00090100, { 0xDA00, 0xDD00 } }, | |
1098 { 0x00090200, { 0xDA00, 0xDE00 } }, | |
1099 { 0x00090400, { 0xDA01, 0xDC00 } }, | |
1100 { 0x00090800, { 0xDA02, 0xDC00 } }, | |
1101 { 0x00091000, { 0xDA04, 0xDC00 } }, | |
1102 { 0x00092000, { 0xDA08, 0xDC00 } }, | |
1103 { 0x00094000, { 0xDA10, 0xDC00 } }, | |
1104 { 0x00098000, { 0xDA20, 0xDC00 } }, | |
1105 { 0x000A0000, { 0xDA40, 0xDC00 } }, | |
1106 { 0x000B0000, { 0xDA80, 0xDC00 } }, | |
1107 { 0x000C0000, { 0xDAC0, 0xDC00 } }, | |
1108 { 0x000D0000, { 0xDB00, 0xDC00 } }, | |
1109 { 0x000FFFFF, { 0xDBBF, 0xDFFF } }, | |
1110 { 0x0010FFFF, { 0xDBFF, 0xDFFF } } | |
1111 | |
1112 }; | |
1113 | |
/*
 * Byte strings that are not legal UTF-8.  Both the UCS-2 and the UCS-4
 * decoders are expected to reject every entry (see test_utf8_bad_chars).
 */
char *utf8_bad[] = {
    /* two-byte lead bytes C0/C1 (can only encode values below 0x80) */
    "\xC0\x80",
    "\xC1\xBF",

    /* three-byte forms of values below 0x800 */
    "\xE0\x80\x80",
    "\xE0\x9F\xBF",

    /* four-byte forms of values below 0x10000 */
    "\xF0\x80\x80\x80",
    "\xF0\x8F\xBF\xBF",

    /* four-byte forms of values above 0x10FFFF */
    "\xF4\x90\x80\x80",
    "\xF7\xBF\xBF\xBF",

    /* five-byte sequences (lead bytes F8..FB) */
    "\xF8\x80\x80\x80\x80",
    "\xF8\x88\x80\x80\x80",
    "\xF8\x92\x80\x80\x80",
    "\xF8\x9F\xBF\xBF\xBF",
    "\xF8\xA0\x80\x80\x80",
    "\xF8\xA8\x80\x80\x80",
    "\xF8\xB0\x80\x80\x80",
    "\xF8\xBF\xBF\xBF\xBF",
    "\xF9\x80\x80\x80\x88",
    "\xF9\x84\x80\x80\x80",
    "\xF9\xBF\xBF\xBF\xBF",
    "\xFA\x80\x80\x80\x80",
    "\xFA\x90\x80\x80\x80",
    "\xFB\xBF\xBF\xBF\xBF",

    /* six-byte sequences (lead bytes FC/FD) */
    "\xFC\x84\x80\x80\x80\x81",
    "\xFC\x85\x80\x80\x80\x80",
    "\xFC\x86\x80\x80\x80\x80",
    "\xFC\x87\xBF\xBF\xBF\xBF",
    "\xFC\x88\xA0\x80\x80\x80",
    "\xFC\x89\x80\x80\x80\x80",
    "\xFC\x8A\x80\x80\x80\x80",
    "\xFC\x90\x80\x80\x80\x82",
    "\xFD\x80\x80\x80\x80\x80",
    "\xFD\xBF\xBF\xBF\xBF\xBF",

    /* stray continuation byte, truncated sequence, lead byte after lead byte */
    "\x80",
    "\xC3",
    "\xC3\xC3\x80",

    /* three-byte encodings of values in the surrogate range D800..DFFF */
    "\xED\xA0\x80",
    "\xED\xBF\x80",
    "\xED\xBF\xBF",
    "\xED\xA0\x80\xE0\xBF\xBF",
};
1156 | |
/*
 * Illegal UTF-16 inputs.  Each row holds up to two code units followed by
 * a 0 terminator; the values are in host byte order and are byte-swapped
 * by test_utf16_bad_chars before use.
 *
 * Every row is a leading surrogate (0xD800) that is NOT followed by a
 * trailing surrogate.
 */
uint16_t utf16_bad[][3] = {
    { 0xD800, 0,      0 },
    { 0xD800, 0x41,   0 },
    { 0xD800, 0xfe,   0 },
    { 0xD800, 0x3bb,  0 },
    { 0xD800, 0xD800, 0 },
    { 0xD800, 0xFEFF, 0 },
    { 0xD800, 0xFFFD, 0 },
};
1168 | |
/*
 * Write "<word> " to stdout, then every byte of the NUL-terminated buffer
 * `utf8` as two lowercase hex digits followed by a space, then `end`.
 */
static void
dump_utf8(char *word, unsigned char *utf8, char *end)
{
    const unsigned char *cursor = utf8;

    fprintf(stdout, "%s ", word);

    while (*cursor != '\0') {
        fprintf(stdout, "%02.2x ", (unsigned int)*cursor);
        ++cursor;
    }

    fprintf(stdout, "%s", end);
}
1183 | |
1184 static PRBool | |
1185 test_ucs4_chars | |
1186 ( | |
1187 void | |
1188 ) | |
1189 { | |
1190 PRBool rv = PR_TRUE; | |
1191 int i; | |
1192 | |
1193 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { | |
1194 struct ucs4 *e = &ucs4[i]; | |
1195 PRBool result; | |
1196 unsigned char utf8[8]; | |
1197 unsigned int len = 0; | |
1198 PRUint32 back = 0; | |
1199 | |
1200 (void)memset(utf8, 0, sizeof(utf8)); | |
1201 | |
1202 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, | |
1203 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len); | |
1204 | |
1205 if( !result ) { | |
1206 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8\n", e->c); | |
1207 rv = PR_FALSE; | |
1208 continue; | |
1209 } | |
1210 | |
1211 if( (len >= sizeof(utf8)) || | |
1212 (strlen(e->utf8) != len) || | |
1213 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) { | |
1214 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8: ", e->c); | |
1215 dump_utf8("expected", e->utf8, ", "); | |
1216 dump_utf8("received", utf8, "\n"); | |
1217 rv = PR_FALSE; | |
1218 continue; | |
1219 } | |
1220 | |
1221 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
1222 utf8, len, (unsigned char *)&back, sizeof(back), &len); | |
1223 | |
1224 if( !result ) { | |
1225 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4\n"); | |
1226 rv = PR_FALSE; | |
1227 continue; | |
1228 } | |
1229 | |
1230 if( (sizeof(back) != len) || (e->c != back) ) { | |
1231 dump_utf8("Wrong conversion of UTF-8", utf8, " to UCS-4:"); | |
1232 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back); | |
1233 rv = PR_FALSE; | |
1234 continue; | |
1235 } | |
1236 | |
1237 len = strlen(e->utf8) - 1; | |
1238 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, | |
1239 (unsigned char *)&e->c, sizeof(e->c), utf8 + sizeof(utf8) - len, len, | |
1240 &len); | |
1241 | |
1242 if( result || len != strlen(e->utf8) ) { | |
1243 fprintf(stdout, "Length computation error converting UCS-4 0x%08.8x" | |
1244 " to UTF-8\n", e->c); | |
1245 rv = PR_FALSE; | |
1246 continue; | |
1247 } | |
1248 } | |
1249 | |
1250 return rv; | |
1251 } | |
1252 | |
1253 static PRBool | |
1254 test_ucs2_chars | |
1255 ( | |
1256 void | |
1257 ) | |
1258 { | |
1259 PRBool rv = PR_TRUE; | |
1260 int i; | |
1261 | |
1262 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
1263 struct ucs2 *e = &ucs2[i]; | |
1264 PRBool result; | |
1265 unsigned char utf8[8]; | |
1266 unsigned int len = 0; | |
1267 PRUint16 back = 0; | |
1268 | |
1269 (void)memset(utf8, 0, sizeof(utf8)); | |
1270 | |
1271 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
1272 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len); | |
1273 | |
1274 if( !result ) { | |
1275 fprintf(stdout, "Failed to convert UCS-2 0x%04.4x to UTF-8\n", e->c); | |
1276 rv = PR_FALSE; | |
1277 continue; | |
1278 } | |
1279 | |
1280 if( (len >= sizeof(utf8)) || | |
1281 (strlen(e->utf8) != len) || | |
1282 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) { | |
1283 fprintf(stdout, "Wrong conversion of UCS-2 0x%04.4x to UTF-8: ", e->c); | |
1284 dump_utf8("expected", e->utf8, ", "); | |
1285 dump_utf8("received", utf8, "\n"); | |
1286 rv = PR_FALSE; | |
1287 continue; | |
1288 } | |
1289 | |
1290 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
1291 utf8, len, (unsigned char *)&back, sizeof(back), &len); | |
1292 | |
1293 if( !result ) { | |
1294 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-2\n"); | |
1295 rv = PR_FALSE; | |
1296 continue; | |
1297 } | |
1298 | |
1299 if( (sizeof(back) != len) || (e->c != back) ) { | |
1300 dump_utf8("Wrong conversion of UTF-8", utf8, "to UCS-2:"); | |
1301 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back); | |
1302 rv = PR_FALSE; | |
1303 continue; | |
1304 } | |
1305 | |
1306 len = strlen(e->utf8) - 1; | |
1307 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
1308 (unsigned char *)&e->c, sizeof(e->c), utf8 + sizeof(utf8) - len, len, | |
1309 &len); | |
1310 | |
1311 if( result || len != strlen(e->utf8) ) { | |
1312 fprintf(stdout, "Length computation error converting UCS-2 0x%04.4x" | |
1313 " to UTF-8\n", e->c); | |
1314 rv = PR_FALSE; | |
1315 continue; | |
1316 } | |
1317 } | |
1318 | |
1319 return rv; | |
1320 } | |
1321 | |
1322 static PRBool | |
1323 test_utf16_chars | |
1324 ( | |
1325 void | |
1326 ) | |
1327 { | |
1328 PRBool rv = PR_TRUE; | |
1329 int i; | |
1330 | |
1331 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) { | |
1332 struct utf16 *e = &utf16[i]; | |
1333 PRBool result; | |
1334 unsigned char utf8[8]; | |
1335 unsigned int len = 0; | |
1336 PRUint32 back32 = 0; | |
1337 PRUint16 back[2]; | |
1338 | |
1339 (void)memset(utf8, 0, sizeof(utf8)); | |
1340 | |
1341 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
1342 (unsigned char *)&e->w[0], sizeof(e->w), utf8, sizeof(utf8), &len); | |
1343 | |
1344 if( !result ) { | |
1345 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8\n", | |
1346 e->w[0], e->w[1]); | |
1347 rv = PR_FALSE; | |
1348 continue; | |
1349 } | |
1350 | |
1351 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
1352 utf8, len, (unsigned char *)&back32, sizeof(back32), &len); | |
1353 | |
1354 if( 4 != len ) { | |
1355 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8: " | |
1356 "unexpected len %d\n", e->w[0], e->w[1], len); | |
1357 rv = PR_FALSE; | |
1358 continue; | |
1359 } | |
1360 | |
1361 utf8[len] = '\0'; /* null-terminate for printing */ | |
1362 | |
1363 if( !result ) { | |
1364 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4 (utf-16 test)\n"); | |
1365 rv = PR_FALSE; | |
1366 continue; | |
1367 } | |
1368 | |
1369 if( (sizeof(back32) != len) || (e->c != back32) ) { | |
1370 fprintf(stdout, "Wrong conversion of UTF-16 0x%04.4x 0x%04.4x ", | |
1371 e->w[0], e->w[1]); | |
1372 dump_utf8("to UTF-8", utf8, "and then to UCS-4: "); | |
1373 if( sizeof(back32) != len ) { | |
1374 fprintf(stdout, "len is %d\n", len); | |
1375 } else { | |
1376 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back32); | |
1377 } | |
1378 rv = PR_FALSE; | |
1379 continue; | |
1380 } | |
1381 | |
1382 (void)memset(utf8, 0, sizeof(utf8)); | |
1383 back[0] = back[1] = 0; | |
1384 | |
1385 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, | |
1386 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len); | |
1387 | |
1388 if( !result ) { | |
1389 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8 (utf-16 test)\n
", | |
1390 e->c); | |
1391 rv = PR_FALSE; | |
1392 continue; | |
1393 } | |
1394 | |
1395 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
1396 utf8, len, (unsigned char *)&back[0], sizeof(back), &len); | |
1397 | |
1398 if( 4 != len ) { | |
1399 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8: " | |
1400 "unexpected len %d\n", e->c, len); | |
1401 rv = PR_FALSE; | |
1402 continue; | |
1403 } | |
1404 | |
1405 utf8[len] = '\0'; /* null-terminate for printing */ | |
1406 | |
1407 if( !result ) { | |
1408 dump_utf8("Failed to convert UTF-8", utf8, "to UTF-16\n"); | |
1409 rv = PR_FALSE; | |
1410 continue; | |
1411 } | |
1412 | |
1413 if( (sizeof(back) != len) || (e->w[0] != back[0]) || (e->w[1] != back[1]) )
{ | |
1414 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8", e->c); | |
1415 dump_utf8("", utf8, "and then to UTF-16:"); | |
1416 if( sizeof(back) != len ) { | |
1417 fprintf(stdout, "len is %d\n", len); | |
1418 } else { | |
1419 fprintf(stdout, "expected 0x%04.4x 0x%04.4x, received 0x%04.4x 0x%04.4xx
\n", | |
1420 e->w[0], e->w[1], back[0], back[1]); | |
1421 } | |
1422 rv = PR_FALSE; | |
1423 continue; | |
1424 } | |
1425 } | |
1426 | |
1427 return rv; | |
1428 } | |
1429 | |
1430 static PRBool | |
1431 test_utf8_bad_chars | |
1432 ( | |
1433 void | |
1434 ) | |
1435 { | |
1436 PRBool rv = PR_TRUE; | |
1437 int i; | |
1438 | |
1439 for( i = 0; i < sizeof(utf8_bad)/sizeof(utf8_bad[0]); i++ ) { | |
1440 PRBool result; | |
1441 unsigned char destbuf[30]; | |
1442 unsigned int len = 0; | |
1443 | |
1444 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
1445 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf
), &len); | |
1446 | |
1447 if( result ) { | |
1448 dump_utf8("Failed to detect bad UTF-8 string converting to UCS2: ", utf8_b
ad[i], "\n"); | |
1449 rv = PR_FALSE; | |
1450 continue; | |
1451 } | |
1452 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
1453 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf
), &len); | |
1454 | |
1455 if( result ) { | |
1456 dump_utf8("Failed to detect bad UTF-8 string converting to UCS4: ", utf8_b
ad[i], "\n"); | |
1457 rv = PR_FALSE; | |
1458 continue; | |
1459 } | |
1460 | |
1461 } | |
1462 | |
1463 return rv; | |
1464 } | |
1465 | |
1466 static PRBool | |
1467 test_utf16_bad_chars(void) | |
1468 { | |
1469 PRBool rv = PR_TRUE; | |
1470 int i; | |
1471 | |
1472 for( i = 0; i < sizeof(utf16_bad)/sizeof(utf16_bad[0]); ++i ) { | |
1473 PRBool result; | |
1474 unsigned char destbuf[18]; | |
1475 unsigned int j, len, destlen; | |
1476 uint16_t *buf; | |
1477 | |
1478 for( len = 0; utf16_bad[i][len] != 0; ++len ) | |
1479 /* nothing */; | |
1480 | |
1481 buf = malloc(sizeof(uint16_t) * len); | |
1482 for( j = 0; j < len; ++j ) | |
1483 buf[j] = htons(utf16_bad[i][j]); | |
1484 | |
1485 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
1486 (unsigned char *)buf, sizeof(uint16_t) * len, destbuf, sizeof(destbuf), | |
1487 &destlen); | |
1488 if( result ) { | |
1489 fprintf(stdout, "Failed to detect bad UTF-16 string conversion for " | |
1490 "{0x%x,0x%x} (UTF-8 len = %u)\n", utf16_bad[i][0], utf16_bad[i][1], | |
1491 destlen); | |
1492 rv = PR_FALSE; | |
1493 } | |
1494 free(buf); | |
1495 } | |
1496 } | |
1497 | |
1498 static PRBool | |
1499 test_iso88591_chars | |
1500 ( | |
1501 void | |
1502 ) | |
1503 { | |
1504 PRBool rv = PR_TRUE; | |
1505 int i; | |
1506 | |
1507 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
1508 struct ucs2 *e = &ucs2[i]; | |
1509 PRBool result; | |
1510 unsigned char iso88591; | |
1511 unsigned char utf8[3]; | |
1512 unsigned int len = 0; | |
1513 | |
1514 if (ntohs(e->c) > 0xFF) continue; | |
1515 | |
1516 (void)memset(utf8, 0, sizeof(utf8)); | |
1517 iso88591 = ntohs(e->c); | |
1518 | |
1519 result = sec_port_iso88591_utf8_conversion_function(&iso88591, | |
1520 1, utf8, sizeof(utf8), &len); | |
1521 | |
1522 if( !result ) { | |
1523 fprintf(stdout, "Failed to convert ISO-8859-1 0x%02.2x to UTF-8\n", iso885
91); | |
1524 rv = PR_FALSE; | |
1525 continue; | |
1526 } | |
1527 | |
1528 if( (len >= sizeof(utf8)) || | |
1529 (strlen(e->utf8) != len) || | |
1530 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) { | |
1531 fprintf(stdout, "Wrong conversion of ISO-8859-1 0x%02.2x to UTF-8: ", iso8
8591); | |
1532 dump_utf8("expected", e->utf8, ", "); | |
1533 dump_utf8("received", utf8, "\n"); | |
1534 rv = PR_FALSE; | |
1535 continue; | |
1536 } | |
1537 | |
1538 } | |
1539 | |
1540 return rv; | |
1541 } | |
1542 | |
1543 static PRBool | |
1544 test_zeroes | |
1545 ( | |
1546 void | |
1547 ) | |
1548 { | |
1549 PRBool rv = PR_TRUE; | |
1550 PRBool result; | |
1551 PRUint32 lzero = 0; | |
1552 PRUint16 szero = 0; | |
1553 unsigned char utf8[8]; | |
1554 unsigned int len = 0; | |
1555 PRUint32 lback = 1; | |
1556 PRUint16 sback = 1; | |
1557 | |
1558 (void)memset(utf8, 1, sizeof(utf8)); | |
1559 | |
1560 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, | |
1561 (unsigned char *)&lzero, sizeof(lzero), utf8, sizeof(utf8), &len); | |
1562 | |
1563 if( !result ) { | |
1564 fprintf(stdout, "Failed to convert UCS-4 0x00000000 to UTF-8\n"); | |
1565 rv = PR_FALSE; | |
1566 } else if( 1 != len ) { | |
1567 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: len = %d\n", len); | |
1568 rv = PR_FALSE; | |
1569 } else if( '\0' != *utf8 ) { | |
1570 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: expected 00 ," | |
1571 "received %02.2x\n", (unsigned int)*utf8); | |
1572 rv = PR_FALSE; | |
1573 } | |
1574 | |
1575 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
1576 "", 1, (unsigned char *)&lback, sizeof(lback), &len); | |
1577 | |
1578 if( !result ) { | |
1579 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-4\n"); | |
1580 rv = PR_FALSE; | |
1581 } else if( 4 != len ) { | |
1582 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: len = %d\n", len); | |
1583 rv = PR_FALSE; | |
1584 } else if( 0 != lback ) { | |
1585 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: " | |
1586 "expected 0x00000000, received 0x%08.8x\n", lback); | |
1587 rv = PR_FALSE; | |
1588 } | |
1589 | |
1590 (void)memset(utf8, 1, sizeof(utf8)); | |
1591 | |
1592 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
1593 (unsigned char *)&szero, sizeof(szero), utf8, sizeof(utf8), &len); | |
1594 | |
1595 if( !result ) { | |
1596 fprintf(stdout, "Failed to convert UCS-2 0x0000 to UTF-8\n"); | |
1597 rv = PR_FALSE; | |
1598 } else if( 1 != len ) { | |
1599 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: len = %d\n", len); | |
1600 rv = PR_FALSE; | |
1601 } else if( '\0' != *utf8 ) { | |
1602 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: expected 00 ," | |
1603 "received %02.2x\n", (unsigned int)*utf8); | |
1604 rv = PR_FALSE; | |
1605 } | |
1606 | |
1607 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
1608 "", 1, (unsigned char *)&sback, sizeof(sback), &len); | |
1609 | |
1610 if( !result ) { | |
1611 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-2\n"); | |
1612 rv = PR_FALSE; | |
1613 } else if( 2 != len ) { | |
1614 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: len = %d\n", len); | |
1615 rv = PR_FALSE; | |
1616 } else if( 0 != sback ) { | |
1617 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: " | |
1618 "expected 0x0000, received 0x%04.4x\n", sback); | |
1619 rv = PR_FALSE; | |
1620 } | |
1621 | |
1622 return rv; | |
1623 } | |
1624 | |
1625 static PRBool | |
1626 test_multichars | |
1627 ( | |
1628 void | |
1629 ) | |
1630 { | |
1631 int i; | |
1632 unsigned int len, lenout; | |
1633 PRUint32 *ucs4s; | |
1634 char *ucs4_utf8; | |
1635 PRUint16 *ucs2s; | |
1636 char *ucs2_utf8; | |
1637 void *tmp; | |
1638 PRBool result; | |
1639 | |
1640 ucs4s = (PRUint32 *)calloc(sizeof(ucs4)/sizeof(ucs4[0]), sizeof(PRUint32)); | |
1641 ucs2s = (PRUint16 *)calloc(sizeof(ucs2)/sizeof(ucs2[0]), sizeof(PRUint16)); | |
1642 | |
1643 if( ((PRUint32 *)NULL == ucs4s) || ((PRUint16 *)NULL == ucs2s) ) { | |
1644 fprintf(stderr, "out of memory\n"); | |
1645 exit(1); | |
1646 } | |
1647 | |
1648 len = 1; | |
1649 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { | |
1650 ucs4s[i] = ucs4[i].c; | |
1651 len += strlen(ucs4[i].utf8); | |
1652 } | |
1653 | |
1654 ucs4_utf8 = (char *)malloc(len); | |
1655 | |
1656 len = 1; | |
1657 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
1658 ucs2s[i] = ucs2[i].c; | |
1659 len += strlen(ucs2[i].utf8); | |
1660 } | |
1661 | |
1662 ucs2_utf8 = (char *)malloc(len); | |
1663 | |
1664 if( ((char *)NULL == ucs4_utf8) || ((char *)NULL == ucs2_utf8) ) { | |
1665 fprintf(stderr, "out of memory\n"); | |
1666 exit(1); | |
1667 } | |
1668 | |
1669 *ucs4_utf8 = '\0'; | |
1670 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { | |
1671 strcat(ucs4_utf8, ucs4[i].utf8); | |
1672 } | |
1673 | |
1674 *ucs2_utf8 = '\0'; | |
1675 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
1676 strcat(ucs2_utf8, ucs2[i].utf8); | |
1677 } | |
1678 | |
1679 /* UTF-8 -> UCS-4 */ | |
1680 len = sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32); | |
1681 tmp = calloc(len, 1); | |
1682 if( (void *)NULL == tmp ) { | |
1683 fprintf(stderr, "out of memory\n"); | |
1684 exit(1); | |
1685 } | |
1686 | |
1687 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
1688 ucs4_utf8, strlen(ucs4_utf8), tmp, len, &lenout); | |
1689 if( !result ) { | |
1690 fprintf(stdout, "Failed to convert much UTF-8 to UCS-4\n"); | |
1691 goto done; | |
1692 } | |
1693 | |
1694 if( lenout != len ) { | |
1695 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-4\n"); | |
1696 goto loser; | |
1697 } | |
1698 | |
1699 if( 0 != memcmp(ucs4s, tmp, len) ) { | |
1700 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-4\n"); | |
1701 goto loser; | |
1702 } | |
1703 | |
1704 free(tmp); tmp = (void *)NULL; | |
1705 | |
1706 /* UCS-4 -> UTF-8 */ | |
1707 len = strlen(ucs4_utf8); | |
1708 tmp = calloc(len, 1); | |
1709 if( (void *)NULL == tmp ) { | |
1710 fprintf(stderr, "out of memory\n"); | |
1711 exit(1); | |
1712 } | |
1713 | |
1714 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, | |
1715 (unsigned char *)ucs4s, sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32), | |
1716 tmp, len, &lenout); | |
1717 if( !result ) { | |
1718 fprintf(stdout, "Failed to convert much UCS-4 to UTF-8\n"); | |
1719 goto done; | |
1720 } | |
1721 | |
1722 if( lenout != len ) { | |
1723 fprintf(stdout, "Unexpected length converting much UCS-4 to UTF-8\n"); | |
1724 goto loser; | |
1725 } | |
1726 | |
1727 if( 0 != strncmp(ucs4_utf8, tmp, len) ) { | |
1728 fprintf(stdout, "Wrong conversion of much UCS-4 to UTF-8\n"); | |
1729 goto loser; | |
1730 } | |
1731 | |
1732 free(tmp); tmp = (void *)NULL; | |
1733 | |
1734 /* UTF-8 -> UCS-2 */ | |
1735 len = sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16); | |
1736 tmp = calloc(len, 1); | |
1737 if( (void *)NULL == tmp ) { | |
1738 fprintf(stderr, "out of memory\n"); | |
1739 exit(1); | |
1740 } | |
1741 | |
1742 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
1743 ucs2_utf8, strlen(ucs2_utf8), tmp, len, &lenout); | |
1744 if( !result ) { | |
1745 fprintf(stdout, "Failed to convert much UTF-8 to UCS-2\n"); | |
1746 goto done; | |
1747 } | |
1748 | |
1749 if( lenout != len ) { | |
1750 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-2\n"); | |
1751 goto loser; | |
1752 } | |
1753 | |
1754 if( 0 != memcmp(ucs2s, tmp, len) ) { | |
1755 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-2\n"); | |
1756 goto loser; | |
1757 } | |
1758 | |
1759 free(tmp); tmp = (void *)NULL; | |
1760 | |
1761 /* UCS-2 -> UTF-8 */ | |
1762 len = strlen(ucs2_utf8); | |
1763 tmp = calloc(len, 1); | |
1764 if( (void *)NULL == tmp ) { | |
1765 fprintf(stderr, "out of memory\n"); | |
1766 exit(1); | |
1767 } | |
1768 | |
1769 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
1770 (unsigned char *)ucs2s, sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16), | |
1771 tmp, len, &lenout); | |
1772 if( !result ) { | |
1773 fprintf(stdout, "Failed to convert much UCS-2 to UTF-8\n"); | |
1774 goto done; | |
1775 } | |
1776 | |
1777 if( lenout != len ) { | |
1778 fprintf(stdout, "Unexpected length converting much UCS-2 to UTF-8\n"); | |
1779 goto loser; | |
1780 } | |
1781 | |
1782 if( 0 != strncmp(ucs2_utf8, tmp, len) ) { | |
1783 fprintf(stdout, "Wrong conversion of much UCS-2 to UTF-8\n"); | |
1784 goto loser; | |
1785 } | |
1786 | |
1787 /* implement UTF16 */ | |
1788 | |
1789 result = PR_TRUE; | |
1790 goto done; | |
1791 | |
1792 loser: | |
1793 result = PR_FALSE; | |
1794 done: | |
1795 free(ucs4s); | |
1796 free(ucs4_utf8); | |
1797 free(ucs2s); | |
1798 free(ucs2_utf8); | |
1799 if( (void *)NULL != tmp ) free(tmp); | |
1800 return result; | |
1801 } | |
1802 | |
1803 void | |
1804 byte_order | |
1805 ( | |
1806 void | |
1807 ) | |
1808 { | |
1809 /* | |
1810 * The implementation (now) expects the 16- and 32-bit characters | |
1811 * to be in network byte order, not host byte order. Therefore I | |
1812 * have to byteswap all those test vectors above. hton[ls] may be | |
1813 * functions, so I have to do this dynamically. If you want to | |
1814 * use this code to do host byte order conversions, just remove | |
1815 * the call in main() to this function. | |
1816 */ | |
1817 | |
1818 int i; | |
1819 | |
1820 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { | |
1821 struct ucs4 *e = &ucs4[i]; | |
1822 e->c = htonl(e->c); | |
1823 } | |
1824 | |
1825 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
1826 struct ucs2 *e = &ucs2[i]; | |
1827 e->c = htons(e->c); | |
1828 } | |
1829 | |
1830 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) { | |
1831 struct utf16 *e = &utf16[i]; | |
1832 e->c = htonl(e->c); | |
1833 e->w[0] = htons(e->w[0]); | |
1834 e->w[1] = htons(e->w[1]); | |
1835 } | |
1836 | |
1837 return; | |
1838 } | |
1839 | |
1840 int | |
1841 main | |
1842 ( | |
1843 int argc, | |
1844 char *argv[] | |
1845 ) | |
1846 { | |
1847 byte_order(); | |
1848 | |
1849 if( test_ucs4_chars() && | |
1850 test_ucs2_chars() && | |
1851 test_utf16_chars() && | |
1852 test_utf8_bad_chars() && | |
1853 test_utf16_bad_chars() && | |
1854 test_iso88591_chars() && | |
1855 test_zeroes() && | |
1856 test_multichars() && | |
1857 PR_TRUE ) { | |
1858 fprintf(stderr, "PASS\n"); | |
1859 return 1; | |
1860 } else { | |
1861 fprintf(stderr, "FAIL\n"); | |
1862 return 0; | |
1863 } | |
1864 } | |
1865 | |
1866 #endif /* TEST_UTF8 */ | |
OLD | NEW |