Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(905)

Side by Side Diff: mozilla/security/nss/lib/util/utf8.c

Issue 14249009: Change the NSS and NSPR source tree to the new directory structure to be (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/nss/
Patch Set: Created 7 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « mozilla/security/nss/lib/util/templates.c ('k') | mozilla/security/nss/lib/util/utilmod.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 #ifdef DEBUG
6 static const char CVS_ID[] = "@(#) $RCSfile: utf8.c,v $ $Revision: 1.14 $ $Date: 2012/04/25 14:50:16 $";
7 #endif /* DEBUG */
8
9 #include "seccomon.h"
10 #include "secport.h"
11
12 #ifdef TEST_UTF8
13 #include <assert.h>
14 #undef PORT_Assert
15 #define PORT_Assert assert
16 #endif
17
18 /*
19 * From RFC 2044:
20 *
21 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
22 * 0000 0000-0000 007F 0xxxxxxx
23 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
24 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
25 * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
26 * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
27 * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx
28 */
29
30 /*
31 * From http://www.imc.org/draft-hoffman-utf16
32 *
33 * For U on [0x00010000,0x0010FFFF]: Let U' = U - 0x00010000
34 *
35 * U' = yyyyyyyyyyxxxxxxxxxx
36 * W1 = 110110yyyyyyyyyy
37 * W2 = 110111xxxxxxxxxx
38 */
39
40 /*
41 * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit
42 * character values. If you wish to use this code for working with
43 * host byte order values, define the following:
44 *
45 * #if IS_BIG_ENDIAN
46 * #define L_0 0
47 * #define L_1 1
48 * #define L_2 2
49 * #define L_3 3
50 * #define H_0 0
51 * #define H_1 1
52 * #else / * not everyone has elif * /
53 * #if IS_LITTLE_ENDIAN
54 * #define L_0 3
55 * #define L_1 2
56 * #define L_2 1
57 * #define L_3 0
58 * #define H_0 1
59 * #define H_1 0
60 * #else
61 * #error "PDP and NUXI support deferred"
62 * #endif / * IS_LITTLE_ENDIAN * /
63 * #endif / * IS_BIG_ENDIAN * /
64 */
65
/*
 * Byte offsets inside a 4-byte (L_n) or 2-byte (H_n) character value.
 * Offset 0 is the most significant byte, i.e. the values are laid out
 * high-order byte first.
 */
#define L_0 0
#define L_1 1
#define L_2 2
#define L_3 3
#define H_0 0
#define H_1 1

/* Value returned by sec_port_read_utf8 when the input cannot be decoded. */
#define BAD_UTF8 ((PRUint32)-1)
74
75 /*
76 * Parse a single UTF-8 character per the spec. in section 3.9 (D36)
77 * of Unicode 4.0.0.
78 *
79 * Parameters:
80 * index - Points to the byte offset in inBuf of character to read. On success,
81 * updated to the offset of the following character.
82 * inBuf - Input buffer, UTF-8 encoded
83 * inbufLen - Length of input buffer, in bytes.
84 *
85 * Returns:
86 * Success - The UCS4 encoded character
87 * Failure - BAD_UTF8
88 */
89 static PRUint32
90 sec_port_read_utf8(unsigned int *index, unsigned char *inBuf, unsigned int inBuf Len)
91 {
92 PRUint32 result;
93 unsigned int i = *index;
94 int bytes_left;
95 PRUint32 min_value;
96
97 PORT_Assert(i < inBufLen);
98
99 if ( (inBuf[i] & 0x80) == 0x00 ) {
100 result = inBuf[i++];
101 bytes_left = 0;
102 min_value = 0;
103 } else if ( (inBuf[i] & 0xE0) == 0xC0 ) {
104 result = inBuf[i++] & 0x1F;
105 bytes_left = 1;
106 min_value = 0x80;
107 } else if ( (inBuf[i] & 0xF0) == 0xE0) {
108 result = inBuf[i++] & 0x0F;
109 bytes_left = 2;
110 min_value = 0x800;
111 } else if ( (inBuf[i] & 0xF8) == 0xF0) {
112 result = inBuf[i++] & 0x07;
113 bytes_left = 3;
114 min_value = 0x10000;
115 } else {
116 return BAD_UTF8;
117 }
118
119 while (bytes_left--) {
120 if (i >= inBufLen || (inBuf[i] & 0xC0) != 0x80) return BAD_UTF8;
121 result = (result << 6) | (inBuf[i++] & 0x3F);
122 }
123
124 /* Check for overlong sequences, surrogates, and outside unicode range */
125 if (result < min_value || (result & 0xFFFFF800) == 0xD800 || result > 0x10FFFF ) {
126 return BAD_UTF8;
127 }
128
129 *index = i;
130 return result;
131 }
132
133 PRBool
134 sec_port_ucs4_utf8_conversion_function
135 (
136 PRBool toUnicode,
137 unsigned char *inBuf,
138 unsigned int inBufLen,
139 unsigned char *outBuf,
140 unsigned int maxOutBufLen,
141 unsigned int *outBufLen
142 )
143 {
144 PORT_Assert((unsigned int *)NULL != outBufLen);
145
146 if( toUnicode ) {
147 unsigned int i, len = 0;
148
149 for( i = 0; i < inBufLen; ) {
150 if( (inBuf[i] & 0x80) == 0x00 ) i += 1;
151 else if( (inBuf[i] & 0xE0) == 0xC0 ) i += 2;
152 else if( (inBuf[i] & 0xF0) == 0xE0 ) i += 3;
153 else if( (inBuf[i] & 0xF8) == 0xF0 ) i += 4;
154 else return PR_FALSE;
155
156 len += 4;
157 }
158
159 if( len > maxOutBufLen ) {
160 *outBufLen = len;
161 return PR_FALSE;
162 }
163
164 len = 0;
165
166 for( i = 0; i < inBufLen; ) {
167 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
168
169 if (ucs4 == BAD_UTF8) return PR_FALSE;
170
171 outBuf[len+L_0] = 0x00;
172 outBuf[len+L_1] = (unsigned char)(ucs4 >> 16);
173 outBuf[len+L_2] = (unsigned char)(ucs4 >> 8);
174 outBuf[len+L_3] = (unsigned char)ucs4;
175
176 len += 4;
177 }
178
179 *outBufLen = len;
180 return PR_TRUE;
181 } else {
182 unsigned int i, len = 0;
183 PORT_Assert((inBufLen % 4) == 0);
184 if ((inBufLen % 4) != 0) {
185 *outBufLen = 0;
186 return PR_FALSE;
187 }
188
189 for( i = 0; i < inBufLen; i += 4 ) {
190 if( (inBuf[i+L_0] > 0x00) || (inBuf[i+L_1] > 0x10) ) {
191 *outBufLen = 0;
192 return PR_FALSE;
193 } else if( inBuf[i+L_1] >= 0x01 ) len += 4;
194 else if( inBuf[i+L_2] >= 0x08 ) len += 3;
195 else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) len += 2;
196 else len += 1;
197 }
198
199 if( len > maxOutBufLen ) {
200 *outBufLen = len;
201 return PR_FALSE;
202 }
203
204 len = 0;
205
206 for( i = 0; i < inBufLen; i += 4 ) {
207 if( inBuf[i+L_1] >= 0x01 ) {
208 /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
209 /* 00000000 000abcde fghijklm nopqrstu ->
210 11110abc 10defghi 10jklmno 10pqrstu */
211
212 outBuf[len+0] = 0xF0 | ((inBuf[i+L_1] & 0x1C) >> 2);
213 outBuf[len+1] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4)
214 | ((inBuf[i+L_2] & 0xF0) >> 4);
215 outBuf[len+2] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)
216 | ((inBuf[i+L_3] & 0xC0) >> 6);
217 outBuf[len+3] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
218
219 len += 4;
220 } else if( inBuf[i+L_2] >= 0x08 ) {
221 /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
222 /* 00000000 00000000 abcdefgh ijklmnop ->
223 1110abcd 10efghij 10klmnop */
224
225 outBuf[len+0] = 0xE0 | ((inBuf[i+L_2] & 0xF0) >> 4);
226 outBuf[len+1] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)
227 | ((inBuf[i+L_3] & 0xC0) >> 6);
228 outBuf[len+2] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
229
230 len += 3;
231 } else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) {
232 /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */
233 /* 00000000 00000000 00000abc defghijk ->
234 110abcde 10fghijk */
235
236 outBuf[len+0] = 0xC0 | ((inBuf[i+L_2] & 0x07) << 2)
237 | ((inBuf[i+L_3] & 0xC0) >> 6);
238 outBuf[len+1] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
239
240 len += 2;
241 } else {
242 /* 0000 0000-0000 007F -> 0xxxxxx */
243 /* 00000000 00000000 00000000 0abcdefg ->
244 0abcdefg */
245
246 outBuf[len+0] = (inBuf[i+L_3] & 0x7F);
247
248 len += 1;
249 }
250 }
251
252 *outBufLen = len;
253 return PR_TRUE;
254 }
255 }
256
257 PRBool
258 sec_port_ucs2_utf8_conversion_function
259 (
260 PRBool toUnicode,
261 unsigned char *inBuf,
262 unsigned int inBufLen,
263 unsigned char *outBuf,
264 unsigned int maxOutBufLen,
265 unsigned int *outBufLen
266 )
267 {
268 PORT_Assert((unsigned int *)NULL != outBufLen);
269
270 if( toUnicode ) {
271 unsigned int i, len = 0;
272
273 for( i = 0; i < inBufLen; ) {
274 if( (inBuf[i] & 0x80) == 0x00 ) {
275 i += 1;
276 len += 2;
277 } else if( (inBuf[i] & 0xE0) == 0xC0 ) {
278 i += 2;
279 len += 2;
280 } else if( (inBuf[i] & 0xF0) == 0xE0 ) {
281 i += 3;
282 len += 2;
283 } else if( (inBuf[i] & 0xF8) == 0xF0 ) {
284 i += 4;
285 len += 4;
286 } else return PR_FALSE;
287 }
288
289 if( len > maxOutBufLen ) {
290 *outBufLen = len;
291 return PR_FALSE;
292 }
293
294 len = 0;
295
296 for( i = 0; i < inBufLen; ) {
297 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
298
299 if (ucs4 == BAD_UTF8) return PR_FALSE;
300
301 if( ucs4 < 0x10000) {
302 outBuf[len+H_0] = (unsigned char)(ucs4 >> 8);
303 outBuf[len+H_1] = (unsigned char)ucs4;
304 len += 2;
305 } else {
306 ucs4 -= 0x10000;
307 outBuf[len+0+H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3));
308 outBuf[len+0+H_1] = (unsigned char)(ucs4 >> 10);
309 outBuf[len+2+H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3));
310 outBuf[len+2+H_1] = (unsigned char)ucs4;
311 len += 4;
312 }
313 }
314
315 *outBufLen = len;
316 return PR_TRUE;
317 } else {
318 unsigned int i, len = 0;
319 PORT_Assert((inBufLen % 2) == 0);
320 if ((inBufLen % 2) != 0) {
321 *outBufLen = 0;
322 return PR_FALSE;
323 }
324
325 for( i = 0; i < inBufLen; i += 2 ) {
326 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_0] & 0x80) == 0x00) ) len += 1;
327 else if( inBuf[i+H_0] < 0x08 ) len += 2;
328 else if( ((inBuf[i+0+H_0] & 0xDC) == 0xD8) ) {
329 if( ((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2) ) {
330 i += 2;
331 len += 4;
332 } else {
333 return PR_FALSE;
334 }
335 }
336 else len += 3;
337 }
338
339 if( len > maxOutBufLen ) {
340 *outBufLen = len;
341 return PR_FALSE;
342 }
343
344 len = 0;
345
346 for( i = 0; i < inBufLen; i += 2 ) {
347 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) {
348 /* 0000-007F -> 0xxxxxx */
349 /* 00000000 0abcdefg -> 0abcdefg */
350
351 outBuf[len] = inBuf[i+H_1] & 0x7F;
352
353 len += 1;
354 } else if( inBuf[i+H_0] < 0x08 ) {
355 /* 0080-07FF -> 110xxxxx 10xxxxxx */
356 /* 00000abc defghijk -> 110abcde 10fghijk */
357
358 outBuf[len+0] = 0xC0 | ((inBuf[i+H_0] & 0x07) << 2)
359 | ((inBuf[i+H_1] & 0xC0) >> 6);
360 outBuf[len+1] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);
361
362 len += 2;
363 } else if( (inBuf[i+H_0] & 0xDC) == 0xD8 ) {
364 int abcde, BCDE;
365
366 PORT_Assert(((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2));
367
368 /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
369 /* 110110BC DEfghijk 110111lm nopqrstu ->
370 { Let abcde = BCDE + 1 }
371 11110abc 10defghi 10jklmno 10pqrstu */
372
373 BCDE = ((inBuf[i+H_0] & 0x03) << 2) | ((inBuf[i+H_1] & 0xC0) >> 6);
374 abcde = BCDE + 1;
375
376 outBuf[len+0] = 0xF0 | ((abcde & 0x1C) >> 2);
377 outBuf[len+1] = 0x80 | ((abcde & 0x03) << 4)
378 | ((inBuf[i+0+H_1] & 0x3C) >> 2);
379 outBuf[len+2] = 0x80 | ((inBuf[i+0+H_1] & 0x03) << 4)
380 | ((inBuf[i+2+H_0] & 0x03) << 2)
381 | ((inBuf[i+2+H_1] & 0xC0) >> 6);
382 outBuf[len+3] = 0x80 | ((inBuf[i+2+H_1] & 0x3F) >> 0);
383
384 i += 2;
385 len += 4;
386 } else {
387 /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
388 /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */
389
390 outBuf[len+0] = 0xE0 | ((inBuf[i+H_0] & 0xF0) >> 4);
391 outBuf[len+1] = 0x80 | ((inBuf[i+H_0] & 0x0F) << 2)
392 | ((inBuf[i+H_1] & 0xC0) >> 6);
393 outBuf[len+2] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);
394
395 len += 3;
396 }
397 }
398
399 *outBufLen = len;
400 return PR_TRUE;
401 }
402 }
403
404 PRBool
405 sec_port_iso88591_utf8_conversion_function
406 (
407 const unsigned char *inBuf,
408 unsigned int inBufLen,
409 unsigned char *outBuf,
410 unsigned int maxOutBufLen,
411 unsigned int *outBufLen
412 )
413 {
414 unsigned int i, len = 0;
415
416 PORT_Assert((unsigned int *)NULL != outBufLen);
417
418 for( i = 0; i < inBufLen; i++) {
419 if( (inBuf[i] & 0x80) == 0x00 ) len += 1;
420 else len += 2;
421 }
422
423 if( len > maxOutBufLen ) {
424 *outBufLen = len;
425 return PR_FALSE;
426 }
427
428 len = 0;
429
430 for( i = 0; i < inBufLen; i++) {
431 if( (inBuf[i] & 0x80) == 0x00 ) {
432 /* 00-7F -> 0xxxxxxx */
433 /* 0abcdefg -> 0abcdefg */
434
435 outBuf[len] = inBuf[i];
436 len += 1;
437 } else {
438 /* 80-FF <- 110xxxxx 10xxxxxx */
439 /* 00000000 abcdefgh -> 110000ab 10cdefgh */
440
441 outBuf[len+0] = 0xC0 | ((inBuf[i] & 0xC0) >> 6);
442 outBuf[len+1] = 0x80 | ((inBuf[i] & 0x3F) >> 0);
443
444 len += 2;
445 }
446 }
447
448 *outBufLen = len;
449 return PR_TRUE;
450 }
451
452 #ifdef TEST_UTF8
453
454 #include <stdio.h>
455 #include <string.h>
456 #include <stdlib.h>
457 #include <netinet/in.h> /* for htonl and htons */
458
459 /*
460 * UCS-4 vectors
461 */
462
463 struct ucs4 {
464 PRUint32 c;
465 char *utf8;
466 };
467
468 /*
469 * UCS-2 vectors
470 */
471
472 struct ucs2 {
473 PRUint16 c;
474 char *utf8;
475 };
476
477 /*
478 * UTF-16 vectors
479 */
480
481 struct utf16 {
482 PRUint32 c;
483 PRUint16 w[2];
484 };
485
486
487 /*
488 * UCS-4 vectors
489 */
490
491 struct ucs4 ucs4[] = {
492 { 0x00000001, "\x01" },
493 { 0x00000002, "\x02" },
494 { 0x00000003, "\x03" },
495 { 0x00000004, "\x04" },
496 { 0x00000007, "\x07" },
497 { 0x00000008, "\x08" },
498 { 0x0000000F, "\x0F" },
499 { 0x00000010, "\x10" },
500 { 0x0000001F, "\x1F" },
501 { 0x00000020, "\x20" },
502 { 0x0000003F, "\x3F" },
503 { 0x00000040, "\x40" },
504 { 0x0000007F, "\x7F" },
505
506 { 0x00000080, "\xC2\x80" },
507 { 0x00000081, "\xC2\x81" },
508 { 0x00000082, "\xC2\x82" },
509 { 0x00000084, "\xC2\x84" },
510 { 0x00000088, "\xC2\x88" },
511 { 0x00000090, "\xC2\x90" },
512 { 0x000000A0, "\xC2\xA0" },
513 { 0x000000C0, "\xC3\x80" },
514 { 0x000000FF, "\xC3\xBF" },
515 { 0x00000100, "\xC4\x80" },
516 { 0x00000101, "\xC4\x81" },
517 { 0x00000102, "\xC4\x82" },
518 { 0x00000104, "\xC4\x84" },
519 { 0x00000108, "\xC4\x88" },
520 { 0x00000110, "\xC4\x90" },
521 { 0x00000120, "\xC4\xA0" },
522 { 0x00000140, "\xC5\x80" },
523 { 0x00000180, "\xC6\x80" },
524 { 0x000001FF, "\xC7\xBF" },
525 { 0x00000200, "\xC8\x80" },
526 { 0x00000201, "\xC8\x81" },
527 { 0x00000202, "\xC8\x82" },
528 { 0x00000204, "\xC8\x84" },
529 { 0x00000208, "\xC8\x88" },
530 { 0x00000210, "\xC8\x90" },
531 { 0x00000220, "\xC8\xA0" },
532 { 0x00000240, "\xC9\x80" },
533 { 0x00000280, "\xCA\x80" },
534 { 0x00000300, "\xCC\x80" },
535 { 0x000003FF, "\xCF\xBF" },
536 { 0x00000400, "\xD0\x80" },
537 { 0x00000401, "\xD0\x81" },
538 { 0x00000402, "\xD0\x82" },
539 { 0x00000404, "\xD0\x84" },
540 { 0x00000408, "\xD0\x88" },
541 { 0x00000410, "\xD0\x90" },
542 { 0x00000420, "\xD0\xA0" },
543 { 0x00000440, "\xD1\x80" },
544 { 0x00000480, "\xD2\x80" },
545 { 0x00000500, "\xD4\x80" },
546 { 0x00000600, "\xD8\x80" },
547 { 0x000007FF, "\xDF\xBF" },
548
549 { 0x00000800, "\xE0\xA0\x80" },
550 { 0x00000801, "\xE0\xA0\x81" },
551 { 0x00000802, "\xE0\xA0\x82" },
552 { 0x00000804, "\xE0\xA0\x84" },
553 { 0x00000808, "\xE0\xA0\x88" },
554 { 0x00000810, "\xE0\xA0\x90" },
555 { 0x00000820, "\xE0\xA0\xA0" },
556 { 0x00000840, "\xE0\xA1\x80" },
557 { 0x00000880, "\xE0\xA2\x80" },
558 { 0x00000900, "\xE0\xA4\x80" },
559 { 0x00000A00, "\xE0\xA8\x80" },
560 { 0x00000C00, "\xE0\xB0\x80" },
561 { 0x00000FFF, "\xE0\xBF\xBF" },
562 { 0x00001000, "\xE1\x80\x80" },
563 { 0x00001001, "\xE1\x80\x81" },
564 { 0x00001002, "\xE1\x80\x82" },
565 { 0x00001004, "\xE1\x80\x84" },
566 { 0x00001008, "\xE1\x80\x88" },
567 { 0x00001010, "\xE1\x80\x90" },
568 { 0x00001020, "\xE1\x80\xA0" },
569 { 0x00001040, "\xE1\x81\x80" },
570 { 0x00001080, "\xE1\x82\x80" },
571 { 0x00001100, "\xE1\x84\x80" },
572 { 0x00001200, "\xE1\x88\x80" },
573 { 0x00001400, "\xE1\x90\x80" },
574 { 0x00001800, "\xE1\xA0\x80" },
575 { 0x00001FFF, "\xE1\xBF\xBF" },
576 { 0x00002000, "\xE2\x80\x80" },
577 { 0x00002001, "\xE2\x80\x81" },
578 { 0x00002002, "\xE2\x80\x82" },
579 { 0x00002004, "\xE2\x80\x84" },
580 { 0x00002008, "\xE2\x80\x88" },
581 { 0x00002010, "\xE2\x80\x90" },
582 { 0x00002020, "\xE2\x80\xA0" },
583 { 0x00002040, "\xE2\x81\x80" },
584 { 0x00002080, "\xE2\x82\x80" },
585 { 0x00002100, "\xE2\x84\x80" },
586 { 0x00002200, "\xE2\x88\x80" },
587 { 0x00002400, "\xE2\x90\x80" },
588 { 0x00002800, "\xE2\xA0\x80" },
589 { 0x00003000, "\xE3\x80\x80" },
590 { 0x00003FFF, "\xE3\xBF\xBF" },
591 { 0x00004000, "\xE4\x80\x80" },
592 { 0x00004001, "\xE4\x80\x81" },
593 { 0x00004002, "\xE4\x80\x82" },
594 { 0x00004004, "\xE4\x80\x84" },
595 { 0x00004008, "\xE4\x80\x88" },
596 { 0x00004010, "\xE4\x80\x90" },
597 { 0x00004020, "\xE4\x80\xA0" },
598 { 0x00004040, "\xE4\x81\x80" },
599 { 0x00004080, "\xE4\x82\x80" },
600 { 0x00004100, "\xE4\x84\x80" },
601 { 0x00004200, "\xE4\x88\x80" },
602 { 0x00004400, "\xE4\x90\x80" },
603 { 0x00004800, "\xE4\xA0\x80" },
604 { 0x00005000, "\xE5\x80\x80" },
605 { 0x00006000, "\xE6\x80\x80" },
606 { 0x00007FFF, "\xE7\xBF\xBF" },
607 { 0x00008000, "\xE8\x80\x80" },
608 { 0x00008001, "\xE8\x80\x81" },
609 { 0x00008002, "\xE8\x80\x82" },
610 { 0x00008004, "\xE8\x80\x84" },
611 { 0x00008008, "\xE8\x80\x88" },
612 { 0x00008010, "\xE8\x80\x90" },
613 { 0x00008020, "\xE8\x80\xA0" },
614 { 0x00008040, "\xE8\x81\x80" },
615 { 0x00008080, "\xE8\x82\x80" },
616 { 0x00008100, "\xE8\x84\x80" },
617 { 0x00008200, "\xE8\x88\x80" },
618 { 0x00008400, "\xE8\x90\x80" },
619 { 0x00008800, "\xE8\xA0\x80" },
620 { 0x00009000, "\xE9\x80\x80" },
621 { 0x0000A000, "\xEA\x80\x80" },
622 { 0x0000C000, "\xEC\x80\x80" },
623 { 0x0000FFFF, "\xEF\xBF\xBF" },
624
625 { 0x00010000, "\xF0\x90\x80\x80" },
626 { 0x00010001, "\xF0\x90\x80\x81" },
627 { 0x00010002, "\xF0\x90\x80\x82" },
628 { 0x00010004, "\xF0\x90\x80\x84" },
629 { 0x00010008, "\xF0\x90\x80\x88" },
630 { 0x00010010, "\xF0\x90\x80\x90" },
631 { 0x00010020, "\xF0\x90\x80\xA0" },
632 { 0x00010040, "\xF0\x90\x81\x80" },
633 { 0x00010080, "\xF0\x90\x82\x80" },
634 { 0x00010100, "\xF0\x90\x84\x80" },
635 { 0x00010200, "\xF0\x90\x88\x80" },
636 { 0x00010400, "\xF0\x90\x90\x80" },
637 { 0x00010800, "\xF0\x90\xA0\x80" },
638 { 0x00011000, "\xF0\x91\x80\x80" },
639 { 0x00012000, "\xF0\x92\x80\x80" },
640 { 0x00014000, "\xF0\x94\x80\x80" },
641 { 0x00018000, "\xF0\x98\x80\x80" },
642 { 0x0001FFFF, "\xF0\x9F\xBF\xBF" },
643 { 0x00020000, "\xF0\xA0\x80\x80" },
644 { 0x00020001, "\xF0\xA0\x80\x81" },
645 { 0x00020002, "\xF0\xA0\x80\x82" },
646 { 0x00020004, "\xF0\xA0\x80\x84" },
647 { 0x00020008, "\xF0\xA0\x80\x88" },
648 { 0x00020010, "\xF0\xA0\x80\x90" },
649 { 0x00020020, "\xF0\xA0\x80\xA0" },
650 { 0x00020040, "\xF0\xA0\x81\x80" },
651 { 0x00020080, "\xF0\xA0\x82\x80" },
652 { 0x00020100, "\xF0\xA0\x84\x80" },
653 { 0x00020200, "\xF0\xA0\x88\x80" },
654 { 0x00020400, "\xF0\xA0\x90\x80" },
655 { 0x00020800, "\xF0\xA0\xA0\x80" },
656 { 0x00021000, "\xF0\xA1\x80\x80" },
657 { 0x00022000, "\xF0\xA2\x80\x80" },
658 { 0x00024000, "\xF0\xA4\x80\x80" },
659 { 0x00028000, "\xF0\xA8\x80\x80" },
660 { 0x00030000, "\xF0\xB0\x80\x80" },
661 { 0x0003FFFF, "\xF0\xBF\xBF\xBF" },
662 { 0x00040000, "\xF1\x80\x80\x80" },
663 { 0x00040001, "\xF1\x80\x80\x81" },
664 { 0x00040002, "\xF1\x80\x80\x82" },
665 { 0x00040004, "\xF1\x80\x80\x84" },
666 { 0x00040008, "\xF1\x80\x80\x88" },
667 { 0x00040010, "\xF1\x80\x80\x90" },
668 { 0x00040020, "\xF1\x80\x80\xA0" },
669 { 0x00040040, "\xF1\x80\x81\x80" },
670 { 0x00040080, "\xF1\x80\x82\x80" },
671 { 0x00040100, "\xF1\x80\x84\x80" },
672 { 0x00040200, "\xF1\x80\x88\x80" },
673 { 0x00040400, "\xF1\x80\x90\x80" },
674 { 0x00040800, "\xF1\x80\xA0\x80" },
675 { 0x00041000, "\xF1\x81\x80\x80" },
676 { 0x00042000, "\xF1\x82\x80\x80" },
677 { 0x00044000, "\xF1\x84\x80\x80" },
678 { 0x00048000, "\xF1\x88\x80\x80" },
679 { 0x00050000, "\xF1\x90\x80\x80" },
680 { 0x00060000, "\xF1\xA0\x80\x80" },
681 { 0x0007FFFF, "\xF1\xBF\xBF\xBF" },
682 { 0x00080000, "\xF2\x80\x80\x80" },
683 { 0x00080001, "\xF2\x80\x80\x81" },
684 { 0x00080002, "\xF2\x80\x80\x82" },
685 { 0x00080004, "\xF2\x80\x80\x84" },
686 { 0x00080008, "\xF2\x80\x80\x88" },
687 { 0x00080010, "\xF2\x80\x80\x90" },
688 { 0x00080020, "\xF2\x80\x80\xA0" },
689 { 0x00080040, "\xF2\x80\x81\x80" },
690 { 0x00080080, "\xF2\x80\x82\x80" },
691 { 0x00080100, "\xF2\x80\x84\x80" },
692 { 0x00080200, "\xF2\x80\x88\x80" },
693 { 0x00080400, "\xF2\x80\x90\x80" },
694 { 0x00080800, "\xF2\x80\xA0\x80" },
695 { 0x00081000, "\xF2\x81\x80\x80" },
696 { 0x00082000, "\xF2\x82\x80\x80" },
697 { 0x00084000, "\xF2\x84\x80\x80" },
698 { 0x00088000, "\xF2\x88\x80\x80" },
699 { 0x00090000, "\xF2\x90\x80\x80" },
700 { 0x000A0000, "\xF2\xA0\x80\x80" },
701 { 0x000C0000, "\xF3\x80\x80\x80" },
702 { 0x000FFFFF, "\xF3\xBF\xBF\xBF" },
703 { 0x00100000, "\xF4\x80\x80\x80" },
704 { 0x00100001, "\xF4\x80\x80\x81" },
705 { 0x00100002, "\xF4\x80\x80\x82" },
706 { 0x00100004, "\xF4\x80\x80\x84" },
707 { 0x00100008, "\xF4\x80\x80\x88" },
708 { 0x00100010, "\xF4\x80\x80\x90" },
709 { 0x00100020, "\xF4\x80\x80\xA0" },
710 { 0x00100040, "\xF4\x80\x81\x80" },
711 { 0x00100080, "\xF4\x80\x82\x80" },
712 { 0x00100100, "\xF4\x80\x84\x80" },
713 { 0x00100200, "\xF4\x80\x88\x80" },
714 { 0x00100400, "\xF4\x80\x90\x80" },
715 { 0x00100800, "\xF4\x80\xA0\x80" },
716 { 0x00101000, "\xF4\x81\x80\x80" },
717 { 0x00102000, "\xF4\x82\x80\x80" },
718 { 0x00104000, "\xF4\x84\x80\x80" },
719 { 0x00108000, "\xF4\x88\x80\x80" },
720 { 0x0010FFFF, "\xF4\x8F\xBF\xBF" },
721 };
722
723 /*
724 * UCS-2 vectors
725 */
726
727 struct ucs2 ucs2[] = {
728 { 0x0001, "\x01" },
729 { 0x0002, "\x02" },
730 { 0x0003, "\x03" },
731 { 0x0004, "\x04" },
732 { 0x0007, "\x07" },
733 { 0x0008, "\x08" },
734 { 0x000F, "\x0F" },
735 { 0x0010, "\x10" },
736 { 0x001F, "\x1F" },
737 { 0x0020, "\x20" },
738 { 0x003F, "\x3F" },
739 { 0x0040, "\x40" },
740 { 0x007F, "\x7F" },
741
742 { 0x0080, "\xC2\x80" },
743 { 0x0081, "\xC2\x81" },
744 { 0x0082, "\xC2\x82" },
745 { 0x0084, "\xC2\x84" },
746 { 0x0088, "\xC2\x88" },
747 { 0x0090, "\xC2\x90" },
748 { 0x00A0, "\xC2\xA0" },
749 { 0x00C0, "\xC3\x80" },
750 { 0x00FF, "\xC3\xBF" },
751 { 0x0100, "\xC4\x80" },
752 { 0x0101, "\xC4\x81" },
753 { 0x0102, "\xC4\x82" },
754 { 0x0104, "\xC4\x84" },
755 { 0x0108, "\xC4\x88" },
756 { 0x0110, "\xC4\x90" },
757 { 0x0120, "\xC4\xA0" },
758 { 0x0140, "\xC5\x80" },
759 { 0x0180, "\xC6\x80" },
760 { 0x01FF, "\xC7\xBF" },
761 { 0x0200, "\xC8\x80" },
762 { 0x0201, "\xC8\x81" },
763 { 0x0202, "\xC8\x82" },
764 { 0x0204, "\xC8\x84" },
765 { 0x0208, "\xC8\x88" },
766 { 0x0210, "\xC8\x90" },
767 { 0x0220, "\xC8\xA0" },
768 { 0x0240, "\xC9\x80" },
769 { 0x0280, "\xCA\x80" },
770 { 0x0300, "\xCC\x80" },
771 { 0x03FF, "\xCF\xBF" },
772 { 0x0400, "\xD0\x80" },
773 { 0x0401, "\xD0\x81" },
774 { 0x0402, "\xD0\x82" },
775 { 0x0404, "\xD0\x84" },
776 { 0x0408, "\xD0\x88" },
777 { 0x0410, "\xD0\x90" },
778 { 0x0420, "\xD0\xA0" },
779 { 0x0440, "\xD1\x80" },
780 { 0x0480, "\xD2\x80" },
781 { 0x0500, "\xD4\x80" },
782 { 0x0600, "\xD8\x80" },
783 { 0x07FF, "\xDF\xBF" },
784
785 { 0x0800, "\xE0\xA0\x80" },
786 { 0x0801, "\xE0\xA0\x81" },
787 { 0x0802, "\xE0\xA0\x82" },
788 { 0x0804, "\xE0\xA0\x84" },
789 { 0x0808, "\xE0\xA0\x88" },
790 { 0x0810, "\xE0\xA0\x90" },
791 { 0x0820, "\xE0\xA0\xA0" },
792 { 0x0840, "\xE0\xA1\x80" },
793 { 0x0880, "\xE0\xA2\x80" },
794 { 0x0900, "\xE0\xA4\x80" },
795 { 0x0A00, "\xE0\xA8\x80" },
796 { 0x0C00, "\xE0\xB0\x80" },
797 { 0x0FFF, "\xE0\xBF\xBF" },
798 { 0x1000, "\xE1\x80\x80" },
799 { 0x1001, "\xE1\x80\x81" },
800 { 0x1002, "\xE1\x80\x82" },
801 { 0x1004, "\xE1\x80\x84" },
802 { 0x1008, "\xE1\x80\x88" },
803 { 0x1010, "\xE1\x80\x90" },
804 { 0x1020, "\xE1\x80\xA0" },
805 { 0x1040, "\xE1\x81\x80" },
806 { 0x1080, "\xE1\x82\x80" },
807 { 0x1100, "\xE1\x84\x80" },
808 { 0x1200, "\xE1\x88\x80" },
809 { 0x1400, "\xE1\x90\x80" },
810 { 0x1800, "\xE1\xA0\x80" },
811 { 0x1FFF, "\xE1\xBF\xBF" },
812 { 0x2000, "\xE2\x80\x80" },
813 { 0x2001, "\xE2\x80\x81" },
814 { 0x2002, "\xE2\x80\x82" },
815 { 0x2004, "\xE2\x80\x84" },
816 { 0x2008, "\xE2\x80\x88" },
817 { 0x2010, "\xE2\x80\x90" },
818 { 0x2020, "\xE2\x80\xA0" },
819 { 0x2040, "\xE2\x81\x80" },
820 { 0x2080, "\xE2\x82\x80" },
821 { 0x2100, "\xE2\x84\x80" },
822 { 0x2200, "\xE2\x88\x80" },
823 { 0x2400, "\xE2\x90\x80" },
824 { 0x2800, "\xE2\xA0\x80" },
825 { 0x3000, "\xE3\x80\x80" },
826 { 0x3FFF, "\xE3\xBF\xBF" },
827 { 0x4000, "\xE4\x80\x80" },
828 { 0x4001, "\xE4\x80\x81" },
829 { 0x4002, "\xE4\x80\x82" },
830 { 0x4004, "\xE4\x80\x84" },
831 { 0x4008, "\xE4\x80\x88" },
832 { 0x4010, "\xE4\x80\x90" },
833 { 0x4020, "\xE4\x80\xA0" },
834 { 0x4040, "\xE4\x81\x80" },
835 { 0x4080, "\xE4\x82\x80" },
836 { 0x4100, "\xE4\x84\x80" },
837 { 0x4200, "\xE4\x88\x80" },
838 { 0x4400, "\xE4\x90\x80" },
839 { 0x4800, "\xE4\xA0\x80" },
840 { 0x5000, "\xE5\x80\x80" },
841 { 0x6000, "\xE6\x80\x80" },
842 { 0x7FFF, "\xE7\xBF\xBF" },
843 { 0x8000, "\xE8\x80\x80" },
844 { 0x8001, "\xE8\x80\x81" },
845 { 0x8002, "\xE8\x80\x82" },
846 { 0x8004, "\xE8\x80\x84" },
847 { 0x8008, "\xE8\x80\x88" },
848 { 0x8010, "\xE8\x80\x90" },
849 { 0x8020, "\xE8\x80\xA0" },
850 { 0x8040, "\xE8\x81\x80" },
851 { 0x8080, "\xE8\x82\x80" },
852 { 0x8100, "\xE8\x84\x80" },
853 { 0x8200, "\xE8\x88\x80" },
854 { 0x8400, "\xE8\x90\x80" },
855 { 0x8800, "\xE8\xA0\x80" },
856 { 0x9000, "\xE9\x80\x80" },
857 { 0xA000, "\xEA\x80\x80" },
858 { 0xC000, "\xEC\x80\x80" },
859 { 0xFFFF, "\xEF\xBF\xBF" }
860
861 };
862
863 /*
864 * UTF-16 vectors
865 */
866
867 struct utf16 utf16[] = {
868 { 0x00010000, { 0xD800, 0xDC00 } },
869 { 0x00010001, { 0xD800, 0xDC01 } },
870 { 0x00010002, { 0xD800, 0xDC02 } },
871 { 0x00010003, { 0xD800, 0xDC03 } },
872 { 0x00010004, { 0xD800, 0xDC04 } },
873 { 0x00010007, { 0xD800, 0xDC07 } },
874 { 0x00010008, { 0xD800, 0xDC08 } },
875 { 0x0001000F, { 0xD800, 0xDC0F } },
876 { 0x00010010, { 0xD800, 0xDC10 } },
877 { 0x0001001F, { 0xD800, 0xDC1F } },
878 { 0x00010020, { 0xD800, 0xDC20 } },
879 { 0x0001003F, { 0xD800, 0xDC3F } },
880 { 0x00010040, { 0xD800, 0xDC40 } },
881 { 0x0001007F, { 0xD800, 0xDC7F } },
882 { 0x00010080, { 0xD800, 0xDC80 } },
883 { 0x00010081, { 0xD800, 0xDC81 } },
884 { 0x00010082, { 0xD800, 0xDC82 } },
885 { 0x00010084, { 0xD800, 0xDC84 } },
886 { 0x00010088, { 0xD800, 0xDC88 } },
887 { 0x00010090, { 0xD800, 0xDC90 } },
888 { 0x000100A0, { 0xD800, 0xDCA0 } },
889 { 0x000100C0, { 0xD800, 0xDCC0 } },
890 { 0x000100FF, { 0xD800, 0xDCFF } },
891 { 0x00010100, { 0xD800, 0xDD00 } },
892 { 0x00010101, { 0xD800, 0xDD01 } },
893 { 0x00010102, { 0xD800, 0xDD02 } },
894 { 0x00010104, { 0xD800, 0xDD04 } },
895 { 0x00010108, { 0xD800, 0xDD08 } },
896 { 0x00010110, { 0xD800, 0xDD10 } },
897 { 0x00010120, { 0xD800, 0xDD20 } },
898 { 0x00010140, { 0xD800, 0xDD40 } },
899 { 0x00010180, { 0xD800, 0xDD80 } },
900 { 0x000101FF, { 0xD800, 0xDDFF } },
901 { 0x00010200, { 0xD800, 0xDE00 } },
902 { 0x00010201, { 0xD800, 0xDE01 } },
903 { 0x00010202, { 0xD800, 0xDE02 } },
904 { 0x00010204, { 0xD800, 0xDE04 } },
905 { 0x00010208, { 0xD800, 0xDE08 } },
906 { 0x00010210, { 0xD800, 0xDE10 } },
907 { 0x00010220, { 0xD800, 0xDE20 } },
908 { 0x00010240, { 0xD800, 0xDE40 } },
909 { 0x00010280, { 0xD800, 0xDE80 } },
910 { 0x00010300, { 0xD800, 0xDF00 } },
911 { 0x000103FF, { 0xD800, 0xDFFF } },
912 { 0x00010400, { 0xD801, 0xDC00 } },
913 { 0x00010401, { 0xD801, 0xDC01 } },
914 { 0x00010402, { 0xD801, 0xDC02 } },
915 { 0x00010404, { 0xD801, 0xDC04 } },
916 { 0x00010408, { 0xD801, 0xDC08 } },
917 { 0x00010410, { 0xD801, 0xDC10 } },
918 { 0x00010420, { 0xD801, 0xDC20 } },
919 { 0x00010440, { 0xD801, 0xDC40 } },
920 { 0x00010480, { 0xD801, 0xDC80 } },
921 { 0x00010500, { 0xD801, 0xDD00 } },
922 { 0x00010600, { 0xD801, 0xDE00 } },
923 { 0x000107FF, { 0xD801, 0xDFFF } },
924 { 0x00010800, { 0xD802, 0xDC00 } },
925 { 0x00010801, { 0xD802, 0xDC01 } },
926 { 0x00010802, { 0xD802, 0xDC02 } },
927 { 0x00010804, { 0xD802, 0xDC04 } },
928 { 0x00010808, { 0xD802, 0xDC08 } },
929 { 0x00010810, { 0xD802, 0xDC10 } },
930 { 0x00010820, { 0xD802, 0xDC20 } },
931 { 0x00010840, { 0xD802, 0xDC40 } },
932 { 0x00010880, { 0xD802, 0xDC80 } },
933 { 0x00010900, { 0xD802, 0xDD00 } },
934 { 0x00010A00, { 0xD802, 0xDE00 } },
935 { 0x00010C00, { 0xD803, 0xDC00 } },
936 { 0x00010FFF, { 0xD803, 0xDFFF } },
937 { 0x00011000, { 0xD804, 0xDC00 } },
938 { 0x00011001, { 0xD804, 0xDC01 } },
939 { 0x00011002, { 0xD804, 0xDC02 } },
940 { 0x00011004, { 0xD804, 0xDC04 } },
941 { 0x00011008, { 0xD804, 0xDC08 } },
942 { 0x00011010, { 0xD804, 0xDC10 } },
943 { 0x00011020, { 0xD804, 0xDC20 } },
944 { 0x00011040, { 0xD804, 0xDC40 } },
945 { 0x00011080, { 0xD804, 0xDC80 } },
946 { 0x00011100, { 0xD804, 0xDD00 } },
947 { 0x00011200, { 0xD804, 0xDE00 } },
948 { 0x00011400, { 0xD805, 0xDC00 } },
949 { 0x00011800, { 0xD806, 0xDC00 } },
950 { 0x00011FFF, { 0xD807, 0xDFFF } },
951 { 0x00012000, { 0xD808, 0xDC00 } },
952 { 0x00012001, { 0xD808, 0xDC01 } },
953 { 0x00012002, { 0xD808, 0xDC02 } },
954 { 0x00012004, { 0xD808, 0xDC04 } },
955 { 0x00012008, { 0xD808, 0xDC08 } },
956 { 0x00012010, { 0xD808, 0xDC10 } },
957 { 0x00012020, { 0xD808, 0xDC20 } },
958 { 0x00012040, { 0xD808, 0xDC40 } },
959 { 0x00012080, { 0xD808, 0xDC80 } },
960 { 0x00012100, { 0xD808, 0xDD00 } },
961 { 0x00012200, { 0xD808, 0xDE00 } },
962 { 0x00012400, { 0xD809, 0xDC00 } },
963 { 0x00012800, { 0xD80A, 0xDC00 } },
964 { 0x00013000, { 0xD80C, 0xDC00 } },
965 { 0x00013FFF, { 0xD80F, 0xDFFF } },
966 { 0x00014000, { 0xD810, 0xDC00 } },
967 { 0x00014001, { 0xD810, 0xDC01 } },
968 { 0x00014002, { 0xD810, 0xDC02 } },
969 { 0x00014004, { 0xD810, 0xDC04 } },
970 { 0x00014008, { 0xD810, 0xDC08 } },
971 { 0x00014010, { 0xD810, 0xDC10 } },
972 { 0x00014020, { 0xD810, 0xDC20 } },
973 { 0x00014040, { 0xD810, 0xDC40 } },
974 { 0x00014080, { 0xD810, 0xDC80 } },
975 { 0x00014100, { 0xD810, 0xDD00 } },
976 { 0x00014200, { 0xD810, 0xDE00 } },
977 { 0x00014400, { 0xD811, 0xDC00 } },
978 { 0x00014800, { 0xD812, 0xDC00 } },
979 { 0x00015000, { 0xD814, 0xDC00 } },
980 { 0x00016000, { 0xD818, 0xDC00 } },
981 { 0x00017FFF, { 0xD81F, 0xDFFF } },
982 { 0x00018000, { 0xD820, 0xDC00 } },
983 { 0x00018001, { 0xD820, 0xDC01 } },
984 { 0x00018002, { 0xD820, 0xDC02 } },
985 { 0x00018004, { 0xD820, 0xDC04 } },
986 { 0x00018008, { 0xD820, 0xDC08 } },
987 { 0x00018010, { 0xD820, 0xDC10 } },
988 { 0x00018020, { 0xD820, 0xDC20 } },
989 { 0x00018040, { 0xD820, 0xDC40 } },
990 { 0x00018080, { 0xD820, 0xDC80 } },
991 { 0x00018100, { 0xD820, 0xDD00 } },
992 { 0x00018200, { 0xD820, 0xDE00 } },
993 { 0x00018400, { 0xD821, 0xDC00 } },
994 { 0x00018800, { 0xD822, 0xDC00 } },
995 { 0x00019000, { 0xD824, 0xDC00 } },
996 { 0x0001A000, { 0xD828, 0xDC00 } },
997 { 0x0001C000, { 0xD830, 0xDC00 } },
998 { 0x0001FFFF, { 0xD83F, 0xDFFF } },
999 { 0x00020000, { 0xD840, 0xDC00 } },
1000 { 0x00020001, { 0xD840, 0xDC01 } },
1001 { 0x00020002, { 0xD840, 0xDC02 } },
1002 { 0x00020004, { 0xD840, 0xDC04 } },
1003 { 0x00020008, { 0xD840, 0xDC08 } },
1004 { 0x00020010, { 0xD840, 0xDC10 } },
1005 { 0x00020020, { 0xD840, 0xDC20 } },
1006 { 0x00020040, { 0xD840, 0xDC40 } },
1007 { 0x00020080, { 0xD840, 0xDC80 } },
1008 { 0x00020100, { 0xD840, 0xDD00 } },
1009 { 0x00020200, { 0xD840, 0xDE00 } },
1010 { 0x00020400, { 0xD841, 0xDC00 } },
1011 { 0x00020800, { 0xD842, 0xDC00 } },
1012 { 0x00021000, { 0xD844, 0xDC00 } },
1013 { 0x00022000, { 0xD848, 0xDC00 } },
1014 { 0x00024000, { 0xD850, 0xDC00 } },
1015 { 0x00028000, { 0xD860, 0xDC00 } },
1016 { 0x0002FFFF, { 0xD87F, 0xDFFF } },
1017 { 0x00030000, { 0xD880, 0xDC00 } },
1018 { 0x00030001, { 0xD880, 0xDC01 } },
1019 { 0x00030002, { 0xD880, 0xDC02 } },
1020 { 0x00030004, { 0xD880, 0xDC04 } },
1021 { 0x00030008, { 0xD880, 0xDC08 } },
1022 { 0x00030010, { 0xD880, 0xDC10 } },
1023 { 0x00030020, { 0xD880, 0xDC20 } },
1024 { 0x00030040, { 0xD880, 0xDC40 } },
1025 { 0x00030080, { 0xD880, 0xDC80 } },
1026 { 0x00030100, { 0xD880, 0xDD00 } },
1027 { 0x00030200, { 0xD880, 0xDE00 } },
1028 { 0x00030400, { 0xD881, 0xDC00 } },
1029 { 0x00030800, { 0xD882, 0xDC00 } },
1030 { 0x00031000, { 0xD884, 0xDC00 } },
1031 { 0x00032000, { 0xD888, 0xDC00 } },
1032 { 0x00034000, { 0xD890, 0xDC00 } },
1033 { 0x00038000, { 0xD8A0, 0xDC00 } },
1034 { 0x0003FFFF, { 0xD8BF, 0xDFFF } },
1035 { 0x00040000, { 0xD8C0, 0xDC00 } },
1036 { 0x00040001, { 0xD8C0, 0xDC01 } },
1037 { 0x00040002, { 0xD8C0, 0xDC02 } },
1038 { 0x00040004, { 0xD8C0, 0xDC04 } },
1039 { 0x00040008, { 0xD8C0, 0xDC08 } },
1040 { 0x00040010, { 0xD8C0, 0xDC10 } },
1041 { 0x00040020, { 0xD8C0, 0xDC20 } },
1042 { 0x00040040, { 0xD8C0, 0xDC40 } },
1043 { 0x00040080, { 0xD8C0, 0xDC80 } },
1044 { 0x00040100, { 0xD8C0, 0xDD00 } },
1045 { 0x00040200, { 0xD8C0, 0xDE00 } },
1046 { 0x00040400, { 0xD8C1, 0xDC00 } },
1047 { 0x00040800, { 0xD8C2, 0xDC00 } },
1048 { 0x00041000, { 0xD8C4, 0xDC00 } },
1049 { 0x00042000, { 0xD8C8, 0xDC00 } },
1050 { 0x00044000, { 0xD8D0, 0xDC00 } },
1051 { 0x00048000, { 0xD8E0, 0xDC00 } },
1052 { 0x0004FFFF, { 0xD8FF, 0xDFFF } },
1053 { 0x00050000, { 0xD900, 0xDC00 } },
1054 { 0x00050001, { 0xD900, 0xDC01 } },
1055 { 0x00050002, { 0xD900, 0xDC02 } },
1056 { 0x00050004, { 0xD900, 0xDC04 } },
1057 { 0x00050008, { 0xD900, 0xDC08 } },
1058 { 0x00050010, { 0xD900, 0xDC10 } },
1059 { 0x00050020, { 0xD900, 0xDC20 } },
1060 { 0x00050040, { 0xD900, 0xDC40 } },
1061 { 0x00050080, { 0xD900, 0xDC80 } },
1062 { 0x00050100, { 0xD900, 0xDD00 } },
1063 { 0x00050200, { 0xD900, 0xDE00 } },
1064 { 0x00050400, { 0xD901, 0xDC00 } },
1065 { 0x00050800, { 0xD902, 0xDC00 } },
1066 { 0x00051000, { 0xD904, 0xDC00 } },
1067 { 0x00052000, { 0xD908, 0xDC00 } },
1068 { 0x00054000, { 0xD910, 0xDC00 } },
1069 { 0x00058000, { 0xD920, 0xDC00 } },
1070 { 0x00060000, { 0xD940, 0xDC00 } },
1071 { 0x00070000, { 0xD980, 0xDC00 } },
1072 { 0x0007FFFF, { 0xD9BF, 0xDFFF } },
1073 { 0x00080000, { 0xD9C0, 0xDC00 } },
1074 { 0x00080001, { 0xD9C0, 0xDC01 } },
1075 { 0x00080002, { 0xD9C0, 0xDC02 } },
1076 { 0x00080004, { 0xD9C0, 0xDC04 } },
1077 { 0x00080008, { 0xD9C0, 0xDC08 } },
1078 { 0x00080010, { 0xD9C0, 0xDC10 } },
1079 { 0x00080020, { 0xD9C0, 0xDC20 } },
1080 { 0x00080040, { 0xD9C0, 0xDC40 } },
1081 { 0x00080080, { 0xD9C0, 0xDC80 } },
1082 { 0x00080100, { 0xD9C0, 0xDD00 } },
1083 { 0x00080200, { 0xD9C0, 0xDE00 } },
1084 { 0x00080400, { 0xD9C1, 0xDC00 } },
1085 { 0x00080800, { 0xD9C2, 0xDC00 } },
1086 { 0x00081000, { 0xD9C4, 0xDC00 } },
1087 { 0x00082000, { 0xD9C8, 0xDC00 } },
1088 { 0x00084000, { 0xD9D0, 0xDC00 } },
1089 { 0x00088000, { 0xD9E0, 0xDC00 } },
1090 { 0x0008FFFF, { 0xD9FF, 0xDFFF } },
1091 { 0x00090000, { 0xDA00, 0xDC00 } },
1092 { 0x00090001, { 0xDA00, 0xDC01 } },
1093 { 0x00090002, { 0xDA00, 0xDC02 } },
1094 { 0x00090004, { 0xDA00, 0xDC04 } },
1095 { 0x00090008, { 0xDA00, 0xDC08 } },
1096 { 0x00090010, { 0xDA00, 0xDC10 } },
1097 { 0x00090020, { 0xDA00, 0xDC20 } },
1098 { 0x00090040, { 0xDA00, 0xDC40 } },
1099 { 0x00090080, { 0xDA00, 0xDC80 } },
1100 { 0x00090100, { 0xDA00, 0xDD00 } },
1101 { 0x00090200, { 0xDA00, 0xDE00 } },
1102 { 0x00090400, { 0xDA01, 0xDC00 } },
1103 { 0x00090800, { 0xDA02, 0xDC00 } },
1104 { 0x00091000, { 0xDA04, 0xDC00 } },
1105 { 0x00092000, { 0xDA08, 0xDC00 } },
1106 { 0x00094000, { 0xDA10, 0xDC00 } },
1107 { 0x00098000, { 0xDA20, 0xDC00 } },
1108 { 0x000A0000, { 0xDA40, 0xDC00 } },
1109 { 0x000B0000, { 0xDA80, 0xDC00 } },
1110 { 0x000C0000, { 0xDAC0, 0xDC00 } },
1111 { 0x000D0000, { 0xDB00, 0xDC00 } },
1112 { 0x000FFFFF, { 0xDBBF, 0xDFFF } },
1113 { 0x0010FFFF, { 0xDBFF, 0xDFFF } }
1114
1115 };
1116
/*
 * Byte strings that are not legal UTF-8.  Each one is fed to the
 * UTF-8 decoders by test_utf8_bad_chars(), which expects the
 * conversion to be rejected.
 */
char *utf8_bad[] = {
  /* overlong encodings: a value stored in more bytes than it needs */
  "\xC0\x80", "\xC1\xBF",
  "\xE0\x80\x80", "\xE0\x9F\xBF",
  "\xF0\x80\x80\x80", "\xF0\x8F\xBF\xBF",

  /* four-byte sequences whose value lies above U+10FFFF */
  "\xF4\x90\x80\x80", "\xF7\xBF\xBF\xBF",

  /* five-byte forms (lead bytes F8..FB) */
  "\xF8\x80\x80\x80\x80", "\xF8\x88\x80\x80\x80",
  "\xF8\x92\x80\x80\x80", "\xF8\x9F\xBF\xBF\xBF",
  "\xF8\xA0\x80\x80\x80", "\xF8\xA8\x80\x80\x80",
  "\xF8\xB0\x80\x80\x80", "\xF8\xBF\xBF\xBF\xBF",
  "\xF9\x80\x80\x80\x88", "\xF9\x84\x80\x80\x80",
  "\xF9\xBF\xBF\xBF\xBF",
  "\xFA\x80\x80\x80\x80", "\xFA\x90\x80\x80\x80",
  "\xFB\xBF\xBF\xBF\xBF",

  /* six-byte forms (lead bytes FC..FD) */
  "\xFC\x84\x80\x80\x80\x81", "\xFC\x85\x80\x80\x80\x80",
  "\xFC\x86\x80\x80\x80\x80", "\xFC\x87\xBF\xBF\xBF\xBF",
  "\xFC\x88\xA0\x80\x80\x80", "\xFC\x89\x80\x80\x80\x80",
  "\xFC\x8A\x80\x80\x80\x80", "\xFC\x90\x80\x80\x80\x82",
  "\xFD\x80\x80\x80\x80\x80", "\xFD\xBF\xBF\xBF\xBF\xBF",

  /* lone continuation byte, truncated sequence, lead byte after lead byte */
  "\x80",
  "\xC3",
  "\xC3\xC3\x80",

  /* UTF-16 surrogate values (U+D800, U+DFC0, U+DFFF) encoded directly */
  "\xED\xA0\x80",
  "\xED\xBF\x80",
  "\xED\xBF\xBF",

  /* an encoded surrogate followed by another three-byte sequence */
  "\xED\xA0\x80\xE0\xBF\xBF",
};
1159
/*
 * Write to stdout: the label `word` and a space, then every byte of the
 * NUL-terminated buffer `utf8` as two hex digits followed by a space,
 * and finally the string `end`.
 */
static void
dump_utf8(char *word, unsigned char *utf8, char *end)
{
  unsigned char *cursor = utf8;

  fprintf(stdout, "%s ", word);

  /* Stops at the first zero byte, so embedded NULs are never shown. */
  while( 0 != *cursor ) {
    fprintf(stdout, "%02.2x ", (unsigned int)*cursor);
    cursor++;
  }

  fprintf(stdout, "%s", end);
}
1174
1175 static PRBool
1176 test_ucs4_chars
1177 (
1178 void
1179 )
1180 {
1181 PRBool rv = PR_TRUE;
1182 int i;
1183
1184 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1185 struct ucs4 *e = &ucs4[i];
1186 PRBool result;
1187 unsigned char utf8[8];
1188 unsigned int len = 0;
1189 PRUint32 back = 0;
1190
1191 (void)memset(utf8, 0, sizeof(utf8));
1192
1193 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1194 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
1195
1196 if( !result ) {
1197 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8\n", e->c);
1198 rv = PR_FALSE;
1199 continue;
1200 }
1201
1202 if( (len >= sizeof(utf8)) ||
1203 (strlen(e->utf8) != len) ||
1204 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
1205 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8: ", e->c);
1206 dump_utf8("expected", e->utf8, ", ");
1207 dump_utf8("received", utf8, "\n");
1208 rv = PR_FALSE;
1209 continue;
1210 }
1211
1212 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1213 utf8, len, (unsigned char *)&back, sizeof(back), &len);
1214
1215 if( !result ) {
1216 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4\n");
1217 rv = PR_FALSE;
1218 continue;
1219 }
1220
1221 if( (sizeof(back) != len) || (e->c != back) ) {
1222 dump_utf8("Wrong conversion of UTF-8", utf8, " to UCS-4:");
1223 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back);
1224 rv = PR_FALSE;
1225 continue;
1226 }
1227 }
1228
1229 return rv;
1230 }
1231
1232 static PRBool
1233 test_ucs2_chars
1234 (
1235 void
1236 )
1237 {
1238 PRBool rv = PR_TRUE;
1239 int i;
1240
1241 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1242 struct ucs2 *e = &ucs2[i];
1243 PRBool result;
1244 unsigned char utf8[8];
1245 unsigned int len = 0;
1246 PRUint16 back = 0;
1247
1248 (void)memset(utf8, 0, sizeof(utf8));
1249
1250 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1251 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
1252
1253 if( !result ) {
1254 fprintf(stdout, "Failed to convert UCS-2 0x%04.4x to UTF-8\n", e->c);
1255 rv = PR_FALSE;
1256 continue;
1257 }
1258
1259 if( (len >= sizeof(utf8)) ||
1260 (strlen(e->utf8) != len) ||
1261 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
1262 fprintf(stdout, "Wrong conversion of UCS-2 0x%04.4x to UTF-8: ", e->c);
1263 dump_utf8("expected", e->utf8, ", ");
1264 dump_utf8("received", utf8, "\n");
1265 rv = PR_FALSE;
1266 continue;
1267 }
1268
1269 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1270 utf8, len, (unsigned char *)&back, sizeof(back), &len);
1271
1272 if( !result ) {
1273 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-2\n");
1274 rv = PR_FALSE;
1275 continue;
1276 }
1277
1278 if( (sizeof(back) != len) || (e->c != back) ) {
1279 dump_utf8("Wrong conversion of UTF-8", utf8, "to UCS-2:");
1280 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back);
1281 rv = PR_FALSE;
1282 continue;
1283 }
1284 }
1285
1286 return rv;
1287 }
1288
1289 static PRBool
1290 test_utf16_chars
1291 (
1292 void
1293 )
1294 {
1295 PRBool rv = PR_TRUE;
1296 int i;
1297
1298 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) {
1299 struct utf16 *e = &utf16[i];
1300 PRBool result;
1301 unsigned char utf8[8];
1302 unsigned int len = 0;
1303 PRUint32 back32 = 0;
1304 PRUint16 back[2];
1305
1306 (void)memset(utf8, 0, sizeof(utf8));
1307
1308 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1309 (unsigned char *)&e->w[0], sizeof(e->w), utf8, sizeof(utf8), &len);
1310
1311 if( !result ) {
1312 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8\n",
1313 e->w[0], e->w[1]);
1314 rv = PR_FALSE;
1315 continue;
1316 }
1317
1318 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1319 utf8, len, (unsigned char *)&back32, sizeof(back32), &len);
1320
1321 if( 4 != len ) {
1322 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8: "
1323 "unexpected len %d\n", e->w[0], e->w[1], len);
1324 rv = PR_FALSE;
1325 continue;
1326 }
1327
1328 utf8[len] = '\0'; /* null-terminate for printing */
1329
1330 if( !result ) {
1331 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4 (utf-16 test)\n");
1332 rv = PR_FALSE;
1333 continue;
1334 }
1335
1336 if( (sizeof(back32) != len) || (e->c != back32) ) {
1337 fprintf(stdout, "Wrong conversion of UTF-16 0x%04.4x 0x%04.4x ",
1338 e->w[0], e->w[1]);
1339 dump_utf8("to UTF-8", utf8, "and then to UCS-4: ");
1340 if( sizeof(back32) != len ) {
1341 fprintf(stdout, "len is %d\n", len);
1342 } else {
1343 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back32);
1344 }
1345 rv = PR_FALSE;
1346 continue;
1347 }
1348
1349 (void)memset(utf8, 0, sizeof(utf8));
1350 back[0] = back[1] = 0;
1351
1352 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1353 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
1354
1355 if( !result ) {
1356 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8 (utf-16 test)\n ",
1357 e->c);
1358 rv = PR_FALSE;
1359 continue;
1360 }
1361
1362 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1363 utf8, len, (unsigned char *)&back[0], sizeof(back), &len);
1364
1365 if( 4 != len ) {
1366 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8: "
1367 "unexpected len %d\n", e->c, len);
1368 rv = PR_FALSE;
1369 continue;
1370 }
1371
1372 utf8[len] = '\0'; /* null-terminate for printing */
1373
1374 if( !result ) {
1375 dump_utf8("Failed to convert UTF-8", utf8, "to UTF-16\n");
1376 rv = PR_FALSE;
1377 continue;
1378 }
1379
1380 if( (sizeof(back) != len) || (e->w[0] != back[0]) || (e->w[1] != back[1]) ) {
1381 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8", e->c);
1382 dump_utf8("", utf8, "and then to UTF-16:");
1383 if( sizeof(back) != len ) {
1384 fprintf(stdout, "len is %d\n", len);
1385 } else {
1386 fprintf(stdout, "expected 0x%04.4x 0x%04.4x, received 0x%04.4x 0x%04.4xx \n",
1387 e->w[0], e->w[1], back[0], back[1]);
1388 }
1389 rv = PR_FALSE;
1390 continue;
1391 }
1392 }
1393
1394 return rv;
1395 }
1396
1397 static PRBool
1398 test_utf8_bad_chars
1399 (
1400 void
1401 )
1402 {
1403 PRBool rv = PR_TRUE;
1404 int i;
1405
1406 for( i = 0; i < sizeof(utf8_bad)/sizeof(utf8_bad[0]); i++ ) {
1407 PRBool result;
1408 unsigned char destbuf[30];
1409 unsigned int len = 0;
1410
1411 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1412 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf ), &len);
1413
1414 if( result ) {
1415 dump_utf8("Failed to detect bad UTF-8 string converting to UCS2: ", utf8_b ad[i], "\n");
1416 rv = PR_FALSE;
1417 continue;
1418 }
1419 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1420 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf ), &len);
1421
1422 if( result ) {
1423 dump_utf8("Failed to detect bad UTF-8 string converting to UCS4: ", utf8_b ad[i], "\n");
1424 rv = PR_FALSE;
1425 continue;
1426 }
1427
1428 }
1429
1430 return rv;
1431 }
1432
1433 static PRBool
1434 test_iso88591_chars
1435 (
1436 void
1437 )
1438 {
1439 PRBool rv = PR_TRUE;
1440 int i;
1441
1442 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1443 struct ucs2 *e = &ucs2[i];
1444 PRBool result;
1445 unsigned char iso88591;
1446 unsigned char utf8[3];
1447 unsigned int len = 0;
1448
1449 if (ntohs(e->c) > 0xFF) continue;
1450
1451 (void)memset(utf8, 0, sizeof(utf8));
1452 iso88591 = ntohs(e->c);
1453
1454 result = sec_port_iso88591_utf8_conversion_function(&iso88591,
1455 1, utf8, sizeof(utf8), &len);
1456
1457 if( !result ) {
1458 fprintf(stdout, "Failed to convert ISO-8859-1 0x%02.2x to UTF-8\n", iso885 91);
1459 rv = PR_FALSE;
1460 continue;
1461 }
1462
1463 if( (len >= sizeof(utf8)) ||
1464 (strlen(e->utf8) != len) ||
1465 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
1466 fprintf(stdout, "Wrong conversion of ISO-8859-1 0x%02.2x to UTF-8: ", iso8 8591);
1467 dump_utf8("expected", e->utf8, ", ");
1468 dump_utf8("received", utf8, "\n");
1469 rv = PR_FALSE;
1470 continue;
1471 }
1472
1473 }
1474
1475 return rv;
1476 }
1477
1478 static PRBool
1479 test_zeroes
1480 (
1481 void
1482 )
1483 {
1484 PRBool rv = PR_TRUE;
1485 PRBool result;
1486 PRUint32 lzero = 0;
1487 PRUint16 szero = 0;
1488 unsigned char utf8[8];
1489 unsigned int len = 0;
1490 PRUint32 lback = 1;
1491 PRUint16 sback = 1;
1492
1493 (void)memset(utf8, 1, sizeof(utf8));
1494
1495 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1496 (unsigned char *)&lzero, sizeof(lzero), utf8, sizeof(utf8), &len);
1497
1498 if( !result ) {
1499 fprintf(stdout, "Failed to convert UCS-4 0x00000000 to UTF-8\n");
1500 rv = PR_FALSE;
1501 } else if( 1 != len ) {
1502 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: len = %d\n", len);
1503 rv = PR_FALSE;
1504 } else if( '\0' != *utf8 ) {
1505 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: expected 00 ,"
1506 "received %02.2x\n", (unsigned int)*utf8);
1507 rv = PR_FALSE;
1508 }
1509
1510 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1511 "", 1, (unsigned char *)&lback, sizeof(lback), &len);
1512
1513 if( !result ) {
1514 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-4\n");
1515 rv = PR_FALSE;
1516 } else if( 4 != len ) {
1517 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: len = %d\n", len);
1518 rv = PR_FALSE;
1519 } else if( 0 != lback ) {
1520 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: "
1521 "expected 0x00000000, received 0x%08.8x\n", lback);
1522 rv = PR_FALSE;
1523 }
1524
1525 (void)memset(utf8, 1, sizeof(utf8));
1526
1527 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1528 (unsigned char *)&szero, sizeof(szero), utf8, sizeof(utf8), &len);
1529
1530 if( !result ) {
1531 fprintf(stdout, "Failed to convert UCS-2 0x0000 to UTF-8\n");
1532 rv = PR_FALSE;
1533 } else if( 1 != len ) {
1534 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: len = %d\n", len);
1535 rv = PR_FALSE;
1536 } else if( '\0' != *utf8 ) {
1537 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: expected 00 ,"
1538 "received %02.2x\n", (unsigned int)*utf8);
1539 rv = PR_FALSE;
1540 }
1541
1542 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1543 "", 1, (unsigned char *)&sback, sizeof(sback), &len);
1544
1545 if( !result ) {
1546 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-2\n");
1547 rv = PR_FALSE;
1548 } else if( 2 != len ) {
1549 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: len = %d\n", len);
1550 rv = PR_FALSE;
1551 } else if( 0 != sback ) {
1552 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: "
1553 "expected 0x0000, received 0x%04.4x\n", sback);
1554 rv = PR_FALSE;
1555 }
1556
1557 return rv;
1558 }
1559
1560 static PRBool
1561 test_multichars
1562 (
1563 void
1564 )
1565 {
1566 int i;
1567 unsigned int len, lenout;
1568 PRUint32 *ucs4s;
1569 char *ucs4_utf8;
1570 PRUint16 *ucs2s;
1571 char *ucs2_utf8;
1572 void *tmp;
1573 PRBool result;
1574
1575 ucs4s = (PRUint32 *)calloc(sizeof(ucs4)/sizeof(ucs4[0]), sizeof(PRUint32));
1576 ucs2s = (PRUint16 *)calloc(sizeof(ucs2)/sizeof(ucs2[0]), sizeof(PRUint16));
1577
1578 if( ((PRUint32 *)NULL == ucs4s) || ((PRUint16 *)NULL == ucs2s) ) {
1579 fprintf(stderr, "out of memory\n");
1580 exit(1);
1581 }
1582
1583 len = 0;
1584 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1585 ucs4s[i] = ucs4[i].c;
1586 len += strlen(ucs4[i].utf8);
1587 }
1588
1589 ucs4_utf8 = (char *)malloc(len);
1590
1591 len = 0;
1592 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1593 ucs2s[i] = ucs2[i].c;
1594 len += strlen(ucs2[i].utf8);
1595 }
1596
1597 ucs2_utf8 = (char *)malloc(len);
1598
1599 if( ((char *)NULL == ucs4_utf8) || ((char *)NULL == ucs2_utf8) ) {
1600 fprintf(stderr, "out of memory\n");
1601 exit(1);
1602 }
1603
1604 *ucs4_utf8 = '\0';
1605 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1606 strcat(ucs4_utf8, ucs4[i].utf8);
1607 }
1608
1609 *ucs2_utf8 = '\0';
1610 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1611 strcat(ucs2_utf8, ucs2[i].utf8);
1612 }
1613
1614 /* UTF-8 -> UCS-4 */
1615 len = sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32);
1616 tmp = calloc(len, 1);
1617 if( (void *)NULL == tmp ) {
1618 fprintf(stderr, "out of memory\n");
1619 exit(1);
1620 }
1621
1622 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1623 ucs4_utf8, strlen(ucs4_utf8), tmp, len, &lenout);
1624 if( !result ) {
1625 fprintf(stdout, "Failed to convert much UTF-8 to UCS-4\n");
1626 goto done;
1627 }
1628
1629 if( lenout != len ) {
1630 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-4\n");
1631 goto loser;
1632 }
1633
1634 if( 0 != memcmp(ucs4s, tmp, len) ) {
1635 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-4\n");
1636 goto loser;
1637 }
1638
1639 free(tmp); tmp = (void *)NULL;
1640
1641 /* UCS-4 -> UTF-8 */
1642 len = strlen(ucs4_utf8);
1643 tmp = calloc(len, 1);
1644 if( (void *)NULL == tmp ) {
1645 fprintf(stderr, "out of memory\n");
1646 exit(1);
1647 }
1648
1649 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1650 (unsigned char *)ucs4s, sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32),
1651 tmp, len, &lenout);
1652 if( !result ) {
1653 fprintf(stdout, "Failed to convert much UCS-4 to UTF-8\n");
1654 goto done;
1655 }
1656
1657 if( lenout != len ) {
1658 fprintf(stdout, "Unexpected length converting much UCS-4 to UTF-8\n");
1659 goto loser;
1660 }
1661
1662 if( 0 != strncmp(ucs4_utf8, tmp, len) ) {
1663 fprintf(stdout, "Wrong conversion of much UCS-4 to UTF-8\n");
1664 goto loser;
1665 }
1666
1667 free(tmp); tmp = (void *)NULL;
1668
1669 /* UTF-8 -> UCS-2 */
1670 len = sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16);
1671 tmp = calloc(len, 1);
1672 if( (void *)NULL == tmp ) {
1673 fprintf(stderr, "out of memory\n");
1674 exit(1);
1675 }
1676
1677 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1678 ucs2_utf8, strlen(ucs2_utf8), tmp, len, &lenout);
1679 if( !result ) {
1680 fprintf(stdout, "Failed to convert much UTF-8 to UCS-2\n");
1681 goto done;
1682 }
1683
1684 if( lenout != len ) {
1685 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-2\n");
1686 goto loser;
1687 }
1688
1689 if( 0 != memcmp(ucs2s, tmp, len) ) {
1690 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-2\n");
1691 goto loser;
1692 }
1693
1694 free(tmp); tmp = (void *)NULL;
1695
1696 /* UCS-2 -> UTF-8 */
1697 len = strlen(ucs2_utf8);
1698 tmp = calloc(len, 1);
1699 if( (void *)NULL == tmp ) {
1700 fprintf(stderr, "out of memory\n");
1701 exit(1);
1702 }
1703
1704 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1705 (unsigned char *)ucs2s, sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16),
1706 tmp, len, &lenout);
1707 if( !result ) {
1708 fprintf(stdout, "Failed to convert much UCS-2 to UTF-8\n");
1709 goto done;
1710 }
1711
1712 if( lenout != len ) {
1713 fprintf(stdout, "Unexpected length converting much UCS-2 to UTF-8\n");
1714 goto loser;
1715 }
1716
1717 if( 0 != strncmp(ucs2_utf8, tmp, len) ) {
1718 fprintf(stdout, "Wrong conversion of much UCS-2 to UTF-8\n");
1719 goto loser;
1720 }
1721
1722 /* implement UTF16 */
1723
1724 result = PR_TRUE;
1725 goto done;
1726
1727 loser:
1728 result = PR_FALSE;
1729 done:
1730 free(ucs4s);
1731 free(ucs4_utf8);
1732 free(ucs2s);
1733 free(ucs2_utf8);
1734 if( (void *)NULL != tmp ) free(tmp);
1735 return result;
1736 }
1737
1738 void
1739 byte_order
1740 (
1741 void
1742 )
1743 {
1744 /*
1745 * The implementation (now) expects the 16- and 32-bit characters
1746 * to be in network byte order, not host byte order. Therefore I
1747 * have to byteswap all those test vectors above. hton[ls] may be
1748 * functions, so I have to do this dynamically. If you want to
1749 * use this code to do host byte order conversions, just remove
1750 * the call in main() to this function.
1751 */
1752
1753 int i;
1754
1755 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1756 struct ucs4 *e = &ucs4[i];
1757 e->c = htonl(e->c);
1758 }
1759
1760 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1761 struct ucs2 *e = &ucs2[i];
1762 e->c = htons(e->c);
1763 }
1764
1765 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) {
1766 struct utf16 *e = &utf16[i];
1767 e->c = htonl(e->c);
1768 e->w[0] = htons(e->w[0]);
1769 e->w[1] = htons(e->w[1]);
1770 }
1771
1772 return;
1773 }
1774
1775 int
1776 main
1777 (
1778 int argc,
1779 char *argv[]
1780 )
1781 {
1782 byte_order();
1783
1784 if( test_ucs4_chars() &&
1785 test_ucs2_chars() &&
1786 test_utf16_chars() &&
1787 test_utf8_bad_chars() &&
1788 test_iso88591_chars() &&
1789 test_zeroes() &&
1790 test_multichars() &&
1791 PR_TRUE ) {
1792 fprintf(stderr, "PASS\n");
1793 return 1;
1794 } else {
1795 fprintf(stderr, "FAIL\n");
1796 return 0;
1797 }
1798 }
1799
1800 #endif /* TEST_UTF8 */
OLDNEW
« no previous file with comments | « mozilla/security/nss/lib/util/templates.c ('k') | mozilla/security/nss/lib/util/utilmod.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698