Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(71)

Side by Side Diff: nss/lib/util/utf8.c

Issue 2078763002: Delete bundled copy of NSS and replace with README. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/nss@master
Patch Set: Delete bundled copy of NSS and replace with README. Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « nss/lib/util/templates.c ('k') | nss/lib/util/utilmod.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 #include "seccomon.h"
6 #include "secport.h"
7
8 #ifdef TEST_UTF8
9 #include <assert.h>
10 #undef PORT_Assert
11 #define PORT_Assert assert
12 #endif
13
14 /*
15 * From RFC 2044:
16 *
17 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
18 * 0000 0000-0000 007F 0xxxxxxx
19 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
20 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
21 * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
22 * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
23 * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx
24 */
25
26 /*
27 * From http://www.imc.org/draft-hoffman-utf16
28 *
29 * For U on [0x00010000,0x0010FFFF]: Let U' = U - 0x00010000
30 *
31 * U' = yyyyyyyyyyxxxxxxxxxx
32 * W1 = 110110yyyyyyyyyy
33 * W2 = 110111xxxxxxxxxx
34 */
35
36 /*
37 * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit
38 * character values. If you wish to use this code for working with
39 * host byte order values, define the following:
40 *
41 * #if IS_BIG_ENDIAN
42 * #define L_0 0
43 * #define L_1 1
44 * #define L_2 2
45 * #define L_3 3
46 * #define H_0 0
47 * #define H_1 1
48 * #else / * not everyone has elif * /
49 * #if IS_LITTLE_ENDIAN
50 * #define L_0 3
51 * #define L_1 2
52 * #define L_2 1
53 * #define L_3 0
54 * #define H_0 1
55 * #define H_1 0
56 * #else
57 * #error "PDP and NUXI support deferred"
58 * #endif / * IS_LITTLE_ENDIAN * /
59 * #endif / * IS_BIG_ENDIAN * /
60 */
61
62 #define L_0 0 /* UCS-4 value: offset of the most significant byte (network order) */
63 #define L_1 1
64 #define L_2 2
65 #define L_3 3 /* UCS-4 value: offset of the least significant byte */
66 #define H_0 0 /* 16-bit unit: offset of the high byte */
67 #define H_1 1 /* 16-bit unit: offset of the low byte */
68
69 #define BAD_UTF8 ((PRUint32)-1) /* failure value returned by sec_port_read_utf8 */
70
71 /*
72 * Parse a single UTF-8 character per the spec. in section 3.9 (D36)
73 * of Unicode 4.0.0.
74 *
75 * Parameters:
76 * index - Points to the byte offset in inBuf of character to read. On success,
77 * updated to the offset of the following character.
78 * inBuf - Input buffer, UTF-8 encoded
79 * inbufLen - Length of input buffer, in bytes.
80 *
81 * Returns:
82 * Success - The UCS4 encoded character
83 * Failure - BAD_UTF8
84 */
85 static PRUint32
86 sec_port_read_utf8(unsigned int *index, unsigned char *inBuf, unsigned int inBuf Len)
87 {
88 PRUint32 result;
89 unsigned int i = *index;
90 int bytes_left;
91 PRUint32 min_value;
92
93 PORT_Assert(i < inBufLen);
94
95 if ( (inBuf[i] & 0x80) == 0x00 ) {
96 result = inBuf[i++];
97 bytes_left = 0;
98 min_value = 0;
99 } else if ( (inBuf[i] & 0xE0) == 0xC0 ) {
100 result = inBuf[i++] & 0x1F;
101 bytes_left = 1;
102 min_value = 0x80;
103 } else if ( (inBuf[i] & 0xF0) == 0xE0) {
104 result = inBuf[i++] & 0x0F;
105 bytes_left = 2;
106 min_value = 0x800;
107 } else if ( (inBuf[i] & 0xF8) == 0xF0) {
108 result = inBuf[i++] & 0x07;
109 bytes_left = 3;
110 min_value = 0x10000;
111 } else {
112 return BAD_UTF8;
113 }
114
115 while (bytes_left--) {
116 if (i >= inBufLen || (inBuf[i] & 0xC0) != 0x80) return BAD_UTF8;
117 result = (result << 6) | (inBuf[i++] & 0x3F);
118 }
119
120 /* Check for overlong sequences, surrogates, and outside unicode range */
121 if (result < min_value || (result & 0xFFFFF800) == 0xD800 || result > 0x10FFFF ) {
122 return BAD_UTF8;
123 }
124
125 *index = i;
126 return result;
127 }
128
129 PRBool
130 sec_port_ucs4_utf8_conversion_function
131 (
132 PRBool toUnicode,
133 unsigned char *inBuf,
134 unsigned int inBufLen,
135 unsigned char *outBuf,
136 unsigned int maxOutBufLen,
137 unsigned int *outBufLen
138 )
139 {
140 PORT_Assert((unsigned int *)NULL != outBufLen);
141
142 if( toUnicode ) {
143 unsigned int i, len = 0;
144
145 for( i = 0; i < inBufLen; ) {
146 if( (inBuf[i] & 0x80) == 0x00 ) i += 1;
147 else if( (inBuf[i] & 0xE0) == 0xC0 ) i += 2;
148 else if( (inBuf[i] & 0xF0) == 0xE0 ) i += 3;
149 else if( (inBuf[i] & 0xF8) == 0xF0 ) i += 4;
150 else return PR_FALSE;
151
152 len += 4;
153 }
154
155 if( len > maxOutBufLen ) {
156 *outBufLen = len;
157 return PR_FALSE;
158 }
159
160 len = 0;
161
162 for( i = 0; i < inBufLen; ) {
163 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
164
165 if (ucs4 == BAD_UTF8) return PR_FALSE;
166
167 outBuf[len+L_0] = 0x00;
168 outBuf[len+L_1] = (unsigned char)(ucs4 >> 16);
169 outBuf[len+L_2] = (unsigned char)(ucs4 >> 8);
170 outBuf[len+L_3] = (unsigned char)ucs4;
171
172 len += 4;
173 }
174
175 *outBufLen = len;
176 return PR_TRUE;
177 } else {
178 unsigned int i, len = 0;
179 PORT_Assert((inBufLen % 4) == 0);
180 if ((inBufLen % 4) != 0) {
181 *outBufLen = 0;
182 return PR_FALSE;
183 }
184
185 for( i = 0; i < inBufLen; i += 4 ) {
186 if( (inBuf[i+L_0] > 0x00) || (inBuf[i+L_1] > 0x10) ) {
187 *outBufLen = 0;
188 return PR_FALSE;
189 } else if( inBuf[i+L_1] >= 0x01 ) len += 4;
190 else if( inBuf[i+L_2] >= 0x08 ) len += 3;
191 else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) len += 2;
192 else len += 1;
193 }
194
195 if( len > maxOutBufLen ) {
196 *outBufLen = len;
197 return PR_FALSE;
198 }
199
200 len = 0;
201
202 for( i = 0; i < inBufLen; i += 4 ) {
203 if( inBuf[i+L_1] >= 0x01 ) {
204 /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
205 /* 00000000 000abcde fghijklm nopqrstu ->
206 11110abc 10defghi 10jklmno 10pqrstu */
207
208 outBuf[len+0] = 0xF0 | ((inBuf[i+L_1] & 0x1C) >> 2);
209 outBuf[len+1] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4)
210 | ((inBuf[i+L_2] & 0xF0) >> 4);
211 outBuf[len+2] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)
212 | ((inBuf[i+L_3] & 0xC0) >> 6);
213 outBuf[len+3] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
214
215 len += 4;
216 } else if( inBuf[i+L_2] >= 0x08 ) {
217 /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
218 /* 00000000 00000000 abcdefgh ijklmnop ->
219 1110abcd 10efghij 10klmnop */
220
221 outBuf[len+0] = 0xE0 | ((inBuf[i+L_2] & 0xF0) >> 4);
222 outBuf[len+1] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)
223 | ((inBuf[i+L_3] & 0xC0) >> 6);
224 outBuf[len+2] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
225
226 len += 3;
227 } else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) {
228 /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */
229 /* 00000000 00000000 00000abc defghijk ->
230 110abcde 10fghijk */
231
232 outBuf[len+0] = 0xC0 | ((inBuf[i+L_2] & 0x07) << 2)
233 | ((inBuf[i+L_3] & 0xC0) >> 6);
234 outBuf[len+1] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
235
236 len += 2;
237 } else {
238 /* 0000 0000-0000 007F -> 0xxxxxx */
239 /* 00000000 00000000 00000000 0abcdefg ->
240 0abcdefg */
241
242 outBuf[len+0] = (inBuf[i+L_3] & 0x7F);
243
244 len += 1;
245 }
246 }
247
248 *outBufLen = len;
249 return PR_TRUE;
250 }
251 }
252
253 PRBool
254 sec_port_ucs2_utf8_conversion_function
255 (
256 PRBool toUnicode,
257 unsigned char *inBuf,
258 unsigned int inBufLen,
259 unsigned char *outBuf,
260 unsigned int maxOutBufLen,
261 unsigned int *outBufLen
262 )
263 {
264 PORT_Assert((unsigned int *)NULL != outBufLen);
265
266 if( toUnicode ) {
267 unsigned int i, len = 0;
268
269 for( i = 0; i < inBufLen; ) {
270 if( (inBuf[i] & 0x80) == 0x00 ) {
271 i += 1;
272 len += 2;
273 } else if( (inBuf[i] & 0xE0) == 0xC0 ) {
274 i += 2;
275 len += 2;
276 } else if( (inBuf[i] & 0xF0) == 0xE0 ) {
277 i += 3;
278 len += 2;
279 } else if( (inBuf[i] & 0xF8) == 0xF0 ) {
280 i += 4;
281 len += 4;
282 } else return PR_FALSE;
283 }
284
285 if( len > maxOutBufLen ) {
286 *outBufLen = len;
287 return PR_FALSE;
288 }
289
290 len = 0;
291
292 for( i = 0; i < inBufLen; ) {
293 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
294
295 if (ucs4 == BAD_UTF8) return PR_FALSE;
296
297 if( ucs4 < 0x10000) {
298 outBuf[len+H_0] = (unsigned char)(ucs4 >> 8);
299 outBuf[len+H_1] = (unsigned char)ucs4;
300 len += 2;
301 } else {
302 ucs4 -= 0x10000;
303 outBuf[len+0+H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3));
304 outBuf[len+0+H_1] = (unsigned char)(ucs4 >> 10);
305 outBuf[len+2+H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3));
306 outBuf[len+2+H_1] = (unsigned char)ucs4;
307 len += 4;
308 }
309 }
310
311 *outBufLen = len;
312 return PR_TRUE;
313 } else {
314 unsigned int i, len = 0;
315 PORT_Assert((inBufLen % 2) == 0);
316 if ((inBufLen % 2) != 0) {
317 *outBufLen = 0;
318 return PR_FALSE;
319 }
320
321 for( i = 0; i < inBufLen; i += 2 ) {
322 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) len += 1;
323 else if( inBuf[i+H_0] < 0x08 ) len += 2;
324 else if( ((inBuf[i+0+H_0] & 0xFC) == 0xD8) ) {
325 if( ((inBufLen - i) > 2) && ((inBuf[i+2+H_0] & 0xFC) == 0xDC) ) {
326 i += 2;
327 len += 4;
328 } else {
329 return PR_FALSE;
330 }
331 }
332 else len += 3;
333 }
334
335 if( len > maxOutBufLen ) {
336 *outBufLen = len;
337 return PR_FALSE;
338 }
339
340 len = 0;
341
342 for( i = 0; i < inBufLen; i += 2 ) {
343 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) {
344 /* 0000-007F -> 0xxxxxx */
345 /* 00000000 0abcdefg -> 0abcdefg */
346
347 outBuf[len] = inBuf[i+H_1] & 0x7F;
348
349 len += 1;
350 } else if( inBuf[i+H_0] < 0x08 ) {
351 /* 0080-07FF -> 110xxxxx 10xxxxxx */
352 /* 00000abc defghijk -> 110abcde 10fghijk */
353
354 outBuf[len+0] = 0xC0 | ((inBuf[i+H_0] & 0x07) << 2)
355 | ((inBuf[i+H_1] & 0xC0) >> 6);
356 outBuf[len+1] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);
357
358 len += 2;
359 } else if( (inBuf[i+H_0] & 0xFC) == 0xD8 ) {
360 int abcde, BCDE;
361
362 PORT_Assert(((inBufLen - i) > 2) && ((inBuf[i+2+H_0] & 0xFC) == 0xDC) );
363
364 /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
365 /* 110110BC DEfghijk 110111lm nopqrstu ->
366 { Let abcde = BCDE + 1 }
367 11110abc 10defghi 10jklmno 10pqrstu */
368
369 BCDE = ((inBuf[i+H_0] & 0x03) << 2) | ((inBuf[i+H_1] & 0xC0) >> 6);
370 abcde = BCDE + 1;
371
372 outBuf[len+0] = 0xF0 | ((abcde & 0x1C) >> 2);
373 outBuf[len+1] = 0x80 | ((abcde & 0x03) << 4)
374 | ((inBuf[i+0+H_1] & 0x3C) >> 2);
375 outBuf[len+2] = 0x80 | ((inBuf[i+0+H_1] & 0x03) << 4)
376 | ((inBuf[i+2+H_0] & 0x03) << 2)
377 | ((inBuf[i+2+H_1] & 0xC0) >> 6);
378 outBuf[len+3] = 0x80 | ((inBuf[i+2+H_1] & 0x3F) >> 0);
379
380 i += 2;
381 len += 4;
382 } else {
383 /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
384 /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */
385
386 outBuf[len+0] = 0xE0 | ((inBuf[i+H_0] & 0xF0) >> 4);
387 outBuf[len+1] = 0x80 | ((inBuf[i+H_0] & 0x0F) << 2)
388 | ((inBuf[i+H_1] & 0xC0) >> 6);
389 outBuf[len+2] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);
390
391 len += 3;
392 }
393 }
394
395 *outBufLen = len;
396 return PR_TRUE;
397 }
398 }
399
400 PRBool
401 sec_port_iso88591_utf8_conversion_function
402 (
403 const unsigned char *inBuf,
404 unsigned int inBufLen,
405 unsigned char *outBuf,
406 unsigned int maxOutBufLen,
407 unsigned int *outBufLen
408 )
409 {
410 unsigned int i, len = 0;
411
412 PORT_Assert((unsigned int *)NULL != outBufLen);
413
414 for( i = 0; i < inBufLen; i++) {
415 if( (inBuf[i] & 0x80) == 0x00 ) len += 1;
416 else len += 2;
417 }
418
419 if( len > maxOutBufLen ) {
420 *outBufLen = len;
421 return PR_FALSE;
422 }
423
424 len = 0;
425
426 for( i = 0; i < inBufLen; i++) {
427 if( (inBuf[i] & 0x80) == 0x00 ) {
428 /* 00-7F -> 0xxxxxxx */
429 /* 0abcdefg -> 0abcdefg */
430
431 outBuf[len] = inBuf[i];
432 len += 1;
433 } else {
434 /* 80-FF <- 110xxxxx 10xxxxxx */
435 /* 00000000 abcdefgh -> 110000ab 10cdefgh */
436
437 outBuf[len+0] = 0xC0 | ((inBuf[i] & 0xC0) >> 6);
438 outBuf[len+1] = 0x80 | ((inBuf[i] & 0x3F) >> 0);
439
440 len += 2;
441 }
442 }
443
444 *outBufLen = len;
445 return PR_TRUE;
446 }
447
448 #ifdef TEST_UTF8
449
450 #include <stdio.h>
451 #include <string.h>
452 #include <stdlib.h>
453 #include <netinet/in.h> /* for htonl and htons */
454
455 /*
456 * UCS-4 vectors
 *
 * One test vector: a UCS-4 value together with a UTF-8 string.
457 */
458
459 struct ucs4 {
460 PRUint32 c; /* UCS-4 code point */
461 char *utf8; /* UTF-8 encoding of c, as a NUL-terminated string */
462 };
463
464 /*
465 * UCS-2 vectors
 *
 * One test vector: a 16-bit UCS-2 value together with a UTF-8 string.
466 */
467
468 struct ucs2 {
469 PRUint16 c; /* UCS-2 code point */
470 char *utf8; /* UTF-8 encoding of c, as a NUL-terminated string */
471 };
472
473 /*
474 * UTF-16 vectors
 *
 * One test vector: a code point above 0xFFFF together with the two
 * 16-bit units of its surrogate pair.
475 */
476
477 struct utf16 {
478 PRUint32 c; /* code point */
479 PRUint16 w[2]; /* w[0] = high surrogate, w[1] = low surrogate */
480 };
481
482
483 /*
484 * UCS-4 vectors
 *
 * Each entry pairs a UCS-4 code point with its UTF-8 encoding. The
 * entries are grouped by encoded length (one to four bytes) and run
 * from 0x00000001 up to 0x0010FFFF.
485 */
486
487 struct ucs4 ucs4[] = { /* one-byte encodings: 0x01-0x7F */
488 { 0x00000001, "\x01" },
489 { 0x00000002, "\x02" },
490 { 0x00000003, "\x03" },
491 { 0x00000004, "\x04" },
492 { 0x00000007, "\x07" },
493 { 0x00000008, "\x08" },
494 { 0x0000000F, "\x0F" },
495 { 0x00000010, "\x10" },
496 { 0x0000001F, "\x1F" },
497 { 0x00000020, "\x20" },
498 { 0x0000003F, "\x3F" },
499 { 0x00000040, "\x40" },
500 { 0x0000007F, "\x7F" },
501 /* two-byte encodings: 0x80-0x7FF */
502 { 0x00000080, "\xC2\x80" },
503 { 0x00000081, "\xC2\x81" },
504 { 0x00000082, "\xC2\x82" },
505 { 0x00000084, "\xC2\x84" },
506 { 0x00000088, "\xC2\x88" },
507 { 0x00000090, "\xC2\x90" },
508 { 0x000000A0, "\xC2\xA0" },
509 { 0x000000C0, "\xC3\x80" },
510 { 0x000000FF, "\xC3\xBF" },
511 { 0x00000100, "\xC4\x80" },
512 { 0x00000101, "\xC4\x81" },
513 { 0x00000102, "\xC4\x82" },
514 { 0x00000104, "\xC4\x84" },
515 { 0x00000108, "\xC4\x88" },
516 { 0x00000110, "\xC4\x90" },
517 { 0x00000120, "\xC4\xA0" },
518 { 0x00000140, "\xC5\x80" },
519 { 0x00000180, "\xC6\x80" },
520 { 0x000001FF, "\xC7\xBF" },
521 { 0x00000200, "\xC8\x80" },
522 { 0x00000201, "\xC8\x81" },
523 { 0x00000202, "\xC8\x82" },
524 { 0x00000204, "\xC8\x84" },
525 { 0x00000208, "\xC8\x88" },
526 { 0x00000210, "\xC8\x90" },
527 { 0x00000220, "\xC8\xA0" },
528 { 0x00000240, "\xC9\x80" },
529 { 0x00000280, "\xCA\x80" },
530 { 0x00000300, "\xCC\x80" },
531 { 0x000003FF, "\xCF\xBF" },
532 { 0x00000400, "\xD0\x80" },
533 { 0x00000401, "\xD0\x81" },
534 { 0x00000402, "\xD0\x82" },
535 { 0x00000404, "\xD0\x84" },
536 { 0x00000408, "\xD0\x88" },
537 { 0x00000410, "\xD0\x90" },
538 { 0x00000420, "\xD0\xA0" },
539 { 0x00000440, "\xD1\x80" },
540 { 0x00000480, "\xD2\x80" },
541 { 0x00000500, "\xD4\x80" },
542 { 0x00000600, "\xD8\x80" },
543 { 0x000007FF, "\xDF\xBF" },
544 /* three-byte encodings: 0x800-0xFFFF */
545 { 0x00000800, "\xE0\xA0\x80" },
546 { 0x00000801, "\xE0\xA0\x81" },
547 { 0x00000802, "\xE0\xA0\x82" },
548 { 0x00000804, "\xE0\xA0\x84" },
549 { 0x00000808, "\xE0\xA0\x88" },
550 { 0x00000810, "\xE0\xA0\x90" },
551 { 0x00000820, "\xE0\xA0\xA0" },
552 { 0x00000840, "\xE0\xA1\x80" },
553 { 0x00000880, "\xE0\xA2\x80" },
554 { 0x00000900, "\xE0\xA4\x80" },
555 { 0x00000A00, "\xE0\xA8\x80" },
556 { 0x00000C00, "\xE0\xB0\x80" },
557 { 0x00000FFF, "\xE0\xBF\xBF" },
558 { 0x00001000, "\xE1\x80\x80" },
559 { 0x00001001, "\xE1\x80\x81" },
560 { 0x00001002, "\xE1\x80\x82" },
561 { 0x00001004, "\xE1\x80\x84" },
562 { 0x00001008, "\xE1\x80\x88" },
563 { 0x00001010, "\xE1\x80\x90" },
564 { 0x00001020, "\xE1\x80\xA0" },
565 { 0x00001040, "\xE1\x81\x80" },
566 { 0x00001080, "\xE1\x82\x80" },
567 { 0x00001100, "\xE1\x84\x80" },
568 { 0x00001200, "\xE1\x88\x80" },
569 { 0x00001400, "\xE1\x90\x80" },
570 { 0x00001800, "\xE1\xA0\x80" },
571 { 0x00001FFF, "\xE1\xBF\xBF" },
572 { 0x00002000, "\xE2\x80\x80" },
573 { 0x00002001, "\xE2\x80\x81" },
574 { 0x00002002, "\xE2\x80\x82" },
575 { 0x00002004, "\xE2\x80\x84" },
576 { 0x00002008, "\xE2\x80\x88" },
577 { 0x00002010, "\xE2\x80\x90" },
578 { 0x00002020, "\xE2\x80\xA0" },
579 { 0x00002040, "\xE2\x81\x80" },
580 { 0x00002080, "\xE2\x82\x80" },
581 { 0x00002100, "\xE2\x84\x80" },
582 { 0x00002200, "\xE2\x88\x80" },
583 { 0x00002400, "\xE2\x90\x80" },
584 { 0x00002800, "\xE2\xA0\x80" },
585 { 0x00003000, "\xE3\x80\x80" },
586 { 0x00003FFF, "\xE3\xBF\xBF" },
587 { 0x00004000, "\xE4\x80\x80" },
588 { 0x00004001, "\xE4\x80\x81" },
589 { 0x00004002, "\xE4\x80\x82" },
590 { 0x00004004, "\xE4\x80\x84" },
591 { 0x00004008, "\xE4\x80\x88" },
592 { 0x00004010, "\xE4\x80\x90" },
593 { 0x00004020, "\xE4\x80\xA0" },
594 { 0x00004040, "\xE4\x81\x80" },
595 { 0x00004080, "\xE4\x82\x80" },
596 { 0x00004100, "\xE4\x84\x80" },
597 { 0x00004200, "\xE4\x88\x80" },
598 { 0x00004400, "\xE4\x90\x80" },
599 { 0x00004800, "\xE4\xA0\x80" },
600 { 0x00005000, "\xE5\x80\x80" },
601 { 0x00006000, "\xE6\x80\x80" },
602 { 0x00007FFF, "\xE7\xBF\xBF" },
603 { 0x00008000, "\xE8\x80\x80" },
604 { 0x00008001, "\xE8\x80\x81" },
605 { 0x00008002, "\xE8\x80\x82" },
606 { 0x00008004, "\xE8\x80\x84" },
607 { 0x00008008, "\xE8\x80\x88" },
608 { 0x00008010, "\xE8\x80\x90" },
609 { 0x00008020, "\xE8\x80\xA0" },
610 { 0x00008040, "\xE8\x81\x80" },
611 { 0x00008080, "\xE8\x82\x80" },
612 { 0x00008100, "\xE8\x84\x80" },
613 { 0x00008200, "\xE8\x88\x80" },
614 { 0x00008400, "\xE8\x90\x80" },
615 { 0x00008800, "\xE8\xA0\x80" },
616 { 0x00009000, "\xE9\x80\x80" },
617 { 0x0000A000, "\xEA\x80\x80" },
618 { 0x0000C000, "\xEC\x80\x80" },
619 { 0x0000FFFF, "\xEF\xBF\xBF" },
620 /* four-byte encodings: 0x10000-0x10FFFF */
621 { 0x00010000, "\xF0\x90\x80\x80" },
622 { 0x00010001, "\xF0\x90\x80\x81" },
623 { 0x00010002, "\xF0\x90\x80\x82" },
624 { 0x00010004, "\xF0\x90\x80\x84" },
625 { 0x00010008, "\xF0\x90\x80\x88" },
626 { 0x00010010, "\xF0\x90\x80\x90" },
627 { 0x00010020, "\xF0\x90\x80\xA0" },
628 { 0x00010040, "\xF0\x90\x81\x80" },
629 { 0x00010080, "\xF0\x90\x82\x80" },
630 { 0x00010100, "\xF0\x90\x84\x80" },
631 { 0x00010200, "\xF0\x90\x88\x80" },
632 { 0x00010400, "\xF0\x90\x90\x80" },
633 { 0x00010800, "\xF0\x90\xA0\x80" },
634 { 0x00011000, "\xF0\x91\x80\x80" },
635 { 0x00012000, "\xF0\x92\x80\x80" },
636 { 0x00014000, "\xF0\x94\x80\x80" },
637 { 0x00018000, "\xF0\x98\x80\x80" },
638 { 0x0001FFFF, "\xF0\x9F\xBF\xBF" },
639 { 0x00020000, "\xF0\xA0\x80\x80" },
640 { 0x00020001, "\xF0\xA0\x80\x81" },
641 { 0x00020002, "\xF0\xA0\x80\x82" },
642 { 0x00020004, "\xF0\xA0\x80\x84" },
643 { 0x00020008, "\xF0\xA0\x80\x88" },
644 { 0x00020010, "\xF0\xA0\x80\x90" },
645 { 0x00020020, "\xF0\xA0\x80\xA0" },
646 { 0x00020040, "\xF0\xA0\x81\x80" },
647 { 0x00020080, "\xF0\xA0\x82\x80" },
648 { 0x00020100, "\xF0\xA0\x84\x80" },
649 { 0x00020200, "\xF0\xA0\x88\x80" },
650 { 0x00020400, "\xF0\xA0\x90\x80" },
651 { 0x00020800, "\xF0\xA0\xA0\x80" },
652 { 0x00021000, "\xF0\xA1\x80\x80" },
653 { 0x00022000, "\xF0\xA2\x80\x80" },
654 { 0x00024000, "\xF0\xA4\x80\x80" },
655 { 0x00028000, "\xF0\xA8\x80\x80" },
656 { 0x00030000, "\xF0\xB0\x80\x80" },
657 { 0x0003FFFF, "\xF0\xBF\xBF\xBF" },
658 { 0x00040000, "\xF1\x80\x80\x80" },
659 { 0x00040001, "\xF1\x80\x80\x81" },
660 { 0x00040002, "\xF1\x80\x80\x82" },
661 { 0x00040004, "\xF1\x80\x80\x84" },
662 { 0x00040008, "\xF1\x80\x80\x88" },
663 { 0x00040010, "\xF1\x80\x80\x90" },
664 { 0x00040020, "\xF1\x80\x80\xA0" },
665 { 0x00040040, "\xF1\x80\x81\x80" },
666 { 0x00040080, "\xF1\x80\x82\x80" },
667 { 0x00040100, "\xF1\x80\x84\x80" },
668 { 0x00040200, "\xF1\x80\x88\x80" },
669 { 0x00040400, "\xF1\x80\x90\x80" },
670 { 0x00040800, "\xF1\x80\xA0\x80" },
671 { 0x00041000, "\xF1\x81\x80\x80" },
672 { 0x00042000, "\xF1\x82\x80\x80" },
673 { 0x00044000, "\xF1\x84\x80\x80" },
674 { 0x00048000, "\xF1\x88\x80\x80" },
675 { 0x00050000, "\xF1\x90\x80\x80" },
676 { 0x00060000, "\xF1\xA0\x80\x80" },
677 { 0x0007FFFF, "\xF1\xBF\xBF\xBF" },
678 { 0x00080000, "\xF2\x80\x80\x80" },
679 { 0x00080001, "\xF2\x80\x80\x81" },
680 { 0x00080002, "\xF2\x80\x80\x82" },
681 { 0x00080004, "\xF2\x80\x80\x84" },
682 { 0x00080008, "\xF2\x80\x80\x88" },
683 { 0x00080010, "\xF2\x80\x80\x90" },
684 { 0x00080020, "\xF2\x80\x80\xA0" },
685 { 0x00080040, "\xF2\x80\x81\x80" },
686 { 0x00080080, "\xF2\x80\x82\x80" },
687 { 0x00080100, "\xF2\x80\x84\x80" },
688 { 0x00080200, "\xF2\x80\x88\x80" },
689 { 0x00080400, "\xF2\x80\x90\x80" },
690 { 0x00080800, "\xF2\x80\xA0\x80" },
691 { 0x00081000, "\xF2\x81\x80\x80" },
692 { 0x00082000, "\xF2\x82\x80\x80" },
693 { 0x00084000, "\xF2\x84\x80\x80" },
694 { 0x00088000, "\xF2\x88\x80\x80" },
695 { 0x00090000, "\xF2\x90\x80\x80" },
696 { 0x000A0000, "\xF2\xA0\x80\x80" },
697 { 0x000C0000, "\xF3\x80\x80\x80" },
698 { 0x000FFFFF, "\xF3\xBF\xBF\xBF" },
699 { 0x00100000, "\xF4\x80\x80\x80" },
700 { 0x00100001, "\xF4\x80\x80\x81" },
701 { 0x00100002, "\xF4\x80\x80\x82" },
702 { 0x00100004, "\xF4\x80\x80\x84" },
703 { 0x00100008, "\xF4\x80\x80\x88" },
704 { 0x00100010, "\xF4\x80\x80\x90" },
705 { 0x00100020, "\xF4\x80\x80\xA0" },
706 { 0x00100040, "\xF4\x80\x81\x80" },
707 { 0x00100080, "\xF4\x80\x82\x80" },
708 { 0x00100100, "\xF4\x80\x84\x80" },
709 { 0x00100200, "\xF4\x80\x88\x80" },
710 { 0x00100400, "\xF4\x80\x90\x80" },
711 { 0x00100800, "\xF4\x80\xA0\x80" },
712 { 0x00101000, "\xF4\x81\x80\x80" },
713 { 0x00102000, "\xF4\x82\x80\x80" },
714 { 0x00104000, "\xF4\x84\x80\x80" },
715 { 0x00108000, "\xF4\x88\x80\x80" },
716 { 0x0010FFFF, "\xF4\x8F\xBF\xBF" },
717 };
718
719 /*
720 * UCS-2 vectors
 *
 * Each entry pairs a 16-bit UCS-2 value with its UTF-8 encoding. The
 * entries are grouped by encoded length (one to three bytes) and run
 * from 0x0001 up to 0xFFFF.
721 */
722
723 struct ucs2 ucs2[] = { /* one-byte encodings: 0x01-0x7F */
724 { 0x0001, "\x01" },
725 { 0x0002, "\x02" },
726 { 0x0003, "\x03" },
727 { 0x0004, "\x04" },
728 { 0x0007, "\x07" },
729 { 0x0008, "\x08" },
730 { 0x000F, "\x0F" },
731 { 0x0010, "\x10" },
732 { 0x001F, "\x1F" },
733 { 0x0020, "\x20" },
734 { 0x003F, "\x3F" },
735 { 0x0040, "\x40" },
736 { 0x007F, "\x7F" },
737 /* two-byte encodings: 0x80-0x7FF */
738 { 0x0080, "\xC2\x80" },
739 { 0x0081, "\xC2\x81" },
740 { 0x0082, "\xC2\x82" },
741 { 0x0084, "\xC2\x84" },
742 { 0x0088, "\xC2\x88" },
743 { 0x0090, "\xC2\x90" },
744 { 0x00A0, "\xC2\xA0" },
745 { 0x00C0, "\xC3\x80" },
746 { 0x00FF, "\xC3\xBF" },
747 { 0x0100, "\xC4\x80" },
748 { 0x0101, "\xC4\x81" },
749 { 0x0102, "\xC4\x82" },
750 { 0x0104, "\xC4\x84" },
751 { 0x0108, "\xC4\x88" },
752 { 0x0110, "\xC4\x90" },
753 { 0x0120, "\xC4\xA0" },
754 { 0x0140, "\xC5\x80" },
755 { 0x0180, "\xC6\x80" },
756 { 0x01FF, "\xC7\xBF" },
757 { 0x0200, "\xC8\x80" },
758 { 0x0201, "\xC8\x81" },
759 { 0x0202, "\xC8\x82" },
760 { 0x0204, "\xC8\x84" },
761 { 0x0208, "\xC8\x88" },
762 { 0x0210, "\xC8\x90" },
763 { 0x0220, "\xC8\xA0" },
764 { 0x0240, "\xC9\x80" },
765 { 0x0280, "\xCA\x80" },
766 { 0x0300, "\xCC\x80" },
767 { 0x03FF, "\xCF\xBF" },
768 { 0x0400, "\xD0\x80" },
769 { 0x0401, "\xD0\x81" },
770 { 0x0402, "\xD0\x82" },
771 { 0x0404, "\xD0\x84" },
772 { 0x0408, "\xD0\x88" },
773 { 0x0410, "\xD0\x90" },
774 { 0x0420, "\xD0\xA0" },
775 { 0x0440, "\xD1\x80" },
776 { 0x0480, "\xD2\x80" },
777 { 0x0500, "\xD4\x80" },
778 { 0x0600, "\xD8\x80" },
779 { 0x07FF, "\xDF\xBF" },
780 /* three-byte encodings: 0x800-0xFFFF */
781 { 0x0800, "\xE0\xA0\x80" },
782 { 0x0801, "\xE0\xA0\x81" },
783 { 0x0802, "\xE0\xA0\x82" },
784 { 0x0804, "\xE0\xA0\x84" },
785 { 0x0808, "\xE0\xA0\x88" },
786 { 0x0810, "\xE0\xA0\x90" },
787 { 0x0820, "\xE0\xA0\xA0" },
788 { 0x0840, "\xE0\xA1\x80" },
789 { 0x0880, "\xE0\xA2\x80" },
790 { 0x0900, "\xE0\xA4\x80" },
791 { 0x0A00, "\xE0\xA8\x80" },
792 { 0x0C00, "\xE0\xB0\x80" },
793 { 0x0FFF, "\xE0\xBF\xBF" },
794 { 0x1000, "\xE1\x80\x80" },
795 { 0x1001, "\xE1\x80\x81" },
796 { 0x1002, "\xE1\x80\x82" },
797 { 0x1004, "\xE1\x80\x84" },
798 { 0x1008, "\xE1\x80\x88" },
799 { 0x1010, "\xE1\x80\x90" },
800 { 0x1020, "\xE1\x80\xA0" },
801 { 0x1040, "\xE1\x81\x80" },
802 { 0x1080, "\xE1\x82\x80" },
803 { 0x1100, "\xE1\x84\x80" },
804 { 0x1200, "\xE1\x88\x80" },
805 { 0x1400, "\xE1\x90\x80" },
806 { 0x1800, "\xE1\xA0\x80" },
807 { 0x1FFF, "\xE1\xBF\xBF" },
808 { 0x2000, "\xE2\x80\x80" },
809 { 0x2001, "\xE2\x80\x81" },
810 { 0x2002, "\xE2\x80\x82" },
811 { 0x2004, "\xE2\x80\x84" },
812 { 0x2008, "\xE2\x80\x88" },
813 { 0x2010, "\xE2\x80\x90" },
814 { 0x2020, "\xE2\x80\xA0" },
815 { 0x2040, "\xE2\x81\x80" },
816 { 0x2080, "\xE2\x82\x80" },
817 { 0x2100, "\xE2\x84\x80" },
818 { 0x2200, "\xE2\x88\x80" },
819 { 0x2400, "\xE2\x90\x80" },
820 { 0x2800, "\xE2\xA0\x80" },
821 { 0x3000, "\xE3\x80\x80" },
822 { 0x3FFF, "\xE3\xBF\xBF" },
823 { 0x4000, "\xE4\x80\x80" },
824 { 0x4001, "\xE4\x80\x81" },
825 { 0x4002, "\xE4\x80\x82" },
826 { 0x4004, "\xE4\x80\x84" },
827 { 0x4008, "\xE4\x80\x88" },
828 { 0x4010, "\xE4\x80\x90" },
829 { 0x4020, "\xE4\x80\xA0" },
830 { 0x4040, "\xE4\x81\x80" },
831 { 0x4080, "\xE4\x82\x80" },
832 { 0x4100, "\xE4\x84\x80" },
833 { 0x4200, "\xE4\x88\x80" },
834 { 0x4400, "\xE4\x90\x80" },
835 { 0x4800, "\xE4\xA0\x80" },
836 { 0x5000, "\xE5\x80\x80" },
837 { 0x6000, "\xE6\x80\x80" },
838 { 0x7FFF, "\xE7\xBF\xBF" },
839 { 0x8000, "\xE8\x80\x80" },
840 { 0x8001, "\xE8\x80\x81" },
841 { 0x8002, "\xE8\x80\x82" },
842 { 0x8004, "\xE8\x80\x84" },
843 { 0x8008, "\xE8\x80\x88" },
844 { 0x8010, "\xE8\x80\x90" },
845 { 0x8020, "\xE8\x80\xA0" },
846 { 0x8040, "\xE8\x81\x80" },
847 { 0x8080, "\xE8\x82\x80" },
848 { 0x8100, "\xE8\x84\x80" },
849 { 0x8200, "\xE8\x88\x80" },
850 { 0x8400, "\xE8\x90\x80" },
851 { 0x8800, "\xE8\xA0\x80" },
852 { 0x9000, "\xE9\x80\x80" },
853 { 0xA000, "\xEA\x80\x80" },
854 { 0xC000, "\xEC\x80\x80" },
855 { 0xFB01, "\xEF\xAC\x81" },
856 { 0xFFFF, "\xEF\xBF\xBF" }
857
858 };
859
860 /*
861 * UTF-16 vectors
862 */
863
864 struct utf16 utf16[] = {
865 { 0x00010000, { 0xD800, 0xDC00 } },
866 { 0x00010001, { 0xD800, 0xDC01 } },
867 { 0x00010002, { 0xD800, 0xDC02 } },
868 { 0x00010003, { 0xD800, 0xDC03 } },
869 { 0x00010004, { 0xD800, 0xDC04 } },
870 { 0x00010007, { 0xD800, 0xDC07 } },
871 { 0x00010008, { 0xD800, 0xDC08 } },
872 { 0x0001000F, { 0xD800, 0xDC0F } },
873 { 0x00010010, { 0xD800, 0xDC10 } },
874 { 0x0001001F, { 0xD800, 0xDC1F } },
875 { 0x00010020, { 0xD800, 0xDC20 } },
876 { 0x0001003F, { 0xD800, 0xDC3F } },
877 { 0x00010040, { 0xD800, 0xDC40 } },
878 { 0x0001007F, { 0xD800, 0xDC7F } },
879 { 0x00010080, { 0xD800, 0xDC80 } },
880 { 0x00010081, { 0xD800, 0xDC81 } },
881 { 0x00010082, { 0xD800, 0xDC82 } },
882 { 0x00010084, { 0xD800, 0xDC84 } },
883 { 0x00010088, { 0xD800, 0xDC88 } },
884 { 0x00010090, { 0xD800, 0xDC90 } },
885 { 0x000100A0, { 0xD800, 0xDCA0 } },
886 { 0x000100C0, { 0xD800, 0xDCC0 } },
887 { 0x000100FF, { 0xD800, 0xDCFF } },
888 { 0x00010100, { 0xD800, 0xDD00 } },
889 { 0x00010101, { 0xD800, 0xDD01 } },
890 { 0x00010102, { 0xD800, 0xDD02 } },
891 { 0x00010104, { 0xD800, 0xDD04 } },
892 { 0x00010108, { 0xD800, 0xDD08 } },
893 { 0x00010110, { 0xD800, 0xDD10 } },
894 { 0x00010120, { 0xD800, 0xDD20 } },
895 { 0x00010140, { 0xD800, 0xDD40 } },
896 { 0x00010180, { 0xD800, 0xDD80 } },
897 { 0x000101FF, { 0xD800, 0xDDFF } },
898 { 0x00010200, { 0xD800, 0xDE00 } },
899 { 0x00010201, { 0xD800, 0xDE01 } },
900 { 0x00010202, { 0xD800, 0xDE02 } },
901 { 0x00010204, { 0xD800, 0xDE04 } },
902 { 0x00010208, { 0xD800, 0xDE08 } },
903 { 0x00010210, { 0xD800, 0xDE10 } },
904 { 0x00010220, { 0xD800, 0xDE20 } },
905 { 0x00010240, { 0xD800, 0xDE40 } },
906 { 0x00010280, { 0xD800, 0xDE80 } },
907 { 0x00010300, { 0xD800, 0xDF00 } },
908 { 0x000103FF, { 0xD800, 0xDFFF } },
909 { 0x00010400, { 0xD801, 0xDC00 } },
910 { 0x00010401, { 0xD801, 0xDC01 } },
911 { 0x00010402, { 0xD801, 0xDC02 } },
912 { 0x00010404, { 0xD801, 0xDC04 } },
913 { 0x00010408, { 0xD801, 0xDC08 } },
914 { 0x00010410, { 0xD801, 0xDC10 } },
915 { 0x00010420, { 0xD801, 0xDC20 } },
916 { 0x00010440, { 0xD801, 0xDC40 } },
917 { 0x00010480, { 0xD801, 0xDC80 } },
918 { 0x00010500, { 0xD801, 0xDD00 } },
919 { 0x00010600, { 0xD801, 0xDE00 } },
920 { 0x000107FF, { 0xD801, 0xDFFF } },
921 { 0x00010800, { 0xD802, 0xDC00 } },
922 { 0x00010801, { 0xD802, 0xDC01 } },
923 { 0x00010802, { 0xD802, 0xDC02 } },
924 { 0x00010804, { 0xD802, 0xDC04 } },
925 { 0x00010808, { 0xD802, 0xDC08 } },
926 { 0x00010810, { 0xD802, 0xDC10 } },
927 { 0x00010820, { 0xD802, 0xDC20 } },
928 { 0x00010840, { 0xD802, 0xDC40 } },
929 { 0x00010880, { 0xD802, 0xDC80 } },
930 { 0x00010900, { 0xD802, 0xDD00 } },
931 { 0x00010A00, { 0xD802, 0xDE00 } },
932 { 0x00010C00, { 0xD803, 0xDC00 } },
933 { 0x00010FFF, { 0xD803, 0xDFFF } },
934 { 0x00011000, { 0xD804, 0xDC00 } },
935 { 0x00011001, { 0xD804, 0xDC01 } },
936 { 0x00011002, { 0xD804, 0xDC02 } },
937 { 0x00011004, { 0xD804, 0xDC04 } },
938 { 0x00011008, { 0xD804, 0xDC08 } },
939 { 0x00011010, { 0xD804, 0xDC10 } },
940 { 0x00011020, { 0xD804, 0xDC20 } },
941 { 0x00011040, { 0xD804, 0xDC40 } },
942 { 0x00011080, { 0xD804, 0xDC80 } },
943 { 0x00011100, { 0xD804, 0xDD00 } },
944 { 0x00011200, { 0xD804, 0xDE00 } },
945 { 0x00011400, { 0xD805, 0xDC00 } },
946 { 0x00011800, { 0xD806, 0xDC00 } },
947 { 0x00011FFF, { 0xD807, 0xDFFF } },
948 { 0x00012000, { 0xD808, 0xDC00 } },
949 { 0x00012001, { 0xD808, 0xDC01 } },
950 { 0x00012002, { 0xD808, 0xDC02 } },
951 { 0x00012004, { 0xD808, 0xDC04 } },
952 { 0x00012008, { 0xD808, 0xDC08 } },
953 { 0x00012010, { 0xD808, 0xDC10 } },
954 { 0x00012020, { 0xD808, 0xDC20 } },
955 { 0x00012040, { 0xD808, 0xDC40 } },
956 { 0x00012080, { 0xD808, 0xDC80 } },
957 { 0x00012100, { 0xD808, 0xDD00 } },
958 { 0x00012200, { 0xD808, 0xDE00 } },
959 { 0x00012400, { 0xD809, 0xDC00 } },
960 { 0x00012800, { 0xD80A, 0xDC00 } },
961 { 0x00013000, { 0xD80C, 0xDC00 } },
962 { 0x00013FFF, { 0xD80F, 0xDFFF } },
963 { 0x00014000, { 0xD810, 0xDC00 } },
964 { 0x00014001, { 0xD810, 0xDC01 } },
965 { 0x00014002, { 0xD810, 0xDC02 } },
966 { 0x00014004, { 0xD810, 0xDC04 } },
967 { 0x00014008, { 0xD810, 0xDC08 } },
968 { 0x00014010, { 0xD810, 0xDC10 } },
969 { 0x00014020, { 0xD810, 0xDC20 } },
970 { 0x00014040, { 0xD810, 0xDC40 } },
971 { 0x00014080, { 0xD810, 0xDC80 } },
972 { 0x00014100, { 0xD810, 0xDD00 } },
973 { 0x00014200, { 0xD810, 0xDE00 } },
974 { 0x00014400, { 0xD811, 0xDC00 } },
975 { 0x00014800, { 0xD812, 0xDC00 } },
976 { 0x00015000, { 0xD814, 0xDC00 } },
977 { 0x00016000, { 0xD818, 0xDC00 } },
978 { 0x00017FFF, { 0xD81F, 0xDFFF } },
979 { 0x00018000, { 0xD820, 0xDC00 } },
980 { 0x00018001, { 0xD820, 0xDC01 } },
981 { 0x00018002, { 0xD820, 0xDC02 } },
982 { 0x00018004, { 0xD820, 0xDC04 } },
983 { 0x00018008, { 0xD820, 0xDC08 } },
984 { 0x00018010, { 0xD820, 0xDC10 } },
985 { 0x00018020, { 0xD820, 0xDC20 } },
986 { 0x00018040, { 0xD820, 0xDC40 } },
987 { 0x00018080, { 0xD820, 0xDC80 } },
988 { 0x00018100, { 0xD820, 0xDD00 } },
989 { 0x00018200, { 0xD820, 0xDE00 } },
990 { 0x00018400, { 0xD821, 0xDC00 } },
991 { 0x00018800, { 0xD822, 0xDC00 } },
992 { 0x00019000, { 0xD824, 0xDC00 } },
993 { 0x0001A000, { 0xD828, 0xDC00 } },
994 { 0x0001C000, { 0xD830, 0xDC00 } },
995 { 0x0001FFFF, { 0xD83F, 0xDFFF } },
996 { 0x00020000, { 0xD840, 0xDC00 } },
997 { 0x00020001, { 0xD840, 0xDC01 } },
998 { 0x00020002, { 0xD840, 0xDC02 } },
999 { 0x00020004, { 0xD840, 0xDC04 } },
1000 { 0x00020008, { 0xD840, 0xDC08 } },
1001 { 0x00020010, { 0xD840, 0xDC10 } },
1002 { 0x00020020, { 0xD840, 0xDC20 } },
1003 { 0x00020040, { 0xD840, 0xDC40 } },
1004 { 0x00020080, { 0xD840, 0xDC80 } },
1005 { 0x00020100, { 0xD840, 0xDD00 } },
1006 { 0x00020200, { 0xD840, 0xDE00 } },
1007 { 0x00020400, { 0xD841, 0xDC00 } },
1008 { 0x00020800, { 0xD842, 0xDC00 } },
1009 { 0x00021000, { 0xD844, 0xDC00 } },
1010 { 0x00022000, { 0xD848, 0xDC00 } },
1011 { 0x00024000, { 0xD850, 0xDC00 } },
1012 { 0x00028000, { 0xD860, 0xDC00 } },
1013 { 0x0002FFFF, { 0xD87F, 0xDFFF } },
1014 { 0x00030000, { 0xD880, 0xDC00 } },
1015 { 0x00030001, { 0xD880, 0xDC01 } },
1016 { 0x00030002, { 0xD880, 0xDC02 } },
1017 { 0x00030004, { 0xD880, 0xDC04 } },
1018 { 0x00030008, { 0xD880, 0xDC08 } },
1019 { 0x00030010, { 0xD880, 0xDC10 } },
1020 { 0x00030020, { 0xD880, 0xDC20 } },
1021 { 0x00030040, { 0xD880, 0xDC40 } },
1022 { 0x00030080, { 0xD880, 0xDC80 } },
1023 { 0x00030100, { 0xD880, 0xDD00 } },
1024 { 0x00030200, { 0xD880, 0xDE00 } },
1025 { 0x00030400, { 0xD881, 0xDC00 } },
1026 { 0x00030800, { 0xD882, 0xDC00 } },
1027 { 0x00031000, { 0xD884, 0xDC00 } },
1028 { 0x00032000, { 0xD888, 0xDC00 } },
1029 { 0x00034000, { 0xD890, 0xDC00 } },
1030 { 0x00038000, { 0xD8A0, 0xDC00 } },
1031 { 0x0003FFFF, { 0xD8BF, 0xDFFF } },
1032 { 0x00040000, { 0xD8C0, 0xDC00 } },
1033 { 0x00040001, { 0xD8C0, 0xDC01 } },
1034 { 0x00040002, { 0xD8C0, 0xDC02 } },
1035 { 0x00040004, { 0xD8C0, 0xDC04 } },
1036 { 0x00040008, { 0xD8C0, 0xDC08 } },
1037 { 0x00040010, { 0xD8C0, 0xDC10 } },
1038 { 0x00040020, { 0xD8C0, 0xDC20 } },
1039 { 0x00040040, { 0xD8C0, 0xDC40 } },
1040 { 0x00040080, { 0xD8C0, 0xDC80 } },
1041 { 0x00040100, { 0xD8C0, 0xDD00 } },
1042 { 0x00040200, { 0xD8C0, 0xDE00 } },
1043 { 0x00040400, { 0xD8C1, 0xDC00 } },
1044 { 0x00040800, { 0xD8C2, 0xDC00 } },
1045 { 0x00041000, { 0xD8C4, 0xDC00 } },
1046 { 0x00042000, { 0xD8C8, 0xDC00 } },
1047 { 0x00044000, { 0xD8D0, 0xDC00 } },
1048 { 0x00048000, { 0xD8E0, 0xDC00 } },
1049 { 0x0004FFFF, { 0xD8FF, 0xDFFF } },
1050 { 0x00050000, { 0xD900, 0xDC00 } },
1051 { 0x00050001, { 0xD900, 0xDC01 } },
1052 { 0x00050002, { 0xD900, 0xDC02 } },
1053 { 0x00050004, { 0xD900, 0xDC04 } },
1054 { 0x00050008, { 0xD900, 0xDC08 } },
1055 { 0x00050010, { 0xD900, 0xDC10 } },
1056 { 0x00050020, { 0xD900, 0xDC20 } },
1057 { 0x00050040, { 0xD900, 0xDC40 } },
1058 { 0x00050080, { 0xD900, 0xDC80 } },
1059 { 0x00050100, { 0xD900, 0xDD00 } },
1060 { 0x00050200, { 0xD900, 0xDE00 } },
1061 { 0x00050400, { 0xD901, 0xDC00 } },
1062 { 0x00050800, { 0xD902, 0xDC00 } },
1063 { 0x00051000, { 0xD904, 0xDC00 } },
1064 { 0x00052000, { 0xD908, 0xDC00 } },
1065 { 0x00054000, { 0xD910, 0xDC00 } },
1066 { 0x00058000, { 0xD920, 0xDC00 } },
1067 { 0x00060000, { 0xD940, 0xDC00 } },
1068 { 0x00070000, { 0xD980, 0xDC00 } },
1069 { 0x0007FFFF, { 0xD9BF, 0xDFFF } },
1070 { 0x00080000, { 0xD9C0, 0xDC00 } },
1071 { 0x00080001, { 0xD9C0, 0xDC01 } },
1072 { 0x00080002, { 0xD9C0, 0xDC02 } },
1073 { 0x00080004, { 0xD9C0, 0xDC04 } },
1074 { 0x00080008, { 0xD9C0, 0xDC08 } },
1075 { 0x00080010, { 0xD9C0, 0xDC10 } },
1076 { 0x00080020, { 0xD9C0, 0xDC20 } },
1077 { 0x00080040, { 0xD9C0, 0xDC40 } },
1078 { 0x00080080, { 0xD9C0, 0xDC80 } },
1079 { 0x00080100, { 0xD9C0, 0xDD00 } },
1080 { 0x00080200, { 0xD9C0, 0xDE00 } },
1081 { 0x00080400, { 0xD9C1, 0xDC00 } },
1082 { 0x00080800, { 0xD9C2, 0xDC00 } },
1083 { 0x00081000, { 0xD9C4, 0xDC00 } },
1084 { 0x00082000, { 0xD9C8, 0xDC00 } },
1085 { 0x00084000, { 0xD9D0, 0xDC00 } },
1086 { 0x00088000, { 0xD9E0, 0xDC00 } },
1087 { 0x0008FFFF, { 0xD9FF, 0xDFFF } },
1088 { 0x00090000, { 0xDA00, 0xDC00 } },
1089 { 0x00090001, { 0xDA00, 0xDC01 } },
1090 { 0x00090002, { 0xDA00, 0xDC02 } },
1091 { 0x00090004, { 0xDA00, 0xDC04 } },
1092 { 0x00090008, { 0xDA00, 0xDC08 } },
1093 { 0x00090010, { 0xDA00, 0xDC10 } },
1094 { 0x00090020, { 0xDA00, 0xDC20 } },
1095 { 0x00090040, { 0xDA00, 0xDC40 } },
1096 { 0x00090080, { 0xDA00, 0xDC80 } },
1097 { 0x00090100, { 0xDA00, 0xDD00 } },
1098 { 0x00090200, { 0xDA00, 0xDE00 } },
1099 { 0x00090400, { 0xDA01, 0xDC00 } },
1100 { 0x00090800, { 0xDA02, 0xDC00 } },
1101 { 0x00091000, { 0xDA04, 0xDC00 } },
1102 { 0x00092000, { 0xDA08, 0xDC00 } },
1103 { 0x00094000, { 0xDA10, 0xDC00 } },
1104 { 0x00098000, { 0xDA20, 0xDC00 } },
1105 { 0x000A0000, { 0xDA40, 0xDC00 } },
1106 { 0x000B0000, { 0xDA80, 0xDC00 } },
1107 { 0x000C0000, { 0xDAC0, 0xDC00 } },
1108 { 0x000D0000, { 0xDB00, 0xDC00 } },
1109 { 0x000FFFFF, { 0xDBBF, 0xDFFF } },
1110 { 0x0010FFFF, { 0xDBFF, 0xDFFF } }
1111
1112 };
1113
/*
 * Illegal UTF-8 sequences.  Each entry is an ordinary NUL-terminated C
 * string, so test code can measure it with strlen().  The groups below are
 * arranged by lead byte; the order of the entries is significant only for
 * diagnostics.
 */
char *utf8_bad[] = {
    /* two-byte forms with lead byte C0/C1 */
    "\xC0\x80",
    "\xC1\xBF",

    /* three-byte forms with lead byte E0 and second byte below A0 */
    "\xE0\x80\x80",
    "\xE0\x9F\xBF",

    /* four-byte forms: F0 with second byte below 90, then F4 90.. and F7 */
    "\xF0\x80\x80\x80",
    "\xF0\x8F\xBF\xBF",
    "\xF4\x90\x80\x80",
    "\xF7\xBF\xBF\xBF",

    /* five-byte forms (lead bytes F8..FB) */
    "\xF8\x80\x80\x80\x80",
    "\xF8\x88\x80\x80\x80",
    "\xF8\x92\x80\x80\x80",
    "\xF8\x9F\xBF\xBF\xBF",
    "\xF8\xA0\x80\x80\x80",
    "\xF8\xA8\x80\x80\x80",
    "\xF8\xB0\x80\x80\x80",
    "\xF8\xBF\xBF\xBF\xBF",
    "\xF9\x80\x80\x80\x88",
    "\xF9\x84\x80\x80\x80",
    "\xF9\xBF\xBF\xBF\xBF",
    "\xFA\x80\x80\x80\x80",
    "\xFA\x90\x80\x80\x80",
    "\xFB\xBF\xBF\xBF\xBF",

    /* six-byte forms (lead bytes FC/FD) */
    "\xFC\x84\x80\x80\x80\x81",
    "\xFC\x85\x80\x80\x80\x80",
    "\xFC\x86\x80\x80\x80\x80",
    "\xFC\x87\xBF\xBF\xBF\xBF",
    "\xFC\x88\xA0\x80\x80\x80",
    "\xFC\x89\x80\x80\x80\x80",
    "\xFC\x8A\x80\x80\x80\x80",
    "\xFC\x90\x80\x80\x80\x82",
    "\xFD\x80\x80\x80\x80\x80",
    "\xFD\xBF\xBF\xBF\xBF\xBF",

    /* lone continuation byte, lone lead byte, lead byte followed by lead byte */
    "\x80",
    "\xC3",
    "\xC3\xC3\x80",

    /* three-byte forms that decode into D800..DFFF (UTF-16 surrogates) */
    "\xED\xA0\x80",
    "\xED\xBF\x80",
    "\xED\xBF\xBF",
    "\xED\xA0\x80\xE0\xBF\xBF",
};
1156
/*
 * Illegal UTF-16 sequences.  Each row is a list of 16-bit code units ended
 * by a 0 unit.  The values are written in host byte order;
 * test_utf16_bad_chars() passes them through htons() before conversion.
 *
 * Every row starts with the leading surrogate 0xD800 and continues with
 * something other than a trailing surrogate.
 */
uint16_t utf16_bad[][3] = {
    { 0xD800, 0x0000, 0 }, /* nothing follows */
    { 0xD800, 0x0041, 0 },
    { 0xD800, 0x00FE, 0 },
    { 0xD800, 0x03BB, 0 },
    { 0xD800, 0xD800, 0 }, /* a second leading surrogate */
    { 0xD800, 0xFEFF, 0 },
    { 0xD800, 0xFFFD, 0 },
};
1168
/*
 * Write a labelled hex dump of a byte string to stdout.
 *
 *   word  label printed first, followed by a single space
 *   utf8  bytes to dump; output stops at the first zero byte, which is
 *         not printed
 *   end   text printed after the last byte (a separator or a newline)
 *
 * Each byte is printed as two lower-case hex digits and a trailing space.
 */
static void
dump_utf8(char *word, unsigned char *utf8, char *end)
{
    unsigned char *cursor = utf8;

    fprintf(stdout, "%s ", word);

    while (*cursor != 0) {
        fprintf(stdout, "%02.2x ", (unsigned int)*cursor);
        cursor++;
    }

    fprintf(stdout, "%s", end);
}
1183
1184 static PRBool
1185 test_ucs4_chars
1186 (
1187 void
1188 )
1189 {
1190 PRBool rv = PR_TRUE;
1191 int i;
1192
1193 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1194 struct ucs4 *e = &ucs4[i];
1195 PRBool result;
1196 unsigned char utf8[8];
1197 unsigned int len = 0;
1198 PRUint32 back = 0;
1199
1200 (void)memset(utf8, 0, sizeof(utf8));
1201
1202 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1203 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
1204
1205 if( !result ) {
1206 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8\n", e->c);
1207 rv = PR_FALSE;
1208 continue;
1209 }
1210
1211 if( (len >= sizeof(utf8)) ||
1212 (strlen(e->utf8) != len) ||
1213 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
1214 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8: ", e->c);
1215 dump_utf8("expected", e->utf8, ", ");
1216 dump_utf8("received", utf8, "\n");
1217 rv = PR_FALSE;
1218 continue;
1219 }
1220
1221 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1222 utf8, len, (unsigned char *)&back, sizeof(back), &len);
1223
1224 if( !result ) {
1225 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4\n");
1226 rv = PR_FALSE;
1227 continue;
1228 }
1229
1230 if( (sizeof(back) != len) || (e->c != back) ) {
1231 dump_utf8("Wrong conversion of UTF-8", utf8, " to UCS-4:");
1232 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back);
1233 rv = PR_FALSE;
1234 continue;
1235 }
1236
1237 len = strlen(e->utf8) - 1;
1238 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1239 (unsigned char *)&e->c, sizeof(e->c), utf8 + sizeof(utf8) - len, len,
1240 &len);
1241
1242 if( result || len != strlen(e->utf8) ) {
1243 fprintf(stdout, "Length computation error converting UCS-4 0x%08.8x"
1244 " to UTF-8\n", e->c);
1245 rv = PR_FALSE;
1246 continue;
1247 }
1248 }
1249
1250 return rv;
1251 }
1252
1253 static PRBool
1254 test_ucs2_chars
1255 (
1256 void
1257 )
1258 {
1259 PRBool rv = PR_TRUE;
1260 int i;
1261
1262 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1263 struct ucs2 *e = &ucs2[i];
1264 PRBool result;
1265 unsigned char utf8[8];
1266 unsigned int len = 0;
1267 PRUint16 back = 0;
1268
1269 (void)memset(utf8, 0, sizeof(utf8));
1270
1271 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1272 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
1273
1274 if( !result ) {
1275 fprintf(stdout, "Failed to convert UCS-2 0x%04.4x to UTF-8\n", e->c);
1276 rv = PR_FALSE;
1277 continue;
1278 }
1279
1280 if( (len >= sizeof(utf8)) ||
1281 (strlen(e->utf8) != len) ||
1282 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
1283 fprintf(stdout, "Wrong conversion of UCS-2 0x%04.4x to UTF-8: ", e->c);
1284 dump_utf8("expected", e->utf8, ", ");
1285 dump_utf8("received", utf8, "\n");
1286 rv = PR_FALSE;
1287 continue;
1288 }
1289
1290 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1291 utf8, len, (unsigned char *)&back, sizeof(back), &len);
1292
1293 if( !result ) {
1294 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-2\n");
1295 rv = PR_FALSE;
1296 continue;
1297 }
1298
1299 if( (sizeof(back) != len) || (e->c != back) ) {
1300 dump_utf8("Wrong conversion of UTF-8", utf8, "to UCS-2:");
1301 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back);
1302 rv = PR_FALSE;
1303 continue;
1304 }
1305
1306 len = strlen(e->utf8) - 1;
1307 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1308 (unsigned char *)&e->c, sizeof(e->c), utf8 + sizeof(utf8) - len, len,
1309 &len);
1310
1311 if( result || len != strlen(e->utf8) ) {
1312 fprintf(stdout, "Length computation error converting UCS-2 0x%04.4x"
1313 " to UTF-8\n", e->c);
1314 rv = PR_FALSE;
1315 continue;
1316 }
1317 }
1318
1319 return rv;
1320 }
1321
1322 static PRBool
1323 test_utf16_chars
1324 (
1325 void
1326 )
1327 {
1328 PRBool rv = PR_TRUE;
1329 int i;
1330
1331 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) {
1332 struct utf16 *e = &utf16[i];
1333 PRBool result;
1334 unsigned char utf8[8];
1335 unsigned int len = 0;
1336 PRUint32 back32 = 0;
1337 PRUint16 back[2];
1338
1339 (void)memset(utf8, 0, sizeof(utf8));
1340
1341 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1342 (unsigned char *)&e->w[0], sizeof(e->w), utf8, sizeof(utf8), &len);
1343
1344 if( !result ) {
1345 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8\n",
1346 e->w[0], e->w[1]);
1347 rv = PR_FALSE;
1348 continue;
1349 }
1350
1351 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1352 utf8, len, (unsigned char *)&back32, sizeof(back32), &len);
1353
1354 if( 4 != len ) {
1355 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8: "
1356 "unexpected len %d\n", e->w[0], e->w[1], len);
1357 rv = PR_FALSE;
1358 continue;
1359 }
1360
1361 utf8[len] = '\0'; /* null-terminate for printing */
1362
1363 if( !result ) {
1364 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4 (utf-16 test)\n");
1365 rv = PR_FALSE;
1366 continue;
1367 }
1368
1369 if( (sizeof(back32) != len) || (e->c != back32) ) {
1370 fprintf(stdout, "Wrong conversion of UTF-16 0x%04.4x 0x%04.4x ",
1371 e->w[0], e->w[1]);
1372 dump_utf8("to UTF-8", utf8, "and then to UCS-4: ");
1373 if( sizeof(back32) != len ) {
1374 fprintf(stdout, "len is %d\n", len);
1375 } else {
1376 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back32);
1377 }
1378 rv = PR_FALSE;
1379 continue;
1380 }
1381
1382 (void)memset(utf8, 0, sizeof(utf8));
1383 back[0] = back[1] = 0;
1384
1385 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1386 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
1387
1388 if( !result ) {
1389 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8 (utf-16 test)\n ",
1390 e->c);
1391 rv = PR_FALSE;
1392 continue;
1393 }
1394
1395 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1396 utf8, len, (unsigned char *)&back[0], sizeof(back), &len);
1397
1398 if( 4 != len ) {
1399 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8: "
1400 "unexpected len %d\n", e->c, len);
1401 rv = PR_FALSE;
1402 continue;
1403 }
1404
1405 utf8[len] = '\0'; /* null-terminate for printing */
1406
1407 if( !result ) {
1408 dump_utf8("Failed to convert UTF-8", utf8, "to UTF-16\n");
1409 rv = PR_FALSE;
1410 continue;
1411 }
1412
1413 if( (sizeof(back) != len) || (e->w[0] != back[0]) || (e->w[1] != back[1]) ) {
1414 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8", e->c);
1415 dump_utf8("", utf8, "and then to UTF-16:");
1416 if( sizeof(back) != len ) {
1417 fprintf(stdout, "len is %d\n", len);
1418 } else {
1419 fprintf(stdout, "expected 0x%04.4x 0x%04.4x, received 0x%04.4x 0x%04.4xx \n",
1420 e->w[0], e->w[1], back[0], back[1]);
1421 }
1422 rv = PR_FALSE;
1423 continue;
1424 }
1425 }
1426
1427 return rv;
1428 }
1429
1430 static PRBool
1431 test_utf8_bad_chars
1432 (
1433 void
1434 )
1435 {
1436 PRBool rv = PR_TRUE;
1437 int i;
1438
1439 for( i = 0; i < sizeof(utf8_bad)/sizeof(utf8_bad[0]); i++ ) {
1440 PRBool result;
1441 unsigned char destbuf[30];
1442 unsigned int len = 0;
1443
1444 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1445 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf ), &len);
1446
1447 if( result ) {
1448 dump_utf8("Failed to detect bad UTF-8 string converting to UCS2: ", utf8_b ad[i], "\n");
1449 rv = PR_FALSE;
1450 continue;
1451 }
1452 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1453 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf ), &len);
1454
1455 if( result ) {
1456 dump_utf8("Failed to detect bad UTF-8 string converting to UCS4: ", utf8_b ad[i], "\n");
1457 rv = PR_FALSE;
1458 continue;
1459 }
1460
1461 }
1462
1463 return rv;
1464 }
1465
1466 static PRBool
1467 test_utf16_bad_chars(void)
1468 {
1469 PRBool rv = PR_TRUE;
1470 int i;
1471
1472 for( i = 0; i < sizeof(utf16_bad)/sizeof(utf16_bad[0]); ++i ) {
1473 PRBool result;
1474 unsigned char destbuf[18];
1475 unsigned int j, len, destlen;
1476 uint16_t *buf;
1477
1478 for( len = 0; utf16_bad[i][len] != 0; ++len )
1479 /* nothing */;
1480
1481 buf = malloc(sizeof(uint16_t) * len);
1482 for( j = 0; j < len; ++j )
1483 buf[j] = htons(utf16_bad[i][j]);
1484
1485 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1486 (unsigned char *)buf, sizeof(uint16_t) * len, destbuf, sizeof(destbuf),
1487 &destlen);
1488 if( result ) {
1489 fprintf(stdout, "Failed to detect bad UTF-16 string conversion for "
1490 "{0x%x,0x%x} (UTF-8 len = %u)\n", utf16_bad[i][0], utf16_bad[i][1],
1491 destlen);
1492 rv = PR_FALSE;
1493 }
1494 free(buf);
1495 }
1496 }
1497
1498 static PRBool
1499 test_iso88591_chars
1500 (
1501 void
1502 )
1503 {
1504 PRBool rv = PR_TRUE;
1505 int i;
1506
1507 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1508 struct ucs2 *e = &ucs2[i];
1509 PRBool result;
1510 unsigned char iso88591;
1511 unsigned char utf8[3];
1512 unsigned int len = 0;
1513
1514 if (ntohs(e->c) > 0xFF) continue;
1515
1516 (void)memset(utf8, 0, sizeof(utf8));
1517 iso88591 = ntohs(e->c);
1518
1519 result = sec_port_iso88591_utf8_conversion_function(&iso88591,
1520 1, utf8, sizeof(utf8), &len);
1521
1522 if( !result ) {
1523 fprintf(stdout, "Failed to convert ISO-8859-1 0x%02.2x to UTF-8\n", iso885 91);
1524 rv = PR_FALSE;
1525 continue;
1526 }
1527
1528 if( (len >= sizeof(utf8)) ||
1529 (strlen(e->utf8) != len) ||
1530 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
1531 fprintf(stdout, "Wrong conversion of ISO-8859-1 0x%02.2x to UTF-8: ", iso8 8591);
1532 dump_utf8("expected", e->utf8, ", ");
1533 dump_utf8("received", utf8, "\n");
1534 rv = PR_FALSE;
1535 continue;
1536 }
1537
1538 }
1539
1540 return rv;
1541 }
1542
1543 static PRBool
1544 test_zeroes
1545 (
1546 void
1547 )
1548 {
1549 PRBool rv = PR_TRUE;
1550 PRBool result;
1551 PRUint32 lzero = 0;
1552 PRUint16 szero = 0;
1553 unsigned char utf8[8];
1554 unsigned int len = 0;
1555 PRUint32 lback = 1;
1556 PRUint16 sback = 1;
1557
1558 (void)memset(utf8, 1, sizeof(utf8));
1559
1560 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1561 (unsigned char *)&lzero, sizeof(lzero), utf8, sizeof(utf8), &len);
1562
1563 if( !result ) {
1564 fprintf(stdout, "Failed to convert UCS-4 0x00000000 to UTF-8\n");
1565 rv = PR_FALSE;
1566 } else if( 1 != len ) {
1567 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: len = %d\n", len);
1568 rv = PR_FALSE;
1569 } else if( '\0' != *utf8 ) {
1570 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: expected 00 ,"
1571 "received %02.2x\n", (unsigned int)*utf8);
1572 rv = PR_FALSE;
1573 }
1574
1575 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1576 "", 1, (unsigned char *)&lback, sizeof(lback), &len);
1577
1578 if( !result ) {
1579 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-4\n");
1580 rv = PR_FALSE;
1581 } else if( 4 != len ) {
1582 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: len = %d\n", len);
1583 rv = PR_FALSE;
1584 } else if( 0 != lback ) {
1585 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: "
1586 "expected 0x00000000, received 0x%08.8x\n", lback);
1587 rv = PR_FALSE;
1588 }
1589
1590 (void)memset(utf8, 1, sizeof(utf8));
1591
1592 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1593 (unsigned char *)&szero, sizeof(szero), utf8, sizeof(utf8), &len);
1594
1595 if( !result ) {
1596 fprintf(stdout, "Failed to convert UCS-2 0x0000 to UTF-8\n");
1597 rv = PR_FALSE;
1598 } else if( 1 != len ) {
1599 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: len = %d\n", len);
1600 rv = PR_FALSE;
1601 } else if( '\0' != *utf8 ) {
1602 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: expected 00 ,"
1603 "received %02.2x\n", (unsigned int)*utf8);
1604 rv = PR_FALSE;
1605 }
1606
1607 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1608 "", 1, (unsigned char *)&sback, sizeof(sback), &len);
1609
1610 if( !result ) {
1611 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-2\n");
1612 rv = PR_FALSE;
1613 } else if( 2 != len ) {
1614 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: len = %d\n", len);
1615 rv = PR_FALSE;
1616 } else if( 0 != sback ) {
1617 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: "
1618 "expected 0x0000, received 0x%04.4x\n", sback);
1619 rv = PR_FALSE;
1620 }
1621
1622 return rv;
1623 }
1624
1625 static PRBool
1626 test_multichars
1627 (
1628 void
1629 )
1630 {
1631 int i;
1632 unsigned int len, lenout;
1633 PRUint32 *ucs4s;
1634 char *ucs4_utf8;
1635 PRUint16 *ucs2s;
1636 char *ucs2_utf8;
1637 void *tmp;
1638 PRBool result;
1639
1640 ucs4s = (PRUint32 *)calloc(sizeof(ucs4)/sizeof(ucs4[0]), sizeof(PRUint32));
1641 ucs2s = (PRUint16 *)calloc(sizeof(ucs2)/sizeof(ucs2[0]), sizeof(PRUint16));
1642
1643 if( ((PRUint32 *)NULL == ucs4s) || ((PRUint16 *)NULL == ucs2s) ) {
1644 fprintf(stderr, "out of memory\n");
1645 exit(1);
1646 }
1647
1648 len = 1;
1649 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1650 ucs4s[i] = ucs4[i].c;
1651 len += strlen(ucs4[i].utf8);
1652 }
1653
1654 ucs4_utf8 = (char *)malloc(len);
1655
1656 len = 1;
1657 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1658 ucs2s[i] = ucs2[i].c;
1659 len += strlen(ucs2[i].utf8);
1660 }
1661
1662 ucs2_utf8 = (char *)malloc(len);
1663
1664 if( ((char *)NULL == ucs4_utf8) || ((char *)NULL == ucs2_utf8) ) {
1665 fprintf(stderr, "out of memory\n");
1666 exit(1);
1667 }
1668
1669 *ucs4_utf8 = '\0';
1670 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1671 strcat(ucs4_utf8, ucs4[i].utf8);
1672 }
1673
1674 *ucs2_utf8 = '\0';
1675 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1676 strcat(ucs2_utf8, ucs2[i].utf8);
1677 }
1678
1679 /* UTF-8 -> UCS-4 */
1680 len = sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32);
1681 tmp = calloc(len, 1);
1682 if( (void *)NULL == tmp ) {
1683 fprintf(stderr, "out of memory\n");
1684 exit(1);
1685 }
1686
1687 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1688 ucs4_utf8, strlen(ucs4_utf8), tmp, len, &lenout);
1689 if( !result ) {
1690 fprintf(stdout, "Failed to convert much UTF-8 to UCS-4\n");
1691 goto done;
1692 }
1693
1694 if( lenout != len ) {
1695 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-4\n");
1696 goto loser;
1697 }
1698
1699 if( 0 != memcmp(ucs4s, tmp, len) ) {
1700 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-4\n");
1701 goto loser;
1702 }
1703
1704 free(tmp); tmp = (void *)NULL;
1705
1706 /* UCS-4 -> UTF-8 */
1707 len = strlen(ucs4_utf8);
1708 tmp = calloc(len, 1);
1709 if( (void *)NULL == tmp ) {
1710 fprintf(stderr, "out of memory\n");
1711 exit(1);
1712 }
1713
1714 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1715 (unsigned char *)ucs4s, sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32),
1716 tmp, len, &lenout);
1717 if( !result ) {
1718 fprintf(stdout, "Failed to convert much UCS-4 to UTF-8\n");
1719 goto done;
1720 }
1721
1722 if( lenout != len ) {
1723 fprintf(stdout, "Unexpected length converting much UCS-4 to UTF-8\n");
1724 goto loser;
1725 }
1726
1727 if( 0 != strncmp(ucs4_utf8, tmp, len) ) {
1728 fprintf(stdout, "Wrong conversion of much UCS-4 to UTF-8\n");
1729 goto loser;
1730 }
1731
1732 free(tmp); tmp = (void *)NULL;
1733
1734 /* UTF-8 -> UCS-2 */
1735 len = sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16);
1736 tmp = calloc(len, 1);
1737 if( (void *)NULL == tmp ) {
1738 fprintf(stderr, "out of memory\n");
1739 exit(1);
1740 }
1741
1742 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1743 ucs2_utf8, strlen(ucs2_utf8), tmp, len, &lenout);
1744 if( !result ) {
1745 fprintf(stdout, "Failed to convert much UTF-8 to UCS-2\n");
1746 goto done;
1747 }
1748
1749 if( lenout != len ) {
1750 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-2\n");
1751 goto loser;
1752 }
1753
1754 if( 0 != memcmp(ucs2s, tmp, len) ) {
1755 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-2\n");
1756 goto loser;
1757 }
1758
1759 free(tmp); tmp = (void *)NULL;
1760
1761 /* UCS-2 -> UTF-8 */
1762 len = strlen(ucs2_utf8);
1763 tmp = calloc(len, 1);
1764 if( (void *)NULL == tmp ) {
1765 fprintf(stderr, "out of memory\n");
1766 exit(1);
1767 }
1768
1769 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1770 (unsigned char *)ucs2s, sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16),
1771 tmp, len, &lenout);
1772 if( !result ) {
1773 fprintf(stdout, "Failed to convert much UCS-2 to UTF-8\n");
1774 goto done;
1775 }
1776
1777 if( lenout != len ) {
1778 fprintf(stdout, "Unexpected length converting much UCS-2 to UTF-8\n");
1779 goto loser;
1780 }
1781
1782 if( 0 != strncmp(ucs2_utf8, tmp, len) ) {
1783 fprintf(stdout, "Wrong conversion of much UCS-2 to UTF-8\n");
1784 goto loser;
1785 }
1786
1787 /* implement UTF16 */
1788
1789 result = PR_TRUE;
1790 goto done;
1791
1792 loser:
1793 result = PR_FALSE;
1794 done:
1795 free(ucs4s);
1796 free(ucs4_utf8);
1797 free(ucs2s);
1798 free(ucs2_utf8);
1799 if( (void *)NULL != tmp ) free(tmp);
1800 return result;
1801 }
1802
1803 void
1804 byte_order
1805 (
1806 void
1807 )
1808 {
1809 /*
1810 * The implementation (now) expects the 16- and 32-bit characters
1811 * to be in network byte order, not host byte order. Therefore I
1812 * have to byteswap all those test vectors above. hton[ls] may be
1813 * functions, so I have to do this dynamically. If you want to
1814 * use this code to do host byte order conversions, just remove
1815 * the call in main() to this function.
1816 */
1817
1818 int i;
1819
1820 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1821 struct ucs4 *e = &ucs4[i];
1822 e->c = htonl(e->c);
1823 }
1824
1825 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1826 struct ucs2 *e = &ucs2[i];
1827 e->c = htons(e->c);
1828 }
1829
1830 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) {
1831 struct utf16 *e = &utf16[i];
1832 e->c = htonl(e->c);
1833 e->w[0] = htons(e->w[0]);
1834 e->w[1] = htons(e->w[1]);
1835 }
1836
1837 return;
1838 }
1839
/*
 * Test driver: put the test vectors into network byte order, then run
 * the test groups in sequence.
 *
 * The groups are chained with &&, so the first group that fails stops
 * the run and later groups are not executed.
 *
 * Prints "PASS" or "FAIL" on stderr.  Returns 0 when every group passed
 * and 1 otherwise, following the usual process exit-status convention
 * (the previous code returned 1 for PASS and 0 for FAIL).
 */
int
main(int argc, char *argv[])
{
    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    byte_order();

    if (test_ucs4_chars() &&
        test_ucs2_chars() &&
        test_utf16_chars() &&
        test_utf8_bad_chars() &&
        test_utf16_bad_chars() &&
        test_iso88591_chars() &&
        test_zeroes() &&
        test_multichars()) {
        fprintf(stderr, "PASS\n");
        return 0;
    }

    fprintf(stderr, "FAIL\n");
    return 1;
}
1865
1866 #endif /* TEST_UTF8 */
OLDNEW
« no previous file with comments | « nss/lib/util/templates.c ('k') | nss/lib/util/utilmod.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698