| OLD | NEW |
| (Empty) |
| 1 /* This Source Code Form is subject to the terms of the Mozilla Public | |
| 2 * License, v. 2.0. If a copy of the MPL was not distributed with this | |
| 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | |
| 4 | |
| 5 #include "seccomon.h" | |
| 6 #include "secport.h" | |
| 7 | |
| 8 #ifdef TEST_UTF8 | |
| 9 #include <assert.h> | |
| 10 #undef PORT_Assert | |
| 11 #define PORT_Assert assert | |
| 12 #endif | |
| 13 | |
| 14 /* | |
| 15 * From RFC 2044: | |
| 16 * | |
| 17 * UCS-4 range (hex.) UTF-8 octet sequence (binary) | |
| 18 * 0000 0000-0000 007F 0xxxxxxx | |
| 19 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx | |
| 20 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx | |
| 21 * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | |
| 22 * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx | |
| 23 * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx | |
| 24 */ | |
| 25 | |
| 26 /* | |
| 27 * From http://www.imc.org/draft-hoffman-utf16 | |
| 28 * | |
| 29 * For U on [0x00010000,0x0010FFFF]: Let U' = U - 0x00010000 | |
| 30 * | |
| 31 * U' = yyyyyyyyyyxxxxxxxxxx | |
| 32 * W1 = 110110yyyyyyyyyy | |
| 33 * W2 = 110111xxxxxxxxxx | |
| 34 */ | |
| 35 | |
| 36 /* | |
| 37 * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit | |
| 38 * character values. If you wish to use this code for working with | |
| 39 * host byte order values, define the following: | |
| 40 * | |
| 41 * #if IS_BIG_ENDIAN | |
| 42 * #define L_0 0 | |
| 43 * #define L_1 1 | |
| 44 * #define L_2 2 | |
| 45 * #define L_3 3 | |
| 46 * #define H_0 0 | |
| 47 * #define H_1 1 | |
| 48 * #else / * not everyone has elif * / | |
| 49 * #if IS_LITTLE_ENDIAN | |
| 50 * #define L_0 3 | |
| 51 * #define L_1 2 | |
| 52 * #define L_2 1 | |
| 53 * #define L_3 0 | |
| 54 * #define H_0 1 | |
| 55 * #define H_1 0 | |
| 56 * #else | |
| 57 * #error "PDP and NUXI support deferred" | |
| 58 * #endif / * IS_LITTLE_ENDIAN * / | |
| 59 * #endif / * IS_BIG_ENDIAN * / | |
| 60 */ | |
| 61 | |
/*
 * Byte offsets used when reading or writing one encoded character.
 * L_0..L_3 index the four bytes of a UCS-4 value and H_0..H_1 the two
 * bytes of a 16-bit value; in both cases offset 0 is the most significant
 * byte, i.e. network byte order.
 */
| 62 #define L_0 0 | |
| 63 #define L_1 1 | |
| 64 #define L_2 2 | |
| 65 #define L_3 3 | |
| 66 #define H_0 0 | |
| 67 #define H_1 1 | |
| 68 | |
/* Sentinel returned by sec_port_read_utf8 when the input is not valid UTF-8. */
| 69 #define BAD_UTF8 ((PRUint32)-1) | |
| 70 | |
| 71 /* | |
| 72 * Parse a single UTF-8 character per the spec. in section 3.9 (D36) | |
| 73 * of Unicode 4.0.0. | |
| 74 * | |
| 75 * Parameters: | |
| 76 * index - Points to the byte offset in inBuf of character to read. On success, | |
| 77 * updated to the offset of the following character. | |
| 78 * inBuf - Input buffer, UTF-8 encoded | |
| 79 * inbufLen - Length of input buffer, in bytes. | |
| 80 * | |
| 81 * Returns: | |
| 82 * Success - The UCS4 encoded character | |
| 83 * Failure - BAD_UTF8 | |
| 84 */ | |
| 85 static PRUint32 | |
| 86 sec_port_read_utf8(unsigned int *index, unsigned char *inBuf, unsigned int inBuf
Len) | |
| 87 { | |
| 88 PRUint32 result; | |
| 89 unsigned int i = *index; | |
| 90 int bytes_left; | |
| 91 PRUint32 min_value; | |
| 92 | |
| 93 PORT_Assert(i < inBufLen); | |
| 94 | |
| 95 if ( (inBuf[i] & 0x80) == 0x00 ) { | |
| 96 result = inBuf[i++]; | |
| 97 bytes_left = 0; | |
| 98 min_value = 0; | |
| 99 } else if ( (inBuf[i] & 0xE0) == 0xC0 ) { | |
| 100 result = inBuf[i++] & 0x1F; | |
| 101 bytes_left = 1; | |
| 102 min_value = 0x80; | |
| 103 } else if ( (inBuf[i] & 0xF0) == 0xE0) { | |
| 104 result = inBuf[i++] & 0x0F; | |
| 105 bytes_left = 2; | |
| 106 min_value = 0x800; | |
| 107 } else if ( (inBuf[i] & 0xF8) == 0xF0) { | |
| 108 result = inBuf[i++] & 0x07; | |
| 109 bytes_left = 3; | |
| 110 min_value = 0x10000; | |
| 111 } else { | |
| 112 return BAD_UTF8; | |
| 113 } | |
| 114 | |
| 115 while (bytes_left--) { | |
| 116 if (i >= inBufLen || (inBuf[i] & 0xC0) != 0x80) return BAD_UTF8; | |
| 117 result = (result << 6) | (inBuf[i++] & 0x3F); | |
| 118 } | |
| 119 | |
| 120 /* Check for overlong sequences, surrogates, and outside unicode range */ | |
| 121 if (result < min_value || (result & 0xFFFFF800) == 0xD800 || result > 0x10FFFF
) { | |
| 122 return BAD_UTF8; | |
| 123 } | |
| 124 | |
| 125 *index = i; | |
| 126 return result; | |
| 127 } | |
| 128 | |
| 129 PRBool | |
| 130 sec_port_ucs4_utf8_conversion_function | |
| 131 ( | |
| 132 PRBool toUnicode, | |
| 133 unsigned char *inBuf, | |
| 134 unsigned int inBufLen, | |
| 135 unsigned char *outBuf, | |
| 136 unsigned int maxOutBufLen, | |
| 137 unsigned int *outBufLen | |
| 138 ) | |
| 139 { | |
| 140 PORT_Assert((unsigned int *)NULL != outBufLen); | |
| 141 | |
| 142 if( toUnicode ) { | |
| 143 unsigned int i, len = 0; | |
| 144 | |
| 145 for( i = 0; i < inBufLen; ) { | |
| 146 if( (inBuf[i] & 0x80) == 0x00 ) i += 1; | |
| 147 else if( (inBuf[i] & 0xE0) == 0xC0 ) i += 2; | |
| 148 else if( (inBuf[i] & 0xF0) == 0xE0 ) i += 3; | |
| 149 else if( (inBuf[i] & 0xF8) == 0xF0 ) i += 4; | |
| 150 else return PR_FALSE; | |
| 151 | |
| 152 len += 4; | |
| 153 } | |
| 154 | |
| 155 if( len > maxOutBufLen ) { | |
| 156 *outBufLen = len; | |
| 157 return PR_FALSE; | |
| 158 } | |
| 159 | |
| 160 len = 0; | |
| 161 | |
| 162 for( i = 0; i < inBufLen; ) { | |
| 163 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen); | |
| 164 | |
| 165 if (ucs4 == BAD_UTF8) return PR_FALSE; | |
| 166 | |
| 167 outBuf[len+L_0] = 0x00; | |
| 168 outBuf[len+L_1] = (unsigned char)(ucs4 >> 16); | |
| 169 outBuf[len+L_2] = (unsigned char)(ucs4 >> 8); | |
| 170 outBuf[len+L_3] = (unsigned char)ucs4; | |
| 171 | |
| 172 len += 4; | |
| 173 } | |
| 174 | |
| 175 *outBufLen = len; | |
| 176 return PR_TRUE; | |
| 177 } else { | |
| 178 unsigned int i, len = 0; | |
| 179 PORT_Assert((inBufLen % 4) == 0); | |
| 180 if ((inBufLen % 4) != 0) { | |
| 181 *outBufLen = 0; | |
| 182 return PR_FALSE; | |
| 183 } | |
| 184 | |
| 185 for( i = 0; i < inBufLen; i += 4 ) { | |
| 186 if( (inBuf[i+L_0] > 0x00) || (inBuf[i+L_1] > 0x10) ) { | |
| 187 *outBufLen = 0; | |
| 188 return PR_FALSE; | |
| 189 } else if( inBuf[i+L_1] >= 0x01 ) len += 4; | |
| 190 else if( inBuf[i+L_2] >= 0x08 ) len += 3; | |
| 191 else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) len += 2; | |
| 192 else len += 1; | |
| 193 } | |
| 194 | |
| 195 if( len > maxOutBufLen ) { | |
| 196 *outBufLen = len; | |
| 197 return PR_FALSE; | |
| 198 } | |
| 199 | |
| 200 len = 0; | |
| 201 | |
| 202 for( i = 0; i < inBufLen; i += 4 ) { | |
| 203 if( inBuf[i+L_1] >= 0x01 ) { | |
| 204 /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ | |
| 205 /* 00000000 000abcde fghijklm nopqrstu -> | |
| 206 11110abc 10defghi 10jklmno 10pqrstu */ | |
| 207 | |
| 208 outBuf[len+0] = 0xF0 | ((inBuf[i+L_1] & 0x1C) >> 2); | |
| 209 outBuf[len+1] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4) | |
| 210 | ((inBuf[i+L_2] & 0xF0) >> 4); | |
| 211 outBuf[len+2] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2) | |
| 212 | ((inBuf[i+L_3] & 0xC0) >> 6); | |
| 213 outBuf[len+3] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); | |
| 214 | |
| 215 len += 4; | |
| 216 } else if( inBuf[i+L_2] >= 0x08 ) { | |
| 217 /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ | |
| 218 /* 00000000 00000000 abcdefgh ijklmnop -> | |
| 219 1110abcd 10efghij 10klmnop */ | |
| 220 | |
| 221 outBuf[len+0] = 0xE0 | ((inBuf[i+L_2] & 0xF0) >> 4); | |
| 222 outBuf[len+1] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2) | |
| 223 | ((inBuf[i+L_3] & 0xC0) >> 6); | |
| 224 outBuf[len+2] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); | |
| 225 | |
| 226 len += 3; | |
| 227 } else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) { | |
| 228 /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */ | |
| 229 /* 00000000 00000000 00000abc defghijk -> | |
| 230 110abcde 10fghijk */ | |
| 231 | |
| 232 outBuf[len+0] = 0xC0 | ((inBuf[i+L_2] & 0x07) << 2) | |
| 233 | ((inBuf[i+L_3] & 0xC0) >> 6); | |
| 234 outBuf[len+1] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); | |
| 235 | |
| 236 len += 2; | |
| 237 } else { | |
| 238 /* 0000 0000-0000 007F -> 0xxxxxx */ | |
| 239 /* 00000000 00000000 00000000 0abcdefg -> | |
| 240 0abcdefg */ | |
| 241 | |
| 242 outBuf[len+0] = (inBuf[i+L_3] & 0x7F); | |
| 243 | |
| 244 len += 1; | |
| 245 } | |
| 246 } | |
| 247 | |
| 248 *outBufLen = len; | |
| 249 return PR_TRUE; | |
| 250 } | |
| 251 } | |
| 252 | |
| 253 PRBool | |
| 254 sec_port_ucs2_utf8_conversion_function | |
| 255 ( | |
| 256 PRBool toUnicode, | |
| 257 unsigned char *inBuf, | |
| 258 unsigned int inBufLen, | |
| 259 unsigned char *outBuf, | |
| 260 unsigned int maxOutBufLen, | |
| 261 unsigned int *outBufLen | |
| 262 ) | |
| 263 { | |
| 264 PORT_Assert((unsigned int *)NULL != outBufLen); | |
| 265 | |
| 266 if( toUnicode ) { | |
| 267 unsigned int i, len = 0; | |
| 268 | |
| 269 for( i = 0; i < inBufLen; ) { | |
| 270 if( (inBuf[i] & 0x80) == 0x00 ) { | |
| 271 i += 1; | |
| 272 len += 2; | |
| 273 } else if( (inBuf[i] & 0xE0) == 0xC0 ) { | |
| 274 i += 2; | |
| 275 len += 2; | |
| 276 } else if( (inBuf[i] & 0xF0) == 0xE0 ) { | |
| 277 i += 3; | |
| 278 len += 2; | |
| 279 } else if( (inBuf[i] & 0xF8) == 0xF0 ) { | |
| 280 i += 4; | |
| 281 len += 4; | |
| 282 } else return PR_FALSE; | |
| 283 } | |
| 284 | |
| 285 if( len > maxOutBufLen ) { | |
| 286 *outBufLen = len; | |
| 287 return PR_FALSE; | |
| 288 } | |
| 289 | |
| 290 len = 0; | |
| 291 | |
| 292 for( i = 0; i < inBufLen; ) { | |
| 293 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen); | |
| 294 | |
| 295 if (ucs4 == BAD_UTF8) return PR_FALSE; | |
| 296 | |
| 297 if( ucs4 < 0x10000) { | |
| 298 outBuf[len+H_0] = (unsigned char)(ucs4 >> 8); | |
| 299 outBuf[len+H_1] = (unsigned char)ucs4; | |
| 300 len += 2; | |
| 301 } else { | |
| 302 ucs4 -= 0x10000; | |
| 303 outBuf[len+0+H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3)); | |
| 304 outBuf[len+0+H_1] = (unsigned char)(ucs4 >> 10); | |
| 305 outBuf[len+2+H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3)); | |
| 306 outBuf[len+2+H_1] = (unsigned char)ucs4; | |
| 307 len += 4; | |
| 308 } | |
| 309 } | |
| 310 | |
| 311 *outBufLen = len; | |
| 312 return PR_TRUE; | |
| 313 } else { | |
| 314 unsigned int i, len = 0; | |
| 315 PORT_Assert((inBufLen % 2) == 0); | |
| 316 if ((inBufLen % 2) != 0) { | |
| 317 *outBufLen = 0; | |
| 318 return PR_FALSE; | |
| 319 } | |
| 320 | |
| 321 for( i = 0; i < inBufLen; i += 2 ) { | |
| 322 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) len += 1; | |
| 323 else if( inBuf[i+H_0] < 0x08 ) len += 2; | |
| 324 else if( ((inBuf[i+0+H_0] & 0xFC) == 0xD8) ) { | |
| 325 if( ((inBufLen - i) > 2) && ((inBuf[i+2+H_0] & 0xFC) == 0xDC) ) { | |
| 326 i += 2; | |
| 327 len += 4; | |
| 328 } else { | |
| 329 return PR_FALSE; | |
| 330 } | |
| 331 } | |
| 332 else len += 3; | |
| 333 } | |
| 334 | |
| 335 if( len > maxOutBufLen ) { | |
| 336 *outBufLen = len; | |
| 337 return PR_FALSE; | |
| 338 } | |
| 339 | |
| 340 len = 0; | |
| 341 | |
| 342 for( i = 0; i < inBufLen; i += 2 ) { | |
| 343 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) { | |
| 344 /* 0000-007F -> 0xxxxxx */ | |
| 345 /* 00000000 0abcdefg -> 0abcdefg */ | |
| 346 | |
| 347 outBuf[len] = inBuf[i+H_1] & 0x7F; | |
| 348 | |
| 349 len += 1; | |
| 350 } else if( inBuf[i+H_0] < 0x08 ) { | |
| 351 /* 0080-07FF -> 110xxxxx 10xxxxxx */ | |
| 352 /* 00000abc defghijk -> 110abcde 10fghijk */ | |
| 353 | |
| 354 outBuf[len+0] = 0xC0 | ((inBuf[i+H_0] & 0x07) << 2) | |
| 355 | ((inBuf[i+H_1] & 0xC0) >> 6); | |
| 356 outBuf[len+1] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0); | |
| 357 | |
| 358 len += 2; | |
| 359 } else if( (inBuf[i+H_0] & 0xFC) == 0xD8 ) { | |
| 360 int abcde, BCDE; | |
| 361 | |
| 362 PORT_Assert(((inBufLen - i) > 2) && ((inBuf[i+2+H_0] & 0xFC) == 0xDC) ); | |
| 363 | |
| 364 /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ | |
| 365 /* 110110BC DEfghijk 110111lm nopqrstu -> | |
| 366 { Let abcde = BCDE + 1 } | |
| 367 11110abc 10defghi 10jklmno 10pqrstu */ | |
| 368 | |
| 369 BCDE = ((inBuf[i+H_0] & 0x03) << 2) | ((inBuf[i+H_1] & 0xC0) >> 6); | |
| 370 abcde = BCDE + 1; | |
| 371 | |
| 372 outBuf[len+0] = 0xF0 | ((abcde & 0x1C) >> 2); | |
| 373 outBuf[len+1] = 0x80 | ((abcde & 0x03) << 4) | |
| 374 | ((inBuf[i+0+H_1] & 0x3C) >> 2); | |
| 375 outBuf[len+2] = 0x80 | ((inBuf[i+0+H_1] & 0x03) << 4) | |
| 376 | ((inBuf[i+2+H_0] & 0x03) << 2) | |
| 377 | ((inBuf[i+2+H_1] & 0xC0) >> 6); | |
| 378 outBuf[len+3] = 0x80 | ((inBuf[i+2+H_1] & 0x3F) >> 0); | |
| 379 | |
| 380 i += 2; | |
| 381 len += 4; | |
| 382 } else { | |
| 383 /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ | |
| 384 /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */ | |
| 385 | |
| 386 outBuf[len+0] = 0xE0 | ((inBuf[i+H_0] & 0xF0) >> 4); | |
| 387 outBuf[len+1] = 0x80 | ((inBuf[i+H_0] & 0x0F) << 2) | |
| 388 | ((inBuf[i+H_1] & 0xC0) >> 6); | |
| 389 outBuf[len+2] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0); | |
| 390 | |
| 391 len += 3; | |
| 392 } | |
| 393 } | |
| 394 | |
| 395 *outBufLen = len; | |
| 396 return PR_TRUE; | |
| 397 } | |
| 398 } | |
| 399 | |
| 400 PRBool | |
| 401 sec_port_iso88591_utf8_conversion_function | |
| 402 ( | |
| 403 const unsigned char *inBuf, | |
| 404 unsigned int inBufLen, | |
| 405 unsigned char *outBuf, | |
| 406 unsigned int maxOutBufLen, | |
| 407 unsigned int *outBufLen | |
| 408 ) | |
| 409 { | |
| 410 unsigned int i, len = 0; | |
| 411 | |
| 412 PORT_Assert((unsigned int *)NULL != outBufLen); | |
| 413 | |
| 414 for( i = 0; i < inBufLen; i++) { | |
| 415 if( (inBuf[i] & 0x80) == 0x00 ) len += 1; | |
| 416 else len += 2; | |
| 417 } | |
| 418 | |
| 419 if( len > maxOutBufLen ) { | |
| 420 *outBufLen = len; | |
| 421 return PR_FALSE; | |
| 422 } | |
| 423 | |
| 424 len = 0; | |
| 425 | |
| 426 for( i = 0; i < inBufLen; i++) { | |
| 427 if( (inBuf[i] & 0x80) == 0x00 ) { | |
| 428 /* 00-7F -> 0xxxxxxx */ | |
| 429 /* 0abcdefg -> 0abcdefg */ | |
| 430 | |
| 431 outBuf[len] = inBuf[i]; | |
| 432 len += 1; | |
| 433 } else { | |
| 434 /* 80-FF <- 110xxxxx 10xxxxxx */ | |
| 435 /* 00000000 abcdefgh -> 110000ab 10cdefgh */ | |
| 436 | |
| 437 outBuf[len+0] = 0xC0 | ((inBuf[i] & 0xC0) >> 6); | |
| 438 outBuf[len+1] = 0x80 | ((inBuf[i] & 0x3F) >> 0); | |
| 439 | |
| 440 len += 2; | |
| 441 } | |
| 442 } | |
| 443 | |
| 444 *outBufLen = len; | |
| 445 return PR_TRUE; | |
| 446 } | |
| 447 | |
| 448 #ifdef TEST_UTF8 | |
| 449 | |
| 450 #include <stdio.h> | |
| 451 #include <string.h> | |
| 452 #include <stdlib.h> | |
| 453 #include <netinet/in.h> /* for htonl and htons */ | |
| 454 | |
| 455 /* | |
| 456 * UCS-4 vectors | |
| 457 */ | |
| 458 | |
/*
 * One UCS-4 test vector: a 32-bit character value (c) paired with a C
 * string holding the UTF-8 bytes that correspond to it (utf8).
 */
| 459 struct ucs4 { | |
| 460 PRUint32 c; | |
| 461 char *utf8; | |
| 462 }; | |
| 463 | |
| 464 /* | |
| 465 * UCS-2 vectors | |
| 466 */ | |
| 467 | |
/*
 * One UCS-2 test vector: a 16-bit character value (c) paired with a C
 * string holding the UTF-8 bytes that correspond to it (utf8).
 */
| 468 struct ucs2 { | |
| 469 PRUint16 c; | |
| 470 char *utf8; | |
| 471 }; | |
| 472 | |
| 473 /* | |
| 474 * UTF-16 vectors | |
| 475 */ | |
| 476 | |
/*
 * One UTF-16 test vector: a 32-bit character value (c) paired with the two
 * 16-bit words (w[0], w[1]) of its surrogate-pair representation.
 */
| 477 struct utf16 { | |
| 478 PRUint32 c; | |
| 479 PRUint16 w[2]; | |
| 480 }; | |
| 481 | |
| 482 | |
| 483 /* | |
| 484 * UCS-4 vectors | |
| 485 */ | |
| 486 | |
/*
 * Table of UCS-4 values and their UTF-8 encodings, from 0x00000001 up to
 * 0x0010FFFF. Entries are grouped by encoded length (1, 2, 3, then 4
 * bytes), with a blank line between groups.
 * NOTE(review): the code consuming this table is outside this view;
 * presumably the TEST_UTF8 driver checks conversions against it.
 */
| 487 struct ucs4 ucs4[] = { | |
| 488 { 0x00000001, "\x01" }, | |
| 489 { 0x00000002, "\x02" }, | |
| 490 { 0x00000003, "\x03" }, | |
| 491 { 0x00000004, "\x04" }, | |
| 492 { 0x00000007, "\x07" }, | |
| 493 { 0x00000008, "\x08" }, | |
| 494 { 0x0000000F, "\x0F" }, | |
| 495 { 0x00000010, "\x10" }, | |
| 496 { 0x0000001F, "\x1F" }, | |
| 497 { 0x00000020, "\x20" }, | |
| 498 { 0x0000003F, "\x3F" }, | |
| 499 { 0x00000040, "\x40" }, | |
| 500 { 0x0000007F, "\x7F" }, | |
| 501 | |
| 502 { 0x00000080, "\xC2\x80" }, | |
| 503 { 0x00000081, "\xC2\x81" }, | |
| 504 { 0x00000082, "\xC2\x82" }, | |
| 505 { 0x00000084, "\xC2\x84" }, | |
| 506 { 0x00000088, "\xC2\x88" }, | |
| 507 { 0x00000090, "\xC2\x90" }, | |
| 508 { 0x000000A0, "\xC2\xA0" }, | |
| 509 { 0x000000C0, "\xC3\x80" }, | |
| 510 { 0x000000FF, "\xC3\xBF" }, | |
| 511 { 0x00000100, "\xC4\x80" }, | |
| 512 { 0x00000101, "\xC4\x81" }, | |
| 513 { 0x00000102, "\xC4\x82" }, | |
| 514 { 0x00000104, "\xC4\x84" }, | |
| 515 { 0x00000108, "\xC4\x88" }, | |
| 516 { 0x00000110, "\xC4\x90" }, | |
| 517 { 0x00000120, "\xC4\xA0" }, | |
| 518 { 0x00000140, "\xC5\x80" }, | |
| 519 { 0x00000180, "\xC6\x80" }, | |
| 520 { 0x000001FF, "\xC7\xBF" }, | |
| 521 { 0x00000200, "\xC8\x80" }, | |
| 522 { 0x00000201, "\xC8\x81" }, | |
| 523 { 0x00000202, "\xC8\x82" }, | |
| 524 { 0x00000204, "\xC8\x84" }, | |
| 525 { 0x00000208, "\xC8\x88" }, | |
| 526 { 0x00000210, "\xC8\x90" }, | |
| 527 { 0x00000220, "\xC8\xA0" }, | |
| 528 { 0x00000240, "\xC9\x80" }, | |
| 529 { 0x00000280, "\xCA\x80" }, | |
| 530 { 0x00000300, "\xCC\x80" }, | |
| 531 { 0x000003FF, "\xCF\xBF" }, | |
| 532 { 0x00000400, "\xD0\x80" }, | |
| 533 { 0x00000401, "\xD0\x81" }, | |
| 534 { 0x00000402, "\xD0\x82" }, | |
| 535 { 0x00000404, "\xD0\x84" }, | |
| 536 { 0x00000408, "\xD0\x88" }, | |
| 537 { 0x00000410, "\xD0\x90" }, | |
| 538 { 0x00000420, "\xD0\xA0" }, | |
| 539 { 0x00000440, "\xD1\x80" }, | |
| 540 { 0x00000480, "\xD2\x80" }, | |
| 541 { 0x00000500, "\xD4\x80" }, | |
| 542 { 0x00000600, "\xD8\x80" }, | |
| 543 { 0x000007FF, "\xDF\xBF" }, | |
| 544 | |
| 545 { 0x00000800, "\xE0\xA0\x80" }, | |
| 546 { 0x00000801, "\xE0\xA0\x81" }, | |
| 547 { 0x00000802, "\xE0\xA0\x82" }, | |
| 548 { 0x00000804, "\xE0\xA0\x84" }, | |
| 549 { 0x00000808, "\xE0\xA0\x88" }, | |
| 550 { 0x00000810, "\xE0\xA0\x90" }, | |
| 551 { 0x00000820, "\xE0\xA0\xA0" }, | |
| 552 { 0x00000840, "\xE0\xA1\x80" }, | |
| 553 { 0x00000880, "\xE0\xA2\x80" }, | |
| 554 { 0x00000900, "\xE0\xA4\x80" }, | |
| 555 { 0x00000A00, "\xE0\xA8\x80" }, | |
| 556 { 0x00000C00, "\xE0\xB0\x80" }, | |
| 557 { 0x00000FFF, "\xE0\xBF\xBF" }, | |
| 558 { 0x00001000, "\xE1\x80\x80" }, | |
| 559 { 0x00001001, "\xE1\x80\x81" }, | |
| 560 { 0x00001002, "\xE1\x80\x82" }, | |
| 561 { 0x00001004, "\xE1\x80\x84" }, | |
| 562 { 0x00001008, "\xE1\x80\x88" }, | |
| 563 { 0x00001010, "\xE1\x80\x90" }, | |
| 564 { 0x00001020, "\xE1\x80\xA0" }, | |
| 565 { 0x00001040, "\xE1\x81\x80" }, | |
| 566 { 0x00001080, "\xE1\x82\x80" }, | |
| 567 { 0x00001100, "\xE1\x84\x80" }, | |
| 568 { 0x00001200, "\xE1\x88\x80" }, | |
| 569 { 0x00001400, "\xE1\x90\x80" }, | |
| 570 { 0x00001800, "\xE1\xA0\x80" }, | |
| 571 { 0x00001FFF, "\xE1\xBF\xBF" }, | |
| 572 { 0x00002000, "\xE2\x80\x80" }, | |
| 573 { 0x00002001, "\xE2\x80\x81" }, | |
| 574 { 0x00002002, "\xE2\x80\x82" }, | |
| 575 { 0x00002004, "\xE2\x80\x84" }, | |
| 576 { 0x00002008, "\xE2\x80\x88" }, | |
| 577 { 0x00002010, "\xE2\x80\x90" }, | |
| 578 { 0x00002020, "\xE2\x80\xA0" }, | |
| 579 { 0x00002040, "\xE2\x81\x80" }, | |
| 580 { 0x00002080, "\xE2\x82\x80" }, | |
| 581 { 0x00002100, "\xE2\x84\x80" }, | |
| 582 { 0x00002200, "\xE2\x88\x80" }, | |
| 583 { 0x00002400, "\xE2\x90\x80" }, | |
| 584 { 0x00002800, "\xE2\xA0\x80" }, | |
| 585 { 0x00003000, "\xE3\x80\x80" }, | |
| 586 { 0x00003FFF, "\xE3\xBF\xBF" }, | |
| 587 { 0x00004000, "\xE4\x80\x80" }, | |
| 588 { 0x00004001, "\xE4\x80\x81" }, | |
| 589 { 0x00004002, "\xE4\x80\x82" }, | |
| 590 { 0x00004004, "\xE4\x80\x84" }, | |
| 591 { 0x00004008, "\xE4\x80\x88" }, | |
| 592 { 0x00004010, "\xE4\x80\x90" }, | |
| 593 { 0x00004020, "\xE4\x80\xA0" }, | |
| 594 { 0x00004040, "\xE4\x81\x80" }, | |
| 595 { 0x00004080, "\xE4\x82\x80" }, | |
| 596 { 0x00004100, "\xE4\x84\x80" }, | |
| 597 { 0x00004200, "\xE4\x88\x80" }, | |
| 598 { 0x00004400, "\xE4\x90\x80" }, | |
| 599 { 0x00004800, "\xE4\xA0\x80" }, | |
| 600 { 0x00005000, "\xE5\x80\x80" }, | |
| 601 { 0x00006000, "\xE6\x80\x80" }, | |
| 602 { 0x00007FFF, "\xE7\xBF\xBF" }, | |
| 603 { 0x00008000, "\xE8\x80\x80" }, | |
| 604 { 0x00008001, "\xE8\x80\x81" }, | |
| 605 { 0x00008002, "\xE8\x80\x82" }, | |
| 606 { 0x00008004, "\xE8\x80\x84" }, | |
| 607 { 0x00008008, "\xE8\x80\x88" }, | |
| 608 { 0x00008010, "\xE8\x80\x90" }, | |
| 609 { 0x00008020, "\xE8\x80\xA0" }, | |
| 610 { 0x00008040, "\xE8\x81\x80" }, | |
| 611 { 0x00008080, "\xE8\x82\x80" }, | |
| 612 { 0x00008100, "\xE8\x84\x80" }, | |
| 613 { 0x00008200, "\xE8\x88\x80" }, | |
| 614 { 0x00008400, "\xE8\x90\x80" }, | |
| 615 { 0x00008800, "\xE8\xA0\x80" }, | |
| 616 { 0x00009000, "\xE9\x80\x80" }, | |
| 617 { 0x0000A000, "\xEA\x80\x80" }, | |
| 618 { 0x0000C000, "\xEC\x80\x80" }, | |
| 619 { 0x0000FFFF, "\xEF\xBF\xBF" }, | |
| 620 | |
| 621 { 0x00010000, "\xF0\x90\x80\x80" }, | |
| 622 { 0x00010001, "\xF0\x90\x80\x81" }, | |
| 623 { 0x00010002, "\xF0\x90\x80\x82" }, | |
| 624 { 0x00010004, "\xF0\x90\x80\x84" }, | |
| 625 { 0x00010008, "\xF0\x90\x80\x88" }, | |
| 626 { 0x00010010, "\xF0\x90\x80\x90" }, | |
| 627 { 0x00010020, "\xF0\x90\x80\xA0" }, | |
| 628 { 0x00010040, "\xF0\x90\x81\x80" }, | |
| 629 { 0x00010080, "\xF0\x90\x82\x80" }, | |
| 630 { 0x00010100, "\xF0\x90\x84\x80" }, | |
| 631 { 0x00010200, "\xF0\x90\x88\x80" }, | |
| 632 { 0x00010400, "\xF0\x90\x90\x80" }, | |
| 633 { 0x00010800, "\xF0\x90\xA0\x80" }, | |
| 634 { 0x00011000, "\xF0\x91\x80\x80" }, | |
| 635 { 0x00012000, "\xF0\x92\x80\x80" }, | |
| 636 { 0x00014000, "\xF0\x94\x80\x80" }, | |
| 637 { 0x00018000, "\xF0\x98\x80\x80" }, | |
| 638 { 0x0001FFFF, "\xF0\x9F\xBF\xBF" }, | |
| 639 { 0x00020000, "\xF0\xA0\x80\x80" }, | |
| 640 { 0x00020001, "\xF0\xA0\x80\x81" }, | |
| 641 { 0x00020002, "\xF0\xA0\x80\x82" }, | |
| 642 { 0x00020004, "\xF0\xA0\x80\x84" }, | |
| 643 { 0x00020008, "\xF0\xA0\x80\x88" }, | |
| 644 { 0x00020010, "\xF0\xA0\x80\x90" }, | |
| 645 { 0x00020020, "\xF0\xA0\x80\xA0" }, | |
| 646 { 0x00020040, "\xF0\xA0\x81\x80" }, | |
| 647 { 0x00020080, "\xF0\xA0\x82\x80" }, | |
| 648 { 0x00020100, "\xF0\xA0\x84\x80" }, | |
| 649 { 0x00020200, "\xF0\xA0\x88\x80" }, | |
| 650 { 0x00020400, "\xF0\xA0\x90\x80" }, | |
| 651 { 0x00020800, "\xF0\xA0\xA0\x80" }, | |
| 652 { 0x00021000, "\xF0\xA1\x80\x80" }, | |
| 653 { 0x00022000, "\xF0\xA2\x80\x80" }, | |
| 654 { 0x00024000, "\xF0\xA4\x80\x80" }, | |
| 655 { 0x00028000, "\xF0\xA8\x80\x80" }, | |
| 656 { 0x00030000, "\xF0\xB0\x80\x80" }, | |
| 657 { 0x0003FFFF, "\xF0\xBF\xBF\xBF" }, | |
| 658 { 0x00040000, "\xF1\x80\x80\x80" }, | |
| 659 { 0x00040001, "\xF1\x80\x80\x81" }, | |
| 660 { 0x00040002, "\xF1\x80\x80\x82" }, | |
| 661 { 0x00040004, "\xF1\x80\x80\x84" }, | |
| 662 { 0x00040008, "\xF1\x80\x80\x88" }, | |
| 663 { 0x00040010, "\xF1\x80\x80\x90" }, | |
| 664 { 0x00040020, "\xF1\x80\x80\xA0" }, | |
| 665 { 0x00040040, "\xF1\x80\x81\x80" }, | |
| 666 { 0x00040080, "\xF1\x80\x82\x80" }, | |
| 667 { 0x00040100, "\xF1\x80\x84\x80" }, | |
| 668 { 0x00040200, "\xF1\x80\x88\x80" }, | |
| 669 { 0x00040400, "\xF1\x80\x90\x80" }, | |
| 670 { 0x00040800, "\xF1\x80\xA0\x80" }, | |
| 671 { 0x00041000, "\xF1\x81\x80\x80" }, | |
| 672 { 0x00042000, "\xF1\x82\x80\x80" }, | |
| 673 { 0x00044000, "\xF1\x84\x80\x80" }, | |
| 674 { 0x00048000, "\xF1\x88\x80\x80" }, | |
| 675 { 0x00050000, "\xF1\x90\x80\x80" }, | |
| 676 { 0x00060000, "\xF1\xA0\x80\x80" }, | |
| 677 { 0x0007FFFF, "\xF1\xBF\xBF\xBF" }, | |
| 678 { 0x00080000, "\xF2\x80\x80\x80" }, | |
| 679 { 0x00080001, "\xF2\x80\x80\x81" }, | |
| 680 { 0x00080002, "\xF2\x80\x80\x82" }, | |
| 681 { 0x00080004, "\xF2\x80\x80\x84" }, | |
| 682 { 0x00080008, "\xF2\x80\x80\x88" }, | |
| 683 { 0x00080010, "\xF2\x80\x80\x90" }, | |
| 684 { 0x00080020, "\xF2\x80\x80\xA0" }, | |
| 685 { 0x00080040, "\xF2\x80\x81\x80" }, | |
| 686 { 0x00080080, "\xF2\x80\x82\x80" }, | |
| 687 { 0x00080100, "\xF2\x80\x84\x80" }, | |
| 688 { 0x00080200, "\xF2\x80\x88\x80" }, | |
| 689 { 0x00080400, "\xF2\x80\x90\x80" }, | |
| 690 { 0x00080800, "\xF2\x80\xA0\x80" }, | |
| 691 { 0x00081000, "\xF2\x81\x80\x80" }, | |
| 692 { 0x00082000, "\xF2\x82\x80\x80" }, | |
| 693 { 0x00084000, "\xF2\x84\x80\x80" }, | |
| 694 { 0x00088000, "\xF2\x88\x80\x80" }, | |
| 695 { 0x00090000, "\xF2\x90\x80\x80" }, | |
| 696 { 0x000A0000, "\xF2\xA0\x80\x80" }, | |
| 697 { 0x000C0000, "\xF3\x80\x80\x80" }, | |
| 698 { 0x000FFFFF, "\xF3\xBF\xBF\xBF" }, | |
| 699 { 0x00100000, "\xF4\x80\x80\x80" }, | |
| 700 { 0x00100001, "\xF4\x80\x80\x81" }, | |
| 701 { 0x00100002, "\xF4\x80\x80\x82" }, | |
| 702 { 0x00100004, "\xF4\x80\x80\x84" }, | |
| 703 { 0x00100008, "\xF4\x80\x80\x88" }, | |
| 704 { 0x00100010, "\xF4\x80\x80\x90" }, | |
| 705 { 0x00100020, "\xF4\x80\x80\xA0" }, | |
| 706 { 0x00100040, "\xF4\x80\x81\x80" }, | |
| 707 { 0x00100080, "\xF4\x80\x82\x80" }, | |
| 708 { 0x00100100, "\xF4\x80\x84\x80" }, | |
| 709 { 0x00100200, "\xF4\x80\x88\x80" }, | |
| 710 { 0x00100400, "\xF4\x80\x90\x80" }, | |
| 711 { 0x00100800, "\xF4\x80\xA0\x80" }, | |
| 712 { 0x00101000, "\xF4\x81\x80\x80" }, | |
| 713 { 0x00102000, "\xF4\x82\x80\x80" }, | |
| 714 { 0x00104000, "\xF4\x84\x80\x80" }, | |
| 715 { 0x00108000, "\xF4\x88\x80\x80" }, | |
| 716 { 0x0010FFFF, "\xF4\x8F\xBF\xBF" }, | |
| 717 }; | |
| 718 | |
| 719 /* | |
| 720 * UCS-2 vectors | |
| 721 */ | |
| 722 | |
/*
 * Table of 16-bit character values and their UTF-8 encodings, from 0x0001
 * up to 0xFFFF. Entries are grouped by encoded length (1, 2, then 3
 * bytes), with a blank line between groups.
 * NOTE(review): the code consuming this table is outside this view;
 * presumably the TEST_UTF8 driver checks conversions against it.
 */
| 723 struct ucs2 ucs2[] = { | |
| 724 { 0x0001, "\x01" }, | |
| 725 { 0x0002, "\x02" }, | |
| 726 { 0x0003, "\x03" }, | |
| 727 { 0x0004, "\x04" }, | |
| 728 { 0x0007, "\x07" }, | |
| 729 { 0x0008, "\x08" }, | |
| 730 { 0x000F, "\x0F" }, | |
| 731 { 0x0010, "\x10" }, | |
| 732 { 0x001F, "\x1F" }, | |
| 733 { 0x0020, "\x20" }, | |
| 734 { 0x003F, "\x3F" }, | |
| 735 { 0x0040, "\x40" }, | |
| 736 { 0x007F, "\x7F" }, | |
| 737 | |
| 738 { 0x0080, "\xC2\x80" }, | |
| 739 { 0x0081, "\xC2\x81" }, | |
| 740 { 0x0082, "\xC2\x82" }, | |
| 741 { 0x0084, "\xC2\x84" }, | |
| 742 { 0x0088, "\xC2\x88" }, | |
| 743 { 0x0090, "\xC2\x90" }, | |
| 744 { 0x00A0, "\xC2\xA0" }, | |
| 745 { 0x00C0, "\xC3\x80" }, | |
| 746 { 0x00FF, "\xC3\xBF" }, | |
| 747 { 0x0100, "\xC4\x80" }, | |
| 748 { 0x0101, "\xC4\x81" }, | |
| 749 { 0x0102, "\xC4\x82" }, | |
| 750 { 0x0104, "\xC4\x84" }, | |
| 751 { 0x0108, "\xC4\x88" }, | |
| 752 { 0x0110, "\xC4\x90" }, | |
| 753 { 0x0120, "\xC4\xA0" }, | |
| 754 { 0x0140, "\xC5\x80" }, | |
| 755 { 0x0180, "\xC6\x80" }, | |
| 756 { 0x01FF, "\xC7\xBF" }, | |
| 757 { 0x0200, "\xC8\x80" }, | |
| 758 { 0x0201, "\xC8\x81" }, | |
| 759 { 0x0202, "\xC8\x82" }, | |
| 760 { 0x0204, "\xC8\x84" }, | |
| 761 { 0x0208, "\xC8\x88" }, | |
| 762 { 0x0210, "\xC8\x90" }, | |
| 763 { 0x0220, "\xC8\xA0" }, | |
| 764 { 0x0240, "\xC9\x80" }, | |
| 765 { 0x0280, "\xCA\x80" }, | |
| 766 { 0x0300, "\xCC\x80" }, | |
| 767 { 0x03FF, "\xCF\xBF" }, | |
| 768 { 0x0400, "\xD0\x80" }, | |
| 769 { 0x0401, "\xD0\x81" }, | |
| 770 { 0x0402, "\xD0\x82" }, | |
| 771 { 0x0404, "\xD0\x84" }, | |
| 772 { 0x0408, "\xD0\x88" }, | |
| 773 { 0x0410, "\xD0\x90" }, | |
| 774 { 0x0420, "\xD0\xA0" }, | |
| 775 { 0x0440, "\xD1\x80" }, | |
| 776 { 0x0480, "\xD2\x80" }, | |
| 777 { 0x0500, "\xD4\x80" }, | |
| 778 { 0x0600, "\xD8\x80" }, | |
| 779 { 0x07FF, "\xDF\xBF" }, | |
| 780 | |
| 781 { 0x0800, "\xE0\xA0\x80" }, | |
| 782 { 0x0801, "\xE0\xA0\x81" }, | |
| 783 { 0x0802, "\xE0\xA0\x82" }, | |
| 784 { 0x0804, "\xE0\xA0\x84" }, | |
| 785 { 0x0808, "\xE0\xA0\x88" }, | |
| 786 { 0x0810, "\xE0\xA0\x90" }, | |
| 787 { 0x0820, "\xE0\xA0\xA0" }, | |
| 788 { 0x0840, "\xE0\xA1\x80" }, | |
| 789 { 0x0880, "\xE0\xA2\x80" }, | |
| 790 { 0x0900, "\xE0\xA4\x80" }, | |
| 791 { 0x0A00, "\xE0\xA8\x80" }, | |
| 792 { 0x0C00, "\xE0\xB0\x80" }, | |
| 793 { 0x0FFF, "\xE0\xBF\xBF" }, | |
| 794 { 0x1000, "\xE1\x80\x80" }, | |
| 795 { 0x1001, "\xE1\x80\x81" }, | |
| 796 { 0x1002, "\xE1\x80\x82" }, | |
| 797 { 0x1004, "\xE1\x80\x84" }, | |
| 798 { 0x1008, "\xE1\x80\x88" }, | |
| 799 { 0x1010, "\xE1\x80\x90" }, | |
| 800 { 0x1020, "\xE1\x80\xA0" }, | |
| 801 { 0x1040, "\xE1\x81\x80" }, | |
| 802 { 0x1080, "\xE1\x82\x80" }, | |
| 803 { 0x1100, "\xE1\x84\x80" }, | |
| 804 { 0x1200, "\xE1\x88\x80" }, | |
| 805 { 0x1400, "\xE1\x90\x80" }, | |
| 806 { 0x1800, "\xE1\xA0\x80" }, | |
| 807 { 0x1FFF, "\xE1\xBF\xBF" }, | |
| 808 { 0x2000, "\xE2\x80\x80" }, | |
| 809 { 0x2001, "\xE2\x80\x81" }, | |
| 810 { 0x2002, "\xE2\x80\x82" }, | |
| 811 { 0x2004, "\xE2\x80\x84" }, | |
| 812 { 0x2008, "\xE2\x80\x88" }, | |
| 813 { 0x2010, "\xE2\x80\x90" }, | |
| 814 { 0x2020, "\xE2\x80\xA0" }, | |
| 815 { 0x2040, "\xE2\x81\x80" }, | |
| 816 { 0x2080, "\xE2\x82\x80" }, | |
| 817 { 0x2100, "\xE2\x84\x80" }, | |
| 818 { 0x2200, "\xE2\x88\x80" }, | |
| 819 { 0x2400, "\xE2\x90\x80" }, | |
| 820 { 0x2800, "\xE2\xA0\x80" }, | |
| 821 { 0x3000, "\xE3\x80\x80" }, | |
| 822 { 0x3FFF, "\xE3\xBF\xBF" }, | |
| 823 { 0x4000, "\xE4\x80\x80" }, | |
| 824 { 0x4001, "\xE4\x80\x81" }, | |
| 825 { 0x4002, "\xE4\x80\x82" }, | |
| 826 { 0x4004, "\xE4\x80\x84" }, | |
| 827 { 0x4008, "\xE4\x80\x88" }, | |
| 828 { 0x4010, "\xE4\x80\x90" }, | |
| 829 { 0x4020, "\xE4\x80\xA0" }, | |
| 830 { 0x4040, "\xE4\x81\x80" }, | |
| 831 { 0x4080, "\xE4\x82\x80" }, | |
| 832 { 0x4100, "\xE4\x84\x80" }, | |
| 833 { 0x4200, "\xE4\x88\x80" }, | |
| 834 { 0x4400, "\xE4\x90\x80" }, | |
| 835 { 0x4800, "\xE4\xA0\x80" }, | |
| 836 { 0x5000, "\xE5\x80\x80" }, | |
| 837 { 0x6000, "\xE6\x80\x80" }, | |
| 838 { 0x7FFF, "\xE7\xBF\xBF" }, | |
| 839 { 0x8000, "\xE8\x80\x80" }, | |
| 840 { 0x8001, "\xE8\x80\x81" }, | |
| 841 { 0x8002, "\xE8\x80\x82" }, | |
| 842 { 0x8004, "\xE8\x80\x84" }, | |
| 843 { 0x8008, "\xE8\x80\x88" }, | |
| 844 { 0x8010, "\xE8\x80\x90" }, | |
| 845 { 0x8020, "\xE8\x80\xA0" }, | |
| 846 { 0x8040, "\xE8\x81\x80" }, | |
| 847 { 0x8080, "\xE8\x82\x80" }, | |
| 848 { 0x8100, "\xE8\x84\x80" }, | |
| 849 { 0x8200, "\xE8\x88\x80" }, | |
| 850 { 0x8400, "\xE8\x90\x80" }, | |
| 851 { 0x8800, "\xE8\xA0\x80" }, | |
| 852 { 0x9000, "\xE9\x80\x80" }, | |
| 853 { 0xA000, "\xEA\x80\x80" }, | |
| 854 { 0xC000, "\xEC\x80\x80" }, | |
| 855 { 0xFB01, "\xEF\xAC\x81" }, | |
| 856 { 0xFFFF, "\xEF\xBF\xBF" } | |
| 857 | |
| 858 }; | |
| 859 | |
| 860 /* | |
| 861 * UTF-16 vectors | |
| 862 */ | |
| 863 | |
| 864 struct utf16 utf16[] = { | |
| 865 { 0x00010000, { 0xD800, 0xDC00 } }, | |
| 866 { 0x00010001, { 0xD800, 0xDC01 } }, | |
| 867 { 0x00010002, { 0xD800, 0xDC02 } }, | |
| 868 { 0x00010003, { 0xD800, 0xDC03 } }, | |
| 869 { 0x00010004, { 0xD800, 0xDC04 } }, | |
| 870 { 0x00010007, { 0xD800, 0xDC07 } }, | |
| 871 { 0x00010008, { 0xD800, 0xDC08 } }, | |
| 872 { 0x0001000F, { 0xD800, 0xDC0F } }, | |
| 873 { 0x00010010, { 0xD800, 0xDC10 } }, | |
| 874 { 0x0001001F, { 0xD800, 0xDC1F } }, | |
| 875 { 0x00010020, { 0xD800, 0xDC20 } }, | |
| 876 { 0x0001003F, { 0xD800, 0xDC3F } }, | |
| 877 { 0x00010040, { 0xD800, 0xDC40 } }, | |
| 878 { 0x0001007F, { 0xD800, 0xDC7F } }, | |
| 879 { 0x00010080, { 0xD800, 0xDC80 } }, | |
| 880 { 0x00010081, { 0xD800, 0xDC81 } }, | |
| 881 { 0x00010082, { 0xD800, 0xDC82 } }, | |
| 882 { 0x00010084, { 0xD800, 0xDC84 } }, | |
| 883 { 0x00010088, { 0xD800, 0xDC88 } }, | |
| 884 { 0x00010090, { 0xD800, 0xDC90 } }, | |
| 885 { 0x000100A0, { 0xD800, 0xDCA0 } }, | |
| 886 { 0x000100C0, { 0xD800, 0xDCC0 } }, | |
| 887 { 0x000100FF, { 0xD800, 0xDCFF } }, | |
| 888 { 0x00010100, { 0xD800, 0xDD00 } }, | |
| 889 { 0x00010101, { 0xD800, 0xDD01 } }, | |
| 890 { 0x00010102, { 0xD800, 0xDD02 } }, | |
| 891 { 0x00010104, { 0xD800, 0xDD04 } }, | |
| 892 { 0x00010108, { 0xD800, 0xDD08 } }, | |
| 893 { 0x00010110, { 0xD800, 0xDD10 } }, | |
| 894 { 0x00010120, { 0xD800, 0xDD20 } }, | |
| 895 { 0x00010140, { 0xD800, 0xDD40 } }, | |
| 896 { 0x00010180, { 0xD800, 0xDD80 } }, | |
| 897 { 0x000101FF, { 0xD800, 0xDDFF } }, | |
| 898 { 0x00010200, { 0xD800, 0xDE00 } }, | |
| 899 { 0x00010201, { 0xD800, 0xDE01 } }, | |
| 900 { 0x00010202, { 0xD800, 0xDE02 } }, | |
| 901 { 0x00010204, { 0xD800, 0xDE04 } }, | |
| 902 { 0x00010208, { 0xD800, 0xDE08 } }, | |
| 903 { 0x00010210, { 0xD800, 0xDE10 } }, | |
| 904 { 0x00010220, { 0xD800, 0xDE20 } }, | |
| 905 { 0x00010240, { 0xD800, 0xDE40 } }, | |
| 906 { 0x00010280, { 0xD800, 0xDE80 } }, | |
| 907 { 0x00010300, { 0xD800, 0xDF00 } }, | |
| 908 { 0x000103FF, { 0xD800, 0xDFFF } }, | |
| 909 { 0x00010400, { 0xD801, 0xDC00 } }, | |
| 910 { 0x00010401, { 0xD801, 0xDC01 } }, | |
| 911 { 0x00010402, { 0xD801, 0xDC02 } }, | |
| 912 { 0x00010404, { 0xD801, 0xDC04 } }, | |
| 913 { 0x00010408, { 0xD801, 0xDC08 } }, | |
| 914 { 0x00010410, { 0xD801, 0xDC10 } }, | |
| 915 { 0x00010420, { 0xD801, 0xDC20 } }, | |
| 916 { 0x00010440, { 0xD801, 0xDC40 } }, | |
| 917 { 0x00010480, { 0xD801, 0xDC80 } }, | |
| 918 { 0x00010500, { 0xD801, 0xDD00 } }, | |
| 919 { 0x00010600, { 0xD801, 0xDE00 } }, | |
| 920 { 0x000107FF, { 0xD801, 0xDFFF } }, | |
| 921 { 0x00010800, { 0xD802, 0xDC00 } }, | |
| 922 { 0x00010801, { 0xD802, 0xDC01 } }, | |
| 923 { 0x00010802, { 0xD802, 0xDC02 } }, | |
| 924 { 0x00010804, { 0xD802, 0xDC04 } }, | |
| 925 { 0x00010808, { 0xD802, 0xDC08 } }, | |
| 926 { 0x00010810, { 0xD802, 0xDC10 } }, | |
| 927 { 0x00010820, { 0xD802, 0xDC20 } }, | |
| 928 { 0x00010840, { 0xD802, 0xDC40 } }, | |
| 929 { 0x00010880, { 0xD802, 0xDC80 } }, | |
| 930 { 0x00010900, { 0xD802, 0xDD00 } }, | |
| 931 { 0x00010A00, { 0xD802, 0xDE00 } }, | |
| 932 { 0x00010C00, { 0xD803, 0xDC00 } }, | |
| 933 { 0x00010FFF, { 0xD803, 0xDFFF } }, | |
| 934 { 0x00011000, { 0xD804, 0xDC00 } }, | |
| 935 { 0x00011001, { 0xD804, 0xDC01 } }, | |
| 936 { 0x00011002, { 0xD804, 0xDC02 } }, | |
| 937 { 0x00011004, { 0xD804, 0xDC04 } }, | |
| 938 { 0x00011008, { 0xD804, 0xDC08 } }, | |
| 939 { 0x00011010, { 0xD804, 0xDC10 } }, | |
| 940 { 0x00011020, { 0xD804, 0xDC20 } }, | |
| 941 { 0x00011040, { 0xD804, 0xDC40 } }, | |
| 942 { 0x00011080, { 0xD804, 0xDC80 } }, | |
| 943 { 0x00011100, { 0xD804, 0xDD00 } }, | |
| 944 { 0x00011200, { 0xD804, 0xDE00 } }, | |
| 945 { 0x00011400, { 0xD805, 0xDC00 } }, | |
| 946 { 0x00011800, { 0xD806, 0xDC00 } }, | |
| 947 { 0x00011FFF, { 0xD807, 0xDFFF } }, | |
| 948 { 0x00012000, { 0xD808, 0xDC00 } }, | |
| 949 { 0x00012001, { 0xD808, 0xDC01 } }, | |
| 950 { 0x00012002, { 0xD808, 0xDC02 } }, | |
| 951 { 0x00012004, { 0xD808, 0xDC04 } }, | |
| 952 { 0x00012008, { 0xD808, 0xDC08 } }, | |
| 953 { 0x00012010, { 0xD808, 0xDC10 } }, | |
| 954 { 0x00012020, { 0xD808, 0xDC20 } }, | |
| 955 { 0x00012040, { 0xD808, 0xDC40 } }, | |
| 956 { 0x00012080, { 0xD808, 0xDC80 } }, | |
| 957 { 0x00012100, { 0xD808, 0xDD00 } }, | |
| 958 { 0x00012200, { 0xD808, 0xDE00 } }, | |
| 959 { 0x00012400, { 0xD809, 0xDC00 } }, | |
| 960 { 0x00012800, { 0xD80A, 0xDC00 } }, | |
| 961 { 0x00013000, { 0xD80C, 0xDC00 } }, | |
| 962 { 0x00013FFF, { 0xD80F, 0xDFFF } }, | |
| 963 { 0x00014000, { 0xD810, 0xDC00 } }, | |
| 964 { 0x00014001, { 0xD810, 0xDC01 } }, | |
| 965 { 0x00014002, { 0xD810, 0xDC02 } }, | |
| 966 { 0x00014004, { 0xD810, 0xDC04 } }, | |
| 967 { 0x00014008, { 0xD810, 0xDC08 } }, | |
| 968 { 0x00014010, { 0xD810, 0xDC10 } }, | |
| 969 { 0x00014020, { 0xD810, 0xDC20 } }, | |
| 970 { 0x00014040, { 0xD810, 0xDC40 } }, | |
| 971 { 0x00014080, { 0xD810, 0xDC80 } }, | |
| 972 { 0x00014100, { 0xD810, 0xDD00 } }, | |
| 973 { 0x00014200, { 0xD810, 0xDE00 } }, | |
| 974 { 0x00014400, { 0xD811, 0xDC00 } }, | |
| 975 { 0x00014800, { 0xD812, 0xDC00 } }, | |
| 976 { 0x00015000, { 0xD814, 0xDC00 } }, | |
| 977 { 0x00016000, { 0xD818, 0xDC00 } }, | |
| 978 { 0x00017FFF, { 0xD81F, 0xDFFF } }, | |
| 979 { 0x00018000, { 0xD820, 0xDC00 } }, | |
| 980 { 0x00018001, { 0xD820, 0xDC01 } }, | |
| 981 { 0x00018002, { 0xD820, 0xDC02 } }, | |
| 982 { 0x00018004, { 0xD820, 0xDC04 } }, | |
| 983 { 0x00018008, { 0xD820, 0xDC08 } }, | |
| 984 { 0x00018010, { 0xD820, 0xDC10 } }, | |
| 985 { 0x00018020, { 0xD820, 0xDC20 } }, | |
| 986 { 0x00018040, { 0xD820, 0xDC40 } }, | |
| 987 { 0x00018080, { 0xD820, 0xDC80 } }, | |
| 988 { 0x00018100, { 0xD820, 0xDD00 } }, | |
| 989 { 0x00018200, { 0xD820, 0xDE00 } }, | |
| 990 { 0x00018400, { 0xD821, 0xDC00 } }, | |
| 991 { 0x00018800, { 0xD822, 0xDC00 } }, | |
| 992 { 0x00019000, { 0xD824, 0xDC00 } }, | |
| 993 { 0x0001A000, { 0xD828, 0xDC00 } }, | |
| 994 { 0x0001C000, { 0xD830, 0xDC00 } }, | |
| 995 { 0x0001FFFF, { 0xD83F, 0xDFFF } }, | |
| 996 { 0x00020000, { 0xD840, 0xDC00 } }, | |
| 997 { 0x00020001, { 0xD840, 0xDC01 } }, | |
| 998 { 0x00020002, { 0xD840, 0xDC02 } }, | |
| 999 { 0x00020004, { 0xD840, 0xDC04 } }, | |
| 1000 { 0x00020008, { 0xD840, 0xDC08 } }, | |
| 1001 { 0x00020010, { 0xD840, 0xDC10 } }, | |
| 1002 { 0x00020020, { 0xD840, 0xDC20 } }, | |
| 1003 { 0x00020040, { 0xD840, 0xDC40 } }, | |
| 1004 { 0x00020080, { 0xD840, 0xDC80 } }, | |
| 1005 { 0x00020100, { 0xD840, 0xDD00 } }, | |
| 1006 { 0x00020200, { 0xD840, 0xDE00 } }, | |
| 1007 { 0x00020400, { 0xD841, 0xDC00 } }, | |
| 1008 { 0x00020800, { 0xD842, 0xDC00 } }, | |
| 1009 { 0x00021000, { 0xD844, 0xDC00 } }, | |
| 1010 { 0x00022000, { 0xD848, 0xDC00 } }, | |
| 1011 { 0x00024000, { 0xD850, 0xDC00 } }, | |
| 1012 { 0x00028000, { 0xD860, 0xDC00 } }, | |
| 1013 { 0x0002FFFF, { 0xD87F, 0xDFFF } }, | |
| 1014 { 0x00030000, { 0xD880, 0xDC00 } }, | |
| 1015 { 0x00030001, { 0xD880, 0xDC01 } }, | |
| 1016 { 0x00030002, { 0xD880, 0xDC02 } }, | |
| 1017 { 0x00030004, { 0xD880, 0xDC04 } }, | |
| 1018 { 0x00030008, { 0xD880, 0xDC08 } }, | |
| 1019 { 0x00030010, { 0xD880, 0xDC10 } }, | |
| 1020 { 0x00030020, { 0xD880, 0xDC20 } }, | |
| 1021 { 0x00030040, { 0xD880, 0xDC40 } }, | |
| 1022 { 0x00030080, { 0xD880, 0xDC80 } }, | |
| 1023 { 0x00030100, { 0xD880, 0xDD00 } }, | |
| 1024 { 0x00030200, { 0xD880, 0xDE00 } }, | |
| 1025 { 0x00030400, { 0xD881, 0xDC00 } }, | |
| 1026 { 0x00030800, { 0xD882, 0xDC00 } }, | |
| 1027 { 0x00031000, { 0xD884, 0xDC00 } }, | |
| 1028 { 0x00032000, { 0xD888, 0xDC00 } }, | |
| 1029 { 0x00034000, { 0xD890, 0xDC00 } }, | |
| 1030 { 0x00038000, { 0xD8A0, 0xDC00 } }, | |
| 1031 { 0x0003FFFF, { 0xD8BF, 0xDFFF } }, | |
| 1032 { 0x00040000, { 0xD8C0, 0xDC00 } }, | |
| 1033 { 0x00040001, { 0xD8C0, 0xDC01 } }, | |
| 1034 { 0x00040002, { 0xD8C0, 0xDC02 } }, | |
| 1035 { 0x00040004, { 0xD8C0, 0xDC04 } }, | |
| 1036 { 0x00040008, { 0xD8C0, 0xDC08 } }, | |
| 1037 { 0x00040010, { 0xD8C0, 0xDC10 } }, | |
| 1038 { 0x00040020, { 0xD8C0, 0xDC20 } }, | |
| 1039 { 0x00040040, { 0xD8C0, 0xDC40 } }, | |
| 1040 { 0x00040080, { 0xD8C0, 0xDC80 } }, | |
| 1041 { 0x00040100, { 0xD8C0, 0xDD00 } }, | |
| 1042 { 0x00040200, { 0xD8C0, 0xDE00 } }, | |
| 1043 { 0x00040400, { 0xD8C1, 0xDC00 } }, | |
| 1044 { 0x00040800, { 0xD8C2, 0xDC00 } }, | |
| 1045 { 0x00041000, { 0xD8C4, 0xDC00 } }, | |
| 1046 { 0x00042000, { 0xD8C8, 0xDC00 } }, | |
| 1047 { 0x00044000, { 0xD8D0, 0xDC00 } }, | |
| 1048 { 0x00048000, { 0xD8E0, 0xDC00 } }, | |
| 1049 { 0x0004FFFF, { 0xD8FF, 0xDFFF } }, | |
| 1050 { 0x00050000, { 0xD900, 0xDC00 } }, | |
| 1051 { 0x00050001, { 0xD900, 0xDC01 } }, | |
| 1052 { 0x00050002, { 0xD900, 0xDC02 } }, | |
| 1053 { 0x00050004, { 0xD900, 0xDC04 } }, | |
| 1054 { 0x00050008, { 0xD900, 0xDC08 } }, | |
| 1055 { 0x00050010, { 0xD900, 0xDC10 } }, | |
| 1056 { 0x00050020, { 0xD900, 0xDC20 } }, | |
| 1057 { 0x00050040, { 0xD900, 0xDC40 } }, | |
| 1058 { 0x00050080, { 0xD900, 0xDC80 } }, | |
| 1059 { 0x00050100, { 0xD900, 0xDD00 } }, | |
| 1060 { 0x00050200, { 0xD900, 0xDE00 } }, | |
| 1061 { 0x00050400, { 0xD901, 0xDC00 } }, | |
| 1062 { 0x00050800, { 0xD902, 0xDC00 } }, | |
| 1063 { 0x00051000, { 0xD904, 0xDC00 } }, | |
| 1064 { 0x00052000, { 0xD908, 0xDC00 } }, | |
| 1065 { 0x00054000, { 0xD910, 0xDC00 } }, | |
| 1066 { 0x00058000, { 0xD920, 0xDC00 } }, | |
| 1067 { 0x00060000, { 0xD940, 0xDC00 } }, | |
| 1068 { 0x00070000, { 0xD980, 0xDC00 } }, | |
| 1069 { 0x0007FFFF, { 0xD9BF, 0xDFFF } }, | |
| 1070 { 0x00080000, { 0xD9C0, 0xDC00 } }, | |
| 1071 { 0x00080001, { 0xD9C0, 0xDC01 } }, | |
| 1072 { 0x00080002, { 0xD9C0, 0xDC02 } }, | |
| 1073 { 0x00080004, { 0xD9C0, 0xDC04 } }, | |
| 1074 { 0x00080008, { 0xD9C0, 0xDC08 } }, | |
| 1075 { 0x00080010, { 0xD9C0, 0xDC10 } }, | |
| 1076 { 0x00080020, { 0xD9C0, 0xDC20 } }, | |
| 1077 { 0x00080040, { 0xD9C0, 0xDC40 } }, | |
| 1078 { 0x00080080, { 0xD9C0, 0xDC80 } }, | |
| 1079 { 0x00080100, { 0xD9C0, 0xDD00 } }, | |
| 1080 { 0x00080200, { 0xD9C0, 0xDE00 } }, | |
| 1081 { 0x00080400, { 0xD9C1, 0xDC00 } }, | |
| 1082 { 0x00080800, { 0xD9C2, 0xDC00 } }, | |
| 1083 { 0x00081000, { 0xD9C4, 0xDC00 } }, | |
| 1084 { 0x00082000, { 0xD9C8, 0xDC00 } }, | |
| 1085 { 0x00084000, { 0xD9D0, 0xDC00 } }, | |
| 1086 { 0x00088000, { 0xD9E0, 0xDC00 } }, | |
| 1087 { 0x0008FFFF, { 0xD9FF, 0xDFFF } }, | |
| 1088 { 0x00090000, { 0xDA00, 0xDC00 } }, | |
| 1089 { 0x00090001, { 0xDA00, 0xDC01 } }, | |
| 1090 { 0x00090002, { 0xDA00, 0xDC02 } }, | |
| 1091 { 0x00090004, { 0xDA00, 0xDC04 } }, | |
| 1092 { 0x00090008, { 0xDA00, 0xDC08 } }, | |
| 1093 { 0x00090010, { 0xDA00, 0xDC10 } }, | |
| 1094 { 0x00090020, { 0xDA00, 0xDC20 } }, | |
| 1095 { 0x00090040, { 0xDA00, 0xDC40 } }, | |
| 1096 { 0x00090080, { 0xDA00, 0xDC80 } }, | |
| 1097 { 0x00090100, { 0xDA00, 0xDD00 } }, | |
| 1098 { 0x00090200, { 0xDA00, 0xDE00 } }, | |
| 1099 { 0x00090400, { 0xDA01, 0xDC00 } }, | |
| 1100 { 0x00090800, { 0xDA02, 0xDC00 } }, | |
| 1101 { 0x00091000, { 0xDA04, 0xDC00 } }, | |
| 1102 { 0x00092000, { 0xDA08, 0xDC00 } }, | |
| 1103 { 0x00094000, { 0xDA10, 0xDC00 } }, | |
| 1104 { 0x00098000, { 0xDA20, 0xDC00 } }, | |
| 1105 { 0x000A0000, { 0xDA40, 0xDC00 } }, | |
| 1106 { 0x000B0000, { 0xDA80, 0xDC00 } }, | |
| 1107 { 0x000C0000, { 0xDAC0, 0xDC00 } }, | |
| 1108 { 0x000D0000, { 0xDB00, 0xDC00 } }, | |
| 1109 { 0x000FFFFF, { 0xDBBF, 0xDFFF } }, | |
| 1110 { 0x0010FFFF, { 0xDBFF, 0xDFFF } } | |
| 1111 | |
| 1112 }; | |
| 1113 | |
/*
 * Byte strings that are not legal UTF-8.  Both UTF-8 decoders are
 * expected to reject every one of them.
 */
char *utf8_bad[] = {
  /* overlong 2-, 3- and 4-byte encodings of values that fit in fewer bytes */
  "\xC0\x80",
  "\xC1\xBF",
  "\xE0\x80\x80",
  "\xE0\x9F\xBF",
  "\xF0\x80\x80\x80",
  "\xF0\x8F\xBF\xBF",

  /* 4-byte sequences that decode above 0x0010FFFF */
  "\xF4\x90\x80\x80",
  "\xF7\xBF\xBF\xBF",

  /* 5-byte sequences (lead bytes 0xF8..0xFB) */
  "\xF8\x80\x80\x80\x80",
  "\xF8\x88\x80\x80\x80",
  "\xF8\x92\x80\x80\x80",
  "\xF8\x9F\xBF\xBF\xBF",
  "\xF8\xA0\x80\x80\x80",
  "\xF8\xA8\x80\x80\x80",
  "\xF8\xB0\x80\x80\x80",
  "\xF8\xBF\xBF\xBF\xBF",
  "\xF9\x80\x80\x80\x88",
  "\xF9\x84\x80\x80\x80",
  "\xF9\xBF\xBF\xBF\xBF",
  "\xFA\x80\x80\x80\x80",
  "\xFA\x90\x80\x80\x80",
  "\xFB\xBF\xBF\xBF\xBF",

  /* 6-byte sequences (lead bytes 0xFC..0xFD) */
  "\xFC\x84\x80\x80\x80\x81",
  "\xFC\x85\x80\x80\x80\x80",
  "\xFC\x86\x80\x80\x80\x80",
  "\xFC\x87\xBF\xBF\xBF\xBF",
  "\xFC\x88\xA0\x80\x80\x80",
  "\xFC\x89\x80\x80\x80\x80",
  "\xFC\x8A\x80\x80\x80\x80",
  "\xFC\x90\x80\x80\x80\x82",
  "\xFD\x80\x80\x80\x80\x80",
  "\xFD\xBF\xBF\xBF\xBF\xBF",

  /* continuation byte with no lead byte */
  "\x80",
  /* lead byte with no continuation byte */
  "\xC3",
  /* lead byte followed by another lead byte */
  "\xC3\xC3\x80",

  /* 3-byte encodings of surrogate values (0xD800..0xDFFF) */
  "\xED\xA0\x80",
  "\xED\xBF\x80",
  "\xED\xBF\xBF",
  "\xED\xA0\x80\xE0\xBF\xBF",
};
| 1156 | |
/*
 * Illegal UTF-16 code unit sequences.  Each row is terminated by a 0
 * code unit; the units before the terminator form the test input.
 *
 * Every row starts with the leading surrogate 0xD800 and then fails to
 * supply a trailing surrogate (0xDC00..0xDFFF).
 */
uint16_t utf16_bad[][3] = {
  { 0xD800, 0x0000, 0 },  /* input ends right after the leading surrogate */
  { 0xD800, 0x0041, 0 },  /* followed by U+0041 */
  { 0xD800, 0x00FE, 0 },  /* followed by U+00FE */
  { 0xD800, 0x03BB, 0 },  /* followed by U+03BB */
  { 0xD800, 0xD800, 0 },  /* followed by a second leading surrogate */
  { 0xD800, 0xFEFF, 0 },  /* followed by U+FEFF */
  { 0xD800, 0xFFFD, 0 },  /* followed by U+FFFD */
};
| 1168 | |
/*
 * Write a labelled hex dump of a NUL-terminated byte string to stdout.
 *
 *   word  label printed first, followed by one space
 *   utf8  bytes to dump; each is printed as two lowercase hex digits
 *         followed by one space, stopping at the first zero byte
 *   end   trailer printed verbatim after the last byte
 */
static void
dump_utf8(char *word, unsigned char *utf8, char *end)
{
  const unsigned char *cursor = utf8;

  printf("%s ", word);

  while( 0 != *cursor ) {
    printf("%02.2x ", (unsigned int)*cursor);
    cursor++;
  }

  printf("%s", end);
}
| 1183 | |
| 1184 static PRBool | |
| 1185 test_ucs4_chars | |
| 1186 ( | |
| 1187 void | |
| 1188 ) | |
| 1189 { | |
| 1190 PRBool rv = PR_TRUE; | |
| 1191 int i; | |
| 1192 | |
| 1193 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { | |
| 1194 struct ucs4 *e = &ucs4[i]; | |
| 1195 PRBool result; | |
| 1196 unsigned char utf8[8]; | |
| 1197 unsigned int len = 0; | |
| 1198 PRUint32 back = 0; | |
| 1199 | |
| 1200 (void)memset(utf8, 0, sizeof(utf8)); | |
| 1201 | |
| 1202 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, | |
| 1203 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len); | |
| 1204 | |
| 1205 if( !result ) { | |
| 1206 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8\n", e->c); | |
| 1207 rv = PR_FALSE; | |
| 1208 continue; | |
| 1209 } | |
| 1210 | |
| 1211 if( (len >= sizeof(utf8)) || | |
| 1212 (strlen(e->utf8) != len) || | |
| 1213 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) { | |
| 1214 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8: ", e->c); | |
| 1215 dump_utf8("expected", e->utf8, ", "); | |
| 1216 dump_utf8("received", utf8, "\n"); | |
| 1217 rv = PR_FALSE; | |
| 1218 continue; | |
| 1219 } | |
| 1220 | |
| 1221 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
| 1222 utf8, len, (unsigned char *)&back, sizeof(back), &len); | |
| 1223 | |
| 1224 if( !result ) { | |
| 1225 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4\n"); | |
| 1226 rv = PR_FALSE; | |
| 1227 continue; | |
| 1228 } | |
| 1229 | |
| 1230 if( (sizeof(back) != len) || (e->c != back) ) { | |
| 1231 dump_utf8("Wrong conversion of UTF-8", utf8, " to UCS-4:"); | |
| 1232 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back); | |
| 1233 rv = PR_FALSE; | |
| 1234 continue; | |
| 1235 } | |
| 1236 | |
| 1237 len = strlen(e->utf8) - 1; | |
| 1238 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, | |
| 1239 (unsigned char *)&e->c, sizeof(e->c), utf8 + sizeof(utf8) - len, len, | |
| 1240 &len); | |
| 1241 | |
| 1242 if( result || len != strlen(e->utf8) ) { | |
| 1243 fprintf(stdout, "Length computation error converting UCS-4 0x%08.8x" | |
| 1244 " to UTF-8\n", e->c); | |
| 1245 rv = PR_FALSE; | |
| 1246 continue; | |
| 1247 } | |
| 1248 } | |
| 1249 | |
| 1250 return rv; | |
| 1251 } | |
| 1252 | |
| 1253 static PRBool | |
| 1254 test_ucs2_chars | |
| 1255 ( | |
| 1256 void | |
| 1257 ) | |
| 1258 { | |
| 1259 PRBool rv = PR_TRUE; | |
| 1260 int i; | |
| 1261 | |
| 1262 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
| 1263 struct ucs2 *e = &ucs2[i]; | |
| 1264 PRBool result; | |
| 1265 unsigned char utf8[8]; | |
| 1266 unsigned int len = 0; | |
| 1267 PRUint16 back = 0; | |
| 1268 | |
| 1269 (void)memset(utf8, 0, sizeof(utf8)); | |
| 1270 | |
| 1271 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
| 1272 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len); | |
| 1273 | |
| 1274 if( !result ) { | |
| 1275 fprintf(stdout, "Failed to convert UCS-2 0x%04.4x to UTF-8\n", e->c); | |
| 1276 rv = PR_FALSE; | |
| 1277 continue; | |
| 1278 } | |
| 1279 | |
| 1280 if( (len >= sizeof(utf8)) || | |
| 1281 (strlen(e->utf8) != len) || | |
| 1282 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) { | |
| 1283 fprintf(stdout, "Wrong conversion of UCS-2 0x%04.4x to UTF-8: ", e->c); | |
| 1284 dump_utf8("expected", e->utf8, ", "); | |
| 1285 dump_utf8("received", utf8, "\n"); | |
| 1286 rv = PR_FALSE; | |
| 1287 continue; | |
| 1288 } | |
| 1289 | |
| 1290 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
| 1291 utf8, len, (unsigned char *)&back, sizeof(back), &len); | |
| 1292 | |
| 1293 if( !result ) { | |
| 1294 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-2\n"); | |
| 1295 rv = PR_FALSE; | |
| 1296 continue; | |
| 1297 } | |
| 1298 | |
| 1299 if( (sizeof(back) != len) || (e->c != back) ) { | |
| 1300 dump_utf8("Wrong conversion of UTF-8", utf8, "to UCS-2:"); | |
| 1301 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back); | |
| 1302 rv = PR_FALSE; | |
| 1303 continue; | |
| 1304 } | |
| 1305 | |
| 1306 len = strlen(e->utf8) - 1; | |
| 1307 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
| 1308 (unsigned char *)&e->c, sizeof(e->c), utf8 + sizeof(utf8) - len, len, | |
| 1309 &len); | |
| 1310 | |
| 1311 if( result || len != strlen(e->utf8) ) { | |
| 1312 fprintf(stdout, "Length computation error converting UCS-2 0x%04.4x" | |
| 1313 " to UTF-8\n", e->c); | |
| 1314 rv = PR_FALSE; | |
| 1315 continue; | |
| 1316 } | |
| 1317 } | |
| 1318 | |
| 1319 return rv; | |
| 1320 } | |
| 1321 | |
| 1322 static PRBool | |
| 1323 test_utf16_chars | |
| 1324 ( | |
| 1325 void | |
| 1326 ) | |
| 1327 { | |
| 1328 PRBool rv = PR_TRUE; | |
| 1329 int i; | |
| 1330 | |
| 1331 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) { | |
| 1332 struct utf16 *e = &utf16[i]; | |
| 1333 PRBool result; | |
| 1334 unsigned char utf8[8]; | |
| 1335 unsigned int len = 0; | |
| 1336 PRUint32 back32 = 0; | |
| 1337 PRUint16 back[2]; | |
| 1338 | |
| 1339 (void)memset(utf8, 0, sizeof(utf8)); | |
| 1340 | |
| 1341 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
| 1342 (unsigned char *)&e->w[0], sizeof(e->w), utf8, sizeof(utf8), &len); | |
| 1343 | |
| 1344 if( !result ) { | |
| 1345 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8\n", | |
| 1346 e->w[0], e->w[1]); | |
| 1347 rv = PR_FALSE; | |
| 1348 continue; | |
| 1349 } | |
| 1350 | |
| 1351 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
| 1352 utf8, len, (unsigned char *)&back32, sizeof(back32), &len); | |
| 1353 | |
| 1354 if( 4 != len ) { | |
| 1355 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8: " | |
| 1356 "unexpected len %d\n", e->w[0], e->w[1], len); | |
| 1357 rv = PR_FALSE; | |
| 1358 continue; | |
| 1359 } | |
| 1360 | |
| 1361 utf8[len] = '\0'; /* null-terminate for printing */ | |
| 1362 | |
| 1363 if( !result ) { | |
| 1364 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4 (utf-16 test)\n"); | |
| 1365 rv = PR_FALSE; | |
| 1366 continue; | |
| 1367 } | |
| 1368 | |
| 1369 if( (sizeof(back32) != len) || (e->c != back32) ) { | |
| 1370 fprintf(stdout, "Wrong conversion of UTF-16 0x%04.4x 0x%04.4x ", | |
| 1371 e->w[0], e->w[1]); | |
| 1372 dump_utf8("to UTF-8", utf8, "and then to UCS-4: "); | |
| 1373 if( sizeof(back32) != len ) { | |
| 1374 fprintf(stdout, "len is %d\n", len); | |
| 1375 } else { | |
| 1376 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back32); | |
| 1377 } | |
| 1378 rv = PR_FALSE; | |
| 1379 continue; | |
| 1380 } | |
| 1381 | |
| 1382 (void)memset(utf8, 0, sizeof(utf8)); | |
| 1383 back[0] = back[1] = 0; | |
| 1384 | |
| 1385 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, | |
| 1386 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len); | |
| 1387 | |
| 1388 if( !result ) { | |
| 1389 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8 (utf-16 test)\n
", | |
| 1390 e->c); | |
| 1391 rv = PR_FALSE; | |
| 1392 continue; | |
| 1393 } | |
| 1394 | |
| 1395 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
| 1396 utf8, len, (unsigned char *)&back[0], sizeof(back), &len); | |
| 1397 | |
| 1398 if( 4 != len ) { | |
| 1399 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8: " | |
| 1400 "unexpected len %d\n", e->c, len); | |
| 1401 rv = PR_FALSE; | |
| 1402 continue; | |
| 1403 } | |
| 1404 | |
| 1405 utf8[len] = '\0'; /* null-terminate for printing */ | |
| 1406 | |
| 1407 if( !result ) { | |
| 1408 dump_utf8("Failed to convert UTF-8", utf8, "to UTF-16\n"); | |
| 1409 rv = PR_FALSE; | |
| 1410 continue; | |
| 1411 } | |
| 1412 | |
| 1413 if( (sizeof(back) != len) || (e->w[0] != back[0]) || (e->w[1] != back[1]) )
{ | |
| 1414 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8", e->c); | |
| 1415 dump_utf8("", utf8, "and then to UTF-16:"); | |
| 1416 if( sizeof(back) != len ) { | |
| 1417 fprintf(stdout, "len is %d\n", len); | |
| 1418 } else { | |
| 1419 fprintf(stdout, "expected 0x%04.4x 0x%04.4x, received 0x%04.4x 0x%04.4xx
\n", | |
| 1420 e->w[0], e->w[1], back[0], back[1]); | |
| 1421 } | |
| 1422 rv = PR_FALSE; | |
| 1423 continue; | |
| 1424 } | |
| 1425 } | |
| 1426 | |
| 1427 return rv; | |
| 1428 } | |
| 1429 | |
| 1430 static PRBool | |
| 1431 test_utf8_bad_chars | |
| 1432 ( | |
| 1433 void | |
| 1434 ) | |
| 1435 { | |
| 1436 PRBool rv = PR_TRUE; | |
| 1437 int i; | |
| 1438 | |
| 1439 for( i = 0; i < sizeof(utf8_bad)/sizeof(utf8_bad[0]); i++ ) { | |
| 1440 PRBool result; | |
| 1441 unsigned char destbuf[30]; | |
| 1442 unsigned int len = 0; | |
| 1443 | |
| 1444 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
| 1445 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf
), &len); | |
| 1446 | |
| 1447 if( result ) { | |
| 1448 dump_utf8("Failed to detect bad UTF-8 string converting to UCS2: ", utf8_b
ad[i], "\n"); | |
| 1449 rv = PR_FALSE; | |
| 1450 continue; | |
| 1451 } | |
| 1452 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
| 1453 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf
), &len); | |
| 1454 | |
| 1455 if( result ) { | |
| 1456 dump_utf8("Failed to detect bad UTF-8 string converting to UCS4: ", utf8_b
ad[i], "\n"); | |
| 1457 rv = PR_FALSE; | |
| 1458 continue; | |
| 1459 } | |
| 1460 | |
| 1461 } | |
| 1462 | |
| 1463 return rv; | |
| 1464 } | |
| 1465 | |
| 1466 static PRBool | |
| 1467 test_utf16_bad_chars(void) | |
| 1468 { | |
| 1469 PRBool rv = PR_TRUE; | |
| 1470 int i; | |
| 1471 | |
| 1472 for( i = 0; i < sizeof(utf16_bad)/sizeof(utf16_bad[0]); ++i ) { | |
| 1473 PRBool result; | |
| 1474 unsigned char destbuf[18]; | |
| 1475 unsigned int j, len, destlen; | |
| 1476 uint16_t *buf; | |
| 1477 | |
| 1478 for( len = 0; utf16_bad[i][len] != 0; ++len ) | |
| 1479 /* nothing */; | |
| 1480 | |
| 1481 buf = malloc(sizeof(uint16_t) * len); | |
| 1482 for( j = 0; j < len; ++j ) | |
| 1483 buf[j] = htons(utf16_bad[i][j]); | |
| 1484 | |
| 1485 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
| 1486 (unsigned char *)buf, sizeof(uint16_t) * len, destbuf, sizeof(destbuf), | |
| 1487 &destlen); | |
| 1488 if( result ) { | |
| 1489 fprintf(stdout, "Failed to detect bad UTF-16 string conversion for " | |
| 1490 "{0x%x,0x%x} (UTF-8 len = %u)\n", utf16_bad[i][0], utf16_bad[i][1], | |
| 1491 destlen); | |
| 1492 rv = PR_FALSE; | |
| 1493 } | |
| 1494 free(buf); | |
| 1495 } | |
| 1496 } | |
| 1497 | |
| 1498 static PRBool | |
| 1499 test_iso88591_chars | |
| 1500 ( | |
| 1501 void | |
| 1502 ) | |
| 1503 { | |
| 1504 PRBool rv = PR_TRUE; | |
| 1505 int i; | |
| 1506 | |
| 1507 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
| 1508 struct ucs2 *e = &ucs2[i]; | |
| 1509 PRBool result; | |
| 1510 unsigned char iso88591; | |
| 1511 unsigned char utf8[3]; | |
| 1512 unsigned int len = 0; | |
| 1513 | |
| 1514 if (ntohs(e->c) > 0xFF) continue; | |
| 1515 | |
| 1516 (void)memset(utf8, 0, sizeof(utf8)); | |
| 1517 iso88591 = ntohs(e->c); | |
| 1518 | |
| 1519 result = sec_port_iso88591_utf8_conversion_function(&iso88591, | |
| 1520 1, utf8, sizeof(utf8), &len); | |
| 1521 | |
| 1522 if( !result ) { | |
| 1523 fprintf(stdout, "Failed to convert ISO-8859-1 0x%02.2x to UTF-8\n", iso885
91); | |
| 1524 rv = PR_FALSE; | |
| 1525 continue; | |
| 1526 } | |
| 1527 | |
| 1528 if( (len >= sizeof(utf8)) || | |
| 1529 (strlen(e->utf8) != len) || | |
| 1530 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) { | |
| 1531 fprintf(stdout, "Wrong conversion of ISO-8859-1 0x%02.2x to UTF-8: ", iso8
8591); | |
| 1532 dump_utf8("expected", e->utf8, ", "); | |
| 1533 dump_utf8("received", utf8, "\n"); | |
| 1534 rv = PR_FALSE; | |
| 1535 continue; | |
| 1536 } | |
| 1537 | |
| 1538 } | |
| 1539 | |
| 1540 return rv; | |
| 1541 } | |
| 1542 | |
| 1543 static PRBool | |
| 1544 test_zeroes | |
| 1545 ( | |
| 1546 void | |
| 1547 ) | |
| 1548 { | |
| 1549 PRBool rv = PR_TRUE; | |
| 1550 PRBool result; | |
| 1551 PRUint32 lzero = 0; | |
| 1552 PRUint16 szero = 0; | |
| 1553 unsigned char utf8[8]; | |
| 1554 unsigned int len = 0; | |
| 1555 PRUint32 lback = 1; | |
| 1556 PRUint16 sback = 1; | |
| 1557 | |
| 1558 (void)memset(utf8, 1, sizeof(utf8)); | |
| 1559 | |
| 1560 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, | |
| 1561 (unsigned char *)&lzero, sizeof(lzero), utf8, sizeof(utf8), &len); | |
| 1562 | |
| 1563 if( !result ) { | |
| 1564 fprintf(stdout, "Failed to convert UCS-4 0x00000000 to UTF-8\n"); | |
| 1565 rv = PR_FALSE; | |
| 1566 } else if( 1 != len ) { | |
| 1567 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: len = %d\n", len); | |
| 1568 rv = PR_FALSE; | |
| 1569 } else if( '\0' != *utf8 ) { | |
| 1570 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: expected 00 ," | |
| 1571 "received %02.2x\n", (unsigned int)*utf8); | |
| 1572 rv = PR_FALSE; | |
| 1573 } | |
| 1574 | |
| 1575 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
| 1576 "", 1, (unsigned char *)&lback, sizeof(lback), &len); | |
| 1577 | |
| 1578 if( !result ) { | |
| 1579 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-4\n"); | |
| 1580 rv = PR_FALSE; | |
| 1581 } else if( 4 != len ) { | |
| 1582 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: len = %d\n", len); | |
| 1583 rv = PR_FALSE; | |
| 1584 } else if( 0 != lback ) { | |
| 1585 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: " | |
| 1586 "expected 0x00000000, received 0x%08.8x\n", lback); | |
| 1587 rv = PR_FALSE; | |
| 1588 } | |
| 1589 | |
| 1590 (void)memset(utf8, 1, sizeof(utf8)); | |
| 1591 | |
| 1592 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
| 1593 (unsigned char *)&szero, sizeof(szero), utf8, sizeof(utf8), &len); | |
| 1594 | |
| 1595 if( !result ) { | |
| 1596 fprintf(stdout, "Failed to convert UCS-2 0x0000 to UTF-8\n"); | |
| 1597 rv = PR_FALSE; | |
| 1598 } else if( 1 != len ) { | |
| 1599 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: len = %d\n", len); | |
| 1600 rv = PR_FALSE; | |
| 1601 } else if( '\0' != *utf8 ) { | |
| 1602 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: expected 00 ," | |
| 1603 "received %02.2x\n", (unsigned int)*utf8); | |
| 1604 rv = PR_FALSE; | |
| 1605 } | |
| 1606 | |
| 1607 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
| 1608 "", 1, (unsigned char *)&sback, sizeof(sback), &len); | |
| 1609 | |
| 1610 if( !result ) { | |
| 1611 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-2\n"); | |
| 1612 rv = PR_FALSE; | |
| 1613 } else if( 2 != len ) { | |
| 1614 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: len = %d\n", len); | |
| 1615 rv = PR_FALSE; | |
| 1616 } else if( 0 != sback ) { | |
| 1617 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: " | |
| 1618 "expected 0x0000, received 0x%04.4x\n", sback); | |
| 1619 rv = PR_FALSE; | |
| 1620 } | |
| 1621 | |
| 1622 return rv; | |
| 1623 } | |
| 1624 | |
| 1625 static PRBool | |
| 1626 test_multichars | |
| 1627 ( | |
| 1628 void | |
| 1629 ) | |
| 1630 { | |
| 1631 int i; | |
| 1632 unsigned int len, lenout; | |
| 1633 PRUint32 *ucs4s; | |
| 1634 char *ucs4_utf8; | |
| 1635 PRUint16 *ucs2s; | |
| 1636 char *ucs2_utf8; | |
| 1637 void *tmp; | |
| 1638 PRBool result; | |
| 1639 | |
| 1640 ucs4s = (PRUint32 *)calloc(sizeof(ucs4)/sizeof(ucs4[0]), sizeof(PRUint32)); | |
| 1641 ucs2s = (PRUint16 *)calloc(sizeof(ucs2)/sizeof(ucs2[0]), sizeof(PRUint16)); | |
| 1642 | |
| 1643 if( ((PRUint32 *)NULL == ucs4s) || ((PRUint16 *)NULL == ucs2s) ) { | |
| 1644 fprintf(stderr, "out of memory\n"); | |
| 1645 exit(1); | |
| 1646 } | |
| 1647 | |
| 1648 len = 1; | |
| 1649 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { | |
| 1650 ucs4s[i] = ucs4[i].c; | |
| 1651 len += strlen(ucs4[i].utf8); | |
| 1652 } | |
| 1653 | |
| 1654 ucs4_utf8 = (char *)malloc(len); | |
| 1655 | |
| 1656 len = 1; | |
| 1657 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
| 1658 ucs2s[i] = ucs2[i].c; | |
| 1659 len += strlen(ucs2[i].utf8); | |
| 1660 } | |
| 1661 | |
| 1662 ucs2_utf8 = (char *)malloc(len); | |
| 1663 | |
| 1664 if( ((char *)NULL == ucs4_utf8) || ((char *)NULL == ucs2_utf8) ) { | |
| 1665 fprintf(stderr, "out of memory\n"); | |
| 1666 exit(1); | |
| 1667 } | |
| 1668 | |
| 1669 *ucs4_utf8 = '\0'; | |
| 1670 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { | |
| 1671 strcat(ucs4_utf8, ucs4[i].utf8); | |
| 1672 } | |
| 1673 | |
| 1674 *ucs2_utf8 = '\0'; | |
| 1675 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
| 1676 strcat(ucs2_utf8, ucs2[i].utf8); | |
| 1677 } | |
| 1678 | |
| 1679 /* UTF-8 -> UCS-4 */ | |
| 1680 len = sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32); | |
| 1681 tmp = calloc(len, 1); | |
| 1682 if( (void *)NULL == tmp ) { | |
| 1683 fprintf(stderr, "out of memory\n"); | |
| 1684 exit(1); | |
| 1685 } | |
| 1686 | |
| 1687 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, | |
| 1688 ucs4_utf8, strlen(ucs4_utf8), tmp, len, &lenout); | |
| 1689 if( !result ) { | |
| 1690 fprintf(stdout, "Failed to convert much UTF-8 to UCS-4\n"); | |
| 1691 goto done; | |
| 1692 } | |
| 1693 | |
| 1694 if( lenout != len ) { | |
| 1695 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-4\n"); | |
| 1696 goto loser; | |
| 1697 } | |
| 1698 | |
| 1699 if( 0 != memcmp(ucs4s, tmp, len) ) { | |
| 1700 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-4\n"); | |
| 1701 goto loser; | |
| 1702 } | |
| 1703 | |
| 1704 free(tmp); tmp = (void *)NULL; | |
| 1705 | |
| 1706 /* UCS-4 -> UTF-8 */ | |
| 1707 len = strlen(ucs4_utf8); | |
| 1708 tmp = calloc(len, 1); | |
| 1709 if( (void *)NULL == tmp ) { | |
| 1710 fprintf(stderr, "out of memory\n"); | |
| 1711 exit(1); | |
| 1712 } | |
| 1713 | |
| 1714 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, | |
| 1715 (unsigned char *)ucs4s, sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32), | |
| 1716 tmp, len, &lenout); | |
| 1717 if( !result ) { | |
| 1718 fprintf(stdout, "Failed to convert much UCS-4 to UTF-8\n"); | |
| 1719 goto done; | |
| 1720 } | |
| 1721 | |
| 1722 if( lenout != len ) { | |
| 1723 fprintf(stdout, "Unexpected length converting much UCS-4 to UTF-8\n"); | |
| 1724 goto loser; | |
| 1725 } | |
| 1726 | |
| 1727 if( 0 != strncmp(ucs4_utf8, tmp, len) ) { | |
| 1728 fprintf(stdout, "Wrong conversion of much UCS-4 to UTF-8\n"); | |
| 1729 goto loser; | |
| 1730 } | |
| 1731 | |
| 1732 free(tmp); tmp = (void *)NULL; | |
| 1733 | |
| 1734 /* UTF-8 -> UCS-2 */ | |
| 1735 len = sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16); | |
| 1736 tmp = calloc(len, 1); | |
| 1737 if( (void *)NULL == tmp ) { | |
| 1738 fprintf(stderr, "out of memory\n"); | |
| 1739 exit(1); | |
| 1740 } | |
| 1741 | |
| 1742 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, | |
| 1743 ucs2_utf8, strlen(ucs2_utf8), tmp, len, &lenout); | |
| 1744 if( !result ) { | |
| 1745 fprintf(stdout, "Failed to convert much UTF-8 to UCS-2\n"); | |
| 1746 goto done; | |
| 1747 } | |
| 1748 | |
| 1749 if( lenout != len ) { | |
| 1750 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-2\n"); | |
| 1751 goto loser; | |
| 1752 } | |
| 1753 | |
| 1754 if( 0 != memcmp(ucs2s, tmp, len) ) { | |
| 1755 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-2\n"); | |
| 1756 goto loser; | |
| 1757 } | |
| 1758 | |
| 1759 free(tmp); tmp = (void *)NULL; | |
| 1760 | |
| 1761 /* UCS-2 -> UTF-8 */ | |
| 1762 len = strlen(ucs2_utf8); | |
| 1763 tmp = calloc(len, 1); | |
| 1764 if( (void *)NULL == tmp ) { | |
| 1765 fprintf(stderr, "out of memory\n"); | |
| 1766 exit(1); | |
| 1767 } | |
| 1768 | |
| 1769 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, | |
| 1770 (unsigned char *)ucs2s, sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16), | |
| 1771 tmp, len, &lenout); | |
| 1772 if( !result ) { | |
| 1773 fprintf(stdout, "Failed to convert much UCS-2 to UTF-8\n"); | |
| 1774 goto done; | |
| 1775 } | |
| 1776 | |
| 1777 if( lenout != len ) { | |
| 1778 fprintf(stdout, "Unexpected length converting much UCS-2 to UTF-8\n"); | |
| 1779 goto loser; | |
| 1780 } | |
| 1781 | |
| 1782 if( 0 != strncmp(ucs2_utf8, tmp, len) ) { | |
| 1783 fprintf(stdout, "Wrong conversion of much UCS-2 to UTF-8\n"); | |
| 1784 goto loser; | |
| 1785 } | |
| 1786 | |
| 1787 /* implement UTF16 */ | |
| 1788 | |
| 1789 result = PR_TRUE; | |
| 1790 goto done; | |
| 1791 | |
| 1792 loser: | |
| 1793 result = PR_FALSE; | |
| 1794 done: | |
| 1795 free(ucs4s); | |
| 1796 free(ucs4_utf8); | |
| 1797 free(ucs2s); | |
| 1798 free(ucs2_utf8); | |
| 1799 if( (void *)NULL != tmp ) free(tmp); | |
| 1800 return result; | |
| 1801 } | |
| 1802 | |
| 1803 void | |
| 1804 byte_order | |
| 1805 ( | |
| 1806 void | |
| 1807 ) | |
| 1808 { | |
| 1809 /* | |
| 1810 * The implementation (now) expects the 16- and 32-bit characters | |
| 1811 * to be in network byte order, not host byte order. Therefore I | |
| 1812 * have to byteswap all those test vectors above. hton[ls] may be | |
| 1813 * functions, so I have to do this dynamically. If you want to | |
| 1814 * use this code to do host byte order conversions, just remove | |
| 1815 * the call in main() to this function. | |
| 1816 */ | |
| 1817 | |
| 1818 int i; | |
| 1819 | |
| 1820 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { | |
| 1821 struct ucs4 *e = &ucs4[i]; | |
| 1822 e->c = htonl(e->c); | |
| 1823 } | |
| 1824 | |
| 1825 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | |
| 1826 struct ucs2 *e = &ucs2[i]; | |
| 1827 e->c = htons(e->c); | |
| 1828 } | |
| 1829 | |
| 1830 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) { | |
| 1831 struct utf16 *e = &utf16[i]; | |
| 1832 e->c = htonl(e->c); | |
| 1833 e->w[0] = htons(e->w[0]); | |
| 1834 e->w[1] = htons(e->w[1]); | |
| 1835 } | |
| 1836 | |
| 1837 return; | |
| 1838 } | |
| 1839 | |
| 1840 int | |
| 1841 main | |
| 1842 ( | |
| 1843 int argc, | |
| 1844 char *argv[] | |
| 1845 ) | |
| 1846 { | |
| 1847 byte_order(); | |
| 1848 | |
| 1849 if( test_ucs4_chars() && | |
| 1850 test_ucs2_chars() && | |
| 1851 test_utf16_chars() && | |
| 1852 test_utf8_bad_chars() && | |
| 1853 test_utf16_bad_chars() && | |
| 1854 test_iso88591_chars() && | |
| 1855 test_zeroes() && | |
| 1856 test_multichars() && | |
| 1857 PR_TRUE ) { | |
| 1858 fprintf(stderr, "PASS\n"); | |
| 1859 return 1; | |
| 1860 } else { | |
| 1861 fprintf(stderr, "FAIL\n"); | |
| 1862 return 0; | |
| 1863 } | |
| 1864 } | |
| 1865 | |
| 1866 #endif /* TEST_UTF8 */ | |
| OLD | NEW |