OLD | NEW |
(Empty) | |
| 1 /******************************************************************** |
| 2 * COPYRIGHT: |
| 3 * Copyright (c) 1997-2010, International Business Machines Corporation and |
| 4 * others. All Rights Reserved. |
| 5 ********************************************************************/ |
| 6 /******************************************************************************* |
| 7 * |
| 8 * File CCONVTST.C |
| 9 * |
| 10 * Modification History: |
| 11 * Name Description |
| 12 * Steven R. Loomis 7/8/1999 Adding input buffer test |
| 13 ******************************************************************************** |
| 14 */ |
| 15 #include <stdio.h> |
| 16 #include "cstring.h" |
| 17 #include "unicode/uloc.h" |
| 18 #include "unicode/ucnv.h" |
| 19 #include "unicode/ucnv_err.h" |
| 20 #include "unicode/ucnv_cb.h" |
| 21 #include "cintltst.h" |
| 22 #include "unicode/utypes.h" |
| 23 #include "unicode/ustring.h" |
| 24 #include "unicode/ucol.h" |
| 25 #include "cmemory.h" |
| 26 #include "nucnvtst.h" |
| 27 |
| 28 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit
, const int32_t results[], const char* message); |
| 29 static void TestNextUCharError(UConverter* cnv, const char* source, const char*
limit, UErrorCode expected, const char* message); |
| 30 #if !UCONFIG_NO_COLLATION |
| 31 static void TestJitterbug981(void); |
| 32 #endif |
| 33 static void TestJitterbug1293(void); |
| 34 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; |
| 35 static void TestConverterTypesAndStarters(void); |
| 36 static void TestAmbiguous(void); |
| 37 static void TestSignatureDetection(void); |
| 38 static void TestUTF7(void); |
| 39 static void TestIMAP(void); |
| 40 static void TestUTF8(void); |
| 41 static void TestCESU8(void); |
| 42 static void TestUTF16(void); |
| 43 static void TestUTF16BE(void); |
| 44 static void TestUTF16LE(void); |
| 45 static void TestUTF32(void); |
| 46 static void TestUTF32BE(void); |
| 47 static void TestUTF32LE(void); |
| 48 static void TestLATIN1(void); |
| 49 |
| 50 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 51 static void TestSBCS(void); |
| 52 static void TestDBCS(void); |
| 53 static void TestMBCS(void); |
| 54 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO |
| 55 static void TestICCRunout(void); |
| 56 #endif |
| 57 |
| 58 #ifdef U_ENABLE_GENERIC_ISO_2022 |
| 59 static void TestISO_2022(void); |
| 60 #endif |
| 61 |
| 62 static void TestISO_2022_JP(void); |
| 63 static void TestISO_2022_JP_1(void); |
| 64 static void TestISO_2022_JP_2(void); |
| 65 static void TestISO_2022_KR(void); |
| 66 static void TestISO_2022_KR_1(void); |
| 67 static void TestISO_2022_CN(void); |
| 68 #if 0 |
| 69 /* |
| 70 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 |
| 71 */ |
| 72 static void TestISO_2022_CN_EXT(void); |
| 73 #endif |
| 74 static void TestJIS(void); |
| 75 static void TestHZ(void); |
| 76 #endif |
| 77 |
| 78 static void TestSCSU(void); |
| 79 |
| 80 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 81 static void TestEBCDIC_STATEFUL(void); |
| 82 static void TestGB18030(void); |
| 83 static void TestLMBCS(void); |
| 84 static void TestJitterbug255(void); |
| 85 static void TestEBCDICUS4XML(void); |
| 86 #if 0 |
| 87 /* |
| 88 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 |
| 89 */ |
| 90 static void TestJitterbug915(void); |
| 91 #endif |
| 92 static void TestISCII(void); |
| 93 |
| 94 static void TestCoverageMBCS(void); |
| 95 static void TestJitterbug2346(void); |
| 96 static void TestJitterbug2411(void); |
| 97 static void TestJB5275(void); |
| 98 static void TestJB5275_1(void); |
| 99 static void TestJitterbug6175(void); |
| 100 #endif |
| 101 |
| 102 static void TestInBufSizes(void); |
| 103 |
| 104 static void TestRoundTrippingAllUTF(void); |
| 105 static void TestConv(const uint16_t in[], |
| 106 int len, |
| 107 const char* conv, |
| 108 const char* lang, |
| 109 char byteArr[], |
| 110 int byteArrLen); |
| 111 |
| 112 /* open a converter, using test data if it begins with '@' */ |
| 113 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err); |
| 114 |
| 115 |
/* Number of elements in the scratch output buffers of the conversion helpers. */
#define NEW_MAX_BUFFER 999

/*
 * Chunk sizes used when feeding the converters.  Both start at the full
 * buffer size and are overwritten by TestNewConvertWithBufferSizes().
 */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;

/* Human-readable label of the conversion under test, used in log messages. */
static char     gNuConvTestName[1024];

/*
 * Smaller of x and y.  Every use of an argument and the whole expansion are
 * parenthesized so that arguments containing lower-precedence operators
 * (e.g. `a | b`) are compared as a unit.  Each argument may still be
 * evaluated twice, so do not pass expressions with side effects.
 */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
| 123 |
| 124 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err) |
| 125 { |
| 126 if(cnv && cnv[0] == '@') { |
| 127 return ucnv_openPackage(loadTestData(err), cnv+1, err); |
| 128 } else { |
| 129 return ucnv_open(cnv, err); |
| 130 } |
| 131 } |
| 132 |
/* Write len bytes of a to the verbose log as "{0x.. 0x.. }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for(idx=0; idx<len; idx++) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
| 141 |
| 142 static void printUSeq(const UChar* a, int len) |
| 143 { |
| 144 int i=0; |
| 145 log_verbose("{U+"); |
| 146 while (i<len) log_verbose("0x%04x ", a[i++]); |
| 147 log_verbose("}\n"); |
| 148 } |
| 149 |
/* Write len bytes of a to stderr as "{0x.. 0x.. }". */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for(idx=0; idx<len; idx++) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
| 158 |
| 159 static void printUSeqErr(const UChar* a, int len) |
| 160 { |
| 161 int i=0; |
| 162 fprintf(stderr, "{U+"); |
| 163 while (i<len) |
| 164 fprintf(stderr, "0x%04x ", a[i++]); |
| 165 fprintf(stderr,"}\n"); |
| 166 } |
| 167 |
| 168 static void |
| 169 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int3
2_t results[], const char* message) |
| 170 { |
| 171 const char* s0; |
| 172 const char* s=(char*)source; |
| 173 const int32_t *r=results; |
| 174 UErrorCode errorCode=U_ZERO_ERROR; |
| 175 UChar32 c; |
| 176 |
| 177 while(s<limit) { |
| 178 s0=s; |
| 179 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); |
| 180 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { |
| 181 break; /* no more significant input */ |
| 182 } else if(U_FAILURE(errorCode)) { |
| 183 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(
errorCode)); |
| 184 break; |
| 185 } else if( |
| 186 /* test the expected number of input bytes only if >=0 */ |
| 187 (*r>=0 && (int32_t)(s-s0)!=*r) || |
| 188 c!=*(r+1) |
| 189 ) { |
| 190 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should hav
e been %lx from %d bytes.\n", |
| 191 message, c, (s-s0), *(r+1), *r); |
| 192 break; |
| 193 } |
| 194 r+=2; |
| 195 } |
| 196 } |
| 197 |
| 198 static void |
| 199 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErro
rCode expected, const char* message) |
| 200 { |
| 201 const char* s=(char*)source; |
| 202 UErrorCode errorCode=U_ZERO_ERROR; |
| 203 uint32_t c; |
| 204 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); |
| 205 if(errorCode != expected){ |
| 206 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected),
message, myErrorName(errorCode)); |
| 207 } |
| 208 if(c != 0xFFFD && c != 0xffff){ |
| 209 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got
0x%lx\n", message, c); |
| 210 } |
| 211 |
| 212 } |
| 213 |
| 214 static void TestInBufSizes(void) |
| 215 { |
| 216 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1); |
| 217 #if 1 |
| 218 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2); |
| 219 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3); |
| 220 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4); |
| 221 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5); |
| 222 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6); |
| 223 TestNewConvertWithBufferSizes(1,1); |
| 224 TestNewConvertWithBufferSizes(2,3); |
| 225 TestNewConvertWithBufferSizes(3,2); |
| 226 #endif |
| 227 } |
| 228 |
| 229 static void TestOutBufSizes(void) |
| 230 { |
| 231 #if 1 |
| 232 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER); |
| 233 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER); |
| 234 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER); |
| 235 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER); |
| 236 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER); |
| 237 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER); |
| 238 |
| 239 #endif |
| 240 } |
| 241 |
| 242 |
| 243 void addTestNewConvert(TestNode** root) |
| 244 { |
| 245 #if !UCONFIG_NO_FILE_IO |
| 246 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes"); |
| 247 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes"); |
| 248 #endif |
| 249 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterT
ypesAndStarters"); |
| 250 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous"); |
| 251 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetectio
n"); |
| 252 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7"); |
| 253 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP"); |
| 254 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8"); |
| 255 |
| 256 /* test ucnv_getNextUChar() for charsets that encode single surrogates with c
omplete byte sequences */ |
| 257 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8"); |
| 258 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16"); |
| 259 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE"); |
| 260 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE"); |
| 261 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32"); |
| 262 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE"); |
| 263 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE"); |
| 264 |
| 265 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 266 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS"); |
| 267 #endif |
| 268 |
| 269 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1"); |
| 270 |
| 271 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 272 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS"); |
| 273 #if !UCONFIG_NO_FILE_IO |
| 274 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS"); |
| 275 addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout"); |
| 276 #endif |
| 277 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS"); |
| 278 |
| 279 #ifdef U_ENABLE_GENERIC_ISO_2022 |
| 280 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022"); |
| 281 #endif |
| 282 |
| 283 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP"); |
| 284 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS"); |
| 285 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1"); |
| 286 addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2"); |
| 287 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR"); |
| 288 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1"); |
| 289 addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN"); |
| 290 /* |
| 291 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 |
| 292 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT"); |
| 293 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915"); |
| 294 */ |
| 295 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ"); |
| 296 #endif |
| 297 |
| 298 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU"); |
| 299 |
| 300 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 301 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL"); |
| 302 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030"); |
| 303 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255"); |
| 304 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML"); |
| 305 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII"); |
| 306 addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275"); |
| 307 addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1"); |
| 308 #if !UCONFIG_NO_COLLATION |
| 309 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981"); |
| 310 #endif |
| 311 |
| 312 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293"); |
| 313 #endif |
| 314 |
| 315 |
| 316 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO |
| 317 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS"); |
| 318 #endif |
| 319 |
| 320 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAll
UTF"); |
| 321 |
| 322 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 323 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346"); |
| 324 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411"); |
| 325 addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175"); |
| 326 #endif |
| 327 |
| 328 } |
| 329 |
| 330 |
| 331 /* Note that this test already makes use of statics, so it's not really |
| 332 multithread safe. |
| 333 This convenience function lets us make the error messages actually useful. |
| 334 */ |
| 335 |
| 336 static void setNuConvTestName(const char *codepage, const char *direction) |
| 337 { |
| 338 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufS
iz=%d]", |
| 339 codepage, |
| 340 direction, |
| 341 (int)gInBufferSize, |
| 342 (int)gOutBufferSize); |
| 343 } |
| 344 |
/* Outcome reported by the testConvertFromU() / testConvertToU() helpers. */
typedef enum
{
    /* test was OK */
    TC_OK       = 0,

    /* Match failed - err was printed */
    TC_MISMATCH = 1,

    /* Test failed, don't print an err because it was already printed. */
    TC_FAIL     = 2
} ETestConvertResult;
| 351 |
| 352 /* Note: This function uses global variables and it will not do offset |
| 353 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ |
| 354 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,
const uint8_t *expect, int expectLen, |
| 355 const char *codepage, const int32_t *expectOffsets , UBool useFa
llback) |
| 356 { |
| 357 UErrorCode status = U_ZERO_ERROR; |
| 358 UConverter *conv = 0; |
| 359 char junkout[NEW_MAX_BUFFER]; /* FIX */ |
| 360 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ |
| 361 char *p; |
| 362 const UChar *src; |
| 363 char *end; |
| 364 char *targ; |
| 365 int32_t *offs; |
| 366 int i; |
| 367 int32_t realBufferSize; |
| 368 char *realBufferEnd; |
| 369 const UChar *realSourceEnd; |
| 370 const UChar *sourceLimit; |
| 371 UBool checkOffsets = TRUE; |
| 372 UBool doFlush; |
| 373 |
| 374 for(i=0;i<NEW_MAX_BUFFER;i++) |
| 375 junkout[i] = (char)0xF0; |
| 376 for(i=0;i<NEW_MAX_BUFFER;i++) |
| 377 junokout[i] = 0xFF; |
| 378 |
| 379 setNuConvTestName(codepage, "FROM"); |
| 380 |
| 381 log_verbose("\n========= %s\n", gNuConvTestName); |
| 382 |
| 383 conv = my_ucnv_open(codepage, &status); |
| 384 |
| 385 if(U_FAILURE(status)) |
| 386 { |
| 387 log_data_err("Couldn't open converter %s\n",codepage); |
| 388 return TC_FAIL; |
| 389 } |
| 390 if(useFallback){ |
| 391 ucnv_setFallback(conv,useFallback); |
| 392 } |
| 393 |
| 394 log_verbose("Converter opened..\n"); |
| 395 |
| 396 src = source; |
| 397 targ = junkout; |
| 398 offs = junokout; |
| 399 |
| 400 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); |
| 401 realBufferEnd = junkout + realBufferSize; |
| 402 realSourceEnd = source + sourceLen; |
| 403 |
| 404 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) |
| 405 checkOffsets = FALSE; |
| 406 |
| 407 do |
| 408 { |
| 409 end = nct_min(targ + gOutBufferSize, realBufferEnd); |
| 410 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); |
| 411 |
| 412 doFlush = (UBool)(sourceLimit == realSourceEnd); |
| 413 |
| 414 if(targ == realBufferEnd) { |
| 415 log_err("Error, overflowed the real buffer while about to call fromUnico
de! targ=%08lx %s", targ, gNuConvTestName); |
| 416 return TC_FAIL; |
| 417 } |
| 418 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to
%08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); |
| 419 |
| 420 |
| 421 status = U_ZERO_ERROR; |
| 422 |
| 423 ucnv_fromUnicode (conv, |
| 424 &targ, |
| 425 end, |
| 426 &src, |
| 427 sourceLimit, |
| 428 checkOffsets ? offs : NULL, |
| 429 doFlush, /* flush if we're at the end of the input data
*/ |
| 430 &status); |
| 431 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourc
eLimit < realSourceEnd) ); |
| 432 |
| 433 if(U_FAILURE(status)) { |
| 434 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myEr
rorName(status), gNuConvTestName); |
| 435 return TC_FAIL; |
| 436 } |
| 437 |
| 438 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", |
| 439 sourceLen, targ-junkout); |
| 440 |
| 441 if(getTestOption(VERBOSITY_OPTION)) |
| 442 { |
| 443 char junk[9999]; |
| 444 char offset_str[9999]; |
| 445 char *ptr; |
| 446 |
| 447 junk[0] = 0; |
| 448 offset_str[0] = 0; |
| 449 for(ptr = junkout;ptr<targ;ptr++) { |
| 450 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr)); |
| 451 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junoko
ut[ptr-junkout])); |
| 452 } |
| 453 |
| 454 log_verbose(junk); |
| 455 printSeq((const uint8_t *)expect, expectLen); |
| 456 if ( checkOffsets ) { |
| 457 log_verbose("\nOffsets:"); |
| 458 log_verbose(offset_str); |
| 459 } |
| 460 log_verbose("\n"); |
| 461 } |
| 462 ucnv_close(conv); |
| 463 |
| 464 if(expectLen != targ-junkout) { |
| 465 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNu
ConvTestName); |
| 466 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout,
gNuConvTestName); |
| 467 printf("\nGot:"); |
| 468 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); |
| 469 printf("\nExpected:"); |
| 470 printSeqErr((const unsigned char*)expect, expectLen); |
| 471 return TC_MISMATCH; |
| 472 } |
| 473 |
| 474 if (checkOffsets && (expectOffsets != 0) ) { |
| 475 log_verbose("comparing %d offsets..\n", targ-junkout); |
| 476 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ |
| 477 log_err("did not get the expected offsets. %s\n", gNuConvTestName); |
| 478 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); |
| 479 log_err("\n"); |
| 480 log_err("Got : "); |
| 481 for(p=junkout;p<targ;p++) { |
| 482 log_err("%d,", junokout[p-junkout]); |
| 483 } |
| 484 log_err("\n"); |
| 485 log_err("Expected: "); |
| 486 for(i=0; i<(targ-junkout); i++) { |
| 487 log_err("%d,", expectOffsets[i]); |
| 488 } |
| 489 log_err("\n"); |
| 490 } |
| 491 } |
| 492 |
| 493 log_verbose("comparing..\n"); |
| 494 if(!memcmp(junkout, expect, expectLen)) { |
| 495 log_verbose("Matches!\n"); |
| 496 return TC_OK; |
| 497 } else { |
| 498 log_err("String does not match u->%s\n", gNuConvTestName); |
| 499 printUSeqErr(source, sourceLen); |
| 500 printf("\nGot:"); |
| 501 printSeqErr((const unsigned char *)junkout, expectLen); |
| 502 printf("\nExpected:"); |
| 503 printSeqErr((const unsigned char *)expect, expectLen); |
| 504 |
| 505 return TC_MISMATCH; |
| 506 } |
| 507 } |
| 508 |
| 509 /* Note: This function uses global variables and it will not do offset |
| 510 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ |
| 511 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen,
const UChar *expect, int expectlen, |
| 512 const char *codepage, const int32_t *e
xpectOffsets, UBool useFallback) |
| 513 { |
| 514 UErrorCode status = U_ZERO_ERROR; |
| 515 UConverter *conv = 0; |
| 516 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ |
| 517 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ |
| 518 const char *src; |
| 519 const char *realSourceEnd; |
| 520 const char *srcLimit; |
| 521 UChar *p; |
| 522 UChar *targ; |
| 523 UChar *end; |
| 524 int32_t *offs; |
| 525 int i; |
| 526 UBool checkOffsets = TRUE; |
| 527 |
| 528 int32_t realBufferSize; |
| 529 UChar *realBufferEnd; |
| 530 |
| 531 |
| 532 for(i=0;i<NEW_MAX_BUFFER;i++) |
| 533 junkout[i] = 0xFFFE; |
| 534 |
| 535 for(i=0;i<NEW_MAX_BUFFER;i++) |
| 536 junokout[i] = -1; |
| 537 |
| 538 setNuConvTestName(codepage, "TO"); |
| 539 |
| 540 log_verbose("\n========= %s\n", gNuConvTestName); |
| 541 |
| 542 conv = my_ucnv_open(codepage, &status); |
| 543 |
| 544 if(U_FAILURE(status)) |
| 545 { |
| 546 log_data_err("Couldn't open converter %s\n",gNuConvTestName); |
| 547 return TC_FAIL; |
| 548 } |
| 549 if(useFallback){ |
| 550 ucnv_setFallback(conv,useFallback); |
| 551 } |
| 552 log_verbose("Converter opened..\n"); |
| 553 |
| 554 src = (const char *)source; |
| 555 targ = junkout; |
| 556 offs = junokout; |
| 557 |
| 558 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); |
| 559 realBufferEnd = junkout + realBufferSize; |
| 560 realSourceEnd = src + sourcelen; |
| 561 |
| 562 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) |
| 563 checkOffsets = FALSE; |
| 564 |
| 565 do |
| 566 { |
| 567 end = nct_min( targ + gOutBufferSize, realBufferEnd); |
| 568 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); |
| 569 |
| 570 if(targ == realBufferEnd) |
| 571 { |
| 572 log_err("Error, the end would overflow the real output buffer while
about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName); |
| 573 return TC_FAIL; |
| 574 } |
| 575 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); |
| 576 |
| 577 /* oldTarg = targ; */ |
| 578 |
| 579 status = U_ZERO_ERROR; |
| 580 |
| 581 ucnv_toUnicode (conv, |
| 582 &targ, |
| 583 end, |
| 584 &src, |
| 585 srcLimit, |
| 586 checkOffsets ? offs : NULL, |
| 587 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end
of hte source data */ |
| 588 &status); |
| 589 |
| 590 /* offs += (targ-oldTarg); */ |
| 591 |
| 592 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sr
cLimit < realSourceEnd)) ); /* while we just need another buffer */ |
| 593 |
| 594 if(U_FAILURE(status)) |
| 595 { |
| 596 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myError
Name(status), gNuConvTestName); |
| 597 return TC_FAIL; |
| 598 } |
| 599 |
| 600 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", |
| 601 sourcelen, targ-junkout); |
| 602 if(getTestOption(VERBOSITY_OPTION)) |
| 603 { |
| 604 char junk[9999]; |
| 605 char offset_str[9999]; |
| 606 UChar *ptr; |
| 607 |
| 608 junk[0] = 0; |
| 609 offset_str[0] = 0; |
| 610 |
| 611 for(ptr = junkout;ptr<targ;ptr++) |
| 612 { |
| 613 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p
tr); |
| 614 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (uns
igned int)junokout[ptr-junkout]); |
| 615 } |
| 616 |
| 617 log_verbose(junk); |
| 618 printUSeq(expect, expectlen); |
| 619 if ( checkOffsets ) |
| 620 { |
| 621 log_verbose("\nOffsets:"); |
| 622 log_verbose(offset_str); |
| 623 } |
| 624 log_verbose("\n"); |
| 625 } |
| 626 ucnv_close(conv); |
| 627 |
| 628 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); |
| 629 |
| 630 if (checkOffsets && (expectOffsets != 0)) |
| 631 { |
| 632 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ |
| 633 log_err("did not get the expected offsets. %s\n",gNuConvTestName); |
| 634 log_err("Got: "); |
| 635 for(p=junkout;p<targ;p++) { |
| 636 log_err("%d,", junokout[p-junkout]); |
| 637 } |
| 638 log_err("\n"); |
| 639 log_err("Expected: "); |
| 640 for(i=0; i<(targ-junkout); i++) { |
| 641 log_err("%d,", expectOffsets[i]); |
| 642 } |
| 643 log_err("\n"); |
| 644 log_err("output: "); |
| 645 for(i=0; i<(targ-junkout); i++) { |
| 646 log_err("%X,", junkout[i]); |
| 647 } |
| 648 log_err("\n"); |
| 649 log_err("input: "); |
| 650 for(i=0; i<(src-(const char *)source); i++) { |
| 651 log_err("%X,", (unsigned char)source[i]); |
| 652 } |
| 653 log_err("\n"); |
| 654 } |
| 655 } |
| 656 |
| 657 if(!memcmp(junkout, expect, expectlen*2)) |
| 658 { |
| 659 log_verbose("Matches!\n"); |
| 660 return TC_OK; |
| 661 } |
| 662 else |
| 663 { |
| 664 log_err("String does not match. %s\n", gNuConvTestName); |
| 665 log_verbose("String does not match. %s\n", gNuConvTestName); |
| 666 printf("\nGot:"); |
| 667 printUSeqErr(junkout, expectlen); |
| 668 printf("\nExpected:"); |
| 669 printUSeqErr(expect, expectlen); |
| 670 return TC_MISMATCH; |
| 671 } |
| 672 } |
| 673 |
| 674 |
| 675 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) |
| 676 { |
| 677 /** test chars #1 */ |
| 678 /* 1 2 3 1Han 2Han 3Han . */ |
| 679 static const UChar sampleText[] = |
| 680 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0
xDC21 }; |
| 681 static const UChar sampleTextRoundTripUnmappable[] = |
| 682 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd }; |
| 683 |
| 684 |
| 685 static const uint8_t expectedUTF8[] = |
| 686 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0
x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 }; |
| 687 static const int32_t toUTF8Offs[] = |
| 688 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0
x06, 0x07, 0x08, 0x08, 0x08, 0x08 }; |
| 689 static const int32_t fmUTF8Offs[] = |
| 690 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0
x000e }; |
| 691 |
| 692 #ifdef U_ENABLE_GENERIC_ISO_2022 |
| 693 /* Same as UTF8, but with ^[%B preceeding */ |
| 694 static const const uint8_t expectedISO2022[] = |
| 695 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0
x8c, 0xe4, 0xb8, 0x89, 0x2E }; |
| 696 static const int32_t toISO2022Offs[] = |
| 697 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, |
| 698 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */ |
| 699 static const int32_t fmISO2022Offs[] = |
| 700 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is t
his right? */ |
| 701 #endif |
| 702 |
| 703 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */ |
| 704 static const uint8_t expectedIBM930[] = |
| 705 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0
x4B, 0x0e, 0xfe, 0xfe, 0x0f }; |
| 706 static const int32_t toIBM930Offs[] = |
| 707 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0
x07, 0x08, 0x08, 0x08, -1 }; |
| 708 static const int32_t fmIBM930Offs[] = |
| 709 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e }; |
| 710 |
| 711 /* 1 2 3 0 h1 h2 h3 . MBCS*/ |
| 712 static const uint8_t expectedIBM943[] = |
| 713 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc,
0xfc }; |
| 714 static const int32_t toIBM943Offs [] = |
| 715 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08,
0x08 }; |
| 716 static const int32_t fmIBM943Offs[] = |
| 717 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b }; |
| 718 |
| 719 /* 1 2 3 0 h1 h2 h3 . DBCS*/ |
| 720 static const uint8_t expectedIBM9027[] = |
| 721 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48,
0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe }; |
| 722 static const int32_t toIBM9027Offs [] = |
| 723 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05,
0x06, 0x06, 0x07, 0x07, 0x08, 0x08 }; |
| 724 |
| 725 /* 1 2 3 0 <?> <?> <?> . SBCS*/ |
| 726 static const uint8_t expectedIBM920[] = |
| 727 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a }; |
| 728 static const int32_t toIBM920Offs [] = |
| 729 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; |
| 730 |
| 731 /* 1 2 3 0 <?> <?> <?> . SBCS*/ |
| 732 static const uint8_t expectedISO88593[] = |
| 733 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; |
| 734 static const int32_t toISO88593Offs[] = |
| 735 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; |
| 736 |
| 737 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/ |
| 738 static const uint8_t expectedLATIN1[] = |
| 739 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; |
| 740 static const int32_t toLATIN1Offs[] = |
| 741 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; |
| 742 |
| 743 |
| 744 /* etc */ |
| 745 static const uint8_t expectedUTF16BE[] = |
| 746 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0
x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 }; |
| 747 static const int32_t toUTF16BEOffs[]= |
| 748 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0
x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; |
| 749 static const int32_t fmUTF16BEOffs[] = |
| 750 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010,
0x0010 }; |
| 751 |
| 752 static const uint8_t expectedUTF16LE[] = |
| 753 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0
x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc }; |
| 754 static const int32_t toUTF16LEOffs[]= |
| 755 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0
x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; |
| 756 static const int32_t fmUTF16LEOffs[] = |
| 757 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0
x0010 }; |
| 758 |
| 759 static const uint8_t expectedUTF32BE[] = |
| 760 { 0x00, 0x00, 0x00, 0x31, |
| 761 0x00, 0x00, 0x00, 0x32, |
| 762 0x00, 0x00, 0x00, 0x33, |
| 763 0x00, 0x00, 0x00, 0x00, |
| 764 0x00, 0x00, 0x4e, 0x00, |
| 765 0x00, 0x00, 0x4e, 0x8c, |
| 766 0x00, 0x00, 0x4e, 0x09, |
| 767 0x00, 0x00, 0x00, 0x2e, |
| 768 0x00, 0x02, 0x00, 0x21 }; |
| 769 static const int32_t toUTF32BEOffs[]= |
| 770 { 0x00, 0x00, 0x00, 0x00, |
| 771 0x01, 0x01, 0x01, 0x01, |
| 772 0x02, 0x02, 0x02, 0x02, |
| 773 0x03, 0x03, 0x03, 0x03, |
| 774 0x04, 0x04, 0x04, 0x04, |
| 775 0x05, 0x05, 0x05, 0x05, |
| 776 0x06, 0x06, 0x06, 0x06, |
| 777 0x07, 0x07, 0x07, 0x07, |
| 778 0x08, 0x08, 0x08, 0x08, |
| 779 0x08, 0x08, 0x08, 0x08 }; |
| 780 static const int32_t fmUTF32BEOffs[] = |
| 781 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020,
0x0020 }; |
| 782 |
| 783 static const uint8_t expectedUTF32LE[] = |
| 784 { 0x31, 0x00, 0x00, 0x00, |
| 785 0x32, 0x00, 0x00, 0x00, |
| 786 0x33, 0x00, 0x00, 0x00, |
| 787 0x00, 0x00, 0x00, 0x00, |
| 788 0x00, 0x4e, 0x00, 0x00, |
| 789 0x8c, 0x4e, 0x00, 0x00, |
| 790 0x09, 0x4e, 0x00, 0x00, |
| 791 0x2e, 0x00, 0x00, 0x00, |
| 792 0x21, 0x00, 0x02, 0x00 }; |
| 793 static const int32_t toUTF32LEOffs[]= |
| 794 { 0x00, 0x00, 0x00, 0x00, |
| 795 0x01, 0x01, 0x01, 0x01, |
| 796 0x02, 0x02, 0x02, 0x02, |
| 797 0x03, 0x03, 0x03, 0x03, |
| 798 0x04, 0x04, 0x04, 0x04, |
| 799 0x05, 0x05, 0x05, 0x05, |
| 800 0x06, 0x06, 0x06, 0x06, |
| 801 0x07, 0x07, 0x07, 0x07, |
| 802 0x08, 0x08, 0x08, 0x08, |
| 803 0x08, 0x08, 0x08, 0x08 }; |
| 804 static const int32_t fmUTF32LEOffs[] = |
| 805 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0
x0020 }; |
| 806 |
| 807 |
| 808 |
| 809 |
| 810 /** Test chars #2 **/ |
| 811 |
| 812 /* Sahha [health], slashed h's */ |
| 813 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x006
1 }; |
| 814 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }
; |
| 815 |
| 816 /* LMBCS */ |
| 817 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2
666, 0x0220 }; |
| 818 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73,
0x01, 0x04, 0x14, 0x02, 0x20 }; |
| 819 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03,
0x04, 0x04 , 0x05, 0x05, 0x05 }; |
| 820 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0
006, 0x0008}; |
| 821 /*********************************** START OF CODE finally *************/ |
| 822 |
| 823 gInBufferSize = insize; |
| 824 gOutBufferSize = outsize; |
| 825 |
| 826 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBuff
erSize = %d\n", gInBufferSize, gOutBufferSize); |
| 827 |
| 828 |
| 829 /*UTF-8*/ |
| 830 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| 831 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE ); |
| 832 |
| 833 log_verbose("Test surrogate behaviour for UTF8\n"); |
| 834 { |
| 835 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; |
| 836 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, |
| 837 0xf0, 0x90, 0x90, 0x81, |
| 838 0xef, 0xbf, 0xbd |
| 839 }; |
| 840 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; |
| 841 testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]), |
| 842 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", o
ffsets,FALSE ); |
| 843 |
| 844 |
| 845 } |
| 846 |
| 847 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) |
| 848 /*ISO-2022*/ |
| 849 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| 850 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALS
E ); |
| 851 #endif |
| 852 |
| 853 /*UTF16 LE*/ |
| 854 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| 855 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALS
E ); |
| 856 /*UTF16 BE*/ |
| 857 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| 858 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALS
E ); |
| 859 /*UTF32 LE*/ |
| 860 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| 861 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALS
E ); |
| 862 /*UTF32 BE*/ |
| 863 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| 864 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALS
E ); |
| 865 |
| 866 /*LATIN_1*/ |
| 867 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| 868 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE ); |
| 869 |
| 870 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 871 /*EBCDIC_STATEFUL*/ |
| 872 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| 873 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE ); |
| 874 |
| 875 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| 876 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs
,FALSE ); |
| 877 |
| 878 /*MBCS*/ |
| 879 |
| 880 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| 881 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE ); |
| 882 /*DBCS*/ |
| 883 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| 884 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALS
E ); |
| 885 /*SBCS*/ |
| 886 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| 887 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE ); |
| 888 /*SBCS*/ |
| 889 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
| 890 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs
,FALSE ); |
| 891 #endif |
| 892 |
| 893 |
| 894 /****/ |
| 895 |
| 896 /*UTF-8*/ |
| 897 testConvertToU(expectedUTF8, sizeof(expectedUTF8), |
| 898 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs
,FALSE); |
| 899 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) |
| 900 /*ISO-2022*/ |
| 901 testConvertToU(expectedISO2022, sizeof(expectedISO2022), |
| 902 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2
022Offs,FALSE); |
| 903 #endif |
| 904 |
| 905 /*UTF16 LE*/ |
| 906 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), |
| 907 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF1
6LEOffs,FALSE); |
| 908 /*UTF16 BE*/ |
| 909 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE), |
| 910 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF1
6BEOffs,FALSE); |
| 911 /*UTF32 LE*/ |
| 912 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE), |
| 913 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF3
2LEOffs,FALSE); |
| 914 /*UTF32 BE*/ |
| 915 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE), |
| 916 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF3
2BEOffs,FALSE); |
| 917 |
| 918 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 919 /*EBCDIC_STATEFUL*/ |
| 920 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUn
mappable, |
| 921 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnma
ppable[0]), "ibm-930", fmIBM930Offs,FALSE); |
| 922 /*MBCS*/ |
| 923 testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnm
appable, |
| 924 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnma
ppable[0]), "ibm-943", fmIBM943Offs,FALSE); |
| 925 #endif |
| 926 |
| 927 /* Try it again to make sure it still works */ |
| 928 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), |
| 929 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF1
6LEOffs,FALSE); |
| 930 |
| 931 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 932 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), |
| 933 malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3",
NULL,FALSE); |
| 934 |
| 935 testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0
]), |
| 936 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE
); |
| 937 |
| 938 /*LMBCS*/ |
| 939 testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), |
| 940 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE ); |
| 941 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS), |
| 942 LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLM
BCSOffs,FALSE); |
| 943 #endif |
| 944 |
| 945 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */ |
| 946 { |
| 947 /* encode directly set D and set O */ |
| 948 static const uint8_t utf7[] = { |
| 949 /* |
| 950 Hi Mom -+Jjo--! |
| 951 A+ImIDkQ. |
| 952 +- |
| 953 +ZeVnLIqe |
| 954 */ |
| 955 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x
6f, 0x2d, 0x2d, 0x21, |
| 956 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, |
| 957 0x2b, 0x2d, |
| 958 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65 |
| 959 }; |
| 960 static const UChar unicode[] = { |
| 961 /* |
| 962 Hi Mom -<WHITE SMILING FACE>-! |
| 963 A<NOT IDENTICAL TO><ALPHA>. |
| 964 + |
| 965 [Japanese word "nihongo"] |
| 966 */ |
| 967 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, |
| 968 0x41, 0x2262, 0x0391, 0x2e, |
| 969 0x2b, |
| 970 0x65e5, 0x672c, 0x8a9e |
| 971 }; |
| 972 static const int32_t toUnicodeOffsets[] = { |
| 973 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, |
| 974 15, 17, 19, 23, |
| 975 24, |
| 976 27, 29, 32 |
| 977 }; |
| 978 static const int32_t fromUnicodeOffsets[] = { |
| 979 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, |
| 980 11, 12, 12, 12, 13, 13, 13, 13, 14, |
| 981 15, 15, |
| 982 16, 16, 16, 17, 17, 17, 18, 18, 18 |
| 983 }; |
| 984 |
| 985 /* same but escaping set O (the exclamation mark) */ |
| 986 static const uint8_t utf7Restricted[] = { |
| 987 /* |
| 988 Hi Mom -+Jjo--+ACE- |
| 989 A+ImIDkQ. |
| 990 +- |
| 991 +ZeVnLIqe |
| 992 */ |
| 993 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x
6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d, |
| 994 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, |
| 995 0x2b, 0x2d, |
| 996 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65 |
| 997 }; |
| 998 static const int32_t toUnicodeOffsetsR[] = { |
| 999 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15, |
| 1000 19, 21, 23, 27, |
| 1001 28, |
| 1002 31, 33, 36 |
| 1003 }; |
| 1004 static const int32_t fromUnicodeOffsetsR[] = { |
| 1005 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10, |
| 1006 11, 12, 12, 12, 13, 13, 13, 13, 14, |
| 1007 15, 15, |
| 1008 16, 16, 16, 17, 17, 17, 18, 18, 18 |
| 1009 }; |
| 1010 |
| 1011 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(u
tf7), "UTF-7", fromUnicodeOffsets,FALSE); |
| 1012 |
| 1013 testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCH
AR, "UTF-7", toUnicodeOffsets,FALSE); |
| 1014 |
| 1015 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted
, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); |
| 1016 |
| 1017 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(u
nicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE); |
| 1018 } |
| 1019 |
| 1020 /* |
| 1021 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152, |
| 1022 * modified according to RFC 2060, |
| 1023 * and supplemented with the one example in RFC 2060 itself. |
| 1024 */ |
| 1025 { |
| 1026 static const uint8_t imap[] = { |
| 1027 /* Hi Mom -&Jjo--! |
| 1028 A&ImIDkQ-. |
| 1029 &- |
| 1030 &ZeVnLIqe- |
| 1031 \ |
| 1032 ~peter |
| 1033 /mail |
| 1034 /&ZeVnLIqe- |
| 1035 /&U,BTFw- |
| 1036 */ |
| 1037 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x
6f, 0x2d, 0x2d, 0x21, |
| 1038 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e, |
| 1039 0x26, 0x2d, |
| 1040 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, |
| 1041 0x5c, |
| 1042 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, |
| 1043 0x2f, 0x6d, 0x61, 0x69, 0x6c, |
| 1044 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, |
| 1045 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d |
| 1046 }; |
| 1047 static const UChar unicode[] = { |
| 1048 /* Hi Mom -<WHITE SMILING FACE>-! |
| 1049 A<NOT IDENTICAL TO><ALPHA>. |
| 1050 & |
| 1051 [Japanese word "nihongo"] |
| 1052 \ |
| 1053 ~peter |
| 1054 /mail |
| 1055 /<65e5, 672c, 8a9e> |
| 1056 /<53f0, 5317> |
| 1057 */ |
| 1058 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, |
| 1059 0x41, 0x2262, 0x0391, 0x2e, |
| 1060 0x26, |
| 1061 0x65e5, 0x672c, 0x8a9e, |
| 1062 0x5c, |
| 1063 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, |
| 1064 0x2f, 0x6d, 0x61, 0x69, 0x6c, |
| 1065 0x2f, 0x65e5, 0x672c, 0x8a9e, |
| 1066 0x2f, 0x53f0, 0x5317 |
| 1067 }; |
| 1068 static const int32_t toUnicodeOffsets[] = { |
| 1069 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, |
| 1070 15, 17, 19, 24, |
| 1071 25, |
| 1072 28, 30, 33, |
| 1073 37, |
| 1074 38, 39, 40, 41, 42, 43, |
| 1075 44, 45, 46, 47, 48, |
| 1076 49, 51, 53, 56, |
| 1077 60, 62, 64 |
| 1078 }; |
| 1079 static const int32_t fromUnicodeOffsets[] = { |
| 1080 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, |
| 1081 11, 12, 12, 12, 13, 13, 13, 13, 13, 14, |
| 1082 15, 15, |
| 1083 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, |
| 1084 19, |
| 1085 20, 21, 22, 23, 24, 25, |
| 1086 26, 27, 28, 29, 30, |
| 1087 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, |
| 1088 35, 36, 36, 36, 37, 37, 37, 37, 37 |
| 1089 }; |
| 1090 |
| 1091 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(i
map), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE); |
| 1092 |
| 1093 testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCH
AR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE); |
| 1094 } |
| 1095 |
| 1096 /* Test UTF-8 bad data handling*/ |
| 1097 { |
| 1098 static const uint8_t utf8[]={ |
| 1099 0x61, |
| 1100 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ |
| 1101 0x00, |
| 1102 0x62, |
| 1103 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ |
| 1104 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ |
| 1105 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */ |
| 1106 0xdf, 0xbf, /* 7ff */ |
| 1107 0xbf, /* truncated tail */ |
| 1108 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */ |
| 1109 0x02 |
| 1110 }; |
| 1111 |
| 1112 static const uint16_t utf8Expected[]={ |
| 1113 0x0061, |
| 1114 0xfffd, |
| 1115 0x0000, |
| 1116 0x0062, |
| 1117 0xfffd, |
| 1118 0xfffd, |
| 1119 0xdbff, 0xdfff, |
| 1120 0x07ff, |
| 1121 0xfffd, |
| 1122 0xfffd, |
| 1123 0x0002 |
| 1124 }; |
| 1125 |
| 1126 static const int32_t utf8Offsets[]={ |
| 1127 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28 |
| 1128 }; |
| 1129 testConvertToU(utf8, sizeof(utf8), |
| 1130 utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]
), "utf-8", utf8Offsets ,FALSE); |
| 1131 |
| 1132 } |
| 1133 |
| 1134 /* Test UTF-32BE bad data handling*/ |
| 1135 { |
| 1136 static const uint8_t utf32[]={ |
| 1137 0x00, 0x00, 0x00, 0x61, |
| 1138 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ |
| 1139 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ |
| 1140 0x00, 0x00, 0x00, 0x62, |
| 1141 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ |
| 1142 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ |
| 1143 0x00, 0x00, 0x01, 0x62, |
| 1144 0x00, 0x00, 0x02, 0x62 |
| 1145 }; |
| 1146 static const uint16_t utf32Expected[]={ |
| 1147 0x0061, |
| 1148 0xfffd, /* 0x110000 out of range */ |
| 1149 0xDBFF, /* 0x10FFFF in range */ |
| 1150 0xDFFF, |
| 1151 0x0062, |
| 1152 0xfffd, /* 0xffffffff out of range */ |
| 1153 0xfffd, /* 0x7fffffff out of range */ |
| 1154 0x0162, |
| 1155 0x0262 |
| 1156 }; |
| 1157 static const int32_t utf32Offsets[]={ |
| 1158 0, 4, 8, 8, 12, 16, 20, 24, 28 |
| 1159 }; |
| 1160 static const uint8_t utf32ExpectedBack[]={ |
| 1161 0x00, 0x00, 0x00, 0x61, |
| 1162 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */ |
| 1163 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ |
| 1164 0x00, 0x00, 0x00, 0x62, |
| 1165 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */ |
| 1166 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */ |
| 1167 0x00, 0x00, 0x01, 0x62, |
| 1168 0x00, 0x00, 0x02, 0x62 |
| 1169 }; |
| 1170 static const int32_t utf32OffsetsBack[]={ |
| 1171 0,0,0,0, |
| 1172 1,1,1,1, |
| 1173 2,2,2,2, |
| 1174 4,4,4,4, |
| 1175 5,5,5,5, |
| 1176 6,6,6,6, |
| 1177 7,7,7,7, |
| 1178 8,8,8,8 |
| 1179 }; |
| 1180 |
| 1181 testConvertToU(utf32, sizeof(utf32), |
| 1182 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected
[0]), "utf-32be", utf32Offsets ,FALSE); |
| 1183 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expect
ed[0]), |
| 1184 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32Offse
tsBack, FALSE); |
| 1185 } |
| 1186 |
| 1187 /* Test UTF-32LE bad data handling*/ |
| 1188 { |
| 1189 static const uint8_t utf32[]={ |
| 1190 0x61, 0x00, 0x00, 0x00, |
| 1191 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ |
| 1192 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ |
| 1193 0x62, 0x00, 0x00, 0x00, |
| 1194 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ |
| 1195 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ |
| 1196 0x62, 0x01, 0x00, 0x00, |
| 1197 0x62, 0x02, 0x00, 0x00, |
| 1198 }; |
| 1199 |
| 1200 static const uint16_t utf32Expected[]={ |
| 1201 0x0061, |
| 1202 0xfffd, /* 0x110000 out of range */ |
| 1203 0xDBFF, /* 0x10FFFF in range */ |
| 1204 0xDFFF, |
| 1205 0x0062, |
| 1206 0xfffd, /* 0xffffffff out of range */ |
| 1207 0xfffd, /* 0x7fffffff out of range */ |
| 1208 0x0162, |
| 1209 0x0262 |
| 1210 }; |
| 1211 static const int32_t utf32Offsets[]={ |
| 1212 0, 4, 8, 8, 12, 16, 20, 24, 28 |
| 1213 }; |
| 1214 static const uint8_t utf32ExpectedBack[]={ |
| 1215 0x61, 0x00, 0x00, 0x00, |
| 1216 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */ |
| 1217 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ |
| 1218 0x62, 0x00, 0x00, 0x00, |
| 1219 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */ |
| 1220 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */ |
| 1221 0x62, 0x01, 0x00, 0x00, |
| 1222 0x62, 0x02, 0x00, 0x00 |
| 1223 }; |
| 1224 static const int32_t utf32OffsetsBack[]={ |
| 1225 0,0,0,0, |
| 1226 1,1,1,1, |
| 1227 2,2,2,2, |
| 1228 4,4,4,4, |
| 1229 5,5,5,5, |
| 1230 6,6,6,6, |
| 1231 7,7,7,7, |
| 1232 8,8,8,8 |
| 1233 }; |
| 1234 testConvertToU(utf32, sizeof(utf32), |
| 1235 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-
32le", utf32Offsets,FALSE ); |
| 1236 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expect
ed[0]), |
| 1237 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32Offse
tsBack, FALSE); |
| 1238 } |
| 1239 } |
| 1240 |
| 1241 static void TestCoverageMBCS(){ |
| 1242 #if 0 |
| 1243 UErrorCode status = U_ZERO_ERROR; |
| 1244 const char *directory = loadTestData(&status); |
| 1245 char* tdpath = NULL; |
| 1246 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory
())+1)); |
| 1247 int len = strlen(directory); |
| 1248 char* index=NULL; |
| 1249 |
| 1250 tdpath = (char*) malloc(sizeof(char) * (len * 2)); |
| 1251 uprv_strcpy(saveDirectory,u_getDataDirectory()); |
| 1252 log_verbose("Retrieved data directory %s \n",saveDirectory); |
| 1253 uprv_strcpy(tdpath,directory); |
| 1254 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR); |
| 1255 |
| 1256 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){ |
| 1257 *(index+1)=0; |
| 1258 } |
| 1259 u_setDataDirectory(tdpath); |
| 1260 log_verbose("ICU data directory is set to: %s \n" ,tdpath); |
| 1261 #endif |
| 1262 |
| 1263 /*some more test to increase the code coverage in MBCS. Create an test conv
erter from test1.ucm |
| 1264 which is test file for MBCS conversion with single-byte codepage data.*/ |
| 1265 { |
| 1266 |
| 1267 /* MBCS with single byte codepage data test1.ucm*/ |
| 1268 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34
, 0x0003}; |
| 1269 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,}; |
| 1270 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; |
| 1271 |
| 1272 /*from Unicode*/ |
| 1273 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[
0]), |
| 1274 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); |
| 1275 } |
| 1276 |
| 1277 /*some more test to increase the code coverage in MBCS. Create an test conv
erter from test3.ucm |
| 1278 which is test file for MBCS conversion with three-byte codepage data.*/ |
| 1279 { |
| 1280 |
| 1281 /* MBCS with three byte codepage data test3.ucm*/ |
| 1282 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4
, 0xde34, 0xd84d, 0xdc56, 0x000e}; |
| 1283 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0
x07, 0x01, 0x02, 0x0a, 0xff,}; |
| 1284 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8}; |
| 1285 |
| 1286 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0
x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,}; |
| 1287 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4
, 0xde34, 0xd84d, 0xdc56, 0xfffd}; |
| 1288 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 }; |
| 1289 |
| 1290 /*from Unicode*/ |
| 1291 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[
0]), |
| 1292 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE ); |
| 1293 |
| 1294 /*to Unicode*/ |
| 1295 testConvertToU(test3input, sizeof(test3input), |
| 1296 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]),
"@test3", fromtest3Offs ,FALSE); |
| 1297 |
| 1298 } |
| 1299 |
| 1300 /*some more test to increase the code coverage in MBCS. Create an test conv
erter from test4.ucm |
| 1301 which is test file for MBCS conversion with four-byte codepage data.*/ |
| 1302 { |
| 1303 |
| 1304 /* MBCS with three byte codepage data test4.ucm*/ |
| 1305 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b,
0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; |
| 1306 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0
x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,}; |
| 1307 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6
, 6, 8,}; |
| 1308 |
| 1309 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0
x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,}; |
| 1310 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b,
0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; |
| 1311 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,}; |
| 1312 |
| 1313 /*from Unicode*/ |
| 1314 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[
0]), |
| 1315 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE ); |
| 1316 |
| 1317 /*to Unicode*/ |
| 1318 testConvertToU(test4input, sizeof(test4input), |
| 1319 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]),
"@test4", fromtest4Offs,FALSE ); |
| 1320 |
| 1321 } |
| 1322 #if 0 |
| 1323 free(tdpath); |
| 1324 /* restore the original data directory */ |
| 1325 log_verbose("Setting the data directory to %s \n", saveDirectory); |
| 1326 u_setDataDirectory(saveDirectory); |
| 1327 free(saveDirectory); |
| 1328 #endif |
| 1329 |
| 1330 } |
| 1331 |
| 1332 static void TestConverterType(const char *convName, UConverterType convType) { |
| 1333 UConverter* myConverter; |
| 1334 UErrorCode err = U_ZERO_ERROR; |
| 1335 |
| 1336 myConverter = my_ucnv_open(convName, &err); |
| 1337 |
| 1338 if (U_FAILURE(err)) { |
| 1339 log_data_err("Failed to create an %s converter\n", convName); |
| 1340 return; |
| 1341 } |
| 1342 else |
| 1343 { |
| 1344 if (ucnv_getType(myConverter)!=convType) { |
| 1345 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n", |
| 1346 convName, convType); |
| 1347 } |
| 1348 else { |
| 1349 log_verbose("ucnv_getType %s ok\n", convName); |
| 1350 } |
| 1351 } |
| 1352 ucnv_close(myConverter); |
| 1353 } |
| 1354 |
| 1355 static void TestConverterTypesAndStarters() |
| 1356 { |
| 1357 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 1358 UConverter* myConverter; |
| 1359 UErrorCode err = U_ZERO_ERROR; |
| 1360 UBool mystarters[256]; |
| 1361 |
| 1362 /* const UBool expectedKSCstarters[256] = { |
| 1363 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| 1364 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| 1365 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| 1366 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| 1367 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| 1368 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| 1369 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| 1370 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| 1371 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| 1372 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| 1373 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| 1374 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| 1375 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| 1376 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, |
| 1377 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| 1378 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| 1379 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| 1380 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| 1381 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| 1382 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| 1383 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| 1384 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| 1385 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| 1386 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| 1387 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, |
| 1388 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/ |
| 1389 |
| 1390 |
| 1391 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversio
n types."); |
| 1392 |
| 1393 myConverter = ucnv_open("ksc", &err); |
| 1394 if (U_FAILURE(err)) { |
| 1395 log_data_err("Failed to create an ibm-ksc converter\n"); |
| 1396 return; |
| 1397 } |
| 1398 else |
| 1399 { |
| 1400 if (ucnv_getType(myConverter)!=UCNV_MBCS) |
| 1401 log_err("ucnv_getType Failed for ibm-949\n"); |
| 1402 else |
| 1403 log_verbose("ucnv_getType ibm-949 ok\n"); |
| 1404 |
| 1405 if(myConverter!=NULL) |
| 1406 ucnv_getStarters(myConverter, mystarters, &err); |
| 1407 |
| 1408 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters
))) |
| 1409 log_err("Failed ucnv_getStarters for ksc\n"); |
| 1410 else |
| 1411 log_verbose("ucnv_getStarters ok\n");*/ |
| 1412 |
| 1413 } |
| 1414 ucnv_close(myConverter); |
| 1415 |
| 1416 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL); |
| 1417 TestConverterType("ibm-878", UCNV_SBCS); |
| 1418 #endif |
| 1419 |
| 1420 TestConverterType("iso-8859-1", UCNV_LATIN_1); |
| 1421 |
| 1422 TestConverterType("ibm-1208", UCNV_UTF8); |
| 1423 |
| 1424 TestConverterType("utf-8", UCNV_UTF8); |
| 1425 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian); |
| 1426 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian); |
| 1427 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian); |
| 1428 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian); |
| 1429 |
| 1430 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 1431 |
| 1432 #if defined(U_ENABLE_GENERIC_ISO_2022) |
| 1433 TestConverterType("iso-2022", UCNV_ISO_2022); |
| 1434 #endif |
| 1435 |
| 1436 TestConverterType("hz", UCNV_HZ); |
| 1437 #endif |
| 1438 |
| 1439 TestConverterType("scsu", UCNV_SCSU); |
| 1440 |
| 1441 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 1442 TestConverterType("x-iscii-de", UCNV_ISCII); |
| 1443 #endif |
| 1444 |
| 1445 TestConverterType("ascii", UCNV_US_ASCII); |
| 1446 TestConverterType("utf-7", UCNV_UTF7); |
| 1447 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX); |
| 1448 TestConverterType("bocu-1", UCNV_BOCU1); |
| 1449 } |
| 1450 |
| 1451 static void |
| 1452 TestAmbiguousConverter(UConverter *cnv) { |
| 1453 static const char inBytes[3]={ 0x61, 0x5B, 0x5c }; |
| 1454 UChar outUnicode[20]={ 0, 0, 0, 0 }; |
| 1455 |
| 1456 const char *s; |
| 1457 UChar *u; |
| 1458 UErrorCode errorCode; |
| 1459 UBool isAmbiguous; |
| 1460 |
| 1461 /* try to convert an 'a', a square bracket and a US-ASCII backslash */ |
| 1462 errorCode=U_ZERO_ERROR; |
| 1463 s=inBytes; |
| 1464 u=outUnicode; |
| 1465 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode); |
| 1466 if(U_FAILURE(errorCode)) { |
| 1467 /* we do not care about general failures in this test; the input may jus
t not be mappable */ |
| 1468 return; |
| 1469 } |
| 1470 |
| 1471 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) { |
| 1472 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: thi
s test is not applicable */ |
| 1473 /* There are some encodings that are partially ASCII based, |
| 1474 like the ISO-7 and GSM series of codepages, which we ignore. */ |
| 1475 return; |
| 1476 } |
| 1477 |
| 1478 isAmbiguous=ucnv_isAmbiguous(cnv); |
| 1479 |
| 1480 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous()
*/ |
| 1481 if((outUnicode[2]!=0x5c)!=isAmbiguous) { |
| 1482 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAm
biguous()==%d\n", |
| 1483 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous); |
| 1484 return; |
| 1485 } |
| 1486 |
| 1487 if(outUnicode[2]!=0x5c) { |
| 1488 /* needs fixup, fix it */ |
| 1489 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode)); |
| 1490 if(outUnicode[2]!=0x5c) { |
| 1491 /* the fix failed */ |
| 1492 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cn
v, &errorCode)); |
| 1493 return; |
| 1494 } |
| 1495 } |
| 1496 } |
| 1497 |
| 1498 static void TestAmbiguous() |
| 1499 { |
| 1500 UErrorCode status = U_ZERO_ERROR; |
| 1501 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv; |
| 1502 static const char target[] = { |
| 1503 /* "\\usr\\local\\share\\data\\icutest.txt" */ |
| 1504 0x5c, 0x75, 0x73, 0x72, |
| 1505 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, |
| 1506 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65, |
| 1507 0x5c, 0x64, 0x61, 0x74, 0x61, |
| 1508 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74, |
| 1509 0 |
| 1510 }; |
| 1511 UChar asciiResult[200], sjisResult[200]; |
| 1512 int32_t /*asciiLength = 0,*/ sjisLength = 0, i; |
| 1513 const char *name; |
| 1514 |
| 1515 /* enumerate all converters */ |
| 1516 status=U_ZERO_ERROR; |
| 1517 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) { |
| 1518 cnv=ucnv_open(name, &status); |
| 1519 if(U_SUCCESS(status)) { |
| 1520 TestAmbiguousConverter(cnv); |
| 1521 ucnv_close(cnv); |
| 1522 } else { |
| 1523 log_err("error: unable to open available converter \"%s\"\n", name); |
| 1524 status=U_ZERO_ERROR; |
| 1525 } |
| 1526 } |
| 1527 |
| 1528 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 1529 sjis_cnv = ucnv_open("ibm-943", &status); |
| 1530 if (U_FAILURE(status)) |
| 1531 { |
| 1532 log_data_err("Failed to create a SJIS converter\n"); |
| 1533 return; |
| 1534 } |
| 1535 ascii_cnv = ucnv_open("LATIN-1", &status); |
| 1536 if (U_FAILURE(status)) |
| 1537 { |
| 1538 log_data_err("Failed to create a LATIN-1 converter\n"); |
| 1539 ucnv_close(sjis_cnv); |
| 1540 return; |
| 1541 } |
| 1542 /* convert target from SJIS to Unicode */ |
| 1543 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF
_UCHAR, target, (int32_t)strlen(target), &status); |
| 1544 if (U_FAILURE(status)) |
| 1545 { |
| 1546 log_err("Failed to convert the SJIS string.\n"); |
| 1547 ucnv_close(sjis_cnv); |
| 1548 ucnv_close(ascii_cnv); |
| 1549 return; |
| 1550 } |
| 1551 /* convert target from Latin-1 to Unicode */ |
| 1552 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/
U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); |
| 1553 if (U_FAILURE(status)) |
| 1554 { |
| 1555 log_err("Failed to convert the Latin-1 string.\n"); |
| 1556 ucnv_close(sjis_cnv); |
| 1557 ucnv_close(ascii_cnv); |
| 1558 return; |
| 1559 } |
| 1560 if (!ucnv_isAmbiguous(sjis_cnv)) |
| 1561 { |
| 1562 log_err("SJIS converter should contain ambiguous character mappings.\n")
; |
| 1563 ucnv_close(sjis_cnv); |
| 1564 ucnv_close(ascii_cnv); |
| 1565 return; |
| 1566 } |
| 1567 if (u_strcmp(sjisResult, asciiResult) == 0) |
| 1568 { |
| 1569 log_err("File separators for SJIS don't need to be fixed.\n"); |
| 1570 } |
| 1571 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength); |
| 1572 if (u_strcmp(sjisResult, asciiResult) != 0) |
| 1573 { |
| 1574 log_err("Fixing file separator for SJIS failed.\n"); |
| 1575 } |
| 1576 ucnv_close(sjis_cnv); |
| 1577 ucnv_close(ascii_cnv); |
| 1578 #endif |
| 1579 } |
| 1580 |
| 1581 static void |
| 1582 TestSignatureDetection(){ |
| 1583 /* with null terminated strings */ |
| 1584 { |
| 1585 static const char* data[] = { |
| 1586 "\xFE\xFF\x00\x00", /* UTF-16BE */ |
| 1587 "\xFF\xFE\x00\x00", /* UTF-16LE */ |
| 1588 "\xEF\xBB\xBF\x00", /* UTF-8 */ |
| 1589 "\x0E\xFE\xFF\x00", /* SCSU */ |
| 1590 |
| 1591 "\xFE\xFF", /* UTF-16BE */ |
| 1592 "\xFF\xFE", /* UTF-16LE */ |
| 1593 "\xEF\xBB\xBF", /* UTF-8 */ |
| 1594 "\x0E\xFE\xFF", /* SCSU */ |
| 1595 |
| 1596 "\xFE\xFF\x41\x42", /* UTF-16BE */ |
| 1597 "\xFF\xFE\x41\x41", /* UTF-16LE */ |
| 1598 "\xEF\xBB\xBF\x41", /* UTF-8 */ |
| 1599 "\x0E\xFE\xFF\x41", /* SCSU */ |
| 1600 |
| 1601 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */ |
| 1602 "\x2B\x2F\x76\x38\x41", /* UTF-7 */ |
| 1603 "\x2B\x2F\x76\x39\x41", /* UTF-7 */ |
| 1604 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */ |
| 1605 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */ |
| 1606 |
| 1607 "\xDD\x73\x66\x73" /* UTF-EBCDIC */ |
| 1608 }; |
| 1609 static const char* expected[] = { |
| 1610 "UTF-16BE", |
| 1611 "UTF-16LE", |
| 1612 "UTF-8", |
| 1613 "SCSU", |
| 1614 |
| 1615 "UTF-16BE", |
| 1616 "UTF-16LE", |
| 1617 "UTF-8", |
| 1618 "SCSU", |
| 1619 |
| 1620 "UTF-16BE", |
| 1621 "UTF-16LE", |
| 1622 "UTF-8", |
| 1623 "SCSU", |
| 1624 |
| 1625 "UTF-7", |
| 1626 "UTF-7", |
| 1627 "UTF-7", |
| 1628 "UTF-7", |
| 1629 "UTF-7", |
| 1630 "UTF-EBCDIC" |
| 1631 }; |
| 1632 static const int32_t expectedLength[] ={ |
| 1633 2, |
| 1634 2, |
| 1635 3, |
| 1636 3, |
| 1637 |
| 1638 2, |
| 1639 2, |
| 1640 3, |
| 1641 3, |
| 1642 |
| 1643 2, |
| 1644 2, |
| 1645 3, |
| 1646 3, |
| 1647 |
| 1648 5, |
| 1649 4, |
| 1650 4, |
| 1651 4, |
| 1652 4, |
| 1653 4 |
| 1654 }; |
| 1655 int i=0; |
| 1656 UErrorCode err; |
| 1657 int32_t signatureLength = -1; |
| 1658 const char* source = NULL; |
| 1659 const char* enc = NULL; |
| 1660 for( ; i<sizeof(data)/sizeof(char*); i++){ |
| 1661 err = U_ZERO_ERROR; |
| 1662 source = data[i]; |
| 1663 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &er
r); |
| 1664 if(U_FAILURE(err)){ |
| 1665 log_err("ucnv_detectUnicodeSignature failed for source : %s at i
ndex :%i. Error: %s\n", source,i,u_errorName(err)); |
| 1666 continue; |
| 1667 } |
| 1668 if(enc == NULL || strcmp(enc,expected[i]) !=0){ |
| 1669 log_err("ucnv_detectUnicodeSignature failed for source : %s at i
ndex :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); |
| 1670 continue; |
| 1671 } |
| 1672 if(signatureLength != expectedLength[i]){ |
| 1673 log_err("ucnv_detectUnicodeSignature failed for source : %s at i
ndex :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expecte
dLength[i]); |
| 1674 } |
| 1675 } |
| 1676 } |
| 1677 { |
| 1678 static const char* data[] = { |
| 1679 "\xFE\xFF\x00", /* UTF-16BE */ |
| 1680 "\xFF\xFE\x00", /* UTF-16LE */ |
| 1681 "\xEF\xBB\xBF\x00", /* UTF-8 */ |
| 1682 "\x0E\xFE\xFF\x00", /* SCSU */ |
| 1683 "\x00\x00\xFE\xFF", /* UTF-32BE */ |
| 1684 "\xFF\xFE\x00\x00", /* UTF-32LE */ |
| 1685 "\xFE\xFF", /* UTF-16BE */ |
| 1686 "\xFF\xFE", /* UTF-16LE */ |
| 1687 "\xEF\xBB\xBF", /* UTF-8 */ |
| 1688 "\x0E\xFE\xFF", /* SCSU */ |
| 1689 "\x00\x00\xFE\xFF", /* UTF-32BE */ |
| 1690 "\xFF\xFE\x00\x00", /* UTF-32LE */ |
| 1691 "\xFE\xFF\x41\x42", /* UTF-16BE */ |
| 1692 "\xFF\xFE\x41\x41", /* UTF-16LE */ |
| 1693 "\xEF\xBB\xBF\x41", /* UTF-8 */ |
| 1694 "\x0E\xFE\xFF\x41", /* SCSU */ |
| 1695 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */ |
| 1696 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */ |
| 1697 "\xFB\xEE\x28", /* BOCU-1 */ |
| 1698 "\xFF\x41\x42" /* NULL */ |
| 1699 }; |
| 1700 static const int len[] = { |
| 1701 3, |
| 1702 3, |
| 1703 4, |
| 1704 4, |
| 1705 4, |
| 1706 4, |
| 1707 2, |
| 1708 2, |
| 1709 3, |
| 1710 3, |
| 1711 4, |
| 1712 4, |
| 1713 4, |
| 1714 4, |
| 1715 4, |
| 1716 4, |
| 1717 5, |
| 1718 5, |
| 1719 3, |
| 1720 3 |
| 1721 }; |
| 1722 |
| 1723 static const char* expected[] = { |
| 1724 "UTF-16BE", |
| 1725 "UTF-16LE", |
| 1726 "UTF-8", |
| 1727 "SCSU", |
| 1728 "UTF-32BE", |
| 1729 "UTF-32LE", |
| 1730 "UTF-16BE", |
| 1731 "UTF-16LE", |
| 1732 "UTF-8", |
| 1733 "SCSU", |
| 1734 "UTF-32BE", |
| 1735 "UTF-32LE", |
| 1736 "UTF-16BE", |
| 1737 "UTF-16LE", |
| 1738 "UTF-8", |
| 1739 "SCSU", |
| 1740 "UTF-32BE", |
| 1741 "UTF-32LE", |
| 1742 "BOCU-1", |
| 1743 NULL |
| 1744 }; |
| 1745 static const int32_t expectedLength[] ={ |
| 1746 2, |
| 1747 2, |
| 1748 3, |
| 1749 3, |
| 1750 4, |
| 1751 4, |
| 1752 2, |
| 1753 2, |
| 1754 3, |
| 1755 3, |
| 1756 4, |
| 1757 4, |
| 1758 2, |
| 1759 2, |
| 1760 3, |
| 1761 3, |
| 1762 4, |
| 1763 4, |
| 1764 3, |
| 1765 0 |
| 1766 }; |
| 1767 int i=0; |
| 1768 UErrorCode err; |
| 1769 int32_t signatureLength = -1; |
| 1770 int32_t sourceLength=-1; |
| 1771 const char* source = NULL; |
| 1772 const char* enc = NULL; |
| 1773 for( ; i<sizeof(data)/sizeof(char*); i++){ |
| 1774 err = U_ZERO_ERROR; |
| 1775 source = data[i]; |
| 1776 sourceLength = len[i]; |
| 1777 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureL
ength, &err); |
| 1778 if(U_FAILURE(err)){ |
| 1779 log_err("ucnv_detectUnicodeSignature test2 failed for source : %
s at index :%i. Error: %s\n", source,i,u_errorName(err)); |
| 1780 continue; |
| 1781 } |
| 1782 if(enc == NULL || strcmp(enc,expected[i]) !=0){ |
| 1783 if(expected[i] !=NULL){ |
| 1784 log_err("ucnv_detectUnicodeSignature test2 failed for source :
%s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); |
| 1785 continue; |
| 1786 } |
| 1787 } |
| 1788 if(signatureLength != expectedLength[i]){ |
| 1789 log_err("ucnv_detectUnicodeSignature test2 failed for source : %
s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,e
xpectedLength[i]); |
| 1790 } |
| 1791 } |
| 1792 } |
| 1793 } |
| 1794 |
| 1795 static void TestUTF7() { |
| 1796 /* test input */ |
| 1797 static const uint8_t in[]={ |
| 1798 /* H - +Jjo- - ! +- +2AHcAQ */ |
| 1799 0x48, |
| 1800 0x2d, |
| 1801 0x2b, 0x4a, 0x6a, 0x6f, |
| 1802 0x2d, 0x2d, |
| 1803 0x21, |
| 1804 0x2b, 0x2d, |
| 1805 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51 |
| 1806 }; |
| 1807 |
| 1808 /* expected test results */ |
| 1809 static const int32_t results[]={ |
| 1810 /* number of bytes read, code point */ |
| 1811 1, 0x48, |
| 1812 1, 0x2d, |
| 1813 4, 0x263a, /* <WHITE SMILING FACE> */ |
| 1814 2, 0x2d, |
| 1815 1, 0x21, |
| 1816 2, 0x2b, |
| 1817 7, 0x10401 |
| 1818 }; |
| 1819 |
| 1820 const char *cnvName; |
| 1821 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); |
| 1822 UErrorCode errorCode=U_ZERO_ERROR; |
| 1823 UConverter *cnv=ucnv_open("UTF-7", &errorCode); |
| 1824 if(U_FAILURE(errorCode)) { |
| 1825 log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)
); /* sholdn't be a data err */ |
| 1826 return; |
| 1827 } |
| 1828 TestNextUChar(cnv, source, limit, results, "UTF-7"); |
| 1829 /* Test the condition when source >= sourceLimit */ |
| 1830 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); |
| 1831 cnvName = ucnv_getName(cnv, &errorCode); |
| 1832 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) { |
| 1833 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(error
Code)); |
| 1834 } |
| 1835 ucnv_close(cnv); |
| 1836 } |
| 1837 |
| 1838 static void TestIMAP() { |
| 1839 /* test input */ |
| 1840 static const uint8_t in[]={ |
| 1841 /* H - &Jjo- - ! &- &2AHcAQ- \ */ |
| 1842 0x48, |
| 1843 0x2d, |
| 1844 0x26, 0x4a, 0x6a, 0x6f, |
| 1845 0x2d, 0x2d, |
| 1846 0x21, |
| 1847 0x26, 0x2d, |
| 1848 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d |
| 1849 }; |
| 1850 |
| 1851 /* expected test results */ |
| 1852 static const int32_t results[]={ |
| 1853 /* number of bytes read, code point */ |
| 1854 1, 0x48, |
| 1855 1, 0x2d, |
| 1856 4, 0x263a, /* <WHITE SMILING FACE> */ |
| 1857 2, 0x2d, |
| 1858 1, 0x21, |
| 1859 2, 0x26, |
| 1860 7, 0x10401 |
| 1861 }; |
| 1862 |
| 1863 const char *cnvName; |
| 1864 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); |
| 1865 UErrorCode errorCode=U_ZERO_ERROR; |
| 1866 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode); |
| 1867 if(U_FAILURE(errorCode)) { |
| 1868 log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorNam
e(errorCode)); /* sholdn't be a data err */ |
| 1869 return; |
| 1870 } |
| 1871 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name"); |
| 1872 /* Test the condition when source >= sourceLimit */ |
| 1873 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); |
| 1874 cnvName = ucnv_getName(cnv, &errorCode); |
| 1875 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0)
{ |
| 1876 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_err
orName(errorCode)); |
| 1877 } |
| 1878 ucnv_close(cnv); |
| 1879 } |
| 1880 |
| 1881 static void TestUTF8() { |
| 1882 /* test input */ |
| 1883 static const uint8_t in[]={ |
| 1884 0x61, |
| 1885 0xc2, 0x80, |
| 1886 0xe0, 0xa0, 0x80, |
| 1887 0xf0, 0x90, 0x80, 0x80, |
| 1888 0xf4, 0x84, 0x8c, 0xa1, |
| 1889 0xf0, 0x90, 0x90, 0x81 |
| 1890 }; |
| 1891 |
| 1892 /* expected test results */ |
| 1893 static const int32_t results[]={ |
| 1894 /* number of bytes read, code point */ |
| 1895 1, 0x61, |
| 1896 2, 0x80, |
| 1897 3, 0x800, |
| 1898 4, 0x10000, |
| 1899 4, 0x104321, |
| 1900 4, 0x10401 |
| 1901 }; |
| 1902 |
| 1903 /* error test input */ |
| 1904 static const uint8_t in2[]={ |
| 1905 0x61, |
| 1906 0xc0, 0x80, /* illegal non-shortest form */ |
| 1907 0xe0, 0x80, 0x80, /* illegal non-shortest form */ |
| 1908 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ |
| 1909 0xc0, 0xc0, /* illegal trail byte */ |
| 1910 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ |
| 1911 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ |
| 1912 0xfe, /* illegal byte altogether */ |
| 1913 0x62 |
| 1914 }; |
| 1915 |
| 1916 /* expected error test results */ |
| 1917 static const int32_t results2[]={ |
| 1918 /* number of bytes read, code point */ |
| 1919 1, 0x61, |
| 1920 22, 0x62 |
| 1921 }; |
| 1922 |
| 1923 UConverterToUCallback cb; |
| 1924 const void *p; |
| 1925 |
| 1926 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); |
| 1927 UErrorCode errorCode=U_ZERO_ERROR; |
| 1928 UConverter *cnv=ucnv_open("UTF-8", &errorCode); |
| 1929 if(U_FAILURE(errorCode)) { |
| 1930 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode)
); |
| 1931 return; |
| 1932 } |
| 1933 TestNextUChar(cnv, source, limit, results, "UTF-8"); |
| 1934 /* Test the condition when source >= sourceLimit */ |
| 1935 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); |
| 1936 |
| 1937 /* test error behavior with a skip callback */ |
| 1938 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode)
; |
| 1939 source=(const char *)in2; |
| 1940 limit=(const char *)(in2+sizeof(in2)); |
| 1941 TestNextUChar(cnv, source, limit, results2, "UTF-8"); |
| 1942 |
| 1943 ucnv_close(cnv); |
| 1944 } |
| 1945 |
| 1946 static void TestCESU8() { |
| 1947 /* test input */ |
| 1948 static const uint8_t in[]={ |
| 1949 0x61, |
| 1950 0xc2, 0x80, |
| 1951 0xe0, 0xa0, 0x80, |
| 1952 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, |
| 1953 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82, |
| 1954 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, |
| 1955 0xef, 0xbf, 0xbc |
| 1956 }; |
| 1957 |
| 1958 /* expected test results */ |
| 1959 static const int32_t results[]={ |
| 1960 /* number of bytes read, code point */ |
| 1961 1, 0x61, |
| 1962 2, 0x80, |
| 1963 3, 0x800, |
| 1964 6, 0x10000, |
| 1965 3, 0xdc01, |
| 1966 -1,0xd802, /* may read 3 or 6 bytes */ |
| 1967 -1,0x10ffff,/* may read 0 or 3 bytes */ |
| 1968 3, 0xfffc |
| 1969 }; |
| 1970 |
| 1971 /* error test input */ |
| 1972 static const uint8_t in2[]={ |
| 1973 0x61, |
| 1974 0xc0, 0x80, /* illegal non-shortest form */ |
| 1975 0xe0, 0x80, 0x80, /* illegal non-shortest form */ |
| 1976 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ |
| 1977 0xc0, 0xc0, /* illegal trail byte */ |
| 1978 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code poi
nt */ |
| 1979 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code poi
nt */ |
| 1980 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code poi
nt */ |
| 1981 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ |
| 1982 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ |
| 1983 0xfe, /* illegal byte altogether */ |
| 1984 0x62 |
| 1985 }; |
| 1986 |
| 1987 /* expected error test results */ |
| 1988 static const int32_t results2[]={ |
| 1989 /* number of bytes read, code point */ |
| 1990 1, 0x61, |
| 1991 34, 0x62 |
| 1992 }; |
| 1993 |
| 1994 UConverterToUCallback cb; |
| 1995 const void *p; |
| 1996 |
| 1997 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); |
| 1998 UErrorCode errorCode=U_ZERO_ERROR; |
| 1999 UConverter *cnv=ucnv_open("CESU-8", &errorCode); |
| 2000 if(U_FAILURE(errorCode)) { |
| 2001 log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode
)); |
| 2002 return; |
| 2003 } |
| 2004 TestNextUChar(cnv, source, limit, results, "CESU-8"); |
| 2005 /* Test the condition when source >= sourceLimit */ |
| 2006 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); |
| 2007 |
| 2008 /* test error behavior with a skip callback */ |
| 2009 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode)
; |
| 2010 source=(const char *)in2; |
| 2011 limit=(const char *)(in2+sizeof(in2)); |
| 2012 TestNextUChar(cnv, source, limit, results2, "CESU-8"); |
| 2013 |
| 2014 ucnv_close(cnv); |
| 2015 } |
| 2016 |
| 2017 static void TestUTF16() { |
| 2018 /* test input */ |
| 2019 static const uint8_t in1[]={ |
| 2020 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff |
| 2021 }; |
| 2022 static const uint8_t in2[]={ |
| 2023 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff |
| 2024 }; |
| 2025 static const uint8_t in3[]={ |
| 2026 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01 |
| 2027 }; |
| 2028 |
| 2029 /* expected test results */ |
| 2030 static const int32_t results1[]={ |
| 2031 /* number of bytes read, code point */ |
| 2032 4, 0x4e00, |
| 2033 2, 0xfeff |
| 2034 }; |
| 2035 static const int32_t results2[]={ |
| 2036 /* number of bytes read, code point */ |
| 2037 4, 0x004e, |
| 2038 2, 0xfffe |
| 2039 }; |
| 2040 static const int32_t results3[]={ |
| 2041 /* number of bytes read, code point */ |
| 2042 2, 0xfefe, |
| 2043 2, 0x4e00, |
| 2044 2, 0xfeff, |
| 2045 4, 0x20001 |
| 2046 }; |
| 2047 |
| 2048 const char *source, *limit; |
| 2049 |
| 2050 UErrorCode errorCode=U_ZERO_ERROR; |
| 2051 UConverter *cnv=ucnv_open("UTF-16", &errorCode); |
| 2052 if(U_FAILURE(errorCode)) { |
| 2053 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode
)); |
| 2054 return; |
| 2055 } |
| 2056 |
| 2057 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); |
| 2058 TestNextUChar(cnv, source, limit, results1, "UTF-16"); |
| 2059 |
| 2060 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); |
| 2061 ucnv_resetToUnicode(cnv); |
| 2062 TestNextUChar(cnv, source, limit, results2, "UTF-16"); |
| 2063 |
| 2064 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); |
| 2065 ucnv_resetToUnicode(cnv); |
| 2066 TestNextUChar(cnv, source, limit, results3, "UTF-16"); |
| 2067 |
| 2068 /* Test the condition when source >= sourceLimit */ |
| 2069 ucnv_resetToUnicode(cnv); |
| 2070 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); |
| 2071 |
| 2072 ucnv_close(cnv); |
| 2073 } |
| 2074 |
| 2075 static void TestUTF16BE() { |
| 2076 /* test input */ |
| 2077 static const uint8_t in[]={ |
| 2078 0x00, 0x61, |
| 2079 0x00, 0xc0, |
| 2080 0x00, 0x31, |
| 2081 0x00, 0xf4, |
| 2082 0xce, 0xfe, |
| 2083 0xd8, 0x01, 0xdc, 0x01 |
| 2084 }; |
| 2085 |
| 2086 /* expected test results */ |
| 2087 static const int32_t results[]={ |
| 2088 /* number of bytes read, code point */ |
| 2089 2, 0x61, |
| 2090 2, 0xc0, |
| 2091 2, 0x31, |
| 2092 2, 0xf4, |
| 2093 2, 0xcefe, |
| 2094 4, 0x10401 |
| 2095 }; |
| 2096 |
| 2097 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); |
| 2098 UErrorCode errorCode=U_ZERO_ERROR; |
| 2099 UConverter *cnv=ucnv_open("utf-16be", &errorCode); |
| 2100 if(U_FAILURE(errorCode)) { |
| 2101 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCo
de)); |
| 2102 return; |
| 2103 } |
| 2104 TestNextUChar(cnv, source, limit, results, "UTF-16BE"); |
| 2105 /* Test the condition when source >= sourceLimit */ |
| 2106 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); |
| 2107 /*Test for the condition where there is an invalid character*/ |
| 2108 { |
| 2109 static const uint8_t source2[]={0x61}; |
| 2110 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); |
| 2111 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); |
| 2112 } |
| 2113 #if 0 |
| 2114 /* |
| 2115 * Test disabled because currently the UTF-16BE/LE converters are supposed |
| 2116 * to not set errors for unpaired surrogates. |
| 2117 * This may change with |
| 2118 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 |
| 2119 */ |
| 2120 |
| 2121 /*Test for the condition where there is a surrogate pair*/ |
| 2122 { |
| 2123 const uint8_t source2[]={0xd8, 0x01}; |
| 2124 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); |
| 2125 } |
| 2126 #endif |
| 2127 ucnv_close(cnv); |
| 2128 } |
| 2129 |
| 2130 static void |
| 2131 TestUTF16LE() { |
| 2132 /* test input */ |
| 2133 static const uint8_t in[]={ |
| 2134 0x61, 0x00, |
| 2135 0x31, 0x00, |
| 2136 0x4e, 0x2e, |
| 2137 0x4e, 0x00, |
| 2138 0x01, 0xd8, 0x01, 0xdc |
| 2139 }; |
| 2140 |
| 2141 /* expected test results */ |
| 2142 static const int32_t results[]={ |
| 2143 /* number of bytes read, code point */ |
| 2144 2, 0x61, |
| 2145 2, 0x31, |
| 2146 2, 0x2e4e, |
| 2147 2, 0x4e, |
| 2148 4, 0x10401 |
| 2149 }; |
| 2150 |
| 2151 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); |
| 2152 UErrorCode errorCode=U_ZERO_ERROR; |
| 2153 UConverter *cnv=ucnv_open("utf-16le", &errorCode); |
| 2154 if(U_FAILURE(errorCode)) { |
| 2155 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCo
de)); |
| 2156 return; |
| 2157 } |
| 2158 TestNextUChar(cnv, source, limit, results, "UTF-16LE"); |
| 2159 /* Test the condition when source >= sourceLimit */ |
| 2160 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); |
| 2161 /*Test for the condition where there is an invalid character*/ |
| 2162 { |
| 2163 static const uint8_t source2[]={0x61}; |
| 2164 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); |
| 2165 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); |
| 2166 } |
| 2167 #if 0 |
| 2168 /* |
| 2169 * Test disabled because currently the UTF-16BE/LE converters are supposed |
| 2170 * to not set errors for unpaired surrogates. |
| 2171 * This may change with |
| 2172 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 |
| 2173 */ |
| 2174 |
| 2175 /*Test for the condition where there is a surrogate character*/ |
| 2176 { |
| 2177 static const uint8_t source2[]={0x01, 0xd8}; |
| 2178 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); |
| 2179 } |
| 2180 #endif |
| 2181 |
| 2182 ucnv_close(cnv); |
| 2183 } |
| 2184 |
| 2185 static void TestUTF32() { |
| 2186 /* test input */ |
| 2187 static const uint8_t in1[]={ |
| 2188 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0x
ff |
| 2189 }; |
| 2190 static const uint8_t in2[]={ |
| 2191 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x
00 |
| 2192 }; |
| 2193 static const uint8_t in3[]={ |
| 2194 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x
40, 0x00, 0x00, 0xdc, 0x01 |
| 2195 }; |
| 2196 |
| 2197 /* expected test results */ |
| 2198 static const int32_t results1[]={ |
| 2199 /* number of bytes read, code point */ |
| 2200 8, 0x100f00, |
| 2201 4, 0xfeff |
| 2202 }; |
| 2203 static const int32_t results2[]={ |
| 2204 /* number of bytes read, code point */ |
| 2205 8, 0x0f1000, |
| 2206 4, 0xfffe |
| 2207 }; |
| 2208 static const int32_t results3[]={ |
| 2209 /* number of bytes read, code point */ |
| 2210 4, 0xfefe, |
| 2211 4, 0x100f00, |
| 2212 4, 0xfffd, /* unmatched surrogate */ |
| 2213 4, 0xfffd /* unmatched surrogate */ |
| 2214 }; |
| 2215 |
| 2216 const char *source, *limit; |
| 2217 |
| 2218 UErrorCode errorCode=U_ZERO_ERROR; |
| 2219 UConverter *cnv=ucnv_open("UTF-32", &errorCode); |
| 2220 if(U_FAILURE(errorCode)) { |
| 2221 log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode
)); |
| 2222 return; |
| 2223 } |
| 2224 |
| 2225 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); |
| 2226 TestNextUChar(cnv, source, limit, results1, "UTF-32"); |
| 2227 |
| 2228 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); |
| 2229 ucnv_resetToUnicode(cnv); |
| 2230 TestNextUChar(cnv, source, limit, results2, "UTF-32"); |
| 2231 |
| 2232 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); |
| 2233 ucnv_resetToUnicode(cnv); |
| 2234 TestNextUChar(cnv, source, limit, results3, "UTF-32"); |
| 2235 |
| 2236 /* Test the condition when source >= sourceLimit */ |
| 2237 ucnv_resetToUnicode(cnv); |
| 2238 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); |
| 2239 |
| 2240 ucnv_close(cnv); |
| 2241 } |
| 2242 |
| 2243 static void |
| 2244 TestUTF32BE() { |
| 2245 /* test input */ |
| 2246 static const uint8_t in[]={ |
| 2247 0x00, 0x00, 0x00, 0x61, |
| 2248 0x00, 0x00, 0x30, 0x61, |
| 2249 0x00, 0x00, 0xdc, 0x00, |
| 2250 0x00, 0x00, 0xd8, 0x00, |
| 2251 0x00, 0x00, 0xdf, 0xff, |
| 2252 0x00, 0x00, 0xff, 0xfe, |
| 2253 0x00, 0x10, 0xab, 0xcd, |
| 2254 0x00, 0x10, 0xff, 0xff |
| 2255 }; |
| 2256 |
| 2257 /* expected test results */ |
| 2258 static const int32_t results[]={ |
| 2259 /* number of bytes read, code point */ |
| 2260 4, 0x61, |
| 2261 4, 0x3061, |
| 2262 4, 0xfffd, |
| 2263 4, 0xfffd, |
| 2264 4, 0xfffd, |
| 2265 4, 0xfffe, |
| 2266 4, 0x10abcd, |
| 2267 4, 0x10ffff |
| 2268 }; |
| 2269 |
| 2270 /* error test input */ |
| 2271 static const uint8_t in2[]={ |
| 2272 0x00, 0x00, 0x00, 0x61, |
| 2273 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ |
| 2274 0x00, 0x00, 0x00, 0x62, |
| 2275 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ |
| 2276 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ |
| 2277 0x00, 0x00, 0x01, 0x62, |
| 2278 0x00, 0x00, 0x02, 0x62 |
| 2279 }; |
| 2280 |
| 2281 /* expected error test results */ |
| 2282 static const int32_t results2[]={ |
| 2283 /* number of bytes read, code point */ |
| 2284 4, 0x61, |
| 2285 8, 0x62, |
| 2286 12, 0x162, |
| 2287 4, 0x262 |
| 2288 }; |
| 2289 |
| 2290 UConverterToUCallback cb; |
| 2291 const void *p; |
| 2292 |
| 2293 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); |
| 2294 UErrorCode errorCode=U_ZERO_ERROR; |
| 2295 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode); |
| 2296 if(U_FAILURE(errorCode)) { |
| 2297 log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCo
de)); |
| 2298 return; |
| 2299 } |
| 2300 TestNextUChar(cnv, source, limit, results, "UTF-32BE"); |
| 2301 |
| 2302 /* Test the condition when source >= sourceLimit */ |
| 2303 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); |
| 2304 |
| 2305 /* test error behavior with a skip callback */ |
| 2306 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode)
; |
| 2307 source=(const char *)in2; |
| 2308 limit=(const char *)(in2+sizeof(in2)); |
| 2309 TestNextUChar(cnv, source, limit, results2, "UTF-32BE"); |
| 2310 |
| 2311 ucnv_close(cnv); |
| 2312 } |
| 2313 |
| 2314 static void |
| 2315 TestUTF32LE() { |
| 2316 /* test input */ |
| 2317 static const uint8_t in[]={ |
| 2318 0x61, 0x00, 0x00, 0x00, |
| 2319 0x61, 0x30, 0x00, 0x00, |
| 2320 0x00, 0xdc, 0x00, 0x00, |
| 2321 0x00, 0xd8, 0x00, 0x00, |
| 2322 0xff, 0xdf, 0x00, 0x00, |
| 2323 0xfe, 0xff, 0x00, 0x00, |
| 2324 0xcd, 0xab, 0x10, 0x00, |
| 2325 0xff, 0xff, 0x10, 0x00 |
| 2326 }; |
| 2327 |
| 2328 /* expected test results */ |
| 2329 static const int32_t results[]={ |
| 2330 /* number of bytes read, code point */ |
| 2331 4, 0x61, |
| 2332 4, 0x3061, |
| 2333 4, 0xfffd, |
| 2334 4, 0xfffd, |
| 2335 4, 0xfffd, |
| 2336 4, 0xfffe, |
| 2337 4, 0x10abcd, |
| 2338 4, 0x10ffff |
| 2339 }; |
| 2340 |
| 2341 /* error test input */ |
| 2342 static const uint8_t in2[]={ |
| 2343 0x61, 0x00, 0x00, 0x00, |
| 2344 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ |
| 2345 0x62, 0x00, 0x00, 0x00, |
| 2346 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ |
| 2347 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ |
| 2348 0x62, 0x01, 0x00, 0x00, |
| 2349 0x62, 0x02, 0x00, 0x00, |
| 2350 }; |
| 2351 |
| 2352 /* expected error test results */ |
| 2353 static const int32_t results2[]={ |
| 2354 /* number of bytes read, code point */ |
| 2355 4, 0x61, |
| 2356 8, 0x62, |
| 2357 12, 0x162, |
| 2358 4, 0x262, |
| 2359 }; |
| 2360 |
| 2361 UConverterToUCallback cb; |
| 2362 const void *p; |
| 2363 |
| 2364 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); |
| 2365 UErrorCode errorCode=U_ZERO_ERROR; |
| 2366 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode); |
| 2367 if(U_FAILURE(errorCode)) { |
| 2368 log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCo
de)); |
| 2369 return; |
| 2370 } |
| 2371 TestNextUChar(cnv, source, limit, results, "UTF-32LE"); |
| 2372 |
| 2373 /* Test the condition when source >= sourceLimit */ |
| 2374 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); |
| 2375 |
| 2376 /* test error behavior with a skip callback */ |
| 2377 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode)
; |
| 2378 source=(const char *)in2; |
| 2379 limit=(const char *)(in2+sizeof(in2)); |
| 2380 TestNextUChar(cnv, source, limit, results2, "UTF-32LE"); |
| 2381 |
| 2382 ucnv_close(cnv); |
| 2383 } |
| 2384 |
| 2385 static void |
| 2386 TestLATIN1() { |
| 2387 /* test input */ |
| 2388 static const uint8_t in[]={ |
| 2389 0x61, |
| 2390 0x31, |
| 2391 0x32, |
| 2392 0xc0, |
| 2393 0xf0, |
| 2394 0xf4, |
| 2395 }; |
| 2396 |
| 2397 /* expected test results */ |
| 2398 static const int32_t results[]={ |
| 2399 /* number of bytes read, code point */ |
| 2400 1, 0x61, |
| 2401 1, 0x31, |
| 2402 1, 0x32, |
| 2403 1, 0xc0, |
| 2404 1, 0xf0, |
| 2405 1, 0xf4, |
| 2406 }; |
| 2407 static const uint16_t in1[] = { |
| 2408 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef,
0x61, 0x1b, 0xe5, 0x84, |
| 2409 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3,
0x94, 0x08, 0x02, 0x0f, |
| 2410 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b,
0x6d, 0x41, 0x88, 0x4c, |
| 2411 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e,
0x6b, 0x4c, 0x08, 0x0d, |
| 2412 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa,
0x84, 0x08, 0x02, 0x0e, |
| 2413 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc,
0x9f, 0x0e, 0x79, 0x3e, |
| 2414 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08,
0x88, 0xbe, 0xa3, 0x8d, |
| 2415 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08,
0x01, 0x93, 0xc8, 0xaa, |
| 2416 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae,
0x93, 0xa8, 0xa0, 0x08, |
| 2417 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80,
0x05, 0xec, 0x60, 0x8d, |
| 2418 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4,
0xfe, 0xe7, 0xc2, 0x06, |
| 2419 0xcb, 0x82 |
| 2420 }; |
| 2421 static const uint8_t out1[] = { |
| 2422 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef,
0x61, 0x1b, 0xe5, 0x84, |
| 2423 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3,
0x94, 0x08, 0x02, 0x0f, |
| 2424 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b,
0x6d, 0x41, 0x88, 0x4c, |
| 2425 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e,
0x6b, 0x4c, 0x08, 0x0d, |
| 2426 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa,
0x84, 0x08, 0x02, 0x0e, |
| 2427 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc,
0x9f, 0x0e, 0x79, 0x3e, |
| 2428 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08,
0x88, 0xbe, 0xa3, 0x8d, |
| 2429 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08,
0x01, 0x93, 0xc8, 0xaa, |
| 2430 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae,
0x93, 0xa8, 0xa0, 0x08, |
| 2431 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80,
0x05, 0xec, 0x60, 0x8d, |
| 2432 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4,
0xfe, 0xe7, 0xc2, 0x06, |
| 2433 0xcb, 0x82 |
| 2434 }; |
| 2435 static const uint16_t in2[]={ |
| 2436 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, |
| 2437 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, |
| 2438 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, |
| 2439 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, |
| 2440 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, |
| 2441 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, |
| 2442 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, |
| 2443 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, |
| 2444 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, |
| 2445 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, |
| 2446 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, |
| 2447 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, |
| 2448 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, |
| 2449 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, |
| 2450 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, |
| 2451 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, |
| 2452 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, |
| 2453 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, |
| 2454 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, |
| 2455 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, |
| 2456 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, |
| 2457 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, |
| 2458 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, |
| 2459 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, |
| 2460 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, |
| 2461 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, |
| 2462 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, |
| 2463 0x37, 0x20, 0x2A, 0x2F, |
| 2464 }; |
| 2465 static const unsigned char out2[]={ |
| 2466 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, |
| 2467 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, |
| 2468 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, |
| 2469 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, |
| 2470 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, |
| 2471 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, |
| 2472 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, |
| 2473 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, |
| 2474 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, |
| 2475 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, |
| 2476 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, |
| 2477 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, |
| 2478 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, |
| 2479 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, |
| 2480 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, |
| 2481 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, |
| 2482 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, |
| 2483 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, |
| 2484 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, |
| 2485 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, |
| 2486 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, |
| 2487 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, |
| 2488 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, |
| 2489 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, |
| 2490 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, |
| 2491 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, |
| 2492 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, |
| 2493 0x37, 0x20, 0x2A, 0x2F, |
| 2494 }; |
| 2495 const char *source=(const char *)in; |
| 2496 const char *limit=(const char *)in+sizeof(in); |
| 2497 |
| 2498 UErrorCode errorCode=U_ZERO_ERROR; |
| 2499 UConverter *cnv=ucnv_open("LATIN_1", &errorCode); |
| 2500 if(U_FAILURE(errorCode)) { |
| 2501 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(err
orCode)); |
| 2502 return; |
| 2503 } |
| 2504 TestNextUChar(cnv, source, limit, results, "LATIN_1"); |
| 2505 /* Test the condition when source >= sourceLimit */ |
| 2506 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); |
| 2507 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof
(out1)); |
| 2508 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out
2)); |
| 2509 |
| 2510 ucnv_close(cnv); |
| 2511 } |
| 2512 |
| 2513 static void |
| 2514 TestSBCS() { |
| 2515 /* test input */ |
| 2516 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4}; |
| 2517 /* expected test results */ |
| 2518 static const int32_t results[]={ |
| 2519 /* number of bytes read, code point */ |
| 2520 1, 0x61, |
| 2521 1, 0xbf, |
| 2522 1, 0xc4, |
| 2523 1, 0x2021, |
| 2524 1, 0xf8ff, |
| 2525 1, 0x00d9 |
| 2526 }; |
| 2527 |
| 2528 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); |
| 2529 UErrorCode errorCode=U_ZERO_ERROR; |
| 2530 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode); |
| 2531 if(U_FAILURE(errorCode)) { |
| 2532 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_e
rrorName(errorCode)); |
| 2533 return; |
| 2534 } |
| 2535 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)"); |
| 2536 /* Test the condition when source >= sourceLimit */ |
| 2537 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); |
| 2538 /*Test for Illegal character */ /* |
| 2539 { |
| 2540 static const uint8_t input1[]={ 0xA1 }; |
| 2541 const char* illegalsource=(const char*)input1; |
| 2542 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource),
U_INVALID_CHAR_FOUND, "source has a illegal characte"); |
| 2543 } |
| 2544 */ |
| 2545 ucnv_close(cnv); |
| 2546 } |
| 2547 |
| 2548 static void |
| 2549 TestDBCS() { |
| 2550 /* test input */ |
| 2551 static const uint8_t in[]={ |
| 2552 0x44, 0x6a, |
| 2553 0xc4, 0x9c, |
| 2554 0x7a, 0x74, |
| 2555 0x46, 0xab, |
| 2556 0x42, 0x5b, |
| 2557 |
| 2558 }; |
| 2559 |
| 2560 /* expected test results */ |
| 2561 static const int32_t results[]={ |
| 2562 /* number of bytes read, code point */ |
| 2563 2, 0x00a7, |
| 2564 2, 0xe1d2, |
| 2565 2, 0x6962, |
| 2566 2, 0xf842, |
| 2567 2, 0xffe5, |
| 2568 }; |
| 2569 |
| 2570 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); |
| 2571 UErrorCode errorCode=U_ZERO_ERROR; |
| 2572 |
| 2573 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode); |
| 2574 if(U_FAILURE(errorCode)) { |
| 2575 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorN
ame(errorCode)); |
| 2576 return; |
| 2577 } |
| 2578 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)"); |
| 2579 /* Test the condition when source >= sourceLimit */ |
| 2580 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); |
| 2581 /*Test for the condition where there is an invalid character*/ |
| 2582 { |
| 2583 static const uint8_t source2[]={0x1a, 0x1b}; |
| 2584 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character"); |
| 2585 } |
| 2586 /*Test for the condition where we have a truncated char*/ |
| 2587 { |
| 2588 static const uint8_t source1[]={0xc4}; |
| 2589 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); |
| 2590 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeo
f(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); |
| 2591 } |
| 2592 ucnv_close(cnv); |
| 2593 } |
| 2594 |
| 2595 static void |
| 2596 TestMBCS() { |
| 2597 /* test input */ |
| 2598 static const uint8_t in[]={ |
| 2599 0x01, |
| 2600 0xa6, 0xa3, |
| 2601 0x00, |
| 2602 0xa6, 0xa1, |
| 2603 0x08, |
| 2604 0xc2, 0x76, |
| 2605 0xc2, 0x78, |
| 2606 |
| 2607 }; |
| 2608 |
| 2609 /* expected test results */ |
| 2610 static const int32_t results[]={ |
| 2611 /* number of bytes read, code point */ |
| 2612 1, 0x0001, |
| 2613 2, 0x250c, |
| 2614 1, 0x0000, |
| 2615 2, 0x2500, |
| 2616 1, 0x0008, |
| 2617 2, 0xd60c, |
| 2618 2, 0xd60e, |
| 2619 }; |
| 2620 |
| 2621 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); |
| 2622 UErrorCode errorCode=U_ZERO_ERROR; |
| 2623 |
| 2624 UConverter *cnv=ucnv_open("ibm-1363", &errorCode); |
| 2625 if(U_FAILURE(errorCode)) { |
| 2626 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorN
ame(errorCode)); |
| 2627 return; |
| 2628 } |
| 2629 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)"); |
| 2630 /* Test the condition when source >= sourceLimit */ |
| 2631 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); |
| 2632 /*Test for the condition where there is an invalid character*/ |
| 2633 { |
| 2634 static const uint8_t source2[]={0xa1, 0x80}; |
| 2635 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character"); |
| 2636 } |
| 2637 /*Test for the condition where we have a truncated char*/ |
| 2638 { |
| 2639 static const uint8_t source1[]={0xc4}; |
| 2640 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); |
| 2641 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeo
f(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); |
| 2642 } |
| 2643 ucnv_close(cnv); |
| 2644 |
| 2645 } |
| 2646 |
| 2647 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO |
| 2648 static void |
| 2649 TestICCRunout() { |
| 2650 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1},
:int{0}, "\", "?", :bin{""} } */ |
| 2651 |
| 2652 const char *cnvName = "ibm-1363"; |
| 2653 UErrorCode status = U_ZERO_ERROR; |
| 2654 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 }; |
| 2655 /* UChar expectUData[] = { 0x00a1, 0x001a }; */ |
| 2656 const char *source = sourceData; |
| 2657 const char *sourceLim = sourceData+sizeof(sourceData); |
| 2658 UChar c1, c2, c3; |
| 2659 UConverter *cnv=ucnv_open(cnvName, &status); |
| 2660 if(U_FAILURE(status)) { |
| 2661 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(s
tatus)); |
| 2662 return; |
| 2663 } |
| 2664 |
| 2665 #if 0 |
| 2666 { |
| 2667 UChar targetBuf[256]; |
| 2668 UChar *target = targetBuf; |
| 2669 UChar *targetLim = target+256; |
| 2670 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &sta
tus); |
| 2671 |
| 2672 log_info("After convert: target@%d, source@%d, status%s\n", |
| 2673 target-targetBuf, source-sourceData, u_errorName(status)); |
| 2674 |
| 2675 if(U_FAILURE(status)) { |
| 2676 log_err("Failed to convert: %s\n", u_errorName(status)); |
| 2677 } else { |
| 2678 |
| 2679 } |
| 2680 } |
| 2681 #endif |
| 2682 |
| 2683 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status); |
| 2684 log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_e
rrorName(status)); |
| 2685 |
| 2686 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status); |
| 2687 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_e
rrorName(status)); |
| 2688 |
| 2689 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status); |
| 2690 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_e
rrorName(status)); |
| 2691 |
| 2692 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) { |
| 2693 log_verbose("OK\n"); |
| 2694 } else { |
| 2695 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n
"); |
| 2696 } |
| 2697 |
| 2698 ucnv_close(cnv); |
| 2699 |
| 2700 } |
| 2701 #endif |
| 2702 |
#ifdef U_ENABLE_GENERIC_ISO_2022

/*
 * Exercises ucnv_getNextUChar() through TestNextUChar()/TestNextUCharError()
 * on the generic ISO_2022 converter. Only compiled when
 * U_ENABLE_GENERIC_ISO_2022 is defined.
 */
static void
TestISO_2022() {
    /* a three-byte escape sequence followed by 1-, 2-, 3- and 4-byte sequences */
    static const uint8_t isoBytes[]={
        0x1b, 0x25, 0x42,
        0x31,
        0x32,
        0x61,
        0xc2, 0x80,
        0xe0, 0xa0, 0x80,
        0xf0, 0x90, 0x80, 0x80
    };
    /* expected output as (number of bytes read, code point) pairs */
    static const int32_t expected[]={
        4, 0x0031,  /* 4 bytes including the escape sequence */
        1, 0x0032,
        1, 0x61,
        2, 0x80,
        3, 0x800,
        4, 0x10000
    };
    /* a single byte on its own; the test expects U_TRUNCATED_CHAR_FOUND */
    static const uint8_t loneByte[]={0xc4};
    /* byte pair the test expects to be rejected with U_ILLEGAL_CHAR_FOUND */
    static const uint8_t illegalPair[]={0xa1, 0x01};

    const char *start=(const char *)isoBytes;
    const char *end=start+sizeof(isoBytes);
    UErrorCode status=U_ZERO_ERROR;
    UConverter *converter=ucnv_open("ISO_2022", &status);

    if(U_FAILURE(status)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(status));
        return;
    }

    TestNextUChar(converter, start, end, expected, "ISO_2022");

    /* limit before source, then limit equal to source */
    TestNextUCharError(converter, start, start-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
    TestNextUCharError(converter, start, start, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");

    /* truncated character; the STOP callback is installed first */
    ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
    TestNextUCharError(converter, (const char*)loneByte,
                       (const char*)loneByte+sizeof(loneByte),
                       U_TRUNCATED_CHAR_FOUND, "a character is truncated");

    /* invalid character */
    TestNextUCharError(converter, (const char*)illegalPair,
                       (const char*)illegalPair+sizeof(illegalPair),
                       U_ILLEGAL_CHAR_FOUND, "an invalid character");

    ucnv_close(converter);
}

#endif
| 2760 |
| 2761 static void |
| 2762 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverte
r* cnv){ |
| 2763 const UChar* uSource; |
| 2764 const UChar* uSourceLimit; |
| 2765 const char* cSource; |
| 2766 const char* cSourceLimit; |
| 2767 UChar *uTargetLimit =NULL; |
| 2768 UChar *uTarget; |
| 2769 char *cTarget; |
| 2770 const char *cTargetLimit; |
| 2771 char *cBuf; |
| 2772 UChar *uBuf; /*,*test;*/ |
| 2773 int32_t uBufSize = 120; |
| 2774 int len=0; |
| 2775 int i=2; |
| 2776 UErrorCode errorCode=U_ZERO_ERROR; |
| 2777 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); |
| 2778 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); |
| 2779 ucnv_reset(cnv); |
| 2780 for(;--i>0; ){ |
| 2781 uSource = (UChar*) source; |
| 2782 uSourceLimit=(const UChar*)sourceLimit; |
| 2783 cTarget = cBuf; |
| 2784 uTarget = uBuf; |
| 2785 cSource = cBuf; |
| 2786 cTargetLimit = cBuf; |
| 2787 uTargetLimit = uBuf; |
| 2788 |
| 2789 do{ |
| 2790 |
| 2791 cTargetLimit = cTargetLimit+ i; |
| 2792 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit
,NULL,FALSE, &errorCode); |
| 2793 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ |
| 2794 errorCode=U_ZERO_ERROR; |
| 2795 continue; |
| 2796 } |
| 2797 |
| 2798 if(U_FAILURE(errorCode)){ |
| 2799 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorN
ame(errorCode)); |
| 2800 return; |
| 2801 } |
| 2802 |
| 2803 }while (uSource<uSourceLimit); |
| 2804 |
| 2805 cSourceLimit =cTarget; |
| 2806 do{ |
| 2807 uTargetLimit=uTargetLimit+i; |
| 2808 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,
FALSE,&errorCode); |
| 2809 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ |
| 2810 errorCode=U_ZERO_ERROR; |
| 2811 continue; |
| 2812 } |
| 2813 if(U_FAILURE(errorCode)){ |
| 2814 log_err("ucnv_toUnicode conversion failed reason %s\n", u_err
orName(errorCode)); |
| 2815 return; |
| 2816 } |
| 2817 }while(cSource<cSourceLimit); |
| 2818 |
| 2819 uSource = source; |
| 2820 /*test =uBuf;*/ |
| 2821 for(len=0;len<(int)(source - sourceLimit);len++){ |
| 2822 if(uBuf[len]!=uSource[len]){ |
| 2823 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int
)uBuf[len]) ; |
| 2824 } |
| 2825 } |
| 2826 } |
| 2827 free(uBuf); |
| 2828 free(cBuf); |
| 2829 } |
| 2830 /* Test for Jitterbug 778 */ |
| 2831 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit
,UConverter* cnv){ |
| 2832 const UChar* uSource; |
| 2833 const UChar* uSourceLimit; |
| 2834 const char* cSource; |
| 2835 UChar *uTargetLimit =NULL; |
| 2836 UChar *uTarget; |
| 2837 char *cTarget; |
| 2838 const char *cTargetLimit; |
| 2839 char *cBuf; |
| 2840 UChar *uBuf,*test; |
| 2841 int32_t uBufSize = 120; |
| 2842 int numCharsInTarget=0; |
| 2843 UErrorCode errorCode=U_ZERO_ERROR; |
| 2844 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); |
| 2845 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); |
| 2846 uSource = source; |
| 2847 uSourceLimit=sourceLimit; |
| 2848 cTarget = cBuf; |
| 2849 cTargetLimit = cBuf +uBufSize*5; |
| 2850 uTarget = uBuf; |
| 2851 uTargetLimit = uBuf+ uBufSize*5; |
| 2852 ucnv_reset(cnv); |
| 2853 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarge
t), uSource, (int32_t)(uSourceLimit-uSource), &errorCode); |
| 2854 if(U_FAILURE(errorCode)){ |
| 2855 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); |
| 2856 return; |
| 2857 } |
| 2858 cSource = cBuf; |
| 2859 test =uBuf; |
| 2860 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsIn
Target,&errorCode); |
| 2861 if(U_FAILURE(errorCode)){ |
| 2862 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(erro
rCode)); |
| 2863 return; |
| 2864 } |
| 2865 uSource = source; |
| 2866 while(uSource<uSourceLimit){ |
| 2867 if(*test!=*uSource){ |
| 2868 |
| 2869 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; |
| 2870 } |
| 2871 uSource++; |
| 2872 test++; |
| 2873 } |
| 2874 free(uBuf); |
| 2875 free(cBuf); |
| 2876 } |
| 2877 |
| 2878 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLim
it,UConverter* cnv){ |
| 2879 const UChar* uSource; |
| 2880 const UChar* uSourceLimit; |
| 2881 const char* cSource; |
| 2882 const char* cSourceLimit; |
| 2883 UChar *uTargetLimit =NULL; |
| 2884 UChar *uTarget; |
| 2885 char *cTarget; |
| 2886 const char *cTargetLimit; |
| 2887 char *cBuf; |
| 2888 UChar *uBuf; /*,*test;*/ |
| 2889 int32_t uBufSize = 120; |
| 2890 int len=0; |
| 2891 int i=2; |
| 2892 const UChar *temp = sourceLimit; |
| 2893 UErrorCode errorCode=U_ZERO_ERROR; |
| 2894 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); |
| 2895 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); |
| 2896 |
| 2897 ucnv_reset(cnv); |
| 2898 for(;--i>0;){ |
| 2899 uSource = (UChar*) source; |
| 2900 cTarget = cBuf; |
| 2901 uTarget = uBuf; |
| 2902 cSource = cBuf; |
| 2903 cTargetLimit = cBuf; |
| 2904 uTargetLimit = uBuf+uBufSize*5; |
| 2905 cTargetLimit = cTargetLimit+uBufSize*10; |
| 2906 uSourceLimit=uSource; |
| 2907 do{ |
| 2908 |
| 2909 if (uSourceLimit < sourceLimit) { |
| 2910 uSourceLimit = uSourceLimit+1; |
| 2911 } |
| 2912 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit
,NULL,FALSE, &errorCode); |
| 2913 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ |
| 2914 errorCode=U_ZERO_ERROR; |
| 2915 continue; |
| 2916 } |
| 2917 |
| 2918 if(U_FAILURE(errorCode)){ |
| 2919 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorN
ame(errorCode)); |
| 2920 return; |
| 2921 } |
| 2922 |
| 2923 }while (uSource<temp); |
| 2924 |
| 2925 cSourceLimit =cBuf; |
| 2926 do{ |
| 2927 if (cSourceLimit < cBuf + (cTarget - cBuf)) { |
| 2928 cSourceLimit = cSourceLimit+1; |
| 2929 } |
| 2930 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,
FALSE,&errorCode); |
| 2931 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ |
| 2932 errorCode=U_ZERO_ERROR; |
| 2933 continue; |
| 2934 } |
| 2935 if(U_FAILURE(errorCode)){ |
| 2936 log_err("ucnv_toUnicode conversion failed reason %s\n", u_err
orName(errorCode)); |
| 2937 return; |
| 2938 } |
| 2939 }while(cSource<cTarget); |
| 2940 |
| 2941 uSource = source; |
| 2942 /*test =uBuf;*/ |
| 2943 for(;len<(int)(source - sourceLimit);len++){ |
| 2944 if(uBuf[len]!=uSource[len]){ |
| 2945 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int
)uBuf[len]) ; |
| 2946 } |
| 2947 } |
| 2948 } |
| 2949 free(uBuf); |
| 2950 free(cBuf); |
| 2951 } |
| 2952 static void |
| 2953 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit, |
| 2954 const uint16_t results[], const char* message){ |
| 2955 /* const char* s0; */ |
| 2956 const char* s=(char*)source; |
| 2957 const uint16_t *r=results; |
| 2958 UErrorCode errorCode=U_ZERO_ERROR; |
| 2959 uint32_t c,exC; |
| 2960 ucnv_reset(cnv); |
| 2961 while(s<limit) { |
| 2962 /* s0=s; */ |
| 2963 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); |
| 2964 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { |
| 2965 break; /* no more significant input */ |
| 2966 } else if(U_FAILURE(errorCode)) { |
| 2967 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(
errorCode)); |
| 2968 break; |
| 2969 } else { |
| 2970 if(UTF_IS_FIRST_SURROGATE(*r)){ |
| 2971 int i =0, len = 2; |
| 2972 UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE); |
| 2973 r++; |
| 2974 }else{ |
| 2975 exC = *r; |
| 2976 } |
| 2977 if(c!=(uint32_t)(exC)) |
| 2978 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X
\n",message,(uint32_t) (*r),c); |
| 2979 } |
| 2980 r++; |
| 2981 } |
| 2982 } |
| 2983 |
| 2984 static int TestJitterbug930(const char* enc){ |
| 2985 UErrorCode err = U_ZERO_ERROR; |
| 2986 UConverter*converter; |
| 2987 char out[80]; |
| 2988 char*target = out; |
| 2989 UChar in[4]; |
| 2990 const UChar*source = in; |
| 2991 int32_t off[80]; |
| 2992 int32_t* offsets = off; |
| 2993 int numOffWritten=0; |
| 2994 UBool flush = 0; |
| 2995 converter = my_ucnv_open(enc, &err); |
| 2996 |
| 2997 in[0] = 0x41; /* 0x4E00;*/ |
| 2998 in[1] = 0x4E01; |
| 2999 in[2] = 0x4E02; |
| 3000 in[3] = 0x4E03; |
| 3001 |
| 3002 memset(off, '*', sizeof(off)); |
| 3003 |
| 3004 ucnv_fromUnicode (converter, |
| 3005 &target, |
| 3006 target+2, |
| 3007 &source, |
| 3008 source+3, |
| 3009 offsets, |
| 3010 flush, |
| 3011 &err); |
| 3012 |
| 3013 /* writes three bytes into the output buffer: 41 1B 24 |
| 3014 * but offsets contains 0 1 1 |
| 3015 */ |
| 3016 while(*offsets< off[10]){ |
| 3017 numOffWritten++; |
| 3018 offsets++; |
| 3019 } |
| 3020 log_verbose("Testing Jitterbug 930 for encoding %s",enc); |
| 3021 if(numOffWritten!= (int)(target-out)){ |
| 3022 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",en
c, (int)(target-out),numOffWritten); |
| 3023 } |
| 3024 |
| 3025 err = U_ZERO_ERROR; |
| 3026 |
| 3027 memset(off,'*' , sizeof(off)); |
| 3028 |
| 3029 flush = 1; |
| 3030 offsets=off; |
| 3031 ucnv_fromUnicode (converter, |
| 3032 &target, |
| 3033 target+4, |
| 3034 &source, |
| 3035 source, |
| 3036 offsets, |
| 3037 flush, |
| 3038 &err); |
| 3039 numOffWritten=0; |
| 3040 while(*offsets< off[10]){ |
| 3041 numOffWritten++; |
| 3042 if(*offsets!= -1){ |
| 3043 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i
",enc,-1,*offsets) ; |
| 3044 } |
| 3045 offsets++; |
| 3046 } |
| 3047 |
| 3048 /* writes 42 43 7A into output buffer, |
| 3049 * offsets contains -1 -1 -1 |
| 3050 */ |
| 3051 ucnv_close(converter); |
| 3052 return 0; |
| 3053 } |
| 3054 |
| 3055 static void |
| 3056 TestHZ() { |
| 3057 /* test input */ |
| 3058 static const uint16_t in[]={ |
| 3059 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x30
05, 0x2014, |
| 3060 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73
BB, 0x83E0, |
| 3061 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94
C2, 0x7B94, |
| 3062 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A
73, 0x6355, |
| 3063 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C
3F, 0x90E8, |
| 3064 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x24
95, 0x2496, |
| 3065 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x24
76, 0x2477, |
| 3066 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x24
7F, 0x2480, |
| 3067 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x00
46, 0x007E, |
| 3068 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x00
4F, 0x0050, |
| 3069 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x00
58, 0x0059, |
| 3070 0x005A, 0x005B, 0x005C, 0x000A |
| 3071 }; |
| 3072 const UChar* uSource; |
| 3073 const UChar* uSourceLimit; |
| 3074 const char* cSource; |
| 3075 const char* cSourceLimit; |
| 3076 UChar *uTargetLimit =NULL; |
| 3077 UChar *uTarget; |
| 3078 char *cTarget; |
| 3079 const char *cTargetLimit; |
| 3080 char *cBuf; |
| 3081 UChar *uBuf,*test; |
| 3082 int32_t uBufSize = 120; |
| 3083 UErrorCode errorCode=U_ZERO_ERROR; |
| 3084 UConverter *cnv; |
| 3085 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); |
| 3086 int32_t* myOff= offsets; |
| 3087 cnv=ucnv_open("HZ", &errorCode); |
| 3088 if(U_FAILURE(errorCode)) { |
| 3089 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode)
); |
| 3090 return; |
| 3091 } |
| 3092 |
| 3093 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); |
| 3094 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); |
| 3095 uSource = (const UChar*)in; |
| 3096 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); |
| 3097 cTarget = cBuf; |
| 3098 cTargetLimit = cBuf +uBufSize*5; |
| 3099 uTarget = uBuf; |
| 3100 uTargetLimit = uBuf+ uBufSize*5; |
| 3101 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); |
| 3102 if(U_FAILURE(errorCode)){ |
| 3103 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); |
| 3104 return; |
| 3105 } |
| 3106 cSource = cBuf; |
| 3107 cSourceLimit =cTarget; |
| 3108 test =uBuf; |
| 3109 myOff=offsets; |
| 3110 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); |
| 3111 if(U_FAILURE(errorCode)){ |
| 3112 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); |
| 3113 return; |
| 3114 } |
| 3115 uSource = (const UChar*)in; |
| 3116 while(uSource<uSourceLimit){ |
| 3117 if(*test!=*uSource){ |
| 3118 |
| 3119 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; |
| 3120 } |
| 3121 uSource++; |
| 3122 test++; |
| 3123 } |
| 3124 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding"); |
| 3125 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 3126 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 3127 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 3128 TestJitterbug930("csISO2022JP"); |
| 3129 ucnv_close(cnv); |
| 3130 free(offsets); |
| 3131 free(uBuf); |
| 3132 free(cBuf); |
| 3133 } |
| 3134 |
| 3135 static void |
| 3136 TestISCII(){ |
| 3137 /* test input */ |
| 3138 static const uint16_t in[]={ |
| 3139 /* test full range of Devanagari */ |
| 3140 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A, |
| 3141 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911, |
| 3142 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D, |
| 3143 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926, |
| 3144 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F, |
| 3145 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937, |
| 3146 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943, |
| 3147 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D, |
| 3148 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C, |
| 3149 0x096D,0x096E,0x096F, |
| 3150 /* test Soft halant*/ |
| 3151 0x0915,0x094d, 0x200D, |
| 3152 /* test explicit halant */ |
| 3153 0x0915,0x094d, 0x200c, |
| 3154 /* test double danda */ |
| 3155 0x965, |
| 3156 /* test ASCII */ |
| 3157 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, |
| 3158 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, |
| 3159 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, |
| 3160 /* tests from Lotus */ |
| 3161 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043, |
| 3162 0x0930,0x094D,0x200D, |
| 3163 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043, |
| 3164 0x0915,0x0921,0x002B,0x095F, |
| 3165 /* tamil range */ |
| 3166 0x0B86, 0xB87, 0xB88, |
| 3167 /* telugu range */ |
| 3168 0x0C05, 0x0C02, 0x0C03,0x0c31, |
| 3169 /* kannada range */ |
| 3170 0x0C85, 0xC82, 0x0C83, |
| 3171 /* test Abbr sign and Anudatta */ |
| 3172 0x0970, 0x952, |
| 3173 /* 0x0958, |
| 3174 0x0959, |
| 3175 0x095A, |
| 3176 0x095B, |
| 3177 0x095C, |
| 3178 0x095D, |
| 3179 0x095E, |
| 3180 0x095F,*/ |
| 3181 0x0960 /* Vocallic RRI 0xAB, 0xE9*/, |
| 3182 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */, |
| 3183 0x090C , |
| 3184 0x0962, |
| 3185 0x0961 /* Vocallic LL 0xa6, 0xE9 */, |
| 3186 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */, |
| 3187 0x0950 /* OM Symbol 0xa1, 0xE9,*/, |
| 3188 0x093D /* Avagraha 0xEA, 0xE9*/, |
| 3189 0x0958, |
| 3190 0x0959, |
| 3191 0x095A, |
| 3192 0x095B, |
| 3193 0x095C, |
| 3194 0x095D, |
| 3195 0x095E, |
| 3196 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0 |
| 3197 }; |
| 3198 static const unsigned char byteArr[]={ |
| 3199 |
| 3200 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9, |
| 3201 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2, |
| 3202 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb, |
| 3203 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4, |
| 3204 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd, |
| 3205 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, |
| 3206 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, |
| 3207 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8, |
| 3208 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7, |
| 3209 0xf8,0xf9,0xfa, |
| 3210 /* test soft halant */ |
| 3211 0xb3, 0xE8, 0xE9, |
| 3212 /* test explicit halant */ |
| 3213 0xb3, 0xE8, 0xE8, |
| 3214 /* test double danda */ |
| 3215 0xea, 0xea, |
| 3216 /* test ASCII */ |
| 3217 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, |
| 3218 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, |
| 3219 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, |
| 3220 /* test ATR code */ |
| 3221 |
| 3222 /* tests from Lotus */ |
| 3223 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43, |
| 3224 0xEF,0x42,0xCF,0xE8,0xD9, |
| 3225 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43, |
| 3226 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE, |
| 3227 /* tamil range */ |
| 3228 0xEF, 0x44, 0xa5, 0xa6, 0xa7, |
| 3229 /* telugu range */ |
| 3230 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0, |
| 3231 /* kannada range */ |
| 3232 0xEF, 0x48,0xa4, 0xa2, 0xa3, |
| 3233 /* anudatta and abbreviation sign */ |
| 3234 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8, |
| 3235 |
| 3236 |
| 3237 0xAA, 0xE9,/* RI + NUKTA 0x0960*/ |
| 3238 |
| 3239 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/ |
| 3240 |
| 3241 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/ |
| 3242 |
| 3243 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/ |
| 3244 |
| 3245 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/ |
| 3246 |
| 3247 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/ |
| 3248 |
| 3249 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/ |
| 3250 |
| 3251 0xEA, 0xE9, /* Danda + Nukta 0x093D*/ |
| 3252 |
| 3253 0xB3, 0xE9, /* Ka + NUKTA */ |
| 3254 |
| 3255 0xB4, 0xE9, /* Kha + NUKTA */ |
| 3256 |
| 3257 0xB5, 0xE9, /* Ga + NUKTA */ |
| 3258 |
| 3259 0xBA, 0xE9, |
| 3260 |
| 3261 0xBF, 0xE9, |
| 3262 |
| 3263 0xC0, 0xE9, |
| 3264 |
| 3265 0xC9, 0xE9, |
| 3266 /* INV halant RA */ |
| 3267 0xD9, 0xE8, 0xCF, |
| 3268 0x00, 0x00A0, |
| 3269 /* just consume unhandled codepoints */ |
| 3270 0xEF, 0x30, |
| 3271 |
| 3272 }; |
| 3273 testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-i
scii-de",NULL,TRUE); |
| 3274 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof
(byteArr)); |
| 3275 |
| 3276 } |
| 3277 |
| 3278 static void |
| 3279 TestISO_2022_JP() { |
| 3280 /* test input */ |
| 3281 static const uint16_t in[]={ |
| 3282 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A, |
| 3283 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D,
0x000A, |
| 3284 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D,
0x000A, |
| 3285 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D,
0x000A, |
| 3286 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, |
| 3287 0x201D, 0x3014, 0x000D, 0x000A, |
| 3288 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D,
0x000A, |
| 3289 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D,
0x000A, |
| 3290 }; |
| 3291 const UChar* uSource; |
| 3292 const UChar* uSourceLimit; |
| 3293 const char* cSource; |
| 3294 const char* cSourceLimit; |
| 3295 UChar *uTargetLimit =NULL; |
| 3296 UChar *uTarget; |
| 3297 char *cTarget; |
| 3298 const char *cTargetLimit; |
| 3299 char *cBuf; |
| 3300 UChar *uBuf,*test; |
| 3301 int32_t uBufSize = 120; |
| 3302 UErrorCode errorCode=U_ZERO_ERROR; |
| 3303 UConverter *cnv; |
| 3304 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); |
| 3305 int32_t* myOff= offsets; |
| 3306 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); |
| 3307 if(U_FAILURE(errorCode)) { |
| 3308 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorN
ame(errorCode)); |
| 3309 return; |
| 3310 } |
| 3311 |
| 3312 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); |
| 3313 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); |
| 3314 uSource = (const UChar*)in; |
| 3315 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); |
| 3316 cTarget = cBuf; |
| 3317 cTargetLimit = cBuf +uBufSize*5; |
| 3318 uTarget = uBuf; |
| 3319 uTargetLimit = uBuf+ uBufSize*5; |
| 3320 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); |
| 3321 if(U_FAILURE(errorCode)){ |
| 3322 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); |
| 3323 return; |
| 3324 } |
| 3325 cSource = cBuf; |
| 3326 cSourceLimit =cTarget; |
| 3327 test =uBuf; |
| 3328 myOff=offsets; |
| 3329 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); |
| 3330 if(U_FAILURE(errorCode)){ |
| 3331 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); |
| 3332 return; |
| 3333 } |
| 3334 |
| 3335 uSource = (const UChar*)in; |
| 3336 while(uSource<uSourceLimit){ |
| 3337 if(*test!=*uSource){ |
| 3338 |
| 3339 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; |
| 3340 } |
| 3341 uSource++; |
| 3342 test++; |
| 3343 } |
| 3344 |
| 3345 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 3346 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 3347 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding"); |
| 3348 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 3349 TestJitterbug930("csISO2022JP"); |
| 3350 ucnv_close(cnv); |
| 3351 free(uBuf); |
| 3352 free(cBuf); |
| 3353 free(offsets); |
| 3354 } |
| 3355 |
| 3356 static void TestConv(const uint16_t in[],int len, const char* conv, const char*
lang, char byteArr[],int byteArrLen){ |
| 3357 const UChar* uSource; |
| 3358 const UChar* uSourceLimit; |
| 3359 const char* cSource; |
| 3360 const char* cSourceLimit; |
| 3361 UChar *uTargetLimit =NULL; |
| 3362 UChar *uTarget; |
| 3363 char *cTarget; |
| 3364 const char *cTargetLimit; |
| 3365 char *cBuf; |
| 3366 UChar *uBuf,*test; |
| 3367 int32_t uBufSize = 120*10; |
| 3368 UErrorCode errorCode=U_ZERO_ERROR; |
| 3369 UConverter *cnv; |
| 3370 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) ); |
| 3371 int32_t* myOff= offsets; |
| 3372 cnv=my_ucnv_open(conv, &errorCode); |
| 3373 if(U_FAILURE(errorCode)) { |
| 3374 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(er
rorCode)); |
| 3375 return; |
| 3376 } |
| 3377 |
| 3378 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)); |
| 3379 cBuf =(char*)malloc(uBufSize * sizeof(char)); |
| 3380 uSource = (const UChar*)in; |
| 3381 uSourceLimit=uSource+len; |
| 3382 cTarget = cBuf; |
| 3383 cTargetLimit = cBuf +uBufSize; |
| 3384 uTarget = uBuf; |
| 3385 uTargetLimit = uBuf+ uBufSize; |
| 3386 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); |
| 3387 if(U_FAILURE(errorCode)){ |
| 3388 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); |
| 3389 return; |
| 3390 } |
| 3391 /*log_verbose("length of compressed string for language %s using %s:%i \n",c
onv,lang,(cTarget-cBuf));*/ |
| 3392 cSource = cBuf; |
| 3393 cSourceLimit =cTarget; |
| 3394 test =uBuf; |
| 3395 myOff=offsets; |
| 3396 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); |
| 3397 if(U_FAILURE(errorCode)){ |
| 3398 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(er
rorCode)); |
| 3399 return; |
| 3400 } |
| 3401 |
| 3402 uSource = (const UChar*)in; |
| 3403 while(uSource<uSourceLimit){ |
| 3404 if(*test!=*uSource){ |
| 3405 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",con
v,*uSource,(int)*test) ; |
| 3406 } |
| 3407 uSource++; |
| 3408 test++; |
| 3409 } |
| 3410 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv); |
| 3411 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv); |
| 3412 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv); |
| 3413 if(byteArr && byteArrLen!=0){ |
| 3414 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang); |
| 3415 TestToAndFromUChars(in,(const UChar*)&in[len],cnv); |
| 3416 { |
| 3417 cSource = byteArr; |
| 3418 cSourceLimit = cSource+byteArrLen; |
| 3419 test=uBuf; |
| 3420 myOff = offsets; |
| 3421 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff
,TRUE,&errorCode); |
| 3422 if(U_FAILURE(errorCode)){ |
| 3423 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorN
ame(errorCode)); |
| 3424 return; |
| 3425 } |
| 3426 |
| 3427 uSource = (const UChar*)in; |
| 3428 while(uSource<uSourceLimit){ |
| 3429 if(*test!=*uSource){ |
| 3430 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int
)*test) ; |
| 3431 } |
| 3432 uSource++; |
| 3433 test++; |
| 3434 } |
| 3435 } |
| 3436 } |
| 3437 |
| 3438 ucnv_close(cnv); |
| 3439 free(uBuf); |
| 3440 free(cBuf); |
| 3441 free(offsets); |
| 3442 } |
| 3443 static UChar U_CALLCONV |
| 3444 _charAt(int32_t offset, void *context) { |
| 3445 return ((char*)context)[offset]; |
| 3446 } |
| 3447 |
| 3448 static int32_t |
| 3449 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *s
tatus){ |
| 3450 int32_t srcIndex=0; |
| 3451 int32_t dstIndex=0; |
| 3452 if(U_FAILURE(*status)){ |
| 3453 return 0; |
| 3454 } |
| 3455 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){ |
| 3456 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 3457 return 0; |
| 3458 } |
| 3459 if(srcLen==-1){ |
| 3460 srcLen = (int32_t)uprv_strlen(src); |
| 3461 } |
| 3462 |
| 3463 for (; srcIndex<srcLen; ) { |
| 3464 UChar32 c = src[srcIndex++]; |
| 3465 if (c == 0x005C /*'\\'*/) { |
| 3466 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i
*/ |
| 3467 if (c == (UChar32)0xFFFFFFFF) { |
| 3468 *status=U_INVALID_CHAR_FOUND; /* return empty string */ |
| 3469 break; /* invalid escape sequence */ |
| 3470 } |
| 3471 } |
| 3472 if(dstIndex < dstLen){ |
| 3473 if(c>0xFFFF){ |
| 3474 dst[dstIndex++] = UTF16_LEAD(c); |
| 3475 if(dstIndex<dstLen){ |
| 3476 dst[dstIndex]=UTF16_TRAIL(c); |
| 3477 }else{ |
| 3478 *status=U_BUFFER_OVERFLOW_ERROR; |
| 3479 } |
| 3480 }else{ |
| 3481 dst[dstIndex]=(UChar)c; |
| 3482 } |
| 3483 |
| 3484 }else{ |
| 3485 *status = U_BUFFER_OVERFLOW_ERROR; |
| 3486 } |
| 3487 dstIndex++; /* for preflighting */ |
| 3488 } |
| 3489 return dstIndex; |
| 3490 } |
| 3491 |
| 3492 static void |
| 3493 TestFullRoundtrip(const char* cp){ |
| 3494 UChar usource[10] ={0}; |
| 3495 UChar nsrc[10] = {0}; |
| 3496 uint32_t i=1; |
| 3497 int len=0, ulen; |
| 3498 nsrc[0]=0x0061; |
| 3499 /* Test codepoint 0 */ |
| 3500 TestConv(usource,1,cp,"",NULL,0); |
| 3501 TestConv(usource,2,cp,"",NULL,0); |
| 3502 nsrc[2]=0x5555; |
| 3503 TestConv(nsrc,3,cp,"",NULL,0); |
| 3504 |
| 3505 for(;i<=0x10FFFF;i++){ |
| 3506 if(i==0xD800){ |
| 3507 i=0xDFFF; |
| 3508 continue; |
| 3509 } |
| 3510 if(i<=0xFFFF){ |
| 3511 usource[0] =(UChar) i; |
| 3512 len=1; |
| 3513 }else{ |
| 3514 usource[0]=UTF16_LEAD(i); |
| 3515 usource[1]=UTF16_TRAIL(i); |
| 3516 len=2; |
| 3517 } |
| 3518 ulen=len; |
| 3519 if(i==0x80) { |
| 3520 usource[2]=0; |
| 3521 } |
| 3522 /* Test only single code points */ |
| 3523 TestConv(usource,ulen,cp,"",NULL,0); |
| 3524 /* Test codepoint repeated twice */ |
| 3525 usource[ulen]=usource[0]; |
| 3526 usource[ulen+1]=usource[1]; |
| 3527 ulen+=len; |
| 3528 TestConv(usource,ulen,cp,"",NULL,0); |
| 3529 /* Test codepoint repeated 3 times */ |
| 3530 usource[ulen]=usource[0]; |
| 3531 usource[ulen+1]=usource[1]; |
| 3532 ulen+=len; |
| 3533 TestConv(usource,ulen,cp,"",NULL,0); |
| 3534 /* Test codepoint in between 2 codepoints */ |
| 3535 nsrc[1]=usource[0]; |
| 3536 nsrc[2]=usource[1]; |
| 3537 nsrc[len+1]=0x5555; |
| 3538 TestConv(nsrc,len+2,cp,"",NULL,0); |
| 3539 uprv_memset(usource,0,sizeof(UChar)*10); |
| 3540 } |
| 3541 } |
| 3542 |
| 3543 static void |
| 3544 TestRoundTrippingAllUTF(void){ |
| 3545 if(!getTestOption(QUICK_OPTION)){ |
| 3546 log_verbose("Running exhaustive round trip test for BOCU-1\n"); |
| 3547 TestFullRoundtrip("BOCU-1"); |
| 3548 log_verbose("Running exhaustive round trip test for SCSU\n"); |
| 3549 TestFullRoundtrip("SCSU"); |
| 3550 log_verbose("Running exhaustive round trip test for UTF-8\n"); |
| 3551 TestFullRoundtrip("UTF-8"); |
| 3552 log_verbose("Running exhaustive round trip test for CESU-8\n"); |
| 3553 TestFullRoundtrip("CESU-8"); |
| 3554 log_verbose("Running exhaustive round trip test for UTF-16BE\n"); |
| 3555 TestFullRoundtrip("UTF-16BE"); |
| 3556 log_verbose("Running exhaustive round trip test for UTF-16LE\n"); |
| 3557 TestFullRoundtrip("UTF-16LE"); |
| 3558 log_verbose("Running exhaustive round trip test for UTF-16\n"); |
| 3559 TestFullRoundtrip("UTF-16"); |
| 3560 log_verbose("Running exhaustive round trip test for UTF-32BE\n"); |
| 3561 TestFullRoundtrip("UTF-32BE"); |
| 3562 log_verbose("Running exhaustive round trip test for UTF-32LE\n"); |
| 3563 TestFullRoundtrip("UTF-32LE"); |
| 3564 log_verbose("Running exhaustive round trip test for UTF-32\n"); |
| 3565 TestFullRoundtrip("UTF-32"); |
| 3566 log_verbose("Running exhaustive round trip test for UTF-7\n"); |
| 3567 TestFullRoundtrip("UTF-7"); |
| 3568 log_verbose("Running exhaustive round trip test for UTF-7\n"); |
| 3569 TestFullRoundtrip("UTF-7,version=1"); |
| 3570 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n"
); |
| 3571 TestFullRoundtrip("IMAP-mailbox-name"); |
| 3572 log_verbose("Running exhaustive round trip test for GB18030\n"); |
| 3573 TestFullRoundtrip("GB18030"); |
| 3574 } |
| 3575 } |
| 3576 |
| 3577 static void |
| 3578 TestSCSU() { |
| 3579 |
| 3580 static const uint16_t germanUTF16[]={ |
| 3581 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 |
| 3582 }; |
| 3583 |
| 3584 static const uint8_t germanSCSU[]={ |
| 3585 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74 |
| 3586 }; |
| 3587 |
| 3588 static const uint16_t russianUTF16[]={ |
| 3589 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 |
| 3590 }; |
| 3591 |
| 3592 static const uint8_t russianSCSU[]={ |
| 3593 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0 |
| 3594 }; |
| 3595 |
| 3596 static const uint16_t japaneseUTF16[]={ |
| 3597 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, |
| 3598 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, |
| 3599 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, |
| 3600 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, |
| 3601 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, |
| 3602 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044, |
| 3603 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3, |
| 3604 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd, |
| 3605 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de, |
| 3606 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, |
| 3607 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, |
| 3608 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, |
| 3609 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, |
| 3610 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, |
| 3611 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 |
| 3612 }; |
| 3613 |
| 3614 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of
one different choice: |
| 3615 it uses an SQn once where a longer look-ahead could have shown that SCn is
more efficient */ |
| 3616 static const uint8_t japaneseSCSU[]={ |
| 3617 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef,
0x61, 0x1b, 0xe5, 0x84, |
| 3618 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3,
0x94, 0x08, 0x02, 0x0f, |
| 3619 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b,
0x6d, 0x41, 0x88, 0x4c, |
| 3620 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e,
0x6b, 0x4c, 0x08, 0x0d, |
| 3621 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa,
0x84, 0x08, 0x02, 0x0e, |
| 3622 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc,
0x9f, 0x0e, 0x79, 0x3e, |
| 3623 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08,
0x88, 0xbe, 0xa3, 0x8d, |
| 3624 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08,
0x01, 0x93, 0xc8, 0xaa, |
| 3625 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae,
0x93, 0xa8, 0xa0, 0x08, |
| 3626 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80,
0x05, 0xec, 0x60, 0x8d, |
| 3627 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4,
0xfe, 0xe7, 0xc2, 0x06, |
| 3628 0xcb, 0x82 |
| 3629 }; |
| 3630 |
| 3631 static const uint16_t allFeaturesUTF16[]={ |
| 3632 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, |
| 3633 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, |
| 3634 0x01df, 0xf000, 0xdbff, 0xdfff |
| 3635 }; |
| 3636 |
| 3637 /* see comment at japaneseSCSU: the same kind of different choice yields a s
lightly shorter |
| 3638 * result here (34B vs. 35B) |
| 3639 */ |
| 3640 static const uint8_t allFeaturesSCSU[]={ |
| 3641 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03, |
| 3642 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a, |
| 3643 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13, |
| 3644 0xdf, 0x14, 0x80, 0x15, 0xff |
| 3645 }; |
| 3646 static const uint16_t monkeyIn[]={ |
| 3647 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D,
0x000A, |
| 3648 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D,
0x000A, |
| 3649 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D,
0x000A, |
| 3650 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D,
0x000A, |
| 3651 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D,
0x000A, |
| 3652 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D,
0x000A, |
| 3653 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D,
0x000A, |
| 3654 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D,
0x000A, |
| 3655 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D,
0x000A, |
| 3656 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D,
0x000A, |
| 3657 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D,
0x000A, |
| 3658 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D,
0x000A, |
| 3659 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D,
0x000A, |
| 3660 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D,
0x000A, |
| 3661 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D,
0x000A, |
| 3662 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D,
0x000A, |
| 3663 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D,
0x000A, |
| 3664 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D,
0x000A, |
| 3665 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D,
0x000A, |
| 3666 /* test non-BMP code points */ |
| 3667 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869,
0xDE9F, |
| 3668 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869,
0xDEA8, |
| 3669 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869,
0xDEAF, |
| 3670 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869,
0xDEB6, |
| 3671 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869,
0xDEBB, |
| 3672 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869,
0xDEC0, |
| 3673 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869,
0xDEC8, |
| 3674 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869,
0xDECF, |
| 3675 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869,
0xDED4, |
| 3676 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF,
0xDFFF, |
| 3677 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, |
| 3678 |
| 3679 |
| 3680 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D,
0x000A, |
| 3681 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D,
0x000A, |
| 3682 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D,
0x000A, |
| 3683 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D,
0x000A, |
| 3684 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D,
0x000A, |
| 3685 }; |
| 3686 static const char *fTestCases [] = { |
| 3687 "\\ud800\\udc00", /* smallest surrogate*/ |
| 3688 "\\ud8ff\\udcff", |
| 3689 "\\udBff\\udFff", /* largest surrogate pair*/ |
| 3690 "\\ud834\\udc00", |
| 3691 "\\U0010FFFF", |
| 3692 "Hello \\u9292 \\u9192 World!", |
| 3693 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!", |
| 3694 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", |
| 3695 |
| 3696 "\\u0648\\u06c8", /* catch missing reset*/ |
| 3697 "\\u0648\\u06c8", |
| 3698 |
| 3699 "\\u4444\\uE001", /* lowest quotable*/ |
| 3700 "\\u4444\\uf2FF", /* highest quotable*/ |
| 3701 "\\u4444\\uf188\\u4444", |
| 3702 "\\u4444\\uf188\\uf288", |
| 3703 "\\u4444\\uf188abc\\u0429\\uf288", |
| 3704 "\\u9292\\u2222", |
| 3705 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!", |
| 3706 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", |
| 3707 "Hello World!123456", |
| 3708 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/ |
| 3709 |
| 3710 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/ |
| 3711 "abc\\u4411d", /* uses SQU*/ |
| 3712 "abc\\u4411\\u4412d",/* uses SCU*/ |
| 3713 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/ |
| 3714 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data
*/ |
| 3715 "\\u9292\\u2222", |
| 3716 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", |
| 3717 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u306
5\\u300c", |
| 3718 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53e
f\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002", |
| 3719 |
| 3720 "", /* empty input*/ |
| 3721 "\\u0000", /* smallest BMP character*/ |
| 3722 "\\uFFFF", /* largest BMP character*/ |
| 3723 |
| 3724 /* regression tests*/ |
| 3725 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49f
d\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa", |
| 3726 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u0
15f\\u00df\\u01df\\uf000\\udbff\\udfff", |
| 3727 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e
1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c", |
| 3728 "\\u0041\\u00df\\u0401\\u015f", |
| 3729 "\\u9066\\u2123abc", |
| 3730 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u
539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf51
3\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\
u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\ucc
d8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\
\u0bc0\\u06c5", |
| 3731 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b
5\\u0cf3\\u6059\\u7489", |
| 3732 }; |
| 3733 int i=0; |
| 3734 for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){ |
| 3735 const char* cSrc = fTestCases[i]; |
| 3736 UErrorCode status = U_ZERO_ERROR; |
| 3737 int32_t cSrcLen,srcLen; |
| 3738 UChar* src; |
| 3739 /* UConverter* cnv = ucnv_open("SCSU",&status); */ |
| 3740 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]); |
| 3741 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar)); |
| 3742 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status); |
| 3743 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i); |
| 3744 TestConv(src,srcLen,"SCSU","Coverage",NULL,0); |
| 3745 free(src); |
| 3746 } |
| 3747 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features"
, (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); |
| 3748 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features"
,(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); |
| 3749 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)
japaneseSCSU,sizeof(japaneseSCSU)); |
| 3750 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese
",(char *)japaneseSCSU,sizeof(japaneseSCSU)); |
| 3751 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanS
CSU,sizeof(germanSCSU)); |
| 3752 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)rus
sianSCSU,sizeof(russianSCSU)); |
| 3753 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0); |
| 3754 } |
| 3755 |
| 3756 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 3757 static void TestJitterbug2346(){ |
| 3758 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a, |
| 3759 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a}; |
| 3760 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A}; |
| 3761 |
| 3762 UChar uTarget[500]={'\0'}; |
| 3763 UChar* utarget=uTarget; |
| 3764 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; |
| 3765 |
| 3766 char cTarget[500]={'\0'}; |
| 3767 char* ctarget=cTarget; |
| 3768 char* ctargetLimit=cTarget+sizeof(cTarget); |
| 3769 const char* csource=source; |
| 3770 UChar* temp = expected; |
| 3771 UErrorCode err=U_ZERO_ERROR; |
| 3772 |
| 3773 UConverter* conv =ucnv_open("ISO_2022_JP",&err); |
| 3774 if(U_FAILURE(err)) { |
| 3775 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
r)); |
| 3776 return; |
| 3777 } |
| 3778 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NU
LL,TRUE,&err); |
| 3779 if(U_FAILURE(err)) { |
| 3780 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(er
r)); |
| 3781 return; |
| 3782 } |
| 3783 utargetLimit=utarget; |
| 3784 utarget = uTarget; |
| 3785 while(utarget<utargetLimit){ |
| 3786 if(*temp!=*utarget){ |
| 3787 |
| 3788 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp)
; |
| 3789 } |
| 3790 utarget++; |
| 3791 temp++; |
| 3792 } |
| 3793 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetL
imit,NULL,TRUE,&err); |
| 3794 if(U_FAILURE(err)) { |
| 3795 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(
err)); |
| 3796 return; |
| 3797 } |
| 3798 ctargetLimit=ctarget; |
| 3799 ctarget =cTarget; |
| 3800 ucnv_close(conv); |
| 3801 |
| 3802 |
| 3803 } |
| 3804 |
| 3805 static void |
| 3806 TestISO_2022_JP_1() { |
| 3807 /* test input */ |
| 3808 static const uint16_t in[]={ |
| 3809 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D,
0x000A, |
| 3810 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D,
0x000A, |
| 3811 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D,
0x000A, |
| 3812 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D,
0x000A, |
| 3813 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D,
0x000A, |
| 3814 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, |
| 3815 0x201D, 0x000D, 0x000A, |
| 3816 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D,
0x000A, |
| 3817 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D,
0x000A, |
| 3818 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D,
0x000A, |
| 3819 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D,
0x000A, |
| 3820 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D,
0x000A, |
| 3821 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A |
| 3822 }; |
| 3823 const UChar* uSource; |
| 3824 const UChar* uSourceLimit; |
| 3825 const char* cSource; |
| 3826 const char* cSourceLimit; |
| 3827 UChar *uTargetLimit =NULL; |
| 3828 UChar *uTarget; |
| 3829 char *cTarget; |
| 3830 const char *cTargetLimit; |
| 3831 char *cBuf; |
| 3832 UChar *uBuf,*test; |
| 3833 int32_t uBufSize = 120; |
| 3834 UErrorCode errorCode=U_ZERO_ERROR; |
| 3835 UConverter *cnv; |
| 3836 |
| 3837 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); |
| 3838 if(U_FAILURE(errorCode)) { |
| 3839 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
rorCode)); |
| 3840 return; |
| 3841 } |
| 3842 |
| 3843 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); |
| 3844 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); |
| 3845 uSource = (const UChar*)in; |
| 3846 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); |
| 3847 cTarget = cBuf; |
| 3848 cTargetLimit = cBuf +uBufSize*5; |
| 3849 uTarget = uBuf; |
| 3850 uTargetLimit = uBuf+ uBufSize*5; |
| 3851 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TR
UE, &errorCode); |
| 3852 if(U_FAILURE(errorCode)){ |
| 3853 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); |
| 3854 return; |
| 3855 } |
| 3856 cSource = cBuf; |
| 3857 cSourceLimit =cTarget; |
| 3858 test =uBuf; |
| 3859 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&er
rorCode); |
| 3860 if(U_FAILURE(errorCode)){ |
| 3861 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); |
| 3862 return; |
| 3863 } |
| 3864 uSource = (const UChar*)in; |
| 3865 while(uSource<uSourceLimit){ |
| 3866 if(*test!=*uSource){ |
| 3867 |
| 3868 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; |
| 3869 } |
| 3870 uSource++; |
| 3871 test++; |
| 3872 } |
| 3873 /*ucnv_close(cnv); |
| 3874 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/ |
| 3875 /*Test for the condition where there is an invalid character*/ |
| 3876 ucnv_reset(cnv); |
| 3877 { |
| 3878 static const uint8_t source2[]={0x0e,0x24,0x053}; |
| 3879 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]"); |
| 3880 } |
| 3881 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 3882 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 3883 ucnv_close(cnv); |
| 3884 free(uBuf); |
| 3885 free(cBuf); |
| 3886 } |
| 3887 |
| 3888 static void |
| 3889 TestISO_2022_JP_2() { |
| 3890 /* test input */ |
| 3891 static const uint16_t in[]={ |
| 3892 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D,
0x000A, |
| 3893 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D,
0x000A, |
| 3894 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D,
0x000A, |
| 3895 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D,
0x000A, |
| 3896 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D,
0x000A, |
| 3897 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D,
0x000A, |
| 3898 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D,
0x000A, |
| 3899 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D,
0x000A, |
| 3900 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D,
0x000A, |
| 3901 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D,
0x000A, |
| 3902 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D,
0x000A, |
| 3903 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D,
0x000A, |
| 3904 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D,
0x000A, |
| 3905 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D,
0x000A, |
| 3906 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D,
0x000A, |
| 3907 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D,
0x000A, |
| 3908 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D,
0x000A, |
| 3909 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D,
0x000A, |
| 3910 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D,
0x000A |
| 3911 }; |
| 3912 const UChar* uSource; |
| 3913 const UChar* uSourceLimit; |
| 3914 const char* cSource; |
| 3915 const char* cSourceLimit; |
| 3916 UChar *uTargetLimit =NULL; |
| 3917 UChar *uTarget; |
| 3918 char *cTarget; |
| 3919 const char *cTargetLimit; |
| 3920 char *cBuf; |
| 3921 UChar *uBuf,*test; |
| 3922 int32_t uBufSize = 120; |
| 3923 UErrorCode errorCode=U_ZERO_ERROR; |
| 3924 UConverter *cnv; |
| 3925 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); |
| 3926 int32_t* myOff= offsets; |
| 3927 cnv=ucnv_open("ISO_2022_JP_2", &errorCode); |
| 3928 if(U_FAILURE(errorCode)) { |
| 3929 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
rorCode)); |
| 3930 return; |
| 3931 } |
| 3932 |
| 3933 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); |
| 3934 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); |
| 3935 uSource = (const UChar*)in; |
| 3936 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); |
| 3937 cTarget = cBuf; |
| 3938 cTargetLimit = cBuf +uBufSize*5; |
| 3939 uTarget = uBuf; |
| 3940 uTargetLimit = uBuf+ uBufSize*5; |
| 3941 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); |
| 3942 if(U_FAILURE(errorCode)){ |
| 3943 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); |
| 3944 return; |
| 3945 } |
| 3946 cSource = cBuf; |
| 3947 cSourceLimit =cTarget; |
| 3948 test =uBuf; |
| 3949 myOff=offsets; |
| 3950 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); |
| 3951 if(U_FAILURE(errorCode)){ |
| 3952 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); |
| 3953 return; |
| 3954 } |
| 3955 uSource = (const UChar*)in; |
| 3956 while(uSource<uSourceLimit){ |
| 3957 if(*test!=*uSource){ |
| 3958 |
| 3959 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; |
| 3960 } |
| 3961 uSource++; |
| 3962 test++; |
| 3963 } |
| 3964 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 3965 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 3966 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 3967 /*Test for the condition where there is an invalid character*/ |
| 3968 ucnv_reset(cnv); |
| 3969 { |
| 3970 static const uint8_t source2[]={0x0e,0x24,0x053}; |
| 3971 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]"); |
| 3972 } |
| 3973 ucnv_close(cnv); |
| 3974 free(uBuf); |
| 3975 free(cBuf); |
| 3976 free(offsets); |
| 3977 } |
| 3978 |
| 3979 static void |
| 3980 TestISO_2022_KR() { |
| 3981 /* test input */ |
| 3982 static const uint16_t in[]={ |
| 3983 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x00
0D |
| 3984 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC
04 |
| 3985 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x00
28,0x0029 |
| 3986 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53
CA,0x53CB |
| 3987 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53
E2 |
| 3988 ,0x53E3,0x53E4,0x000A,0x000D}; |
| 3989 const UChar* uSource; |
| 3990 const UChar* uSourceLimit; |
| 3991 const char* cSource; |
| 3992 const char* cSourceLimit; |
| 3993 UChar *uTargetLimit =NULL; |
| 3994 UChar *uTarget; |
| 3995 char *cTarget; |
| 3996 const char *cTargetLimit; |
| 3997 char *cBuf; |
| 3998 UChar *uBuf,*test; |
| 3999 int32_t uBufSize = 120; |
| 4000 UErrorCode errorCode=U_ZERO_ERROR; |
| 4001 UConverter *cnv; |
| 4002 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); |
| 4003 int32_t* myOff= offsets; |
| 4004 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode); |
| 4005 if(U_FAILURE(errorCode)) { |
| 4006 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
rorCode)); |
| 4007 return; |
| 4008 } |
| 4009 |
| 4010 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); |
| 4011 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); |
| 4012 uSource = (const UChar*)in; |
| 4013 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); |
| 4014 cTarget = cBuf; |
| 4015 cTargetLimit = cBuf +uBufSize*5; |
| 4016 uTarget = uBuf; |
| 4017 uTargetLimit = uBuf+ uBufSize*5; |
| 4018 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); |
| 4019 if(U_FAILURE(errorCode)){ |
| 4020 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); |
| 4021 return; |
| 4022 } |
| 4023 cSource = cBuf; |
| 4024 cSourceLimit =cTarget; |
| 4025 test =uBuf; |
| 4026 myOff=offsets; |
| 4027 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); |
| 4028 if(U_FAILURE(errorCode)){ |
| 4029 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); |
| 4030 return; |
| 4031 } |
| 4032 uSource = (const UChar*)in; |
| 4033 while(uSource<uSourceLimit){ |
| 4034 if(*test!=*uSource){ |
| 4035 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; |
| 4036 } |
| 4037 uSource++; |
| 4038 test++; |
| 4039 } |
| 4040 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); |
| 4041 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 4042 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 4043 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 4044 TestJitterbug930("csISO2022KR"); |
| 4045 /*Test for the condition where there is an invalid character*/ |
| 4046 ucnv_reset(cnv); |
| 4047 { |
| 4048 static const uint8_t source2[]={0x1b,0x24,0x053}; |
| 4049 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); |
| 4050 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); |
| 4051 } |
| 4052 ucnv_close(cnv); |
| 4053 free(uBuf); |
| 4054 free(cBuf); |
| 4055 free(offsets); |
| 4056 } |
| 4057 |
| 4058 static void |
| 4059 TestISO_2022_KR_1() { |
| 4060 /* test input */ |
| 4061 static const uint16_t in[]={ |
| 4062 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x00
0D |
| 4063 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC
04 |
| 4064 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x00
28,0x0029 |
| 4065 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53
CA,0x53CB |
| 4066 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53
E2 |
| 4067 ,0x53E3,0x53E4,0x000A,0x000D}; |
| 4068 const UChar* uSource; |
| 4069 const UChar* uSourceLimit; |
| 4070 const char* cSource; |
| 4071 const char* cSourceLimit; |
| 4072 UChar *uTargetLimit =NULL; |
| 4073 UChar *uTarget; |
| 4074 char *cTarget; |
| 4075 const char *cTargetLimit; |
| 4076 char *cBuf; |
| 4077 UChar *uBuf,*test; |
| 4078 int32_t uBufSize = 120; |
| 4079 UErrorCode errorCode=U_ZERO_ERROR; |
| 4080 UConverter *cnv; |
| 4081 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); |
| 4082 int32_t* myOff= offsets; |
| 4083 cnv=ucnv_open("ibm-25546", &errorCode); |
| 4084 if(U_FAILURE(errorCode)) { |
| 4085 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
rorCode)); |
| 4086 return; |
| 4087 } |
| 4088 |
| 4089 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); |
| 4090 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); |
| 4091 uSource = (const UChar*)in; |
| 4092 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); |
| 4093 cTarget = cBuf; |
| 4094 cTargetLimit = cBuf +uBufSize*5; |
| 4095 uTarget = uBuf; |
| 4096 uTargetLimit = uBuf+ uBufSize*5; |
| 4097 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); |
| 4098 if(U_FAILURE(errorCode)){ |
| 4099 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); |
| 4100 return; |
| 4101 } |
| 4102 cSource = cBuf; |
| 4103 cSourceLimit =cTarget; |
| 4104 test =uBuf; |
| 4105 myOff=offsets; |
| 4106 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); |
| 4107 if(U_FAILURE(errorCode)){ |
| 4108 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); |
| 4109 return; |
| 4110 } |
| 4111 uSource = (const UChar*)in; |
| 4112 while(uSource<uSourceLimit){ |
| 4113 if(*test!=*uSource){ |
| 4114 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; |
| 4115 } |
| 4116 uSource++; |
| 4117 test++; |
| 4118 } |
| 4119 ucnv_reset(cnv); |
| 4120 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); |
| 4121 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 4122 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 4123 ucnv_reset(cnv); |
| 4124 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 4125 /*Test for the condition where there is an invalid character*/ |
| 4126 ucnv_reset(cnv); |
| 4127 { |
| 4128 static const uint8_t source2[]={0x1b,0x24,0x053}; |
| 4129 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); |
| 4130 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); |
| 4131 } |
| 4132 ucnv_close(cnv); |
| 4133 free(uBuf); |
| 4134 free(cBuf); |
| 4135 free(offsets); |
| 4136 } |
| 4137 |
| 4138 static void TestJitterbug2411(){ |
| 4139 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6
f\x69\x75\x79\x71\x77\x65\x68\x67\x0A" |
| 4140 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x
66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43"; |
| 4141 UConverter* kr=NULL, *kr1=NULL; |
| 4142 UErrorCode errorCode = U_ZERO_ERROR; |
| 4143 UChar tgt[100]={'\0'}; |
| 4144 UChar* target = tgt; |
| 4145 UChar* targetLimit = target+100; |
| 4146 kr=ucnv_open("iso-2022-kr", &errorCode); |
| 4147 if(U_FAILURE(errorCode)) { |
| 4148 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName
(errorCode)); |
| 4149 return; |
| 4150 } |
| 4151 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NUL
L,TRUE,&errorCode); |
| 4152 if(U_FAILURE(errorCode)) { |
| 4153 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_
errorName(errorCode)); |
| 4154 return; |
| 4155 } |
| 4156 kr1 = ucnv_open("ibm-25546", &errorCode); |
| 4157 if(U_FAILURE(errorCode)) { |
| 4158 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorNa
me(errorCode)); |
| 4159 return; |
| 4160 } |
| 4161 target = tgt; |
| 4162 targetLimit = target+100; |
| 4163 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NUL
L,TRUE,&errorCode); |
| 4164 |
| 4165 if(U_FAILURE(errorCode)) { |
| 4166 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n",
u_errorName(errorCode)); |
| 4167 return; |
| 4168 } |
| 4169 |
| 4170 ucnv_close(kr); |
| 4171 ucnv_close(kr1); |
| 4172 |
| 4173 } |
| 4174 |
| 4175 static void |
| 4176 TestJIS(){ |
| 4177 /* From Unicode moved to testdata/conversion.txt */ |
| 4178 /*To Unicode*/ |
| 4179 { |
| 4180 static const uint8_t sampleTextJIS[] = { |
| 4181 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/ |
| 4182 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ |
| 4183 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>
&@*/ |
| 4184 }; |
| 4185 static const uint16_t expectedISO2022JIS[] = { |
| 4186 0x0041, 0x0042, |
| 4187 0xFF81, 0xFF82, |
| 4188 0x3000 |
| 4189 }; |
| 4190 static const int32_t toISO2022JISOffs[]={ |
| 4191 3,4, |
| 4192 8,9, |
| 4193 16 |
| 4194 }; |
| 4195 |
| 4196 static const uint8_t sampleTextJIS7[] = { |
| 4197 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/ |
| 4198 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ |
| 4199 0x1b,0x24,0x42,0x21,0x21, |
| 4200 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */ |
| 4201 0x21,0x22, |
| 4202 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>
&@*/ |
| 4203 }; |
| 4204 static const uint16_t expectedISO2022JIS7[] = { |
| 4205 0x0041, 0x0042, |
| 4206 0xFF81, 0xFF82, |
| 4207 0x3000, |
| 4208 0xFF81, 0xFF82, |
| 4209 0x3001, |
| 4210 0x3000 |
| 4211 }; |
| 4212 static const int32_t toISO2022JIS7Offs[]={ |
| 4213 3,4, |
| 4214 8,9, |
| 4215 13,16, |
| 4216 17, |
| 4217 19,27 |
| 4218 }; |
| 4219 static const uint8_t sampleTextJIS8[] = { |
| 4220 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/ |
| 4221 0xa1,0xc8,0xd9,/*Katakana Set*/ |
| 4222 0x1b,0x28,0x42, |
| 4223 0x41,0x42, |
| 4224 0xb1,0xc3, /*Katakana Set*/ |
| 4225 0x1b,0x24,0x42,0x21,0x21 |
| 4226 }; |
| 4227 static const uint16_t expectedISO2022JIS8[] = { |
| 4228 0x0041, 0x0042, |
| 4229 0xff61, 0xff88, 0xff99, |
| 4230 0x0041, 0x0042, |
| 4231 0xff71, 0xff83, |
| 4232 0x3000 |
| 4233 }; |
| 4234 static const int32_t toISO2022JIS8Offs[]={ |
| 4235 3, 4, 5, 6, |
| 4236 7, 11, 12, 13, |
| 4237 14, 18, |
| 4238 }; |
| 4239 |
| 4240 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS, |
| 4241 sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toIS
O2022JISOffs,TRUE); |
| 4242 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7
, |
| 4243 sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", t
oISO2022JIS7Offs,TRUE); |
| 4244 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8
, |
| 4245 sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", t
oISO2022JIS8Offs,TRUE); |
| 4246 } |
| 4247 |
| 4248 } |
| 4249 |
| 4250 |
| 4251 #if 0 |
| 4252 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 |
| 4253 |
| 4254 static void TestJitterbug915(){ |
| 4255 /* tests for roundtripping of the below sequence |
| 4256 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * / |
| 4257 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * / |
| 4258 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * / |
| 4259 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * / |
| 4260 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * / |
| 4261 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * / |
| 4262 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * / |
| 4263 */ |
| 4264 static const char cSource[]={ |
| 4265 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, |
| 4266 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, |
| 4267 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, |
| 4268 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, |
| 4269 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, |
| 4270 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, |
| 4271 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70, |
| 4272 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, |
| 4273 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, |
| 4274 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, |
| 4275 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61, |
| 4276 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, |
| 4277 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, |
| 4278 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, |
| 4279 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, |
| 4280 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, |
| 4281 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, |
| 4282 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, |
| 4283 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, |
| 4284 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, |
| 4285 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, |
| 4286 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, |
| 4287 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, |
| 4288 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, |
| 4289 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, |
| 4290 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, |
| 4291 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, |
| 4292 0x37, 0x20, 0x2A, 0x2F |
| 4293 }; |
| 4294 UChar uTarget[500]={'\0'}; |
| 4295 UChar* utarget=uTarget; |
| 4296 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; |
| 4297 |
| 4298 char cTarget[500]={'\0'}; |
| 4299 char* ctarget=cTarget; |
| 4300 char* ctargetLimit=cTarget+sizeof(cTarget); |
| 4301 const char* csource=cSource; |
| 4302 const char* tempSrc = cSource; |
| 4303 UErrorCode err=U_ZERO_ERROR; |
| 4304 |
| 4305 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err); |
| 4306 if(U_FAILURE(err)) { |
| 4307 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
r)); |
| 4308 return; |
| 4309 } |
| 4310 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),N
ULL,TRUE,&err); |
| 4311 if(U_FAILURE(err)) { |
| 4312 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(er
r)); |
| 4313 return; |
| 4314 } |
| 4315 utargetLimit=utarget; |
| 4316 utarget = uTarget; |
| 4317 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetL
imit,NULL,TRUE,&err); |
| 4318 if(U_FAILURE(err)) { |
| 4319 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(
err)); |
| 4320 return; |
| 4321 } |
| 4322 ctargetLimit=ctarget; |
| 4323 ctarget =cTarget; |
| 4324 while(ctarget<ctargetLimit){ |
| 4325 if(*ctarget != *tempSrc){ |
| 4326 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarg
et-cTarget), *ctarget,(int)*tempSrc) ; |
| 4327 } |
| 4328 ++ctarget; |
| 4329 ++tempSrc; |
| 4330 } |
| 4331 |
| 4332 ucnv_close(conv); |
| 4333 } |
| 4334 |
| 4335 static void |
| 4336 TestISO_2022_CN_EXT() { |
| 4337 /* test input */ |
| 4338 static const uint16_t in[]={ |
| 4339 /* test Non-BMP code points */ |
| 4340 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869,
0xDE9F, |
| 4341 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869,
0xDEA8, |
| 4342 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869,
0xDEAF, |
| 4343 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869,
0xDEB6, |
| 4344 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869,
0xDEBB, |
| 4345 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869,
0xDEC0, |
| 4346 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869,
0xDEC8, |
| 4347 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869,
0xDECF, |
| 4348 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869,
0xDED4, |
| 4349 0xD869, 0xDED5, |
| 4350 |
| 4351 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D,
0x000A, |
| 4352 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D,
0x000A, |
| 4353 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D,
0x000A, |
| 4354 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D,
0x000A, |
| 4355 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D,
0x000A, |
| 4356 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D,
0x000A, |
| 4357 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D,
0x000A, |
| 4358 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D,
0x000A, |
| 4359 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D,
0x000A, |
| 4360 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D,
0x000A, |
| 4361 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D,
0x000A, |
| 4362 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D,
0x000A, |
| 4363 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D,
0x000A, |
| 4364 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D,
0x000A, |
| 4365 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D,
0x000A, |
| 4366 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D,
0x000A, |
| 4367 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D,
0x000A, |
| 4368 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D,
0x000A, |
| 4369 |
| 4370 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A |
| 4371 |
| 4372 }; |
| 4373 |
| 4374 const UChar* uSource; |
| 4375 const UChar* uSourceLimit; |
| 4376 const char* cSource; |
| 4377 const char* cSourceLimit; |
| 4378 UChar *uTargetLimit =NULL; |
| 4379 UChar *uTarget; |
| 4380 char *cTarget; |
| 4381 const char *cTargetLimit; |
| 4382 char *cBuf; |
| 4383 UChar *uBuf,*test; |
| 4384 int32_t uBufSize = 180; |
| 4385 UErrorCode errorCode=U_ZERO_ERROR; |
| 4386 UConverter *cnv; |
| 4387 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); |
| 4388 int32_t* myOff= offsets; |
| 4389 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode); |
| 4390 if(U_FAILURE(errorCode)) { |
| 4391 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
rorCode)); |
| 4392 return; |
| 4393 } |
| 4394 |
| 4395 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); |
| 4396 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); |
| 4397 uSource = (const UChar*)in; |
| 4398 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); |
| 4399 cTarget = cBuf; |
| 4400 cTargetLimit = cBuf +uBufSize*5; |
| 4401 uTarget = uBuf; |
| 4402 uTargetLimit = uBuf+ uBufSize*5; |
| 4403 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); |
| 4404 if(U_FAILURE(errorCode)){ |
| 4405 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); |
| 4406 return; |
| 4407 } |
| 4408 cSource = cBuf; |
| 4409 cSourceLimit =cTarget; |
| 4410 test =uBuf; |
| 4411 myOff=offsets; |
| 4412 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); |
| 4413 if(U_FAILURE(errorCode)){ |
| 4414 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); |
| 4415 return; |
| 4416 } |
| 4417 uSource = (const UChar*)in; |
| 4418 while(uSource<uSourceLimit){ |
| 4419 if(*test!=*uSource){ |
| 4420 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; |
| 4421 } |
| 4422 else{ |
| 4423 log_verbose(" Got: \\u%04X\n",(int)*test) ; |
| 4424 } |
| 4425 uSource++; |
| 4426 test++; |
| 4427 } |
| 4428 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 4429 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 4430 /*Test for the condition where there is an invalid character*/ |
| 4431 ucnv_reset(cnv); |
| 4432 { |
| 4433 static const uint8_t source2[]={0x0e,0x24,0x053}; |
| 4434 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]"); |
| 4435 } |
| 4436 ucnv_close(cnv); |
| 4437 free(uBuf); |
| 4438 free(cBuf); |
| 4439 free(offsets); |
| 4440 } |
| 4441 #endif |
| 4442 |
| 4443 static void |
| 4444 TestISO_2022_CN() { |
| 4445 /* test input */ |
| 4446 static const uint16_t in[]={ |
| 4447 /* jitterbug 951 */ |
| 4448 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41,
0xFF52, |
| 4449 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16,
0xFF17, |
| 4450 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45,
0xFF52, |
| 4451 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E,
0xFF45, |
| 4452 0x0020, 0x0045, 0x004e, 0x0044, |
| 4453 /**/ |
| 4454 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D,
0x000A, |
| 4455 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D,
0x000A, |
| 4456 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D,
0x000A, |
| 4457 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D,
0x000A, |
| 4458 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D,
0x000A, |
| 4459 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D,
0x000A, |
| 4460 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D,
0x000A, |
| 4461 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D,
0x000A, |
| 4462 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D,
0x000A, |
| 4463 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D,
0x000A, |
| 4464 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D,
0x000A, |
| 4465 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D,
0x000A, |
| 4466 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D,
0x000A, |
| 4467 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D,
0x000A, |
| 4468 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D,
0x000A, |
| 4469 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485,
0x2486, |
| 4470 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D,
0x000A, |
| 4471 |
| 4472 }; |
| 4473 const UChar* uSource; |
| 4474 const UChar* uSourceLimit; |
| 4475 const char* cSource; |
| 4476 const char* cSourceLimit; |
| 4477 UChar *uTargetLimit =NULL; |
| 4478 UChar *uTarget; |
| 4479 char *cTarget; |
| 4480 const char *cTargetLimit; |
| 4481 char *cBuf; |
| 4482 UChar *uBuf,*test; |
| 4483 int32_t uBufSize = 180; |
| 4484 UErrorCode errorCode=U_ZERO_ERROR; |
| 4485 UConverter *cnv; |
| 4486 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); |
| 4487 int32_t* myOff= offsets; |
| 4488 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode); |
| 4489 if(U_FAILURE(errorCode)) { |
| 4490 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
rorCode)); |
| 4491 return; |
| 4492 } |
| 4493 |
| 4494 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); |
| 4495 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); |
| 4496 uSource = (const UChar*)in; |
| 4497 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); |
| 4498 cTarget = cBuf; |
| 4499 cTargetLimit = cBuf +uBufSize*5; |
| 4500 uTarget = uBuf; |
| 4501 uTargetLimit = uBuf+ uBufSize*5; |
| 4502 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); |
| 4503 if(U_FAILURE(errorCode)){ |
| 4504 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); |
| 4505 return; |
| 4506 } |
| 4507 cSource = cBuf; |
| 4508 cSourceLimit =cTarget; |
| 4509 test =uBuf; |
| 4510 myOff=offsets; |
| 4511 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); |
| 4512 if(U_FAILURE(errorCode)){ |
| 4513 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); |
| 4514 return; |
| 4515 } |
| 4516 uSource = (const UChar*)in; |
| 4517 while(uSource<uSourceLimit){ |
| 4518 if(*test!=*uSource){ |
| 4519 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; |
| 4520 } |
| 4521 else{ |
| 4522 log_verbose(" Got: \\u%04X\n",(int)*test) ; |
| 4523 } |
| 4524 uSource++; |
| 4525 test++; |
| 4526 } |
| 4527 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding"); |
| 4528 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 4529 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 4530 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); |
| 4531 TestJitterbug930("csISO2022CN"); |
| 4532 /*Test for the condition where there is an invalid character*/ |
| 4533 ucnv_reset(cnv); |
| 4534 { |
| 4535 static const uint8_t source2[]={0x0e,0x24,0x053}; |
| 4536 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]"); |
| 4537 } |
| 4538 |
| 4539 ucnv_close(cnv); |
| 4540 free(uBuf); |
| 4541 free(cBuf); |
| 4542 free(offsets); |
| 4543 } |
| 4544 |
/*
 * Empty-segment tests for ISO-2022-JP/KR/CN and HZ: they verify that the
 * UConverterCallbackReason reported for an empty segment is UCNV_IRREGULAR.
 * One EmptySegmentTest describes a single test case.
 */
typedef struct {
    const char *converterName;   /* name passed to ucnv_open(); NULL ends a table */
    const char *inputText;       /* bytes to convert to Unicode */
    int         inputTextLength; /* number of bytes in inputText */
} EmptySegmentTest;
| 4551 |
| 4552 /* Callback for TestJitterbug6175, should only get called for empty segment erro
rs */ |
| 4553 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUn
icodeArgs *toArgs, const char* codeUnits, |
| 4554 int32_t length, UConverterCallbackR
eason reason, UErrorCode * err ) { |
| 4555 if (reason > UCNV_IRREGULAR) { |
| 4556 return; |
| 4557 } |
| 4558 if (reason != UCNV_IRREGULAR) { |
| 4559 log_err("toUnicode callback invoked for empty segment but reason is not
UCNV_IRREGULAR\n"); |
| 4560 } |
| 4561 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */ |
| 4562 *err = U_ZERO_ERROR; |
| 4563 ucnv_cbToUWriteSub(toArgs,0,err); |
| 4564 } |
| 4565 |
| 4566 enum { kEmptySegmentToUCharsMax = 64 }; |
| 4567 static void TestJitterbug6175(void) { |
| 4568 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0
x42, 0x63, 0x64, 0x0D, 0x0A }; |
| 4569 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F,
0x62, 0x0D, 0x0A }; |
| 4570 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E,
0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A }; |
| 4571 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E,
0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A }; |
| 4572 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63,
0x64 }; |
| 4573 static const EmptySegmentTest emptySegmentTests[] = { |
| 4574 /* converterName inputText inputTextLength */ |
| 4575 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) }, |
| 4576 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) }, |
| 4577 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) }, |
| 4578 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) }, |
| 4579 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) }, |
| 4580 /* terminator: */ |
| 4581 { NULL, NULL, 0, } |
| 4582 }; |
| 4583 const EmptySegmentTest * testPtr; |
| 4584 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr)
{ |
| 4585 UErrorCode err = U_ZERO_ERROR; |
| 4586 UConverter * cnv = ucnv_open(testPtr->converterName, &err); |
| 4587 if (U_FAILURE(err)) { |
| 4588 log_data_err("Unable to open %s converter: %s\n", testPtr->converter
Name, u_errorName(err)); |
| 4589 return; |
| 4590 } |
| 4591 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NU
LL, &err); |
| 4592 if (U_FAILURE(err)) { |
| 4593 log_data_err("Unable to setToUCallBack for %s converter: %s\n", test
Ptr->converterName, u_errorName(err)); |
| 4594 ucnv_close(cnv); |
| 4595 return; |
| 4596 } |
| 4597 { |
| 4598 UChar toUChars[kEmptySegmentToUCharsMax]; |
| 4599 UChar * toUCharsPtr = toUChars; |
| 4600 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax
; |
| 4601 const char * inCharsPtr = testPtr->inputText; |
| 4602 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength; |
| 4603 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inChar
sLimit, NULL, TRUE, &err); |
| 4604 } |
| 4605 ucnv_close(cnv); |
| 4606 } |
| 4607 } |
| 4608 |
| 4609 static void |
| 4610 TestEBCDIC_STATEFUL() { |
| 4611 /* test input */ |
| 4612 static const uint8_t in[]={ |
| 4613 0x61, |
| 4614 0x1a, |
| 4615 0x0f, 0x4b, |
| 4616 0x42, |
| 4617 0x40, |
| 4618 0x36, |
| 4619 }; |
| 4620 |
| 4621 /* expected test results */ |
| 4622 static const int32_t results[]={ |
| 4623 /* number of bytes read, code point */ |
| 4624 1, 0x002f, |
| 4625 1, 0x0092, |
| 4626 2, 0x002e, |
| 4627 1, 0xff62, |
| 4628 1, 0x0020, |
| 4629 1, 0x0096, |
| 4630 |
| 4631 }; |
| 4632 static const uint8_t in2[]={ |
| 4633 0x0f, |
| 4634 0xa1, |
| 4635 0x01 |
| 4636 }; |
| 4637 |
| 4638 /* expected test results */ |
| 4639 static const int32_t results2[]={ |
| 4640 /* number of bytes read, code point */ |
| 4641 2, 0x203E, |
| 4642 1, 0x0001, |
| 4643 }; |
| 4644 |
| 4645 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); |
| 4646 UErrorCode errorCode=U_ZERO_ERROR; |
| 4647 UConverter *cnv=ucnv_open("ibm-930", &errorCode); |
| 4648 if(U_FAILURE(errorCode)) { |
| 4649 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n"
, u_errorName(errorCode)); |
| 4650 return; |
| 4651 } |
| 4652 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)"); |
| 4653 ucnv_reset(cnv); |
| 4654 /* Test the condition when source >= sourceLimit */ |
| 4655 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); |
| 4656 ucnv_reset(cnv); |
| 4657 /*Test for the condition where source > sourcelimit after consuming the shif
t chracter */ |
| 4658 { |
| 4659 static const uint8_t source1[]={0x0f}; |
| 4660 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeo
f(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated"); |
| 4661 } |
| 4662 /*Test for the condition where there is an invalid character*/ |
| 4663 ucnv_reset(cnv); |
| 4664 { |
| 4665 static const uint8_t source2[]={0x0e, 0x7F, 0xFF}; |
| 4666 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]"); |
| 4667 } |
| 4668 ucnv_reset(cnv); |
| 4669 source=(const char*)in2; |
| 4670 limit=(const char*)in2+sizeof(in2); |
| 4671 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2"); |
| 4672 ucnv_close(cnv); |
| 4673 |
| 4674 } |
| 4675 |
| 4676 static void |
| 4677 TestGB18030() { |
| 4678 /* test input */ |
| 4679 static const uint8_t in[]={ |
| 4680 0x24, |
| 4681 0x7f, |
| 4682 0x81, 0x30, 0x81, 0x30, |
| 4683 0xa8, 0xbf, |
| 4684 0xa2, 0xe3, |
| 4685 0xd2, 0xbb, |
| 4686 0x82, 0x35, 0x8f, 0x33, |
| 4687 0x84, 0x31, 0xa4, 0x39, |
| 4688 0x90, 0x30, 0x81, 0x30, |
| 4689 0xe3, 0x32, 0x9a, 0x35 |
| 4690 #if 0 |
| 4691 /* |
| 4692 * Feature removed markus 2000-oct-26 |
| 4693 * Only some codepages must match surrogate pairs into supplementary cod
e points - |
| 4694 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvm
bcs.c . |
| 4695 * GB 18030 provides direct encodings for supplementary code points, the
refore |
| 4696 * it must not combine two single-encoded surrogates into one code point
. |
| 4697 */ |
| 4698 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded sur
rogates */ |
| 4699 #endif |
| 4700 }; |
| 4701 |
| 4702 /* expected test results */ |
| 4703 static const int32_t results[]={ |
| 4704 /* number of bytes read, code point */ |
| 4705 1, 0x24, |
| 4706 1, 0x7f, |
| 4707 4, 0x80, |
| 4708 2, 0x1f9, |
| 4709 2, 0x20ac, |
| 4710 2, 0x4e00, |
| 4711 4, 0x9fa6, |
| 4712 4, 0xffff, |
| 4713 4, 0x10000, |
| 4714 4, 0x10ffff |
| 4715 #if 0 |
| 4716 /* Feature removed. See comment above. */ |
| 4717 8, 0x10000 |
| 4718 #endif |
| 4719 }; |
| 4720 |
| 4721 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */ |
| 4722 UErrorCode errorCode=U_ZERO_ERROR; |
| 4723 UConverter *cnv=ucnv_open("gb18030", &errorCode); |
| 4724 if(U_FAILURE(errorCode)) { |
| 4725 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(err
orCode)); |
| 4726 return; |
| 4727 } |
| 4728 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "
gb18030"); |
| 4729 ucnv_close(cnv); |
| 4730 } |
| 4731 |
| 4732 static void |
| 4733 TestLMBCS() { |
| 4734 /* LMBCS-1 string */ |
| 4735 static const uint8_t pszLMBCS[]={ |
| 4736 0x61, |
| 4737 0x01, 0x29, |
| 4738 0x81, |
| 4739 0xA0, |
| 4740 0x0F, 0x27, |
| 4741 0x0F, 0x91, |
| 4742 0x14, 0x0a, 0x74, |
| 4743 0x14, 0xF6, 0x02, |
| 4744 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */ |
| 4745 0x10, 0x88, 0xA0, |
| 4746 }; |
| 4747 |
| 4748 /* Unicode UChar32 equivalents */ |
| 4749 static const UChar32 pszUnicode32[]={ |
| 4750 /* code point */ |
| 4751 0x00000061, |
| 4752 0x00002013, |
| 4753 0x000000FC, |
| 4754 0x000000E1, |
| 4755 0x00000007, |
| 4756 0x00000091, |
| 4757 0x00000a74, |
| 4758 0x00000200, |
| 4759 0x00023456, /* code point for surrogate pair */ |
| 4760 0x00005516 |
| 4761 }; |
| 4762 |
| 4763 /* Unicode UChar equivalents */ |
| 4764 static const UChar pszUnicode[]={ |
| 4765 /* code point */ |
| 4766 0x0061, |
| 4767 0x2013, |
| 4768 0x00FC, |
| 4769 0x00E1, |
| 4770 0x0007, |
| 4771 0x0091, |
| 4772 0x0a74, |
| 4773 0x0200, |
| 4774 0xD84D, /* low surrogate */ |
| 4775 0xDC56, /* high surrogate */ |
| 4776 0x5516 |
| 4777 }; |
| 4778 |
| 4779 /* expected test results */ |
| 4780 static const int offsets32[]={ |
| 4781 /* number of bytes read, code point */ |
| 4782 0, |
| 4783 1, |
| 4784 3, |
| 4785 4, |
| 4786 5, |
| 4787 7, |
| 4788 9, |
| 4789 12, |
| 4790 15, |
| 4791 21, |
| 4792 24 |
| 4793 }; |
| 4794 |
| 4795 /* expected test results */ |
| 4796 static const int offsets[]={ |
| 4797 /* number of bytes read, code point */ |
| 4798 0, |
| 4799 1, |
| 4800 3, |
| 4801 4, |
| 4802 5, |
| 4803 7, |
| 4804 9, |
| 4805 12, |
| 4806 15, |
| 4807 18, |
| 4808 21, |
| 4809 24 |
| 4810 }; |
| 4811 |
| 4812 |
| 4813 UConverter *cnv; |
| 4814 |
| 4815 #define NAME_LMBCS_1 "LMBCS-1" |
| 4816 #define NAME_LMBCS_2 "LMBCS-2" |
| 4817 |
| 4818 |
| 4819 /* Some basic open/close/property tests on some LMBCS converters */ |
| 4820 { |
| 4821 |
| 4822 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */ |
| 4823 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/ |
| 4824 char get_subchars [1]; |
| 4825 const char * get_name; |
| 4826 UConverter *cnv1; |
| 4827 UConverter *cnv2; |
| 4828 |
| 4829 int8_t len = sizeof(get_subchars); |
| 4830 |
| 4831 UErrorCode errorCode=U_ZERO_ERROR; |
| 4832 |
| 4833 /* Open */ |
| 4834 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode); |
| 4835 if(U_FAILURE(errorCode)) { |
| 4836 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(er
rorCode)); |
| 4837 return; |
| 4838 } |
| 4839 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode); |
| 4840 if(U_FAILURE(errorCode)) { |
| 4841 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(er
rorCode)); |
| 4842 return; |
| 4843 } |
| 4844 |
| 4845 /* Name */ |
| 4846 get_name = ucnv_getName (cnv1, &errorCode); |
| 4847 if (strcmp(NAME_LMBCS_1,get_name)){ |
| 4848 log_err("Unexpected converter name: %s\n", get_name); |
| 4849 } |
| 4850 get_name = ucnv_getName (cnv2, &errorCode); |
| 4851 if (strcmp(NAME_LMBCS_2,get_name)){ |
| 4852 log_err("Unexpected converter name: %s\n", get_name); |
| 4853 } |
| 4854 |
| 4855 /* substitution chars */ |
| 4856 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode); |
| 4857 if(U_FAILURE(errorCode)) { |
| 4858 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); |
| 4859 } |
| 4860 if (len!=1){ |
| 4861 log_err("Unexpected length of sub chars\n"); |
| 4862 } |
| 4863 if (get_subchars[0] != expected_subchars[0]){ |
| 4864 log_err("Unexpected value of sub chars\n"); |
| 4865 } |
| 4866 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode); |
| 4867 if(U_FAILURE(errorCode)) { |
| 4868 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode)); |
| 4869 } |
| 4870 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode); |
| 4871 if(U_FAILURE(errorCode)) { |
| 4872 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); |
| 4873 } |
| 4874 if (len!=1){ |
| 4875 log_err("Unexpected length of sub chars\n"); |
| 4876 } |
| 4877 if (get_subchars[0] != new_subchars[0]){ |
| 4878 log_err("Unexpected value of sub chars\n"); |
| 4879 } |
| 4880 ucnv_close(cnv1); |
| 4881 ucnv_close(cnv2); |
| 4882 |
| 4883 } |
| 4884 |
| 4885 /* LMBCS to Unicode - offsets */ |
| 4886 { |
| 4887 UErrorCode errorCode=U_ZERO_ERROR; |
| 4888 |
| 4889 const char * pSource = (const char *)pszLMBCS; |
| 4890 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); |
| 4891 |
| 4892 UChar Out [sizeof(pszUnicode) + 1]; |
| 4893 UChar * pOut = Out; |
| 4894 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); |
| 4895 |
| 4896 int32_t off [sizeof(offsets)]; |
| 4897 |
| 4898 /* last 'offset' in expected results is just the final size. |
| 4899 (Makes other tests easier). Compensate here: */ |
| 4900 |
| 4901 off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS); |
| 4902 |
| 4903 |
| 4904 |
| 4905 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */ |
| 4906 if(U_FAILURE(errorCode)) { |
| 4907 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(er
rorCode)); |
| 4908 return; |
| 4909 } |
| 4910 |
| 4911 |
| 4912 |
| 4913 ucnv_toUnicode (cnv, |
| 4914 &pOut, |
| 4915 OutLimit, |
| 4916 &pSource, |
| 4917 sourceLimit, |
| 4918 off, |
| 4919 TRUE, |
| 4920 &errorCode); |
| 4921 |
| 4922 |
| 4923 if (memcmp(off,offsets,sizeof(offsets))) |
| 4924 { |
| 4925 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n
"); |
| 4926 } |
| 4927 if (memcmp(Out,pszUnicode,sizeof(pszUnicode))) |
| 4928 { |
| 4929 log_err("LMBCS->Uni: Calculated codepoints do not match expected result
s\n"); |
| 4930 } |
| 4931 ucnv_close(cnv); |
| 4932 } |
| 4933 { |
| 4934 /* LMBCS to Unicode - getNextUChar */ |
| 4935 const char * sourceStart; |
| 4936 const char *source=(const char *)pszLMBCS; |
| 4937 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS); |
| 4938 const UChar32 *results= pszUnicode32; |
| 4939 const int *off = offsets32; |
| 4940 |
| 4941 UErrorCode errorCode=U_ZERO_ERROR; |
| 4942 UChar32 uniChar; |
| 4943 |
| 4944 cnv=ucnv_open("LMBCS-1", &errorCode); |
| 4945 if(U_FAILURE(errorCode)) { |
| 4946 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(
errorCode)); |
| 4947 return; |
| 4948 } |
| 4949 else |
| 4950 { |
| 4951 |
| 4952 while(source<limit) { |
| 4953 sourceStart=source; |
| 4954 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]),
&errorCode); |
| 4955 if(U_FAILURE(errorCode)) { |
| 4956 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorNam
e(errorCode)); |
| 4957 break; |
| 4958 } else if(source-sourceStart != off[1] - off[0] || uniChar != *resul
ts) { |
| 4959 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, sh
ould have been %lx from %d bytes.\n", |
| 4960 uniChar, (source-sourceStart), *results, *off); |
| 4961 break; |
| 4962 } |
| 4963 results++; |
| 4964 off++; |
| 4965 } |
| 4966 } |
| 4967 ucnv_close(cnv); |
| 4968 } |
| 4969 { /* test locale & optimization group operations: Unicode to LMBCS */ |
| 4970 |
| 4971 UErrorCode errorCode=U_ZERO_ERROR; |
| 4972 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode); |
| 4973 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode); |
| 4974 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode); |
| 4975 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */ |
| 4976 const UChar * pUniOut = uniString; |
| 4977 UChar * pUniIn = uniString; |
| 4978 uint8_t lmbcsString [4]; |
| 4979 const char * pLMBCSOut = (const char *)lmbcsString; |
| 4980 char * pLMBCSIn = (char *)lmbcsString; |
| 4981 |
| 4982 /* 0192 (hook) converts to both group 3 & group 1. input locale should dif
ferentiate */ |
| 4983 ucnv_fromUnicode (cnv16he, |
| 4984 &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsS
tring[0])), |
| 4985 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0
]), |
| 4986 NULL, 1, &errorCode); |
| 4987 |
| 4988 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83) |
| 4989 { |
| 4990 log_err("LMBCS-16,locale=he gives unexpected translation\n"); |
| 4991 } |
| 4992 |
| 4993 pLMBCSIn= (char *)lmbcsString; |
| 4994 pUniOut = uniString; |
| 4995 ucnv_fromUnicode (cnv01us, |
| 4996 &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsStri
ng)/sizeof(lmbcsString[0])), |
| 4997 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0
]), |
| 4998 NULL, 1, &errorCode); |
| 4999 |
| 5000 if (lmbcsString[0] != 0x9F) |
| 5001 { |
| 5002 log_err("LMBCS-1,locale=US gives unexpected translation\n"); |
| 5003 } |
| 5004 |
| 5005 /* single byte char from mbcs char set */ |
| 5006 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */ |
| 5007 pLMBCSOut = (const char *)lmbcsString; |
| 5008 pUniIn = uniString; |
| 5009 ucnv_toUnicode (cnv16jp, |
| 5010 &pUniIn, pUniIn + 1, |
| 5011 &pLMBCSOut, (pLMBCSOut + 1), |
| 5012 NULL, 1, &errorCode); |
| 5013 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pU
niIn != uniString+1 || uniString[0] != 0xFF6E) |
| 5014 { |
| 5015 log_err("Unexpected results from LMBCS-16 single byte char\n"); |
| 5016 } |
| 5017 /* convert to group 1: should be 3 bytes */ |
| 5018 pLMBCSIn = (char *)lmbcsString; |
| 5019 pUniOut = uniString; |
| 5020 ucnv_fromUnicode (cnv01us, |
| 5021 &pLMBCSIn, (const char *)(pLMBCSIn + 3), |
| 5022 &pUniOut, pUniOut + 1, |
| 5023 NULL, 1, &errorCode); |
| 5024 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUn
iOut != uniString+1 |
| 5025 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] !
= 0xAE) |
| 5026 { |
| 5027 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n"); |
| 5028 } |
| 5029 pLMBCSOut = (const char *)lmbcsString; |
| 5030 pUniIn = uniString; |
| 5031 ucnv_toUnicode (cnv01us, |
| 5032 &pUniIn, pUniIn + 1, |
| 5033 &pLMBCSOut, (const char *)(pLMBCSOut + 3), |
| 5034 NULL, 1, &errorCode); |
| 5035 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pU
niIn != uniString+1 || uniString[0] != 0xFF6E) |
| 5036 { |
| 5037 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n"); |
| 5038 } |
| 5039 pLMBCSIn = (char *)lmbcsString; |
| 5040 pUniOut = uniString; |
| 5041 ucnv_fromUnicode (cnv16jp, |
| 5042 &pLMBCSIn, (const char *)(pLMBCSIn + 1), |
| 5043 &pUniOut, pUniOut + 1, |
| 5044 NULL, 1, &errorCode); |
| 5045 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUn
iOut != uniString+1 || lmbcsString[0] != 0xAE) |
| 5046 { |
| 5047 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n"); |
| 5048 } |
| 5049 ucnv_close(cnv16he); |
| 5050 ucnv_close(cnv16jp); |
| 5051 ucnv_close(cnv01us); |
| 5052 } |
| 5053 { |
| 5054 /* Small source buffer testing, LMBCS -> Unicode */ |
| 5055 |
| 5056 UErrorCode errorCode=U_ZERO_ERROR; |
| 5057 |
| 5058 const char * pSource = (const char *)pszLMBCS; |
| 5059 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); |
| 5060 int codepointCount = 0; |
| 5061 |
| 5062 UChar Out [sizeof(pszUnicode) + 1]; |
| 5063 UChar * pOut = Out; |
| 5064 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); |
| 5065 |
| 5066 |
| 5067 cnv = ucnv_open(NAME_LMBCS_1, &errorCode); |
| 5068 if(U_FAILURE(errorCode)) { |
| 5069 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(error
Code)); |
| 5070 return; |
| 5071 } |
| 5072 |
| 5073 |
| 5074 while ((pSource < sourceLimit) && U_SUCCESS (errorCode)) |
| 5075 { |
| 5076 ucnv_toUnicode (cnv, |
| 5077 &pOut, |
| 5078 OutLimit, |
| 5079 &pSource, |
| 5080 (pSource+1), /* claim that this is a 1- byte buffer */ |
| 5081 NULL, |
| 5082 FALSE, /* FALSE means there might be more chars in the next bu
ffer */ |
| 5083 &errorCode); |
| 5084 |
| 5085 if (U_SUCCESS (errorCode)) |
| 5086 { |
| 5087 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount
+1]) |
| 5088 { |
| 5089 /* we are on to the next code point: check value */ |
| 5090 |
| 5091 if (Out[0] != pszUnicode[codepointCount]){ |
| 5092 log_err("LMBCS->Uni result %lx should have been %lx \n", |
| 5093 Out[0], pszUnicode[codepointCount]); |
| 5094 } |
| 5095 |
| 5096 pOut = Out; /* reset for accumulating next code point */ |
| 5097 codepointCount++; |
| 5098 } |
| 5099 } |
| 5100 else |
| 5101 { |
| 5102 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorC
ode)); |
| 5103 } |
| 5104 } |
| 5105 { |
| 5106 /* limits & surrogate error testing */ |
| 5107 char LIn [sizeof(pszLMBCS)]; |
| 5108 const char * pLIn = LIn; |
| 5109 |
| 5110 char LOut [sizeof(pszLMBCS)]; |
| 5111 char * pLOut = LOut; |
| 5112 |
| 5113 UChar UOut [sizeof(pszUnicode)]; |
| 5114 UChar * pUOut = UOut; |
| 5115 |
| 5116 UChar UIn [sizeof(pszUnicode)]; |
| 5117 const UChar * pUIn = UIn; |
| 5118 |
| 5119 int32_t off [sizeof(offsets)]; |
| 5120 UChar32 uniChar; |
| 5121 |
| 5122 errorCode=U_ZERO_ERROR; |
| 5123 |
| 5124 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERRO
R */ |
| 5125 pUIn++; |
| 5126 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &erro
rCode); |
| 5127 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) |
| 5128 { |
| 5129 log_err("Unexpected Error on negative source request to ucnv_fromUni
code: %s\n", u_errorName(errorCode)); |
| 5130 } |
| 5131 pUIn--; |
| 5132 |
| 5133 errorCode=U_ZERO_ERROR; |
| 5134 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(
pLIn-1),off,FALSE, &errorCode); |
| 5135 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) |
| 5136 { |
| 5137 log_err("Unexpected Error on negative source request to ucnv_toUnico
de: %s\n", u_errorName(errorCode)); |
| 5138 } |
| 5139 errorCode=U_ZERO_ERROR; |
| 5140 |
| 5141 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(p
LIn-1), &errorCode); |
| 5142 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) |
| 5143 { |
| 5144 log_err("Unexpected Error on negative source request to ucnv_getNext
UChar: %s\n", u_errorName(errorCode)); |
| 5145 } |
| 5146 errorCode=U_ZERO_ERROR; |
| 5147 |
| 5148 /* 0 byte source request - no error, no pointer movement */ |
| 5149 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)p
LIn,off,FALSE, &errorCode); |
| 5150 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode); |
| 5151 if(U_FAILURE(errorCode)) { |
| 5152 log_err("0 byte source request: unexpected error: %s\n", u_errorName
(errorCode)); |
| 5153 } |
| 5154 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn
)) |
| 5155 { |
| 5156 log_err("Unexpected pointer move in 0 byte source request \n"); |
| 5157 } |
| 5158 /*0 byte source request - GetNextUChar : error & value == fffe or ffff
*/ |
| 5159 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pL
In, &errorCode); |
| 5160 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR) |
| 5161 { |
| 5162 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUC
har: %s\n", u_errorName(errorCode)); |
| 5163 } |
| 5164 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */ |
| 5165 { |
| 5166 log_err("Unexpected value on 0-byte source request to ucnv_getnextUC
har \n"); |
| 5167 } |
| 5168 errorCode = U_ZERO_ERROR; |
| 5169 |
| 5170 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */ |
| 5171 |
| 5172 pUIn = pszUnicode; |
| 5173 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnic
ode)/sizeof(UChar),off,FALSE, &errorCode); |
| 5174 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4]
|| pUIn != pszUnicode+4 ) |
| 5175 { |
| 5176 log_err("Unexpected results on out of target room to ucnv_fromUnicod
e\n"); |
| 5177 } |
| 5178 |
| 5179 errorCode = U_ZERO_ERROR; |
| 5180 |
| 5181 pLIn = (const char *)pszLMBCS; |
| 5182 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FA
LSE, &errorCode); |
| 5183 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn !
= (const char *)pszLMBCS+offsets[4]) |
| 5184 { |
| 5185 log_err("Unexpected results on out of target room to ucnv_toUnicode\
n"); |
| 5186 } |
| 5187 |
| 5188 /* unpaired or chopped LMBCS surrogates */ |
| 5189 |
| 5190 /* OK high surrogate, Low surrogate is chopped */ |
| 5191 LIn [0] = (char)0x14; |
| 5192 LIn [1] = (char)0xD8; |
| 5193 LIn [2] = (char)0x01; |
| 5194 LIn [3] = (char)0x14; |
| 5195 LIn [4] = (char)0xDC; |
| 5196 pLIn = LIn; |
| 5197 errorCode = U_ZERO_ERROR; |
| 5198 pUOut = UOut; |
| 5199 |
| 5200 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &er
rorCode); |
| 5201 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char
**)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); |
| 5202 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut !
= UOut + 1 || pLIn != LIn + 5) |
| 5203 { |
| 5204 log_err("Unexpected results on chopped low surrogate\n"); |
| 5205 } |
| 5206 |
| 5207 /* chopped at surrogate boundary */ |
| 5208 LIn [0] = (char)0x14; |
| 5209 LIn [1] = (char)0xD8; |
| 5210 LIn [2] = (char)0x01; |
| 5211 pLIn = LIn; |
| 5212 errorCode = U_ZERO_ERROR; |
| 5213 pUOut = UOut; |
| 5214 |
| 5215 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char
**)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode); |
| 5216 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || p
LIn != LIn + 3) |
| 5217 { |
| 5218 log_err("Unexpected results on chopped at surrogate boundary \n"); |
| 5219 } |
| 5220 |
| 5221 /* unpaired surrogate plus valid Unichar */ |
| 5222 LIn [0] = (char)0x14; |
| 5223 LIn [1] = (char)0xD8; |
| 5224 LIn [2] = (char)0x01; |
| 5225 LIn [3] = (char)0x14; |
| 5226 LIn [4] = (char)0xC9; |
| 5227 LIn [5] = (char)0xD0; |
| 5228 pLIn = LIn; |
| 5229 errorCode = U_ZERO_ERROR; |
| 5230 pUOut = UOut; |
| 5231 |
| 5232 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char
**)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode); |
| 5233 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || p
UOut != UOut + 2 || pLIn != LIn + 6) |
| 5234 { |
| 5235 log_err("Unexpected results after unpaired surrogate plus valid Unic
har \n"); |
| 5236 } |
| 5237 |
| 5238 /* unpaired surrogate plus chopped Unichar */ |
| 5239 LIn [0] = (char)0x14; |
| 5240 LIn [1] = (char)0xD8; |
| 5241 LIn [2] = (char)0x01; |
| 5242 LIn [3] = (char)0x14; |
| 5243 LIn [4] = (char)0xC9; |
| 5244 |
| 5245 pLIn = LIn; |
| 5246 errorCode = U_ZERO_ERROR; |
| 5247 pUOut = UOut; |
| 5248 |
| 5249 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char
**)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); |
| 5250 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut !
= UOut + 1 || pLIn != LIn + 5) |
| 5251 { |
| 5252 log_err("Unexpected results after unpaired surrogate plus chopped Un
ichar \n"); |
| 5253 } |
| 5254 |
| 5255 /* unpaired surrogate plus valid non-Unichar */ |
| 5256 LIn [0] = (char)0x14; |
| 5257 LIn [1] = (char)0xD8; |
| 5258 LIn [2] = (char)0x01; |
| 5259 LIn [3] = (char)0x0F; |
| 5260 LIn [4] = (char)0x3B; |
| 5261 |
| 5262 pLIn = LIn; |
| 5263 errorCode = U_ZERO_ERROR; |
| 5264 pUOut = UOut; |
| 5265 |
| 5266 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char
**)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); |
| 5267 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUO
ut != UOut + 2 || pLIn != LIn + 5) |
| 5268 { |
| 5269 log_err("Unexpected results after unpaired surrogate plus valid non-
Unichar\n"); |
| 5270 } |
| 5271 |
| 5272 /* unpaired surrogate plus chopped non-Unichar */ |
| 5273 LIn [0] = (char)0x14; |
| 5274 LIn [1] = (char)0xD8; |
| 5275 LIn [2] = (char)0x01; |
| 5276 LIn [3] = (char)0x0F; |
| 5277 |
| 5278 pLIn = LIn; |
| 5279 errorCode = U_ZERO_ERROR; |
| 5280 pUOut = UOut; |
| 5281 |
| 5282 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char
**)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode); |
| 5283 |
| 5284 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut !
= UOut + 1 || pLIn != LIn + 4) |
| 5285 { |
| 5286 log_err("Unexpected results after unpaired surrogate plus chopped no
n-Unichar\n"); |
| 5287 } |
| 5288 } |
| 5289 } |
| 5290 ucnv_close(cnv); /* final cleanup */ |
| 5291 } |
| 5292 |
| 5293 |
| 5294 static void TestJitterbug255() |
| 5295 { |
| 5296 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x0
0 }; |
| 5297 const char *testBuffer = (const char *)testBytes; |
| 5298 const char *testEnd = (const char *)testBytes + sizeof(testBytes); |
| 5299 UErrorCode status = U_ZERO_ERROR; |
| 5300 /*UChar32 result;*/ |
| 5301 UConverter *cnv = 0; |
| 5302 |
| 5303 cnv = ucnv_open("shift-jis", &status); |
| 5304 if (U_FAILURE(status) || cnv == 0) { |
| 5305 log_data_err("Failed to open the converter for SJIS.\n"); |
| 5306 return; |
| 5307 } |
| 5308 while (testBuffer != testEnd) |
| 5309 { |
| 5310 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status); |
| 5311 if (U_FAILURE(status)) |
| 5312 { |
| 5313 log_err("Failed to convert the next UChar for SJIS.\n"); |
| 5314 break; |
| 5315 } |
| 5316 } |
| 5317 ucnv_close(cnv); |
| 5318 } |
| 5319 |
| 5320 static void TestEBCDICUS4XML() |
| 5321 { |
| 5322 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000}; |
| 5323 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000}; |
| 5324 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00}; |
| 5325 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00}; |
| 5326 char target_x[] = {0x00, 0x00, 0x00, 0x00}; |
| 5327 UChar *unicodes = unicodes_x; |
| 5328 const UChar *toUnicodeMaps = toUnicodeMaps_x; |
| 5329 char *target = target_x; |
| 5330 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x; |
| 5331 UErrorCode status = U_ZERO_ERROR; |
| 5332 UConverter *cnv = 0; |
| 5333 |
| 5334 cnv = ucnv_open("ebcdic-xml-us", &status); |
| 5335 if (U_FAILURE(status) || cnv == 0) { |
| 5336 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n"); |
| 5337 return; |
| 5338 } |
| 5339 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines
+3, NULL, TRUE, &status); |
| 5340 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3)
!= 0) { |
| 5341 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n", |
| 5342 u_errorName(status)); |
| 5343 printUSeqErr(unicodes_x, 3); |
| 5344 printUSeqErr(toUnicodeMaps, 3); |
| 5345 } |
| 5346 status = U_ZERO_ERROR; |
| 5347 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUn
icodeMaps+3, NULL, TRUE, &status); |
| 5348 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) !
= 0) { |
| 5349 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n", |
| 5350 u_errorName(status)); |
| 5351 printSeqErr((const unsigned char*)target_x, 3); |
| 5352 printSeqErr((const unsigned char*)fromUnicodeMaps, 3); |
| 5353 } |
| 5354 ucnv_close(cnv); |
| 5355 } |
| 5356 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
| 5357 |
| 5358 #if !UCONFIG_NO_COLLATION |
| 5359 |
| 5360 static void TestJitterbug981(){ |
| 5361 const UChar* rules; |
| 5362 int32_t rules_length, target_cap, bytes_needed, buff_size; |
| 5363 UErrorCode status = U_ZERO_ERROR; |
| 5364 UConverter *utf8cnv; |
| 5365 UCollator* myCollator; |
| 5366 char *buff; |
| 5367 int numNeeded=0; |
| 5368 utf8cnv = ucnv_open ("utf8", &status); |
| 5369 if(U_FAILURE(status)){ |
| 5370 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(statu
s)); |
| 5371 return; |
| 5372 } |
| 5373 myCollator = ucol_open("zh", &status); |
| 5374 if(U_FAILURE(status)){ |
| 5375 log_data_err("Could not open collator for zh locale. Error: %s\n", u_err
orName(status)); |
| 5376 ucnv_close(utf8cnv); |
| 5377 return; |
| 5378 } |
| 5379 |
| 5380 rules = ucol_getRules(myCollator, &rules_length); |
| 5381 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv); |
| 5382 buff = malloc(buff_size); |
| 5383 |
| 5384 target_cap = 0; |
| 5385 do { |
| 5386 ucnv_reset(utf8cnv); |
| 5387 status = U_ZERO_ERROR; |
| 5388 if(target_cap >= buff_size) { |
| 5389 log_err("wanted %d bytes, only %d available\n", target_cap, buff_siz
e); |
| 5390 break; |
| 5391 } |
| 5392 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap, |
| 5393 rules, rules_length, &status); |
| 5394 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; |
| 5395 if(numNeeded!=0 && numNeeded!= bytes_needed){ |
| 5396 log_err("ucnv_fromUChars returns different values for required capac
ity in pre-flight and conversion modes"); |
| 5397 break; |
| 5398 } |
| 5399 numNeeded = bytes_needed; |
| 5400 } while (status == U_BUFFER_OVERFLOW_ERROR); |
| 5401 ucol_close(myCollator); |
| 5402 ucnv_close(utf8cnv); |
| 5403 free(buff); |
| 5404 } |
| 5405 |
| 5406 #endif |
| 5407 |
| 5408 static void TestJitterbug1293(){ |
| 5409 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4,
0x30D7,0x000}; |
| 5410 char target[256]; |
| 5411 UErrorCode status = U_ZERO_ERROR; |
| 5412 UConverter* conv=NULL; |
| 5413 int32_t target_cap, bytes_needed, numNeeded = 0; |
| 5414 conv = ucnv_open("shift-jis",&status); |
| 5415 if(U_FAILURE(status)){ |
| 5416 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(
status)); |
| 5417 return; |
| 5418 } |
| 5419 |
| 5420 do{ |
| 5421 target_cap =0; |
| 5422 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status
); |
| 5423 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; |
| 5424 if(numNeeded!=0 && numNeeded!= bytes_needed){ |
| 5425 log_err("ucnv_fromUChars returns different values for required capacit
y in pre-flight and conversion modes"); |
| 5426 } |
| 5427 numNeeded = bytes_needed; |
| 5428 } while (status == U_BUFFER_OVERFLOW_ERROR); |
| 5429 if(U_FAILURE(status)){ |
| 5430 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(stat
us)); |
| 5431 return; |
| 5432 } |
| 5433 ucnv_close(conv); |
| 5434 } |
| 5435 static void TestJB5275_1(){ |
| 5436 |
| 5437 static const char* data = "\x3B\xB3\x0A" /* Easy characters */ |
| 5438 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test
*/ |
| 5439 /* Switch script: */ |
| 5440 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengal
i test */ |
| 5441 "\x3B\xB3\x0A" /* Easy characters - new line, so
should default!*/ |
| 5442 "\xEF\x40\x3B\xB3\x0A"; |
| 5443 static const UChar expected[] ={ |
| 5444 0x003b, 0x0a15, 0x000a, /* Easy characters */ |
| 5445 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi
test */ |
| 5446 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali
*/ |
| 5447 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should def
ault!*/ |
| 5448 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/ |
| 5449 }; |
| 5450 |
| 5451 UErrorCode status = U_ZERO_ERROR; |
| 5452 UConverter* conv = ucnv_open("iscii-gur", &status); |
| 5453 UChar dest[100] = {'\0'}; |
| 5454 UChar* target = dest; |
| 5455 UChar* targetLimit = dest+100; |
| 5456 const char* source = data; |
| 5457 const char* sourceLimit = data+strlen(data); |
| 5458 const UChar* exp = expected; |
| 5459 |
| 5460 if (U_FAILURE(status)) { |
| 5461 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n",
u_errorName(status)); |
| 5462 return; |
| 5463 } |
| 5464 |
| 5465 log_verbose("Testing switching back to default script when new line is encou
ntered.\n"); |
| 5466 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE,
&status); |
| 5467 if(U_FAILURE(status)){ |
| 5468 log_err("conversion failed: %s \n", u_errorName(status)); |
| 5469 } |
| 5470 targetLimit = target; |
| 5471 target = dest; |
| 5472 printUSeq(target, targetLimit-target); |
| 5473 while(target<targetLimit){ |
| 5474 if(*exp!=*target){ |
| 5475 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n
", *exp, *target); |
| 5476 } |
| 5477 target++; |
| 5478 exp++; |
| 5479 } |
| 5480 ucnv_close(conv); |
| 5481 } |
| 5482 |
| 5483 static void TestJB5275(){ |
| 5484 static const char* data = |
| 5485 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41
*/ |
| 5486 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41
*/ |
| 5487 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsuppor
ted sequence \xEF\x41 */ |
| 5488 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ |
| 5489 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */ |
| 5490 "\xEF\x48\x38\xB3\x0A" /* Kannada test */ |
| 5491 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */ |
| 5492 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */ |
| 5493 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */ |
| 5494 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */; |
| 5495 static const UChar expected[] ={ |
| 5496 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test
*/ |
| 5497 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati
test */ |
| 5498 0x0038, 0x0C95, 0x000A, /* Kannada test */ |
| 5499 0x0039, 0x0D15, 0x000A, /* Malayalam test */ |
| 5500 0x003A, 0x0A95, 0x000A, /* Gujarati test */ |
| 5501 0x003B, 0x0A15, 0x000A, /* Punjabi test */ |
| 5502 }; |
| 5503 |
| 5504 UErrorCode status = U_ZERO_ERROR; |
| 5505 UConverter* conv = ucnv_open("iscii", &status); |
| 5506 UChar dest[100] = {'\0'}; |
| 5507 UChar* target = dest; |
| 5508 UChar* targetLimit = dest+100; |
| 5509 const char* source = data; |
| 5510 const char* sourceLimit = data+strlen(data); |
| 5511 const UChar* exp = expected; |
| 5512 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE,
&status); |
| 5513 if(U_FAILURE(status)){ |
| 5514 log_err("conversion failed: %s \n", u_errorName(status)); |
| 5515 } |
| 5516 targetLimit = target; |
| 5517 target = dest; |
| 5518 |
| 5519 printUSeq(target, targetLimit-target); |
| 5520 |
| 5521 while(target<targetLimit){ |
| 5522 if(*exp!=*target){ |
| 5523 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n
", *exp, *target); |
| 5524 } |
| 5525 target++; |
| 5526 exp++; |
| 5527 } |
| 5528 ucnv_close(conv); |
| 5529 } |
OLD | NEW |