| OLD | NEW |
| (Empty) |
| 1 /******************************************************************** | |
| 2 * COPYRIGHT: | |
| 3 * Copyright (c) 1997-2015, International Business Machines Corporation and | |
| 4 * others. All Rights Reserved. | |
| 5 ********************************************************************/ | |
| 6 /******************************************************************************* | |
| 7 * | |
| 8 * File nucnvtst.c | |
| 9 * | |
| 10 * Modification History: | |
| 11 * Name Description | |
| 12 * Steven R. Loomis 7/8/1999 Adding input buffer test | |
| 13 ******************************************************************************** | |
| 14 */ | |
| 15 #include <stdio.h> | |
| 16 #include "cstring.h" | |
| 17 #include "unicode/uloc.h" | |
| 18 #include "unicode/ucnv.h" | |
| 19 #include "unicode/ucnv_err.h" | |
| 20 #include "unicode/ucnv_cb.h" | |
| 21 #include "cintltst.h" | |
| 22 #include "unicode/utypes.h" | |
| 23 #include "unicode/ustring.h" | |
| 24 #include "unicode/ucol.h" | |
| 25 #include "unicode/utf16.h" | |
| 26 #include "cmemory.h" | |
| 27 #include "nucnvtst.h" | |
| 28 | |
| 29 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit
, const int32_t results[], const char* message); | |
| 30 static void TestNextUCharError(UConverter* cnv, const char* source, const char*
limit, UErrorCode expected, const char* message); | |
| 31 #if !UCONFIG_NO_COLLATION | |
| 32 static void TestJitterbug981(void); | |
| 33 #endif | |
| 34 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 35 static void TestJitterbug1293(void); | |
| 36 #endif | |
| 37 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; | |
| 38 static void TestConverterTypesAndStarters(void); | |
| 39 static void TestAmbiguous(void); | |
| 40 static void TestSignatureDetection(void); | |
| 41 static void TestUTF7(void); | |
| 42 static void TestIMAP(void); | |
| 43 static void TestUTF8(void); | |
| 44 static void TestCESU8(void); | |
| 45 static void TestUTF16(void); | |
| 46 static void TestUTF16BE(void); | |
| 47 static void TestUTF16LE(void); | |
| 48 static void TestUTF32(void); | |
| 49 static void TestUTF32BE(void); | |
| 50 static void TestUTF32LE(void); | |
| 51 static void TestLATIN1(void); | |
| 52 | |
| 53 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 54 static void TestSBCS(void); | |
| 55 static void TestDBCS(void); | |
| 56 static void TestMBCS(void); | |
| 57 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO | |
| 58 static void TestICCRunout(void); | |
| 59 #endif | |
| 60 | |
| 61 #ifdef U_ENABLE_GENERIC_ISO_2022 | |
| 62 static void TestISO_2022(void); | |
| 63 #endif | |
| 64 | |
| 65 static void TestISO_2022_JP(void); | |
| 66 static void TestISO_2022_JP_1(void); | |
| 67 static void TestISO_2022_JP_2(void); | |
| 68 static void TestISO_2022_KR(void); | |
| 69 static void TestISO_2022_KR_1(void); | |
| 70 static void TestISO_2022_CN(void); | |
| 71 #if 0 | |
| 72 /* | |
| 73 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 | |
| 74 */ | |
| 75 static void TestISO_2022_CN_EXT(void); | |
| 76 #endif | |
| 77 static void TestJIS(void); | |
| 78 static void TestHZ(void); | |
| 79 #endif | |
| 80 | |
| 81 static void TestSCSU(void); | |
| 82 | |
| 83 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 84 static void TestEBCDIC_STATEFUL(void); | |
| 85 static void TestGB18030(void); | |
| 86 static void TestLMBCS(void); | |
| 87 static void TestJitterbug255(void); | |
| 88 static void TestEBCDICUS4XML(void); | |
| 89 #if 0 | |
| 90 /* | |
| 91 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 | |
| 92 */ | |
| 93 static void TestJitterbug915(void); | |
| 94 #endif | |
| 95 static void TestISCII(void); | |
| 96 | |
| 97 static void TestCoverageMBCS(void); | |
| 98 static void TestJitterbug2346(void); | |
| 99 static void TestJitterbug2411(void); | |
| 100 static void TestJB5275(void); | |
| 101 static void TestJB5275_1(void); | |
| 102 static void TestJitterbug6175(void); | |
| 103 | |
| 104 static void TestIsFixedWidth(void); | |
| 105 #endif | |
| 106 | |
| 107 static void TestInBufSizes(void); | |
| 108 | |
| 109 static void TestRoundTrippingAllUTF(void); | |
| 110 static void TestConv(const uint16_t in[], | |
| 111 int len, | |
| 112 const char* conv, | |
| 113 const char* lang, | |
| 114 char byteArr[], | |
| 115 int byteArrLen); | |
| 116 | |
| 117 /* open a converter, using test data if it begins with '@' */ | |
| 118 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err); | |
| 119 | |
| 120 | |
/* Capacity, in elements, of the scratch output and offset arrays used by the
   testConvert*() helpers; also the default chunk size for both directions. */
#define NEW_MAX_BUFFER 999

/* Per-call chunk sizes for the conversion loops. They are overwritten by
   TestNewConvertWithBufferSizes() and read by testConvertFromU(),
   testConvertToU() and setNuConvTestName(). */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
/* Description of the conversion currently under test; filled in by
   setNuConvTestName() and appended to log messages. */
static char     gNuConvTestName[1024];
| 126 | |
/* Smaller of x and y. Arguments are fully parenthesized so that operators
   with lower precedence than '<' (e.g. '|', '&', '?:') in an argument cannot
   re-associate. This is a function-like macro: an argument may be evaluated
   twice, so pass only expressions without side effects. */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
| 128 | |
| 129 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err) | |
| 130 { | |
| 131 if(cnv && cnv[0] == '@') { | |
| 132 return ucnv_openPackage(loadTestData(err), cnv+1, err); | |
| 133 } else { | |
| 134 return ucnv_open(cnv, err); | |
| 135 } | |
| 136 } | |
| 137 | |
/* Write the first len bytes of a to the verbose log as "{0x.. 0x.. }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for(idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
| 146 | |
| 147 static void printUSeq(const UChar* a, int len) | |
| 148 { | |
| 149 int i=0; | |
| 150 log_verbose("{U+"); | |
| 151 while (i<len) log_verbose("0x%04x ", a[i++]); | |
| 152 log_verbose("}\n"); | |
| 153 } | |
| 154 | |
/* Same output as printSeq(), but written unconditionally to stderr. */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for(idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
| 163 | |
| 164 static void printUSeqErr(const UChar* a, int len) | |
| 165 { | |
| 166 int i=0; | |
| 167 fprintf(stderr, "{U+"); | |
| 168 while (i<len) | |
| 169 fprintf(stderr, "0x%04x ", a[i++]); | |
| 170 fprintf(stderr,"}\n"); | |
| 171 } | |
| 172 | |
| 173 static void | |
| 174 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int3
2_t results[], const char* message) | |
| 175 { | |
| 176 const char* s0; | |
| 177 const char* s=(char*)source; | |
| 178 const int32_t *r=results; | |
| 179 UErrorCode errorCode=U_ZERO_ERROR; | |
| 180 UChar32 c; | |
| 181 | |
| 182 while(s<limit) { | |
| 183 s0=s; | |
| 184 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); | |
| 185 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { | |
| 186 break; /* no more significant input */ | |
| 187 } else if(U_FAILURE(errorCode)) { | |
| 188 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(
errorCode)); | |
| 189 break; | |
| 190 } else if( | |
| 191 /* test the expected number of input bytes only if >=0 */ | |
| 192 (*r>=0 && (int32_t)(s-s0)!=*r) || | |
| 193 c!=*(r+1) | |
| 194 ) { | |
| 195 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should hav
e been %lx from %d bytes.\n", | |
| 196 message, c, (s-s0), *(r+1), *r); | |
| 197 break; | |
| 198 } | |
| 199 r+=2; | |
| 200 } | |
| 201 } | |
| 202 | |
| 203 static void | |
| 204 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErro
rCode expected, const char* message) | |
| 205 { | |
| 206 const char* s=(char*)source; | |
| 207 UErrorCode errorCode=U_ZERO_ERROR; | |
| 208 uint32_t c; | |
| 209 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); | |
| 210 if(errorCode != expected){ | |
| 211 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected),
message, myErrorName(errorCode)); | |
| 212 } | |
| 213 if(c != 0xFFFD && c != 0xffff){ | |
| 214 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got
0x%lx\n", message, c); | |
| 215 } | |
| 216 | |
| 217 } | |
| 218 | |
| 219 static void TestInBufSizes(void) | |
| 220 { | |
| 221 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1); | |
| 222 #if 1 | |
| 223 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2); | |
| 224 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3); | |
| 225 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4); | |
| 226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5); | |
| 227 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6); | |
| 228 TestNewConvertWithBufferSizes(1,1); | |
| 229 TestNewConvertWithBufferSizes(2,3); | |
| 230 TestNewConvertWithBufferSizes(3,2); | |
| 231 #endif | |
| 232 } | |
| 233 | |
| 234 static void TestOutBufSizes(void) | |
| 235 { | |
| 236 #if 1 | |
| 237 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER); | |
| 238 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER); | |
| 239 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER); | |
| 240 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER); | |
| 241 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER); | |
| 242 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER); | |
| 243 | |
| 244 #endif | |
| 245 } | |
| 246 | |
| 247 | |
| 248 void addTestNewConvert(TestNode** root) | |
| 249 { | |
| 250 #if !UCONFIG_NO_FILE_IO | |
| 251 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes"); | |
| 252 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes"); | |
| 253 #endif | |
| 254 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterT
ypesAndStarters"); | |
| 255 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous"); | |
| 256 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetectio
n"); | |
| 257 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7"); | |
| 258 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP"); | |
| 259 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8"); | |
| 260 | |
| 261 /* test ucnv_getNextUChar() for charsets that encode single surrogates with c
omplete byte sequences */ | |
| 262 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8"); | |
| 263 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16"); | |
| 264 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE"); | |
| 265 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE"); | |
| 266 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32"); | |
| 267 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE"); | |
| 268 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE"); | |
| 269 | |
| 270 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 271 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS"); | |
| 272 #endif | |
| 273 | |
| 274 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1"); | |
| 275 | |
| 276 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 277 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS"); | |
| 278 #if !UCONFIG_NO_FILE_IO | |
| 279 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS"); | |
| 280 addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout"); | |
| 281 #endif | |
| 282 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS"); | |
| 283 | |
| 284 #ifdef U_ENABLE_GENERIC_ISO_2022 | |
| 285 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022"); | |
| 286 #endif | |
| 287 | |
| 288 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP"); | |
| 289 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS"); | |
| 290 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1"); | |
| 291 addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2"); | |
| 292 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR"); | |
| 293 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1"); | |
| 294 addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN"); | |
| 295 /* | |
| 296 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 | |
| 297 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT"); | |
| 298 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915"); | |
| 299 */ | |
| 300 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ"); | |
| 301 #endif | |
| 302 | |
| 303 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU"); | |
| 304 | |
| 305 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 306 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL"); | |
| 307 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030"); | |
| 308 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255"); | |
| 309 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML"); | |
| 310 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII"); | |
| 311 addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275"); | |
| 312 addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1"); | |
| 313 #if !UCONFIG_NO_COLLATION | |
| 314 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981"); | |
| 315 #endif | |
| 316 | |
| 317 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293"); | |
| 318 #endif | |
| 319 | |
| 320 | |
| 321 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO | |
| 322 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS"); | |
| 323 #endif | |
| 324 | |
| 325 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAll
UTF"); | |
| 326 | |
| 327 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 328 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346"); | |
| 329 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411"); | |
| 330 addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175"); | |
| 331 | |
| 332 addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth"); | |
| 333 #endif | |
| 334 } | |
| 335 | |
| 336 | |
| 337 /* Note that this test already makes use of statics, so it's not really | |
| 338 multithread safe. | |
| 339 This convenience function lets us make the error messages actually useful. | |
| 340 */ | |
| 341 | |
| 342 static void setNuConvTestName(const char *codepage, const char *direction) | |
| 343 { | |
| 344 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufS
iz=%d]", | |
| 345 codepage, | |
| 346 direction, | |
| 347 (int)gInBufferSize, | |
| 348 (int)gOutBufferSize); | |
| 349 } | |
| 350 | |
/* Outcome of one testConvertFromU()/testConvertToU() run. */
typedef enum
{
    /* test was OK */
    TC_OK = 0,
    /* Match failed - err was printed */
    TC_MISMATCH,
    /* Test failed, don't print an err because it was already printed. */
    TC_FAIL
} ETestConvertResult;
| 357 | |
| 358 /* Note: This function uses global variables and it will not do offset | |
| 359 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ | |
| 360 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,
const uint8_t *expect, int expectLen, | |
| 361 const char *codepage, const int32_t *expectOffsets , UBool useFa
llback) | |
| 362 { | |
| 363 UErrorCode status = U_ZERO_ERROR; | |
| 364 UConverter *conv = 0; | |
| 365 char junkout[NEW_MAX_BUFFER]; /* FIX */ | |
| 366 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
| 367 char *p; | |
| 368 const UChar *src; | |
| 369 char *end; | |
| 370 char *targ; | |
| 371 int32_t *offs; | |
| 372 int i; | |
| 373 int32_t realBufferSize; | |
| 374 char *realBufferEnd; | |
| 375 const UChar *realSourceEnd; | |
| 376 const UChar *sourceLimit; | |
| 377 UBool checkOffsets = TRUE; | |
| 378 UBool doFlush; | |
| 379 | |
| 380 for(i=0;i<NEW_MAX_BUFFER;i++) | |
| 381 junkout[i] = (char)0xF0; | |
| 382 for(i=0;i<NEW_MAX_BUFFER;i++) | |
| 383 junokout[i] = 0xFF; | |
| 384 | |
| 385 setNuConvTestName(codepage, "FROM"); | |
| 386 | |
| 387 log_verbose("\n========= %s\n", gNuConvTestName); | |
| 388 | |
| 389 conv = my_ucnv_open(codepage, &status); | |
| 390 | |
| 391 if(U_FAILURE(status)) | |
| 392 { | |
| 393 log_data_err("Couldn't open converter %s\n",codepage); | |
| 394 return TC_FAIL; | |
| 395 } | |
| 396 if(useFallback){ | |
| 397 ucnv_setFallback(conv,useFallback); | |
| 398 } | |
| 399 | |
| 400 log_verbose("Converter opened..\n"); | |
| 401 | |
| 402 src = source; | |
| 403 targ = junkout; | |
| 404 offs = junokout; | |
| 405 | |
| 406 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
| 407 realBufferEnd = junkout + realBufferSize; | |
| 408 realSourceEnd = source + sourceLen; | |
| 409 | |
| 410 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) | |
| 411 checkOffsets = FALSE; | |
| 412 | |
| 413 do | |
| 414 { | |
| 415 end = nct_min(targ + gOutBufferSize, realBufferEnd); | |
| 416 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); | |
| 417 | |
| 418 doFlush = (UBool)(sourceLimit == realSourceEnd); | |
| 419 | |
| 420 if(targ == realBufferEnd) { | |
| 421 log_err("Error, overflowed the real buffer while about to call fromUnico
de! targ=%08lx %s", targ, gNuConvTestName); | |
| 422 return TC_FAIL; | |
| 423 } | |
| 424 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to
%08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); | |
| 425 | |
| 426 | |
| 427 status = U_ZERO_ERROR; | |
| 428 | |
| 429 ucnv_fromUnicode (conv, | |
| 430 &targ, | |
| 431 end, | |
| 432 &src, | |
| 433 sourceLimit, | |
| 434 checkOffsets ? offs : NULL, | |
| 435 doFlush, /* flush if we're at the end of the input data
*/ | |
| 436 &status); | |
| 437 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourc
eLimit < realSourceEnd) ); | |
| 438 | |
| 439 if(U_FAILURE(status)) { | |
| 440 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myEr
rorName(status), gNuConvTestName); | |
| 441 return TC_FAIL; | |
| 442 } | |
| 443 | |
| 444 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", | |
| 445 sourceLen, targ-junkout); | |
| 446 | |
| 447 if(getTestOption(VERBOSITY_OPTION)) | |
| 448 { | |
| 449 char junk[9999]; | |
| 450 char offset_str[9999]; | |
| 451 char *ptr; | |
| 452 | |
| 453 junk[0] = 0; | |
| 454 offset_str[0] = 0; | |
| 455 for(ptr = junkout;ptr<targ;ptr++) { | |
| 456 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr)); | |
| 457 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junoko
ut[ptr-junkout])); | |
| 458 } | |
| 459 | |
| 460 log_verbose(junk); | |
| 461 printSeq((const uint8_t *)expect, expectLen); | |
| 462 if ( checkOffsets ) { | |
| 463 log_verbose("\nOffsets:"); | |
| 464 log_verbose(offset_str); | |
| 465 } | |
| 466 log_verbose("\n"); | |
| 467 } | |
| 468 ucnv_close(conv); | |
| 469 | |
| 470 if(expectLen != targ-junkout) { | |
| 471 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNu
ConvTestName); | |
| 472 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout,
gNuConvTestName); | |
| 473 fprintf(stderr, "Got:\n"); | |
| 474 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); | |
| 475 fprintf(stderr, "Expected:\n"); | |
| 476 printSeqErr((const unsigned char*)expect, expectLen); | |
| 477 return TC_MISMATCH; | |
| 478 } | |
| 479 | |
| 480 if (checkOffsets && (expectOffsets != 0) ) { | |
| 481 log_verbose("comparing %d offsets..\n", targ-junkout); | |
| 482 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ | |
| 483 log_err("did not get the expected offsets. %s\n", gNuConvTestName); | |
| 484 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); | |
| 485 log_err("\n"); | |
| 486 log_err("Got : "); | |
| 487 for(p=junkout;p<targ;p++) { | |
| 488 log_err("%d,", junokout[p-junkout]); | |
| 489 } | |
| 490 log_err("\n"); | |
| 491 log_err("Expected: "); | |
| 492 for(i=0; i<(targ-junkout); i++) { | |
| 493 log_err("%d,", expectOffsets[i]); | |
| 494 } | |
| 495 log_err("\n"); | |
| 496 } | |
| 497 } | |
| 498 | |
| 499 log_verbose("comparing..\n"); | |
| 500 if(!memcmp(junkout, expect, expectLen)) { | |
| 501 log_verbose("Matches!\n"); | |
| 502 return TC_OK; | |
| 503 } else { | |
| 504 log_err("String does not match u->%s\n", gNuConvTestName); | |
| 505 printUSeqErr(source, sourceLen); | |
| 506 fprintf(stderr, "Got:\n"); | |
| 507 printSeqErr((const unsigned char *)junkout, expectLen); | |
| 508 fprintf(stderr, "Expected:\n"); | |
| 509 printSeqErr((const unsigned char *)expect, expectLen); | |
| 510 | |
| 511 return TC_MISMATCH; | |
| 512 } | |
| 513 } | |
| 514 | |
| 515 /* Note: This function uses global variables and it will not do offset | |
| 516 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ | |
| 517 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen,
const UChar *expect, int expectlen, | |
| 518 const char *codepage, const int32_t *e
xpectOffsets, UBool useFallback) | |
| 519 { | |
| 520 UErrorCode status = U_ZERO_ERROR; | |
| 521 UConverter *conv = 0; | |
| 522 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ | |
| 523 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
| 524 const char *src; | |
| 525 const char *realSourceEnd; | |
| 526 const char *srcLimit; | |
| 527 UChar *p; | |
| 528 UChar *targ; | |
| 529 UChar *end; | |
| 530 int32_t *offs; | |
| 531 int i; | |
| 532 UBool checkOffsets = TRUE; | |
| 533 | |
| 534 int32_t realBufferSize; | |
| 535 UChar *realBufferEnd; | |
| 536 | |
| 537 | |
| 538 for(i=0;i<NEW_MAX_BUFFER;i++) | |
| 539 junkout[i] = 0xFFFE; | |
| 540 | |
| 541 for(i=0;i<NEW_MAX_BUFFER;i++) | |
| 542 junokout[i] = -1; | |
| 543 | |
| 544 setNuConvTestName(codepage, "TO"); | |
| 545 | |
| 546 log_verbose("\n========= %s\n", gNuConvTestName); | |
| 547 | |
| 548 conv = my_ucnv_open(codepage, &status); | |
| 549 | |
| 550 if(U_FAILURE(status)) | |
| 551 { | |
| 552 log_data_err("Couldn't open converter %s\n",gNuConvTestName); | |
| 553 return TC_FAIL; | |
| 554 } | |
| 555 if(useFallback){ | |
| 556 ucnv_setFallback(conv,useFallback); | |
| 557 } | |
| 558 log_verbose("Converter opened..\n"); | |
| 559 | |
| 560 src = (const char *)source; | |
| 561 targ = junkout; | |
| 562 offs = junokout; | |
| 563 | |
| 564 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
| 565 realBufferEnd = junkout + realBufferSize; | |
| 566 realSourceEnd = src + sourcelen; | |
| 567 | |
| 568 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) | |
| 569 checkOffsets = FALSE; | |
| 570 | |
| 571 do | |
| 572 { | |
| 573 end = nct_min( targ + gOutBufferSize, realBufferEnd); | |
| 574 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); | |
| 575 | |
| 576 if(targ == realBufferEnd) | |
| 577 { | |
| 578 log_err("Error, the end would overflow the real output buffer while
about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName); | |
| 579 return TC_FAIL; | |
| 580 } | |
| 581 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); | |
| 582 | |
| 583 /* oldTarg = targ; */ | |
| 584 | |
| 585 status = U_ZERO_ERROR; | |
| 586 | |
| 587 ucnv_toUnicode (conv, | |
| 588 &targ, | |
| 589 end, | |
| 590 &src, | |
| 591 srcLimit, | |
| 592 checkOffsets ? offs : NULL, | |
| 593 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end
of hte source data */ | |
| 594 &status); | |
| 595 | |
| 596 /* offs += (targ-oldTarg); */ | |
| 597 | |
| 598 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sr
cLimit < realSourceEnd)) ); /* while we just need another buffer */ | |
| 599 | |
| 600 if(U_FAILURE(status)) | |
| 601 { | |
| 602 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myError
Name(status), gNuConvTestName); | |
| 603 return TC_FAIL; | |
| 604 } | |
| 605 | |
| 606 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", | |
| 607 sourcelen, targ-junkout); | |
| 608 if(getTestOption(VERBOSITY_OPTION)) | |
| 609 { | |
| 610 char junk[9999]; | |
| 611 char offset_str[9999]; | |
| 612 UChar *ptr; | |
| 613 | |
| 614 junk[0] = 0; | |
| 615 offset_str[0] = 0; | |
| 616 | |
| 617 for(ptr = junkout;ptr<targ;ptr++) | |
| 618 { | |
| 619 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p
tr); | |
| 620 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (uns
igned int)junokout[ptr-junkout]); | |
| 621 } | |
| 622 | |
| 623 log_verbose(junk); | |
| 624 printUSeq(expect, expectlen); | |
| 625 if ( checkOffsets ) | |
| 626 { | |
| 627 log_verbose("\nOffsets:"); | |
| 628 log_verbose(offset_str); | |
| 629 } | |
| 630 log_verbose("\n"); | |
| 631 } | |
| 632 ucnv_close(conv); | |
| 633 | |
| 634 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); | |
| 635 | |
| 636 if (checkOffsets && (expectOffsets != 0)) | |
| 637 { | |
| 638 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ | |
| 639 log_err("did not get the expected offsets. %s\n",gNuConvTestName); | |
| 640 log_err("Got: "); | |
| 641 for(p=junkout;p<targ;p++) { | |
| 642 log_err("%d,", junokout[p-junkout]); | |
| 643 } | |
| 644 log_err("\n"); | |
| 645 log_err("Expected: "); | |
| 646 for(i=0; i<(targ-junkout); i++) { | |
| 647 log_err("%d,", expectOffsets[i]); | |
| 648 } | |
| 649 log_err("\n"); | |
| 650 log_err("output: "); | |
| 651 for(i=0; i<(targ-junkout); i++) { | |
| 652 log_err("%X,", junkout[i]); | |
| 653 } | |
| 654 log_err("\n"); | |
| 655 log_err("input: "); | |
| 656 for(i=0; i<(src-(const char *)source); i++) { | |
| 657 log_err("%X,", (unsigned char)source[i]); | |
| 658 } | |
| 659 log_err("\n"); | |
| 660 } | |
| 661 } | |
| 662 | |
| 663 if(!memcmp(junkout, expect, expectlen*2)) | |
| 664 { | |
| 665 log_verbose("Matches!\n"); | |
| 666 return TC_OK; | |
| 667 } | |
| 668 else | |
| 669 { | |
| 670 log_err("String does not match. %s\n", gNuConvTestName); | |
| 671 log_verbose("String does not match. %s\n", gNuConvTestName); | |
| 672 printf("\nGot:"); | |
| 673 printUSeqErr(junkout, expectlen); | |
| 674 printf("\nExpected:"); | |
| 675 printUSeqErr(expect, expectlen); | |
| 676 return TC_MISMATCH; | |
| 677 } | |
| 678 } | |
| 679 | |
| 680 | |
| 681 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) | |
| 682 { | |
| 683 /** test chars #1 */ | |
| 684 /* 1 2 3 1Han 2Han 3Han . */ | |
| 685 static const UChar sampleText[] = | |
| 686 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0
xDC21 }; | |
| 687 static const UChar sampleTextRoundTripUnmappable[] = | |
| 688 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd }; | |
| 689 | |
| 690 | |
| 691 static const uint8_t expectedUTF8[] = | |
| 692 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0
x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 }; | |
| 693 static const int32_t toUTF8Offs[] = | |
| 694 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0
x06, 0x07, 0x08, 0x08, 0x08, 0x08 }; | |
| 695 static const int32_t fmUTF8Offs[] = | |
| 696 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0
x000e }; | |
| 697 | |
| 698 #ifdef U_ENABLE_GENERIC_ISO_2022 | |
| 699 /* Same as UTF8, but with ^[%B preceeding */ | |
| 700 static const const uint8_t expectedISO2022[] = | |
| 701 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0
x8c, 0xe4, 0xb8, 0x89, 0x2E }; | |
| 702 static const int32_t toISO2022Offs[] = | |
| 703 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, | |
| 704 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */ | |
| 705 static const int32_t fmISO2022Offs[] = | |
| 706 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is t
his right? */ | |
| 707 #endif | |
| 708 | |
| 709 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */ | |
| 710 static const uint8_t expectedIBM930[] = | |
| 711 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0
x4B, 0x0e, 0xfe, 0xfe, 0x0f }; | |
| 712 static const int32_t toIBM930Offs[] = | |
| 713 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0
x07, 0x08, 0x08, 0x08, -1 }; | |
| 714 static const int32_t fmIBM930Offs[] = | |
| 715 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e }; | |
| 716 | |
| 717 /* 1 2 3 0 h1 h2 h3 . MBCS*/ | |
| 718 static const uint8_t expectedIBM943[] = | |
| 719 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc,
0xfc }; | |
| 720 static const int32_t toIBM943Offs [] = | |
| 721 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08,
0x08 }; | |
| 722 static const int32_t fmIBM943Offs[] = | |
| 723 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b }; | |
| 724 | |
| 725 /* 1 2 3 0 h1 h2 h3 . DBCS*/ | |
| 726 static const uint8_t expectedIBM9027[] = | |
| 727 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48,
0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe }; | |
| 728 static const int32_t toIBM9027Offs [] = | |
| 729 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05,
0x06, 0x06, 0x07, 0x07, 0x08, 0x08 }; | |
| 730 | |
| 731 /* 1 2 3 0 <?> <?> <?> . SBCS*/ | |
| 732 static const uint8_t expectedIBM920[] = | |
| 733 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a }; | |
| 734 static const int32_t toIBM920Offs [] = | |
| 735 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; | |
| 736 | |
| 737 /* 1 2 3 0 <?> <?> <?> . SBCS*/ | |
| 738 static const uint8_t expectedISO88593[] = | |
| 739 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; | |
| 740 static const int32_t toISO88593Offs[] = | |
| 741 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; | |
| 742 | |
| 743 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/ | |
| 744 static const uint8_t expectedLATIN1[] = | |
| 745 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; | |
| 746 static const int32_t toLATIN1Offs[] = | |
| 747 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; | |
| 748 | |
| 749 | |
| 750 /* etc */ | |
| 751 static const uint8_t expectedUTF16BE[] = | |
| 752 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0
x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 }; | |
| 753 static const int32_t toUTF16BEOffs[]= | |
| 754 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0
x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; | |
| 755 static const int32_t fmUTF16BEOffs[] = | |
| 756 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010,
0x0010 }; | |
| 757 | |
| 758 static const uint8_t expectedUTF16LE[] = | |
| 759 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0
x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc }; | |
| 760 static const int32_t toUTF16LEOffs[]= | |
| 761 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0
x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; | |
| 762 static const int32_t fmUTF16LEOffs[] = | |
| 763 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0
x0010 }; | |
| 764 | |
| 765 static const uint8_t expectedUTF32BE[] = | |
| 766 { 0x00, 0x00, 0x00, 0x31, | |
| 767 0x00, 0x00, 0x00, 0x32, | |
| 768 0x00, 0x00, 0x00, 0x33, | |
| 769 0x00, 0x00, 0x00, 0x00, | |
| 770 0x00, 0x00, 0x4e, 0x00, | |
| 771 0x00, 0x00, 0x4e, 0x8c, | |
| 772 0x00, 0x00, 0x4e, 0x09, | |
| 773 0x00, 0x00, 0x00, 0x2e, | |
| 774 0x00, 0x02, 0x00, 0x21 }; | |
| 775 static const int32_t toUTF32BEOffs[]= | |
| 776 { 0x00, 0x00, 0x00, 0x00, | |
| 777 0x01, 0x01, 0x01, 0x01, | |
| 778 0x02, 0x02, 0x02, 0x02, | |
| 779 0x03, 0x03, 0x03, 0x03, | |
| 780 0x04, 0x04, 0x04, 0x04, | |
| 781 0x05, 0x05, 0x05, 0x05, | |
| 782 0x06, 0x06, 0x06, 0x06, | |
| 783 0x07, 0x07, 0x07, 0x07, | |
| 784 0x08, 0x08, 0x08, 0x08, | |
| 785 0x08, 0x08, 0x08, 0x08 }; | |
| 786 static const int32_t fmUTF32BEOffs[] = | |
| 787 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020,
0x0020 }; | |
| 788 | |
| 789 static const uint8_t expectedUTF32LE[] = | |
| 790 { 0x31, 0x00, 0x00, 0x00, | |
| 791 0x32, 0x00, 0x00, 0x00, | |
| 792 0x33, 0x00, 0x00, 0x00, | |
| 793 0x00, 0x00, 0x00, 0x00, | |
| 794 0x00, 0x4e, 0x00, 0x00, | |
| 795 0x8c, 0x4e, 0x00, 0x00, | |
| 796 0x09, 0x4e, 0x00, 0x00, | |
| 797 0x2e, 0x00, 0x00, 0x00, | |
| 798 0x21, 0x00, 0x02, 0x00 }; | |
| 799 static const int32_t toUTF32LEOffs[]= | |
| 800 { 0x00, 0x00, 0x00, 0x00, | |
| 801 0x01, 0x01, 0x01, 0x01, | |
| 802 0x02, 0x02, 0x02, 0x02, | |
| 803 0x03, 0x03, 0x03, 0x03, | |
| 804 0x04, 0x04, 0x04, 0x04, | |
| 805 0x05, 0x05, 0x05, 0x05, | |
| 806 0x06, 0x06, 0x06, 0x06, | |
| 807 0x07, 0x07, 0x07, 0x07, | |
| 808 0x08, 0x08, 0x08, 0x08, | |
| 809 0x08, 0x08, 0x08, 0x08 }; | |
| 810 static const int32_t fmUTF32LEOffs[] = | |
| 811 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0
x0020 }; | |
| 812 | |
| 813 | |
| 814 | |
| 815 | |
| 816 /** Test chars #2 **/ | |
| 817 | |
| 818 /* Sahha [health], slashed h's */ | |
| 819 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x006
1 }; | |
| 820 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }
; | |
| 821 | |
| 822 /* LMBCS */ | |
| 823 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2
666, 0x0220 }; | |
| 824 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73,
0x01, 0x04, 0x14, 0x02, 0x20 }; | |
| 825 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03,
0x04, 0x04 , 0x05, 0x05, 0x05 }; | |
| 826 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0
006, 0x0008}; | |
| 827 /*********************************** START OF CODE finally *************/ | |
| 828 | |
| 829 gInBufferSize = insize; | |
| 830 gOutBufferSize = outsize; | |
| 831 | |
| 832 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBuff
erSize = %d\n", gInBufferSize, gOutBufferSize); | |
| 833 | |
| 834 | |
| 835 /*UTF-8*/ | |
| 836 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
| 837 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE ); | |
| 838 | |
| 839 log_verbose("Test surrogate behaviour for UTF8\n"); | |
| 840 { | |
| 841 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; | |
| 842 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, | |
| 843 0xf0, 0x90, 0x90, 0x81, | |
| 844 0xef, 0xbf, 0xbd | |
| 845 }; | |
| 846 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; | |
| 847 testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]), | |
| 848 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", o
ffsets,FALSE ); | |
| 849 | |
| 850 | |
| 851 } | |
| 852 | |
| 853 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) | |
| 854 /*ISO-2022*/ | |
| 855 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
| 856 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALS
E ); | |
| 857 #endif | |
| 858 | |
| 859 /*UTF16 LE*/ | |
| 860 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
| 861 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALS
E ); | |
| 862 /*UTF16 BE*/ | |
| 863 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
| 864 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALS
E ); | |
| 865 /*UTF32 LE*/ | |
| 866 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
| 867 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALS
E ); | |
| 868 /*UTF32 BE*/ | |
| 869 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
| 870 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALS
E ); | |
| 871 | |
| 872 /*LATIN_1*/ | |
| 873 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
| 874 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE ); | |
| 875 | |
| 876 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 877 /*EBCDIC_STATEFUL*/ | |
| 878 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
| 879 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE ); | |
| 880 | |
| 881 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
| 882 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs
,FALSE ); | |
| 883 | |
| 884 /*MBCS*/ | |
| 885 | |
| 886 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
| 887 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE ); | |
| 888 /*DBCS*/ | |
| 889 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
| 890 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALS
E ); | |
| 891 /*SBCS*/ | |
| 892 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
| 893 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE ); | |
| 894 /*SBCS*/ | |
| 895 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
| 896 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs
,FALSE ); | |
| 897 #endif | |
| 898 | |
| 899 | |
| 900 /****/ | |
| 901 | |
| 902 /*UTF-8*/ | |
| 903 testConvertToU(expectedUTF8, sizeof(expectedUTF8), | |
| 904 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs
,FALSE); | |
| 905 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) | |
| 906 /*ISO-2022*/ | |
| 907 testConvertToU(expectedISO2022, sizeof(expectedISO2022), | |
| 908 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2
022Offs,FALSE); | |
| 909 #endif | |
| 910 | |
| 911 /*UTF16 LE*/ | |
| 912 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), | |
| 913 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF1
6LEOffs,FALSE); | |
| 914 /*UTF16 BE*/ | |
| 915 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE), | |
| 916 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF1
6BEOffs,FALSE); | |
| 917 /*UTF32 LE*/ | |
| 918 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE), | |
| 919 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF3
2LEOffs,FALSE); | |
| 920 /*UTF32 BE*/ | |
| 921 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE), | |
| 922 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF3
2BEOffs,FALSE); | |
| 923 | |
| 924 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 925 /*EBCDIC_STATEFUL*/ | |
| 926 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUn
mappable, | |
| 927 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnma
ppable[0]), "ibm-930", fmIBM930Offs,FALSE); | |
| 928 /*MBCS*/ | |
| 929 testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnm
appable, | |
| 930 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnma
ppable[0]), "ibm-943", fmIBM943Offs,FALSE); | |
| 931 #endif | |
| 932 | |
| 933 /* Try it again to make sure it still works */ | |
| 934 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), | |
| 935 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF1
6LEOffs,FALSE); | |
| 936 | |
| 937 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 938 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), | |
| 939 malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3",
NULL,FALSE); | |
| 940 | |
| 941 testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0
]), | |
| 942 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE
); | |
| 943 | |
| 944 /*LMBCS*/ | |
| 945 testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), | |
| 946 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE ); | |
| 947 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS), | |
| 948 LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLM
BCSOffs,FALSE); | |
| 949 #endif | |
| 950 | |
| 951 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */ | |
| 952 { | |
| 953 /* encode directly set D and set O */ | |
| 954 static const uint8_t utf7[] = { | |
| 955 /* | |
| 956 Hi Mom -+Jjo--! | |
| 957 A+ImIDkQ. | |
| 958 +- | |
| 959 +ZeVnLIqe- | |
| 960 */ | |
| 961 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x
6f, 0x2d, 0x2d, 0x21, | |
| 962 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, | |
| 963 0x2b, 0x2d, | |
| 964 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d | |
| 965 }; | |
| 966 static const UChar unicode[] = { | |
| 967 /* | |
| 968 Hi Mom -<WHITE SMILING FACE>-! | |
| 969 A<NOT IDENTICAL TO><ALPHA>. | |
| 970 + | |
| 971 [Japanese word "nihongo"] | |
| 972 */ | |
| 973 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, | |
| 974 0x41, 0x2262, 0x0391, 0x2e, | |
| 975 0x2b, | |
| 976 0x65e5, 0x672c, 0x8a9e | |
| 977 }; | |
| 978 static const int32_t toUnicodeOffsets[] = { | |
| 979 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, | |
| 980 15, 17, 19, 23, | |
| 981 24, | |
| 982 27, 29, 32 | |
| 983 }; | |
| 984 static const int32_t fromUnicodeOffsets[] = { | |
| 985 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, | |
| 986 11, 12, 12, 12, 13, 13, 13, 13, 14, | |
| 987 15, 15, | |
| 988 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 | |
| 989 }; | |
| 990 | |
| 991 /* same but escaping set O (the exclamation mark) */ | |
| 992 static const uint8_t utf7Restricted[] = { | |
| 993 /* | |
| 994 Hi Mom -+Jjo--+ACE- | |
| 995 A+ImIDkQ. | |
| 996 +- | |
| 997 +ZeVnLIqe- | |
| 998 */ | |
| 999 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x
6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d, | |
| 1000 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, | |
| 1001 0x2b, 0x2d, | |
| 1002 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d | |
| 1003 }; | |
| 1004 static const int32_t toUnicodeOffsetsR[] = { | |
| 1005 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15, | |
| 1006 19, 21, 23, 27, | |
| 1007 28, | |
| 1008 31, 33, 36 | |
| 1009 }; | |
| 1010 static const int32_t fromUnicodeOffsetsR[] = { | |
| 1011 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10, | |
| 1012 11, 12, 12, 12, 13, 13, 13, 13, 14, | |
| 1013 15, 15, | |
| 1014 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 | |
| 1015 }; | |
| 1016 | |
| 1017 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(u
tf7), "UTF-7", fromUnicodeOffsets,FALSE); | |
| 1018 | |
| 1019 testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCH
AR, "UTF-7", toUnicodeOffsets,FALSE); | |
| 1020 | |
| 1021 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted
, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); | |
| 1022 | |
| 1023 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(u
nicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE); | |
| 1024 } | |
| 1025 | |
| 1026 /* | |
| 1027 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152, | |
| 1028 * modified according to RFC 2060, | |
| 1029 * and supplemented with the one example in RFC 2060 itself. | |
| 1030 */ | |
| 1031 { | |
| 1032 static const uint8_t imap[] = { | |
| 1033 /* Hi Mom -&Jjo--! | |
| 1034 A&ImIDkQ-. | |
| 1035 &- | |
| 1036 &ZeVnLIqe- | |
| 1037 \ | |
| 1038 ~peter | |
| 1039 /mail | |
| 1040 /&ZeVnLIqe- | |
| 1041 /&U,BTFw- | |
| 1042 */ | |
| 1043 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x
6f, 0x2d, 0x2d, 0x21, | |
| 1044 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e, | |
| 1045 0x26, 0x2d, | |
| 1046 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, | |
| 1047 0x5c, | |
| 1048 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, | |
| 1049 0x2f, 0x6d, 0x61, 0x69, 0x6c, | |
| 1050 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, | |
| 1051 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d | |
| 1052 }; | |
| 1053 static const UChar unicode[] = { | |
| 1054 /* Hi Mom -<WHITE SMILING FACE>-! | |
| 1055 A<NOT IDENTICAL TO><ALPHA>. | |
| 1056 & | |
| 1057 [Japanese word "nihongo"] | |
| 1058 \ | |
| 1059 ~peter | |
| 1060 /mail | |
| 1061 /<65e5, 672c, 8a9e> | |
| 1062 /<53f0, 5317> | |
| 1063 */ | |
| 1064 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, | |
| 1065 0x41, 0x2262, 0x0391, 0x2e, | |
| 1066 0x26, | |
| 1067 0x65e5, 0x672c, 0x8a9e, | |
| 1068 0x5c, | |
| 1069 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, | |
| 1070 0x2f, 0x6d, 0x61, 0x69, 0x6c, | |
| 1071 0x2f, 0x65e5, 0x672c, 0x8a9e, | |
| 1072 0x2f, 0x53f0, 0x5317 | |
| 1073 }; | |
| 1074 static const int32_t toUnicodeOffsets[] = { | |
| 1075 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, | |
| 1076 15, 17, 19, 24, | |
| 1077 25, | |
| 1078 28, 30, 33, | |
| 1079 37, | |
| 1080 38, 39, 40, 41, 42, 43, | |
| 1081 44, 45, 46, 47, 48, | |
| 1082 49, 51, 53, 56, | |
| 1083 60, 62, 64 | |
| 1084 }; | |
| 1085 static const int32_t fromUnicodeOffsets[] = { | |
| 1086 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, | |
| 1087 11, 12, 12, 12, 13, 13, 13, 13, 13, 14, | |
| 1088 15, 15, | |
| 1089 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, | |
| 1090 19, | |
| 1091 20, 21, 22, 23, 24, 25, | |
| 1092 26, 27, 28, 29, 30, | |
| 1093 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, | |
| 1094 35, 36, 36, 36, 37, 37, 37, 37, 37 | |
| 1095 }; | |
| 1096 | |
| 1097 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(i
map), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE); | |
| 1098 | |
| 1099 testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCH
AR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE); | |
| 1100 } | |
| 1101 | |
| 1102 /* Test UTF-8 bad data handling*/ | |
| 1103 { | |
| 1104 static const uint8_t utf8[]={ | |
| 1105 0x61, | |
| 1106 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ | |
| 1107 0x00, | |
| 1108 0x62, | |
| 1109 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ | |
| 1110 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ | |
| 1111 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */ | |
| 1112 0xdf, 0xbf, /* 7ff */ | |
| 1113 0xbf, /* truncated tail */ | |
| 1114 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */ | |
| 1115 0x02 | |
| 1116 }; | |
| 1117 | |
| 1118 static const uint16_t utf8Expected[]={ | |
| 1119 0x0061, | |
| 1120 0xfffd, | |
| 1121 0x0000, | |
| 1122 0x0062, | |
| 1123 0xfffd, | |
| 1124 0xfffd, | |
| 1125 0xdbff, 0xdfff, | |
| 1126 0x07ff, | |
| 1127 0xfffd, | |
| 1128 0xfffd, | |
| 1129 0x0002 | |
| 1130 }; | |
| 1131 | |
| 1132 static const int32_t utf8Offsets[]={ | |
| 1133 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28 | |
| 1134 }; | |
| 1135 testConvertToU(utf8, sizeof(utf8), | |
| 1136 utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]
), "utf-8", utf8Offsets ,FALSE); | |
| 1137 | |
| 1138 } | |
| 1139 | |
| 1140 /* Test UTF-32BE bad data handling*/ | |
| 1141 { | |
| 1142 static const uint8_t utf32[]={ | |
| 1143 0x00, 0x00, 0x00, 0x61, | |
| 1144 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ | |
| 1145 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ | |
| 1146 0x00, 0x00, 0x00, 0x62, | |
| 1147 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ | |
| 1148 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ | |
| 1149 0x00, 0x00, 0x01, 0x62, | |
| 1150 0x00, 0x00, 0x02, 0x62 | |
| 1151 }; | |
| 1152 static const uint16_t utf32Expected[]={ | |
| 1153 0x0061, | |
| 1154 0xfffd, /* 0x110000 out of range */ | |
| 1155 0xDBFF, /* 0x10FFFF in range */ | |
| 1156 0xDFFF, | |
| 1157 0x0062, | |
| 1158 0xfffd, /* 0xffffffff out of range */ | |
| 1159 0xfffd, /* 0x7fffffff out of range */ | |
| 1160 0x0162, | |
| 1161 0x0262 | |
| 1162 }; | |
| 1163 static const int32_t utf32Offsets[]={ | |
| 1164 0, 4, 8, 8, 12, 16, 20, 24, 28 | |
| 1165 }; | |
| 1166 static const uint8_t utf32ExpectedBack[]={ | |
| 1167 0x00, 0x00, 0x00, 0x61, | |
| 1168 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */ | |
| 1169 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ | |
| 1170 0x00, 0x00, 0x00, 0x62, | |
| 1171 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */ | |
| 1172 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */ | |
| 1173 0x00, 0x00, 0x01, 0x62, | |
| 1174 0x00, 0x00, 0x02, 0x62 | |
| 1175 }; | |
| 1176 static const int32_t utf32OffsetsBack[]={ | |
| 1177 0,0,0,0, | |
| 1178 1,1,1,1, | |
| 1179 2,2,2,2, | |
| 1180 4,4,4,4, | |
| 1181 5,5,5,5, | |
| 1182 6,6,6,6, | |
| 1183 7,7,7,7, | |
| 1184 8,8,8,8 | |
| 1185 }; | |
| 1186 | |
| 1187 testConvertToU(utf32, sizeof(utf32), | |
| 1188 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected
[0]), "utf-32be", utf32Offsets ,FALSE); | |
| 1189 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expect
ed[0]), | |
| 1190 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32Offse
tsBack, FALSE); | |
| 1191 } | |
| 1192 | |
| 1193 /* Test UTF-32LE bad data handling*/ | |
| 1194 { | |
| 1195 static const uint8_t utf32[]={ | |
| 1196 0x61, 0x00, 0x00, 0x00, | |
| 1197 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ | |
| 1198 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ | |
| 1199 0x62, 0x00, 0x00, 0x00, | |
| 1200 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ | |
| 1201 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ | |
| 1202 0x62, 0x01, 0x00, 0x00, | |
| 1203 0x62, 0x02, 0x00, 0x00, | |
| 1204 }; | |
| 1205 | |
| 1206 static const uint16_t utf32Expected[]={ | |
| 1207 0x0061, | |
| 1208 0xfffd, /* 0x110000 out of range */ | |
| 1209 0xDBFF, /* 0x10FFFF in range */ | |
| 1210 0xDFFF, | |
| 1211 0x0062, | |
| 1212 0xfffd, /* 0xffffffff out of range */ | |
| 1213 0xfffd, /* 0x7fffffff out of range */ | |
| 1214 0x0162, | |
| 1215 0x0262 | |
| 1216 }; | |
| 1217 static const int32_t utf32Offsets[]={ | |
| 1218 0, 4, 8, 8, 12, 16, 20, 24, 28 | |
| 1219 }; | |
| 1220 static const uint8_t utf32ExpectedBack[]={ | |
| 1221 0x61, 0x00, 0x00, 0x00, | |
| 1222 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */ | |
| 1223 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ | |
| 1224 0x62, 0x00, 0x00, 0x00, | |
| 1225 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */ | |
| 1226 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */ | |
| 1227 0x62, 0x01, 0x00, 0x00, | |
| 1228 0x62, 0x02, 0x00, 0x00 | |
| 1229 }; | |
| 1230 static const int32_t utf32OffsetsBack[]={ | |
| 1231 0,0,0,0, | |
| 1232 1,1,1,1, | |
| 1233 2,2,2,2, | |
| 1234 4,4,4,4, | |
| 1235 5,5,5,5, | |
| 1236 6,6,6,6, | |
| 1237 7,7,7,7, | |
| 1238 8,8,8,8 | |
| 1239 }; | |
| 1240 testConvertToU(utf32, sizeof(utf32), | |
| 1241 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-
32le", utf32Offsets,FALSE ); | |
| 1242 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expect
ed[0]), | |
| 1243 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32Offse
tsBack, FALSE); | |
| 1244 } | |
| 1245 } | |
| 1246 | |
| 1247 static void TestCoverageMBCS(){ | |
| 1248 #if 0 | |
| 1249 UErrorCode status = U_ZERO_ERROR; | |
| 1250 const char *directory = loadTestData(&status); | |
| 1251 char* tdpath = NULL; | |
| 1252 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory
())+1)); | |
| 1253 int len = strlen(directory); | |
| 1254 char* index=NULL; | |
| 1255 | |
| 1256 tdpath = (char*) malloc(sizeof(char) * (len * 2)); | |
| 1257 uprv_strcpy(saveDirectory,u_getDataDirectory()); | |
| 1258 log_verbose("Retrieved data directory %s \n",saveDirectory); | |
| 1259 uprv_strcpy(tdpath,directory); | |
| 1260 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR); | |
| 1261 | |
| 1262 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){ | |
| 1263 *(index+1)=0; | |
| 1264 } | |
| 1265 u_setDataDirectory(tdpath); | |
| 1266 log_verbose("ICU data directory is set to: %s \n" ,tdpath); | |
| 1267 #endif | |
| 1268 | |
| 1269 /*some more test to increase the code coverage in MBCS. Create an test conv
erter from test1.ucm | |
| 1270 which is test file for MBCS conversion with single-byte codepage data.*/ | |
| 1271 { | |
| 1272 | |
| 1273 /* MBCS with single byte codepage data test1.ucm*/ | |
| 1274 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34
, 0x0003}; | |
| 1275 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,}; | |
| 1276 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; | |
| 1277 | |
| 1278 /*from Unicode*/ | |
| 1279 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[
0]), | |
| 1280 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); | |
| 1281 } | |
| 1282 | |
| 1283 /*some more test to increase the code coverage in MBCS. Create an test conv
erter from test3.ucm | |
| 1284 which is test file for MBCS conversion with three-byte codepage data.*/ | |
| 1285 { | |
| 1286 | |
| 1287 /* MBCS with three byte codepage data test3.ucm*/ | |
| 1288 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4
, 0xde34, 0xd84d, 0xdc56, 0x000e}; | |
| 1289 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0
x07, 0x01, 0x02, 0x0a, 0xff,}; | |
| 1290 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8}; | |
| 1291 | |
| 1292 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0
x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,}; | |
| 1293 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4
, 0xde34, 0xd84d, 0xdc56, 0xfffd}; | |
| 1294 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 }; | |
| 1295 | |
| 1296 /*from Unicode*/ | |
| 1297 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[
0]), | |
| 1298 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE ); | |
| 1299 | |
| 1300 /*to Unicode*/ | |
| 1301 testConvertToU(test3input, sizeof(test3input), | |
| 1302 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]),
"@test3", fromtest3Offs ,FALSE); | |
| 1303 | |
| 1304 } | |
| 1305 | |
| 1306 /*some more test to increase the code coverage in MBCS. Create an test conv
erter from test4.ucm | |
| 1307 which is test file for MBCS conversion with four-byte codepage data.*/ | |
| 1308 { | |
| 1309 | |
| 1310 /* MBCS with three byte codepage data test4.ucm*/ | |
| 1311 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b,
0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; | |
| 1312 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0
x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,}; | |
| 1313 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6
, 6, 8,}; | |
| 1314 | |
| 1315 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0
x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,}; | |
| 1316 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b,
0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; | |
| 1317 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,}; | |
| 1318 | |
| 1319 /*from Unicode*/ | |
| 1320 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[
0]), | |
| 1321 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE ); | |
| 1322 | |
| 1323 /*to Unicode*/ | |
| 1324 testConvertToU(test4input, sizeof(test4input), | |
| 1325 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]),
"@test4", fromtest4Offs,FALSE ); | |
| 1326 | |
| 1327 } | |
| 1328 #if 0 | |
| 1329 free(tdpath); | |
| 1330 /* restore the original data directory */ | |
| 1331 log_verbose("Setting the data directory to %s \n", saveDirectory); | |
| 1332 u_setDataDirectory(saveDirectory); | |
| 1333 free(saveDirectory); | |
| 1334 #endif | |
| 1335 | |
| 1336 } | |
| 1337 | |
| 1338 static void TestConverterType(const char *convName, UConverterType convType) { | |
| 1339 UConverter* myConverter; | |
| 1340 UErrorCode err = U_ZERO_ERROR; | |
| 1341 | |
| 1342 myConverter = my_ucnv_open(convName, &err); | |
| 1343 | |
| 1344 if (U_FAILURE(err)) { | |
| 1345 log_data_err("Failed to create an %s converter\n", convName); | |
| 1346 return; | |
| 1347 } | |
| 1348 else | |
| 1349 { | |
| 1350 if (ucnv_getType(myConverter)!=convType) { | |
| 1351 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n", | |
| 1352 convName, convType); | |
| 1353 } | |
| 1354 else { | |
| 1355 log_verbose("ucnv_getType %s ok\n", convName); | |
| 1356 } | |
| 1357 } | |
| 1358 ucnv_close(myConverter); | |
| 1359 } | |
| 1360 | |
| 1361 static void TestConverterTypesAndStarters() | |
| 1362 { | |
| 1363 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 1364 UConverter* myConverter; | |
| 1365 UErrorCode err = U_ZERO_ERROR; | |
| 1366 UBool mystarters[256]; | |
| 1367 | |
| 1368 /* const UBool expectedKSCstarters[256] = { | |
| 1369 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
| 1370 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
| 1371 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
| 1372 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
| 1373 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
| 1374 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
| 1375 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
| 1376 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
| 1377 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
| 1378 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
| 1379 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
| 1380 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
| 1381 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
| 1382 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
| 1383 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
| 1384 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
| 1385 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
| 1386 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
| 1387 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
| 1388 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
| 1389 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
| 1390 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
| 1391 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
| 1392 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
| 1393 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
| 1394 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/ | |
| 1395 | |
| 1396 | |
| 1397 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversio
n types."); | |
| 1398 | |
| 1399 myConverter = ucnv_open("ksc", &err); | |
| 1400 if (U_FAILURE(err)) { | |
| 1401 log_data_err("Failed to create an ibm-ksc converter\n"); | |
| 1402 return; | |
| 1403 } | |
| 1404 else | |
| 1405 { | |
| 1406 if (ucnv_getType(myConverter)!=UCNV_MBCS) | |
| 1407 log_err("ucnv_getType Failed for ibm-949\n"); | |
| 1408 else | |
| 1409 log_verbose("ucnv_getType ibm-949 ok\n"); | |
| 1410 | |
| 1411 if(myConverter!=NULL) | |
| 1412 ucnv_getStarters(myConverter, mystarters, &err); | |
| 1413 | |
| 1414 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters
))) | |
| 1415 log_err("Failed ucnv_getStarters for ksc\n"); | |
| 1416 else | |
| 1417 log_verbose("ucnv_getStarters ok\n");*/ | |
| 1418 | |
| 1419 } | |
| 1420 ucnv_close(myConverter); | |
| 1421 | |
| 1422 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL); | |
| 1423 TestConverterType("ibm-878", UCNV_SBCS); | |
| 1424 #endif | |
| 1425 | |
| 1426 TestConverterType("iso-8859-1", UCNV_LATIN_1); | |
| 1427 | |
| 1428 TestConverterType("ibm-1208", UCNV_UTF8); | |
| 1429 | |
| 1430 TestConverterType("utf-8", UCNV_UTF8); | |
| 1431 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian); | |
| 1432 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian); | |
| 1433 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian); | |
| 1434 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian); | |
| 1435 | |
| 1436 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 1437 | |
| 1438 #if defined(U_ENABLE_GENERIC_ISO_2022) | |
| 1439 TestConverterType("iso-2022", UCNV_ISO_2022); | |
| 1440 #endif | |
| 1441 | |
| 1442 TestConverterType("hz", UCNV_HZ); | |
| 1443 #endif | |
| 1444 | |
| 1445 TestConverterType("scsu", UCNV_SCSU); | |
| 1446 | |
| 1447 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 1448 TestConverterType("x-iscii-de", UCNV_ISCII); | |
| 1449 #endif | |
| 1450 | |
| 1451 TestConverterType("ascii", UCNV_US_ASCII); | |
| 1452 TestConverterType("utf-7", UCNV_UTF7); | |
| 1453 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX); | |
| 1454 TestConverterType("bocu-1", UCNV_BOCU1); | |
| 1455 } | |
| 1456 | |
| 1457 static void | |
| 1458 TestAmbiguousConverter(UConverter *cnv) { | |
| 1459 static const char inBytes[3]={ 0x61, 0x5B, 0x5c }; | |
| 1460 UChar outUnicode[20]={ 0, 0, 0, 0 }; | |
| 1461 | |
| 1462 const char *s; | |
| 1463 UChar *u; | |
| 1464 UErrorCode errorCode; | |
| 1465 UBool isAmbiguous; | |
| 1466 | |
| 1467 /* try to convert an 'a', a square bracket and a US-ASCII backslash */ | |
| 1468 errorCode=U_ZERO_ERROR; | |
| 1469 s=inBytes; | |
| 1470 u=outUnicode; | |
| 1471 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode); | |
| 1472 if(U_FAILURE(errorCode)) { | |
| 1473 /* we do not care about general failures in this test; the input may jus
t not be mappable */ | |
| 1474 return; | |
| 1475 } | |
| 1476 | |
| 1477 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) { | |
| 1478 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: thi
s test is not applicable */ | |
| 1479 /* There are some encodings that are partially ASCII based, | |
| 1480 like the ISO-7 and GSM series of codepages, which we ignore. */ | |
| 1481 return; | |
| 1482 } | |
| 1483 | |
| 1484 isAmbiguous=ucnv_isAmbiguous(cnv); | |
| 1485 | |
| 1486 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous()
*/ | |
| 1487 if((outUnicode[2]!=0x5c)!=isAmbiguous) { | |
| 1488 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAm
biguous()==%d\n", | |
| 1489 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous); | |
| 1490 return; | |
| 1491 } | |
| 1492 | |
| 1493 if(outUnicode[2]!=0x5c) { | |
| 1494 /* needs fixup, fix it */ | |
| 1495 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode)); | |
| 1496 if(outUnicode[2]!=0x5c) { | |
| 1497 /* the fix failed */ | |
| 1498 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cn
v, &errorCode)); | |
| 1499 return; | |
| 1500 } | |
| 1501 } | |
| 1502 } | |
| 1503 | |
| 1504 static void TestAmbiguous() | |
| 1505 { | |
| 1506 UErrorCode status = U_ZERO_ERROR; | |
| 1507 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv; | |
| 1508 static const char target[] = { | |
| 1509 /* "\\usr\\local\\share\\data\\icutest.txt" */ | |
| 1510 0x5c, 0x75, 0x73, 0x72, | |
| 1511 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, | |
| 1512 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65, | |
| 1513 0x5c, 0x64, 0x61, 0x74, 0x61, | |
| 1514 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74, | |
| 1515 0 | |
| 1516 }; | |
| 1517 UChar asciiResult[200], sjisResult[200]; | |
| 1518 int32_t /*asciiLength = 0,*/ sjisLength = 0, i; | |
| 1519 const char *name; | |
| 1520 | |
| 1521 /* enumerate all converters */ | |
| 1522 status=U_ZERO_ERROR; | |
| 1523 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) { | |
| 1524 cnv=ucnv_open(name, &status); | |
| 1525 if(U_SUCCESS(status)) { | |
| 1526 TestAmbiguousConverter(cnv); | |
| 1527 ucnv_close(cnv); | |
| 1528 } else { | |
| 1529 log_err("error: unable to open available converter \"%s\"\n", name); | |
| 1530 status=U_ZERO_ERROR; | |
| 1531 } | |
| 1532 } | |
| 1533 | |
| 1534 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 1535 sjis_cnv = ucnv_open("ibm-943", &status); | |
| 1536 if (U_FAILURE(status)) | |
| 1537 { | |
| 1538 log_data_err("Failed to create a SJIS converter\n"); | |
| 1539 return; | |
| 1540 } | |
| 1541 ascii_cnv = ucnv_open("LATIN-1", &status); | |
| 1542 if (U_FAILURE(status)) | |
| 1543 { | |
| 1544 log_data_err("Failed to create a LATIN-1 converter\n"); | |
| 1545 ucnv_close(sjis_cnv); | |
| 1546 return; | |
| 1547 } | |
| 1548 /* convert target from SJIS to Unicode */ | |
| 1549 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF
_UCHAR, target, (int32_t)strlen(target), &status); | |
| 1550 if (U_FAILURE(status)) | |
| 1551 { | |
| 1552 log_err("Failed to convert the SJIS string.\n"); | |
| 1553 ucnv_close(sjis_cnv); | |
| 1554 ucnv_close(ascii_cnv); | |
| 1555 return; | |
| 1556 } | |
| 1557 /* convert target from Latin-1 to Unicode */ | |
| 1558 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/
U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); | |
| 1559 if (U_FAILURE(status)) | |
| 1560 { | |
| 1561 log_err("Failed to convert the Latin-1 string.\n"); | |
| 1562 ucnv_close(sjis_cnv); | |
| 1563 ucnv_close(ascii_cnv); | |
| 1564 return; | |
| 1565 } | |
| 1566 if (!ucnv_isAmbiguous(sjis_cnv)) | |
| 1567 { | |
| 1568 log_err("SJIS converter should contain ambiguous character mappings.\n")
; | |
| 1569 ucnv_close(sjis_cnv); | |
| 1570 ucnv_close(ascii_cnv); | |
| 1571 return; | |
| 1572 } | |
| 1573 if (u_strcmp(sjisResult, asciiResult) == 0) | |
| 1574 { | |
| 1575 log_err("File separators for SJIS don't need to be fixed.\n"); | |
| 1576 } | |
| 1577 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength); | |
| 1578 if (u_strcmp(sjisResult, asciiResult) != 0) | |
| 1579 { | |
| 1580 log_err("Fixing file separator for SJIS failed.\n"); | |
| 1581 } | |
| 1582 ucnv_close(sjis_cnv); | |
| 1583 ucnv_close(ascii_cnv); | |
| 1584 #endif | |
| 1585 } | |
| 1586 | |
| 1587 static void | |
| 1588 TestSignatureDetection(){ | |
| 1589 /* with null terminated strings */ | |
| 1590 { | |
| 1591 static const char* data[] = { | |
| 1592 "\xFE\xFF\x00\x00", /* UTF-16BE */ | |
| 1593 "\xFF\xFE\x00\x00", /* UTF-16LE */ | |
| 1594 "\xEF\xBB\xBF\x00", /* UTF-8 */ | |
| 1595 "\x0E\xFE\xFF\x00", /* SCSU */ | |
| 1596 | |
| 1597 "\xFE\xFF", /* UTF-16BE */ | |
| 1598 "\xFF\xFE", /* UTF-16LE */ | |
| 1599 "\xEF\xBB\xBF", /* UTF-8 */ | |
| 1600 "\x0E\xFE\xFF", /* SCSU */ | |
| 1601 | |
| 1602 "\xFE\xFF\x41\x42", /* UTF-16BE */ | |
| 1603 "\xFF\xFE\x41\x41", /* UTF-16LE */ | |
| 1604 "\xEF\xBB\xBF\x41", /* UTF-8 */ | |
| 1605 "\x0E\xFE\xFF\x41", /* SCSU */ | |
| 1606 | |
| 1607 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */ | |
| 1608 "\x2B\x2F\x76\x38\x41", /* UTF-7 */ | |
| 1609 "\x2B\x2F\x76\x39\x41", /* UTF-7 */ | |
| 1610 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */ | |
| 1611 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */ | |
| 1612 | |
| 1613 "\xDD\x73\x66\x73" /* UTF-EBCDIC */ | |
| 1614 }; | |
| 1615 static const char* expected[] = { | |
| 1616 "UTF-16BE", | |
| 1617 "UTF-16LE", | |
| 1618 "UTF-8", | |
| 1619 "SCSU", | |
| 1620 | |
| 1621 "UTF-16BE", | |
| 1622 "UTF-16LE", | |
| 1623 "UTF-8", | |
| 1624 "SCSU", | |
| 1625 | |
| 1626 "UTF-16BE", | |
| 1627 "UTF-16LE", | |
| 1628 "UTF-8", | |
| 1629 "SCSU", | |
| 1630 | |
| 1631 "UTF-7", | |
| 1632 "UTF-7", | |
| 1633 "UTF-7", | |
| 1634 "UTF-7", | |
| 1635 "UTF-7", | |
| 1636 "UTF-EBCDIC" | |
| 1637 }; | |
| 1638 static const int32_t expectedLength[] ={ | |
| 1639 2, | |
| 1640 2, | |
| 1641 3, | |
| 1642 3, | |
| 1643 | |
| 1644 2, | |
| 1645 2, | |
| 1646 3, | |
| 1647 3, | |
| 1648 | |
| 1649 2, | |
| 1650 2, | |
| 1651 3, | |
| 1652 3, | |
| 1653 | |
| 1654 5, | |
| 1655 4, | |
| 1656 4, | |
| 1657 4, | |
| 1658 4, | |
| 1659 4 | |
| 1660 }; | |
| 1661 int i=0; | |
| 1662 UErrorCode err; | |
| 1663 int32_t signatureLength = -1; | |
| 1664 const char* source = NULL; | |
| 1665 const char* enc = NULL; | |
| 1666 for( ; i<sizeof(data)/sizeof(char*); i++){ | |
| 1667 err = U_ZERO_ERROR; | |
| 1668 source = data[i]; | |
| 1669 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &er
r); | |
| 1670 if(U_FAILURE(err)){ | |
| 1671 log_err("ucnv_detectUnicodeSignature failed for source : %s at i
ndex :%i. Error: %s\n", source,i,u_errorName(err)); | |
| 1672 continue; | |
| 1673 } | |
| 1674 if(enc == NULL || strcmp(enc,expected[i]) !=0){ | |
| 1675 log_err("ucnv_detectUnicodeSignature failed for source : %s at i
ndex :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); | |
| 1676 continue; | |
| 1677 } | |
| 1678 if(signatureLength != expectedLength[i]){ | |
| 1679 log_err("ucnv_detectUnicodeSignature failed for source : %s at i
ndex :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expecte
dLength[i]); | |
| 1680 } | |
| 1681 } | |
| 1682 } | |
| 1683 { | |
| 1684 static const char* data[] = { | |
| 1685 "\xFE\xFF\x00", /* UTF-16BE */ | |
| 1686 "\xFF\xFE\x00", /* UTF-16LE */ | |
| 1687 "\xEF\xBB\xBF\x00", /* UTF-8 */ | |
| 1688 "\x0E\xFE\xFF\x00", /* SCSU */ | |
| 1689 "\x00\x00\xFE\xFF", /* UTF-32BE */ | |
| 1690 "\xFF\xFE\x00\x00", /* UTF-32LE */ | |
| 1691 "\xFE\xFF", /* UTF-16BE */ | |
| 1692 "\xFF\xFE", /* UTF-16LE */ | |
| 1693 "\xEF\xBB\xBF", /* UTF-8 */ | |
| 1694 "\x0E\xFE\xFF", /* SCSU */ | |
| 1695 "\x00\x00\xFE\xFF", /* UTF-32BE */ | |
| 1696 "\xFF\xFE\x00\x00", /* UTF-32LE */ | |
| 1697 "\xFE\xFF\x41\x42", /* UTF-16BE */ | |
| 1698 "\xFF\xFE\x41\x41", /* UTF-16LE */ | |
| 1699 "\xEF\xBB\xBF\x41", /* UTF-8 */ | |
| 1700 "\x0E\xFE\xFF\x41", /* SCSU */ | |
| 1701 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */ | |
| 1702 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */ | |
| 1703 "\xFB\xEE\x28", /* BOCU-1 */ | |
| 1704 "\xFF\x41\x42" /* NULL */ | |
| 1705 }; | |
| 1706 static const int len[] = { | |
| 1707 3, | |
| 1708 3, | |
| 1709 4, | |
| 1710 4, | |
| 1711 4, | |
| 1712 4, | |
| 1713 2, | |
| 1714 2, | |
| 1715 3, | |
| 1716 3, | |
| 1717 4, | |
| 1718 4, | |
| 1719 4, | |
| 1720 4, | |
| 1721 4, | |
| 1722 4, | |
| 1723 5, | |
| 1724 5, | |
| 1725 3, | |
| 1726 3 | |
| 1727 }; | |
| 1728 | |
| 1729 static const char* expected[] = { | |
| 1730 "UTF-16BE", | |
| 1731 "UTF-16LE", | |
| 1732 "UTF-8", | |
| 1733 "SCSU", | |
| 1734 "UTF-32BE", | |
| 1735 "UTF-32LE", | |
| 1736 "UTF-16BE", | |
| 1737 "UTF-16LE", | |
| 1738 "UTF-8", | |
| 1739 "SCSU", | |
| 1740 "UTF-32BE", | |
| 1741 "UTF-32LE", | |
| 1742 "UTF-16BE", | |
| 1743 "UTF-16LE", | |
| 1744 "UTF-8", | |
| 1745 "SCSU", | |
| 1746 "UTF-32BE", | |
| 1747 "UTF-32LE", | |
| 1748 "BOCU-1", | |
| 1749 NULL | |
| 1750 }; | |
| 1751 static const int32_t expectedLength[] ={ | |
| 1752 2, | |
| 1753 2, | |
| 1754 3, | |
| 1755 3, | |
| 1756 4, | |
| 1757 4, | |
| 1758 2, | |
| 1759 2, | |
| 1760 3, | |
| 1761 3, | |
| 1762 4, | |
| 1763 4, | |
| 1764 2, | |
| 1765 2, | |
| 1766 3, | |
| 1767 3, | |
| 1768 4, | |
| 1769 4, | |
| 1770 3, | |
| 1771 0 | |
| 1772 }; | |
| 1773 int i=0; | |
| 1774 UErrorCode err; | |
| 1775 int32_t signatureLength = -1; | |
| 1776 int32_t sourceLength=-1; | |
| 1777 const char* source = NULL; | |
| 1778 const char* enc = NULL; | |
| 1779 for( ; i<sizeof(data)/sizeof(char*); i++){ | |
| 1780 err = U_ZERO_ERROR; | |
| 1781 source = data[i]; | |
| 1782 sourceLength = len[i]; | |
| 1783 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureL
ength, &err); | |
| 1784 if(U_FAILURE(err)){ | |
| 1785 log_err("ucnv_detectUnicodeSignature test2 failed for source : %
s at index :%i. Error: %s\n", source,i,u_errorName(err)); | |
| 1786 continue; | |
| 1787 } | |
| 1788 if(enc == NULL || strcmp(enc,expected[i]) !=0){ | |
| 1789 if(expected[i] !=NULL){ | |
| 1790 log_err("ucnv_detectUnicodeSignature test2 failed for source :
%s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); | |
| 1791 continue; | |
| 1792 } | |
| 1793 } | |
| 1794 if(signatureLength != expectedLength[i]){ | |
| 1795 log_err("ucnv_detectUnicodeSignature test2 failed for source : %
s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,e
xpectedLength[i]); | |
| 1796 } | |
| 1797 } | |
| 1798 } | |
| 1799 } | |
| 1800 | |
| 1801 static void TestUTF7() { | |
| 1802 /* test input */ | |
| 1803 static const uint8_t in[]={ | |
| 1804 /* H - +Jjo- - ! +- +2AHcAQ */ | |
| 1805 0x48, | |
| 1806 0x2d, | |
| 1807 0x2b, 0x4a, 0x6a, 0x6f, | |
| 1808 0x2d, 0x2d, | |
| 1809 0x21, | |
| 1810 0x2b, 0x2d, | |
| 1811 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51 | |
| 1812 }; | |
| 1813 | |
| 1814 /* expected test results */ | |
| 1815 static const int32_t results[]={ | |
| 1816 /* number of bytes read, code point */ | |
| 1817 1, 0x48, | |
| 1818 1, 0x2d, | |
| 1819 4, 0x263a, /* <WHITE SMILING FACE> */ | |
| 1820 2, 0x2d, | |
| 1821 1, 0x21, | |
| 1822 2, 0x2b, | |
| 1823 7, 0x10401 | |
| 1824 }; | |
| 1825 | |
| 1826 const char *cnvName; | |
| 1827 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
| 1828 UErrorCode errorCode=U_ZERO_ERROR; | |
| 1829 UConverter *cnv=ucnv_open("UTF-7", &errorCode); | |
| 1830 if(U_FAILURE(errorCode)) { | |
| 1831 log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(error
Code)); | |
| 1832 return; | |
| 1833 } | |
| 1834 TestNextUChar(cnv, source, limit, results, "UTF-7"); | |
| 1835 /* Test the condition when source >= sourceLimit */ | |
| 1836 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
| 1837 cnvName = ucnv_getName(cnv, &errorCode); | |
| 1838 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) { | |
| 1839 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(error
Code)); | |
| 1840 } | |
| 1841 ucnv_close(cnv); | |
| 1842 } | |
| 1843 | |
| 1844 static void TestIMAP() { | |
| 1845 /* test input */ | |
| 1846 static const uint8_t in[]={ | |
| 1847 /* H - &Jjo- - ! &- &2AHcAQ- \ */ | |
| 1848 0x48, | |
| 1849 0x2d, | |
| 1850 0x26, 0x4a, 0x6a, 0x6f, | |
| 1851 0x2d, 0x2d, | |
| 1852 0x21, | |
| 1853 0x26, 0x2d, | |
| 1854 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d | |
| 1855 }; | |
| 1856 | |
| 1857 /* expected test results */ | |
| 1858 static const int32_t results[]={ | |
| 1859 /* number of bytes read, code point */ | |
| 1860 1, 0x48, | |
| 1861 1, 0x2d, | |
| 1862 4, 0x263a, /* <WHITE SMILING FACE> */ | |
| 1863 2, 0x2d, | |
| 1864 1, 0x21, | |
| 1865 2, 0x26, | |
| 1866 7, 0x10401 | |
| 1867 }; | |
| 1868 | |
| 1869 const char *cnvName; | |
| 1870 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
| 1871 UErrorCode errorCode=U_ZERO_ERROR; | |
| 1872 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode); | |
| 1873 if(U_FAILURE(errorCode)) { | |
| 1874 log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_err
orName(errorCode)); | |
| 1875 return; | |
| 1876 } | |
| 1877 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name"); | |
| 1878 /* Test the condition when source >= sourceLimit */ | |
| 1879 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
| 1880 cnvName = ucnv_getName(cnv, &errorCode); | |
| 1881 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0)
{ | |
| 1882 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_err
orName(errorCode)); | |
| 1883 } | |
| 1884 ucnv_close(cnv); | |
| 1885 } | |
| 1886 | |
| 1887 static void TestUTF8() { | |
| 1888 /* test input */ | |
| 1889 static const uint8_t in[]={ | |
| 1890 0x61, | |
| 1891 0xc2, 0x80, | |
| 1892 0xe0, 0xa0, 0x80, | |
| 1893 0xf0, 0x90, 0x80, 0x80, | |
| 1894 0xf4, 0x84, 0x8c, 0xa1, | |
| 1895 0xf0, 0x90, 0x90, 0x81 | |
| 1896 }; | |
| 1897 | |
| 1898 /* expected test results */ | |
| 1899 static const int32_t results[]={ | |
| 1900 /* number of bytes read, code point */ | |
| 1901 1, 0x61, | |
| 1902 2, 0x80, | |
| 1903 3, 0x800, | |
| 1904 4, 0x10000, | |
| 1905 4, 0x104321, | |
| 1906 4, 0x10401 | |
| 1907 }; | |
| 1908 | |
| 1909 /* error test input */ | |
| 1910 static const uint8_t in2[]={ | |
| 1911 0x61, | |
| 1912 0xc0, 0x80, /* illegal non-shortest form */ | |
| 1913 0xe0, 0x80, 0x80, /* illegal non-shortest form */ | |
| 1914 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ | |
| 1915 0xc0, 0xc0, /* illegal trail byte */ | |
| 1916 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ | |
| 1917 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ | |
| 1918 0xfe, /* illegal byte altogether */ | |
| 1919 0x62 | |
| 1920 }; | |
| 1921 | |
| 1922 /* expected error test results */ | |
| 1923 static const int32_t results2[]={ | |
| 1924 /* number of bytes read, code point */ | |
| 1925 1, 0x61, | |
| 1926 22, 0x62 | |
| 1927 }; | |
| 1928 | |
| 1929 UConverterToUCallback cb; | |
| 1930 const void *p; | |
| 1931 | |
| 1932 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); | |
| 1933 UErrorCode errorCode=U_ZERO_ERROR; | |
| 1934 UConverter *cnv=ucnv_open("UTF-8", &errorCode); | |
| 1935 if(U_FAILURE(errorCode)) { | |
| 1936 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode)
); | |
| 1937 return; | |
| 1938 } | |
| 1939 TestNextUChar(cnv, source, limit, results, "UTF-8"); | |
| 1940 /* Test the condition when source >= sourceLimit */ | |
| 1941 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
| 1942 | |
| 1943 /* test error behavior with a skip callback */ | |
| 1944 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode)
; | |
| 1945 source=(const char *)in2; | |
| 1946 limit=(const char *)(in2+sizeof(in2)); | |
| 1947 TestNextUChar(cnv, source, limit, results2, "UTF-8"); | |
| 1948 | |
| 1949 ucnv_close(cnv); | |
| 1950 } | |
| 1951 | |
| 1952 static void TestCESU8() { | |
| 1953 /* test input */ | |
| 1954 static const uint8_t in[]={ | |
| 1955 0x61, | |
| 1956 0xc2, 0x80, | |
| 1957 0xe0, 0xa0, 0x80, | |
| 1958 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, | |
| 1959 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82, | |
| 1960 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, | |
| 1961 0xef, 0xbf, 0xbc | |
| 1962 }; | |
| 1963 | |
| 1964 /* expected test results */ | |
| 1965 static const int32_t results[]={ | |
| 1966 /* number of bytes read, code point */ | |
| 1967 1, 0x61, | |
| 1968 2, 0x80, | |
| 1969 3, 0x800, | |
| 1970 6, 0x10000, | |
| 1971 3, 0xdc01, | |
| 1972 -1,0xd802, /* may read 3 or 6 bytes */ | |
| 1973 -1,0x10ffff,/* may read 0 or 3 bytes */ | |
| 1974 3, 0xfffc | |
| 1975 }; | |
| 1976 | |
| 1977 /* error test input */ | |
| 1978 static const uint8_t in2[]={ | |
| 1979 0x61, | |
| 1980 0xc0, 0x80, /* illegal non-shortest form */ | |
| 1981 0xe0, 0x80, 0x80, /* illegal non-shortest form */ | |
| 1982 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ | |
| 1983 0xc0, 0xc0, /* illegal trail byte */ | |
| 1984 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code poi
nt */ | |
| 1985 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code poi
nt */ | |
| 1986 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code poi
nt */ | |
| 1987 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ | |
| 1988 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ | |
| 1989 0xfe, /* illegal byte altogether */ | |
| 1990 0x62 | |
| 1991 }; | |
| 1992 | |
| 1993 /* expected error test results */ | |
| 1994 static const int32_t results2[]={ | |
| 1995 /* number of bytes read, code point */ | |
| 1996 1, 0x61, | |
| 1997 34, 0x62 | |
| 1998 }; | |
| 1999 | |
| 2000 UConverterToUCallback cb; | |
| 2001 const void *p; | |
| 2002 | |
| 2003 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); | |
| 2004 UErrorCode errorCode=U_ZERO_ERROR; | |
| 2005 UConverter *cnv=ucnv_open("CESU-8", &errorCode); | |
| 2006 if(U_FAILURE(errorCode)) { | |
| 2007 log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(erro
rCode)); | |
| 2008 return; | |
| 2009 } | |
| 2010 TestNextUChar(cnv, source, limit, results, "CESU-8"); | |
| 2011 /* Test the condition when source >= sourceLimit */ | |
| 2012 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
| 2013 | |
| 2014 /* test error behavior with a skip callback */ | |
| 2015 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode)
; | |
| 2016 source=(const char *)in2; | |
| 2017 limit=(const char *)(in2+sizeof(in2)); | |
| 2018 TestNextUChar(cnv, source, limit, results2, "CESU-8"); | |
| 2019 | |
| 2020 ucnv_close(cnv); | |
| 2021 } | |
| 2022 | |
| 2023 static void TestUTF16() { | |
| 2024 /* test input */ | |
| 2025 static const uint8_t in1[]={ | |
| 2026 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff | |
| 2027 }; | |
| 2028 static const uint8_t in2[]={ | |
| 2029 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff | |
| 2030 }; | |
| 2031 static const uint8_t in3[]={ | |
| 2032 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01 | |
| 2033 }; | |
| 2034 | |
| 2035 /* expected test results */ | |
| 2036 static const int32_t results1[]={ | |
| 2037 /* number of bytes read, code point */ | |
| 2038 4, 0x4e00, | |
| 2039 2, 0xfeff | |
| 2040 }; | |
| 2041 static const int32_t results2[]={ | |
| 2042 /* number of bytes read, code point */ | |
| 2043 4, 0x004e, | |
| 2044 2, 0xfffe | |
| 2045 }; | |
| 2046 static const int32_t results3[]={ | |
| 2047 /* number of bytes read, code point */ | |
| 2048 2, 0xfefe, | |
| 2049 2, 0x4e00, | |
| 2050 2, 0xfeff, | |
| 2051 4, 0x20001 | |
| 2052 }; | |
| 2053 | |
| 2054 const char *source, *limit; | |
| 2055 | |
| 2056 UErrorCode errorCode=U_ZERO_ERROR; | |
| 2057 UConverter *cnv=ucnv_open("UTF-16", &errorCode); | |
| 2058 if(U_FAILURE(errorCode)) { | |
| 2059 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode
)); | |
| 2060 return; | |
| 2061 } | |
| 2062 | |
| 2063 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); | |
| 2064 TestNextUChar(cnv, source, limit, results1, "UTF-16"); | |
| 2065 | |
| 2066 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); | |
| 2067 ucnv_resetToUnicode(cnv); | |
| 2068 TestNextUChar(cnv, source, limit, results2, "UTF-16"); | |
| 2069 | |
| 2070 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); | |
| 2071 ucnv_resetToUnicode(cnv); | |
| 2072 TestNextUChar(cnv, source, limit, results3, "UTF-16"); | |
| 2073 | |
| 2074 /* Test the condition when source >= sourceLimit */ | |
| 2075 ucnv_resetToUnicode(cnv); | |
| 2076 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
| 2077 | |
| 2078 ucnv_close(cnv); | |
| 2079 } | |
| 2080 | |
| 2081 static void TestUTF16BE() { | |
| 2082 /* test input */ | |
| 2083 static const uint8_t in[]={ | |
| 2084 0x00, 0x61, | |
| 2085 0x00, 0xc0, | |
| 2086 0x00, 0x31, | |
| 2087 0x00, 0xf4, | |
| 2088 0xce, 0xfe, | |
| 2089 0xd8, 0x01, 0xdc, 0x01 | |
| 2090 }; | |
| 2091 | |
| 2092 /* expected test results */ | |
| 2093 static const int32_t results[]={ | |
| 2094 /* number of bytes read, code point */ | |
| 2095 2, 0x61, | |
| 2096 2, 0xc0, | |
| 2097 2, 0x31, | |
| 2098 2, 0xf4, | |
| 2099 2, 0xcefe, | |
| 2100 4, 0x10401 | |
| 2101 }; | |
| 2102 | |
| 2103 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
| 2104 UErrorCode errorCode=U_ZERO_ERROR; | |
| 2105 UConverter *cnv=ucnv_open("utf-16be", &errorCode); | |
| 2106 if(U_FAILURE(errorCode)) { | |
| 2107 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCo
de)); | |
| 2108 return; | |
| 2109 } | |
| 2110 TestNextUChar(cnv, source, limit, results, "UTF-16BE"); | |
| 2111 /* Test the condition when source >= sourceLimit */ | |
| 2112 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
| 2113 /*Test for the condition where there is an invalid character*/ | |
| 2114 { | |
| 2115 static const uint8_t source2[]={0x61}; | |
| 2116 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); | |
| 2117 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); | |
| 2118 } | |
| 2119 #if 0 | |
| 2120 /* | |
| 2121 * Test disabled because currently the UTF-16BE/LE converters are supposed | |
| 2122 * to not set errors for unpaired surrogates. | |
| 2123 * This may change with | |
| 2124 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 | |
| 2125 */ | |
| 2126 | |
| 2127 /*Test for the condition where there is a surrogate pair*/ | |
| 2128 { | |
| 2129 const uint8_t source2[]={0xd8, 0x01}; | |
| 2130 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); | |
| 2131 } | |
| 2132 #endif | |
| 2133 ucnv_close(cnv); | |
| 2134 } | |
| 2135 | |
| 2136 static void | |
| 2137 TestUTF16LE() { | |
| 2138 /* test input */ | |
| 2139 static const uint8_t in[]={ | |
| 2140 0x61, 0x00, | |
| 2141 0x31, 0x00, | |
| 2142 0x4e, 0x2e, | |
| 2143 0x4e, 0x00, | |
| 2144 0x01, 0xd8, 0x01, 0xdc | |
| 2145 }; | |
| 2146 | |
| 2147 /* expected test results */ | |
| 2148 static const int32_t results[]={ | |
| 2149 /* number of bytes read, code point */ | |
| 2150 2, 0x61, | |
| 2151 2, 0x31, | |
| 2152 2, 0x2e4e, | |
| 2153 2, 0x4e, | |
| 2154 4, 0x10401 | |
| 2155 }; | |
| 2156 | |
| 2157 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
| 2158 UErrorCode errorCode=U_ZERO_ERROR; | |
| 2159 UConverter *cnv=ucnv_open("utf-16le", &errorCode); | |
| 2160 if(U_FAILURE(errorCode)) { | |
| 2161 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCo
de)); | |
| 2162 return; | |
| 2163 } | |
| 2164 TestNextUChar(cnv, source, limit, results, "UTF-16LE"); | |
| 2165 /* Test the condition when source >= sourceLimit */ | |
| 2166 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
| 2167 /*Test for the condition where there is an invalid character*/ | |
| 2168 { | |
| 2169 static const uint8_t source2[]={0x61}; | |
| 2170 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); | |
| 2171 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); | |
| 2172 } | |
| 2173 #if 0 | |
| 2174 /* | |
| 2175 * Test disabled because currently the UTF-16BE/LE converters are supposed | |
| 2176 * to not set errors for unpaired surrogates. | |
| 2177 * This may change with | |
| 2178 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 | |
| 2179 */ | |
| 2180 | |
| 2181 /*Test for the condition where there is a surrogate character*/ | |
| 2182 { | |
| 2183 static const uint8_t source2[]={0x01, 0xd8}; | |
| 2184 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); | |
| 2185 } | |
| 2186 #endif | |
| 2187 | |
| 2188 ucnv_close(cnv); | |
| 2189 } | |
| 2190 | |
| 2191 static void TestUTF32() { | |
| 2192 /* test input */ | |
| 2193 static const uint8_t in1[]={ | |
| 2194 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0x
ff | |
| 2195 }; | |
| 2196 static const uint8_t in2[]={ | |
| 2197 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x
00 | |
| 2198 }; | |
| 2199 static const uint8_t in3[]={ | |
| 2200 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x
40, 0x00, 0x00, 0xdc, 0x01 | |
| 2201 }; | |
| 2202 | |
| 2203 /* expected test results */ | |
| 2204 static const int32_t results1[]={ | |
| 2205 /* number of bytes read, code point */ | |
| 2206 8, 0x100f00, | |
| 2207 4, 0xfeff | |
| 2208 }; | |
| 2209 static const int32_t results2[]={ | |
| 2210 /* number of bytes read, code point */ | |
| 2211 8, 0x0f1000, | |
| 2212 4, 0xfffe | |
| 2213 }; | |
| 2214 static const int32_t results3[]={ | |
| 2215 /* number of bytes read, code point */ | |
| 2216 4, 0xfefe, | |
| 2217 4, 0x100f00, | |
| 2218 4, 0xfffd, /* unmatched surrogate */ | |
| 2219 4, 0xfffd /* unmatched surrogate */ | |
| 2220 }; | |
| 2221 | |
| 2222 const char *source, *limit; | |
| 2223 | |
| 2224 UErrorCode errorCode=U_ZERO_ERROR; | |
| 2225 UConverter *cnv=ucnv_open("UTF-32", &errorCode); | |
| 2226 if(U_FAILURE(errorCode)) { | |
| 2227 log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(erro
rCode)); | |
| 2228 return; | |
| 2229 } | |
| 2230 | |
| 2231 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); | |
| 2232 TestNextUChar(cnv, source, limit, results1, "UTF-32"); | |
| 2233 | |
| 2234 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); | |
| 2235 ucnv_resetToUnicode(cnv); | |
| 2236 TestNextUChar(cnv, source, limit, results2, "UTF-32"); | |
| 2237 | |
| 2238 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); | |
| 2239 ucnv_resetToUnicode(cnv); | |
| 2240 TestNextUChar(cnv, source, limit, results3, "UTF-32"); | |
| 2241 | |
| 2242 /* Test the condition when source >= sourceLimit */ | |
| 2243 ucnv_resetToUnicode(cnv); | |
| 2244 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
| 2245 | |
| 2246 ucnv_close(cnv); | |
| 2247 } | |
| 2248 | |
| 2249 static void | |
| 2250 TestUTF32BE() { | |
| 2251 /* test input */ | |
| 2252 static const uint8_t in[]={ | |
| 2253 0x00, 0x00, 0x00, 0x61, | |
| 2254 0x00, 0x00, 0x30, 0x61, | |
| 2255 0x00, 0x00, 0xdc, 0x00, | |
| 2256 0x00, 0x00, 0xd8, 0x00, | |
| 2257 0x00, 0x00, 0xdf, 0xff, | |
| 2258 0x00, 0x00, 0xff, 0xfe, | |
| 2259 0x00, 0x10, 0xab, 0xcd, | |
| 2260 0x00, 0x10, 0xff, 0xff | |
| 2261 }; | |
| 2262 | |
| 2263 /* expected test results */ | |
| 2264 static const int32_t results[]={ | |
| 2265 /* number of bytes read, code point */ | |
| 2266 4, 0x61, | |
| 2267 4, 0x3061, | |
| 2268 4, 0xfffd, | |
| 2269 4, 0xfffd, | |
| 2270 4, 0xfffd, | |
| 2271 4, 0xfffe, | |
| 2272 4, 0x10abcd, | |
| 2273 4, 0x10ffff | |
| 2274 }; | |
| 2275 | |
| 2276 /* error test input */ | |
| 2277 static const uint8_t in2[]={ | |
| 2278 0x00, 0x00, 0x00, 0x61, | |
| 2279 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ | |
| 2280 0x00, 0x00, 0x00, 0x62, | |
| 2281 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ | |
| 2282 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ | |
| 2283 0x00, 0x00, 0x01, 0x62, | |
| 2284 0x00, 0x00, 0x02, 0x62 | |
| 2285 }; | |
| 2286 | |
| 2287 /* expected error test results */ | |
| 2288 static const int32_t results2[]={ | |
| 2289 /* number of bytes read, code point */ | |
| 2290 4, 0x61, | |
| 2291 8, 0x62, | |
| 2292 12, 0x162, | |
| 2293 4, 0x262 | |
| 2294 }; | |
| 2295 | |
| 2296 UConverterToUCallback cb; | |
| 2297 const void *p; | |
| 2298 | |
| 2299 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
| 2300 UErrorCode errorCode=U_ZERO_ERROR; | |
| 2301 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode); | |
| 2302 if(U_FAILURE(errorCode)) { | |
| 2303 log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(er
rorCode)); | |
| 2304 return; | |
| 2305 } | |
| 2306 TestNextUChar(cnv, source, limit, results, "UTF-32BE"); | |
| 2307 | |
| 2308 /* Test the condition when source >= sourceLimit */ | |
| 2309 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
| 2310 | |
| 2311 /* test error behavior with a skip callback */ | |
| 2312 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode)
; | |
| 2313 source=(const char *)in2; | |
| 2314 limit=(const char *)(in2+sizeof(in2)); | |
| 2315 TestNextUChar(cnv, source, limit, results2, "UTF-32BE"); | |
| 2316 | |
| 2317 ucnv_close(cnv); | |
| 2318 } | |
| 2319 | |
| 2320 static void | |
| 2321 TestUTF32LE() { | |
| 2322 /* test input */ | |
| 2323 static const uint8_t in[]={ | |
| 2324 0x61, 0x00, 0x00, 0x00, | |
| 2325 0x61, 0x30, 0x00, 0x00, | |
| 2326 0x00, 0xdc, 0x00, 0x00, | |
| 2327 0x00, 0xd8, 0x00, 0x00, | |
| 2328 0xff, 0xdf, 0x00, 0x00, | |
| 2329 0xfe, 0xff, 0x00, 0x00, | |
| 2330 0xcd, 0xab, 0x10, 0x00, | |
| 2331 0xff, 0xff, 0x10, 0x00 | |
| 2332 }; | |
| 2333 | |
| 2334 /* expected test results */ | |
| 2335 static const int32_t results[]={ | |
| 2336 /* number of bytes read, code point */ | |
| 2337 4, 0x61, | |
| 2338 4, 0x3061, | |
| 2339 4, 0xfffd, | |
| 2340 4, 0xfffd, | |
| 2341 4, 0xfffd, | |
| 2342 4, 0xfffe, | |
| 2343 4, 0x10abcd, | |
| 2344 4, 0x10ffff | |
| 2345 }; | |
| 2346 | |
| 2347 /* error test input */ | |
| 2348 static const uint8_t in2[]={ | |
| 2349 0x61, 0x00, 0x00, 0x00, | |
| 2350 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ | |
| 2351 0x62, 0x00, 0x00, 0x00, | |
| 2352 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ | |
| 2353 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ | |
| 2354 0x62, 0x01, 0x00, 0x00, | |
| 2355 0x62, 0x02, 0x00, 0x00, | |
| 2356 }; | |
| 2357 | |
| 2358 /* expected error test results */ | |
| 2359 static const int32_t results2[]={ | |
| 2360 /* number of bytes read, code point */ | |
| 2361 4, 0x61, | |
| 2362 8, 0x62, | |
| 2363 12, 0x162, | |
| 2364 4, 0x262, | |
| 2365 }; | |
| 2366 | |
| 2367 UConverterToUCallback cb; | |
| 2368 const void *p; | |
| 2369 | |
| 2370 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
| 2371 UErrorCode errorCode=U_ZERO_ERROR; | |
| 2372 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode); | |
| 2373 if(U_FAILURE(errorCode)) { | |
| 2374 log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(er
rorCode)); | |
| 2375 return; | |
| 2376 } | |
| 2377 TestNextUChar(cnv, source, limit, results, "UTF-32LE"); | |
| 2378 | |
| 2379 /* Test the condition when source >= sourceLimit */ | |
| 2380 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
| 2381 | |
| 2382 /* test error behavior with a skip callback */ | |
| 2383 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode)
; | |
| 2384 source=(const char *)in2; | |
| 2385 limit=(const char *)(in2+sizeof(in2)); | |
| 2386 TestNextUChar(cnv, source, limit, results2, "UTF-32LE"); | |
| 2387 | |
| 2388 ucnv_close(cnv); | |
| 2389 } | |
| 2390 | |
| 2391 static void | |
| 2392 TestLATIN1() { | |
| 2393 /* test input */ | |
| 2394 static const uint8_t in[]={ | |
| 2395 0x61, | |
| 2396 0x31, | |
| 2397 0x32, | |
| 2398 0xc0, | |
| 2399 0xf0, | |
| 2400 0xf4, | |
| 2401 }; | |
| 2402 | |
| 2403 /* expected test results */ | |
| 2404 static const int32_t results[]={ | |
| 2405 /* number of bytes read, code point */ | |
| 2406 1, 0x61, | |
| 2407 1, 0x31, | |
| 2408 1, 0x32, | |
| 2409 1, 0xc0, | |
| 2410 1, 0xf0, | |
| 2411 1, 0xf4, | |
| 2412 }; | |
| 2413 static const uint16_t in1[] = { | |
| 2414 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef,
0x61, 0x1b, 0xe5, 0x84, | |
| 2415 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3,
0x94, 0x08, 0x02, 0x0f, | |
| 2416 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b,
0x6d, 0x41, 0x88, 0x4c, | |
| 2417 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e,
0x6b, 0x4c, 0x08, 0x0d, | |
| 2418 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa,
0x84, 0x08, 0x02, 0x0e, | |
| 2419 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc,
0x9f, 0x0e, 0x79, 0x3e, | |
| 2420 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08,
0x88, 0xbe, 0xa3, 0x8d, | |
| 2421 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08,
0x01, 0x93, 0xc8, 0xaa, | |
| 2422 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae,
0x93, 0xa8, 0xa0, 0x08, | |
| 2423 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80,
0x05, 0xec, 0x60, 0x8d, | |
| 2424 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4,
0xfe, 0xe7, 0xc2, 0x06, | |
| 2425 0xcb, 0x82 | |
| 2426 }; | |
| 2427 static const uint8_t out1[] = { | |
| 2428 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef,
0x61, 0x1b, 0xe5, 0x84, | |
| 2429 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3,
0x94, 0x08, 0x02, 0x0f, | |
| 2430 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b,
0x6d, 0x41, 0x88, 0x4c, | |
| 2431 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e,
0x6b, 0x4c, 0x08, 0x0d, | |
| 2432 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa,
0x84, 0x08, 0x02, 0x0e, | |
| 2433 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc,
0x9f, 0x0e, 0x79, 0x3e, | |
| 2434 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08,
0x88, 0xbe, 0xa3, 0x8d, | |
| 2435 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08,
0x01, 0x93, 0xc8, 0xaa, | |
| 2436 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae,
0x93, 0xa8, 0xa0, 0x08, | |
| 2437 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80,
0x05, 0xec, 0x60, 0x8d, | |
| 2438 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4,
0xfe, 0xe7, 0xc2, 0x06, | |
| 2439 0xcb, 0x82 | |
| 2440 }; | |
| 2441 static const uint16_t in2[]={ | |
| 2442 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, | |
| 2443 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, | |
| 2444 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, | |
| 2445 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, | |
| 2446 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, | |
| 2447 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, | |
| 2448 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, | |
| 2449 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, | |
| 2450 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, | |
| 2451 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, | |
| 2452 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, | |
| 2453 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, | |
| 2454 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, | |
| 2455 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, | |
| 2456 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, | |
| 2457 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, | |
| 2458 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, | |
| 2459 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, | |
| 2460 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, | |
| 2461 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, | |
| 2462 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, | |
| 2463 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, | |
| 2464 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, | |
| 2465 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, | |
| 2466 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, | |
| 2467 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, | |
| 2468 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, | |
| 2469 0x37, 0x20, 0x2A, 0x2F, | |
| 2470 }; | |
| 2471 static const unsigned char out2[]={ | |
| 2472 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, | |
| 2473 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, | |
| 2474 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, | |
| 2475 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, | |
| 2476 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, | |
| 2477 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, | |
| 2478 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, | |
| 2479 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, | |
| 2480 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, | |
| 2481 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, | |
| 2482 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, | |
| 2483 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, | |
| 2484 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, | |
| 2485 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, | |
| 2486 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, | |
| 2487 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, | |
| 2488 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, | |
| 2489 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, | |
| 2490 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, | |
| 2491 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, | |
| 2492 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, | |
| 2493 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, | |
| 2494 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, | |
| 2495 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, | |
| 2496 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, | |
| 2497 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, | |
| 2498 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, | |
| 2499 0x37, 0x20, 0x2A, 0x2F, | |
| 2500 }; | |
| 2501 const char *source=(const char *)in; | |
| 2502 const char *limit=(const char *)in+sizeof(in); | |
| 2503 | |
| 2504 UErrorCode errorCode=U_ZERO_ERROR; | |
| 2505 UConverter *cnv=ucnv_open("LATIN_1", &errorCode); | |
| 2506 if(U_FAILURE(errorCode)) { | |
| 2507 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(err
orCode)); | |
| 2508 return; | |
| 2509 } | |
| 2510 TestNextUChar(cnv, source, limit, results, "LATIN_1"); | |
| 2511 /* Test the condition when source >= sourceLimit */ | |
| 2512 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
| 2513 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof
(out1)); | |
| 2514 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out
2)); | |
| 2515 | |
| 2516 ucnv_close(cnv); | |
| 2517 } | |
| 2518 | |
| 2519 static void | |
| 2520 TestSBCS() { | |
| 2521 /* test input */ | |
| 2522 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4}; | |
| 2523 /* expected test results */ | |
| 2524 static const int32_t results[]={ | |
| 2525 /* number of bytes read, code point */ | |
| 2526 1, 0x61, | |
| 2527 1, 0xbf, | |
| 2528 1, 0xc4, | |
| 2529 1, 0x2021, | |
| 2530 1, 0xf8ff, | |
| 2531 1, 0x00d9 | |
| 2532 }; | |
| 2533 | |
| 2534 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
| 2535 UErrorCode errorCode=U_ZERO_ERROR; | |
| 2536 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode); | |
| 2537 if(U_FAILURE(errorCode)) { | |
| 2538 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_e
rrorName(errorCode)); | |
| 2539 return; | |
| 2540 } | |
| 2541 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)"); | |
| 2542 /* Test the condition when source >= sourceLimit */ | |
| 2543 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
| 2544 /*Test for Illegal character */ /* | |
| 2545 { | |
| 2546 static const uint8_t input1[]={ 0xA1 }; | |
| 2547 const char* illegalsource=(const char*)input1; | |
| 2548 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource),
U_INVALID_CHAR_FOUND, "source has a illegal characte"); | |
| 2549 } | |
| 2550 */ | |
| 2551 ucnv_close(cnv); | |
| 2552 } | |
| 2553 | |
| 2554 static void | |
| 2555 TestDBCS() { | |
| 2556 /* test input */ | |
| 2557 static const uint8_t in[]={ | |
| 2558 0x44, 0x6a, | |
| 2559 0xc4, 0x9c, | |
| 2560 0x7a, 0x74, | |
| 2561 0x46, 0xab, | |
| 2562 0x42, 0x5b, | |
| 2563 | |
| 2564 }; | |
| 2565 | |
| 2566 /* expected test results */ | |
| 2567 static const int32_t results[]={ | |
| 2568 /* number of bytes read, code point */ | |
| 2569 2, 0x00a7, | |
| 2570 2, 0xe1d2, | |
| 2571 2, 0x6962, | |
| 2572 2, 0xf842, | |
| 2573 2, 0xffe5, | |
| 2574 }; | |
| 2575 | |
| 2576 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
| 2577 UErrorCode errorCode=U_ZERO_ERROR; | |
| 2578 | |
| 2579 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode); | |
| 2580 if(U_FAILURE(errorCode)) { | |
| 2581 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorN
ame(errorCode)); | |
| 2582 return; | |
| 2583 } | |
| 2584 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)"); | |
| 2585 /* Test the condition when source >= sourceLimit */ | |
| 2586 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
| 2587 /*Test for the condition where there is an invalid character*/ | |
| 2588 { | |
| 2589 static const uint8_t source2[]={0x1a, 0x1b}; | |
| 2590 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character"); | |
| 2591 } | |
| 2592 /*Test for the condition where we have a truncated char*/ | |
| 2593 { | |
| 2594 static const uint8_t source1[]={0xc4}; | |
| 2595 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); | |
| 2596 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeo
f(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); | |
| 2597 } | |
| 2598 ucnv_close(cnv); | |
| 2599 } | |
| 2600 | |
| 2601 static void | |
| 2602 TestMBCS() { | |
| 2603 /* test input */ | |
| 2604 static const uint8_t in[]={ | |
| 2605 0x01, | |
| 2606 0xa6, 0xa3, | |
| 2607 0x00, | |
| 2608 0xa6, 0xa1, | |
| 2609 0x08, | |
| 2610 0xc2, 0x76, | |
| 2611 0xc2, 0x78, | |
| 2612 | |
| 2613 }; | |
| 2614 | |
| 2615 /* expected test results */ | |
| 2616 static const int32_t results[]={ | |
| 2617 /* number of bytes read, code point */ | |
| 2618 1, 0x0001, | |
| 2619 2, 0x250c, | |
| 2620 1, 0x0000, | |
| 2621 2, 0x2500, | |
| 2622 1, 0x0008, | |
| 2623 2, 0xd60c, | |
| 2624 2, 0xd60e, | |
| 2625 }; | |
| 2626 | |
| 2627 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
| 2628 UErrorCode errorCode=U_ZERO_ERROR; | |
| 2629 | |
| 2630 UConverter *cnv=ucnv_open("ibm-1363", &errorCode); | |
| 2631 if(U_FAILURE(errorCode)) { | |
| 2632 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorN
ame(errorCode)); | |
| 2633 return; | |
| 2634 } | |
| 2635 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)"); | |
| 2636 /* Test the condition when source >= sourceLimit */ | |
| 2637 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
| 2638 /*Test for the condition where there is an invalid character*/ | |
| 2639 { | |
| 2640 static const uint8_t source2[]={0xa1, 0x80}; | |
| 2641 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character"); | |
| 2642 } | |
| 2643 /*Test for the condition where we have a truncated char*/ | |
| 2644 { | |
| 2645 static const uint8_t source1[]={0xc4}; | |
| 2646 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); | |
| 2647 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeo
f(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); | |
| 2648 } | |
| 2649 ucnv_close(cnv); | |
| 2650 | |
| 2651 } | |
| 2652 | |
| 2653 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO | |
| 2654 static void | |
| 2655 TestICCRunout() { | |
| 2656 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1},
:int{0}, "\", "?", :bin{""} } */ | |
| 2657 | |
| 2658 const char *cnvName = "ibm-1363"; | |
| 2659 UErrorCode status = U_ZERO_ERROR; | |
| 2660 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 }; | |
| 2661 /* UChar expectUData[] = { 0x00a1, 0x001a }; */ | |
| 2662 const char *source = sourceData; | |
| 2663 const char *sourceLim = sourceData+sizeof(sourceData); | |
| 2664 UChar c1, c2, c3; | |
| 2665 UConverter *cnv=ucnv_open(cnvName, &status); | |
| 2666 if(U_FAILURE(status)) { | |
| 2667 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(s
tatus)); | |
| 2668 return; | |
| 2669 } | |
| 2670 | |
| 2671 #if 0 | |
| 2672 { | |
| 2673 UChar targetBuf[256]; | |
| 2674 UChar *target = targetBuf; | |
| 2675 UChar *targetLim = target+256; | |
| 2676 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &sta
tus); | |
| 2677 | |
| 2678 log_info("After convert: target@%d, source@%d, status%s\n", | |
| 2679 target-targetBuf, source-sourceData, u_errorName(status)); | |
| 2680 | |
| 2681 if(U_FAILURE(status)) { | |
| 2682 log_err("Failed to convert: %s\n", u_errorName(status)); | |
| 2683 } else { | |
| 2684 | |
| 2685 } | |
| 2686 } | |
| 2687 #endif | |
| 2688 | |
| 2689 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status); | |
| 2690 log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_e
rrorName(status)); | |
| 2691 | |
| 2692 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status); | |
| 2693 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_e
rrorName(status)); | |
| 2694 | |
| 2695 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status); | |
| 2696 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_e
rrorName(status)); | |
| 2697 | |
| 2698 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) { | |
| 2699 log_verbose("OK\n"); | |
| 2700 } else { | |
| 2701 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n
"); | |
| 2702 } | |
| 2703 | |
| 2704 ucnv_close(cnv); | |
| 2705 | |
| 2706 } | |
| 2707 #endif | |
| 2708 | |
#ifdef U_ENABLE_GENERIC_ISO_2022

/*
 * ucnv_getNextUChar() check for the generic ISO_2022 converter. The input
 * opens with a three-byte escape sequence that is followed by sequences of
 * one to four bytes; the error cases cover a limit below the source, an
 * empty range, a truncated character and an invalid character.
 */
static void
TestISO_2022() {
    static const uint8_t in[]={
        0x1b, 0x25, 0x42,           /* escape sequence, counted with the first character */
        0x31,
        0x32,
        0x61,
        0xc2, 0x80,
        0xe0, 0xa0, 0x80,
        0xf0, 0x90, 0x80, 0x80
    };

    /* one (number of bytes read, code point) pair per character */
    static const int32_t results[]={
        4, 0x0031,                  /* 4 bytes including the escape sequence */
        1, 0x0032,
        1, 0x61,
        2, 0x80,
        3, 0x800,
        4, 0x10000
    };

    /* a single byte used for the "truncated character" case */
    static const uint8_t truncated[]={0xc4};
    /* byte pair used for the "invalid character" case */
    static const uint8_t invalidPair[]={0xa1, 0x01};

    const char *source=(const char *)in;
    const char *limit=source+sizeof(in);
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv=ucnv_open("ISO_2022", &errorCode);

    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }

    TestNextUChar(cnv, source, limit, results, "ISO_2022");

    /* A limit below the source is an argument error; an empty range is out-of-bounds. */
    TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");

    /* Truncated character, checked with the STOP callback installed. */
    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
    TestNextUCharError(cnv, (const char*)truncated, (const char*)truncated+sizeof(truncated),
                       U_TRUNCATED_CHAR_FOUND, "a character is truncated");

    /* Invalid character; the STOP callback is still in place. */
    TestNextUCharError(cnv, (const char*)invalidPair, (const char*)invalidPair+sizeof(invalidPair),
                       U_ILLEGAL_CHAR_FOUND, "an invalid character");

    ucnv_close(cnv);
}

#endif
| 2766 | |
| 2767 static void | |
| 2768 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverte
r* cnv){ | |
| 2769 const UChar* uSource; | |
| 2770 const UChar* uSourceLimit; | |
| 2771 const char* cSource; | |
| 2772 const char* cSourceLimit; | |
| 2773 UChar *uTargetLimit =NULL; | |
| 2774 UChar *uTarget; | |
| 2775 char *cTarget; | |
| 2776 const char *cTargetLimit; | |
| 2777 char *cBuf; | |
| 2778 UChar *uBuf; /*,*test;*/ | |
| 2779 int32_t uBufSize = 120; | |
| 2780 int len=0; | |
| 2781 int i=2; | |
| 2782 UErrorCode errorCode=U_ZERO_ERROR; | |
| 2783 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
| 2784 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); | |
| 2785 ucnv_reset(cnv); | |
| 2786 for(;--i>0; ){ | |
| 2787 uSource = (UChar*) source; | |
| 2788 uSourceLimit=(const UChar*)sourceLimit; | |
| 2789 cTarget = cBuf; | |
| 2790 uTarget = uBuf; | |
| 2791 cSource = cBuf; | |
| 2792 cTargetLimit = cBuf; | |
| 2793 uTargetLimit = uBuf; | |
| 2794 | |
| 2795 do{ | |
| 2796 | |
| 2797 cTargetLimit = cTargetLimit+ i; | |
| 2798 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit
,NULL,FALSE, &errorCode); | |
| 2799 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ | |
| 2800 errorCode=U_ZERO_ERROR; | |
| 2801 continue; | |
| 2802 } | |
| 2803 | |
| 2804 if(U_FAILURE(errorCode)){ | |
| 2805 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorN
ame(errorCode)); | |
| 2806 return; | |
| 2807 } | |
| 2808 | |
| 2809 }while (uSource<uSourceLimit); | |
| 2810 | |
| 2811 cSourceLimit =cTarget; | |
| 2812 do{ | |
| 2813 uTargetLimit=uTargetLimit+i; | |
| 2814 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,
FALSE,&errorCode); | |
| 2815 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ | |
| 2816 errorCode=U_ZERO_ERROR; | |
| 2817 continue; | |
| 2818 } | |
| 2819 if(U_FAILURE(errorCode)){ | |
| 2820 log_err("ucnv_toUnicode conversion failed reason %s\n", u_err
orName(errorCode)); | |
| 2821 return; | |
| 2822 } | |
| 2823 }while(cSource<cSourceLimit); | |
| 2824 | |
| 2825 uSource = source; | |
| 2826 /*test =uBuf;*/ | |
| 2827 for(len=0;len<(int)(source - sourceLimit);len++){ | |
| 2828 if(uBuf[len]!=uSource[len]){ | |
| 2829 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int
)uBuf[len]) ; | |
| 2830 } | |
| 2831 } | |
| 2832 } | |
| 2833 free(uBuf); | |
| 2834 free(cBuf); | |
| 2835 } | |
| 2836 /* Test for Jitterbug 778 */ | |
| 2837 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit
,UConverter* cnv){ | |
| 2838 const UChar* uSource; | |
| 2839 const UChar* uSourceLimit; | |
| 2840 const char* cSource; | |
| 2841 UChar *uTargetLimit =NULL; | |
| 2842 UChar *uTarget; | |
| 2843 char *cTarget; | |
| 2844 const char *cTargetLimit; | |
| 2845 char *cBuf; | |
| 2846 UChar *uBuf,*test; | |
| 2847 int32_t uBufSize = 120; | |
| 2848 int numCharsInTarget=0; | |
| 2849 UErrorCode errorCode=U_ZERO_ERROR; | |
| 2850 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
| 2851 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
| 2852 uSource = source; | |
| 2853 uSourceLimit=sourceLimit; | |
| 2854 cTarget = cBuf; | |
| 2855 cTargetLimit = cBuf +uBufSize*5; | |
| 2856 uTarget = uBuf; | |
| 2857 uTargetLimit = uBuf+ uBufSize*5; | |
| 2858 ucnv_reset(cnv); | |
| 2859 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarge
t), uSource, (int32_t)(uSourceLimit-uSource), &errorCode); | |
| 2860 if(U_FAILURE(errorCode)){ | |
| 2861 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
| 2862 return; | |
| 2863 } | |
| 2864 cSource = cBuf; | |
| 2865 test =uBuf; | |
| 2866 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsIn
Target,&errorCode); | |
| 2867 if(U_FAILURE(errorCode)){ | |
| 2868 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(erro
rCode)); | |
| 2869 return; | |
| 2870 } | |
| 2871 uSource = source; | |
| 2872 while(uSource<uSourceLimit){ | |
| 2873 if(*test!=*uSource){ | |
| 2874 | |
| 2875 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; | |
| 2876 } | |
| 2877 uSource++; | |
| 2878 test++; | |
| 2879 } | |
| 2880 free(uBuf); | |
| 2881 free(cBuf); | |
| 2882 } | |
| 2883 | |
| 2884 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLim
it,UConverter* cnv){ | |
| 2885 const UChar* uSource; | |
| 2886 const UChar* uSourceLimit; | |
| 2887 const char* cSource; | |
| 2888 const char* cSourceLimit; | |
| 2889 UChar *uTargetLimit =NULL; | |
| 2890 UChar *uTarget; | |
| 2891 char *cTarget; | |
| 2892 const char *cTargetLimit; | |
| 2893 char *cBuf; | |
| 2894 UChar *uBuf; /*,*test;*/ | |
| 2895 int32_t uBufSize = 120; | |
| 2896 int len=0; | |
| 2897 int i=2; | |
| 2898 const UChar *temp = sourceLimit; | |
| 2899 UErrorCode errorCode=U_ZERO_ERROR; | |
| 2900 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
| 2901 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); | |
| 2902 | |
| 2903 ucnv_reset(cnv); | |
| 2904 for(;--i>0;){ | |
| 2905 uSource = (UChar*) source; | |
| 2906 cTarget = cBuf; | |
| 2907 uTarget = uBuf; | |
| 2908 cSource = cBuf; | |
| 2909 cTargetLimit = cBuf; | |
| 2910 uTargetLimit = uBuf+uBufSize*5; | |
| 2911 cTargetLimit = cTargetLimit+uBufSize*10; | |
| 2912 uSourceLimit=uSource; | |
| 2913 do{ | |
| 2914 | |
| 2915 if (uSourceLimit < sourceLimit) { | |
| 2916 uSourceLimit = uSourceLimit+1; | |
| 2917 } | |
| 2918 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit
,NULL,FALSE, &errorCode); | |
| 2919 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ | |
| 2920 errorCode=U_ZERO_ERROR; | |
| 2921 continue; | |
| 2922 } | |
| 2923 | |
| 2924 if(U_FAILURE(errorCode)){ | |
| 2925 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorN
ame(errorCode)); | |
| 2926 return; | |
| 2927 } | |
| 2928 | |
| 2929 }while (uSource<temp); | |
| 2930 | |
| 2931 cSourceLimit =cBuf; | |
| 2932 do{ | |
| 2933 if (cSourceLimit < cBuf + (cTarget - cBuf)) { | |
| 2934 cSourceLimit = cSourceLimit+1; | |
| 2935 } | |
| 2936 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,
FALSE,&errorCode); | |
| 2937 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ | |
| 2938 errorCode=U_ZERO_ERROR; | |
| 2939 continue; | |
| 2940 } | |
| 2941 if(U_FAILURE(errorCode)){ | |
| 2942 log_err("ucnv_toUnicode conversion failed reason %s\n", u_err
orName(errorCode)); | |
| 2943 return; | |
| 2944 } | |
| 2945 }while(cSource<cTarget); | |
| 2946 | |
| 2947 uSource = source; | |
| 2948 /*test =uBuf;*/ | |
| 2949 for(;len<(int)(source - sourceLimit);len++){ | |
| 2950 if(uBuf[len]!=uSource[len]){ | |
| 2951 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int
)uBuf[len]) ; | |
| 2952 } | |
| 2953 } | |
| 2954 } | |
| 2955 free(uBuf); | |
| 2956 free(cBuf); | |
| 2957 } | |
| 2958 static void | |
| 2959 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit, | |
| 2960 const uint16_t results[], const char* message){ | |
| 2961 /* const char* s0; */ | |
| 2962 const char* s=(char*)source; | |
| 2963 const uint16_t *r=results; | |
| 2964 UErrorCode errorCode=U_ZERO_ERROR; | |
| 2965 uint32_t c,exC; | |
| 2966 ucnv_reset(cnv); | |
| 2967 while(s<limit) { | |
| 2968 /* s0=s; */ | |
| 2969 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); | |
| 2970 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { | |
| 2971 break; /* no more significant input */ | |
| 2972 } else if(U_FAILURE(errorCode)) { | |
| 2973 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(
errorCode)); | |
| 2974 break; | |
| 2975 } else { | |
| 2976 if(U16_IS_LEAD(*r)){ | |
| 2977 int i =0, len = 2; | |
| 2978 U16_NEXT(r, i, len, exC); | |
| 2979 r++; | |
| 2980 }else{ | |
| 2981 exC = *r; | |
| 2982 } | |
| 2983 if(c!=(uint32_t)(exC)) | |
| 2984 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X
\n",message,(uint32_t) (*r),c); | |
| 2985 } | |
| 2986 r++; | |
| 2987 } | |
| 2988 } | |
| 2989 | |
| 2990 static int TestJitterbug930(const char* enc){ | |
| 2991 UErrorCode err = U_ZERO_ERROR; | |
| 2992 UConverter*converter; | |
| 2993 char out[80]; | |
| 2994 char*target = out; | |
| 2995 UChar in[4]; | |
| 2996 const UChar*source = in; | |
| 2997 int32_t off[80]; | |
| 2998 int32_t* offsets = off; | |
| 2999 int numOffWritten=0; | |
| 3000 UBool flush = 0; | |
| 3001 converter = my_ucnv_open(enc, &err); | |
| 3002 | |
| 3003 in[0] = 0x41; /* 0x4E00;*/ | |
| 3004 in[1] = 0x4E01; | |
| 3005 in[2] = 0x4E02; | |
| 3006 in[3] = 0x4E03; | |
| 3007 | |
| 3008 memset(off, '*', sizeof(off)); | |
| 3009 | |
| 3010 ucnv_fromUnicode (converter, | |
| 3011 &target, | |
| 3012 target+2, | |
| 3013 &source, | |
| 3014 source+3, | |
| 3015 offsets, | |
| 3016 flush, | |
| 3017 &err); | |
| 3018 | |
| 3019 /* writes three bytes into the output buffer: 41 1B 24 | |
| 3020 * but offsets contains 0 1 1 | |
| 3021 */ | |
| 3022 while(*offsets< off[10]){ | |
| 3023 numOffWritten++; | |
| 3024 offsets++; | |
| 3025 } | |
| 3026 log_verbose("Testing Jitterbug 930 for encoding %s",enc); | |
| 3027 if(numOffWritten!= (int)(target-out)){ | |
| 3028 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",en
c, (int)(target-out),numOffWritten); | |
| 3029 } | |
| 3030 | |
| 3031 err = U_ZERO_ERROR; | |
| 3032 | |
| 3033 memset(off,'*' , sizeof(off)); | |
| 3034 | |
| 3035 flush = 1; | |
| 3036 offsets=off; | |
| 3037 ucnv_fromUnicode (converter, | |
| 3038 &target, | |
| 3039 target+4, | |
| 3040 &source, | |
| 3041 source, | |
| 3042 offsets, | |
| 3043 flush, | |
| 3044 &err); | |
| 3045 numOffWritten=0; | |
| 3046 while(*offsets< off[10]){ | |
| 3047 numOffWritten++; | |
| 3048 if(*offsets!= -1){ | |
| 3049 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i
",enc,-1,*offsets) ; | |
| 3050 } | |
| 3051 offsets++; | |
| 3052 } | |
| 3053 | |
| 3054 /* writes 42 43 7A into output buffer, | |
| 3055 * offsets contains -1 -1 -1 | |
| 3056 */ | |
| 3057 ucnv_close(converter); | |
| 3058 return 0; | |
| 3059 } | |
| 3060 | |
| 3061 static void | |
| 3062 TestHZ() { | |
| 3063 /* test input */ | |
| 3064 static const uint16_t in[]={ | |
| 3065 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x30
05, 0x2014, | |
| 3066 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73
BB, 0x83E0, | |
| 3067 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94
C2, 0x7B94, | |
| 3068 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A
73, 0x6355, | |
| 3069 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C
3F, 0x90E8, | |
| 3070 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x24
95, 0x2496, | |
| 3071 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x24
76, 0x2477, | |
| 3072 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x24
7F, 0x2480, | |
| 3073 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x00
46, 0x007E, | |
| 3074 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x00
4F, 0x0050, | |
| 3075 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x00
58, 0x0059, | |
| 3076 0x005A, 0x005B, 0x005C, 0x000A | |
| 3077 }; | |
| 3078 const UChar* uSource; | |
| 3079 const UChar* uSourceLimit; | |
| 3080 const char* cSource; | |
| 3081 const char* cSourceLimit; | |
| 3082 UChar *uTargetLimit =NULL; | |
| 3083 UChar *uTarget; | |
| 3084 char *cTarget; | |
| 3085 const char *cTargetLimit; | |
| 3086 char *cBuf; | |
| 3087 UChar *uBuf,*test; | |
| 3088 int32_t uBufSize = 120; | |
| 3089 UErrorCode errorCode=U_ZERO_ERROR; | |
| 3090 UConverter *cnv; | |
| 3091 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
| 3092 int32_t* myOff= offsets; | |
| 3093 cnv=ucnv_open("HZ", &errorCode); | |
| 3094 if(U_FAILURE(errorCode)) { | |
| 3095 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode)
); | |
| 3096 return; | |
| 3097 } | |
| 3098 | |
| 3099 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
| 3100 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
| 3101 uSource = (const UChar*)in; | |
| 3102 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); | |
| 3103 cTarget = cBuf; | |
| 3104 cTargetLimit = cBuf +uBufSize*5; | |
| 3105 uTarget = uBuf; | |
| 3106 uTargetLimit = uBuf+ uBufSize*5; | |
| 3107 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); | |
| 3108 if(U_FAILURE(errorCode)){ | |
| 3109 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
| 3110 return; | |
| 3111 } | |
| 3112 cSource = cBuf; | |
| 3113 cSourceLimit =cTarget; | |
| 3114 test =uBuf; | |
| 3115 myOff=offsets; | |
| 3116 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); | |
| 3117 if(U_FAILURE(errorCode)){ | |
| 3118 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); | |
| 3119 return; | |
| 3120 } | |
| 3121 uSource = (const UChar*)in; | |
| 3122 while(uSource<uSourceLimit){ | |
| 3123 if(*test!=*uSource){ | |
| 3124 | |
| 3125 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; | |
| 3126 } | |
| 3127 uSource++; | |
| 3128 test++; | |
| 3129 } | |
| 3130 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding"); | |
| 3131 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 3132 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 3133 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 3134 TestJitterbug930("csISO2022JP"); | |
| 3135 ucnv_close(cnv); | |
| 3136 free(offsets); | |
| 3137 free(uBuf); | |
| 3138 free(cBuf); | |
| 3139 } | |
| 3140 | |
| 3141 static void | |
| 3142 TestISCII(){ | |
| 3143 /* test input */ | |
| 3144 static const uint16_t in[]={ | |
| 3145 /* test full range of Devanagari */ | |
| 3146 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A, | |
| 3147 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911, | |
| 3148 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D, | |
| 3149 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926, | |
| 3150 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F, | |
| 3151 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937, | |
| 3152 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943, | |
| 3153 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D, | |
| 3154 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C, | |
| 3155 0x096D,0x096E,0x096F, | |
| 3156 /* test Soft halant*/ | |
| 3157 0x0915,0x094d, 0x200D, | |
| 3158 /* test explicit halant */ | |
| 3159 0x0915,0x094d, 0x200c, | |
| 3160 /* test double danda */ | |
| 3161 0x965, | |
| 3162 /* test ASCII */ | |
| 3163 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, | |
| 3164 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, | |
| 3165 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, | |
| 3166 /* tests from Lotus */ | |
| 3167 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043, | |
| 3168 0x0930,0x094D,0x200D, | |
| 3169 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043, | |
| 3170 0x0915,0x0921,0x002B,0x095F, | |
| 3171 /* tamil range */ | |
| 3172 0x0B86, 0xB87, 0xB88, | |
| 3173 /* telugu range */ | |
| 3174 0x0C05, 0x0C02, 0x0C03,0x0c31, | |
| 3175 /* kannada range */ | |
| 3176 0x0C85, 0xC82, 0x0C83, | |
| 3177 /* test Abbr sign and Anudatta */ | |
| 3178 0x0970, 0x952, | |
| 3179 /* 0x0958, | |
| 3180 0x0959, | |
| 3181 0x095A, | |
| 3182 0x095B, | |
| 3183 0x095C, | |
| 3184 0x095D, | |
| 3185 0x095E, | |
| 3186 0x095F,*/ | |
| 3187 0x0960 /* Vocallic RRI 0xAB, 0xE9*/, | |
| 3188 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */, | |
| 3189 0x090C , | |
| 3190 0x0962, | |
| 3191 0x0961 /* Vocallic LL 0xa6, 0xE9 */, | |
| 3192 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */, | |
| 3193 0x0950 /* OM Symbol 0xa1, 0xE9,*/, | |
| 3194 0x093D /* Avagraha 0xEA, 0xE9*/, | |
| 3195 0x0958, | |
| 3196 0x0959, | |
| 3197 0x095A, | |
| 3198 0x095B, | |
| 3199 0x095C, | |
| 3200 0x095D, | |
| 3201 0x095E, | |
| 3202 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0 | |
| 3203 }; | |
| 3204 static const unsigned char byteArr[]={ | |
| 3205 | |
| 3206 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9, | |
| 3207 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2, | |
| 3208 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb, | |
| 3209 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4, | |
| 3210 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd, | |
| 3211 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, | |
| 3212 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, | |
| 3213 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8, | |
| 3214 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7, | |
| 3215 0xf8,0xf9,0xfa, | |
| 3216 /* test soft halant */ | |
| 3217 0xb3, 0xE8, 0xE9, | |
| 3218 /* test explicit halant */ | |
| 3219 0xb3, 0xE8, 0xE8, | |
| 3220 /* test double danda */ | |
| 3221 0xea, 0xea, | |
| 3222 /* test ASCII */ | |
| 3223 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, | |
| 3224 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, | |
| 3225 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, | |
| 3226 /* test ATR code */ | |
| 3227 | |
| 3228 /* tests from Lotus */ | |
| 3229 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43, | |
| 3230 0xEF,0x42,0xCF,0xE8,0xD9, | |
| 3231 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43, | |
| 3232 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE, | |
| 3233 /* tamil range */ | |
| 3234 0xEF, 0x44, 0xa5, 0xa6, 0xa7, | |
| 3235 /* telugu range */ | |
| 3236 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0, | |
| 3237 /* kannada range */ | |
| 3238 0xEF, 0x48,0xa4, 0xa2, 0xa3, | |
| 3239 /* anudatta and abbreviation sign */ | |
| 3240 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8, | |
| 3241 | |
| 3242 | |
| 3243 0xAA, 0xE9,/* RI + NUKTA 0x0960*/ | |
| 3244 | |
| 3245 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/ | |
| 3246 | |
| 3247 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/ | |
| 3248 | |
| 3249 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/ | |
| 3250 | |
| 3251 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/ | |
| 3252 | |
| 3253 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/ | |
| 3254 | |
| 3255 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/ | |
| 3256 | |
| 3257 0xEA, 0xE9, /* Danda + Nukta 0x093D*/ | |
| 3258 | |
| 3259 0xB3, 0xE9, /* Ka + NUKTA */ | |
| 3260 | |
| 3261 0xB4, 0xE9, /* Kha + NUKTA */ | |
| 3262 | |
| 3263 0xB5, 0xE9, /* Ga + NUKTA */ | |
| 3264 | |
| 3265 0xBA, 0xE9, | |
| 3266 | |
| 3267 0xBF, 0xE9, | |
| 3268 | |
| 3269 0xC0, 0xE9, | |
| 3270 | |
| 3271 0xC9, 0xE9, | |
| 3272 /* INV halant RA */ | |
| 3273 0xD9, 0xE8, 0xCF, | |
| 3274 0x00, 0x00A0, | |
| 3275 /* just consume unhandled codepoints */ | |
| 3276 0xEF, 0x30, | |
| 3277 | |
| 3278 }; | |
| 3279 testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-i
scii-de",NULL,TRUE); | |
| 3280 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof
(byteArr)); | |
| 3281 | |
| 3282 } | |
| 3283 | |
| 3284 static void | |
| 3285 TestISO_2022_JP() { | |
| 3286 /* test input */ | |
| 3287 static const uint16_t in[]={ | |
| 3288 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A, | |
| 3289 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D,
0x000A, | |
| 3290 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D,
0x000A, | |
| 3291 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D,
0x000A, | |
| 3292 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, | |
| 3293 0x201D, 0x3014, 0x000D, 0x000A, | |
| 3294 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D,
0x000A, | |
| 3295 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D,
0x000A, | |
| 3296 }; | |
| 3297 const UChar* uSource; | |
| 3298 const UChar* uSourceLimit; | |
| 3299 const char* cSource; | |
| 3300 const char* cSourceLimit; | |
| 3301 UChar *uTargetLimit =NULL; | |
| 3302 UChar *uTarget; | |
| 3303 char *cTarget; | |
| 3304 const char *cTargetLimit; | |
| 3305 char *cBuf; | |
| 3306 UChar *uBuf,*test; | |
| 3307 int32_t uBufSize = 120; | |
| 3308 UErrorCode errorCode=U_ZERO_ERROR; | |
| 3309 UConverter *cnv; | |
| 3310 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
| 3311 int32_t* myOff= offsets; | |
| 3312 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); | |
| 3313 if(U_FAILURE(errorCode)) { | |
| 3314 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorN
ame(errorCode)); | |
| 3315 return; | |
| 3316 } | |
| 3317 | |
| 3318 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
| 3319 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
| 3320 uSource = (const UChar*)in; | |
| 3321 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); | |
| 3322 cTarget = cBuf; | |
| 3323 cTargetLimit = cBuf +uBufSize*5; | |
| 3324 uTarget = uBuf; | |
| 3325 uTargetLimit = uBuf+ uBufSize*5; | |
| 3326 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); | |
| 3327 if(U_FAILURE(errorCode)){ | |
| 3328 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
| 3329 return; | |
| 3330 } | |
| 3331 cSource = cBuf; | |
| 3332 cSourceLimit =cTarget; | |
| 3333 test =uBuf; | |
| 3334 myOff=offsets; | |
| 3335 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); | |
| 3336 if(U_FAILURE(errorCode)){ | |
| 3337 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); | |
| 3338 return; | |
| 3339 } | |
| 3340 | |
| 3341 uSource = (const UChar*)in; | |
| 3342 while(uSource<uSourceLimit){ | |
| 3343 if(*test!=*uSource){ | |
| 3344 | |
| 3345 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; | |
| 3346 } | |
| 3347 uSource++; | |
| 3348 test++; | |
| 3349 } | |
| 3350 | |
| 3351 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 3352 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 3353 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding"); | |
| 3354 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 3355 TestJitterbug930("csISO2022JP"); | |
| 3356 ucnv_close(cnv); | |
| 3357 free(uBuf); | |
| 3358 free(cBuf); | |
| 3359 free(offsets); | |
| 3360 } | |
| 3361 | |
| 3362 static void TestConv(const uint16_t in[],int len, const char* conv, const char*
lang, char byteArr[],int byteArrLen){ | |
| 3363 const UChar* uSource; | |
| 3364 const UChar* uSourceLimit; | |
| 3365 const char* cSource; | |
| 3366 const char* cSourceLimit; | |
| 3367 UChar *uTargetLimit =NULL; | |
| 3368 UChar *uTarget; | |
| 3369 char *cTarget; | |
| 3370 const char *cTargetLimit; | |
| 3371 char *cBuf; | |
| 3372 UChar *uBuf,*test; | |
| 3373 int32_t uBufSize = 120*10; | |
| 3374 UErrorCode errorCode=U_ZERO_ERROR; | |
| 3375 UConverter *cnv; | |
| 3376 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) ); | |
| 3377 int32_t* myOff= offsets; | |
| 3378 cnv=my_ucnv_open(conv, &errorCode); | |
| 3379 if(U_FAILURE(errorCode)) { | |
| 3380 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(er
rorCode)); | |
| 3381 return; | |
| 3382 } | |
| 3383 | |
| 3384 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)); | |
| 3385 cBuf =(char*)malloc(uBufSize * sizeof(char)); | |
| 3386 uSource = (const UChar*)in; | |
| 3387 uSourceLimit=uSource+len; | |
| 3388 cTarget = cBuf; | |
| 3389 cTargetLimit = cBuf +uBufSize; | |
| 3390 uTarget = uBuf; | |
| 3391 uTargetLimit = uBuf+ uBufSize; | |
| 3392 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); | |
| 3393 if(U_FAILURE(errorCode)){ | |
| 3394 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
| 3395 return; | |
| 3396 } | |
| 3397 /*log_verbose("length of compressed string for language %s using %s:%i \n",c
onv,lang,(cTarget-cBuf));*/ | |
| 3398 cSource = cBuf; | |
| 3399 cSourceLimit =cTarget; | |
| 3400 test =uBuf; | |
| 3401 myOff=offsets; | |
| 3402 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); | |
| 3403 if(U_FAILURE(errorCode)){ | |
| 3404 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(er
rorCode)); | |
| 3405 return; | |
| 3406 } | |
| 3407 | |
| 3408 uSource = (const UChar*)in; | |
| 3409 while(uSource<uSourceLimit){ | |
| 3410 if(*test!=*uSource){ | |
| 3411 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",con
v,*uSource,(int)*test) ; | |
| 3412 } | |
| 3413 uSource++; | |
| 3414 test++; | |
| 3415 } | |
| 3416 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv); | |
| 3417 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv); | |
| 3418 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv); | |
| 3419 if(byteArr && byteArrLen!=0){ | |
| 3420 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang); | |
| 3421 TestToAndFromUChars(in,(const UChar*)&in[len],cnv); | |
| 3422 { | |
| 3423 cSource = byteArr; | |
| 3424 cSourceLimit = cSource+byteArrLen; | |
| 3425 test=uBuf; | |
| 3426 myOff = offsets; | |
| 3427 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff
,TRUE,&errorCode); | |
| 3428 if(U_FAILURE(errorCode)){ | |
| 3429 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorN
ame(errorCode)); | |
| 3430 return; | |
| 3431 } | |
| 3432 | |
| 3433 uSource = (const UChar*)in; | |
| 3434 while(uSource<uSourceLimit){ | |
| 3435 if(*test!=*uSource){ | |
| 3436 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int
)*test) ; | |
| 3437 } | |
| 3438 uSource++; | |
| 3439 test++; | |
| 3440 } | |
| 3441 } | |
| 3442 } | |
| 3443 | |
| 3444 ucnv_close(cnv); | |
| 3445 free(uBuf); | |
| 3446 free(cBuf); | |
| 3447 free(offsets); | |
| 3448 } | |
| 3449 static UChar U_CALLCONV | |
| 3450 _charAt(int32_t offset, void *context) { | |
| 3451 return ((char*)context)[offset]; | |
| 3452 } | |
| 3453 | |
| 3454 static int32_t | |
| 3455 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *s
tatus){ | |
| 3456 int32_t srcIndex=0; | |
| 3457 int32_t dstIndex=0; | |
| 3458 if(U_FAILURE(*status)){ | |
| 3459 return 0; | |
| 3460 } | |
| 3461 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){ | |
| 3462 *status = U_ILLEGAL_ARGUMENT_ERROR; | |
| 3463 return 0; | |
| 3464 } | |
| 3465 if(srcLen==-1){ | |
| 3466 srcLen = (int32_t)uprv_strlen(src); | |
| 3467 } | |
| 3468 | |
| 3469 for (; srcIndex<srcLen; ) { | |
| 3470 UChar32 c = src[srcIndex++]; | |
| 3471 if (c == 0x005C /*'\\'*/) { | |
| 3472 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i
*/ | |
| 3473 if (c == (UChar32)0xFFFFFFFF) { | |
| 3474 *status=U_INVALID_CHAR_FOUND; /* return empty string */ | |
| 3475 break; /* invalid escape sequence */ | |
| 3476 } | |
| 3477 } | |
| 3478 if(dstIndex < dstLen){ | |
| 3479 if(c>0xFFFF){ | |
| 3480 dst[dstIndex++] = U16_LEAD(c); | |
| 3481 if(dstIndex<dstLen){ | |
| 3482 dst[dstIndex]=U16_TRAIL(c); | |
| 3483 }else{ | |
| 3484 *status=U_BUFFER_OVERFLOW_ERROR; | |
| 3485 } | |
| 3486 }else{ | |
| 3487 dst[dstIndex]=(UChar)c; | |
| 3488 } | |
| 3489 | |
| 3490 }else{ | |
| 3491 *status = U_BUFFER_OVERFLOW_ERROR; | |
| 3492 } | |
| 3493 dstIndex++; /* for preflighting */ | |
| 3494 } | |
| 3495 return dstIndex; | |
| 3496 } | |
| 3497 | |
| 3498 static void | |
| 3499 TestFullRoundtrip(const char* cp){ | |
| 3500 UChar usource[10] ={0}; | |
| 3501 UChar nsrc[10] = {0}; | |
| 3502 uint32_t i=1; | |
| 3503 int len=0, ulen; | |
| 3504 nsrc[0]=0x0061; | |
| 3505 /* Test codepoint 0 */ | |
| 3506 TestConv(usource,1,cp,"",NULL,0); | |
| 3507 TestConv(usource,2,cp,"",NULL,0); | |
| 3508 nsrc[2]=0x5555; | |
| 3509 TestConv(nsrc,3,cp,"",NULL,0); | |
| 3510 | |
| 3511 for(;i<=0x10FFFF;i++){ | |
| 3512 if(i==0xD800){ | |
| 3513 i=0xDFFF; | |
| 3514 continue; | |
| 3515 } | |
| 3516 if(i<=0xFFFF){ | |
| 3517 usource[0] =(UChar) i; | |
| 3518 len=1; | |
| 3519 }else{ | |
| 3520 usource[0]=U16_LEAD(i); | |
| 3521 usource[1]=U16_TRAIL(i); | |
| 3522 len=2; | |
| 3523 } | |
| 3524 ulen=len; | |
| 3525 if(i==0x80) { | |
| 3526 usource[2]=0; | |
| 3527 } | |
| 3528 /* Test only single code points */ | |
| 3529 TestConv(usource,ulen,cp,"",NULL,0); | |
| 3530 /* Test codepoint repeated twice */ | |
| 3531 usource[ulen]=usource[0]; | |
| 3532 usource[ulen+1]=usource[1]; | |
| 3533 ulen+=len; | |
| 3534 TestConv(usource,ulen,cp,"",NULL,0); | |
| 3535 /* Test codepoint repeated 3 times */ | |
| 3536 usource[ulen]=usource[0]; | |
| 3537 usource[ulen+1]=usource[1]; | |
| 3538 ulen+=len; | |
| 3539 TestConv(usource,ulen,cp,"",NULL,0); | |
| 3540 /* Test codepoint in between 2 codepoints */ | |
| 3541 nsrc[1]=usource[0]; | |
| 3542 nsrc[2]=usource[1]; | |
| 3543 nsrc[len+1]=0x5555; | |
| 3544 TestConv(nsrc,len+2,cp,"",NULL,0); | |
| 3545 uprv_memset(usource,0,sizeof(UChar)*10); | |
| 3546 } | |
| 3547 } | |
| 3548 | |
| 3549 static void | |
| 3550 TestRoundTrippingAllUTF(void){ | |
| 3551 if(!getTestOption(QUICK_OPTION)){ | |
| 3552 log_verbose("Running exhaustive round trip test for BOCU-1\n"); | |
| 3553 TestFullRoundtrip("BOCU-1"); | |
| 3554 log_verbose("Running exhaustive round trip test for SCSU\n"); | |
| 3555 TestFullRoundtrip("SCSU"); | |
| 3556 log_verbose("Running exhaustive round trip test for UTF-8\n"); | |
| 3557 TestFullRoundtrip("UTF-8"); | |
| 3558 log_verbose("Running exhaustive round trip test for CESU-8\n"); | |
| 3559 TestFullRoundtrip("CESU-8"); | |
| 3560 log_verbose("Running exhaustive round trip test for UTF-16BE\n"); | |
| 3561 TestFullRoundtrip("UTF-16BE"); | |
| 3562 log_verbose("Running exhaustive round trip test for UTF-16LE\n"); | |
| 3563 TestFullRoundtrip("UTF-16LE"); | |
| 3564 log_verbose("Running exhaustive round trip test for UTF-16\n"); | |
| 3565 TestFullRoundtrip("UTF-16"); | |
| 3566 log_verbose("Running exhaustive round trip test for UTF-32BE\n"); | |
| 3567 TestFullRoundtrip("UTF-32BE"); | |
| 3568 log_verbose("Running exhaustive round trip test for UTF-32LE\n"); | |
| 3569 TestFullRoundtrip("UTF-32LE"); | |
| 3570 log_verbose("Running exhaustive round trip test for UTF-32\n"); | |
| 3571 TestFullRoundtrip("UTF-32"); | |
| 3572 log_verbose("Running exhaustive round trip test for UTF-7\n"); | |
| 3573 TestFullRoundtrip("UTF-7"); | |
| 3574 log_verbose("Running exhaustive round trip test for UTF-7\n"); | |
| 3575 TestFullRoundtrip("UTF-7,version=1"); | |
| 3576 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n"
); | |
| 3577 TestFullRoundtrip("IMAP-mailbox-name"); | |
| 3578 /* | |
| 3579 * | |
| 3580 * With the update to GB18030 2005 (Ticket #8274), this test will fail b
ecause the 2005 version of | |
| 3581 * GB18030 contains mappings to actual Unicode codepoints (which were pr
eviously mapped to PUA). | |
| 3582 * The old mappings remain as fallbacks. | |
| 3583 * This test may be reintroduced at a later time. | |
| 3584 * | |
| 3585 * 110118 - mow | |
| 3586 */ | |
| 3587 /* | |
| 3588 log_verbose("Running exhaustive round trip test for GB18030\n"); | |
| 3589 TestFullRoundtrip("GB18030"); | |
| 3590 */ | |
| 3591 } | |
| 3592 } | |
| 3593 | |
| 3594 static void | |
| 3595 TestSCSU() { | |
| 3596 | |
| 3597 static const uint16_t germanUTF16[]={ | |
| 3598 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 | |
| 3599 }; | |
| 3600 | |
| 3601 static const uint8_t germanSCSU[]={ | |
| 3602 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74 | |
| 3603 }; | |
| 3604 | |
| 3605 static const uint16_t russianUTF16[]={ | |
| 3606 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 | |
| 3607 }; | |
| 3608 | |
| 3609 static const uint8_t russianSCSU[]={ | |
| 3610 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0 | |
| 3611 }; | |
| 3612 | |
| 3613 static const uint16_t japaneseUTF16[]={ | |
| 3614 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, | |
| 3615 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, | |
| 3616 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, | |
| 3617 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, | |
| 3618 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, | |
| 3619 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044, | |
| 3620 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3, | |
| 3621 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd, | |
| 3622 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de, | |
| 3623 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, | |
| 3624 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, | |
| 3625 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, | |
| 3626 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, | |
| 3627 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, | |
| 3628 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 | |
| 3629 }; | |
| 3630 | |
| 3631 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of
one different choice: | |
| 3632 it uses an SQn once where a longer look-ahead could have shown that SCn is
more efficient */ | |
| 3633 static const uint8_t japaneseSCSU[]={ | |
| 3634 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef,
0x61, 0x1b, 0xe5, 0x84, | |
| 3635 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3,
0x94, 0x08, 0x02, 0x0f, | |
| 3636 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b,
0x6d, 0x41, 0x88, 0x4c, | |
| 3637 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e,
0x6b, 0x4c, 0x08, 0x0d, | |
| 3638 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa,
0x84, 0x08, 0x02, 0x0e, | |
| 3639 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc,
0x9f, 0x0e, 0x79, 0x3e, | |
| 3640 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08,
0x88, 0xbe, 0xa3, 0x8d, | |
| 3641 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08,
0x01, 0x93, 0xc8, 0xaa, | |
| 3642 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae,
0x93, 0xa8, 0xa0, 0x08, | |
| 3643 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80,
0x05, 0xec, 0x60, 0x8d, | |
| 3644 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4,
0xfe, 0xe7, 0xc2, 0x06, | |
| 3645 0xcb, 0x82 | |
| 3646 }; | |
| 3647 | |
| 3648 static const uint16_t allFeaturesUTF16[]={ | |
| 3649 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, | |
| 3650 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, | |
| 3651 0x01df, 0xf000, 0xdbff, 0xdfff | |
| 3652 }; | |
| 3653 | |
| 3654 /* see comment at japaneseSCSU: the same kind of different choice yields a s
lightly shorter | |
| 3655 * result here (34B vs. 35B) | |
| 3656 */ | |
| 3657 static const uint8_t allFeaturesSCSU[]={ | |
| 3658 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03, | |
| 3659 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a, | |
| 3660 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13, | |
| 3661 0xdf, 0x14, 0x80, 0x15, 0xff | |
| 3662 }; | |
| 3663 static const uint16_t monkeyIn[]={ | |
| 3664 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D,
0x000A, | |
| 3665 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D,
0x000A, | |
| 3666 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D,
0x000A, | |
| 3667 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D,
0x000A, | |
| 3668 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D,
0x000A, | |
| 3669 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D,
0x000A, | |
| 3670 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D,
0x000A, | |
| 3671 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D,
0x000A, | |
| 3672 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D,
0x000A, | |
| 3673 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D,
0x000A, | |
| 3674 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D,
0x000A, | |
| 3675 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D,
0x000A, | |
| 3676 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D,
0x000A, | |
| 3677 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D,
0x000A, | |
| 3678 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D,
0x000A, | |
| 3679 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D,
0x000A, | |
| 3680 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D,
0x000A, | |
| 3681 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D,
0x000A, | |
| 3682 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D,
0x000A, | |
| 3683 /* test non-BMP code points */ | |
| 3684 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869,
0xDE9F, | |
| 3685 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869,
0xDEA8, | |
| 3686 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869,
0xDEAF, | |
| 3687 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869,
0xDEB6, | |
| 3688 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869,
0xDEBB, | |
| 3689 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869,
0xDEC0, | |
| 3690 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869,
0xDEC8, | |
| 3691 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869,
0xDECF, | |
| 3692 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869,
0xDED4, | |
| 3693 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF,
0xDFFF, | |
| 3694 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, | |
| 3695 | |
| 3696 | |
| 3697 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D,
0x000A, | |
| 3698 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D,
0x000A, | |
| 3699 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D,
0x000A, | |
| 3700 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D,
0x000A, | |
| 3701 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D,
0x000A, | |
| 3702 }; | |
| 3703 static const char *fTestCases [] = { | |
| 3704 "\\ud800\\udc00", /* smallest surrogate*/ | |
| 3705 "\\ud8ff\\udcff", | |
| 3706 "\\udBff\\udFff", /* largest surrogate pair*/ | |
| 3707 "\\ud834\\udc00", | |
| 3708 "\\U0010FFFF", | |
| 3709 "Hello \\u9292 \\u9192 World!", | |
| 3710 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!", | |
| 3711 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", | |
| 3712 | |
| 3713 "\\u0648\\u06c8", /* catch missing reset*/ | |
| 3714 "\\u0648\\u06c8", | |
| 3715 | |
| 3716 "\\u4444\\uE001", /* lowest quotable*/ | |
| 3717 "\\u4444\\uf2FF", /* highest quotable*/ | |
| 3718 "\\u4444\\uf188\\u4444", | |
| 3719 "\\u4444\\uf188\\uf288", | |
| 3720 "\\u4444\\uf188abc\\u0429\\uf288", | |
| 3721 "\\u9292\\u2222", | |
| 3722 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!", | |
| 3723 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", | |
| 3724 "Hello World!123456", | |
| 3725 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/ | |
| 3726 | |
| 3727 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/ | |
| 3728 "abc\\u4411d", /* uses SQU*/ | |
| 3729 "abc\\u4411\\u4412d",/* uses SCU*/ | |
| 3730 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/ | |
| 3731 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data
*/ | |
| 3732 "\\u9292\\u2222", | |
| 3733 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", | |
| 3734 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u306
5\\u300c", | |
| 3735 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53e
f\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002", | |
| 3736 | |
| 3737 "", /* empty input*/ | |
| 3738 "\\u0000", /* smallest BMP character*/ | |
| 3739 "\\uFFFF", /* largest BMP character*/ | |
| 3740 | |
| 3741 /* regression tests*/ | |
| 3742 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49f
d\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa", | |
| 3743 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u0
15f\\u00df\\u01df\\uf000\\udbff\\udfff", | |
| 3744 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e
1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c", | |
| 3745 "\\u0041\\u00df\\u0401\\u015f", | |
| 3746 "\\u9066\\u2123abc", | |
| 3747 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u
539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf51
3\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\
u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\ucc
d8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\
\u0bc0\\u06c5", | |
| 3748 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b
5\\u0cf3\\u6059\\u7489", | |
| 3749 }; | |
| 3750 int i=0; | |
| 3751 for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){ | |
| 3752 const char* cSrc = fTestCases[i]; | |
| 3753 UErrorCode status = U_ZERO_ERROR; | |
| 3754 int32_t cSrcLen,srcLen; | |
| 3755 UChar* src; | |
| 3756 /* UConverter* cnv = ucnv_open("SCSU",&status); */ | |
| 3757 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]); | |
| 3758 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar)); | |
| 3759 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status); | |
| 3760 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i); | |
| 3761 TestConv(src,srcLen,"SCSU","Coverage",NULL,0); | |
| 3762 free(src); | |
| 3763 } | |
| 3764 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features"
, (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); | |
| 3765 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features"
,(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); | |
| 3766 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)
japaneseSCSU,sizeof(japaneseSCSU)); | |
| 3767 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese
",(char *)japaneseSCSU,sizeof(japaneseSCSU)); | |
| 3768 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanS
CSU,sizeof(germanSCSU)); | |
| 3769 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)rus
sianSCSU,sizeof(russianSCSU)); | |
| 3770 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0); | |
| 3771 } | |
| 3772 | |
| 3773 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 3774 static void TestJitterbug2346(){ | |
| 3775 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a, | |
| 3776 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a}; | |
| 3777 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A}; | |
| 3778 | |
| 3779 UChar uTarget[500]={'\0'}; | |
| 3780 UChar* utarget=uTarget; | |
| 3781 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; | |
| 3782 | |
| 3783 char cTarget[500]={'\0'}; | |
| 3784 char* ctarget=cTarget; | |
| 3785 char* ctargetLimit=cTarget+sizeof(cTarget); | |
| 3786 const char* csource=source; | |
| 3787 UChar* temp = expected; | |
| 3788 UErrorCode err=U_ZERO_ERROR; | |
| 3789 | |
| 3790 UConverter* conv =ucnv_open("ISO_2022_JP",&err); | |
| 3791 if(U_FAILURE(err)) { | |
| 3792 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
r)); | |
| 3793 return; | |
| 3794 } | |
| 3795 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NU
LL,TRUE,&err); | |
| 3796 if(U_FAILURE(err)) { | |
| 3797 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(er
r)); | |
| 3798 return; | |
| 3799 } | |
| 3800 utargetLimit=utarget; | |
| 3801 utarget = uTarget; | |
| 3802 while(utarget<utargetLimit){ | |
| 3803 if(*temp!=*utarget){ | |
| 3804 | |
| 3805 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp)
; | |
| 3806 } | |
| 3807 utarget++; | |
| 3808 temp++; | |
| 3809 } | |
| 3810 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetL
imit,NULL,TRUE,&err); | |
| 3811 if(U_FAILURE(err)) { | |
| 3812 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(
err)); | |
| 3813 return; | |
| 3814 } | |
| 3815 ctargetLimit=ctarget; | |
| 3816 ctarget =cTarget; | |
| 3817 ucnv_close(conv); | |
| 3818 | |
| 3819 | |
| 3820 } | |
| 3821 | |
| 3822 static void | |
| 3823 TestISO_2022_JP_1() { | |
| 3824 /* test input */ | |
| 3825 static const uint16_t in[]={ | |
| 3826 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D,
0x000A, | |
| 3827 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D,
0x000A, | |
| 3828 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D,
0x000A, | |
| 3829 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D,
0x000A, | |
| 3830 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D,
0x000A, | |
| 3831 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, | |
| 3832 0x201D, 0x000D, 0x000A, | |
| 3833 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D,
0x000A, | |
| 3834 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D,
0x000A, | |
| 3835 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D,
0x000A, | |
| 3836 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D,
0x000A, | |
| 3837 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D,
0x000A, | |
| 3838 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A | |
| 3839 }; | |
| 3840 const UChar* uSource; | |
| 3841 const UChar* uSourceLimit; | |
| 3842 const char* cSource; | |
| 3843 const char* cSourceLimit; | |
| 3844 UChar *uTargetLimit =NULL; | |
| 3845 UChar *uTarget; | |
| 3846 char *cTarget; | |
| 3847 const char *cTargetLimit; | |
| 3848 char *cBuf; | |
| 3849 UChar *uBuf,*test; | |
| 3850 int32_t uBufSize = 120; | |
| 3851 UErrorCode errorCode=U_ZERO_ERROR; | |
| 3852 UConverter *cnv; | |
| 3853 | |
| 3854 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); | |
| 3855 if(U_FAILURE(errorCode)) { | |
| 3856 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
rorCode)); | |
| 3857 return; | |
| 3858 } | |
| 3859 | |
| 3860 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
| 3861 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
| 3862 uSource = (const UChar*)in; | |
| 3863 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); | |
| 3864 cTarget = cBuf; | |
| 3865 cTargetLimit = cBuf +uBufSize*5; | |
| 3866 uTarget = uBuf; | |
| 3867 uTargetLimit = uBuf+ uBufSize*5; | |
| 3868 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TR
UE, &errorCode); | |
| 3869 if(U_FAILURE(errorCode)){ | |
| 3870 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
| 3871 return; | |
| 3872 } | |
| 3873 cSource = cBuf; | |
| 3874 cSourceLimit =cTarget; | |
| 3875 test =uBuf; | |
| 3876 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&er
rorCode); | |
| 3877 if(U_FAILURE(errorCode)){ | |
| 3878 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); | |
| 3879 return; | |
| 3880 } | |
| 3881 uSource = (const UChar*)in; | |
| 3882 while(uSource<uSourceLimit){ | |
| 3883 if(*test!=*uSource){ | |
| 3884 | |
| 3885 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; | |
| 3886 } | |
| 3887 uSource++; | |
| 3888 test++; | |
| 3889 } | |
| 3890 /*ucnv_close(cnv); | |
| 3891 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/ | |
| 3892 /*Test for the condition where there is an invalid character*/ | |
| 3893 ucnv_reset(cnv); | |
| 3894 { | |
| 3895 static const uint8_t source2[]={0x0e,0x24,0x053}; | |
| 3896 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]"); | |
| 3897 } | |
| 3898 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 3899 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 3900 ucnv_close(cnv); | |
| 3901 free(uBuf); | |
| 3902 free(cBuf); | |
| 3903 } | |
| 3904 | |
| 3905 static void | |
| 3906 TestISO_2022_JP_2() { | |
| 3907 /* test input */ | |
| 3908 static const uint16_t in[]={ | |
| 3909 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D,
0x000A, | |
| 3910 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D,
0x000A, | |
| 3911 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D,
0x000A, | |
| 3912 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D,
0x000A, | |
| 3913 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D,
0x000A, | |
| 3914 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D,
0x000A, | |
| 3915 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D,
0x000A, | |
| 3916 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D,
0x000A, | |
| 3917 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D,
0x000A, | |
| 3918 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D,
0x000A, | |
| 3919 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D,
0x000A, | |
| 3920 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D,
0x000A, | |
| 3921 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D,
0x000A, | |
| 3922 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D,
0x000A, | |
| 3923 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D,
0x000A, | |
| 3924 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D,
0x000A, | |
| 3925 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D,
0x000A, | |
| 3926 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D,
0x000A, | |
| 3927 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D,
0x000A | |
| 3928 }; | |
| 3929 const UChar* uSource; | |
| 3930 const UChar* uSourceLimit; | |
| 3931 const char* cSource; | |
| 3932 const char* cSourceLimit; | |
| 3933 UChar *uTargetLimit =NULL; | |
| 3934 UChar *uTarget; | |
| 3935 char *cTarget; | |
| 3936 const char *cTargetLimit; | |
| 3937 char *cBuf; | |
| 3938 UChar *uBuf,*test; | |
| 3939 int32_t uBufSize = 120; | |
| 3940 UErrorCode errorCode=U_ZERO_ERROR; | |
| 3941 UConverter *cnv; | |
| 3942 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
| 3943 int32_t* myOff= offsets; | |
| 3944 cnv=ucnv_open("ISO_2022_JP_2", &errorCode); | |
| 3945 if(U_FAILURE(errorCode)) { | |
| 3946 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
rorCode)); | |
| 3947 return; | |
| 3948 } | |
| 3949 | |
| 3950 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
| 3951 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
| 3952 uSource = (const UChar*)in; | |
| 3953 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); | |
| 3954 cTarget = cBuf; | |
| 3955 cTargetLimit = cBuf +uBufSize*5; | |
| 3956 uTarget = uBuf; | |
| 3957 uTargetLimit = uBuf+ uBufSize*5; | |
| 3958 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); | |
| 3959 if(U_FAILURE(errorCode)){ | |
| 3960 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
| 3961 return; | |
| 3962 } | |
| 3963 cSource = cBuf; | |
| 3964 cSourceLimit =cTarget; | |
| 3965 test =uBuf; | |
| 3966 myOff=offsets; | |
| 3967 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); | |
| 3968 if(U_FAILURE(errorCode)){ | |
| 3969 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); | |
| 3970 return; | |
| 3971 } | |
| 3972 uSource = (const UChar*)in; | |
| 3973 while(uSource<uSourceLimit){ | |
| 3974 if(*test!=*uSource){ | |
| 3975 | |
| 3976 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; | |
| 3977 } | |
| 3978 uSource++; | |
| 3979 test++; | |
| 3980 } | |
| 3981 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 3982 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 3983 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 3984 /*Test for the condition where there is an invalid character*/ | |
| 3985 ucnv_reset(cnv); | |
| 3986 { | |
| 3987 static const uint8_t source2[]={0x0e,0x24,0x053}; | |
| 3988 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]"); | |
| 3989 } | |
| 3990 ucnv_close(cnv); | |
| 3991 free(uBuf); | |
| 3992 free(cBuf); | |
| 3993 free(offsets); | |
| 3994 } | |
| 3995 | |
| 3996 static void | |
| 3997 TestISO_2022_KR() { | |
| 3998 /* test input */ | |
| 3999 static const uint16_t in[]={ | |
| 4000 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x00
0D | |
| 4001 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC
04 | |
| 4002 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x00
28,0x0029 | |
| 4003 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53
CA,0x53CB | |
| 4004 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53
E2 | |
| 4005 ,0x53E3,0x53E4,0x000A,0x000D}; | |
| 4006 const UChar* uSource; | |
| 4007 const UChar* uSourceLimit; | |
| 4008 const char* cSource; | |
| 4009 const char* cSourceLimit; | |
| 4010 UChar *uTargetLimit =NULL; | |
| 4011 UChar *uTarget; | |
| 4012 char *cTarget; | |
| 4013 const char *cTargetLimit; | |
| 4014 char *cBuf; | |
| 4015 UChar *uBuf,*test; | |
| 4016 int32_t uBufSize = 120; | |
| 4017 UErrorCode errorCode=U_ZERO_ERROR; | |
| 4018 UConverter *cnv; | |
| 4019 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
| 4020 int32_t* myOff= offsets; | |
| 4021 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode); | |
| 4022 if(U_FAILURE(errorCode)) { | |
| 4023 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
rorCode)); | |
| 4024 return; | |
| 4025 } | |
| 4026 | |
| 4027 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
| 4028 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
| 4029 uSource = (const UChar*)in; | |
| 4030 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); | |
| 4031 cTarget = cBuf; | |
| 4032 cTargetLimit = cBuf +uBufSize*5; | |
| 4033 uTarget = uBuf; | |
| 4034 uTargetLimit = uBuf+ uBufSize*5; | |
| 4035 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); | |
| 4036 if(U_FAILURE(errorCode)){ | |
| 4037 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
| 4038 return; | |
| 4039 } | |
| 4040 cSource = cBuf; | |
| 4041 cSourceLimit =cTarget; | |
| 4042 test =uBuf; | |
| 4043 myOff=offsets; | |
| 4044 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); | |
| 4045 if(U_FAILURE(errorCode)){ | |
| 4046 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); | |
| 4047 return; | |
| 4048 } | |
| 4049 uSource = (const UChar*)in; | |
| 4050 while(uSource<uSourceLimit){ | |
| 4051 if(*test!=*uSource){ | |
| 4052 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; | |
| 4053 } | |
| 4054 uSource++; | |
| 4055 test++; | |
| 4056 } | |
| 4057 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); | |
| 4058 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 4059 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 4060 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 4061 TestJitterbug930("csISO2022KR"); | |
| 4062 /*Test for the condition where there is an invalid character*/ | |
| 4063 ucnv_reset(cnv); | |
| 4064 { | |
| 4065 static const uint8_t source2[]={0x1b,0x24,0x053}; | |
| 4066 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); | |
| 4067 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); | |
| 4068 } | |
| 4069 ucnv_close(cnv); | |
| 4070 free(uBuf); | |
| 4071 free(cBuf); | |
| 4072 free(offsets); | |
| 4073 } | |
| 4074 | |
| 4075 static void | |
| 4076 TestISO_2022_KR_1() { | |
| 4077 /* test input */ | |
| 4078 static const uint16_t in[]={ | |
| 4079 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x00
0D | |
| 4080 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC
04 | |
| 4081 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x00
28,0x0029 | |
| 4082 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53
CA,0x53CB | |
| 4083 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53
E2 | |
| 4084 ,0x53E3,0x53E4,0x000A,0x000D}; | |
| 4085 const UChar* uSource; | |
| 4086 const UChar* uSourceLimit; | |
| 4087 const char* cSource; | |
| 4088 const char* cSourceLimit; | |
| 4089 UChar *uTargetLimit =NULL; | |
| 4090 UChar *uTarget; | |
| 4091 char *cTarget; | |
| 4092 const char *cTargetLimit; | |
| 4093 char *cBuf; | |
| 4094 UChar *uBuf,*test; | |
| 4095 int32_t uBufSize = 120; | |
| 4096 UErrorCode errorCode=U_ZERO_ERROR; | |
| 4097 UConverter *cnv; | |
| 4098 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
| 4099 int32_t* myOff= offsets; | |
| 4100 cnv=ucnv_open("ibm-25546", &errorCode); | |
| 4101 if(U_FAILURE(errorCode)) { | |
| 4102 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
rorCode)); | |
| 4103 return; | |
| 4104 } | |
| 4105 | |
| 4106 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
| 4107 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
| 4108 uSource = (const UChar*)in; | |
| 4109 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); | |
| 4110 cTarget = cBuf; | |
| 4111 cTargetLimit = cBuf +uBufSize*5; | |
| 4112 uTarget = uBuf; | |
| 4113 uTargetLimit = uBuf+ uBufSize*5; | |
| 4114 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); | |
| 4115 if(U_FAILURE(errorCode)){ | |
| 4116 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
| 4117 return; | |
| 4118 } | |
| 4119 cSource = cBuf; | |
| 4120 cSourceLimit =cTarget; | |
| 4121 test =uBuf; | |
| 4122 myOff=offsets; | |
| 4123 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); | |
| 4124 if(U_FAILURE(errorCode)){ | |
| 4125 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); | |
| 4126 return; | |
| 4127 } | |
| 4128 uSource = (const UChar*)in; | |
| 4129 while(uSource<uSourceLimit){ | |
| 4130 if(*test!=*uSource){ | |
| 4131 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; | |
| 4132 } | |
| 4133 uSource++; | |
| 4134 test++; | |
| 4135 } | |
| 4136 ucnv_reset(cnv); | |
| 4137 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); | |
| 4138 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 4139 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 4140 ucnv_reset(cnv); | |
| 4141 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 4142 /*Test for the condition where there is an invalid character*/ | |
| 4143 ucnv_reset(cnv); | |
| 4144 { | |
| 4145 static const uint8_t source2[]={0x1b,0x24,0x053}; | |
| 4146 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); | |
| 4147 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); | |
| 4148 } | |
| 4149 ucnv_close(cnv); | |
| 4150 free(uBuf); | |
| 4151 free(cBuf); | |
| 4152 free(offsets); | |
| 4153 } | |
| 4154 | |
| 4155 static void TestJitterbug2411(){ | |
| 4156 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6
f\x69\x75\x79\x71\x77\x65\x68\x67\x0A" | |
| 4157 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x
66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43"; | |
| 4158 UConverter* kr=NULL, *kr1=NULL; | |
| 4159 UErrorCode errorCode = U_ZERO_ERROR; | |
| 4160 UChar tgt[100]={'\0'}; | |
| 4161 UChar* target = tgt; | |
| 4162 UChar* targetLimit = target+100; | |
| 4163 kr=ucnv_open("iso-2022-kr", &errorCode); | |
| 4164 if(U_FAILURE(errorCode)) { | |
| 4165 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName
(errorCode)); | |
| 4166 return; | |
| 4167 } | |
| 4168 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NUL
L,TRUE,&errorCode); | |
| 4169 if(U_FAILURE(errorCode)) { | |
| 4170 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_
errorName(errorCode)); | |
| 4171 return; | |
| 4172 } | |
| 4173 kr1 = ucnv_open("ibm-25546", &errorCode); | |
| 4174 if(U_FAILURE(errorCode)) { | |
| 4175 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorNa
me(errorCode)); | |
| 4176 return; | |
| 4177 } | |
| 4178 target = tgt; | |
| 4179 targetLimit = target+100; | |
| 4180 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NUL
L,TRUE,&errorCode); | |
| 4181 | |
| 4182 if(U_FAILURE(errorCode)) { | |
| 4183 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n",
u_errorName(errorCode)); | |
| 4184 return; | |
| 4185 } | |
| 4186 | |
| 4187 ucnv_close(kr); | |
| 4188 ucnv_close(kr1); | |
| 4189 | |
| 4190 } | |
| 4191 | |
| 4192 static void | |
| 4193 TestJIS(){ | |
| 4194 /* From Unicode moved to testdata/conversion.txt */ | |
| 4195 /*To Unicode*/ | |
| 4196 { | |
| 4197 static const uint8_t sampleTextJIS[] = { | |
| 4198 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/ | |
| 4199 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ | |
| 4200 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>
&@*/ | |
| 4201 }; | |
| 4202 static const uint16_t expectedISO2022JIS[] = { | |
| 4203 0x0041, 0x0042, | |
| 4204 0xFF81, 0xFF82, | |
| 4205 0x3000 | |
| 4206 }; | |
| 4207 static const int32_t toISO2022JISOffs[]={ | |
| 4208 3,4, | |
| 4209 8,9, | |
| 4210 16 | |
| 4211 }; | |
| 4212 | |
| 4213 static const uint8_t sampleTextJIS7[] = { | |
| 4214 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/ | |
| 4215 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ | |
| 4216 0x1b,0x24,0x42,0x21,0x21, | |
| 4217 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */ | |
| 4218 0x21,0x22, | |
| 4219 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>
&@*/ | |
| 4220 }; | |
| 4221 static const uint16_t expectedISO2022JIS7[] = { | |
| 4222 0x0041, 0x0042, | |
| 4223 0xFF81, 0xFF82, | |
| 4224 0x3000, | |
| 4225 0xFF81, 0xFF82, | |
| 4226 0x3001, | |
| 4227 0x3000 | |
| 4228 }; | |
| 4229 static const int32_t toISO2022JIS7Offs[]={ | |
| 4230 3,4, | |
| 4231 8,9, | |
| 4232 13,16, | |
| 4233 17, | |
| 4234 19,27 | |
| 4235 }; | |
| 4236 static const uint8_t sampleTextJIS8[] = { | |
| 4237 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/ | |
| 4238 0xa1,0xc8,0xd9,/*Katakana Set*/ | |
| 4239 0x1b,0x28,0x42, | |
| 4240 0x41,0x42, | |
| 4241 0xb1,0xc3, /*Katakana Set*/ | |
| 4242 0x1b,0x24,0x42,0x21,0x21 | |
| 4243 }; | |
| 4244 static const uint16_t expectedISO2022JIS8[] = { | |
| 4245 0x0041, 0x0042, | |
| 4246 0xff61, 0xff88, 0xff99, | |
| 4247 0x0041, 0x0042, | |
| 4248 0xff71, 0xff83, | |
| 4249 0x3000 | |
| 4250 }; | |
| 4251 static const int32_t toISO2022JIS8Offs[]={ | |
| 4252 3, 4, 5, 6, | |
| 4253 7, 11, 12, 13, | |
| 4254 14, 18, | |
| 4255 }; | |
| 4256 | |
| 4257 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS, | |
| 4258 sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toIS
O2022JISOffs,TRUE); | |
| 4259 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7
, | |
| 4260 sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", t
oISO2022JIS7Offs,TRUE); | |
| 4261 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8
, | |
| 4262 sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", t
oISO2022JIS8Offs,TRUE); | |
| 4263 } | |
| 4264 | |
| 4265 } | |
| 4266 | |
| 4267 | |
| 4268 #if 0 | |
| 4269 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 | |
| 4270 | |
| 4271 static void TestJitterbug915(){ | |
| 4272 /* tests for roundtripping of the below sequence | |
| 4273 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * / | |
| 4274 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * / | |
| 4275 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * / | |
| 4276 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * / | |
| 4277 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * / | |
| 4278 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * / | |
| 4279 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * / | |
| 4280 */ | |
| 4281 static const char cSource[]={ | |
| 4282 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, | |
| 4283 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, | |
| 4284 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, | |
| 4285 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, | |
| 4286 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, | |
| 4287 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, | |
| 4288 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70, | |
| 4289 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, | |
| 4290 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, | |
| 4291 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, | |
| 4292 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61, | |
| 4293 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, | |
| 4294 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, | |
| 4295 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, | |
| 4296 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, | |
| 4297 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, | |
| 4298 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, | |
| 4299 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, | |
| 4300 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, | |
| 4301 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, | |
| 4302 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, | |
| 4303 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, | |
| 4304 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, | |
| 4305 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, | |
| 4306 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, | |
| 4307 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, | |
| 4308 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, | |
| 4309 0x37, 0x20, 0x2A, 0x2F | |
| 4310 }; | |
| 4311 UChar uTarget[500]={'\0'}; | |
| 4312 UChar* utarget=uTarget; | |
| 4313 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; | |
| 4314 | |
| 4315 char cTarget[500]={'\0'}; | |
| 4316 char* ctarget=cTarget; | |
| 4317 char* ctargetLimit=cTarget+sizeof(cTarget); | |
| 4318 const char* csource=cSource; | |
| 4319 const char* tempSrc = cSource; | |
| 4320 UErrorCode err=U_ZERO_ERROR; | |
| 4321 | |
| 4322 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err); | |
| 4323 if(U_FAILURE(err)) { | |
| 4324 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
r)); | |
| 4325 return; | |
| 4326 } | |
| 4327 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),N
ULL,TRUE,&err); | |
| 4328 if(U_FAILURE(err)) { | |
| 4329 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(er
r)); | |
| 4330 return; | |
| 4331 } | |
| 4332 utargetLimit=utarget; | |
| 4333 utarget = uTarget; | |
| 4334 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetL
imit,NULL,TRUE,&err); | |
| 4335 if(U_FAILURE(err)) { | |
| 4336 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(
err)); | |
| 4337 return; | |
| 4338 } | |
| 4339 ctargetLimit=ctarget; | |
| 4340 ctarget =cTarget; | |
| 4341 while(ctarget<ctargetLimit){ | |
| 4342 if(*ctarget != *tempSrc){ | |
| 4343 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarg
et-cTarget), *ctarget,(int)*tempSrc) ; | |
| 4344 } | |
| 4345 ++ctarget; | |
| 4346 ++tempSrc; | |
| 4347 } | |
| 4348 | |
| 4349 ucnv_close(conv); | |
| 4350 } | |
| 4351 | |
| 4352 static void | |
| 4353 TestISO_2022_CN_EXT() { | |
| 4354 /* test input */ | |
| 4355 static const uint16_t in[]={ | |
| 4356 /* test Non-BMP code points */ | |
| 4357 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869,
0xDE9F, | |
| 4358 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869,
0xDEA8, | |
| 4359 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869,
0xDEAF, | |
| 4360 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869,
0xDEB6, | |
| 4361 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869,
0xDEBB, | |
| 4362 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869,
0xDEC0, | |
| 4363 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869,
0xDEC8, | |
| 4364 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869,
0xDECF, | |
| 4365 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869,
0xDED4, | |
| 4366 0xD869, 0xDED5, | |
| 4367 | |
| 4368 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D,
0x000A, | |
| 4369 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D,
0x000A, | |
| 4370 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D,
0x000A, | |
| 4371 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D,
0x000A, | |
| 4372 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D,
0x000A, | |
| 4373 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D,
0x000A, | |
| 4374 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D,
0x000A, | |
| 4375 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D,
0x000A, | |
| 4376 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D,
0x000A, | |
| 4377 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D,
0x000A, | |
| 4378 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D,
0x000A, | |
| 4379 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D,
0x000A, | |
| 4380 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D,
0x000A, | |
| 4381 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D,
0x000A, | |
| 4382 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D,
0x000A, | |
| 4383 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D,
0x000A, | |
| 4384 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D,
0x000A, | |
| 4385 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D,
0x000A, | |
| 4386 | |
| 4387 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A | |
| 4388 | |
| 4389 }; | |
| 4390 | |
| 4391 const UChar* uSource; | |
| 4392 const UChar* uSourceLimit; | |
| 4393 const char* cSource; | |
| 4394 const char* cSourceLimit; | |
| 4395 UChar *uTargetLimit =NULL; | |
| 4396 UChar *uTarget; | |
| 4397 char *cTarget; | |
| 4398 const char *cTargetLimit; | |
| 4399 char *cBuf; | |
| 4400 UChar *uBuf,*test; | |
| 4401 int32_t uBufSize = 180; | |
| 4402 UErrorCode errorCode=U_ZERO_ERROR; | |
| 4403 UConverter *cnv; | |
| 4404 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
| 4405 int32_t* myOff= offsets; | |
| 4406 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode); | |
| 4407 if(U_FAILURE(errorCode)) { | |
| 4408 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
rorCode)); | |
| 4409 return; | |
| 4410 } | |
| 4411 | |
| 4412 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
| 4413 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); | |
| 4414 uSource = (const UChar*)in; | |
| 4415 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); | |
| 4416 cTarget = cBuf; | |
| 4417 cTargetLimit = cBuf +uBufSize*5; | |
| 4418 uTarget = uBuf; | |
| 4419 uTargetLimit = uBuf+ uBufSize*5; | |
| 4420 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); | |
| 4421 if(U_FAILURE(errorCode)){ | |
| 4422 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
| 4423 return; | |
| 4424 } | |
| 4425 cSource = cBuf; | |
| 4426 cSourceLimit =cTarget; | |
| 4427 test =uBuf; | |
| 4428 myOff=offsets; | |
| 4429 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); | |
| 4430 if(U_FAILURE(errorCode)){ | |
| 4431 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); | |
| 4432 return; | |
| 4433 } | |
| 4434 uSource = (const UChar*)in; | |
| 4435 while(uSource<uSourceLimit){ | |
| 4436 if(*test!=*uSource){ | |
| 4437 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; | |
| 4438 } | |
| 4439 else{ | |
| 4440 log_verbose(" Got: \\u%04X\n",(int)*test) ; | |
| 4441 } | |
| 4442 uSource++; | |
| 4443 test++; | |
| 4444 } | |
| 4445 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 4446 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 4447 /*Test for the condition where there is an invalid character*/ | |
| 4448 ucnv_reset(cnv); | |
| 4449 { | |
| 4450 static const uint8_t source2[]={0x0e,0x24,0x053}; | |
| 4451 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]"); | |
| 4452 } | |
| 4453 ucnv_close(cnv); | |
| 4454 free(uBuf); | |
| 4455 free(cBuf); | |
| 4456 free(offsets); | |
| 4457 } | |
| 4458 #endif | |
| 4459 | |
| 4460 static void | |
| 4461 TestISO_2022_CN() { | |
| 4462 /* test input */ | |
| 4463 static const uint16_t in[]={ | |
| 4464 /* jitterbug 951 */ | |
| 4465 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41,
0xFF52, | |
| 4466 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16,
0xFF17, | |
| 4467 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45,
0xFF52, | |
| 4468 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E,
0xFF45, | |
| 4469 0x0020, 0x0045, 0x004e, 0x0044, | |
| 4470 /**/ | |
| 4471 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D,
0x000A, | |
| 4472 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D,
0x000A, | |
| 4473 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D,
0x000A, | |
| 4474 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D,
0x000A, | |
| 4475 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D,
0x000A, | |
| 4476 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D,
0x000A, | |
| 4477 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D,
0x000A, | |
| 4478 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D,
0x000A, | |
| 4479 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D,
0x000A, | |
| 4480 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D,
0x000A, | |
| 4481 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D,
0x000A, | |
| 4482 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D,
0x000A, | |
| 4483 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D,
0x000A, | |
| 4484 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D,
0x000A, | |
| 4485 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D,
0x000A, | |
| 4486 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485,
0x2486, | |
| 4487 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D,
0x000A, | |
| 4488 | |
| 4489 }; | |
| 4490 const UChar* uSource; | |
| 4491 const UChar* uSourceLimit; | |
| 4492 const char* cSource; | |
| 4493 const char* cSourceLimit; | |
| 4494 UChar *uTargetLimit =NULL; | |
| 4495 UChar *uTarget; | |
| 4496 char *cTarget; | |
| 4497 const char *cTargetLimit; | |
| 4498 char *cBuf; | |
| 4499 UChar *uBuf,*test; | |
| 4500 int32_t uBufSize = 180; | |
| 4501 UErrorCode errorCode=U_ZERO_ERROR; | |
| 4502 UConverter *cnv; | |
| 4503 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
| 4504 int32_t* myOff= offsets; | |
| 4505 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode); | |
| 4506 if(U_FAILURE(errorCode)) { | |
| 4507 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
rorCode)); | |
| 4508 return; | |
| 4509 } | |
| 4510 | |
| 4511 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
| 4512 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); | |
| 4513 uSource = (const UChar*)in; | |
| 4514 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); | |
| 4515 cTarget = cBuf; | |
| 4516 cTargetLimit = cBuf +uBufSize*5; | |
| 4517 uTarget = uBuf; | |
| 4518 uTargetLimit = uBuf+ uBufSize*5; | |
| 4519 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); | |
| 4520 if(U_FAILURE(errorCode)){ | |
| 4521 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
| 4522 return; | |
| 4523 } | |
| 4524 cSource = cBuf; | |
| 4525 cSourceLimit =cTarget; | |
| 4526 test =uBuf; | |
| 4527 myOff=offsets; | |
| 4528 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); | |
| 4529 if(U_FAILURE(errorCode)){ | |
| 4530 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); | |
| 4531 return; | |
| 4532 } | |
| 4533 uSource = (const UChar*)in; | |
| 4534 while(uSource<uSourceLimit){ | |
| 4535 if(*test!=*uSource){ | |
| 4536 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; | |
| 4537 } | |
| 4538 else{ | |
| 4539 log_verbose(" Got: \\u%04X\n",(int)*test) ; | |
| 4540 } | |
| 4541 uSource++; | |
| 4542 test++; | |
| 4543 } | |
| 4544 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding"); | |
| 4545 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 4546 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 4547 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
| 4548 TestJitterbug930("csISO2022CN"); | |
| 4549 /*Test for the condition where there is an invalid character*/ | |
| 4550 ucnv_reset(cnv); | |
| 4551 { | |
| 4552 static const uint8_t source2[]={0x0e,0x24,0x053}; | |
| 4553 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]"); | |
| 4554 } | |
| 4555 | |
| 4556 ucnv_close(cnv); | |
| 4557 free(uBuf); | |
| 4558 free(cBuf); | |
| 4559 free(offsets); | |
| 4560 } | |
| 4561 | |
/*
 * Tests for empty segments in ISO-2022-JP/KR/CN and HZ: check that the
 * UConverterCallbackReason reported for them is UCNV_IRREGULAR.
 *
 * One EmptySegmentTest describes a single input for such a test.
 */
typedef struct {
    const char *converterName;   /* name of the converter to open */
    const char *inputText;       /* bytes to convert */
    int         inputTextLength; /* number of bytes in inputText */
} EmptySegmentTest;
| 4568 | |
| 4569 /* Callback for TestJitterbug6175, should only get called for empty segment erro
rs */ | |
| 4570 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUn
icodeArgs *toArgs, const char* codeUnits, | |
| 4571 int32_t length, UConverterCallbackR
eason reason, UErrorCode * err ) { | |
| 4572 if (reason > UCNV_IRREGULAR) { | |
| 4573 return; | |
| 4574 } | |
| 4575 if (reason != UCNV_IRREGULAR) { | |
| 4576 log_err("toUnicode callback invoked for empty segment but reason is not
UCNV_IRREGULAR\n"); | |
| 4577 } | |
| 4578 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */ | |
| 4579 *err = U_ZERO_ERROR; | |
| 4580 ucnv_cbToUWriteSub(toArgs,0,err); | |
| 4581 } | |
| 4582 | |
| 4583 enum { kEmptySegmentToUCharsMax = 64 }; | |
| 4584 static void TestJitterbug6175(void) { | |
| 4585 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0
x42, 0x63, 0x64, 0x0D, 0x0A }; | |
| 4586 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F,
0x62, 0x0D, 0x0A }; | |
| 4587 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E,
0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A }; | |
| 4588 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E,
0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A }; | |
| 4589 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63,
0x64 }; | |
| 4590 static const EmptySegmentTest emptySegmentTests[] = { | |
| 4591 /* converterName inputText inputTextLength */ | |
| 4592 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) }, | |
| 4593 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) }, | |
| 4594 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) }, | |
| 4595 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) }, | |
| 4596 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) }, | |
| 4597 /* terminator: */ | |
| 4598 { NULL, NULL, 0, } | |
| 4599 }; | |
| 4600 const EmptySegmentTest * testPtr; | |
| 4601 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr)
{ | |
| 4602 UErrorCode err = U_ZERO_ERROR; | |
| 4603 UConverter * cnv = ucnv_open(testPtr->converterName, &err); | |
| 4604 if (U_FAILURE(err)) { | |
| 4605 log_data_err("Unable to open %s converter: %s\n", testPtr->converter
Name, u_errorName(err)); | |
| 4606 return; | |
| 4607 } | |
| 4608 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NU
LL, &err); | |
| 4609 if (U_FAILURE(err)) { | |
| 4610 log_data_err("Unable to setToUCallBack for %s converter: %s\n", test
Ptr->converterName, u_errorName(err)); | |
| 4611 ucnv_close(cnv); | |
| 4612 return; | |
| 4613 } | |
| 4614 { | |
| 4615 UChar toUChars[kEmptySegmentToUCharsMax]; | |
| 4616 UChar * toUCharsPtr = toUChars; | |
| 4617 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax
; | |
| 4618 const char * inCharsPtr = testPtr->inputText; | |
| 4619 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength; | |
| 4620 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inChar
sLimit, NULL, TRUE, &err); | |
| 4621 } | |
| 4622 ucnv_close(cnv); | |
| 4623 } | |
| 4624 } | |
| 4625 | |
| 4626 static void | |
| 4627 TestEBCDIC_STATEFUL() { | |
| 4628 /* test input */ | |
| 4629 static const uint8_t in[]={ | |
| 4630 0x61, | |
| 4631 0x1a, | |
| 4632 0x0f, 0x4b, | |
| 4633 0x42, | |
| 4634 0x40, | |
| 4635 0x36, | |
| 4636 }; | |
| 4637 | |
| 4638 /* expected test results */ | |
| 4639 static const int32_t results[]={ | |
| 4640 /* number of bytes read, code point */ | |
| 4641 1, 0x002f, | |
| 4642 1, 0x0092, | |
| 4643 2, 0x002e, | |
| 4644 1, 0xff62, | |
| 4645 1, 0x0020, | |
| 4646 1, 0x0096, | |
| 4647 | |
| 4648 }; | |
| 4649 static const uint8_t in2[]={ | |
| 4650 0x0f, | |
| 4651 0xa1, | |
| 4652 0x01 | |
| 4653 }; | |
| 4654 | |
| 4655 /* expected test results */ | |
| 4656 static const int32_t results2[]={ | |
| 4657 /* number of bytes read, code point */ | |
| 4658 2, 0x203E, | |
| 4659 1, 0x0001, | |
| 4660 }; | |
| 4661 | |
| 4662 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
| 4663 UErrorCode errorCode=U_ZERO_ERROR; | |
| 4664 UConverter *cnv=ucnv_open("ibm-930", &errorCode); | |
| 4665 if(U_FAILURE(errorCode)) { | |
| 4666 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n"
, u_errorName(errorCode)); | |
| 4667 return; | |
| 4668 } | |
| 4669 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)"); | |
| 4670 ucnv_reset(cnv); | |
| 4671 /* Test the condition when source >= sourceLimit */ | |
| 4672 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
| 4673 ucnv_reset(cnv); | |
| 4674 /*Test for the condition where source > sourcelimit after consuming the shif
t chracter */ | |
| 4675 { | |
| 4676 static const uint8_t source1[]={0x0f}; | |
| 4677 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeo
f(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated"); | |
| 4678 } | |
| 4679 /*Test for the condition where there is an invalid character*/ | |
| 4680 ucnv_reset(cnv); | |
| 4681 { | |
| 4682 static const uint8_t source2[]={0x0e, 0x7F, 0xFF}; | |
| 4683 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]"); | |
| 4684 } | |
| 4685 ucnv_reset(cnv); | |
| 4686 source=(const char*)in2; | |
| 4687 limit=(const char*)in2+sizeof(in2); | |
| 4688 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2"); | |
| 4689 ucnv_close(cnv); | |
| 4690 | |
| 4691 } | |
| 4692 | |
| 4693 static void | |
| 4694 TestGB18030() { | |
| 4695 /* test input */ | |
| 4696 static const uint8_t in[]={ | |
| 4697 0x24, | |
| 4698 0x7f, | |
| 4699 0x81, 0x30, 0x81, 0x30, | |
| 4700 0xa8, 0xbf, | |
| 4701 0xa2, 0xe3, | |
| 4702 0xd2, 0xbb, | |
| 4703 0x82, 0x35, 0x8f, 0x33, | |
| 4704 0x84, 0x31, 0xa4, 0x39, | |
| 4705 0x90, 0x30, 0x81, 0x30, | |
| 4706 0xe3, 0x32, 0x9a, 0x35 | |
| 4707 #if 0 | |
| 4708 /* | |
| 4709 * Feature removed markus 2000-oct-26 | |
| 4710 * Only some codepages must match surrogate pairs into supplementary cod
e points - | |
| 4711 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvm
bcs.c . | |
| 4712 * GB 18030 provides direct encodings for supplementary code points, the
refore | |
| 4713 * it must not combine two single-encoded surrogates into one code point
. | |
| 4714 */ | |
| 4715 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded sur
rogates */ | |
| 4716 #endif | |
| 4717 }; | |
| 4718 | |
| 4719 /* expected test results */ | |
| 4720 static const int32_t results[]={ | |
| 4721 /* number of bytes read, code point */ | |
| 4722 1, 0x24, | |
| 4723 1, 0x7f, | |
| 4724 4, 0x80, | |
| 4725 2, 0x1f9, | |
| 4726 2, 0x20ac, | |
| 4727 2, 0x4e00, | |
| 4728 4, 0x9fa6, | |
| 4729 4, 0xffff, | |
| 4730 4, 0x10000, | |
| 4731 4, 0x10ffff | |
| 4732 #if 0 | |
| 4733 /* Feature removed. See comment above. */ | |
| 4734 8, 0x10000 | |
| 4735 #endif | |
| 4736 }; | |
| 4737 | |
| 4738 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */ | |
| 4739 UErrorCode errorCode=U_ZERO_ERROR; | |
| 4740 UConverter *cnv=ucnv_open("gb18030", &errorCode); | |
| 4741 if(U_FAILURE(errorCode)) { | |
| 4742 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(err
orCode)); | |
| 4743 return; | |
| 4744 } | |
| 4745 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "
gb18030"); | |
| 4746 ucnv_close(cnv); | |
| 4747 } | |
| 4748 | |
| 4749 static void | |
| 4750 TestLMBCS() { | |
| 4751 /* LMBCS-1 string */ | |
| 4752 static const uint8_t pszLMBCS[]={ | |
| 4753 0x61, | |
| 4754 0x01, 0x29, | |
| 4755 0x81, | |
| 4756 0xA0, | |
| 4757 0x0F, 0x27, | |
| 4758 0x0F, 0x91, | |
| 4759 0x14, 0x0a, 0x74, | |
| 4760 0x14, 0xF6, 0x02, | |
| 4761 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */ | |
| 4762 0x10, 0x88, 0xA0, | |
| 4763 }; | |
| 4764 | |
| 4765 /* Unicode UChar32 equivalents */ | |
| 4766 static const UChar32 pszUnicode32[]={ | |
| 4767 /* code point */ | |
| 4768 0x00000061, | |
| 4769 0x00002013, | |
| 4770 0x000000FC, | |
| 4771 0x000000E1, | |
| 4772 0x00000007, | |
| 4773 0x00000091, | |
| 4774 0x00000a74, | |
| 4775 0x00000200, | |
| 4776 0x00023456, /* code point for surrogate pair */ | |
| 4777 0x00005516 | |
| 4778 }; | |
| 4779 | |
| 4780 /* Unicode UChar equivalents */ | |
| 4781 static const UChar pszUnicode[]={ | |
| 4782 /* code point */ | |
| 4783 0x0061, | |
| 4784 0x2013, | |
| 4785 0x00FC, | |
| 4786 0x00E1, | |
| 4787 0x0007, | |
| 4788 0x0091, | |
| 4789 0x0a74, | |
| 4790 0x0200, | |
| 4791 0xD84D, /* low surrogate */ | |
| 4792 0xDC56, /* high surrogate */ | |
| 4793 0x5516 | |
| 4794 }; | |
| 4795 | |
| 4796 /* expected test results */ | |
| 4797 static const int offsets32[]={ | |
| 4798 /* number of bytes read, code point */ | |
| 4799 0, | |
| 4800 1, | |
| 4801 3, | |
| 4802 4, | |
| 4803 5, | |
| 4804 7, | |
| 4805 9, | |
| 4806 12, | |
| 4807 15, | |
| 4808 21, | |
| 4809 24 | |
| 4810 }; | |
| 4811 | |
| 4812 /* expected test results */ | |
| 4813 static const int offsets[]={ | |
| 4814 /* number of bytes read, code point */ | |
| 4815 0, | |
| 4816 1, | |
| 4817 3, | |
| 4818 4, | |
| 4819 5, | |
| 4820 7, | |
| 4821 9, | |
| 4822 12, | |
| 4823 15, | |
| 4824 18, | |
| 4825 21, | |
| 4826 24 | |
| 4827 }; | |
| 4828 | |
| 4829 | |
| 4830 UConverter *cnv; | |
| 4831 | |
| 4832 #define NAME_LMBCS_1 "LMBCS-1" | |
| 4833 #define NAME_LMBCS_2 "LMBCS-2" | |
| 4834 | |
| 4835 | |
| 4836 /* Some basic open/close/property tests on some LMBCS converters */ | |
| 4837 { | |
| 4838 | |
| 4839 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */ | |
| 4840 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/ | |
| 4841 char get_subchars [1]; | |
| 4842 const char * get_name; | |
| 4843 UConverter *cnv1; | |
| 4844 UConverter *cnv2; | |
| 4845 | |
| 4846 int8_t len = sizeof(get_subchars); | |
| 4847 | |
| 4848 UErrorCode errorCode=U_ZERO_ERROR; | |
| 4849 | |
| 4850 /* Open */ | |
| 4851 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode); | |
| 4852 if(U_FAILURE(errorCode)) { | |
| 4853 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(er
rorCode)); | |
| 4854 return; | |
| 4855 } | |
| 4856 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode); | |
| 4857 if(U_FAILURE(errorCode)) { | |
| 4858 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(er
rorCode)); | |
| 4859 return; | |
| 4860 } | |
| 4861 | |
| 4862 /* Name */ | |
| 4863 get_name = ucnv_getName (cnv1, &errorCode); | |
| 4864 if (strcmp(NAME_LMBCS_1,get_name)){ | |
| 4865 log_err("Unexpected converter name: %s\n", get_name); | |
| 4866 } | |
| 4867 get_name = ucnv_getName (cnv2, &errorCode); | |
| 4868 if (strcmp(NAME_LMBCS_2,get_name)){ | |
| 4869 log_err("Unexpected converter name: %s\n", get_name); | |
| 4870 } | |
| 4871 | |
| 4872 /* substitution chars */ | |
| 4873 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode); | |
| 4874 if(U_FAILURE(errorCode)) { | |
| 4875 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); | |
| 4876 } | |
| 4877 if (len!=1){ | |
| 4878 log_err("Unexpected length of sub chars\n"); | |
| 4879 } | |
| 4880 if (get_subchars[0] != expected_subchars[0]){ | |
| 4881 log_err("Unexpected value of sub chars\n"); | |
| 4882 } | |
| 4883 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode); | |
| 4884 if(U_FAILURE(errorCode)) { | |
| 4885 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode)); | |
| 4886 } | |
| 4887 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode); | |
| 4888 if(U_FAILURE(errorCode)) { | |
| 4889 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); | |
| 4890 } | |
| 4891 if (len!=1){ | |
| 4892 log_err("Unexpected length of sub chars\n"); | |
| 4893 } | |
| 4894 if (get_subchars[0] != new_subchars[0]){ | |
| 4895 log_err("Unexpected value of sub chars\n"); | |
| 4896 } | |
| 4897 ucnv_close(cnv1); | |
| 4898 ucnv_close(cnv2); | |
| 4899 | |
| 4900 } | |
| 4901 | |
| 4902 /* LMBCS to Unicode - offsets */ | |
| 4903 { | |
| 4904 UErrorCode errorCode=U_ZERO_ERROR; | |
| 4905 | |
| 4906 const char * pSource = (const char *)pszLMBCS; | |
| 4907 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); | |
| 4908 | |
| 4909 UChar Out [sizeof(pszUnicode) + 1]; | |
| 4910 UChar * pOut = Out; | |
| 4911 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); | |
| 4912 | |
| 4913 int32_t off [sizeof(offsets)]; | |
| 4914 | |
| 4915 /* last 'offset' in expected results is just the final size. | |
| 4916 (Makes other tests easier). Compensate here: */ | |
| 4917 | |
| 4918 off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS); | |
| 4919 | |
| 4920 | |
| 4921 | |
| 4922 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */ | |
| 4923 if(U_FAILURE(errorCode)) { | |
| 4924 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(er
rorCode)); | |
| 4925 return; | |
| 4926 } | |
| 4927 | |
| 4928 | |
| 4929 | |
| 4930 ucnv_toUnicode (cnv, | |
| 4931 &pOut, | |
| 4932 OutLimit, | |
| 4933 &pSource, | |
| 4934 sourceLimit, | |
| 4935 off, | |
| 4936 TRUE, | |
| 4937 &errorCode); | |
| 4938 | |
| 4939 | |
| 4940 if (memcmp(off,offsets,sizeof(offsets))) | |
| 4941 { | |
| 4942 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n
"); | |
| 4943 } | |
| 4944 if (memcmp(Out,pszUnicode,sizeof(pszUnicode))) | |
| 4945 { | |
| 4946 log_err("LMBCS->Uni: Calculated codepoints do not match expected result
s\n"); | |
| 4947 } | |
| 4948 ucnv_close(cnv); | |
| 4949 } | |
| 4950 { | |
| 4951 /* LMBCS to Unicode - getNextUChar */ | |
| 4952 const char * sourceStart; | |
| 4953 const char *source=(const char *)pszLMBCS; | |
| 4954 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS); | |
| 4955 const UChar32 *results= pszUnicode32; | |
| 4956 const int *off = offsets32; | |
| 4957 | |
| 4958 UErrorCode errorCode=U_ZERO_ERROR; | |
| 4959 UChar32 uniChar; | |
| 4960 | |
| 4961 cnv=ucnv_open("LMBCS-1", &errorCode); | |
| 4962 if(U_FAILURE(errorCode)) { | |
| 4963 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(
errorCode)); | |
| 4964 return; | |
| 4965 } | |
| 4966 else | |
| 4967 { | |
| 4968 | |
| 4969 while(source<limit) { | |
| 4970 sourceStart=source; | |
| 4971 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]),
&errorCode); | |
| 4972 if(U_FAILURE(errorCode)) { | |
| 4973 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorNam
e(errorCode)); | |
| 4974 break; | |
| 4975 } else if(source-sourceStart != off[1] - off[0] || uniChar != *resul
ts) { | |
| 4976 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, sh
ould have been %lx from %d bytes.\n", | |
| 4977 uniChar, (source-sourceStart), *results, *off); | |
| 4978 break; | |
| 4979 } | |
| 4980 results++; | |
| 4981 off++; | |
| 4982 } | |
| 4983 } | |
| 4984 ucnv_close(cnv); | |
| 4985 } | |
| 4986 { /* test locale & optimization group operations: Unicode to LMBCS */ | |
| 4987 | |
| 4988 UErrorCode errorCode=U_ZERO_ERROR; | |
| 4989 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode); | |
| 4990 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode); | |
| 4991 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode); | |
| 4992 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */ | |
| 4993 const UChar * pUniOut = uniString; | |
| 4994 UChar * pUniIn = uniString; | |
| 4995 uint8_t lmbcsString [4]; | |
| 4996 const char * pLMBCSOut = (const char *)lmbcsString; | |
| 4997 char * pLMBCSIn = (char *)lmbcsString; | |
| 4998 | |
| 4999 /* 0192 (hook) converts to both group 3 & group 1. input locale should dif
ferentiate */ | |
| 5000 ucnv_fromUnicode (cnv16he, | |
| 5001 &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsS
tring[0])), | |
| 5002 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0
]), | |
| 5003 NULL, 1, &errorCode); | |
| 5004 | |
| 5005 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83) | |
| 5006 { | |
| 5007 log_err("LMBCS-16,locale=he gives unexpected translation\n"); | |
| 5008 } | |
| 5009 | |
| 5010 pLMBCSIn= (char *)lmbcsString; | |
| 5011 pUniOut = uniString; | |
| 5012 ucnv_fromUnicode (cnv01us, | |
| 5013 &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsStri
ng)/sizeof(lmbcsString[0])), | |
| 5014 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0
]), | |
| 5015 NULL, 1, &errorCode); | |
| 5016 | |
| 5017 if (lmbcsString[0] != 0x9F) | |
| 5018 { | |
| 5019 log_err("LMBCS-1,locale=US gives unexpected translation\n"); | |
| 5020 } | |
| 5021 | |
| 5022 /* single byte char from mbcs char set */ | |
| 5023 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */ | |
| 5024 pLMBCSOut = (const char *)lmbcsString; | |
| 5025 pUniIn = uniString; | |
| 5026 ucnv_toUnicode (cnv16jp, | |
| 5027 &pUniIn, pUniIn + 1, | |
| 5028 &pLMBCSOut, (pLMBCSOut + 1), | |
| 5029 NULL, 1, &errorCode); | |
| 5030 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pU
niIn != uniString+1 || uniString[0] != 0xFF6E) | |
| 5031 { | |
| 5032 log_err("Unexpected results from LMBCS-16 single byte char\n"); | |
| 5033 } | |
| 5034 /* convert to group 1: should be 3 bytes */ | |
| 5035 pLMBCSIn = (char *)lmbcsString; | |
| 5036 pUniOut = uniString; | |
| 5037 ucnv_fromUnicode (cnv01us, | |
| 5038 &pLMBCSIn, (const char *)(pLMBCSIn + 3), | |
| 5039 &pUniOut, pUniOut + 1, | |
| 5040 NULL, 1, &errorCode); | |
| 5041 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUn
iOut != uniString+1 | |
| 5042 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] !
= 0xAE) | |
| 5043 { | |
| 5044 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n"); | |
| 5045 } | |
| 5046 pLMBCSOut = (const char *)lmbcsString; | |
| 5047 pUniIn = uniString; | |
| 5048 ucnv_toUnicode (cnv01us, | |
| 5049 &pUniIn, pUniIn + 1, | |
| 5050 &pLMBCSOut, (const char *)(pLMBCSOut + 3), | |
| 5051 NULL, 1, &errorCode); | |
| 5052 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pU
niIn != uniString+1 || uniString[0] != 0xFF6E) | |
| 5053 { | |
| 5054 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n"); | |
| 5055 } | |
| 5056 pLMBCSIn = (char *)lmbcsString; | |
| 5057 pUniOut = uniString; | |
| 5058 ucnv_fromUnicode (cnv16jp, | |
| 5059 &pLMBCSIn, (const char *)(pLMBCSIn + 1), | |
| 5060 &pUniOut, pUniOut + 1, | |
| 5061 NULL, 1, &errorCode); | |
| 5062 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUn
iOut != uniString+1 || lmbcsString[0] != 0xAE) | |
| 5063 { | |
| 5064 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n"); | |
| 5065 } | |
| 5066 ucnv_close(cnv16he); | |
| 5067 ucnv_close(cnv16jp); | |
| 5068 ucnv_close(cnv01us); | |
| 5069 } | |
| 5070 { | |
| 5071 /* Small source buffer testing, LMBCS -> Unicode */ | |
| 5072 | |
| 5073 UErrorCode errorCode=U_ZERO_ERROR; | |
| 5074 | |
| 5075 const char * pSource = (const char *)pszLMBCS; | |
| 5076 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); | |
| 5077 int codepointCount = 0; | |
| 5078 | |
| 5079 UChar Out [sizeof(pszUnicode) + 1]; | |
| 5080 UChar * pOut = Out; | |
| 5081 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); | |
| 5082 | |
| 5083 | |
| 5084 cnv = ucnv_open(NAME_LMBCS_1, &errorCode); | |
| 5085 if(U_FAILURE(errorCode)) { | |
| 5086 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(error
Code)); | |
| 5087 return; | |
| 5088 } | |
| 5089 | |
| 5090 | |
| 5091 while ((pSource < sourceLimit) && U_SUCCESS (errorCode)) | |
| 5092 { | |
| 5093 ucnv_toUnicode (cnv, | |
| 5094 &pOut, | |
| 5095 OutLimit, | |
| 5096 &pSource, | |
| 5097 (pSource+1), /* claim that this is a 1- byte buffer */ | |
| 5098 NULL, | |
| 5099 FALSE, /* FALSE means there might be more chars in the next bu
ffer */ | |
| 5100 &errorCode); | |
| 5101 | |
| 5102 if (U_SUCCESS (errorCode)) | |
| 5103 { | |
| 5104 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount
+1]) | |
| 5105 { | |
| 5106 /* we are on to the next code point: check value */ | |
| 5107 | |
| 5108 if (Out[0] != pszUnicode[codepointCount]){ | |
| 5109 log_err("LMBCS->Uni result %lx should have been %lx \n", | |
| 5110 Out[0], pszUnicode[codepointCount]); | |
| 5111 } | |
| 5112 | |
| 5113 pOut = Out; /* reset for accumulating next code point */ | |
| 5114 codepointCount++; | |
| 5115 } | |
| 5116 } | |
| 5117 else | |
| 5118 { | |
| 5119 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorC
ode)); | |
| 5120 } | |
| 5121 } | |
| 5122 { | |
| 5123 /* limits & surrogate error testing */ | |
| 5124 char LIn [sizeof(pszLMBCS)]; | |
| 5125 const char * pLIn = LIn; | |
| 5126 | |
| 5127 char LOut [sizeof(pszLMBCS)]; | |
| 5128 char * pLOut = LOut; | |
| 5129 | |
| 5130 UChar UOut [sizeof(pszUnicode)]; | |
| 5131 UChar * pUOut = UOut; | |
| 5132 | |
| 5133 UChar UIn [sizeof(pszUnicode)]; | |
| 5134 const UChar * pUIn = UIn; | |
| 5135 | |
| 5136 int32_t off [sizeof(offsets)]; | |
| 5137 UChar32 uniChar; | |
| 5138 | |
| 5139 errorCode=U_ZERO_ERROR; | |
| 5140 | |
| 5141 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERRO
R */ | |
| 5142 pUIn++; | |
| 5143 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &erro
rCode); | |
| 5144 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) | |
| 5145 { | |
| 5146 log_err("Unexpected Error on negative source request to ucnv_fromUni
code: %s\n", u_errorName(errorCode)); | |
| 5147 } | |
| 5148 pUIn--; | |
| 5149 | |
| 5150 errorCode=U_ZERO_ERROR; | |
| 5151 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(
pLIn-1),off,FALSE, &errorCode); | |
| 5152 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) | |
| 5153 { | |
| 5154 log_err("Unexpected Error on negative source request to ucnv_toUnico
de: %s\n", u_errorName(errorCode)); | |
| 5155 } | |
| 5156 errorCode=U_ZERO_ERROR; | |
| 5157 | |
| 5158 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(p
LIn-1), &errorCode); | |
| 5159 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) | |
| 5160 { | |
| 5161 log_err("Unexpected Error on negative source request to ucnv_getNext
UChar: %s\n", u_errorName(errorCode)); | |
| 5162 } | |
| 5163 errorCode=U_ZERO_ERROR; | |
| 5164 | |
| 5165 /* 0 byte source request - no error, no pointer movement */ | |
| 5166 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)p
LIn,off,FALSE, &errorCode); | |
| 5167 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode); | |
| 5168 if(U_FAILURE(errorCode)) { | |
| 5169 log_err("0 byte source request: unexpected error: %s\n", u_errorName
(errorCode)); | |
| 5170 } | |
| 5171 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn
)) | |
| 5172 { | |
| 5173 log_err("Unexpected pointer move in 0 byte source request \n"); | |
| 5174 } | |
| 5175 /*0 byte source request - GetNextUChar : error & value == fffe or ffff
*/ | |
| 5176 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pL
In, &errorCode); | |
| 5177 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR) | |
| 5178 { | |
| 5179 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUC
har: %s\n", u_errorName(errorCode)); | |
| 5180 } | |
| 5181 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */ | |
| 5182 { | |
| 5183 log_err("Unexpected value on 0-byte source request to ucnv_getnextUC
har \n"); | |
| 5184 } | |
| 5185 errorCode = U_ZERO_ERROR; | |
| 5186 | |
| 5187 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */ | |
| 5188 | |
| 5189 pUIn = pszUnicode; | |
| 5190 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnic
ode)/sizeof(UChar),off,FALSE, &errorCode); | |
| 5191 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4]
|| pUIn != pszUnicode+4 ) | |
| 5192 { | |
| 5193 log_err("Unexpected results on out of target room to ucnv_fromUnicod
e\n"); | |
| 5194 } | |
| 5195 | |
| 5196 errorCode = U_ZERO_ERROR; | |
| 5197 | |
| 5198 pLIn = (const char *)pszLMBCS; | |
| 5199 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FA
LSE, &errorCode); | |
| 5200 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn !
= (const char *)pszLMBCS+offsets[4]) | |
| 5201 { | |
| 5202 log_err("Unexpected results on out of target room to ucnv_toUnicode\
n"); | |
| 5203 } | |
| 5204 | |
| 5205 /* unpaired or chopped LMBCS surrogates */ | |
| 5206 | |
| 5207 /* OK high surrogate, Low surrogate is chopped */ | |
| 5208 LIn [0] = (char)0x14; | |
| 5209 LIn [1] = (char)0xD8; | |
| 5210 LIn [2] = (char)0x01; | |
| 5211 LIn [3] = (char)0x14; | |
| 5212 LIn [4] = (char)0xDC; | |
| 5213 pLIn = LIn; | |
| 5214 errorCode = U_ZERO_ERROR; | |
| 5215 pUOut = UOut; | |
| 5216 | |
| 5217 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &er
rorCode); | |
| 5218 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char
**)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); | |
| 5219 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut !
= UOut + 1 || pLIn != LIn + 5) | |
| 5220 { | |
| 5221 log_err("Unexpected results on chopped low surrogate\n"); | |
| 5222 } | |
| 5223 | |
| 5224 /* chopped at surrogate boundary */ | |
| 5225 LIn [0] = (char)0x14; | |
| 5226 LIn [1] = (char)0xD8; | |
| 5227 LIn [2] = (char)0x01; | |
| 5228 pLIn = LIn; | |
| 5229 errorCode = U_ZERO_ERROR; | |
| 5230 pUOut = UOut; | |
| 5231 | |
| 5232 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char
**)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode); | |
| 5233 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || p
LIn != LIn + 3) | |
| 5234 { | |
| 5235 log_err("Unexpected results on chopped at surrogate boundary \n"); | |
| 5236 } | |
| 5237 | |
| 5238 /* unpaired surrogate plus valid Unichar */ | |
| 5239 LIn [0] = (char)0x14; | |
| 5240 LIn [1] = (char)0xD8; | |
| 5241 LIn [2] = (char)0x01; | |
| 5242 LIn [3] = (char)0x14; | |
| 5243 LIn [4] = (char)0xC9; | |
| 5244 LIn [5] = (char)0xD0; | |
| 5245 pLIn = LIn; | |
| 5246 errorCode = U_ZERO_ERROR; | |
| 5247 pUOut = UOut; | |
| 5248 | |
| 5249 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char
**)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode); | |
| 5250 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || p
UOut != UOut + 2 || pLIn != LIn + 6) | |
| 5251 { | |
| 5252 log_err("Unexpected results after unpaired surrogate plus valid Unic
har \n"); | |
| 5253 } | |
| 5254 | |
| 5255 /* unpaired surrogate plus chopped Unichar */ | |
| 5256 LIn [0] = (char)0x14; | |
| 5257 LIn [1] = (char)0xD8; | |
| 5258 LIn [2] = (char)0x01; | |
| 5259 LIn [3] = (char)0x14; | |
| 5260 LIn [4] = (char)0xC9; | |
| 5261 | |
| 5262 pLIn = LIn; | |
| 5263 errorCode = U_ZERO_ERROR; | |
| 5264 pUOut = UOut; | |
| 5265 | |
| 5266 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char
**)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); | |
| 5267 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut !
= UOut + 1 || pLIn != LIn + 5) | |
| 5268 { | |
| 5269 log_err("Unexpected results after unpaired surrogate plus chopped Un
ichar \n"); | |
| 5270 } | |
| 5271 | |
| 5272 /* unpaired surrogate plus valid non-Unichar */ | |
| 5273 LIn [0] = (char)0x14; | |
| 5274 LIn [1] = (char)0xD8; | |
| 5275 LIn [2] = (char)0x01; | |
| 5276 LIn [3] = (char)0x0F; | |
| 5277 LIn [4] = (char)0x3B; | |
| 5278 | |
| 5279 pLIn = LIn; | |
| 5280 errorCode = U_ZERO_ERROR; | |
| 5281 pUOut = UOut; | |
| 5282 | |
| 5283 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char
**)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); | |
| 5284 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUO
ut != UOut + 2 || pLIn != LIn + 5) | |
| 5285 { | |
| 5286 log_err("Unexpected results after unpaired surrogate plus valid non-
Unichar\n"); | |
| 5287 } | |
| 5288 | |
| 5289 /* unpaired surrogate plus chopped non-Unichar */ | |
| 5290 LIn [0] = (char)0x14; | |
| 5291 LIn [1] = (char)0xD8; | |
| 5292 LIn [2] = (char)0x01; | |
| 5293 LIn [3] = (char)0x0F; | |
| 5294 | |
| 5295 pLIn = LIn; | |
| 5296 errorCode = U_ZERO_ERROR; | |
| 5297 pUOut = UOut; | |
| 5298 | |
| 5299 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char
**)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode); | |
| 5300 | |
| 5301 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut !
= UOut + 1 || pLIn != LIn + 4) | |
| 5302 { | |
| 5303 log_err("Unexpected results after unpaired surrogate plus chopped no
n-Unichar\n"); | |
| 5304 } | |
| 5305 } | |
| 5306 } | |
| 5307 ucnv_close(cnv); /* final cleanup */ | |
| 5308 } | |
| 5309 | |
| 5310 | |
| 5311 static void TestJitterbug255() | |
| 5312 { | |
| 5313 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x0
0 }; | |
| 5314 const char *testBuffer = (const char *)testBytes; | |
| 5315 const char *testEnd = (const char *)testBytes + sizeof(testBytes); | |
| 5316 UErrorCode status = U_ZERO_ERROR; | |
| 5317 /*UChar32 result;*/ | |
| 5318 UConverter *cnv = 0; | |
| 5319 | |
| 5320 cnv = ucnv_open("shift-jis", &status); | |
| 5321 if (U_FAILURE(status) || cnv == 0) { | |
| 5322 log_data_err("Failed to open the converter for SJIS.\n"); | |
| 5323 return; | |
| 5324 } | |
| 5325 while (testBuffer != testEnd) | |
| 5326 { | |
| 5327 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status); | |
| 5328 if (U_FAILURE(status)) | |
| 5329 { | |
| 5330 log_err("Failed to convert the next UChar for SJIS.\n"); | |
| 5331 break; | |
| 5332 } | |
| 5333 } | |
| 5334 ucnv_close(cnv); | |
| 5335 } | |
| 5336 | |
| 5337 static void TestEBCDICUS4XML() | |
| 5338 { | |
| 5339 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000}; | |
| 5340 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000}; | |
| 5341 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00}; | |
| 5342 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00}; | |
| 5343 char target_x[] = {0x00, 0x00, 0x00, 0x00}; | |
| 5344 UChar *unicodes = unicodes_x; | |
| 5345 const UChar *toUnicodeMaps = toUnicodeMaps_x; | |
| 5346 char *target = target_x; | |
| 5347 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x; | |
| 5348 UErrorCode status = U_ZERO_ERROR; | |
| 5349 UConverter *cnv = 0; | |
| 5350 | |
| 5351 cnv = ucnv_open("ebcdic-xml-us", &status); | |
| 5352 if (U_FAILURE(status) || cnv == 0) { | |
| 5353 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n"); | |
| 5354 return; | |
| 5355 } | |
| 5356 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines
+3, NULL, TRUE, &status); | |
| 5357 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3)
!= 0) { | |
| 5358 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n", | |
| 5359 u_errorName(status)); | |
| 5360 printUSeqErr(unicodes_x, 3); | |
| 5361 printUSeqErr(toUnicodeMaps, 3); | |
| 5362 } | |
| 5363 status = U_ZERO_ERROR; | |
| 5364 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUn
icodeMaps+3, NULL, TRUE, &status); | |
| 5365 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) !
= 0) { | |
| 5366 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n", | |
| 5367 u_errorName(status)); | |
| 5368 printSeqErr((const unsigned char*)target_x, 3); | |
| 5369 printSeqErr((const unsigned char*)fromUnicodeMaps, 3); | |
| 5370 } | |
| 5371 ucnv_close(cnv); | |
| 5372 } | |
| 5373 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | |
| 5374 | |
| 5375 #if !UCONFIG_NO_COLLATION | |
| 5376 | |
| 5377 static void TestJitterbug981(){ | |
| 5378 const UChar* rules; | |
| 5379 int32_t rules_length, target_cap, bytes_needed, buff_size; | |
| 5380 UErrorCode status = U_ZERO_ERROR; | |
| 5381 UConverter *utf8cnv; | |
| 5382 UCollator* myCollator; | |
| 5383 char *buff; | |
| 5384 int numNeeded=0; | |
| 5385 utf8cnv = ucnv_open ("utf8", &status); | |
| 5386 if(U_FAILURE(status)){ | |
| 5387 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(statu
s)); | |
| 5388 return; | |
| 5389 } | |
| 5390 myCollator = ucol_open("zh", &status); | |
| 5391 if(U_FAILURE(status)){ | |
| 5392 log_data_err("Could not open collator for zh locale. Error: %s\n", u_err
orName(status)); | |
| 5393 ucnv_close(utf8cnv); | |
| 5394 return; | |
| 5395 } | |
| 5396 | |
| 5397 rules = ucol_getRules(myCollator, &rules_length); | |
| 5398 if(rules_length == 0) { | |
| 5399 log_data_err("missing zh tailoring rule string\n"); | |
| 5400 ucol_close(myCollator); | |
| 5401 ucnv_close(utf8cnv); | |
| 5402 return; | |
| 5403 } | |
| 5404 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv); | |
| 5405 buff = malloc(buff_size); | |
| 5406 | |
| 5407 target_cap = 0; | |
| 5408 do { | |
| 5409 ucnv_reset(utf8cnv); | |
| 5410 status = U_ZERO_ERROR; | |
| 5411 if(target_cap >= buff_size) { | |
| 5412 log_err("wanted %d bytes, only %d available\n", target_cap, buff_siz
e); | |
| 5413 break; | |
| 5414 } | |
| 5415 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap, | |
| 5416 rules, rules_length, &status); | |
| 5417 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; | |
| 5418 if(numNeeded!=0 && numNeeded!= bytes_needed){ | |
| 5419 log_err("ucnv_fromUChars returns different values for required capac
ity in pre-flight and conversion modes"); | |
| 5420 break; | |
| 5421 } | |
| 5422 numNeeded = bytes_needed; | |
| 5423 } while (status == U_BUFFER_OVERFLOW_ERROR); | |
| 5424 ucol_close(myCollator); | |
| 5425 ucnv_close(utf8cnv); | |
| 5426 free(buff); | |
| 5427 } | |
| 5428 | |
| 5429 #endif | |
| 5430 | |
| 5431 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 5432 static void TestJitterbug1293(){ | |
| 5433 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4,
0x30D7,0x000}; | |
| 5434 char target[256]; | |
| 5435 UErrorCode status = U_ZERO_ERROR; | |
| 5436 UConverter* conv=NULL; | |
| 5437 int32_t target_cap, bytes_needed, numNeeded = 0; | |
| 5438 conv = ucnv_open("shift-jis",&status); | |
| 5439 if(U_FAILURE(status)){ | |
| 5440 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(
status)); | |
| 5441 return; | |
| 5442 } | |
| 5443 | |
| 5444 do{ | |
| 5445 target_cap =0; | |
| 5446 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status
); | |
| 5447 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; | |
| 5448 if(numNeeded!=0 && numNeeded!= bytes_needed){ | |
| 5449 log_err("ucnv_fromUChars returns different values for required capacit
y in pre-flight and conversion modes"); | |
| 5450 } | |
| 5451 numNeeded = bytes_needed; | |
| 5452 } while (status == U_BUFFER_OVERFLOW_ERROR); | |
| 5453 if(U_FAILURE(status)){ | |
| 5454 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(stat
us)); | |
| 5455 return; | |
| 5456 } | |
| 5457 ucnv_close(conv); | |
| 5458 } | |
| 5459 #endif | |
| 5460 | |
| 5461 static void TestJB5275_1(){ | |
| 5462 | |
| 5463 static const char* data = "\x3B\xB3\x0A" /* Easy characters */ | |
| 5464 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test
*/ | |
| 5465 /* Switch script: */ | |
| 5466 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengal
i test */ | |
| 5467 "\x3B\xB3\x0A" /* Easy characters - new line, so
should default!*/ | |
| 5468 "\xEF\x40\x3B\xB3\x0A"; | |
| 5469 static const UChar expected[] ={ | |
| 5470 0x003b, 0x0a15, 0x000a, /* Easy characters */ | |
| 5471 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi
test */ | |
| 5472 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali
*/ | |
| 5473 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should def
ault!*/ | |
| 5474 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/ | |
| 5475 }; | |
| 5476 | |
| 5477 UErrorCode status = U_ZERO_ERROR; | |
| 5478 UConverter* conv = ucnv_open("iscii-gur", &status); | |
| 5479 UChar dest[100] = {'\0'}; | |
| 5480 UChar* target = dest; | |
| 5481 UChar* targetLimit = dest+100; | |
| 5482 const char* source = data; | |
| 5483 const char* sourceLimit = data+strlen(data); | |
| 5484 const UChar* exp = expected; | |
| 5485 | |
| 5486 if (U_FAILURE(status)) { | |
| 5487 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n",
u_errorName(status)); | |
| 5488 return; | |
| 5489 } | |
| 5490 | |
| 5491 log_verbose("Testing switching back to default script when new line is encou
ntered.\n"); | |
| 5492 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE,
&status); | |
| 5493 if(U_FAILURE(status)){ | |
| 5494 log_err("conversion failed: %s \n", u_errorName(status)); | |
| 5495 } | |
| 5496 targetLimit = target; | |
| 5497 target = dest; | |
| 5498 printUSeq(target, targetLimit-target); | |
| 5499 while(target<targetLimit){ | |
| 5500 if(*exp!=*target){ | |
| 5501 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n
", *exp, *target); | |
| 5502 } | |
| 5503 target++; | |
| 5504 exp++; | |
| 5505 } | |
| 5506 ucnv_close(conv); | |
| 5507 } | |
| 5508 | |
| 5509 static void TestJB5275(){ | |
| 5510 static const char* data = | |
| 5511 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41
*/ | |
| 5512 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41
*/ | |
| 5513 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsuppor
ted sequence \xEF\x41 */ | |
| 5514 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ | |
| 5515 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */ | |
| 5516 "\xEF\x48\x38\xB3\x0A" /* Kannada test */ | |
| 5517 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */ | |
| 5518 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */ | |
| 5519 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */ | |
| 5520 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */; | |
| 5521 static const UChar expected[] ={ | |
| 5522 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test
*/ | |
| 5523 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati
test */ | |
| 5524 0x0038, 0x0C95, 0x000A, /* Kannada test */ | |
| 5525 0x0039, 0x0D15, 0x000A, /* Malayalam test */ | |
| 5526 0x003A, 0x0A95, 0x000A, /* Gujarati test */ | |
| 5527 0x003B, 0x0A15, 0x000A, /* Punjabi test */ | |
| 5528 }; | |
| 5529 | |
| 5530 UErrorCode status = U_ZERO_ERROR; | |
| 5531 UConverter* conv = ucnv_open("iscii", &status); | |
| 5532 UChar dest[100] = {'\0'}; | |
| 5533 UChar* target = dest; | |
| 5534 UChar* targetLimit = dest+100; | |
| 5535 const char* source = data; | |
| 5536 const char* sourceLimit = data+strlen(data); | |
| 5537 const UChar* exp = expected; | |
| 5538 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE,
&status); | |
| 5539 if(U_FAILURE(status)){ | |
| 5540 log_data_err("conversion failed: %s \n", u_errorName(status)); | |
| 5541 } | |
| 5542 targetLimit = target; | |
| 5543 target = dest; | |
| 5544 | |
| 5545 printUSeq(target, targetLimit-target); | |
| 5546 | |
| 5547 while(target<targetLimit){ | |
| 5548 if(*exp!=*target){ | |
| 5549 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n
", *exp, *target); | |
| 5550 } | |
| 5551 target++; | |
| 5552 exp++; | |
| 5553 } | |
| 5554 ucnv_close(conv); | |
| 5555 } | |
| 5556 | |
| 5557 static void | |
| 5558 TestIsFixedWidth() { | |
| 5559 UErrorCode status = U_ZERO_ERROR; | |
| 5560 UConverter *cnv = NULL; | |
| 5561 int32_t i; | |
| 5562 | |
| 5563 const char *fixedWidth[] = { | |
| 5564 "US-ASCII", | |
| 5565 "UTF32", | |
| 5566 "ibm-5478_P100-1995" | |
| 5567 }; | |
| 5568 | |
| 5569 const char *notFixedWidth[] = { | |
| 5570 "GB18030", | |
| 5571 "UTF8", | |
| 5572 "windows-949-2000", | |
| 5573 "UTF16" | |
| 5574 }; | |
| 5575 | |
| 5576 for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) { | |
| 5577 cnv = ucnv_open(fixedWidth[i], &status); | |
| 5578 if (cnv == NULL || U_FAILURE(status)) { | |
| 5579 log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_er
rorName(status)); | |
| 5580 continue; | |
| 5581 } | |
| 5582 | |
| 5583 if (!ucnv_isFixedWidth(cnv, &status)) { | |
| 5584 log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedW
idth[i]); | |
| 5585 } | |
| 5586 ucnv_close(cnv); | |
| 5587 } | |
| 5588 | |
| 5589 for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) { | |
| 5590 cnv = ucnv_open(notFixedWidth[i], &status); | |
| 5591 if (cnv == NULL || U_FAILURE(status)) { | |
| 5592 log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u
_errorName(status)); | |
| 5593 continue; | |
| 5594 } | |
| 5595 | |
| 5596 if (ucnv_isFixedWidth(cnv, &status)) { | |
| 5597 log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", not
FixedWidth[i]); | |
| 5598 } | |
| 5599 ucnv_close(cnv); | |
| 5600 } | |
| 5601 } | |
| OLD | NEW |