| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 ******************************************************************************* | |
| 3 * | |
| 4 * Copyright (C) 2003-2014, International Business Machines | |
| 5 * Corporation and others. All Rights Reserved. | |
| 6 * | |
| 7 ******************************************************************************* | |
| 8 * file name: convtest.cpp | |
| 9 * encoding: US-ASCII | |
| 10 * tab size: 8 (not used) | |
| 11 * indentation:4 | |
| 12 * | |
| 13 * created on: 2003jul15 | |
| 14 * created by: Markus W. Scherer | |
| 15 * | |
| 16 * Test file for data-driven conversion tests. | |
| 17 */ | |
| 18 | |
| 19 #include "unicode/utypes.h" | |
| 20 | |
| 21 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 22 /* | |
| 23 * Note: Turning off all of convtest.cpp if UCONFIG_NO_LEGACY_CONVERSION | |
| 24 * is slightly unnecessary - it removes tests for Unicode charsets | |
| 25 * like UTF-8 that should work. | |
| 26 * However, there is no easy way for the test to detect whether a test case | |
| 27 * is for a Unicode charset, so it would be difficult to only exclude those. | |
| 28 * Also, regular testing of ICU is done with all modules on, therefore | |
| 29 * not testing conversion for a custom configuration like this should be ok. | |
| 30 */ | |
| 31 | |
| 32 #include "unicode/ucnv.h" | |
| 33 #include "unicode/unistr.h" | |
| 34 #include "unicode/parsepos.h" | |
| 35 #include "unicode/uniset.h" | |
| 36 #include "unicode/ustring.h" | |
| 37 #include "unicode/ures.h" | |
| 38 #include "convtest.h" | |
| 39 #include "cmemory.h" | |
| 40 #include "unicode/tstdtmod.h" | |
| 41 #include <string.h> | |
| 42 #include <stdlib.h> | |
| 43 | |
// Single-character codes that the test data's "callback" field uses
// to select which converter error callback a test case installs.
enum {
    SUB_CB='?',     // substitute callback
    SKIP_CB='0',    // skip callback
    STOP_CB='.',    // stop callback
    ESC_CB='&'      // escape callback
};
| 51 | |
| 52 ConversionTest::ConversionTest() { | |
| 53 UErrorCode errorCode=U_ZERO_ERROR; | |
| 54 utf8Cnv=ucnv_open("UTF-8", &errorCode); | |
| 55 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); | |
| 56 if(U_FAILURE(errorCode)) { | |
| 57 errln("unable to open UTF-8 converter"); | |
| 58 } | |
| 59 } | |
| 60 | |
| 61 ConversionTest::~ConversionTest() { | |
| 62 ucnv_close(utf8Cnv); | |
| 63 } | |
| 64 | |
| 65 void | |
| 66 ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
r * /*par*/) { | |
| 67 if (exec) logln("TestSuite ConversionTest: "); | |
| 68 switch (index) { | |
| 69 #if !UCONFIG_NO_FILE_IO | |
| 70 case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break; | |
| 71 case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break; | |
| 72 case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break; | |
| 73 case 3: name="TestDefaultIgnorableCallback"; if (exec) TestDefaultIgnora
bleCallback(); break; | |
| 74 #else | |
| 75 case 0: | |
| 76 case 1: | |
| 77 case 2: | |
| 78 case 3: name="skip"; break; | |
| 79 #endif | |
| 80 case 4: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break
; | |
| 81 default: name=""; break; //needed to end loop | |
| 82 } | |
| 83 } | |
| 84 | |
| 85 // test data interface ----------------------------------------------------- *** | |
| 86 | |
| 87 void | |
| 88 ConversionTest::TestToUnicode() { | |
| 89 ConversionCase cc; | |
| 90 char charset[100], cbopt[4]; | |
| 91 const char *option; | |
| 92 UnicodeString s, unicode; | |
| 93 int32_t offsetsLength; | |
| 94 UConverterToUCallback callback; | |
| 95 | |
| 96 TestDataModule *dataModule; | |
| 97 TestData *testData; | |
| 98 const DataMap *testCase; | |
| 99 UErrorCode errorCode; | |
| 100 int32_t i; | |
| 101 | |
| 102 errorCode=U_ZERO_ERROR; | |
| 103 dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode)
; | |
| 104 if(U_SUCCESS(errorCode)) { | |
| 105 testData=dataModule->createTestData("toUnicode", errorCode); | |
| 106 if(U_SUCCESS(errorCode)) { | |
| 107 for(i=0; testData->nextCase(testCase, errorCode); ++i) { | |
| 108 if(U_FAILURE(errorCode)) { | |
| 109 errln("error retrieving conversion/toUnicode test case %d -
%s", | |
| 110 i, u_errorName(errorCode)); | |
| 111 errorCode=U_ZERO_ERROR; | |
| 112 continue; | |
| 113 } | |
| 114 | |
| 115 cc.caseNr=i; | |
| 116 | |
| 117 s=testCase->getString("charset", errorCode); | |
| 118 s.extract(0, 0x7fffffff, charset, sizeof(charset), ""); | |
| 119 cc.charset=charset; | |
| 120 | |
| 121 cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode)
; | |
| 122 unicode=testCase->getString("unicode", errorCode); | |
| 123 cc.unicode=unicode.getBuffer(); | |
| 124 cc.unicodeLength=unicode.length(); | |
| 125 | |
| 126 offsetsLength=0; | |
| 127 cc.offsets=testCase->getIntVector(offsetsLength, "offsets", erro
rCode); | |
| 128 if(offsetsLength==0) { | |
| 129 cc.offsets=NULL; | |
| 130 } else if(offsetsLength!=unicode.length()) { | |
| 131 errln("toUnicode[%d] unicode[%d] and offsets[%d] must have t
he same length", | |
| 132 i, unicode.length(), offsetsLength); | |
| 133 errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
| 134 } | |
| 135 | |
| 136 cc.finalFlush= 0!=testCase->getInt28("flush", errorCode); | |
| 137 cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode); | |
| 138 | |
| 139 s=testCase->getString("errorCode", errorCode); | |
| 140 if(s==UNICODE_STRING("invalid", 7)) { | |
| 141 cc.outErrorCode=U_INVALID_CHAR_FOUND; | |
| 142 } else if(s==UNICODE_STRING("illegal", 7)) { | |
| 143 cc.outErrorCode=U_ILLEGAL_CHAR_FOUND; | |
| 144 } else if(s==UNICODE_STRING("truncated", 9)) { | |
| 145 cc.outErrorCode=U_TRUNCATED_CHAR_FOUND; | |
| 146 } else if(s==UNICODE_STRING("illesc", 6)) { | |
| 147 cc.outErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE; | |
| 148 } else if(s==UNICODE_STRING("unsuppesc", 9)) { | |
| 149 cc.outErrorCode=U_UNSUPPORTED_ESCAPE_SEQUENCE; | |
| 150 } else { | |
| 151 cc.outErrorCode=U_ZERO_ERROR; | |
| 152 } | |
| 153 | |
| 154 s=testCase->getString("callback", errorCode); | |
| 155 s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), ""); | |
| 156 cc.cbopt=cbopt; | |
| 157 switch(cbopt[0]) { | |
| 158 case SUB_CB: | |
| 159 callback=UCNV_TO_U_CALLBACK_SUBSTITUTE; | |
| 160 break; | |
| 161 case SKIP_CB: | |
| 162 callback=UCNV_TO_U_CALLBACK_SKIP; | |
| 163 break; | |
| 164 case STOP_CB: | |
| 165 callback=UCNV_TO_U_CALLBACK_STOP; | |
| 166 break; | |
| 167 case ESC_CB: | |
| 168 callback=UCNV_TO_U_CALLBACK_ESCAPE; | |
| 169 break; | |
| 170 default: | |
| 171 callback=NULL; | |
| 172 break; | |
| 173 } | |
| 174 option=callback==NULL ? cbopt : cbopt+1; | |
| 175 if(*option==0) { | |
| 176 option=NULL; | |
| 177 } | |
| 178 | |
| 179 cc.invalidChars=testCase->getBinary(cc.invalidLength, "invalidCh
ars", errorCode); | |
| 180 | |
| 181 if(U_FAILURE(errorCode)) { | |
| 182 errln("error parsing conversion/toUnicode test case %d - %s"
, | |
| 183 i, u_errorName(errorCode)); | |
| 184 errorCode=U_ZERO_ERROR; | |
| 185 } else { | |
| 186 logln("TestToUnicode[%d] %s", i, charset); | |
| 187 ToUnicodeCase(cc, callback, option); | |
| 188 } | |
| 189 } | |
| 190 delete testData; | |
| 191 } | |
| 192 delete dataModule; | |
| 193 } | |
| 194 else { | |
| 195 dataerrln("Could not load test conversion data"); | |
| 196 } | |
| 197 } | |
| 198 | |
| 199 void | |
| 200 ConversionTest::TestFromUnicode() { | |
| 201 ConversionCase cc; | |
| 202 char charset[100], cbopt[4]; | |
| 203 const char *option; | |
| 204 UnicodeString s, unicode, invalidUChars; | |
| 205 int32_t offsetsLength, index; | |
| 206 UConverterFromUCallback callback; | |
| 207 | |
| 208 TestDataModule *dataModule; | |
| 209 TestData *testData; | |
| 210 const DataMap *testCase; | |
| 211 const UChar *p; | |
| 212 UErrorCode errorCode; | |
| 213 int32_t i, length; | |
| 214 | |
| 215 errorCode=U_ZERO_ERROR; | |
| 216 dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode)
; | |
| 217 if(U_SUCCESS(errorCode)) { | |
| 218 testData=dataModule->createTestData("fromUnicode", errorCode); | |
| 219 if(U_SUCCESS(errorCode)) { | |
| 220 for(i=0; testData->nextCase(testCase, errorCode); ++i) { | |
| 221 if(U_FAILURE(errorCode)) { | |
| 222 errln("error retrieving conversion/fromUnicode test case %d
- %s", | |
| 223 i, u_errorName(errorCode)); | |
| 224 errorCode=U_ZERO_ERROR; | |
| 225 continue; | |
| 226 } | |
| 227 | |
| 228 cc.caseNr=i; | |
| 229 | |
| 230 s=testCase->getString("charset", errorCode); | |
| 231 s.extract(0, 0x7fffffff, charset, sizeof(charset), ""); | |
| 232 cc.charset=charset; | |
| 233 | |
| 234 unicode=testCase->getString("unicode", errorCode); | |
| 235 cc.unicode=unicode.getBuffer(); | |
| 236 cc.unicodeLength=unicode.length(); | |
| 237 cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode)
; | |
| 238 | |
| 239 offsetsLength=0; | |
| 240 cc.offsets=testCase->getIntVector(offsetsLength, "offsets", erro
rCode); | |
| 241 if(offsetsLength==0) { | |
| 242 cc.offsets=NULL; | |
| 243 } else if(offsetsLength!=cc.bytesLength) { | |
| 244 errln("fromUnicode[%d] bytes[%d] and offsets[%d] must have t
he same length", | |
| 245 i, cc.bytesLength, offsetsLength); | |
| 246 errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
| 247 } | |
| 248 | |
| 249 cc.finalFlush= 0!=testCase->getInt28("flush", errorCode); | |
| 250 cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode); | |
| 251 | |
| 252 s=testCase->getString("errorCode", errorCode); | |
| 253 if(s==UNICODE_STRING("invalid", 7)) { | |
| 254 cc.outErrorCode=U_INVALID_CHAR_FOUND; | |
| 255 } else if(s==UNICODE_STRING("illegal", 7)) { | |
| 256 cc.outErrorCode=U_ILLEGAL_CHAR_FOUND; | |
| 257 } else if(s==UNICODE_STRING("truncated", 9)) { | |
| 258 cc.outErrorCode=U_TRUNCATED_CHAR_FOUND; | |
| 259 } else { | |
| 260 cc.outErrorCode=U_ZERO_ERROR; | |
| 261 } | |
| 262 | |
| 263 s=testCase->getString("callback", errorCode); | |
| 264 cc.setSub=0; // default: no subchar | |
| 265 | |
| 266 if((index=s.indexOf((UChar)0))>0) { | |
| 267 // read NUL-separated subchar first, if any | |
| 268 // copy the subchar from Latin-1 characters | |
| 269 // start after the NUL | |
| 270 p=s.getTerminatedBuffer(); | |
| 271 length=index+1; | |
| 272 p+=length; | |
| 273 length=s.length()-length; | |
| 274 if(length<=0 || length>=(int32_t)sizeof(cc.subchar)) { | |
| 275 errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
| 276 } else { | |
| 277 int32_t j; | |
| 278 | |
| 279 for(j=0; j<length; ++j) { | |
| 280 cc.subchar[j]=(char)p[j]; | |
| 281 } | |
| 282 // NUL-terminate the subchar | |
| 283 cc.subchar[j]=0; | |
| 284 cc.setSub=1; | |
| 285 } | |
| 286 | |
| 287 // remove the NUL and subchar from s | |
| 288 s.truncate(index); | |
| 289 } else if((index=s.indexOf((UChar)0x3d))>0) /* '=' */ { | |
| 290 // read a substitution string, separated by an equal sign | |
| 291 p=s.getBuffer()+index+1; | |
| 292 length=s.length()-(index+1); | |
| 293 if(length<0 || length>=UPRV_LENGTHOF(cc.subString)) { | |
| 294 errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
| 295 } else { | |
| 296 u_memcpy(cc.subString, p, length); | |
| 297 // NUL-terminate the subString | |
| 298 cc.subString[length]=0; | |
| 299 cc.setSub=-1; | |
| 300 } | |
| 301 | |
| 302 // remove the equal sign and subString from s | |
| 303 s.truncate(index); | |
| 304 } | |
| 305 | |
| 306 s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), ""); | |
| 307 cc.cbopt=cbopt; | |
| 308 switch(cbopt[0]) { | |
| 309 case SUB_CB: | |
| 310 callback=UCNV_FROM_U_CALLBACK_SUBSTITUTE; | |
| 311 break; | |
| 312 case SKIP_CB: | |
| 313 callback=UCNV_FROM_U_CALLBACK_SKIP; | |
| 314 break; | |
| 315 case STOP_CB: | |
| 316 callback=UCNV_FROM_U_CALLBACK_STOP; | |
| 317 break; | |
| 318 case ESC_CB: | |
| 319 callback=UCNV_FROM_U_CALLBACK_ESCAPE; | |
| 320 break; | |
| 321 default: | |
| 322 callback=NULL; | |
| 323 break; | |
| 324 } | |
| 325 option=callback==NULL ? cbopt : cbopt+1; | |
| 326 if(*option==0) { | |
| 327 option=NULL; | |
| 328 } | |
| 329 | |
| 330 invalidUChars=testCase->getString("invalidUChars", errorCode); | |
| 331 cc.invalidUChars=invalidUChars.getBuffer(); | |
| 332 cc.invalidLength=invalidUChars.length(); | |
| 333 | |
| 334 if(U_FAILURE(errorCode)) { | |
| 335 errln("error parsing conversion/fromUnicode test case %d - %
s", | |
| 336 i, u_errorName(errorCode)); | |
| 337 errorCode=U_ZERO_ERROR; | |
| 338 } else { | |
| 339 logln("TestFromUnicode[%d] %s", i, charset); | |
| 340 FromUnicodeCase(cc, callback, option); | |
| 341 } | |
| 342 } | |
| 343 delete testData; | |
| 344 } | |
| 345 delete dataModule; | |
| 346 } | |
| 347 else { | |
| 348 dataerrln("Could not load test conversion data"); | |
| 349 } | |
| 350 } | |
| 351 | |
| 352 static const UChar ellipsis[]={ 0x2e, 0x2e, 0x2e }; | |
| 353 | |
| 354 void | |
| 355 ConversionTest::TestGetUnicodeSet() { | |
| 356 char charset[100]; | |
| 357 UnicodeString s, map, mapnot; | |
| 358 int32_t which; | |
| 359 | |
| 360 ParsePosition pos; | |
| 361 UnicodeSet cnvSet, mapSet, mapnotSet, diffSet; | |
| 362 UnicodeSet *cnvSetPtr = &cnvSet; | |
| 363 LocalUConverterPointer cnv; | |
| 364 | |
| 365 TestDataModule *dataModule; | |
| 366 TestData *testData; | |
| 367 const DataMap *testCase; | |
| 368 UErrorCode errorCode; | |
| 369 int32_t i; | |
| 370 | |
| 371 errorCode=U_ZERO_ERROR; | |
| 372 dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode)
; | |
| 373 if(U_SUCCESS(errorCode)) { | |
| 374 testData=dataModule->createTestData("getUnicodeSet", errorCode); | |
| 375 if(U_SUCCESS(errorCode)) { | |
| 376 for(i=0; testData->nextCase(testCase, errorCode); ++i) { | |
| 377 if(U_FAILURE(errorCode)) { | |
| 378 errln("error retrieving conversion/getUnicodeSet test case %
d - %s", | |
| 379 i, u_errorName(errorCode)); | |
| 380 errorCode=U_ZERO_ERROR; | |
| 381 continue; | |
| 382 } | |
| 383 | |
| 384 s=testCase->getString("charset", errorCode); | |
| 385 s.extract(0, 0x7fffffff, charset, sizeof(charset), ""); | |
| 386 | |
| 387 map=testCase->getString("map", errorCode); | |
| 388 mapnot=testCase->getString("mapnot", errorCode); | |
| 389 | |
| 390 which=testCase->getInt28("which", errorCode); | |
| 391 | |
| 392 if(U_FAILURE(errorCode)) { | |
| 393 errln("error parsing conversion/getUnicodeSet test case %d -
%s", | |
| 394 i, u_errorName(errorCode)); | |
| 395 errorCode=U_ZERO_ERROR; | |
| 396 continue; | |
| 397 } | |
| 398 | |
| 399 // test this test case | |
| 400 mapSet.clear(); | |
| 401 mapnotSet.clear(); | |
| 402 | |
| 403 pos.setIndex(0); | |
| 404 mapSet.applyPattern(map, pos, 0, NULL, errorCode); | |
| 405 if(U_FAILURE(errorCode) || pos.getIndex()!=map.length()) { | |
| 406 errln("error creating the map set for conversion/getUnicodeS
et test case %d - %s\n" | |
| 407 " error index %d index %d U+%04x", | |
| 408 i, u_errorName(errorCode), pos.getErrorIndex(), pos.
getIndex(), map.char32At(pos.getIndex())); | |
| 409 errorCode=U_ZERO_ERROR; | |
| 410 continue; | |
| 411 } | |
| 412 | |
| 413 pos.setIndex(0); | |
| 414 mapnotSet.applyPattern(mapnot, pos, 0, NULL, errorCode); | |
| 415 if(U_FAILURE(errorCode) || pos.getIndex()!=mapnot.length()) { | |
| 416 errln("error creating the mapnot set for conversion/getUnico
deSet test case %d - %s\n" | |
| 417 " error index %d index %d U+%04x", | |
| 418 i, u_errorName(errorCode), pos.getErrorIndex(), pos.
getIndex(), mapnot.char32At(pos.getIndex())); | |
| 419 errorCode=U_ZERO_ERROR; | |
| 420 continue; | |
| 421 } | |
| 422 | |
| 423 logln("TestGetUnicodeSet[%d] %s", i, charset); | |
| 424 | |
| 425 cnv.adoptInstead(cnv_open(charset, errorCode)); | |
| 426 if(U_FAILURE(errorCode)) { | |
| 427 errcheckln(errorCode, "error opening \"%s\" for conversion/g
etUnicodeSet test case %d - %s", | |
| 428 charset, i, u_errorName(errorCode)); | |
| 429 errorCode=U_ZERO_ERROR; | |
| 430 continue; | |
| 431 } | |
| 432 | |
| 433 ucnv_getUnicodeSet(cnv.getAlias(), cnvSetPtr->toUSet(), (UConver
terUnicodeSet)which, &errorCode); | |
| 434 | |
| 435 if(U_FAILURE(errorCode)) { | |
| 436 errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/ge
tUnicodeSet test case %d - %s", | |
| 437 charset, i, u_errorName(errorCode)); | |
| 438 errorCode=U_ZERO_ERROR; | |
| 439 continue; | |
| 440 } | |
| 441 | |
| 442 // are there items that must be in cnvSet but are not? | |
| 443 (diffSet=mapSet).removeAll(cnvSet); | |
| 444 if(!diffSet.isEmpty()) { | |
| 445 diffSet.toPattern(s, TRUE); | |
| 446 if(s.length()>100) { | |
| 447 s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellip
sis)); | |
| 448 } | |
| 449 errln("error: ucnv_getUnicodeSet(\"%s\") is missing items -
conversion/getUnicodeSet test case %d", | |
| 450 charset, i); | |
| 451 errln(s); | |
| 452 } | |
| 453 | |
| 454 // are there items that must not be in cnvSet but are? | |
| 455 (diffSet=mapnotSet).retainAll(cnvSet); | |
| 456 if(!diffSet.isEmpty()) { | |
| 457 diffSet.toPattern(s, TRUE); | |
| 458 if(s.length()>100) { | |
| 459 s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellip
sis)); | |
| 460 } | |
| 461 errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected
items - conversion/getUnicodeSet test case %d", | |
| 462 charset, i); | |
| 463 errln(s); | |
| 464 } | |
| 465 } | |
| 466 delete testData; | |
| 467 } | |
| 468 delete dataModule; | |
| 469 } | |
| 470 else { | |
| 471 dataerrln("Could not load test conversion data"); | |
| 472 } | |
| 473 } | |
| 474 | |
| 475 U_CDECL_BEGIN | |
| 476 static void U_CALLCONV | |
| 477 getUnicodeSetCallback(const void *context, | |
| 478 UConverterFromUnicodeArgs * /*fromUArgs*/, | |
| 479 const UChar* /*codeUnits*/, | |
| 480 int32_t /*length*/, | |
| 481 UChar32 codePoint, | |
| 482 UConverterCallbackReason reason, | |
| 483 UErrorCode *pErrorCode) { | |
| 484 if(reason<=UCNV_IRREGULAR) { | |
| 485 ((UnicodeSet *)context)->remove(codePoint); // the converter cannot con
vert this code point | |
| 486 *pErrorCode=U_ZERO_ERROR; // skip | |
| 487 } // else ignore the reset, close and clone calls. | |
| 488 } | |
| 489 U_CDECL_END | |
| 490 | |
| 491 // Compare ucnv_getUnicodeSet() with the set of characters that can be converted
. | |
| 492 void | |
| 493 ConversionTest::TestGetUnicodeSet2() { | |
| 494 // Build a string with all code points. | |
| 495 UChar32 cpLimit; | |
| 496 int32_t s0Length; | |
| 497 if(quick) { | |
| 498 cpLimit=s0Length=0x10000; // BMP only | |
| 499 } else { | |
| 500 cpLimit=0x110000; | |
| 501 s0Length=0x10000+0x200000; // BMP + surrogate pairs | |
| 502 } | |
| 503 UChar *s0=new UChar[s0Length]; | |
| 504 if(s0==NULL) { | |
| 505 return; | |
| 506 } | |
| 507 UChar *s=s0; | |
| 508 UChar32 c; | |
| 509 UChar c2; | |
| 510 // low BMP | |
| 511 for(c=0; c<=0xd7ff; ++c) { | |
| 512 *s++=(UChar)c; | |
| 513 } | |
| 514 // trail surrogates | |
| 515 for(c=0xdc00; c<=0xdfff; ++c) { | |
| 516 *s++=(UChar)c; | |
| 517 } | |
| 518 // lead surrogates | |
| 519 // (after trails so that there is not even one surrogate pair in between) | |
| 520 for(c=0xd800; c<=0xdbff; ++c) { | |
| 521 *s++=(UChar)c; | |
| 522 } | |
| 523 // high BMP | |
| 524 for(c=0xe000; c<=0xffff; ++c) { | |
| 525 *s++=(UChar)c; | |
| 526 } | |
| 527 // supplementary code points = surrogate pairs | |
| 528 if(cpLimit==0x110000) { | |
| 529 for(c=0xd800; c<=0xdbff; ++c) { | |
| 530 for(c2=0xdc00; c2<=0xdfff; ++c2) { | |
| 531 *s++=(UChar)c; | |
| 532 *s++=c2; | |
| 533 } | |
| 534 } | |
| 535 } | |
| 536 | |
| 537 static const char *const cnvNames[]={ | |
| 538 "UTF-8", | |
| 539 "UTF-7", | |
| 540 "UTF-16", | |
| 541 "US-ASCII", | |
| 542 "ISO-8859-1", | |
| 543 "windows-1252", | |
| 544 "Shift-JIS", | |
| 545 "ibm-1390", // EBCDIC_STATEFUL table | |
| 546 "ibm-16684", // DBCS-only extension table based on EBCDIC_STATEFUL tabl
e | |
| 547 "HZ", | |
| 548 "ISO-2022-JP", | |
| 549 "JIS7", | |
| 550 "ISO-2022-CN", | |
| 551 "ISO-2022-CN-EXT", | |
| 552 "LMBCS" | |
| 553 }; | |
| 554 LocalUConverterPointer cnv; | |
| 555 char buffer[1024]; | |
| 556 int32_t i; | |
| 557 for(i=0; i<UPRV_LENGTHOF(cnvNames); ++i) { | |
| 558 UErrorCode errorCode=U_ZERO_ERROR; | |
| 559 cnv.adoptInstead(cnv_open(cnvNames[i], errorCode)); | |
| 560 if(U_FAILURE(errorCode)) { | |
| 561 errcheckln(errorCode, "failed to open converter %s - %s", cnvNames[i
], u_errorName(errorCode)); | |
| 562 continue; | |
| 563 } | |
| 564 UnicodeSet expected; | |
| 565 ucnv_setFromUCallBack(cnv.getAlias(), getUnicodeSetCallback, &expected,
NULL, NULL, &errorCode); | |
| 566 if(U_FAILURE(errorCode)) { | |
| 567 errln("failed to set the callback on converter %s - %s", cnvNames[i]
, u_errorName(errorCode)); | |
| 568 continue; | |
| 569 } | |
| 570 UConverterUnicodeSet which; | |
| 571 for(which=UCNV_ROUNDTRIP_SET; which<UCNV_SET_COUNT; which=(UConverterUni
codeSet)((int)which+1)) { | |
| 572 if(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) { | |
| 573 ucnv_setFallback(cnv.getAlias(), TRUE); | |
| 574 } | |
| 575 expected.add(0, cpLimit-1); | |
| 576 s=s0; | |
| 577 UBool flush; | |
| 578 do { | |
| 579 char *t=buffer; | |
| 580 flush=(UBool)(s==s0+s0Length); | |
| 581 ucnv_fromUnicode(cnv.getAlias(), &t, buffer+sizeof(buffer), (con
st UChar **)&s, s0+s0Length, NULL, flush, &errorCode); | |
| 582 if(U_FAILURE(errorCode)) { | |
| 583 if(errorCode==U_BUFFER_OVERFLOW_ERROR) { | |
| 584 errorCode=U_ZERO_ERROR; | |
| 585 continue; | |
| 586 } else { | |
| 587 break; // unexpected error, should not occur | |
| 588 } | |
| 589 } | |
| 590 } while(!flush); | |
| 591 UnicodeSet set; | |
| 592 ucnv_getUnicodeSet(cnv.getAlias(), set.toUSet(), which, &errorCode); | |
| 593 if(cpLimit<0x110000) { | |
| 594 set.remove(cpLimit, 0x10ffff); | |
| 595 } | |
| 596 if(which==UCNV_ROUNDTRIP_SET) { | |
| 597 // ignore PUA code points because they will be converted even if
they | |
| 598 // are fallbacks and when other fallbacks are turned off, | |
| 599 // but ucnv_getUnicodeSet(UCNV_ROUNDTRIP_SET) delivers true roun
dtrips | |
| 600 expected.remove(0xe000, 0xf8ff); | |
| 601 expected.remove(0xf0000, 0xffffd); | |
| 602 expected.remove(0x100000, 0x10fffd); | |
| 603 set.remove(0xe000, 0xf8ff); | |
| 604 set.remove(0xf0000, 0xffffd); | |
| 605 set.remove(0x100000, 0x10fffd); | |
| 606 } | |
| 607 if(set!=expected) { | |
| 608 // First try to see if we have different sets because ucnv_getUn
icodeSet() | |
| 609 // added strings: The above conversion method does not tell us w
hat strings might be convertible. | |
| 610 // Remove strings from the set and compare again. | |
| 611 // Unfortunately, there are no good, direct set methods for find
ing out whether there are strings | |
| 612 // in the set, nor for enumerating or removing just them. | |
| 613 // Intersect all code points with the set. The intersection will
not contain strings. | |
| 614 UnicodeSet temp(0, 0x10ffff); | |
| 615 temp.retainAll(set); | |
| 616 set=temp; | |
| 617 } | |
| 618 if(set!=expected) { | |
| 619 UnicodeSet diffSet; | |
| 620 UnicodeString out; | |
| 621 | |
| 622 // are there items that must be in the set but are not? | |
| 623 (diffSet=expected).removeAll(set); | |
| 624 if(!diffSet.isEmpty()) { | |
| 625 diffSet.toPattern(out, TRUE); | |
| 626 if(out.length()>100) { | |
| 627 out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ell
ipsis)); | |
| 628 } | |
| 629 errln("error: ucnv_getUnicodeSet(\"%s\") is missing items -
which set: %d", | |
| 630 cnvNames[i], which); | |
| 631 errln(out); | |
| 632 } | |
| 633 | |
| 634 // are there items that must not be in the set but are? | |
| 635 (diffSet=set).removeAll(expected); | |
| 636 if(!diffSet.isEmpty()) { | |
| 637 diffSet.toPattern(out, TRUE); | |
| 638 if(out.length()>100) { | |
| 639 out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ell
ipsis)); | |
| 640 } | |
| 641 errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected
items - which set: %d", | |
| 642 cnvNames[i], which); | |
| 643 errln(out); | |
| 644 } | |
| 645 } | |
| 646 } | |
| 647 } | |
| 648 | |
| 649 delete [] s0; | |
| 650 } | |
| 651 | |
| 652 // Test all codepoints which has the default ignorable Unicode property are igno
red if they have no mapping | |
| 653 // If there are any failures, the hard coded list (IS_DEFAULT_IGNORABLE_CODE_POI
NT) in ucnv_err.c should be updated | |
| 654 void | |
| 655 ConversionTest::TestDefaultIgnorableCallback() { | |
| 656 UErrorCode status = U_ZERO_ERROR; | |
| 657 const char *cnv_name = "euc-jp-2007"; | |
| 658 const char *pattern_ignorable = "[:Default_Ignorable_Code_Point:]"; | |
| 659 const char *pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]"; | |
| 660 | |
| 661 UnicodeSet *set_ignorable = new UnicodeSet(pattern_ignorable, status); | |
| 662 if (U_FAILURE(status)) { | |
| 663 dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable, u
_errorName(status)); | |
| 664 return; | |
| 665 } | |
| 666 | |
| 667 UnicodeSet *set_not_ignorable = new UnicodeSet(pattern_not_ignorable, status
); | |
| 668 if (U_FAILURE(status)) { | |
| 669 dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorabl
e, u_errorName(status)); | |
| 670 return; | |
| 671 } | |
| 672 | |
| 673 UConverter *cnv = cnv_open(cnv_name, status); | |
| 674 if (U_FAILURE(status)) { | |
| 675 dataerrln("Unable to open converter: %s - %s\n", cnv_name, u_errorName(s
tatus)); | |
| 676 return; | |
| 677 } | |
| 678 | |
| 679 // set callback for the converter | |
| 680 ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL
, &status); | |
| 681 | |
| 682 UChar32 input[1]; | |
| 683 char output[10]; | |
| 684 int32_t outputLength; | |
| 685 | |
| 686 // test default ignorables are ignored | |
| 687 int size = set_ignorable->size(); | |
| 688 for (int i = 0; i < size; i++) { | |
| 689 status = U_ZERO_ERROR; | |
| 690 outputLength= 0; | |
| 691 | |
| 692 input[0] = set_ignorable->charAt(i); | |
| 693 | |
| 694 outputLength = ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32
(input, 1).getTerminatedBuffer(), -1, &status); | |
| 695 if (U_FAILURE(status) || outputLength != 0) { | |
| 696 errln("Ignorable code point: U+%04X not skipped as expected - %s", i
nput[0], u_errorName(status)); | |
| 697 } | |
| 698 } | |
| 699 | |
| 700 // test non-ignorables are not ignored | |
| 701 size = set_not_ignorable->size(); | |
| 702 for (int i = 0; i < size; i++) { | |
| 703 status = U_ZERO_ERROR; | |
| 704 outputLength= 0; | |
| 705 | |
| 706 input[0] = set_not_ignorable->charAt(i); | |
| 707 | |
| 708 if (input[0] == 0) { | |
| 709 continue; | |
| 710 } | |
| 711 | |
| 712 outputLength = ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32
(input, 1).getTerminatedBuffer(), -1, &status); | |
| 713 if (U_FAILURE(status) || outputLength <= 0) { | |
| 714 errln("Non-ignorable code point: U+%04X skipped unexpectedly - %s",
input[0], u_errorName(status)); | |
| 715 } | |
| 716 } | |
| 717 | |
| 718 ucnv_close(cnv); | |
| 719 delete set_not_ignorable; | |
| 720 delete set_ignorable; | |
| 721 } | |
| 722 | |
| 723 // open testdata or ICU data converter ------------------------------------- *** | |
| 724 | |
| 725 UConverter * | |
| 726 ConversionTest::cnv_open(const char *name, UErrorCode &errorCode) { | |
| 727 if(name!=NULL && *name=='+') { | |
| 728 // Converter names that start with '+' are ignored in ICU4J tests. | |
| 729 ++name; | |
| 730 } | |
| 731 if(name!=NULL && *name=='*') { | |
| 732 /* loadTestData(): set the data directory */ | |
| 733 return ucnv_openPackage(loadTestData(errorCode), name+1, &errorCode); | |
| 734 } else { | |
| 735 return ucnv_open(name, &errorCode); | |
| 736 } | |
| 737 } | |
| 738 | |
| 739 // output helpers ---------------------------------------------------------- *** | |
| 740 | |
// Returns the lowercase hexadecimal character for a digit value 0..15.
static inline char
hexDigit(uint8_t digit) {
    if(digit<=9) {
        return (char)('0'+digit);
    }
    return (char)('a'+(digit-10));
}
| 745 | |
| 746 static char * | |
| 747 printBytes(const uint8_t *bytes, int32_t length, char *out) { | |
| 748 uint8_t b; | |
| 749 | |
| 750 if(length>0) { | |
| 751 b=*bytes++; | |
| 752 --length; | |
| 753 *out++=hexDigit((uint8_t)(b>>4)); | |
| 754 *out++=hexDigit((uint8_t)(b&0xf)); | |
| 755 } | |
| 756 | |
| 757 while(length>0) { | |
| 758 b=*bytes++; | |
| 759 --length; | |
| 760 *out++=' '; | |
| 761 *out++=hexDigit((uint8_t)(b>>4)); | |
| 762 *out++=hexDigit((uint8_t)(b&0xf)); | |
| 763 } | |
| 764 *out++=0; | |
| 765 return out; | |
| 766 } | |
| 767 | |
| 768 static char * | |
| 769 printUnicode(const UChar *unicode, int32_t length, char *out) { | |
| 770 UChar32 c; | |
| 771 int32_t i; | |
| 772 | |
| 773 for(i=0; i<length;) { | |
| 774 if(i>0) { | |
| 775 *out++=' '; | |
| 776 } | |
| 777 U16_NEXT(unicode, i, length, c); | |
| 778 // write 4..6 digits | |
| 779 if(c>=0x100000) { | |
| 780 *out++='1'; | |
| 781 } | |
| 782 if(c>=0x10000) { | |
| 783 *out++=hexDigit((uint8_t)((c>>16)&0xf)); | |
| 784 } | |
| 785 *out++=hexDigit((uint8_t)((c>>12)&0xf)); | |
| 786 *out++=hexDigit((uint8_t)((c>>8)&0xf)); | |
| 787 *out++=hexDigit((uint8_t)((c>>4)&0xf)); | |
| 788 *out++=hexDigit((uint8_t)(c&0xf)); | |
| 789 } | |
| 790 *out++=0; | |
| 791 return out; | |
| 792 } | |
| 793 | |
// Writes each offset as exactly two characters, separated by single spaces,
// followed by a terminating NUL. Returns the position after the NUL.
// Values outside -9..99 are shown as "-x" (too small) or "xx" (too large).
// A NULL offsets array is treated as empty.
static char *
printOffsets(const int32_t *offsets, int32_t length, char *out) {
    const int32_t count= offsets==NULL ? 0 : length;

    for(int32_t i=0; i<count; ++i) {
        if(i!=0) {
            *out++=' ';
        }

        const int32_t offset=offsets[i];
        char first, second;

        // print all offsets with 2 characters each (-x, -9..99, xx)
        if(offset>99) {
            first='x';
            second='x';
        } else if(offset>=0) {
            const int32_t tens=offset/10;
            first= tens==0 ? ' ' : (char)('0'+tens);
            second=(char)('0'+offset%10);
        } else if(offset>=-9) {
            first='-';
            second=(char)('0'-offset);
        } else /* offset<-9 */ {
            first='-';
            second='x';
        }

        *out++=first;
        *out++=second;
    }
    *out++=0;
    return out;
}
| 826 | |
| 827 // toUnicode test worker functions ----------------------------------------- *** | |
| 828 | |
| 829 static int32_t | |
| 830 stepToUnicode(ConversionCase &cc, UConverter *cnv, | |
| 831 UChar *result, int32_t resultCapacity, | |
| 832 int32_t *resultOffsets, /* also resultCapacity */ | |
| 833 int32_t step, | |
| 834 UErrorCode *pErrorCode) { | |
| 835 const char *source, *sourceLimit, *bytesLimit; | |
| 836 UChar *target, *targetLimit, *resultLimit; | |
| 837 UBool flush; | |
| 838 | |
| 839 source=(const char *)cc.bytes; | |
| 840 target=result; | |
| 841 bytesLimit=source+cc.bytesLength; | |
| 842 resultLimit=result+resultCapacity; | |
| 843 | |
| 844 if(step>=0) { | |
| 845 // call ucnv_toUnicode() with in/out buffers no larger than (step) at a
time | |
| 846 // move only one buffer (in vs. out) at a time to be extra mean | |
| 847 // step==0 performs bulk conversion and generates offsets | |
| 848 | |
| 849 // initialize the partial limits for the loop | |
| 850 if(step==0) { | |
| 851 // use the entire buffers | |
| 852 sourceLimit=bytesLimit; | |
| 853 targetLimit=resultLimit; | |
| 854 flush=cc.finalFlush; | |
| 855 } else { | |
| 856 // start with empty partial buffers | |
| 857 sourceLimit=source; | |
| 858 targetLimit=target; | |
| 859 flush=FALSE; | |
| 860 | |
| 861 // output offsets only for bulk conversion | |
| 862 resultOffsets=NULL; | |
| 863 } | |
| 864 | |
| 865 for(;;) { | |
| 866 // resetting the opposite conversion direction must not affect this
one | |
| 867 ucnv_resetFromUnicode(cnv); | |
| 868 | |
| 869 // convert | |
| 870 ucnv_toUnicode(cnv, | |
| 871 &target, targetLimit, | |
| 872 &source, sourceLimit, | |
| 873 resultOffsets, | |
| 874 flush, pErrorCode); | |
| 875 | |
| 876 // check pointers and errors | |
| 877 if(source>sourceLimit || target>targetLimit) { | |
| 878 *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
| 879 break; | |
| 880 } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { | |
| 881 if(target!=targetLimit) { | |
| 882 // buffer overflow must only be set when the target is fille
d | |
| 883 *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
| 884 break; | |
| 885 } else if(targetLimit==resultLimit) { | |
| 886 // not just a partial overflow | |
| 887 break; | |
| 888 } | |
| 889 | |
| 890 // the partial target is filled, set a new limit, reset the erro
r and continue | |
| 891 targetLimit=(resultLimit-target)>=step ? target+step : resultLim
it; | |
| 892 *pErrorCode=U_ZERO_ERROR; | |
| 893 } else if(U_FAILURE(*pErrorCode)) { | |
| 894 // some other error occurred, done | |
| 895 break; | |
| 896 } else { | |
| 897 if(source!=sourceLimit) { | |
| 898 // when no error occurs, then the input must be consumed | |
| 899 *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
| 900 break; | |
| 901 } | |
| 902 | |
| 903 if(sourceLimit==bytesLimit) { | |
| 904 // we are done | |
| 905 break; | |
| 906 } | |
| 907 | |
| 908 // the partial conversion succeeded, set a new limit and continu
e | |
| 909 sourceLimit=(bytesLimit-source)>=step ? source+step : bytesLimit
; | |
| 910 flush=(UBool)(cc.finalFlush && sourceLimit==bytesLimit); | |
| 911 } | |
| 912 } | |
| 913 } else /* step<0 */ { | |
| 914 /* | |
| 915 * step==-1: call only ucnv_getNextUChar() | |
| 916 * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar() | |
| 917 * if step==-2 or -3, then give ucnv_toUnicode() the whole remaining i
nput, | |
| 918 * else give it at most (-step-2)/2 bytes | |
| 919 */ | |
| 920 UChar32 c; | |
| 921 | |
| 922 // end the loop by getting an index out of bounds error | |
| 923 for(;;) { | |
| 924 // resetting the opposite conversion direction must not affect this
one | |
| 925 ucnv_resetFromUnicode(cnv); | |
| 926 | |
| 927 // convert | |
| 928 if((step&1)!=0 /* odd: -1, -3, -5, ... */) { | |
| 929 sourceLimit=source; // use sourceLimit not as a real limit | |
| 930 // but to remember the pre-getNextUChar sour
ce pointer | |
| 931 c=ucnv_getNextUChar(cnv, &source, bytesLimit, pErrorCode); | |
| 932 | |
| 933 // check pointers and errors | |
| 934 if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) { | |
| 935 if(source!=bytesLimit) { | |
| 936 *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
| 937 } else { | |
| 938 *pErrorCode=U_ZERO_ERROR; | |
| 939 } | |
| 940 break; | |
| 941 } else if(U_FAILURE(*pErrorCode)) { | |
| 942 break; | |
| 943 } | |
| 944 // source may not move if c is from previous overflow | |
| 945 | |
| 946 if(target==resultLimit) { | |
| 947 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; | |
| 948 break; | |
| 949 } | |
| 950 if(c<=0xffff) { | |
| 951 *target++=(UChar)c; | |
| 952 } else { | |
| 953 *target++=U16_LEAD(c); | |
| 954 if(target==resultLimit) { | |
| 955 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; | |
| 956 break; | |
| 957 } | |
| 958 *target++=U16_TRAIL(c); | |
| 959 } | |
| 960 | |
| 961 // alternate between -n-1 and -n but leave -1 alone | |
| 962 if(step<-1) { | |
| 963 ++step; | |
| 964 } | |
| 965 } else /* step is even */ { | |
| 966 // allow only one UChar output | |
| 967 targetLimit=target<resultLimit ? target+1 : resultLimit; | |
| 968 | |
| 969 // as with ucnv_getNextUChar(), we always flush (if we go to byt
esLimit) | |
| 970 // and never output offsets | |
| 971 if(step==-2) { | |
| 972 sourceLimit=bytesLimit; | |
| 973 } else { | |
| 974 sourceLimit=source+(-step-2)/2; | |
| 975 if(sourceLimit>bytesLimit) { | |
| 976 sourceLimit=bytesLimit; | |
| 977 } | |
| 978 } | |
| 979 | |
| 980 ucnv_toUnicode(cnv, | |
| 981 &target, targetLimit, | |
| 982 &source, sourceLimit, | |
| 983 NULL, (UBool)(sourceLimit==bytesLimit), pErrorCode); | |
| 984 | |
| 985 // check pointers and errors | |
| 986 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { | |
| 987 if(target!=targetLimit) { | |
| 988 // buffer overflow must only be set when the target is f
illed | |
| 989 *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
| 990 break; | |
| 991 } else if(targetLimit==resultLimit) { | |
| 992 // not just a partial overflow | |
| 993 break; | |
| 994 } | |
| 995 | |
| 996 // the partial target is filled, set a new limit and continu
e | |
| 997 *pErrorCode=U_ZERO_ERROR; | |
| 998 } else if(U_FAILURE(*pErrorCode)) { | |
| 999 // some other error occurred, done | |
| 1000 break; | |
| 1001 } else { | |
| 1002 if(source!=sourceLimit) { | |
| 1003 // when no error occurs, then the input must be consumed | |
| 1004 *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
| 1005 break; | |
| 1006 } | |
| 1007 | |
| 1008 // we are done (flush==TRUE) but we continue, to get the ind
ex out of bounds error above | |
| 1009 } | |
| 1010 | |
| 1011 --step; | |
| 1012 } | |
| 1013 } | |
| 1014 } | |
| 1015 | |
| 1016 return (int32_t)(target-result); | |
| 1017 } | |
| 1018 | |
| 1019 UBool | |
| 1020 ConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback
, const char *option) { | |
| 1021 // open the converter | |
| 1022 IcuTestErrorCode errorCode(*this, "ToUnicodeCase"); | |
| 1023 LocalUConverterPointer cnv(cnv_open(cc.charset, errorCode)); | |
| 1024 if(errorCode.isFailure()) { | |
| 1025 errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_o
pen() failed - %s", | |
| 1026 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, er
rorCode.errorName()); | |
| 1027 errorCode.reset(); | |
| 1028 return FALSE; | |
| 1029 } | |
| 1030 | |
| 1031 // set the callback | |
| 1032 if(callback!=NULL) { | |
| 1033 ucnv_setToUCallBack(cnv.getAlias(), callback, option, NULL, NULL, errorC
ode); | |
| 1034 if(U_FAILURE(errorCode)) { | |
| 1035 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBac
k() failed - %s", | |
| 1036 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush
, u_errorName(errorCode)); | |
| 1037 return FALSE; | |
| 1038 } | |
| 1039 } | |
| 1040 | |
| 1041 int32_t resultOffsets[256]; | |
| 1042 UChar result[256]; | |
| 1043 int32_t resultLength; | |
| 1044 UBool ok; | |
| 1045 | |
| 1046 static const struct { | |
| 1047 int32_t step; | |
| 1048 const char *name; | |
| 1049 } steps[]={ | |
| 1050 { 0, "bulk" }, // must be first for offsets to be checked | |
| 1051 { 1, "step=1" }, | |
| 1052 { 3, "step=3" }, | |
| 1053 { 7, "step=7" }, | |
| 1054 { -1, "getNext" }, | |
| 1055 { -2, "toU(bulk)+getNext" }, | |
| 1056 { -3, "getNext+toU(bulk)" }, | |
| 1057 { -4, "toU(1)+getNext" }, | |
| 1058 { -5, "getNext+toU(1)" }, | |
| 1059 { -12, "toU(5)+getNext" }, | |
| 1060 { -13, "getNext+toU(5)" }, | |
| 1061 }; | |
| 1062 int32_t i, step; | |
| 1063 | |
| 1064 ok=TRUE; | |
| 1065 for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) { | |
| 1066 step=steps[i].step; | |
| 1067 if(step<0 && !cc.finalFlush) { | |
| 1068 // skip ucnv_getNextUChar() if !finalFlush because | |
| 1069 // ucnv_getNextUChar() always implies flush | |
| 1070 continue; | |
| 1071 } | |
| 1072 if(step!=0) { | |
| 1073 // bulk test is first, then offsets are not checked any more | |
| 1074 cc.offsets=NULL; | |
| 1075 } | |
| 1076 else { | |
| 1077 memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets)); | |
| 1078 } | |
| 1079 memset(result, -1, UPRV_LENGTHOF(result)); | |
| 1080 errorCode.reset(); | |
| 1081 resultLength=stepToUnicode(cc, cnv.getAlias(), | |
| 1082 result, UPRV_LENGTHOF(result), | |
| 1083 step==0 ? resultOffsets : NULL, | |
| 1084 step, errorCode); | |
| 1085 ok=checkToUnicode( | |
| 1086 cc, cnv.getAlias(), steps[i].name, | |
| 1087 result, resultLength, | |
| 1088 cc.offsets!=NULL ? resultOffsets : NULL, | |
| 1089 errorCode); | |
| 1090 if(errorCode.isFailure() || !cc.finalFlush) { | |
| 1091 // reset if an error occurred or we did not flush | |
| 1092 // otherwise do nothing to make sure that flushing resets | |
| 1093 ucnv_resetToUnicode(cnv.getAlias()); | |
| 1094 } | |
| 1095 if (cc.offsets != NULL && resultOffsets[resultLength] != -1) { | |
| 1096 errln("toUnicode[%d](%s) Conversion wrote too much to offsets at ind
ex %d", | |
| 1097 cc.caseNr, cc.charset, resultLength); | |
| 1098 } | |
| 1099 if (result[resultLength] != (UChar)-1) { | |
| 1100 errln("toUnicode[%d](%s) Conversion wrote too much to result at inde
x %d", | |
| 1101 cc.caseNr, cc.charset, resultLength); | |
| 1102 } | |
| 1103 } | |
| 1104 | |
| 1105 // not a real loop, just a convenience for breaking out of the block | |
| 1106 while(ok && cc.finalFlush) { | |
| 1107 // test ucnv_toUChars() | |
| 1108 memset(result, 0, sizeof(result)); | |
| 1109 | |
| 1110 errorCode.reset(); | |
| 1111 resultLength=ucnv_toUChars(cnv.getAlias(), | |
| 1112 result, UPRV_LENGTHOF(result), | |
| 1113 (const char *)cc.bytes, cc.bytesLength, | |
| 1114 errorCode); | |
| 1115 ok=checkToUnicode( | |
| 1116 cc, cnv.getAlias(), "toUChars", | |
| 1117 result, resultLength, | |
| 1118 NULL, | |
| 1119 errorCode); | |
| 1120 if(!ok) { | |
| 1121 break; | |
| 1122 } | |
| 1123 | |
| 1124 // test preflighting | |
| 1125 // keep the correct result for simple checking | |
| 1126 errorCode.reset(); | |
| 1127 resultLength=ucnv_toUChars(cnv.getAlias(), | |
| 1128 NULL, 0, | |
| 1129 (const char *)cc.bytes, cc.bytesLength, | |
| 1130 errorCode); | |
| 1131 if(errorCode.get()==U_STRING_NOT_TERMINATED_WARNING || errorCode.get()==
U_BUFFER_OVERFLOW_ERROR) { | |
| 1132 errorCode.reset(); | |
| 1133 } | |
| 1134 ok=checkToUnicode( | |
| 1135 cc, cnv.getAlias(), "preflight toUChars", | |
| 1136 result, resultLength, | |
| 1137 NULL, | |
| 1138 errorCode); | |
| 1139 break; | |
| 1140 } | |
| 1141 | |
| 1142 errorCode.reset(); // all errors have already been reported | |
| 1143 return ok; | |
| 1144 } | |
| 1145 | |
| 1146 UBool | |
| 1147 ConversionTest::checkToUnicode(ConversionCase &cc, UConverter *cnv, const char *
name, | |
| 1148 const UChar *result, int32_t resultLength, | |
| 1149 const int32_t *resultOffsets, | |
| 1150 UErrorCode resultErrorCode) { | |
| 1151 char resultInvalidChars[8]; | |
| 1152 int8_t resultInvalidLength; | |
| 1153 UErrorCode errorCode; | |
| 1154 | |
| 1155 const char *msg; | |
| 1156 | |
| 1157 // reset the message; NULL will mean "ok" | |
| 1158 msg=NULL; | |
| 1159 | |
| 1160 errorCode=U_ZERO_ERROR; | |
| 1161 resultInvalidLength=sizeof(resultInvalidChars); | |
| 1162 ucnv_getInvalidChars(cnv, resultInvalidChars, &resultInvalidLength, &errorCo
de); | |
| 1163 if(U_FAILURE(errorCode)) { | |
| 1164 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidChar
s() failed - %s", | |
| 1165 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, na
me, u_errorName(errorCode)); | |
| 1166 return FALSE; | |
| 1167 } | |
| 1168 | |
| 1169 // check everything that might have gone wrong | |
| 1170 if(cc.unicodeLength!=resultLength) { | |
| 1171 msg="wrong result length"; | |
| 1172 } else if(0!=u_memcmp(cc.unicode, result, cc.unicodeLength)) { | |
| 1173 msg="wrong result string"; | |
| 1174 } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.unicod
eLength*sizeof(*cc.offsets))) { | |
| 1175 msg="wrong offsets"; | |
| 1176 } else if(cc.outErrorCode!=resultErrorCode) { | |
| 1177 msg="wrong error code"; | |
| 1178 } else if(cc.invalidLength!=resultInvalidLength) { | |
| 1179 msg="wrong length of last invalid input"; | |
| 1180 } else if(0!=memcmp(cc.invalidChars, resultInvalidChars, cc.invalidLength))
{ | |
| 1181 msg="wrong last invalid input"; | |
| 1182 } | |
| 1183 | |
| 1184 if(msg==NULL) { | |
| 1185 return TRUE; | |
| 1186 } else { | |
| 1187 char buffer[2000]; // one buffer for all strings | |
| 1188 char *s, *bytesString, *unicodeString, *resultString, | |
| 1189 *offsetsString, *resultOffsetsString, | |
| 1190 *invalidCharsString, *resultInvalidCharsString; | |
| 1191 | |
| 1192 bytesString=s=buffer; | |
| 1193 s=printBytes(cc.bytes, cc.bytesLength, bytesString); | |
| 1194 s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString=s); | |
| 1195 s=printUnicode(result, resultLength, resultString=s); | |
| 1196 s=printOffsets(cc.offsets, cc.unicodeLength, offsetsString=s); | |
| 1197 s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s); | |
| 1198 s=printBytes(cc.invalidChars, cc.invalidLength, invalidCharsString=s); | |
| 1199 s=printBytes((uint8_t *)resultInvalidChars, resultInvalidLength, resultI
nvalidCharsString=s); | |
| 1200 | |
| 1201 if((s-buffer)>(int32_t)sizeof(buffer)) { | |
| 1202 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: ch
eckToUnicode() test output buffer overflow writing %d chars\n", | |
| 1203 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush
, name, (int)(s-buffer)); | |
| 1204 exit(1); | |
| 1205 } | |
| 1206 | |
| 1207 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n" | |
| 1208 " bytes <%s>[%d]\n" | |
| 1209 " expected <%s>[%d]\n" | |
| 1210 " result <%s>[%d]\n" | |
| 1211 " offsets <%s>\n" | |
| 1212 " result offsets <%s>\n" | |
| 1213 " error code expected %s got %s\n" | |
| 1214 " invalidChars expected <%s> got <%s>\n", | |
| 1215 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name
, msg, | |
| 1216 bytesString, cc.bytesLength, | |
| 1217 unicodeString, cc.unicodeLength, | |
| 1218 resultString, resultLength, | |
| 1219 offsetsString, | |
| 1220 resultOffsetsString, | |
| 1221 u_errorName(cc.outErrorCode), u_errorName(resultErrorCode), | |
| 1222 invalidCharsString, resultInvalidCharsString); | |
| 1223 | |
| 1224 return FALSE; | |
| 1225 } | |
| 1226 } | |
| 1227 | |
| 1228 // fromUnicode test worker functions --------------------------------------- *** | |
| 1229 | |
| 1230 static int32_t | |
| 1231 stepFromUTF8(ConversionCase &cc, | |
| 1232 UConverter *utf8Cnv, UConverter *cnv, | |
| 1233 char *result, int32_t resultCapacity, | |
| 1234 int32_t step, | |
| 1235 UErrorCode *pErrorCode) { | |
| 1236 const char *source, *sourceLimit, *utf8Limit; | |
| 1237 UChar pivotBuffer[32]; | |
| 1238 UChar *pivotSource, *pivotTarget, *pivotLimit; | |
| 1239 char *target, *targetLimit, *resultLimit; | |
| 1240 UBool flush; | |
| 1241 | |
| 1242 source=cc.utf8; | |
| 1243 pivotSource=pivotTarget=pivotBuffer; | |
| 1244 target=result; | |
| 1245 utf8Limit=source+cc.utf8Length; | |
| 1246 resultLimit=result+resultCapacity; | |
| 1247 | |
| 1248 // call ucnv_convertEx() with in/out buffers no larger than (step) at a time | |
| 1249 // move only one buffer (in vs. out) at a time to be extra mean | |
| 1250 // step==0 performs bulk conversion | |
| 1251 | |
| 1252 // initialize the partial limits for the loop | |
| 1253 if(step==0) { | |
| 1254 // use the entire buffers | |
| 1255 sourceLimit=utf8Limit; | |
| 1256 targetLimit=resultLimit; | |
| 1257 flush=cc.finalFlush; | |
| 1258 | |
| 1259 pivotLimit=pivotBuffer+UPRV_LENGTHOF(pivotBuffer); | |
| 1260 } else { | |
| 1261 // start with empty partial buffers | |
| 1262 sourceLimit=source; | |
| 1263 targetLimit=target; | |
| 1264 flush=FALSE; | |
| 1265 | |
| 1266 // empty pivot is not allowed, make it of length step | |
| 1267 pivotLimit=pivotBuffer+step; | |
| 1268 } | |
| 1269 | |
| 1270 for(;;) { | |
| 1271 // resetting the opposite conversion direction must not affect this one | |
| 1272 ucnv_resetFromUnicode(utf8Cnv); | |
| 1273 ucnv_resetToUnicode(cnv); | |
| 1274 | |
| 1275 // convert | |
| 1276 ucnv_convertEx(cnv, utf8Cnv, | |
| 1277 &target, targetLimit, | |
| 1278 &source, sourceLimit, | |
| 1279 pivotBuffer, &pivotSource, &pivotTarget, pivotLimit, | |
| 1280 FALSE, flush, pErrorCode); | |
| 1281 | |
| 1282 // check pointers and errors | |
| 1283 if(source>sourceLimit || target>targetLimit) { | |
| 1284 *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
| 1285 break; | |
| 1286 } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { | |
| 1287 if(target!=targetLimit) { | |
| 1288 // buffer overflow must only be set when the target is filled | |
| 1289 *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
| 1290 break; | |
| 1291 } else if(targetLimit==resultLimit) { | |
| 1292 // not just a partial overflow | |
| 1293 break; | |
| 1294 } | |
| 1295 | |
| 1296 // the partial target is filled, set a new limit, reset the error an
d continue | |
| 1297 targetLimit=(resultLimit-target)>=step ? target+step : resultLimit; | |
| 1298 *pErrorCode=U_ZERO_ERROR; | |
| 1299 } else if(U_FAILURE(*pErrorCode)) { | |
| 1300 if(pivotSource==pivotBuffer) { | |
| 1301 // toUnicode error, should not occur | |
| 1302 // toUnicode errors are tested in cintltst TestConvertExFromUTF8
() | |
| 1303 break; | |
| 1304 } else { | |
| 1305 // fromUnicode error | |
| 1306 // some other error occurred, done | |
| 1307 break; | |
| 1308 } | |
| 1309 } else { | |
| 1310 if(source!=sourceLimit) { | |
| 1311 // when no error occurs, then the input must be consumed | |
| 1312 *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
| 1313 break; | |
| 1314 } | |
| 1315 | |
| 1316 if(sourceLimit==utf8Limit) { | |
| 1317 // we are done | |
| 1318 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { | |
| 1319 // ucnv_convertEx() warns about not terminating the output | |
| 1320 // but ucnv_fromUnicode() does not and so | |
| 1321 // checkFromUnicode() does not expect it | |
| 1322 *pErrorCode=U_ZERO_ERROR; | |
| 1323 } | |
| 1324 break; | |
| 1325 } | |
| 1326 | |
| 1327 // the partial conversion succeeded, set a new limit and continue | |
| 1328 sourceLimit=(utf8Limit-source)>=step ? source+step : utf8Limit; | |
| 1329 flush=(UBool)(cc.finalFlush && sourceLimit==utf8Limit); | |
| 1330 } | |
| 1331 } | |
| 1332 | |
| 1333 return (int32_t)(target-result); | |
| 1334 } | |
| 1335 | |
| 1336 static int32_t | |
| 1337 stepFromUnicode(ConversionCase &cc, UConverter *cnv, | |
| 1338 char *result, int32_t resultCapacity, | |
| 1339 int32_t *resultOffsets, /* also resultCapacity */ | |
| 1340 int32_t step, | |
| 1341 UErrorCode *pErrorCode) { | |
| 1342 const UChar *source, *sourceLimit, *unicodeLimit; | |
| 1343 char *target, *targetLimit, *resultLimit; | |
| 1344 UBool flush; | |
| 1345 | |
| 1346 source=cc.unicode; | |
| 1347 target=result; | |
| 1348 unicodeLimit=source+cc.unicodeLength; | |
| 1349 resultLimit=result+resultCapacity; | |
| 1350 | |
| 1351 // call ucnv_fromUnicode() with in/out buffers no larger than (step) at a ti
me | |
| 1352 // move only one buffer (in vs. out) at a time to be extra mean | |
| 1353 // step==0 performs bulk conversion and generates offsets | |
| 1354 | |
| 1355 // initialize the partial limits for the loop | |
| 1356 if(step==0) { | |
| 1357 // use the entire buffers | |
| 1358 sourceLimit=unicodeLimit; | |
| 1359 targetLimit=resultLimit; | |
| 1360 flush=cc.finalFlush; | |
| 1361 } else { | |
| 1362 // start with empty partial buffers | |
| 1363 sourceLimit=source; | |
| 1364 targetLimit=target; | |
| 1365 flush=FALSE; | |
| 1366 | |
| 1367 // output offsets only for bulk conversion | |
| 1368 resultOffsets=NULL; | |
| 1369 } | |
| 1370 | |
| 1371 for(;;) { | |
| 1372 // resetting the opposite conversion direction must not affect this one | |
| 1373 ucnv_resetToUnicode(cnv); | |
| 1374 | |
| 1375 // convert | |
| 1376 ucnv_fromUnicode(cnv, | |
| 1377 &target, targetLimit, | |
| 1378 &source, sourceLimit, | |
| 1379 resultOffsets, | |
| 1380 flush, pErrorCode); | |
| 1381 | |
| 1382 // check pointers and errors | |
| 1383 if(source>sourceLimit || target>targetLimit) { | |
| 1384 *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
| 1385 break; | |
| 1386 } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { | |
| 1387 if(target!=targetLimit) { | |
| 1388 // buffer overflow must only be set when the target is filled | |
| 1389 *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
| 1390 break; | |
| 1391 } else if(targetLimit==resultLimit) { | |
| 1392 // not just a partial overflow | |
| 1393 break; | |
| 1394 } | |
| 1395 | |
| 1396 // the partial target is filled, set a new limit, reset the error an
d continue | |
| 1397 targetLimit=(resultLimit-target)>=step ? target+step : resultLimit; | |
| 1398 *pErrorCode=U_ZERO_ERROR; | |
| 1399 } else if(U_FAILURE(*pErrorCode)) { | |
| 1400 // some other error occurred, done | |
| 1401 break; | |
| 1402 } else { | |
| 1403 if(source!=sourceLimit) { | |
| 1404 // when no error occurs, then the input must be consumed | |
| 1405 *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
| 1406 break; | |
| 1407 } | |
| 1408 | |
| 1409 if(sourceLimit==unicodeLimit) { | |
| 1410 // we are done | |
| 1411 break; | |
| 1412 } | |
| 1413 | |
| 1414 // the partial conversion succeeded, set a new limit and continue | |
| 1415 sourceLimit=(unicodeLimit-source)>=step ? source+step : unicodeLimit
; | |
| 1416 flush=(UBool)(cc.finalFlush && sourceLimit==unicodeLimit); | |
| 1417 } | |
| 1418 } | |
| 1419 | |
| 1420 return (int32_t)(target-result); | |
| 1421 } | |
| 1422 | |
| 1423 UBool | |
| 1424 ConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback call
back, const char *option) { | |
| 1425 UConverter *cnv; | |
| 1426 UErrorCode errorCode; | |
| 1427 | |
| 1428 // open the converter | |
| 1429 errorCode=U_ZERO_ERROR; | |
| 1430 cnv=cnv_open(cc.charset, errorCode); | |
| 1431 if(U_FAILURE(errorCode)) { | |
| 1432 errcheckln(errorCode, "fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv
_open() failed - %s", | |
| 1433 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_
errorName(errorCode)); | |
| 1434 return FALSE; | |
| 1435 } | |
| 1436 ucnv_resetToUnicode(utf8Cnv); | |
| 1437 | |
| 1438 // set the callback | |
| 1439 if(callback!=NULL) { | |
| 1440 ucnv_setFromUCallBack(cnv, callback, option, NULL, NULL, &errorCode); | |
| 1441 if(U_FAILURE(errorCode)) { | |
| 1442 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCal
lBack() failed - %s", | |
| 1443 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush
, u_errorName(errorCode)); | |
| 1444 ucnv_close(cnv); | |
| 1445 return FALSE; | |
| 1446 } | |
| 1447 } | |
| 1448 | |
| 1449 // set the fallbacks flag | |
| 1450 // TODO change with Jitterbug 2401, then add a similar call for toUnicode to
o | |
| 1451 ucnv_setFallback(cnv, cc.fallbacks); | |
| 1452 | |
| 1453 // set the subchar | |
| 1454 int32_t length; | |
| 1455 | |
| 1456 if(cc.setSub>0) { | |
| 1457 length=(int32_t)strlen(cc.subchar); | |
| 1458 ucnv_setSubstChars(cnv, cc.subchar, (int8_t)length, &errorCode); | |
| 1459 if(U_FAILURE(errorCode)) { | |
| 1460 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstCha
rs() failed - %s", | |
| 1461 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush
, u_errorName(errorCode)); | |
| 1462 ucnv_close(cnv); | |
| 1463 return FALSE; | |
| 1464 } | |
| 1465 } else if(cc.setSub<0) { | |
| 1466 ucnv_setSubstString(cnv, cc.subString, -1, &errorCode); | |
| 1467 if(U_FAILURE(errorCode)) { | |
| 1468 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstStr
ing() failed - %s", | |
| 1469 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush
, u_errorName(errorCode)); | |
| 1470 ucnv_close(cnv); | |
| 1471 return FALSE; | |
| 1472 } | |
| 1473 } | |
| 1474 | |
| 1475 // convert unicode to utf8 | |
| 1476 char utf8[256]; | |
| 1477 cc.utf8=utf8; | |
| 1478 u_strToUTF8(utf8, UPRV_LENGTHOF(utf8), &cc.utf8Length, | |
| 1479 cc.unicode, cc.unicodeLength, | |
| 1480 &errorCode); | |
| 1481 if(U_FAILURE(errorCode)) { | |
| 1482 // skip UTF-8 testing of a string with an unpaired surrogate, | |
| 1483 // or of one that's too long | |
| 1484 // toUnicode errors are tested in cintltst TestConvertExFromUTF8() | |
| 1485 cc.utf8Length=-1; | |
| 1486 } | |
| 1487 | |
| 1488 int32_t resultOffsets[256]; | |
| 1489 char result[256]; | |
| 1490 int32_t resultLength; | |
| 1491 UBool ok; | |
| 1492 | |
| 1493 static const struct { | |
| 1494 int32_t step; | |
| 1495 const char *name, *utf8Name; | |
| 1496 } steps[]={ | |
| 1497 { 0, "bulk", "utf8" }, // must be first for offsets to be checked | |
| 1498 { 1, "step=1", "utf8 step=1" }, | |
| 1499 { 3, "step=3", "utf8 step=3" }, | |
| 1500 { 7, "step=7", "utf8 step=7" } | |
| 1501 }; | |
| 1502 int32_t i, step; | |
| 1503 | |
| 1504 ok=TRUE; | |
| 1505 for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) { | |
| 1506 step=steps[i].step; | |
| 1507 memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets)); | |
| 1508 memset(result, -1, UPRV_LENGTHOF(result)); | |
| 1509 errorCode=U_ZERO_ERROR; | |
| 1510 resultLength=stepFromUnicode(cc, cnv, | |
| 1511 result, UPRV_LENGTHOF(result), | |
| 1512 step==0 ? resultOffsets : NULL, | |
| 1513 step, &errorCode); | |
| 1514 ok=checkFromUnicode( | |
| 1515 cc, cnv, steps[i].name, | |
| 1516 (uint8_t *)result, resultLength, | |
| 1517 cc.offsets!=NULL ? resultOffsets : NULL, | |
| 1518 errorCode); | |
| 1519 if(U_FAILURE(errorCode) || !cc.finalFlush) { | |
| 1520 // reset if an error occurred or we did not flush | |
| 1521 // otherwise do nothing to make sure that flushing resets | |
| 1522 ucnv_resetFromUnicode(cnv); | |
| 1523 } | |
| 1524 if (resultOffsets[resultLength] != -1) { | |
| 1525 errln("fromUnicode[%d](%s) Conversion wrote too much to offsets at i
ndex %d", | |
| 1526 cc.caseNr, cc.charset, resultLength); | |
| 1527 } | |
| 1528 if (result[resultLength] != (char)-1) { | |
| 1529 errln("fromUnicode[%d](%s) Conversion wrote too much to result at in
dex %d", | |
| 1530 cc.caseNr, cc.charset, resultLength); | |
| 1531 } | |
| 1532 | |
| 1533 // bulk test is first, then offsets are not checked any more | |
| 1534 cc.offsets=NULL; | |
| 1535 | |
| 1536 // test direct conversion from UTF-8 | |
| 1537 if(cc.utf8Length>=0) { | |
| 1538 errorCode=U_ZERO_ERROR; | |
| 1539 resultLength=stepFromUTF8(cc, utf8Cnv, cnv, | |
| 1540 result, UPRV_LENGTHOF(result), | |
| 1541 step, &errorCode); | |
| 1542 ok=checkFromUnicode( | |
| 1543 cc, cnv, steps[i].utf8Name, | |
| 1544 (uint8_t *)result, resultLength, | |
| 1545 NULL, | |
| 1546 errorCode); | |
| 1547 if(U_FAILURE(errorCode) || !cc.finalFlush) { | |
| 1548 // reset if an error occurred or we did not flush | |
| 1549 // otherwise do nothing to make sure that flushing resets | |
| 1550 ucnv_resetToUnicode(utf8Cnv); | |
| 1551 ucnv_resetFromUnicode(cnv); | |
| 1552 } | |
| 1553 } | |
| 1554 } | |
| 1555 | |
| 1556 // not a real loop, just a convenience for breaking out of the block | |
| 1557 while(ok && cc.finalFlush) { | |
| 1558 // test ucnv_fromUChars() | |
| 1559 memset(result, 0, sizeof(result)); | |
| 1560 | |
| 1561 errorCode=U_ZERO_ERROR; | |
| 1562 resultLength=ucnv_fromUChars(cnv, | |
| 1563 result, UPRV_LENGTHOF(result), | |
| 1564 cc.unicode, cc.unicodeLength, | |
| 1565 &errorCode); | |
| 1566 ok=checkFromUnicode( | |
| 1567 cc, cnv, "fromUChars", | |
| 1568 (uint8_t *)result, resultLength, | |
| 1569 NULL, | |
| 1570 errorCode); | |
| 1571 if(!ok) { | |
| 1572 break; | |
| 1573 } | |
| 1574 | |
| 1575 // test preflighting | |
| 1576 // keep the correct result for simple checking | |
| 1577 errorCode=U_ZERO_ERROR; | |
| 1578 resultLength=ucnv_fromUChars(cnv, | |
| 1579 NULL, 0, | |
| 1580 cc.unicode, cc.unicodeLength, | |
| 1581 &errorCode); | |
| 1582 if(errorCode==U_STRING_NOT_TERMINATED_WARNING || errorCode==U_BUFFER_OVE
RFLOW_ERROR) { | |
| 1583 errorCode=U_ZERO_ERROR; | |
| 1584 } | |
| 1585 ok=checkFromUnicode( | |
| 1586 cc, cnv, "preflight fromUChars", | |
| 1587 (uint8_t *)result, resultLength, | |
| 1588 NULL, | |
| 1589 errorCode); | |
| 1590 break; | |
| 1591 } | |
| 1592 | |
| 1593 ucnv_close(cnv); | |
| 1594 return ok; | |
| 1595 } | |
| 1596 | |
| 1597 UBool | |
| 1598 ConversionTest::checkFromUnicode(ConversionCase &cc, UConverter *cnv, const char
*name, | |
| 1599 const uint8_t *result, int32_t resultLength, | |
| 1600 const int32_t *resultOffsets, | |
| 1601 UErrorCode resultErrorCode) { | |
| 1602 UChar resultInvalidUChars[8]; | |
| 1603 int8_t resultInvalidLength; | |
| 1604 UErrorCode errorCode; | |
| 1605 | |
| 1606 const char *msg; | |
| 1607 | |
| 1608 // reset the message; NULL will mean "ok" | |
| 1609 msg=NULL; | |
| 1610 | |
| 1611 errorCode=U_ZERO_ERROR; | |
| 1612 resultInvalidLength=UPRV_LENGTHOF(resultInvalidUChars); | |
| 1613 ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &error
Code); | |
| 1614 if(U_FAILURE(errorCode)) { | |
| 1615 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUC
hars() failed - %s", | |
| 1616 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, na
me, u_errorName(errorCode)); | |
| 1617 return FALSE; | |
| 1618 } | |
| 1619 | |
| 1620 // check everything that might have gone wrong | |
| 1621 if(cc.bytesLength!=resultLength) { | |
| 1622 msg="wrong result length"; | |
| 1623 } else if(0!=memcmp(cc.bytes, result, cc.bytesLength)) { | |
| 1624 msg="wrong result string"; | |
| 1625 } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.bytesL
ength*sizeof(*cc.offsets))) { | |
| 1626 msg="wrong offsets"; | |
| 1627 } else if(cc.outErrorCode!=resultErrorCode) { | |
| 1628 msg="wrong error code"; | |
| 1629 } else if(cc.invalidLength!=resultInvalidLength) { | |
| 1630 msg="wrong length of last invalid input"; | |
| 1631 } else if(0!=u_memcmp(cc.invalidUChars, resultInvalidUChars, cc.invalidLengt
h)) { | |
| 1632 msg="wrong last invalid input"; | |
| 1633 } | |
| 1634 | |
| 1635 if(msg==NULL) { | |
| 1636 return TRUE; | |
| 1637 } else { | |
| 1638 char buffer[2000]; // one buffer for all strings | |
| 1639 char *s, *unicodeString, *bytesString, *resultString, | |
| 1640 *offsetsString, *resultOffsetsString, | |
| 1641 *invalidCharsString, *resultInvalidUCharsString; | |
| 1642 | |
| 1643 unicodeString=s=buffer; | |
| 1644 s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString); | |
| 1645 s=printBytes(cc.bytes, cc.bytesLength, bytesString=s); | |
| 1646 s=printBytes(result, resultLength, resultString=s); | |
| 1647 s=printOffsets(cc.offsets, cc.bytesLength, offsetsString=s); | |
| 1648 s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s); | |
| 1649 s=printUnicode(cc.invalidUChars, cc.invalidLength, invalidCharsString=s)
; | |
| 1650 s=printUnicode(resultInvalidUChars, resultInvalidLength, resultInvalidUC
harsString=s); | |
| 1651 | |
| 1652 if((s-buffer)>(int32_t)sizeof(buffer)) { | |
| 1653 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error:
checkFromUnicode() test output buffer overflow writing %d chars\n", | |
| 1654 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush
, name, (int)(s-buffer)); | |
| 1655 exit(1); | |
| 1656 } | |
| 1657 | |
| 1658 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n" | |
| 1659 " unicode <%s>[%d]\n" | |
| 1660 " expected <%s>[%d]\n" | |
| 1661 " result <%s>[%d]\n" | |
| 1662 " offsets <%s>\n" | |
| 1663 " result offsets <%s>\n" | |
| 1664 " error code expected %s got %s\n" | |
| 1665 " invalidChars expected <%s> got <%s>\n", | |
| 1666 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name
, msg, | |
| 1667 unicodeString, cc.unicodeLength, | |
| 1668 bytesString, cc.bytesLength, | |
| 1669 resultString, resultLength, | |
| 1670 offsetsString, | |
| 1671 resultOffsetsString, | |
| 1672 u_errorName(cc.outErrorCode), u_errorName(resultErrorCode), | |
| 1673 invalidCharsString, resultInvalidUCharsString); | |
| 1674 | |
| 1675 return FALSE; | |
| 1676 } | |
| 1677 } | |
| 1678 | |
| 1679 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | |
| OLD | NEW |