| OLD | NEW |
| 1 /* | 1 /* |
| 2 ******************************************************************************* | 2 ******************************************************************************* |
| 3 * | 3 * |
| 4 * Copyright (C) 2003-2007, International Business Machines | 4 * Copyright (C) 2003-2007, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. | 5 * Corporation and others. All Rights Reserved. |
| 6 * | 6 * |
| 7 ******************************************************************************* | 7 ******************************************************************************* |
| 8 * file name: convtest.cpp | 8 * file name: convtest.cpp |
| 9 * encoding: US-ASCII | 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) | 10 * tab size: 8 (not used) |
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 63 ucnv_close(utf8Cnv); | 63 ucnv_close(utf8Cnv); |
| 64 } | 64 } |
| 65 | 65 |
| 66 void | 66 void |
| 67 ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
r * /*par*/) { | 67 ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
r * /*par*/) { |
| 68 if (exec) logln("TestSuite ConversionTest: "); | 68 if (exec) logln("TestSuite ConversionTest: "); |
| 69 switch (index) { | 69 switch (index) { |
| 70 case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break; | 70 case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break; |
| 71 case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break; | 71 case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break; |
| 72 case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break; | 72 case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break; |
| 73 case 3: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break
; |
| 73 default: name=""; break; //needed to end loop | 74 default: name=""; break; //needed to end loop |
| 74 } | 75 } |
| 75 } | 76 } |
| 76 | 77 |
| 77 // test data interface ----------------------------------------------------- *** | 78 // test data interface ----------------------------------------------------- *** |
| 78 | 79 |
| 79 void | 80 void |
| 80 ConversionTest::TestToUnicode() { | 81 ConversionTest::TestToUnicode() { |
| 81 ConversionCase cc; | 82 ConversionCase cc; |
| 82 char charset[100], cbopt[4]; | 83 char charset[100], cbopt[4]; |
| (...skipping 375 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 458 } | 459 } |
| 459 delete testData; | 460 delete testData; |
| 460 } | 461 } |
| 461 delete dataModule; | 462 delete dataModule; |
| 462 } | 463 } |
| 463 else { | 464 else { |
| 464 errln("Failed: could not load test conversion data"); | 465 errln("Failed: could not load test conversion data"); |
| 465 } | 466 } |
| 466 } | 467 } |
| 467 | 468 |
| 469 U_CDECL_BEGIN |
| 470 static void U_CALLCONV |
| 471 getUnicodeSetCallback(const void *context, |
| 472 UConverterFromUnicodeArgs *fromUArgs, |
| 473 const UChar* codeUnits, |
| 474 int32_t length, |
| 475 UChar32 codePoint, |
| 476 UConverterCallbackReason reason, |
| 477 UErrorCode *pErrorCode) { |
| 478 if(reason<=UCNV_IRREGULAR) { |
| 479 ((UnicodeSet *)context)->remove(codePoint); // the converter cannot con
vert this code point |
| 480 *pErrorCode=U_ZERO_ERROR; // skip |
| 481 } // else ignore the reset, close and clone calls. |
| 482 } |
| 483 U_CDECL_END |
| 484 |
| 485 // Compare ucnv_getUnicodeSet() with the set of characters that can be converted
. |
| 486 void |
| 487 ConversionTest::TestGetUnicodeSet2() { |
| 488 // Build a string with all code points. |
| 489 UChar32 cpLimit; |
| 490 int32_t s0Length; |
| 491 if(quick) { |
| 492 cpLimit=s0Length=0x10000; // BMP only |
| 493 } else { |
| 494 cpLimit=0x110000; |
| 495 s0Length=0x10000+0x200000; // BMP + surrogate pairs |
| 496 } |
| 497 UChar *s0=new UChar[s0Length]; |
| 498 if(s0==NULL) { |
| 499 return; |
| 500 } |
| 501 UChar *s=s0; |
| 502 UChar32 c; |
| 503 UChar c2; |
| 504 // low BMP |
| 505 for(c=0; c<=0xd7ff; ++c) { |
| 506 *s++=(UChar)c; |
| 507 } |
| 508 // trail surrogates |
| 509 for(c=0xdc00; c<=0xdfff; ++c) { |
| 510 *s++=(UChar)c; |
| 511 } |
| 512 // lead surrogates |
| 513 // (after trails so that there is not even one surrogate pair in between) |
| 514 for(c=0xd800; c<=0xdbff; ++c) { |
| 515 *s++=(UChar)c; |
| 516 } |
| 517 // high BMP |
| 518 for(c=0xe000; c<=0xffff; ++c) { |
| 519 *s++=(UChar)c; |
| 520 } |
| 521 // supplementary code points = surrogate pairs |
| 522 if(cpLimit==0x110000) { |
| 523 for(c=0xd800; c<=0xdbff; ++c) { |
| 524 for(c2=0xdc00; c2<=0xdfff; ++c2) { |
| 525 *s++=(UChar)c; |
| 526 *s++=c2; |
| 527 } |
| 528 } |
| 529 } |
| 530 |
| 531 static const char *const cnvNames[]={ |
| 532 "UTF-8", |
| 533 "UTF-7", |
| 534 "UTF-16", |
| 535 "US-ASCII", |
| 536 "ISO-8859-1", |
| 537 "windows-1252", |
| 538 "Shift-JIS", |
| 539 "ibm-1390", // EBCDIC_STATEFUL table |
| 540 "ibm-16684", // DBCS-only extension table based on EBCDIC_STATEFUL tabl
e |
| 541 "HZ", |
| 542 "ISO-2022-JP", |
| 543 "JIS7", |
| 544 "ISO-2022-CN", |
| 545 "ISO-2022-CN-EXT", |
| 546 "LMBCS" |
| 547 }; |
| 548 char buffer[1024]; |
| 549 int32_t i; |
| 550 for(i=0; i<LENGTHOF(cnvNames); ++i) { |
| 551 UErrorCode errorCode=U_ZERO_ERROR; |
| 552 UConverter *cnv=cnv_open(cnvNames[i], errorCode); |
| 553 if(U_FAILURE(errorCode)) { |
| 554 errln("failed to open converter %s - %s", cnvNames[i], u_errorName(e
rrorCode)); |
| 555 continue; |
| 556 } |
| 557 UnicodeSet expected; |
| 558 ucnv_setFromUCallBack(cnv, getUnicodeSetCallback, &expected, NULL, NULL,
&errorCode); |
| 559 if(U_FAILURE(errorCode)) { |
| 560 errln("failed to set the callback on converter %s - %s", cnvNames[i]
, u_errorName(errorCode)); |
| 561 ucnv_close(cnv); |
| 562 continue; |
| 563 } |
| 564 UConverterUnicodeSet which; |
| 565 for(which=UCNV_ROUNDTRIP_SET; which<UCNV_SET_COUNT; which=(UConverterUni
codeSet)((int)which+1)) { |
| 566 if(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) { |
| 567 ucnv_setFallback(cnv, TRUE); |
| 568 } |
| 569 expected.add(0, cpLimit-1); |
| 570 s=s0; |
| 571 UBool flush; |
| 572 do { |
| 573 char *t=buffer; |
| 574 flush=(UBool)(s==s0+s0Length); |
| 575 ucnv_fromUnicode(cnv, &t, buffer+sizeof(buffer), (const UChar **
)&s, s0+s0Length, NULL, flush, &errorCode); |
| 576 if(U_FAILURE(errorCode)) { |
| 577 if(errorCode==U_BUFFER_OVERFLOW_ERROR) { |
| 578 errorCode=U_ZERO_ERROR; |
| 579 continue; |
| 580 } else { |
| 581 break; // unexpected error, should not occur |
| 582 } |
| 583 } |
| 584 } while(!flush); |
| 585 UnicodeSet set; |
| 586 ucnv_getUnicodeSet(cnv, (USet *)&set, which, &errorCode); |
| 587 if(cpLimit<0x110000) { |
| 588 set.remove(cpLimit, 0x10ffff); |
| 589 } |
| 590 if(which==UCNV_ROUNDTRIP_SET) { |
| 591 // ignore PUA code points because they will be converted even if
they |
| 592 // are fallbacks and when other fallbacks are turned off, |
| 593 // but ucnv_getUnicodeSet(UCNV_ROUNDTRIP_SET) delivers true roun
dtrips |
| 594 expected.remove(0xe000, 0xf8ff); |
| 595 expected.remove(0xf0000, 0xffffd); |
| 596 expected.remove(0x100000, 0x10fffd); |
| 597 set.remove(0xe000, 0xf8ff); |
| 598 set.remove(0xf0000, 0xffffd); |
| 599 set.remove(0x100000, 0x10fffd); |
| 600 } |
| 601 if(set!=expected) { |
| 602 // First try to see if we have different sets because ucnv_getUn
icodeSet() |
| 603 // added strings: The above conversion method does not tell us w
hat strings might be convertible. |
| 604 // Remove strings from the set and compare again. |
| 605 // Unfortunately, there are no good, direct set methods for find
ing out whether there are strings |
| 606 // in the set, nor for enumerating or removing just them. |
| 607 // Intersect all code points with the set. The intersection will
not contain strings. |
| 608 UnicodeSet temp(0, 0x10ffff); |
| 609 temp.retainAll(set); |
| 610 set=temp; |
| 611 } |
| 612 if(set!=expected) { |
| 613 UnicodeSet diffSet; |
| 614 UnicodeString out; |
| 615 |
| 616 // are there items that must be in the set but are not? |
| 617 (diffSet=expected).removeAll(set); |
| 618 if(!diffSet.isEmpty()) { |
| 619 diffSet.toPattern(out, TRUE); |
| 620 if(out.length()>100) { |
| 621 out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis
)); |
| 622 } |
| 623 errln("error: ucnv_getUnicodeSet(\"%s\") is missing items -
which set: %d", |
| 624 cnvNames[i], which); |
| 625 errln(out); |
| 626 } |
| 627 |
| 628 // are there items that must not be in the set but are? |
| 629 (diffSet=set).removeAll(expected); |
| 630 if(!diffSet.isEmpty()) { |
| 631 diffSet.toPattern(out, TRUE); |
| 632 if(out.length()>100) { |
| 633 out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis
)); |
| 634 } |
| 635 errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected
items - which set: %d", |
| 636 cnvNames[i], which); |
| 637 errln(out); |
| 638 } |
| 639 } |
| 640 } |
| 641 } |
| 642 |
| 643 delete [] s0; |
| 644 } |
| 645 |
| 468 // open testdata or ICU data converter ------------------------------------- *** | 646 // open testdata or ICU data converter ------------------------------------- *** |
| 469 | 647 |
| 470 UConverter * | 648 UConverter * |
| 471 ConversionTest::cnv_open(const char *name, UErrorCode &errorCode) { | 649 ConversionTest::cnv_open(const char *name, UErrorCode &errorCode) { |
| 472 if(name!=NULL && *name=='*') { | 650 if(name!=NULL && *name=='*') { |
| 473 /* loadTestData(): set the data directory */ | 651 /* loadTestData(): set the data directory */ |
| 474 return ucnv_openPackage(loadTestData(errorCode), name+1, &errorCode); | 652 return ucnv_openPackage(loadTestData(errorCode), name+1, &errorCode); |
| 475 } else { | 653 } else { |
| 476 return ucnv_open(name, &errorCode); | 654 return ucnv_open(name, &errorCode); |
| 477 } | 655 } |
| (...skipping 936 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1414 offsetsString, | 1592 offsetsString, |
| 1415 resultOffsetsString, | 1593 resultOffsetsString, |
| 1416 u_errorName(cc.outErrorCode), u_errorName(resultErrorCode), | 1594 u_errorName(cc.outErrorCode), u_errorName(resultErrorCode), |
| 1417 invalidCharsString, resultInvalidUCharsString); | 1595 invalidCharsString, resultInvalidUCharsString); |
| 1418 | 1596 |
| 1419 return FALSE; | 1597 return FALSE; |
| 1420 } | 1598 } |
| 1421 } | 1599 } |
| 1422 | 1600 |
| 1423 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | 1601 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
| OLD | NEW |