OLD | NEW |
1 /******************************************************************** | 1 /******************************************************************** |
2 * COPYRIGHT: | 2 * COPYRIGHT: |
3 * Copyright (c) 1997-2011, International Business Machines Corporation and | 3 * Copyright (c) 1997-2014, International Business Machines Corporation and |
4 * others. All Rights Reserved. | 4 * others. All Rights Reserved. |
5 ********************************************************************/ | 5 ********************************************************************/ |
6 | 6 |
7 #include "unicode/utypes.h" | 7 #include "unicode/utypes.h" |
8 | 8 |
9 #if !UCONFIG_NO_NORMALIZATION | 9 #if !UCONFIG_NO_NORMALIZATION |
10 | 10 |
11 #include "unicode/uchar.h" | 11 #include "unicode/uchar.h" |
12 #include "unicode/errorcode.h" | 12 #include "unicode/errorcode.h" |
13 #include "unicode/normlzr.h" | 13 #include "unicode/normlzr.h" |
14 #include "unicode/uniset.h" | 14 #include "unicode/uniset.h" |
15 #include "unicode/usetiter.h" | 15 #include "unicode/usetiter.h" |
16 #include "unicode/schriter.h" | 16 #include "unicode/schriter.h" |
17 #include "unicode/utf16.h" | 17 #include "unicode/utf16.h" |
18 #include "cstring.h" | 18 #include "cstring.h" |
19 #include "normalizer2impl.h" | 19 #include "normalizer2impl.h" |
20 #include "tstnorm.h" | 20 #include "tstnorm.h" |
21 | 21 |
// Array-length helpers. OLD column: a file-local LENGTHOF computes the element
// count as sizeof(array)/sizeof((array)[0]) cast to int32_t, and ARRAY_LENGTH
// forwards to it. NEW column: the local LENGTHOF is removed and ARRAY_LENGTH
// forwards to UPRV_LENGTHOF, which is defined outside this view.
22 #define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0]))) | 22 #define ARRAY_LENGTH(array) UPRV_LENGTHOF(array) |
23 #define ARRAY_LENGTH(array) LENGTHOF(array) | |
24 | 23 |
// CASE(id, test): expands to one switch case for test dispatch.
//   id   - the case label value.
//   test - the test function; it is stringified for the name and the log line,
//          and called with no arguments.
// The expansion assigns the stringified test name to name and, only when exec
// is true, logs the test name followed by ---, logs an empty UnicodeString, and
// then calls test(). It ends with break and no semicolon, so the use site must
// supply the terminating semicolon.
// name, exec and logln are not defined by the macro; they must be in scope
// wherever it is expanded.
25 #define CASE(id,test) case id: \ | 24 #define CASE(id,test) case id: \ |
26 name = #test; \ | 25 name = #test; \ |
27 if (exec) { \ | 26 if (exec) { \ |
28 logln(#test "---"); \ | 27 logln(#test "---"); \ |
29 logln((UnicodeString)""); \ | 28 logln((UnicodeString)""); \ |
30 test(); \ | 29 test(); \ |
31 } \ | 30 } \ |
32 break | 31 break |
33 | 32 |
(...skipping 706 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
740 EXPECT_MIDDLE=3, | 739 EXPECT_MIDDLE=3, |
741 SRC_MIDDLE_2=2, | 740 SRC_MIDDLE_2=2, |
742 EXPECT_MIDDLE_2=1 | 741 EXPECT_MIDDLE_2=1 |
743 }; | 742 }; |
744 | 743 |
745 // movement vector | 744 // movement vector |
746 // - for previous(), 0 for current(), + for next() | 745 // - for previous(), 0 for current(), + for next() |
747 // for both sets of test data | 746 // for both sets of test data |
748 static const char *const moves="0+0+0--0-0-+++0--+++++++0--------"; | 747 static const char *const moves="0+0+0--0-0-+++0--+++++++0--------"; |
749 | 748 |
750 TestPreviousNext(src, LENGTHOF(src), | 749 TestPreviousNext(src, UPRV_LENGTHOF(src), |
751 expect, LENGTHOF(expect), | 750 expect, UPRV_LENGTHOF(expect), |
752 expectIndex, | 751 expectIndex, |
753 SRC_MIDDLE, EXPECT_MIDDLE, | 752 SRC_MIDDLE, EXPECT_MIDDLE, |
754 moves, UNORM_NFD, "basic"); | 753 moves, UNORM_NFD, "basic"); |
755 | 754 |
756 TestPreviousNext(src_j2911, LENGTHOF(src_j2911), | 755 TestPreviousNext(src_j2911, UPRV_LENGTHOF(src_j2911), |
757 expect_j2911, LENGTHOF(expect_j2911), | 756 expect_j2911, UPRV_LENGTHOF(expect_j2911), |
758 expectIndex_j2911, | 757 expectIndex_j2911, |
759 SRC_MIDDLE, EXPECT_MIDDLE, | 758 SRC_MIDDLE, EXPECT_MIDDLE, |
760 moves, UNORM_NFKC, "j2911"); | 759 moves, UNORM_NFKC, "j2911"); |
761 | 760 |
762 // try again from different "middle" indexes | 761 // try again from different "middle" indexes |
763 TestPreviousNext(src, LENGTHOF(src), | 762 TestPreviousNext(src, UPRV_LENGTHOF(src), |
764 expect, LENGTHOF(expect), | 763 expect, UPRV_LENGTHOF(expect), |
765 expectIndex, | 764 expectIndex, |
766 SRC_MIDDLE_2, EXPECT_MIDDLE_2, | 765 SRC_MIDDLE_2, EXPECT_MIDDLE_2, |
767 moves, UNORM_NFD, "basic_2"); | 766 moves, UNORM_NFD, "basic_2"); |
768 | 767 |
769 TestPreviousNext(src_j2911, LENGTHOF(src_j2911), | 768 TestPreviousNext(src_j2911, UPRV_LENGTHOF(src_j2911), |
770 expect_j2911, LENGTHOF(expect_j2911), | 769 expect_j2911, UPRV_LENGTHOF(expect_j2911), |
771 expectIndex_j2911, | 770 expectIndex_j2911, |
772 SRC_MIDDLE_2, EXPECT_MIDDLE_2, | 771 SRC_MIDDLE_2, EXPECT_MIDDLE_2, |
773 moves, UNORM_NFKC, "j2911_2"); | 772 moves, UNORM_NFKC, "j2911_2"); |
774 } | 773 } |
775 | 774 |
776 void BasicNormalizerTest::TestConcatenate() { | 775 void BasicNormalizerTest::TestConcatenate() { |
777 static const char *const | 776 static const char *const |
778 cases[][4]={ | 777 cases[][4]={ |
779 /* mode, left, right, result */ | 778 /* mode, left, right, result */ |
780 { | 779 { |
(...skipping 276 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1057 } opt[]={ | 1056 } opt[]={ |
1058 { 0, "default" }, | 1057 { 0, "default" }, |
1059 { U_COMPARE_CODE_POINT_ORDER, "c.p. order" }, | 1058 { U_COMPARE_CODE_POINT_ORDER, "c.p. order" }, |
1060 { U_COMPARE_IGNORE_CASE, "ignore case" }, | 1059 { U_COMPARE_IGNORE_CASE, "ignore case" }, |
1061 { U_COMPARE_CODE_POINT_ORDER|U_COMPARE_IGNORE_CASE, "c.p. order & ignore
case" }, | 1060 { U_COMPARE_CODE_POINT_ORDER|U_COMPARE_IGNORE_CASE, "c.p. order & ignore
case" }, |
1062 { U_COMPARE_IGNORE_CASE|U_FOLD_CASE_EXCLUDE_SPECIAL_I, "ignore case & sp
ecial i" }, | 1061 { U_COMPARE_IGNORE_CASE|U_FOLD_CASE_EXCLUDE_SPECIAL_I, "ignore case & sp
ecial i" }, |
1063 { U_COMPARE_CODE_POINT_ORDER|U_COMPARE_IGNORE_CASE|U_FOLD_CASE_EXCLUDE_S
PECIAL_I, "c.p. order & ignore case & special i" }, | 1062 { U_COMPARE_CODE_POINT_ORDER|U_COMPARE_IGNORE_CASE|U_FOLD_CASE_EXCLUDE_S
PECIAL_I, "c.p. order & ignore case & special i" }, |
1064 { UNORM_UNICODE_3_2<<UNORM_COMPARE_NORM_OPTIONS_SHIFT, "Unicode 3.2" } | 1063 { UNORM_UNICODE_3_2<<UNORM_COMPARE_NORM_OPTIONS_SHIFT, "Unicode 3.2" } |
1065 }; | 1064 }; |
1066 | 1065 |
1067 int32_t i, j, k, count=LENGTHOF(strings); | 1066 int32_t i, j, k, count=UPRV_LENGTHOF(strings); |
1068 int32_t result, refResult; | 1067 int32_t result, refResult; |
1069 | 1068 |
1070 UErrorCode errorCode; | 1069 UErrorCode errorCode; |
1071 | 1070 |
1072 // create the UnicodeStrings | 1071 // create the UnicodeStrings |
1073 for(i=0; i<count; ++i) { | 1072 for(i=0; i<count; ++i) { |
1074 s[i]=UnicodeString(strings[i], "").unescape(); | 1073 s[i]=UnicodeString(strings[i], "").unescape(); |
1075 } | 1074 } |
1076 | 1075 |
1077 // test them each with each other | 1076 // test them each with each other |
1078 for(i=0; i<count; ++i) { | 1077 for(i=0; i<count; ++i) { |
1079 for(j=i; j<count; ++j) { | 1078 for(j=i; j<count; ++j) { |
1080 for(k=0; k<LENGTHOF(opt); ++k) { | 1079 for(k=0; k<UPRV_LENGTHOF(opt); ++k) { |
1081 // test Normalizer::compare | 1080 // test Normalizer::compare |
1082 errorCode=U_ZERO_ERROR; | 1081 errorCode=U_ZERO_ERROR; |
1083 result=_norm_compare(s[i], s[j], opt[k].options, errorCode); | 1082 result=_norm_compare(s[i], s[j], opt[k].options, errorCode); |
1084 refResult=ref_norm_compare(s[i], s[j], opt[k].options, errorCode
); | 1083 refResult=ref_norm_compare(s[i], s[j], opt[k].options, errorCode
); |
1085 if(_sign(result)!=_sign(refResult)) { | 1084 if(_sign(result)!=_sign(refResult)) { |
1086 errln("Normalizer::compare(%d, %d, %s)%s should be %s %s", | 1085 errln("Normalizer::compare(%d, %d, %s)%s should be %s %s", |
1087 i, j, opt[k].name, _signString(result), _signString(refR
esult), | 1086 i, j, opt[k].name, _signString(result), _signString(refR
esult), |
1088 U_SUCCESS(errorCode) ? "" : u_errorName(errorCode)); | 1087 U_SUCCESS(errorCode) ? "" : u_errorName(errorCode)); |
1089 } | 1088 } |
1090 | 1089 |
(...skipping 19 matching lines...) Expand all Loading... |
1110 UnicodeString s1, s2; | 1109 UnicodeString s1, s2; |
1111 | 1110 |
1112 const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(errorCode); | 1111 const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(errorCode); |
1113 if(U_FAILURE(errorCode) || !nfcImpl->ensureCanonIterData(errorCode)) { | 1112 if(U_FAILURE(errorCode) || !nfcImpl->ensureCanonIterData(errorCode)) { |
1114 dataerrln("Normalizer2Factory::getNFCImpl().ensureCanonIterData() failed
: %s", | 1113 dataerrln("Normalizer2Factory::getNFCImpl().ensureCanonIterData() failed
: %s", |
1115 u_errorName(errorCode)); | 1114 u_errorName(errorCode)); |
1116 return; | 1115 return; |
1117 } | 1116 } |
1118 | 1117 |
1119 // collect all sets into one for contiguous output | 1118 // collect all sets into one for contiguous output |
1120 for(i=0; i<LENGTHOF(iI); ++i) { | 1119 for(i=0; i<UPRV_LENGTHOF(iI); ++i) { |
1121 if(nfcImpl->getCanonStartSet(iI[i], iSet)) { | 1120 if(nfcImpl->getCanonStartSet(iI[i], iSet)) { |
1122 set.addAll(iSet); | 1121 set.addAll(iSet); |
1123 } | 1122 } |
1124 } | 1123 } |
1125 | 1124 |
1126 // test all of these precomposed characters | 1125 // test all of these precomposed characters |
1127 const Normalizer2 *nfcNorm2=Normalizer2Factory::getNFCInstance(errorCode); | 1126 const Normalizer2 *nfcNorm2=Normalizer2::getNFCInstance(errorCode); |
1128 UnicodeSetIterator it(set); | 1127 UnicodeSetIterator it(set); |
1129 while(it.next() && !it.isString()) { | 1128 while(it.next() && !it.isString()) { |
1130 UChar32 c=it.getCodepoint(); | 1129 UChar32 c=it.getCodepoint(); |
1131 if(!nfcNorm2->getDecomposition(c, s2)) { | 1130 if(!nfcNorm2->getDecomposition(c, s2)) { |
1132 dataerrln("NFC.getDecomposition(i-composite U+%04lx) failed", (long)
c); | 1131 dataerrln("NFC.getDecomposition(i-composite U+%04lx) failed", (long)
c); |
1133 return; | 1132 return; |
1134 } | 1133 } |
1135 | 1134 |
1136 s1.setTo(c); | 1135 s1.setTo(c); |
1137 for(k=0; k<LENGTHOF(opt); ++k) { | 1136 for(k=0; k<UPRV_LENGTHOF(opt); ++k) { |
1138 // test Normalizer::compare | 1137 // test Normalizer::compare |
1139 errorCode=U_ZERO_ERROR; | 1138 errorCode=U_ZERO_ERROR; |
1140 result=_norm_compare(s1, s2, opt[k].options, errorCode); | 1139 result=_norm_compare(s1, s2, opt[k].options, errorCode); |
1141 refResult=ref_norm_compare(s1, s2, opt[k].options, errorCode); | 1140 refResult=ref_norm_compare(s1, s2, opt[k].options, errorCode); |
1142 if(_sign(result)!=_sign(refResult)) { | 1141 if(_sign(result)!=_sign(refResult)) { |
1143 errln("Normalizer::compare(U+%04x with its NFD, %s)%s should be
%s %s", | 1142 errln("Normalizer::compare(U+%04x with its NFD, %s)%s should be
%s %s", |
1144 c, opt[k].name, _signString(result), _signString(refResult), | 1143 c, opt[k].name, _signString(result), _signString(refResult), |
1145 U_SUCCESS(errorCode) ? "" : u_errorName(errorCode)); | 1144 U_SUCCESS(errorCode) ? "" : u_errorName(errorCode)); |
1146 } | 1145 } |
1147 | 1146 |
(...skipping 283 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1431 { "\\uE111\\u1161\\uE112\\u1162", "\\uAE4C\\u1102\\u0062\\u1162" }, | 1430 { "\\uE111\\u1161\\uE112\\u1162", "\\uAE4C\\u1102\\u0062\\u1162" }, |
1432 { "\\uFFF3\\uFFF7\\U00010036\\U00010077", "\\U00010037\\U00010037\\uFFF6
\\U00010037" } | 1431 { "\\uFFF3\\uFFF7\\U00010036\\U00010077", "\\U00010037\\U00010037\\uFFF6
\\U00010037" } |
1433 }; | 1432 }; |
1434 IcuTestErrorCode errorCode(*this, "BasicNormalizerTest/TestCustomComp"); | 1433 IcuTestErrorCode errorCode(*this, "BasicNormalizerTest/TestCustomComp"); |
1435 const Normalizer2 *customNorm2= | 1434 const Normalizer2 *customNorm2= |
1436 Normalizer2::getInstance(loadTestData(errorCode), "testnorm", | 1435 Normalizer2::getInstance(loadTestData(errorCode), "testnorm", |
1437 UNORM2_COMPOSE, errorCode); | 1436 UNORM2_COMPOSE, errorCode); |
1438 if(errorCode.logDataIfFailureAndReset("unable to load testdata/testnorm.nrm"
)) { | 1437 if(errorCode.logDataIfFailureAndReset("unable to load testdata/testnorm.nrm"
)) { |
1439 return; | 1438 return; |
1440 } | 1439 } |
1441 for(int32_t i=0; i<LENGTHOF(pairs); ++i) { | 1440 for(int32_t i=0; i<UPRV_LENGTHOF(pairs); ++i) { |
1442 const StringPair &pair=pairs[i]; | 1441 const StringPair &pair=pairs[i]; |
1443 UnicodeString input=UnicodeString(pair.input, -1, US_INV).unescape(); | 1442 UnicodeString input=UnicodeString(pair.input, -1, US_INV).unescape(); |
1444 UnicodeString expected=UnicodeString(pair.expected, -1, US_INV).unescape
(); | 1443 UnicodeString expected=UnicodeString(pair.expected, -1, US_INV).unescape
(); |
1445 UnicodeString result=customNorm2->normalize(input, errorCode); | 1444 UnicodeString result=customNorm2->normalize(input, errorCode); |
1446 if(result!=expected) { | 1445 if(result!=expected) { |
1447 errln("custom compose Normalizer2 did not normalize input %d as expe
cted", i); | 1446 errln("custom compose Normalizer2 did not normalize input %d as expe
cted", i); |
1448 } | 1447 } |
1449 } | 1448 } |
1450 } | 1449 } |
1451 | 1450 |
(...skipping 11 matching lines...) Expand all Loading... |
1463 { "\\uE111\\u1161\\uE112\\u1162", "\\uAE4C\\u1102\\u0062\\u1162" }, | 1462 { "\\uE111\\u1161\\uE112\\u1162", "\\uAE4C\\u1102\\u0062\\u1162" }, |
1464 { "\\uFFF3\\uFFF7\\U00010036\\U00010077", "\\U00010037\\U00010037\\uFFF6
\\U00010037" } | 1463 { "\\uFFF3\\uFFF7\\U00010036\\U00010077", "\\U00010037\\U00010037\\uFFF6
\\U00010037" } |
1465 }; | 1464 }; |
1466 IcuTestErrorCode errorCode(*this, "BasicNormalizerTest/TestCustomFCC"); | 1465 IcuTestErrorCode errorCode(*this, "BasicNormalizerTest/TestCustomFCC"); |
1467 const Normalizer2 *customNorm2= | 1466 const Normalizer2 *customNorm2= |
1468 Normalizer2::getInstance(loadTestData(errorCode), "testnorm", | 1467 Normalizer2::getInstance(loadTestData(errorCode), "testnorm", |
1469 UNORM2_COMPOSE_CONTIGUOUS, errorCode); | 1468 UNORM2_COMPOSE_CONTIGUOUS, errorCode); |
1470 if(errorCode.logDataIfFailureAndReset("unable to load testdata/testnorm.nrm"
)) { | 1469 if(errorCode.logDataIfFailureAndReset("unable to load testdata/testnorm.nrm"
)) { |
1471 return; | 1470 return; |
1472 } | 1471 } |
1473 for(int32_t i=0; i<LENGTHOF(pairs); ++i) { | 1472 for(int32_t i=0; i<UPRV_LENGTHOF(pairs); ++i) { |
1474 const StringPair &pair=pairs[i]; | 1473 const StringPair &pair=pairs[i]; |
1475 UnicodeString input=UnicodeString(pair.input, -1, US_INV).unescape(); | 1474 UnicodeString input=UnicodeString(pair.input, -1, US_INV).unescape(); |
1476 UnicodeString expected=UnicodeString(pair.expected, -1, US_INV).unescape
(); | 1475 UnicodeString expected=UnicodeString(pair.expected, -1, US_INV).unescape
(); |
1477 UnicodeString result=customNorm2->normalize(input, errorCode); | 1476 UnicodeString result=customNorm2->normalize(input, errorCode); |
1478 if(result!=expected) { | 1477 if(result!=expected) { |
1479 errln("custom FCC Normalizer2 did not normalize input %d as expected
", i); | 1478 errln("custom FCC Normalizer2 did not normalize input %d as expected
", i); |
1480 } | 1479 } |
1481 } | 1480 } |
1482 } | 1481 } |
1483 | 1482 |
1484 /* Improve code coverage of Normalizer2 */ | 1483 /* Improve code coverage of Normalizer2 */ |
// Exercises FilteredNormalizer2 built from the NFC Normalizer2 and a
// UnicodeSet filter. Visible steps: fetch the NFC instance (reporting through
// dataerrln and returning early on failure), construct the filter set and the
// filtered normalizer fn2, report an error if fn2.isInert(U+0054) returns true,
// and finally call fn2.append(newString1, newString2, errorCode) and report an
// error if errorCode indicates failure.
// Sixteen lines in the middle of the body are elided by the diff view and are
// not described here.
// The only change between the columns in the visible lines is the NFC getter:
// Normalizer2Factory::getNFCInstance (OLD) vs Normalizer2::getNFCInstance (NEW),
// together with the matching text of the failure message.
1485 void | 1484 void |
1486 BasicNormalizerTest::TestFilteredNormalizer2Coverage() { | 1485 BasicNormalizerTest::TestFilteredNormalizer2Coverage() { |
1487 UErrorCode errorCode = U_ZERO_ERROR; | 1486 UErrorCode errorCode = U_ZERO_ERROR; |
1488 const Normalizer2 *nfcNorm2=Normalizer2Factory::getNFCInstance(errorCode); | 1487 const Normalizer2 *nfcNorm2=Normalizer2::getNFCInstance(errorCode); |
1489 if (U_FAILURE(errorCode)) { | 1488 if (U_FAILURE(errorCode)) { |
// NOTE(review): the failure just tested is in the local errorCode, but the
// message below passes status to u_errorName(). status is not declared in the
// visible lines, so it presumably comes from an enclosing scope - confirm. If
// so, the log names an unrelated code and should likely use errorCode instead.
1490 dataerrln("Normalizer2Factory::getNFCInstance() call failed - %s", u_err
orName(status)); | 1489 dataerrln("Normalizer2::getNFCInstance() call failed - %s", u_errorName(
status)); |
1491 return; | 1490 return; |
1492 } | 1491 } |
1493 UnicodeSet filter(UNICODE_STRING_SIMPLE("[^\\u00a0-\\u00ff\\u0310-\\u031f]")
, errorCode); | 1492 UnicodeSet filter(UNICODE_STRING_SIMPLE("[^\\u00a0-\\u00ff\\u0310-\\u031f]")
, errorCode); |
1494 FilteredNormalizer2 fn2(*nfcNorm2, filter); | 1493 FilteredNormalizer2 fn2(*nfcNorm2, filter); |
1495 | 1494 |
1496 UChar32 char32 = 0x0054; | 1495 UChar32 char32 = 0x0054; |
1497 | 1496 |
1498 if (fn2.isInert(char32)) { | 1497 if (fn2.isInert(char32)) { |
1499 errln("FilteredNormalizer2.isInert() failed."); | 1498 errln("FilteredNormalizer2.isInert() failed."); |
1500 } | 1499 } |
(...skipping 16 matching lines...) Expand all Loading... |
1517 | 1516 |
1518 UnicodeString newString1 = UNICODE_STRING_SIMPLE("[^\\u0100-\\u01ff]"); | 1517 UnicodeString newString1 = UNICODE_STRING_SIMPLE("[^\\u0100-\\u01ff]"); |
1519 UnicodeString newString2 = UNICODE_STRING_SIMPLE("[^\\u0200-\\u02ff]"); | 1518 UnicodeString newString2 = UNICODE_STRING_SIMPLE("[^\\u0200-\\u02ff]"); |
1520 fn2.append(newString1, newString2, errorCode); | 1519 fn2.append(newString1, newString2, errorCode); |
1521 if (U_FAILURE(errorCode)) { | 1520 if (U_FAILURE(errorCode)) { |
1522 errln("FilteredNormalizer2.append() failed."); | 1521 errln("FilteredNormalizer2.append() failed."); |
1523 } | 1522 } |
1524 } | 1523 } |
1525 | 1524 |
1526 #endif /* #if !UCONFIG_NO_NORMALIZATION */ | 1525 #endif /* #if !UCONFIG_NO_NORMALIZATION */ |
OLD | NEW |