OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 2012-2014, International Business Machines | 3 * Copyright (C) 2012-2015, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 * collationtest.cpp | 6 * collationtest.cpp |
7 * | 7 * |
8 * created on: 2012apr27 | 8 * created on: 2012apr27 |
9 * created by: Markus W. Scherer | 9 * created by: Markus W. Scherer |
10 */ | 10 */ |
11 | 11 |
12 #include "unicode/utypes.h" | 12 #include "unicode/utypes.h" |
13 | 13 |
(...skipping 27 matching lines...) Expand all Loading... |
41 #include "normalizer2impl.h" | 41 #include "normalizer2impl.h" |
42 #include "ucbuf.h" | 42 #include "ucbuf.h" |
43 #include "uhash.h" | 43 #include "uhash.h" |
44 #include "uitercollationiterator.h" | 44 #include "uitercollationiterator.h" |
45 #include "utf16collationiterator.h" | 45 #include "utf16collationiterator.h" |
46 #include "utf8collationiterator.h" | 46 #include "utf8collationiterator.h" |
47 #include "uvectr32.h" | 47 #include "uvectr32.h" |
48 #include "uvectr64.h" | 48 #include "uvectr64.h" |
49 #include "writesrc.h" | 49 #include "writesrc.h" |
50 | 50 |
51 // TODO: Move to ucbuf.h | |
52 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCHARBUFPointer, UCHARBUF, ucbuf_close); | |
53 | |
54 class CodePointIterator; | 51 class CodePointIterator; |
55 | 52 |
56 // TODO: try to share code with IntlTestCollator; for example, prettify(CollationKey) | 53 // TODO: try to share code with IntlTestCollator; for example, prettify(CollationKey) |
57 | 54 |
58 class CollationTest : public IntlTest { | 55 class CollationTest : public IntlTest { |
59 public: | 56 public: |
60 CollationTest() | 57 CollationTest() |
61 : fcd(NULL), nfd(NULL), | 58 : fcd(NULL), nfd(NULL), |
62 fileLineNumber(0), | 59 fileLineNumber(0), |
63 coll(NULL) {} | 60 coll(NULL) {} |
(...skipping 26 matching lines...) Expand all Loading... |
90 | 87 |
91 // Helpers & fields for data-driven test. | 88 // Helpers & fields for data-driven test. |
92 static UBool isCROrLF(UChar c) { return c == 0xa || c == 0xd; } | 89 static UBool isCROrLF(UChar c) { return c == 0xa || c == 0xd; } |
93 static UBool isSpace(UChar c) { return c == 9 || c == 0x20 || c == 0x3000; } | 90 static UBool isSpace(UChar c) { return c == 9 || c == 0x20 || c == 0x3000; } |
94 static UBool isSectionStarter(UChar c) { return c == 0x25 || c == 0x2a || c == 0x40; } // %*@ | 91 static UBool isSectionStarter(UChar c) { return c == 0x25 || c == 0x2a || c == 0x40; } // %*@ |
95 int32_t skipSpaces(int32_t i) { | 92 int32_t skipSpaces(int32_t i) { |
96 while(isSpace(fileLine[i])) { ++i; } | 93 while(isSpace(fileLine[i])) { ++i; } |
97 return i; | 94 return i; |
98 } | 95 } |
99 | 96 |
100 UBool readLine(UCHARBUF *f, IcuTestErrorCode &errorCode); | 97 UBool readNonEmptyLine(UCHARBUF *f, IcuTestErrorCode &errorCode); |
101 void parseString(int32_t &start, UnicodeString &prefix, UnicodeString &s, UErrorCode &errorCode); | 98 void parseString(int32_t &start, UnicodeString &prefix, UnicodeString &s, UErrorCode &errorCode); |
102 Collation::Level parseRelationAndString(UnicodeString &s, IcuTestErrorCode &errorCode); | 99 Collation::Level parseRelationAndString(UnicodeString &s, IcuTestErrorCode &errorCode); |
103 void parseAndSetAttribute(IcuTestErrorCode &errorCode); | 100 void parseAndSetAttribute(IcuTestErrorCode &errorCode); |
104 void parseAndSetReorderCodes(int32_t start, IcuTestErrorCode &errorCode); | 101 void parseAndSetReorderCodes(int32_t start, IcuTestErrorCode &errorCode); |
105 void buildTailoring(UCHARBUF *f, IcuTestErrorCode &errorCode); | 102 void buildTailoring(UCHARBUF *f, IcuTestErrorCode &errorCode); |
106 void setRootCollator(IcuTestErrorCode &errorCode); | 103 void setRootCollator(IcuTestErrorCode &errorCode); |
107 void setLocaleCollator(IcuTestErrorCode &errorCode); | 104 void setLocaleCollator(IcuTestErrorCode &errorCode); |
108 | 105 |
109 UBool needsNormalization(const UnicodeString &s, UErrorCode &errorCode) const; | 106 UBool needsNormalization(const UnicodeString &s, UErrorCode &errorCode) const; |
110 | 107 |
111 UBool getSortKeyParts(const UChar *s, int32_t length, | 108 UBool getSortKeyParts(const UChar *s, int32_t length, |
112 CharString &dest, int32_t partSize, | 109 CharString &dest, int32_t partSize, |
113 IcuTestErrorCode &errorCode); | 110 IcuTestErrorCode &errorCode); |
114 UBool getCollationKey(const char *norm, const UnicodeString &line, | 111 UBool getCollationKey(const char *norm, const UnicodeString &line, |
115 const UChar *s, int32_t length, | 112 const UChar *s, int32_t length, |
116 CollationKey &key, IcuTestErrorCode &errorCode); | 113 CollationKey &key, IcuTestErrorCode &errorCode); |
| 114 UBool getMergedCollationKey(const UChar *s, int32_t length, |
| 115 CollationKey &key, IcuTestErrorCode &errorCode); |
117 UBool checkCompareTwo(const char *norm, const UnicodeString &prevFileLine, | 116 UBool checkCompareTwo(const char *norm, const UnicodeString &prevFileLine, |
118 const UnicodeString &prevString, const UnicodeString &s, | 117 const UnicodeString &prevString, const UnicodeString &s, |
119 UCollationResult expectedOrder, Collation::Level expectedLevel, | 118 UCollationResult expectedOrder, Collation::Level expectedLevel, |
120 IcuTestErrorCode &errorCode); | 119 IcuTestErrorCode &errorCode); |
121 void checkCompareStrings(UCHARBUF *f, IcuTestErrorCode &errorCode); | 120 void checkCompareStrings(UCHARBUF *f, IcuTestErrorCode &errorCode); |
122 | 121 |
123 const Normalizer2 *fcd, *nfd; | 122 const Normalizer2 *fcd, *nfd; |
124 UnicodeString fileLine; | 123 UnicodeString fileLine; |
125 int32_t fileLineNumber; | 124 int32_t fileLineNumber; |
126 UnicodeString fileTestName; | 125 UnicodeString fileTestName; |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
165 | 164 |
166 static const UChar s[2] = { 0xfffe, 0xffff }; | 165 static const UChar s[2] = { 0xfffe, 0xffff }; |
167 UVector64 ces(errorCode); | 166 UVector64 ces(errorCode); |
168 rbc->internalGetCEs(UnicodeString(FALSE, s, 2), ces, errorCode); | 167 rbc->internalGetCEs(UnicodeString(FALSE, s, 2), ces, errorCode); |
169 errorCode.assertSuccess(); | 168 errorCode.assertSuccess(); |
170 if(ces.size() != 2) { | 169 if(ces.size() != 2) { |
171 errln("expected 2 CEs for <FFFE, FFFF>, got %d", (int)ces.size()); | 170 errln("expected 2 CEs for <FFFE, FFFF>, got %d", (int)ces.size()); |
172 return; | 171 return; |
173 } | 172 } |
174 int64_t ce = ces.elementAti(0); | 173 int64_t ce = ces.elementAti(0); |
175 int64_t expected = | 174 int64_t expected = Collation::makeCE(Collation::MERGE_SEPARATOR_PRIMARY); |
176 ((int64_t)Collation::MERGE_SEPARATOR_PRIMARY << 32) | | |
177 Collation::MERGE_SEPARATOR_LOWER32; | |
178 if(ce != expected) { | 175 if(ce != expected) { |
179 errln("CE(U+fffe)=%04lx != 02.02.02", (long)ce); | 176 errln("CE(U+fffe)=%04lx != 02..", (long)ce); |
180 } | 177 } |
181 | 178 |
182 ce = ces.elementAti(1); | 179 ce = ces.elementAti(1); |
183 expected = Collation::makeCE(Collation::MAX_PRIMARY); | 180 expected = Collation::makeCE(Collation::MAX_PRIMARY); |
184 if(ce != expected) { | 181 if(ce != expected) { |
185 errln("CE(U+ffff)=%04lx != max..", (long)ce); | 182 errln("CE(U+ffff)=%04lx != max..", (long)ce); |
186 } | 183 } |
187 } | 184 } |
188 | 185 |
189 void CollationTest::TestImplicits() { | 186 void CollationTest::TestImplicits() { |
190 IcuTestErrorCode errorCode(*this, "TestImplicits"); | 187 IcuTestErrorCode errorCode(*this, "TestImplicits"); |
191 | 188 |
192 const CollationData *cd = CollationRoot::getData(errorCode); | 189 const CollationData *cd = CollationRoot::getData(errorCode); |
193 if(errorCode.logDataIfFailureAndReset("CollationRoot::getBaseData()")) { | 190 if(errorCode.logDataIfFailureAndReset("CollationRoot::getData()")) { |
194 return; | 191 return; |
195 } | 192 } |
196 | 193 |
197 // Implicit primary weights should be assigned for the following sets, | 194 // Implicit primary weights should be assigned for the following sets, |
198 // and sort in ascending order by set and then code point. | 195 // and sort in ascending order by set and then code point. |
199 // See http://www.unicode.org/reports/tr10/#Implicit_Weights | 196 // See http://www.unicode.org/reports/tr10/#Implicit_Weights |
200 | 197 |
201 // core Han Unified Ideographs | 198 // core Han Unified Ideographs |
202 UnicodeSet coreHan("[\\p{unified_ideograph}&" | 199 UnicodeSet coreHan("[\\p{unified_ideograph}&" |
203 "[\\p{Block=CJK_Unified_Ideographs}" | 200 "[\\p{Block=CJK_Unified_Ideographs}" |
(...skipping 406 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
610 } | 607 } |
611 // No intermediate zero bytes. | 608 // No intermediate zero bytes. |
612 if(p1 != 0 && p2 == 0 && (p & 0xffff) != 0) { | 609 if(p1 != 0 && p2 == 0 && (p & 0xffff) != 0) { |
613 return FALSE; | 610 return FALSE; |
614 } | 611 } |
615 if(p2 != 0 && p3 == 0 && p4 != 0) { | 612 if(p2 != 0 && p3 == 0 && p4 != 0) { |
616 return FALSE; | 613 return FALSE; |
617 } | 614 } |
618 // Minimum & maximum lead bytes. | 615 // Minimum & maximum lead bytes. |
619 if((p1 != 0 && p1 <= Collation::MERGE_SEPARATOR_BYTE) || | 616 if((p1 != 0 && p1 <= Collation::MERGE_SEPARATOR_BYTE) || |
620 (s1 != 0 && s1 <= Collation::MERGE_SEPARATOR_BYTE) || | 617 s1 == Collation::LEVEL_SEPARATOR_BYTE || |
621 (t1 != 0 && t1 <= Collation::MERGE_SEPARATOR_BYTE)) { | 618 t1 == Collation::LEVEL_SEPARATOR_BYTE || t1 > 0x3f) { |
622 return FALSE; | |
623 } | |
624 if(t1 != 0 && t1 > 0x3f) { | |
625 return FALSE; | 619 return FALSE; |
626 } | 620 } |
627 if(c > 2) { | 621 if(c > 2) { |
628 return FALSE; | 622 return FALSE; |
629 } | 623 } |
630 // The valid byte range for the second primary byte depends on compressibility. | 624 // The valid byte range for the second primary byte depends on compressibility. |
631 if(p2 != 0) { | 625 if(p2 != 0) { |
632 if(data.isCompressibleLeadByte(p1)) { | 626 if(data.isCompressibleLeadByte(p1)) { |
633 if(p2 <= Collation::PRIMARY_COMPRESSION_LOW_BYTE || | 627 if(p2 <= Collation::PRIMARY_COMPRESSION_LOW_BYTE || |
634 Collation::PRIMARY_COMPRESSION_HIGH_BYTE <= p2) { | 628 Collation::PRIMARY_COMPRESSION_HIGH_BYTE <= p2) { |
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
719 if((pri & 0xffff) == 0) { | 713 if((pri & 0xffff) == 0) { |
720 pri = Collation::incTwoBytePrimaryByOffset(pri, isCompressible, step); | 714 pri = Collation::incTwoBytePrimaryByOffset(pri, isCompressible, step); |
721 } else { | 715 } else { |
722 pri = Collation::incThreeBytePrimaryByOffset(pri, isCompressible, step); | 716 pri = Collation::incThreeBytePrimaryByOffset(pri, isCompressible, step); |
723 } | 717 } |
724 return TRUE; | 718 return TRUE; |
725 } | 719 } |
726 // Simple primary CE. | 720 // Simple primary CE. |
727 ++index; | 721 ++index; |
728 pri = p; | 722 pri = p; |
729 secTer = Collation::COMMON_SEC_AND_TER_CE; | 723 // Does this have an explicit below-common sec/ter unit, |
| 724 // or does it imply a common one? |
| 725 if(index == length) { |
| 726 secTer = Collation::COMMON_SEC_AND_TER_CE; |
| 727 } else { |
| 728 secTer = elements[index]; |
| 729 if((secTer & CollationRootElements::SEC_TER_DELTA_FLAG) == 0) { |
| 730 // No sec/ter delta. |
| 731 secTer = Collation::COMMON_SEC_AND_TER_CE; |
| 732 } else { |
| 733 secTer &= ~CollationRootElements::SEC_TER_DELTA_FLAG; |
| 734 if(secTer > Collation::COMMON_SEC_AND_TER_CE) { |
| 735 // Implied sec/ter. |
| 736 secTer = Collation::COMMON_SEC_AND_TER_CE; |
| 737 } else { |
| 738 // Explicit sec/ter below common/common. |
| 739 ++index; |
| 740 } |
| 741 } |
| 742 } |
730 return TRUE; | 743 return TRUE; |
731 } | 744 } |
732 | 745 |
733 uint32_t getPrimary() const { return pri; } | 746 uint32_t getPrimary() const { return pri; } |
734 uint32_t getSecTer() const { return secTer; } | 747 uint32_t getSecTer() const { return secTer; } |
735 | 748 |
736 private: | 749 private: |
737 const CollationData &data; | 750 const CollationData &data; |
738 const uint32_t *elements; | 751 const uint32_t *elements; |
739 int32_t length; | 752 int32_t length; |
(...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
946 } | 959 } |
947 return s; | 960 return s; |
948 } | 961 } |
949 | 962 |
950 UnicodeString CollationTest::printCollationKey(const CollationKey &key) { | 963 UnicodeString CollationTest::printCollationKey(const CollationKey &key) { |
951 int32_t length; | 964 int32_t length; |
952 const uint8_t *p = key.getByteArray(length); | 965 const uint8_t *p = key.getByteArray(length); |
953 return printSortKey(p, length); | 966 return printSortKey(p, length); |
954 } | 967 } |
955 | 968 |
956 UBool CollationTest::readLine(UCHARBUF *f, IcuTestErrorCode &errorCode) { | 969 UBool CollationTest::readNonEmptyLine(UCHARBUF *f, IcuTestErrorCode &errorCode) { |
957 int32_t lineLength; | 970 for(;;) { |
958 const UChar *line = ucbuf_readline(f, &lineLength, errorCode); | 971 int32_t lineLength; |
959 if(line == NULL || errorCode.isFailure()) { | 972 const UChar *line = ucbuf_readline(f, &lineLength, errorCode); |
960 fileLine.remove(); | 973 if(line == NULL || errorCode.isFailure()) { |
961 return FALSE; | 974 fileLine.remove(); |
| 975 return FALSE; |
| 976 } |
| 977 ++fileLineNumber; |
| 978 // Strip trailing CR/LF, comments, and spaces. |
| 979 const UChar *comment = u_memchr(line, 0x23, lineLength); // '#' |
| 980 if(comment != NULL) { |
| 981 lineLength = (int32_t)(comment - line); |
| 982 } else { |
| 983 while(lineLength > 0 && isCROrLF(line[lineLength - 1])) { --lineLength; } |
| 984 } |
| 985 while(lineLength > 0 && isSpace(line[lineLength - 1])) { --lineLength; } |
| 986 if(lineLength != 0) { |
| 987 fileLine.setTo(FALSE, line, lineLength); |
| 988 return TRUE; |
| 989 } |
| 990 // Empty line, continue. |
962 } | 991 } |
963 ++fileLineNumber; | |
964 // Strip trailing CR/LF, comments, and spaces. | |
965 const UChar *comment = u_memchr(line, 0x23, lineLength); // '#' | |
966 if(comment != NULL) { | |
967 lineLength = (int32_t)(comment - line); | |
968 } else { | |
969 while(lineLength > 0 && isCROrLF(line[lineLength - 1])) { --lineLength; } | |
970 } | |
971 while(lineLength > 0 && isSpace(line[lineLength - 1])) { --lineLength; } | |
972 fileLine.setTo(FALSE, line, lineLength); | |
973 return TRUE; | |
974 } | 992 } |
975 | 993 |
976 void CollationTest::parseString(int32_t &start, UnicodeString &prefix, UnicodeString &s, | 994 void CollationTest::parseString(int32_t &start, UnicodeString &prefix, UnicodeString &s, |
977 UErrorCode &errorCode) { | 995 UErrorCode &errorCode) { |
978 int32_t length = fileLine.length(); | 996 int32_t length = fileLine.length(); |
979 int32_t i; | 997 int32_t i; |
980 for(i = start; i < length && !isSpace(fileLine[i]); ++i) {} | 998 for(i = start; i < length && !isSpace(fileLine[i]); ++i) {} |
981 int32_t pipeIndex = fileLine.indexOf((UChar)0x7c, start, i - start); // '|' | 999 int32_t pipeIndex = fileLine.indexOf((UChar)0x7c, start, i - start); // '|' |
982 if(pipeIndex >= 0) { | 1000 if(pipeIndex >= 0) { |
983 prefix = fileLine.tempSubStringBetween(start, pipeIndex).unescape(); | 1001 prefix = fileLine.tempSubStringBetween(start, pipeIndex).unescape(); |
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1087 { "identical", UCOL_IDENTICAL }, | 1105 { "identical", UCOL_IDENTICAL }, |
1088 { "off", UCOL_OFF }, | 1106 { "off", UCOL_OFF }, |
1089 { "on", UCOL_ON }, | 1107 { "on", UCOL_ON }, |
1090 { "shifted", UCOL_SHIFTED }, | 1108 { "shifted", UCOL_SHIFTED }, |
1091 { "non-ignorable", UCOL_NON_IGNORABLE }, | 1109 { "non-ignorable", UCOL_NON_IGNORABLE }, |
1092 { "lower", UCOL_LOWER_FIRST }, | 1110 { "lower", UCOL_LOWER_FIRST }, |
1093 { "upper", UCOL_UPPER_FIRST } | 1111 { "upper", UCOL_UPPER_FIRST } |
1094 }; | 1112 }; |
1095 | 1113 |
1096 void CollationTest::parseAndSetAttribute(IcuTestErrorCode &errorCode) { | 1114 void CollationTest::parseAndSetAttribute(IcuTestErrorCode &errorCode) { |
| 1115 // Parse attributes even if the Collator could not be created, |
| 1116 // in order to report syntax errors. |
1097 int32_t start = skipSpaces(1); | 1117 int32_t start = skipSpaces(1); |
1098 int32_t equalPos = fileLine.indexOf(0x3d); | 1118 int32_t equalPos = fileLine.indexOf(0x3d); |
1099 if(equalPos < 0) { | 1119 if(equalPos < 0) { |
1100 if(fileLine.compare(start, 7, UNICODE_STRING("reorder", 7)) == 0) { | 1120 if(fileLine.compare(start, 7, UNICODE_STRING("reorder", 7)) == 0) { |
1101 parseAndSetReorderCodes(start + 7, errorCode); | 1121 parseAndSetReorderCodes(start + 7, errorCode); |
1102 return; | 1122 return; |
1103 } | 1123 } |
1104 errln("missing '=' on line %d", (int)fileLineNumber); | 1124 errln("missing '=' on line %d", (int)fileLineNumber); |
1105 infoln(fileLine); | 1125 infoln(fileLine); |
1106 errorCode.set(U_PARSE_ERROR); | 1126 errorCode.set(U_PARSE_ERROR); |
(...skipping 11 matching lines...) Expand all Loading... |
1118 } else if(valueString == UNICODE_STRING("symbol", 6)) { | 1138 } else if(valueString == UNICODE_STRING("symbol", 6)) { |
1119 max = UCOL_REORDER_CODE_SYMBOL; | 1139 max = UCOL_REORDER_CODE_SYMBOL; |
1120 } else if(valueString == UNICODE_STRING("currency", 8)) { | 1140 } else if(valueString == UNICODE_STRING("currency", 8)) { |
1121 max = UCOL_REORDER_CODE_CURRENCY; | 1141 max = UCOL_REORDER_CODE_CURRENCY; |
1122 } else { | 1142 } else { |
1123 errln("invalid attribute value name on line %d", (int)fileLineNumber); | 1143 errln("invalid attribute value name on line %d", (int)fileLineNumber); |
1124 infoln(fileLine); | 1144 infoln(fileLine); |
1125 errorCode.set(U_PARSE_ERROR); | 1145 errorCode.set(U_PARSE_ERROR); |
1126 return; | 1146 return; |
1127 } | 1147 } |
1128 coll->setMaxVariable(max, errorCode); | 1148 if(coll != NULL) { |
1129 if(errorCode.isFailure()) { | 1149 coll->setMaxVariable(max, errorCode); |
1130 errln("setMaxVariable() failed on line %d: %s", | 1150 if(errorCode.isFailure()) { |
1131 (int)fileLineNumber, errorCode.errorName()); | 1151 errln("setMaxVariable() failed on line %d: %s", |
1132 infoln(fileLine); | 1152 (int)fileLineNumber, errorCode.errorName()); |
1133 return; | 1153 infoln(fileLine); |
| 1154 return; |
| 1155 } |
1134 } | 1156 } |
1135 fileLine.remove(); | 1157 fileLine.remove(); |
1136 return; | 1158 return; |
1137 } | 1159 } |
1138 | 1160 |
1139 UColAttribute attr; | 1161 UColAttribute attr; |
1140 for(int32_t i = 0;; ++i) { | 1162 for(int32_t i = 0;; ++i) { |
1141 if(i == UPRV_LENGTHOF(attributes)) { | 1163 if(i == UPRV_LENGTHOF(attributes)) { |
1142 errln("invalid attribute name on line %d", (int)fileLineNumber); | 1164 errln("invalid attribute name on line %d", (int)fileLineNumber); |
1143 infoln(fileLine); | 1165 infoln(fileLine); |
(...skipping 13 matching lines...) Expand all Loading... |
1157 infoln(fileLine); | 1179 infoln(fileLine); |
1158 errorCode.set(U_PARSE_ERROR); | 1180 errorCode.set(U_PARSE_ERROR); |
1159 return; | 1181 return; |
1160 } | 1182 } |
1161 if(valueString == UnicodeString(attributeValues[i].name, -1, US_INV)) { | 1183 if(valueString == UnicodeString(attributeValues[i].name, -1, US_INV)) { |
1162 value = attributeValues[i].value; | 1184 value = attributeValues[i].value; |
1163 break; | 1185 break; |
1164 } | 1186 } |
1165 } | 1187 } |
1166 | 1188 |
1167 coll->setAttribute(attr, value, errorCode); | 1189 if(coll != NULL) { |
1168 if(errorCode.isFailure()) { | 1190 coll->setAttribute(attr, value, errorCode); |
1169 errln("illegal attribute=value combination on line %d: %s", | 1191 if(errorCode.isFailure()) { |
1170 (int)fileLineNumber, errorCode.errorName()); | 1192 errln("illegal attribute=value combination on line %d: %s", |
1171 infoln(fileLine); | 1193 (int)fileLineNumber, errorCode.errorName()); |
1172 return; | 1194 infoln(fileLine); |
| 1195 return; |
| 1196 } |
1173 } | 1197 } |
1174 fileLine.remove(); | 1198 fileLine.remove(); |
1175 } | 1199 } |
1176 | 1200 |
1177 void CollationTest::parseAndSetReorderCodes(int32_t start, IcuTestErrorCode &errorCode) { | 1201 void CollationTest::parseAndSetReorderCodes(int32_t start, IcuTestErrorCode &errorCode) { |
1178 UVector32 reorderCodes(errorCode); | 1202 UVector32 reorderCodes(errorCode); |
1179 while(start < fileLine.length()) { | 1203 while(start < fileLine.length()) { |
1180 start = skipSpaces(start); | 1204 start = skipSpaces(start); |
1181 int32_t limit = start; | 1205 int32_t limit = start; |
1182 while(limit < fileLine.length() && !isSpace(fileLine[limit])) { ++limit; } | 1206 while(limit < fileLine.length() && !isSpace(fileLine[limit])) { ++limit; } |
1183 CharString name; | 1207 CharString name; |
1184 name.appendInvariantChars(fileLine.tempSubStringBetween(start, limit), errorCode); | 1208 name.appendInvariantChars(fileLine.tempSubStringBetween(start, limit), errorCode); |
1185 int32_t code = CollationRuleParser::getReorderCode(name.data()); | 1209 int32_t code = CollationRuleParser::getReorderCode(name.data()); |
1186 if(code < 0) { | 1210 if(code < 0) { |
1187 if(uprv_stricmp(name.data(), "default") == 0) { | 1211 if(uprv_stricmp(name.data(), "default") == 0) { |
1188 code = UCOL_REORDER_CODE_DEFAULT; // -1 | 1212 code = UCOL_REORDER_CODE_DEFAULT; // -1 |
1189 } else { | 1213 } else { |
1190 errln("invalid reorder code '%s' on line %d", name.data(), (int)fileLineNumber); | 1214 errln("invalid reorder code '%s' on line %d", name.data(), (int)fileLineNumber); |
1191 infoln(fileLine); | 1215 infoln(fileLine); |
1192 errorCode.set(U_PARSE_ERROR); | 1216 errorCode.set(U_PARSE_ERROR); |
1193 return; | 1217 return; |
1194 } | 1218 } |
1195 } | 1219 } |
1196 reorderCodes.addElement(code, errorCode); | 1220 reorderCodes.addElement(code, errorCode); |
1197 start = limit; | 1221 start = limit; |
1198 } | 1222 } |
1199 coll->setReorderCodes(reorderCodes.getBuffer(), reorderCodes.size(), errorCode); | 1223 if(coll != NULL) { |
1200 if(errorCode.isFailure()) { | 1224 coll->setReorderCodes(reorderCodes.getBuffer(), reorderCodes.size(), errorCode); |
1201 errln("setReorderCodes() failed on line %d: %s", (int)fileLineNumber, errorCode.errorName()); | 1225 if(errorCode.isFailure()) { |
1202 infoln(fileLine); | 1226 errln("setReorderCodes() failed on line %d: %s", |
1203 return; | 1227 (int)fileLineNumber, errorCode.errorName()); |
| 1228 infoln(fileLine); |
| 1229 return; |
| 1230 } |
1204 } | 1231 } |
1205 fileLine.remove(); | 1232 fileLine.remove(); |
1206 } | 1233 } |
1207 | 1234 |
1208 void CollationTest::buildTailoring(UCHARBUF *f, IcuTestErrorCode &errorCode) { | 1235 void CollationTest::buildTailoring(UCHARBUF *f, IcuTestErrorCode &errorCode) { |
1209 UnicodeString rules; | 1236 UnicodeString rules; |
1210 while(readLine(f, errorCode)) { | 1237 while(readNonEmptyLine(f, errorCode) && !isSectionStarter(fileLine[0])) { |
1211 if(fileLine.isEmpty()) { continue; } | |
1212 if(isSectionStarter(fileLine[0])) { break; } | |
1213 rules.append(fileLine.unescape()); | 1238 rules.append(fileLine.unescape()); |
1214 } | 1239 } |
1215 if(errorCode.isFailure()) { return; } | 1240 if(errorCode.isFailure()) { return; } |
1216 logln(rules); | 1241 logln(rules); |
1217 | 1242 |
1218 UParseError parseError; | 1243 UParseError parseError; |
1219 UnicodeString reason; | 1244 UnicodeString reason; |
1220 delete coll; | 1245 delete coll; |
1221 coll = new RuleBasedCollator(rules, parseError, reason, errorCode); | 1246 coll = new RuleBasedCollator(rules, parseError, reason, errorCode); |
1222 if(coll == NULL) { | 1247 if(coll == NULL) { |
1223 errln("unable to allocate a new collator"); | 1248 errln("unable to allocate a new collator"); |
1224 errorCode.set(U_MEMORY_ALLOCATION_ERROR); | 1249 errorCode.set(U_MEMORY_ALLOCATION_ERROR); |
1225 return; | 1250 return; |
1226 } | 1251 } |
1227 if(errorCode.isFailure()) { | 1252 if(errorCode.isFailure()) { |
1228 dataerrln("RuleBasedCollator(rules) failed - %s", errorCode.errorName()); | 1253 dataerrln("RuleBasedCollator(rules) failed - %s", errorCode.errorName()); |
1229 infoln(UnicodeString(" reason: ") + reason); | 1254 infoln(UnicodeString(" reason: ") + reason); |
1230 if(parseError.offset >= 0) { infoln(" rules offset: %d", (int)parseError.offset); } | 1255 if(parseError.offset >= 0) { infoln(" rules offset: %d", (int)parseError.offset); } |
1231 if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) { | 1256 if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) { |
1232 infoln(UnicodeString(" snippet: ...") + | 1257 infoln(UnicodeString(" snippet: ...") + |
1233 parseError.preContext + "(!)" + parseError.postContext + "..."); | 1258 parseError.preContext + "(!)" + parseError.postContext + "..."); |
1234 } | 1259 } |
| 1260 delete coll; |
| 1261 coll = NULL; |
| 1262 errorCode.reset(); |
1235 } else { | 1263 } else { |
1236 assertEquals("no error reason when RuleBasedCollator(rules) succeeds", | 1264 assertEquals("no error reason when RuleBasedCollator(rules) succeeds", |
1237 UnicodeString(), reason); | 1265 UnicodeString(), reason); |
1238 } | 1266 } |
1239 } | 1267 } |
1240 | 1268 |
1241 void CollationTest::setRootCollator(IcuTestErrorCode &errorCode) { | 1269 void CollationTest::setRootCollator(IcuTestErrorCode &errorCode) { |
1242 if(errorCode.isFailure()) { return; } | 1270 if(errorCode.isFailure()) { return; } |
1243 delete coll; | 1271 delete coll; |
1244 coll = Collator::createInstance(Locale::getRoot(), errorCode); | 1272 coll = Collator::createInstance(Locale::getRoot(), errorCode); |
1245 if(errorCode.isFailure()) { | 1273 if(errorCode.isFailure()) { |
1246 dataerrln("unable to create a root collator"); | 1274 dataerrln("unable to create a root collator"); |
1247 return; | 1275 return; |
1248 } | 1276 } |
1249 } | 1277 } |
1250 | 1278 |
1251 void CollationTest::setLocaleCollator(IcuTestErrorCode &errorCode) { | 1279 void CollationTest::setLocaleCollator(IcuTestErrorCode &errorCode) { |
1252 if(errorCode.isFailure()) { return; } | 1280 if(errorCode.isFailure()) { return; } |
| 1281 delete coll; |
| 1282 coll = NULL; |
1253 int32_t at = fileLine.indexOf((UChar)0x40, 9); // @ is not invariant | 1283 int32_t at = fileLine.indexOf((UChar)0x40, 9); // @ is not invariant |
1254 if(at >= 0) { | 1284 if(at >= 0) { |
1255 fileLine.setCharAt(at, (UChar)0x2a); // * | 1285 fileLine.setCharAt(at, (UChar)0x2a); // * |
1256 } | 1286 } |
1257 CharString localeID; | 1287 CharString localeID; |
1258 localeID.appendInvariantChars(fileLine.tempSubString(9), errorCode); | 1288 localeID.appendInvariantChars(fileLine.tempSubString(9), errorCode); |
1259 if(at >= 0) { | 1289 if(at >= 0) { |
1260 localeID.data()[at - 9] = '@'; | 1290 localeID.data()[at - 9] = '@'; |
1261 } | 1291 } |
1262 Locale locale(localeID.data()); | 1292 Locale locale(localeID.data()); |
1263 if(fileLine.length() == 9 || errorCode.isFailure() || locale.isBogus()) { | 1293 if(fileLine.length() == 9 || errorCode.isFailure() || locale.isBogus()) { |
1264 errln("invalid language tag on line %d", (int)fileLineNumber); | 1294 errln("invalid language tag on line %d", (int)fileLineNumber); |
1265 infoln(fileLine); | 1295 infoln(fileLine); |
1266 if(errorCode.isSuccess()) { errorCode.set(U_PARSE_ERROR); } | 1296 if(errorCode.isSuccess()) { errorCode.set(U_PARSE_ERROR); } |
1267 return; | 1297 return; |
1268 } | 1298 } |
1269 | 1299 |
1270 logln("creating a collator for locale ID %s", locale.getName()); | 1300 logln("creating a collator for locale ID %s", locale.getName()); |
1271 Collator *newColl = Collator::createInstance(locale, errorCode); | 1301 coll = Collator::createInstance(locale, errorCode); |
1272 if(errorCode.isFailure()) { | 1302 if(errorCode.isFailure()) { |
1273 dataerrln("unable to create a collator for locale %s on line %d", | 1303 dataerrln("unable to create a collator for locale %s on line %d", |
1274 locale.getName(), (int)fileLineNumber); | 1304 locale.getName(), (int)fileLineNumber); |
1275 infoln(fileLine); | 1305 infoln(fileLine); |
1276 return; | 1306 delete coll; |
| 1307 coll = NULL; |
| 1308 errorCode.reset(); |
1277 } | 1309 } |
1278 delete coll; | |
1279 coll = newColl; | |
1280 } | 1310 } |
1281 | 1311 |
1282 UBool CollationTest::needsNormalization(const UnicodeString &s, UErrorCode &errorCode) const { | 1312 UBool CollationTest::needsNormalization(const UnicodeString &s, UErrorCode &errorCode) const { |
1283 if(U_FAILURE(errorCode) || !fcd->isNormalized(s, errorCode)) { return TRUE; } | 1313 if(U_FAILURE(errorCode) || !fcd->isNormalized(s, errorCode)) { return TRUE; } |
1284 // In some sequences with Tibetan composite vowel signs, | 1314 // In some sequences with Tibetan composite vowel signs, |
1285 // even if the string passes the FCD check, | 1315 // even if the string passes the FCD check, |
1286 // those composites must be decomposed. | 1316 // those composites must be decomposed. |
1287 // Check if s contains 0F71 immediately followed by 0F73 or 0F75 or 0F81. | 1317 // Check if s contains 0F71 immediately followed by 0F73 or 0F75 or 0F81. |
1288 int32_t index = 0; | 1318 int32_t index = 0; |
1289 while((index = s.indexOf((UChar)0xf71, index)) >= 0) { | 1319 while((index = s.indexOf((UChar)0xf71, index)) >= 0) { |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1365 } | 1395 } |
1366 if(numLevelSeparators != (numLevels - 1)) { | 1396 if(numLevelSeparators != (numLevels - 1)) { |
1367 infoln(fileTestName); | 1397 infoln(fileTestName); |
1368 errln("Collator(%s).getCollationKey() has %d level separators for %d levels", | 1398 errln("Collator(%s).getCollationKey() has %d level separators for %d levels", |
1369 norm, (int)numLevelSeparators, (int)numLevels); | 1399 norm, (int)numLevelSeparators, (int)numLevels); |
1370 infoln(line); | 1400 infoln(line); |
1371 infoln(printCollationKey(key)); | 1401 infoln(printCollationKey(key)); |
1372 return FALSE; | 1402 return FALSE; |
1373 } | 1403 } |
1374 | 1404 |
1375 // If s contains U+FFFE, check that merged segments make the same key. | 1405 // Check that internalNextSortKeyPart() makes the same key, with several part sizes. |
| 1406 static const int32_t partSizes[] = { 32, 3, 1 }; |
| 1407 for(int32_t psi = 0; psi < UPRV_LENGTHOF(partSizes); ++psi) { |
| 1408 int32_t partSize = partSizes[psi]; |
| 1409 CharString parts; |
| 1410 if(!getSortKeyParts(s, length, parts, partSize, errorCode)) { |
| 1411 infoln(fileTestName); |
| 1412 errln("Collator(%s).internalNextSortKeyPart(%d) failed: %s", |
| 1413 norm, (int)partSize, errorCode.errorName()); |
| 1414 infoln(line); |
| 1415 return FALSE; |
| 1416 } |
| 1417 if(keyLength != parts.length() || uprv_memcmp(keyBytes, parts.data(), keyLength) != 0) { |
| 1418 infoln(fileTestName); |
| 1419 errln("Collator(%s).getCollationKey() != internalNextSortKeyPart(%d)", |
| 1420 norm, (int)partSize); |
| 1421 infoln(line); |
| 1422 infoln(printCollationKey(key)); |
| 1423 infoln(printSortKey(reinterpret_cast<uint8_t *>(parts.data()), parts.length())); |
| 1424 return FALSE; |
| 1425 } |
| 1426 } |
| 1427 return TRUE; |
| 1428 } |
| 1429 |
| 1430 /** |
| 1431 * Changes the key to the merged segments of the U+FFFE-separated substrings of s. |
| 1432 * Leaves key unchanged if s does not contain U+FFFE. |
| 1433 * @return TRUE if the key was successfully changed |
| 1434 */ |
| 1435 UBool CollationTest::getMergedCollationKey(const UChar *s, int32_t length, |
| 1436 CollationKey &key, IcuTestErrorCode &errorCode) { |
| 1437 if(errorCode.isFailure()) { return FALSE; } |
1376 LocalMemory<uint8_t> mergedKey; | 1438 LocalMemory<uint8_t> mergedKey; |
1377 int32_t mergedKeyLength = 0; | 1439 int32_t mergedKeyLength = 0; |
1378 int32_t mergedKeyCapacity = 0; | 1440 int32_t mergedKeyCapacity = 0; |
1379 int32_t sLength = (length >= 0) ? length : u_strlen(s); | 1441 int32_t sLength = (length >= 0) ? length : u_strlen(s); |
1380 int32_t segmentStart = 0; | 1442 int32_t segmentStart = 0; |
1381 for(int32_t i = 0;;) { | 1443 for(int32_t i = 0;;) { |
1382 if(i == sLength) { | 1444 if(i == sLength) { |
1383 if(segmentStart == 0) { | 1445 if(segmentStart == 0) { |
1384 // s does not contain any U+FFFE. | 1446 // s does not contain any U+FFFE. |
1385 break; | 1447 return FALSE; |
1386 } | 1448 } |
1387 } else if(s[i] != 0xfffe) { | 1449 } else if(s[i] != 0xfffe) { |
1388 ++i; | 1450 ++i; |
1389 continue; | 1451 continue; |
1390 } | 1452 } |
1391 // Get the sort key for another segment and merge it into mergedKey. | 1453 // Get the sort key for another segment and merge it into mergedKey. |
1392 CollationKey key1(mergedKey.getAlias(), mergedKeyLength); // copies the
bytes | 1454 CollationKey key1(mergedKey.getAlias(), mergedKeyLength); // copies the
bytes |
1393 CollationKey key2; | 1455 CollationKey key2; |
1394 coll->getCollationKey(s + segmentStart, i - segmentStart, key2, errorCod
e); | 1456 coll->getCollationKey(s + segmentStart, i - segmentStart, key2, errorCod
e); |
1395 int32_t key1Length, key2Length; | 1457 int32_t key1Length, key2Length; |
(...skipping 20 matching lines...) Expand all Loading... |
1416 uprv_memcpy(dest, key2Bytes, key2Length); | 1478 uprv_memcpy(dest, key2Bytes, key2Length); |
1417 mergedKeyLength = key2Length; | 1479 mergedKeyLength = key2Length; |
1418 } else { | 1480 } else { |
1419 mergedKeyLength = | 1481 mergedKeyLength = |
1420 ucol_mergeSortkeys(key1Bytes, key1Length, key2Bytes, key2Length, | 1482 ucol_mergeSortkeys(key1Bytes, key1Length, key2Bytes, key2Length, |
1421 dest, mergedKeyCapacity); | 1483 dest, mergedKeyCapacity); |
1422 } | 1484 } |
1423 if(i == sLength) { break; } | 1485 if(i == sLength) { break; } |
1424 segmentStart = ++i; | 1486 segmentStart = ++i; |
1425 } | 1487 } |
1426 if(segmentStart != 0 && | 1488 key = CollationKey(mergedKey.getAlias(), mergedKeyLength); |
1427 (mergedKeyLength != keyLength || | |
1428 uprv_memcmp(mergedKey.getAlias(), keyBytes, keyLength) != 0)) { | |
1429 infoln(fileTestName); | |
1430 errln("Collator(%s).getCollationKey(with U+FFFE) != " | |
1431 "ucol_mergeSortkeys(segments)", | |
1432 norm); | |
1433 infoln(line); | |
1434 infoln(printCollationKey(key)); | |
1435 infoln(printSortKey(mergedKey.getAlias(), mergedKeyLength)); | |
1436 return FALSE; | |
1437 } | |
1438 | |
1439 // Check that internalNextSortKeyPart() makes the same key, with several par
t sizes. | |
1440 static const int32_t partSizes[] = { 32, 3, 1 }; | |
1441 for(int32_t psi = 0; psi < UPRV_LENGTHOF(partSizes); ++psi) { | |
1442 int32_t partSize = partSizes[psi]; | |
1443 CharString parts; | |
1444 if(!getSortKeyParts(s, length, parts, 32, errorCode)) { | |
1445 infoln(fileTestName); | |
1446 errln("Collator(%s).internalNextSortKeyPart(%d) failed: %s", | |
1447 norm, (int)partSize, errorCode.errorName()); | |
1448 infoln(line); | |
1449 return FALSE; | |
1450 } | |
1451 if(keyLength != parts.length() || uprv_memcmp(keyBytes, parts.data(), ke
yLength) != 0) { | |
1452 infoln(fileTestName); | |
1453 errln("Collator(%s).getCollationKey() != internalNextSortKeyPart(%d)
", | |
1454 norm, (int)partSize); | |
1455 infoln(line); | |
1456 infoln(printCollationKey(key)); | |
1457 infoln(printSortKey(reinterpret_cast<uint8_t *>(parts.data()), parts
.length())); | |
1458 return FALSE; | |
1459 } | |
1460 } | |
1461 return TRUE; | 1489 return TRUE; |
1462 } | 1490 } |
1463 | 1491 |
1464 namespace { | 1492 namespace { |
1465 | 1493 |
1466 /** | 1494 /** |
1467 * Replaces unpaired surrogates with U+FFFD. | 1495 * Replaces unpaired surrogates with U+FFFD. |
1468 * Returns s if no replacement was made, otherwise buffer. | 1496 * Returns s if no replacement was made, otherwise buffer. |
1469 */ | 1497 */ |
1470 const UnicodeString &surrogatesToFFFD(const UnicodeString &s, UnicodeString &buf
fer) { | 1498 const UnicodeString &surrogatesToFFFD(const UnicodeString &s, UnicodeString &buf
fer) { |
(...skipping 10 matching lines...) Expand all Loading... |
1481 } | 1509 } |
1482 if(buffer.isEmpty()) { | 1510 if(buffer.isEmpty()) { |
1483 return s; | 1511 return s; |
1484 } | 1512 } |
1485 if(buffer.length() < i) { | 1513 if(buffer.length() < i) { |
1486 buffer.append(s, buffer.length(), i - buffer.length()); | 1514 buffer.append(s, buffer.length(), i - buffer.length()); |
1487 } | 1515 } |
1488 return buffer; | 1516 return buffer; |
1489 } | 1517 } |
1490 | 1518 |
| 1519 int32_t getDifferenceLevel(const CollationKey &prevKey, const CollationKey &key, |
| 1520 UCollationResult order, UBool collHasCaseLevel) { |
| 1521 if(order == UCOL_EQUAL) { |
| 1522 return Collation::NO_LEVEL; |
| 1523 } |
| 1524 int32_t prevKeyLength; |
| 1525 const uint8_t *prevBytes = prevKey.getByteArray(prevKeyLength); |
| 1526 int32_t keyLength; |
| 1527 const uint8_t *bytes = key.getByteArray(keyLength); |
| 1528 int32_t level = Collation::PRIMARY_LEVEL; |
| 1529 for(int32_t i = 0;; ++i) { |
| 1530 uint8_t b = prevBytes[i]; |
| 1531 if(b != bytes[i]) { break; } |
| 1532 if(b == Collation::LEVEL_SEPARATOR_BYTE) { |
| 1533 ++level; |
| 1534 if(level == Collation::CASE_LEVEL && !collHasCaseLevel) { |
| 1535 ++level; |
| 1536 } |
| 1537 } |
| 1538 } |
| 1539 return level; |
| 1540 } |
| 1541 |
1491 } | 1542 } |
1492 | 1543 |
1493 UBool CollationTest::checkCompareTwo(const char *norm, const UnicodeString &prev
FileLine, | 1544 UBool CollationTest::checkCompareTwo(const char *norm, const UnicodeString &prev
FileLine, |
1494 const UnicodeString &prevString, const Unic
odeString &s, | 1545 const UnicodeString &prevString, const Unic
odeString &s, |
1495 UCollationResult expectedOrder, Collation::
Level expectedLevel, | 1546 UCollationResult expectedOrder, Collation::
Level expectedLevel, |
1496 IcuTestErrorCode &errorCode) { | 1547 IcuTestErrorCode &errorCode) { |
1497 if(errorCode.isFailure()) { return FALSE; } | 1548 if(errorCode.isFailure()) { return FALSE; } |
1498 | 1549 |
1499 // Get the sort keys first, for error debug output. | 1550 // Get the sort keys first, for error debug output. |
1500 CollationKey prevKey; | 1551 CollationKey prevKey; |
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1642 if(order != expectedOrder || errorCode.isFailure()) { | 1693 if(order != expectedOrder || errorCode.isFailure()) { |
1643 infoln(fileTestName); | 1694 infoln(fileTestName); |
1644 errln("line %d Collator(%s).getCollationKey(previous, current).compareTo
() wrong order: %d != %d (%s)", | 1695 errln("line %d Collator(%s).getCollationKey(previous, current).compareTo
() wrong order: %d != %d (%s)", |
1645 (int)fileLineNumber, norm, order, expectedOrder, errorCode.errorNa
me()); | 1696 (int)fileLineNumber, norm, order, expectedOrder, errorCode.errorNa
me()); |
1646 infoln(prevFileLine); | 1697 infoln(prevFileLine); |
1647 infoln(fileLine); | 1698 infoln(fileLine); |
1648 infoln(printCollationKey(prevKey)); | 1699 infoln(printCollationKey(prevKey)); |
1649 infoln(printCollationKey(key)); | 1700 infoln(printCollationKey(key)); |
1650 return FALSE; | 1701 return FALSE; |
1651 } | 1702 } |
| 1703 UBool collHasCaseLevel = coll->getAttribute(UCOL_CASE_LEVEL, errorCode) == U
COL_ON; |
| 1704 int32_t level = getDifferenceLevel(prevKey, key, order, collHasCaseLevel); |
1652 if(order != UCOL_EQUAL && expectedLevel != Collation::NO_LEVEL) { | 1705 if(order != UCOL_EQUAL && expectedLevel != Collation::NO_LEVEL) { |
1653 int32_t prevKeyLength; | |
1654 const uint8_t *prevBytes = prevKey.getByteArray(prevKeyLength); | |
1655 int32_t keyLength; | |
1656 const uint8_t *bytes = key.getByteArray(keyLength); | |
1657 int32_t level = Collation::PRIMARY_LEVEL; | |
1658 for(int32_t i = 0;; ++i) { | |
1659 uint8_t b = prevBytes[i]; | |
1660 if(b != bytes[i]) { break; } | |
1661 if(b == Collation::LEVEL_SEPARATOR_BYTE) { | |
1662 ++level; | |
1663 if(level == Collation::CASE_LEVEL && | |
1664 coll->getAttribute(UCOL_CASE_LEVEL, errorCode) == UCOL_O
FF) { | |
1665 ++level; | |
1666 } | |
1667 } | |
1668 } | |
1669 if(level != expectedLevel) { | 1706 if(level != expectedLevel) { |
1670 infoln(fileTestName); | 1707 infoln(fileTestName); |
1671 errln("line %d Collator(%s).getCollationKey(previous, current).compa
reTo()=%d wrong level: %d != %d", | 1708 errln("line %d Collator(%s).getCollationKey(previous, current).compa
reTo()=%d wrong level: %d != %d", |
1672 (int)fileLineNumber, norm, order, level, expectedLevel); | 1709 (int)fileLineNumber, norm, order, level, expectedLevel); |
1673 infoln(prevFileLine); | 1710 infoln(prevFileLine); |
1674 infoln(fileLine); | 1711 infoln(fileLine); |
1675 infoln(printCollationKey(prevKey)); | 1712 infoln(printCollationKey(prevKey)); |
1676 infoln(printCollationKey(key)); | 1713 infoln(printCollationKey(key)); |
1677 return FALSE; | 1714 return FALSE; |
1678 } | 1715 } |
1679 } | 1716 } |
| 1717 |
| 1718 // If either string contains U+FFFE, then their sort keys must compare the s
ame as |
| 1719 // the merged sort keys of each string's between-FFFE segments. |
| 1720 // |
| 1721 // It is not required that |
| 1722 // sortkey(str1 + "\uFFFE" + str2) == mergeSortkeys(sortkey(str1), sortkey
(str2)) |
| 1723 // only that those two methods yield the same order. |
| 1724 // |
| 1725 // Use bit-wise OR so that getMergedCollationKey() is always called for both
strings. |
| 1726 if((getMergedCollationKey(prevString.getBuffer(), prevString.length(), prevK
ey, errorCode) | |
| 1727 getMergedCollationKey(s.getBuffer(), s.length(), key, errorCode)
) || |
| 1728 errorCode.isFailure()) { |
| 1729 order = prevKey.compareTo(key, errorCode); |
| 1730 if(order != expectedOrder || errorCode.isFailure()) { |
| 1731 infoln(fileTestName); |
| 1732 errln("line %d ucol_mergeSortkeys(Collator(%s).getCollationKey" |
| 1733 "(previous, current segments between U+FFFE)).compareTo() wrong
order: %d != %d (%s)", |
| 1734 (int)fileLineNumber, norm, order, expectedOrder, errorCode.error
Name()); |
| 1735 infoln(prevFileLine); |
| 1736 infoln(fileLine); |
| 1737 infoln(printCollationKey(prevKey)); |
| 1738 infoln(printCollationKey(key)); |
| 1739 return FALSE; |
| 1740 } |
| 1741 int32_t mergedLevel = getDifferenceLevel(prevKey, key, order, collHasCas
eLevel); |
| 1742 if(order != UCOL_EQUAL && expectedLevel != Collation::NO_LEVEL) { |
| 1743 if(mergedLevel != level) { |
| 1744 infoln(fileTestName); |
| 1745 errln("line %d ucol_mergeSortkeys(Collator(%s).getCollationKey" |
| 1746 "(previous, current segments between U+FFFE)).compareTo()=%d
wrong level: %d != %d", |
| 1747 (int)fileLineNumber, norm, order, mergedLevel, level); |
| 1748 infoln(prevFileLine); |
| 1749 infoln(fileLine); |
| 1750 infoln(printCollationKey(prevKey)); |
| 1751 infoln(printCollationKey(key)); |
| 1752 return FALSE; |
| 1753 } |
| 1754 } |
| 1755 } |
1680 return TRUE; | 1756 return TRUE; |
1681 } | 1757 } |
1682 | 1758 |
1683 void CollationTest::checkCompareStrings(UCHARBUF *f, IcuTestErrorCode &errorCode
) { | 1759 void CollationTest::checkCompareStrings(UCHARBUF *f, IcuTestErrorCode &errorCode
) { |
1684 if(errorCode.isFailure()) { return; } | 1760 if(errorCode.isFailure()) { return; } |
1685 UnicodeString prevFileLine = UNICODE_STRING("(none)", 6); | 1761 UnicodeString prevFileLine = UNICODE_STRING("(none)", 6); |
1686 UnicodeString prevString, s; | 1762 UnicodeString prevString, s; |
1687 prevString.getTerminatedBuffer(); // Ensure NUL-termination. | 1763 prevString.getTerminatedBuffer(); // Ensure NUL-termination. |
1688 while(readLine(f, errorCode)) { | 1764 while(readNonEmptyLine(f, errorCode) && !isSectionStarter(fileLine[0])) { |
1689 if(fileLine.isEmpty()) { continue; } | 1765 // Parse the line even if it will be ignored (when we do not have a Coll
ator) |
1690 if(isSectionStarter(fileLine[0])) { break; } | 1766 // in order to report syntax issues. |
1691 Collation::Level relation = parseRelationAndString(s, errorCode); | 1767 Collation::Level relation = parseRelationAndString(s, errorCode); |
1692 if(errorCode.isFailure()) { | 1768 if(errorCode.isFailure()) { |
1693 errorCode.reset(); | 1769 errorCode.reset(); |
1694 break; | 1770 break; |
1695 } | 1771 } |
| 1772 if(coll == NULL) { |
| 1773 // We were unable to create the Collator but continue with tests. |
| 1774 // Ignore test data for this Collator. |
| 1775 // The next Collator creation might work. |
| 1776 continue; |
| 1777 } |
1696 UCollationResult expectedOrder = (relation == Collation::ZERO_LEVEL) ? U
COL_EQUAL : UCOL_LESS; | 1778 UCollationResult expectedOrder = (relation == Collation::ZERO_LEVEL) ? U
COL_EQUAL : UCOL_LESS; |
1697 Collation::Level expectedLevel = relation; | 1779 Collation::Level expectedLevel = relation; |
1698 s.getTerminatedBuffer(); // Ensure NUL-termination. | 1780 s.getTerminatedBuffer(); // Ensure NUL-termination. |
1699 UBool isOk = TRUE; | 1781 UBool isOk = TRUE; |
1700 if(!needsNormalization(prevString, errorCode) && !needsNormalization(s,
errorCode)) { | 1782 if(!needsNormalization(prevString, errorCode) && !needsNormalization(s,
errorCode)) { |
1701 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, errorCode); | 1783 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, errorCode); |
1702 isOk = checkCompareTwo("normalization=on", prevFileLine, prevString,
s, | 1784 isOk = checkCompareTwo("normalization=on", prevFileLine, prevString,
s, |
1703 expectedOrder, expectedLevel, errorCode); | 1785 expectedOrder, expectedLevel, errorCode); |
1704 } | 1786 } |
1705 if(isOk) { | 1787 if(isOk) { |
(...skipping 28 matching lines...) Expand all Loading... |
1734 return; | 1816 return; |
1735 } | 1817 } |
1736 | 1818 |
1737 CharString path(getSourceTestData(errorCode), errorCode); | 1819 CharString path(getSourceTestData(errorCode), errorCode); |
1738 path.appendPathPart("collationtest.txt", errorCode); | 1820 path.appendPathPart("collationtest.txt", errorCode); |
1739 const char *codePage = "UTF-8"; | 1821 const char *codePage = "UTF-8"; |
1740 LocalUCHARBUFPointer f(ucbuf_open(path.data(), &codePage, TRUE, FALSE, error
Code)); | 1822 LocalUCHARBUFPointer f(ucbuf_open(path.data(), &codePage, TRUE, FALSE, error
Code)); |
1741 if(errorCode.logIfFailureAndReset("ucbuf_open(collationtest.txt)")) { | 1823 if(errorCode.logIfFailureAndReset("ucbuf_open(collationtest.txt)")) { |
1742 return; | 1824 return; |
1743 } | 1825 } |
1744 while(errorCode.isSuccess()) { | 1826 // Read a new line if necessary. |
1745 // Read a new line if necessary. | 1827 // Sub-parsers leave the first line set that they do not handle. |
1746 // Sub-parsers leave the first line set that they do not handle. | 1828 while(errorCode.isSuccess() && (!fileLine.isEmpty() || readNonEmptyLine(f.ge
tAlias(), errorCode))) { |
1747 if(fileLine.isEmpty()) { | |
1748 if(!readLine(f.getAlias(), errorCode)) { break; } | |
1749 continue; | |
1750 } | |
1751 if(!isSectionStarter(fileLine[0])) { | 1829 if(!isSectionStarter(fileLine[0])) { |
1752 errln("syntax error on line %d", (int)fileLineNumber); | 1830 errln("syntax error on line %d", (int)fileLineNumber); |
1753 infoln(fileLine); | 1831 infoln(fileLine); |
1754 return; | 1832 return; |
1755 } | 1833 } |
1756 if(fileLine.startsWith(UNICODE_STRING("** test: ", 9))) { | 1834 if(fileLine.startsWith(UNICODE_STRING("** test: ", 9))) { |
1757 fileTestName = fileLine; | 1835 fileTestName = fileLine; |
1758 logln(fileLine); | 1836 logln(fileLine); |
1759 fileLine.remove(); | 1837 fileLine.remove(); |
1760 } else if(fileLine == UNICODE_STRING("@ root", 6)) { | 1838 } else if(fileLine == UNICODE_STRING("@ root", 6)) { |
(...skipping 10 matching lines...) Expand all Loading... |
1771 checkCompareStrings(f.getAlias(), errorCode); | 1849 checkCompareStrings(f.getAlias(), errorCode); |
1772 } else { | 1850 } else { |
1773 errln("syntax error on line %d", (int)fileLineNumber); | 1851 errln("syntax error on line %d", (int)fileLineNumber); |
1774 infoln(fileLine); | 1852 infoln(fileLine); |
1775 return; | 1853 return; |
1776 } | 1854 } |
1777 } | 1855 } |
1778 } | 1856 } |
1779 | 1857 |
1780 #endif // !UCONFIG_NO_COLLATION | 1858 #endif // !UCONFIG_NO_COLLATION |
OLD | NEW |