Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(32)

Unified Diff: source/test/intltest/collationtest.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/test/intltest/canittst.cpp ('k') | source/test/intltest/compactdecimalformattest.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/test/intltest/collationtest.cpp
diff --git a/source/test/intltest/collationtest.cpp b/source/test/intltest/collationtest.cpp
index d80940720007ef09e27e015a896dd2e80ed6bebe..1248e1ce44335a1ecc3e4d32c721873caba63bd6 100644
--- a/source/test/intltest/collationtest.cpp
+++ b/source/test/intltest/collationtest.cpp
@@ -1,6 +1,6 @@
/*
*******************************************************************************
-* Copyright (C) 2012-2014, International Business Machines
+* Copyright (C) 2012-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* collationtest.cpp
@@ -48,9 +48,6 @@
#include "uvectr64.h"
#include "writesrc.h"
-// TODO: Move to ucbuf.h
-U_DEFINE_LOCAL_OPEN_POINTER(LocalUCHARBUFPointer, UCHARBUF, ucbuf_close);
-
class CodePointIterator;
// TODO: try to share code with IntlTestCollator; for example, prettify(CollationKey)
@@ -97,7 +94,7 @@ private:
return i;
}
- UBool readLine(UCHARBUF *f, IcuTestErrorCode &errorCode);
+ UBool readNonEmptyLine(UCHARBUF *f, IcuTestErrorCode &errorCode);
void parseString(int32_t &start, UnicodeString &prefix, UnicodeString &s, UErrorCode &errorCode);
Collation::Level parseRelationAndString(UnicodeString &s, IcuTestErrorCode &errorCode);
void parseAndSetAttribute(IcuTestErrorCode &errorCode);
@@ -114,6 +111,8 @@ private:
UBool getCollationKey(const char *norm, const UnicodeString &line,
const UChar *s, int32_t length,
CollationKey &key, IcuTestErrorCode &errorCode);
+ UBool getMergedCollationKey(const UChar *s, int32_t length,
+ CollationKey &key, IcuTestErrorCode &errorCode);
UBool checkCompareTwo(const char *norm, const UnicodeString &prevFileLine,
const UnicodeString &prevString, const UnicodeString &s,
UCollationResult expectedOrder, Collation::Level expectedLevel,
@@ -172,11 +171,9 @@ void CollationTest::TestMinMax() {
return;
}
int64_t ce = ces.elementAti(0);
- int64_t expected =
- ((int64_t)Collation::MERGE_SEPARATOR_PRIMARY << 32) |
- Collation::MERGE_SEPARATOR_LOWER32;
+ int64_t expected = Collation::makeCE(Collation::MERGE_SEPARATOR_PRIMARY);
if(ce != expected) {
- errln("CE(U+fffe)=%04lx != 02.02.02", (long)ce);
+ errln("CE(U+fffe)=%04lx != 02..", (long)ce);
}
ce = ces.elementAti(1);
@@ -190,7 +187,7 @@ void CollationTest::TestImplicits() {
IcuTestErrorCode errorCode(*this, "TestImplicits");
const CollationData *cd = CollationRoot::getData(errorCode);
- if(errorCode.logDataIfFailureAndReset("CollationRoot::getBaseData()")) {
+ if(errorCode.logDataIfFailureAndReset("CollationRoot::getData()")) {
return;
}
@@ -617,11 +614,8 @@ UBool isValidCE(const CollationRootElements &re, const CollationData &data,
}
// Minimum & maximum lead bytes.
if((p1 != 0 && p1 <= Collation::MERGE_SEPARATOR_BYTE) ||
- (s1 != 0 && s1 <= Collation::MERGE_SEPARATOR_BYTE) ||
- (t1 != 0 && t1 <= Collation::MERGE_SEPARATOR_BYTE)) {
- return FALSE;
- }
- if(t1 != 0 && t1 > 0x3f) {
+ s1 == Collation::LEVEL_SEPARATOR_BYTE ||
+ t1 == Collation::LEVEL_SEPARATOR_BYTE || t1 > 0x3f) {
return FALSE;
}
if(c > 2) {
@@ -726,7 +720,26 @@ public:
// Simple primary CE.
++index;
pri = p;
- secTer = Collation::COMMON_SEC_AND_TER_CE;
+ // Does this have an explicit below-common sec/ter unit,
+ // or does it imply a common one?
+ if(index == length) {
+ secTer = Collation::COMMON_SEC_AND_TER_CE;
+ } else {
+ secTer = elements[index];
+ if((secTer & CollationRootElements::SEC_TER_DELTA_FLAG) == 0) {
+ // No sec/ter delta.
+ secTer = Collation::COMMON_SEC_AND_TER_CE;
+ } else {
+ secTer &= ~CollationRootElements::SEC_TER_DELTA_FLAG;
+ if(secTer > Collation::COMMON_SEC_AND_TER_CE) {
+ // Implied sec/ter.
+ secTer = Collation::COMMON_SEC_AND_TER_CE;
+ } else {
+ // Explicit sec/ter below common/common.
+ ++index;
+ }
+ }
+ }
return TRUE;
}
@@ -953,24 +966,29 @@ UnicodeString CollationTest::printCollationKey(const CollationKey &key) {
return printSortKey(p, length);
}
-UBool CollationTest::readLine(UCHARBUF *f, IcuTestErrorCode &errorCode) {
- int32_t lineLength;
- const UChar *line = ucbuf_readline(f, &lineLength, errorCode);
- if(line == NULL || errorCode.isFailure()) {
- fileLine.remove();
- return FALSE;
- }
- ++fileLineNumber;
- // Strip trailing CR/LF, comments, and spaces.
- const UChar *comment = u_memchr(line, 0x23, lineLength); // '#'
- if(comment != NULL) {
- lineLength = (int32_t)(comment - line);
- } else {
- while(lineLength > 0 && isCROrLF(line[lineLength - 1])) { --lineLength; }
+UBool CollationTest::readNonEmptyLine(UCHARBUF *f, IcuTestErrorCode &errorCode) {
+ for(;;) {
+ int32_t lineLength;
+ const UChar *line = ucbuf_readline(f, &lineLength, errorCode);
+ if(line == NULL || errorCode.isFailure()) {
+ fileLine.remove();
+ return FALSE;
+ }
+ ++fileLineNumber;
+ // Strip trailing CR/LF, comments, and spaces.
+ const UChar *comment = u_memchr(line, 0x23, lineLength); // '#'
+ if(comment != NULL) {
+ lineLength = (int32_t)(comment - line);
+ } else {
+ while(lineLength > 0 && isCROrLF(line[lineLength - 1])) { --lineLength; }
+ }
+ while(lineLength > 0 && isSpace(line[lineLength - 1])) { --lineLength; }
+ if(lineLength != 0) {
+ fileLine.setTo(FALSE, line, lineLength);
+ return TRUE;
+ }
+ // Empty line, continue.
}
- while(lineLength > 0 && isSpace(line[lineLength - 1])) { --lineLength; }
- fileLine.setTo(FALSE, line, lineLength);
- return TRUE;
}
void CollationTest::parseString(int32_t &start, UnicodeString &prefix, UnicodeString &s,
@@ -1094,6 +1112,8 @@ static const struct {
};
void CollationTest::parseAndSetAttribute(IcuTestErrorCode &errorCode) {
+ // Parse attributes even if the Collator could not be created,
+ // in order to report syntax errors.
int32_t start = skipSpaces(1);
int32_t equalPos = fileLine.indexOf(0x3d);
if(equalPos < 0) {
@@ -1125,12 +1145,14 @@ void CollationTest::parseAndSetAttribute(IcuTestErrorCode &errorCode) {
errorCode.set(U_PARSE_ERROR);
return;
}
- coll->setMaxVariable(max, errorCode);
- if(errorCode.isFailure()) {
- errln("setMaxVariable() failed on line %d: %s",
- (int)fileLineNumber, errorCode.errorName());
- infoln(fileLine);
- return;
+ if(coll != NULL) {
+ coll->setMaxVariable(max, errorCode);
+ if(errorCode.isFailure()) {
+ errln("setMaxVariable() failed on line %d: %s",
+ (int)fileLineNumber, errorCode.errorName());
+ infoln(fileLine);
+ return;
+ }
}
fileLine.remove();
return;
@@ -1164,12 +1186,14 @@ void CollationTest::parseAndSetAttribute(IcuTestErrorCode &errorCode) {
}
}
- coll->setAttribute(attr, value, errorCode);
- if(errorCode.isFailure()) {
- errln("illegal attribute=value combination on line %d: %s",
- (int)fileLineNumber, errorCode.errorName());
- infoln(fileLine);
- return;
+ if(coll != NULL) {
+ coll->setAttribute(attr, value, errorCode);
+ if(errorCode.isFailure()) {
+ errln("illegal attribute=value combination on line %d: %s",
+ (int)fileLineNumber, errorCode.errorName());
+ infoln(fileLine);
+ return;
+ }
}
fileLine.remove();
}
@@ -1196,20 +1220,21 @@ void CollationTest::parseAndSetReorderCodes(int32_t start, IcuTestErrorCode &err
reorderCodes.addElement(code, errorCode);
start = limit;
}
- coll->setReorderCodes(reorderCodes.getBuffer(), reorderCodes.size(), errorCode);
- if(errorCode.isFailure()) {
- errln("setReorderCodes() failed on line %d: %s", (int)fileLineNumber, errorCode.errorName());
- infoln(fileLine);
- return;
+ if(coll != NULL) {
+ coll->setReorderCodes(reorderCodes.getBuffer(), reorderCodes.size(), errorCode);
+ if(errorCode.isFailure()) {
+ errln("setReorderCodes() failed on line %d: %s",
+ (int)fileLineNumber, errorCode.errorName());
+ infoln(fileLine);
+ return;
+ }
}
fileLine.remove();
}
void CollationTest::buildTailoring(UCHARBUF *f, IcuTestErrorCode &errorCode) {
UnicodeString rules;
- while(readLine(f, errorCode)) {
- if(fileLine.isEmpty()) { continue; }
- if(isSectionStarter(fileLine[0])) { break; }
+ while(readNonEmptyLine(f, errorCode) && !isSectionStarter(fileLine[0])) {
rules.append(fileLine.unescape());
}
if(errorCode.isFailure()) { return; }
@@ -1232,6 +1257,9 @@ void CollationTest::buildTailoring(UCHARBUF *f, IcuTestErrorCode &errorCode) {
infoln(UnicodeString(" snippet: ...") +
parseError.preContext + "(!)" + parseError.postContext + "...");
}
+ delete coll;
+ coll = NULL;
+ errorCode.reset();
} else {
assertEquals("no error reason when RuleBasedCollator(rules) succeeds",
UnicodeString(), reason);
@@ -1250,6 +1278,8 @@ void CollationTest::setRootCollator(IcuTestErrorCode &errorCode) {
void CollationTest::setLocaleCollator(IcuTestErrorCode &errorCode) {
if(errorCode.isFailure()) { return; }
+ delete coll;
+ coll = NULL;
int32_t at = fileLine.indexOf((UChar)0x40, 9); // @ is not invariant
if(at >= 0) {
fileLine.setCharAt(at, (UChar)0x2a); // *
@@ -1268,15 +1298,15 @@ void CollationTest::setLocaleCollator(IcuTestErrorCode &errorCode) {
}
logln("creating a collator for locale ID %s", locale.getName());
- Collator *newColl = Collator::createInstance(locale, errorCode);
+ coll = Collator::createInstance(locale, errorCode);
if(errorCode.isFailure()) {
dataerrln("unable to create a collator for locale %s on line %d",
locale.getName(), (int)fileLineNumber);
infoln(fileLine);
- return;
+ delete coll;
+ coll = NULL;
+ errorCode.reset();
}
- delete coll;
- coll = newColl;
}
UBool CollationTest::needsNormalization(const UnicodeString &s, UErrorCode &errorCode) const {
@@ -1372,7 +1402,39 @@ UBool CollationTest::getCollationKey(const char *norm, const UnicodeString &line
return FALSE;
}
- // If s contains U+FFFE, check that merged segments make the same key.
+ // Check that internalNextSortKeyPart() makes the same key, with several part sizes.
+ static const int32_t partSizes[] = { 32, 3, 1 };
+ for(int32_t psi = 0; psi < UPRV_LENGTHOF(partSizes); ++psi) {
+ int32_t partSize = partSizes[psi];
+ CharString parts;
+ if(!getSortKeyParts(s, length, parts, 32, errorCode)) {
+ infoln(fileTestName);
+ errln("Collator(%s).internalNextSortKeyPart(%d) failed: %s",
+ norm, (int)partSize, errorCode.errorName());
+ infoln(line);
+ return FALSE;
+ }
+ if(keyLength != parts.length() || uprv_memcmp(keyBytes, parts.data(), keyLength) != 0) {
+ infoln(fileTestName);
+ errln("Collator(%s).getCollationKey() != internalNextSortKeyPart(%d)",
+ norm, (int)partSize);
+ infoln(line);
+ infoln(printCollationKey(key));
+ infoln(printSortKey(reinterpret_cast<uint8_t *>(parts.data()), parts.length()));
+ return FALSE;
+ }
+ }
+ return TRUE;
+}
+
+/**
+ * Changes the key to the merged segments of the U+FFFE-separated substrings of s.
+ * Leaves key unchanged if s does not contain U+FFFE.
+ * @return TRUE if the key was successfully changed
+ */
+UBool CollationTest::getMergedCollationKey(const UChar *s, int32_t length,
+ CollationKey &key, IcuTestErrorCode &errorCode) {
+ if(errorCode.isFailure()) { return FALSE; }
LocalMemory<uint8_t> mergedKey;
int32_t mergedKeyLength = 0;
int32_t mergedKeyCapacity = 0;
@@ -1382,7 +1444,7 @@ UBool CollationTest::getCollationKey(const char *norm, const UnicodeString &line
if(i == sLength) {
if(segmentStart == 0) {
// s does not contain any U+FFFE.
- break;
+ return FALSE;
}
} else if(s[i] != 0xfffe) {
++i;
@@ -1423,41 +1485,7 @@ UBool CollationTest::getCollationKey(const char *norm, const UnicodeString &line
if(i == sLength) { break; }
segmentStart = ++i;
}
- if(segmentStart != 0 &&
- (mergedKeyLength != keyLength ||
- uprv_memcmp(mergedKey.getAlias(), keyBytes, keyLength) != 0)) {
- infoln(fileTestName);
- errln("Collator(%s).getCollationKey(with U+FFFE) != "
- "ucol_mergeSortkeys(segments)",
- norm);
- infoln(line);
- infoln(printCollationKey(key));
- infoln(printSortKey(mergedKey.getAlias(), mergedKeyLength));
- return FALSE;
- }
-
- // Check that internalNextSortKeyPart() makes the same key, with several part sizes.
- static const int32_t partSizes[] = { 32, 3, 1 };
- for(int32_t psi = 0; psi < UPRV_LENGTHOF(partSizes); ++psi) {
- int32_t partSize = partSizes[psi];
- CharString parts;
- if(!getSortKeyParts(s, length, parts, 32, errorCode)) {
- infoln(fileTestName);
- errln("Collator(%s).internalNextSortKeyPart(%d) failed: %s",
- norm, (int)partSize, errorCode.errorName());
- infoln(line);
- return FALSE;
- }
- if(keyLength != parts.length() || uprv_memcmp(keyBytes, parts.data(), keyLength) != 0) {
- infoln(fileTestName);
- errln("Collator(%s).getCollationKey() != internalNextSortKeyPart(%d)",
- norm, (int)partSize);
- infoln(line);
- infoln(printCollationKey(key));
- infoln(printSortKey(reinterpret_cast<uint8_t *>(parts.data()), parts.length()));
- return FALSE;
- }
- }
+ key = CollationKey(mergedKey.getAlias(), mergedKeyLength);
return TRUE;
}
@@ -1488,6 +1516,29 @@ const UnicodeString &surrogatesToFFFD(const UnicodeString &s, UnicodeString &buf
return buffer;
}
+int32_t getDifferenceLevel(const CollationKey &prevKey, const CollationKey &key,
+ UCollationResult order, UBool collHasCaseLevel) {
+ if(order == UCOL_EQUAL) {
+ return Collation::NO_LEVEL;
+ }
+ int32_t prevKeyLength;
+ const uint8_t *prevBytes = prevKey.getByteArray(prevKeyLength);
+ int32_t keyLength;
+ const uint8_t *bytes = key.getByteArray(keyLength);
+ int32_t level = Collation::PRIMARY_LEVEL;
+ for(int32_t i = 0;; ++i) {
+ uint8_t b = prevBytes[i];
+ if(b != bytes[i]) { break; }
+ if(b == Collation::LEVEL_SEPARATOR_BYTE) {
+ ++level;
+ if(level == Collation::CASE_LEVEL && !collHasCaseLevel) {
+ ++level;
+ }
+ }
+ }
+ return level;
+}
+
}
UBool CollationTest::checkCompareTwo(const char *norm, const UnicodeString &prevFileLine,
@@ -1649,23 +1700,9 @@ UBool CollationTest::checkCompareTwo(const char *norm, const UnicodeString &prev
infoln(printCollationKey(key));
return FALSE;
}
+ UBool collHasCaseLevel = coll->getAttribute(UCOL_CASE_LEVEL, errorCode) == UCOL_ON;
+ int32_t level = getDifferenceLevel(prevKey, key, order, collHasCaseLevel);
if(order != UCOL_EQUAL && expectedLevel != Collation::NO_LEVEL) {
- int32_t prevKeyLength;
- const uint8_t *prevBytes = prevKey.getByteArray(prevKeyLength);
- int32_t keyLength;
- const uint8_t *bytes = key.getByteArray(keyLength);
- int32_t level = Collation::PRIMARY_LEVEL;
- for(int32_t i = 0;; ++i) {
- uint8_t b = prevBytes[i];
- if(b != bytes[i]) { break; }
- if(b == Collation::LEVEL_SEPARATOR_BYTE) {
- ++level;
- if(level == Collation::CASE_LEVEL &&
- coll->getAttribute(UCOL_CASE_LEVEL, errorCode) == UCOL_OFF) {
- ++level;
- }
- }
- }
if(level != expectedLevel) {
infoln(fileTestName);
errln("line %d Collator(%s).getCollationKey(previous, current).compareTo()=%d wrong level: %d != %d",
@@ -1677,6 +1714,45 @@ UBool CollationTest::checkCompareTwo(const char *norm, const UnicodeString &prev
return FALSE;
}
}
+
+ // If either string contains U+FFFE, then their sort keys must compare the same as
+ // the merged sort keys of each string's between-FFFE segments.
+ //
+ // It is not required that
+ // sortkey(str1 + "\uFFFE" + str2) == mergeSortkeys(sortkey(str1), sortkey(str2))
+ // only that those two methods yield the same order.
+ //
+ // Use bit-wise OR so that getMergedCollationKey() is always called for both strings.
+ if((getMergedCollationKey(prevString.getBuffer(), prevString.length(), prevKey, errorCode) |
+ getMergedCollationKey(s.getBuffer(), s.length(), key, errorCode)) ||
+ errorCode.isFailure()) {
+ order = prevKey.compareTo(key, errorCode);
+ if(order != expectedOrder || errorCode.isFailure()) {
+ infoln(fileTestName);
+ errln("line %d ucol_mergeSortkeys(Collator(%s).getCollationKey"
+ "(previous, current segments between U+FFFE)).compareTo() wrong order: %d != %d (%s)",
+ (int)fileLineNumber, norm, order, expectedOrder, errorCode.errorName());
+ infoln(prevFileLine);
+ infoln(fileLine);
+ infoln(printCollationKey(prevKey));
+ infoln(printCollationKey(key));
+ return FALSE;
+ }
+ int32_t mergedLevel = getDifferenceLevel(prevKey, key, order, collHasCaseLevel);
+ if(order != UCOL_EQUAL && expectedLevel != Collation::NO_LEVEL) {
+ if(mergedLevel != level) {
+ infoln(fileTestName);
+ errln("line %d ucol_mergeSortkeys(Collator(%s).getCollationKey"
+ "(previous, current segments between U+FFFE)).compareTo()=%d wrong level: %d != %d",
+ (int)fileLineNumber, norm, order, mergedLevel, level);
+ infoln(prevFileLine);
+ infoln(fileLine);
+ infoln(printCollationKey(prevKey));
+ infoln(printCollationKey(key));
+ return FALSE;
+ }
+ }
+ }
return TRUE;
}
@@ -1685,14 +1761,20 @@ void CollationTest::checkCompareStrings(UCHARBUF *f, IcuTestErrorCode &errorCode
UnicodeString prevFileLine = UNICODE_STRING("(none)", 6);
UnicodeString prevString, s;
prevString.getTerminatedBuffer(); // Ensure NUL-termination.
- while(readLine(f, errorCode)) {
- if(fileLine.isEmpty()) { continue; }
- if(isSectionStarter(fileLine[0])) { break; }
+ while(readNonEmptyLine(f, errorCode) && !isSectionStarter(fileLine[0])) {
+ // Parse the line even if it will be ignored (when we do not have a Collator)
+ // in order to report syntax issues.
Collation::Level relation = parseRelationAndString(s, errorCode);
if(errorCode.isFailure()) {
errorCode.reset();
break;
}
+ if(coll == NULL) {
+ // We were unable to create the Collator but continue with tests.
+ // Ignore test data for this Collator.
+ // The next Collator creation might work.
+ continue;
+ }
UCollationResult expectedOrder = (relation == Collation::ZERO_LEVEL) ? UCOL_EQUAL : UCOL_LESS;
Collation::Level expectedLevel = relation;
s.getTerminatedBuffer(); // Ensure NUL-termination.
@@ -1741,13 +1823,9 @@ void CollationTest::TestDataDriven() {
if(errorCode.logIfFailureAndReset("ucbuf_open(collationtest.txt)")) {
return;
}
- while(errorCode.isSuccess()) {
- // Read a new line if necessary.
- // Sub-parsers leave the first line set that they do not handle.
- if(fileLine.isEmpty()) {
- if(!readLine(f.getAlias(), errorCode)) { break; }
- continue;
- }
+ // Read a new line if necessary.
+ // Sub-parsers leave the first line set that they do not handle.
+ while(errorCode.isSuccess() && (!fileLine.isEmpty() || readNonEmptyLine(f.getAlias(), errorCode))) {
if(!isSectionStarter(fileLine[0])) {
errln("syntax error on line %d", (int)fileLineNumber);
infoln(fileLine);
« no previous file with comments | « source/test/intltest/canittst.cpp ('k') | source/test/intltest/compactdecimalformattest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698