Index: source/i18n/collationbuilder.cpp |
diff --git a/source/i18n/collationbuilder.cpp b/source/i18n/collationbuilder.cpp |
index 37f701ce775cc9a6cf6a8a842d35ed3381700ab6..3465832d5a240b0e5fc6982bf78841e8668a0d72 100644 |
--- a/source/i18n/collationbuilder.cpp |
+++ b/source/i18n/collationbuilder.cpp |
@@ -189,6 +189,13 @@ RuleBasedCollator::internalBuildTailoring(const UnicodeString &rules, |
// CollationBuilder implementation ----------------------------------------- *** |
+// Out-of-class definitions of the CollationBuilder constants HAS_BEFORE2 and HAS_BEFORE3. |
+// MS Visual C++ does not like such definitions in the .cpp file, but gcc requires them; clang does not care. |
+#ifndef _MSC_VER |
+const int32_t CollationBuilder::HAS_BEFORE2; |
+const int32_t CollationBuilder::HAS_BEFORE3; |
+#endif |
+ |
CollationBuilder::CollationBuilder(const CollationTailoring *b, UErrorCode &errorCode) |
: nfd(*Normalizer2::getNFDInstance(errorCode)), |
fcd(*Normalizer2Factory::getFCDInstance(errorCode)), |
@@ -313,7 +320,7 @@ CollationBuilder::addReset(int32_t strength, const UnicodeString &str, |
if(U_FAILURE(errorCode)) { return; } |
int64_t node = nodes.elementAti(index); |
- // If the index is for a "weaker" tailored node, |
+ // If the index is for a "weaker" node, |
// then skip backwards over this and further "weaker" nodes. |
while(strengthFromNode(node) > strength) { |
index = previousIndexFromNode(node); |
@@ -360,6 +367,8 @@ CollationBuilder::addReset(int32_t strength, const UnicodeString &str, |
if(strength >= UCOL_TERTIARY) { |
index = findCommonNode(index, UCOL_TERTIARY); |
} |
+ // findCommonNode() stayed on the stronger node or moved to |
+ // an explicit common-weight node of the reset-before strength. |
node = nodes.elementAti(index); |
if(strengthFromNode(node) == strength) { |
// Found a same-strength node with an explicit weight. |
@@ -373,89 +382,45 @@ CollationBuilder::addReset(int32_t strength, const UnicodeString &str, |
} |
return; |
} |
- U_ASSERT(weight16 >= Collation::COMMON_WEIGHT16); |
+ U_ASSERT(weight16 > Collation::BEFORE_WEIGHT16); |
+ // Reset to just before this node. |
+ // Insert the preceding same-level explicit weight if it is not there already. |
+ // Which explicit weight immediately precedes this one? |
+ weight16 = getWeight16Before(index, node, strength); |
+ // Does this preceding weight have a node? |
+ uint32_t previousWeight16; |
int32_t previousIndex = previousIndexFromNode(node); |
- if(weight16 == Collation::COMMON_WEIGHT16) { |
- // Reset to just before this same-strength common-weight node. |
+ for(int32_t i = previousIndex;; i = previousIndexFromNode(node)) { |
+ node = nodes.elementAti(i); |
+ int32_t previousStrength = strengthFromNode(node); |
+ if(previousStrength < strength) { |
+ U_ASSERT(weight16 >= Collation::COMMON_WEIGHT16 || i == previousIndex); |
+ // Either the reset element has an above-common weight and |
+ // the parent node provides the implied common weight, |
+ // or the reset element has a weight<=common in the node |
+ // right after the parent, and we need to insert the preceding weight. |
+ previousWeight16 = Collation::COMMON_WEIGHT16; |
+ break; |
+ } else if(previousStrength == strength && !isTailoredNode(node)) { |
+ previousWeight16 = weight16FromNode(node); |
+ break; |
+ } |
+ // Skip weaker nodes and same-level tailored nodes. |
+ } |
+ if(previousWeight16 == weight16) { |
+ // The preceding weight has a node, |
+ // maybe with following weaker or tailored nodes. |
+ // Reset to the last of them. |
index = previousIndex; |
} else { |
- // A non-common weight is only possible from a root CE. |
- // Find the higher-level weights, which must all be explicit, |
- // and then find the preceding weight for this level. |
- uint32_t previousWeight16 = 0; |
- int32_t previousWeightIndex = -1; |
- int32_t i = index; |
- if(strength == UCOL_SECONDARY) { |
- uint32_t p; |
- do { |
- i = previousIndexFromNode(node); |
- node = nodes.elementAti(i); |
- if(strengthFromNode(node) == UCOL_SECONDARY && !isTailoredNode(node) && |
- previousWeightIndex < 0) { |
- previousWeightIndex = i; |
- previousWeight16 = weight16FromNode(node); |
- } |
- } while(strengthFromNode(node) > UCOL_PRIMARY); |
- U_ASSERT(!isTailoredNode(node)); |
- p = weight32FromNode(node); |
- weight16 = rootElements.getSecondaryBefore(p, weight16); |
- } else { |
- uint32_t p, s; |
- do { |
- i = previousIndexFromNode(node); |
- node = nodes.elementAti(i); |
- if(strengthFromNode(node) == UCOL_TERTIARY && !isTailoredNode(node) && |
- previousWeightIndex < 0) { |
- previousWeightIndex = i; |
- previousWeight16 = weight16FromNode(node); |
- } |
- } while(strengthFromNode(node) > UCOL_SECONDARY); |
- U_ASSERT(!isTailoredNode(node)); |
- if(strengthFromNode(node) == UCOL_SECONDARY) { |
- s = weight16FromNode(node); |
- do { |
- i = previousIndexFromNode(node); |
- node = nodes.elementAti(i); |
- } while(strengthFromNode(node) > UCOL_PRIMARY); |
- U_ASSERT(!isTailoredNode(node)); |
- } else { |
- U_ASSERT(!nodeHasBefore2(node)); |
- s = Collation::COMMON_WEIGHT16; |
- } |
- p = weight32FromNode(node); |
- weight16 = rootElements.getTertiaryBefore(p, s, weight16); |
- U_ASSERT((weight16 & ~Collation::ONLY_TERTIARY_MASK) == 0); |
- } |
- // Find or insert the new explicit weight before the current one. |
- if(previousWeightIndex >= 0 && weight16 == previousWeight16) { |
- // Tailor after the last node between adjacent root nodes. |
- index = previousIndex; |
- } else { |
- node = nodeFromWeight16(weight16) | nodeFromStrength(strength); |
- index = insertNodeBetween(previousIndex, index, node, errorCode); |
- } |
+ // Insert a node with the preceding weight, reset to that. |
+ node = nodeFromWeight16(weight16) | nodeFromStrength(strength); |
+ index = insertNodeBetween(previousIndex, index, node, errorCode); |
} |
} else { |
// Found a stronger node with implied strength-common weight. |
- int64_t hasBefore3 = 0; |
- if(strength == UCOL_SECONDARY) { |
- U_ASSERT(!nodeHasBefore2(node)); |
- // Move the HAS_BEFORE3 flag from the parent node |
- // to the new secondary common node. |
- hasBefore3 = node & HAS_BEFORE3; |
- node = (node & ~(int64_t)HAS_BEFORE3) | HAS_BEFORE2; |
- } else { |
- U_ASSERT(!nodeHasBefore3(node)); |
- node |= HAS_BEFORE3; |
- } |
- nodes.setElementAt(node, index); |
- int32_t nextIndex = nextIndexFromNode(node); |
- // Insert default nodes with weights 02 and 05, reset to the 02 node. |
- node = nodeFromWeight16(BEFORE_WEIGHT16) | nodeFromStrength(strength); |
- index = insertNodeBetween(index, nextIndex, node, errorCode); |
- node = nodeFromWeight16(Collation::COMMON_WEIGHT16) | hasBefore3 | |
- nodeFromStrength(strength); |
- insertNodeBetween(index, nextIndex, node, errorCode); |
+ uint32_t weight16 = getWeight16Before(index, node, strength); |
+ index = findOrInsertWeakNode(index, weight16, strength, errorCode); |
} |
// Strength of the temporary CE = strength of its reset position. |
// Code above raises an error if the before-strength is stronger. |
@@ -468,6 +433,49 @@ CollationBuilder::addReset(int32_t strength, const UnicodeString &str, |
ces[cesLength - 1] = tempCEFromIndexAndStrength(index, strength); |
} |
+uint32_t |
+CollationBuilder::getWeight16Before(int32_t index, int64_t node, int32_t level) { |
+ U_ASSERT(strengthFromNode(node) < level || !isTailoredNode(node)); |
+ // Collect the weights [p, s, t] by stepping back from this node to the preceding secondary-or-stronger and then primary node. |
+ // If either of those nodes is tailored, this is not a root CE: return the low non-primary boundary Collation::BEFORE_WEIGHT16. |
+ uint32_t t; |
+ if(strengthFromNode(node) == UCOL_TERTIARY) { |
+ t = weight16FromNode(node); |
+ } else { |
+ t = Collation::COMMON_WEIGHT16; // Stronger node: no explicit tertiary weight, so use the implied common weight. |
+ } |
+ while(strengthFromNode(node) > UCOL_SECONDARY) { |
+ index = previousIndexFromNode(node); |
+ node = nodes.elementAti(index); |
+ } |
+ if(isTailoredNode(node)) { |
+ return Collation::BEFORE_WEIGHT16; |
+ } |
+ uint32_t s; |
+ if(strengthFromNode(node) == UCOL_SECONDARY) { |
+ s = weight16FromNode(node); |
+ } else { |
+ s = Collation::COMMON_WEIGHT16; // Stronger node: no explicit secondary weight, so use the implied common weight. |
+ } |
+ while(strengthFromNode(node) > UCOL_PRIMARY) { |
+ index = previousIndexFromNode(node); |
+ node = nodes.elementAti(index); |
+ } |
+ if(isTailoredNode(node)) { |
+ return Collation::BEFORE_WEIGHT16; |
+ } |
+ // [p, s, t] is a root CE. Ask rootElements for the weight before s (level == UCOL_SECONDARY) or before t (any other level). |
+ uint32_t p = weight32FromNode(node); |
+ uint32_t weight16; |
+ if(level == UCOL_SECONDARY) { |
+ weight16 = rootElements.getSecondaryBefore(p, s); |
+ } else { |
+ weight16 = rootElements.getTertiaryBefore(p, s, t); |
+ U_ASSERT((weight16 & ~Collation::ONLY_TERTIARY_MASK) == 0); |
+ } |
+ return weight16; |
+} |
+ |
int64_t |
CollationBuilder::getSpecialResetPosition(const UnicodeString &str, |
const char *&parserErrorReason, UErrorCode &errorCode) { |
@@ -793,7 +801,7 @@ CollationBuilder::findOrInsertNodeForRootCE(int64_t ce, int32_t strength, UError |
// down to the requested level/strength. |
// Root CEs must have common=zero quaternary weights (for which we never insert any nodes). |
U_ASSERT((ce & 0xc0) == 0); |
- int32_t index = findOrInsertNodeForPrimary((uint32_t)(ce >> 32) , errorCode); |
+ int32_t index = findOrInsertNodeForPrimary((uint32_t)(ce >> 32), errorCode); |
if(strength >= UCOL_SECONDARY) { |
uint32_t lower32 = (uint32_t)ce; |
index = findOrInsertWeakNode(index, lower32 >> 16, UCOL_SECONDARY, errorCode); |
@@ -863,17 +871,44 @@ int32_t |
CollationBuilder::findOrInsertWeakNode(int32_t index, uint32_t weight16, int32_t level, UErrorCode &errorCode) { |
if(U_FAILURE(errorCode)) { return 0; } |
U_ASSERT(0 <= index && index < nodes.size()); |
+ U_ASSERT(UCOL_SECONDARY <= level && level <= UCOL_TERTIARY); |
- U_ASSERT(weight16 == 0 || weight16 >= Collation::COMMON_WEIGHT16); |
- // Only reset-before inserts common weights. |
if(weight16 == Collation::COMMON_WEIGHT16) { |
return findCommonNode(index, level); |
} |
+ |
+ // If this will be the first below-common weight for the parent node, |
+ // then we will also need to insert a common weight after it. |
+ int64_t node = nodes.elementAti(index); |
+ U_ASSERT(strengthFromNode(node) < level); // parent node is stronger |
+ if(weight16 != 0 && weight16 < Collation::COMMON_WEIGHT16) { |
+ int32_t hasThisLevelBefore = level == UCOL_SECONDARY ? HAS_BEFORE2 : HAS_BEFORE3; |
+ if((node & hasThisLevelBefore) == 0) { |
+ // The parent node has an implied level-common weight. |
+ int64_t commonNode = |
+ nodeFromWeight16(Collation::COMMON_WEIGHT16) | nodeFromStrength(level); |
+ if(level == UCOL_SECONDARY) { |
+ // Move the HAS_BEFORE3 flag from the parent node |
+ // to the new secondary common node. |
+ commonNode |= node & HAS_BEFORE3; |
+ node &= ~(int64_t)HAS_BEFORE3; |
+ } |
+ nodes.setElementAt(node | hasThisLevelBefore, index); |
+ // Insert below-common-weight node. |
+ int32_t nextIndex = nextIndexFromNode(node); |
+ node = nodeFromWeight16(weight16) | nodeFromStrength(level); |
+ index = insertNodeBetween(index, nextIndex, node, errorCode); |
+ // Insert common-weight node. |
+ insertNodeBetween(index, nextIndex, commonNode, errorCode); |
+ // Return index of below-common-weight node. |
+ return index; |
+ } |
+ } |
+ |
// Find the root CE's weight for this level. |
// Postpone insertion if not found: |
// Insert the new root node before the next stronger node, |
// or before the next root node with the same strength and a larger weight. |
- int64_t node = nodes.elementAti(index); |
int32_t nextIndex; |
while((nextIndex = nextIndexFromNode(node)) != 0) { |
node = nodes.elementAti(nextIndex); |
@@ -961,13 +996,14 @@ CollationBuilder::findCommonNode(int32_t index, int32_t strength) const { |
index = nextIndexFromNode(node); |
node = nodes.elementAti(index); |
U_ASSERT(!isTailoredNode(node) && strengthFromNode(node) == strength && |
- weight16FromNode(node) == BEFORE_WEIGHT16); |
+ weight16FromNode(node) < Collation::COMMON_WEIGHT16); |
// Skip to the explicit common node. |
do { |
index = nextIndexFromNode(node); |
node = nodes.elementAti(index); |
U_ASSERT(strengthFromNode(node) >= strength); |
- } while(isTailoredNode(node) || strengthFromNode(node) > strength); |
+ } while(isTailoredNode(node) || strengthFromNode(node) > strength || |
+ weight16FromNode(node) < Collation::COMMON_WEIGHT16); |
U_ASSERT(weight16FromNode(node) == Collation::COMMON_WEIGHT16); |
return index; |
} |
@@ -1351,6 +1387,9 @@ CollationBuilder::makeTailoredCEs(UErrorCode &errorCode) { |
CollationWeights primaries, secondaries, tertiaries; |
int64_t *nodesArray = nodes.getBuffer(); |
+#ifdef DEBUG_COLLATION_BUILDER |
+ puts("\nCollationBuilder::makeTailoredCEs()"); |
+#endif |
for(int32_t rpi = 0; rpi < rootPrimaryIndexes.size(); ++rpi) { |
int32_t i = rootPrimaryIndexes.elementAti(rpi); |
@@ -1398,11 +1437,11 @@ CollationBuilder::makeTailoredCEs(UErrorCode &errorCode) { |
// Gap at the beginning of the tertiary CE range. |
t = rootElements.getTertiaryBoundary() - 0x100; |
tLimit = rootElements.getFirstTertiaryCE() & Collation::ONLY_TERTIARY_MASK; |
- } else if(t == BEFORE_WEIGHT16) { |
- tLimit = Collation::COMMON_WEIGHT16; |
} else if(!pIsTailored && !sIsTailored) { |
// p and s are root weights. |
tLimit = rootElements.getTertiaryAfter(pIndex, s, t); |
+ } else if(t == Collation::BEFORE_WEIGHT16) { |
+ tLimit = Collation::COMMON_WEIGHT16; |
} else { |
// [p, s] is tailored. |
U_ASSERT(t == Collation::COMMON_WEIGHT16); |
@@ -1441,11 +1480,11 @@ CollationBuilder::makeTailoredCEs(UErrorCode &errorCode) { |
// Gap at the beginning of the secondary CE range. |
s = rootElements.getSecondaryBoundary() - 0x100; |
sLimit = rootElements.getFirstSecondaryCE() >> 16; |
- } else if(s == BEFORE_WEIGHT16) { |
- sLimit = Collation::COMMON_WEIGHT16; |
} else if(!pIsTailored) { |
// p is a root primary. |
sLimit = rootElements.getSecondaryAfter(pIndex, s); |
+ } else if(s == Collation::BEFORE_WEIGHT16) { |
+ sLimit = Collation::COMMON_WEIGHT16; |
} else { |
// p is a tailored primary. |
U_ASSERT(s == Collation::COMMON_WEIGHT16); |
@@ -1460,6 +1499,11 @@ CollationBuilder::makeTailoredCEs(UErrorCode &errorCode) { |
if(!secondaries.allocWeights(s, sLimit, sCount)) { |
errorCode = U_BUFFER_OVERFLOW_ERROR; |
errorReason = "secondary tailoring gap too small"; |
+#ifdef DEBUG_COLLATION_BUILDER |
+ printf("!secondaries.allocWeights(%lx, %lx, sCount=%ld)\n", |
+ (long)alignWeightRight(s), (long)alignWeightRight(sLimit), |
+ (long)alignWeightRight(sCount)); |
+#endif |
return; |
} |
sIsTailored = TRUE; |
@@ -1563,9 +1607,8 @@ CEFinalizer::~CEFinalizer() {} |
void |
CollationBuilder::finalizeCEs(UErrorCode &errorCode) { |
if(U_FAILURE(errorCode)) { return; } |
- LocalPointer<CollationDataBuilder> newBuilder(new CollationDataBuilder(errorCode)); |
- if(newBuilder.isNull()) { |
- errorCode = U_MEMORY_ALLOCATION_ERROR; |
+ LocalPointer<CollationDataBuilder> newBuilder(new CollationDataBuilder(errorCode), errorCode); |
+ if(U_FAILURE(errorCode)) { |
return; |
} |
newBuilder->initForTailoring(baseData, errorCode); |