Index: source/i18n/collationweights.cpp |
diff --git a/source/i18n/collationweights.cpp b/source/i18n/collationweights.cpp |
index 17c044f8e92d068d86066f61480b20ed7f17795e..869cbaa6d3f21cd7f64ca3ed1bb4a78c99060f64 100644 |
--- a/source/i18n/collationweights.cpp |
+++ b/source/i18n/collationweights.cpp |
@@ -1,7 +1,7 @@ |
/* |
******************************************************************************* |
* |
-* Copyright (C) 1999-2014, International Business Machines |
+* Copyright (C) 1999-2015, International Business Machines |
* Corporation and others. All Rights Reserved. |
* |
******************************************************************************* |
@@ -126,7 +126,7 @@ CollationWeights::initForSecondary() { |
maxBytes[1] = 0; |
minBytes[2] = 0; |
maxBytes[2] = 0; |
- minBytes[3] = Collation::MERGE_SEPARATOR_BYTE + 1; |
+ minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1; |
maxBytes[3] = 0xff; |
minBytes[4] = 2; |
maxBytes[4] = 0xff; |
@@ -142,7 +142,7 @@ CollationWeights::initForTertiary() { |
maxBytes[2] = 0; |
// We use only 6 bits per byte. |
// The other bits are used for case & quaternary weights. |
- minBytes[3] = Collation::MERGE_SEPARATOR_BYTE + 1; |
+ minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1; |
maxBytes[3] = 0x3f; |
minBytes[4] = 2; |
maxBytes[4] = 0x3f; |
@@ -296,24 +296,49 @@ CollationWeights::getWeightRanges(uint32_t lowerLimit, uint32_t upperLimit) { |
middle.count=(int32_t)((middle.end-middle.start)>>(8*(4-middleLength)))+1; |
} else { |
/* no middle range, eliminate overlaps */ |
- |
- /* reduce or remove the lower ranges that go beyond upperLimit */ |
for(int32_t length=4; length>middleLength; --length) { |
if(lower[length].count>0 && upper[length].count>0) { |
- uint32_t start=upper[length].start; |
- uint32_t end=lower[length].end; |
- |
- if(end>=start || incWeight(end, length)==start) { |
- /* lower and upper ranges collide or are directly adjacent: merge these two and remove all shorter ranges */ |
- start=lower[length].start; |
- end=lower[length].end=upper[length].end; |
- /* |
- * merging directly adjacent ranges needs to subtract the 0/1 gaps in between; |
- * it may result in a range with count>countBytes |
- */ |
+ // Note: The lowerEnd and upperStart weights are versions of |
+ // lowerLimit and upperLimit (where lowerLimit<upperLimit), |
+ // truncated (still less-or-equal) |
+ // and then with their last bytes changed to the |
+ // maxByte (for lowerEnd) or minByte (for upperStart). |
+ const uint32_t lowerEnd=lower[length].end; |
+ const uint32_t upperStart=upper[length].start; |
+ UBool merged=FALSE; |
+ |
+ if(lowerEnd>upperStart) { |
+ // These two lower and upper ranges collide. |
+ // Since lowerLimit<upperLimit and lowerEnd and upperStart |
+ // are versions with only their last bytes modified |
+ // (and following ones removed/reset to 0), |
+ // lowerEnd>upperStart is only possible |
+ // if the leading bytes are equal |
+ // and lastByte(lowerEnd)>lastByte(upperStart). |
+ U_ASSERT(truncateWeight(lowerEnd, length-1)== |
+ truncateWeight(upperStart, length-1)); |
+ // Intersect these two ranges. |
+ lower[length].end=upper[length].end; |
lower[length].count= |
- (int32_t)(getWeightTrail(end, length)-getWeightTrail(start, length)+1+ |
- countBytes(length)*(getWeightByte(end, length-1)-getWeightByte(start, length-1))); |
+ (int32_t)getWeightTrail(lower[length].end, length)- |
+ (int32_t)getWeightTrail(lower[length].start, length)+1; |
+ // count might be <=0 in which case there is no room, |
+ // and the range-collecting code below will ignore this range. |
+ merged=TRUE; |
+ } else if(lowerEnd==upperStart) { |
+ // Not possible, unless minByte==maxByte, which is not allowed. |
+ U_ASSERT(minBytes[length]<maxBytes[length]); |
+ } else /* lowerEnd<upperStart */ { |
+ if(incWeight(lowerEnd, length)==upperStart) { |
+ // Merge adjacent ranges. |
+ lower[length].end=upper[length].end; |
+ lower[length].count+=upper[length].count; // might be >countBytes |
+ merged=TRUE; |
+ } |
+ } |
+ if(merged) { |
+ // Remove all shorter ranges. |
+ // There was no room available for them between the ranges we just merged. |
upper[length].count=0; |
while(--length>middleLength) { |
lower[length].count=upper[length].count=0; |