| Index: source/i18n/collationweights.cpp
|
| diff --git a/source/i18n/collationweights.cpp b/source/i18n/collationweights.cpp
|
| index 17c044f8e92d068d86066f61480b20ed7f17795e..869cbaa6d3f21cd7f64ca3ed1bb4a78c99060f64 100644
|
| --- a/source/i18n/collationweights.cpp
|
| +++ b/source/i18n/collationweights.cpp
|
| @@ -1,7 +1,7 @@
|
| /*
|
| *******************************************************************************
|
| *
|
| -* Copyright (C) 1999-2014, International Business Machines
|
| +* Copyright (C) 1999-2015, International Business Machines
|
| * Corporation and others. All Rights Reserved.
|
| *
|
| *******************************************************************************
|
| @@ -126,7 +126,7 @@ CollationWeights::initForSecondary() {
|
| maxBytes[1] = 0;
|
| minBytes[2] = 0;
|
| maxBytes[2] = 0;
|
| - minBytes[3] = Collation::MERGE_SEPARATOR_BYTE + 1;
|
| + minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;
|
| maxBytes[3] = 0xff;
|
| minBytes[4] = 2;
|
| maxBytes[4] = 0xff;
|
| @@ -142,7 +142,7 @@ CollationWeights::initForTertiary() {
|
| maxBytes[2] = 0;
|
| // We use only 6 bits per byte.
|
| // The other bits are used for case & quaternary weights.
|
| - minBytes[3] = Collation::MERGE_SEPARATOR_BYTE + 1;
|
| + minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;
|
| maxBytes[3] = 0x3f;
|
| minBytes[4] = 2;
|
| maxBytes[4] = 0x3f;
|
| @@ -296,24 +296,49 @@ CollationWeights::getWeightRanges(uint32_t lowerLimit, uint32_t upperLimit) {
|
| middle.count=(int32_t)((middle.end-middle.start)>>(8*(4-middleLength)))+1;
|
| } else {
|
| /* no middle range, eliminate overlaps */
|
| -
|
| - /* reduce or remove the lower ranges that go beyond upperLimit */
|
| for(int32_t length=4; length>middleLength; --length) {
|
| if(lower[length].count>0 && upper[length].count>0) {
|
| - uint32_t start=upper[length].start;
|
| - uint32_t end=lower[length].end;
|
| -
|
| - if(end>=start || incWeight(end, length)==start) {
|
| - /* lower and upper ranges collide or are directly adjacent: merge these two and remove all shorter ranges */
|
| - start=lower[length].start;
|
| - end=lower[length].end=upper[length].end;
|
| - /*
|
| - * merging directly adjacent ranges needs to subtract the 0/1 gaps in between;
|
| - * it may result in a range with count>countBytes
|
| - */
|
| + // Note: The lowerEnd and upperStart weights are versions of
|
| + // lowerLimit and upperLimit (where lowerLimit<upperLimit holds),
|
| + // truncated (still less-or-equal)
|
| + // and then with their last bytes changed to the
|
| + // maxByte (for lowerEnd) or minByte (for upperStart).
|
| + const uint32_t lowerEnd=lower[length].end;
|
| + const uint32_t upperStart=upper[length].start;
|
| + UBool merged=FALSE;
|
| +
|
| + if(lowerEnd>upperStart) {
|
| + // These two lower and upper ranges collide.
|
| + // Since lowerLimit<upperLimit and lowerEnd and upperStart
|
| + // are versions with only their last bytes modified
|
| + // (and following ones removed/reset to 0),
|
| + // lowerEnd>upperStart is only possible
|
| + // if the leading bytes are equal
|
| + // and lastByte(lowerEnd)>lastByte(upperStart).
|
| + U_ASSERT(truncateWeight(lowerEnd, length-1)==
|
| + truncateWeight(upperStart, length-1));
|
| + // Intersect these two ranges.
|
| + lower[length].end=upper[length].end;
|
| lower[length].count=
|
| - (int32_t)(getWeightTrail(end, length)-getWeightTrail(start, length)+1+
|
| - countBytes(length)*(getWeightByte(end, length-1)-getWeightByte(start, length-1)));
|
| + (int32_t)getWeightTrail(lower[length].end, length)-
|
| + (int32_t)getWeightTrail(lower[length].start, length)+1;
|
| + // count might be <=0 in which case there is no room,
|
| + // and the range-collecting code below will ignore this range.
|
| + merged=TRUE;
|
| + } else if(lowerEnd==upperStart) {
|
| + // Not possible, unless minByte==maxByte which is not allowed.
|
| + U_ASSERT(minBytes[length]<maxBytes[length]);
|
| + } else /* lowerEnd<upperStart */ {
|
| + if(incWeight(lowerEnd, length)==upperStart) {
|
| + // Merge adjacent ranges.
|
| + lower[length].end=upper[length].end;
|
| + lower[length].count+=upper[length].count; // might be >countBytes
|
| + merged=TRUE;
|
| + }
|
| + }
|
| + if(merged) {
|
| + // Remove all shorter ranges.
|
| + // There was no room available for them between the ranges we just merged.
|
| upper[length].count=0;
|
| while(--length>middleLength) {
|
| lower[length].count=upper[length].count=0;
|
|
|