source/i18n/collationweights.cpp - Issue 1621843002: ICU 56 update step 1

Unified Diff: source/i18n/collationweights.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View side-by-side diff with in-line comments

Index: source/i18n/collationweights.cpp

diff --git a/source/i18n/collationweights.cpp b/source/i18n/collationweights.cpp

index 17c044f8e92d068d86066f61480b20ed7f17795e..869cbaa6d3f21cd7f64ca3ed1bb4a78c99060f64 100644

--- a/source/i18n/collationweights.cpp

+++ b/source/i18n/collationweights.cpp

@@ -1,7 +1,7 @@

*******************************************************************************

@@ -126,7 +126,7 @@ CollationWeights::initForSecondary() {

maxBytes[1] = 0;

minBytes[2] = 0;

maxBytes[2] = 0;

- minBytes[3] = Collation::MERGE_SEPARATOR_BYTE + 1;

+ minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;

maxBytes[3] = 0xff;

minBytes[4] = 2;

maxBytes[4] = 0xff;

@@ -142,7 +142,7 @@ CollationWeights::initForTertiary() {

maxBytes[2] = 0;

// We use only 6 bits per byte.

// The other bits are used for case & quaternary weights.

- minBytes[3] = Collation::MERGE_SEPARATOR_BYTE + 1;

+ minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;

maxBytes[3] = 0x3f;

minBytes[4] = 2;

maxBytes[4] = 0x3f;

@@ -296,24 +296,49 @@ CollationWeights::getWeightRanges(uint32_t lowerLimit, uint32_t upperLimit) {

middle.count=(int32_t)((middle.end-middle.start)>>(8*(4-middleLength)))+1;

} else {

/* no middle range, eliminate overlaps */

- /* reduce or remove the lower ranges that go beyond upperLimit */

for(int32_t length=4; length>middleLength; --length) {

if(lower[length].count>0 && upper[length].count>0) {

- uint32_t start=upper[length].start;

- uint32_t end=lower[length].end;

- if(end>=start || incWeight(end, length)==start) {

- /* lower and upper ranges collide or are directly adjacent: merge these two and remove all shorter ranges */

- start=lower[length].start;

- end=lower[length].end=upper[length].end;

- /*

- * merging directly adjacent ranges needs to subtract the 0/1 gaps in between;

- * it may result in a range with count>countBytes

- */

+ // Note: The lowerEnd and upperStart weights are versions of

+ // lowerLimit and upperLimit (which are lowerLimit<upperLimit),

+ // truncated (still less-or-equal)

+ // and then with their last bytes changed to the

+ // maxByte (for lowerEnd) or minByte (for upperStart).

+ const uint32_t lowerEnd=lower[length].end;

+ const uint32_t upperStart=upper[length].start;

+ UBool merged=FALSE;

+ if(lowerEnd>upperStart) {

+ // These two lower and upper ranges collide.

+ // Since lowerLimit<upperLimit and lowerEnd and upperStart

+ // are versions with only their last bytes modified

+ // (and following ones removed/reset to 0),

+ // lowerEnd>upperStart is only possible

+ // if the leading bytes are equal

+ // and lastByte(lowerEnd)>lastByte(upperStart).

+ U_ASSERT(truncateWeight(lowerEnd, length-1)==

+ truncateWeight(upperStart, length-1));

+ // Intersect these two ranges.

+ lower[length].end=upper[length].end;

lower[length].count=

- (int32_t)(getWeightTrail(end, length)-getWeightTrail(start, length)+1+

- countBytes(length)*(getWeightByte(end, length-1)-getWeightByte(start, length-1)));

+ (int32_t)getWeightTrail(lower[length].end, length)-

+ (int32_t)getWeightTrail(lower[length].start, length)+1;

+ // count might be <=0 in which case there is no room,

+ // and the range-collecting code below will ignore this range.

+ merged=TRUE;

+ } else if(lowerEnd==upperStart) {

+ // Not possible, unless minByte==maxByte which is not allowed.

+ U_ASSERT(minBytes[length]<maxBytes[length]);

+ } else /* lowerEnd<upperStart */ {

+ if(incWeight(lowerEnd, length)==upperStart) {

+ // Merge adjacent ranges.

+ lower[length].end=upper[length].end;

+ lower[length].count+=upper[length].count; // might be >countBytes

+ merged=TRUE;

+ }

+ if(merged) {

+ // Remove all shorter ranges.

+ // There was no room available for them between the ranges we just merged.

upper[length].count=0;

while(--length>middleLength) {

lower[length].count=upper[length].count=0;

« no previous file with comments | « source/i18n/collationtailoring.cpp ('k') | source/i18n/collunsafe.h » ('j') | no next file with comments »