Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(119)

Unified Diff: source/i18n/collationweights.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/i18n/collationtailoring.cpp ('k') | source/i18n/collunsafe.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/i18n/collationweights.cpp
diff --git a/source/i18n/collationweights.cpp b/source/i18n/collationweights.cpp
index 17c044f8e92d068d86066f61480b20ed7f17795e..869cbaa6d3f21cd7f64ca3ed1bb4a78c99060f64 100644
--- a/source/i18n/collationweights.cpp
+++ b/source/i18n/collationweights.cpp
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 1999-2014, International Business Machines
+* Copyright (C) 1999-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -126,7 +126,7 @@ CollationWeights::initForSecondary() {
maxBytes[1] = 0;
minBytes[2] = 0;
maxBytes[2] = 0;
- minBytes[3] = Collation::MERGE_SEPARATOR_BYTE + 1;
+ minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;
maxBytes[3] = 0xff;
minBytes[4] = 2;
maxBytes[4] = 0xff;
@@ -142,7 +142,7 @@ CollationWeights::initForTertiary() {
maxBytes[2] = 0;
// We use only 6 bits per byte.
// The other bits are used for case & quaternary weights.
- minBytes[3] = Collation::MERGE_SEPARATOR_BYTE + 1;
+ minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;
maxBytes[3] = 0x3f;
minBytes[4] = 2;
maxBytes[4] = 0x3f;
@@ -296,24 +296,49 @@ CollationWeights::getWeightRanges(uint32_t lowerLimit, uint32_t upperLimit) {
middle.count=(int32_t)((middle.end-middle.start)>>(8*(4-middleLength)))+1;
} else {
/* no middle range, eliminate overlaps */
-
- /* reduce or remove the lower ranges that go beyond upperLimit */
for(int32_t length=4; length>middleLength; --length) {
if(lower[length].count>0 && upper[length].count>0) {
- uint32_t start=upper[length].start;
- uint32_t end=lower[length].end;
-
- if(end>=start || incWeight(end, length)==start) {
- /* lower and upper ranges collide or are directly adjacent: merge these two and remove all shorter ranges */
- start=lower[length].start;
- end=lower[length].end=upper[length].end;
- /*
- * merging directly adjacent ranges needs to subtract the 0/1 gaps in between;
- * it may result in a range with count>countBytes
- */
+ // Note: The lowerEnd and upperStart weights are derived from
+ // lowerLimit and upperLimit (where lowerLimit<upperLimit):
+ // each limit is truncated (after which they still compare less-or-equal),
+ // and then its last byte is changed to the
+ // maxByte (for lowerEnd) or minByte (for upperStart).
+ const uint32_t lowerEnd=lower[length].end;
+ const uint32_t upperStart=upper[length].start;
+ UBool merged=FALSE;
+
+ if(lowerEnd>upperStart) {
+ // These two lower and upper ranges collide.
+ // Since lowerLimit<upperLimit and lowerEnd and upperStart
+ // are versions with only their last bytes modified
+ // (and following ones removed/reset to 0),
+ // lowerEnd>upperStart is only possible
+ // if the leading bytes are equal
+ // and lastByte(lowerEnd)>lastByte(upperStart).
+ U_ASSERT(truncateWeight(lowerEnd, length-1)==
+ truncateWeight(upperStart, length-1));
+ // Intersect these two ranges.
+ lower[length].end=upper[length].end;
lower[length].count=
- (int32_t)(getWeightTrail(end, length)-getWeightTrail(start, length)+1+
- countBytes(length)*(getWeightByte(end, length-1)-getWeightByte(start, length-1)));
+ (int32_t)getWeightTrail(lower[length].end, length)-
+ (int32_t)getWeightTrail(lower[length].start, length)+1;
+ // count might be <=0 in which case there is no room,
+ // and the range-collecting code below will ignore this range.
+ merged=TRUE;
+ } else if(lowerEnd==upperStart) {
+ // Not possible, unless minByte==maxByte which is not allowed.
+ U_ASSERT(minBytes[length]<maxBytes[length]);
+ } else /* lowerEnd<upperStart */ {
+ if(incWeight(lowerEnd, length)==upperStart) {
+ // Merge adjacent ranges.
+ lower[length].end=upper[length].end;
+ lower[length].count+=upper[length].count; // might be >countBytes
+ merged=TRUE;
+ }
+ }
+ if(merged) {
+ // Remove all shorter ranges.
+ // There was no room available for them between the ranges we just merged.
upper[length].count=0;
while(--length>middleLength) {
lower[length].count=upper[length].count=0;
« no previous file with comments | « source/i18n/collationtailoring.cpp ('k') | source/i18n/collunsafe.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698