Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(641)

Side by Side Diff: source/i18n/collationweights.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/i18n/collationtailoring.cpp ('k') | source/i18n/collunsafe.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ******************************************************************************* 2 *******************************************************************************
3 * 3 *
4 * Copyright (C) 1999-2014, International Business Machines 4 * Copyright (C) 1999-2015, International Business Machines
5 * Corporation and others. All Rights Reserved. 5 * Corporation and others. All Rights Reserved.
6 * 6 *
7 ******************************************************************************* 7 *******************************************************************************
8 * file name: collationweights.cpp 8 * file name: collationweights.cpp
9 * encoding: US-ASCII 9 * encoding: US-ASCII
10 * tab size: 8 (not used) 10 * tab size: 8 (not used)
11 * indentation:4 11 * indentation:4
12 * 12 *
13 * created on: 2001mar08 as ucol_wgt.cpp 13 * created on: 2001mar08 as ucol_wgt.cpp
14 * created by: Markus W. Scherer 14 * created by: Markus W. Scherer
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after
119 } 119 }
120 120
121 void 121 void
122 CollationWeights::initForSecondary() { 122 CollationWeights::initForSecondary() {
123 // We use only the lower 16 bits for secondary weights. 123 // We use only the lower 16 bits for secondary weights.
124 middleLength=3; 124 middleLength=3;
125 minBytes[1] = 0; 125 minBytes[1] = 0;
126 maxBytes[1] = 0; 126 maxBytes[1] = 0;
127 minBytes[2] = 0; 127 minBytes[2] = 0;
128 maxBytes[2] = 0; 128 maxBytes[2] = 0;
129 minBytes[3] = Collation::MERGE_SEPARATOR_BYTE + 1; 129 minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;
130 maxBytes[3] = 0xff; 130 maxBytes[3] = 0xff;
131 minBytes[4] = 2; 131 minBytes[4] = 2;
132 maxBytes[4] = 0xff; 132 maxBytes[4] = 0xff;
133 } 133 }
134 134
135 void 135 void
136 CollationWeights::initForTertiary() { 136 CollationWeights::initForTertiary() {
137 // We use only the lower 16 bits for tertiary weights. 137 // We use only the lower 16 bits for tertiary weights.
138 middleLength=3; 138 middleLength=3;
139 minBytes[1] = 0; 139 minBytes[1] = 0;
140 maxBytes[1] = 0; 140 maxBytes[1] = 0;
141 minBytes[2] = 0; 141 minBytes[2] = 0;
142 maxBytes[2] = 0; 142 maxBytes[2] = 0;
143 // We use only 6 bits per byte. 143 // We use only 6 bits per byte.
144 // The other bits are used for case & quaternary weights. 144 // The other bits are used for case & quaternary weights.
145 minBytes[3] = Collation::MERGE_SEPARATOR_BYTE + 1; 145 minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;
146 maxBytes[3] = 0x3f; 146 maxBytes[3] = 0x3f;
147 minBytes[4] = 2; 147 minBytes[4] = 2;
148 maxBytes[4] = 0x3f; 148 maxBytes[4] = 0x3f;
149 } 149 }
150 150
151 uint32_t 151 uint32_t
152 CollationWeights::incWeight(uint32_t weight, int32_t length) const { 152 CollationWeights::incWeight(uint32_t weight, int32_t length) const {
153 for(;;) { 153 for(;;) {
154 uint32_t byte=getWeightByte(weight, length); 154 uint32_t byte=getWeightByte(weight, length);
155 if(byte<maxBytes[length]) { 155 if(byte<maxBytes[length]) {
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after
289 weight=truncateWeight(weight, length-1); 289 weight=truncateWeight(weight, length-1);
290 } 290 }
291 middle.end=decWeightTrail(weight, middleLength); 291 middle.end=decWeightTrail(weight, middleLength);
292 292
293 /* set the middle range */ 293 /* set the middle range */
294 middle.length=middleLength; 294 middle.length=middleLength;
295 if(middle.end>=middle.start) { 295 if(middle.end>=middle.start) {
296 middle.count=(int32_t)((middle.end-middle.start)>>(8*(4-middleLength)))+ 1; 296 middle.count=(int32_t)((middle.end-middle.start)>>(8*(4-middleLength)))+ 1;
297 } else { 297 } else {
298 /* no middle range, eliminate overlaps */ 298 /* no middle range, eliminate overlaps */
299
300 /* reduce or remove the lower ranges that go beyond upperLimit */
301 for(int32_t length=4; length>middleLength; --length) { 299 for(int32_t length=4; length>middleLength; --length) {
302 if(lower[length].count>0 && upper[length].count>0) { 300 if(lower[length].count>0 && upper[length].count>0) {
303 uint32_t start=upper[length].start; 301 // Note: The lowerEnd and upperStart weights are versions of
304 uint32_t end=lower[length].end; 302 // lowerLimit and upperLimit (which are lowerLimit<upperLimit),
303 // truncated (still less-or-equal)
304 // and then with their last bytes changed to the
305 // maxByte (for lowerEnd) or minByte (for upperStart).
306 const uint32_t lowerEnd=lower[length].end;
307 const uint32_t upperStart=upper[length].start;
308 UBool merged=FALSE;
305 309
306 if(end>=start || incWeight(end, length)==start) { 310 if(lowerEnd>upperStart) {
307 /* lower and upper ranges collide or are directly adjacent: merge these two and remove all shorter ranges */ 311 // These two lower and upper ranges collide.
308 start=lower[length].start; 312 // Since lowerLimit<upperLimit and lowerEnd and upperStart
309 end=lower[length].end=upper[length].end; 313 // are versions with only their last bytes modified
310 /* 314 // (and following ones removed/reset to 0),
311 * merging directly adjacent ranges needs to subtract the 0/ 1 gaps in between; 315 // lowerEnd>upperStart is only possible
312 * it may result in a range with count>countBytes 316 // if the leading bytes are equal
313 */ 317 // and lastByte(lowerEnd)>lastByte(upperStart).
318 U_ASSERT(truncateWeight(lowerEnd, length-1)==
319 truncateWeight(upperStart, length-1));
320 // Intersect these two ranges.
321 lower[length].end=upper[length].end;
314 lower[length].count= 322 lower[length].count=
315 (int32_t)(getWeightTrail(end, length)-getWeightTrail(sta rt, length)+1+ 323 (int32_t)getWeightTrail(lower[length].end, length)-
316 countBytes(length)*(getWeightByte(end, length- 1)-getWeightByte(start, length-1))); 324 (int32_t)getWeightTrail(lower[length].start, length) +1;
325 // count might be <=0 in which case there is no room,
326 // and the range-collecting code below will ignore this rang e.
327 merged=TRUE;
328 } else if(lowerEnd==upperStart) {
329 // Not possible, unless minByte==maxByte which is not allowe d.
330 U_ASSERT(minBytes[length]<maxBytes[length]);
331 } else /* lowerEnd<upperStart */ {
332 if(incWeight(lowerEnd, length)==upperStart) {
333 // Merge adjacent ranges.
334 lower[length].end=upper[length].end;
335 lower[length].count+=upper[length].count; // might be > countBytes
336 merged=TRUE;
337 }
338 }
339 if(merged) {
340 // Remove all shorter ranges.
341 // There was no room available for them between the ranges w e just merged.
317 upper[length].count=0; 342 upper[length].count=0;
318 while(--length>middleLength) { 343 while(--length>middleLength) {
319 lower[length].count=upper[length].count=0; 344 lower[length].count=upper[length].count=0;
320 } 345 }
321 break; 346 break;
322 } 347 }
323 } 348 }
324 } 349 }
325 } 350 }
326 351
(...skipping 207 matching lines...) Expand 10 before | Expand all | Expand 10 after
534 U_ASSERT(range.start <= range.end); 559 U_ASSERT(range.start <= range.end);
535 } 560 }
536 561
537 return weight; 562 return weight;
538 } 563 }
539 } 564 }
540 565
541 U_NAMESPACE_END 566 U_NAMESPACE_END
542 567
543 #endif /* #if !UCONFIG_NO_COLLATION */ 568 #endif /* #if !UCONFIG_NO_COLLATION */
OLDNEW
« no previous file with comments | « source/i18n/collationtailoring.cpp ('k') | source/i18n/collunsafe.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698