source/i18n/collationweights.cpp - Issue 1621843002: ICU 56 update step 1

Side by Side Diff: source/i18n/collationweights.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 *******************************************************************************	2 *******************************************************************************

3 *	3 *

4 * Copyright (C) 1999-2014, International Business Machines	4 * Copyright (C) 1999-2015, International Business Machines

5 * Corporation and others. All Rights Reserved.	5 * Corporation and others. All Rights Reserved.

6 *	6 *

7 *******************************************************************************	7 *******************************************************************************

8 * file name: collationweights.cpp	8 * file name: collationweights.cpp

9 * encoding: US-ASCII	9 * encoding: US-ASCII

10 * tab size: 8 (not used)	10 * tab size: 8 (not used)

11 * indentation:4	11 * indentation:4

12 *	12 *

13 * created on: 2001mar08 as ucol_wgt.cpp	13 * created on: 2001mar08 as ucol_wgt.cpp

14 * created by: Markus W. Scherer	14 * created by: Markus W. Scherer

(...skipping 104 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
119 }	119 }

120	120

121 void	121 void

122 CollationWeights::initForSecondary() {	122 CollationWeights::initForSecondary() {

123 // We use only the lower 16 bits for secondary weights.	123 // We use only the lower 16 bits for secondary weights.

124 middleLength=3;	124 middleLength=3;

125 minBytes[1] = 0;	125 minBytes[1] = 0;

126 maxBytes[1] = 0;	126 maxBytes[1] = 0;

127 minBytes[2] = 0;	127 minBytes[2] = 0;

128 maxBytes[2] = 0;	128 maxBytes[2] = 0;

129 minBytes[3] = Collation::MERGE_SEPARATOR_BYTE + 1;	129 minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;

130 maxBytes[3] = 0xff;	130 maxBytes[3] = 0xff;

131 minBytes[4] = 2;	131 minBytes[4] = 2;

132 maxBytes[4] = 0xff;	132 maxBytes[4] = 0xff;

133 }	133 }

134	134

135 void	135 void

136 CollationWeights::initForTertiary() {	136 CollationWeights::initForTertiary() {

137 // We use only the lower 16 bits for tertiary weights.	137 // We use only the lower 16 bits for tertiary weights.

138 middleLength=3;	138 middleLength=3;

139 minBytes[1] = 0;	139 minBytes[1] = 0;

140 maxBytes[1] = 0;	140 maxBytes[1] = 0;

141 minBytes[2] = 0;	141 minBytes[2] = 0;

142 maxBytes[2] = 0;	142 maxBytes[2] = 0;

143 // We use only 6 bits per byte.	143 // We use only 6 bits per byte.

144 // The other bits are used for case & quaternary weights.	144 // The other bits are used for case & quaternary weights.

145 minBytes[3] = Collation::MERGE_SEPARATOR_BYTE + 1;	145 minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;

146 maxBytes[3] = 0x3f;	146 maxBytes[3] = 0x3f;

147 minBytes[4] = 2;	147 minBytes[4] = 2;

148 maxBytes[4] = 0x3f;	148 maxBytes[4] = 0x3f;

149 }	149 }

150	150

151 uint32_t	151 uint32_t

152 CollationWeights::incWeight(uint32_t weight, int32_t length) const {	152 CollationWeights::incWeight(uint32_t weight, int32_t length) const {

153 for(;;) {	153 for(;;) {

154 uint32_t byte=getWeightByte(weight, length);	154 uint32_t byte=getWeightByte(weight, length);

155 if(byte<maxBytes[length]) {	155 if(byte<maxBytes[length]) {

(...skipping 133 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
289 weight=truncateWeight(weight, length-1);	289 weight=truncateWeight(weight, length-1);

290 }	290 }

291 middle.end=decWeightTrail(weight, middleLength);	291 middle.end=decWeightTrail(weight, middleLength);

292	292

293 /* set the middle range */	293 /* set the middle range */

294 middle.length=middleLength;	294 middle.length=middleLength;

295 if(middle.end>=middle.start) {	295 if(middle.end>=middle.start) {

296 middle.count=(int32_t)((middle.end-middle.start)>>(8*(4-middleLength)))+ 1;	296 middle.count=(int32_t)((middle.end-middle.start)>>(8*(4-middleLength)))+ 1;

297 } else {	297 } else {

298 /* no middle range, eliminate overlaps */	298 /* no middle range, eliminate overlaps */

299

300 /* reduce or remove the lower ranges that go beyond upperLimit */

301 for(int32_t length=4; length>middleLength; --length) {	299 for(int32_t length=4; length>middleLength; --length) {

302 if(lower[length].count>0 && upper[length].count>0) {	300 if(lower[length].count>0 && upper[length].count>0) {

303 uint32_t start=upper[length].start;	301 // Note: The lowerEnd and upperStart weights are versions of

304 uint32_t end=lower[length].end;	302 // lowerLimit and upperLimit (which are lowerLimit<upperLimit),

	303 // truncated (still less-or-equal)

	304 // and then with their last bytes changed to the

	305 // maxByte (for lowerEnd) or minByte (for upperStart).

	306 const uint32_t lowerEnd=lower[length].end;

	307 const uint32_t upperStart=upper[length].start;

	308 UBool merged=FALSE;

305	309

306 if(end>=start \|\| incWeight(end, length)==start) {	310 if(lowerEnd>upperStart) {

307 /* lower and upper ranges collide or are directly adjacent: merge these two and remove all shorter ranges */	311 // These two lower and upper ranges collide.

308 start=lower[length].start;	312 // Since lowerLimit<upperLimit and lowerEnd and upperStart

309 end=lower[length].end=upper[length].end;	313 // are versions with only their last bytes modified

310 /*	314 // (and following ones removed/reset to 0),

311 * merging directly adjacent ranges needs to subtract the 0/ 1 gaps in between;	315 // lowerEnd>upperStart is only possible

312 * it may result in a range with count>countBytes	316 // if the leading bytes are equal

313 */	317 // and lastByte(lowerEnd)>lastByte(upperStart).

	318 U_ASSERT(truncateWeight(lowerEnd, length-1)==

	319 truncateWeight(upperStart, length-1));

	320 // Intersect these two ranges.

	321 lower[length].end=upper[length].end;

314 lower[length].count=	322 lower[length].count=

315 (int32_t)(getWeightTrail(end, length)-getWeightTrail(sta rt, length)+1+	323 (int32_t)getWeightTrail(lower[length].end, length)-

316 countBytes(length)*(getWeightByte(end, length- 1)-getWeightByte(start, length-1)));	324 (int32_t)getWeightTrail(lower[length].start, length) +1;

	325 // count might be <=0 in which case there is no room,

	326 // and the range-collecting code below will ignore this rang e.

	327 merged=TRUE;

	328 } else if(lowerEnd==upperStart) {

	329 // Not possible, unless minByte==maxByte which is not allowe d.

	330 U_ASSERT(minBytes[length]<maxBytes[length]);

	331 } else /* lowerEnd<upperStart */ {

	332 if(incWeight(lowerEnd, length)==upperStart) {

	333 // Merge adjacent ranges.

	334 lower[length].end=upper[length].end;

	335 lower[length].count+=upper[length].count; // might be > countBytes

	336 merged=TRUE;

	337 }

	338 }

	339 if(merged) {

	340 // Remove all shorter ranges.

	341 // There was no room available for them between the ranges w e just merged.

317 upper[length].count=0;	342 upper[length].count=0;

318 while(--length>middleLength) {	343 while(--length>middleLength) {

319 lower[length].count=upper[length].count=0;	344 lower[length].count=upper[length].count=0;

320 }	345 }

321 break;	346 break;

322 }	347 }

323 }	348 }

324 }	349 }

325 }	350 }

326	351

(...skipping 207 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
534 U_ASSERT(range.start <= range.end);	559 U_ASSERT(range.start <= range.end);

535 }	560 }

536	561

537 return weight;	562 return weight;

538 }	563 }

539 }	564 }

540	565

541 U_NAMESPACE_END	566 U_NAMESPACE_END

542	567

543 #endif /* #if !UCONFIG_NO_COLLATION */	568 #endif /* #if !UCONFIG_NO_COLLATION */

OLD	NEW

« no previous file with comments | « source/i18n/collationtailoring.cpp ('k') | source/i18n/collunsafe.h » ('j') | no next file with comments »