| OLD | NEW |
| 1 /* | 1 /* |
| 2 ******************************************************************************* | 2 ******************************************************************************* |
| 3 * | 3 * |
| 4 * Copyright (C) 1999-2014, International Business Machines | 4 * Copyright (C) 1999-2015, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. | 5 * Corporation and others. All Rights Reserved. |
| 6 * | 6 * |
| 7 ******************************************************************************* | 7 ******************************************************************************* |
| 8 * file name: collationweights.cpp | 8 * file name: collationweights.cpp |
| 9 * encoding: US-ASCII | 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) | 10 * tab size: 8 (not used) |
| 11 * indentation:4 | 11 * indentation:4 |
| 12 * | 12 * |
| 13 * created on: 2001mar08 as ucol_wgt.cpp | 13 * created on: 2001mar08 as ucol_wgt.cpp |
| 14 * created by: Markus W. Scherer | 14 * created by: Markus W. Scherer |
| (...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 119 } | 119 } |
| 120 | 120 |
| 121 void | 121 void |
| 122 CollationWeights::initForSecondary() { | 122 CollationWeights::initForSecondary() { |
| 123 // We use only the lower 16 bits for secondary weights. | 123 // We use only the lower 16 bits for secondary weights. |
| 124 middleLength=3; | 124 middleLength=3; |
| 125 minBytes[1] = 0; | 125 minBytes[1] = 0; |
| 126 maxBytes[1] = 0; | 126 maxBytes[1] = 0; |
| 127 minBytes[2] = 0; | 127 minBytes[2] = 0; |
| 128 maxBytes[2] = 0; | 128 maxBytes[2] = 0; |
| 129 minBytes[3] = Collation::MERGE_SEPARATOR_BYTE + 1; | 129 minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1; |
| 130 maxBytes[3] = 0xff; | 130 maxBytes[3] = 0xff; |
| 131 minBytes[4] = 2; | 131 minBytes[4] = 2; |
| 132 maxBytes[4] = 0xff; | 132 maxBytes[4] = 0xff; |
| 133 } | 133 } |
| 134 | 134 |
| 135 void | 135 void |
| 136 CollationWeights::initForTertiary() { | 136 CollationWeights::initForTertiary() { |
| 137 // We use only the lower 16 bits for tertiary weights. | 137 // We use only the lower 16 bits for tertiary weights. |
| 138 middleLength=3; | 138 middleLength=3; |
| 139 minBytes[1] = 0; | 139 minBytes[1] = 0; |
| 140 maxBytes[1] = 0; | 140 maxBytes[1] = 0; |
| 141 minBytes[2] = 0; | 141 minBytes[2] = 0; |
| 142 maxBytes[2] = 0; | 142 maxBytes[2] = 0; |
| 143 // We use only 6 bits per byte. | 143 // We use only 6 bits per byte. |
| 144 // The other bits are used for case & quaternary weights. | 144 // The other bits are used for case & quaternary weights. |
| 145 minBytes[3] = Collation::MERGE_SEPARATOR_BYTE + 1; | 145 minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1; |
| 146 maxBytes[3] = 0x3f; | 146 maxBytes[3] = 0x3f; |
| 147 minBytes[4] = 2; | 147 minBytes[4] = 2; |
| 148 maxBytes[4] = 0x3f; | 148 maxBytes[4] = 0x3f; |
| 149 } | 149 } |
| 150 | 150 |
| 151 uint32_t | 151 uint32_t |
| 152 CollationWeights::incWeight(uint32_t weight, int32_t length) const { | 152 CollationWeights::incWeight(uint32_t weight, int32_t length) const { |
| 153 for(;;) { | 153 for(;;) { |
| 154 uint32_t byte=getWeightByte(weight, length); | 154 uint32_t byte=getWeightByte(weight, length); |
| 155 if(byte<maxBytes[length]) { | 155 if(byte<maxBytes[length]) { |
| (...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 289 weight=truncateWeight(weight, length-1); | 289 weight=truncateWeight(weight, length-1); |
| 290 } | 290 } |
| 291 middle.end=decWeightTrail(weight, middleLength); | 291 middle.end=decWeightTrail(weight, middleLength); |
| 292 | 292 |
| 293 /* set the middle range */ | 293 /* set the middle range */ |
| 294 middle.length=middleLength; | 294 middle.length=middleLength; |
| 295 if(middle.end>=middle.start) { | 295 if(middle.end>=middle.start) { |
| 296 middle.count=(int32_t)((middle.end-middle.start)>>(8*(4-middleLength)))+
1; | 296 middle.count=(int32_t)((middle.end-middle.start)>>(8*(4-middleLength)))+
1; |
| 297 } else { | 297 } else { |
| 298 /* no middle range, eliminate overlaps */ | 298 /* no middle range, eliminate overlaps */ |
| 299 | |
| 300 /* reduce or remove the lower ranges that go beyond upperLimit */ | |
| 301 for(int32_t length=4; length>middleLength; --length) { | 299 for(int32_t length=4; length>middleLength; --length) { |
| 302 if(lower[length].count>0 && upper[length].count>0) { | 300 if(lower[length].count>0 && upper[length].count>0) { |
| 303 uint32_t start=upper[length].start; | 301 // Note: The lowerEnd and upperStart weights are versions of |
| 304 uint32_t end=lower[length].end; | 302 // lowerLimit and upperLimit (which are lowerLimit<upperLimit), |
| 303 // truncated (still less-or-equal) |
| 304 // and then with their last bytes changed to the |
| 305 // maxByte (for lowerEnd) or minByte (for upperStart). |
| 306 const uint32_t lowerEnd=lower[length].end; |
| 307 const uint32_t upperStart=upper[length].start; |
| 308 UBool merged=FALSE; |
| 305 | 309 |
| 306 if(end>=start || incWeight(end, length)==start) { | 310 if(lowerEnd>upperStart) { |
| 307 /* lower and upper ranges collide or are directly adjacent:
merge these two and remove all shorter ranges */ | 311 // These two lower and upper ranges collide. |
| 308 start=lower[length].start; | 312 // Since lowerLimit<upperLimit and lowerEnd and upperStart |
| 309 end=lower[length].end=upper[length].end; | 313 // are versions with only their last bytes modified |
| 310 /* | 314 // (and following ones removed/reset to 0), |
| 311 * merging directly adjacent ranges needs to subtract the 0/
1 gaps in between; | 315 // lowerEnd>upperStart is only possible |
| 312 * it may result in a range with count>countBytes | 316 // if the leading bytes are equal |
| 313 */ | 317 // and lastByte(lowerEnd)>lastByte(upperStart). |
| 318 U_ASSERT(truncateWeight(lowerEnd, length-1)== |
| 319 truncateWeight(upperStart, length-1)); |
| 320 // Intersect these two ranges. |
| 321 lower[length].end=upper[length].end; |
| 314 lower[length].count= | 322 lower[length].count= |
| 315 (int32_t)(getWeightTrail(end, length)-getWeightTrail(sta
rt, length)+1+ | 323 (int32_t)getWeightTrail(lower[length].end, length)- |
| 316 countBytes(length)*(getWeightByte(end, length-
1)-getWeightByte(start, length-1))); | 324 (int32_t)getWeightTrail(lower[length].start, length)
+1; |
| 325 // count might be <=0 in which case there is no room, |
| 326 // and the range-collecting code below will ignore this rang
e. |
| 327 merged=TRUE; |
| 328 } else if(lowerEnd==upperStart) { |
| 329 // Not possible, unless minByte==maxByte which is not allowe
d. |
| 330 U_ASSERT(minBytes[length]<maxBytes[length]); |
| 331 } else /* lowerEnd<upperStart */ { |
| 332 if(incWeight(lowerEnd, length)==upperStart) { |
| 333 // Merge adjacent ranges. |
| 334 lower[length].end=upper[length].end; |
| 335 lower[length].count+=upper[length].count; // might be >
countBytes |
| 336 merged=TRUE; |
| 337 } |
| 338 } |
| 339 if(merged) { |
| 340 // Remove all shorter ranges. |
| 341 // There was no room available for them between the ranges w
e just merged. |
| 317 upper[length].count=0; | 342 upper[length].count=0; |
| 318 while(--length>middleLength) { | 343 while(--length>middleLength) { |
| 319 lower[length].count=upper[length].count=0; | 344 lower[length].count=upper[length].count=0; |
| 320 } | 345 } |
| 321 break; | 346 break; |
| 322 } | 347 } |
| 323 } | 348 } |
| 324 } | 349 } |
| 325 } | 350 } |
| 326 | 351 |
| (...skipping 207 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 534 U_ASSERT(range.start <= range.end); | 559 U_ASSERT(range.start <= range.end); |
| 535 } | 560 } |
| 536 | 561 |
| 537 return weight; | 562 return weight; |
| 538 } | 563 } |
| 539 } | 564 } |
| 540 | 565 |
| 541 U_NAMESPACE_END | 566 U_NAMESPACE_END |
| 542 | 567 |
| 543 #endif /* #if !UCONFIG_NO_COLLATION */ | 568 #endif /* #if !UCONFIG_NO_COLLATION */ |
| OLD | NEW |