| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 ******************************************************************************* | |
| 3 * Copyright (C) 2012-2014, International Business Machines | |
| 4 * Corporation and others. All Rights Reserved. | |
| 5 ******************************************************************************* | |
| 6 * collationbasedatabuilder.cpp | |
| 7 * | |
| 8 * created on: 2012aug11 | |
| 9 * created by: Markus W. Scherer | |
| 10 */ | |
| 11 | |
| 12 #include "unicode/utypes.h" | |
| 13 | |
| 14 #if !UCONFIG_NO_COLLATION | |
| 15 | |
| 16 #include "unicode/localpointer.h" | |
| 17 #include "unicode/ucharstriebuilder.h" | |
| 18 #include "unicode/uniset.h" | |
| 19 #include "unicode/unistr.h" | |
| 20 #include "unicode/utf16.h" | |
| 21 #include "collation.h" | |
| 22 #include "collationbasedatabuilder.h" | |
| 23 #include "collationdata.h" | |
| 24 #include "collationdatabuilder.h" | |
| 25 #include "collationrootelements.h" | |
| 26 #include "normalizer2impl.h" | |
| 27 #include "uassert.h" | |
| 28 #include "utrie2.h" | |
| 29 #include "uvectr32.h" | |
| 30 #include "uvectr64.h" | |
| 31 #include "uvector.h" | |
| 32 | |
| 33 U_NAMESPACE_BEGIN | |
| 34 | |
| 35 namespace { | |
| 36 | |
/**
 * Three-way comparison of two int64_t values by their unsigned (uint64_t) interpretation.
 *
 * @return -1 if a orders before b, 1 if a orders after b, 0 if they are equal
 */
int32_t
compareInt64AsUnsigned(int64_t a, int64_t b) {
    const uint64_t ua = (uint64_t)a;
    const uint64_t ub = (uint64_t)b;
    if(ua == ub) { return 0; }
    return ua < ub ? -1 : 1;
}
| 50 | |
| 51 // TODO: Try to merge this with the binarySearch in alphaindex.cpp. | |
| 52 /** | |
| 53 * Like Java Collections.binarySearch(List, String, Comparator). | |
| 54 * | |
| 55 * @return the index>=0 where the item was found, | |
| 56 * or the index<0 for inserting the string at ~index in sorted order | |
| 57 */ | |
| 58 int32_t | |
| 59 binarySearch(const UVector64 &list, int64_t ce) { | |
| 60 if (list.size() == 0) { return ~0; } | |
| 61 int32_t start = 0; | |
| 62 int32_t limit = list.size(); | |
| 63 for (;;) { | |
| 64 int32_t i = (start + limit) / 2; | |
| 65 int32_t cmp = compareInt64AsUnsigned(ce, list.elementAti(i)); | |
| 66 if (cmp == 0) { | |
| 67 return i; | |
| 68 } else if (cmp < 0) { | |
| 69 if (i == start) { | |
| 70 return ~start; // insert ce before i | |
| 71 } | |
| 72 limit = i; | |
| 73 } else { | |
| 74 if (i == start) { | |
| 75 return ~(start + 1); // insert ce after i | |
| 76 } | |
| 77 start = i; | |
| 78 } | |
| 79 } | |
| 80 } | |
| 81 | |
| 82 } // namespace | |
| 83 | |
| 84 CollationBaseDataBuilder::CollationBaseDataBuilder(UErrorCode &errorCode) | |
| 85 : CollationDataBuilder(errorCode), | |
| 86 numericPrimary(0x12000000), | |
| 87 firstHanPrimary(0), lastHanPrimary(0), hanStep(2), | |
| 88 rootElements(errorCode) { | |
| 89 } | |
| 90 | |
| 91 CollationBaseDataBuilder::~CollationBaseDataBuilder() { | |
| 92 } | |
| 93 | |
| 94 void | |
| 95 CollationBaseDataBuilder::init(UErrorCode &errorCode) { | |
| 96 if(U_FAILURE(errorCode)) { return; } | |
| 97 if(trie != NULL) { | |
| 98 errorCode = U_INVALID_STATE_ERROR; | |
| 99 return; | |
| 100 } | |
| 101 | |
| 102 // Not compressible: | |
| 103 // - digits | |
| 104 // - Latin | |
| 105 // - Hani | |
| 106 // - trail weights | |
| 107 // Some scripts are compressible, some are not. | |
| 108 uprv_memset(compressibleBytes, FALSE, 256); | |
| 109 compressibleBytes[Collation::UNASSIGNED_IMPLICIT_BYTE] = TRUE; | |
| 110 | |
| 111 // For a base, the default is to compute an unassigned-character implicit CE
. | |
| 112 // This includes surrogate code points; see the last option in | |
| 113 // UCA section 7.1.1 Handling Ill-Formed Code Unit Sequences. | |
| 114 trie = utrie2_open(Collation::UNASSIGNED_CE32, Collation::FFFD_CE32, &errorC
ode); | |
| 115 | |
| 116 // Preallocate trie blocks for Latin in the hope that proximity helps with C
PU caches. | |
| 117 for(UChar32 c = 0; c < 0x180; ++c) { | |
| 118 utrie2_set32(trie, c, Collation::UNASSIGNED_CE32, &errorCode); | |
| 119 } | |
| 120 | |
| 121 utrie2_set32(trie, 0xfffe, Collation::MERGE_SEPARATOR_CE32, &errorCode); | |
| 122 // No root element for the merge separator which has 02 weights. | |
| 123 // Some code assumes that the root first primary CE is the "space first prim
ary" | |
| 124 // from FractionalUCA.txt. | |
| 125 | |
| 126 uint32_t hangulCE32 = Collation::makeCE32FromTagAndIndex(Collation::HANGUL_T
AG, 0); | |
| 127 utrie2_setRange32(trie, Hangul::HANGUL_BASE, Hangul::HANGUL_END, hangulCE32,
TRUE, &errorCode); | |
| 128 | |
| 129 // Add a mapping for the first-unassigned boundary, | |
| 130 // which is the AlphabeticIndex overflow boundary. | |
| 131 UnicodeString s((UChar)0xfdd1); // Script boundary contractions start with
U+FDD1. | |
| 132 s.append((UChar)0xfdd0); // Zzzz script sample character U+FDD0. | |
| 133 int64_t ce = Collation::makeCE(Collation::FIRST_UNASSIGNED_PRIMARY); | |
| 134 add(UnicodeString(), s, &ce, 1, errorCode); | |
| 135 | |
| 136 // Add a tailoring boundary, but not a mapping, for [first trailing]. | |
| 137 ce = Collation::makeCE(Collation::FIRST_TRAILING_PRIMARY); | |
| 138 rootElements.addElement(ce, errorCode); | |
| 139 | |
| 140 // U+FFFD maps to a CE with the third-highest primary weight, | |
| 141 // for predictable handling of ill-formed UTF-8. | |
| 142 uint32_t ce32 = Collation::FFFD_CE32; | |
| 143 utrie2_set32(trie, 0xfffd, ce32, &errorCode); | |
| 144 addRootElement(Collation::ceFromSimpleCE32(ce32), errorCode); | |
| 145 | |
| 146 // U+FFFF maps to a CE with the highest primary weight. | |
| 147 ce32 = Collation::MAX_REGULAR_CE32; | |
| 148 utrie2_set32(trie, 0xffff, ce32, &errorCode); | |
| 149 addRootElement(Collation::ceFromSimpleCE32(ce32), errorCode); | |
| 150 } | |
| 151 | |
| 152 void | |
| 153 CollationBaseDataBuilder::initHanRanges(const UChar32 ranges[], int32_t length, | |
| 154 UErrorCode &errorCode) { | |
| 155 if(U_FAILURE(errorCode) || length == 0) { return; } | |
| 156 if((length & 1) != 0) { // incomplete start/end pairs | |
| 157 errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
| 158 return; | |
| 159 } | |
| 160 if(isAssigned(0x4e00)) { // already set | |
| 161 errorCode = U_INVALID_STATE_ERROR; | |
| 162 return; | |
| 163 } | |
| 164 int32_t numHanCodePoints = 0; | |
| 165 for(int32_t i = 0; i < length; i += 2) { | |
| 166 UChar32 start = ranges[i]; | |
| 167 UChar32 end = ranges[i + 1]; | |
| 168 numHanCodePoints += end - start + 1; | |
| 169 } | |
| 170 // Multiply the number of code points by (gap+1). | |
| 171 // Add hanStep+2 for tailoring after the last Han character. | |
| 172 int32_t gap = 1; | |
| 173 hanStep = gap + 1; | |
| 174 int32_t numHan = numHanCodePoints * hanStep + hanStep + 2; | |
| 175 // Numbers of Han primaries per lead byte determined by | |
| 176 // numbers of 2nd (not compressible) times 3rd primary byte values. | |
| 177 int32_t numHanPerLeadByte = 254 * 254; | |
| 178 int32_t numHanLeadBytes = (numHan + numHanPerLeadByte - 1) / numHanPerLeadBy
te; | |
| 179 uint32_t hanPrimary = (uint32_t)(Collation::UNASSIGNED_IMPLICIT_BYTE - numHa
nLeadBytes) << 24; | |
| 180 hanPrimary |= 0x20200; | |
| 181 firstHanPrimary = hanPrimary; | |
| 182 for(int32_t i = 0; i < length; i += 2) { | |
| 183 UChar32 start = ranges[i]; | |
| 184 UChar32 end = ranges[i + 1]; | |
| 185 hanPrimary = setPrimaryRangeAndReturnNext(start, end, hanPrimary, hanSte
p, errorCode); | |
| 186 } | |
| 187 // One past the actual last one, but that is harmless for tailoring. | |
| 188 // It saves us from subtracting "hanStep" and handling underflows. | |
| 189 lastHanPrimary = hanPrimary; | |
| 190 } | |
| 191 | |
| 192 UBool | |
| 193 CollationBaseDataBuilder::isCompressibleLeadByte(uint32_t b) const { | |
| 194 return compressibleBytes[b]; | |
| 195 } | |
| 196 | |
| 197 void | |
| 198 CollationBaseDataBuilder::setCompressibleLeadByte(uint32_t b) { | |
| 199 compressibleBytes[b] = TRUE; | |
| 200 } | |
| 201 | |
| 202 int32_t | |
| 203 CollationBaseDataBuilder::diffTwoBytePrimaries(uint32_t p1, uint32_t p2, UBool i
sCompressible) { | |
| 204 if((p1 & 0xff000000) == (p2 & 0xff000000)) { | |
| 205 // Same lead bytes. | |
| 206 return (int32_t)(p2 - p1) >> 16; | |
| 207 } else { | |
| 208 int32_t linear1; | |
| 209 int32_t linear2; | |
| 210 int32_t factor; | |
| 211 if(isCompressible) { | |
| 212 // Second byte for compressible lead byte: 251 bytes 04..FE | |
| 213 linear1 = (int32_t)((p1 >> 16) & 0xff) - 4; | |
| 214 linear2 = (int32_t)((p2 >> 16) & 0xff) - 4; | |
| 215 factor = 251; | |
| 216 } else { | |
| 217 // Second byte for incompressible lead byte: 254 bytes 02..FF | |
| 218 linear1 = (int32_t)((p1 >> 16) & 0xff) - 2; | |
| 219 linear2 = (int32_t)((p2 >> 16) & 0xff) - 2; | |
| 220 factor = 254; | |
| 221 } | |
| 222 linear1 += factor * (int32_t)((p1 >> 24) & 0xff); | |
| 223 linear2 += factor * (int32_t)((p2 >> 24) & 0xff); | |
| 224 return linear2 - linear1; | |
| 225 } | |
| 226 } | |
| 227 | |
| 228 int32_t | |
| 229 CollationBaseDataBuilder::diffThreeBytePrimaries(uint32_t p1, uint32_t p2, UBool
isCompressible) { | |
| 230 if((p1 & 0xffff0000) == (p2 & 0xffff0000)) { | |
| 231 // Same first two bytes. | |
| 232 return (int32_t)(p2 - p1) >> 8; | |
| 233 } else { | |
| 234 // Third byte: 254 bytes 02..FF | |
| 235 int32_t linear1 = (int32_t)((p1 >> 8) & 0xff) - 2; | |
| 236 int32_t linear2 = (int32_t)((p2 >> 8) & 0xff) - 2; | |
| 237 int32_t factor; | |
| 238 if(isCompressible) { | |
| 239 // Second byte for compressible lead byte: 251 bytes 04..FE | |
| 240 linear1 += 254 * ((int32_t)((p1 >> 16) & 0xff) - 4); | |
| 241 linear2 += 254 * ((int32_t)((p2 >> 16) & 0xff) - 4); | |
| 242 factor = 251 * 254; | |
| 243 } else { | |
| 244 // Second byte for incompressible lead byte: 254 bytes 02..FF | |
| 245 linear1 += 254 * ((int32_t)((p1 >> 16) & 0xff) - 2); | |
| 246 linear2 += 254 * ((int32_t)((p2 >> 16) & 0xff) - 2); | |
| 247 factor = 254 * 254; | |
| 248 } | |
| 249 linear1 += factor * (int32_t)((p1 >> 24) & 0xff); | |
| 250 linear2 += factor * (int32_t)((p2 >> 24) & 0xff); | |
| 251 return linear2 - linear1; | |
| 252 } | |
| 253 } | |
| 254 | |
| 255 uint32_t | |
| 256 CollationBaseDataBuilder::encodeCEs(const int64_t ces[], int32_t cesLength, UErr
orCode &errorCode) { | |
| 257 addRootElements(ces, cesLength, errorCode); | |
| 258 return CollationDataBuilder::encodeCEs(ces, cesLength, errorCode); | |
| 259 } | |
| 260 | |
| 261 void | |
| 262 CollationBaseDataBuilder::addRootElements(const int64_t ces[], int32_t cesLength
, | |
| 263 UErrorCode &errorCode) { | |
| 264 if(U_FAILURE(errorCode)) { return; } | |
| 265 for(int32_t i = 0; i < cesLength; ++i) { | |
| 266 addRootElement(ces[i], errorCode); | |
| 267 } | |
| 268 } | |
| 269 | |
| 270 void | |
| 271 CollationBaseDataBuilder::addRootElement(int64_t ce, UErrorCode &errorCode) { | |
| 272 if(U_FAILURE(errorCode) || ce == 0) { return; } | |
| 273 // Remove case bits. | |
| 274 ce &= INT64_C(0xffffffffffff3fff); | |
| 275 U_ASSERT((ce & 0xc0) == 0); // quaternary==0 | |
| 276 // Ignore the CE if it has a Han primary weight and common secondary/tertiar
y weights. | |
| 277 // We will add it later, as part of the Han ranges. | |
| 278 uint32_t p = (uint32_t)(ce >> 32); | |
| 279 uint32_t secTer = (uint32_t)ce; | |
| 280 if(secTer == Collation::COMMON_SEC_AND_TER_CE) { | |
| 281 if(firstHanPrimary <= p && p <= lastHanPrimary) { | |
| 282 return; | |
| 283 } | |
| 284 } else { | |
| 285 // Check that secondary and tertiary weights are >= "common". | |
| 286 uint32_t s = secTer >> 16; | |
| 287 uint32_t t = secTer & Collation::ONLY_TERTIARY_MASK; | |
| 288 if((s != 0 && s < Collation::COMMON_WEIGHT16) || (t != 0 && t < Collatio
n::COMMON_WEIGHT16)) { | |
| 289 errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
| 290 return; | |
| 291 } | |
| 292 } | |
| 293 // Check that primaries have at most 3 bytes. | |
| 294 if((p & 0xff) != 0) { | |
| 295 errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
| 296 return; | |
| 297 } | |
| 298 int32_t i = binarySearch(rootElements, ce); | |
| 299 if(i < 0) { | |
| 300 rootElements.insertElementAt(ce, ~i, errorCode); | |
| 301 } | |
| 302 } | |
| 303 | |
| 304 void | |
| 305 CollationBaseDataBuilder::addReorderingGroup(uint32_t firstByte, uint32_t lastBy
te, | |
| 306 const UnicodeString &groupScripts, | |
| 307 UErrorCode &errorCode) { | |
| 308 if(U_FAILURE(errorCode)) { return; } | |
| 309 if(groupScripts.isEmpty()) { | |
| 310 errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
| 311 return; | |
| 312 } | |
| 313 if(groupScripts.indexOf((UChar)USCRIPT_UNKNOWN) >= 0) { | |
| 314 // Zzzz must not occur. | |
| 315 // It is the code used in the API to separate low and high scripts. | |
| 316 errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
| 317 return; | |
| 318 } | |
| 319 // Note: We are mostly trusting the input data, | |
| 320 // rather than verifying that reordering groups do not intersect | |
| 321 // with their lead byte ranges nor their sets of scripts, | |
| 322 // and that all script codes are valid. | |
| 323 scripts.append((UChar)((firstByte << 8) | lastByte)); | |
| 324 scripts.append((UChar)groupScripts.length()); | |
| 325 scripts.append(groupScripts); | |
| 326 } | |
| 327 | |
| 328 void | |
| 329 CollationBaseDataBuilder::build(CollationData &data, UErrorCode &errorCode) { | |
| 330 buildMappings(data, errorCode); | |
| 331 data.numericPrimary = numericPrimary; | |
| 332 data.compressibleBytes = compressibleBytes; | |
| 333 data.scripts = reinterpret_cast<const uint16_t *>(scripts.getBuffer()); | |
| 334 data.scriptsLength = scripts.length(); | |
| 335 buildFastLatinTable(data, errorCode); | |
| 336 } | |
| 337 | |
| 338 void | |
| 339 CollationBaseDataBuilder::buildRootElementsTable(UVector32 &table, UErrorCode &e
rrorCode) { | |
| 340 if(U_FAILURE(errorCode)) { return; } | |
| 341 uint32_t nextHanPrimary = firstHanPrimary; // Set to 0xffffffff after the l
ast Han range. | |
| 342 uint32_t prevPrimary = 0; // Start with primary ignorable CEs. | |
| 343 UBool tryRange = FALSE; | |
| 344 for(int32_t i = 0; i < rootElements.size(); ++i) { | |
| 345 int64_t ce = rootElements.elementAti(i); | |
| 346 uint32_t p = (uint32_t)(ce >> 32); | |
| 347 uint32_t secTer = (uint32_t)ce & Collation::ONLY_SEC_TER_MASK; | |
| 348 if(p != prevPrimary) { | |
| 349 U_ASSERT((p & 0xff) == 0); | |
| 350 int32_t end; | |
| 351 if(p >= nextHanPrimary) { | |
| 352 // Add a Han primary weight or range. | |
| 353 // We omitted them initially, and omitted all CEs with Han prima
ries | |
| 354 // and common secondary/tertiary weights. | |
| 355 U_ASSERT(p > lastHanPrimary || secTer != Collation::COMMON_SEC_A
ND_TER_CE); | |
| 356 if(p == nextHanPrimary) { | |
| 357 // One single Han primary with non-common secondary/tertiary
weights. | |
| 358 table.addElement((int32_t)p, errorCode); | |
| 359 if(p < lastHanPrimary) { | |
| 360 // Prepare for the next Han range. | |
| 361 nextHanPrimary = Collation::incThreeBytePrimaryByOffset(
p, FALSE, hanStep); | |
| 362 } else { | |
| 363 // p is the last Han primary. | |
| 364 nextHanPrimary = 0xffffffff; | |
| 365 } | |
| 366 } else { | |
| 367 // p > nextHanPrimary: Add a Han primary range, starting wit
h nextHanPrimary. | |
| 368 table.addElement((int32_t)nextHanPrimary, errorCode); | |
| 369 if(nextHanPrimary == lastHanPrimary) { | |
| 370 // nextHanPrimary == lastHanPrimary < p | |
| 371 // We just wrote the single last Han primary. | |
| 372 nextHanPrimary = 0xffffffff; | |
| 373 } else if(p < lastHanPrimary) { | |
| 374 // nextHanPrimary < p < lastHanPrimary | |
| 375 // End the Han range on p, prepare for the next range. | |
| 376 table.addElement((int32_t)p | hanStep, errorCode); | |
| 377 nextHanPrimary = Collation::incThreeBytePrimaryByOffset(
p, FALSE, hanStep); | |
| 378 } else if(p == lastHanPrimary) { | |
| 379 // nextHanPrimary < p == lastHanPrimary | |
| 380 // End the last Han range on p. | |
| 381 table.addElement((int32_t)p | hanStep, errorCode); | |
| 382 nextHanPrimary = 0xffffffff; | |
| 383 } else { | |
| 384 // nextHanPrimary < lastHanPrimary < p | |
| 385 // End the last Han range, then write p. | |
| 386 table.addElement((int32_t)lastHanPrimary | hanStep, erro
rCode); | |
| 387 nextHanPrimary = 0xffffffff; | |
| 388 table.addElement((int32_t)p, errorCode); | |
| 389 } | |
| 390 } | |
| 391 } else if(tryRange && secTer == Collation::COMMON_SEC_AND_TER_CE && | |
| 392 (end = writeRootElementsRange(prevPrimary, p, i + 1, table,
errorCode)) != 0) { | |
| 393 // Multiple CEs with only common secondary/tertiary weights were | |
| 394 // combined into a primary range. | |
| 395 // The range end was written, ending with the primary of rootEle
ments[end]. | |
| 396 ce = rootElements.elementAti(end); | |
| 397 p = (uint32_t)(ce >> 32); | |
| 398 secTer = (uint32_t)ce & Collation::ONLY_SEC_TER_MASK; | |
| 399 i = end; | |
| 400 } else { | |
| 401 // Write the primary weight of a normal CE. | |
| 402 table.addElement((int32_t)p, errorCode); | |
| 403 } | |
| 404 prevPrimary = p; | |
| 405 } | |
| 406 if(secTer == Collation::COMMON_SEC_AND_TER_CE) { | |
| 407 // The common secondar/tertiary weights are implied in the primary u
nit. | |
| 408 // If there is no intervening delta unit, then we will try to combin
e | |
| 409 // the next several primaries into a range. | |
| 410 tryRange = TRUE; | |
| 411 } else { | |
| 412 // For each new set of secondary/tertiary weights we write a delta u
nit. | |
| 413 table.addElement((int32_t)secTer | CollationRootElements::SEC_TER_DE
LTA_FLAG, errorCode); | |
| 414 tryRange = FALSE; | |
| 415 } | |
| 416 } | |
| 417 | |
| 418 // Limit sentinel for root elements. | |
| 419 // This allows us to reduce range checks at runtime. | |
| 420 table.addElement(CollationRootElements::PRIMARY_SENTINEL, errorCode); | |
| 421 } | |
| 422 | |
| 423 int32_t | |
| 424 CollationBaseDataBuilder::writeRootElementsRange( | |
| 425 uint32_t prevPrimary, uint32_t p, int32_t i, | |
| 426 UVector32 &table, UErrorCode &errorCode) { | |
| 427 if(U_FAILURE(errorCode) || i >= rootElements.size()) { return 0; } | |
| 428 U_ASSERT(prevPrimary < p); | |
| 429 // No ranges of single-byte primaries. | |
| 430 if((p & prevPrimary & 0xff0000) == 0) { return 0; } | |
| 431 // Lead bytes of compressible primaries must match. | |
| 432 UBool isCompressible = isCompressiblePrimary(p); | |
| 433 if((isCompressible || isCompressiblePrimary(prevPrimary)) && | |
| 434 (p & 0xff000000) != (prevPrimary & 0xff000000)) { | |
| 435 return 0; | |
| 436 } | |
| 437 // Number of bytes in the primaries. | |
| 438 UBool twoBytes; | |
| 439 // Number of primaries from prevPrimary to p. | |
| 440 int32_t step; | |
| 441 if((p & 0xff00) == 0) { | |
| 442 // 2-byte primary | |
| 443 if((prevPrimary & 0xff00) != 0) { return 0; } // length mismatch | |
| 444 twoBytes = TRUE; | |
| 445 step = diffTwoBytePrimaries(prevPrimary, p, isCompressible); | |
| 446 } else { | |
| 447 // 3-byte primary | |
| 448 if((prevPrimary & 0xff00) == 0) { return 0; } // length mismatch | |
| 449 twoBytes = FALSE; | |
| 450 step = diffThreeBytePrimaries(prevPrimary, p, isCompressible); | |
| 451 } | |
| 452 if(step > (int32_t)CollationRootElements::PRIMARY_STEP_MASK) { return 0; } | |
| 453 // See if there are more than two CEs with primaries increasing by "step" | |
| 454 // and with only common secondary/tertiary weights on all but the last one. | |
| 455 int32_t end = 0; // Initially 0: No range for just two primaries. | |
| 456 for(;;) { | |
| 457 prevPrimary = p; | |
| 458 // Calculate which primary we expect next. | |
| 459 uint32_t nextPrimary; // = p + step | |
| 460 if(twoBytes) { | |
| 461 nextPrimary = Collation::incTwoBytePrimaryByOffset(p, isCompressible
, step); | |
| 462 } else { | |
| 463 nextPrimary = Collation::incThreeBytePrimaryByOffset(p, isCompressib
le, step); | |
| 464 } | |
| 465 // Fetch the actual next CE. | |
| 466 int64_t ce = rootElements.elementAti(i); | |
| 467 p = (uint32_t)(ce >> 32); | |
| 468 uint32_t secTer = (uint32_t)ce & Collation::ONLY_SEC_TER_MASK; | |
| 469 // Does this primary increase by "step" from the last one? | |
| 470 if(p != nextPrimary || | |
| 471 // Do not cross into a new lead byte if either is compressible. | |
| 472 ((p & 0xff000000) != (prevPrimary & 0xff000000) && | |
| 473 (isCompressible || isCompressiblePrimary(p)))) { | |
| 474 // The range ends with the previous CE. | |
| 475 p = prevPrimary; | |
| 476 break; | |
| 477 } | |
| 478 // Extend the range to include this primary. | |
| 479 end = i++; | |
| 480 // This primary is the last in the range if it has non-common weights | |
| 481 // or if we are at the end of the list. | |
| 482 if(secTer != Collation::COMMON_SEC_AND_TER_CE || i >= rootElements.size(
)) { break; } | |
| 483 } | |
| 484 if(end != 0) { | |
| 485 table.addElement((int32_t)p | step, errorCode); | |
| 486 } | |
| 487 return end; | |
| 488 } | |
| 489 | |
| 490 U_NAMESPACE_END | |
| 491 | |
| 492 #endif // !UCONFIG_NO_COLLATION | |
| OLD | NEW |