OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * Copyright (C) 2012-2014, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ******************************************************************************* |
| 6 * collationbasedatabuilder.cpp |
| 7 * |
| 8 * created on: 2012aug11 |
| 9 * created by: Markus W. Scherer |
| 10 */ |
| 11 |
| 12 #include "unicode/utypes.h" |
| 13 |
| 14 #if !UCONFIG_NO_COLLATION |
| 15 |
| 16 #include "unicode/localpointer.h" |
| 17 #include "unicode/ucharstriebuilder.h" |
| 18 #include "unicode/uniset.h" |
| 19 #include "unicode/unistr.h" |
| 20 #include "unicode/utf16.h" |
| 21 #include "collation.h" |
| 22 #include "collationbasedatabuilder.h" |
| 23 #include "collationdata.h" |
| 24 #include "collationdatabuilder.h" |
| 25 #include "collationrootelements.h" |
| 26 #include "normalizer2impl.h" |
| 27 #include "uassert.h" |
| 28 #include "utrie2.h" |
| 29 #include "uvectr32.h" |
| 30 #include "uvectr64.h" |
| 31 #include "uvector.h" |
| 32 |
| 33 U_NAMESPACE_BEGIN |
| 34 |
| 35 namespace { |
| 36 |
| 37 /** |
| 38 * Compare two signed int64_t values as if they were unsigned. |
| 39 */ |
| 40 int32_t |
| 41 compareInt64AsUnsigned(int64_t a, int64_t b) { |
| 42 if((uint64_t)a < (uint64_t)b) { |
| 43 return -1; |
| 44 } else if((uint64_t)a > (uint64_t)b) { |
| 45 return 1; |
| 46 } else { |
| 47 return 0; |
| 48 } |
| 49 } |
| 50 |
| 51 // TODO: Try to merge this with the binarySearch in alphaindex.cpp. |
| 52 /** |
| 53 * Like Java Collections.binarySearch(List, String, Comparator). |
| 54 * |
| 55 * @return the index>=0 where the item was found, |
| 56 * or the index<0 for inserting the string at ~index in sorted order |
| 57 */ |
| 58 int32_t |
| 59 binarySearch(const UVector64 &list, int64_t ce) { |
| 60 if (list.size() == 0) { return ~0; } |
| 61 int32_t start = 0; |
| 62 int32_t limit = list.size(); |
| 63 for (;;) { |
| 64 int32_t i = (start + limit) / 2; |
| 65 int32_t cmp = compareInt64AsUnsigned(ce, list.elementAti(i)); |
| 66 if (cmp == 0) { |
| 67 return i; |
| 68 } else if (cmp < 0) { |
| 69 if (i == start) { |
| 70 return ~start; // insert ce before i |
| 71 } |
| 72 limit = i; |
| 73 } else { |
| 74 if (i == start) { |
| 75 return ~(start + 1); // insert ce after i |
| 76 } |
| 77 start = i; |
| 78 } |
| 79 } |
| 80 } |
| 81 |
| 82 } // namespace |
| 83 |
| 84 CollationBaseDataBuilder::CollationBaseDataBuilder(UErrorCode &errorCode) |
| 85 : CollationDataBuilder(errorCode), |
| 86 numericPrimary(0x12000000), |
| 87 firstHanPrimary(0), lastHanPrimary(0), hanStep(2), |
| 88 rootElements(errorCode) { |
| 89 } |
| 90 |
| 91 CollationBaseDataBuilder::~CollationBaseDataBuilder() { |
| 92 } |
| 93 |
| 94 void |
| 95 CollationBaseDataBuilder::init(UErrorCode &errorCode) { |
| 96 if(U_FAILURE(errorCode)) { return; } |
| 97 if(trie != NULL) { |
| 98 errorCode = U_INVALID_STATE_ERROR; |
| 99 return; |
| 100 } |
| 101 |
| 102 // Not compressible: |
| 103 // - digits |
| 104 // - Latin |
| 105 // - Hani |
| 106 // - trail weights |
| 107 // Some scripts are compressible, some are not. |
| 108 uprv_memset(compressibleBytes, FALSE, 256); |
| 109 compressibleBytes[Collation::UNASSIGNED_IMPLICIT_BYTE] = TRUE; |
| 110 |
| 111 // For a base, the default is to compute an unassigned-character implicit CE
. |
| 112 // This includes surrogate code points; see the last option in |
| 113 // UCA section 7.1.1 Handling Ill-Formed Code Unit Sequences. |
| 114 trie = utrie2_open(Collation::UNASSIGNED_CE32, Collation::FFFD_CE32, &errorC
ode); |
| 115 |
| 116 // Preallocate trie blocks for Latin in the hope that proximity helps with C
PU caches. |
| 117 for(UChar32 c = 0; c < 0x180; ++c) { |
| 118 utrie2_set32(trie, c, Collation::UNASSIGNED_CE32, &errorCode); |
| 119 } |
| 120 |
| 121 utrie2_set32(trie, 0xfffe, Collation::MERGE_SEPARATOR_CE32, &errorCode); |
| 122 // No root element for the merge separator which has 02 weights. |
| 123 // Some code assumes that the root first primary CE is the "space first prim
ary" |
| 124 // from FractionalUCA.txt. |
| 125 |
| 126 uint32_t hangulCE32 = Collation::makeCE32FromTagAndIndex(Collation::HANGUL_T
AG, 0); |
| 127 utrie2_setRange32(trie, Hangul::HANGUL_BASE, Hangul::HANGUL_END, hangulCE32,
TRUE, &errorCode); |
| 128 |
| 129 // Add a mapping for the first-unassigned boundary, |
| 130 // which is the AlphabeticIndex overflow boundary. |
| 131 UnicodeString s((UChar)0xfdd1); // Script boundary contractions start with
U+FDD1. |
| 132 s.append((UChar)0xfdd0); // Zzzz script sample character U+FDD0. |
| 133 int64_t ce = Collation::makeCE(Collation::FIRST_UNASSIGNED_PRIMARY); |
| 134 add(UnicodeString(), s, &ce, 1, errorCode); |
| 135 |
| 136 // Add a tailoring boundary, but not a mapping, for [first trailing]. |
| 137 ce = Collation::makeCE(Collation::FIRST_TRAILING_PRIMARY); |
| 138 rootElements.addElement(ce, errorCode); |
| 139 |
| 140 // U+FFFD maps to a CE with the third-highest primary weight, |
| 141 // for predictable handling of ill-formed UTF-8. |
| 142 uint32_t ce32 = Collation::FFFD_CE32; |
| 143 utrie2_set32(trie, 0xfffd, ce32, &errorCode); |
| 144 addRootElement(Collation::ceFromSimpleCE32(ce32), errorCode); |
| 145 |
| 146 // U+FFFF maps to a CE with the highest primary weight. |
| 147 ce32 = Collation::MAX_REGULAR_CE32; |
| 148 utrie2_set32(trie, 0xffff, ce32, &errorCode); |
| 149 addRootElement(Collation::ceFromSimpleCE32(ce32), errorCode); |
| 150 } |
| 151 |
| 152 void |
| 153 CollationBaseDataBuilder::initHanRanges(const UChar32 ranges[], int32_t length, |
| 154 UErrorCode &errorCode) { |
| 155 if(U_FAILURE(errorCode) || length == 0) { return; } |
| 156 if((length & 1) != 0) { // incomplete start/end pairs |
| 157 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 158 return; |
| 159 } |
| 160 if(isAssigned(0x4e00)) { // already set |
| 161 errorCode = U_INVALID_STATE_ERROR; |
| 162 return; |
| 163 } |
| 164 int32_t numHanCodePoints = 0; |
| 165 for(int32_t i = 0; i < length; i += 2) { |
| 166 UChar32 start = ranges[i]; |
| 167 UChar32 end = ranges[i + 1]; |
| 168 numHanCodePoints += end - start + 1; |
| 169 } |
| 170 // Multiply the number of code points by (gap+1). |
| 171 // Add hanStep+2 for tailoring after the last Han character. |
| 172 int32_t gap = 1; |
| 173 hanStep = gap + 1; |
| 174 int32_t numHan = numHanCodePoints * hanStep + hanStep + 2; |
| 175 // Numbers of Han primaries per lead byte determined by |
| 176 // numbers of 2nd (not compressible) times 3rd primary byte values. |
| 177 int32_t numHanPerLeadByte = 254 * 254; |
| 178 int32_t numHanLeadBytes = (numHan + numHanPerLeadByte - 1) / numHanPerLeadBy
te; |
| 179 uint32_t hanPrimary = (uint32_t)(Collation::UNASSIGNED_IMPLICIT_BYTE - numHa
nLeadBytes) << 24; |
| 180 hanPrimary |= 0x20200; |
| 181 firstHanPrimary = hanPrimary; |
| 182 for(int32_t i = 0; i < length; i += 2) { |
| 183 UChar32 start = ranges[i]; |
| 184 UChar32 end = ranges[i + 1]; |
| 185 hanPrimary = setPrimaryRangeAndReturnNext(start, end, hanPrimary, hanSte
p, errorCode); |
| 186 } |
| 187 // One past the actual last one, but that is harmless for tailoring. |
| 188 // It saves us from subtracting "hanStep" and handling underflows. |
| 189 lastHanPrimary = hanPrimary; |
| 190 } |
| 191 |
| 192 UBool |
| 193 CollationBaseDataBuilder::isCompressibleLeadByte(uint32_t b) const { |
| 194 return compressibleBytes[b]; |
| 195 } |
| 196 |
| 197 void |
| 198 CollationBaseDataBuilder::setCompressibleLeadByte(uint32_t b) { |
| 199 compressibleBytes[b] = TRUE; |
| 200 } |
| 201 |
| 202 int32_t |
| 203 CollationBaseDataBuilder::diffTwoBytePrimaries(uint32_t p1, uint32_t p2, UBool i
sCompressible) { |
| 204 if((p1 & 0xff000000) == (p2 & 0xff000000)) { |
| 205 // Same lead bytes. |
| 206 return (int32_t)(p2 - p1) >> 16; |
| 207 } else { |
| 208 int32_t linear1; |
| 209 int32_t linear2; |
| 210 int32_t factor; |
| 211 if(isCompressible) { |
| 212 // Second byte for compressible lead byte: 251 bytes 04..FE |
| 213 linear1 = (int32_t)((p1 >> 16) & 0xff) - 4; |
| 214 linear2 = (int32_t)((p2 >> 16) & 0xff) - 4; |
| 215 factor = 251; |
| 216 } else { |
| 217 // Second byte for incompressible lead byte: 254 bytes 02..FF |
| 218 linear1 = (int32_t)((p1 >> 16) & 0xff) - 2; |
| 219 linear2 = (int32_t)((p2 >> 16) & 0xff) - 2; |
| 220 factor = 254; |
| 221 } |
| 222 linear1 += factor * (int32_t)((p1 >> 24) & 0xff); |
| 223 linear2 += factor * (int32_t)((p2 >> 24) & 0xff); |
| 224 return linear2 - linear1; |
| 225 } |
| 226 } |
| 227 |
| 228 int32_t |
| 229 CollationBaseDataBuilder::diffThreeBytePrimaries(uint32_t p1, uint32_t p2, UBool
isCompressible) { |
| 230 if((p1 & 0xffff0000) == (p2 & 0xffff0000)) { |
| 231 // Same first two bytes. |
| 232 return (int32_t)(p2 - p1) >> 8; |
| 233 } else { |
| 234 // Third byte: 254 bytes 02..FF |
| 235 int32_t linear1 = (int32_t)((p1 >> 8) & 0xff) - 2; |
| 236 int32_t linear2 = (int32_t)((p2 >> 8) & 0xff) - 2; |
| 237 int32_t factor; |
| 238 if(isCompressible) { |
| 239 // Second byte for compressible lead byte: 251 bytes 04..FE |
| 240 linear1 += 254 * ((int32_t)((p1 >> 16) & 0xff) - 4); |
| 241 linear2 += 254 * ((int32_t)((p2 >> 16) & 0xff) - 4); |
| 242 factor = 251 * 254; |
| 243 } else { |
| 244 // Second byte for incompressible lead byte: 254 bytes 02..FF |
| 245 linear1 += 254 * ((int32_t)((p1 >> 16) & 0xff) - 2); |
| 246 linear2 += 254 * ((int32_t)((p2 >> 16) & 0xff) - 2); |
| 247 factor = 254 * 254; |
| 248 } |
| 249 linear1 += factor * (int32_t)((p1 >> 24) & 0xff); |
| 250 linear2 += factor * (int32_t)((p2 >> 24) & 0xff); |
| 251 return linear2 - linear1; |
| 252 } |
| 253 } |
| 254 |
| 255 uint32_t |
| 256 CollationBaseDataBuilder::encodeCEs(const int64_t ces[], int32_t cesLength, UErr
orCode &errorCode) { |
| 257 addRootElements(ces, cesLength, errorCode); |
| 258 return CollationDataBuilder::encodeCEs(ces, cesLength, errorCode); |
| 259 } |
| 260 |
| 261 void |
| 262 CollationBaseDataBuilder::addRootElements(const int64_t ces[], int32_t cesLength
, |
| 263 UErrorCode &errorCode) { |
| 264 if(U_FAILURE(errorCode)) { return; } |
| 265 for(int32_t i = 0; i < cesLength; ++i) { |
| 266 addRootElement(ces[i], errorCode); |
| 267 } |
| 268 } |
| 269 |
| 270 void |
| 271 CollationBaseDataBuilder::addRootElement(int64_t ce, UErrorCode &errorCode) { |
| 272 if(U_FAILURE(errorCode) || ce == 0) { return; } |
| 273 // Remove case bits. |
| 274 ce &= INT64_C(0xffffffffffff3fff); |
| 275 U_ASSERT((ce & 0xc0) == 0); // quaternary==0 |
| 276 // Ignore the CE if it has a Han primary weight and common secondary/tertiar
y weights. |
| 277 // We will add it later, as part of the Han ranges. |
| 278 uint32_t p = (uint32_t)(ce >> 32); |
| 279 uint32_t secTer = (uint32_t)ce; |
| 280 if(secTer == Collation::COMMON_SEC_AND_TER_CE) { |
| 281 if(firstHanPrimary <= p && p <= lastHanPrimary) { |
| 282 return; |
| 283 } |
| 284 } else { |
| 285 // Check that secondary and tertiary weights are >= "common". |
| 286 uint32_t s = secTer >> 16; |
| 287 uint32_t t = secTer & Collation::ONLY_TERTIARY_MASK; |
| 288 if((s != 0 && s < Collation::COMMON_WEIGHT16) || (t != 0 && t < Collatio
n::COMMON_WEIGHT16)) { |
| 289 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 290 return; |
| 291 } |
| 292 } |
| 293 // Check that primaries have at most 3 bytes. |
| 294 if((p & 0xff) != 0) { |
| 295 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 296 return; |
| 297 } |
| 298 int32_t i = binarySearch(rootElements, ce); |
| 299 if(i < 0) { |
| 300 rootElements.insertElementAt(ce, ~i, errorCode); |
| 301 } |
| 302 } |
| 303 |
| 304 void |
| 305 CollationBaseDataBuilder::addReorderingGroup(uint32_t firstByte, uint32_t lastBy
te, |
| 306 const UnicodeString &groupScripts, |
| 307 UErrorCode &errorCode) { |
| 308 if(U_FAILURE(errorCode)) { return; } |
| 309 if(groupScripts.isEmpty()) { |
| 310 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 311 return; |
| 312 } |
| 313 if(groupScripts.indexOf((UChar)USCRIPT_UNKNOWN) >= 0) { |
| 314 // Zzzz must not occur. |
| 315 // It is the code used in the API to separate low and high scripts. |
| 316 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 317 return; |
| 318 } |
| 319 // Note: We are mostly trusting the input data, |
| 320 // rather than verifying that reordering groups do not intersect |
| 321 // with their lead byte ranges nor their sets of scripts, |
| 322 // and that all script codes are valid. |
| 323 scripts.append((UChar)((firstByte << 8) | lastByte)); |
| 324 scripts.append((UChar)groupScripts.length()); |
| 325 scripts.append(groupScripts); |
| 326 } |
| 327 |
| 328 void |
| 329 CollationBaseDataBuilder::build(CollationData &data, UErrorCode &errorCode) { |
| 330 buildMappings(data, errorCode); |
| 331 data.numericPrimary = numericPrimary; |
| 332 data.compressibleBytes = compressibleBytes; |
| 333 data.scripts = reinterpret_cast<const uint16_t *>(scripts.getBuffer()); |
| 334 data.scriptsLength = scripts.length(); |
| 335 buildFastLatinTable(data, errorCode); |
| 336 } |
| 337 |
| 338 void |
| 339 CollationBaseDataBuilder::buildRootElementsTable(UVector32 &table, UErrorCode &e
rrorCode) { |
| 340 if(U_FAILURE(errorCode)) { return; } |
| 341 uint32_t nextHanPrimary = firstHanPrimary; // Set to 0xffffffff after the l
ast Han range. |
| 342 uint32_t prevPrimary = 0; // Start with primary ignorable CEs. |
| 343 UBool tryRange = FALSE; |
| 344 for(int32_t i = 0; i < rootElements.size(); ++i) { |
| 345 int64_t ce = rootElements.elementAti(i); |
| 346 uint32_t p = (uint32_t)(ce >> 32); |
| 347 uint32_t secTer = (uint32_t)ce & Collation::ONLY_SEC_TER_MASK; |
| 348 if(p != prevPrimary) { |
| 349 U_ASSERT((p & 0xff) == 0); |
| 350 int32_t end; |
| 351 if(p >= nextHanPrimary) { |
| 352 // Add a Han primary weight or range. |
| 353 // We omitted them initially, and omitted all CEs with Han prima
ries |
| 354 // and common secondary/tertiary weights. |
| 355 U_ASSERT(p > lastHanPrimary || secTer != Collation::COMMON_SEC_A
ND_TER_CE); |
| 356 if(p == nextHanPrimary) { |
| 357 // One single Han primary with non-common secondary/tertiary
weights. |
| 358 table.addElement((int32_t)p, errorCode); |
| 359 if(p < lastHanPrimary) { |
| 360 // Prepare for the next Han range. |
| 361 nextHanPrimary = Collation::incThreeBytePrimaryByOffset(
p, FALSE, hanStep); |
| 362 } else { |
| 363 // p is the last Han primary. |
| 364 nextHanPrimary = 0xffffffff; |
| 365 } |
| 366 } else { |
| 367 // p > nextHanPrimary: Add a Han primary range, starting wit
h nextHanPrimary. |
| 368 table.addElement((int32_t)nextHanPrimary, errorCode); |
| 369 if(nextHanPrimary == lastHanPrimary) { |
| 370 // nextHanPrimary == lastHanPrimary < p |
| 371 // We just wrote the single last Han primary. |
| 372 nextHanPrimary = 0xffffffff; |
| 373 } else if(p < lastHanPrimary) { |
| 374 // nextHanPrimary < p < lastHanPrimary |
| 375 // End the Han range on p, prepare for the next range. |
| 376 table.addElement((int32_t)p | hanStep, errorCode); |
| 377 nextHanPrimary = Collation::incThreeBytePrimaryByOffset(
p, FALSE, hanStep); |
| 378 } else if(p == lastHanPrimary) { |
| 379 // nextHanPrimary < p == lastHanPrimary |
| 380 // End the last Han range on p. |
| 381 table.addElement((int32_t)p | hanStep, errorCode); |
| 382 nextHanPrimary = 0xffffffff; |
| 383 } else { |
| 384 // nextHanPrimary < lastHanPrimary < p |
| 385 // End the last Han range, then write p. |
| 386 table.addElement((int32_t)lastHanPrimary | hanStep, erro
rCode); |
| 387 nextHanPrimary = 0xffffffff; |
| 388 table.addElement((int32_t)p, errorCode); |
| 389 } |
| 390 } |
| 391 } else if(tryRange && secTer == Collation::COMMON_SEC_AND_TER_CE && |
| 392 (end = writeRootElementsRange(prevPrimary, p, i + 1, table,
errorCode)) != 0) { |
| 393 // Multiple CEs with only common secondary/tertiary weights were |
| 394 // combined into a primary range. |
| 395 // The range end was written, ending with the primary of rootEle
ments[end]. |
| 396 ce = rootElements.elementAti(end); |
| 397 p = (uint32_t)(ce >> 32); |
| 398 secTer = (uint32_t)ce & Collation::ONLY_SEC_TER_MASK; |
| 399 i = end; |
| 400 } else { |
| 401 // Write the primary weight of a normal CE. |
| 402 table.addElement((int32_t)p, errorCode); |
| 403 } |
| 404 prevPrimary = p; |
| 405 } |
| 406 if(secTer == Collation::COMMON_SEC_AND_TER_CE) { |
| 407 // The common secondar/tertiary weights are implied in the primary u
nit. |
| 408 // If there is no intervening delta unit, then we will try to combin
e |
| 409 // the next several primaries into a range. |
| 410 tryRange = TRUE; |
| 411 } else { |
| 412 // For each new set of secondary/tertiary weights we write a delta u
nit. |
| 413 table.addElement((int32_t)secTer | CollationRootElements::SEC_TER_DE
LTA_FLAG, errorCode); |
| 414 tryRange = FALSE; |
| 415 } |
| 416 } |
| 417 |
| 418 // Limit sentinel for root elements. |
| 419 // This allows us to reduce range checks at runtime. |
| 420 table.addElement(CollationRootElements::PRIMARY_SENTINEL, errorCode); |
| 421 } |
| 422 |
| 423 int32_t |
| 424 CollationBaseDataBuilder::writeRootElementsRange( |
| 425 uint32_t prevPrimary, uint32_t p, int32_t i, |
| 426 UVector32 &table, UErrorCode &errorCode) { |
| 427 if(U_FAILURE(errorCode) || i >= rootElements.size()) { return 0; } |
| 428 U_ASSERT(prevPrimary < p); |
| 429 // No ranges of single-byte primaries. |
| 430 if((p & prevPrimary & 0xff0000) == 0) { return 0; } |
| 431 // Lead bytes of compressible primaries must match. |
| 432 UBool isCompressible = isCompressiblePrimary(p); |
| 433 if((isCompressible || isCompressiblePrimary(prevPrimary)) && |
| 434 (p & 0xff000000) != (prevPrimary & 0xff000000)) { |
| 435 return 0; |
| 436 } |
| 437 // Number of bytes in the primaries. |
| 438 UBool twoBytes; |
| 439 // Number of primaries from prevPrimary to p. |
| 440 int32_t step; |
| 441 if((p & 0xff00) == 0) { |
| 442 // 2-byte primary |
| 443 if((prevPrimary & 0xff00) != 0) { return 0; } // length mismatch |
| 444 twoBytes = TRUE; |
| 445 step = diffTwoBytePrimaries(prevPrimary, p, isCompressible); |
| 446 } else { |
| 447 // 3-byte primary |
| 448 if((prevPrimary & 0xff00) == 0) { return 0; } // length mismatch |
| 449 twoBytes = FALSE; |
| 450 step = diffThreeBytePrimaries(prevPrimary, p, isCompressible); |
| 451 } |
| 452 if(step > (int32_t)CollationRootElements::PRIMARY_STEP_MASK) { return 0; } |
| 453 // See if there are more than two CEs with primaries increasing by "step" |
| 454 // and with only common secondary/tertiary weights on all but the last one. |
| 455 int32_t end = 0; // Initially 0: No range for just two primaries. |
| 456 for(;;) { |
| 457 prevPrimary = p; |
| 458 // Calculate which primary we expect next. |
| 459 uint32_t nextPrimary; // = p + step |
| 460 if(twoBytes) { |
| 461 nextPrimary = Collation::incTwoBytePrimaryByOffset(p, isCompressible
, step); |
| 462 } else { |
| 463 nextPrimary = Collation::incThreeBytePrimaryByOffset(p, isCompressib
le, step); |
| 464 } |
| 465 // Fetch the actual next CE. |
| 466 int64_t ce = rootElements.elementAti(i); |
| 467 p = (uint32_t)(ce >> 32); |
| 468 uint32_t secTer = (uint32_t)ce & Collation::ONLY_SEC_TER_MASK; |
| 469 // Does this primary increase by "step" from the last one? |
| 470 if(p != nextPrimary || |
| 471 // Do not cross into a new lead byte if either is compressible. |
| 472 ((p & 0xff000000) != (prevPrimary & 0xff000000) && |
| 473 (isCompressible || isCompressiblePrimary(p)))) { |
| 474 // The range ends with the previous CE. |
| 475 p = prevPrimary; |
| 476 break; |
| 477 } |
| 478 // Extend the range to include this primary. |
| 479 end = i++; |
| 480 // This primary is the last in the range if it has non-common weights |
| 481 // or if we are at the end of the list. |
| 482 if(secTer != Collation::COMMON_SEC_AND_TER_CE || i >= rootElements.size(
)) { break; } |
| 483 } |
| 484 if(end != 0) { |
| 485 table.addElement((int32_t)p | step, errorCode); |
| 486 } |
| 487 return end; |
| 488 } |
| 489 |
| 490 U_NAMESPACE_END |
| 491 |
| 492 #endif // !UCONFIG_NO_COLLATION |
OLD | NEW |