OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 2013-2014, International Business Machines | 3 * Copyright (C) 2013-2014, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 * collationbuilder.cpp | 6 * collationbuilder.cpp |
7 * | 7 * |
8 * (replaced the former ucol_bld.cpp) | 8 * (replaced the former ucol_bld.cpp) |
9 * | 9 * |
10 * created on: 2013may06 | 10 * created on: 2013may06 |
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
182 if(strength != UCOL_DEFAULT) { | 182 if(strength != UCOL_DEFAULT) { |
183 setAttribute(UCOL_STRENGTH, (UColAttributeValue)strength, errorCode); | 183 setAttribute(UCOL_STRENGTH, (UColAttributeValue)strength, errorCode); |
184 } | 184 } |
185 if(decompositionMode != UCOL_DEFAULT) { | 185 if(decompositionMode != UCOL_DEFAULT) { |
186 setAttribute(UCOL_NORMALIZATION_MODE, decompositionMode, errorCode); | 186 setAttribute(UCOL_NORMALIZATION_MODE, decompositionMode, errorCode); |
187 } | 187 } |
188 } | 188 } |
189 | 189 |
190 // CollationBuilder implementation ----------------------------------------- *** | 190 // CollationBuilder implementation ----------------------------------------- *** |
191 | 191 |
| 192 // Some compilers don't care if constants are defined in the .cpp file. |
| 193 // MS Visual C++ does not like it, but gcc requires it. clang does not care. |
| 194 #ifndef _MSC_VER |
| 195 const int32_t CollationBuilder::HAS_BEFORE2; |
| 196 const int32_t CollationBuilder::HAS_BEFORE3; |
| 197 #endif |
| 198 |
192 CollationBuilder::CollationBuilder(const CollationTailoring *b, UErrorCode &erro
rCode) | 199 CollationBuilder::CollationBuilder(const CollationTailoring *b, UErrorCode &erro
rCode) |
193 : nfd(*Normalizer2::getNFDInstance(errorCode)), | 200 : nfd(*Normalizer2::getNFDInstance(errorCode)), |
194 fcd(*Normalizer2Factory::getFCDInstance(errorCode)), | 201 fcd(*Normalizer2Factory::getFCDInstance(errorCode)), |
195 nfcImpl(*Normalizer2Factory::getNFCImpl(errorCode)), | 202 nfcImpl(*Normalizer2Factory::getNFCImpl(errorCode)), |
196 base(b), | 203 base(b), |
197 baseData(b->data), | 204 baseData(b->data), |
198 rootElements(b->data->rootElements, b->data->rootElementsLength), | 205 rootElements(b->data->rootElements, b->data->rootElementsLength), |
199 variableTop(0), | 206 variableTop(0), |
200 dataBuilder(new CollationDataBuilder(errorCode)), fastLatinEnabled(TRU
E), | 207 dataBuilder(new CollationDataBuilder(errorCode)), fastLatinEnabled(TRU
E), |
201 errorReason(NULL), | 208 errorReason(NULL), |
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
306 } | 313 } |
307 } | 314 } |
308 if(strength == UCOL_IDENTICAL) { return; } // simple reset-at-position | 315 if(strength == UCOL_IDENTICAL) { return; } // simple reset-at-position |
309 | 316 |
310 // &[before strength]position | 317 // &[before strength]position |
311 U_ASSERT(UCOL_PRIMARY <= strength && strength <= UCOL_TERTIARY); | 318 U_ASSERT(UCOL_PRIMARY <= strength && strength <= UCOL_TERTIARY); |
312 int32_t index = findOrInsertNodeForCEs(strength, parserErrorReason, errorCod
e); | 319 int32_t index = findOrInsertNodeForCEs(strength, parserErrorReason, errorCod
e); |
313 if(U_FAILURE(errorCode)) { return; } | 320 if(U_FAILURE(errorCode)) { return; } |
314 | 321 |
315 int64_t node = nodes.elementAti(index); | 322 int64_t node = nodes.elementAti(index); |
316 // If the index is for a "weaker" tailored node, | 323 // If the index is for a "weaker" node, |
317 // then skip backwards over this and further "weaker" nodes. | 324 // then skip backwards over this and further "weaker" nodes. |
318 while(strengthFromNode(node) > strength) { | 325 while(strengthFromNode(node) > strength) { |
319 index = previousIndexFromNode(node); | 326 index = previousIndexFromNode(node); |
320 node = nodes.elementAti(index); | 327 node = nodes.elementAti(index); |
321 } | 328 } |
322 | 329 |
323 // Find or insert a node whose index we will put into a temporary CE. | 330 // Find or insert a node whose index we will put into a temporary CE. |
324 if(strengthFromNode(node) == strength && isTailoredNode(node)) { | 331 if(strengthFromNode(node) == strength && isTailoredNode(node)) { |
325 // Reset to just before this same-strength tailored node. | 332 // Reset to just before this same-strength tailored node. |
326 index = previousIndexFromNode(node); | 333 index = previousIndexFromNode(node); |
(...skipping 26 matching lines...) Expand all Loading... |
353 int32_t nextIndex = nextIndexFromNode(node); | 360 int32_t nextIndex = nextIndexFromNode(node); |
354 if(nextIndex == 0) { break; } | 361 if(nextIndex == 0) { break; } |
355 index = nextIndex; | 362 index = nextIndex; |
356 } | 363 } |
357 } else { | 364 } else { |
358 // &[before 2] or &[before 3] | 365 // &[before 2] or &[before 3] |
359 index = findCommonNode(index, UCOL_SECONDARY); | 366 index = findCommonNode(index, UCOL_SECONDARY); |
360 if(strength >= UCOL_TERTIARY) { | 367 if(strength >= UCOL_TERTIARY) { |
361 index = findCommonNode(index, UCOL_TERTIARY); | 368 index = findCommonNode(index, UCOL_TERTIARY); |
362 } | 369 } |
| 370 // findCommonNode() stayed on the stronger node or moved to |
| 371 // an explicit common-weight node of the reset-before strength. |
363 node = nodes.elementAti(index); | 372 node = nodes.elementAti(index); |
364 if(strengthFromNode(node) == strength) { | 373 if(strengthFromNode(node) == strength) { |
365 // Found a same-strength node with an explicit weight. | 374 // Found a same-strength node with an explicit weight. |
366 uint32_t weight16 = weight16FromNode(node); | 375 uint32_t weight16 = weight16FromNode(node); |
367 if(weight16 == 0) { | 376 if(weight16 == 0) { |
368 errorCode = U_UNSUPPORTED_ERROR; | 377 errorCode = U_UNSUPPORTED_ERROR; |
369 if(strength == UCOL_SECONDARY) { | 378 if(strength == UCOL_SECONDARY) { |
370 parserErrorReason = "reset secondary-before secondary ignora
ble not possible"; | 379 parserErrorReason = "reset secondary-before secondary ignora
ble not possible"; |
371 } else { | 380 } else { |
372 parserErrorReason = "reset tertiary-before completely ignora
ble not possible"; | 381 parserErrorReason = "reset tertiary-before completely ignora
ble not possible"; |
373 } | 382 } |
374 return; | 383 return; |
375 } | 384 } |
376 U_ASSERT(weight16 >= Collation::COMMON_WEIGHT16); | 385 U_ASSERT(weight16 > Collation::BEFORE_WEIGHT16); |
| 386 // Reset to just before this node. |
| 387 // Insert the preceding same-level explicit weight if it is not ther
e already. |
| 388 // Which explicit weight immediately precedes this one? |
| 389 weight16 = getWeight16Before(index, node, strength); |
| 390 // Does this preceding weight have a node? |
| 391 uint32_t previousWeight16; |
377 int32_t previousIndex = previousIndexFromNode(node); | 392 int32_t previousIndex = previousIndexFromNode(node); |
378 if(weight16 == Collation::COMMON_WEIGHT16) { | 393 for(int32_t i = previousIndex;; i = previousIndexFromNode(node)) { |
379 // Reset to just before this same-strength common-weight node. | 394 node = nodes.elementAti(i); |
| 395 int32_t previousStrength = strengthFromNode(node); |
| 396 if(previousStrength < strength) { |
| 397 U_ASSERT(weight16 >= Collation::COMMON_WEIGHT16 || i == prev
iousIndex); |
| 398 // Either the reset element has an above-common weight and |
| 399 // the parent node provides the implied common weight, |
| 400 // or the reset element has a weight<=common in the node |
| 401 // right after the parent, and we need to insert the precedi
ng weight. |
| 402 previousWeight16 = Collation::COMMON_WEIGHT16; |
| 403 break; |
| 404 } else if(previousStrength == strength && !isTailoredNode(node))
{ |
| 405 previousWeight16 = weight16FromNode(node); |
| 406 break; |
| 407 } |
| 408 // Skip weaker nodes and same-level tailored nodes. |
| 409 } |
| 410 if(previousWeight16 == weight16) { |
| 411 // The preceding weight has a node, |
| 412 // maybe with following weaker or tailored nodes. |
| 413 // Reset to the last of them. |
380 index = previousIndex; | 414 index = previousIndex; |
381 } else { | 415 } else { |
382 // A non-common weight is only possible from a root CE. | 416 // Insert a node with the preceding weight, reset to that. |
383 // Find the higher-level weights, which must all be explicit, | 417 node = nodeFromWeight16(weight16) | nodeFromStrength(strength); |
384 // and then find the preceding weight for this level. | 418 index = insertNodeBetween(previousIndex, index, node, errorCode)
; |
385 uint32_t previousWeight16 = 0; | |
386 int32_t previousWeightIndex = -1; | |
387 int32_t i = index; | |
388 if(strength == UCOL_SECONDARY) { | |
389 uint32_t p; | |
390 do { | |
391 i = previousIndexFromNode(node); | |
392 node = nodes.elementAti(i); | |
393 if(strengthFromNode(node) == UCOL_SECONDARY && !isTailor
edNode(node) && | |
394 previousWeightIndex < 0) { | |
395 previousWeightIndex = i; | |
396 previousWeight16 = weight16FromNode(node); | |
397 } | |
398 } while(strengthFromNode(node) > UCOL_PRIMARY); | |
399 U_ASSERT(!isTailoredNode(node)); | |
400 p = weight32FromNode(node); | |
401 weight16 = rootElements.getSecondaryBefore(p, weight16); | |
402 } else { | |
403 uint32_t p, s; | |
404 do { | |
405 i = previousIndexFromNode(node); | |
406 node = nodes.elementAti(i); | |
407 if(strengthFromNode(node) == UCOL_TERTIARY && !isTailore
dNode(node) && | |
408 previousWeightIndex < 0) { | |
409 previousWeightIndex = i; | |
410 previousWeight16 = weight16FromNode(node); | |
411 } | |
412 } while(strengthFromNode(node) > UCOL_SECONDARY); | |
413 U_ASSERT(!isTailoredNode(node)); | |
414 if(strengthFromNode(node) == UCOL_SECONDARY) { | |
415 s = weight16FromNode(node); | |
416 do { | |
417 i = previousIndexFromNode(node); | |
418 node = nodes.elementAti(i); | |
419 } while(strengthFromNode(node) > UCOL_PRIMARY); | |
420 U_ASSERT(!isTailoredNode(node)); | |
421 } else { | |
422 U_ASSERT(!nodeHasBefore2(node)); | |
423 s = Collation::COMMON_WEIGHT16; | |
424 } | |
425 p = weight32FromNode(node); | |
426 weight16 = rootElements.getTertiaryBefore(p, s, weight16); | |
427 U_ASSERT((weight16 & ~Collation::ONLY_TERTIARY_MASK) == 0); | |
428 } | |
429 // Find or insert the new explicit weight before the current one
. | |
430 if(previousWeightIndex >= 0 && weight16 == previousWeight16) { | |
431 // Tailor after the last node between adjacent root nodes. | |
432 index = previousIndex; | |
433 } else { | |
434 node = nodeFromWeight16(weight16) | nodeFromStrength(strengt
h); | |
435 index = insertNodeBetween(previousIndex, index, node, errorC
ode); | |
436 } | |
437 } | 419 } |
438 } else { | 420 } else { |
439 // Found a stronger node with implied strength-common weight. | 421 // Found a stronger node with implied strength-common weight. |
440 int64_t hasBefore3 = 0; | 422 uint32_t weight16 = getWeight16Before(index, node, strength); |
441 if(strength == UCOL_SECONDARY) { | 423 index = findOrInsertWeakNode(index, weight16, strength, errorCode); |
442 U_ASSERT(!nodeHasBefore2(node)); | |
443 // Move the HAS_BEFORE3 flag from the parent node | |
444 // to the new secondary common node. | |
445 hasBefore3 = node & HAS_BEFORE3; | |
446 node = (node & ~(int64_t)HAS_BEFORE3) | HAS_BEFORE2; | |
447 } else { | |
448 U_ASSERT(!nodeHasBefore3(node)); | |
449 node |= HAS_BEFORE3; | |
450 } | |
451 nodes.setElementAt(node, index); | |
452 int32_t nextIndex = nextIndexFromNode(node); | |
453 // Insert default nodes with weights 02 and 05, reset to the 02 node
. | |
454 node = nodeFromWeight16(BEFORE_WEIGHT16) | nodeFromStrength(strength
); | |
455 index = insertNodeBetween(index, nextIndex, node, errorCode); | |
456 node = nodeFromWeight16(Collation::COMMON_WEIGHT16) | hasBefore3 | | |
457 nodeFromStrength(strength); | |
458 insertNodeBetween(index, nextIndex, node, errorCode); | |
459 } | 424 } |
460 // Strength of the temporary CE = strength of its reset position. | 425 // Strength of the temporary CE = strength of its reset position. |
461 // Code above raises an error if the before-strength is stronger. | 426 // Code above raises an error if the before-strength is stronger. |
462 strength = ceStrength(ces[cesLength - 1]); | 427 strength = ceStrength(ces[cesLength - 1]); |
463 } | 428 } |
464 if(U_FAILURE(errorCode)) { | 429 if(U_FAILURE(errorCode)) { |
465 parserErrorReason = "inserting reset position for &[before n]"; | 430 parserErrorReason = "inserting reset position for &[before n]"; |
466 return; | 431 return; |
467 } | 432 } |
468 ces[cesLength - 1] = tempCEFromIndexAndStrength(index, strength); | 433 ces[cesLength - 1] = tempCEFromIndexAndStrength(index, strength); |
469 } | 434 } |
470 | 435 |
| 436 uint32_t |
| 437 CollationBuilder::getWeight16Before(int32_t index, int64_t node, int32_t level)
{ |
| 438 U_ASSERT(strengthFromNode(node) < level || !isTailoredNode(node)); |
| 439 // Collect the root CE weights if this node is for a root CE. |
| 440 // If it is not, then return the low non-primary boundary for a tailored CE. |
| 441 uint32_t t; |
| 442 if(strengthFromNode(node) == UCOL_TERTIARY) { |
| 443 t = weight16FromNode(node); |
| 444 } else { |
| 445 t = Collation::COMMON_WEIGHT16; // Stronger node with implied common we
ight. |
| 446 } |
| 447 while(strengthFromNode(node) > UCOL_SECONDARY) { |
| 448 index = previousIndexFromNode(node); |
| 449 node = nodes.elementAti(index); |
| 450 } |
| 451 if(isTailoredNode(node)) { |
| 452 return Collation::BEFORE_WEIGHT16; |
| 453 } |
| 454 uint32_t s; |
| 455 if(strengthFromNode(node) == UCOL_SECONDARY) { |
| 456 s = weight16FromNode(node); |
| 457 } else { |
| 458 s = Collation::COMMON_WEIGHT16; // Stronger node with implied common we
ight. |
| 459 } |
| 460 while(strengthFromNode(node) > UCOL_PRIMARY) { |
| 461 index = previousIndexFromNode(node); |
| 462 node = nodes.elementAti(index); |
| 463 } |
| 464 if(isTailoredNode(node)) { |
| 465 return Collation::BEFORE_WEIGHT16; |
| 466 } |
| 467 // [p, s, t] is a root CE. Return the preceding weight for the requested lev
el. |
| 468 uint32_t p = weight32FromNode(node); |
| 469 uint32_t weight16; |
| 470 if(level == UCOL_SECONDARY) { |
| 471 weight16 = rootElements.getSecondaryBefore(p, s); |
| 472 } else { |
| 473 weight16 = rootElements.getTertiaryBefore(p, s, t); |
| 474 U_ASSERT((weight16 & ~Collation::ONLY_TERTIARY_MASK) == 0); |
| 475 } |
| 476 return weight16; |
| 477 } |
| 478 |
471 int64_t | 479 int64_t |
472 CollationBuilder::getSpecialResetPosition(const UnicodeString &str, | 480 CollationBuilder::getSpecialResetPosition(const UnicodeString &str, |
473 const char *&parserErrorReason, UError
Code &errorCode) { | 481 const char *&parserErrorReason, UError
Code &errorCode) { |
474 U_ASSERT(str.length() == 2); | 482 U_ASSERT(str.length() == 2); |
475 int64_t ce; | 483 int64_t ce; |
476 int32_t strength = UCOL_PRIMARY; | 484 int32_t strength = UCOL_PRIMARY; |
477 UBool isBoundary = FALSE; | 485 UBool isBoundary = FALSE; |
478 UChar32 pos = str.charAt(1) - CollationRuleParser::POS_BASE; | 486 UChar32 pos = str.charAt(1) - CollationRuleParser::POS_BASE; |
479 U_ASSERT(0 <= pos && pos <= CollationRuleParser::LAST_TRAILING); | 487 U_ASSERT(0 <= pos && pos <= CollationRuleParser::LAST_TRAILING); |
480 switch(pos) { | 488 switch(pos) { |
(...skipping 305 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
786 | 794 |
787 int32_t | 795 int32_t |
788 CollationBuilder::findOrInsertNodeForRootCE(int64_t ce, int32_t strength, UError
Code &errorCode) { | 796 CollationBuilder::findOrInsertNodeForRootCE(int64_t ce, int32_t strength, UError
Code &errorCode) { |
789 if(U_FAILURE(errorCode)) { return 0; } | 797 if(U_FAILURE(errorCode)) { return 0; } |
790 U_ASSERT((uint8_t)(ce >> 56) != Collation::UNASSIGNED_IMPLICIT_BYTE); | 798 U_ASSERT((uint8_t)(ce >> 56) != Collation::UNASSIGNED_IMPLICIT_BYTE); |
791 | 799 |
792 // Find or insert the node for each of the root CE's weights, | 800 // Find or insert the node for each of the root CE's weights, |
793 // down to the requested level/strength. | 801 // down to the requested level/strength. |
794 // Root CEs must have common=zero quaternary weights (for which we never ins
ert any nodes). | 802 // Root CEs must have common=zero quaternary weights (for which we never ins
ert any nodes). |
795 U_ASSERT((ce & 0xc0) == 0); | 803 U_ASSERT((ce & 0xc0) == 0); |
796 int32_t index = findOrInsertNodeForPrimary((uint32_t)(ce >> 32) , errorCode)
; | 804 int32_t index = findOrInsertNodeForPrimary((uint32_t)(ce >> 32), errorCode); |
797 if(strength >= UCOL_SECONDARY) { | 805 if(strength >= UCOL_SECONDARY) { |
798 uint32_t lower32 = (uint32_t)ce; | 806 uint32_t lower32 = (uint32_t)ce; |
799 index = findOrInsertWeakNode(index, lower32 >> 16, UCOL_SECONDARY, error
Code); | 807 index = findOrInsertWeakNode(index, lower32 >> 16, UCOL_SECONDARY, error
Code); |
800 if(strength >= UCOL_TERTIARY) { | 808 if(strength >= UCOL_TERTIARY) { |
801 index = findOrInsertWeakNode(index, lower32 & Collation::ONLY_TERTIA
RY_MASK, | 809 index = findOrInsertWeakNode(index, lower32 & Collation::ONLY_TERTIA
RY_MASK, |
802 UCOL_TERTIARY, errorCode); | 810 UCOL_TERTIARY, errorCode); |
803 } | 811 } |
804 } | 812 } |
805 return index; | 813 return index; |
806 } | 814 } |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
856 nodes.addElement(nodeFromWeight32(p), errorCode); | 864 nodes.addElement(nodeFromWeight32(p), errorCode); |
857 rootPrimaryIndexes.insertElementAt(index, ~rootIndex, errorCode); | 865 rootPrimaryIndexes.insertElementAt(index, ~rootIndex, errorCode); |
858 return index; | 866 return index; |
859 } | 867 } |
860 } | 868 } |
861 | 869 |
862 int32_t | 870 int32_t |
863 CollationBuilder::findOrInsertWeakNode(int32_t index, uint32_t weight16, int32_t
level, UErrorCode &errorCode) { | 871 CollationBuilder::findOrInsertWeakNode(int32_t index, uint32_t weight16, int32_t
level, UErrorCode &errorCode) { |
864 if(U_FAILURE(errorCode)) { return 0; } | 872 if(U_FAILURE(errorCode)) { return 0; } |
865 U_ASSERT(0 <= index && index < nodes.size()); | 873 U_ASSERT(0 <= index && index < nodes.size()); |
| 874 U_ASSERT(UCOL_SECONDARY <= level && level <= UCOL_TERTIARY); |
866 | 875 |
867 U_ASSERT(weight16 == 0 || weight16 >= Collation::COMMON_WEIGHT16); | |
868 // Only reset-before inserts common weights. | |
869 if(weight16 == Collation::COMMON_WEIGHT16) { | 876 if(weight16 == Collation::COMMON_WEIGHT16) { |
870 return findCommonNode(index, level); | 877 return findCommonNode(index, level); |
871 } | 878 } |
| 879 |
| 880 // If this will be the first below-common weight for the parent node, |
| 881 // then we will also need to insert a common weight after it. |
| 882 int64_t node = nodes.elementAti(index); |
| 883 U_ASSERT(strengthFromNode(node) < level); // parent node is stronger |
| 884 if(weight16 != 0 && weight16 < Collation::COMMON_WEIGHT16) { |
| 885 int32_t hasThisLevelBefore = level == UCOL_SECONDARY ? HAS_BEFORE2 : HAS
_BEFORE3; |
| 886 if((node & hasThisLevelBefore) == 0) { |
| 887 // The parent node has an implied level-common weight. |
| 888 int64_t commonNode = |
| 889 nodeFromWeight16(Collation::COMMON_WEIGHT16) | nodeFromStrength(
level); |
| 890 if(level == UCOL_SECONDARY) { |
| 891 // Move the HAS_BEFORE3 flag from the parent node |
| 892 // to the new secondary common node. |
| 893 commonNode |= node & HAS_BEFORE3; |
| 894 node &= ~(int64_t)HAS_BEFORE3; |
| 895 } |
| 896 nodes.setElementAt(node | hasThisLevelBefore, index); |
| 897 // Insert below-common-weight node. |
| 898 int32_t nextIndex = nextIndexFromNode(node); |
| 899 node = nodeFromWeight16(weight16) | nodeFromStrength(level); |
| 900 index = insertNodeBetween(index, nextIndex, node, errorCode); |
| 901 // Insert common-weight node. |
| 902 insertNodeBetween(index, nextIndex, commonNode, errorCode); |
| 903 // Return index of below-common-weight node. |
| 904 return index; |
| 905 } |
| 906 } |
| 907 |
872 // Find the root CE's weight for this level. | 908 // Find the root CE's weight for this level. |
873 // Postpone insertion if not found: | 909 // Postpone insertion if not found: |
874 // Insert the new root node before the next stronger node, | 910 // Insert the new root node before the next stronger node, |
875 // or before the next root node with the same strength and a larger weight. | 911 // or before the next root node with the same strength and a larger weight. |
876 int64_t node = nodes.elementAti(index); | |
877 int32_t nextIndex; | 912 int32_t nextIndex; |
878 while((nextIndex = nextIndexFromNode(node)) != 0) { | 913 while((nextIndex = nextIndexFromNode(node)) != 0) { |
879 node = nodes.elementAti(nextIndex); | 914 node = nodes.elementAti(nextIndex); |
880 int32_t nextStrength = strengthFromNode(node); | 915 int32_t nextStrength = strengthFromNode(node); |
881 if(nextStrength <= level) { | 916 if(nextStrength <= level) { |
882 // Insert before a stronger node. | 917 // Insert before a stronger node. |
883 if(nextStrength < level) { break; } | 918 if(nextStrength < level) { break; } |
884 // nextStrength == level | 919 // nextStrength == level |
885 if(!isTailoredNode(node)) { | 920 if(!isTailoredNode(node)) { |
886 uint32_t nextWeight16 = weight16FromNode(node); | 921 uint32_t nextWeight16 = weight16FromNode(node); |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
954 // The current node is no stronger. | 989 // The current node is no stronger. |
955 return index; | 990 return index; |
956 } | 991 } |
957 if(strength == UCOL_SECONDARY ? !nodeHasBefore2(node) : !nodeHasBefore3(node
)) { | 992 if(strength == UCOL_SECONDARY ? !nodeHasBefore2(node) : !nodeHasBefore3(node
)) { |
958 // The current node implies the strength-common weight. | 993 // The current node implies the strength-common weight. |
959 return index; | 994 return index; |
960 } | 995 } |
961 index = nextIndexFromNode(node); | 996 index = nextIndexFromNode(node); |
962 node = nodes.elementAti(index); | 997 node = nodes.elementAti(index); |
963 U_ASSERT(!isTailoredNode(node) && strengthFromNode(node) == strength && | 998 U_ASSERT(!isTailoredNode(node) && strengthFromNode(node) == strength && |
964 weight16FromNode(node) == BEFORE_WEIGHT16); | 999 weight16FromNode(node) < Collation::COMMON_WEIGHT16); |
965 // Skip to the explicit common node. | 1000 // Skip to the explicit common node. |
966 do { | 1001 do { |
967 index = nextIndexFromNode(node); | 1002 index = nextIndexFromNode(node); |
968 node = nodes.elementAti(index); | 1003 node = nodes.elementAti(index); |
969 U_ASSERT(strengthFromNode(node) >= strength); | 1004 U_ASSERT(strengthFromNode(node) >= strength); |
970 } while(isTailoredNode(node) || strengthFromNode(node) > strength); | 1005 } while(isTailoredNode(node) || strengthFromNode(node) > strength || |
| 1006 weight16FromNode(node) < Collation::COMMON_WEIGHT16); |
971 U_ASSERT(weight16FromNode(node) == Collation::COMMON_WEIGHT16); | 1007 U_ASSERT(weight16FromNode(node) == Collation::COMMON_WEIGHT16); |
972 return index; | 1008 return index; |
973 } | 1009 } |
974 | 1010 |
975 void | 1011 void |
976 CollationBuilder::setCaseBits(const UnicodeString &nfdString, | 1012 CollationBuilder::setCaseBits(const UnicodeString &nfdString, |
977 const char *&parserErrorReason, UErrorCode &errorC
ode) { | 1013 const char *&parserErrorReason, UErrorCode &errorC
ode) { |
978 if(U_FAILURE(errorCode)) { return; } | 1014 if(U_FAILURE(errorCode)) { return; } |
979 int32_t numTailoredPrimaries = 0; | 1015 int32_t numTailoredPrimaries = 0; |
980 for(int32_t i = 0; i < cesLength; ++i) { | 1016 for(int32_t i = 0; i < cesLength; ++i) { |
(...skipping 363 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1344 } | 1380 } |
1345 | 1381 |
1346 #endif | 1382 #endif |
1347 | 1383 |
1348 void | 1384 void |
1349 CollationBuilder::makeTailoredCEs(UErrorCode &errorCode) { | 1385 CollationBuilder::makeTailoredCEs(UErrorCode &errorCode) { |
1350 if(U_FAILURE(errorCode)) { return; } | 1386 if(U_FAILURE(errorCode)) { return; } |
1351 | 1387 |
1352 CollationWeights primaries, secondaries, tertiaries; | 1388 CollationWeights primaries, secondaries, tertiaries; |
1353 int64_t *nodesArray = nodes.getBuffer(); | 1389 int64_t *nodesArray = nodes.getBuffer(); |
| 1390 #ifdef DEBUG_COLLATION_BUILDER |
| 1391 puts("\nCollationBuilder::makeTailoredCEs()"); |
| 1392 #endif |
1354 | 1393 |
1355 for(int32_t rpi = 0; rpi < rootPrimaryIndexes.size(); ++rpi) { | 1394 for(int32_t rpi = 0; rpi < rootPrimaryIndexes.size(); ++rpi) { |
1356 int32_t i = rootPrimaryIndexes.elementAti(rpi); | 1395 int32_t i = rootPrimaryIndexes.elementAti(rpi); |
1357 int64_t node = nodesArray[i]; | 1396 int64_t node = nodesArray[i]; |
1358 uint32_t p = weight32FromNode(node); | 1397 uint32_t p = weight32FromNode(node); |
1359 uint32_t s = p == 0 ? 0 : Collation::COMMON_WEIGHT16; | 1398 uint32_t s = p == 0 ? 0 : Collation::COMMON_WEIGHT16; |
1360 uint32_t t = s; | 1399 uint32_t t = s; |
1361 uint32_t q = 0; | 1400 uint32_t q = 0; |
1362 UBool pIsTailored = FALSE; | 1401 UBool pIsTailored = FALSE; |
1363 UBool sIsTailored = FALSE; | 1402 UBool sIsTailored = FALSE; |
(...skipping 27 matching lines...) Expand all Loading... |
1391 #endif | 1430 #endif |
1392 if(!tIsTailored) { | 1431 if(!tIsTailored) { |
1393 // First tailored tertiary node for [p, s]. | 1432 // First tailored tertiary node for [p, s]. |
1394 int32_t tCount = countTailoredNodes(nodesArray, next
Index, | 1433 int32_t tCount = countTailoredNodes(nodesArray, next
Index, |
1395 UCOL_TERTIARY) +
1; | 1434 UCOL_TERTIARY) +
1; |
1396 uint32_t tLimit; | 1435 uint32_t tLimit; |
1397 if(t == 0) { | 1436 if(t == 0) { |
1398 // Gap at the beginning of the tertiary CE range
. | 1437 // Gap at the beginning of the tertiary CE range
. |
1399 t = rootElements.getTertiaryBoundary() - 0x100; | 1438 t = rootElements.getTertiaryBoundary() - 0x100; |
1400 tLimit = rootElements.getFirstTertiaryCE() & Col
lation::ONLY_TERTIARY_MASK; | 1439 tLimit = rootElements.getFirstTertiaryCE() & Col
lation::ONLY_TERTIARY_MASK; |
1401 } else if(t == BEFORE_WEIGHT16) { | |
1402 tLimit = Collation::COMMON_WEIGHT16; | |
1403 } else if(!pIsTailored && !sIsTailored) { | 1440 } else if(!pIsTailored && !sIsTailored) { |
1404 // p and s are root weights. | 1441 // p and s are root weights. |
1405 tLimit = rootElements.getTertiaryAfter(pIndex, s
, t); | 1442 tLimit = rootElements.getTertiaryAfter(pIndex, s
, t); |
| 1443 } else if(t == Collation::BEFORE_WEIGHT16) { |
| 1444 tLimit = Collation::COMMON_WEIGHT16; |
1406 } else { | 1445 } else { |
1407 // [p, s] is tailored. | 1446 // [p, s] is tailored. |
1408 U_ASSERT(t == Collation::COMMON_WEIGHT16); | 1447 U_ASSERT(t == Collation::COMMON_WEIGHT16); |
1409 tLimit = rootElements.getTertiaryBoundary(); | 1448 tLimit = rootElements.getTertiaryBoundary(); |
1410 } | 1449 } |
1411 U_ASSERT(tLimit == 0x4000 || (tLimit & ~Collation::O
NLY_TERTIARY_MASK) == 0); | 1450 U_ASSERT(tLimit == 0x4000 || (tLimit & ~Collation::O
NLY_TERTIARY_MASK) == 0); |
1412 tertiaries.initForTertiary(); | 1451 tertiaries.initForTertiary(); |
1413 if(!tertiaries.allocWeights(t, tLimit, tCount)) { | 1452 if(!tertiaries.allocWeights(t, tLimit, tCount)) { |
1414 errorCode = U_BUFFER_OVERFLOW_ERROR; | 1453 errorCode = U_BUFFER_OVERFLOW_ERROR; |
1415 errorReason = "tertiary tailoring gap too small"
; | 1454 errorReason = "tertiary tailoring gap too small"
; |
(...skipping 18 matching lines...) Expand all Loading... |
1434 #endif | 1473 #endif |
1435 if(!sIsTailored) { | 1474 if(!sIsTailored) { |
1436 // First tailored secondary node for p. | 1475 // First tailored secondary node for p. |
1437 int32_t sCount = countTailoredNodes(nodesArray,
nextIndex, | 1476 int32_t sCount = countTailoredNodes(nodesArray,
nextIndex, |
1438 UCOL_SECONDA
RY) + 1; | 1477 UCOL_SECONDA
RY) + 1; |
1439 uint32_t sLimit; | 1478 uint32_t sLimit; |
1440 if(s == 0) { | 1479 if(s == 0) { |
1441 // Gap at the beginning of the secondary CE
range. | 1480 // Gap at the beginning of the secondary CE
range. |
1442 s = rootElements.getSecondaryBoundary() - 0x
100; | 1481 s = rootElements.getSecondaryBoundary() - 0x
100; |
1443 sLimit = rootElements.getFirstSecondaryCE()
>> 16; | 1482 sLimit = rootElements.getFirstSecondaryCE()
>> 16; |
1444 } else if(s == BEFORE_WEIGHT16) { | |
1445 sLimit = Collation::COMMON_WEIGHT16; | |
1446 } else if(!pIsTailored) { | 1483 } else if(!pIsTailored) { |
1447 // p is a root primary. | 1484 // p is a root primary. |
1448 sLimit = rootElements.getSecondaryAfter(pInd
ex, s); | 1485 sLimit = rootElements.getSecondaryAfter(pInd
ex, s); |
| 1486 } else if(s == Collation::BEFORE_WEIGHT16) { |
| 1487 sLimit = Collation::COMMON_WEIGHT16; |
1449 } else { | 1488 } else { |
1450 // p is a tailored primary. | 1489 // p is a tailored primary. |
1451 U_ASSERT(s == Collation::COMMON_WEIGHT16); | 1490 U_ASSERT(s == Collation::COMMON_WEIGHT16); |
1452 sLimit = rootElements.getSecondaryBoundary()
; | 1491 sLimit = rootElements.getSecondaryBoundary()
; |
1453 } | 1492 } |
1454 if(s == Collation::COMMON_WEIGHT16) { | 1493 if(s == Collation::COMMON_WEIGHT16) { |
1455 // Do not tailor into the getSortKey() range
of | 1494 // Do not tailor into the getSortKey() range
of |
1456 // compressed common secondaries. | 1495 // compressed common secondaries. |
1457 s = rootElements.getLastCommonSecondary(); | 1496 s = rootElements.getLastCommonSecondary(); |
1458 } | 1497 } |
1459 secondaries.initForSecondary(); | 1498 secondaries.initForSecondary(); |
1460 if(!secondaries.allocWeights(s, sLimit, sCount))
{ | 1499 if(!secondaries.allocWeights(s, sLimit, sCount))
{ |
1461 errorCode = U_BUFFER_OVERFLOW_ERROR; | 1500 errorCode = U_BUFFER_OVERFLOW_ERROR; |
1462 errorReason = "secondary tailoring gap too s
mall"; | 1501 errorReason = "secondary tailoring gap too s
mall"; |
| 1502 #ifdef DEBUG_COLLATION_BUILDER |
| 1503 printf("!secondaries.allocWeights(%lx, %lx,
sCount=%ld)\n", |
| 1504 (long)alignWeightRight(s), (long)alig
nWeightRight(sLimit), |
| 1505 (long)alignWeightRight(sCount)); |
| 1506 #endif |
1463 return; | 1507 return; |
1464 } | 1508 } |
1465 sIsTailored = TRUE; | 1509 sIsTailored = TRUE; |
1466 } | 1510 } |
1467 s = secondaries.nextWeight(); | 1511 s = secondaries.nextWeight(); |
1468 U_ASSERT(s != 0xffffffff); | 1512 U_ASSERT(s != 0xffffffff); |
1469 } else { | 1513 } else { |
1470 s = weight16FromNode(node); | 1514 s = weight16FromNode(node); |
1471 sIsTailored = FALSE; | 1515 sIsTailored = FALSE; |
1472 #ifdef DEBUG_COLLATION_BUILDER | 1516 #ifdef DEBUG_COLLATION_BUILDER |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1556 | 1600 |
1557 private: | 1601 private: |
1558 const int64_t *finalCEs; | 1602 const int64_t *finalCEs; |
1559 }; | 1603 }; |
1560 | 1604 |
1561 CEFinalizer::~CEFinalizer() {} | 1605 CEFinalizer::~CEFinalizer() {} |
1562 | 1606 |
1563 void | 1607 void |
1564 CollationBuilder::finalizeCEs(UErrorCode &errorCode) { | 1608 CollationBuilder::finalizeCEs(UErrorCode &errorCode) { |
1565 if(U_FAILURE(errorCode)) { return; } | 1609 if(U_FAILURE(errorCode)) { return; } |
1566 LocalPointer<CollationDataBuilder> newBuilder(new CollationDataBuilder(error
Code)); | 1610 LocalPointer<CollationDataBuilder> newBuilder(new CollationDataBuilder(error
Code), errorCode); |
1567 if(newBuilder.isNull()) { | 1611 if(U_FAILURE(errorCode)) { |
1568 errorCode = U_MEMORY_ALLOCATION_ERROR; | |
1569 return; | 1612 return; |
1570 } | 1613 } |
1571 newBuilder->initForTailoring(baseData, errorCode); | 1614 newBuilder->initForTailoring(baseData, errorCode); |
1572 CEFinalizer finalizer(nodes.getBuffer()); | 1615 CEFinalizer finalizer(nodes.getBuffer()); |
1573 newBuilder->copyFrom(*dataBuilder, finalizer, errorCode); | 1616 newBuilder->copyFrom(*dataBuilder, finalizer, errorCode); |
1574 if(U_FAILURE(errorCode)) { return; } | 1617 if(U_FAILURE(errorCode)) { return; } |
1575 delete dataBuilder; | 1618 delete dataBuilder; |
1576 dataBuilder = newBuilder.orphan(); | 1619 dataBuilder = newBuilder.orphan(); |
1577 } | 1620 } |
1578 | 1621 |
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1664 } | 1707 } |
1665 } | 1708 } |
1666 } | 1709 } |
1667 | 1710 |
1668 uset_close(contractions); | 1711 uset_close(contractions); |
1669 | 1712 |
1670 return uset_size(unsafe); | 1713 return uset_size(unsafe); |
1671 } | 1714 } |
1672 | 1715 |
1673 #endif // !UCONFIG_NO_COLLATION | 1716 #endif // !UCONFIG_NO_COLLATION |
OLD | NEW |