Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(317)

Side by Side Diff: source/i18n/collationbuilder.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/i18n/collationbuilder.h ('k') | source/i18n/collationcompare.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ******************************************************************************* 2 *******************************************************************************
3 * Copyright (C) 2013-2014, International Business Machines 3 * Copyright (C) 2013-2014, International Business Machines
4 * Corporation and others. All Rights Reserved. 4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************* 5 *******************************************************************************
6 * collationbuilder.cpp 6 * collationbuilder.cpp
7 * 7 *
8 * (replaced the former ucol_bld.cpp) 8 * (replaced the former ucol_bld.cpp)
9 * 9 *
10 * created on: 2013may06 10 * created on: 2013may06
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after
182 if(strength != UCOL_DEFAULT) { 182 if(strength != UCOL_DEFAULT) {
183 setAttribute(UCOL_STRENGTH, (UColAttributeValue)strength, errorCode); 183 setAttribute(UCOL_STRENGTH, (UColAttributeValue)strength, errorCode);
184 } 184 }
185 if(decompositionMode != UCOL_DEFAULT) { 185 if(decompositionMode != UCOL_DEFAULT) {
186 setAttribute(UCOL_NORMALIZATION_MODE, decompositionMode, errorCode); 186 setAttribute(UCOL_NORMALIZATION_MODE, decompositionMode, errorCode);
187 } 187 }
188 } 188 }
189 189
190 // CollationBuilder implementation ----------------------------------------- *** 190 // CollationBuilder implementation ----------------------------------------- ***
191 191
192 // Some compilers don't care if constants are defined in the .cpp file.
193 // MS Visual C++ does not like it, but gcc requires it. clang does not care.
194 #ifndef _MSC_VER
195 const int32_t CollationBuilder::HAS_BEFORE2;
196 const int32_t CollationBuilder::HAS_BEFORE3;
197 #endif
198
192 CollationBuilder::CollationBuilder(const CollationTailoring *b, UErrorCode &errorCode) 199 CollationBuilder::CollationBuilder(const CollationTailoring *b, UErrorCode &errorCode)
193 : nfd(*Normalizer2::getNFDInstance(errorCode)), 200 : nfd(*Normalizer2::getNFDInstance(errorCode)),
194 fcd(*Normalizer2Factory::getFCDInstance(errorCode)), 201 fcd(*Normalizer2Factory::getFCDInstance(errorCode)),
195 nfcImpl(*Normalizer2Factory::getNFCImpl(errorCode)), 202 nfcImpl(*Normalizer2Factory::getNFCImpl(errorCode)),
196 base(b), 203 base(b),
197 baseData(b->data), 204 baseData(b->data),
198 rootElements(b->data->rootElements, b->data->rootElementsLength), 205 rootElements(b->data->rootElements, b->data->rootElementsLength),
199 variableTop(0), 206 variableTop(0),
200 dataBuilder(new CollationDataBuilder(errorCode)), fastLatinEnabled(TRUE), 207 dataBuilder(new CollationDataBuilder(errorCode)), fastLatinEnabled(TRUE),
201 errorReason(NULL), 208 errorReason(NULL),
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after
306 } 313 }
307 } 314 }
308 if(strength == UCOL_IDENTICAL) { return; } // simple reset-at-position 315 if(strength == UCOL_IDENTICAL) { return; } // simple reset-at-position
309 316
310 // &[before strength]position 317 // &[before strength]position
311 U_ASSERT(UCOL_PRIMARY <= strength && strength <= UCOL_TERTIARY); 318 U_ASSERT(UCOL_PRIMARY <= strength && strength <= UCOL_TERTIARY);
312 int32_t index = findOrInsertNodeForCEs(strength, parserErrorReason, errorCode); 319 int32_t index = findOrInsertNodeForCEs(strength, parserErrorReason, errorCode);
313 if(U_FAILURE(errorCode)) { return; } 320 if(U_FAILURE(errorCode)) { return; }
314 321
315 int64_t node = nodes.elementAti(index); 322 int64_t node = nodes.elementAti(index);
316 // If the index is for a "weaker" tailored node, 323 // If the index is for a "weaker" node,
317 // then skip backwards over this and further "weaker" nodes. 324 // then skip backwards over this and further "weaker" nodes.
318 while(strengthFromNode(node) > strength) { 325 while(strengthFromNode(node) > strength) {
319 index = previousIndexFromNode(node); 326 index = previousIndexFromNode(node);
320 node = nodes.elementAti(index); 327 node = nodes.elementAti(index);
321 } 328 }
322 329
323 // Find or insert a node whose index we will put into a temporary CE. 330 // Find or insert a node whose index we will put into a temporary CE.
324 if(strengthFromNode(node) == strength && isTailoredNode(node)) { 331 if(strengthFromNode(node) == strength && isTailoredNode(node)) {
325 // Reset to just before this same-strength tailored node. 332 // Reset to just before this same-strength tailored node.
326 index = previousIndexFromNode(node); 333 index = previousIndexFromNode(node);
(...skipping 26 matching lines...) Expand all
353 int32_t nextIndex = nextIndexFromNode(node); 360 int32_t nextIndex = nextIndexFromNode(node);
354 if(nextIndex == 0) { break; } 361 if(nextIndex == 0) { break; }
355 index = nextIndex; 362 index = nextIndex;
356 } 363 }
357 } else { 364 } else {
358 // &[before 2] or &[before 3] 365 // &[before 2] or &[before 3]
359 index = findCommonNode(index, UCOL_SECONDARY); 366 index = findCommonNode(index, UCOL_SECONDARY);
360 if(strength >= UCOL_TERTIARY) { 367 if(strength >= UCOL_TERTIARY) {
361 index = findCommonNode(index, UCOL_TERTIARY); 368 index = findCommonNode(index, UCOL_TERTIARY);
362 } 369 }
370 // findCommonNode() stayed on the stronger node or moved to
371 // an explicit common-weight node of the reset-before strength.
363 node = nodes.elementAti(index); 372 node = nodes.elementAti(index);
364 if(strengthFromNode(node) == strength) { 373 if(strengthFromNode(node) == strength) {
365 // Found a same-strength node with an explicit weight. 374 // Found a same-strength node with an explicit weight.
366 uint32_t weight16 = weight16FromNode(node); 375 uint32_t weight16 = weight16FromNode(node);
367 if(weight16 == 0) { 376 if(weight16 == 0) {
368 errorCode = U_UNSUPPORTED_ERROR; 377 errorCode = U_UNSUPPORTED_ERROR;
369 if(strength == UCOL_SECONDARY) { 378 if(strength == UCOL_SECONDARY) {
370 parserErrorReason = "reset secondary-before secondary ignorable not possible"; 379 parserErrorReason = "reset secondary-before secondary ignorable not possible";
371 } else { 380 } else {
372 parserErrorReason = "reset tertiary-before completely ignorable not possible"; 381 parserErrorReason = "reset tertiary-before completely ignorable not possible";
373 } 382 }
374 return; 383 return;
375 } 384 }
376 U_ASSERT(weight16 >= Collation::COMMON_WEIGHT16); 385 U_ASSERT(weight16 > Collation::BEFORE_WEIGHT16);
386 // Reset to just before this node.
387 // Insert the preceding same-level explicit weight if it is not there already.
388 // Which explicit weight immediately precedes this one?
389 weight16 = getWeight16Before(index, node, strength);
390 // Does this preceding weight have a node?
391 uint32_t previousWeight16;
377 int32_t previousIndex = previousIndexFromNode(node); 392 int32_t previousIndex = previousIndexFromNode(node);
378 if(weight16 == Collation::COMMON_WEIGHT16) { 393 for(int32_t i = previousIndex;; i = previousIndexFromNode(node)) {
379 // Reset to just before this same-strength common-weight node. 394 node = nodes.elementAti(i);
395 int32_t previousStrength = strengthFromNode(node);
396 if(previousStrength < strength) {
397 U_ASSERT(weight16 >= Collation::COMMON_WEIGHT16 || i == previousIndex);
398 // Either the reset element has an above-common weight and
399 // the parent node provides the implied common weight,
400 // or the reset element has a weight<=common in the node
401 // right after the parent, and we need to insert the preceding weight.
402 previousWeight16 = Collation::COMMON_WEIGHT16;
403 break;
404 } else if(previousStrength == strength && !isTailoredNode(node)) {
405 previousWeight16 = weight16FromNode(node);
406 break;
407 }
408 // Skip weaker nodes and same-level tailored nodes.
409 }
410 if(previousWeight16 == weight16) {
411 // The preceding weight has a node,
412 // maybe with following weaker or tailored nodes.
413 // Reset to the last of them.
380 index = previousIndex; 414 index = previousIndex;
381 } else { 415 } else {
382 // A non-common weight is only possible from a root CE. 416 // Insert a node with the preceding weight, reset to that.
383 // Find the higher-level weights, which must all be explicit, 417 node = nodeFromWeight16(weight16) | nodeFromStrength(strength);
384 // and then find the preceding weight for this level. 418 index = insertNodeBetween(previousIndex, index, node, errorCode);
385 uint32_t previousWeight16 = 0;
386 int32_t previousWeightIndex = -1;
387 int32_t i = index;
388 if(strength == UCOL_SECONDARY) {
389 uint32_t p;
390 do {
391 i = previousIndexFromNode(node);
392 node = nodes.elementAti(i);
393 if(strengthFromNode(node) == UCOL_SECONDARY && !isTailoredNode(node) &&
394 previousWeightIndex < 0) {
395 previousWeightIndex = i;
396 previousWeight16 = weight16FromNode(node);
397 }
398 } while(strengthFromNode(node) > UCOL_PRIMARY);
399 U_ASSERT(!isTailoredNode(node));
400 p = weight32FromNode(node);
401 weight16 = rootElements.getSecondaryBefore(p, weight16);
402 } else {
403 uint32_t p, s;
404 do {
405 i = previousIndexFromNode(node);
406 node = nodes.elementAti(i);
407 if(strengthFromNode(node) == UCOL_TERTIARY && !isTailoredNode(node) &&
408 previousWeightIndex < 0) {
409 previousWeightIndex = i;
410 previousWeight16 = weight16FromNode(node);
411 }
412 } while(strengthFromNode(node) > UCOL_SECONDARY);
413 U_ASSERT(!isTailoredNode(node));
414 if(strengthFromNode(node) == UCOL_SECONDARY) {
415 s = weight16FromNode(node);
416 do {
417 i = previousIndexFromNode(node);
418 node = nodes.elementAti(i);
419 } while(strengthFromNode(node) > UCOL_PRIMARY);
420 U_ASSERT(!isTailoredNode(node));
421 } else {
422 U_ASSERT(!nodeHasBefore2(node));
423 s = Collation::COMMON_WEIGHT16;
424 }
425 p = weight32FromNode(node);
426 weight16 = rootElements.getTertiaryBefore(p, s, weight16);
427 U_ASSERT((weight16 & ~Collation::ONLY_TERTIARY_MASK) == 0);
428 }
429 // Find or insert the new explicit weight before the current one.
430 if(previousWeightIndex >= 0 && weight16 == previousWeight16) {
431 // Tailor after the last node between adjacent root nodes.
432 index = previousIndex;
433 } else {
434 node = nodeFromWeight16(weight16) | nodeFromStrength(strength);
435 index = insertNodeBetween(previousIndex, index, node, errorCode);
436 }
437 } 419 }
438 } else { 420 } else {
439 // Found a stronger node with implied strength-common weight. 421 // Found a stronger node with implied strength-common weight.
440 int64_t hasBefore3 = 0; 422 uint32_t weight16 = getWeight16Before(index, node, strength);
441 if(strength == UCOL_SECONDARY) { 423 index = findOrInsertWeakNode(index, weight16, strength, errorCode);
442 U_ASSERT(!nodeHasBefore2(node));
443 // Move the HAS_BEFORE3 flag from the parent node
444 // to the new secondary common node.
445 hasBefore3 = node & HAS_BEFORE3;
446 node = (node & ~(int64_t)HAS_BEFORE3) | HAS_BEFORE2;
447 } else {
448 U_ASSERT(!nodeHasBefore3(node));
449 node |= HAS_BEFORE3;
450 }
451 nodes.setElementAt(node, index);
452 int32_t nextIndex = nextIndexFromNode(node);
453 // Insert default nodes with weights 02 and 05, reset to the 02 node.
454 node = nodeFromWeight16(BEFORE_WEIGHT16) | nodeFromStrength(strength);
455 index = insertNodeBetween(index, nextIndex, node, errorCode);
456 node = nodeFromWeight16(Collation::COMMON_WEIGHT16) | hasBefore3 |
457 nodeFromStrength(strength);
458 insertNodeBetween(index, nextIndex, node, errorCode);
459 } 424 }
460 // Strength of the temporary CE = strength of its reset position. 425 // Strength of the temporary CE = strength of its reset position.
461 // Code above raises an error if the before-strength is stronger. 426 // Code above raises an error if the before-strength is stronger.
462 strength = ceStrength(ces[cesLength - 1]); 427 strength = ceStrength(ces[cesLength - 1]);
463 } 428 }
464 if(U_FAILURE(errorCode)) { 429 if(U_FAILURE(errorCode)) {
465 parserErrorReason = "inserting reset position for &[before n]"; 430 parserErrorReason = "inserting reset position for &[before n]";
466 return; 431 return;
467 } 432 }
468 ces[cesLength - 1] = tempCEFromIndexAndStrength(index, strength); 433 ces[cesLength - 1] = tempCEFromIndexAndStrength(index, strength);
469 } 434 }
470 435
436 uint32_t
437 CollationBuilder::getWeight16Before(int32_t index, int64_t node, int32_t level) {
438 U_ASSERT(strengthFromNode(node) < level || !isTailoredNode(node));
439 // Collect the root CE weights if this node is for a root CE.
440 // If it is not, then return the low non-primary boundary for a tailored CE.
441 uint32_t t;
442 if(strengthFromNode(node) == UCOL_TERTIARY) {
443 t = weight16FromNode(node);
444 } else {
445 t = Collation::COMMON_WEIGHT16; // Stronger node with implied common weight.
446 }
447 while(strengthFromNode(node) > UCOL_SECONDARY) {
448 index = previousIndexFromNode(node);
449 node = nodes.elementAti(index);
450 }
451 if(isTailoredNode(node)) {
452 return Collation::BEFORE_WEIGHT16;
453 }
454 uint32_t s;
455 if(strengthFromNode(node) == UCOL_SECONDARY) {
456 s = weight16FromNode(node);
457 } else {
458 s = Collation::COMMON_WEIGHT16; // Stronger node with implied common weight.
459 }
460 while(strengthFromNode(node) > UCOL_PRIMARY) {
461 index = previousIndexFromNode(node);
462 node = nodes.elementAti(index);
463 }
464 if(isTailoredNode(node)) {
465 return Collation::BEFORE_WEIGHT16;
466 }
467 // [p, s, t] is a root CE. Return the preceding weight for the requested level.
468 uint32_t p = weight32FromNode(node);
469 uint32_t weight16;
470 if(level == UCOL_SECONDARY) {
471 weight16 = rootElements.getSecondaryBefore(p, s);
472 } else {
473 weight16 = rootElements.getTertiaryBefore(p, s, t);
474 U_ASSERT((weight16 & ~Collation::ONLY_TERTIARY_MASK) == 0);
475 }
476 return weight16;
477 }
478
471 int64_t 479 int64_t
472 CollationBuilder::getSpecialResetPosition(const UnicodeString &str, 480 CollationBuilder::getSpecialResetPosition(const UnicodeString &str,
473 const char *&parserErrorReason, UErrorCode &errorCode) { 481 const char *&parserErrorReason, UErrorCode &errorCode) {
474 U_ASSERT(str.length() == 2); 482 U_ASSERT(str.length() == 2);
475 int64_t ce; 483 int64_t ce;
476 int32_t strength = UCOL_PRIMARY; 484 int32_t strength = UCOL_PRIMARY;
477 UBool isBoundary = FALSE; 485 UBool isBoundary = FALSE;
478 UChar32 pos = str.charAt(1) - CollationRuleParser::POS_BASE; 486 UChar32 pos = str.charAt(1) - CollationRuleParser::POS_BASE;
479 U_ASSERT(0 <= pos && pos <= CollationRuleParser::LAST_TRAILING); 487 U_ASSERT(0 <= pos && pos <= CollationRuleParser::LAST_TRAILING);
480 switch(pos) { 488 switch(pos) {
(...skipping 305 matching lines...) Expand 10 before | Expand all | Expand 10 after
786 794
787 int32_t 795 int32_t
788 CollationBuilder::findOrInsertNodeForRootCE(int64_t ce, int32_t strength, UErrorCode &errorCode) { 796 CollationBuilder::findOrInsertNodeForRootCE(int64_t ce, int32_t strength, UErrorCode &errorCode) {
789 if(U_FAILURE(errorCode)) { return 0; } 797 if(U_FAILURE(errorCode)) { return 0; }
790 U_ASSERT((uint8_t)(ce >> 56) != Collation::UNASSIGNED_IMPLICIT_BYTE); 798 U_ASSERT((uint8_t)(ce >> 56) != Collation::UNASSIGNED_IMPLICIT_BYTE);
791 799
792 // Find or insert the node for each of the root CE's weights, 800 // Find or insert the node for each of the root CE's weights,
793 // down to the requested level/strength. 801 // down to the requested level/strength.
794 // Root CEs must have common=zero quaternary weights (for which we never insert any nodes). 802 // Root CEs must have common=zero quaternary weights (for which we never insert any nodes).
795 U_ASSERT((ce & 0xc0) == 0); 803 U_ASSERT((ce & 0xc0) == 0);
796 int32_t index = findOrInsertNodeForPrimary((uint32_t)(ce >> 32) , errorCode); 804 int32_t index = findOrInsertNodeForPrimary((uint32_t)(ce >> 32), errorCode);
797 if(strength >= UCOL_SECONDARY) { 805 if(strength >= UCOL_SECONDARY) {
798 uint32_t lower32 = (uint32_t)ce; 806 uint32_t lower32 = (uint32_t)ce;
799 index = findOrInsertWeakNode(index, lower32 >> 16, UCOL_SECONDARY, errorCode); 807 index = findOrInsertWeakNode(index, lower32 >> 16, UCOL_SECONDARY, errorCode);
800 if(strength >= UCOL_TERTIARY) { 808 if(strength >= UCOL_TERTIARY) {
801 index = findOrInsertWeakNode(index, lower32 & Collation::ONLY_TERTIARY_MASK, 809 index = findOrInsertWeakNode(index, lower32 & Collation::ONLY_TERTIARY_MASK,
802 UCOL_TERTIARY, errorCode); 810 UCOL_TERTIARY, errorCode);
803 } 811 }
804 } 812 }
805 return index; 813 return index;
806 } 814 }
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
856 nodes.addElement(nodeFromWeight32(p), errorCode); 864 nodes.addElement(nodeFromWeight32(p), errorCode);
857 rootPrimaryIndexes.insertElementAt(index, ~rootIndex, errorCode); 865 rootPrimaryIndexes.insertElementAt(index, ~rootIndex, errorCode);
858 return index; 866 return index;
859 } 867 }
860 } 868 }
861 869
862 int32_t 870 int32_t
863 CollationBuilder::findOrInsertWeakNode(int32_t index, uint32_t weight16, int32_t level, UErrorCode &errorCode) { 871 CollationBuilder::findOrInsertWeakNode(int32_t index, uint32_t weight16, int32_t level, UErrorCode &errorCode) {
864 if(U_FAILURE(errorCode)) { return 0; } 872 if(U_FAILURE(errorCode)) { return 0; }
865 U_ASSERT(0 <= index && index < nodes.size()); 873 U_ASSERT(0 <= index && index < nodes.size());
874 U_ASSERT(UCOL_SECONDARY <= level && level <= UCOL_TERTIARY);
866 875
867 U_ASSERT(weight16 == 0 || weight16 >= Collation::COMMON_WEIGHT16);
868 // Only reset-before inserts common weights.
869 if(weight16 == Collation::COMMON_WEIGHT16) { 876 if(weight16 == Collation::COMMON_WEIGHT16) {
870 return findCommonNode(index, level); 877 return findCommonNode(index, level);
871 } 878 }
879
880 // If this will be the first below-common weight for the parent node,
881 // then we will also need to insert a common weight after it.
882 int64_t node = nodes.elementAti(index);
883 U_ASSERT(strengthFromNode(node) < level); // parent node is stronger
884 if(weight16 != 0 && weight16 < Collation::COMMON_WEIGHT16) {
885 int32_t hasThisLevelBefore = level == UCOL_SECONDARY ? HAS_BEFORE2 : HAS_BEFORE3;
886 if((node & hasThisLevelBefore) == 0) {
887 // The parent node has an implied level-common weight.
888 int64_t commonNode =
889 nodeFromWeight16(Collation::COMMON_WEIGHT16) | nodeFromStrength(level);
890 if(level == UCOL_SECONDARY) {
891 // Move the HAS_BEFORE3 flag from the parent node
892 // to the new secondary common node.
893 commonNode |= node & HAS_BEFORE3;
894 node &= ~(int64_t)HAS_BEFORE3;
895 }
896 nodes.setElementAt(node | hasThisLevelBefore, index);
897 // Insert below-common-weight node.
898 int32_t nextIndex = nextIndexFromNode(node);
899 node = nodeFromWeight16(weight16) | nodeFromStrength(level);
900 index = insertNodeBetween(index, nextIndex, node, errorCode);
901 // Insert common-weight node.
902 insertNodeBetween(index, nextIndex, commonNode, errorCode);
903 // Return index of below-common-weight node.
904 return index;
905 }
906 }
907
872 // Find the root CE's weight for this level. 908 // Find the root CE's weight for this level.
873 // Postpone insertion if not found: 909 // Postpone insertion if not found:
874 // Insert the new root node before the next stronger node, 910 // Insert the new root node before the next stronger node,
875 // or before the next root node with the same strength and a larger weight. 911 // or before the next root node with the same strength and a larger weight.
876 int64_t node = nodes.elementAti(index);
877 int32_t nextIndex; 912 int32_t nextIndex;
878 while((nextIndex = nextIndexFromNode(node)) != 0) { 913 while((nextIndex = nextIndexFromNode(node)) != 0) {
879 node = nodes.elementAti(nextIndex); 914 node = nodes.elementAti(nextIndex);
880 int32_t nextStrength = strengthFromNode(node); 915 int32_t nextStrength = strengthFromNode(node);
881 if(nextStrength <= level) { 916 if(nextStrength <= level) {
882 // Insert before a stronger node. 917 // Insert before a stronger node.
883 if(nextStrength < level) { break; } 918 if(nextStrength < level) { break; }
884 // nextStrength == level 919 // nextStrength == level
885 if(!isTailoredNode(node)) { 920 if(!isTailoredNode(node)) {
886 uint32_t nextWeight16 = weight16FromNode(node); 921 uint32_t nextWeight16 = weight16FromNode(node);
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
954 // The current node is no stronger. 989 // The current node is no stronger.
955 return index; 990 return index;
956 } 991 }
957 if(strength == UCOL_SECONDARY ? !nodeHasBefore2(node) : !nodeHasBefore3(node)) { 992 if(strength == UCOL_SECONDARY ? !nodeHasBefore2(node) : !nodeHasBefore3(node)) {
958 // The current node implies the strength-common weight. 993 // The current node implies the strength-common weight.
959 return index; 994 return index;
960 } 995 }
961 index = nextIndexFromNode(node); 996 index = nextIndexFromNode(node);
962 node = nodes.elementAti(index); 997 node = nodes.elementAti(index);
963 U_ASSERT(!isTailoredNode(node) && strengthFromNode(node) == strength && 998 U_ASSERT(!isTailoredNode(node) && strengthFromNode(node) == strength &&
964 weight16FromNode(node) == BEFORE_WEIGHT16); 999 weight16FromNode(node) < Collation::COMMON_WEIGHT16);
965 // Skip to the explicit common node. 1000 // Skip to the explicit common node.
966 do { 1001 do {
967 index = nextIndexFromNode(node); 1002 index = nextIndexFromNode(node);
968 node = nodes.elementAti(index); 1003 node = nodes.elementAti(index);
969 U_ASSERT(strengthFromNode(node) >= strength); 1004 U_ASSERT(strengthFromNode(node) >= strength);
970 } while(isTailoredNode(node) || strengthFromNode(node) > strength); 1005 } while(isTailoredNode(node) || strengthFromNode(node) > strength ||
1006 weight16FromNode(node) < Collation::COMMON_WEIGHT16);
971 U_ASSERT(weight16FromNode(node) == Collation::COMMON_WEIGHT16); 1007 U_ASSERT(weight16FromNode(node) == Collation::COMMON_WEIGHT16);
972 return index; 1008 return index;
973 } 1009 }
974 1010
975 void 1011 void
976 CollationBuilder::setCaseBits(const UnicodeString &nfdString, 1012 CollationBuilder::setCaseBits(const UnicodeString &nfdString,
977 const char *&parserErrorReason, UErrorCode &errorCode) { 1013 const char *&parserErrorReason, UErrorCode &errorCode) {
978 if(U_FAILURE(errorCode)) { return; } 1014 if(U_FAILURE(errorCode)) { return; }
979 int32_t numTailoredPrimaries = 0; 1015 int32_t numTailoredPrimaries = 0;
980 for(int32_t i = 0; i < cesLength; ++i) { 1016 for(int32_t i = 0; i < cesLength; ++i) {
(...skipping 363 matching lines...) Expand 10 before | Expand all | Expand 10 after
1344 } 1380 }
1345 1381
1346 #endif 1382 #endif
1347 1383
1348 void 1384 void
1349 CollationBuilder::makeTailoredCEs(UErrorCode &errorCode) { 1385 CollationBuilder::makeTailoredCEs(UErrorCode &errorCode) {
1350 if(U_FAILURE(errorCode)) { return; } 1386 if(U_FAILURE(errorCode)) { return; }
1351 1387
1352 CollationWeights primaries, secondaries, tertiaries; 1388 CollationWeights primaries, secondaries, tertiaries;
1353 int64_t *nodesArray = nodes.getBuffer(); 1389 int64_t *nodesArray = nodes.getBuffer();
1390 #ifdef DEBUG_COLLATION_BUILDER
1391 puts("\nCollationBuilder::makeTailoredCEs()");
1392 #endif
1354 1393
1355 for(int32_t rpi = 0; rpi < rootPrimaryIndexes.size(); ++rpi) { 1394 for(int32_t rpi = 0; rpi < rootPrimaryIndexes.size(); ++rpi) {
1356 int32_t i = rootPrimaryIndexes.elementAti(rpi); 1395 int32_t i = rootPrimaryIndexes.elementAti(rpi);
1357 int64_t node = nodesArray[i]; 1396 int64_t node = nodesArray[i];
1358 uint32_t p = weight32FromNode(node); 1397 uint32_t p = weight32FromNode(node);
1359 uint32_t s = p == 0 ? 0 : Collation::COMMON_WEIGHT16; 1398 uint32_t s = p == 0 ? 0 : Collation::COMMON_WEIGHT16;
1360 uint32_t t = s; 1399 uint32_t t = s;
1361 uint32_t q = 0; 1400 uint32_t q = 0;
1362 UBool pIsTailored = FALSE; 1401 UBool pIsTailored = FALSE;
1363 UBool sIsTailored = FALSE; 1402 UBool sIsTailored = FALSE;
(...skipping 27 matching lines...) Expand all
1391 #endif 1430 #endif
1392 if(!tIsTailored) { 1431 if(!tIsTailored) {
1393 // First tailored tertiary node for [p, s]. 1432 // First tailored tertiary node for [p, s].
1394 int32_t tCount = countTailoredNodes(nodesArray, nextIndex, 1433 int32_t tCount = countTailoredNodes(nodesArray, nextIndex,
1395 UCOL_TERTIARY) + 1; 1434 UCOL_TERTIARY) + 1;
1396 uint32_t tLimit; 1435 uint32_t tLimit;
1397 if(t == 0) { 1436 if(t == 0) {
1398 // Gap at the beginning of the tertiary CE range. 1437 // Gap at the beginning of the tertiary CE range.
1399 t = rootElements.getTertiaryBoundary() - 0x100; 1438 t = rootElements.getTertiaryBoundary() - 0x100;
1400 tLimit = rootElements.getFirstTertiaryCE() & Collation::ONLY_TERTIARY_MASK; 1439 tLimit = rootElements.getFirstTertiaryCE() & Collation::ONLY_TERTIARY_MASK;
1401 } else if(t == BEFORE_WEIGHT16) {
1402 tLimit = Collation::COMMON_WEIGHT16;
1403 } else if(!pIsTailored && !sIsTailored) { 1440 } else if(!pIsTailored && !sIsTailored) {
1404 // p and s are root weights. 1441 // p and s are root weights.
1405 tLimit = rootElements.getTertiaryAfter(pIndex, s, t); 1442 tLimit = rootElements.getTertiaryAfter(pIndex, s, t);
1443 } else if(t == Collation::BEFORE_WEIGHT16) {
1444 tLimit = Collation::COMMON_WEIGHT16;
1406 } else { 1445 } else {
1407 // [p, s] is tailored. 1446 // [p, s] is tailored.
1408 U_ASSERT(t == Collation::COMMON_WEIGHT16); 1447 U_ASSERT(t == Collation::COMMON_WEIGHT16);
1409 tLimit = rootElements.getTertiaryBoundary(); 1448 tLimit = rootElements.getTertiaryBoundary();
1410 } 1449 }
1411 U_ASSERT(tLimit == 0x4000 || (tLimit & ~Collation::ONLY_TERTIARY_MASK) == 0); 1450 U_ASSERT(tLimit == 0x4000 || (tLimit & ~Collation::ONLY_TERTIARY_MASK) == 0);
1412 tertiaries.initForTertiary(); 1451 tertiaries.initForTertiary();
1413 if(!tertiaries.allocWeights(t, tLimit, tCount)) { 1452 if(!tertiaries.allocWeights(t, tLimit, tCount)) {
1414 errorCode = U_BUFFER_OVERFLOW_ERROR; 1453 errorCode = U_BUFFER_OVERFLOW_ERROR;
1415 errorReason = "tertiary tailoring gap too small"; 1454 errorReason = "tertiary tailoring gap too small";
(...skipping 18 matching lines...) Expand all
1434 #endif 1473 #endif
1435 if(!sIsTailored) { 1474 if(!sIsTailored) {
1436 // First tailored secondary node for p. 1475 // First tailored secondary node for p.
1437 int32_t sCount = countTailoredNodes(nodesArray, nextIndex, 1476 int32_t sCount = countTailoredNodes(nodesArray, nextIndex,
1438 UCOL_SECONDARY) + 1; 1477 UCOL_SECONDARY) + 1;
1439 uint32_t sLimit; 1478 uint32_t sLimit;
1440 if(s == 0) { 1479 if(s == 0) {
1441 // Gap at the beginning of the secondary CE range. 1480 // Gap at the beginning of the secondary CE range.
1442 s = rootElements.getSecondaryBoundary() - 0x100; 1481 s = rootElements.getSecondaryBoundary() - 0x100;
1443 sLimit = rootElements.getFirstSecondaryCE() >> 16; 1482 sLimit = rootElements.getFirstSecondaryCE() >> 16;
1444 } else if(s == BEFORE_WEIGHT16) {
1445 sLimit = Collation::COMMON_WEIGHT16;
1446 } else if(!pIsTailored) { 1483 } else if(!pIsTailored) {
1447 // p is a root primary. 1484 // p is a root primary.
1448 sLimit = rootElements.getSecondaryAfter(pIndex, s); 1485 sLimit = rootElements.getSecondaryAfter(pIndex, s);
1486 } else if(s == Collation::BEFORE_WEIGHT16) {
1487 sLimit = Collation::COMMON_WEIGHT16;
1449 } else { 1488 } else {
1450 // p is a tailored primary. 1489 // p is a tailored primary.
1451 U_ASSERT(s == Collation::COMMON_WEIGHT16); 1490 U_ASSERT(s == Collation::COMMON_WEIGHT16);
1452 sLimit = rootElements.getSecondaryBoundary(); 1491 sLimit = rootElements.getSecondaryBoundary();
1453 } 1492 }
1454 if(s == Collation::COMMON_WEIGHT16) { 1493 if(s == Collation::COMMON_WEIGHT16) {
1455 // Do not tailor into the getSortKey() range of 1494 // Do not tailor into the getSortKey() range of
1456 // compressed common secondaries. 1495 // compressed common secondaries.
1457 s = rootElements.getLastCommonSecondary(); 1496 s = rootElements.getLastCommonSecondary();
1458 } 1497 }
1459 secondaries.initForSecondary(); 1498 secondaries.initForSecondary();
1460 if(!secondaries.allocWeights(s, sLimit, sCount)) { 1499 if(!secondaries.allocWeights(s, sLimit, sCount)) {
1461 errorCode = U_BUFFER_OVERFLOW_ERROR; 1500 errorCode = U_BUFFER_OVERFLOW_ERROR;
1462 errorReason = "secondary tailoring gap too small"; 1501 errorReason = "secondary tailoring gap too small";
1502 #ifdef DEBUG_COLLATION_BUILDER
1503 printf("!secondaries.allocWeights(%lx, %lx, sCount=%ld)\n",
1504 (long)alignWeightRight(s), (long)alignWeightRight(sLimit),
1505 (long)alignWeightRight(sCount));
1506 #endif
1463 return; 1507 return;
1464 } 1508 }
1465 sIsTailored = TRUE; 1509 sIsTailored = TRUE;
1466 } 1510 }
1467 s = secondaries.nextWeight(); 1511 s = secondaries.nextWeight();
1468 U_ASSERT(s != 0xffffffff); 1512 U_ASSERT(s != 0xffffffff);
1469 } else { 1513 } else {
1470 s = weight16FromNode(node); 1514 s = weight16FromNode(node);
1471 sIsTailored = FALSE; 1515 sIsTailored = FALSE;
1472 #ifdef DEBUG_COLLATION_BUILDER 1516 #ifdef DEBUG_COLLATION_BUILDER
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
1556 1600
1557 private: 1601 private:
1558 const int64_t *finalCEs; 1602 const int64_t *finalCEs;
1559 }; 1603 };
1560 1604
1561 CEFinalizer::~CEFinalizer() {} 1605 CEFinalizer::~CEFinalizer() {}
1562 1606
1563 void 1607 void
1564 CollationBuilder::finalizeCEs(UErrorCode &errorCode) { 1608 CollationBuilder::finalizeCEs(UErrorCode &errorCode) {
1565 if(U_FAILURE(errorCode)) { return; } 1609 if(U_FAILURE(errorCode)) { return; }
1566 LocalPointer<CollationDataBuilder> newBuilder(new CollationDataBuilder(errorCode)); 1610 LocalPointer<CollationDataBuilder> newBuilder(new CollationDataBuilder(errorCode), errorCode);
1567 if(newBuilder.isNull()) { 1611 if(U_FAILURE(errorCode)) {
1568 errorCode = U_MEMORY_ALLOCATION_ERROR;
1569 return; 1612 return;
1570 } 1613 }
1571 newBuilder->initForTailoring(baseData, errorCode); 1614 newBuilder->initForTailoring(baseData, errorCode);
1572 CEFinalizer finalizer(nodes.getBuffer()); 1615 CEFinalizer finalizer(nodes.getBuffer());
1573 newBuilder->copyFrom(*dataBuilder, finalizer, errorCode); 1616 newBuilder->copyFrom(*dataBuilder, finalizer, errorCode);
1574 if(U_FAILURE(errorCode)) { return; } 1617 if(U_FAILURE(errorCode)) { return; }
1575 delete dataBuilder; 1618 delete dataBuilder;
1576 dataBuilder = newBuilder.orphan(); 1619 dataBuilder = newBuilder.orphan();
1577 } 1620 }
1578 1621
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
1664 } 1707 }
1665 } 1708 }
1666 } 1709 }
1667 1710
1668 uset_close(contractions); 1711 uset_close(contractions);
1669 1712
1670 return uset_size(unsafe); 1713 return uset_size(unsafe);
1671 } 1714 }
1672 1715
1673 #endif // !UCONFIG_NO_COLLATION 1716 #endif // !UCONFIG_NO_COLLATION
OLDNEW
« no previous file with comments | « source/i18n/collationbuilder.h ('k') | source/i18n/collationcompare.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698