Index: source/i18n/collationrootelements.cpp |
diff --git a/source/i18n/collationrootelements.cpp b/source/i18n/collationrootelements.cpp |
new file mode 100644 |
index 0000000000000000000000000000000000000000..d59048b75b7c9c3905947e3e751b5bc0aaa618f3 |
--- /dev/null |
+++ b/source/i18n/collationrootelements.cpp |
@@ -0,0 +1,314 @@ |
+/* |
+******************************************************************************* |
+* Copyright (C) 2013-2014, International Business Machines |
+* Corporation and others. All Rights Reserved. |
+******************************************************************************* |
+* collationrootelements.cpp |
+* |
+* created on: 2013mar05 |
+* created by: Markus W. Scherer |
+*/ |
+ |
+#include "unicode/utypes.h" |
+ |
+#if !UCONFIG_NO_COLLATION |
+ |
+#include "collation.h" |
+#include "collationrootelements.h" |
+#include "uassert.h" |
+ |
+U_NAMESPACE_BEGIN |
+ |
+int64_t |
+CollationRootElements::lastCEWithPrimaryBefore(uint32_t p) const { |
+ if(p == 0) { return 0; } |
+ U_ASSERT(p > elements[elements[IX_FIRST_PRIMARY_INDEX]]); |
+ int32_t index = findP(p); |
+ uint32_t q = elements[index]; |
+ uint32_t secTer; |
+ if(p == (q & 0xffffff00)) { |
+ // p == elements[index] is a root primary. Find the CE before it. |
+ // We must not be in a primary range. |
+ U_ASSERT((q & PRIMARY_STEP_MASK) == 0); |
+ secTer = elements[index - 1]; |
+ if((secTer & SEC_TER_DELTA_FLAG) == 0) { |
+ // Primary CE just before p. |
+ p = secTer & 0xffffff00; |
+ secTer = Collation::COMMON_SEC_AND_TER_CE; |
+ } else { |
+ // secTer = last secondary & tertiary for the previous primary |
+ index -= 2; |
+ for(;;) { |
+ p = elements[index]; |
+ if((p & SEC_TER_DELTA_FLAG) == 0) { |
+ p &= 0xffffff00; |
+ break; |
+ } |
+ --index; |
+ } |
+ } |
+ } else { |
+ // p > elements[index] which is the previous primary. |
+ // Find the last secondary & tertiary weights for it. |
+ p = q & 0xffffff00; |
+ secTer = Collation::COMMON_SEC_AND_TER_CE; |
+ for(;;) { |
+ q = elements[++index]; |
+ if((q & SEC_TER_DELTA_FLAG) == 0) { |
+ // We must not be in a primary range. |
+ U_ASSERT((q & PRIMARY_STEP_MASK) == 0); |
+ break; |
+ } |
+ secTer = q; |
+ } |
+ } |
+ return ((int64_t)p << 32) | (secTer & ~SEC_TER_DELTA_FLAG); |
+} |
+ |
+int64_t |
+CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p) const { |
+ if(p == 0) { return 0; } |
+ int32_t index = findP(p); |
+ if(p != (elements[index] & 0xffffff00)) { |
+ for(;;) { |
+ p = elements[++index]; |
+ if((p & SEC_TER_DELTA_FLAG) == 0) { |
+ // First primary after p. We must not be in a primary range. |
+ U_ASSERT((p & PRIMARY_STEP_MASK) == 0); |
+ break; |
+ } |
+ } |
+ } |
+ // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0. |
+ return ((int64_t)p << 32) | Collation::COMMON_SEC_AND_TER_CE; |
+} |
+ |
+uint32_t |
+CollationRootElements::getPrimaryBefore(uint32_t p, UBool isCompressible) const { |
+ int32_t index = findPrimary(p); |
+ int32_t step; |
+ uint32_t q = elements[index]; |
+ if(p == (q & 0xffffff00)) { |
+ // Found p itself. Return the previous primary. |
+ // See if p is at the end of a previous range. |
+ step = (int32_t)q & PRIMARY_STEP_MASK; |
+ if(step == 0) { |
+ // p is not at the end of a range. Look for the previous primary. |
+ do { |
+ p = elements[--index]; |
+ } while((p & SEC_TER_DELTA_FLAG) != 0); |
+ return p & 0xffffff00; |
+ } |
+ } else { |
+ // p is in a range, and not at the start. |
+ uint32_t nextElement = elements[index + 1]; |
+ U_ASSERT(isEndOfPrimaryRange(nextElement)); |
+ step = (int32_t)nextElement & PRIMARY_STEP_MASK; |
+ } |
+ // Return the previous range primary. |
+ if((p & 0xffff) == 0) { |
+ return Collation::decTwoBytePrimaryByOneStep(p, isCompressible, step); |
+ } else { |
+ return Collation::decThreeBytePrimaryByOneStep(p, isCompressible, step); |
+ } |
+} |
+ |
+uint32_t |
+CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const { |
+ int32_t index; |
+ uint32_t previousSec, sec; |
+ if(p == 0) { |
+ index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; |
+ // Gap at the beginning of the secondary CE range. |
+ previousSec = 0; |
+ sec = elements[index] >> 16; |
+ } else { |
+ index = findPrimary(p) + 1; |
+ previousSec = Collation::MERGE_SEPARATOR_WEIGHT16; |
+ sec = Collation::COMMON_WEIGHT16; |
+ } |
+ U_ASSERT(s >= sec); |
+ while(s > sec) { |
+ previousSec = sec; |
+ U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0); |
+ sec = elements[index++] >> 16; |
+ } |
+ U_ASSERT(sec == s); |
+ return previousSec; |
+} |
+ |
+uint32_t |
+CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const { |
+ U_ASSERT((t & ~Collation::ONLY_TERTIARY_MASK) == 0); |
+ int32_t index; |
+ uint32_t previousTer, secTer; |
+ if(p == 0) { |
+ if(s == 0) { |
+ index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX]; |
+ // Gap at the beginning of the tertiary CE range. |
+ previousTer = 0; |
+ } else { |
+ index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; |
+ previousTer = Collation::MERGE_SEPARATOR_WEIGHT16; |
+ } |
+ secTer = elements[index] & ~SEC_TER_DELTA_FLAG; |
+ } else { |
+ index = findPrimary(p) + 1; |
+ previousTer = Collation::MERGE_SEPARATOR_WEIGHT16; |
+ secTer = Collation::COMMON_SEC_AND_TER_CE; |
+ } |
+ uint32_t st = (s << 16) | t; |
+ while(st > secTer) { |
+ if((secTer >> 16) == s) { previousTer = secTer; } |
+ U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0); |
+ secTer = elements[index++] & ~SEC_TER_DELTA_FLAG; |
+ } |
+ U_ASSERT(secTer == st); |
+ return previousTer & 0xffff; |
+} |
+ |
+uint32_t |
+CollationRootElements::getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const { |
+ U_ASSERT(p == (elements[index] & 0xffffff00) || isEndOfPrimaryRange(elements[index + 1])); |
+ uint32_t q = elements[++index]; |
+ int32_t step; |
+ if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int32_t)q & PRIMARY_STEP_MASK) != 0) { |
+ // Return the next primary in this range. |
+ if((p & 0xffff) == 0) { |
+ return Collation::incTwoBytePrimaryByOffset(p, isCompressible, step); |
+ } else { |
+ return Collation::incThreeBytePrimaryByOffset(p, isCompressible, step); |
+ } |
+ } else { |
+ // Return the next primary in the list. |
+ while((q & SEC_TER_DELTA_FLAG) != 0) { |
+ q = elements[++index]; |
+ } |
+ U_ASSERT((q & PRIMARY_STEP_MASK) == 0); |
+ return q; |
+ } |
+} |
+ |
+uint32_t |
+CollationRootElements::getSecondaryAfter(int32_t index, uint32_t s) const { |
+ uint32_t secLimit; |
+ if(index == 0) { |
+ // primary = 0 |
+ index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; |
+ // Gap at the end of the secondary CE range. |
+ secLimit = 0x10000; |
+ } else { |
+ U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]); |
+ ++index; |
+ // Gap for secondaries of primary CEs. |
+ secLimit = getSecondaryBoundary(); |
+ } |
+ for(;;) { |
+ uint32_t secTer = elements[index]; |
+ if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; } |
+ uint32_t sec = secTer >> 16; |
+ if(sec > s) { return sec; } |
+ ++index; |
+ } |
+} |
+ |
+uint32_t |
+CollationRootElements::getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const { |
+ uint32_t terLimit; |
+ if(index == 0) { |
+ // primary = 0 |
+ if(s == 0) { |
+ index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX]; |
+ // Gap at the end of the tertiary CE range. |
+ terLimit = 0x4000; |
+ } else { |
+ index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; |
+ // Gap for tertiaries of primary/secondary CEs. |
+ terLimit = getTertiaryBoundary(); |
+ } |
+ } else { |
+ U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]); |
+ ++index; |
+ terLimit = getTertiaryBoundary(); |
+ } |
+ uint32_t st = (s << 16) | t; |
+ for(;;) { |
+ uint32_t secTer = elements[index]; |
+ // No tertiary greater than t for this primary+secondary. |
+ if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; } |
+ secTer &= ~SEC_TER_DELTA_FLAG; |
+ if(secTer > st) { return secTer & 0xffff; } |
+ ++index; |
+ } |
+} |
+ |
+int32_t |
+CollationRootElements::findPrimary(uint32_t p) const { |
+ // Requirement: p must occur as a root primary. |
+ U_ASSERT((p & 0xff) == 0); // at most a 3-byte primary |
+ int32_t index = findP(p); |
+ // If p is in a range, then we just assume that p is an actual primary in this range. |
+ // (Too cumbersome/expensive to check.) |
+ // Otherwise, it must be an exact match. |
+ U_ASSERT(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00)); |
+ return index; |
+} |
+ |
+int32_t |
+CollationRootElements::findP(uint32_t p) const { |
+ // p need not occur as a root primary. |
+ // For example, it might be a reordering group boundary. |
+ U_ASSERT((p >> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE); |
+ // modified binary search |
+ int32_t start = (int32_t)elements[IX_FIRST_PRIMARY_INDEX]; |
+ U_ASSERT(p >= elements[start]); |
+ int32_t limit = length - 1; |
+ U_ASSERT(elements[limit] >= PRIMARY_SENTINEL); |
+ U_ASSERT(p < elements[limit]); |
+ while((start + 1) < limit) { |
+ // Invariant: elements[start] and elements[limit] are primaries, |
+ // and elements[start]<=p<=elements[limit]. |
+ int32_t i = (start + limit) / 2; |
+ uint32_t q = elements[i]; |
+ if((q & SEC_TER_DELTA_FLAG) != 0) { |
+ // Find the next primary. |
+ int32_t j = i + 1; |
+ for(;;) { |
+ if(j == limit) { break; } |
+ q = elements[j]; |
+ if((q & SEC_TER_DELTA_FLAG) == 0) { |
+ i = j; |
+ break; |
+ } |
+ ++j; |
+ } |
+ if((q & SEC_TER_DELTA_FLAG) != 0) { |
+ // Find the preceding primary. |
+ j = i - 1; |
+ for(;;) { |
+ if(j == start) { break; } |
+ q = elements[j]; |
+ if((q & SEC_TER_DELTA_FLAG) == 0) { |
+ i = j; |
+ break; |
+ } |
+ --j; |
+ } |
+ if((q & SEC_TER_DELTA_FLAG) != 0) { |
+ // No primary between start and limit. |
+ break; |
+ } |
+ } |
+ } |
+ if(p < (q & 0xffffff00)) { // Reset the "step" bits of a range end primary. |
+ limit = i; |
+ } else { |
+ start = i; |
+ } |
+ } |
+ return start; |
+} |
+ |
+U_NAMESPACE_END |
+ |
+#endif // !UCONFIG_NO_COLLATION |