Index: source/i18n/collationfastlatinbuilder.cpp |
diff --git a/source/i18n/collationfastlatinbuilder.cpp b/source/i18n/collationfastlatinbuilder.cpp |
index fefed8600e7fae1be2b731966131c22acd4f5bbc..d5acda15b0175597ade7db6c2d0737ab5f5c6e94 100644 |
--- a/source/i18n/collationfastlatinbuilder.cpp |
+++ b/source/i18n/collationfastlatinbuilder.cpp |
@@ -1,6 +1,6 @@ |
/* |
******************************************************************************* |
-* Copyright (C) 2013-2014, International Business Machines |
+* Copyright (C) 2013-2015, International Business Machines |
* Corporation and others. All Rights Reserved. |
******************************************************************************* |
* collationfastlatinbuilder.cpp |
@@ -136,42 +136,26 @@ CollationFastLatinBuilder::forData(const CollationData &data, UErrorCode &errorC |
UBool |
CollationFastLatinBuilder::loadGroups(const CollationData &data, UErrorCode &errorCode) { |
if(U_FAILURE(errorCode)) { return FALSE; } |
- result.append(0); // reserved for version & headerLength |
+ headerLength = 1 + NUM_SPECIAL_GROUPS; |
+ uint32_t r0 = (CollationFastLatin::VERSION << 8) | headerLength; |
+ result.append((UChar)r0); |
// The first few reordering groups should be special groups |
// (space, punct, ..., digit) followed by Latn, then Grek and other scripts. |
- for(int32_t i = 0;;) { |
- if(i >= data.scriptsLength) { |
- // no Latn script |
- errorCode = U_INTERNAL_PROGRAM_ERROR; |
+ for(int32_t i = 0; i < NUM_SPECIAL_GROUPS; ++i) { |
+ lastSpecialPrimaries[i] = data.getLastPrimaryForGroup(UCOL_REORDER_CODE_FIRST + i); |
+ if(lastSpecialPrimaries[i] == 0) { |
+ // missing data |
return FALSE; |
} |
- uint32_t head = data.scripts[i]; |
- uint32_t lastByte = head & 0xff; // last primary byte in the group |
- int32_t group = data.scripts[i + 2]; |
- if(group == UCOL_REORDER_CODE_DIGIT) { |
- firstDigitPrimary = (head & 0xff00) << 16; |
- headerLength = result.length(); |
- uint32_t r0 = (CollationFastLatin::VERSION << 8) | headerLength; |
- result.setCharAt(0, (UChar)r0); |
- } else if(group == USCRIPT_LATIN) { |
- if(firstDigitPrimary == 0) { |
- // no digit group |
- errorCode = U_INTERNAL_PROGRAM_ERROR; |
- return FALSE; |
- } |
- firstLatinPrimary = (head & 0xff00) << 16; |
- lastLatinPrimary = (lastByte << 24) | 0xffffff; |
- break; |
- } else if(firstDigitPrimary == 0) { |
- // a group below digits |
- if(lastByte > 0x7f) { |
- // We only use 7 bits for the last byte of a below-digits group. |
- // This does not warrant an errorCode, but we do not build a fast Latin table. |
- return FALSE; |
- } |
- result.append((UChar)lastByte); |
- } |
- i = i + 2 + data.scripts[i + 1]; |
+ result.append(0); // reserve a slot for this group |
+ } |
+ |
+ firstDigitPrimary = data.getFirstPrimaryForGroup(UCOL_REORDER_CODE_DIGIT); |
+ firstLatinPrimary = data.getFirstPrimaryForGroup(USCRIPT_LATIN); |
+ lastLatinPrimary = data.getLastPrimaryForGroup(USCRIPT_LATIN); |
+ if(firstDigitPrimary == 0 || firstLatinPrimary == 0) { |
+ // missing data |
+ return FALSE; |
} |
return TRUE; |
} |
@@ -187,23 +171,21 @@ CollationFastLatinBuilder::inSameGroup(uint32_t p, uint32_t q) const { |
} |
// Both or neither must be potentially-variable, |
// so that we can test only one and determine if both are variable. |
- if(p >= firstDigitPrimary) { |
- return q >= firstDigitPrimary; |
- } else if(q >= firstDigitPrimary) { |
+ uint32_t lastVariablePrimary = lastSpecialPrimaries[NUM_SPECIAL_GROUPS - 1]; |
+ if(p > lastVariablePrimary) { |
+ return q > lastVariablePrimary; |
+ } else if(q > lastVariablePrimary) { |
return FALSE; |
} |
// Both will be encoded with long mini primaries. |
// They must be in the same special reordering group, |
// so that we can test only one and determine if both are variable. |
- p >>= 24; // first primary byte |
- q >>= 24; |
U_ASSERT(p != 0 && q != 0); |
- U_ASSERT(p <= result[headerLength - 1]); // the loop will terminate |
- for(int32_t i = 1;; ++i) { |
- uint32_t lastByte = result[i]; |
- if(p <= lastByte) { |
- return q <= lastByte; |
- } else if(q <= lastByte) { |
+ for(int32_t i = 0;; ++i) { // will terminate |
+ uint32_t lastPrimary = lastSpecialPrimaries[i]; |
+ if(p <= lastPrimary) { |
+ return q <= lastPrimary; |
+ } else if(q <= lastPrimary) { |
return FALSE; |
} |
} |
@@ -451,8 +433,8 @@ CollationFastLatinBuilder::encodeUniqueCEs(UErrorCode &errorCode) { |
errorCode = U_MEMORY_ALLOCATION_ERROR; |
return FALSE; |
} |
- int32_t group = 1; |
- uint32_t lastGroupByte = result[group]; |
+ int32_t group = 0; |
+ uint32_t lastGroupPrimary = lastSpecialPrimaries[group]; |
// The lowest unique CE must be at least a secondary CE. |
U_ASSERT(((uint32_t)uniqueCEs.elementAti(0) >> 16) != 0); |
uint32_t prevPrimary = 0; |
@@ -466,16 +448,15 @@ CollationFastLatinBuilder::encodeUniqueCEs(UErrorCode &errorCode) { |
// (uniqueCEs does not store case bits.) |
uint32_t p = (uint32_t)(ce >> 32); |
if(p != prevPrimary) { |
- uint32_t p1 = p >> 24; |
- while(p1 > lastGroupByte) { |
+ while(p > lastGroupPrimary) { |
U_ASSERT(pri <= CollationFastLatin::MAX_LONG); |
- // Add the last "long primary" in or before the group |
- // into the upper 9 bits of the group entry. |
- result.setCharAt(group, (UChar)((pri << 4) | lastGroupByte)); |
- if(++group < headerLength) { // group is 1-based |
- lastGroupByte = result[group]; |
+ // Set the group's header entry to the |
+ // last "long primary" in or before the group. |
+ result.setCharAt(1 + group, (UChar)pri); |
+ if(++group < NUM_SPECIAL_GROUPS) { |
+ lastGroupPrimary = lastSpecialPrimaries[group]; |
} else { |
- lastGroupByte = 0xff; |
+ lastGroupPrimary = 0xffffffff; |
break; |
} |
} |