| Index: source/i18n/collationkeys.cpp
|
| diff --git a/source/i18n/collationkeys.cpp b/source/i18n/collationkeys.cpp
|
| index 6006811377fec607d99b0f7e300033b9d5eaf399..3fb1af1b36768d8788529553ca62db1f558f63f9 100644
|
| --- a/source/i18n/collationkeys.cpp
|
| +++ b/source/i18n/collationkeys.cpp
|
| @@ -1,6 +1,6 @@
|
| /*
|
| *******************************************************************************
|
| -* Copyright (C) 2012-2014, International Business Machines
|
| +* Copyright (C) 2012-2015, International Business Machines
|
| * Corporation and others. All Rights Reserved.
|
| *******************************************************************************
|
| * collationkeys.cpp
|
| @@ -246,7 +246,6 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
| // +1 so that we can use "<" and primary ignorables test out early.
|
| variableTop = settings.variableTop + 1;
|
| }
|
| - const uint8_t *reorderTable = settings.reorderTable;
|
|
|
| uint32_t tertiaryMask = CollationSettings::getTertiaryMask(options);
|
|
|
| @@ -255,14 +254,14 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
| SortKeyLevel tertiaries;
|
| SortKeyLevel quaternaries;
|
|
|
| - uint32_t compressedP1 = 0; // 0==no compression; otherwise reordered compressible lead byte
|
| + uint32_t prevReorderedPrimary = 0; // 0==no compression
|
| int32_t commonCases = 0;
|
| int32_t commonSecondaries = 0;
|
| int32_t commonTertiaries = 0;
|
| int32_t commonQuaternaries = 0;
|
|
|
| uint32_t prevSecondary = 0;
|
| - UBool anyMergeSeparators = FALSE;
|
| + int32_t secSegmentStart = 0;
|
|
|
| for(;;) {
|
| // No need to keep all CEs in the buffer when we write a sort key.
|
| @@ -284,14 +283,15 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
| }
|
| do {
|
| if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
|
| - uint32_t p1 = p >> 24;
|
| - if(reorderTable != NULL) { p1 = reorderTable[p1]; }
|
| - if(p1 >= QUAT_SHIFTED_LIMIT_BYTE) {
|
| + if(settings.hasReordering()) {
|
| + p = settings.reorder(p);
|
| + }
|
| + if((p >> 24) >= QUAT_SHIFTED_LIMIT_BYTE) {
|
| // Prevent shifted primary lead bytes from
|
| // overlapping with the common compression range.
|
| quaternaries.appendByte(QUAT_SHIFTED_LIMIT_BYTE);
|
| }
|
| - quaternaries.appendWeight32((p1 << 24) | (p & 0xffffff));
|
| + quaternaries.appendWeight32(p);
|
| }
|
| do {
|
| ce = iter.nextCE(errorCode);
|
| @@ -304,11 +304,15 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
| // If ce==NO_CE, then write nothing for the primary level but
|
| // terminate compression on all levels and then exit the loop.
|
| if(p > Collation::NO_CE_PRIMARY && (levels & Collation::PRIMARY_LEVEL_FLAG) != 0) {
|
| + // Test the un-reordered primary for compressibility.
|
| + UBool isCompressible = compressibleBytes[p >> 24];
|
| + if(settings.hasReordering()) {
|
| + p = settings.reorder(p);
|
| + }
|
| uint32_t p1 = p >> 24;
|
| - if(reorderTable != NULL) { p1 = reorderTable[p1]; }
|
| - if(p1 != compressedP1) {
|
| - if(compressedP1 != 0) {
|
| - if(p1 < compressedP1) {
|
| + if(!isCompressible || p1 != (prevReorderedPrimary >> 24)) {
|
| + if(prevReorderedPrimary != 0) {
|
| + if(p < prevReorderedPrimary) {
|
| // No primary compression terminator
|
| // at the end of the level or merged segment.
|
| if(p1 > Collation::MERGE_SEPARATOR_BYTE) {
|
| @@ -319,12 +323,10 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
| }
|
| }
|
| sink.Append(p1);
|
| - // Test the un-reordered lead byte for compressibility but
|
| - // remember the reordered lead byte.
|
| - if(compressibleBytes[p >> 24]) {
|
| - compressedP1 = p1;
|
| + if(isCompressible) {
|
| + prevReorderedPrimary = p;
|
| } else {
|
| - compressedP1 = 0;
|
| + prevReorderedPrimary = 0;
|
| }
|
| }
|
| char p2 = (char)(p >> 16);
|
| @@ -350,7 +352,11 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
| uint32_t s = lower32 >> 16;
|
| if(s == 0) {
|
| // secondary ignorable
|
| - } else if(s == Collation::COMMON_WEIGHT16) {
|
| + } else if(s == Collation::COMMON_WEIGHT16 &&
|
| + ((options & CollationSettings::BACKWARD_SECONDARY) == 0 ||
|
| + p != Collation::MERGE_SEPARATOR_PRIMARY)) {
|
| + // s is a common secondary weight, and
|
| + // backwards-secondary is off or the ce is not the merge separator.
|
| ++commonSecondaries;
|
| } else if((options & CollationSettings::BACKWARD_SECONDARY) == 0) {
|
| if(commonSecondaries != 0) {
|
| @@ -389,16 +395,28 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
| }
|
| // commonSecondaries == 0
|
| }
|
| - // Reduce separators so that we can look for byte<=1 later.
|
| - if(s <= Collation::MERGE_SEPARATOR_WEIGHT16) {
|
| - if(s == Collation::MERGE_SEPARATOR_WEIGHT16) {
|
| - anyMergeSeparators = TRUE;
|
| + if(0 < p && p <= Collation::MERGE_SEPARATOR_PRIMARY) {
|
| + // The backwards secondary level compares secondary weights backwards
|
| + // within segments separated by the merge separator (U+FFFE).
|
| + uint8_t *secs = secondaries.data();
|
| + int32_t last = secondaries.length() - 1;
|
| + if(secSegmentStart < last) {
|
| + uint8_t *p = secs + secSegmentStart;
|
| + uint8_t *q = secs + last;
|
| + do {
|
| + uint8_t b = *p;
|
| + *p++ = *q;
|
| + *q-- = b;
|
| + } while(p < q);
|
| }
|
| - secondaries.appendByte((s >> 8) - 1);
|
| + secondaries.appendByte(p == Collation::NO_CE_PRIMARY ?
|
| + Collation::LEVEL_SEPARATOR_BYTE : Collation::MERGE_SEPARATOR_BYTE);
|
| + prevSecondary = 0;
|
| + secSegmentStart = secondaries.length();
|
| } else {
|
| secondaries.appendReverseWeight16(s);
|
| + prevSecondary = s;
|
| }
|
| - prevSecondary = s;
|
| }
|
| }
|
|
|
| @@ -411,19 +429,23 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
| } else {
|
| uint32_t c = (lower32 >> 8) & 0xff; // case bits & tertiary lead byte
|
| U_ASSERT((c & 0xc0) != 0xc0);
|
| - if((c & 0xc0) == 0 && c > Collation::MERGE_SEPARATOR_BYTE) {
|
| + if((c & 0xc0) == 0 && c > Collation::LEVEL_SEPARATOR_BYTE) {
|
| ++commonCases;
|
| } else {
|
| if((options & CollationSettings::UPPER_FIRST) == 0) {
|
| // lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14, upper=15.
|
| - if(commonCases != 0) {
|
| + // If there are only common (=lowest) weights in the whole level,
|
| + // then we need not write anything.
|
| + // Level length differences are handled already on the next-higher level.
|
| + if(commonCases != 0 &&
|
| + (c > Collation::LEVEL_SEPARATOR_BYTE || !cases.isEmpty())) {
|
| --commonCases;
|
| while(commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) {
|
| cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4);
|
| commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT;
|
| }
|
| uint32_t b;
|
| - if(c <= Collation::MERGE_SEPARATOR_BYTE) {
|
| + if(c <= Collation::LEVEL_SEPARATOR_BYTE) {
|
| b = CASE_LOWER_FIRST_COMMON_LOW + commonCases;
|
| } else {
|
| b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases;
|
| @@ -431,7 +453,7 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
| cases.appendByte(b << 4);
|
| commonCases = 0;
|
| }
|
| - if(c > Collation::MERGE_SEPARATOR_BYTE) {
|
| + if(c > Collation::LEVEL_SEPARATOR_BYTE) {
|
| c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >> 6)) << 4; // 14 or 15
|
| }
|
| } else {
|
| @@ -447,11 +469,11 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
| cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4);
|
| commonCases = 0;
|
| }
|
| - if(c > Collation::MERGE_SEPARATOR_BYTE) {
|
| + if(c > Collation::LEVEL_SEPARATOR_BYTE) {
|
| c = (CASE_UPPER_FIRST_COMMON_LOW - (c >> 6)) << 4; // 2 or 1
|
| }
|
| }
|
| - // c is a separator byte 01 or 02,
|
| + // c is a separator byte 01,
|
| // or a left-shifted nibble 0x10, 0x20, ... 0xf0.
|
| cases.appendByte(c);
|
| }
|
| @@ -510,14 +532,14 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
| // Their case+tertiary weights must be greater than those of
|
| // primary and secondary CEs.
|
| //
|
| - // Separators 01..02 -> 01..02 (unchanged)
|
| - // Lowercase 03..04 -> 83..84 (includes uncased)
|
| + // Separator 01 -> 01 (unchanged)
|
| + // Lowercase 02..04 -> 82..84 (includes uncased)
|
| // Common weight 05 -> 85..C5 (common-weight compression range)
|
| // Lowercase 06..3F -> C6..FF
|
| - // Mixed case 43..7F -> 43..7F
|
| - // Uppercase 83..BF -> 03..3F
|
| + // Mixed case 42..7F -> 42..7F
|
| + // Uppercase 82..BF -> 02..3F
|
| // Tertiary CE 86..BF -> C6..FF
|
| - if(t <= Collation::MERGE_SEPARATOR_WEIGHT16) {
|
| + if(t <= Collation::NO_CE_WEIGHT16) {
|
| // Keep separators unchanged.
|
| } else if(lower32 > 0xffff) {
|
| // Invert case bits of primary & secondary CEs.
|
| @@ -551,24 +573,22 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
|
|
| if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
|
| uint32_t q = lower32 & 0xffff;
|
| - if((q & 0xc0) == 0 && q > Collation::MERGE_SEPARATOR_WEIGHT16) {
|
| + if((q & 0xc0) == 0 && q > Collation::NO_CE_WEIGHT16) {
|
| ++commonQuaternaries;
|
| - } else if(q <= Collation::MERGE_SEPARATOR_WEIGHT16 &&
|
| + } else if(q == Collation::NO_CE_WEIGHT16 &&
|
| (options & CollationSettings::ALTERNATE_MASK) == 0 &&
|
| - (quaternaries.isEmpty() ||
|
| - quaternaries[quaternaries.length() - 1] == Collation::MERGE_SEPARATOR_BYTE)) {
|
| - // If alternate=non-ignorable and there are only
|
| - // common quaternary weights between two separators,
|
| - // then we need not write anything between these separators.
|
| + quaternaries.isEmpty()) {
|
| + // If alternate=non-ignorable and there are only common quaternary weights,
|
| + // then we need not write anything.
|
| // The only weights greater than the merge separator and less than the common weight
|
| // are shifted primary weights, which are not generated for alternate=non-ignorable.
|
| // There are also exactly as many quaternary weights as tertiary weights,
|
| // so level length differences are handled already on tertiary level.
|
| // Any above-common quaternary weight will compare greater regardless.
|
| - quaternaries.appendByte(q >> 8);
|
| + quaternaries.appendByte(Collation::LEVEL_SEPARATOR_BYTE);
|
| } else {
|
| - if(q <= Collation::MERGE_SEPARATOR_WEIGHT16) {
|
| - q >>= 8;
|
| + if(q == Collation::NO_CE_WEIGHT16) {
|
| + q = Collation::LEVEL_SEPARATOR_BYTE;
|
| } else {
|
| q = 0xfc + ((q >> 6) & 3);
|
| }
|
| @@ -602,42 +622,7 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
| if(!callback.needToWrite(Collation::SECONDARY_LEVEL)) { return; }
|
| ok &= secondaries.isOk();
|
| sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
|
| - uint8_t *secs = secondaries.data();
|
| - int32_t length = secondaries.length() - 1; // Ignore the trailing NO_CE.
|
| - if((options & CollationSettings::BACKWARD_SECONDARY) != 0) {
|
| - // The backwards secondary level compares secondary weights backwards
|
| - // within segments separated by the merge separator (U+FFFE, weight 02).
|
| - // The separator weights 01 & 02 were reduced to 00 & 01 so that
|
| - // we do not accidentally separate at a _second_ weight byte of 02.
|
| - int32_t start = 0;
|
| - for(;;) {
|
| - // Find the merge separator or the NO_CE terminator.
|
| - int32_t limit;
|
| - if(anyMergeSeparators) {
|
| - limit = start;
|
| - while(secs[limit] > 1) { ++limit; }
|
| - } else {
|
| - limit = length;
|
| - }
|
| - // Reverse this segment.
|
| - if(start < limit) {
|
| - uint8_t *p = secs + start;
|
| - uint8_t *q = secs + limit - 1;
|
| - while(p < q) {
|
| - uint8_t s = *p;
|
| - *p++ = *q;
|
| - *q-- = s;
|
| - }
|
| - }
|
| - // Did we reach the end of the string?
|
| - if(secs[limit] == 0) { break; }
|
| - // Restore the merge separator.
|
| - secs[limit] = 2;
|
| - // Skip the merge separator and continue.
|
| - start = limit + 1;
|
| - }
|
| - }
|
| - sink.Append(reinterpret_cast<char *>(secs), length);
|
| + secondaries.appendTo(sink);
|
| }
|
|
|
| if((levels & Collation::CASE_LEVEL_FLAG) != 0) {
|
| @@ -649,21 +634,12 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
|
| uint8_t b = 0;
|
| for(int32_t i = 0; i < length; ++i) {
|
| uint8_t c = (uint8_t)cases[i];
|
| - if(c <= Collation::MERGE_SEPARATOR_BYTE) {
|
| - U_ASSERT(c != 0);
|
| - if(b != 0) {
|
| - sink.Append(b);
|
| - b = 0;
|
| - }
|
| - sink.Append(c);
|
| + U_ASSERT((c & 0xf) == 0 && c != 0);
|
| + if(b == 0) {
|
| + b = c;
|
| } else {
|
| - U_ASSERT((c & 0xf) == 0);
|
| - if(b == 0) {
|
| - b = c;
|
| - } else {
|
| - sink.Append(b | (c >> 4));
|
| - b = 0;
|
| - }
|
| + sink.Append(b | (c >> 4));
|
| + b = 0;
|
| }
|
| }
|
| if(b != 0) {
|
|
|