Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Unified Diff: source/i18n/collationcompare.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unusued directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/i18n/collationcompare.h ('k') | source/i18n/collationdata.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/i18n/collationcompare.cpp
diff --git a/source/i18n/collationcompare.cpp b/source/i18n/collationcompare.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6f9107e9db516416fdbdec4493f6cd585d9897ac
--- /dev/null
+++ b/source/i18n/collationcompare.cpp
@@ -0,0 +1,363 @@
+/*
+*******************************************************************************
+* Copyright (C) 1996-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* collationcompare.cpp
+*
+* created on: 2012feb14 with new and old collation code
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/ucol.h"
+#include "cmemory.h"
+#include "collation.h"
+#include "collationcompare.h"
+#include "collationiterator.h"
+#include "collationsettings.h"
+#include "uassert.h"
+
+U_NAMESPACE_BEGIN
+
+UCollationResult
+CollationCompare::compareUpToQuaternary(CollationIterator &left, CollationIterator &right,
+ const CollationSettings &settings,
+ UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
+
+ int32_t options = settings.options;
+ uint32_t variableTop;
+ if((options & CollationSettings::ALTERNATE_MASK) == 0) {
+ variableTop = 0;
+ } else {
+ // +1 so that we can use "<" and primary ignorables test out early.
+ variableTop = settings.variableTop + 1;
+ }
+ UBool anyVariable = FALSE;
+
+ // Fetch CEs, compare primaries, store secondary & tertiary weights.
+ U_ALIGN_CODE(16);
+ for(;;) {
+ // We fetch CEs until we get a non-ignorable primary or reach the end.
+ uint32_t leftPrimary;
+ do {
+ int64_t ce = left.nextCE(errorCode);
+ leftPrimary = (uint32_t)(ce >> 32);
+ if(leftPrimary < variableTop && leftPrimary > Collation::MERGE_SEPARATOR_PRIMARY) {
+ // Variable CE, shift it to quaternary level.
+ // Ignore all following primary ignorables, and shift further variable CEs.
+ anyVariable = TRUE;
+ do {
+ // Store only the primary of the variable CE.
+ left.setCurrentCE(ce & INT64_C(0xffffffff00000000));
+ for(;;) {
+ ce = left.nextCE(errorCode);
+ leftPrimary = (uint32_t)(ce >> 32);
+ if(leftPrimary == 0) {
+ left.setCurrentCE(0);
+ } else {
+ break;
+ }
+ }
+ } while(leftPrimary < variableTop &&
+ leftPrimary > Collation::MERGE_SEPARATOR_PRIMARY);
+ }
+ } while(leftPrimary == 0);
+
+ uint32_t rightPrimary;
+ do {
+ int64_t ce = right.nextCE(errorCode);
+ rightPrimary = (uint32_t)(ce >> 32);
+ if(rightPrimary < variableTop && rightPrimary > Collation::MERGE_SEPARATOR_PRIMARY) {
+ // Variable CE, shift it to quaternary level.
+ // Ignore all following primary ignorables, and shift further variable CEs.
+ anyVariable = TRUE;
+ do {
+ // Store only the primary of the variable CE.
+ right.setCurrentCE(ce & INT64_C(0xffffffff00000000));
+ for(;;) {
+ ce = right.nextCE(errorCode);
+ rightPrimary = (uint32_t)(ce >> 32);
+ if(rightPrimary == 0) {
+ right.setCurrentCE(0);
+ } else {
+ break;
+ }
+ }
+ } while(rightPrimary < variableTop &&
+ rightPrimary > Collation::MERGE_SEPARATOR_PRIMARY);
+ }
+ } while(rightPrimary == 0);
+
+ if(leftPrimary != rightPrimary) {
+ // Return the primary difference, with script reordering.
+ const uint8_t *reorderTable = settings.reorderTable;
+ if (reorderTable != NULL) {
+ leftPrimary = Collation::reorder(reorderTable, leftPrimary);
+ rightPrimary = Collation::reorder(reorderTable, rightPrimary);
+ }
+ return (leftPrimary < rightPrimary) ? UCOL_LESS : UCOL_GREATER;
+ }
+ if(leftPrimary == Collation::NO_CE_PRIMARY) { break; }
+ }
+ if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
+
+ // Compare the buffered secondary & tertiary weights.
+ // We might skip the secondary level but continue with the case level
+ // which is turned on separately.
+ if(CollationSettings::getStrength(options) >= UCOL_SECONDARY) {
+ if((options & CollationSettings::BACKWARD_SECONDARY) == 0) {
+ int32_t leftIndex = 0;
+ int32_t rightIndex = 0;
+ for(;;) {
+ uint32_t leftSecondary;
+ do {
+ leftSecondary = ((uint32_t)left.getCE(leftIndex++)) >> 16;
+ } while(leftSecondary == 0);
+
+ uint32_t rightSecondary;
+ do {
+ rightSecondary = ((uint32_t)right.getCE(rightIndex++)) >> 16;
+ } while(rightSecondary == 0);
+
+ if(leftSecondary != rightSecondary) {
+ return (leftSecondary < rightSecondary) ? UCOL_LESS : UCOL_GREATER;
+ }
+ if(leftSecondary == Collation::NO_CE_WEIGHT16) { break; }
+ }
+ } else {
+ // The backwards secondary level compares secondary weights backwards
+ // within segments separated by the merge separator (U+FFFE, weight 02).
+ int32_t leftStart = 0;
+ int32_t rightStart = 0;
+ for(;;) {
+ // Find the merge separator or the NO_CE terminator.
+ int32_t leftLimit = leftStart;
+ uint32_t leftLower32;
+ while((leftLower32 = (uint32_t)left.getCE(leftLimit)) >
+ Collation::MERGE_SEPARATOR_LOWER32 ||
+ leftLower32 == 0) {
+ ++leftLimit;
+ }
+ int32_t rightLimit = rightStart;
+ uint32_t rightLower32;
+ while((rightLower32 = (uint32_t)right.getCE(rightLimit)) >
+ Collation::MERGE_SEPARATOR_LOWER32 ||
+ rightLower32 == 0) {
+ ++rightLimit;
+ }
+
+ // Compare the segments.
+ int32_t leftIndex = leftLimit;
+ int32_t rightIndex = rightLimit;
+ for(;;) {
+ int32_t leftSecondary = 0;
+ while(leftSecondary == 0 && leftIndex > leftStart) {
+ leftSecondary = ((uint32_t)left.getCE(--leftIndex)) >> 16;
+ }
+
+ int32_t rightSecondary = 0;
+ while(rightSecondary == 0 && rightIndex > rightStart) {
+ rightSecondary = ((uint32_t)right.getCE(--rightIndex)) >> 16;
+ }
+
+ if(leftSecondary != rightSecondary) {
+ return (leftSecondary < rightSecondary) ? UCOL_LESS : UCOL_GREATER;
+ }
+ if(leftSecondary == 0) { break; }
+ }
+
+ // Did we reach the end of either string?
+ // Both strings have the same number of merge separators,
+ // or else there would have been a primary-level difference.
+ U_ASSERT(left.getCE(leftLimit) == right.getCE(rightLimit));
+ if(left.getCE(leftLimit) == Collation::NO_CE) { break; }
+ // Skip both merge separators and continue.
+ leftStart = leftLimit + 1;
+ rightStart = rightLimit + 1;
+ }
+ }
+ }
+
+ if((options & CollationSettings::CASE_LEVEL) != 0) {
+ int32_t strength = CollationSettings::getStrength(options);
+ int32_t leftIndex = 0;
+ int32_t rightIndex = 0;
+ for(;;) {
+ uint32_t leftCase, leftLower32, rightCase;
+ if(strength == UCOL_PRIMARY) {
+ // Primary+caseLevel: Ignore case level weights of primary ignorables.
+ // Otherwise we would get a-umlaut > a
+ // which is not desirable for accent-insensitive sorting.
+ // Check for (lower 32 bits) == 0 as well because variable CEs are stored
+ // with only primary weights.
+ int64_t ce;
+ do {
+ ce = left.getCE(leftIndex++);
+ leftCase = (uint32_t)ce;
+ } while((uint32_t)(ce >> 32) == 0 || leftCase == 0);
+ leftLower32 = leftCase;
+ leftCase &= 0xc000;
+
+ do {
+ ce = right.getCE(rightIndex++);
+ rightCase = (uint32_t)ce;
+ } while((uint32_t)(ce >> 32) == 0 || rightCase == 0);
+ rightCase &= 0xc000;
+ } else {
+ // Secondary+caseLevel: By analogy with the above,
+ // ignore case level weights of secondary ignorables.
+ //
+ // Note: A tertiary CE has uppercase case bits (0.0.ut)
+ // to keep tertiary+caseFirst well-formed.
+ //
+ // Tertiary+caseLevel: Also ignore case level weights of secondary ignorables.
+ // Otherwise a tertiary CE's uppercase would be no greater than
+ // a primary/secondary CE's uppercase.
+ // (See UCA well-formedness condition 2.)
+ // We could construct a special case weight higher than uppercase,
+ // but it's simpler to always ignore case weights of secondary ignorables,
+ // turning 0.0.ut into 0.0.0.t.
+ // (See LDML Collation, Case Parameters.)
+ do {
+ leftCase = (uint32_t)left.getCE(leftIndex++);
+ } while(leftCase <= 0xffff);
+ leftLower32 = leftCase;
+ leftCase &= 0xc000;
+
+ do {
+ rightCase = (uint32_t)right.getCE(rightIndex++);
+ } while(rightCase <= 0xffff);
+ rightCase &= 0xc000;
+ }
+
+ // No need to handle NO_CE and MERGE_SEPARATOR specially:
+ // There is one case weight for each previous-level weight,
+ // so level length differences were handled there.
+ if(leftCase != rightCase) {
+ if((options & CollationSettings::UPPER_FIRST) == 0) {
+ return (leftCase < rightCase) ? UCOL_LESS : UCOL_GREATER;
+ } else {
+ return (leftCase < rightCase) ? UCOL_GREATER : UCOL_LESS;
+ }
+ }
+ if((leftLower32 >> 16) == Collation::NO_CE_WEIGHT16) { break; }
+ }
+ }
+ if(CollationSettings::getStrength(options) <= UCOL_SECONDARY) { return UCOL_EQUAL; }
+
+ uint32_t tertiaryMask = CollationSettings::getTertiaryMask(options);
+
+ int32_t leftIndex = 0;
+ int32_t rightIndex = 0;
+ uint32_t anyQuaternaries = 0;
+ for(;;) {
+ uint32_t leftLower32, leftTertiary;
+ do {
+ leftLower32 = (uint32_t)left.getCE(leftIndex++);
+ anyQuaternaries |= leftLower32;
+ U_ASSERT((leftLower32 & Collation::ONLY_TERTIARY_MASK) != 0 ||
+ (leftLower32 & 0xc0c0) == 0);
+ leftTertiary = leftLower32 & tertiaryMask;
+ } while(leftTertiary == 0);
+
+ uint32_t rightLower32, rightTertiary;
+ do {
+ rightLower32 = (uint32_t)right.getCE(rightIndex++);
+ anyQuaternaries |= rightLower32;
+ U_ASSERT((rightLower32 & Collation::ONLY_TERTIARY_MASK) != 0 ||
+ (rightLower32 & 0xc0c0) == 0);
+ rightTertiary = rightLower32 & tertiaryMask;
+ } while(rightTertiary == 0);
+
+ if(leftTertiary != rightTertiary) {
+ if(CollationSettings::sortsTertiaryUpperCaseFirst(options)) {
+ // Pass through NO_CE and MERGE_SEPARATOR
+ // and keep real tertiary weights larger than the MERGE_SEPARATOR.
+ // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
+ // to keep tertiary CEs well-formed.
+ // Their case+tertiary weights must be greater than those of
+ // primary and secondary CEs.
+ if(leftTertiary > Collation::MERGE_SEPARATOR_WEIGHT16) {
+ if(leftLower32 > 0xffff) {
+ leftTertiary ^= 0xc000;
+ } else {
+ leftTertiary += 0x4000;
+ }
+ }
+ if(rightTertiary > Collation::MERGE_SEPARATOR_WEIGHT16) {
+ if(rightLower32 > 0xffff) {
+ rightTertiary ^= 0xc000;
+ } else {
+ rightTertiary += 0x4000;
+ }
+ }
+ }
+ return (leftTertiary < rightTertiary) ? UCOL_LESS : UCOL_GREATER;
+ }
+ if(leftTertiary == Collation::NO_CE_WEIGHT16) { break; }
+ }
+ if(CollationSettings::getStrength(options) <= UCOL_TERTIARY) { return UCOL_EQUAL; }
+
+ if(!anyVariable && (anyQuaternaries & 0xc0) == 0) {
+ // If there are no "variable" CEs and no non-zero quaternary weights,
+ // then there are no quaternary differences.
+ return UCOL_EQUAL;
+ }
+
+ leftIndex = 0;
+ rightIndex = 0;
+ for(;;) {
+ uint32_t leftQuaternary;
+ do {
+ int64_t ce = left.getCE(leftIndex++);
+ leftQuaternary = (uint32_t)ce & 0xffff;
+ if(leftQuaternary == 0) {
+ // Variable primary or completely ignorable.
+ leftQuaternary = (uint32_t)(ce >> 32);
+ } else if(leftQuaternary <= Collation::MERGE_SEPARATOR_WEIGHT16) {
+ // Leave NO_CE or MERGE_SEPARATOR as is.
+ } else {
+ // Regular CE, not tertiary ignorable.
+ // Preserve the quaternary weight in bits 7..6.
+ leftQuaternary |= 0xffffff3f;
+ }
+ } while(leftQuaternary == 0);
+
+ uint32_t rightQuaternary;
+ do {
+ int64_t ce = right.getCE(rightIndex++);
+ rightQuaternary = (uint32_t)ce & 0xffff;
+ if(rightQuaternary == 0) {
+ // Variable primary or completely ignorable.
+ rightQuaternary = (uint32_t)(ce >> 32);
+ } else if(rightQuaternary <= Collation::MERGE_SEPARATOR_WEIGHT16) {
+ // Leave NO_CE or MERGE_SEPARATOR as is.
+ } else {
+ // Regular CE, not tertiary ignorable.
+ // Preserve the quaternary weight in bits 7..6.
+ rightQuaternary |= 0xffffff3f;
+ }
+ } while(rightQuaternary == 0);
+
+ if(leftQuaternary != rightQuaternary) {
+ // Return the difference, with script reordering.
+ const uint8_t *reorderTable = settings.reorderTable;
+ if (reorderTable != NULL) {
+ leftQuaternary = Collation::reorder(reorderTable, leftQuaternary);
+ rightQuaternary = Collation::reorder(reorderTable, rightQuaternary);
+ }
+ return (leftQuaternary < rightQuaternary) ? UCOL_LESS : UCOL_GREATER;
+ }
+ if(leftQuaternary == Collation::NO_CE_WEIGHT16) { break; }
+ }
+ return UCOL_EQUAL;
+}
+
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_COLLATION
« no previous file with comments | « source/i18n/collationcompare.h ('k') | source/i18n/collationdata.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698