| Index: source/i18n/coll.cpp
|
| diff --git a/source/i18n/coll.cpp b/source/i18n/coll.cpp
|
| index d4224ba39e5a5ed94ae5135e9b2805fbb51918e3..c4845f2b1f9e62e77fbe9336abe01a1d700bafff 100644
|
| --- a/source/i18n/coll.cpp
|
| +++ b/source/i18n/coll.cpp
|
| @@ -1,6 +1,6 @@
|
| /*
|
| ******************************************************************************
|
| - * Copyright (C) 1996-2013, International Business Machines Corporation and
|
| + * Copyright (C) 1996-2014, International Business Machines Corporation and
|
| * others. All Rights Reserved.
|
| ******************************************************************************
|
| */
|
| @@ -35,9 +35,10 @@
|
| * Normalizer::EMode
|
| * 11/23/9 srl Inlining of some critical functions
|
| * 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h)
|
| + * 2012-2014 markus Rewritten in C++ again.
|
| */
|
|
|
| -#include "utypeinfo.h" // for 'typeid' to work
|
| +#include "utypeinfo.h" // for 'typeid' to work
|
|
|
| #include "unicode/utypes.h"
|
|
|
| @@ -45,6 +46,9 @@
|
|
|
| #include "unicode/coll.h"
|
| #include "unicode/tblcoll.h"
|
| +#include "collationdata.h"
|
| +#include "collationroot.h"
|
| +#include "collationtailoring.h"
|
| #include "ucol_imp.h"
|
| #include "cstring.h"
|
| #include "cmemory.h"
|
| @@ -176,22 +180,7 @@ public:
|
| if (actualReturn == NULL) {
|
| actualReturn = &ar;
|
| }
|
| - Collator* result = (Collator*)ICULocaleService::getKey(key, actualReturn, status);
|
| - // Ugly Hack Alert! If the actualReturn length is zero, this
|
| - // means we got a default object, not a "real" service-created
|
| - // object. We don't call setLocales() on a default object,
|
| - // because that will overwrite its correct built-in locale
|
| - // metadata (valid & actual) with our incorrect data (all we
|
| - // have is the requested locale). (TODO remove in 3.0) [aliu]
|
| - if (result && actualReturn->length() > 0) {
|
| - const LocaleKey& lkey = (const LocaleKey&)key;
|
| - Locale canonicalLocale("");
|
| - Locale currentLocale("");
|
| -
|
| - LocaleUtility::initLocaleFromName(*actualReturn, currentLocale);
|
| - result->setLocales(lkey.canonicalLocale(canonicalLocale), currentLocale, currentLocale);
|
| - }
|
| - return result;
|
| + return (Collator*)ICULocaleService::getKey(key, actualReturn, status);
|
| }
|
|
|
| virtual UBool isDefault() const {
|
| @@ -225,40 +214,6 @@ hasService(void)
|
| return retVal;
|
| }
|
|
|
| -// -------------------------------------
|
| -
|
| -UCollator*
|
| -Collator::createUCollator(const char *loc,
|
| - UErrorCode *status)
|
| -{
|
| - UCollator *result = 0;
|
| - if (status && U_SUCCESS(*status) && hasService()) {
|
| - Locale desiredLocale(loc);
|
| - Collator *col = (Collator*)gService->get(desiredLocale, *status);
|
| - RuleBasedCollator *rbc;
|
| - if (col && (rbc = dynamic_cast<RuleBasedCollator *>(col))) {
|
| - if (!rbc->dataIsOwned) {
|
| - result = ucol_safeClone(rbc->ucollator, NULL, NULL, status);
|
| - } else {
|
| - result = rbc->ucollator;
|
| - rbc->ucollator = NULL; // to prevent free on delete
|
| - }
|
| - } else {
|
| - // should go in a function- ucol_initDelegate(delegate)
|
| - result = (UCollator *)uprv_malloc(sizeof(UCollator));
|
| - if(result == NULL) {
|
| - *status = U_MEMORY_ALLOCATION_ERROR;
|
| - } else {
|
| - uprv_memset(result, 0, sizeof(UCollator));
|
| - result->delegate = col;
|
| - result->freeOnClose = TRUE; // do free on close.
|
| - col = NULL; // to prevent free on delete.
|
| - }
|
| - }
|
| - delete col;
|
| - }
|
| - return result;
|
| -}
|
| #endif /* UCONFIG_NO_SERVICE */
|
|
|
| static void U_CALLCONV
|
| @@ -301,6 +256,169 @@ static UBool isAvailableLocaleListInitialized(UErrorCode &status) {
|
|
|
| // Collator public methods -----------------------------------------------
|
|
|
| +namespace {
|
| +
|
| +static const struct {
|
| + const char *name;
|
| + UColAttribute attr;
|
| +} collAttributes[] = {
|
| + { "colStrength", UCOL_STRENGTH },
|
| + { "colBackwards", UCOL_FRENCH_COLLATION },
|
| + { "colCaseLevel", UCOL_CASE_LEVEL },
|
| + { "colCaseFirst", UCOL_CASE_FIRST },
|
| + { "colAlternate", UCOL_ALTERNATE_HANDLING },
|
| + { "colNormalization", UCOL_NORMALIZATION_MODE },
|
| + { "colNumeric", UCOL_NUMERIC_COLLATION }
|
| +};
|
| +
|
| +static const struct {
|
| + const char *name;
|
| + UColAttributeValue value;
|
| +} collAttributeValues[] = {
|
| + { "primary", UCOL_PRIMARY },
|
| + { "secondary", UCOL_SECONDARY },
|
| + { "tertiary", UCOL_TERTIARY },
|
| + { "quaternary", UCOL_QUATERNARY },
|
| + // Note: Not supporting typo "quarternary" because it was never supported in locale IDs.
|
| + { "identical", UCOL_IDENTICAL },
|
| + { "no", UCOL_OFF },
|
| + { "yes", UCOL_ON },
|
| + { "shifted", UCOL_SHIFTED },
|
| + { "non-ignorable", UCOL_NON_IGNORABLE },
|
| + { "lower", UCOL_LOWER_FIRST },
|
| + { "upper", UCOL_UPPER_FIRST }
|
| +};
|
| +
|
| +static const char *collReorderCodes[UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST] = {
|
| + "space", "punct", "symbol", "currency", "digit"
|
| +};
|
| +
|
| +int32_t getReorderCode(const char *s) {
|
| + for (int32_t i = 0; i < UPRV_LENGTHOF(collReorderCodes); ++i) {
|
| + if (uprv_stricmp(s, collReorderCodes[i]) == 0) {
|
| + return UCOL_REORDER_CODE_FIRST + i;
|
| + }
|
| + }
|
| + // Not supporting "others" = UCOL_REORDER_CODE_OTHERS
|
| + // as a synonym for Zzzz = USCRIPT_UNKNOWN for now:
|
| + // Avoid introducing synonyms/aliases.
|
| + return -1;
|
| +}
|
| +
|
| +/**
|
| + * Sets collation attributes according to locale keywords. See
|
| + * http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Settings
|
| + *
|
| + * Using "alias" keywords and values where defined:
|
| + * http://www.unicode.org/reports/tr35/tr35.html#Old_Locale_Extension_Syntax
|
| + * http://unicode.org/repos/cldr/trunk/common/bcp47/collation.xml
|
| + */
|
| +void setAttributesFromKeywords(const Locale &loc, Collator &coll, UErrorCode &errorCode) {
|
| + if (U_FAILURE(errorCode)) {
|
| + return;
|
| + }
|
| + if (uprv_strcmp(loc.getName(), loc.getBaseName()) == 0) {
|
| + // No keywords.
|
| + return;
|
| + }
|
| + char value[1024]; // The reordering value could be long.
|
| + // Check for collation keywords that were already deprecated
|
| + // before any were supported in createInstance() (except for "collation").
|
| + int32_t length = loc.getKeywordValue("colHiraganaQuaternary", value, UPRV_LENGTHOF(value), errorCode);
|
| + if (U_FAILURE(errorCode)) {
|
| + errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
| + return;
|
| + }
|
| + if (length != 0) {
|
| + errorCode = U_UNSUPPORTED_ERROR;
|
| + return;
|
| + }
|
| + length = loc.getKeywordValue("variableTop", value, UPRV_LENGTHOF(value), errorCode);
|
| + if (U_FAILURE(errorCode)) {
|
| + errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
| + return;
|
| + }
|
| + if (length != 0) {
|
| + errorCode = U_UNSUPPORTED_ERROR;
|
| + return;
|
| + }
|
| + // Parse known collation keywords, ignore others.
|
| + if (errorCode == U_STRING_NOT_TERMINATED_WARNING) {
|
| + errorCode = U_ZERO_ERROR;
|
| + }
|
| + for (int32_t i = 0; i < UPRV_LENGTHOF(collAttributes); ++i) {
|
| + length = loc.getKeywordValue(collAttributes[i].name, value, UPRV_LENGTHOF(value), errorCode);
|
| + if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
|
| + errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
| + return;
|
| + }
|
| + if (length == 0) { continue; }
|
| + for (int32_t j = 0;; ++j) {
|
| + if (j == UPRV_LENGTHOF(collAttributeValues)) {
|
| + errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
| + return;
|
| + }
|
| + if (uprv_stricmp(value, collAttributeValues[j].name) == 0) {
|
| + coll.setAttribute(collAttributes[i].attr, collAttributeValues[j].value, errorCode);
|
| + break;
|
| + }
|
| + }
|
| + }
|
| + length = loc.getKeywordValue("colReorder", value, UPRV_LENGTHOF(value), errorCode);
|
| + if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
|
| + errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
| + return;
|
| + }
|
| + if (length != 0) {
|
| + int32_t codes[USCRIPT_CODE_LIMIT + UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST];
|
| + int32_t codesLength = 0;
|
| + char *scriptName = value;
|
| + for (;;) {
|
| + if (codesLength == UPRV_LENGTHOF(codes)) {
|
| + errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
| + return;
|
| + }
|
| + char *limit = scriptName;
|
| + char c;
|
| + while ((c = *limit) != 0 && c != '-') { ++limit; }
|
| + *limit = 0;
|
| + int32_t code;
|
| + if ((limit - scriptName) == 4) {
|
| + // Strict parsing, accept only 4-letter script codes, not long names.
|
| + code = u_getPropertyValueEnum(UCHAR_SCRIPT, scriptName);
|
| + } else {
|
| + code = getReorderCode(scriptName);
|
| + }
|
| + if (code < 0) {
|
| + errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
| + return;
|
| + }
|
| + codes[codesLength++] = code;
|
| + if (c == 0) { break; }
|
| + scriptName = limit + 1;
|
| + }
|
| + coll.setReorderCodes(codes, codesLength, errorCode);
|
| + }
|
| + length = loc.getKeywordValue("kv", value, UPRV_LENGTHOF(value), errorCode);
|
| + if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
|
| + errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
| + return;
|
| + }
|
| + if (length != 0) {
|
| + int32_t code = getReorderCode(value);
|
| + if (code < 0) {
|
| + errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
| + return;
|
| + }
|
| + coll.setMaxVariable((UColReorderCode)code, errorCode);
|
| + }
|
| + if (U_FAILURE(errorCode)) {
|
| + errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
| + }
|
| +}
|
| +
|
| +} // namespace
|
| +
|
| Collator* U_EXPORT2 Collator::createInstance(UErrorCode& success)
|
| {
|
| return createInstance(Locale::getDefault(), success);
|
| @@ -311,97 +429,49 @@ Collator* U_EXPORT2 Collator::createInstance(const Locale& desiredLocale,
|
| {
|
| if (U_FAILURE(status))
|
| return 0;
|
| -
|
| + if (desiredLocale.isBogus()) {
|
| + // Locale constructed from malformed locale ID or language tag.
|
| + status = U_ILLEGAL_ARGUMENT_ERROR;
|
| + return NULL;
|
| + }
|
| +
|
| + Collator* coll;
|
| #if !UCONFIG_NO_SERVICE
|
| if (hasService()) {
|
| Locale actualLoc;
|
| - Collator *result =
|
| - (Collator*)gService->get(desiredLocale, &actualLoc, status);
|
| -
|
| - // Ugly Hack Alert! If the returned locale is empty (not root,
|
| - // but empty -- getName() == "") then that means the service
|
| - // returned a default object, not a "real" service object. In
|
| - // that case, the locale metadata (valid & actual) is setup
|
| - // correctly already, and we don't want to overwrite it. (TODO
|
| - // remove in 3.0) [aliu]
|
| - if (*actualLoc.getName() != 0) {
|
| - result->setLocales(desiredLocale, actualLoc, actualLoc);
|
| - }
|
| - return result;
|
| - }
|
| + coll = (Collator*)gService->get(desiredLocale, &actualLoc, status);
|
| + } else
|
| #endif
|
| - return makeInstance(desiredLocale, status);
|
| -}
|
| -
|
| -
|
| -Collator* Collator::makeInstance(const Locale& desiredLocale,
|
| - UErrorCode& status)
|
| -{
|
| - // A bit of explanation is required here. Although in the current
|
| - // implementation
|
| - // Collator::createInstance() is just turning around and calling
|
| - // RuleBasedCollator(Locale&), this will not necessarily always be the
|
| - // case. For example, suppose we modify this code to handle a
|
| - // non-table-based Collator, such as that for Thai. In this case,
|
| - // createInstance() will have to be modified to somehow determine this fact
|
| - // (perhaps a field in the resource bundle). Then it can construct the
|
| - // non-table-based Collator in some other way, when it sees that it needs
|
| - // to.
|
| - // The specific caution is this: RuleBasedCollator(Locale&) will ALWAYS
|
| - // return a valid collation object, if the system is functioning properly.
|
| - // The reason is that it will fall back, use the default locale, and even
|
| - // use the built-in default collation rules. THEREFORE, createInstance()
|
| - // should in general ONLY CALL RuleBasedCollator(Locale&) IF IT KNOWS IN
|
| - // ADVANCE that the given locale's collation is properly implemented as a
|
| - // RuleBasedCollator.
|
| - // Currently, we don't do this...we always return a RuleBasedCollator,
|
| - // whether it is strictly correct to do so or not, without checking, because
|
| - // we currently have no way of checking.
|
| -
|
| - RuleBasedCollator* collation = new RuleBasedCollator(desiredLocale,
|
| - status);
|
| - /* test for NULL */
|
| - if (collation == 0) {
|
| - status = U_MEMORY_ALLOCATION_ERROR;
|
| - return 0;
|
| - }
|
| - if (U_FAILURE(status))
|
| {
|
| - delete collation;
|
| - collation = 0;
|
| + coll = makeInstance(desiredLocale, status);
|
| + }
|
| + setAttributesFromKeywords(desiredLocale, *coll, status);
|
| + if (U_FAILURE(status)) {
|
| + delete coll;
|
| + return NULL;
|
| }
|
| - return collation;
|
| + return coll;
|
| }
|
|
|
| -#ifdef U_USE_COLLATION_OBSOLETE_2_6
|
| -// !!! dlf the following is obsolete, ignore registration for this
|
|
|
| -Collator *
|
| -Collator::createInstance(const Locale &loc,
|
| - UVersionInfo version,
|
| - UErrorCode &status)
|
| -{
|
| - Collator *collator;
|
| - UVersionInfo info;
|
| -
|
| - collator=new RuleBasedCollator(loc, status);
|
| - /* test for NULL */
|
| - if (collator == 0) {
|
| +Collator* Collator::makeInstance(const Locale& desiredLocale, UErrorCode& status) {
|
| + const CollationCacheEntry *entry = CollationLoader::loadTailoring(desiredLocale, status);
|
| + if (U_SUCCESS(status)) {
|
| + Collator *result = new RuleBasedCollator(entry);
|
| + if (result != NULL) {
|
| + // Both the unified cache's get() and the RBC constructor
|
| + // did addRef(). Undo one of them.
|
| + entry->removeRef();
|
| + return result;
|
| + }
|
| status = U_MEMORY_ALLOCATION_ERROR;
|
| - return 0;
|
| }
|
| -
|
| - if(U_SUCCESS(status)) {
|
| - collator->getVersion(info);
|
| - if(0!=uprv_memcmp(version, info, sizeof(UVersionInfo))) {
|
| - delete collator;
|
| - status=U_MISSING_RESOURCE_ERROR;
|
| - return 0;
|
| - }
|
| + if (entry != NULL) {
|
| + // Undo the addRef() from the cache.get().
|
| + entry->removeRef();
|
| }
|
| - return collator;
|
| + return NULL;
|
| }
|
| -#endif
|
|
|
| Collator *
|
| Collator::safeClone() const {
|
| @@ -599,6 +669,10 @@ URegistryKey U_EXPORT2
|
| Collator::registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status)
|
| {
|
| if (U_SUCCESS(status)) {
|
| + // Set the collator locales while registering so that createInstance()
|
| + // need not guess whether the collator's locales are already set properly
|
| + // (as they are by the data loader).
|
| + toAdopt->setLocales(locale, locale, locale);
|
| return getService()->registerInstance(toAdopt, locale, status);
|
| }
|
| return NULL;
|
| @@ -853,6 +927,19 @@ Collator::setStrength(ECollationStrength newStrength) {
|
| setAttribute(UCOL_STRENGTH, (UColAttributeValue)newStrength, intStatus);
|
| }
|
|
|
| +Collator &
|
| +Collator::setMaxVariable(UColReorderCode /*group*/, UErrorCode &errorCode) {
|
| + if (U_SUCCESS(errorCode)) {
|
| + errorCode = U_UNSUPPORTED_ERROR;
|
| + }
|
| + return *this;
|
| +}
|
| +
|
| +UColReorderCode
|
| +Collator::getMaxVariable() const {
|
| + return UCOL_REORDER_CODE_PUNCTUATION;
|
| +}
|
| +
|
| int32_t
|
| Collator::getReorderCodes(int32_t* /* dest*/,
|
| int32_t /* destCapacity*/,
|
| @@ -874,16 +961,18 @@ Collator::setReorderCodes(const int32_t* /* reorderCodes */,
|
| }
|
| }
|
|
|
| -int32_t U_EXPORT2
|
| -Collator::getEquivalentReorderCodes(int32_t /* reorderCode */,
|
| - int32_t* /* dest */,
|
| - int32_t /* destCapacity */,
|
| - UErrorCode& status)
|
| -{
|
| - if (U_SUCCESS(status)) {
|
| - status = U_UNSUPPORTED_ERROR;
|
| +int32_t
|
| +Collator::getEquivalentReorderCodes(int32_t reorderCode,
|
| + int32_t *dest, int32_t capacity,
|
| + UErrorCode &errorCode) {
|
| + if(U_FAILURE(errorCode)) { return 0; }
|
| + if(capacity < 0 || (dest == NULL && capacity > 0)) {
|
| + errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
| + return 0;
|
| }
|
| - return 0;
|
| + const CollationData *baseData = CollationRoot::getData(errorCode);
|
| + if(U_FAILURE(errorCode)) { return 0; }
|
| + return baseData->getEquivalentScripts(reorderCode, dest, capacity, errorCode);
|
| }
|
|
|
| int32_t
|
| @@ -897,6 +986,30 @@ Collator::internalGetShortDefinitionString(const char * /*locale*/,
|
| return 0;
|
| }
|
|
|
| +UCollationResult
|
| +Collator::internalCompareUTF8(const char *left, int32_t leftLength,
|
| + const char *right, int32_t rightLength,
|
| + UErrorCode &errorCode) const {
|
| + if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
|
| + if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
|
| + errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
| + return UCOL_EQUAL;
|
| + }
|
| + return compareUTF8(
|
| + StringPiece(left, (leftLength < 0) ? uprv_strlen(left) : leftLength),
|
| + StringPiece(right, (rightLength < 0) ? uprv_strlen(right) : rightLength),
|
| + errorCode);
|
| +}
|
| +
|
| +int32_t
|
| +Collator::internalNextSortKeyPart(UCharIterator * /*iter*/, uint32_t /*state*/[2],
|
| + uint8_t * /*dest*/, int32_t /*count*/, UErrorCode &errorCode) const {
|
| + if (U_SUCCESS(errorCode)) {
|
| + errorCode = U_UNSUPPORTED_ERROR;
|
| + }
|
| + return 0;
|
| +}
|
| +
|
| // UCollator private data members ----------------------------------------
|
|
|
| /* This is useless information */
|
|
|