Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Unified Diff: source/i18n/coll.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unused directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/i18n/coleitr.cpp ('k') | source/i18n/collation.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/i18n/coll.cpp
diff --git a/source/i18n/coll.cpp b/source/i18n/coll.cpp
index d4224ba39e5a5ed94ae5135e9b2805fbb51918e3..c4845f2b1f9e62e77fbe9336abe01a1d700bafff 100644
--- a/source/i18n/coll.cpp
+++ b/source/i18n/coll.cpp
@@ -1,6 +1,6 @@
/*
******************************************************************************
- * Copyright (C) 1996-2013, International Business Machines Corporation and
+ * Copyright (C) 1996-2014, International Business Machines Corporation and
* others. All Rights Reserved.
******************************************************************************
*/
@@ -35,9 +35,10 @@
* Normalizer::EMode
* 11/23/9 srl Inlining of some critical functions
* 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h)
+ * 2012-2014 markus Rewritten in C++ again.
*/
-#include "utypeinfo.h" // for 'typeid' to work
+#include "utypeinfo.h" // for 'typeid' to work
#include "unicode/utypes.h"
@@ -45,6 +46,9 @@
#include "unicode/coll.h"
#include "unicode/tblcoll.h"
+#include "collationdata.h"
+#include "collationroot.h"
+#include "collationtailoring.h"
#include "ucol_imp.h"
#include "cstring.h"
#include "cmemory.h"
@@ -176,22 +180,7 @@ public:
if (actualReturn == NULL) {
actualReturn = &ar;
}
- Collator* result = (Collator*)ICULocaleService::getKey(key, actualReturn, status);
- // Ugly Hack Alert! If the actualReturn length is zero, this
- // means we got a default object, not a "real" service-created
- // object. We don't call setLocales() on a default object,
- // because that will overwrite its correct built-in locale
- // metadata (valid & actual) with our incorrect data (all we
- // have is the requested locale). (TODO remove in 3.0) [aliu]
- if (result && actualReturn->length() > 0) {
- const LocaleKey& lkey = (const LocaleKey&)key;
- Locale canonicalLocale("");
- Locale currentLocale("");
-
- LocaleUtility::initLocaleFromName(*actualReturn, currentLocale);
- result->setLocales(lkey.canonicalLocale(canonicalLocale), currentLocale, currentLocale);
- }
- return result;
+ return (Collator*)ICULocaleService::getKey(key, actualReturn, status);
}
virtual UBool isDefault() const {
@@ -225,40 +214,6 @@ hasService(void)
return retVal;
}
-// -------------------------------------
-
-UCollator*
-Collator::createUCollator(const char *loc,
- UErrorCode *status)
-{
- UCollator *result = 0;
- if (status && U_SUCCESS(*status) && hasService()) {
- Locale desiredLocale(loc);
- Collator *col = (Collator*)gService->get(desiredLocale, *status);
- RuleBasedCollator *rbc;
- if (col && (rbc = dynamic_cast<RuleBasedCollator *>(col))) {
- if (!rbc->dataIsOwned) {
- result = ucol_safeClone(rbc->ucollator, NULL, NULL, status);
- } else {
- result = rbc->ucollator;
- rbc->ucollator = NULL; // to prevent free on delete
- }
- } else {
- // should go in a function- ucol_initDelegate(delegate)
- result = (UCollator *)uprv_malloc(sizeof(UCollator));
- if(result == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- } else {
- uprv_memset(result, 0, sizeof(UCollator));
- result->delegate = col;
- result->freeOnClose = TRUE; // do free on close.
- col = NULL; // to prevent free on delete.
- }
- }
- delete col;
- }
- return result;
-}
#endif /* UCONFIG_NO_SERVICE */
static void U_CALLCONV
@@ -301,6 +256,169 @@ static UBool isAvailableLocaleListInitialized(UErrorCode &status) {
// Collator public methods -----------------------------------------------
+namespace {
+
+static const struct {
+ const char *name;
+ UColAttribute attr;
+} collAttributes[] = {
+ { "colStrength", UCOL_STRENGTH },
+ { "colBackwards", UCOL_FRENCH_COLLATION },
+ { "colCaseLevel", UCOL_CASE_LEVEL },
+ { "colCaseFirst", UCOL_CASE_FIRST },
+ { "colAlternate", UCOL_ALTERNATE_HANDLING },
+ { "colNormalization", UCOL_NORMALIZATION_MODE },
+ { "colNumeric", UCOL_NUMERIC_COLLATION }
+};
+
+static const struct {
+ const char *name;
+ UColAttributeValue value;
+} collAttributeValues[] = {
+ { "primary", UCOL_PRIMARY },
+ { "secondary", UCOL_SECONDARY },
+ { "tertiary", UCOL_TERTIARY },
+ { "quaternary", UCOL_QUATERNARY },
+ // Note: Not supporting typo "quarternary" because it was never supported in locale IDs.
+ { "identical", UCOL_IDENTICAL },
+ { "no", UCOL_OFF },
+ { "yes", UCOL_ON },
+ { "shifted", UCOL_SHIFTED },
+ { "non-ignorable", UCOL_NON_IGNORABLE },
+ { "lower", UCOL_LOWER_FIRST },
+ { "upper", UCOL_UPPER_FIRST }
+};
+
+static const char *collReorderCodes[UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST] = {
+ "space", "punct", "symbol", "currency", "digit"
+};
+
+int32_t getReorderCode(const char *s) {
+ for (int32_t i = 0; i < UPRV_LENGTHOF(collReorderCodes); ++i) {
+ if (uprv_stricmp(s, collReorderCodes[i]) == 0) {
+ return UCOL_REORDER_CODE_FIRST + i;
+ }
+ }
+ // Not supporting "others" = UCOL_REORDER_CODE_OTHERS
+ // as a synonym for Zzzz = USCRIPT_UNKNOWN for now:
+ // Avoid introducing synonyms/aliases.
+ return -1;
+}
+
+/**
+ * Sets collation attributes according to locale keywords. See
+ * http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Settings
+ *
+ * Using "alias" keywords and values where defined:
+ * http://www.unicode.org/reports/tr35/tr35.html#Old_Locale_Extension_Syntax
+ * http://unicode.org/repos/cldr/trunk/common/bcp47/collation.xml
+ */
+void setAttributesFromKeywords(const Locale &loc, Collator &coll, UErrorCode &errorCode) {
+ if (U_FAILURE(errorCode)) {
+ return;
+ }
+ if (uprv_strcmp(loc.getName(), loc.getBaseName()) == 0) {
+ // No keywords.
+ return;
+ }
+ char value[1024]; // The reordering value could be long.
+ // Check for collation keywords that were already deprecated
+ // before any were supported in createInstance() (except for "collation").
+ int32_t length = loc.getKeywordValue("colHiraganaQuaternary", value, UPRV_LENGTHOF(value), errorCode);
+ if (U_FAILURE(errorCode)) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ if (length != 0) {
+ errorCode = U_UNSUPPORTED_ERROR;
+ return;
+ }
+ length = loc.getKeywordValue("variableTop", value, UPRV_LENGTHOF(value), errorCode);
+ if (U_FAILURE(errorCode)) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ if (length != 0) {
+ errorCode = U_UNSUPPORTED_ERROR;
+ return;
+ }
+ // Parse known collation keywords, ignore others.
+ if (errorCode == U_STRING_NOT_TERMINATED_WARNING) {
+ errorCode = U_ZERO_ERROR;
+ }
+ for (int32_t i = 0; i < UPRV_LENGTHOF(collAttributes); ++i) {
+ length = loc.getKeywordValue(collAttributes[i].name, value, UPRV_LENGTHOF(value), errorCode);
+ if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ if (length == 0) { continue; }
+ for (int32_t j = 0;; ++j) {
+ if (j == UPRV_LENGTHOF(collAttributeValues)) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ if (uprv_stricmp(value, collAttributeValues[j].name) == 0) {
+ coll.setAttribute(collAttributes[i].attr, collAttributeValues[j].value, errorCode);
+ break;
+ }
+ }
+ }
+ length = loc.getKeywordValue("colReorder", value, UPRV_LENGTHOF(value), errorCode);
+ if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ if (length != 0) {
+ int32_t codes[USCRIPT_CODE_LIMIT + UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST];
+ int32_t codesLength = 0;
+ char *scriptName = value;
+ for (;;) {
+ if (codesLength == UPRV_LENGTHOF(codes)) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ char *limit = scriptName;
+ char c;
+ while ((c = *limit) != 0 && c != '-') { ++limit; }
+ *limit = 0;
+ int32_t code;
+ if ((limit - scriptName) == 4) {
+ // Strict parsing, accept only 4-letter script codes, not long names.
+ code = u_getPropertyValueEnum(UCHAR_SCRIPT, scriptName);
+ } else {
+ code = getReorderCode(scriptName);
+ }
+ if (code < 0) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ codes[codesLength++] = code;
+ if (c == 0) { break; }
+ scriptName = limit + 1;
+ }
+ coll.setReorderCodes(codes, codesLength, errorCode);
+ }
+ length = loc.getKeywordValue("kv", value, UPRV_LENGTHOF(value), errorCode);
+ if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ if (length != 0) {
+ int32_t code = getReorderCode(value);
+ if (code < 0) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ coll.setMaxVariable((UColReorderCode)code, errorCode);
+ }
+ if (U_FAILURE(errorCode)) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+}
+
+} // namespace
+
Collator* U_EXPORT2 Collator::createInstance(UErrorCode& success)
{
return createInstance(Locale::getDefault(), success);
@@ -311,97 +429,49 @@ Collator* U_EXPORT2 Collator::createInstance(const Locale& desiredLocale,
{
if (U_FAILURE(status))
return 0;
-
+ if (desiredLocale.isBogus()) {
+ // Locale constructed from malformed locale ID or language tag.
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return NULL;
+ }
+
+ Collator* coll;
#if !UCONFIG_NO_SERVICE
if (hasService()) {
Locale actualLoc;
- Collator *result =
- (Collator*)gService->get(desiredLocale, &actualLoc, status);
-
- // Ugly Hack Alert! If the returned locale is empty (not root,
- // but empty -- getName() == "") then that means the service
- // returned a default object, not a "real" service object. In
- // that case, the locale metadata (valid & actual) is setup
- // correctly already, and we don't want to overwrite it. (TODO
- // remove in 3.0) [aliu]
- if (*actualLoc.getName() != 0) {
- result->setLocales(desiredLocale, actualLoc, actualLoc);
- }
- return result;
- }
+ coll = (Collator*)gService->get(desiredLocale, &actualLoc, status);
+ } else
#endif
- return makeInstance(desiredLocale, status);
-}
-
-
-Collator* Collator::makeInstance(const Locale& desiredLocale,
- UErrorCode& status)
-{
- // A bit of explanation is required here. Although in the current
- // implementation
- // Collator::createInstance() is just turning around and calling
- // RuleBasedCollator(Locale&), this will not necessarily always be the
- // case. For example, suppose we modify this code to handle a
- // non-table-based Collator, such as that for Thai. In this case,
- // createInstance() will have to be modified to somehow determine this fact
- // (perhaps a field in the resource bundle). Then it can construct the
- // non-table-based Collator in some other way, when it sees that it needs
- // to.
- // The specific caution is this: RuleBasedCollator(Locale&) will ALWAYS
- // return a valid collation object, if the system is functioning properly.
- // The reason is that it will fall back, use the default locale, and even
- // use the built-in default collation rules. THEREFORE, createInstance()
- // should in general ONLY CALL RuleBasedCollator(Locale&) IF IT KNOWS IN
- // ADVANCE that the given locale's collation is properly implemented as a
- // RuleBasedCollator.
- // Currently, we don't do this...we always return a RuleBasedCollator,
- // whether it is strictly correct to do so or not, without checking, because
- // we currently have no way of checking.
-
- RuleBasedCollator* collation = new RuleBasedCollator(desiredLocale,
- status);
- /* test for NULL */
- if (collation == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- if (U_FAILURE(status))
{
- delete collation;
- collation = 0;
+ coll = makeInstance(desiredLocale, status);
+ }
+ setAttributesFromKeywords(desiredLocale, *coll, status);
+ if (U_FAILURE(status)) {
+ delete coll;
+ return NULL;
}
- return collation;
+ return coll;
}
-#ifdef U_USE_COLLATION_OBSOLETE_2_6
-// !!! dlf the following is obsolete, ignore registration for this
-Collator *
-Collator::createInstance(const Locale &loc,
- UVersionInfo version,
- UErrorCode &status)
-{
- Collator *collator;
- UVersionInfo info;
-
- collator=new RuleBasedCollator(loc, status);
- /* test for NULL */
- if (collator == 0) {
+Collator* Collator::makeInstance(const Locale& desiredLocale, UErrorCode& status) {
+ const CollationCacheEntry *entry = CollationLoader::loadTailoring(desiredLocale, status);
+ if (U_SUCCESS(status)) {
+ Collator *result = new RuleBasedCollator(entry);
+ if (result != NULL) {
+ // Both the unified cache's get() and the RBC constructor
+ // did addRef(). Undo one of them.
+ entry->removeRef();
+ return result;
+ }
status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
}
-
- if(U_SUCCESS(status)) {
- collator->getVersion(info);
- if(0!=uprv_memcmp(version, info, sizeof(UVersionInfo))) {
- delete collator;
- status=U_MISSING_RESOURCE_ERROR;
- return 0;
- }
+ if (entry != NULL) {
+ // Undo the addRef() from the cache.get().
+ entry->removeRef();
}
- return collator;
+ return NULL;
}
-#endif
Collator *
Collator::safeClone() const {
@@ -599,6 +669,10 @@ URegistryKey U_EXPORT2
Collator::registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status)
{
if (U_SUCCESS(status)) {
+ // Set the collator locales while registering so that createInstance()
+ // need not guess whether the collator's locales are already set properly
+ // (as they are by the data loader).
+ toAdopt->setLocales(locale, locale, locale);
return getService()->registerInstance(toAdopt, locale, status);
}
return NULL;
@@ -853,6 +927,19 @@ Collator::setStrength(ECollationStrength newStrength) {
setAttribute(UCOL_STRENGTH, (UColAttributeValue)newStrength, intStatus);
}
+Collator &
+Collator::setMaxVariable(UColReorderCode /*group*/, UErrorCode &errorCode) {
+ if (U_SUCCESS(errorCode)) {
+ errorCode = U_UNSUPPORTED_ERROR;
+ }
+ return *this;
+}
+
+UColReorderCode
+Collator::getMaxVariable() const {
+ return UCOL_REORDER_CODE_PUNCTUATION;
+}
+
int32_t
Collator::getReorderCodes(int32_t* /* dest*/,
int32_t /* destCapacity*/,
@@ -874,16 +961,18 @@ Collator::setReorderCodes(const int32_t* /* reorderCodes */,
}
}
-int32_t U_EXPORT2
-Collator::getEquivalentReorderCodes(int32_t /* reorderCode */,
- int32_t* /* dest */,
- int32_t /* destCapacity */,
- UErrorCode& status)
-{
- if (U_SUCCESS(status)) {
- status = U_UNSUPPORTED_ERROR;
+int32_t
+Collator::getEquivalentReorderCodes(int32_t reorderCode,
+ int32_t *dest, int32_t capacity,
+ UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) { return 0; }
+ if(capacity < 0 || (dest == NULL && capacity > 0)) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
}
- return 0;
+ const CollationData *baseData = CollationRoot::getData(errorCode);
+ if(U_FAILURE(errorCode)) { return 0; }
+ return baseData->getEquivalentScripts(reorderCode, dest, capacity, errorCode);
}
int32_t
@@ -897,6 +986,30 @@ Collator::internalGetShortDefinitionString(const char * /*locale*/,
return 0;
}
+UCollationResult
+Collator::internalCompareUTF8(const char *left, int32_t leftLength,
+ const char *right, int32_t rightLength,
+ UErrorCode &errorCode) const {
+ if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
+ if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return UCOL_EQUAL;
+ }
+ return compareUTF8(
+ StringPiece(left, (leftLength < 0) ? uprv_strlen(left) : leftLength),
+ StringPiece(right, (rightLength < 0) ? uprv_strlen(right) : rightLength),
+ errorCode);
+}
+
+int32_t
+Collator::internalNextSortKeyPart(UCharIterator * /*iter*/, uint32_t /*state*/[2],
+ uint8_t * /*dest*/, int32_t /*count*/, UErrorCode &errorCode) const {
+ if (U_SUCCESS(errorCode)) {
+ errorCode = U_UNSUPPORTED_ERROR;
+ }
+ return 0;
+}
+
// UCollator private data members ----------------------------------------
/* This is useless information */
« no previous file with comments | « source/i18n/coleitr.cpp ('k') | source/i18n/collation.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698