Index: source/i18n/tblcoll.cpp |
diff --git a/source/i18n/tblcoll.cpp b/source/i18n/tblcoll.cpp |
deleted file mode 100644 |
index 744600d18c63f5b11379a66c45b81535bc21fe9b..0000000000000000000000000000000000000000 |
--- a/source/i18n/tblcoll.cpp |
+++ /dev/null |
@@ -1,695 +0,0 @@ |
-/* |
- ****************************************************************************** |
- * Copyright (C) 1996-2013, International Business Machines Corporation and |
- * others. All Rights Reserved. |
- ****************************************************************************** |
- */ |
- |
-/** |
- * File tblcoll.cpp |
- * |
- * Created by: Helena Shih |
- * |
- * Modification History: |
- * |
- * Date Name Description |
- * 2/5/97 aliu Added streamIn and streamOut methods. Added |
- * constructor which reads RuleBasedCollator object from |
- * a binary file. Added writeToFile method which streams |
- * RuleBasedCollator out to a binary file. The streamIn |
- * and streamOut methods use istream and ostream objects |
- * in binary mode. |
- * 2/11/97 aliu Moved declarations out of for loop initializer. |
- * Added Mac compatibility #ifdef for ios::nocreate. |
- * 2/12/97 aliu Modified to use TableCollationData sub-object to |
- * hold invariant data. |
- * 2/13/97 aliu Moved several methods into this class from Collation. |
- * Added a private RuleBasedCollator(Locale&) constructor, |
- * to be used by Collator::getInstance(). General |
- * clean up. Made use of UErrorCode variables consistent. |
- * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy |
- * constructor and getDynamicClassID. |
- * 3/5/97 aliu Changed compaction cycle to improve performance. We |
- * use the maximum allowable value which is kBlockCount. |
- * Modified getRules() to load rules dynamically. Changed |
- * constructFromFile() call to accomodate this (added |
- * parameter to specify whether binary loading is to |
- * take place). |
- * 05/06/97 helena Added memory allocation error check. |
- * 6/20/97 helena Java class name change. |
- * 6/23/97 helena Adding comments to make code more readable. |
- * 09/03/97 helena Added createCollationKeyValues(). |
- * 06/26/98 erm Changes for CollationKeys using byte arrays. |
- * 08/10/98 erm Synched with 1.2 version of RuleBasedCollator.java |
- * 04/23/99 stephen Removed EDecompositionMode, merged with |
- * Normalizer::EMode |
- * 06/14/99 stephen Removed kResourceBundleSuffix |
- * 06/22/99 stephen Fixed logic in constructFromFile() since .ctx |
- * files are no longer used. |
- * 11/02/99 helena Collator performance enhancements. Special case |
- * for NO_OP situations. |
- * 11/17/99 srl More performance enhancements. Inlined some internal functions. |
- * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator |
- * to implementation file. |
- * 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h) |
- */ |
- |
-#include "unicode/utypes.h" |
- |
-#if !UCONFIG_NO_COLLATION |
- |
-#include "unicode/tblcoll.h" |
-#include "unicode/coleitr.h" |
-#include "unicode/ures.h" |
-#include "unicode/uset.h" |
-#include "ucol_imp.h" |
-#include "uresimp.h" |
-#include "uhash.h" |
-#include "cmemory.h" |
-#include "cstring.h" |
-#include "putilimp.h" |
-#include "ustr_imp.h" |
- |
-/* public RuleBasedCollator constructor ---------------------------------- */ |
- |
-U_NAMESPACE_BEGIN |
- |
-/** |
-* Copy constructor, aliasing, not write-through |
-*/ |
-RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that) |
-: Collator(that) |
-, dataIsOwned(FALSE) |
-, isWriteThroughAlias(FALSE) |
-, ucollator(NULL) |
-{ |
- RuleBasedCollator::operator=(that); |
-} |
- |
-RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, |
- UErrorCode& status) : |
-dataIsOwned(FALSE) |
-{ |
- construct(rules, |
- UCOL_DEFAULT_STRENGTH, |
- UCOL_DEFAULT, |
- status); |
-} |
- |
-RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, |
- ECollationStrength collationStrength, |
- UErrorCode& status) : dataIsOwned(FALSE) |
-{ |
- construct(rules, |
- (UColAttributeValue)collationStrength, |
- UCOL_DEFAULT, |
- status); |
-} |
- |
-RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, |
- UColAttributeValue decompositionMode, |
- UErrorCode& status) : |
-dataIsOwned(FALSE) |
-{ |
- construct(rules, |
- UCOL_DEFAULT_STRENGTH, |
- decompositionMode, |
- status); |
-} |
- |
-RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, |
- ECollationStrength collationStrength, |
- UColAttributeValue decompositionMode, |
- UErrorCode& status) : dataIsOwned(FALSE) |
-{ |
- construct(rules, |
- (UColAttributeValue)collationStrength, |
- decompositionMode, |
- status); |
-} |
-RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length, |
- const RuleBasedCollator *base, |
- UErrorCode &status) : |
-dataIsOwned(TRUE), |
-isWriteThroughAlias(FALSE) |
-{ |
- ucollator = ucol_openBinary(bin, length, base->ucollator, &status); |
-} |
- |
-void |
-RuleBasedCollator::setRuleStringFromCollator() |
-{ |
- int32_t length; |
- const UChar *r = ucol_getRules(ucollator, &length); |
- |
- if (r && length > 0) { |
- // alias the rules string |
- urulestring.setTo(TRUE, r, length); |
- } |
- else { |
- urulestring.truncate(0); // Clear string. |
- } |
-} |
- |
-// not aliasing, not write-through |
-void |
-RuleBasedCollator::construct(const UnicodeString& rules, |
- UColAttributeValue collationStrength, |
- UColAttributeValue decompositionMode, |
- UErrorCode& status) |
-{ |
- ucollator = ucol_openRules(rules.getBuffer(), rules.length(), |
- decompositionMode, collationStrength, |
- NULL, &status); |
- |
- dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it |
- isWriteThroughAlias = FALSE; |
- |
- if(ucollator == NULL) { |
- if(U_SUCCESS(status)) { |
- status = U_MEMORY_ALLOCATION_ERROR; |
- } |
- return; // Failure |
- } |
- |
- setRuleStringFromCollator(); |
-} |
- |
-/* RuleBasedCollator public destructor ----------------------------------- */ |
- |
-RuleBasedCollator::~RuleBasedCollator() |
-{ |
- if (dataIsOwned) |
- { |
- ucol_close(ucollator); |
- } |
- ucollator = 0; |
-} |
- |
-/* RuleBaseCollator public methods --------------------------------------- */ |
- |
-UBool RuleBasedCollator::operator==(const Collator& that) const |
-{ |
- /* only checks for address equals here */ |
- if (this == &that) { |
- return TRUE; |
- } |
- if (!Collator::operator==(that)) { |
- return FALSE; /* not the same class */ |
- } |
- |
- RuleBasedCollator& thatAlias = (RuleBasedCollator&)that; |
- |
- return ucol_equals(this->ucollator, thatAlias.ucollator); |
-} |
- |
-// aliasing, not write-through |
-RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that) |
-{ |
- if (this == &that) { return *this; } |
- |
- UErrorCode intStatus = U_ZERO_ERROR; |
- UCollator *ucol = ucol_safeClone(that.ucollator, NULL, NULL, &intStatus); |
- if (U_FAILURE(intStatus)) { return *this; } |
- |
- if (dataIsOwned) { |
- ucol_close(ucollator); |
- } |
- ucollator = ucol; |
- dataIsOwned = TRUE; |
- isWriteThroughAlias = FALSE; |
- setRuleStringFromCollator(); |
- return *this; |
-} |
- |
-// aliasing, not write-through |
-Collator* RuleBasedCollator::clone() const |
-{ |
- RuleBasedCollator* coll = new RuleBasedCollator(*this); |
- // There is a small chance that the internal ucol_safeClone() call fails. |
- if (coll != NULL && coll->ucollator == NULL) { |
- delete coll; |
- return NULL; |
- } |
- return coll; |
-} |
- |
- |
-CollationElementIterator* RuleBasedCollator::createCollationElementIterator |
- (const UnicodeString& source) const |
-{ |
- UErrorCode status = U_ZERO_ERROR; |
- CollationElementIterator *result = new CollationElementIterator(source, this, |
- status); |
- if (U_FAILURE(status)) { |
- delete result; |
- return NULL; |
- } |
- |
- return result; |
-} |
- |
-/** |
-* Create a CollationElementIterator object that will iterate over the |
-* elements in a string, using the collation rules defined in this |
-* RuleBasedCollator |
-*/ |
-CollationElementIterator* RuleBasedCollator::createCollationElementIterator |
- (const CharacterIterator& source) const |
-{ |
- UErrorCode status = U_ZERO_ERROR; |
- CollationElementIterator *result = new CollationElementIterator(source, this, |
- status); |
- |
- if (U_FAILURE(status)) { |
- delete result; |
- return NULL; |
- } |
- |
- return result; |
-} |
- |
-/** |
-* Return a string representation of this collator's rules. The string can |
-* later be passed to the constructor that takes a UnicodeString argument, |
-* which will construct a collator that's functionally identical to this one. |
-* You can also allow users to edit the string in order to change the collation |
-* data, or you can print it out for inspection, or whatever. |
-*/ |
-const UnicodeString& RuleBasedCollator::getRules() const |
-{ |
- return urulestring; |
-} |
- |
-void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) |
-{ |
- int32_t rulesize = ucol_getRulesEx(ucollator, delta, NULL, -1); |
- |
- if (rulesize > 0) { |
- UChar *rules = (UChar*) uprv_malloc( sizeof(UChar) * (rulesize) ); |
- if(rules != NULL) { |
- ucol_getRulesEx(ucollator, delta, rules, rulesize); |
- buffer.setTo(rules, rulesize); |
- uprv_free(rules); |
- } else { // couldn't allocate |
- buffer.remove(); |
- } |
- } |
- else { |
- buffer.remove(); |
- } |
-} |
- |
-UnicodeSet * |
-RuleBasedCollator::getTailoredSet(UErrorCode &status) const |
-{ |
- if(U_FAILURE(status)) { |
- return NULL; |
- } |
- return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status); |
-} |
- |
- |
-void RuleBasedCollator::getVersion(UVersionInfo versionInfo) const |
-{ |
- if (versionInfo!=NULL){ |
- ucol_getVersion(ucollator, versionInfo); |
- } |
-} |
- |
-/** |
-* Compare two strings using this collator |
-*/ |
-UCollationResult RuleBasedCollator::compare( |
- const UnicodeString& source, |
- const UnicodeString& target, |
- int32_t length, |
- UErrorCode &status) const |
-{ |
- return compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status); |
-} |
- |
-UCollationResult RuleBasedCollator::compare(const UChar* source, |
- int32_t sourceLength, |
- const UChar* target, |
- int32_t targetLength, |
- UErrorCode &status) const |
-{ |
- if(U_SUCCESS(status)) { |
- return ucol_strcoll(ucollator, source, sourceLength, target, targetLength); |
- } else { |
- return UCOL_EQUAL; |
- } |
-} |
- |
-UCollationResult RuleBasedCollator::compare( |
- const UnicodeString& source, |
- const UnicodeString& target, |
- UErrorCode &status) const |
-{ |
- if(U_SUCCESS(status)) { |
- return ucol_strcoll(ucollator, source.getBuffer(), source.length(), |
- target.getBuffer(), target.length()); |
- } else { |
- return UCOL_EQUAL; |
- } |
-} |
- |
-UCollationResult RuleBasedCollator::compare(UCharIterator &sIter, |
- UCharIterator &tIter, |
- UErrorCode &status) const { |
- if(U_SUCCESS(status)) { |
- return ucol_strcollIter(ucollator, &sIter, &tIter, &status); |
- } else { |
- return UCOL_EQUAL; |
- } |
-} |
- |
-/** |
-* Retrieve a collation key for the specified string. The key can be compared |
-* with other collation keys using a bitwise comparison (e.g. memcmp) to find |
-* the ordering of their respective source strings. This is handy when doing a |
-* sort, where each sort key must be compared many times. |
-* |
-* The basic algorithm here is to find all of the collation elements for each |
-* character in the source string, convert them to an ASCII representation, and |
-* put them into the collation key. But it's trickier than that. Each |
-* collation element in a string has three components: primary ('A' vs 'B'), |
-* secondary ('u' vs '\u00FC'), and tertiary ('A' vs 'a'), and a primary difference |
-* at the end of a string takes precedence over a secondary or tertiary |
-* difference earlier in the string. |
-* |
-* To account for this, we put all of the primary orders at the beginning of |
-* the string, followed by the secondary and tertiary orders. Each set of |
-* orders is terminated by nulls so that a key for a string which is a initial |
-* substring of another key will compare less without any special case. |
-* |
-* Here's a hypothetical example, with the collation element represented as a |
-* three-digit number, one digit for primary, one for secondary, etc. |
-* |
-* String: A a B \u00C9 |
-* Collation Elements: 101 100 201 511 |
-* Collation Key: 1125<null>0001<null>1011<null> |
-* |
-* To make things even trickier, secondary differences (accent marks) are |
-* compared starting at the *end* of the string in languages with French |
-* secondary ordering. But when comparing the accent marks on a single base |
-* character, they are compared from the beginning. To handle this, we reverse |
-* all of the accents that belong to each base character, then we reverse the |
-* entire string of secondary orderings at the end. |
-*/ |
-CollationKey& RuleBasedCollator::getCollationKey( |
- const UnicodeString& source, |
- CollationKey& sortkey, |
- UErrorCode& status) const |
-{ |
- return getCollationKey(source.getBuffer(), source.length(), sortkey, status); |
-} |
- |
-CollationKey& RuleBasedCollator::getCollationKey(const UChar* source, |
- int32_t sourceLen, |
- CollationKey& sortkey, |
- UErrorCode& status) const |
-{ |
- if (U_FAILURE(status)) { |
- return sortkey.setToBogus(); |
- } |
- if (sourceLen < -1 || (source == NULL && sourceLen != 0)) { |
- status = U_ILLEGAL_ARGUMENT_ERROR; |
- return sortkey.setToBogus(); |
- } |
- |
- if (sourceLen < 0) { |
- sourceLen = u_strlen(source); |
- } |
- if (sourceLen == 0) { |
- return sortkey.reset(); |
- } |
- |
- int32_t resultLen = ucol_getCollationKey(ucollator, source, sourceLen, sortkey, status); |
- |
- if (U_SUCCESS(status)) { |
- sortkey.setLength(resultLen); |
- } else { |
- sortkey.setToBogus(); |
- } |
- return sortkey; |
-} |
- |
-/** |
- * Return the maximum length of any expansion sequences that end with the |
- * specified comparison order. |
- * @param order a collation order returned by previous or next. |
- * @return the maximum length of any expansion seuences ending with the |
- * specified order or 1 if collation order does not occur at the end of any |
- * expansion sequence. |
- * @see CollationElementIterator#getMaxExpansion |
- */ |
-int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const |
-{ |
- uint8_t result; |
- UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result); |
- return result; |
-} |
- |
-uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length, |
- UErrorCode &status) |
-{ |
- if (U_FAILURE(status)) { return NULL; } |
- LocalMemory<uint8_t> buffer((uint8_t *)uprv_malloc(20000)); |
- if (buffer.isNull()) { |
- status = U_MEMORY_ALLOCATION_ERROR; |
- return NULL; |
- } |
- length = cloneBinary(buffer.getAlias(), 20000, status); |
- if (status == U_BUFFER_OVERFLOW_ERROR) { |
- if (buffer.allocateInsteadAndCopy(length, 0) == NULL) { |
- status = U_MEMORY_ALLOCATION_ERROR; |
- return NULL; |
- } |
- status = U_ZERO_ERROR; |
- length = cloneBinary(buffer.getAlias(), length, status); |
- } |
- if (U_FAILURE(status)) { return NULL; } |
- return buffer.orphan(); |
-} |
- |
- |
-int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) |
-{ |
- return ucol_cloneBinary(ucollator, buffer, capacity, &status); |
-} |
- |
-void RuleBasedCollator::setAttribute(UColAttribute attr, |
- UColAttributeValue value, |
- UErrorCode &status) |
-{ |
- if (U_FAILURE(status)) |
- return; |
- checkOwned(); |
- ucol_setAttribute(ucollator, attr, value, &status); |
-} |
- |
-UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr, |
- UErrorCode &status) const |
-{ |
- if (U_FAILURE(status)) |
- return UCOL_DEFAULT; |
- return ucol_getAttribute(ucollator, attr, &status); |
-} |
- |
-uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) { |
- checkOwned(); |
- return ucol_setVariableTop(ucollator, varTop, len, &status); |
-} |
- |
-uint32_t RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &status) { |
- checkOwned(); |
- return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &status); |
-} |
- |
-void RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &status) { |
- checkOwned(); |
- ucol_restoreVariableTop(ucollator, varTop, &status); |
-} |
- |
-uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const { |
- return ucol_getVariableTop(ucollator, &status); |
-} |
- |
-int32_t RuleBasedCollator::getSortKey(const UnicodeString& source, |
- uint8_t *result, int32_t resultLength) |
- const |
-{ |
- return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength); |
-} |
- |
-int32_t RuleBasedCollator::getSortKey(const UChar *source, |
- int32_t sourceLength, uint8_t *result, |
- int32_t resultLength) const |
-{ |
- return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength); |
-} |
- |
-int32_t RuleBasedCollator::getReorderCodes(int32_t *dest, |
- int32_t destCapacity, |
- UErrorCode& status) const |
-{ |
- return ucol_getReorderCodes(ucollator, dest, destCapacity, &status); |
-} |
- |
-void RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, |
- int32_t reorderCodesLength, |
- UErrorCode& status) |
-{ |
- checkOwned(); |
- ucol_setReorderCodes(ucollator, reorderCodes, reorderCodesLength, &status); |
-} |
- |
-int32_t RuleBasedCollator::getEquivalentReorderCodes(int32_t reorderCode, |
- int32_t* dest, |
- int32_t destCapacity, |
- UErrorCode& status) |
-{ |
- return ucol_getEquivalentReorderCodes(reorderCode, dest, destCapacity, &status); |
-} |
- |
-/** |
-* Create a hash code for this collation. Just hash the main rule table -- that |
-* should be good enough for almost any use. |
-*/ |
-int32_t RuleBasedCollator::hashCode() const |
-{ |
- int32_t length; |
- const UChar *rules = ucol_getRules(ucollator, &length); |
- return ustr_hashUCharsN(rules, length); |
-} |
- |
-/** |
-* return the locale of this collator |
-*/ |
-Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const { |
- const char *result = ucol_getLocaleByType(ucollator, type, &status); |
- if(result == NULL) { |
- Locale res(""); |
- res.setToBogus(); |
- return res; |
- } else { |
- return Locale(result); |
- } |
-} |
- |
-void |
-RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) { |
- checkOwned(); |
- char* rloc = uprv_strdup(requestedLocale.getName()); |
- if (rloc) { |
- char* vloc = uprv_strdup(validLocale.getName()); |
- if (vloc) { |
- char* aloc = uprv_strdup(actualLocale.getName()); |
- if (aloc) { |
- ucol_setReqValidLocales(ucollator, rloc, vloc, aloc); |
- return; |
- } |
- uprv_free(vloc); |
- } |
- uprv_free(rloc); |
- } |
-} |
- |
-// RuleBaseCollatorNew private constructor ---------------------------------- |
- |
-RuleBasedCollator::RuleBasedCollator() |
- : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL) |
-{ |
-} |
- |
-RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale, |
- UErrorCode& status) |
- : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL) |
-{ |
- if (U_FAILURE(status)) |
- return; |
- |
- /* |
- Try to load, in order: |
- 1. The desired locale's collation. |
- 2. A fallback of the desired locale. |
- 3. The default locale's collation. |
- 4. A fallback of the default locale. |
- 5. The default collation rules, which contains en_US collation rules. |
- |
- To reiterate, we try: |
- Specific: |
- language+country+variant |
- language+country |
- language |
- Default: |
- language+country+variant |
- language+country |
- language |
- Root: (aka DEFAULTRULES) |
- steps 1-5 are handled by resource bundle fallback mechanism. |
- however, in a very unprobable situation that no resource bundle |
- data exists, step 5 is repeated with hardcoded default rules. |
- */ |
- |
- setUCollator(desiredLocale, status); |
- |
- if (U_FAILURE(status)) |
- { |
- status = U_ZERO_ERROR; |
- |
- setUCollator(kRootLocaleName, status); |
- if (status == U_ZERO_ERROR) { |
- status = U_USING_DEFAULT_WARNING; |
- } |
- } |
- |
- if (U_SUCCESS(status)) |
- { |
- setRuleStringFromCollator(); |
- } |
-} |
- |
-void |
-RuleBasedCollator::setUCollator(const char *locale, |
- UErrorCode &status) |
-{ |
- if (U_FAILURE(status)) { |
- return; |
- } |
- if (ucollator && dataIsOwned) |
- ucol_close(ucollator); |
- ucollator = ucol_open_internal(locale, &status); |
- dataIsOwned = TRUE; |
- isWriteThroughAlias = FALSE; |
-} |
- |
- |
-void |
-RuleBasedCollator::checkOwned() { |
- if (!(dataIsOwned || isWriteThroughAlias)) { |
- UErrorCode status = U_ZERO_ERROR; |
- ucollator = ucol_safeClone(ucollator, NULL, NULL, &status); |
- setRuleStringFromCollator(); |
- dataIsOwned = TRUE; |
- isWriteThroughAlias = FALSE; |
- } |
-} |
- |
- |
-int32_t RuleBasedCollator::internalGetShortDefinitionString(const char *locale, |
- char *buffer, |
- int32_t capacity, |
- UErrorCode &status) const { |
- /* simply delegate */ |
- return ucol_getShortDefinitionString(ucollator, locale, buffer, capacity, &status); |
-} |
- |
- |
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator) |
- |
-U_NAMESPACE_END |
- |
-#endif /* #if !UCONFIG_NO_COLLATION */ |