| Index: source/i18n/ucol_sit.cpp
|
| diff --git a/source/i18n/ucol_sit.cpp b/source/i18n/ucol_sit.cpp
|
| index d92ddf7569094f595b57fc2fbde2cf432359fb1e..15e0981bc3bdb0eb4bf3e0a298ecc2013cce1453 100644
|
| --- a/source/i18n/ucol_sit.cpp
|
| +++ b/source/i18n/ucol_sit.cpp
|
| @@ -1,6 +1,6 @@
|
| /*
|
| *******************************************************************************
|
| -* Copyright (C) 2004-2012, International Business Machines
|
| +* Copyright (C) 2004-2014, International Business Machines
|
| * Corporation and others. All Rights Reserved.
|
| *******************************************************************************
|
| * file name: ucol_sit.cpp
|
| @@ -15,10 +15,9 @@
|
|
|
| #include "unicode/ustring.h"
|
| #include "unicode/udata.h"
|
| -
|
| +#include "unicode/utf16.h"
|
| #include "utracimp.h"
|
| #include "ucol_imp.h"
|
| -#include "ucol_tok.h"
|
| #include "cmemory.h"
|
| #include "cstring.h"
|
| #include "uresimp.h"
|
| @@ -30,6 +29,8 @@
|
|
|
| #if !UCONFIG_NO_COLLATION
|
|
|
| +#include "unicode/tblcoll.h"
|
| +
|
| enum OptionsList {
|
| UCOL_SIT_LANGUAGE = 0,
|
| UCOL_SIT_SCRIPT = 1,
|
| @@ -126,21 +127,6 @@ static const AttributeConversion conversions[12] = {
|
| };
|
|
|
|
|
| -static char
|
| -ucol_sit_attributeValueToLetter(UColAttributeValue value, UErrorCode *status) {
|
| - uint32_t i = 0;
|
| - for(i = 0; i < sizeof(conversions)/sizeof(conversions[0]); i++) {
|
| - if(conversions[i].value == value) {
|
| - return conversions[i].letter;
|
| - }
|
| - }
|
| - *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| -#ifdef UCOL_TRACE_SIT
|
| - fprintf(stderr, "%s:%d: unknown UColAttributeValue %d: %s\n", __FILE__, __LINE__, value, u_errorName(*status));
|
| -#endif
|
| - return 0;
|
| -}
|
| -
|
| static UColAttributeValue
|
| ucol_sit_letterToAttributeValue(char letter, UErrorCode *status) {
|
| uint32_t i = 0;
|
| @@ -571,23 +557,6 @@ ucol_openFromShortString( const char *definition,
|
| }
|
|
|
|
|
| -static void appendShortStringElement(const char *src, int32_t len, char *result, int32_t *resultSize, int32_t capacity, char arg)
|
| -{
|
| - if(len) {
|
| - if(*resultSize) {
|
| - if(*resultSize < capacity) {
|
| - uprv_strcat(result, "_");
|
| - }
|
| - (*resultSize)++;
|
| - }
|
| - *resultSize += len + 1;
|
| - if(*resultSize < capacity) {
|
| - uprv_strncat(result, &arg, 1);
|
| - uprv_strncat(result, src, len);
|
| - }
|
| - }
|
| -}
|
| -
|
| U_CAPI int32_t U_EXPORT2
|
| ucol_getShortDefinitionString(const UCollator *coll,
|
| const char *locale,
|
| @@ -596,59 +565,11 @@ ucol_getShortDefinitionString(const UCollator *coll,
|
| UErrorCode *status)
|
| {
|
| if(U_FAILURE(*status)) return 0;
|
| - if(coll->delegate != NULL) {
|
| - return ((icu::Collator*)coll->delegate)->internalGetShortDefinitionString(locale,dst,capacity,*status);
|
| - }
|
| - char buffer[internalBufferSize];
|
| - uprv_memset(buffer, 0, internalBufferSize*sizeof(char));
|
| - int32_t resultSize = 0;
|
| - char tempbuff[internalBufferSize];
|
| - char locBuff[internalBufferSize];
|
| - uprv_memset(buffer, 0, internalBufferSize*sizeof(char));
|
| - int32_t elementSize = 0;
|
| - UBool isAvailable = 0;
|
| - CollatorSpec s;
|
| - ucol_sit_initCollatorSpecs(&s);
|
| -
|
| - if(!locale) {
|
| - locale = ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, status);
|
| - }
|
| - elementSize = ucol_getFunctionalEquivalent(locBuff, internalBufferSize, "collation", locale, &isAvailable, status);
|
| -
|
| - if(elementSize) {
|
| - // we should probably canonicalize here...
|
| - elementSize = uloc_getLanguage(locBuff, tempbuff, internalBufferSize, status);
|
| - appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, languageArg);
|
| - elementSize = uloc_getCountry(locBuff, tempbuff, internalBufferSize, status);
|
| - appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, regionArg);
|
| - elementSize = uloc_getScript(locBuff, tempbuff, internalBufferSize, status);
|
| - appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, scriptArg);
|
| - elementSize = uloc_getVariant(locBuff, tempbuff, internalBufferSize, status);
|
| - appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, variantArg);
|
| - elementSize = uloc_getKeywordValue(locBuff, "collation", tempbuff, internalBufferSize, status);
|
| - appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, keywordArg);
|
| - }
|
| -
|
| - int32_t i = 0;
|
| - UColAttributeValue attribute = UCOL_DEFAULT;
|
| - for(i = 0; i < UCOL_SIT_ITEMS_COUNT; i++) {
|
| - if(options[i].action == _processCollatorOption) {
|
| - attribute = ucol_getAttributeOrDefault(coll, (UColAttribute)options[i].attr, status);
|
| - if(attribute != UCOL_DEFAULT) {
|
| - char letter = ucol_sit_attributeValueToLetter(attribute, status);
|
| - appendShortStringElement(&letter, 1,
|
| - buffer, &resultSize, /*capacity*/internalBufferSize, options[i].optionStart);
|
| - }
|
| - }
|
| - }
|
| - if(coll->variableTopValueisDefault == FALSE) {
|
| - //s.variableTopValue = ucol_getVariableTop(coll, status);
|
| - elementSize = T_CString_integerToString(tempbuff, coll->variableTopValue, 16);
|
| - appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, capacity, variableTopValArg);
|
| + if(coll == NULL) {
|
| + *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| + return 0;
|
| }
|
| -
|
| - UParseError parseError;
|
| - return ucol_normalizeShortDefinitionString(buffer, dst, capacity, &parseError, status);
|
| + return ((icu::Collator*)coll)->internalGetShortDefinitionString(locale,dst,capacity,*status);
|
| }
|
|
|
| U_CAPI int32_t U_EXPORT2
|
| @@ -679,164 +600,6 @@ ucol_normalizeShortDefinitionString(const char *definition,
|
| return ucol_sit_dumpSpecs(&s, destination, capacity, status);
|
| }
|
|
|
| -U_CAPI UColAttributeValue U_EXPORT2
|
| -ucol_getAttributeOrDefault(const UCollator *coll, UColAttribute attr, UErrorCode *status)
|
| -{
|
| - if(U_FAILURE(*status) || coll == NULL) {
|
| - return UCOL_DEFAULT;
|
| - }
|
| - switch(attr) {
|
| - case UCOL_NUMERIC_COLLATION:
|
| - return coll->numericCollationisDefault?UCOL_DEFAULT:coll->numericCollation;
|
| - case UCOL_HIRAGANA_QUATERNARY_MODE:
|
| - return coll->hiraganaQisDefault?UCOL_DEFAULT:coll->hiraganaQ;
|
| - case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*/
|
| - return coll->frenchCollationisDefault?UCOL_DEFAULT:coll->frenchCollation;
|
| - case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/
|
| - return coll->alternateHandlingisDefault?UCOL_DEFAULT:coll->alternateHandling;
|
| - case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */
|
| - return coll->caseFirstisDefault?UCOL_DEFAULT:coll->caseFirst;
|
| - case UCOL_CASE_LEVEL: /* do we have an extra case level */
|
| - return coll->caseLevelisDefault?UCOL_DEFAULT:coll->caseLevel;
|
| - case UCOL_NORMALIZATION_MODE: /* attribute for normalization */
|
| - return coll->normalizationModeisDefault?UCOL_DEFAULT:coll->normalizationMode;
|
| - case UCOL_STRENGTH: /* attribute for strength */
|
| - return coll->strengthisDefault?UCOL_DEFAULT:coll->strength;
|
| - case UCOL_ATTRIBUTE_COUNT:
|
| - default:
|
| - *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| -#ifdef UCOL_TRACE_SIT
|
| - fprintf(stderr, "%s:%d: Unknown attr value '%d': %s\n", __FILE__, __LINE__, (int)attr, u_errorName(*status));
|
| -#endif
|
| - break;
|
| - }
|
| - return UCOL_DEFAULT;
|
| -}
|
| -
|
| -
|
| -struct contContext {
|
| - const UCollator *coll;
|
| - USet *conts;
|
| - USet *expansions;
|
| - USet *removedContractions;
|
| - UBool addPrefixes;
|
| - UErrorCode *status;
|
| -};
|
| -
|
| -
|
| -
|
| -static void
|
| -addSpecial(contContext *context, UChar *buffer, int32_t bufLen,
|
| - uint32_t CE, int32_t leftIndex, int32_t rightIndex, UErrorCode *status)
|
| -{
|
| - const UCollator *coll = context->coll;
|
| - USet *contractions = context->conts;
|
| - USet *expansions = context->expansions;
|
| - UBool addPrefixes = context->addPrefixes;
|
| -
|
| - const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE);
|
| - uint32_t newCE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
|
| - // we might have a contraction that ends from previous level
|
| - if(newCE != UCOL_NOT_FOUND) {
|
| - if(isSpecial(CE) && getCETag(CE) == CONTRACTION_TAG && isSpecial(newCE) && getCETag(newCE) == SPEC_PROC_TAG && addPrefixes) {
|
| - addSpecial(context, buffer, bufLen, newCE, leftIndex, rightIndex, status);
|
| - }
|
| - if(contractions && rightIndex-leftIndex > 1) {
|
| - uset_addString(contractions, buffer+leftIndex, rightIndex-leftIndex);
|
| - if(expansions && isSpecial(CE) && getCETag(CE) == EXPANSION_TAG) {
|
| - uset_addString(expansions, buffer+leftIndex, rightIndex-leftIndex);
|
| - }
|
| - }
|
| - }
|
| -
|
| - UCharOffset++;
|
| - // check whether we're doing contraction or prefix
|
| - if(getCETag(CE) == SPEC_PROC_TAG && addPrefixes) {
|
| - if(leftIndex == 0) {
|
| - *status = U_INTERNAL_PROGRAM_ERROR;
|
| - return;
|
| - }
|
| - --leftIndex;
|
| - while(*UCharOffset != 0xFFFF) {
|
| - newCE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
|
| - buffer[leftIndex] = *UCharOffset;
|
| - if(isSpecial(newCE) && (getCETag(newCE) == CONTRACTION_TAG || getCETag(newCE) == SPEC_PROC_TAG)) {
|
| - addSpecial(context, buffer, bufLen, newCE, leftIndex, rightIndex, status);
|
| - } else {
|
| - if(contractions) {
|
| - uset_addString(contractions, buffer+leftIndex, rightIndex-leftIndex);
|
| - }
|
| - if(expansions && isSpecial(newCE) && getCETag(newCE) == EXPANSION_TAG) {
|
| - uset_addString(expansions, buffer+leftIndex, rightIndex-leftIndex);
|
| - }
|
| - }
|
| - UCharOffset++;
|
| - }
|
| - } else if(getCETag(CE) == CONTRACTION_TAG) {
|
| - if(rightIndex == bufLen-1) {
|
| - *status = U_INTERNAL_PROGRAM_ERROR;
|
| - return;
|
| - }
|
| - while(*UCharOffset != 0xFFFF) {
|
| - newCE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
|
| - buffer[rightIndex] = *UCharOffset;
|
| - if(isSpecial(newCE) && (getCETag(newCE) == CONTRACTION_TAG || getCETag(newCE) == SPEC_PROC_TAG)) {
|
| - addSpecial(context, buffer, bufLen, newCE, leftIndex, rightIndex+1, status);
|
| - } else {
|
| - if(contractions) {
|
| - uset_addString(contractions, buffer+leftIndex, rightIndex+1-leftIndex);
|
| - }
|
| - if(expansions && isSpecial(newCE) && getCETag(newCE) == EXPANSION_TAG) {
|
| - uset_addString(expansions, buffer+leftIndex, rightIndex+1-leftIndex);
|
| - }
|
| - }
|
| - UCharOffset++;
|
| - }
|
| - }
|
| -
|
| -}
|
| -
|
| -U_CDECL_BEGIN
|
| -static UBool U_CALLCONV
|
| -_processSpecials(const void *context, UChar32 start, UChar32 limit, uint32_t CE)
|
| -{
|
| - UErrorCode *status = ((contContext *)context)->status;
|
| - USet *expansions = ((contContext *)context)->expansions;
|
| - USet *removed = ((contContext *)context)->removedContractions;
|
| - UBool addPrefixes = ((contContext *)context)->addPrefixes;
|
| - UChar contraction[internalBufferSize];
|
| - if(isSpecial(CE)) {
|
| - if(((getCETag(CE) == SPEC_PROC_TAG && addPrefixes) || getCETag(CE) == CONTRACTION_TAG)) {
|
| - while(start < limit && U_SUCCESS(*status)) {
|
| - // if there are suppressed contractions, we don't
|
| - // want to add them.
|
| - if(removed && uset_contains(removed, start)) {
|
| - start++;
|
| - continue;
|
| - }
|
| - // we start our contraction from middle, since we don't know if it
|
| - // will grow toward right or left
|
| - contraction[internalBufferSize/2] = (UChar)start;
|
| - addSpecial(((contContext *)context), contraction, internalBufferSize, CE, internalBufferSize/2, internalBufferSize/2+1, status);
|
| - start++;
|
| - }
|
| - } else if(expansions && getCETag(CE) == EXPANSION_TAG) {
|
| - while(start < limit && U_SUCCESS(*status)) {
|
| - uset_add(expansions, start++);
|
| - }
|
| - }
|
| - }
|
| - if(U_FAILURE(*status)) {
|
| - return FALSE;
|
| - } else {
|
| - return TRUE;
|
| - }
|
| -}
|
| -
|
| -U_CDECL_END
|
| -
|
| -
|
| -
|
| /**
|
| * Get a set containing the contractions defined by the collator. The set includes
|
| * both the UCA contractions and the contractions defined by the collator
|
| @@ -878,78 +641,14 @@ ucol_getContractionsAndExpansions( const UCollator *coll,
|
| *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| return;
|
| }
|
| -
|
| - if(contractions) {
|
| - uset_clear(contractions);
|
| - }
|
| - if(expansions) {
|
| - uset_clear(expansions);
|
| - }
|
| - int32_t rulesLen = 0;
|
| - const UChar* rules = ucol_getRules(coll, &rulesLen);
|
| - UColTokenParser src;
|
| - ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA,
|
| - ucol_tok_getRulesFromBundle, NULL, status);
|
| -
|
| - contContext c = { NULL, contractions, expansions, src.removeSet, addPrefixes, status };
|
| -
|
| - // Add the UCA contractions
|
| - c.coll = coll->UCA;
|
| - utrie_enum(&coll->UCA->mapping, NULL, _processSpecials, &c);
|
| -
|
| - // This is collator specific. Add contractions from a collator
|
| - c.coll = coll;
|
| - c.removedContractions = NULL;
|
| - utrie_enum(&coll->mapping, NULL, _processSpecials, &c);
|
| - ucol_tok_closeTokenList(&src);
|
| -}
|
| -
|
| -U_CAPI int32_t U_EXPORT2
|
| -ucol_getUnsafeSet( const UCollator *coll,
|
| - USet *unsafe,
|
| - UErrorCode *status)
|
| -{
|
| - UChar buffer[internalBufferSize];
|
| - int32_t len = 0;
|
| -
|
| - uset_clear(unsafe);
|
| -
|
| - // cccpattern = "[[:^tccc=0:][:^lccc=0:]]", unfortunately variant
|
| - static const UChar cccpattern[25] = { 0x5b, 0x5b, 0x3a, 0x5e, 0x74, 0x63, 0x63, 0x63, 0x3d, 0x30, 0x3a, 0x5d,
|
| - 0x5b, 0x3a, 0x5e, 0x6c, 0x63, 0x63, 0x63, 0x3d, 0x30, 0x3a, 0x5d, 0x5d, 0x00 };
|
| -
|
| - // add chars that fail the fcd check
|
| - uset_applyPattern(unsafe, cccpattern, 24, USET_IGNORE_SPACE, status);
|
| -
|
| - // add Thai/Lao prevowels
|
| - uset_addRange(unsafe, 0xe40, 0xe44);
|
| - uset_addRange(unsafe, 0xec0, 0xec4);
|
| - // add lead/trail surrogates
|
| - uset_addRange(unsafe, 0xd800, 0xdfff);
|
| -
|
| - USet *contractions = uset_open(0,0);
|
| -
|
| - int32_t i = 0, j = 0;
|
| - int32_t contsSize = ucol_getContractions(coll, contractions, status);
|
| - UChar32 c = 0;
|
| - // Contraction set consists only of strings
|
| - // to get unsafe code points, we need to
|
| - // break the strings apart and add them to the unsafe set
|
| - for(i = 0; i < contsSize; i++) {
|
| - len = uset_getItem(contractions, i, NULL, NULL, buffer, internalBufferSize, status);
|
| - if(len > 0) {
|
| - j = 0;
|
| - while(j < len) {
|
| - U16_NEXT(buffer, j, len, c);
|
| - if(j < len) {
|
| - uset_add(unsafe, c);
|
| - }
|
| - }
|
| - }
|
| + const icu::RuleBasedCollator *rbc = icu::RuleBasedCollator::rbcFromUCollator(coll);
|
| + if(rbc == NULL) {
|
| + *status = U_UNSUPPORTED_ERROR;
|
| + return;
|
| }
|
| -
|
| - uset_close(contractions);
|
| -
|
| - return uset_size(unsafe);
|
| + rbc->internalGetContractionsAndExpansions(
|
| + icu::UnicodeSet::fromUSet(contractions),
|
| + icu::UnicodeSet::fromUSet(expansions),
|
| + addPrefixes, *status);
|
| }
|
| #endif
|
|
|