| OLD | NEW | 
|---|
| 1 /* | 1 /* | 
| 2 ******************************************************************************* | 2 ******************************************************************************* | 
| 3 *   Copyright (C) 2004-2012, International Business Machines | 3 *   Copyright (C) 2004-2014, International Business Machines | 
| 4 *   Corporation and others.  All Rights Reserved. | 4 *   Corporation and others.  All Rights Reserved. | 
| 5 ******************************************************************************* | 5 ******************************************************************************* | 
| 6 *   file name:  ucol_sit.cpp | 6 *   file name:  ucol_sit.cpp | 
| 7 *   encoding:   US-ASCII | 7 *   encoding:   US-ASCII | 
| 8 *   tab size:   8 (not used) | 8 *   tab size:   8 (not used) | 
| 9 *   indentation:4 | 9 *   indentation:4 | 
| 10 * | 10 * | 
| 11 * Modification history | 11 * Modification history | 
| 12 * Date        Name      Comments | 12 * Date        Name      Comments | 
| 13 * 03/12/2004  weiv      Creation | 13 * 03/12/2004  weiv      Creation | 
| 14 */ | 14 */ | 
| 15 | 15 | 
| 16 #include "unicode/ustring.h" | 16 #include "unicode/ustring.h" | 
| 17 #include "unicode/udata.h" | 17 #include "unicode/udata.h" | 
| 18 | 18 #include "unicode/utf16.h" | 
| 19 #include "utracimp.h" | 19 #include "utracimp.h" | 
| 20 #include "ucol_imp.h" | 20 #include "ucol_imp.h" | 
| 21 #include "ucol_tok.h" |  | 
| 22 #include "cmemory.h" | 21 #include "cmemory.h" | 
| 23 #include "cstring.h" | 22 #include "cstring.h" | 
| 24 #include "uresimp.h" | 23 #include "uresimp.h" | 
| 25 #include "unicode/coll.h" | 24 #include "unicode/coll.h" | 
| 26 | 25 | 
| 27 #ifdef UCOL_TRACE_SIT | 26 #ifdef UCOL_TRACE_SIT | 
| 28 # include <stdio.h> | 27 # include <stdio.h> | 
| 29 #endif | 28 #endif | 
| 30 | 29 | 
| 31 #if !UCONFIG_NO_COLLATION | 30 #if !UCONFIG_NO_COLLATION | 
| 32 | 31 | 
|  | 32 #include "unicode/tblcoll.h" | 
|  | 33 | 
| 33 enum OptionsList { | 34 enum OptionsList { | 
| 34     UCOL_SIT_LANGUAGE = 0, | 35     UCOL_SIT_LANGUAGE = 0, | 
| 35     UCOL_SIT_SCRIPT   = 1, | 36     UCOL_SIT_SCRIPT   = 1, | 
| 36     UCOL_SIT_REGION   = 2, | 37     UCOL_SIT_REGION   = 2, | 
| 37     UCOL_SIT_VARIANT  = 3, | 38     UCOL_SIT_VARIANT  = 3, | 
| 38     UCOL_SIT_KEYWORD  = 4, | 39     UCOL_SIT_KEYWORD  = 4, | 
| 39     UCOL_SIT_PROVIDER = 5, | 40     UCOL_SIT_PROVIDER = 5, | 
| 40     UCOL_SIT_LOCELEMENT_MAX = UCOL_SIT_PROVIDER, /* the last element that's part
      of LocElements */ | 41     UCOL_SIT_LOCELEMENT_MAX = UCOL_SIT_PROVIDER, /* the last element that's part
      of LocElements */ | 
| 41 | 42 | 
| 42     UCOL_SIT_BCP47, | 43     UCOL_SIT_BCP47, | 
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 119     { 'I', UCOL_IDENTICAL }, | 120     { 'I', UCOL_IDENTICAL }, | 
| 120     { 'L', UCOL_LOWER_FIRST }, | 121     { 'L', UCOL_LOWER_FIRST }, | 
| 121     { 'N', UCOL_NON_IGNORABLE }, | 122     { 'N', UCOL_NON_IGNORABLE }, | 
| 122     { 'O', UCOL_ON }, | 123     { 'O', UCOL_ON }, | 
| 123     { 'S', UCOL_SHIFTED }, | 124     { 'S', UCOL_SHIFTED }, | 
| 124     { 'U', UCOL_UPPER_FIRST }, | 125     { 'U', UCOL_UPPER_FIRST }, | 
| 125     { 'X', UCOL_OFF } | 126     { 'X', UCOL_OFF } | 
| 126 }; | 127 }; | 
| 127 | 128 | 
| 128 | 129 | 
| 129 static char |  | 
| 130 ucol_sit_attributeValueToLetter(UColAttributeValue value, UErrorCode *status) { |  | 
| 131     uint32_t i = 0; |  | 
| 132     for(i = 0; i < sizeof(conversions)/sizeof(conversions[0]); i++) { |  | 
| 133         if(conversions[i].value == value) { |  | 
| 134             return conversions[i].letter; |  | 
| 135         } |  | 
| 136     } |  | 
| 137     *status = U_ILLEGAL_ARGUMENT_ERROR; |  | 
| 138 #ifdef UCOL_TRACE_SIT |  | 
| 139     fprintf(stderr, "%s:%d: unknown UColAttributeValue %d: %s\n", __FILE__, __LI
     NE__, value, u_errorName(*status)); |  | 
| 140 #endif |  | 
| 141     return 0; |  | 
| 142 } |  | 
| 143 |  | 
| 144 static UColAttributeValue | 130 static UColAttributeValue | 
| 145 ucol_sit_letterToAttributeValue(char letter, UErrorCode *status) { | 131 ucol_sit_letterToAttributeValue(char letter, UErrorCode *status) { | 
| 146     uint32_t i = 0; | 132     uint32_t i = 0; | 
| 147     for(i = 0; i < sizeof(conversions)/sizeof(conversions[0]); i++) { | 133     for(i = 0; i < sizeof(conversions)/sizeof(conversions[0]); i++) { | 
| 148         if(conversions[i].letter == letter) { | 134         if(conversions[i].letter == letter) { | 
| 149             return conversions[i].value; | 135             return conversions[i].value; | 
| 150         } | 136         } | 
| 151     } | 137     } | 
| 152     *status = U_ILLEGAL_ARGUMENT_ERROR; | 138     *status = U_ILLEGAL_ARGUMENT_ERROR; | 
| 153 #ifdef UCOL_TRACE_SIT | 139 #ifdef UCOL_TRACE_SIT | 
| (...skipping 410 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 564     if(U_FAILURE(*status)) { // here it can only be a bogus value | 550     if(U_FAILURE(*status)) { // here it can only be a bogus value | 
| 565         ucol_close(result); | 551         ucol_close(result); | 
| 566         result = NULL; | 552         result = NULL; | 
| 567     } | 553     } | 
| 568 | 554 | 
| 569     UTRACE_EXIT_PTR_STATUS(result, *status); | 555     UTRACE_EXIT_PTR_STATUS(result, *status); | 
| 570     return result; | 556     return result; | 
| 571 } | 557 } | 
| 572 | 558 | 
| 573 | 559 | 
| 574 static void appendShortStringElement(const char *src, int32_t len, char *result,
      int32_t *resultSize, int32_t capacity, char arg) |  | 
| 575 { |  | 
| 576     if(len) { |  | 
| 577         if(*resultSize) { |  | 
| 578             if(*resultSize < capacity) { |  | 
| 579                 uprv_strcat(result, "_"); |  | 
| 580             } |  | 
| 581             (*resultSize)++; |  | 
| 582         } |  | 
| 583         *resultSize += len + 1; |  | 
| 584         if(*resultSize < capacity) { |  | 
| 585             uprv_strncat(result, &arg, 1); |  | 
| 586             uprv_strncat(result, src, len); |  | 
| 587         } |  | 
| 588     } |  | 
| 589 } |  | 
| 590 |  | 
| 591 U_CAPI int32_t U_EXPORT2 | 560 U_CAPI int32_t U_EXPORT2 | 
| 592 ucol_getShortDefinitionString(const UCollator *coll, | 561 ucol_getShortDefinitionString(const UCollator *coll, | 
| 593                               const char *locale, | 562                               const char *locale, | 
| 594                               char *dst, | 563                               char *dst, | 
| 595                               int32_t capacity, | 564                               int32_t capacity, | 
| 596                               UErrorCode *status) | 565                               UErrorCode *status) | 
| 597 { | 566 { | 
| 598     if(U_FAILURE(*status)) return 0; | 567     if(U_FAILURE(*status)) return 0; | 
| 599     if(coll->delegate != NULL) { | 568     if(coll == NULL) { | 
| 600       return ((icu::Collator*)coll->delegate)->internalGetShortDefinitionString(
     locale,dst,capacity,*status); | 569         *status = U_ILLEGAL_ARGUMENT_ERROR; | 
|  | 570         return 0; | 
| 601     } | 571     } | 
| 602     char buffer[internalBufferSize]; | 572     return ((icu::Collator*)coll)->internalGetShortDefinitionString(locale,dst,c
     apacity,*status); | 
| 603     uprv_memset(buffer, 0, internalBufferSize*sizeof(char)); |  | 
| 604     int32_t resultSize = 0; |  | 
| 605     char tempbuff[internalBufferSize]; |  | 
| 606     char locBuff[internalBufferSize]; |  | 
| 607     uprv_memset(buffer, 0, internalBufferSize*sizeof(char)); |  | 
| 608     int32_t elementSize = 0; |  | 
| 609     UBool isAvailable = 0; |  | 
| 610     CollatorSpec s; |  | 
| 611     ucol_sit_initCollatorSpecs(&s); |  | 
| 612 |  | 
| 613     if(!locale) { |  | 
| 614         locale = ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, status); |  | 
| 615     } |  | 
| 616     elementSize = ucol_getFunctionalEquivalent(locBuff, internalBufferSize, "col
     lation", locale, &isAvailable, status); |  | 
| 617 |  | 
| 618     if(elementSize) { |  | 
| 619         // we should probably canonicalize here... |  | 
| 620         elementSize = uloc_getLanguage(locBuff, tempbuff, internalBufferSize, st
     atus); |  | 
| 621         appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c
     apacity*/internalBufferSize, languageArg); |  | 
| 622         elementSize = uloc_getCountry(locBuff, tempbuff, internalBufferSize, sta
     tus); |  | 
| 623         appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c
     apacity*/internalBufferSize, regionArg); |  | 
| 624         elementSize = uloc_getScript(locBuff, tempbuff, internalBufferSize, stat
     us); |  | 
| 625         appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c
     apacity*/internalBufferSize, scriptArg); |  | 
| 626         elementSize = uloc_getVariant(locBuff, tempbuff, internalBufferSize, sta
     tus); |  | 
| 627         appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c
     apacity*/internalBufferSize, variantArg); |  | 
| 628         elementSize = uloc_getKeywordValue(locBuff, "collation", tempbuff, inter
     nalBufferSize, status); |  | 
| 629         appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c
     apacity*/internalBufferSize, keywordArg); |  | 
| 630     } |  | 
| 631 |  | 
| 632     int32_t i = 0; |  | 
| 633     UColAttributeValue attribute = UCOL_DEFAULT; |  | 
| 634     for(i = 0; i < UCOL_SIT_ITEMS_COUNT; i++) { |  | 
| 635         if(options[i].action == _processCollatorOption) { |  | 
| 636             attribute = ucol_getAttributeOrDefault(coll, (UColAttribute)options[
     i].attr, status); |  | 
| 637             if(attribute != UCOL_DEFAULT) { |  | 
| 638                 char letter = ucol_sit_attributeValueToLetter(attribute, status)
     ; |  | 
| 639                 appendShortStringElement(&letter, 1, |  | 
| 640                     buffer, &resultSize, /*capacity*/internalBufferSize, options
     [i].optionStart); |  | 
| 641             } |  | 
| 642         } |  | 
| 643     } |  | 
| 644     if(coll->variableTopValueisDefault == FALSE) { |  | 
| 645         //s.variableTopValue = ucol_getVariableTop(coll, status); |  | 
| 646         elementSize = T_CString_integerToString(tempbuff, coll->variableTopValue
     , 16); |  | 
| 647         appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, cap
     acity, variableTopValArg); |  | 
| 648     } |  | 
| 649 |  | 
| 650     UParseError parseError; |  | 
| 651     return ucol_normalizeShortDefinitionString(buffer, dst, capacity, &parseErro
     r, status); |  | 
| 652 } | 573 } | 
| 653 | 574 | 
| 654 U_CAPI int32_t U_EXPORT2 | 575 U_CAPI int32_t U_EXPORT2 | 
| 655 ucol_normalizeShortDefinitionString(const char *definition, | 576 ucol_normalizeShortDefinitionString(const char *definition, | 
| 656                                     char *destination, | 577                                     char *destination, | 
| 657                                     int32_t capacity, | 578                                     int32_t capacity, | 
| 658                                     UParseError *parseError, | 579                                     UParseError *parseError, | 
| 659                                     UErrorCode *status) | 580                                     UErrorCode *status) | 
| 660 { | 581 { | 
| 661 | 582 | 
| (...skipping 10 matching lines...) Expand all  Loading... | 
| 672         parseError = &pe; | 593         parseError = &pe; | 
| 673     } | 594     } | 
| 674 | 595 | 
| 675     // validate | 596     // validate | 
| 676     CollatorSpec s; | 597     CollatorSpec s; | 
| 677     ucol_sit_initCollatorSpecs(&s); | 598     ucol_sit_initCollatorSpecs(&s); | 
| 678     ucol_sit_readSpecs(&s, definition, parseError, status); | 599     ucol_sit_readSpecs(&s, definition, parseError, status); | 
| 679     return ucol_sit_dumpSpecs(&s, destination, capacity, status); | 600     return ucol_sit_dumpSpecs(&s, destination, capacity, status); | 
| 680 } | 601 } | 
| 681 | 602 | 
| 682 U_CAPI UColAttributeValue  U_EXPORT2 |  | 
| 683 ucol_getAttributeOrDefault(const UCollator *coll, UColAttribute attr, UErrorCode
      *status) |  | 
| 684 { |  | 
| 685     if(U_FAILURE(*status) || coll == NULL) { |  | 
| 686       return UCOL_DEFAULT; |  | 
| 687     } |  | 
| 688     switch(attr) { |  | 
| 689     case UCOL_NUMERIC_COLLATION: |  | 
| 690         return coll->numericCollationisDefault?UCOL_DEFAULT:coll->numericCollati
     on; |  | 
| 691     case UCOL_HIRAGANA_QUATERNARY_MODE: |  | 
| 692         return coll->hiraganaQisDefault?UCOL_DEFAULT:coll->hiraganaQ; |  | 
| 693     case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*
     / |  | 
| 694         return coll->frenchCollationisDefault?UCOL_DEFAULT:coll->frenchCollation
     ; |  | 
| 695     case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/ |  | 
| 696         return coll->alternateHandlingisDefault?UCOL_DEFAULT:coll->alternateHand
     ling; |  | 
| 697     case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */ |  | 
| 698         return coll->caseFirstisDefault?UCOL_DEFAULT:coll->caseFirst; |  | 
| 699     case UCOL_CASE_LEVEL: /* do we have an extra case level */ |  | 
| 700         return coll->caseLevelisDefault?UCOL_DEFAULT:coll->caseLevel; |  | 
| 701     case UCOL_NORMALIZATION_MODE: /* attribute for normalization */ |  | 
| 702         return coll->normalizationModeisDefault?UCOL_DEFAULT:coll->normalization
     Mode; |  | 
| 703     case UCOL_STRENGTH:         /* attribute for strength */ |  | 
| 704         return coll->strengthisDefault?UCOL_DEFAULT:coll->strength; |  | 
| 705     case UCOL_ATTRIBUTE_COUNT: |  | 
| 706     default: |  | 
| 707         *status = U_ILLEGAL_ARGUMENT_ERROR; |  | 
| 708 #ifdef UCOL_TRACE_SIT |  | 
| 709         fprintf(stderr, "%s:%d: Unknown attr value '%d': %s\n", __FILE__, __LINE
     __, (int)attr, u_errorName(*status)); |  | 
| 710 #endif |  | 
| 711         break; |  | 
| 712     } |  | 
| 713     return UCOL_DEFAULT; |  | 
| 714 } |  | 
| 715 |  | 
| 716 |  | 
| 717 struct contContext { |  | 
| 718     const UCollator *coll; |  | 
| 719     USet            *conts; |  | 
| 720     USet            *expansions; |  | 
| 721     USet            *removedContractions; |  | 
| 722     UBool           addPrefixes; |  | 
| 723     UErrorCode      *status; |  | 
| 724 }; |  | 
| 725 |  | 
| 726 |  | 
| 727 |  | 
| 728 static void |  | 
| 729 addSpecial(contContext *context, UChar *buffer, int32_t bufLen, |  | 
| 730                uint32_t CE, int32_t leftIndex, int32_t rightIndex, UErrorCode *s
     tatus) |  | 
| 731 { |  | 
| 732   const UCollator *coll = context->coll; |  | 
| 733   USet *contractions = context->conts; |  | 
| 734   USet *expansions = context->expansions; |  | 
| 735   UBool addPrefixes = context->addPrefixes; |  | 
| 736 |  | 
| 737     const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE); |  | 
| 738     uint32_t newCE = *(coll->contractionCEs + (UCharOffset - coll->contractionIn
     dex)); |  | 
| 739     // we might have a contraction that ends from previous level |  | 
| 740     if(newCE != UCOL_NOT_FOUND) { |  | 
| 741       if(isSpecial(CE) && getCETag(CE) == CONTRACTION_TAG && isSpecial(newCE) &&
      getCETag(newCE) == SPEC_PROC_TAG && addPrefixes) { |  | 
| 742         addSpecial(context, buffer, bufLen, newCE, leftIndex, rightIndex, status
     ); |  | 
| 743       } |  | 
| 744       if(contractions && rightIndex-leftIndex > 1) { |  | 
| 745             uset_addString(contractions, buffer+leftIndex, rightIndex-leftIndex)
     ; |  | 
| 746             if(expansions && isSpecial(CE) && getCETag(CE) == EXPANSION_TAG) { |  | 
| 747               uset_addString(expansions, buffer+leftIndex, rightIndex-leftIndex)
     ; |  | 
| 748             } |  | 
| 749       } |  | 
| 750     } |  | 
| 751 |  | 
| 752     UCharOffset++; |  | 
| 753     // check whether we're doing contraction or prefix |  | 
| 754     if(getCETag(CE) == SPEC_PROC_TAG && addPrefixes) { |  | 
| 755       if(leftIndex == 0) { |  | 
| 756           *status = U_INTERNAL_PROGRAM_ERROR; |  | 
| 757           return; |  | 
| 758       } |  | 
| 759       --leftIndex; |  | 
| 760       while(*UCharOffset != 0xFFFF) { |  | 
| 761           newCE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex
     )); |  | 
| 762           buffer[leftIndex] = *UCharOffset; |  | 
| 763           if(isSpecial(newCE) && (getCETag(newCE) == CONTRACTION_TAG || getCETag
     (newCE) == SPEC_PROC_TAG)) { |  | 
| 764               addSpecial(context, buffer, bufLen, newCE, leftIndex, rightIndex, 
     status); |  | 
| 765           } else { |  | 
| 766             if(contractions) { |  | 
| 767                 uset_addString(contractions, buffer+leftIndex, rightIndex-leftIn
     dex); |  | 
| 768             } |  | 
| 769             if(expansions && isSpecial(newCE) && getCETag(newCE) == EXPANSION_TA
     G) { |  | 
| 770               uset_addString(expansions, buffer+leftIndex, rightIndex-leftIndex)
     ; |  | 
| 771             } |  | 
| 772           } |  | 
| 773           UCharOffset++; |  | 
| 774       } |  | 
| 775     } else if(getCETag(CE) == CONTRACTION_TAG) { |  | 
| 776       if(rightIndex == bufLen-1) { |  | 
| 777           *status = U_INTERNAL_PROGRAM_ERROR; |  | 
| 778           return; |  | 
| 779       } |  | 
| 780       while(*UCharOffset != 0xFFFF) { |  | 
| 781           newCE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex
     )); |  | 
| 782           buffer[rightIndex] = *UCharOffset; |  | 
| 783           if(isSpecial(newCE) && (getCETag(newCE) == CONTRACTION_TAG || getCETag
     (newCE) == SPEC_PROC_TAG)) { |  | 
| 784               addSpecial(context, buffer, bufLen, newCE, leftIndex, rightIndex+1
     , status); |  | 
| 785           } else { |  | 
| 786             if(contractions) { |  | 
| 787               uset_addString(contractions, buffer+leftIndex, rightIndex+1-leftIn
     dex); |  | 
| 788             } |  | 
| 789             if(expansions && isSpecial(newCE) && getCETag(newCE) == EXPANSION_TA
     G) { |  | 
| 790               uset_addString(expansions, buffer+leftIndex, rightIndex+1-leftInde
     x); |  | 
| 791             } |  | 
| 792           } |  | 
| 793           UCharOffset++; |  | 
| 794       } |  | 
| 795     } |  | 
| 796 |  | 
| 797 } |  | 
| 798 |  | 
| 799 U_CDECL_BEGIN |  | 
| 800 static UBool U_CALLCONV |  | 
| 801 _processSpecials(const void *context, UChar32 start, UChar32 limit, uint32_t CE) |  | 
| 802 { |  | 
| 803     UErrorCode *status = ((contContext *)context)->status; |  | 
| 804     USet *expansions = ((contContext *)context)->expansions; |  | 
| 805     USet *removed = ((contContext *)context)->removedContractions; |  | 
| 806     UBool addPrefixes = ((contContext *)context)->addPrefixes; |  | 
| 807     UChar contraction[internalBufferSize]; |  | 
| 808     if(isSpecial(CE)) { |  | 
| 809       if(((getCETag(CE) == SPEC_PROC_TAG && addPrefixes) || getCETag(CE) == CONT
     RACTION_TAG)) { |  | 
| 810         while(start < limit && U_SUCCESS(*status)) { |  | 
| 811             // if there are suppressed contractions, we don't |  | 
| 812             // want to add them. |  | 
| 813             if(removed && uset_contains(removed, start)) { |  | 
| 814                 start++; |  | 
| 815                 continue; |  | 
| 816             } |  | 
| 817             // we start our contraction from middle, since we don't know if it |  | 
| 818             // will grow toward right or left |  | 
| 819             contraction[internalBufferSize/2] = (UChar)start; |  | 
| 820             addSpecial(((contContext *)context), contraction, internalBufferSize
     , CE, internalBufferSize/2, internalBufferSize/2+1, status); |  | 
| 821             start++; |  | 
| 822         } |  | 
| 823       } else if(expansions && getCETag(CE) == EXPANSION_TAG) { |  | 
| 824         while(start < limit && U_SUCCESS(*status)) { |  | 
| 825           uset_add(expansions, start++); |  | 
| 826         } |  | 
| 827       } |  | 
| 828     } |  | 
| 829     if(U_FAILURE(*status)) { |  | 
| 830         return FALSE; |  | 
| 831     } else { |  | 
| 832         return TRUE; |  | 
| 833     } |  | 
| 834 } |  | 
| 835 |  | 
| 836 U_CDECL_END |  | 
| 837 |  | 
| 838 |  | 
| 839 |  | 
| 840 /** | 603 /** | 
| 841  * Get a set containing the contractions defined by the collator. The set includ
     es | 604  * Get a set containing the contractions defined by the collator. The set includ
     es | 
| 842  * both the UCA contractions and the contractions defined by the collator | 605  * both the UCA contractions and the contractions defined by the collator | 
| 843  * @param coll collator | 606  * @param coll collator | 
| 844  * @param conts the set to hold the result | 607  * @param conts the set to hold the result | 
| 845  * @param status to hold the error code | 608  * @param status to hold the error code | 
| 846  * @return the size of the contraction set | 609  * @return the size of the contraction set | 
| 847  */ | 610  */ | 
| 848 U_CAPI int32_t U_EXPORT2 | 611 U_CAPI int32_t U_EXPORT2 | 
| 849 ucol_getContractions( const UCollator *coll, | 612 ucol_getContractions( const UCollator *coll, | 
| (...skipping 21 matching lines...) Expand all  Loading... | 
| 871                   UBool addPrefixes, | 634                   UBool addPrefixes, | 
| 872                   UErrorCode *status) | 635                   UErrorCode *status) | 
| 873 { | 636 { | 
| 874     if(U_FAILURE(*status)) { | 637     if(U_FAILURE(*status)) { | 
| 875         return; | 638         return; | 
| 876     } | 639     } | 
| 877     if(coll == NULL) { | 640     if(coll == NULL) { | 
| 878         *status = U_ILLEGAL_ARGUMENT_ERROR; | 641         *status = U_ILLEGAL_ARGUMENT_ERROR; | 
| 879         return; | 642         return; | 
| 880     } | 643     } | 
| 881 | 644     const icu::RuleBasedCollator *rbc = icu::RuleBasedCollator::rbcFromUCollator
     (coll); | 
| 882     if(contractions) { | 645     if(rbc == NULL) { | 
| 883       uset_clear(contractions); | 646         *status = U_UNSUPPORTED_ERROR; | 
|  | 647         return; | 
| 884     } | 648     } | 
| 885     if(expansions) { | 649     rbc->internalGetContractionsAndExpansions( | 
| 886       uset_clear(expansions); | 650             icu::UnicodeSet::fromUSet(contractions), | 
| 887     } | 651             icu::UnicodeSet::fromUSet(expansions), | 
| 888     int32_t rulesLen = 0; | 652             addPrefixes, *status); | 
| 889     const UChar* rules = ucol_getRules(coll, &rulesLen); |  | 
| 890     UColTokenParser src; |  | 
| 891     ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA, |  | 
| 892                            ucol_tok_getRulesFromBundle, NULL, status); |  | 
| 893 |  | 
| 894     contContext c = { NULL, contractions, expansions, src.removeSet, addPrefixes
     , status }; |  | 
| 895 |  | 
| 896     // Add the UCA contractions |  | 
| 897     c.coll = coll->UCA; |  | 
| 898     utrie_enum(&coll->UCA->mapping, NULL, _processSpecials, &c); |  | 
| 899 |  | 
| 900     // This is collator specific. Add contractions from a collator |  | 
| 901     c.coll = coll; |  | 
| 902     c.removedContractions =  NULL; |  | 
| 903     utrie_enum(&coll->mapping, NULL, _processSpecials, &c); |  | 
| 904     ucol_tok_closeTokenList(&src); |  | 
| 905 } |  | 
| 906 |  | 
| 907 U_CAPI int32_t U_EXPORT2 |  | 
| 908 ucol_getUnsafeSet( const UCollator *coll, |  | 
| 909                   USet *unsafe, |  | 
| 910                   UErrorCode *status) |  | 
| 911 { |  | 
| 912     UChar buffer[internalBufferSize]; |  | 
| 913     int32_t len = 0; |  | 
| 914 |  | 
| 915     uset_clear(unsafe); |  | 
| 916 |  | 
| 917     // cccpattern = "[[:^tccc=0:][:^lccc=0:]]", unfortunately variant |  | 
| 918     static const UChar cccpattern[25] = { 0x5b, 0x5b, 0x3a, 0x5e, 0x74, 0x63, 0x
     63, 0x63, 0x3d, 0x30, 0x3a, 0x5d, |  | 
| 919                                     0x5b, 0x3a, 0x5e, 0x6c, 0x63, 0x63, 0x63, 0x
     3d, 0x30, 0x3a, 0x5d, 0x5d, 0x00 }; |  | 
| 920 |  | 
| 921     // add chars that fail the fcd check |  | 
| 922     uset_applyPattern(unsafe, cccpattern, 24, USET_IGNORE_SPACE, status); |  | 
| 923 |  | 
| 924     // add Thai/Lao prevowels |  | 
| 925     uset_addRange(unsafe, 0xe40, 0xe44); |  | 
| 926     uset_addRange(unsafe, 0xec0, 0xec4); |  | 
| 927     // add lead/trail surrogates |  | 
| 928     uset_addRange(unsafe, 0xd800, 0xdfff); |  | 
| 929 |  | 
| 930     USet *contractions = uset_open(0,0); |  | 
| 931 |  | 
| 932     int32_t i = 0, j = 0; |  | 
| 933     int32_t contsSize = ucol_getContractions(coll, contractions, status); |  | 
| 934     UChar32 c = 0; |  | 
| 935     // Contraction set consists only of strings |  | 
| 936     // to get unsafe code points, we need to |  | 
| 937     // break the strings apart and add them to the unsafe set |  | 
| 938     for(i = 0; i < contsSize; i++) { |  | 
| 939         len = uset_getItem(contractions, i, NULL, NULL, buffer, internalBufferSi
     ze, status); |  | 
| 940         if(len > 0) { |  | 
| 941             j = 0; |  | 
| 942             while(j < len) { |  | 
| 943                 U16_NEXT(buffer, j, len, c); |  | 
| 944                 if(j < len) { |  | 
| 945                     uset_add(unsafe, c); |  | 
| 946                 } |  | 
| 947             } |  | 
| 948         } |  | 
| 949     } |  | 
| 950 |  | 
| 951     uset_close(contractions); |  | 
| 952 |  | 
| 953     return uset_size(unsafe); |  | 
| 954 } | 653 } | 
| 955 #endif | 654 #endif | 
| OLD | NEW | 
|---|