| OLD | NEW |
| 1 /* | 1 /* |
| 2 ******************************************************************************* | 2 ******************************************************************************* |
| 3 * Copyright (C) 2004-2012, International Business Machines | 3 * Copyright (C) 2004-2014, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
| 5 ******************************************************************************* | 5 ******************************************************************************* |
| 6 * file name: ucol_sit.cpp | 6 * file name: ucol_sit.cpp |
| 7 * encoding: US-ASCII | 7 * encoding: US-ASCII |
| 8 * tab size: 8 (not used) | 8 * tab size: 8 (not used) |
| 9 * indentation:4 | 9 * indentation:4 |
| 10 * | 10 * |
| 11 * Modification history | 11 * Modification history |
| 12 * Date Name Comments | 12 * Date Name Comments |
| 13 * 03/12/2004 weiv Creation | 13 * 03/12/2004 weiv Creation |
| 14 */ | 14 */ |
| 15 | 15 |
| 16 #include "unicode/ustring.h" | 16 #include "unicode/ustring.h" |
| 17 #include "unicode/udata.h" | 17 #include "unicode/udata.h" |
| 18 | 18 #include "unicode/utf16.h" |
| 19 #include "utracimp.h" | 19 #include "utracimp.h" |
| 20 #include "ucol_imp.h" | 20 #include "ucol_imp.h" |
| 21 #include "ucol_tok.h" | |
| 22 #include "cmemory.h" | 21 #include "cmemory.h" |
| 23 #include "cstring.h" | 22 #include "cstring.h" |
| 24 #include "uresimp.h" | 23 #include "uresimp.h" |
| 25 #include "unicode/coll.h" | 24 #include "unicode/coll.h" |
| 26 | 25 |
| 27 #ifdef UCOL_TRACE_SIT | 26 #ifdef UCOL_TRACE_SIT |
| 28 # include <stdio.h> | 27 # include <stdio.h> |
| 29 #endif | 28 #endif |
| 30 | 29 |
| 31 #if !UCONFIG_NO_COLLATION | 30 #if !UCONFIG_NO_COLLATION |
| 32 | 31 |
| 32 #include "unicode/tblcoll.h" |
| 33 |
| 33 enum OptionsList { | 34 enum OptionsList { |
| 34 UCOL_SIT_LANGUAGE = 0, | 35 UCOL_SIT_LANGUAGE = 0, |
| 35 UCOL_SIT_SCRIPT = 1, | 36 UCOL_SIT_SCRIPT = 1, |
| 36 UCOL_SIT_REGION = 2, | 37 UCOL_SIT_REGION = 2, |
| 37 UCOL_SIT_VARIANT = 3, | 38 UCOL_SIT_VARIANT = 3, |
| 38 UCOL_SIT_KEYWORD = 4, | 39 UCOL_SIT_KEYWORD = 4, |
| 39 UCOL_SIT_PROVIDER = 5, | 40 UCOL_SIT_PROVIDER = 5, |
| 40 UCOL_SIT_LOCELEMENT_MAX = UCOL_SIT_PROVIDER, /* the last element that's part
of LocElements */ | 41 UCOL_SIT_LOCELEMENT_MAX = UCOL_SIT_PROVIDER, /* the last element that's part
of LocElements */ |
| 41 | 42 |
| 42 UCOL_SIT_BCP47, | 43 UCOL_SIT_BCP47, |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 119 { 'I', UCOL_IDENTICAL }, | 120 { 'I', UCOL_IDENTICAL }, |
| 120 { 'L', UCOL_LOWER_FIRST }, | 121 { 'L', UCOL_LOWER_FIRST }, |
| 121 { 'N', UCOL_NON_IGNORABLE }, | 122 { 'N', UCOL_NON_IGNORABLE }, |
| 122 { 'O', UCOL_ON }, | 123 { 'O', UCOL_ON }, |
| 123 { 'S', UCOL_SHIFTED }, | 124 { 'S', UCOL_SHIFTED }, |
| 124 { 'U', UCOL_UPPER_FIRST }, | 125 { 'U', UCOL_UPPER_FIRST }, |
| 125 { 'X', UCOL_OFF } | 126 { 'X', UCOL_OFF } |
| 126 }; | 127 }; |
| 127 | 128 |
| 128 | 129 |
| 129 static char | |
| 130 ucol_sit_attributeValueToLetter(UColAttributeValue value, UErrorCode *status) { | |
| 131 uint32_t i = 0; | |
| 132 for(i = 0; i < sizeof(conversions)/sizeof(conversions[0]); i++) { | |
| 133 if(conversions[i].value == value) { | |
| 134 return conversions[i].letter; | |
| 135 } | |
| 136 } | |
| 137 *status = U_ILLEGAL_ARGUMENT_ERROR; | |
| 138 #ifdef UCOL_TRACE_SIT | |
| 139 fprintf(stderr, "%s:%d: unknown UColAttributeValue %d: %s\n", __FILE__, __LI
NE__, value, u_errorName(*status)); | |
| 140 #endif | |
| 141 return 0; | |
| 142 } | |
| 143 | |
| 144 static UColAttributeValue | 130 static UColAttributeValue |
| 145 ucol_sit_letterToAttributeValue(char letter, UErrorCode *status) { | 131 ucol_sit_letterToAttributeValue(char letter, UErrorCode *status) { |
| 146 uint32_t i = 0; | 132 uint32_t i = 0; |
| 147 for(i = 0; i < sizeof(conversions)/sizeof(conversions[0]); i++) { | 133 for(i = 0; i < sizeof(conversions)/sizeof(conversions[0]); i++) { |
| 148 if(conversions[i].letter == letter) { | 134 if(conversions[i].letter == letter) { |
| 149 return conversions[i].value; | 135 return conversions[i].value; |
| 150 } | 136 } |
| 151 } | 137 } |
| 152 *status = U_ILLEGAL_ARGUMENT_ERROR; | 138 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 153 #ifdef UCOL_TRACE_SIT | 139 #ifdef UCOL_TRACE_SIT |
| (...skipping 410 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 564 if(U_FAILURE(*status)) { // here it can only be a bogus value | 550 if(U_FAILURE(*status)) { // here it can only be a bogus value |
| 565 ucol_close(result); | 551 ucol_close(result); |
| 566 result = NULL; | 552 result = NULL; |
| 567 } | 553 } |
| 568 | 554 |
| 569 UTRACE_EXIT_PTR_STATUS(result, *status); | 555 UTRACE_EXIT_PTR_STATUS(result, *status); |
| 570 return result; | 556 return result; |
| 571 } | 557 } |
| 572 | 558 |
| 573 | 559 |
| 574 static void appendShortStringElement(const char *src, int32_t len, char *result,
int32_t *resultSize, int32_t capacity, char arg) | |
| 575 { | |
| 576 if(len) { | |
| 577 if(*resultSize) { | |
| 578 if(*resultSize < capacity) { | |
| 579 uprv_strcat(result, "_"); | |
| 580 } | |
| 581 (*resultSize)++; | |
| 582 } | |
| 583 *resultSize += len + 1; | |
| 584 if(*resultSize < capacity) { | |
| 585 uprv_strncat(result, &arg, 1); | |
| 586 uprv_strncat(result, src, len); | |
| 587 } | |
| 588 } | |
| 589 } | |
| 590 | |
| 591 U_CAPI int32_t U_EXPORT2 | 560 U_CAPI int32_t U_EXPORT2 |
| 592 ucol_getShortDefinitionString(const UCollator *coll, | 561 ucol_getShortDefinitionString(const UCollator *coll, |
| 593 const char *locale, | 562 const char *locale, |
| 594 char *dst, | 563 char *dst, |
| 595 int32_t capacity, | 564 int32_t capacity, |
| 596 UErrorCode *status) | 565 UErrorCode *status) |
| 597 { | 566 { |
| 598 if(U_FAILURE(*status)) return 0; | 567 if(U_FAILURE(*status)) return 0; |
| 599 if(coll->delegate != NULL) { | 568 if(coll == NULL) { |
| 600 return ((icu::Collator*)coll->delegate)->internalGetShortDefinitionString(
locale,dst,capacity,*status); | 569 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 570 return 0; |
| 601 } | 571 } |
| 602 char buffer[internalBufferSize]; | 572 return ((icu::Collator*)coll)->internalGetShortDefinitionString(locale,dst,c
apacity,*status); |
| 603 uprv_memset(buffer, 0, internalBufferSize*sizeof(char)); | |
| 604 int32_t resultSize = 0; | |
| 605 char tempbuff[internalBufferSize]; | |
| 606 char locBuff[internalBufferSize]; | |
| 607 uprv_memset(buffer, 0, internalBufferSize*sizeof(char)); | |
| 608 int32_t elementSize = 0; | |
| 609 UBool isAvailable = 0; | |
| 610 CollatorSpec s; | |
| 611 ucol_sit_initCollatorSpecs(&s); | |
| 612 | |
| 613 if(!locale) { | |
| 614 locale = ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, status); | |
| 615 } | |
| 616 elementSize = ucol_getFunctionalEquivalent(locBuff, internalBufferSize, "col
lation", locale, &isAvailable, status); | |
| 617 | |
| 618 if(elementSize) { | |
| 619 // we should probably canonicalize here... | |
| 620 elementSize = uloc_getLanguage(locBuff, tempbuff, internalBufferSize, st
atus); | |
| 621 appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c
apacity*/internalBufferSize, languageArg); | |
| 622 elementSize = uloc_getCountry(locBuff, tempbuff, internalBufferSize, sta
tus); | |
| 623 appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c
apacity*/internalBufferSize, regionArg); | |
| 624 elementSize = uloc_getScript(locBuff, tempbuff, internalBufferSize, stat
us); | |
| 625 appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c
apacity*/internalBufferSize, scriptArg); | |
| 626 elementSize = uloc_getVariant(locBuff, tempbuff, internalBufferSize, sta
tus); | |
| 627 appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c
apacity*/internalBufferSize, variantArg); | |
| 628 elementSize = uloc_getKeywordValue(locBuff, "collation", tempbuff, inter
nalBufferSize, status); | |
| 629 appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c
apacity*/internalBufferSize, keywordArg); | |
| 630 } | |
| 631 | |
| 632 int32_t i = 0; | |
| 633 UColAttributeValue attribute = UCOL_DEFAULT; | |
| 634 for(i = 0; i < UCOL_SIT_ITEMS_COUNT; i++) { | |
| 635 if(options[i].action == _processCollatorOption) { | |
| 636 attribute = ucol_getAttributeOrDefault(coll, (UColAttribute)options[
i].attr, status); | |
| 637 if(attribute != UCOL_DEFAULT) { | |
| 638 char letter = ucol_sit_attributeValueToLetter(attribute, status)
; | |
| 639 appendShortStringElement(&letter, 1, | |
| 640 buffer, &resultSize, /*capacity*/internalBufferSize, options
[i].optionStart); | |
| 641 } | |
| 642 } | |
| 643 } | |
| 644 if(coll->variableTopValueisDefault == FALSE) { | |
| 645 //s.variableTopValue = ucol_getVariableTop(coll, status); | |
| 646 elementSize = T_CString_integerToString(tempbuff, coll->variableTopValue
, 16); | |
| 647 appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, cap
acity, variableTopValArg); | |
| 648 } | |
| 649 | |
| 650 UParseError parseError; | |
| 651 return ucol_normalizeShortDefinitionString(buffer, dst, capacity, &parseErro
r, status); | |
| 652 } | 573 } |
| 653 | 574 |
| 654 U_CAPI int32_t U_EXPORT2 | 575 U_CAPI int32_t U_EXPORT2 |
| 655 ucol_normalizeShortDefinitionString(const char *definition, | 576 ucol_normalizeShortDefinitionString(const char *definition, |
| 656 char *destination, | 577 char *destination, |
| 657 int32_t capacity, | 578 int32_t capacity, |
| 658 UParseError *parseError, | 579 UParseError *parseError, |
| 659 UErrorCode *status) | 580 UErrorCode *status) |
| 660 { | 581 { |
| 661 | 582 |
| (...skipping 10 matching lines...) Expand all Loading... |
| 672 parseError = &pe; | 593 parseError = &pe; |
| 673 } | 594 } |
| 674 | 595 |
| 675 // validate | 596 // validate |
| 676 CollatorSpec s; | 597 CollatorSpec s; |
| 677 ucol_sit_initCollatorSpecs(&s); | 598 ucol_sit_initCollatorSpecs(&s); |
| 678 ucol_sit_readSpecs(&s, definition, parseError, status); | 599 ucol_sit_readSpecs(&s, definition, parseError, status); |
| 679 return ucol_sit_dumpSpecs(&s, destination, capacity, status); | 600 return ucol_sit_dumpSpecs(&s, destination, capacity, status); |
| 680 } | 601 } |
| 681 | 602 |
| 682 U_CAPI UColAttributeValue U_EXPORT2 | |
| 683 ucol_getAttributeOrDefault(const UCollator *coll, UColAttribute attr, UErrorCode
*status) | |
| 684 { | |
| 685 if(U_FAILURE(*status) || coll == NULL) { | |
| 686 return UCOL_DEFAULT; | |
| 687 } | |
| 688 switch(attr) { | |
| 689 case UCOL_NUMERIC_COLLATION: | |
| 690 return coll->numericCollationisDefault?UCOL_DEFAULT:coll->numericCollati
on; | |
| 691 case UCOL_HIRAGANA_QUATERNARY_MODE: | |
| 692 return coll->hiraganaQisDefault?UCOL_DEFAULT:coll->hiraganaQ; | |
| 693 case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*
/ | |
| 694 return coll->frenchCollationisDefault?UCOL_DEFAULT:coll->frenchCollation
; | |
| 695 case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/ | |
| 696 return coll->alternateHandlingisDefault?UCOL_DEFAULT:coll->alternateHand
ling; | |
| 697 case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */ | |
| 698 return coll->caseFirstisDefault?UCOL_DEFAULT:coll->caseFirst; | |
| 699 case UCOL_CASE_LEVEL: /* do we have an extra case level */ | |
| 700 return coll->caseLevelisDefault?UCOL_DEFAULT:coll->caseLevel; | |
| 701 case UCOL_NORMALIZATION_MODE: /* attribute for normalization */ | |
| 702 return coll->normalizationModeisDefault?UCOL_DEFAULT:coll->normalization
Mode; | |
| 703 case UCOL_STRENGTH: /* attribute for strength */ | |
| 704 return coll->strengthisDefault?UCOL_DEFAULT:coll->strength; | |
| 705 case UCOL_ATTRIBUTE_COUNT: | |
| 706 default: | |
| 707 *status = U_ILLEGAL_ARGUMENT_ERROR; | |
| 708 #ifdef UCOL_TRACE_SIT | |
| 709 fprintf(stderr, "%s:%d: Unknown attr value '%d': %s\n", __FILE__, __LINE
__, (int)attr, u_errorName(*status)); | |
| 710 #endif | |
| 711 break; | |
| 712 } | |
| 713 return UCOL_DEFAULT; | |
| 714 } | |
| 715 | |
| 716 | |
| 717 struct contContext { | |
| 718 const UCollator *coll; | |
| 719 USet *conts; | |
| 720 USet *expansions; | |
| 721 USet *removedContractions; | |
| 722 UBool addPrefixes; | |
| 723 UErrorCode *status; | |
| 724 }; | |
| 725 | |
| 726 | |
| 727 | |
| 728 static void | |
| 729 addSpecial(contContext *context, UChar *buffer, int32_t bufLen, | |
| 730 uint32_t CE, int32_t leftIndex, int32_t rightIndex, UErrorCode *s
tatus) | |
| 731 { | |
| 732 const UCollator *coll = context->coll; | |
| 733 USet *contractions = context->conts; | |
| 734 USet *expansions = context->expansions; | |
| 735 UBool addPrefixes = context->addPrefixes; | |
| 736 | |
| 737 const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE); | |
| 738 uint32_t newCE = *(coll->contractionCEs + (UCharOffset - coll->contractionIn
dex)); | |
| 739 // we might have a contraction that ends from previous level | |
| 740 if(newCE != UCOL_NOT_FOUND) { | |
| 741 if(isSpecial(CE) && getCETag(CE) == CONTRACTION_TAG && isSpecial(newCE) &&
getCETag(newCE) == SPEC_PROC_TAG && addPrefixes) { | |
| 742 addSpecial(context, buffer, bufLen, newCE, leftIndex, rightIndex, status
); | |
| 743 } | |
| 744 if(contractions && rightIndex-leftIndex > 1) { | |
| 745 uset_addString(contractions, buffer+leftIndex, rightIndex-leftIndex)
; | |
| 746 if(expansions && isSpecial(CE) && getCETag(CE) == EXPANSION_TAG) { | |
| 747 uset_addString(expansions, buffer+leftIndex, rightIndex-leftIndex)
; | |
| 748 } | |
| 749 } | |
| 750 } | |
| 751 | |
| 752 UCharOffset++; | |
| 753 // check whether we're doing contraction or prefix | |
| 754 if(getCETag(CE) == SPEC_PROC_TAG && addPrefixes) { | |
| 755 if(leftIndex == 0) { | |
| 756 *status = U_INTERNAL_PROGRAM_ERROR; | |
| 757 return; | |
| 758 } | |
| 759 --leftIndex; | |
| 760 while(*UCharOffset != 0xFFFF) { | |
| 761 newCE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex
)); | |
| 762 buffer[leftIndex] = *UCharOffset; | |
| 763 if(isSpecial(newCE) && (getCETag(newCE) == CONTRACTION_TAG || getCETag
(newCE) == SPEC_PROC_TAG)) { | |
| 764 addSpecial(context, buffer, bufLen, newCE, leftIndex, rightIndex,
status); | |
| 765 } else { | |
| 766 if(contractions) { | |
| 767 uset_addString(contractions, buffer+leftIndex, rightIndex-leftIn
dex); | |
| 768 } | |
| 769 if(expansions && isSpecial(newCE) && getCETag(newCE) == EXPANSION_TA
G) { | |
| 770 uset_addString(expansions, buffer+leftIndex, rightIndex-leftIndex)
; | |
| 771 } | |
| 772 } | |
| 773 UCharOffset++; | |
| 774 } | |
| 775 } else if(getCETag(CE) == CONTRACTION_TAG) { | |
| 776 if(rightIndex == bufLen-1) { | |
| 777 *status = U_INTERNAL_PROGRAM_ERROR; | |
| 778 return; | |
| 779 } | |
| 780 while(*UCharOffset != 0xFFFF) { | |
| 781 newCE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex
)); | |
| 782 buffer[rightIndex] = *UCharOffset; | |
| 783 if(isSpecial(newCE) && (getCETag(newCE) == CONTRACTION_TAG || getCETag
(newCE) == SPEC_PROC_TAG)) { | |
| 784 addSpecial(context, buffer, bufLen, newCE, leftIndex, rightIndex+1
, status); | |
| 785 } else { | |
| 786 if(contractions) { | |
| 787 uset_addString(contractions, buffer+leftIndex, rightIndex+1-leftIn
dex); | |
| 788 } | |
| 789 if(expansions && isSpecial(newCE) && getCETag(newCE) == EXPANSION_TA
G) { | |
| 790 uset_addString(expansions, buffer+leftIndex, rightIndex+1-leftInde
x); | |
| 791 } | |
| 792 } | |
| 793 UCharOffset++; | |
| 794 } | |
| 795 } | |
| 796 | |
| 797 } | |
| 798 | |
| 799 U_CDECL_BEGIN | |
| 800 static UBool U_CALLCONV | |
| 801 _processSpecials(const void *context, UChar32 start, UChar32 limit, uint32_t CE) | |
| 802 { | |
| 803 UErrorCode *status = ((contContext *)context)->status; | |
| 804 USet *expansions = ((contContext *)context)->expansions; | |
| 805 USet *removed = ((contContext *)context)->removedContractions; | |
| 806 UBool addPrefixes = ((contContext *)context)->addPrefixes; | |
| 807 UChar contraction[internalBufferSize]; | |
| 808 if(isSpecial(CE)) { | |
| 809 if(((getCETag(CE) == SPEC_PROC_TAG && addPrefixes) || getCETag(CE) == CONT
RACTION_TAG)) { | |
| 810 while(start < limit && U_SUCCESS(*status)) { | |
| 811 // if there are suppressed contractions, we don't | |
| 812 // want to add them. | |
| 813 if(removed && uset_contains(removed, start)) { | |
| 814 start++; | |
| 815 continue; | |
| 816 } | |
| 817 // we start our contraction from middle, since we don't know if it | |
| 818 // will grow toward right or left | |
| 819 contraction[internalBufferSize/2] = (UChar)start; | |
| 820 addSpecial(((contContext *)context), contraction, internalBufferSize
, CE, internalBufferSize/2, internalBufferSize/2+1, status); | |
| 821 start++; | |
| 822 } | |
| 823 } else if(expansions && getCETag(CE) == EXPANSION_TAG) { | |
| 824 while(start < limit && U_SUCCESS(*status)) { | |
| 825 uset_add(expansions, start++); | |
| 826 } | |
| 827 } | |
| 828 } | |
| 829 if(U_FAILURE(*status)) { | |
| 830 return FALSE; | |
| 831 } else { | |
| 832 return TRUE; | |
| 833 } | |
| 834 } | |
| 835 | |
| 836 U_CDECL_END | |
| 837 | |
| 838 | |
| 839 | |
| 840 /** | 603 /** |
| 841 * Get a set containing the contractions defined by the collator. The set includ
es | 604 * Get a set containing the contractions defined by the collator. The set includ
es |
| 842 * both the UCA contractions and the contractions defined by the collator | 605 * both the UCA contractions and the contractions defined by the collator |
| 843 * @param coll collator | 606 * @param coll collator |
| 844 * @param conts the set to hold the result | 607 * @param conts the set to hold the result |
| 845 * @param status to hold the error code | 608 * @param status to hold the error code |
| 846 * @return the size of the contraction set | 609 * @return the size of the contraction set |
| 847 */ | 610 */ |
| 848 U_CAPI int32_t U_EXPORT2 | 611 U_CAPI int32_t U_EXPORT2 |
| 849 ucol_getContractions( const UCollator *coll, | 612 ucol_getContractions( const UCollator *coll, |
| (...skipping 21 matching lines...) Expand all Loading... |
| 871 UBool addPrefixes, | 634 UBool addPrefixes, |
| 872 UErrorCode *status) | 635 UErrorCode *status) |
| 873 { | 636 { |
| 874 if(U_FAILURE(*status)) { | 637 if(U_FAILURE(*status)) { |
| 875 return; | 638 return; |
| 876 } | 639 } |
| 877 if(coll == NULL) { | 640 if(coll == NULL) { |
| 878 *status = U_ILLEGAL_ARGUMENT_ERROR; | 641 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 879 return; | 642 return; |
| 880 } | 643 } |
| 881 | 644 const icu::RuleBasedCollator *rbc = icu::RuleBasedCollator::rbcFromUCollator
(coll); |
| 882 if(contractions) { | 645 if(rbc == NULL) { |
| 883 uset_clear(contractions); | 646 *status = U_UNSUPPORTED_ERROR; |
| 647 return; |
| 884 } | 648 } |
| 885 if(expansions) { | 649 rbc->internalGetContractionsAndExpansions( |
| 886 uset_clear(expansions); | 650 icu::UnicodeSet::fromUSet(contractions), |
| 887 } | 651 icu::UnicodeSet::fromUSet(expansions), |
| 888 int32_t rulesLen = 0; | 652 addPrefixes, *status); |
| 889 const UChar* rules = ucol_getRules(coll, &rulesLen); | |
| 890 UColTokenParser src; | |
| 891 ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA, | |
| 892 ucol_tok_getRulesFromBundle, NULL, status); | |
| 893 | |
| 894 contContext c = { NULL, contractions, expansions, src.removeSet, addPrefixes
, status }; | |
| 895 | |
| 896 // Add the UCA contractions | |
| 897 c.coll = coll->UCA; | |
| 898 utrie_enum(&coll->UCA->mapping, NULL, _processSpecials, &c); | |
| 899 | |
| 900 // This is collator specific. Add contractions from a collator | |
| 901 c.coll = coll; | |
| 902 c.removedContractions = NULL; | |
| 903 utrie_enum(&coll->mapping, NULL, _processSpecials, &c); | |
| 904 ucol_tok_closeTokenList(&src); | |
| 905 } | |
| 906 | |
| 907 U_CAPI int32_t U_EXPORT2 | |
| 908 ucol_getUnsafeSet( const UCollator *coll, | |
| 909 USet *unsafe, | |
| 910 UErrorCode *status) | |
| 911 { | |
| 912 UChar buffer[internalBufferSize]; | |
| 913 int32_t len = 0; | |
| 914 | |
| 915 uset_clear(unsafe); | |
| 916 | |
| 917 // cccpattern = "[[:^tccc=0:][:^lccc=0:]]", unfortunately variant | |
| 918 static const UChar cccpattern[25] = { 0x5b, 0x5b, 0x3a, 0x5e, 0x74, 0x63, 0x
63, 0x63, 0x3d, 0x30, 0x3a, 0x5d, | |
| 919 0x5b, 0x3a, 0x5e, 0x6c, 0x63, 0x63, 0x63, 0x
3d, 0x30, 0x3a, 0x5d, 0x5d, 0x00 }; | |
| 920 | |
| 921 // add chars that fail the fcd check | |
| 922 uset_applyPattern(unsafe, cccpattern, 24, USET_IGNORE_SPACE, status); | |
| 923 | |
| 924 // add Thai/Lao prevowels | |
| 925 uset_addRange(unsafe, 0xe40, 0xe44); | |
| 926 uset_addRange(unsafe, 0xec0, 0xec4); | |
| 927 // add lead/trail surrogates | |
| 928 uset_addRange(unsafe, 0xd800, 0xdfff); | |
| 929 | |
| 930 USet *contractions = uset_open(0,0); | |
| 931 | |
| 932 int32_t i = 0, j = 0; | |
| 933 int32_t contsSize = ucol_getContractions(coll, contractions, status); | |
| 934 UChar32 c = 0; | |
| 935 // Contraction set consists only of strings | |
| 936 // to get unsafe code points, we need to | |
| 937 // break the strings apart and add them to the unsafe set | |
| 938 for(i = 0; i < contsSize; i++) { | |
| 939 len = uset_getItem(contractions, i, NULL, NULL, buffer, internalBufferSi
ze, status); | |
| 940 if(len > 0) { | |
| 941 j = 0; | |
| 942 while(j < len) { | |
| 943 U16_NEXT(buffer, j, len, c); | |
| 944 if(j < len) { | |
| 945 uset_add(unsafe, c); | |
| 946 } | |
| 947 } | |
| 948 } | |
| 949 } | |
| 950 | |
| 951 uset_close(contractions); | |
| 952 | |
| 953 return uset_size(unsafe); | |
| 954 } | 653 } |
| 955 #endif | 654 #endif |
| OLD | NEW |