OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 2004-2012, International Business Machines | 3 * Copyright (C) 2004-2014, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 * file name: ucol_sit.cpp | 6 * file name: ucol_sit.cpp |
7 * encoding: US-ASCII | 7 * encoding: US-ASCII |
8 * tab size: 8 (not used) | 8 * tab size: 8 (not used) |
9 * indentation:4 | 9 * indentation:4 |
10 * | 10 * |
11 * Modification history | 11 * Modification history |
12 * Date Name Comments | 12 * Date Name Comments |
13 * 03/12/2004 weiv Creation | 13 * 03/12/2004 weiv Creation |
14 */ | 14 */ |
15 | 15 |
16 #include "unicode/ustring.h" | 16 #include "unicode/ustring.h" |
17 #include "unicode/udata.h" | 17 #include "unicode/udata.h" |
18 | 18 #include "unicode/utf16.h" |
19 #include "utracimp.h" | 19 #include "utracimp.h" |
20 #include "ucol_imp.h" | 20 #include "ucol_imp.h" |
21 #include "ucol_tok.h" | |
22 #include "cmemory.h" | 21 #include "cmemory.h" |
23 #include "cstring.h" | 22 #include "cstring.h" |
24 #include "uresimp.h" | 23 #include "uresimp.h" |
25 #include "unicode/coll.h" | 24 #include "unicode/coll.h" |
26 | 25 |
27 #ifdef UCOL_TRACE_SIT | 26 #ifdef UCOL_TRACE_SIT |
28 # include <stdio.h> | 27 # include <stdio.h> |
29 #endif | 28 #endif |
30 | 29 |
31 #if !UCONFIG_NO_COLLATION | 30 #if !UCONFIG_NO_COLLATION |
32 | 31 |
| 32 #include "unicode/tblcoll.h" |
| 33 |
33 enum OptionsList { | 34 enum OptionsList { |
34 UCOL_SIT_LANGUAGE = 0, | 35 UCOL_SIT_LANGUAGE = 0, |
35 UCOL_SIT_SCRIPT = 1, | 36 UCOL_SIT_SCRIPT = 1, |
36 UCOL_SIT_REGION = 2, | 37 UCOL_SIT_REGION = 2, |
37 UCOL_SIT_VARIANT = 3, | 38 UCOL_SIT_VARIANT = 3, |
38 UCOL_SIT_KEYWORD = 4, | 39 UCOL_SIT_KEYWORD = 4, |
39 UCOL_SIT_PROVIDER = 5, | 40 UCOL_SIT_PROVIDER = 5, |
40 UCOL_SIT_LOCELEMENT_MAX = UCOL_SIT_PROVIDER, /* the last element that's part
of LocElements */ | 41 UCOL_SIT_LOCELEMENT_MAX = UCOL_SIT_PROVIDER, /* the last element that's part
of LocElements */ |
41 | 42 |
42 UCOL_SIT_BCP47, | 43 UCOL_SIT_BCP47, |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
119 { 'I', UCOL_IDENTICAL }, | 120 { 'I', UCOL_IDENTICAL }, |
120 { 'L', UCOL_LOWER_FIRST }, | 121 { 'L', UCOL_LOWER_FIRST }, |
121 { 'N', UCOL_NON_IGNORABLE }, | 122 { 'N', UCOL_NON_IGNORABLE }, |
122 { 'O', UCOL_ON }, | 123 { 'O', UCOL_ON }, |
123 { 'S', UCOL_SHIFTED }, | 124 { 'S', UCOL_SHIFTED }, |
124 { 'U', UCOL_UPPER_FIRST }, | 125 { 'U', UCOL_UPPER_FIRST }, |
125 { 'X', UCOL_OFF } | 126 { 'X', UCOL_OFF } |
126 }; | 127 }; |
127 | 128 |
128 | 129 |
129 static char | |
130 ucol_sit_attributeValueToLetter(UColAttributeValue value, UErrorCode *status) { | |
131 uint32_t i = 0; | |
132 for(i = 0; i < sizeof(conversions)/sizeof(conversions[0]); i++) { | |
133 if(conversions[i].value == value) { | |
134 return conversions[i].letter; | |
135 } | |
136 } | |
137 *status = U_ILLEGAL_ARGUMENT_ERROR; | |
138 #ifdef UCOL_TRACE_SIT | |
139 fprintf(stderr, "%s:%d: unknown UColAttributeValue %d: %s\n", __FILE__, __LI
NE__, value, u_errorName(*status)); | |
140 #endif | |
141 return 0; | |
142 } | |
143 | |
144 static UColAttributeValue | 130 static UColAttributeValue |
145 ucol_sit_letterToAttributeValue(char letter, UErrorCode *status) { | 131 ucol_sit_letterToAttributeValue(char letter, UErrorCode *status) { |
146 uint32_t i = 0; | 132 uint32_t i = 0; |
147 for(i = 0; i < sizeof(conversions)/sizeof(conversions[0]); i++) { | 133 for(i = 0; i < sizeof(conversions)/sizeof(conversions[0]); i++) { |
148 if(conversions[i].letter == letter) { | 134 if(conversions[i].letter == letter) { |
149 return conversions[i].value; | 135 return conversions[i].value; |
150 } | 136 } |
151 } | 137 } |
152 *status = U_ILLEGAL_ARGUMENT_ERROR; | 138 *status = U_ILLEGAL_ARGUMENT_ERROR; |
153 #ifdef UCOL_TRACE_SIT | 139 #ifdef UCOL_TRACE_SIT |
(...skipping 410 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
564 if(U_FAILURE(*status)) { // here it can only be a bogus value | 550 if(U_FAILURE(*status)) { // here it can only be a bogus value |
565 ucol_close(result); | 551 ucol_close(result); |
566 result = NULL; | 552 result = NULL; |
567 } | 553 } |
568 | 554 |
569 UTRACE_EXIT_PTR_STATUS(result, *status); | 555 UTRACE_EXIT_PTR_STATUS(result, *status); |
570 return result; | 556 return result; |
571 } | 557 } |
572 | 558 |
573 | 559 |
574 static void appendShortStringElement(const char *src, int32_t len, char *result,
int32_t *resultSize, int32_t capacity, char arg) | |
575 { | |
576 if(len) { | |
577 if(*resultSize) { | |
578 if(*resultSize < capacity) { | |
579 uprv_strcat(result, "_"); | |
580 } | |
581 (*resultSize)++; | |
582 } | |
583 *resultSize += len + 1; | |
584 if(*resultSize < capacity) { | |
585 uprv_strncat(result, &arg, 1); | |
586 uprv_strncat(result, src, len); | |
587 } | |
588 } | |
589 } | |
590 | |
591 U_CAPI int32_t U_EXPORT2 | 560 U_CAPI int32_t U_EXPORT2 |
592 ucol_getShortDefinitionString(const UCollator *coll, | 561 ucol_getShortDefinitionString(const UCollator *coll, |
593 const char *locale, | 562 const char *locale, |
594 char *dst, | 563 char *dst, |
595 int32_t capacity, | 564 int32_t capacity, |
596 UErrorCode *status) | 565 UErrorCode *status) |
597 { | 566 { |
598 if(U_FAILURE(*status)) return 0; | 567 if(U_FAILURE(*status)) return 0; |
599 if(coll->delegate != NULL) { | 568 if(coll == NULL) { |
600 return ((icu::Collator*)coll->delegate)->internalGetShortDefinitionString(
locale,dst,capacity,*status); | 569 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 570 return 0; |
601 } | 571 } |
602 char buffer[internalBufferSize]; | 572 return ((icu::Collator*)coll)->internalGetShortDefinitionString(locale,dst,c
apacity,*status); |
603 uprv_memset(buffer, 0, internalBufferSize*sizeof(char)); | |
604 int32_t resultSize = 0; | |
605 char tempbuff[internalBufferSize]; | |
606 char locBuff[internalBufferSize]; | |
607 uprv_memset(buffer, 0, internalBufferSize*sizeof(char)); | |
608 int32_t elementSize = 0; | |
609 UBool isAvailable = 0; | |
610 CollatorSpec s; | |
611 ucol_sit_initCollatorSpecs(&s); | |
612 | |
613 if(!locale) { | |
614 locale = ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, status); | |
615 } | |
616 elementSize = ucol_getFunctionalEquivalent(locBuff, internalBufferSize, "col
lation", locale, &isAvailable, status); | |
617 | |
618 if(elementSize) { | |
619 // we should probably canonicalize here... | |
620 elementSize = uloc_getLanguage(locBuff, tempbuff, internalBufferSize, st
atus); | |
621 appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c
apacity*/internalBufferSize, languageArg); | |
622 elementSize = uloc_getCountry(locBuff, tempbuff, internalBufferSize, sta
tus); | |
623 appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c
apacity*/internalBufferSize, regionArg); | |
624 elementSize = uloc_getScript(locBuff, tempbuff, internalBufferSize, stat
us); | |
625 appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c
apacity*/internalBufferSize, scriptArg); | |
626 elementSize = uloc_getVariant(locBuff, tempbuff, internalBufferSize, sta
tus); | |
627 appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c
apacity*/internalBufferSize, variantArg); | |
628 elementSize = uloc_getKeywordValue(locBuff, "collation", tempbuff, inter
nalBufferSize, status); | |
629 appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c
apacity*/internalBufferSize, keywordArg); | |
630 } | |
631 | |
632 int32_t i = 0; | |
633 UColAttributeValue attribute = UCOL_DEFAULT; | |
634 for(i = 0; i < UCOL_SIT_ITEMS_COUNT; i++) { | |
635 if(options[i].action == _processCollatorOption) { | |
636 attribute = ucol_getAttributeOrDefault(coll, (UColAttribute)options[
i].attr, status); | |
637 if(attribute != UCOL_DEFAULT) { | |
638 char letter = ucol_sit_attributeValueToLetter(attribute, status)
; | |
639 appendShortStringElement(&letter, 1, | |
640 buffer, &resultSize, /*capacity*/internalBufferSize, options
[i].optionStart); | |
641 } | |
642 } | |
643 } | |
644 if(coll->variableTopValueisDefault == FALSE) { | |
645 //s.variableTopValue = ucol_getVariableTop(coll, status); | |
646 elementSize = T_CString_integerToString(tempbuff, coll->variableTopValue
, 16); | |
647 appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, cap
acity, variableTopValArg); | |
648 } | |
649 | |
650 UParseError parseError; | |
651 return ucol_normalizeShortDefinitionString(buffer, dst, capacity, &parseErro
r, status); | |
652 } | 573 } |
653 | 574 |
654 U_CAPI int32_t U_EXPORT2 | 575 U_CAPI int32_t U_EXPORT2 |
655 ucol_normalizeShortDefinitionString(const char *definition, | 576 ucol_normalizeShortDefinitionString(const char *definition, |
656 char *destination, | 577 char *destination, |
657 int32_t capacity, | 578 int32_t capacity, |
658 UParseError *parseError, | 579 UParseError *parseError, |
659 UErrorCode *status) | 580 UErrorCode *status) |
660 { | 581 { |
661 | 582 |
(...skipping 10 matching lines...) Expand all Loading... |
672 parseError = &pe; | 593 parseError = &pe; |
673 } | 594 } |
674 | 595 |
675 // validate | 596 // validate |
676 CollatorSpec s; | 597 CollatorSpec s; |
677 ucol_sit_initCollatorSpecs(&s); | 598 ucol_sit_initCollatorSpecs(&s); |
678 ucol_sit_readSpecs(&s, definition, parseError, status); | 599 ucol_sit_readSpecs(&s, definition, parseError, status); |
679 return ucol_sit_dumpSpecs(&s, destination, capacity, status); | 600 return ucol_sit_dumpSpecs(&s, destination, capacity, status); |
680 } | 601 } |
681 | 602 |
682 U_CAPI UColAttributeValue U_EXPORT2 | |
683 ucol_getAttributeOrDefault(const UCollator *coll, UColAttribute attr, UErrorCode
*status) | |
684 { | |
685 if(U_FAILURE(*status) || coll == NULL) { | |
686 return UCOL_DEFAULT; | |
687 } | |
688 switch(attr) { | |
689 case UCOL_NUMERIC_COLLATION: | |
690 return coll->numericCollationisDefault?UCOL_DEFAULT:coll->numericCollati
on; | |
691 case UCOL_HIRAGANA_QUATERNARY_MODE: | |
692 return coll->hiraganaQisDefault?UCOL_DEFAULT:coll->hiraganaQ; | |
693 case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*
/ | |
694 return coll->frenchCollationisDefault?UCOL_DEFAULT:coll->frenchCollation
; | |
695 case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/ | |
696 return coll->alternateHandlingisDefault?UCOL_DEFAULT:coll->alternateHand
ling; | |
697 case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */ | |
698 return coll->caseFirstisDefault?UCOL_DEFAULT:coll->caseFirst; | |
699 case UCOL_CASE_LEVEL: /* do we have an extra case level */ | |
700 return coll->caseLevelisDefault?UCOL_DEFAULT:coll->caseLevel; | |
701 case UCOL_NORMALIZATION_MODE: /* attribute for normalization */ | |
702 return coll->normalizationModeisDefault?UCOL_DEFAULT:coll->normalization
Mode; | |
703 case UCOL_STRENGTH: /* attribute for strength */ | |
704 return coll->strengthisDefault?UCOL_DEFAULT:coll->strength; | |
705 case UCOL_ATTRIBUTE_COUNT: | |
706 default: | |
707 *status = U_ILLEGAL_ARGUMENT_ERROR; | |
708 #ifdef UCOL_TRACE_SIT | |
709 fprintf(stderr, "%s:%d: Unknown attr value '%d': %s\n", __FILE__, __LINE
__, (int)attr, u_errorName(*status)); | |
710 #endif | |
711 break; | |
712 } | |
713 return UCOL_DEFAULT; | |
714 } | |
715 | |
716 | |
717 struct contContext { | |
718 const UCollator *coll; | |
719 USet *conts; | |
720 USet *expansions; | |
721 USet *removedContractions; | |
722 UBool addPrefixes; | |
723 UErrorCode *status; | |
724 }; | |
725 | |
726 | |
727 | |
728 static void | |
729 addSpecial(contContext *context, UChar *buffer, int32_t bufLen, | |
730 uint32_t CE, int32_t leftIndex, int32_t rightIndex, UErrorCode *s
tatus) | |
731 { | |
732 const UCollator *coll = context->coll; | |
733 USet *contractions = context->conts; | |
734 USet *expansions = context->expansions; | |
735 UBool addPrefixes = context->addPrefixes; | |
736 | |
737 const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE); | |
738 uint32_t newCE = *(coll->contractionCEs + (UCharOffset - coll->contractionIn
dex)); | |
739 // we might have a contraction that ends from previous level | |
740 if(newCE != UCOL_NOT_FOUND) { | |
741 if(isSpecial(CE) && getCETag(CE) == CONTRACTION_TAG && isSpecial(newCE) &&
getCETag(newCE) == SPEC_PROC_TAG && addPrefixes) { | |
742 addSpecial(context, buffer, bufLen, newCE, leftIndex, rightIndex, status
); | |
743 } | |
744 if(contractions && rightIndex-leftIndex > 1) { | |
745 uset_addString(contractions, buffer+leftIndex, rightIndex-leftIndex)
; | |
746 if(expansions && isSpecial(CE) && getCETag(CE) == EXPANSION_TAG) { | |
747 uset_addString(expansions, buffer+leftIndex, rightIndex-leftIndex)
; | |
748 } | |
749 } | |
750 } | |
751 | |
752 UCharOffset++; | |
753 // check whether we're doing contraction or prefix | |
754 if(getCETag(CE) == SPEC_PROC_TAG && addPrefixes) { | |
755 if(leftIndex == 0) { | |
756 *status = U_INTERNAL_PROGRAM_ERROR; | |
757 return; | |
758 } | |
759 --leftIndex; | |
760 while(*UCharOffset != 0xFFFF) { | |
761 newCE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex
)); | |
762 buffer[leftIndex] = *UCharOffset; | |
763 if(isSpecial(newCE) && (getCETag(newCE) == CONTRACTION_TAG || getCETag
(newCE) == SPEC_PROC_TAG)) { | |
764 addSpecial(context, buffer, bufLen, newCE, leftIndex, rightIndex,
status); | |
765 } else { | |
766 if(contractions) { | |
767 uset_addString(contractions, buffer+leftIndex, rightIndex-leftIn
dex); | |
768 } | |
769 if(expansions && isSpecial(newCE) && getCETag(newCE) == EXPANSION_TA
G) { | |
770 uset_addString(expansions, buffer+leftIndex, rightIndex-leftIndex)
; | |
771 } | |
772 } | |
773 UCharOffset++; | |
774 } | |
775 } else if(getCETag(CE) == CONTRACTION_TAG) { | |
776 if(rightIndex == bufLen-1) { | |
777 *status = U_INTERNAL_PROGRAM_ERROR; | |
778 return; | |
779 } | |
780 while(*UCharOffset != 0xFFFF) { | |
781 newCE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex
)); | |
782 buffer[rightIndex] = *UCharOffset; | |
783 if(isSpecial(newCE) && (getCETag(newCE) == CONTRACTION_TAG || getCETag
(newCE) == SPEC_PROC_TAG)) { | |
784 addSpecial(context, buffer, bufLen, newCE, leftIndex, rightIndex+1
, status); | |
785 } else { | |
786 if(contractions) { | |
787 uset_addString(contractions, buffer+leftIndex, rightIndex+1-leftIn
dex); | |
788 } | |
789 if(expansions && isSpecial(newCE) && getCETag(newCE) == EXPANSION_TA
G) { | |
790 uset_addString(expansions, buffer+leftIndex, rightIndex+1-leftInde
x); | |
791 } | |
792 } | |
793 UCharOffset++; | |
794 } | |
795 } | |
796 | |
797 } | |
798 | |
799 U_CDECL_BEGIN | |
800 static UBool U_CALLCONV | |
801 _processSpecials(const void *context, UChar32 start, UChar32 limit, uint32_t CE) | |
802 { | |
803 UErrorCode *status = ((contContext *)context)->status; | |
804 USet *expansions = ((contContext *)context)->expansions; | |
805 USet *removed = ((contContext *)context)->removedContractions; | |
806 UBool addPrefixes = ((contContext *)context)->addPrefixes; | |
807 UChar contraction[internalBufferSize]; | |
808 if(isSpecial(CE)) { | |
809 if(((getCETag(CE) == SPEC_PROC_TAG && addPrefixes) || getCETag(CE) == CONT
RACTION_TAG)) { | |
810 while(start < limit && U_SUCCESS(*status)) { | |
811 // if there are suppressed contractions, we don't | |
812 // want to add them. | |
813 if(removed && uset_contains(removed, start)) { | |
814 start++; | |
815 continue; | |
816 } | |
817 // we start our contraction from middle, since we don't know if it | |
818 // will grow toward right or left | |
819 contraction[internalBufferSize/2] = (UChar)start; | |
820 addSpecial(((contContext *)context), contraction, internalBufferSize
, CE, internalBufferSize/2, internalBufferSize/2+1, status); | |
821 start++; | |
822 } | |
823 } else if(expansions && getCETag(CE) == EXPANSION_TAG) { | |
824 while(start < limit && U_SUCCESS(*status)) { | |
825 uset_add(expansions, start++); | |
826 } | |
827 } | |
828 } | |
829 if(U_FAILURE(*status)) { | |
830 return FALSE; | |
831 } else { | |
832 return TRUE; | |
833 } | |
834 } | |
835 | |
836 U_CDECL_END | |
837 | |
838 | |
839 | |
840 /** | 603 /** |
841 * Get a set containing the contractions defined by the collator. The set includ
es | 604 * Get a set containing the contractions defined by the collator. The set includ
es |
842 * both the UCA contractions and the contractions defined by the collator | 605 * both the UCA contractions and the contractions defined by the collator |
843 * @param coll collator | 606 * @param coll collator |
844 * @param conts the set to hold the result | 607 * @param conts the set to hold the result |
845 * @param status to hold the error code | 608 * @param status to hold the error code |
846 * @return the size of the contraction set | 609 * @return the size of the contraction set |
847 */ | 610 */ |
848 U_CAPI int32_t U_EXPORT2 | 611 U_CAPI int32_t U_EXPORT2 |
849 ucol_getContractions( const UCollator *coll, | 612 ucol_getContractions( const UCollator *coll, |
(...skipping 21 matching lines...) Expand all Loading... |
871 UBool addPrefixes, | 634 UBool addPrefixes, |
872 UErrorCode *status) | 635 UErrorCode *status) |
873 { | 636 { |
874 if(U_FAILURE(*status)) { | 637 if(U_FAILURE(*status)) { |
875 return; | 638 return; |
876 } | 639 } |
877 if(coll == NULL) { | 640 if(coll == NULL) { |
878 *status = U_ILLEGAL_ARGUMENT_ERROR; | 641 *status = U_ILLEGAL_ARGUMENT_ERROR; |
879 return; | 642 return; |
880 } | 643 } |
881 | 644 const icu::RuleBasedCollator *rbc = icu::RuleBasedCollator::rbcFromUCollator
(coll); |
882 if(contractions) { | 645 if(rbc == NULL) { |
883 uset_clear(contractions); | 646 *status = U_UNSUPPORTED_ERROR; |
| 647 return; |
884 } | 648 } |
885 if(expansions) { | 649 rbc->internalGetContractionsAndExpansions( |
886 uset_clear(expansions); | 650 icu::UnicodeSet::fromUSet(contractions), |
887 } | 651 icu::UnicodeSet::fromUSet(expansions), |
888 int32_t rulesLen = 0; | 652 addPrefixes, *status); |
889 const UChar* rules = ucol_getRules(coll, &rulesLen); | |
890 UColTokenParser src; | |
891 ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA, | |
892 ucol_tok_getRulesFromBundle, NULL, status); | |
893 | |
894 contContext c = { NULL, contractions, expansions, src.removeSet, addPrefixes
, status }; | |
895 | |
896 // Add the UCA contractions | |
897 c.coll = coll->UCA; | |
898 utrie_enum(&coll->UCA->mapping, NULL, _processSpecials, &c); | |
899 | |
900 // This is collator specific. Add contractions from a collator | |
901 c.coll = coll; | |
902 c.removedContractions = NULL; | |
903 utrie_enum(&coll->mapping, NULL, _processSpecials, &c); | |
904 ucol_tok_closeTokenList(&src); | |
905 } | |
906 | |
907 U_CAPI int32_t U_EXPORT2 | |
908 ucol_getUnsafeSet( const UCollator *coll, | |
909 USet *unsafe, | |
910 UErrorCode *status) | |
911 { | |
912 UChar buffer[internalBufferSize]; | |
913 int32_t len = 0; | |
914 | |
915 uset_clear(unsafe); | |
916 | |
917 // cccpattern = "[[:^tccc=0:][:^lccc=0:]]", unfortunately variant | |
918 static const UChar cccpattern[25] = { 0x5b, 0x5b, 0x3a, 0x5e, 0x74, 0x63, 0x
63, 0x63, 0x3d, 0x30, 0x3a, 0x5d, | |
919 0x5b, 0x3a, 0x5e, 0x6c, 0x63, 0x63, 0x63, 0x
3d, 0x30, 0x3a, 0x5d, 0x5d, 0x00 }; | |
920 | |
921 // add chars that fail the fcd check | |
922 uset_applyPattern(unsafe, cccpattern, 24, USET_IGNORE_SPACE, status); | |
923 | |
924 // add Thai/Lao prevowels | |
925 uset_addRange(unsafe, 0xe40, 0xe44); | |
926 uset_addRange(unsafe, 0xec0, 0xec4); | |
927 // add lead/trail surrogates | |
928 uset_addRange(unsafe, 0xd800, 0xdfff); | |
929 | |
930 USet *contractions = uset_open(0,0); | |
931 | |
932 int32_t i = 0, j = 0; | |
933 int32_t contsSize = ucol_getContractions(coll, contractions, status); | |
934 UChar32 c = 0; | |
935 // Contraction set consists only of strings | |
936 // to get unsafe code points, we need to | |
937 // break the strings apart and add them to the unsafe set | |
938 for(i = 0; i < contsSize; i++) { | |
939 len = uset_getItem(contractions, i, NULL, NULL, buffer, internalBufferSi
ze, status); | |
940 if(len > 0) { | |
941 j = 0; | |
942 while(j < len) { | |
943 U16_NEXT(buffer, j, len, c); | |
944 if(j < len) { | |
945 uset_add(unsafe, c); | |
946 } | |
947 } | |
948 } | |
949 } | |
950 | |
951 uset_close(contractions); | |
952 | |
953 return uset_size(unsafe); | |
954 } | 653 } |
955 #endif | 654 #endif |
OLD | NEW |