Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Side by Side Diff: source/i18n/ucol_sit.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unused directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/i18n/ucol_res.cpp ('k') | source/i18n/ucol_tok.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 /* 1 /*
2 ******************************************************************************* 2 *******************************************************************************
3 * Copyright (C) 2004-2012, International Business Machines 3 * Copyright (C) 2004-2014, International Business Machines
4 * Corporation and others. All Rights Reserved. 4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************* 5 *******************************************************************************
6 * file name: ucol_sit.cpp 6 * file name: ucol_sit.cpp
7 * encoding: US-ASCII 7 * encoding: US-ASCII
8 * tab size: 8 (not used) 8 * tab size: 8 (not used)
9 * indentation:4 9 * indentation:4
10 * 10 *
11 * Modification history 11 * Modification history
12 * Date Name Comments 12 * Date Name Comments
13 * 03/12/2004 weiv Creation 13 * 03/12/2004 weiv Creation
14 */ 14 */
15 15
16 #include "unicode/ustring.h" 16 #include "unicode/ustring.h"
17 #include "unicode/udata.h" 17 #include "unicode/udata.h"
18 18 #include "unicode/utf16.h"
19 #include "utracimp.h" 19 #include "utracimp.h"
20 #include "ucol_imp.h" 20 #include "ucol_imp.h"
21 #include "ucol_tok.h"
22 #include "cmemory.h" 21 #include "cmemory.h"
23 #include "cstring.h" 22 #include "cstring.h"
24 #include "uresimp.h" 23 #include "uresimp.h"
25 #include "unicode/coll.h" 24 #include "unicode/coll.h"
26 25
27 #ifdef UCOL_TRACE_SIT 26 #ifdef UCOL_TRACE_SIT
28 # include <stdio.h> 27 # include <stdio.h>
29 #endif 28 #endif
30 29
31 #if !UCONFIG_NO_COLLATION 30 #if !UCONFIG_NO_COLLATION
32 31
32 #include "unicode/tblcoll.h"
33
33 enum OptionsList { 34 enum OptionsList {
34 UCOL_SIT_LANGUAGE = 0, 35 UCOL_SIT_LANGUAGE = 0,
35 UCOL_SIT_SCRIPT = 1, 36 UCOL_SIT_SCRIPT = 1,
36 UCOL_SIT_REGION = 2, 37 UCOL_SIT_REGION = 2,
37 UCOL_SIT_VARIANT = 3, 38 UCOL_SIT_VARIANT = 3,
38 UCOL_SIT_KEYWORD = 4, 39 UCOL_SIT_KEYWORD = 4,
39 UCOL_SIT_PROVIDER = 5, 40 UCOL_SIT_PROVIDER = 5,
40 UCOL_SIT_LOCELEMENT_MAX = UCOL_SIT_PROVIDER, /* the last element that's part of LocElements */ 41 UCOL_SIT_LOCELEMENT_MAX = UCOL_SIT_PROVIDER, /* the last element that's part of LocElements */
41 42
42 UCOL_SIT_BCP47, 43 UCOL_SIT_BCP47,
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
119 { 'I', UCOL_IDENTICAL }, 120 { 'I', UCOL_IDENTICAL },
120 { 'L', UCOL_LOWER_FIRST }, 121 { 'L', UCOL_LOWER_FIRST },
121 { 'N', UCOL_NON_IGNORABLE }, 122 { 'N', UCOL_NON_IGNORABLE },
122 { 'O', UCOL_ON }, 123 { 'O', UCOL_ON },
123 { 'S', UCOL_SHIFTED }, 124 { 'S', UCOL_SHIFTED },
124 { 'U', UCOL_UPPER_FIRST }, 125 { 'U', UCOL_UPPER_FIRST },
125 { 'X', UCOL_OFF } 126 { 'X', UCOL_OFF }
126 }; 127 };
127 128
128 129
129 static char
130 ucol_sit_attributeValueToLetter(UColAttributeValue value, UErrorCode *status) {
131 uint32_t i = 0;
132 for(i = 0; i < sizeof(conversions)/sizeof(conversions[0]); i++) {
133 if(conversions[i].value == value) {
134 return conversions[i].letter;
135 }
136 }
137 *status = U_ILLEGAL_ARGUMENT_ERROR;
138 #ifdef UCOL_TRACE_SIT
139 fprintf(stderr, "%s:%d: unknown UColAttributeValue %d: %s\n", __FILE__, __LI NE__, value, u_errorName(*status));
140 #endif
141 return 0;
142 }
143
144 static UColAttributeValue 130 static UColAttributeValue
145 ucol_sit_letterToAttributeValue(char letter, UErrorCode *status) { 131 ucol_sit_letterToAttributeValue(char letter, UErrorCode *status) {
146 uint32_t i = 0; 132 uint32_t i = 0;
147 for(i = 0; i < sizeof(conversions)/sizeof(conversions[0]); i++) { 133 for(i = 0; i < sizeof(conversions)/sizeof(conversions[0]); i++) {
148 if(conversions[i].letter == letter) { 134 if(conversions[i].letter == letter) {
149 return conversions[i].value; 135 return conversions[i].value;
150 } 136 }
151 } 137 }
152 *status = U_ILLEGAL_ARGUMENT_ERROR; 138 *status = U_ILLEGAL_ARGUMENT_ERROR;
153 #ifdef UCOL_TRACE_SIT 139 #ifdef UCOL_TRACE_SIT
(...skipping 410 matching lines...) Expand 10 before | Expand all | Expand 10 after
564 if(U_FAILURE(*status)) { // here it can only be a bogus value 550 if(U_FAILURE(*status)) { // here it can only be a bogus value
565 ucol_close(result); 551 ucol_close(result);
566 result = NULL; 552 result = NULL;
567 } 553 }
568 554
569 UTRACE_EXIT_PTR_STATUS(result, *status); 555 UTRACE_EXIT_PTR_STATUS(result, *status);
570 return result; 556 return result;
571 } 557 }
572 558
573 559
574 static void appendShortStringElement(const char *src, int32_t len, char *result, int32_t *resultSize, int32_t capacity, char arg)
575 {
576 if(len) {
577 if(*resultSize) {
578 if(*resultSize < capacity) {
579 uprv_strcat(result, "_");
580 }
581 (*resultSize)++;
582 }
583 *resultSize += len + 1;
584 if(*resultSize < capacity) {
585 uprv_strncat(result, &arg, 1);
586 uprv_strncat(result, src, len);
587 }
588 }
589 }
590
591 U_CAPI int32_t U_EXPORT2 560 U_CAPI int32_t U_EXPORT2
592 ucol_getShortDefinitionString(const UCollator *coll, 561 ucol_getShortDefinitionString(const UCollator *coll,
593 const char *locale, 562 const char *locale,
594 char *dst, 563 char *dst,
595 int32_t capacity, 564 int32_t capacity,
596 UErrorCode *status) 565 UErrorCode *status)
597 { 566 {
598 if(U_FAILURE(*status)) return 0; 567 if(U_FAILURE(*status)) return 0;
599 if(coll->delegate != NULL) { 568 if(coll == NULL) {
600 return ((icu::Collator*)coll->delegate)->internalGetShortDefinitionString( locale,dst,capacity,*status); 569 *status = U_ILLEGAL_ARGUMENT_ERROR;
570 return 0;
601 } 571 }
602 char buffer[internalBufferSize]; 572 return ((icu::Collator*)coll)->internalGetShortDefinitionString(locale,dst,c apacity,*status);
603 uprv_memset(buffer, 0, internalBufferSize*sizeof(char));
604 int32_t resultSize = 0;
605 char tempbuff[internalBufferSize];
606 char locBuff[internalBufferSize];
607 uprv_memset(buffer, 0, internalBufferSize*sizeof(char));
608 int32_t elementSize = 0;
609 UBool isAvailable = 0;
610 CollatorSpec s;
611 ucol_sit_initCollatorSpecs(&s);
612
613 if(!locale) {
614 locale = ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, status);
615 }
616 elementSize = ucol_getFunctionalEquivalent(locBuff, internalBufferSize, "col lation", locale, &isAvailable, status);
617
618 if(elementSize) {
619 // we should probably canonicalize here...
620 elementSize = uloc_getLanguage(locBuff, tempbuff, internalBufferSize, st atus);
621 appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c apacity*/internalBufferSize, languageArg);
622 elementSize = uloc_getCountry(locBuff, tempbuff, internalBufferSize, sta tus);
623 appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c apacity*/internalBufferSize, regionArg);
624 elementSize = uloc_getScript(locBuff, tempbuff, internalBufferSize, stat us);
625 appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c apacity*/internalBufferSize, scriptArg);
626 elementSize = uloc_getVariant(locBuff, tempbuff, internalBufferSize, sta tus);
627 appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c apacity*/internalBufferSize, variantArg);
628 elementSize = uloc_getKeywordValue(locBuff, "collation", tempbuff, inter nalBufferSize, status);
629 appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*c apacity*/internalBufferSize, keywordArg);
630 }
631
632 int32_t i = 0;
633 UColAttributeValue attribute = UCOL_DEFAULT;
634 for(i = 0; i < UCOL_SIT_ITEMS_COUNT; i++) {
635 if(options[i].action == _processCollatorOption) {
636 attribute = ucol_getAttributeOrDefault(coll, (UColAttribute)options[ i].attr, status);
637 if(attribute != UCOL_DEFAULT) {
638 char letter = ucol_sit_attributeValueToLetter(attribute, status) ;
639 appendShortStringElement(&letter, 1,
640 buffer, &resultSize, /*capacity*/internalBufferSize, options [i].optionStart);
641 }
642 }
643 }
644 if(coll->variableTopValueisDefault == FALSE) {
645 //s.variableTopValue = ucol_getVariableTop(coll, status);
646 elementSize = T_CString_integerToString(tempbuff, coll->variableTopValue , 16);
647 appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, cap acity, variableTopValArg);
648 }
649
650 UParseError parseError;
651 return ucol_normalizeShortDefinitionString(buffer, dst, capacity, &parseErro r, status);
652 } 573 }
653 574
654 U_CAPI int32_t U_EXPORT2 575 U_CAPI int32_t U_EXPORT2
655 ucol_normalizeShortDefinitionString(const char *definition, 576 ucol_normalizeShortDefinitionString(const char *definition,
656 char *destination, 577 char *destination,
657 int32_t capacity, 578 int32_t capacity,
658 UParseError *parseError, 579 UParseError *parseError,
659 UErrorCode *status) 580 UErrorCode *status)
660 { 581 {
661 582
(...skipping 10 matching lines...) Expand all
672 parseError = &pe; 593 parseError = &pe;
673 } 594 }
674 595
675 // validate 596 // validate
676 CollatorSpec s; 597 CollatorSpec s;
677 ucol_sit_initCollatorSpecs(&s); 598 ucol_sit_initCollatorSpecs(&s);
678 ucol_sit_readSpecs(&s, definition, parseError, status); 599 ucol_sit_readSpecs(&s, definition, parseError, status);
679 return ucol_sit_dumpSpecs(&s, destination, capacity, status); 600 return ucol_sit_dumpSpecs(&s, destination, capacity, status);
680 } 601 }
681 602
682 U_CAPI UColAttributeValue U_EXPORT2
683 ucol_getAttributeOrDefault(const UCollator *coll, UColAttribute attr, UErrorCode *status)
684 {
685 if(U_FAILURE(*status) || coll == NULL) {
686 return UCOL_DEFAULT;
687 }
688 switch(attr) {
689 case UCOL_NUMERIC_COLLATION:
690 return coll->numericCollationisDefault?UCOL_DEFAULT:coll->numericCollati on;
691 case UCOL_HIRAGANA_QUATERNARY_MODE:
692 return coll->hiraganaQisDefault?UCOL_DEFAULT:coll->hiraganaQ;
693 case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights* /
694 return coll->frenchCollationisDefault?UCOL_DEFAULT:coll->frenchCollation ;
695 case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/
696 return coll->alternateHandlingisDefault?UCOL_DEFAULT:coll->alternateHand ling;
697 case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */
698 return coll->caseFirstisDefault?UCOL_DEFAULT:coll->caseFirst;
699 case UCOL_CASE_LEVEL: /* do we have an extra case level */
700 return coll->caseLevelisDefault?UCOL_DEFAULT:coll->caseLevel;
701 case UCOL_NORMALIZATION_MODE: /* attribute for normalization */
702 return coll->normalizationModeisDefault?UCOL_DEFAULT:coll->normalization Mode;
703 case UCOL_STRENGTH: /* attribute for strength */
704 return coll->strengthisDefault?UCOL_DEFAULT:coll->strength;
705 case UCOL_ATTRIBUTE_COUNT:
706 default:
707 *status = U_ILLEGAL_ARGUMENT_ERROR;
708 #ifdef UCOL_TRACE_SIT
709 fprintf(stderr, "%s:%d: Unknown attr value '%d': %s\n", __FILE__, __LINE __, (int)attr, u_errorName(*status));
710 #endif
711 break;
712 }
713 return UCOL_DEFAULT;
714 }
715
716
717 struct contContext {
718 const UCollator *coll;
719 USet *conts;
720 USet *expansions;
721 USet *removedContractions;
722 UBool addPrefixes;
723 UErrorCode *status;
724 };
725
726
727
728 static void
729 addSpecial(contContext *context, UChar *buffer, int32_t bufLen,
730 uint32_t CE, int32_t leftIndex, int32_t rightIndex, UErrorCode *s tatus)
731 {
732 const UCollator *coll = context->coll;
733 USet *contractions = context->conts;
734 USet *expansions = context->expansions;
735 UBool addPrefixes = context->addPrefixes;
736
737 const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE);
738 uint32_t newCE = *(coll->contractionCEs + (UCharOffset - coll->contractionIn dex));
739 // we might have a contraction that ends from previous level
740 if(newCE != UCOL_NOT_FOUND) {
741 if(isSpecial(CE) && getCETag(CE) == CONTRACTION_TAG && isSpecial(newCE) && getCETag(newCE) == SPEC_PROC_TAG && addPrefixes) {
742 addSpecial(context, buffer, bufLen, newCE, leftIndex, rightIndex, status );
743 }
744 if(contractions && rightIndex-leftIndex > 1) {
745 uset_addString(contractions, buffer+leftIndex, rightIndex-leftIndex) ;
746 if(expansions && isSpecial(CE) && getCETag(CE) == EXPANSION_TAG) {
747 uset_addString(expansions, buffer+leftIndex, rightIndex-leftIndex) ;
748 }
749 }
750 }
751
752 UCharOffset++;
753 // check whether we're doing contraction or prefix
754 if(getCETag(CE) == SPEC_PROC_TAG && addPrefixes) {
755 if(leftIndex == 0) {
756 *status = U_INTERNAL_PROGRAM_ERROR;
757 return;
758 }
759 --leftIndex;
760 while(*UCharOffset != 0xFFFF) {
761 newCE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex ));
762 buffer[leftIndex] = *UCharOffset;
763 if(isSpecial(newCE) && (getCETag(newCE) == CONTRACTION_TAG || getCETag (newCE) == SPEC_PROC_TAG)) {
764 addSpecial(context, buffer, bufLen, newCE, leftIndex, rightIndex, status);
765 } else {
766 if(contractions) {
767 uset_addString(contractions, buffer+leftIndex, rightIndex-leftIn dex);
768 }
769 if(expansions && isSpecial(newCE) && getCETag(newCE) == EXPANSION_TA G) {
770 uset_addString(expansions, buffer+leftIndex, rightIndex-leftIndex) ;
771 }
772 }
773 UCharOffset++;
774 }
775 } else if(getCETag(CE) == CONTRACTION_TAG) {
776 if(rightIndex == bufLen-1) {
777 *status = U_INTERNAL_PROGRAM_ERROR;
778 return;
779 }
780 while(*UCharOffset != 0xFFFF) {
781 newCE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex ));
782 buffer[rightIndex] = *UCharOffset;
783 if(isSpecial(newCE) && (getCETag(newCE) == CONTRACTION_TAG || getCETag (newCE) == SPEC_PROC_TAG)) {
784 addSpecial(context, buffer, bufLen, newCE, leftIndex, rightIndex+1 , status);
785 } else {
786 if(contractions) {
787 uset_addString(contractions, buffer+leftIndex, rightIndex+1-leftIn dex);
788 }
789 if(expansions && isSpecial(newCE) && getCETag(newCE) == EXPANSION_TA G) {
790 uset_addString(expansions, buffer+leftIndex, rightIndex+1-leftInde x);
791 }
792 }
793 UCharOffset++;
794 }
795 }
796
797 }
798
799 U_CDECL_BEGIN
800 static UBool U_CALLCONV
801 _processSpecials(const void *context, UChar32 start, UChar32 limit, uint32_t CE)
802 {
803 UErrorCode *status = ((contContext *)context)->status;
804 USet *expansions = ((contContext *)context)->expansions;
805 USet *removed = ((contContext *)context)->removedContractions;
806 UBool addPrefixes = ((contContext *)context)->addPrefixes;
807 UChar contraction[internalBufferSize];
808 if(isSpecial(CE)) {
809 if(((getCETag(CE) == SPEC_PROC_TAG && addPrefixes) || getCETag(CE) == CONT RACTION_TAG)) {
810 while(start < limit && U_SUCCESS(*status)) {
811 // if there are suppressed contractions, we don't
812 // want to add them.
813 if(removed && uset_contains(removed, start)) {
814 start++;
815 continue;
816 }
817 // we start our contraction from middle, since we don't know if it
818 // will grow toward right or left
819 contraction[internalBufferSize/2] = (UChar)start;
820 addSpecial(((contContext *)context), contraction, internalBufferSize , CE, internalBufferSize/2, internalBufferSize/2+1, status);
821 start++;
822 }
823 } else if(expansions && getCETag(CE) == EXPANSION_TAG) {
824 while(start < limit && U_SUCCESS(*status)) {
825 uset_add(expansions, start++);
826 }
827 }
828 }
829 if(U_FAILURE(*status)) {
830 return FALSE;
831 } else {
832 return TRUE;
833 }
834 }
835
836 U_CDECL_END
837
838
839
840 /** 603 /**
841 * Get a set containing the contractions defined by the collator. The set includes 604 * Get a set containing the contractions defined by the collator. The set includes
842 * both the UCA contractions and the contractions defined by the collator 605 * both the UCA contractions and the contractions defined by the collator
843 * @param coll collator 606 * @param coll collator
844 * @param conts the set to hold the result 607 * @param conts the set to hold the result
845 * @param status to hold the error code 608 * @param status to hold the error code
846 * @return the size of the contraction set 609 * @return the size of the contraction set
847 */ 610 */
848 U_CAPI int32_t U_EXPORT2 611 U_CAPI int32_t U_EXPORT2
849 ucol_getContractions( const UCollator *coll, 612 ucol_getContractions( const UCollator *coll,
(...skipping 21 matching lines...) Expand all
871 UBool addPrefixes, 634 UBool addPrefixes,
872 UErrorCode *status) 635 UErrorCode *status)
873 { 636 {
874 if(U_FAILURE(*status)) { 637 if(U_FAILURE(*status)) {
875 return; 638 return;
876 } 639 }
877 if(coll == NULL) { 640 if(coll == NULL) {
878 *status = U_ILLEGAL_ARGUMENT_ERROR; 641 *status = U_ILLEGAL_ARGUMENT_ERROR;
879 return; 642 return;
880 } 643 }
881 644 const icu::RuleBasedCollator *rbc = icu::RuleBasedCollator::rbcFromUCollator (coll);
882 if(contractions) { 645 if(rbc == NULL) {
883 uset_clear(contractions); 646 *status = U_UNSUPPORTED_ERROR;
647 return;
884 } 648 }
885 if(expansions) { 649 rbc->internalGetContractionsAndExpansions(
886 uset_clear(expansions); 650 icu::UnicodeSet::fromUSet(contractions),
887 } 651 icu::UnicodeSet::fromUSet(expansions),
888 int32_t rulesLen = 0; 652 addPrefixes, *status);
889 const UChar* rules = ucol_getRules(coll, &rulesLen);
890 UColTokenParser src;
891 ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA,
892 ucol_tok_getRulesFromBundle, NULL, status);
893
894 contContext c = { NULL, contractions, expansions, src.removeSet, addPrefixes , status };
895
896 // Add the UCA contractions
897 c.coll = coll->UCA;
898 utrie_enum(&coll->UCA->mapping, NULL, _processSpecials, &c);
899
900 // This is collator specific. Add contractions from a collator
901 c.coll = coll;
902 c.removedContractions = NULL;
903 utrie_enum(&coll->mapping, NULL, _processSpecials, &c);
904 ucol_tok_closeTokenList(&src);
905 }
906
907 U_CAPI int32_t U_EXPORT2
908 ucol_getUnsafeSet( const UCollator *coll,
909 USet *unsafe,
910 UErrorCode *status)
911 {
912 UChar buffer[internalBufferSize];
913 int32_t len = 0;
914
915 uset_clear(unsafe);
916
917 // cccpattern = "[[:^tccc=0:][:^lccc=0:]]", unfortunately variant
918 static const UChar cccpattern[25] = { 0x5b, 0x5b, 0x3a, 0x5e, 0x74, 0x63, 0x 63, 0x63, 0x3d, 0x30, 0x3a, 0x5d,
919 0x5b, 0x3a, 0x5e, 0x6c, 0x63, 0x63, 0x63, 0x 3d, 0x30, 0x3a, 0x5d, 0x5d, 0x00 };
920
921 // add chars that fail the fcd check
922 uset_applyPattern(unsafe, cccpattern, 24, USET_IGNORE_SPACE, status);
923
924 // add Thai/Lao prevowels
925 uset_addRange(unsafe, 0xe40, 0xe44);
926 uset_addRange(unsafe, 0xec0, 0xec4);
927 // add lead/trail surrogates
928 uset_addRange(unsafe, 0xd800, 0xdfff);
929
930 USet *contractions = uset_open(0,0);
931
932 int32_t i = 0, j = 0;
933 int32_t contsSize = ucol_getContractions(coll, contractions, status);
934 UChar32 c = 0;
935 // Contraction set consists only of strings
936 // to get unsafe code points, we need to
937 // break the strings apart and add them to the unsafe set
938 for(i = 0; i < contsSize; i++) {
939 len = uset_getItem(contractions, i, NULL, NULL, buffer, internalBufferSi ze, status);
940 if(len > 0) {
941 j = 0;
942 while(j < len) {
943 U16_NEXT(buffer, j, len, c);
944 if(j < len) {
945 uset_add(unsafe, c);
946 }
947 }
948 }
949 }
950
951 uset_close(contractions);
952
953 return uset_size(unsafe);
954 } 653 }
955 #endif 654 #endif
OLD NEW
« no previous file with comments | « source/i18n/ucol_res.cpp ('k') | source/i18n/ucol_tok.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698