OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * | 3 * |
4 * Copyright (C) 2002-2013, International Business Machines | 4 * Copyright (C) 2002-2014, International Business Machines |
5 * Corporation and others. All Rights Reserved. | 5 * Corporation and others. All Rights Reserved. |
6 * | 6 * |
7 ******************************************************************************* | 7 ******************************************************************************* |
8 * file name: uprops.cpp | 8 * file name: uprops.cpp |
9 * encoding: US-ASCII | 9 * encoding: US-ASCII |
10 * tab size: 8 (not used) | 10 * tab size: 8 (not used) |
11 * indentation:4 | 11 * indentation:4 |
12 * | 12 * |
13 * created on: 2002feb24 | 13 * created on: 2002feb24 |
14 * created by: Markus W. Scherer | 14 * created by: Markus W. Scherer |
15 * | 15 * |
16 * Implementations for mostly non-core Unicode character properties | 16 * Implementations for mostly non-core Unicode character properties |
17 * stored in uprops.icu. | 17 * stored in uprops.icu. |
18 * | 18 * |
19 * With the APIs implemented here, almost all properties files and | 19 * With the APIs implemented here, almost all properties files and |
20 * their associated implementation files are used from this file, | 20 * their associated implementation files are used from this file, |
21 * including those for normalization and case mappings. | 21 * including those for normalization and case mappings. |
22 */ | 22 */ |
23 | 23 |
24 #include "unicode/utypes.h" | 24 #include "unicode/utypes.h" |
25 #include "unicode/uchar.h" | 25 #include "unicode/uchar.h" |
26 #include "unicode/unorm2.h" | 26 #include "unicode/unorm2.h" |
27 #include "unicode/uscript.h" | 27 #include "unicode/uscript.h" |
28 #include "unicode/ustring.h" | 28 #include "unicode/ustring.h" |
29 #include "cstring.h" | 29 #include "cstring.h" |
30 #include "normalizer2impl.h" | 30 #include "normalizer2impl.h" |
31 #include "ucln_cmn.h" | |
32 #include "umutex.h" | 31 #include "umutex.h" |
33 #include "ubidi_props.h" | 32 #include "ubidi_props.h" |
34 #include "uprops.h" | 33 #include "uprops.h" |
35 #include "ucase.h" | 34 #include "ucase.h" |
36 #include "ustr_imp.h" | 35 #include "ustr_imp.h" |
37 | 36 |
38 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | |
39 | |
40 U_NAMESPACE_USE | 37 U_NAMESPACE_USE |
41 | 38 |
42 #define GET_BIDI_PROPS() ubidi_getSingleton() | 39 #define GET_BIDI_PROPS() ubidi_getSingleton() |
43 | 40 |
44 /* general properties API functions ----------------------------------------- */ | 41 /* general properties API functions ----------------------------------------- */ |
45 | 42 |
46 struct BinaryProperty; | 43 struct BinaryProperty; |
47 | 44 |
48 typedef UBool BinaryPropertyContains(const BinaryProperty &prop, UChar32 c, UPro
perty which); | 45 typedef UBool BinaryPropertyContains(const BinaryProperty &prop, UChar32 c, UPro
perty which); |
49 | 46 |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
102 #endif | 99 #endif |
103 | 100 |
104 #if UCONFIG_NO_NORMALIZATION | 101 #if UCONFIG_NO_NORMALIZATION |
105 static UBool changesWhenCasefolded(const BinaryProperty &, UChar32, UProperty) { | 102 static UBool changesWhenCasefolded(const BinaryProperty &, UChar32, UProperty) { |
106 return FALSE; | 103 return FALSE; |
107 } | 104 } |
108 #else | 105 #else |
109 static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UP
roperty /*which*/) { | 106 static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UP
roperty /*which*/) { |
110 UnicodeString nfd; | 107 UnicodeString nfd; |
111 UErrorCode errorCode=U_ZERO_ERROR; | 108 UErrorCode errorCode=U_ZERO_ERROR; |
112 const Normalizer2 *nfcNorm2=Normalizer2Factory::getNFCInstance(errorCode); | 109 const Normalizer2 *nfcNorm2=Normalizer2::getNFCInstance(errorCode); |
113 if(U_FAILURE(errorCode)) { | 110 if(U_FAILURE(errorCode)) { |
114 return FALSE; | 111 return FALSE; |
115 } | 112 } |
116 if(nfcNorm2->getDecomposition(c, nfd)) { | 113 if(nfcNorm2->getDecomposition(c, nfd)) { |
117 /* c has a decomposition */ | 114 /* c has a decomposition */ |
118 if(nfd.length()==1) { | 115 if(nfd.length()==1) { |
119 c=nfd[0]; /* single BMP code point */ | 116 c=nfd[0]; /* single BMP code point */ |
120 } else if(nfd.length()<=U16_MAX_LENGTH && | 117 } else if(nfd.length()<=U16_MAX_LENGTH && |
121 nfd.length()==U16_LENGTH(c=nfd.char32At(0)) | 118 nfd.length()==U16_LENGTH(c=nfd.char32At(0)) |
122 ) { | 119 ) { |
123 /* single supplementary code point */ | 120 /* single supplementary code point */ |
124 } else { | 121 } else { |
125 c=U_SENTINEL; | 122 c=U_SENTINEL; |
126 } | 123 } |
127 } else if(c<0) { | 124 } else if(c<0) { |
128 return FALSE; /* protect against bad input */ | 125 return FALSE; /* protect against bad input */ |
129 } | 126 } |
130 if(c>=0) { | 127 if(c>=0) { |
131 /* single code point */ | 128 /* single code point */ |
132 const UCaseProps *csp=ucase_getSingleton(); | 129 const UCaseProps *csp=ucase_getSingleton(); |
133 const UChar *resultString; | 130 const UChar *resultString; |
134 return (UBool)(ucase_toFullFolding(csp, c, &resultString, U_FOLD_CASE_DE
FAULT)>=0); | 131 return (UBool)(ucase_toFullFolding(csp, c, &resultString, U_FOLD_CASE_DE
FAULT)>=0); |
135 } else { | 132 } else { |
136 /* guess some large but stack-friendly capacity */ | 133 /* guess some large but stack-friendly capacity */ |
137 UChar dest[2*UCASE_MAX_STRING_LENGTH]; | 134 UChar dest[2*UCASE_MAX_STRING_LENGTH]; |
138 int32_t destLength; | 135 int32_t destLength; |
139 destLength=u_strFoldCase(dest, LENGTHOF(dest), | 136 destLength=u_strFoldCase(dest, UPRV_LENGTHOF(dest), |
140 nfd.getBuffer(), nfd.length(), | 137 nfd.getBuffer(), nfd.length(), |
141 U_FOLD_CASE_DEFAULT, &errorCode); | 138 U_FOLD_CASE_DEFAULT, &errorCode); |
142 return (UBool)(U_SUCCESS(errorCode) && | 139 return (UBool)(U_SUCCESS(errorCode) && |
143 0!=u_strCompare(nfd.getBuffer(), nfd.length(), | 140 0!=u_strCompare(nfd.getBuffer(), nfd.length(), |
144 dest, destLength, FALSE)); | 141 dest, destLength, FALSE)); |
145 } | 142 } |
146 } | 143 } |
147 #endif | 144 #endif |
148 | 145 |
149 #if UCONFIG_NO_NORMALIZATION | 146 #if UCONFIG_NO_NORMALIZATION |
(...skipping 225 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
375 U_HST_VOWEL_JAMO /* U_GCB_V */ | 372 U_HST_VOWEL_JAMO /* U_GCB_V */ |
376 /* | 373 /* |
377 * Omit GCB values beyond what we need for hst. | 374 * Omit GCB values beyond what we need for hst. |
378 * The code below checks for the array length. | 375 * The code below checks for the array length. |
379 */ | 376 */ |
380 }; | 377 }; |
381 | 378 |
382 static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UPr
operty /*which*/) { | 379 static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UPr
operty /*which*/) { |
383 /* see comments on gcbToHst[] above */ | 380 /* see comments on gcbToHst[] above */ |
384 int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_
GCB_SHIFT; | 381 int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_
GCB_SHIFT; |
385 if(gcb<LENGTHOF(gcbToHst)) { | 382 if(gcb<UPRV_LENGTHOF(gcbToHst)) { |
386 return gcbToHst[gcb]; | 383 return gcbToHst[gcb]; |
387 } else { | 384 } else { |
388 return U_HST_NOT_APPLICABLE; | 385 return U_HST_NOT_APPLICABLE; |
389 } | 386 } |
390 } | 387 } |
391 | 388 |
392 #if UCONFIG_NO_NORMALIZATION | 389 #if UCONFIG_NO_NORMALIZATION |
393 static int32_t getNormQuickCheck(const IntProperty &, UChar32, UProperty) { | 390 static int32_t getNormQuickCheck(const IntProperty &, UChar32, UProperty) { |
394 return 0; | 391 return 0; |
395 } | 392 } |
(...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
565 if(destCapacity<0 || (dest==NULL && destCapacity>0)) { | 562 if(destCapacity<0 || (dest==NULL && destCapacity>0)) { |
566 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | 563 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
567 return 0; | 564 return 0; |
568 } | 565 } |
569 // Compute the FC_NFKC_Closure on the fly: | 566 // Compute the FC_NFKC_Closure on the fly: |
570 // We have the API for complete coverage of Unicode properties, although | 567 // We have the API for complete coverage of Unicode properties, although |
571 // this value by itself is not useful via API. | 568 // this value by itself is not useful via API. |
572 // (What could be useful is a custom normalization table that combines | 569 // (What could be useful is a custom normalization table that combines |
573 // case folding and NFKC.) | 570 // case folding and NFKC.) |
574 // For the derivation, see Unicode's DerivedNormalizationProps.txt. | 571 // For the derivation, see Unicode's DerivedNormalizationProps.txt. |
575 const Normalizer2 *nfkc=Normalizer2Factory::getNFKCInstance(*pErrorCode); | 572 const Normalizer2 *nfkc=Normalizer2::getNFKCInstance(*pErrorCode); |
576 const UCaseProps *csp=ucase_getSingleton(); | 573 const UCaseProps *csp=ucase_getSingleton(); |
577 if(U_FAILURE(*pErrorCode)) { | 574 if(U_FAILURE(*pErrorCode)) { |
578 return 0; | 575 return 0; |
579 } | 576 } |
580 // first: b = NFKC(Fold(a)) | 577 // first: b = NFKC(Fold(a)) |
581 UnicodeString folded1String; | 578 UnicodeString folded1String; |
582 const UChar *folded1; | 579 const UChar *folded1; |
583 int32_t folded1Length=ucase_toFullFolding(csp, c, &folded1, U_FOLD_CASE_DEFA
ULT); | 580 int32_t folded1Length=ucase_toFullFolding(csp, c, &folded1, U_FOLD_CASE_DEFA
ULT); |
584 if(folded1Length<0) { | 581 if(folded1Length<0) { |
585 const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc); | 582 const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc); |
(...skipping 14 matching lines...) Expand all Loading... |
600 UnicodeString kc2=nfkc->normalize(folded2String.foldCase(), *pErrorCode); | 597 UnicodeString kc2=nfkc->normalize(folded2String.foldCase(), *pErrorCode); |
601 // if (c != b) add the mapping from a to c | 598 // if (c != b) add the mapping from a to c |
602 if(U_FAILURE(*pErrorCode) || kc1==kc2) { | 599 if(U_FAILURE(*pErrorCode) || kc1==kc2) { |
603 return u_terminateUChars(dest, destCapacity, 0, pErrorCode); | 600 return u_terminateUChars(dest, destCapacity, 0, pErrorCode); |
604 } else { | 601 } else { |
605 return kc2.extract(dest, destCapacity, *pErrorCode); | 602 return kc2.extract(dest, destCapacity, *pErrorCode); |
606 } | 603 } |
607 } | 604 } |
608 | 605 |
609 #endif | 606 #endif |
OLD | NEW |