OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * |
| 4 * Copyright (C) 2002-2010, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ******************************************************************************* |
| 8 * file name: uprops.cpp |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * created on: 2002feb24 |
| 14 * created by: Markus W. Scherer |
| 15 * |
| 16 * Implementations for mostly non-core Unicode character properties |
| 17 * stored in uprops.icu. |
| 18 * |
| 19 * With the APIs implemented here, almost all properties files and |
| 20 * their associated implementation files are used from this file, |
| 21 * including those for normalization and case mappings. |
| 22 */ |
| 23 |
| 24 #include "unicode/utypes.h" |
| 25 #include "unicode/uchar.h" |
| 26 #include "unicode/unorm2.h" |
| 27 #include "unicode/uscript.h" |
| 28 #include "unicode/ustring.h" |
| 29 #include "cstring.h" |
| 30 #include "normalizer2impl.h" |
| 31 #include "ucln_cmn.h" |
| 32 #include "umutex.h" |
| 33 #include "ubidi_props.h" |
| 34 #include "uprops.h" |
| 35 #include "ucase.h" |
| 36 #include "ustr_imp.h" |
| 37 |
/*
 * Number of elements of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the macro behaves as a single
 * primary expression in every context (for example as a sizeof operand).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
| 39 |
| 40 U_NAMESPACE_USE |
| 41 |
/* Shorthand for the bidi properties singleton passed to the ubidi_*() helpers in this file. */
#define GET_BIDI_PROPS() (ubidi_getSingleton())
| 43 |
| 44 /* general properties API functions ----------------------------------------- */ |
| 45 |
| 46 struct BinaryProperty; |
| 47 |
| 48 typedef UBool BinaryPropertyContains(const BinaryProperty &prop, UChar32 c, UPro
perty which); |
| 49 |
| 50 struct BinaryProperty { |
| 51 int32_t column; // SRC_PROPSVEC column, or "source" if mask==0 |
| 52 uint32_t mask; |
| 53 BinaryPropertyContains *contains; |
| 54 }; |
| 55 |
| 56 static UBool defaultContains(const BinaryProperty &prop, UChar32 c, UProperty /*
which*/) { |
| 57 /* systematic, directly stored properties */ |
| 58 return (u_getUnicodeProperties(c, prop.column)&prop.mask)!=0; |
| 59 } |
| 60 |
| 61 static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32
c, UProperty which) { |
| 62 return ucase_hasBinaryProperty(c, which); |
| 63 } |
| 64 |
| 65 static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty
/*which*/) { |
| 66 return ubidi_isBidiControl(GET_BIDI_PROPS(), c); |
| 67 } |
| 68 |
| 69 static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*w
hich*/) { |
| 70 return ubidi_isMirrored(GET_BIDI_PROPS(), c); |
| 71 } |
| 72 |
| 73 static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty
/*which*/) { |
| 74 return ubidi_isJoinControl(GET_BIDI_PROPS(), c); |
| 75 } |
| 76 |
| 77 #if UCONFIG_NO_NORMALIZATION |
| 78 static UBool hasFullCompositionExclusion(const BinaryProperty &, UChar32, UPrope
rty) { |
| 79 return FALSE; |
| 80 } |
| 81 #else |
| 82 static UBool hasFullCompositionExclusion(const BinaryProperty &/*prop*/, UChar32
c, UProperty /*which*/) { |
| 83 // By definition, Full_Composition_Exclusion is the same as NFC_QC=No. |
| 84 UErrorCode errorCode=U_ZERO_ERROR; |
| 85 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); |
| 86 return U_SUCCESS(errorCode) && impl->isCompNo(impl->getNorm16(c)); |
| 87 } |
| 88 #endif |
| 89 |
| 90 // UCHAR_NF*_INERT properties |
| 91 #if UCONFIG_NO_NORMALIZATION |
| 92 static UBool isNormInert(const BinaryProperty &, UChar32, UProperty) { |
| 93 return FALSE; |
| 94 } |
| 95 #else |
| 96 static UBool isNormInert(const BinaryProperty &/*prop*/, UChar32 c, UProperty wh
ich) { |
| 97 UErrorCode errorCode=U_ZERO_ERROR; |
| 98 const Normalizer2 *norm2=Normalizer2Factory::getInstance( |
| 99 (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode); |
| 100 return U_SUCCESS(errorCode) && norm2->isInert(c); |
| 101 } |
| 102 #endif |
| 103 |
| 104 #if UCONFIG_NO_NORMALIZATION |
| 105 static UBool changesWhenCasefolded(const BinaryProperty &, UChar32, UProperty) { |
| 106 return FALSE; |
| 107 } |
| 108 #else |
| 109 static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UP
roperty /*which*/) { |
| 110 UnicodeString nfd; |
| 111 UErrorCode errorCode=U_ZERO_ERROR; |
| 112 const Normalizer2 *nfcNorm2=Normalizer2Factory::getNFCInstance(errorCode); |
| 113 if(U_FAILURE(errorCode)) { |
| 114 return FALSE; |
| 115 } |
| 116 if(nfcNorm2->getDecomposition(c, nfd)) { |
| 117 /* c has a decomposition */ |
| 118 if(nfd.length()==1) { |
| 119 c=nfd[0]; /* single BMP code point */ |
| 120 } else if(nfd.length()<=U16_MAX_LENGTH && |
| 121 nfd.length()==U16_LENGTH(c=nfd.char32At(0)) |
| 122 ) { |
| 123 /* single supplementary code point */ |
| 124 } else { |
| 125 c=U_SENTINEL; |
| 126 } |
| 127 } else if(c<0) { |
| 128 return FALSE; /* protect against bad input */ |
| 129 } |
| 130 if(c>=0) { |
| 131 /* single code point */ |
| 132 const UCaseProps *csp=ucase_getSingleton(); |
| 133 const UChar *resultString; |
| 134 return (UBool)(ucase_toFullFolding(csp, c, &resultString, U_FOLD_CASE_DE
FAULT)>=0); |
| 135 } else { |
| 136 /* guess some large but stack-friendly capacity */ |
| 137 UChar dest[2*UCASE_MAX_STRING_LENGTH]; |
| 138 int32_t destLength; |
| 139 destLength=u_strFoldCase(dest, LENGTHOF(dest), |
| 140 nfd.getBuffer(), nfd.length(), |
| 141 U_FOLD_CASE_DEFAULT, &errorCode); |
| 142 return (UBool)(U_SUCCESS(errorCode) && |
| 143 0!=u_strCompare(nfd.getBuffer(), nfd.length(), |
| 144 dest, destLength, FALSE)); |
| 145 } |
| 146 } |
| 147 #endif |
| 148 |
| 149 #if UCONFIG_NO_NORMALIZATION |
| 150 static UBool changesWhenNFKC_Casefolded(const BinaryProperty &, UChar32, UProper
ty) { |
| 151 return FALSE; |
| 152 } |
| 153 #else |
| 154 static UBool changesWhenNFKC_Casefolded(const BinaryProperty &/*prop*/, UChar32
c, UProperty /*which*/) { |
| 155 UErrorCode errorCode=U_ZERO_ERROR; |
| 156 const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode); |
| 157 if(U_FAILURE(errorCode)) { |
| 158 return FALSE; |
| 159 } |
| 160 UnicodeString src(c); |
| 161 UnicodeString dest; |
| 162 { |
| 163 // The ReorderingBuffer must be in a block because its destructor |
| 164 // needs to release dest's buffer before we look at its contents. |
| 165 ReorderingBuffer buffer(*kcf, dest); |
| 166 // Small destCapacity for NFKC_CF(c). |
| 167 if(buffer.init(5, errorCode)) { |
| 168 const UChar *srcArray=src.getBuffer(); |
| 169 kcf->compose(srcArray, srcArray+src.length(), FALSE, |
| 170 TRUE, buffer, errorCode); |
| 171 } |
| 172 } |
| 173 return U_SUCCESS(errorCode) && dest!=src; |
| 174 } |
| 175 #endif |
| 176 |
| 177 #if UCONFIG_NO_NORMALIZATION |
| 178 static UBool isCanonSegmentStarter(const BinaryProperty &, UChar32, UProperty) { |
| 179 return FALSE; |
| 180 } |
| 181 #else |
| 182 static UBool isCanonSegmentStarter(const BinaryProperty &/*prop*/, UChar32 c, UP
roperty /*which*/) { |
| 183 UErrorCode errorCode=U_ZERO_ERROR; |
| 184 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); |
| 185 return |
| 186 U_SUCCESS(errorCode) && impl->ensureCanonIterData(errorCode) && |
| 187 impl->isCanonSegmentStarter(c); |
| 188 } |
| 189 #endif |
| 190 |
| 191 static UBool isPOSIX_alnum(const BinaryProperty &/*prop*/, UChar32 c, UProperty
/*which*/) { |
| 192 return u_isalnumPOSIX(c); |
| 193 } |
| 194 |
| 195 static UBool isPOSIX_blank(const BinaryProperty &/*prop*/, UChar32 c, UProperty
/*which*/) { |
| 196 return u_isblank(c); |
| 197 } |
| 198 |
| 199 static UBool isPOSIX_graph(const BinaryProperty &/*prop*/, UChar32 c, UProperty
/*which*/) { |
| 200 return u_isgraphPOSIX(c); |
| 201 } |
| 202 |
| 203 static UBool isPOSIX_print(const BinaryProperty &/*prop*/, UChar32 c, UProperty
/*which*/) { |
| 204 return u_isprintPOSIX(c); |
| 205 } |
| 206 |
| 207 static UBool isPOSIX_xdigit(const BinaryProperty &/*prop*/, UChar32 c, UProperty
/*which*/) { |
| 208 return u_isxdigit(c); |
| 209 } |
| 210 |
| 211 static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={ |
| 212 /* |
| 213 * column and mask values for binary properties from u_getUnicodeProperties(
). |
| 214 * Must be in order of corresponding UProperty, |
| 215 * and there must be exactly one entry per binary UProperty. |
| 216 * |
| 217 * Properties with mask==0 and contains==NULL are handled in code. |
| 218 * For them, column is the UPropertySource value. |
| 219 */ |
| 220 { 1, U_MASK(UPROPS_ALPHABETIC), defaultContains }, |
| 221 { 1, U_MASK(UPROPS_ASCII_HEX_DIGIT), defaultContains }, |
| 222 { UPROPS_SRC_BIDI, 0, isBidiControl }, |
| 223 { UPROPS_SRC_BIDI, 0, isMirrored }, |
| 224 { 1, U_MASK(UPROPS_DASH), defaultContains }, |
| 225 { 1, U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT), defaultCont
ains }, |
| 226 { 1, U_MASK(UPROPS_DEPRECATED), defaultContains }, |
| 227 { 1, U_MASK(UPROPS_DIACRITIC), defaultContains }, |
| 228 { 1, U_MASK(UPROPS_EXTENDER), defaultContains }, |
| 229 { UPROPS_SRC_NFC, 0, hasFullCompositionExclusion }, |
| 230 { 1, U_MASK(UPROPS_GRAPHEME_BASE), defaultContains }, |
| 231 { 1, U_MASK(UPROPS_GRAPHEME_EXTEND), defaultContains }, |
| 232 { 1, U_MASK(UPROPS_GRAPHEME_LINK), defaultContains }, |
| 233 { 1, U_MASK(UPROPS_HEX_DIGIT), defaultContains }, |
| 234 { 1, U_MASK(UPROPS_HYPHEN), defaultContains }, |
| 235 { 1, U_MASK(UPROPS_ID_CONTINUE), defaultContains }, |
| 236 { 1, U_MASK(UPROPS_ID_START), defaultContains }, |
| 237 { 1, U_MASK(UPROPS_IDEOGRAPHIC), defaultContains }, |
| 238 { 1, U_MASK(UPROPS_IDS_BINARY_OPERATOR), defaultContains }, |
| 239 { 1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR), defaultContains }, |
| 240 { UPROPS_SRC_BIDI, 0, isJoinControl }, |
| 241 { 1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION), defaultContains
}, |
| 242 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_LOWERCASE |
| 243 { 1, U_MASK(UPROPS_MATH), defaultContains }, |
| 244 { 1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT), defaultContains
}, |
| 245 { 1, U_MASK(UPROPS_QUOTATION_MARK), defaultContains }, |
| 246 { 1, U_MASK(UPROPS_RADICAL), defaultContains }, |
| 247 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_SOFT_DOTTED |
| 248 { 1, U_MASK(UPROPS_TERMINAL_PUNCTUATION), defaultContains }, |
| 249 { 1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH), defaultContains }, |
| 250 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_UPPERCASE |
| 251 { 1, U_MASK(UPROPS_WHITE_SPACE), defaultContains }, |
| 252 { 1, U_MASK(UPROPS_XID_CONTINUE), defaultContains }, |
| 253 { 1, U_MASK(UPROPS_XID_START), defaultContains }, |
| 254 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_SENSITIV
E |
| 255 { 1, U_MASK(UPROPS_S_TERM), defaultContains }, |
| 256 { 1, U_MASK(UPROPS_VARIATION_SELECTOR), defaultContains }, |
| 257 { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFD_INERT |
| 258 { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKD_INERT |
| 259 { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFC_INERT |
| 260 { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKC_INERT |
| 261 { UPROPS_SRC_NFC_CANON_ITER, 0, isCanonSegmentStarter }, |
| 262 { 1, U_MASK(UPROPS_PATTERN_SYNTAX), defaultContains }, |
| 263 { 1, U_MASK(UPROPS_PATTERN_WHITE_SPACE), defaultContains }, |
| 264 { UPROPS_SRC_CHAR_AND_PROPSVEC, 0, isPOSIX_alnum }, |
| 265 { UPROPS_SRC_CHAR, 0, isPOSIX_blank }, |
| 266 { UPROPS_SRC_CHAR, 0, isPOSIX_graph }, |
| 267 { UPROPS_SRC_CHAR, 0, isPOSIX_print }, |
| 268 { UPROPS_SRC_CHAR, 0, isPOSIX_xdigit }, |
| 269 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASED |
| 270 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_IGNORABL
E |
| 271 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_
LOWERCASED |
| 272 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_
UPPERCASED |
| 273 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_
TITLECASED |
| 274 { UPROPS_SRC_CASE_AND_NORM, 0, changesWhenCasefolded }, |
| 275 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_
CASEMAPPED |
| 276 { UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded } |
| 277 }; |
| 278 |
| 279 U_CAPI UBool U_EXPORT2 |
| 280 u_hasBinaryProperty(UChar32 c, UProperty which) { |
| 281 /* c is range-checked in the functions that are called from here */ |
| 282 if(which<UCHAR_BINARY_START || UCHAR_BINARY_LIMIT<=which) { |
| 283 /* not a known binary property */ |
| 284 return FALSE; |
| 285 } else { |
| 286 const BinaryProperty &prop=binProps[which]; |
| 287 return prop.contains(prop, c, which); |
| 288 } |
| 289 } |
| 290 |
| 291 #if !UCONFIG_NO_NORMALIZATION |
| 292 |
| 293 U_CAPI uint8_t U_EXPORT2 |
| 294 u_getCombiningClass(UChar32 c) { |
| 295 UErrorCode errorCode=U_ZERO_ERROR; |
| 296 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); |
| 297 if(U_SUCCESS(errorCode)) { |
| 298 return impl->getCC(impl->getNorm16(c)); |
| 299 } else { |
| 300 return 0; |
| 301 } |
| 302 } |
| 303 |
| 304 static uint16_t |
| 305 getFCD16(UChar32 c) { |
| 306 UErrorCode errorCode=U_ZERO_ERROR; |
| 307 const UTrie2 *trie=Normalizer2Factory::getFCDTrie(errorCode); |
| 308 if(U_SUCCESS(errorCode)) { |
| 309 return UTRIE2_GET16(trie, c); |
| 310 } else { |
| 311 return 0; |
| 312 } |
| 313 } |
| 314 |
| 315 #endif |
| 316 |
| 317 struct IntProperty; |
| 318 |
| 319 typedef int32_t IntPropertyGetValue(const IntProperty &prop, UChar32 c, UPropert
y which); |
| 320 typedef int32_t IntPropertyGetMaxValue(const IntProperty &prop, UProperty which)
; |
| 321 |
| 322 struct IntProperty { |
| 323 int32_t column; // SRC_PROPSVEC column, or "source" if mask==0 |
| 324 uint32_t mask; |
| 325 int32_t shift; // =maxValue if getMaxValueFromShift() is used |
| 326 IntPropertyGetValue *getValue; |
| 327 IntPropertyGetMaxValue *getMaxValue; |
| 328 }; |
| 329 |
| 330 static int32_t defaultGetValue(const IntProperty &prop, UChar32 c, UProperty /*w
hich*/) { |
| 331 /* systematic, directly stored properties */ |
| 332 return (int32_t)(u_getUnicodeProperties(c, prop.column)&prop.mask)>>prop.shi
ft; |
| 333 } |
| 334 |
| 335 static int32_t defaultGetMaxValue(const IntProperty &prop, UProperty /*which*/)
{ |
| 336 return (uprv_getMaxValues(prop.column)&prop.mask)>>prop.shift; |
| 337 } |
| 338 |
| 339 static int32_t getMaxValueFromShift(const IntProperty &prop, UProperty /*which*/
) { |
| 340 return prop.shift; |
| 341 } |
| 342 |
| 343 static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*
which*/) { |
| 344 return (int32_t)u_charDirection(c); |
| 345 } |
| 346 |
| 347 static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) { |
| 348 return ubidi_getMaxValue(GET_BIDI_PROPS(), which); |
| 349 } |
| 350 |
| 351 #if UCONFIG_NO_NORMALIZATION |
| 352 static int32_t getCombiningClass(const IntProperty &, UChar32, UProperty) { |
| 353 return 0; |
| 354 } |
| 355 #else |
| 356 static int32_t getCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProper
ty /*which*/) { |
| 357 return u_getCombiningClass(c); |
| 358 } |
| 359 #endif |
| 360 |
| 361 static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UPrope
rty /*which*/) { |
| 362 return (int32_t)u_charType(c); |
| 363 } |
| 364 |
| 365 static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty
/*which*/) { |
| 366 return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c); |
| 367 } |
| 368 |
| 369 static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty
/*which*/) { |
| 370 return ubidi_getJoiningType(GET_BIDI_PROPS(), c); |
| 371 } |
| 372 |
| 373 static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty
/*which*/) { |
| 374 int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getUnicodeProperties(c, -1)); |
| 375 return UPROPS_NTV_GET_TYPE(ntv); |
| 376 } |
| 377 |
| 378 static int32_t getScript(const IntProperty &/*prop*/, UChar32 c, UProperty /*whi
ch*/) { |
| 379 UErrorCode errorCode=U_ZERO_ERROR; |
| 380 return (int32_t)uscript_getScript(c, &errorCode); |
| 381 } |
| 382 |
| 383 /* |
| 384 * Map some of the Grapheme Cluster Break values to Hangul Syllable Types. |
| 385 * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Bre
ak. |
| 386 */ |
| 387 static const UHangulSyllableType gcbToHst[]={ |
| 388 U_HST_NOT_APPLICABLE, /* U_GCB_OTHER */ |
| 389 U_HST_NOT_APPLICABLE, /* U_GCB_CONTROL */ |
| 390 U_HST_NOT_APPLICABLE, /* U_GCB_CR */ |
| 391 U_HST_NOT_APPLICABLE, /* U_GCB_EXTEND */ |
| 392 U_HST_LEADING_JAMO, /* U_GCB_L */ |
| 393 U_HST_NOT_APPLICABLE, /* U_GCB_LF */ |
| 394 U_HST_LV_SYLLABLE, /* U_GCB_LV */ |
| 395 U_HST_LVT_SYLLABLE, /* U_GCB_LVT */ |
| 396 U_HST_TRAILING_JAMO, /* U_GCB_T */ |
| 397 U_HST_VOWEL_JAMO /* U_GCB_V */ |
| 398 /* |
| 399 * Omit GCB values beyond what we need for hst. |
| 400 * The code below checks for the array length. |
| 401 */ |
| 402 }; |
| 403 |
| 404 static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UPr
operty /*which*/) { |
| 405 /* see comments on gcbToHst[] above */ |
| 406 int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_
GCB_SHIFT; |
| 407 if(gcb<LENGTHOF(gcbToHst)) { |
| 408 return gcbToHst[gcb]; |
| 409 } else { |
| 410 return U_HST_NOT_APPLICABLE; |
| 411 } |
| 412 } |
| 413 |
| 414 #if UCONFIG_NO_NORMALIZATION |
| 415 static int32_t getNormQuickCheck(const IntProperty &, UChar32, UProperty) { |
| 416 return 0; |
| 417 } |
| 418 #else |
| 419 static int32_t getNormQuickCheck(const IntProperty &/*prop*/, UChar32 c, UProper
ty which) { |
| 420 return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_
QUICK_CHECK+UNORM_NFD)); |
| 421 } |
| 422 #endif |
| 423 |
| 424 #if UCONFIG_NO_NORMALIZATION |
| 425 static int32_t getLeadCombiningClass(const IntProperty &, UChar32, UProperty) { |
| 426 return 0; |
| 427 } |
| 428 #else |
| 429 static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UPr
operty /*which*/) { |
| 430 return getFCD16(c)>>8; |
| 431 } |
| 432 #endif |
| 433 |
| 434 #if UCONFIG_NO_NORMALIZATION |
| 435 static int32_t getTrailCombiningClass(const IntProperty &, UChar32, UProperty) { |
| 436 return 0; |
| 437 } |
| 438 #else |
| 439 static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UP
roperty /*which*/) { |
| 440 return getFCD16(c)&0xff; |
| 441 } |
| 442 #endif |
| 443 |
| 444 static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={ |
| 445 /* |
| 446 * column, mask and shift values for int-value properties from u_getUnicodeP
roperties(). |
| 447 * Must be in order of corresponding UProperty, |
| 448 * and there must be exactly one entry per int UProperty. |
| 449 * |
| 450 * Properties with mask==0 and getValue==NULL are handled in code. |
| 451 * For them, column is the UPropertySource value. |
| 452 */ |
| 453 { UPROPS_SRC_BIDI, 0, 0, getBiDiClass, biDiGe
tMaxValue }, |
| 454 { 0, UPROPS_BLOCK_MASK, UPROPS_BLOCK_SHIFT, defaultGetValue,
defaultGetMaxValue }, |
| 455 { UPROPS_SRC_NFC, 0, 0xff, getCombiningClass, g
etMaxValueFromShift }, |
| 456 { 2, UPROPS_DT_MASK, 0, defaultGetValue, def
aultGetMaxValue }, |
| 457 { 0, UPROPS_EA_MASK, UPROPS_EA_SHIFT, defaultGetValue, def
aultGetMaxValue }, |
| 458 { UPROPS_SRC_CHAR, 0, (int32_t)U_CHAR_CATEGORY_COUNT-1,getGeneralCategory,
getMaxValueFromShift }, |
| 459 { UPROPS_SRC_BIDI, 0, 0, getJoiningGroup, biD
iGetMaxValue }, |
| 460 { UPROPS_SRC_BIDI, 0, 0, getJoiningType, biDi
GetMaxValue }, |
| 461 { 2, UPROPS_LB_MASK, UPROPS_LB_SHIFT, defaultGetValue, def
aultGetMaxValue }, |
| 462 { UPROPS_SRC_CHAR, 0, (int32_t)U_NT_COUNT-1, getNumericType, getM
axValueFromShift }, |
| 463 { 0, UPROPS_SCRIPT_MASK, 0, getScript, defaultGe
tMaxValue }, |
| 464 { UPROPS_SRC_PROPSVEC, 0, (int32_t)U_HST_COUNT-1, getHangulSyllableTyp
e, getMaxValueFromShift }, |
| 465 // UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" |
| 466 { UPROPS_SRC_NFC, 0, (int32_t)UNORM_YES, getNormQuickCheck, g
etMaxValueFromShift }, |
| 467 // UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" |
| 468 { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_YES, getNormQuickCheck, g
etMaxValueFromShift }, |
| 469 // UCHAR_NFC_QUICK_CHECK: max=2=MAYBE |
| 470 { UPROPS_SRC_NFC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, g
etMaxValueFromShift }, |
| 471 // UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE |
| 472 { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, g
etMaxValueFromShift }, |
| 473 { UPROPS_SRC_NFC, 0, 0xff, getLeadCombiningClas
s, getMaxValueFromShift }, |
| 474 { UPROPS_SRC_NFC, 0, 0xff, getTrailCombiningCla
ss, getMaxValueFromShift }, |
| 475 { 2, UPROPS_GCB_MASK, UPROPS_GCB_SHIFT, defaultGetValue, def
aultGetMaxValue }, |
| 476 { 2, UPROPS_SB_MASK, UPROPS_SB_SHIFT, defaultGetValue, def
aultGetMaxValue }, |
| 477 { 2, UPROPS_WB_MASK, UPROPS_WB_SHIFT, defaultGetValue, def
aultGetMaxValue } |
| 478 }; |
| 479 |
| 480 U_CAPI int32_t U_EXPORT2 |
| 481 u_getIntPropertyValue(UChar32 c, UProperty which) { |
| 482 if(which<UCHAR_INT_START) { |
| 483 if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) { |
| 484 const BinaryProperty &prop=binProps[which]; |
| 485 return prop.contains(prop, c, which); |
| 486 } |
| 487 } else if(which<UCHAR_INT_LIMIT) { |
| 488 const IntProperty &prop=intProps[which-UCHAR_INT_START]; |
| 489 return prop.getValue(prop, c, which); |
| 490 } else if(which==UCHAR_GENERAL_CATEGORY_MASK) { |
| 491 return U_MASK(u_charType(c)); |
| 492 } |
| 493 return 0; // undefined |
| 494 } |
| 495 |
| 496 U_CAPI int32_t U_EXPORT2 |
| 497 u_getIntPropertyMinValue(UProperty /*which*/) { |
| 498 return 0; /* all binary/enum/int properties have a minimum value of 0 */ |
| 499 } |
| 500 |
| 501 U_CAPI int32_t U_EXPORT2 |
| 502 u_getIntPropertyMaxValue(UProperty which) { |
| 503 if(which<UCHAR_INT_START) { |
| 504 if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) { |
| 505 return 1; // maximum TRUE for all binary properties |
| 506 } |
| 507 } else if(which<UCHAR_INT_LIMIT) { |
| 508 const IntProperty &prop=intProps[which-UCHAR_INT_START]; |
| 509 return prop.getMaxValue(prop, which); |
| 510 } |
| 511 return -1; // undefined |
| 512 } |
| 513 |
| 514 U_CFUNC UPropertySource U_EXPORT2 |
| 515 uprops_getSource(UProperty which) { |
| 516 if(which<UCHAR_BINARY_START) { |
| 517 return UPROPS_SRC_NONE; /* undefined */ |
| 518 } else if(which<UCHAR_BINARY_LIMIT) { |
| 519 const BinaryProperty &prop=binProps[which]; |
| 520 if(prop.mask!=0) { |
| 521 return UPROPS_SRC_PROPSVEC; |
| 522 } else { |
| 523 return (UPropertySource)prop.column; |
| 524 } |
| 525 } else if(which<UCHAR_INT_START) { |
| 526 return UPROPS_SRC_NONE; /* undefined */ |
| 527 } else if(which<UCHAR_INT_LIMIT) { |
| 528 const IntProperty &prop=intProps[which-UCHAR_INT_START]; |
| 529 if(prop.mask!=0) { |
| 530 return UPROPS_SRC_PROPSVEC; |
| 531 } else { |
| 532 return (UPropertySource)prop.column; |
| 533 } |
| 534 } else if(which<UCHAR_STRING_START) { |
| 535 switch(which) { |
| 536 case UCHAR_GENERAL_CATEGORY_MASK: |
| 537 case UCHAR_NUMERIC_VALUE: |
| 538 return UPROPS_SRC_CHAR; |
| 539 |
| 540 default: |
| 541 return UPROPS_SRC_NONE; |
| 542 } |
| 543 } else if(which<UCHAR_STRING_LIMIT) { |
| 544 switch(which) { |
| 545 case UCHAR_AGE: |
| 546 return UPROPS_SRC_PROPSVEC; |
| 547 |
| 548 case UCHAR_BIDI_MIRRORING_GLYPH: |
| 549 return UPROPS_SRC_BIDI; |
| 550 |
| 551 case UCHAR_CASE_FOLDING: |
| 552 case UCHAR_LOWERCASE_MAPPING: |
| 553 case UCHAR_SIMPLE_CASE_FOLDING: |
| 554 case UCHAR_SIMPLE_LOWERCASE_MAPPING: |
| 555 case UCHAR_SIMPLE_TITLECASE_MAPPING: |
| 556 case UCHAR_SIMPLE_UPPERCASE_MAPPING: |
| 557 case UCHAR_TITLECASE_MAPPING: |
| 558 case UCHAR_UPPERCASE_MAPPING: |
| 559 return UPROPS_SRC_CASE; |
| 560 |
| 561 case UCHAR_ISO_COMMENT: |
| 562 case UCHAR_NAME: |
| 563 case UCHAR_UNICODE_1_NAME: |
| 564 return UPROPS_SRC_NAMES; |
| 565 |
| 566 default: |
| 567 return UPROPS_SRC_NONE; |
| 568 } |
| 569 } else { |
| 570 switch(which) { |
| 571 case UCHAR_SCRIPT_EXTENSIONS: |
| 572 return UPROPS_SRC_PROPSVEC; |
| 573 default: |
| 574 return UPROPS_SRC_NONE; /* undefined */ |
| 575 } |
| 576 } |
| 577 } |
| 578 |
| 579 #if !UCONFIG_NO_NORMALIZATION |
| 580 |
| 581 U_CAPI int32_t U_EXPORT2 |
| 582 u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *p
ErrorCode) { |
| 583 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
| 584 return 0; |
| 585 } |
| 586 if(destCapacity<0 || (dest==NULL && destCapacity>0)) { |
| 587 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 588 return 0; |
| 589 } |
| 590 // Compute the FC_NFKC_Closure on the fly: |
| 591 // We have the API for complete coverage of Unicode properties, although |
| 592 // this value by itself is not useful via API. |
| 593 // (What could be useful is a custom normalization table that combines |
| 594 // case folding and NFKC.) |
| 595 // For the derivation, see Unicode's DerivedNormalizationProps.txt. |
| 596 const Normalizer2 *nfkc=Normalizer2Factory::getNFKCInstance(*pErrorCode); |
| 597 const UCaseProps *csp=ucase_getSingleton(); |
| 598 if(U_FAILURE(*pErrorCode)) { |
| 599 return 0; |
| 600 } |
| 601 // first: b = NFKC(Fold(a)) |
| 602 UnicodeString folded1String; |
| 603 const UChar *folded1; |
| 604 int32_t folded1Length=ucase_toFullFolding(csp, c, &folded1, U_FOLD_CASE_DEFA
ULT); |
| 605 if(folded1Length<0) { |
| 606 const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc); |
| 607 if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) { |
| 608 return u_terminateUChars(dest, destCapacity, 0, pErrorCode); // c d
oes not change at all under CaseFolding+NFKC |
| 609 } |
| 610 folded1String.setTo(c); |
| 611 } else { |
| 612 if(folded1Length>UCASE_MAX_STRING_LENGTH) { |
| 613 folded1String.setTo(folded1Length); |
| 614 } else { |
| 615 folded1String.setTo(FALSE, folded1, folded1Length); |
| 616 } |
| 617 } |
| 618 UnicodeString kc1=nfkc->normalize(folded1String, *pErrorCode); |
| 619 // second: c = NFKC(Fold(b)) |
| 620 UnicodeString folded2String(kc1); |
| 621 UnicodeString kc2=nfkc->normalize(folded2String.foldCase(), *pErrorCode); |
| 622 // if (c != b) add the mapping from a to c |
| 623 if(U_FAILURE(*pErrorCode) || kc1==kc2) { |
| 624 return u_terminateUChars(dest, destCapacity, 0, pErrorCode); |
| 625 } else { |
| 626 return kc2.extract(dest, destCapacity, *pErrorCode); |
| 627 } |
| 628 } |
| 629 |
| 630 #endif |
OLD | NEW |