OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * |
| 4 * Copyright (C) 2009-2010, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ******************************************************************************* |
| 8 * file name: normalizer2.cpp |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * created on: 2009nov22 |
| 14 * created by: Markus W. Scherer |
| 15 */ |
| 16 |
| 17 #include "unicode/utypes.h" |
| 18 |
| 19 #if !UCONFIG_NO_NORMALIZATION |
| 20 |
| 21 #include "unicode/localpointer.h" |
| 22 #include "unicode/normalizer2.h" |
| 23 #include "unicode/unistr.h" |
| 24 #include "unicode/unorm.h" |
| 25 #include "cpputils.h" |
| 26 #include "cstring.h" |
| 27 #include "mutex.h" |
| 28 #include "normalizer2impl.h" |
| 29 #include "ucln_cmn.h" |
| 30 #include "uhash.h" |
| 31 |
| 32 U_NAMESPACE_BEGIN |
| 33 |
| 34 // Public API dispatch via Normalizer2 subclasses -------------------------- *** |
| 35 |
| 36 // Normalizer2 implementation for the old UNORM_NONE. |
| 37 class NoopNormalizer2 : public Normalizer2 { |
| 38 virtual UnicodeString & |
| 39 normalize(const UnicodeString &src, |
| 40 UnicodeString &dest, |
| 41 UErrorCode &errorCode) const { |
| 42 if(U_SUCCESS(errorCode)) { |
| 43 if(&dest!=&src) { |
| 44 dest=src; |
| 45 } else { |
| 46 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 47 } |
| 48 } |
| 49 return dest; |
| 50 } |
| 51 virtual UnicodeString & |
| 52 normalizeSecondAndAppend(UnicodeString &first, |
| 53 const UnicodeString &second, |
| 54 UErrorCode &errorCode) const { |
| 55 if(U_SUCCESS(errorCode)) { |
| 56 if(&first!=&second) { |
| 57 first.append(second); |
| 58 } else { |
| 59 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 60 } |
| 61 } |
| 62 return first; |
| 63 } |
| 64 virtual UnicodeString & |
| 65 append(UnicodeString &first, |
| 66 const UnicodeString &second, |
| 67 UErrorCode &errorCode) const { |
| 68 if(U_SUCCESS(errorCode)) { |
| 69 if(&first!=&second) { |
| 70 first.append(second); |
| 71 } else { |
| 72 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 73 } |
| 74 } |
| 75 return first; |
| 76 } |
| 77 virtual UBool |
| 78 getDecomposition(UChar32, UnicodeString &) const { |
| 79 return FALSE; |
| 80 } |
| 81 virtual UBool |
| 82 isNormalized(const UnicodeString &, UErrorCode &) const { |
| 83 return TRUE; |
| 84 } |
| 85 virtual UNormalizationCheckResult |
| 86 quickCheck(const UnicodeString &, UErrorCode &) const { |
| 87 return UNORM_YES; |
| 88 } |
| 89 virtual int32_t |
| 90 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const { |
| 91 return s.length(); |
| 92 } |
| 93 virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; } |
| 94 virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; } |
| 95 virtual UBool isInert(UChar32) const { return TRUE; } |
| 96 }; |
| 97 |
| 98 // Intermediate class: |
| 99 // Has Normalizer2Impl and does boilerplate argument checking and setup. |
| 100 class Normalizer2WithImpl : public Normalizer2 { |
| 101 public: |
| 102 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} |
| 103 |
| 104 // normalize |
| 105 virtual UnicodeString & |
| 106 normalize(const UnicodeString &src, |
| 107 UnicodeString &dest, |
| 108 UErrorCode &errorCode) const { |
| 109 if(U_FAILURE(errorCode)) { |
| 110 dest.setToBogus(); |
| 111 return dest; |
| 112 } |
| 113 const UChar *sArray=src.getBuffer(); |
| 114 if(&dest==&src || sArray==NULL) { |
| 115 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 116 dest.setToBogus(); |
| 117 return dest; |
| 118 } |
| 119 dest.remove(); |
| 120 ReorderingBuffer buffer(impl, dest); |
| 121 if(buffer.init(src.length(), errorCode)) { |
| 122 normalize(sArray, sArray+src.length(), buffer, errorCode); |
| 123 } |
| 124 return dest; |
| 125 } |
| 126 virtual void |
| 127 normalize(const UChar *src, const UChar *limit, |
| 128 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; |
| 129 |
| 130 // normalize and append |
| 131 virtual UnicodeString & |
| 132 normalizeSecondAndAppend(UnicodeString &first, |
| 133 const UnicodeString &second, |
| 134 UErrorCode &errorCode) const { |
| 135 return normalizeSecondAndAppend(first, second, TRUE, errorCode); |
| 136 } |
| 137 virtual UnicodeString & |
| 138 append(UnicodeString &first, |
| 139 const UnicodeString &second, |
| 140 UErrorCode &errorCode) const { |
| 141 return normalizeSecondAndAppend(first, second, FALSE, errorCode); |
| 142 } |
| 143 UnicodeString & |
| 144 normalizeSecondAndAppend(UnicodeString &first, |
| 145 const UnicodeString &second, |
| 146 UBool doNormalize, |
| 147 UErrorCode &errorCode) const { |
| 148 uprv_checkCanGetBuffer(first, errorCode); |
| 149 if(U_FAILURE(errorCode)) { |
| 150 return first; |
| 151 } |
| 152 const UChar *secondArray=second.getBuffer(); |
| 153 if(&first==&second || secondArray==NULL) { |
| 154 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 155 return first; |
| 156 } |
| 157 ReorderingBuffer buffer(impl, first); |
| 158 if(buffer.init(first.length()+second.length(), errorCode)) { |
| 159 normalizeAndAppend(secondArray, secondArray+second.length(), doNorma
lize, |
| 160 buffer, errorCode); |
| 161 } |
| 162 return first; |
| 163 } |
| 164 virtual void |
| 165 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
| 166 ReorderingBuffer &buffer, UErrorCode &errorCode) const =
0; |
| 167 virtual UBool |
| 168 getDecomposition(UChar32 c, UnicodeString &decomposition) const { |
| 169 UChar buffer[4]; |
| 170 int32_t length; |
| 171 const UChar *d=impl.getDecomposition(c, buffer, length); |
| 172 if(d==NULL) { |
| 173 return FALSE; |
| 174 } |
| 175 if(d==buffer) { |
| 176 decomposition.setTo(buffer, length); // copy the string (Jamos from
Hangul syllable c) |
| 177 } else { |
| 178 decomposition.setTo(FALSE, d, length); // read-only alias |
| 179 } |
| 180 return TRUE; |
| 181 } |
| 182 |
| 183 // quick checks |
| 184 virtual UBool |
| 185 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { |
| 186 if(U_FAILURE(errorCode)) { |
| 187 return FALSE; |
| 188 } |
| 189 const UChar *sArray=s.getBuffer(); |
| 190 if(sArray==NULL) { |
| 191 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 192 return FALSE; |
| 193 } |
| 194 const UChar *sLimit=sArray+s.length(); |
| 195 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); |
| 196 } |
| 197 virtual UNormalizationCheckResult |
| 198 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { |
| 199 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNO
RM_NO; |
| 200 } |
| 201 virtual int32_t |
| 202 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { |
| 203 if(U_FAILURE(errorCode)) { |
| 204 return 0; |
| 205 } |
| 206 const UChar *sArray=s.getBuffer(); |
| 207 if(sArray==NULL) { |
| 208 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 209 return 0; |
| 210 } |
| 211 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)
-sArray); |
| 212 } |
| 213 virtual const UChar * |
| 214 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCod
e) const = 0; |
| 215 |
| 216 virtual UNormalizationCheckResult getQuickCheck(UChar32) const { |
| 217 return UNORM_YES; |
| 218 } |
| 219 |
| 220 const Normalizer2Impl &impl; |
| 221 }; |
| 222 |
| 223 class DecomposeNormalizer2 : public Normalizer2WithImpl { |
| 224 public: |
| 225 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} |
| 226 |
| 227 private: |
| 228 virtual void |
| 229 normalize(const UChar *src, const UChar *limit, |
| 230 ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| 231 impl.decompose(src, limit, &buffer, errorCode); |
| 232 } |
| 233 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base cl
ass function. |
| 234 virtual void |
| 235 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
| 236 ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| 237 impl.decomposeAndAppend(src, limit, doNormalize, buffer, errorCode); |
| 238 } |
| 239 virtual const UChar * |
| 240 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCod
e) const { |
| 241 return impl.decompose(src, limit, NULL, errorCode); |
| 242 } |
| 243 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding
base class function. |
| 244 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { |
| 245 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; |
| 246 } |
| 247 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoun
dary(c, TRUE); } |
| 248 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBound
ary(c, FALSE); } |
| 249 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } |
| 250 }; |
| 251 |
| 252 class ComposeNormalizer2 : public Normalizer2WithImpl { |
| 253 public: |
| 254 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : |
| 255 Normalizer2WithImpl(ni), onlyContiguous(fcc) {} |
| 256 |
| 257 private: |
| 258 virtual void |
| 259 normalize(const UChar *src, const UChar *limit, |
| 260 ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| 261 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); |
| 262 } |
| 263 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base cl
ass function. |
| 264 virtual void |
| 265 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
| 266 ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| 267 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, buffer, e
rrorCode); |
| 268 } |
| 269 |
| 270 virtual UBool |
| 271 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { |
| 272 if(U_FAILURE(errorCode)) { |
| 273 return FALSE; |
| 274 } |
| 275 const UChar *sArray=s.getBuffer(); |
| 276 if(sArray==NULL) { |
| 277 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 278 return FALSE; |
| 279 } |
| 280 UnicodeString temp; |
| 281 ReorderingBuffer buffer(impl, temp); |
| 282 if(!buffer.init(5, errorCode)) { // small destCapacity for substring no
rmalization |
| 283 return FALSE; |
| 284 } |
| 285 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, bu
ffer, errorCode); |
| 286 } |
| 287 virtual UNormalizationCheckResult |
| 288 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { |
| 289 if(U_FAILURE(errorCode)) { |
| 290 return UNORM_MAYBE; |
| 291 } |
| 292 const UChar *sArray=s.getBuffer(); |
| 293 if(sArray==NULL) { |
| 294 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 295 return UNORM_MAYBE; |
| 296 } |
| 297 UNormalizationCheckResult qcResult=UNORM_YES; |
| 298 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcRes
ult); |
| 299 return qcResult; |
| 300 } |
| 301 virtual const UChar * |
| 302 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const
{ |
| 303 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); |
| 304 } |
| 305 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding
base class function. |
| 306 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { |
| 307 return impl.getCompQuickCheck(impl.getNorm16(c)); |
| 308 } |
| 309 virtual UBool hasBoundaryBefore(UChar32 c) const { |
| 310 return impl.hasCompBoundaryBefore(c); |
| 311 } |
| 312 virtual UBool hasBoundaryAfter(UChar32 c) const { |
| 313 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); |
| 314 } |
| 315 virtual UBool isInert(UChar32 c) const { |
| 316 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); |
| 317 } |
| 318 |
| 319 const UBool onlyContiguous; |
| 320 }; |
| 321 |
| 322 class FCDNormalizer2 : public Normalizer2WithImpl { |
| 323 public: |
| 324 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} |
| 325 |
| 326 private: |
| 327 virtual void |
| 328 normalize(const UChar *src, const UChar *limit, |
| 329 ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| 330 impl.makeFCD(src, limit, &buffer, errorCode); |
| 331 } |
| 332 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base cl
ass function. |
| 333 virtual void |
| 334 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
| 335 ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| 336 impl.makeFCDAndAppend(src, limit, doNormalize, buffer, errorCode); |
| 337 } |
| 338 virtual const UChar * |
| 339 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCod
e) const { |
| 340 return impl.makeFCD(src, limit, NULL, errorCode); |
| 341 } |
| 342 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding
base class function. |
| 343 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundar
yBefore(c); } |
| 344 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundary
After(c); } |
| 345 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } |
| 346 }; |
| 347 |
| 348 // instance cache ---------------------------------------------------------- *** |
| 349 |
| 350 struct Norm2AllModes : public UMemory { |
| 351 static Norm2AllModes *createInstance(const char *packageName, |
| 352 const char *name, |
| 353 UErrorCode &errorCode); |
| 354 Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE
) {} |
| 355 |
| 356 Normalizer2Impl impl; |
| 357 ComposeNormalizer2 comp; |
| 358 DecomposeNormalizer2 decomp; |
| 359 FCDNormalizer2 fcd; |
| 360 ComposeNormalizer2 fcc; |
| 361 }; |
| 362 |
| 363 Norm2AllModes * |
| 364 Norm2AllModes::createInstance(const char *packageName, |
| 365 const char *name, |
| 366 UErrorCode &errorCode) { |
| 367 if(U_FAILURE(errorCode)) { |
| 368 return NULL; |
| 369 } |
| 370 LocalPointer<Norm2AllModes> allModes(new Norm2AllModes); |
| 371 if(allModes.isNull()) { |
| 372 errorCode=U_MEMORY_ALLOCATION_ERROR; |
| 373 return NULL; |
| 374 } |
| 375 allModes->impl.load(packageName, name, errorCode); |
| 376 return U_SUCCESS(errorCode) ? allModes.orphan() : NULL; |
| 377 } |
| 378 |
| 379 U_CDECL_BEGIN |
| 380 static UBool U_CALLCONV uprv_normalizer2_cleanup(); |
| 381 U_CDECL_END |
| 382 |
| 383 class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> { |
| 384 public: |
| 385 Norm2AllModesSingleton(TriStateSingleton &s, const char *n) : |
| 386 TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {} |
| 387 Norm2AllModes *getInstance(UErrorCode &errorCode) { |
| 388 return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstan
ce, name, errorCode); |
| 389 } |
| 390 private: |
| 391 static void *createInstance(const void *context, UErrorCode &errorCode) { |
| 392 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cl
eanup); |
| 393 return Norm2AllModes::createInstance(NULL, (const char *)context, errorC
ode); |
| 394 } |
| 395 |
| 396 const char *name; |
| 397 }; |
| 398 |
| 399 STATIC_TRI_STATE_SINGLETON(nfcSingleton); |
| 400 STATIC_TRI_STATE_SINGLETON(nfkcSingleton); |
| 401 STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton); |
| 402 |
| 403 class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> { |
| 404 public: |
| 405 Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s)
{} |
| 406 Normalizer2 *getInstance(UErrorCode &errorCode) { |
| 407 return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance,
NULL, errorCode); |
| 408 } |
| 409 private: |
| 410 static void *createInstance(const void *, UErrorCode &errorCode) { |
| 411 Normalizer2 *noop=new NoopNormalizer2; |
| 412 if(noop==NULL) { |
| 413 errorCode=U_MEMORY_ALLOCATION_ERROR; |
| 414 } |
| 415 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cl
eanup); |
| 416 return noop; |
| 417 } |
| 418 }; |
| 419 |
| 420 STATIC_SIMPLE_SINGLETON(noopSingleton); |
| 421 |
| 422 static UHashtable *cache=NULL; |
| 423 |
| 424 U_CDECL_BEGIN |
| 425 |
| 426 static void U_CALLCONV deleteNorm2AllModes(void *allModes) { |
| 427 delete (Norm2AllModes *)allModes; |
| 428 } |
| 429 |
| 430 static UBool U_CALLCONV uprv_normalizer2_cleanup() { |
| 431 Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance(); |
| 432 Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance(); |
| 433 Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance(); |
| 434 Norm2Singleton(noopSingleton).deleteInstance(); |
| 435 uhash_close(cache); |
| 436 cache=NULL; |
| 437 return TRUE; |
| 438 } |
| 439 |
| 440 U_CDECL_END |
| 441 |
| 442 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) { |
| 443 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInsta
nce(errorCode); |
| 444 return allModes!=NULL ? &allModes->comp : NULL; |
| 445 } |
| 446 |
| 447 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) { |
| 448 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInsta
nce(errorCode); |
| 449 return allModes!=NULL ? &allModes->decomp : NULL; |
| 450 } |
| 451 |
| 452 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { |
| 453 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInsta
nce(errorCode); |
| 454 if(allModes!=NULL) { |
| 455 allModes->impl.getFCDTrie(errorCode); |
| 456 return &allModes->fcd; |
| 457 } else { |
| 458 return NULL; |
| 459 } |
| 460 } |
| 461 |
| 462 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { |
| 463 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInsta
nce(errorCode); |
| 464 return allModes!=NULL ? &allModes->fcc : NULL; |
| 465 } |
| 466 |
| 467 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) { |
| 468 Norm2AllModes *allModes= |
| 469 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); |
| 470 return allModes!=NULL ? &allModes->comp : NULL; |
| 471 } |
| 472 |
| 473 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) { |
| 474 Norm2AllModes *allModes= |
| 475 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); |
| 476 return allModes!=NULL ? &allModes->decomp : NULL; |
| 477 } |
| 478 |
| 479 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode)
{ |
| 480 Norm2AllModes *allModes= |
| 481 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCod
e); |
| 482 return allModes!=NULL ? &allModes->comp : NULL; |
| 483 } |
| 484 |
| 485 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { |
| 486 return Norm2Singleton(noopSingleton).getInstance(errorCode); |
| 487 } |
| 488 |
| 489 const Normalizer2 * |
| 490 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode)
{ |
| 491 if(U_FAILURE(errorCode)) { |
| 492 return NULL; |
| 493 } |
| 494 switch(mode) { |
| 495 case UNORM_NFD: |
| 496 return getNFDInstance(errorCode); |
| 497 case UNORM_NFKD: |
| 498 return getNFKDInstance(errorCode); |
| 499 case UNORM_NFC: |
| 500 return getNFCInstance(errorCode); |
| 501 case UNORM_NFKC: |
| 502 return getNFKCInstance(errorCode); |
| 503 case UNORM_FCD: |
| 504 return getFCDInstance(errorCode); |
| 505 default: // UNORM_NONE |
| 506 return getNoopInstance(errorCode); |
| 507 } |
| 508 } |
| 509 |
| 510 const Normalizer2Impl * |
| 511 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { |
| 512 Norm2AllModes *allModes= |
| 513 Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); |
| 514 return allModes!=NULL ? &allModes->impl : NULL; |
| 515 } |
| 516 |
| 517 const Normalizer2Impl * |
| 518 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { |
| 519 Norm2AllModes *allModes= |
| 520 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); |
| 521 return allModes!=NULL ? &allModes->impl : NULL; |
| 522 } |
| 523 |
| 524 const Normalizer2Impl * |
| 525 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { |
| 526 Norm2AllModes *allModes= |
| 527 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCod
e); |
| 528 return allModes!=NULL ? &allModes->impl : NULL; |
| 529 } |
| 530 |
| 531 const Normalizer2Impl * |
| 532 Normalizer2Factory::getImpl(const Normalizer2 *norm2) { |
| 533 return &((Normalizer2WithImpl *)norm2)->impl; |
| 534 } |
| 535 |
| 536 const UTrie2 * |
| 537 Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) { |
| 538 Norm2AllModes *allModes= |
| 539 Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); |
| 540 if(allModes!=NULL) { |
| 541 return allModes->impl.getFCDTrie(errorCode); |
| 542 } else { |
| 543 return NULL; |
| 544 } |
| 545 } |
| 546 |
| 547 const Normalizer2 * |
| 548 Normalizer2::getInstance(const char *packageName, |
| 549 const char *name, |
| 550 UNormalization2Mode mode, |
| 551 UErrorCode &errorCode) { |
| 552 if(U_FAILURE(errorCode)) { |
| 553 return NULL; |
| 554 } |
| 555 if(name==NULL || *name==0) { |
| 556 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 557 } |
| 558 Norm2AllModes *allModes=NULL; |
| 559 if(packageName==NULL) { |
| 560 if(0==uprv_strcmp(name, "nfc")) { |
| 561 allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(err
orCode); |
| 562 } else if(0==uprv_strcmp(name, "nfkc")) { |
| 563 allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(e
rrorCode); |
| 564 } else if(0==uprv_strcmp(name, "nfkc_cf")) { |
| 565 allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInst
ance(errorCode); |
| 566 } |
| 567 } |
| 568 if(allModes==NULL && U_SUCCESS(errorCode)) { |
| 569 { |
| 570 Mutex lock; |
| 571 if(cache!=NULL) { |
| 572 allModes=(Norm2AllModes *)uhash_get(cache, name); |
| 573 } |
| 574 } |
| 575 if(allModes==NULL) { |
| 576 LocalPointer<Norm2AllModes> localAllModes( |
| 577 Norm2AllModes::createInstance(packageName, name, errorCode)); |
| 578 if(U_SUCCESS(errorCode)) { |
| 579 Mutex lock; |
| 580 if(cache==NULL) { |
| 581 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL,
&errorCode); |
| 582 if(U_FAILURE(errorCode)) { |
| 583 return NULL; |
| 584 } |
| 585 uhash_setKeyDeleter(cache, uprv_free); |
| 586 uhash_setValueDeleter(cache, deleteNorm2AllModes); |
| 587 } |
| 588 void *temp=uhash_get(cache, name); |
| 589 if(temp==NULL) { |
| 590 int32_t keyLength=uprv_strlen(name)+1; |
| 591 char *nameCopy=(char *)uprv_malloc(keyLength); |
| 592 if(nameCopy==NULL) { |
| 593 errorCode=U_MEMORY_ALLOCATION_ERROR; |
| 594 return NULL; |
| 595 } |
| 596 uprv_memcpy(nameCopy, name, keyLength); |
| 597 uhash_put(cache, nameCopy, allModes=localAllModes.orphan(),
&errorCode); |
| 598 } else { |
| 599 // race condition |
| 600 allModes=(Norm2AllModes *)temp; |
| 601 } |
| 602 } |
| 603 } |
| 604 } |
| 605 if(allModes!=NULL && U_SUCCESS(errorCode)) { |
| 606 switch(mode) { |
| 607 case UNORM2_COMPOSE: |
| 608 return &allModes->comp; |
| 609 case UNORM2_DECOMPOSE: |
| 610 return &allModes->decomp; |
| 611 case UNORM2_FCD: |
| 612 allModes->impl.getFCDTrie(errorCode); |
| 613 return &allModes->fcd; |
| 614 case UNORM2_COMPOSE_CONTIGUOUS: |
| 615 return &allModes->fcc; |
| 616 default: |
| 617 break; // do nothing |
| 618 } |
| 619 } |
| 620 return NULL; |
| 621 } |
| 622 |
| 623 UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2) |
| 624 |
| 625 U_NAMESPACE_END |
| 626 |
| 627 // C API ------------------------------------------------------------------- *** |
| 628 |
| 629 U_NAMESPACE_USE |
| 630 |
| 631 U_DRAFT const UNormalizer2 * U_EXPORT2 |
| 632 unorm2_getInstance(const char *packageName, |
| 633 const char *name, |
| 634 UNormalization2Mode mode, |
| 635 UErrorCode *pErrorCode) { |
| 636 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mod
e, *pErrorCode); |
| 637 } |
| 638 |
| 639 U_DRAFT void U_EXPORT2 |
| 640 unorm2_close(UNormalizer2 *norm2) { |
| 641 delete (Normalizer2 *)norm2; |
| 642 } |
| 643 |
| 644 U_DRAFT int32_t U_EXPORT2 |
| 645 unorm2_normalize(const UNormalizer2 *norm2, |
| 646 const UChar *src, int32_t length, |
| 647 UChar *dest, int32_t capacity, |
| 648 UErrorCode *pErrorCode) { |
| 649 if(U_FAILURE(*pErrorCode)) { |
| 650 return 0; |
| 651 } |
| 652 if( (src==NULL ? length!=0 : length<-1) || |
| 653 (dest==NULL ? capacity!=0 : capacity<0) || |
| 654 (src==dest && src!=NULL) |
| 655 ) { |
| 656 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 657 return 0; |
| 658 } |
| 659 UnicodeString destString(dest, 0, capacity); |
| 660 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) wo
uld crash. |
| 661 if(length!=0) { |
| 662 const Normalizer2 *n2=(const Normalizer2 *)norm2; |
| 663 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *
>(n2); |
| 664 if(n2wi!=NULL) { |
| 665 // Avoid duplicate argument checking and support NUL-terminated src. |
| 666 ReorderingBuffer buffer(n2wi->impl, destString); |
| 667 if(buffer.init(length, *pErrorCode)) { |
| 668 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pEr
rorCode); |
| 669 } |
| 670 } else { |
| 671 UnicodeString srcString(length<0, src, length); |
| 672 n2->normalize(srcString, destString, *pErrorCode); |
| 673 } |
| 674 } |
| 675 return destString.extract(dest, capacity, *pErrorCode); |
| 676 } |
| 677 |
| 678 static int32_t |
| 679 normalizeSecondAndAppend(const UNormalizer2 *norm2, |
| 680 UChar *first, int32_t firstLength, int32_t firstCapacit
y, |
| 681 const UChar *second, int32_t secondLength, |
| 682 UBool doNormalize, |
| 683 UErrorCode *pErrorCode) { |
| 684 if(U_FAILURE(*pErrorCode)) { |
| 685 return 0; |
| 686 } |
| 687 if( (second==NULL ? secondLength!=0 : secondLength<-1) || |
| 688 (first==NULL ? (firstCapacity!=0 || firstLength!=0) : |
| 689 (firstCapacity<0 || firstLength<-1)) || |
| 690 (first==second && first!=NULL) |
| 691 ) { |
| 692 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 693 return 0; |
| 694 } |
| 695 UnicodeString firstString(first, firstLength, firstCapacity); |
| 696 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL,
buffer, ...) would crash. |
| 697 if(secondLength!=0) { |
| 698 const Normalizer2 *n2=(const Normalizer2 *)norm2; |
| 699 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *
>(n2); |
| 700 if(n2wi!=NULL) { |
| 701 // Avoid duplicate argument checking and support NUL-terminated src. |
| 702 ReorderingBuffer buffer(n2wi->impl, firstString); |
| 703 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destC
apacity>=-1 |
| 704 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+second
Length : NULL, |
| 705 doNormalize, buffer, *pErrorCode); |
| 706 } |
| 707 } else { |
| 708 UnicodeString secondString(secondLength<0, second, secondLength); |
| 709 if(doNormalize) { |
| 710 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorC
ode); |
| 711 } else { |
| 712 n2->append(firstString, secondString, *pErrorCode); |
| 713 } |
| 714 } |
| 715 } |
| 716 return firstString.extract(first, firstCapacity, *pErrorCode); |
| 717 } |
| 718 |
| 719 U_DRAFT int32_t U_EXPORT2 |
| 720 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, |
| 721 UChar *first, int32_t firstLength, int32_t first
Capacity, |
| 722 const UChar *second, int32_t secondLength, |
| 723 UErrorCode *pErrorCode) { |
| 724 return normalizeSecondAndAppend(norm2, |
| 725 first, firstLength, firstCapacity, |
| 726 second, secondLength, |
| 727 TRUE, pErrorCode); |
| 728 } |
| 729 |
| 730 U_DRAFT int32_t U_EXPORT2 |
| 731 unorm2_append(const UNormalizer2 *norm2, |
| 732 UChar *first, int32_t firstLength, int32_t firstCapacity, |
| 733 const UChar *second, int32_t secondLength, |
| 734 UErrorCode *pErrorCode) { |
| 735 return normalizeSecondAndAppend(norm2, |
| 736 first, firstLength, firstCapacity, |
| 737 second, secondLength, |
| 738 FALSE, pErrorCode); |
| 739 } |
| 740 |
| 741 U_DRAFT int32_t U_EXPORT2 |
| 742 unorm2_getDecomposition(const UNormalizer2 *norm2, |
| 743 UChar32 c, UChar *decomposition, int32_t capacity, |
| 744 UErrorCode *pErrorCode) { |
| 745 if(U_FAILURE(*pErrorCode)) { |
| 746 return 0; |
| 747 } |
| 748 if(decomposition==NULL ? capacity!=0 : capacity<0) { |
| 749 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 750 return 0; |
| 751 } |
| 752 UnicodeString destString(decomposition, 0, capacity); |
| 753 if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destStr
ing)) { |
| 754 return destString.extract(decomposition, capacity, *pErrorCode); |
| 755 } else { |
| 756 return -1; |
| 757 } |
| 758 } |
| 759 |
| 760 U_DRAFT UBool U_EXPORT2 |
| 761 unorm2_isNormalized(const UNormalizer2 *norm2, |
| 762 const UChar *s, int32_t length, |
| 763 UErrorCode *pErrorCode) { |
| 764 if(U_FAILURE(*pErrorCode)) { |
| 765 return 0; |
| 766 } |
| 767 if((s==NULL && length!=0) || length<-1) { |
| 768 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 769 return 0; |
| 770 } |
| 771 UnicodeString sString(length<0, s, length); |
| 772 return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode); |
| 773 } |
| 774 |
| 775 U_DRAFT UNormalizationCheckResult U_EXPORT2 |
| 776 unorm2_quickCheck(const UNormalizer2 *norm2, |
| 777 const UChar *s, int32_t length, |
| 778 UErrorCode *pErrorCode) { |
| 779 if(U_FAILURE(*pErrorCode)) { |
| 780 return UNORM_NO; |
| 781 } |
| 782 if((s==NULL && length!=0) || length<-1) { |
| 783 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 784 return UNORM_NO; |
| 785 } |
| 786 UnicodeString sString(length<0, s, length); |
| 787 return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode); |
| 788 } |
| 789 |
| 790 U_DRAFT int32_t U_EXPORT2 |
| 791 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, |
| 792 const UChar *s, int32_t length, |
| 793 UErrorCode *pErrorCode) { |
| 794 if(U_FAILURE(*pErrorCode)) { |
| 795 return 0; |
| 796 } |
| 797 if((s==NULL && length!=0) || length<-1) { |
| 798 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 799 return 0; |
| 800 } |
| 801 UnicodeString sString(length<0, s, length); |
| 802 return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode)
; |
| 803 } |
| 804 |
| 805 U_DRAFT UBool U_EXPORT2 |
| 806 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) { |
| 807 return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c); |
| 808 } |
| 809 |
| 810 U_DRAFT UBool U_EXPORT2 |
| 811 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) { |
| 812 return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c); |
| 813 } |
| 814 |
| 815 U_DRAFT UBool U_EXPORT2 |
| 816 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { |
| 817 return ((const Normalizer2 *)norm2)->isInert(c); |
| 818 } |
| 819 |
| 820 // Some properties APIs ---------------------------------------------------- *** |
| 821 |
| 822 U_CFUNC UNormalizationCheckResult U_EXPORT2 |
| 823 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { |
| 824 if(mode<=UNORM_NONE || UNORM_FCD<=mode) { |
| 825 return UNORM_YES; |
| 826 } |
| 827 UErrorCode errorCode=U_ZERO_ERROR; |
| 828 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); |
| 829 if(U_SUCCESS(errorCode)) { |
| 830 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); |
| 831 } else { |
| 832 return UNORM_MAYBE; |
| 833 } |
| 834 } |
| 835 |
| 836 U_CAPI const uint16_t * U_EXPORT2 |
| 837 unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) { |
| 838 const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode); |
| 839 if(U_SUCCESS(*pErrorCode)) { |
| 840 fcdHighStart=trie->highStart; |
| 841 return trie->index; |
| 842 } else { |
| 843 return NULL; |
| 844 } |
| 845 } |
| 846 |
| 847 #endif // !UCONFIG_NO_NORMALIZATION |
OLD | NEW |