OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * Copyright (C) 2014, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ******************************************************************************* |
| 6 * loadednormalizer2impl.cpp |
| 7 * |
| 8 * created on: 2014sep03 |
| 9 * created by: Markus W. Scherer |
| 10 */ |
| 11 |
| 12 #include "unicode/utypes.h" |
| 13 |
| 14 #if !UCONFIG_NO_NORMALIZATION |
| 15 |
| 16 #include "unicode/udata.h" |
| 17 #include "unicode/localpointer.h" |
| 18 #include "unicode/normalizer2.h" |
| 19 #include "unicode/unistr.h" |
| 20 #include "unicode/unorm.h" |
| 21 #include "cstring.h" |
| 22 #include "mutex.h" |
| 23 #include "norm2allmodes.h" |
| 24 #include "normalizer2impl.h" |
| 25 #include "uassert.h" |
| 26 #include "ucln_cmn.h" |
| 27 #include "uhash.h" |
| 28 |
| 29 U_NAMESPACE_BEGIN |
| 30 |
| 31 class LoadedNormalizer2Impl : public Normalizer2Impl { |
| 32 public: |
| 33 LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {} |
| 34 virtual ~LoadedNormalizer2Impl(); |
| 35 |
| 36 void load(const char *packageName, const char *name, UErrorCode &errorCode); |
| 37 |
| 38 private: |
| 39 static UBool U_CALLCONV |
| 40 isAcceptable(void *context, const char *type, const char *name, const UDataI
nfo *pInfo); |
| 41 |
| 42 UDataMemory *memory; |
| 43 UTrie2 *ownedTrie; |
| 44 }; |
| 45 |
| 46 LoadedNormalizer2Impl::~LoadedNormalizer2Impl() { |
| 47 udata_close(memory); |
| 48 utrie2_close(ownedTrie); |
| 49 } |
| 50 |
| 51 UBool U_CALLCONV |
| 52 LoadedNormalizer2Impl::isAcceptable(void * /*context*/, |
| 53 const char * /* type */, const char * /*name
*/, |
| 54 const UDataInfo *pInfo) { |
| 55 if( |
| 56 pInfo->size>=20 && |
| 57 pInfo->isBigEndian==U_IS_BIG_ENDIAN && |
| 58 pInfo->charsetFamily==U_CHARSET_FAMILY && |
| 59 pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */ |
| 60 pInfo->dataFormat[1]==0x72 && |
| 61 pInfo->dataFormat[2]==0x6d && |
| 62 pInfo->dataFormat[3]==0x32 && |
| 63 pInfo->formatVersion[0]==2 |
| 64 ) { |
| 65 // Normalizer2Impl *me=(Normalizer2Impl *)context; |
| 66 // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4); |
| 67 return TRUE; |
| 68 } else { |
| 69 return FALSE; |
| 70 } |
| 71 } |
| 72 |
| 73 void |
| 74 LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCod
e &errorCode) { |
| 75 if(U_FAILURE(errorCode)) { |
| 76 return; |
| 77 } |
| 78 memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &error
Code); |
| 79 if(U_FAILURE(errorCode)) { |
| 80 return; |
| 81 } |
| 82 const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory); |
| 83 const int32_t *inIndexes=(const int32_t *)inBytes; |
| 84 int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4; |
| 85 if(indexesLength<=IX_MIN_MAYBE_YES) { |
| 86 errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes. |
| 87 return; |
| 88 } |
| 89 |
| 90 int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET]; |
| 91 int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET]; |
| 92 ownedTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, |
| 93 inBytes+offset, nextOffset-offset, NULL, |
| 94 &errorCode); |
| 95 if(U_FAILURE(errorCode)) { |
| 96 return; |
| 97 } |
| 98 |
| 99 offset=nextOffset; |
| 100 nextOffset=inIndexes[IX_SMALL_FCD_OFFSET]; |
| 101 const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset); |
| 102 |
| 103 // smallFCD: new in formatVersion 2 |
| 104 offset=nextOffset; |
| 105 const uint8_t *inSmallFCD=inBytes+offset; |
| 106 |
| 107 init(inIndexes, ownedTrie, inExtraData, inSmallFCD); |
| 108 } |
| 109 |
| 110 // instance cache ---------------------------------------------------------- *** |
| 111 |
| 112 Norm2AllModes * |
| 113 Norm2AllModes::createInstance(const char *packageName, |
| 114 const char *name, |
| 115 UErrorCode &errorCode) { |
| 116 if(U_FAILURE(errorCode)) { |
| 117 return NULL; |
| 118 } |
| 119 LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl; |
| 120 if(impl==NULL) { |
| 121 errorCode=U_MEMORY_ALLOCATION_ERROR; |
| 122 return NULL; |
| 123 } |
| 124 impl->load(packageName, name, errorCode); |
| 125 return createInstance(impl, errorCode); |
| 126 } |
| 127 |
| 128 U_CDECL_BEGIN |
| 129 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup(); |
| 130 U_CDECL_END |
| 131 |
| 132 static Norm2AllModes *nfkcSingleton; |
| 133 static Norm2AllModes *nfkc_cfSingleton; |
| 134 static UHashtable *cache=NULL; |
| 135 |
| 136 static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER; |
| 137 static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER; |
| 138 |
| 139 // UInitOnce singleton initialization function |
| 140 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) { |
| 141 if (uprv_strcmp(what, "nfkc") == 0) { |
| 142 nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode
); |
| 143 } else if (uprv_strcmp(what, "nfkc_cf") == 0) { |
| 144 nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorC
ode); |
| 145 } else { |
| 146 U_ASSERT(FALSE); // Unknown singleton |
| 147 } |
| 148 ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_norm
alizer2_cleanup); |
| 149 } |
| 150 |
| 151 U_CDECL_BEGIN |
| 152 |
| 153 static void U_CALLCONV deleteNorm2AllModes(void *allModes) { |
| 154 delete (Norm2AllModes *)allModes; |
| 155 } |
| 156 |
| 157 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() { |
| 158 delete nfkcSingleton; |
| 159 nfkcSingleton = NULL; |
| 160 delete nfkc_cfSingleton; |
| 161 nfkc_cfSingleton = NULL; |
| 162 uhash_close(cache); |
| 163 cache=NULL; |
| 164 nfkcInitOnce.reset(); |
| 165 nfkc_cfInitOnce.reset(); |
| 166 return TRUE; |
| 167 } |
| 168 |
| 169 U_CDECL_END |
| 170 |
| 171 const Norm2AllModes * |
| 172 Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) { |
| 173 if(U_FAILURE(errorCode)) { return NULL; } |
| 174 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); |
| 175 return nfkcSingleton; |
| 176 } |
| 177 |
| 178 const Norm2AllModes * |
| 179 Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) { |
| 180 if(U_FAILURE(errorCode)) { return NULL; } |
| 181 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); |
| 182 return nfkc_cfSingleton; |
| 183 } |
| 184 |
| 185 const Normalizer2 * |
| 186 Normalizer2::getNFKCInstance(UErrorCode &errorCode) { |
| 187 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); |
| 188 return allModes!=NULL ? &allModes->comp : NULL; |
| 189 } |
| 190 |
| 191 const Normalizer2 * |
| 192 Normalizer2::getNFKDInstance(UErrorCode &errorCode) { |
| 193 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); |
| 194 return allModes!=NULL ? &allModes->decomp : NULL; |
| 195 } |
| 196 |
| 197 const Normalizer2 * |
| 198 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { |
| 199 const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); |
| 200 return allModes!=NULL ? &allModes->comp : NULL; |
| 201 } |
| 202 |
| 203 const Normalizer2 * |
| 204 Normalizer2::getInstance(const char *packageName, |
| 205 const char *name, |
| 206 UNormalization2Mode mode, |
| 207 UErrorCode &errorCode) { |
| 208 if(U_FAILURE(errorCode)) { |
| 209 return NULL; |
| 210 } |
| 211 if(name==NULL || *name==0) { |
| 212 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 213 return NULL; |
| 214 } |
| 215 const Norm2AllModes *allModes=NULL; |
| 216 if(packageName==NULL) { |
| 217 if(0==uprv_strcmp(name, "nfc")) { |
| 218 allModes=Norm2AllModes::getNFCInstance(errorCode); |
| 219 } else if(0==uprv_strcmp(name, "nfkc")) { |
| 220 allModes=Norm2AllModes::getNFKCInstance(errorCode); |
| 221 } else if(0==uprv_strcmp(name, "nfkc_cf")) { |
| 222 allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); |
| 223 } |
| 224 } |
| 225 if(allModes==NULL && U_SUCCESS(errorCode)) { |
| 226 { |
| 227 Mutex lock; |
| 228 if(cache!=NULL) { |
| 229 allModes=(Norm2AllModes *)uhash_get(cache, name); |
| 230 } |
| 231 } |
| 232 if(allModes==NULL) { |
| 233 LocalPointer<Norm2AllModes> localAllModes( |
| 234 Norm2AllModes::createInstance(packageName, name, errorCode)); |
| 235 if(U_SUCCESS(errorCode)) { |
| 236 Mutex lock; |
| 237 if(cache==NULL) { |
| 238 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL,
&errorCode); |
| 239 if(U_FAILURE(errorCode)) { |
| 240 return NULL; |
| 241 } |
| 242 uhash_setKeyDeleter(cache, uprv_free); |
| 243 uhash_setValueDeleter(cache, deleteNorm2AllModes); |
| 244 } |
| 245 void *temp=uhash_get(cache, name); |
| 246 if(temp==NULL) { |
| 247 int32_t keyLength=uprv_strlen(name)+1; |
| 248 char *nameCopy=(char *)uprv_malloc(keyLength); |
| 249 if(nameCopy==NULL) { |
| 250 errorCode=U_MEMORY_ALLOCATION_ERROR; |
| 251 return NULL; |
| 252 } |
| 253 uprv_memcpy(nameCopy, name, keyLength); |
| 254 allModes=localAllModes.getAlias(); |
| 255 uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCod
e); |
| 256 } else { |
| 257 // race condition |
| 258 allModes=(Norm2AllModes *)temp; |
| 259 } |
| 260 } |
| 261 } |
| 262 } |
| 263 if(allModes!=NULL && U_SUCCESS(errorCode)) { |
| 264 switch(mode) { |
| 265 case UNORM2_COMPOSE: |
| 266 return &allModes->comp; |
| 267 case UNORM2_DECOMPOSE: |
| 268 return &allModes->decomp; |
| 269 case UNORM2_FCD: |
| 270 return &allModes->fcd; |
| 271 case UNORM2_COMPOSE_CONTIGUOUS: |
| 272 return &allModes->fcc; |
| 273 default: |
| 274 break; // do nothing |
| 275 } |
| 276 } |
| 277 return NULL; |
| 278 } |
| 279 |
| 280 const Normalizer2 * |
| 281 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode)
{ |
| 282 if(U_FAILURE(errorCode)) { |
| 283 return NULL; |
| 284 } |
| 285 switch(mode) { |
| 286 case UNORM_NFD: |
| 287 return Normalizer2::getNFDInstance(errorCode); |
| 288 case UNORM_NFKD: |
| 289 return Normalizer2::getNFKDInstance(errorCode); |
| 290 case UNORM_NFC: |
| 291 return Normalizer2::getNFCInstance(errorCode); |
| 292 case UNORM_NFKC: |
| 293 return Normalizer2::getNFKCInstance(errorCode); |
| 294 case UNORM_FCD: |
| 295 return getFCDInstance(errorCode); |
| 296 default: // UNORM_NONE |
| 297 return getNoopInstance(errorCode); |
| 298 } |
| 299 } |
| 300 |
| 301 const Normalizer2Impl * |
| 302 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { |
| 303 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); |
| 304 return allModes!=NULL ? allModes->impl : NULL; |
| 305 } |
| 306 |
| 307 const Normalizer2Impl * |
| 308 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { |
| 309 const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); |
| 310 return allModes!=NULL ? allModes->impl : NULL; |
| 311 } |
| 312 |
| 313 U_NAMESPACE_END |
| 314 |
| 315 // C API ------------------------------------------------------------------- *** |
| 316 |
| 317 U_NAMESPACE_USE |
| 318 |
| 319 U_CAPI const UNormalizer2 * U_EXPORT2 |
| 320 unorm2_getNFKCInstance(UErrorCode *pErrorCode) { |
| 321 return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode); |
| 322 } |
| 323 |
| 324 U_CAPI const UNormalizer2 * U_EXPORT2 |
| 325 unorm2_getNFKDInstance(UErrorCode *pErrorCode) { |
| 326 return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode); |
| 327 } |
| 328 |
| 329 U_CAPI const UNormalizer2 * U_EXPORT2 |
| 330 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { |
| 331 return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCod
e); |
| 332 } |
| 333 |
| 334 U_CAPI const UNormalizer2 * U_EXPORT2 |
| 335 unorm2_getInstance(const char *packageName, |
| 336 const char *name, |
| 337 UNormalization2Mode mode, |
| 338 UErrorCode *pErrorCode) { |
| 339 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mod
e, *pErrorCode); |
| 340 } |
| 341 |
| 342 U_CFUNC UNormalizationCheckResult |
| 343 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { |
| 344 if(mode<=UNORM_NONE || UNORM_FCD<=mode) { |
| 345 return UNORM_YES; |
| 346 } |
| 347 UErrorCode errorCode=U_ZERO_ERROR; |
| 348 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); |
| 349 if(U_SUCCESS(errorCode)) { |
| 350 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); |
| 351 } else { |
| 352 return UNORM_MAYBE; |
| 353 } |
| 354 } |
| 355 |
| 356 #endif // !UCONFIG_NO_NORMALIZATION |
OLD | NEW |