| OLD | NEW |
| 1 /* | 1 /* |
| 2 ***************************************************************** | 2 ***************************************************************** |
| 3 * Copyright (c) 2002-2011, International Business Machines Corporation | 3 * Copyright (c) 2002-2014, International Business Machines Corporation |
| 4 * and others. All Rights Reserved. | 4 * and others. All Rights Reserved. |
| 5 ***************************************************************** | 5 ***************************************************************** |
| 6 * Date Name Description | 6 * Date Name Description |
| 7 * 06/06/2002 aliu Creation. | 7 * 06/06/2002 aliu Creation. |
| 8 ***************************************************************** | 8 ***************************************************************** |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "unicode/utypes.h" | 11 #include "unicode/utypes.h" |
| 12 | 12 |
| 13 #if !UCONFIG_NO_TRANSLITERATION | 13 #if !UCONFIG_NO_TRANSLITERATION |
| 14 | 14 |
| 15 #include "unicode/uobject.h" | 15 #include "unicode/uobject.h" |
| 16 #include "unicode/uscript.h" | 16 #include "unicode/uscript.h" |
| 17 |
| 18 #include "anytrans.h" |
| 19 #include "hash.h" |
| 20 #include "mutex.h" |
| 17 #include "nultrans.h" | 21 #include "nultrans.h" |
| 18 #include "anytrans.h" | 22 #include "putilimp.h" |
| 23 #include "tridpars.h" |
| 24 #include "uinvchar.h" |
| 19 #include "uvector.h" | 25 #include "uvector.h" |
| 20 #include "tridpars.h" | |
| 21 #include "hash.h" | |
| 22 #include "putilimp.h" | |
| 23 #include "uinvchar.h" | |
| 24 | 26 |
| 25 //------------------------------------------------------------ | 27 //------------------------------------------------------------ |
| 26 // Constants | 28 // Constants |
| 27 | 29 |
| 28 static const UChar TARGET_SEP = 45; // '-' | 30 static const UChar TARGET_SEP = 45; // '-' |
| 29 static const UChar VARIANT_SEP = 47; // '/' | 31 static const UChar VARIANT_SEP = 47; // '/' |
| 30 static const UChar ANY[] = {65,110,121,0}; // "Any" | 32 static const UChar ANY[] = {65,110,121,0}; // "Any" |
| 31 static const UChar NULL_ID[] = {78,117,108,108,0}; // "Null" | 33 static const UChar NULL_ID[] = {78,117,108,108,0}; // "Null" |
| 32 static const UChar LATIN_PIVOT[] = {45,76,97,116,105,110,59,76,97,116,105,110,45
,0}; // "-Latin;Latin-" | 34 static const UChar LATIN_PIVOT[] = {45,76,97,116,105,110,59,76,97,116,105,110,45
,0}; // "-Latin;Latin-" |
| 33 | 35 |
| 34 //------------------------------------------------------------ | 36 //------------------------------------------------------------ |
| 35 | 37 |
| 36 U_CDECL_BEGIN | 38 U_CDECL_BEGIN |
| 37 /** | 39 /** |
| 38 * Deleter function for Transliterator*. | 40 * Deleter function for Transliterator*. |
| 39 */ | 41 */ |
| 40 static void U_CALLCONV | 42 static void U_CALLCONV |
| 41 _deleteTransliterator(void *obj) { | 43 _deleteTransliterator(void *obj) { |
| 42 delete (icu::Transliterator*) obj; | 44 delete (icu::Transliterator*) obj; |
| 43 } | 45 } |
| 44 U_CDECL_END | 46 U_CDECL_END |
| 45 | 47 |
| 46 //------------------------------------------------------------ | 48 //------------------------------------------------------------ |
| 47 | 49 |
| 48 U_NAMESPACE_BEGIN | 50 U_NAMESPACE_BEGIN |
| 49 | 51 |
| 50 //------------------------------------------------------------ | 52 //------------------------------------------------------------ |
| 51 // ScriptRunIterator | 53 // ScriptRunIterator |
| 52 | 54 |
| (...skipping 25 matching lines...) Expand all Loading... |
| 78 | 80 |
| 79 /** | 81 /** |
| 80 * The start of the run, inclusive, valid after next() returns. | 82 * The start of the run, inclusive, valid after next() returns. |
| 81 */ | 83 */ |
| 82 int32_t start; | 84 int32_t start; |
| 83 | 85 |
| 84 /** | 86 /** |
| 85 * The end of the run, exclusive, valid after next() returns. | 87 * The end of the run, exclusive, valid after next() returns. |
| 86 */ | 88 */ |
| 87 int32_t limit; | 89 int32_t limit; |
| 88 | 90 |
| 89 /** | 91 /** |
| 90 * Constructs a run iterator over the given text from start | 92 * Constructs a run iterator over the given text from start |
| 91 * (inclusive) to limit (exclusive). | 93 * (inclusive) to limit (exclusive). |
| 92 */ | 94 */ |
| 93 ScriptRunIterator(const Replaceable& text, int32_t start, int32_t limit); | 95 ScriptRunIterator(const Replaceable& text, int32_t start, int32_t limit); |
| 94 | 96 |
| 95 /** | 97 /** |
| 96 * Returns TRUE if there are any more runs. TRUE is always | 98 * Returns TRUE if there are any more runs. TRUE is always |
| 97 * returned at least once. Upon return, the caller should | 99 * returned at least once. Upon return, the caller should |
| 98 * examine scriptCode, start, and limit. | 100 * examine scriptCode, start, and limit. |
| (...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 173 // AnyTransliterator | 175 // AnyTransliterator |
| 174 | 176 |
| 175 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(AnyTransliterator) | 177 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(AnyTransliterator) |
| 176 | 178 |
| 177 AnyTransliterator::AnyTransliterator(const UnicodeString& id, | 179 AnyTransliterator::AnyTransliterator(const UnicodeString& id, |
| 178 const UnicodeString& theTarget, | 180 const UnicodeString& theTarget, |
| 179 const UnicodeString& theVariant, | 181 const UnicodeString& theVariant, |
| 180 UScriptCode theTargetScript, | 182 UScriptCode theTargetScript, |
| 181 UErrorCode& ec) : | 183 UErrorCode& ec) : |
| 182 Transliterator(id, NULL), | 184 Transliterator(id, NULL), |
| 183 targetScript(theTargetScript) | 185 targetScript(theTargetScript) |
| 184 { | 186 { |
| 185 cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec); | 187 cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec); |
| 186 if (U_FAILURE(ec)) { | 188 if (U_FAILURE(ec)) { |
| 187 return; | 189 return; |
| 188 } | 190 } |
| 189 uhash_setValueDeleter(cache, _deleteTransliterator); | 191 uhash_setValueDeleter(cache, _deleteTransliterator); |
| 190 | 192 |
| 191 target = theTarget; | 193 target = theTarget; |
| 192 if (theVariant.length() > 0) { | 194 if (theVariant.length() > 0) { |
| 193 target.append(VARIANT_SEP).append(theVariant); | 195 target.append(VARIANT_SEP).append(theVariant); |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 232 | 234 |
| 233 ScriptRunIterator it(text, pos.contextStart, pos.contextLimit); | 235 ScriptRunIterator it(text, pos.contextStart, pos.contextLimit); |
| 234 | 236 |
| 235 while (it.next()) { | 237 while (it.next()) { |
| 236 // Ignore runs in the ante context | 238 // Ignore runs in the ante context |
| 237 if (it.limit <= allStart) continue; | 239 if (it.limit <= allStart) continue; |
| 238 | 240 |
| 239 // Try to instantiate transliterator from it.scriptCode to | 241 // Try to instantiate transliterator from it.scriptCode to |
| 240 // our target or target/variant | 242 // our target or target/variant |
| 241 Transliterator* t = getTransliterator(it.scriptCode); | 243 Transliterator* t = getTransliterator(it.scriptCode); |
| 242 | 244 |
| 243 if (t == NULL) { | 245 if (t == NULL) { |
| 244 // We have no transliterator. Do nothing, but keep | 246 // We have no transliterator. Do nothing, but keep |
| 245 // pos.start up to date. | 247 // pos.start up to date. |
| 246 pos.start = it.limit; | 248 pos.start = it.limit; |
| 247 continue; | 249 continue; |
| 248 } | 250 } |
| 249 | 251 |
| 250 // If the run end is before the transliteration limit, do | 252 // If the run end is before the transliteration limit, do |
| 251 // a non-incremental transliteration. Otherwise do an | 253 // a non-incremental transliteration. Otherwise do an |
| 252 // incremental one. | 254 // incremental one. |
| 253 UBool incremental = isIncremental && (it.limit >= allLimit); | 255 UBool incremental = isIncremental && (it.limit >= allLimit); |
| 254 | 256 |
| 255 pos.start = uprv_max(allStart, it.start); | 257 pos.start = uprv_max(allStart, it.start); |
| 256 pos.limit = uprv_min(allLimit, it.limit); | 258 pos.limit = uprv_min(allLimit, it.limit); |
| 257 int32_t limit = pos.limit; | 259 int32_t limit = pos.limit; |
| 258 t->filteredTransliterate(text, pos, incremental); | 260 t->filteredTransliterate(text, pos, incremental); |
| 259 int32_t delta = pos.limit - limit; | 261 int32_t delta = pos.limit - limit; |
| 260 allLimit += delta; | 262 allLimit += delta; |
| 261 it.adjustLimit(delta); | 263 it.adjustLimit(delta); |
| 262 | 264 |
| 263 // We're done if we enter the post context | 265 // We're done if we enter the post context |
| 264 if (it.limit >= allLimit) break; | 266 if (it.limit >= allLimit) break; |
| 265 } | 267 } |
| 266 | 268 |
| 267 // Restore limit. pos.start is fine where the last transliterator | 269 // Restore limit. pos.start is fine where the last transliterator |
| 268 // left it, or at the end of the last run. | 270 // left it, or at the end of the last run. |
| 269 pos.limit = allLimit; | 271 pos.limit = allLimit; |
| 270 } | 272 } |
| 271 | 273 |
| 272 Transliterator* AnyTransliterator::getTransliterator(UScriptCode source) const { | 274 Transliterator* AnyTransliterator::getTransliterator(UScriptCode source) const { |
| 273 | 275 |
| 274 if (source == targetScript || source == USCRIPT_INVALID_CODE) { | 276 if (source == targetScript || source == USCRIPT_INVALID_CODE) { |
| 275 return NULL; | 277 return NULL; |
| 276 } | 278 } |
| 277 | 279 |
| 278 Transliterator* t = (Transliterator*) uhash_iget(cache, (int32_t) source); | 280 Transliterator* t = NULL; |
| 281 { |
| 282 Mutex m(NULL); |
| 283 t = (Transliterator*) uhash_iget(cache, (int32_t) source); |
| 284 } |
| 279 if (t == NULL) { | 285 if (t == NULL) { |
| 280 UErrorCode ec = U_ZERO_ERROR; | 286 UErrorCode ec = U_ZERO_ERROR; |
| 281 UnicodeString sourceName(uscript_getName(source), -1, US_INV); | 287 UnicodeString sourceName(uscript_getName(source), -1, US_INV); |
| 282 UnicodeString id(sourceName); | 288 UnicodeString id(sourceName); |
| 283 id.append(TARGET_SEP).append(target); | 289 id.append(TARGET_SEP).append(target); |
| 284 | 290 |
| 285 t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); | 291 t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); |
| 286 if (U_FAILURE(ec) || t == NULL) { | 292 if (U_FAILURE(ec) || t == NULL) { |
| 287 delete t; | 293 delete t; |
| 288 | 294 |
| 289 // Try to pivot around Latin, our most common script | 295 // Try to pivot around Latin, our most common script |
| 290 id = sourceName; | 296 id = sourceName; |
| 291 id.append(LATIN_PIVOT, -1).append(target); | 297 id.append(LATIN_PIVOT, -1).append(target); |
| 292 t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); | 298 t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); |
| 293 if (U_FAILURE(ec) || t == NULL) { | 299 if (U_FAILURE(ec) || t == NULL) { |
| 294 delete t; | 300 delete t; |
| 295 t = NULL; | 301 t = NULL; |
| 296 } | 302 } |
| 297 } | 303 } |
| 298 | 304 |
| 299 if (t != NULL) { | 305 if (t != NULL) { |
| 300 uhash_iput(cache, (int32_t) source, t, &ec); | 306 Transliterator *rt = NULL; |
| 307 { |
| 308 Mutex m(NULL); |
| 309 rt = static_cast<Transliterator *> (uhash_iget(cache, (int32_t)
source)); |
| 310 if (rt == NULL) { |
| 311 // Common case, no race to cache this new transliterator. |
| 312 uhash_iput(cache, (int32_t) source, t, &ec); |
| 313 } else { |
| 314 // Race case, some other thread beat us to caching this tran
sliterator. |
| 315 Transliterator *temp = rt; |
| 316 rt = t; // Our newly created transliterator that lost the
race & now needs deleting. |
| 317 t = temp; // The transliterator from the cache that we will
return. |
| 318 } |
| 319 } |
| 320 delete rt; // will be non-null only in case of races. |
| 301 } | 321 } |
| 302 } | 322 } |
| 303 | |
| 304 return t; | 323 return t; |
| 305 } | 324 } |
| 306 | 325 |
| 307 /** | 326 /** |
| 308 * Return the script code for a given name, or -1 if not found. | 327 * Return the script code for a given name, or -1 if not found. |
| 309 */ | 328 */ |
| 310 static UScriptCode scriptNameToCode(const UnicodeString& name) { | 329 static UScriptCode scriptNameToCode(const UnicodeString& name) { |
| 311 char buf[128]; | 330 char buf[128]; |
| 312 UScriptCode code; | 331 UScriptCode code; |
| 313 UErrorCode ec = U_ZERO_ERROR; | 332 UErrorCode ec = U_ZERO_ERROR; |
| 314 int32_t nameLen = name.length(); | 333 int32_t nameLen = name.length(); |
| 315 UBool isInvariant = uprv_isInvariantUString(name.getBuffer(), nameLen); | 334 UBool isInvariant = uprv_isInvariantUString(name.getBuffer(), nameLen); |
| 316 | 335 |
| 317 if (isInvariant) { | 336 if (isInvariant) { |
| 318 name.extract(0, nameLen, buf, (int32_t)sizeof(buf), US_INV); | 337 name.extract(0, nameLen, buf, (int32_t)sizeof(buf), US_INV); |
| 319 buf[127] = 0; // Make sure that we NULL terminate the string. | 338 buf[127] = 0; // Make sure that we NULL terminate the string. |
| 320 } | 339 } |
| 321 if (!isInvariant || uscript_getCode(buf, &code, 1, &ec) != 1 || U_FAILURE(ec
)) | 340 if (!isInvariant || uscript_getCode(buf, &code, 1, &ec) != 1 || U_FAILURE(ec
)) |
| 322 { | 341 { |
| 323 code = USCRIPT_INVALID_CODE; | 342 code = USCRIPT_INVALID_CODE; |
| 324 } | 343 } |
| 325 return code; | 344 return code; |
| 326 } | 345 } |
| (...skipping 18 matching lines...) Expand all Loading... |
| 345 | 364 |
| 346 int32_t targetCount = Transliterator::_countAvailableTargets(source); | 365 int32_t targetCount = Transliterator::_countAvailableTargets(source); |
| 347 for (int32_t t=0; t<targetCount; ++t) { | 366 for (int32_t t=0; t<targetCount; ++t) { |
| 348 UnicodeString target; | 367 UnicodeString target; |
| 349 Transliterator::_getAvailableTarget(t, source, target); | 368 Transliterator::_getAvailableTarget(t, source, target); |
| 350 | 369 |
| 351 // Only process each target once | 370 // Only process each target once |
| 352 if (seen.geti(target) != 0) continue; | 371 if (seen.geti(target) != 0) continue; |
| 353 ec = U_ZERO_ERROR; | 372 ec = U_ZERO_ERROR; |
| 354 seen.puti(target, 1, ec); | 373 seen.puti(target, 1, ec); |
| 355 | 374 |
| 356 // Get the script code for the target. If not a script, ignore. | 375 // Get the script code for the target. If not a script, ignore. |
| 357 UScriptCode targetScript = scriptNameToCode(target); | 376 UScriptCode targetScript = scriptNameToCode(target); |
| 358 if (targetScript == USCRIPT_INVALID_CODE) continue; | 377 if (targetScript == USCRIPT_INVALID_CODE) continue; |
| 359 | 378 |
| 360 int32_t variantCount = Transliterator::_countAvailableVariants(sourc
e, target); | 379 int32_t variantCount = Transliterator::_countAvailableVariants(sourc
e, target); |
| 361 // assert(variantCount >= 1); | 380 // assert(variantCount >= 1); |
| 362 for (int32_t v=0; v<variantCount; ++v) { | 381 for (int32_t v=0; v<variantCount; ++v) { |
| 363 UnicodeString variant; | 382 UnicodeString variant; |
| 364 Transliterator::_getAvailableVariant(v, source, target, variant)
; | 383 Transliterator::_getAvailableVariant(v, source, target, variant)
; |
| 365 | 384 |
| 366 UnicodeString id; | 385 UnicodeString id; |
| 367 TransliteratorIDParser::STVtoID(UnicodeString(TRUE, ANY, 3), tar
get, variant, id); | 386 TransliteratorIDParser::STVtoID(UnicodeString(TRUE, ANY, 3), tar
get, variant, id); |
| 368 ec = U_ZERO_ERROR; | 387 ec = U_ZERO_ERROR; |
| 369 AnyTransliterator* t = new AnyTransliterator(id, target, variant
, | 388 AnyTransliterator* t = new AnyTransliterator(id, target, variant
, |
| 370 targetScript, ec); | 389 targetScript, ec); |
| 371 if (U_FAILURE(ec)) { | 390 if (U_FAILURE(ec)) { |
| 372 delete t; | 391 delete t; |
| 373 } else { | 392 } else { |
| 374 Transliterator::_registerInstance(t); | 393 Transliterator::_registerInstance(t); |
| 375 Transliterator::_registerSpecialInverse(target, UnicodeStrin
g(TRUE, NULL_ID, 4), FALSE); | 394 Transliterator::_registerSpecialInverse(target, UnicodeStrin
g(TRUE, NULL_ID, 4), FALSE); |
| 376 } | 395 } |
| 377 } | 396 } |
| 378 } | 397 } |
| 379 } | 398 } |
| 380 } | 399 } |
| 381 | 400 |
| 382 U_NAMESPACE_END | 401 U_NAMESPACE_END |
| 383 | 402 |
| 384 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ | 403 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
| 385 | 404 |
| 386 //eof | 405 //eof |
| OLD | NEW |