OLD | NEW |
1 /* | 1 /* |
2 ***************************************************************** | 2 ***************************************************************** |
3 * Copyright (c) 2002-2011, International Business Machines Corporation | 3 * Copyright (c) 2002-2014, International Business Machines Corporation |
4 * and others. All Rights Reserved. | 4 * and others. All Rights Reserved. |
5 ***************************************************************** | 5 ***************************************************************** |
6 * Date Name Description | 6 * Date Name Description |
7 * 06/06/2002 aliu Creation. | 7 * 06/06/2002 aliu Creation. |
8 ***************************************************************** | 8 ***************************************************************** |
9 */ | 9 */ |
10 | 10 |
11 #include "unicode/utypes.h" | 11 #include "unicode/utypes.h" |
12 | 12 |
13 #if !UCONFIG_NO_TRANSLITERATION | 13 #if !UCONFIG_NO_TRANSLITERATION |
14 | 14 |
15 #include "unicode/uobject.h" | 15 #include "unicode/uobject.h" |
16 #include "unicode/uscript.h" | 16 #include "unicode/uscript.h" |
| 17 |
| 18 #include "anytrans.h" |
| 19 #include "hash.h" |
| 20 #include "mutex.h" |
17 #include "nultrans.h" | 21 #include "nultrans.h" |
18 #include "anytrans.h" | 22 #include "putilimp.h" |
| 23 #include "tridpars.h" |
| 24 #include "uinvchar.h" |
19 #include "uvector.h" | 25 #include "uvector.h" |
20 #include "tridpars.h" | |
21 #include "hash.h" | |
22 #include "putilimp.h" | |
23 #include "uinvchar.h" | |
24 | 26 |
25 //------------------------------------------------------------ | 27 //------------------------------------------------------------ |
26 // Constants | 28 // Constants |
27 | 29 |
// File-local constants, identical in the OLD and NEW columns of this diff.
// Each value is spelled as numeric character codes rather than as a literal;
// the trailing comment on every row gives the text it encodes. The array
// constants end with an explicit 0 terminator.
// Within the visible code, TARGET_SEP joins "<source>-<target>" and
// LATIN_PIVOT joins "<source>-Latin;Latin-<target>" in getTransliterator();
// VARIANT_SEP joins "<target>/<variant>" in the AnyTransliterator constructor.
28 static const UChar TARGET_SEP = 45; // '-' | 30 static const UChar TARGET_SEP = 45; // '-' |
29 static const UChar VARIANT_SEP = 47; // '/' | 31 static const UChar VARIANT_SEP = 47; // '/' |
30 static const UChar ANY[] = {65,110,121,0}; // "Any" | 32 static const UChar ANY[] = {65,110,121,0}; // "Any" |
31 static const UChar NULL_ID[] = {78,117,108,108,0}; // "Null" | 33 static const UChar NULL_ID[] = {78,117,108,108,0}; // "Null" |
32 static const UChar LATIN_PIVOT[] = {45,76,97,116,105,110,59,76,97,116,105,110,45
,0}; // "-Latin;Latin-" | 34 static const UChar LATIN_PIVOT[] = {45,76,97,116,105,110,59,76,97,116,105,110,45
,0}; // "-Latin;Latin-" |
33 | 35 |
34 //------------------------------------------------------------ | 36 //------------------------------------------------------------ |
35 | 37 |
// Value-deleter callback: casts the untyped pointer back to
// icu::Transliterator* and deletes it. The AnyTransliterator constructor
// installs it on the per-instance cache via uhash_setValueDeleter(), so the
// hash table owns the transliterators stored in it.
// The U_CDECL_BEGIN / U_CDECL_END wrapper presumably gives the function C
// linkage so it can be handed to the C hash-table API -- confirm in utypes.h.
// Unchanged between the OLD and NEW columns.
36 U_CDECL_BEGIN | 38 U_CDECL_BEGIN |
37 /** | 39 /** |
38 * Deleter function for Transliterator*. | 40 * Deleter function for Transliterator*. |
39 */ | 41 */ |
40 static void U_CALLCONV | 42 static void U_CALLCONV |
41 _deleteTransliterator(void *obj) { | 43 _deleteTransliterator(void *obj) { |
42 delete (icu::Transliterator*) obj; | 44 delete (icu::Transliterator*) obj; |
43 } | 45 } |
44 U_CDECL_END | 46 U_CDECL_END |
45 | 47 |
46 //------------------------------------------------------------ | 48 //------------------------------------------------------------ |
47 | 49 |
48 U_NAMESPACE_BEGIN | 50 U_NAMESPACE_BEGIN |
49 | 51 |
50 //------------------------------------------------------------ | 52 //------------------------------------------------------------ |
51 // ScriptRunIterator | 53 // ScriptRunIterator |
52 | 54 |
(...skipping 25 matching lines...) Expand all Loading... |
78 | 80 |
79 /** | 81 /** |
80 * The start of the run, inclusive, valid after next() returns. | 82 * The start of the run, inclusive, valid after next() returns. |
81 */ | 83 */ |
82 int32_t start; | 84 int32_t start; |
83 | 85 |
84 /** | 86 /** |
85 * The end of the run, exclusive, valid after next() returns. | 87 * The end of the run, exclusive, valid after next() returns. |
86 */ | 88 */ |
87 int32_t limit; | 89 int32_t limit; |
88 | 90 |
89 /** | 91 /** |
90 * Constructs a run iterator over the given text from start | 92 * Constructs a run iterator over the given text from start |
91 * (inclusive) to limit (exclusive). | 93 * (inclusive) to limit (exclusive). |
92 */ | 94 */ |
93 ScriptRunIterator(const Replaceable& text, int32_t start, int32_t limit); | 95 ScriptRunIterator(const Replaceable& text, int32_t start, int32_t limit); |
94 | 96 |
95 /** | 97 /** |
96 * Returns TRUE if there are any more runs. TRUE is always | 98 * Returns TRUE if there are any more runs. TRUE is always |
97 * returned at least once. Upon return, the caller should | 99 * returned at least once. Upon return, the caller should |
98 * examine scriptCode, start, and limit. | 100 * examine scriptCode, start, and limit. |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
173 // AnyTransliterator | 175 // AnyTransliterator |
174 | 176 |
// Expands ICU's RTTI boilerplate for AnyTransliterator. The expansion is not
// visible here; presumably it defines the class-ID accessors -- confirm in
// unicode/uobject.h. Unchanged between the OLD and NEW columns.
175 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(AnyTransliterator) | 177 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(AnyTransliterator) |
176 | 178 |
177 AnyTransliterator::AnyTransliterator(const UnicodeString& id, | 179 AnyTransliterator::AnyTransliterator(const UnicodeString& id, |
178 const UnicodeString& theTarget, | 180 const UnicodeString& theTarget, |
179 const UnicodeString& theVariant, | 181 const UnicodeString& theVariant, |
180 UScriptCode theTargetScript, | 182 UScriptCode theTargetScript, |
181 UErrorCode& ec) : | 183 UErrorCode& ec) : |
182 Transliterator(id, NULL), | 184 Transliterator(id, NULL), |
183 targetScript(theTargetScript) | 185 targetScript(theTargetScript) |
184 { | 186 { |
185 cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec); | 187 cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec); |
186 if (U_FAILURE(ec)) { | 188 if (U_FAILURE(ec)) { |
187 return; | 189 return; |
188 } | 190 } |
189 uhash_setValueDeleter(cache, _deleteTransliterator); | 191 uhash_setValueDeleter(cache, _deleteTransliterator); |
190 | 192 |
191 target = theTarget; | 193 target = theTarget; |
192 if (theVariant.length() > 0) { | 194 if (theVariant.length() > 0) { |
193 target.append(VARIANT_SEP).append(theVariant); | 195 target.append(VARIANT_SEP).append(theVariant); |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
232 | 234 |
233 ScriptRunIterator it(text, pos.contextStart, pos.contextLimit); | 235 ScriptRunIterator it(text, pos.contextStart, pos.contextLimit); |
234 | 236 |
235 while (it.next()) { | 237 while (it.next()) { |
236 // Ignore runs in the ante context | 238 // Ignore runs in the ante context |
237 if (it.limit <= allStart) continue; | 239 if (it.limit <= allStart) continue; |
238 | 240 |
239 // Try to instantiate transliterator from it.scriptCode to | 241 // Try to instantiate transliterator from it.scriptCode to |
240 // our target or target/variant | 242 // our target or target/variant |
241 Transliterator* t = getTransliterator(it.scriptCode); | 243 Transliterator* t = getTransliterator(it.scriptCode); |
242 | 244 |
243 if (t == NULL) { | 245 if (t == NULL) { |
244 // We have no transliterator. Do nothing, but keep | 246 // We have no transliterator. Do nothing, but keep |
245 // pos.start up to date. | 247 // pos.start up to date. |
246 pos.start = it.limit; | 248 pos.start = it.limit; |
247 continue; | 249 continue; |
248 } | 250 } |
249 | 251 |
250 // If the run end is before the transliteration limit, do | 252 // If the run end is before the transliteration limit, do |
251 // a non-incremental transliteration. Otherwise do an | 253 // a non-incremental transliteration. Otherwise do an |
252 // incremental one. | 254 // incremental one. |
253 UBool incremental = isIncremental && (it.limit >= allLimit); | 255 UBool incremental = isIncremental && (it.limit >= allLimit); |
254 | 256 |
255 pos.start = uprv_max(allStart, it.start); | 257 pos.start = uprv_max(allStart, it.start); |
256 pos.limit = uprv_min(allLimit, it.limit); | 258 pos.limit = uprv_min(allLimit, it.limit); |
257 int32_t limit = pos.limit; | 259 int32_t limit = pos.limit; |
258 t->filteredTransliterate(text, pos, incremental); | 260 t->filteredTransliterate(text, pos, incremental); |
259 int32_t delta = pos.limit - limit; | 261 int32_t delta = pos.limit - limit; |
260 allLimit += delta; | 262 allLimit += delta; |
261 it.adjustLimit(delta); | 263 it.adjustLimit(delta); |
262 | 264 |
263 // We're done if we enter the post context | 265 // We're done if we enter the post context |
264 if (it.limit >= allLimit) break; | 266 if (it.limit >= allLimit) break; |
265 } | 267 } |
266 | 268 |
267 // Restore limit. pos.start is fine where the last transliterator | 269 // Restore limit. pos.start is fine where the last transliterator |
268 // left it, or at the end of the last run. | 270 // left it, or at the end of the last run. |
269 pos.limit = allLimit; | 271 pos.limit = allLimit; |
270 } | 272 } |
271 | 273 |
// Returns the transliterator that converts text in script `source` to this
// object's target, creating and caching it on first use.
//
// Returns NULL when `source` equals targetScript, when `source` is
// USCRIPT_INVALID_CODE, or when neither "<source>-<target>" nor the
// Latin-pivot id "<source>-Latin;Latin-<target>" can be instantiated.
// A non-NULL result is stored in `cache` (keyed by the script code cast to
// int32_t), which has _deleteTransliterator installed as its value deleter,
// so the caller must not delete the returned pointer.
//
// What this diff changes: the OLD column reads and writes `cache` with no
// locking, although the method is const and can be reached through a shared
// instance. The NEW column wraps the initial lookup and the final
// re-check + insert in scoped `Mutex m(NULL)` objects (presumably ICU's
// global mutex -- confirm in mutex.h). The transliterator is created outside
// any lock; if the re-check finds that another thread has inserted an entry
// in the meantime, the freshly created object is deleted and the cached one
// is returned instead.
272 Transliterator* AnyTransliterator::getTransliterator(UScriptCode source) const { | 274 Transliterator* AnyTransliterator::getTransliterator(UScriptCode source) const { |
273 | 275 |
274 if (source == targetScript || source == USCRIPT_INVALID_CODE) { | 276 if (source == targetScript || source == USCRIPT_INVALID_CODE) { |
275 return NULL; | 277 return NULL; |
276 } | 278 } |
277 | 279 |
// First cache probe. In the NEW column the lock covers only the lookup and
// is released at the closing brace, before any transliterator is created.
278 Transliterator* t = (Transliterator*) uhash_iget(cache, (int32_t) source); | 280 Transliterator* t = NULL; |
| 281 { |
| 282 Mutex m(NULL); |
| 283 t = (Transliterator*) uhash_iget(cache, (int32_t) source); |
| 284 } |
279 if (t == NULL) { | 285 if (t == NULL) { |
280 UErrorCode ec = U_ZERO_ERROR; | 286 UErrorCode ec = U_ZERO_ERROR; |
281 UnicodeString sourceName(uscript_getName(source), -1, US_INV); | 287 UnicodeString sourceName(uscript_getName(source), -1, US_INV); |
282 UnicodeString id(sourceName); | 288 UnicodeString id(sourceName); |
283 id.append(TARGET_SEP).append(target); | 289 id.append(TARGET_SEP).append(target); |
284 | 290 |
285 t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); | 291 t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); |
286 if (U_FAILURE(ec) || t == NULL) { | 292 if (U_FAILURE(ec) || t == NULL) { |
287 delete t; | 293 delete t; |
288 | 294 |
// NOTE(review): `ec` is not reset to U_ZERO_ERROR before the pivot attempt
// below. If the direct attempt left a failure code in `ec` and
// createInstance() returns early on an incoming failure (the usual ICU
// convention -- confirm), the Latin pivot can never succeed in that case.
289 // Try to pivot around Latin, our most common script | 295 // Try to pivot around Latin, our most common script |
290 id = sourceName; | 296 id = sourceName; |
291 id.append(LATIN_PIVOT, -1).append(target); | 297 id.append(LATIN_PIVOT, -1).append(target); |
292 t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); | 298 t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); |
293 if (U_FAILURE(ec) || t == NULL) { | 299 if (U_FAILURE(ec) || t == NULL) { |
294 delete t; | 300 delete t; |
295 t = NULL; | 301 t = NULL; |
296 } | 302 } |
297 } | 303 } |
298 | 304 |
// Publish step. `rt` stays NULL when our object is inserted; otherwise it
// ends up holding our now-redundant object, which is deleted after the lock
// scope closes, and `t` is switched to the entry already in the cache.
// NOTE(review): `ec` is not examined after uhash_iput() in either column,
// and `t` is returned regardless. Confirm what uhash_iput() does with the
// adopted value when it fails, given that a value deleter is installed.
299 if (t != NULL) { | 305 if (t != NULL) { |
300 uhash_iput(cache, (int32_t) source, t, &ec); | 306 Transliterator *rt = NULL; |
| 307 { |
| 308 Mutex m(NULL); |
| 309 rt = static_cast<Transliterator *> (uhash_iget(cache, (int32_t)
source)); |
| 310 if (rt == NULL) { |
| 311 // Common case, no race to cache this new transliterator. |
| 312 uhash_iput(cache, (int32_t) source, t, &ec); |
| 313 } else { |
| 314 // Race case, some other thread beat us to caching this tran
sliterator. |
| 315 Transliterator *temp = rt; |
| 316 rt = t; // Our newly created transliterator that lost the
race & now needs deleting. |
| 317 t = temp; // The transliterator from the cache that we will
return. |
| 318 } |
| 319 } |
| 320 delete rt; // will be non-null only in case of races. |
301 } | 321 } |
302 } | 322 } |
303 | |
304 return t; | 323 return t; |
305 } | 324 } |
// Maps a script name to its UScriptCode. Unchanged between the OLD and NEW
// columns.
//
// Returns USCRIPT_INVALID_CODE (the "-1" mentioned in the comment below is
// presumably that enumerator's value -- confirm in unicode/uscript.h) when:
//   - `name` contains characters outside the invariant set, or
//   - uscript_getCode() does not report exactly one code, or
//   - uscript_getCode() sets a failure status.
//
// `buf` is only filled when the name is invariant; the `!isInvariant ||`
// test short-circuits so the unfilled buffer is never passed to
// uscript_getCode(). The last byte of the 128-byte buffer is forced to 0
// after extraction, so `buf` is always NUL-terminated; how extract() treats
// names that do not fit is not visible here.
307 /** | 326 /** |
308 * Return the script code for a given name, or -1 if not found. | 327 * Return the script code for a given name, or -1 if not found. |
309 */ | 328 */ |
310 static UScriptCode scriptNameToCode(const UnicodeString& name) { | 329 static UScriptCode scriptNameToCode(const UnicodeString& name) { |
311 char buf[128]; | 330 char buf[128]; |
312 UScriptCode code; | 331 UScriptCode code; |
313 UErrorCode ec = U_ZERO_ERROR; | 332 UErrorCode ec = U_ZERO_ERROR; |
314 int32_t nameLen = name.length(); | 333 int32_t nameLen = name.length(); |
315 UBool isInvariant = uprv_isInvariantUString(name.getBuffer(), nameLen); | 334 UBool isInvariant = uprv_isInvariantUString(name.getBuffer(), nameLen); |
316 | 335 |
317 if (isInvariant) { | 336 if (isInvariant) { |
318 name.extract(0, nameLen, buf, (int32_t)sizeof(buf), US_INV); | 337 name.extract(0, nameLen, buf, (int32_t)sizeof(buf), US_INV); |
319 buf[127] = 0; // Make sure that we NULL terminate the string. | 338 buf[127] = 0; // Make sure that we NULL terminate the string. |
320 } | 339 } |
321 if (!isInvariant || uscript_getCode(buf, &code, 1, &ec) != 1 || U_FAILURE(ec
)) | 340 if (!isInvariant || uscript_getCode(buf, &code, 1, &ec) != 1 || U_FAILURE(ec
)) |
322 { | 341 { |
323 code = USCRIPT_INVALID_CODE; | 342 code = USCRIPT_INVALID_CODE; |
324 } | 343 } |
325 return code; | 344 return code; |
326 } | 345 } |
(...skipping 18 matching lines...) Expand all Loading... |
345 | 364 |
346 int32_t targetCount = Transliterator::_countAvailableTargets(source); | 365 int32_t targetCount = Transliterator::_countAvailableTargets(source); |
347 for (int32_t t=0; t<targetCount; ++t) { | 366 for (int32_t t=0; t<targetCount; ++t) { |
348 UnicodeString target; | 367 UnicodeString target; |
349 Transliterator::_getAvailableTarget(t, source, target); | 368 Transliterator::_getAvailableTarget(t, source, target); |
350 | 369 |
351 // Only process each target once | 370 // Only process each target once |
352 if (seen.geti(target) != 0) continue; | 371 if (seen.geti(target) != 0) continue; |
353 ec = U_ZERO_ERROR; | 372 ec = U_ZERO_ERROR; |
354 seen.puti(target, 1, ec); | 373 seen.puti(target, 1, ec); |
355 | 374 |
356 // Get the script code for the target. If not a script, ignore. | 375 // Get the script code for the target. If not a script, ignore. |
357 UScriptCode targetScript = scriptNameToCode(target); | 376 UScriptCode targetScript = scriptNameToCode(target); |
358 if (targetScript == USCRIPT_INVALID_CODE) continue; | 377 if (targetScript == USCRIPT_INVALID_CODE) continue; |
359 | 378 |
360 int32_t variantCount = Transliterator::_countAvailableVariants(sourc
e, target); | 379 int32_t variantCount = Transliterator::_countAvailableVariants(sourc
e, target); |
361 // assert(variantCount >= 1); | 380 // assert(variantCount >= 1); |
362 for (int32_t v=0; v<variantCount; ++v) { | 381 for (int32_t v=0; v<variantCount; ++v) { |
363 UnicodeString variant; | 382 UnicodeString variant; |
364 Transliterator::_getAvailableVariant(v, source, target, variant)
; | 383 Transliterator::_getAvailableVariant(v, source, target, variant)
; |
365 | 384 |
366 UnicodeString id; | 385 UnicodeString id; |
367 TransliteratorIDParser::STVtoID(UnicodeString(TRUE, ANY, 3), tar
get, variant, id); | 386 TransliteratorIDParser::STVtoID(UnicodeString(TRUE, ANY, 3), tar
get, variant, id); |
368 ec = U_ZERO_ERROR; | 387 ec = U_ZERO_ERROR; |
369 AnyTransliterator* t = new AnyTransliterator(id, target, variant
, | 388 AnyTransliterator* t = new AnyTransliterator(id, target, variant
, |
370 targetScript, ec); | 389 targetScript, ec); |
371 if (U_FAILURE(ec)) { | 390 if (U_FAILURE(ec)) { |
372 delete t; | 391 delete t; |
373 } else { | 392 } else { |
374 Transliterator::_registerInstance(t); | 393 Transliterator::_registerInstance(t); |
375 Transliterator::_registerSpecialInverse(target, UnicodeStrin
g(TRUE, NULL_ID, 4), FALSE); | 394 Transliterator::_registerSpecialInverse(target, UnicodeStrin
g(TRUE, NULL_ID, 4), FALSE); |
376 } | 395 } |
377 } | 396 } |
378 } | 397 } |
379 } | 398 } |
380 } | 399 } |
381 | 400 |
382 U_NAMESPACE_END | 401 U_NAMESPACE_END |
383 | 402 |
384 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ | 403 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
385 | 404 |
386 //eof | 405 //eof |
OLD | NEW |