icu46/source/i18n/translit.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/i18n/translit.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 **********************************************************************

	3 * Copyright (C) 1999-2010, International Business Machines

	4 * Corporation and others. All Rights Reserved.

	5 **********************************************************************

	6 * Date Name Description

	7 * 11/17/99 aliu Creation.

	8 **********************************************************************

	9 */

	10

	11 #include <typeinfo> // for 'typeid' to work

	12

	13 #include "unicode/utypes.h"

	14

	15 #if !UCONFIG_NO_TRANSLITERATION

	16

	17 #include "unicode/putil.h"

	18 #include "unicode/translit.h"

	19 #include "unicode/locid.h"

	20 #include "unicode/msgfmt.h"

	21 #include "unicode/rep.h"

	22 #include "unicode/resbund.h"

	23 #include "unicode/unifilt.h"

	24 #include "unicode/uniset.h"

	25 #include "unicode/uscript.h"

	26 #include "unicode/strenum.h"

	27 #include "cpdtrans.h"

	28 #include "nultrans.h"

	29 #include "rbt_data.h"

	30 #include "rbt_pars.h"

	31 #include "rbt.h"

	32 #include "transreg.h"

	33 #include "name2uni.h"

	34 #include "nortrans.h"

	35 #include "remtrans.h"

	36 #include "titletrn.h"

	37 #include "tolowtrn.h"

	38 #include "toupptrn.h"

	39 #include "uni2name.h"

	40 #include "brktrans.h"

	41 #include "esctrn.h"

	42 #include "unesctrn.h"

	43 #include "tridpars.h"

	44 #include "anytrans.h"

	45 #include "util.h"

	46 #include "hash.h"

	47 #include "mutex.h"

	48 #include "ucln_in.h"

	49 #include "uassert.h"

	50 #include "cmemory.h"

	51 #include "cstring.h"

	52 #include "uinvchar.h"

	53

	54 static const UChar TARGET_SEP = 0x002D; /-/

	55 static const UChar ID_DELIM = 0x003B; /;/

	56 static const UChar VARIANT_SEP = 0x002F; // '/'

	57

	58 /**

	59 * Prefix for resource bundle key for the display name for a

	60 * transliterator. The ID is appended to this to form the key.

	61 * The resource bundle value should be a String.

	62 */

	63 static const char RB_DISPLAY_NAME_PREFIX[] = "%Translit%%";

	64

	65 /**

	66 * Prefix for resource bundle key for the display name for a

	67 * transliterator SCRIPT. The ID is appended to this to form the key.

	68 * The resource bundle value should be a String.

	69 */

	70 static const char RB_SCRIPT_DISPLAY_NAME_PREFIX[] = "%Translit%";

	71

	72 /**

	73 * Resource bundle key for display name pattern.

	74 * The resource bundle value should be a String forming a

	75 * MessageFormat pattern, e.g.:

	76 * "{0,choice,0#\|1#{1} Transliterator\|2#{1} to {2} Transliterator}".

	77 */

	78 static const char RB_DISPLAY_NAME_PATTERN[] = "TransliteratorNamePattern";

	79

	80 /**

	81 * Resource bundle key for the list of RuleBasedTransliterator IDs.

	82 * The resource bundle value should be a String[] with each element

	83 * being a valid ID. The ID will be appended to RB_RULE_BASED_PREFIX

	84 * to obtain the class name in which the RB_RULE key will be sought.

	85 */

	86 static const char RB_RULE_BASED_IDS[] = "RuleBasedTransliteratorIDs";

	87

	88 /**

	89 * The mutex controlling access to registry object.

	90 */

	91 static UMTX registryMutex = 0;

	92

	93 /**

	94 * System transliterator registry; non-null when initialized.

	95 */

	96 static U_NAMESPACE_QUALIFIER TransliteratorRegistry* registry = 0;

	97

	98 // Macro to check/initialize the registry. ONLY USE WITHIN

	99 // MUTEX. Avoids function call when registry is initialized.

	100 #define HAVE_REGISTRY(status) (registry!=0 \|\| initializeRegistry(status))

	101

	102 // Empty string

	103 static const UChar EMPTY[] = {0}; //""

	104

	105 U_NAMESPACE_BEGIN

	106

	107 UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(Transliterator)

	108

	109 /**

	110 * Return TRUE if the given UTransPosition is valid for text of

	111 * the given length.

	112 */

	113 static inline UBool positionIsValid(UTransPosition& index, int32_t len) {

	114 return !(index.contextStart < 0 \|\|

	115 index.start < index.contextStart \|\|

	116 index.limit < index.start \|\|

	117 index.contextLimit < index.limit \|\|

	118 len < index.contextLimit);

	119 }

	120

	121 /**

	122 * Default constructor.

	123 * @param theID the string identifier for this transliterator

	124 * @param theFilter the filter. Any character for which

	125 * <tt>filter.contains()</tt> returns <tt>FALSE</tt> will not be

	126 * altered by this transliterator. If <tt>filter</tt> is

	127 * <tt>null</tt> then no filtering is applied.

	128 */

	129 Transliterator::Transliterator(const UnicodeString& theID,

	130 UnicodeFilter* adoptedFilter) :

	131 UObject(), ID(theID), filter(adoptedFilter),

	132 maximumContextLength(0)

	133 {

	134 // NUL-terminate the ID string, which is a non-aliased copy.

	135 ID.append((UChar)0);

	136 ID.truncate(ID.length()-1);

	137 }

	138

	139 /**

	140 * Destructor.

	141 */

	142 Transliterator::~Transliterator() {

	143 if (filter) {

	144 delete filter;

	145 }

	146 }

	147

	148 /**

	149 * Copy constructor.

	150 */

	151 Transliterator::Transliterator(const Transliterator& other) :

	152 UObject(other), ID(other.ID), filter(0),

	153 maximumContextLength(other.maximumContextLength)

	154 {

	155 // NUL-terminate the ID string, which is a non-aliased copy.

	156 ID.append((UChar)0);

	157 ID.truncate(ID.length()-1);

	158

	159 if (other.filter != 0) {

	160 // We own the filter, so we must have our own copy

	161 filter = (UnicodeFilter*) other.filter->clone();

	162 }

	163 }

	164

	165 Transliterator* Transliterator::clone() const {

	166 return NULL;

	167 }

	168

	169 /**

	170 * Assignment operator.

	171 */

	172 Transliterator& Transliterator::operator=(const Transliterator& other) {

	173 ID = other.ID;

	174 // NUL-terminate the ID string

	175 ID.getTerminatedBuffer();

	176

	177 maximumContextLength = other.maximumContextLength;

	178 adoptFilter((other.filter == 0) ? 0 : (UnicodeFilter*) other.filter->clone() );

	179 return *this;

	180 }

	181

	182 /**

	183 * Transliterates a segment of a string. <code>Transliterator</code> API.

	184 * @param text the string to be transliterated

	185 * @param start the beginning index, inclusive; <code>0 <= start

	186 * <= limit</code>.

	187 * @param limit the ending index, exclusive; <code>start <= limit

	188 * <= text.length()</code>.

	189 * @return the new limit index, or -1

	190 */

	191 int32_t Transliterator::transliterate(Replaceable& text,

	192 int32_t start, int32_t limit) const {

	193 if (start < 0 \|\|

	194 limit < start \|\|

	195 text.length() < limit) {

	196 return -1;

	197 }

	198

	199 UTransPosition offsets;

	200 offsets.contextStart= start;

	201 offsets.contextLimit = limit;

	202 offsets.start = start;

	203 offsets.limit = limit;

	204 filteredTransliterate(text, offsets, FALSE, TRUE);

	205 return offsets.limit;

	206 }

	207

	208 /**

	209 * Transliterates an entire string in place. Convenience method.

	210 * @param text the string to be transliterated

	211 */

	212 void Transliterator::transliterate(Replaceable& text) const {

	213 transliterate(text, 0, text.length());

	214 }

	215

	216 /**

	217 * Transliterates the portion of the text buffer that can be

	218 * transliterated unambiguously after new text has been inserted,

	219 * typically as a result of a keyboard event. The new text in

	220 * <code>insertion</code> will be inserted into <code>text</code>

	221 * at <code>index.contextLimit</code>, advancing

	222 * <code>index.contextLimit</code> by <code>insertion.length()</code>.

	223 * Then the transliterator will try to transliterate characters of

	224 * <code>text</code> between <code>index.start</code> and

	225 * <code>index.contextLimit</code>. Characters before

	226 * <code>index.start</code> will not be changed.

	227 *

	228 * <p>Upon return, values in <code>index</code> will be updated.

	229 * <code>index.contextStart</code> will be advanced to the first

	230 * character that future calls to this method will read.

	231 * <code>index.start</code> and <code>index.contextLimit</code> will

	232 * be adjusted to delimit the range of text that future calls to

	233 * this method may change.

	234 *

	235 * <p>Typical usage of this method begins with an initial call

	236 * with <code>index.contextStart</code> and <code>index.contextLimit</code>

	237 * set to indicate the portion of <code>text</code> to be

	238 * transliterated, and <code>index.start == index.contextStart</code>.

	239 * Thereafter, <code>index</code> can be used without

	240 * modification in future calls, provided that all changes to

	241 * <code>text</code> are made via this method.

	242 *

	243 * <p>This method assumes that future calls may be made that will

	244 * insert new text into the buffer. As a result, it only performs

	245 * unambiguous transliterations. After the last call to this

	246 * method, there may be untransliterated text that is waiting for

	247 * more input to resolve an ambiguity. In order to perform these

	248 * pending transliterations, clients should call {@link

	249 * #finishKeyboardTransliteration} after the last call to this

	250 * method has been made.

	251 *

	252 * @param text the buffer holding transliterated and untransliterated text

	253 * @param index an array of three integers.

	254 *

	255 * <ul><li><code>index.contextStart</code>: the beginning index,

	256 * inclusive; <code>0 <= index.contextStart <= index.contextLimit</code>.

	257 *

	258 * <li><code>index.contextLimit</code>: the ending index, exclusive;

	259 * <code>index.contextStart <= index.contextLimit <= text.length()</code>.

	260 * <code>insertion</code> is inserted at

	261 * <code>index.contextLimit</code>.

	262 *

	263 * <li><code>index.start</code>: the next character to be

	264 * considered for transliteration; <code>index.contextStart <=

	265 * index.start <= index.contextLimit</code>. Characters before

	266 * <code>index.start</code> will not be changed by future calls

	267 * to this method.</ul>

	268 *

	269 * @param insertion text to be inserted and possibly

	270 * transliterated into the translation buffer at

	271 * <code>index.contextLimit</code>. If <code>null</code> then no text

	272 * is inserted.

	273 * @see #START

	274 * @see #LIMIT

	275 * @see #CURSOR

	276 * @see #handleTransliterate

	277 * @exception IllegalArgumentException if <code>index</code>

	278 * is invalid

	279 */

	280 void Transliterator::transliterate(Replaceable& text,

	281 UTransPosition& index,

	282 const UnicodeString& insertion,

	283 UErrorCode &status) const {

	284 _transliterate(text, index, &insertion, status);

	285 }

	286

	287 /**

	288 * Transliterates the portion of the text buffer that can be

	289 * transliterated unambiguosly after a new character has been

	290 * inserted, typically as a result of a keyboard event. This is a

	291 * convenience method; see {@link

	292 * #transliterate(Replaceable, int[], String)} for details.

	293 * @param text the buffer holding transliterated and

	294 * untransliterated text

	295 * @param index an array of three integers. See {@link

	296 * #transliterate(Replaceable, int[], String)}.

	297 * @param insertion text to be inserted and possibly

	298 * transliterated into the translation buffer at

	299 * <code>index.contextLimit</code>.

	300 * @see #transliterate(Replaceable, int[], String)

	301 */

	302 void Transliterator::transliterate(Replaceable& text,

	303 UTransPosition& index,

	304 UChar32 insertion,

	305 UErrorCode& status) const {

	306 UnicodeString str(insertion);

	307 _transliterate(text, index, &str, status);

	308 }

	309

	310 /**

	311 * Transliterates the portion of the text buffer that can be

	312 * transliterated unambiguosly. This is a convenience method; see

	313 * {@link #transliterate(Replaceable, int[], String)} for

	314 * details.

	315 * @param text the buffer holding transliterated and

	316 * untransliterated text

	317 * @param index an array of three integers. See {@link

	318 * #transliterate(Replaceable, int[], String)}.

	319 * @see #transliterate(Replaceable, int[], String)

	320 */

	321 void Transliterator::transliterate(Replaceable& text,

	322 UTransPosition& index,

	323 UErrorCode& status) const {

	324 _transliterate(text, index, 0, status);

	325 }

	326

	327 /**

	328 * Finishes any pending transliterations that were waiting for

	329 * more characters. Clients should call this method as the last

	330 * call after a sequence of one or more calls to

	331 * <code>transliterate()</code>.

	332 * @param text the buffer holding transliterated and

	333 * untransliterated text.

	334 * @param index the array of indices previously passed to {@link

	335 * #transliterate}

	336 */

	337 void Transliterator::finishTransliteration(Replaceable& text,

	338 UTransPosition& index) const {

	339 if (!positionIsValid(index, text.length())) {

	340 return;

	341 }

	342

	343 filteredTransliterate(text, index, FALSE, TRUE);

	344 }

	345

	346 /**

	347 * This internal method does keyboard transliteration. If the

	348 * 'insertion' is non-null then we append it to 'text' before

	349 * proceeding. This method calls through to the pure virtual

	350 * framework method handleTransliterate() to do the actual

	351 * work.

	352 */

	353 void Transliterator::_transliterate(Replaceable& text,

	354 UTransPosition& index,

	355 const UnicodeString* insertion,

	356 UErrorCode &status) const {

	357 if (U_FAILURE(status)) {

	358 return;

	359 }

	360

	361 if (!positionIsValid(index, text.length())) {

	362 status = U_ILLEGAL_ARGUMENT_ERROR;

	363 return;

	364 }

	365

	366 // int32_t originalStart = index.contextStart;

	367 if (insertion != 0) {

	368 text.handleReplaceBetween(index.limit, index.limit, *insertion);

	369 index.limit += insertion->length();

	370 index.contextLimit += insertion->length();

	371 }

	372

	373 if (index.limit > 0 &&

	374 UTF_IS_LEAD(text.charAt(index.limit - 1))) {

	375 // Oops, there is a dangling lead surrogate in the buffer.

	376 // This will break most transliterators, since they will

	377 // assume it is part of a pair. Don't transliterate until

	378 // more text comes in.

	379 return;

	380 }

	381

	382 filteredTransliterate(text, index, TRUE, TRUE);

	383

	384 #if 0

	385 // TODO

	386 // I CAN'T DO what I'm attempting below now that the Kleene star

	387 // operator is supported. For example, in the rule

	388

	389 // ([:Lu:]+) { x } > $1;

	390

	391 // what is the maximum context length? getMaximumContextLength()

	392 // will return 1, but this is just the length of the ante context

	393 // part of the pattern string -- 1 character, which is a standin

	394 // for a Quantifier, which contains a StringMatcher, which

	395 // contains a UnicodeSet.

	396

	397 // There is a complicated way to make this work again, and that's

	398 // to add a "maximum left context" protocol into the

	399 // UnicodeMatcher hierarchy. At present I'm not convinced this is

	400 // worth it.

	401

	402 // ---

	403

	404 // The purpose of the code below is to keep the context small

	405 // while doing incremental transliteration. When part of the left

	406 // context (between contextStart and start) is no longer needed,

	407 // we try to advance contextStart past that portion. We use the

	408 // maximum context length to do so.

	409 int32_t newCS = index.start;

	410 int32_t n = getMaximumContextLength();

	411 while (newCS > originalStart && n-- > 0) {

	412 --newCS;

	413 newCS -= UTF_CHAR_LENGTH(text.char32At(newCS)) - 1;

	414 }

	415 index.contextStart = uprv_max(newCS, originalStart);

	416 #endif

	417 }

	418

	419 /**

	420 * This method breaks up the input text into runs of unfiltered

	421 * characters. It passes each such run to

	422 * <subclass>.handleTransliterate(). Subclasses that can handle the

	423 * filter logic more efficiently themselves may override this method.

	424 *

	425 * All transliteration calls in this class go through this method.

	426 */

	427 void Transliterator::filteredTransliterate(Replaceable& text,

	428 UTransPosition& index,

	429 UBool incremental,

	430 UBool rollback) const {

	431 // Short circuit path for transliterators with no filter in

	432 // non-incremental mode.

	433 if (filter == 0 && !rollback) {

	434 handleTransliterate(text, index, incremental);

	435 return;

	436 }

	437

	438 //----------------------------------------------------------------------

	439 // This method processes text in two groupings:

	440 //

	441 // RUNS -- A run is a contiguous group of characters which are contained

	442 // in the filter for this transliterator (filter.contains(ch) == TRUE).

	443 // Text outside of runs may appear as context but it is not modified.

	444 // The start and limit Position values are narrowed to each run.

	445 //

	446 // PASSES (incremental only) -- To make incremental mode work correctly,

	447 // each run is broken up into n passes, where n is the length (in code

	448 // points) of the run. Each pass contains the first n characters. If a

	449 // pass is completely transliterated, it is committed, and further passes

	450 // include characters after the committed text. If a pass is blocked,

	451 // and does not transliterate completely, then this method rolls back

	452 // the changes made during the pass, extends the pass by one code point,

	453 // and tries again.

	454 //----------------------------------------------------------------------

	455

	456 // globalLimit is the limit value for the entire operation. We

	457 // set index.limit to the end of each unfiltered run before

	458 // calling handleTransliterate(), so we need to maintain the real

	459 // value of index.limit here. After each transliteration, we

	460 // update globalLimit for insertions or deletions that have

	461 // happened.

	462 int32_t globalLimit = index.limit;

	463

	464 // If there is a non-null filter, then break the input text up. Say the

	465 // input text has the form:

	466 // xxxabcxxdefxx

	467 // where 'x' represents a filtered character (filter.contains('x') ==

	468 // false). Then we break this up into:

	469 // xxxabc xxdef xx

	470 // Each pass through the loop consumes a run of filtered

	471 // characters (which are ignored) and a subsequent run of

	472 // unfiltered characters (which are transliterated).

	473

	474 for (;;) {

	475

	476 if (filter != NULL) {

	477 // Narrow the range to be transliterated to the first segment

	478 // of unfiltered characters at or after index.start.

	479

	480 // Advance past filtered chars

	481 UChar32 c;

	482 while (index.start < globalLimit &&

	483 !filter->contains(c=text.char32At(index.start))) {

	484 index.start += UTF_CHAR_LENGTH(c);

	485 }

	486

	487 // Find the end of this run of unfiltered chars

	488 index.limit = index.start;

	489 while (index.limit < globalLimit &&

	490 filter->contains(c=text.char32At(index.limit))) {

	491 index.limit += UTF_CHAR_LENGTH(c);

	492 }

	493 }

	494

	495 // Check to see if the unfiltered run is empty. This only

	496 // happens at the end of the string when all the remaining

	497 // characters are filtered.

	498 if (index.limit == index.start) {

	499 // assert(index.start == globalLimit);

	500 break;

	501 }

	502

	503 // Is this run incremental? If there is additional

	504 // filtered text (if limit < globalLimit) then we pass in

	505 // an incremental value of FALSE to force the subclass to

	506 // complete the transliteration for this run.

	507 UBool isIncrementalRun =

	508 (index.limit < globalLimit ? FALSE : incremental);

	509

	510 int32_t delta;

	511

	512 // Implement rollback. To understand the need for rollback,

	513 // consider the following transliterator:

	514 //

	515 // "t" is "a > A;"

	516 // "u" is "A > b;"

	517 // "v" is a compound of "t; NFD; u" with a filter [:Ll:]

	518 //

	519 // Now apply "c" to the input text "a". The result is "b". But if

	520 // the transliteration is done incrementally, then the NFD holds

	521 // things up after "t" has already transformed "a" to "A". When

	522 // finishTransliterate() is called, "A" is _not_ processed because

	523 // it gets excluded by the [:Ll:] filter, and the end result is "A"

	524 // -- incorrect. The problem is that the filter is applied to a

	525 // partially-transliterated result, when we only want it to apply to

	526 // input text. Although this example hinges on a compound

	527 // transliterator containing NFD and a specific filter, it can

	528 // actually happen with any transliterator which may do a partial

	529 // transformation in incremental mode into characters outside its

	530 // filter.

	531 //

	532 // To handle this, when in incremental mode we supply characters to

	533 // handleTransliterate() in several passes. Each pass adds one more

	534 // input character to the input text. That is, for input "ABCD", we

	535 // first try "A", then "AB", then "ABC", and finally "ABCD". If at

	536 // any point we block (upon return, start < limit) then we roll

	537 // back. If at any point we complete the run (upon return start ==

	538 // limit) then we commit that run.

	539

	540 if (rollback && isIncrementalRun) {

	541

	542 int32_t runStart = index.start;

	543 int32_t runLimit = index.limit;

	544 int32_t runLength = runLimit - runStart;

	545

	546 // Make a rollback copy at the end of the string

	547 int32_t rollbackOrigin = text.length();

	548 text.copy(runStart, runLimit, rollbackOrigin);

	549

	550 // Variables reflecting the commitment of completely

	551 // transliterated text. passStart is the runStart, advanced

	552 // past committed text. rollbackStart is the rollbackOrigin,

	553 // advanced past rollback text that corresponds to committed

	554 // text.

	555 int32_t passStart = runStart;

	556 int32_t rollbackStart = rollbackOrigin;

	557

	558 // The limit for each pass; we advance by one code point with

	559 // each iteration.

	560 int32_t passLimit = index.start;

	561

	562 // Total length, in 16-bit code units, of uncommitted text.

	563 // This is the length to be rolled back.

	564 int32_t uncommittedLength = 0;

	565

	566 // Total delta (change in length) for all passes

	567 int32_t totalDelta = 0;

	568

	569 // PASS MAIN LOOP -- Start with a single character, and extend

	570 // the text by one character at a time. Roll back partial

	571 // transliterations and commit complete transliterations.

	572 for (;;) {

	573 // Length of additional code point, either one or two

	574 int32_t charLength =

	575 UTF_CHAR_LENGTH(text.char32At(passLimit));

	576 passLimit += charLength;

	577 if (passLimit > runLimit) {

	578 break;

	579 }

	580 uncommittedLength += charLength;

	581

	582 index.limit = passLimit;

	583

	584 // Delegate to subclass for actual transliteration. Upon

	585 // return, start will be updated to point after the

	586 // transliterated text, and limit and contextLimit will be

	587 // adjusted for length changes.

	588 handleTransliterate(text, index, TRUE);

	589

	590 delta = index.limit - passLimit; // change in length

	591

	592 // We failed to completely transliterate this pass.

	593 // Roll back the text. Indices remain unchanged; reset

	594 // them where necessary.

	595 if (index.start != index.limit) {

	596 // Find the rollbackStart, adjusted for length changes

	597 // and the deletion of partially transliterated text.

	598 int32_t rs = rollbackStart + delta - (index.limit - passStar t);

	599

	600 // Delete the partially transliterated text

	601 text.handleReplaceBetween(passStart, index.limit, EMPTY);

	602

	603 // Copy the rollback text back

	604 text.copy(rs, rs + uncommittedLength, passStart);

	605

	606 // Restore indices to their original values

	607 index.start = passStart;

	608 index.limit = passLimit;

	609 index.contextLimit -= delta;

	610 }

	611

	612 // We did completely transliterate this pass. Update the

	613 // commit indices to record how far we got. Adjust indices

	614 // for length change.

	615 else {

	616 // Move the pass indices past the committed text.

	617 passStart = passLimit = index.start;

	618

	619 // Adjust the rollbackStart for length changes and move

	620 // it past the committed text. All characters we've

	621 // processed to this point are committed now, so zero

	622 // out the uncommittedLength.

	623 rollbackStart += delta + uncommittedLength;

	624 uncommittedLength = 0;

	625

	626 // Adjust indices for length changes.

	627 runLimit += delta;

	628 totalDelta += delta;

	629 }

	630 }

	631

	632 // Adjust overall limit and rollbackOrigin for insertions and

	633 // deletions. Don't need to worry about contextLimit because

	634 // handleTransliterate() maintains that.

	635 rollbackOrigin += totalDelta;

	636 globalLimit += totalDelta;

	637

	638 // Delete the rollback copy

	639 text.handleReplaceBetween(rollbackOrigin, rollbackOrigin + runLength , EMPTY);

	640

	641 // Move start past committed text

	642 index.start = passStart;

	643 }

	644

	645 else {

	646 // Delegate to subclass for actual transliteration.

	647 int32_t limit = index.limit;

	648 handleTransliterate(text, index, isIncrementalRun);

	649 delta = index.limit - limit; // change in length

	650

	651 // In a properly written transliterator, start == limit after

	652 // handleTransliterate() returns when incremental is false.

	653 // Catch cases where the subclass doesn't do this, and throw

	654 // an exception. (Just pinning start to limit is a bad idea,

	655 // because what's probably happening is that the subclass

	656 // isn't transliterating all the way to the end, and it should

	657 // in non-incremental mode.)

	658 if (!incremental && index.start != index.limit) {

	659 // We can't throw an exception, so just fudge things

	660 index.start = index.limit;

	661 }

	662

	663 // Adjust overall limit for insertions/deletions. Don't need

	664 // to worry about contextLimit because handleTransliterate()

	665 // maintains that.

	666 globalLimit += delta;

	667 }

	668

	669 if (filter == NULL \|\| isIncrementalRun) {

	670 break;

	671 }

	672

	673 // If we did completely transliterate this

	674 // run, then repeat with the next unfiltered run.

	675 }

	676

	677 // Start is valid where it is. Limit needs to be put back where

	678 // it was, modulo adjustments for deletions/insertions.

	679 index.limit = globalLimit;

	680 }

	681

	682 void Transliterator::filteredTransliterate(Replaceable& text,

	683 UTransPosition& index,

	684 UBool incremental) const {

	685 filteredTransliterate(text, index, incremental, FALSE);

	686 }

	687

	688 /**

	689 * Method for subclasses to use to set the maximum context length.

	690 * @see #getMaximumContextLength

	691 */

	692 void Transliterator::setMaximumContextLength(int32_t maxContextLength) {

	693 maximumContextLength = maxContextLength;

	694 }

	695

	696 /**

	697 * Returns a programmatic identifier for this transliterator.

	698 * If this identifier is passed to <code>getInstance()</code>, it

	699 * will return this object, if it has been registered.

	700 * @see #registerInstance

	701 * @see #getAvailableIDs

	702 */

	703 const UnicodeString& Transliterator::getID(void) const {

	704 return ID;

	705 }

	706

	707 /**

	708 * Returns a name for this transliterator that is appropriate for

	709 * display to the user in the default locale. See {@link

	710 * #getDisplayName(Locale)} for details.

	711 */

	712 UnicodeString& U_EXPORT2 Transliterator::getDisplayName(const UnicodeString& ID,

	713 UnicodeString& result) {

	714 return getDisplayName(ID, Locale::getDefault(), result);

	715 }

	716

	717 /**

	718 * Returns a name for this transliterator that is appropriate for

	719 * display to the user in the given locale. This name is taken

	720 * from the locale resource data in the standard manner of the

	721 * <code>java.text</code> package.

	722 *

	723 * <p>If no localized names exist in the system resource bundles,

	724 * a name is synthesized using a localized

	725 * <code>MessageFormat</code> pattern from the resource data. The

	726 * arguments to this pattern are an integer followed by one or two

	727 * strings. The integer is the number of strings, either 1 or 2.

	728 * The strings are formed by splitting the ID for this

	729 * transliterator at the first TARGET_SEP. If there is no TARGET_SEP, then the

	730 * entire ID forms the only string.

	731 * @param inLocale the Locale in which the display name should be

	732 * localized.

	733 * @see java.text.MessageFormat

	734 */

	735 UnicodeString& U_EXPORT2 Transliterator::getDisplayName(const UnicodeString& id,

	736 const Locale& inLocale,

	737 UnicodeString& result) {

	738 UErrorCode status = U_ZERO_ERROR;

	739

	740 ResourceBundle bundle(U_ICUDATA_TRANSLIT, inLocale, status);

	741

	742 // Suspend checking status until later...

	743

	744 result.truncate(0);

	745

	746 // Normalize the ID

	747 UnicodeString source, target, variant;

	748 UBool sawSource;

	749 TransliteratorIDParser::IDtoSTV(id, source, target, variant, sawSource);

	750 if (target.length() < 1) {

	751 // No target; malformed id

	752 return result;

	753 }

	754 if (variant.length() > 0) { // Change "Foo" to "/Foo"

	755 variant.insert(0, VARIANT_SEP);

	756 }

	757 UnicodeString ID(source);

	758 ID.append(TARGET_SEP).append(target).append(variant);

	759

	760 // build the char* key

	761 if (uprv_isInvariantUString(ID.getBuffer(), ID.length())) {

	762 char key[200];

	763 uprv_strcpy(key, RB_DISPLAY_NAME_PREFIX);

	764 int32_t length=(int32_t)uprv_strlen(RB_DISPLAY_NAME_PREFIX);

	765 ID.extract(0, (int32_t)(sizeof(key)-length), key+length, (int32_t)(sizeo f(key)-length), US_INV);

	766

	767 // Try to retrieve a UnicodeString from the bundle.

	768 UnicodeString resString = bundle.getStringEx(key, status);

	769

	770 if (U_SUCCESS(status) && resString.length() != 0) {

	771 return result = resString; // [sic] assign & return

	772 }

	773

	774 #if !UCONFIG_NO_FORMATTING

	775 // We have failed to get a name from the locale data. This is

	776 // typical, since most transliterators will not have localized

	777 // name data. The next step is to retrieve the MessageFormat

	778 // pattern from the locale data and to use it to synthesize the

	779 // name from the ID.

	780

	781 status = U_ZERO_ERROR;

	782 resString = bundle.getStringEx(RB_DISPLAY_NAME_PATTERN, status);

	783

	784 if (U_SUCCESS(status) && resString.length() != 0) {

	785 MessageFormat msg(resString, inLocale, status);

	786 // Suspend checking status until later...

	787

	788 // We pass either 2 or 3 Formattable objects to msg.

	789 Formattable args[3];

	790 int32_t nargs;

	791 args[0].setLong(2); // # of args to follow

	792 args[1].setString(source);

	793 args[2].setString(target);

	794 nargs = 3;

	795

	796 // Use display names for the scripts, if they exist

	797 UnicodeString s;

	798 length=(int32_t)uprv_strlen(RB_SCRIPT_DISPLAY_NAME_PREFIX);

	799 for (int j=1; j<=2; ++j) {

	800 status = U_ZERO_ERROR;

	801 uprv_strcpy(key, RB_SCRIPT_DISPLAY_NAME_PREFIX);

	802 args[j].getString(s);

	803 if (uprv_isInvariantUString(s.getBuffer(), s.length())) {

	804 s.extract(0, sizeof(key)-length-1, key+length, (int32_t)size of(key)-length-1, US_INV);

	805

	806 resString = bundle.getStringEx(key, status);

	807

	808 if (U_SUCCESS(status)) {

	809 args[j] = resString;

	810 }

	811 }

	812 }

	813

	814 status = U_ZERO_ERROR;

	815 FieldPosition pos; // ignored by msg

	816 msg.format(args, nargs, result, pos, status);

	817 if (U_SUCCESS(status)) {

	818 result.append(variant);

	819 return result;

	820 }

	821 }

	822 #endif

	823 }

	824

	825 // We should not reach this point unless there is something

	826 // wrong with the build or the RB_DISPLAY_NAME_PATTERN has

	827 // been deleted from the root RB_LOCALE_ELEMENTS resource.

	828 result = ID;

	829 return result;

	830 }

	831

	832 /**

	833 * Returns the filter used by this transliterator, or <tt>null</tt>

	834 * if this transliterator uses no filter. Caller musn't delete

	835 * the result!

	836 */

	837 const UnicodeFilter* Transliterator::getFilter(void) const {

	838 return filter;

	839 }

	840

	841 /**

	842 * Returns the filter used by this transliterator, or

	843 * <tt>NULL</tt> if this transliterator uses no filter. The

	844 * caller must eventually delete the result. After this call,

	845 * this transliterator's filter is set to <tt>NULL</tt>.

	846 */

	847 UnicodeFilter* Transliterator::orphanFilter(void) {

	848 UnicodeFilter *result = filter;

	849 filter = NULL;

	850 return result;

	851 }

	852

	853 /**

	854 * Changes the filter used by this transliterator. If the filter

	855 * is set to <tt>null</tt> then no filtering will occur.

	856 *

	857 * <p>Callers must take care if a transliterator is in use by

	858 * multiple threads. The filter should not be changed by one

	859 * thread while another thread may be transliterating.

	860 */

	861 void Transliterator::adoptFilter(UnicodeFilter* filterToAdopt) {

	862 delete filter;

	863 filter = filterToAdopt;

	864 }

	865

	866 /**

	867 * Returns this transliterator's inverse. See the class

	868 * documentation for details. This implementation simply inverts

	869 * the two entities in the ID and attempts to retrieve the

	870 * resulting transliterator. That is, if <code>getID()</code>

	871 * returns "A-B", then this method will return the result of

	872 * <code>getInstance("B-A")</code>, or <code>null</code> if that

	873 * call fails.

	874 *

	875 * <p>This method does not take filtering into account. The

	876 * returned transliterator will have no filter.

	877 *

	878 * <p>Subclasses with knowledge of their inverse may wish to

	879 * override this method.

	880 *

	881 * @return a transliterator that is an inverse, not necessarily

	882 * exact, of this transliterator, or <code>null</code> if no such

	883 * transliterator is registered.

	884 * @see #registerInstance

	885 */

	886 Transliterator* Transliterator::createInverse(UErrorCode& status) const {

	887 UParseError parseError;

	888 return Transliterator::createInstance(ID, UTRANS_REVERSE,parseError,status);

	889 }

	890

	891 Transliterator* U_EXPORT2

	892 Transliterator::createInstance(const UnicodeString& ID,

	893 UTransDirection dir,

	894 UErrorCode& status)

	895 {

	896 UParseError parseError;

	897 return createInstance(ID, dir, parseError, status);

	898 }

	899

	900 /**

	901 * Returns a <code>Transliterator</code> object given its ID.

	902 * The ID must be either a system transliterator ID or a ID registered

	903 * using <code>registerInstance()</code>.

	904 *

	905 * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>

	906 * @return A <code>Transliterator</code> object with the given ID

	907 * @see #registerInstance

	908 * @see #getAvailableIDs

	909 * @see #getID

	910 */

	911 Transliterator* U_EXPORT2

	912 Transliterator::createInstance(const UnicodeString& ID,

	913 UTransDirection dir,

	914 UParseError& parseError,

	915 UErrorCode& status)

	916 {

	917 if (U_FAILURE(status)) {

	918 return 0;

	919 }

	920

	921 UnicodeString canonID;

	922 UVector list(status);

	923 if (U_FAILURE(status)) {

	924 return NULL;

	925 }

	926

	927 UnicodeSet* globalFilter;

	928 // TODO add code for parseError...currently unused, but

	929 // later may be used by parsing code...

	930 if (!TransliteratorIDParser::parseCompoundID(ID, dir, canonID, list, globalF ilter)) {

	931 status = U_INVALID_ID;

	932 return NULL;

	933 }

	934

	935 TransliteratorIDParser::instantiateList(list, status);

	936 if (U_FAILURE(status)) {

	937 return NULL;

	938 }

	939

	940 U_ASSERT(list.size() > 0);

	941 Transliterator* t = NULL;

	942

	943 if (list.size() > 1 \|\| canonID.indexOf(ID_DELIM) >= 0) {

	944 // [NOTE: If it's a compoundID, we instantiate a CompoundTransliterator even if it only

	945 // has one child transliterator. This is so that toRules() will return the right thing

	946 // (without any inactive ID), but our main ID still comes out correct. That is, if we

	947 // instantiate "(Lower);Latin-Greek;", we want the rules to come out as "::Latin-Greek;"

	948 // even though the ID is "(Lower);Latin-Greek;".

	949 t = new CompoundTransliterator(list, parseError, status);

	950 }

	951 else {

	952 t = (Transliterator*)list.elementAt(0);

	953 }

	954 // Check null pointer

	955 if (t != NULL) {

	956 t->setID(canonID);

	957 if (globalFilter != NULL) {

	958 t->adoptFilter(globalFilter);

	959 }

	960 }

	961 else if (U_SUCCESS(status)) {

	962 status = U_MEMORY_ALLOCATION_ERROR;

	963 }

	964 return t;

	965 }

	966

	967 /**

	968 * Create a transliterator from a basic ID. This is an ID

	969 * containing only the forward direction source, target, and

	970 * variant.

	971 * @param id a basic ID of the form S-T or S-T/V.

	972 * @return a newly created Transliterator or null if the ID is

	973 * invalid.

	974 */

	975 Transliterator* Transliterator::createBasicInstance(const UnicodeString& id,

	976 const UnicodeString* canon) {

	977 UParseError pe;

	978 UErrorCode ec = U_ZERO_ERROR;

	979 TransliteratorAlias* alias = 0;

	980 Transliterator* t = 0;

	981

	982 umtx_lock(&registryMutex);

	983 if (HAVE_REGISTRY(ec)) {

	984 t = registry->get(id, alias, ec);

	985 }

	986 umtx_unlock(&registryMutex);

	987

	988 if (U_FAILURE(ec)) {

	989 delete t;

	990 delete alias;

	991 return 0;

	992 }

	993

	994 // We may have not gotten a transliterator: Because we can't

	995 // instantiate a transliterator from inside TransliteratorRegistry::

	996 // get() (that would deadlock), we sometimes pass back an alias. This

	997 // contains the data we need to finish the instantiation outside the

	998 // registry mutex. The alias may, in turn, generate another alias, so

	999 // we handle aliases in a loop. The max times through the loop is two.

	1000 // [alan]

	1001 while (alias != 0) {

	1002 U_ASSERT(t==0);

	1003 // Rule-based aliases are handled with TransliteratorAlias::

	1004 // parse(), followed by TransliteratorRegistry::reget().

	1005 // Other aliases are handled with TransliteratorAlias::create().

	1006 if (alias->isRuleBased()) {

	1007 // Step 1. parse

	1008 TransliteratorParser parser(ec);

	1009 alias->parse(parser, pe, ec);

	1010 delete alias;

	1011 alias = 0;

	1012

	1013 // Step 2. reget

	1014 umtx_lock(&registryMutex);

	1015 if (HAVE_REGISTRY(ec)) {

	1016 t = registry->reget(id, parser, alias, ec);

	1017 }

	1018 umtx_unlock(&registryMutex);

	1019

	1020 // Step 3. Loop back around!

	1021 } else {

	1022 t = alias->create(pe, ec);

	1023 delete alias;

	1024 alias = 0;

	1025 break;

	1026 }

	1027 if (U_FAILURE(ec)) {

	1028 delete t;

	1029 delete alias;

	1030 t = NULL;

	1031 break;

	1032 }

	1033 }

	1034

	1035 if (t != NULL && canon != NULL) {

	1036 t->setID(*canon);

	1037 }

	1038

	1039 return t;

	1040 }

	1041

	1042 /**

	1043 * Returns a <code>Transliterator</code> object constructed from

	1044 * the given rule string. This will be a RuleBasedTransliterator,

	1045 * if the rule string contains only rules, or a

	1046 * CompoundTransliterator, if it contains ID blocks, or a

	1047 * NullTransliterator, if it contains ID blocks which parse as

	1048 * empty for the given direction.

	1049 */

	1050 Transliterator* U_EXPORT2

	1051 Transliterator::createFromRules(const UnicodeString& ID,

	1052 const UnicodeString& rules,

	1053 UTransDirection dir,

	1054 UParseError& parseError,

	1055 UErrorCode& status)

	1056 {

	1057 Transliterator* t = NULL;

	1058

	1059 TransliteratorParser parser(status);

	1060 parser.parse(rules, dir, parseError, status);

	1061

	1062 if (U_FAILURE(status)) {

	1063 return 0;

	1064 }

	1065

	1066 // NOTE: The logic here matches that in TransliteratorRegistry.

	1067 if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 0) {

	1068 t = new NullTransliterator();

	1069 }

	1070 else if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 1) {

	1071 t = new RuleBasedTransliterator(ID, (TransliterationRuleData*)parser.dat aVector.orphanElementAt(0), TRUE);

	1072 }

	1073 else if (parser.idBlockVector.size() == 1 && parser.dataVector.size() == 0) {

	1074 // idBlock, no data -- this is an alias. The ID has

	1075 // been munged from reverse into forward mode, if

	1076 // necessary, so instantiate the ID in the forward

	1077 // direction.

	1078 if (parser.compoundFilter != NULL) {

	1079 UnicodeString filterPattern;

	1080 parser.compoundFilter->toPattern(filterPattern, FALSE);

	1081 t = createInstance(filterPattern + UnicodeString(ID_DELIM)

	1082 + ((UnicodeString)parser.idBlockVector.elementAt(0)), UTRA NS_FORWARD, parseError, status);

	1083 }

	1084 else

	1085 t = createInstance(((UnicodeString)parser.idBlockVector.elementAt( 0)), UTRANS_FORWARD, parseError, status);

	1086

	1087

	1088 if (t != NULL) {

	1089 t->setID(ID);

	1090 }

	1091 }

	1092 else {

	1093 UVector transliterators(status);

	1094 int32_t passNumber = 1;

	1095

	1096 int32_t limit = parser.idBlockVector.size();

	1097 if (parser.dataVector.size() > limit)

	1098 limit = parser.dataVector.size();

	1099

	1100 for (int32_t i = 0; i < limit; i++) {

	1101 if (i < parser.idBlockVector.size()) {

	1102 UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.el ementAt(i);

	1103 if (!idBlock->isEmpty()) {

	1104 Transliterator* temp = createInstance(*idBlock, UTRANS_FORWA RD, parseError, status);

	1105 if (temp != NULL && typeid(*temp) != typeid(NullTransliterat or))

	1106 transliterators.addElement(temp, status);

	1107 else

	1108 delete temp;

	1109 }

	1110 }

	1111 if (!parser.dataVector.isEmpty()) {

	1112 TransliterationRuleData* data = (TransliterationRuleData*)parser .dataVector.orphanElementAt(0);

	1113 RuleBasedTransliterator* temprbt = new RuleBasedTransliterator(U nicodeString(CompoundTransliterator::PASS_STRING) + (passNumber++),

	1114 data, TRUE);

	1115 // Check if NULL before adding it to transliterators to avoid fu ture usage of NULL pointer.

	1116 if (temprbt == NULL) {

	1117 status = U_MEMORY_ALLOCATION_ERROR;

	1118 return t;

	1119 }

	1120 transliterators.addElement(temprbt, status);

	1121 }

	1122 }

	1123

	1124 t = new CompoundTransliterator(transliterators, passNumber - 1, parseErr or, status);

	1125 // Null pointer check

	1126 if (t != NULL) {

	1127 t->setID(ID);

	1128 t->adoptFilter(parser.orphanCompoundFilter());

	1129 }

	1130 }

	1131 if (U_SUCCESS(status) && t == NULL) {

	1132 status = U_MEMORY_ALLOCATION_ERROR;

	1133 }

	1134 return t;

	1135 }

	1136

	1137 UnicodeString& Transliterator::toRules(UnicodeString& rulesSource,

	1138 UBool escapeUnprintable) const {

	1139 // The base class implementation of toRules munges the ID into

	1140 // the correct format. That is: foo => ::foo

	1141 if (escapeUnprintable) {

	1142 rulesSource.truncate(0);

	1143 UnicodeString id = getID();

	1144 for (int32_t i=0; i<id.length();) {

	1145 UChar32 c = id.char32At(i);

	1146 if (!ICU_Utility::escapeUnprintable(rulesSource, c)) {

	1147 rulesSource.append(c);

	1148 }

	1149 i += UTF_CHAR_LENGTH(c);

	1150 }

	1151 } else {

	1152 rulesSource = getID();

	1153 }

	1154 // KEEP in sync with rbt_pars

	1155 rulesSource.insert(0, UNICODE_STRING_SIMPLE("::"));

	1156 rulesSource.append(ID_DELIM);

	1157 return rulesSource;

	1158 }

	1159

	1160 int32_t Transliterator::countElements() const {

	1161 const CompoundTransliterator* ct = dynamic_cast<const CompoundTransliterator *>(this);

	1162 return ct != NULL ? ct->getCount() : 0;

	1163 }

	1164

	1165 const Transliterator& Transliterator::getElement(int32_t index, UErrorCode& ec) const {

	1166 if (U_FAILURE(ec)) {

	1167 return *this;

	1168 }

	1169 const CompoundTransliterator* cpd = dynamic_cast<const CompoundTransliterato r*>(this);

	1170 int32_t n = (cpd == NULL) ? 1 : cpd->getCount();

	1171 if (index < 0 \|\| index >= n) {

	1172 ec = U_INDEX_OUTOFBOUNDS_ERROR;

	1173 return *this;

	1174 } else {

	1175 return (n == 1) ? *this : cpd->getTransliterator(index);

	1176 }

	1177 }

	1178

	1179 UnicodeSet& Transliterator::getSourceSet(UnicodeSet& result) const {

	1180 handleGetSourceSet(result);

	1181 if (filter != NULL) {

	1182 UnicodeSet* filterSet = dynamic_cast<UnicodeSet*>(filter);

	1183 UBool deleteFilterSet = FALSE;

	1184 // Most, but not all filters will be UnicodeSets. Optimize for

	1185 // the high-runner case.

	1186 if (filterSet == NULL) {

	1187 filterSet = new UnicodeSet();

	1188 // Check null pointer

	1189 if (filterSet == NULL) {

	1190 return result;

	1191 }

	1192 deleteFilterSet = TRUE;

	1193 filter->addMatchSetTo(*filterSet);

	1194 }

	1195 result.retainAll(*filterSet);

	1196 if (deleteFilterSet) {

	1197 delete filterSet;

	1198 }

	1199 }

	1200 return result;

	1201 }

	1202

	1203 void Transliterator::handleGetSourceSet(UnicodeSet& result) const {

	1204 result.clear();

	1205 }

	1206

	1207 UnicodeSet& Transliterator::getTargetSet(UnicodeSet& result) const {

	1208 return result.clear();

	1209 }

	1210

	1211 // For public consumption

	1212 void U_EXPORT2 Transliterator::registerFactory(const UnicodeString& id,

	1213 Transliterator::Factory factory,

	1214 Transliterator::Token context) {

	1215 Mutex lock(&registryMutex);

	1216 UErrorCode ec = U_ZERO_ERROR;

	1217 if (HAVE_REGISTRY(ec)) {

	1218 _registerFactory(id, factory, context);

	1219 }

	1220 }

	1221

	1222 // To be called only by Transliterator subclasses that are called

	1223 // to register themselves by initializeRegistry().

	1224 void Transliterator::_registerFactory(const UnicodeString& id,

	1225 Transliterator::Factory factory,

	1226 Transliterator::Token context) {

	1227 UErrorCode ec = U_ZERO_ERROR;

	1228 registry->put(id, factory, context, TRUE, ec);

	1229 }

	1230

	1231 // To be called only by Transliterator subclasses that are called

	1232 // to register themselves by initializeRegistry().

	1233 void Transliterator::_registerSpecialInverse(const UnicodeString& target,

	1234 const UnicodeString& inverseTarget,

	1235 UBool bidirectional) {

	1236 UErrorCode status = U_ZERO_ERROR;

	1237 TransliteratorIDParser::registerSpecialInverse(target, inverseTarget, bidire ctional, status);

	1238 }

	1239

	1240 /**

	1241 * Registers a instance <tt>obj</tt> of a subclass of

	1242 * <code>Transliterator</code> with the system. This object must

	1243 * implement the <tt>clone()</tt> method. When

	1244 * <tt>getInstance()</tt> is called with an ID string that is

	1245 * equal to <tt>obj.getID()</tt>, then <tt>obj.clone()</tt> is

	1246 * returned.

	1247 *

	1248 * @param obj an instance of subclass of

	1249 * <code>Transliterator</code> that defines <tt>clone()</tt>

	1250 * @see #getInstance

	1251 * @see #unregister

	1252 */

	1253 void U_EXPORT2 Transliterator::registerInstance(Transliterator* adoptedPrototype ) {

	1254 Mutex lock(&registryMutex);

	1255 UErrorCode ec = U_ZERO_ERROR;

	1256 if (HAVE_REGISTRY(ec)) {

	1257 _registerInstance(adoptedPrototype);

	1258 }

	1259 }

	1260

	1261 void Transliterator::_registerInstance(Transliterator* adoptedPrototype) {

	1262 UErrorCode ec = U_ZERO_ERROR;

	1263 registry->put(adoptedPrototype, TRUE, ec);

	1264 }

	1265

	1266 void U_EXPORT2 Transliterator::registerAlias(const UnicodeString& aliasID,

	1267 const UnicodeString& realID) {

	1268 Mutex lock(&registryMutex);

	1269 UErrorCode ec = U_ZERO_ERROR;

	1270 if (HAVE_REGISTRY(ec)) {

	1271 _registerAlias(aliasID, realID);

	1272 }

	1273 }

	1274

	1275 void Transliterator::_registerAlias(const UnicodeString& aliasID,

	1276 const UnicodeString& realID) {

	1277 UErrorCode ec = U_ZERO_ERROR;

	1278 registry->put(aliasID, realID, FALSE, TRUE, ec);

	1279 }

	1280

	1281 /**

	1282 * Unregisters a transliterator or class. This may be either

	1283 * a system transliterator or a user transliterator or class.

	1284 *

	1285 * @param ID the ID of the transliterator or class

	1286 * @see #registerInstance

	1287

	1288 */

	1289 void U_EXPORT2 Transliterator::unregister(const UnicodeString& ID) {

	1290 Mutex lock(&registryMutex);

	1291 UErrorCode ec = U_ZERO_ERROR;

	1292 if (HAVE_REGISTRY(ec)) {

	1293 registry->remove(ID);

	1294 }

	1295 }

	1296

	1297 /**

	1298 * == OBSOLETE - remove in ICU 3.4 ==

	1299 * Return the number of IDs currently registered with the system.

	1300 * To retrieve the actual IDs, call getAvailableID(i) with

	1301 * i from 0 to countAvailableIDs() - 1.

	1302 */

	1303 int32_t U_EXPORT2 Transliterator::countAvailableIDs(void) {

	1304 int32_t retVal = 0;

	1305 Mutex lock(&registryMutex);

	1306 UErrorCode ec = U_ZERO_ERROR;

	1307 if (HAVE_REGISTRY(ec)) {

	1308 retVal = registry->countAvailableIDs();

	1309 }

	1310 return retVal;

	1311 }

	1312

	1313 /**

	1314 * == OBSOLETE - remove in ICU 3.4 ==

	1315 * Return the index-th available ID. index must be between 0

	1316 * and countAvailableIDs() - 1, inclusive. If index is out of

	1317 * range, the result of getAvailableID(0) is returned.

	1318 */

	1319 const UnicodeString& U_EXPORT2 Transliterator::getAvailableID(int32_t index) {

	1320 const UnicodeString* result = NULL;

	1321 umtx_lock(&registryMutex);

	1322 UErrorCode ec = U_ZERO_ERROR;

	1323 if (HAVE_REGISTRY(ec)) {

	1324 result = &registry->getAvailableID(index);

	1325 }

	1326 umtx_unlock(&registryMutex);

	1327 U_ASSERT(result != NULL); // fail if no registry

	1328 return *result;

	1329 }

	1330

	1331 StringEnumeration* U_EXPORT2 Transliterator::getAvailableIDs(UErrorCode& ec) {

	1332 if (U_FAILURE(ec)) return NULL;

	1333 StringEnumeration* result = NULL;

	1334 umtx_lock(&registryMutex);

	1335 if (HAVE_REGISTRY(ec)) {

	1336 result = registry->getAvailableIDs();

	1337 }

	1338 umtx_unlock(&registryMutex);

	1339 if (result == NULL) {

	1340 ec = U_INTERNAL_TRANSLITERATOR_ERROR;

	1341 }

	1342 return result;

	1343 }

	1344

	1345 int32_t U_EXPORT2 Transliterator::countAvailableSources(void) {

	1346 Mutex lock(&registryMutex);

	1347 UErrorCode ec = U_ZERO_ERROR;

	1348 return HAVE_REGISTRY(ec) ? _countAvailableSources() : 0;

	1349 }

	1350

	1351 UnicodeString& U_EXPORT2 Transliterator::getAvailableSource(int32_t index,

	1352 UnicodeString& result) {

	1353 Mutex lock(&registryMutex);

	1354 UErrorCode ec = U_ZERO_ERROR;

	1355 if (HAVE_REGISTRY(ec)) {

	1356 _getAvailableSource(index, result);

	1357 }

	1358 return result;

	1359 }

	1360

	1361 int32_t U_EXPORT2 Transliterator::countAvailableTargets(const UnicodeString& sou rce) {

	1362 Mutex lock(&registryMutex);

	1363 UErrorCode ec = U_ZERO_ERROR;

	1364 return HAVE_REGISTRY(ec) ? _countAvailableTargets(source) : 0;

	1365 }

	1366

	1367 UnicodeString& U_EXPORT2 Transliterator::getAvailableTarget(int32_t index,

	1368 const UnicodeString& source,

	1369 UnicodeString& result) {

	1370 Mutex lock(&registryMutex);

	1371 UErrorCode ec = U_ZERO_ERROR;

	1372 if (HAVE_REGISTRY(ec)) {

	1373 _getAvailableTarget(index, source, result);

	1374 }

	1375 return result;

	1376 }

	1377

	1378 int32_t U_EXPORT2 Transliterator::countAvailableVariants(const UnicodeString& so urce,

	1379 const UnicodeString& target) {

	1380 Mutex lock(&registryMutex);

	1381 UErrorCode ec = U_ZERO_ERROR;

	1382 return HAVE_REGISTRY(ec) ? _countAvailableVariants(source, target) : 0;

	1383 }

	1384

	1385 UnicodeString& U_EXPORT2 Transliterator::getAvailableVariant(int32_t index,

	1386 const UnicodeString& source,

	1387 const UnicodeString& target,

	1388 UnicodeString& result) {

	1389 Mutex lock(&registryMutex);

	1390 UErrorCode ec = U_ZERO_ERROR;

	1391 if (HAVE_REGISTRY(ec)) {

	1392 _getAvailableVariant(index, source, target, result);

	1393 }

	1394 return result;

	1395 }

	1396

	1397 int32_t Transliterator::_countAvailableSources(void) {

	1398 return registry->countAvailableSources();

	1399 }

	1400

	1401 UnicodeString& Transliterator::_getAvailableSource(int32_t index,

	1402 UnicodeString& result) {

	1403 return registry->getAvailableSource(index, result);

	1404 }

	1405

	1406 int32_t Transliterator::_countAvailableTargets(const UnicodeString& source) {

	1407 return registry->countAvailableTargets(source);

	1408 }

	1409

	1410 UnicodeString& Transliterator::_getAvailableTarget(int32_t index,

	1411 const UnicodeString& source,

	1412 UnicodeString& result) {

	1413 return registry->getAvailableTarget(index, source, result);

	1414 }

	1415

	1416 int32_t Transliterator::_countAvailableVariants(const UnicodeString& source,

	1417 const UnicodeString& target) {

	1418 return registry->countAvailableVariants(source, target);

	1419 }

	1420

	1421 UnicodeString& Transliterator::_getAvailableVariant(int32_t index,

	1422 const UnicodeString& source,

	1423 const UnicodeString& target,

	1424 UnicodeString& result) {

	1425 return registry->getAvailableVariant(index, source, target, result);

	1426 }

	1427

	1428 #ifdef U_USE_DEPRECATED_TRANSLITERATOR_API

	1429

	1430 /**

	1431 * Method for subclasses to use to obtain a character in the given

	1432 * string, with filtering.

	1433 * @deprecated the new architecture provides filtering at the top

	1434 * level. This method will be removed Dec 31 2001.

	1435 */

	1436 UChar Transliterator::filteredCharAt(const Replaceable& text, int32_t i) const {

	1437 UChar c;

	1438 const UnicodeFilter* localFilter = getFilter();

	1439 return (localFilter == 0) ? text.charAt(i) :

	1440 (localFilter->contains(c = text.charAt(i)) ? c : (UChar)0xFFFE);

	1441 }

	1442

	1443 #endif

	1444

	1445 /**

	1446 * If the registry is initialized, return TRUE. If not, initialize it

	1447 * and return TRUE. If the registry cannot be initialized, return

	1448 * FALSE (rare).

	1449 *

	1450 * IMPORTANT: Upon entry, registryMutex must be LOCKED. The entire

	1451 * initialization is done with the lock held. There is NO REASON to

	1452 * unlock, since no other thread that is waiting on the registryMutex

	1453 * cannot itself proceed until the registry is initialized.

	1454 */

	1455 UBool Transliterator::initializeRegistry(UErrorCode &status) {

	1456 if (registry != 0) {

	1457 return TRUE;

	1458 }

	1459

	1460 registry = new TransliteratorRegistry(status);

	1461 if (registry == 0 \|\| U_FAILURE(status)) {

	1462 delete registry;

	1463 registry = 0;

	1464 return FALSE; // can't create registry, no recovery

	1465 }

	1466

	1467 /* The following code parses the index table located in

	1468 * icu/data/translit/root.txt. The index is an n x 4 table

	1469 * that follows this format:

	1470 * <id>{

	1471 * file{

	1472 * resource{"<resource>"}

	1473 * direction{"<direction>"}

	1474 * }

	1475 * }

	1476 * <id>{

	1477 * internal{

	1478 * resource{"<resource>"}

	1479 * direction{"<direction"}

	1480 * }

	1481 * }

	1482 * <id>{

	1483 * alias{"<getInstanceArg"}

	1484 * }

	1485 * <id> is the ID of the system transliterator being defined. These

	1486 * are public IDs enumerated by Transliterator.getAvailableIDs(),

	1487 * unless the second field is "internal".

	1488 *

	1489 * <resource> is a ResourceReader resource name. Currently these refer

	1490 * to file names under com/ibm/text/resources. This string is passed

	1491 * directly to ResourceReader, together with <encoding>.

	1492 *

	1493 * <direction> is either "FORWARD" or "REVERSE".

	1494 *

	1495 * <getInstanceArg> is a string to be passed directly to

	1496 * Transliterator.getInstance(). The returned Transliterator object

	1497 * then has its ID changed to <id> and is returned.

	1498 *

	1499 * The extra blank field on "alias" lines is to make the array square.

	1500 */

	1501 //static const char translit_index[] = "translit_index";

	1502

	1503 UResourceBundle bundle, transIDs, *colBund;

	1504 bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/open default locale/, &status) ;

	1505 transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status);

	1506

	1507 int32_t row, maxRows;

	1508 if (U_SUCCESS(status)) {

	1509 maxRows = ures_getSize(transIDs);

	1510 for (row = 0; row < maxRows; row++) {

	1511 colBund = ures_getByIndex(transIDs, row, 0, &status);

	1512 if (U_SUCCESS(status)) {

	1513 UnicodeString id(ures_getKey(colBund), -1, US_INV);

	1514 UResourceBundle* res = ures_getNextResource(colBund, NULL, &stat us);

	1515 const char* typeStr = ures_getKey(res);

	1516 UChar type;

	1517 u_charsToUChars(typeStr, &type, 1);

	1518

	1519 if (U_SUCCESS(status)) {

	1520 int32_t len = 0;

	1521 const UChar *resString;

	1522 switch (type) {

	1523 case 0x66: // 'f'

	1524 case 0x69: // 'i'

	1525 // 'file' or 'internal';

	1526 // row[2]=resource, row[3]=direction

	1527 {

	1528

	1529 resString = ures_getStringByKey(res, "resource", &le n, &status);

	1530 UBool visible = (type == 0x0066 /f/);

	1531 UTransDirection dir =

	1532 (ures_getUnicodeStringByKey(res, "direction", &s tatus).charAt(0) ==

	1533 0x0046 /F/) ?

	1534 UTRANS_FORWARD : UTRANS_REVERSE;

	1535 registry->put(id, UnicodeString(TRUE, resString, len ), dir, TRUE, visible, status);

	1536 }

	1537 break;

	1538 case 0x61: // 'a'

	1539 // 'alias'; row[2]=createInstance argument

	1540 resString = ures_getString(res, &len, &status);

	1541 registry->put(id, UnicodeString(TRUE, resString, len), T RUE, TRUE, status);

	1542 break;

	1543 }

	1544 }

	1545 ures_close(res);

	1546 }

	1547 ures_close(colBund);

	1548 }

	1549 }

	1550

	1551 ures_close(transIDs);

	1552 ures_close(bundle);

	1553

	1554 // Manually add prototypes that the system knows about to the

	1555 // cache. This is how new non-rule-based transliterators are

	1556 // added to the system.

	1557

	1558 // This is to allow for null pointer check

	1559 NullTransliterator* tempNullTranslit = new NullTransliterator();

	1560 LowercaseTransliterator* tempLowercaseTranslit = new LowercaseTransliterator ();

	1561 UppercaseTransliterator* tempUppercaseTranslit = new UppercaseTransliterator ();

	1562 TitlecaseTransliterator* tempTitlecaseTranslit = new TitlecaseTransliterator ();

	1563 UnicodeNameTransliterator* tempUnicodeTranslit = new UnicodeNameTransliterat or();

	1564 NameUnicodeTransliterator* tempNameUnicodeTranslit = new NameUnicodeTranslit erator();

	1565 #if !UCONFIG_NO_BREAK_ITERATION

	1566 // TODO: could or should these transliterators be referenced polymorphicall y once constructed?

	1567 BreakTransliterator* tempBreakTranslit = new BreakTransliterator();

	1568 #endif

	1569 // Check for null pointers

	1570 if (tempNullTranslit == NULL \|\| tempLowercaseTranslit == NULL \|\| tempUpperca seTranslit == NULL \|\|

	1571 tempTitlecaseTranslit == NULL \|\| tempUnicodeTranslit == NULL \|\|

	1572 #if !UCONFIG_NO_BREAK_ITERATION

	1573 tempBreakTranslit == NULL \|\|

	1574 #endif

	1575 tempNameUnicodeTranslit == NULL )

	1576 {

	1577 delete tempNullTranslit;

	1578 delete tempLowercaseTranslit;

	1579 delete tempUppercaseTranslit;

	1580 delete tempTitlecaseTranslit;

	1581 delete tempUnicodeTranslit;

	1582 delete tempNameUnicodeTranslit;

	1583 #if !UCONFIG_NO_BREAK_ITERATION

	1584 delete tempBreakTranslit;

	1585 #endif

	1586 // Since there was an error, remove registry

	1587 delete registry;

	1588 registry = NULL;

	1589

	1590 status = U_MEMORY_ALLOCATION_ERROR;

	1591 return 0;

	1592 }

	1593

	1594 registry->put(tempNullTranslit, TRUE, status);

	1595 registry->put(tempLowercaseTranslit, TRUE, status);

	1596 registry->put(tempUppercaseTranslit, TRUE, status);

	1597 registry->put(tempTitlecaseTranslit, TRUE, status);

	1598 registry->put(tempUnicodeTranslit, TRUE, status);

	1599 registry->put(tempNameUnicodeTranslit, TRUE, status);

	1600 #if !UCONFIG_NO_BREAK_ITERATION

	1601 registry->put(tempBreakTranslit, FALSE, status); // FALSE means invisible.

	1602 #endif

	1603

	1604 RemoveTransliterator::registerIDs(); // Must be within mutex

	1605 EscapeTransliterator::registerIDs();

	1606 UnescapeTransliterator::registerIDs();

	1607 NormalizationTransliterator::registerIDs();

	1608 AnyTransliterator::registerIDs();

	1609

	1610 _registerSpecialInverse(UNICODE_STRING_SIMPLE("Null"),

	1611 UNICODE_STRING_SIMPLE("Null"), FALSE);

	1612 _registerSpecialInverse(UNICODE_STRING_SIMPLE("Upper"),

	1613 UNICODE_STRING_SIMPLE("Lower"), TRUE);

	1614 _registerSpecialInverse(UNICODE_STRING_SIMPLE("Title"),

	1615 UNICODE_STRING_SIMPLE("Lower"), FALSE);

	1616

	1617 ucln_i18n_registerCleanup(UCLN_I18N_TRANSLITERATOR, utrans_transliterator_cl eanup);

	1618

	1619 return TRUE;

	1620 }

	1621

	1622 U_NAMESPACE_END

	1623

	1624 // Defined in ucln_in.h:

	1625

	1626 /**

	1627 * Release all static memory held by transliterator. This will

	1628 * necessarily invalidate any rule-based transliterators held by the

	1629 * user, because RBTs hold pointers to common data objects.

	1630 */

	1631 U_CFUNC UBool utrans_transliterator_cleanup(void) {

	1632 U_NAMESPACE_USE

	1633 TransliteratorIDParser::cleanup();

	1634 if (registry) {

	1635 delete registry;

	1636 registry = NULL;

	1637 }

	1638 umtx_destroy(&registryMutex);

	1639 return TRUE;

	1640 }

	1641

	1642 #endif /* #if !UCONFIG_NO_TRANSLITERATION */

	1643

	1644 //eof

OLD	NEW

« no previous file with comments | « icu46/source/i18n/toupptrn.cpp ('k') | icu46/source/i18n/transreg.h » ('j') | no next file with comments »