icu46/source/i18n/tridpars.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/i18n/tridpars.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 **********************************************************************

	3 * Copyright (c) 2002-2009, International Business Machines Corporation

	4 * and others. All Rights Reserved.

	5 **********************************************************************

	6 * Date Name Description

	7 * 01/14/2002 aliu Creation.

	8 **********************************************************************

	9 */

	10

	11 #include "unicode/utypes.h"

	12

	13 #if !UCONFIG_NO_TRANSLITERATION

	14

	15 #include "tridpars.h"

	16 #include "hash.h"

	17 #include "mutex.h"

	18 #include "ucln_in.h"

	19 #include "unicode/parsepos.h"

	20 #include "unicode/translit.h"

	21 #include "unicode/uchar.h"

	22 #include "unicode/uniset.h"

	23 #include "unicode/unistr.h"

	24 #include "unicode/utrans.h"

	25 #include "util.h"

	26 #include "uvector.h"

	27

	28 U_NAMESPACE_BEGIN

	29

	30 static const UChar ID_DELIM = 0x003B; // ;

	31 static const UChar TARGET_SEP = 0x002D; // -

	32 static const UChar VARIANT_SEP = 0x002F; // /

	33 static const UChar OPEN_REV = 0x0028; // (

	34 static const UChar CLOSE_REV = 0x0029; // )

	35

	36 //static const UChar EMPTY[] = {0}; // ""

	37 static const UChar ANY[] = {65,110,121,0}; // "Any"

	38 static const UChar ANY_NULL[] = {65,110,121,45,78,117,108,108,0}; // "Any-Null"

	39

	40 static const int32_t FORWARD = UTRANS_FORWARD;

	41 static const int32_t REVERSE = UTRANS_REVERSE;

	42

	43 static Hashtable* SPECIAL_INVERSES = NULL;

	44

	45 /**

	46 * The mutex controlling access to SPECIAL_INVERSES

	47 */

	48 static UMTX LOCK = 0;

	49

	50 TransliteratorIDParser::Specs::Specs(const UnicodeString& s, const UnicodeString & t,

	51 const UnicodeString& v, UBool sawS,

	52 const UnicodeString& f) {

	53 source = s;

	54 target = t;

	55 variant = v;

	56 sawSource = sawS;

	57 filter = f;

	58 }

	59

	60 TransliteratorIDParser::SingleID::SingleID(const UnicodeString& c, const Unicode String& b,

	61 const UnicodeString& f) {

	62 canonID = c;

	63 basicID = b;

	64 filter = f;

	65 }

	66

	67 TransliteratorIDParser::SingleID::SingleID(const UnicodeString& c, const Unicode String& b) {

	68 canonID = c;

	69 basicID = b;

	70 }

	71

	72 Transliterator* TransliteratorIDParser::SingleID::createInstance() {

	73 Transliterator* t;

	74 if (basicID.length() == 0) {

	75 t = createBasicInstance(ANY_NULL, &canonID);

	76 } else {

	77 t = createBasicInstance(basicID, &canonID);

	78 }

	79 if (t != NULL) {

	80 if (filter.length() != 0) {

	81 UErrorCode ec = U_ZERO_ERROR;

	82 UnicodeSet *set = new UnicodeSet(filter, ec);

	83 if (U_FAILURE(ec)) {

	84 delete set;

	85 } else {

	86 t->adoptFilter(set);

	87 }

	88 }

	89 }

	90 return t;

	91 }

	92

	93

	94 /**

	95 * Parse a single ID, that is, an ID of the general form

	96 * "[f1] s1-t1/v1 ([f2] s2-t3/v2)", with the parenthesized element

	97 * optional, the filters optional, and the variants optional.

	98 * @param id the id to be parsed

	99 * @param pos INPUT-OUTPUT parameter. On input, the position of

	100 * the first character to parse. On output, the position after

	101 * the last character parsed.

	102 * @param dir the direction. If the direction is REVERSE then the

	103 * SingleID is constructed for the reverse direction.

	104 * @return a SingleID object or NULL

	105 */

	106 TransliteratorIDParser::SingleID*

	107 TransliteratorIDParser::parseSingleID(const UnicodeString& id, int32_t& pos,

	108 int32_t dir, UErrorCode& status) {

	109

	110 int32_t start = pos;

	111

	112 // The ID will be of the form A, A(), A(B), or (B), where

	113 // A and B are filter IDs.

	114 Specs* specsA = NULL;

	115 Specs* specsB = NULL;

	116 UBool sawParen = FALSE;

	117

	118 // On the first pass, look for (B) or (). If this fails, then

	119 // on the second pass, look for A, A(B), or A().

	120 for (int32_t pass=1; pass<=2; ++pass) {

	121 if (pass == 2) {

	122 specsA = parseFilterID(id, pos, TRUE);

	123 if (specsA == NULL) {

	124 pos = start;

	125 return NULL;

	126 }

	127 }

	128 if (ICU_Utility::parseChar(id, pos, OPEN_REV)) {

	129 sawParen = TRUE;

	130 if (!ICU_Utility::parseChar(id, pos, CLOSE_REV)) {

	131 specsB = parseFilterID(id, pos, TRUE);

	132 // Must close with a ')'

	133 if (specsB == NULL \|\| !ICU_Utility::parseChar(id, pos, CLOSE_REV )) {

	134 delete specsA;

	135 pos = start;

	136 return NULL;

	137 }

	138 }

	139 break;

	140 }

	141 }

	142

	143 // Assemble return results

	144 SingleID* single;

	145 if (sawParen) {

	146 if (dir == FORWARD) {

	147 SingleID* b = specsToID(specsB, FORWARD);

	148 single = specsToID(specsA, FORWARD);

	149 // Null pointers check

	150 if (b == NULL \|\| single == NULL) {

	151 delete b;

	152 delete single;

	153 status = U_MEMORY_ALLOCATION_ERROR;

	154 return NULL;

	155 }

	156 single->canonID.append(OPEN_REV)

	157 .append(b->canonID).append(CLOSE_REV);

	158 if (specsA != NULL) {

	159 single->filter = specsA->filter;

	160 }

	161 delete b;

	162 } else {

	163 SingleID* a = specsToID(specsA, FORWARD);

	164 single = specsToID(specsB, FORWARD);

	165 // Check for null pointer.

	166 if (a == NULL \|\| single == NULL) {

	167 delete a;

	168 delete single;

	169 status = U_MEMORY_ALLOCATION_ERROR;

	170 return NULL;

	171 }

	172 single->canonID.append(OPEN_REV)

	173 .append(a->canonID).append(CLOSE_REV);

	174 if (specsB != NULL) {

	175 single->filter = specsB->filter;

	176 }

	177 delete a;

	178 }

	179 } else {

	180 // assert(specsA != NULL);

	181 if (dir == FORWARD) {

	182 single = specsToID(specsA, FORWARD);

	183 } else {

	184 single = specsToSpecialInverse(*specsA, status);

	185 if (single == NULL) {

	186 single = specsToID(specsA, REVERSE);

	187 }

	188 }

	189 // Check for NULL pointer

	190 if (single == NULL) {

	191 status = U_MEMORY_ALLOCATION_ERROR;

	192 return NULL;

	193 }

	194 single->filter = specsA->filter;

	195 }

	196

	197 delete specsA;

	198 delete specsB;

	199

	200 return single;

	201 }

	202

	203 /**

	204 * Parse a filter ID, that is, an ID of the general form

	205 * "[f1] s1-t1/v1", with the filters optional, and the variants optional.

	206 * @param id the id to be parsed

	207 * @param pos INPUT-OUTPUT parameter. On input, the position of

	208 * the first character to parse. On output, the position after

	209 * the last character parsed.

	210 * @return a SingleID object or null if the parse fails

	211 */

	212 TransliteratorIDParser::SingleID*

	213 TransliteratorIDParser::parseFilterID(const UnicodeString& id, int32_t& pos) {

	214

	215 int32_t start = pos;

	216

	217 Specs* specs = parseFilterID(id, pos, TRUE);

	218 if (specs == NULL) {

	219 pos = start;

	220 return NULL;

	221 }

	222

	223 // Assemble return results

	224 SingleID* single = specsToID(specs, FORWARD);

	225 if (single != NULL) {

	226 single->filter = specs->filter;

	227 }

	228 delete specs;

	229 return single;

	230 }

	231

	232 /**

	233 * Parse a global filter of the form "[f]" or "([f])", depending

	234 * on 'withParens'.

	235 * @param id the pattern the parse

	236 * @param pos INPUT-OUTPUT parameter. On input, the position of

	237 * the first character to parse. On output, the position after

	238 * the last character parsed.

	239 * @param dir the direction.

	240 * @param withParens INPUT-OUTPUT parameter. On entry, if

	241 * withParens is 0, then parens are disallowed. If it is 1,

	242 * then parens are requires. If it is -1, then parens are

	243 * optional, and the return result will be set to 0 or 1.

	244 * @param canonID OUTPUT parameter. The pattern for the filter

	245 * added to the canonID, either at the end, if dir is FORWARD, or

	246 * at the start, if dir is REVERSE. The pattern will be enclosed

	247 * in parentheses if appropriate, and will be suffixed with an

	248 * ID_DELIM character. May be NULL.

	249 * @return a UnicodeSet object or NULL. A non-NULL results

	250 * indicates a successful parse, regardless of whether the filter

	251 * applies to the given direction. The caller should discard it

	252 * if withParens != (dir == REVERSE).

	253 */

	254 UnicodeSet* TransliteratorIDParser::parseGlobalFilter(const UnicodeString& id, i nt32_t& pos,

	255 int32_t dir,

	256 int32_t& withParens,

	257 UnicodeString* canonID) {

	258 UnicodeSet* filter = NULL;

	259 int32_t start = pos;

	260

	261 if (withParens == -1) {

	262 withParens = ICU_Utility::parseChar(id, pos, OPEN_REV) ? 1 : 0;

	263 } else if (withParens == 1) {

	264 if (!ICU_Utility::parseChar(id, pos, OPEN_REV)) {

	265 pos = start;

	266 return NULL;

	267 }

	268 }

	269

	270 ICU_Utility::skipWhitespace(id, pos, TRUE);

	271

	272 if (UnicodeSet::resemblesPattern(id, pos)) {

	273 ParsePosition ppos(pos);

	274 UErrorCode ec = U_ZERO_ERROR;

	275 filter = new UnicodeSet(id, ppos, USET_IGNORE_SPACE, NULL, ec);

	276 /* test for NULL */

	277 if (filter == 0) {

	278 pos = start;

	279 return 0;

	280 }

	281 if (U_FAILURE(ec)) {

	282 delete filter;

	283 pos = start;

	284 return NULL;

	285 }

	286

	287 UnicodeString pattern;

	288 id.extractBetween(pos, ppos.getIndex(), pattern);

	289 pos = ppos.getIndex();

	290

	291 if (withParens == 1 && !ICU_Utility::parseChar(id, pos, CLOSE_REV)) {

	292 pos = start;

	293 return NULL;

	294 }

	295

	296 // In the forward direction, append the pattern to the

	297 // canonID. In the reverse, insert it at zero, and invert

	298 // the presence of parens ("A" <-> "(A)").

	299 if (canonID != NULL) {

	300 if (dir == FORWARD) {

	301 if (withParens == 1) {

	302 pattern.insert(0, OPEN_REV);

	303 pattern.append(CLOSE_REV);

	304 }

	305 canonID->append(pattern).append(ID_DELIM);

	306 } else {

	307 if (withParens == 0) {

	308 pattern.insert(0, OPEN_REV);

	309 pattern.append(CLOSE_REV);

	310 }

	311 canonID->insert(0, pattern);

	312 canonID->insert(pattern.length(), ID_DELIM);

	313 }

	314 }

	315 }

	316

	317 return filter;

	318 }

	319

	320 U_CDECL_BEGIN

	321 static void U_CALLCONV _deleteSingleID(void* obj) {

	322 delete (TransliteratorIDParser::SingleID*) obj;

	323 }

	324

	325 static void U_CALLCONV _deleteTransliteratorTrIDPars(void* obj) {

	326 delete (Transliterator*) obj;

	327 }

	328 U_CDECL_END

	329

	330 /**

	331 * Parse a compound ID, consisting of an optional forward global

	332 * filter, a separator, one or more single IDs delimited by

	333 * separators, an an optional reverse global filter. The

	334 * separator is a semicolon. The global filters are UnicodeSet

	335 * patterns. The reverse global filter must be enclosed in

	336 * parentheses.

	337 * @param id the pattern the parse

	338 * @param dir the direction.

	339 * @param canonID OUTPUT parameter that receives the canonical ID,

	340 * consisting of canonical IDs for all elements, as returned by

	341 * parseSingleID(), separated by semicolons. Previous contents

	342 * are discarded.

	343 * @param list OUTPUT parameter that receives a list of SingleID

	344 * objects representing the parsed IDs. Previous contents are

	345 * discarded.

	346 * @param globalFilter OUTPUT parameter that receives a pointer to

	347 * a newly created global filter for this ID in this direction, or

	348 * NULL if there is none.

	349 * @return TRUE if the parse succeeds, that is, if the entire

	350 * id is consumed without syntax error.

	351 */

	352 UBool TransliteratorIDParser::parseCompoundID(const UnicodeString& id, int32_t d ir,

	353 UnicodeString& canonID,

	354 UVector& list,

	355 UnicodeSet*& globalFilter) {

	356 UErrorCode ec = U_ZERO_ERROR;

	357 int32_t i;

	358 int32_t pos = 0;

	359 int32_t withParens = 1;

	360 list.removeAllElements();

	361 UnicodeSet* filter;

	362 globalFilter = NULL;

	363 canonID.truncate(0);

	364

	365 // Parse leading global filter, if any

	366 withParens = 0; // parens disallowed

	367 filter = parseGlobalFilter(id, pos, dir, withParens, &canonID);

	368 if (filter != NULL) {

	369 if (!ICU_Utility::parseChar(id, pos, ID_DELIM)) {

	370 // Not a global filter; backup and resume

	371 canonID.truncate(0);

	372 pos = 0;

	373 }

	374 if (dir == FORWARD) {

	375 globalFilter = filter;

	376 } else {

	377 delete filter;

	378 }

	379 filter = NULL;

	380 }

	381

	382 UBool sawDelimiter = TRUE;

	383 for (;;) {

	384 SingleID* single = parseSingleID(id, pos, dir, ec);

	385 if (single == NULL) {

	386 break;

	387 }

	388 if (dir == FORWARD) {

	389 list.addElement(single, ec);

	390 } else {

	391 list.insertElementAt(single, 0, ec);

	392 }

	393 if (U_FAILURE(ec)) {

	394 goto FAIL;

	395 }

	396 if (!ICU_Utility::parseChar(id, pos, ID_DELIM)) {

	397 sawDelimiter = FALSE;

	398 break;

	399 }

	400 }

	401

	402 if (list.size() == 0) {

	403 goto FAIL;

	404 }

	405

	406 // Construct canonical ID

	407 for (i=0; i<list.size(); ++i) {

	408 SingleID* single = (SingleID*) list.elementAt(i);

	409 canonID.append(single->canonID);

	410 if (i != (list.size()-1)) {

	411 canonID.append(ID_DELIM);

	412 }

	413 }

	414

	415 // Parse trailing global filter, if any, and only if we saw

	416 // a trailing delimiter after the IDs.

	417 if (sawDelimiter) {

	418 withParens = 1; // parens required

	419 filter = parseGlobalFilter(id, pos, dir, withParens, &canonID);

	420 if (filter != NULL) {

	421 // Don't require trailing ';', but parse it if present

	422 ICU_Utility::parseChar(id, pos, ID_DELIM);

	423

	424 if (dir == REVERSE) {

	425 globalFilter = filter;

	426 } else {

	427 delete filter;

	428 }

	429 filter = NULL;

	430 }

	431 }

	432

	433 // Trailing unparsed text is a syntax error

	434 ICU_Utility::skipWhitespace(id, pos, TRUE);

	435 if (pos != id.length()) {

	436 goto FAIL;

	437 }

	438

	439 return TRUE;

	440

	441 FAIL:

	442 UObjectDeleter *save = list.setDeleter(_deleteSingleID);

	443 list.removeAllElements();

	444 list.setDeleter(save);

	445 delete globalFilter;

	446 globalFilter = NULL;

	447 return FALSE;

	448 }

	449

	450 /**

	451 * Convert the elements of the 'list' vector, which are SingleID

	452 * objects, into actual Transliterator objects. In the course of

	453 * this, some (or all) entries may be removed. If all entries

	454 * are removed, the NULL transliterator will be added.

	455 *

	456 * Delete entries with empty basicIDs; these are generated by

	457 * elements like "(A)" in the forward direction, or "A()" in

	458 * the reverse. THIS MAY RESULT IN AN EMPTY VECTOR. Convert

	459 * SingleID entries to actual transliterators.

	460 *

	461 * @param list vector of SingleID objects. On exit, vector

	462 * of one or more Transliterators.

	463 * @return new value of insertIndex. The index will shift if

	464 * there are empty items, like "(Lower)", with indices less than

	465 * insertIndex.

	466 */

	467 void TransliteratorIDParser::instantiateList(UVector& list,

	468 UErrorCode& ec) {

	469 UVector tlist(ec);

	470 if (U_FAILURE(ec)) {

	471 goto RETURN;

	472 }

	473 tlist.setDeleter(_deleteTransliteratorTrIDPars);

	474

	475 Transliterator* t;

	476 int32_t i;

	477 for (i=0; i<=list.size(); ++i) { // [sic]: i<=list.size()

	478 // We run the loop too long by one, so we can

	479 // do an insert after the last element

	480 if (i==list.size()) {

	481 break;

	482 }

	483

	484 SingleID* single = (SingleID*) list.elementAt(i);

	485 if (single->basicID.length() != 0) {

	486 t = single->createInstance();

	487 if (t == NULL) {

	488 ec = U_INVALID_ID;

	489 goto RETURN;

	490 }

	491 tlist.addElement(t, ec);

	492 if (U_FAILURE(ec)) {

	493 delete t;

	494 goto RETURN;

	495 }

	496 }

	497 }

	498

	499 // An empty list is equivalent to a NULL transliterator.

	500 if (tlist.size() == 0) {

	501 t = createBasicInstance(ANY_NULL, NULL);

	502 if (t == NULL) {

	503 // Should never happen

	504 ec = U_INTERNAL_TRANSLITERATOR_ERROR;

	505 }

	506 tlist.addElement(t, ec);

	507 if (U_FAILURE(ec)) {

	508 delete t;

	509 }

	510 }

	511

	512 RETURN:

	513

	514 UObjectDeleter *save = list.setDeleter(_deleteSingleID);

	515 list.removeAllElements();

	516

	517 if (U_SUCCESS(ec)) {

	518 list.setDeleter(_deleteTransliteratorTrIDPars);

	519

	520 while (tlist.size() > 0) {

	521 t = (Transliterator*) tlist.orphanElementAt(0);

	522 list.addElement(t, ec);

	523 if (U_FAILURE(ec)) {

	524 delete t;

	525 list.removeAllElements();

	526 break;

	527 }

	528 }

	529 }

	530

	531 list.setDeleter(save);

	532 }

	533

	534 /**

	535 * Parse an ID into pieces. Take IDs of the form T, T/V, S-T,

	536 * S-T/V, or S/V-T. If the source is missing, return a source of

	537 * ANY.

	538 * @param id the id string, in any of several forms

	539 * @return an array of 4 strings: source, target, variant, and

	540 * isSourcePresent. If the source is not present, ANY will be

	541 * given as the source, and isSourcePresent will be NULL. Otherwise

	542 * isSourcePresent will be non-NULL. The target may be empty if the

	543 * id is not well-formed. The variant may be empty.

	544 */

	545 void TransliteratorIDParser::IDtoSTV(const UnicodeString& id,

	546 UnicodeString& source,

	547 UnicodeString& target,

	548 UnicodeString& variant,

	549 UBool& isSourcePresent) {

	550 source = ANY;

	551 target.truncate(0);

	552 variant.truncate(0);

	553

	554 int32_t sep = id.indexOf(TARGET_SEP);

	555 int32_t var = id.indexOf(VARIANT_SEP);

	556 if (var < 0) {

	557 var = id.length();

	558 }

	559 isSourcePresent = FALSE;

	560

	561 if (sep < 0) {

	562 // Form: T/V or T (or /V)

	563 id.extractBetween(0, var, target);

	564 id.extractBetween(var, id.length(), variant);

	565 } else if (sep < var) {

	566 // Form: S-T/V or S-T (or -T/V or -T)

	567 if (sep > 0) {

	568 id.extractBetween(0, sep, source);

	569 isSourcePresent = TRUE;

	570 }

	571 id.extractBetween(++sep, var, target);

	572 id.extractBetween(var, id.length(), variant);

	573 } else {

	574 // Form: (S/V-T or /V-T)

	575 if (var > 0) {

	576 id.extractBetween(0, var, source);

	577 isSourcePresent = TRUE;

	578 }

	579 id.extractBetween(var, sep++, variant);

	580 id.extractBetween(sep, id.length(), target);

	581 }

	582

	583 if (variant.length() > 0) {

	584 variant.remove(0, 1);

	585 }

	586 }

	587

	588 /**

	589 * Given source, target, and variant strings, concatenate them into a

	590 * full ID. If the source is empty, then "Any" will be used for the

	591 * source, so the ID will always be of the form s-t/v or s-t.

	592 */

	593 void TransliteratorIDParser::STVtoID(const UnicodeString& source,

	594 const UnicodeString& target,

	595 const UnicodeString& variant,

	596 UnicodeString& id) {

	597 id = source;

	598 if (id.length() == 0) {

	599 id = ANY;

	600 }

	601 id.append(TARGET_SEP).append(target);

	602 if (variant.length() != 0) {

	603 id.append(VARIANT_SEP).append(variant);

	604 }

	605 // NUL-terminate the ID string for getTerminatedBuffer.

	606 // This prevents valgrind and Purify warnings.

	607 id.append((UChar)0);

	608 id.truncate(id.length()-1);

	609 }

	610

	611 /**

	612 * Register two targets as being inverses of one another. For

	613 * example, calling registerSpecialInverse("NFC", "NFD", TRUE) causes

	614 * Transliterator to form the following inverse relationships:

	615 *

	616 * <pre>NFC => NFD

	617 * Any-NFC => Any-NFD

	618 * NFD => NFC

	619 * Any-NFD => Any-NFC</pre>

	620 *

	621 * (Without the special inverse registration, the inverse of NFC

	622 * would be NFC-Any.) Note that NFD is shorthand for Any-NFD, but

	623 * that the presence or absence of "Any-" is preserved.

	624 *

	625 * <p>The relationship is symmetrical; registering (a, b) is

	626 * equivalent to registering (b, a).

	627 *

	628 * <p>The relevant IDs must still be registered separately as

	629 * factories or classes.

	630 *

	631 * <p>Only the targets are specified. Special inverses always

	632 * have the form Any-Target1 <=> Any-Target2. The target should

	633 * have canonical casing (the casing desired to be produced when

	634 * an inverse is formed) and should contain no whitespace or other

	635 * extraneous characters.

	636 *

	637 * @param target the target against which to register the inverse

	638 * @param inverseTarget the inverse of target, that is

	639 * Any-target.getInverse() => Any-inverseTarget

	640 * @param bidirectional if TRUE, register the reverse relation

	641 * as well, that is, Any-inverseTarget.getInverse() => Any-target

	642 */

	643 void TransliteratorIDParser::registerSpecialInverse(const UnicodeString& target,

	644 const UnicodeString& inverse Target,

	645 UBool bidirectional,

	646 UErrorCode &status) {

	647 init(status);

	648 if (U_FAILURE(status)) {

	649 return;

	650 }

	651

	652 // If target == inverseTarget then force bidirectional => FALSE

	653 if (bidirectional && 0==target.caseCompare(inverseTarget, U_FOLD_CASE_DEFAUL T)) {

	654 bidirectional = FALSE;

	655 }

	656

	657 Mutex lock(&LOCK);

	658

	659 UnicodeString *tempus = new UnicodeString(inverseTarget); // Used for null pointer check before usage.

	660 if (tempus == NULL) {

	661 status = U_MEMORY_ALLOCATION_ERROR;

	662 return;

	663 }

	664 SPECIAL_INVERSES->put(target, tempus, status);

	665 if (bidirectional) {

	666 tempus = new UnicodeString(target);

	667 if (tempus == NULL) {

	668 status = U_MEMORY_ALLOCATION_ERROR;

	669 return;

	670 }

	671 SPECIAL_INVERSES->put(inverseTarget, tempus, status);

	672 }

	673 }

	674

	675 //----------------------------------------------------------------

	676 // Private implementation

	677 //----------------------------------------------------------------

	678

	679 /**

	680 * Parse an ID into component pieces. Take IDs of the form T,

	681 * T/V, S-T, S-T/V, or S/V-T. If the source is missing, return a

	682 * source of ANY.

	683 * @param id the id string, in any of several forms

	684 * @param pos INPUT-OUTPUT parameter. On input, pos is the

	685 * offset of the first character to parse in id. On output,

	686 * pos is the offset after the last parsed character. If the

	687 * parse failed, pos will be unchanged.

	688 * @param allowFilter2 if TRUE, a UnicodeSet pattern is allowed

	689 * at any location between specs or delimiters, and is returned

	690 * as the fifth string in the array.

	691 * @return a Specs object, or NULL if the parse failed. If

	692 * neither source nor target was seen in the parsed id, then the

	693 * parse fails. If allowFilter is TRUE, then the parsed filter

	694 * pattern is returned in the Specs object, otherwise the returned

	695 * filter reference is NULL. If the parse fails for any reason

	696 * NULL is returned.

	697 */

	698 TransliteratorIDParser::Specs*

	699 TransliteratorIDParser::parseFilterID(const UnicodeString& id, int32_t& pos,

	700 UBool allowFilter) {

	701 UnicodeString first;

	702 UnicodeString source;

	703 UnicodeString target;

	704 UnicodeString variant;

	705 UnicodeString filter;

	706 UChar delimiter = 0;

	707 int32_t specCount = 0;

	708 int32_t start = pos;

	709

	710 // This loop parses one of the following things with each

	711 // pass: a filter, a delimiter character (either '-' or '/'),

	712 // or a spec (source, target, or variant).

	713 for (;;) {

	714 ICU_Utility::skipWhitespace(id, pos, TRUE);

	715 if (pos == id.length()) {

	716 break;

	717 }

	718

	719 // Parse filters

	720 if (allowFilter && filter.length() == 0 &&

	721 UnicodeSet::resemblesPattern(id, pos)) {

	722

	723 ParsePosition ppos(pos);

	724 UErrorCode ec = U_ZERO_ERROR;

	725 UnicodeSet set(id, ppos, USET_IGNORE_SPACE, NULL, ec);

	726 if (U_FAILURE(ec)) {

	727 pos = start;

	728 return NULL;

	729 }

	730 id.extractBetween(pos, ppos.getIndex(), filter);

	731 pos = ppos.getIndex();

	732 continue;

	733 }

	734

	735 if (delimiter == 0) {

	736 UChar c = id.charAt(pos);

	737 if ((c == TARGET_SEP && target.length() == 0) \|\|

	738 (c == VARIANT_SEP && variant.length() == 0)) {

	739 delimiter = c;

	740 ++pos;

	741 continue;

	742 }

	743 }

	744

	745 // We are about to try to parse a spec with no delimiter

	746 // when we can no longer do so (we can only do so at the

	747 // start); break.

	748 if (delimiter == 0 && specCount > 0) {

	749 break;

	750 }

	751

	752 UnicodeString spec = ICU_Utility::parseUnicodeIdentifier(id, pos);

	753 if (spec.length() == 0) {

	754 // Note that if there was a trailing delimiter, we

	755 // consume it. So Foo-, Foo/, Foo-Bar/, and Foo/Bar-

	756 // are legal.

	757 break;

	758 }

	759

	760 switch (delimiter) {

	761 case 0:

	762 first = spec;

	763 break;

	764 case TARGET_SEP:

	765 target = spec;

	766 break;

	767 case VARIANT_SEP:

	768 variant = spec;

	769 break;

	770 }

	771 ++specCount;

	772 delimiter = 0;

	773 }

	774

	775 // A spec with no prior character is either source or target,

	776 // depending on whether an explicit "-target" was seen.

	777 if (first.length() != 0) {

	778 if (target.length() == 0) {

	779 target = first;

	780 } else {

	781 source = first;

	782 }

	783 }

	784

	785 // Must have either source or target

	786 if (source.length() == 0 && target.length() == 0) {

	787 pos = start;

	788 return NULL;

	789 }

	790

	791 // Empty source or target defaults to ANY

	792 UBool sawSource = TRUE;

	793 if (source.length() == 0) {

	794 source = ANY;

	795 sawSource = FALSE;

	796 }

	797 if (target.length() == 0) {

	798 target = ANY;

	799 }

	800

	801 return new Specs(source, target, variant, sawSource, filter);

	802 }

	803

	804 /**

	805 * Givens a Spec object, convert it to a SingleID object. The

	806 * Spec object is a more unprocessed parse result. The SingleID

	807 * object contains information about canonical and basic IDs.

	808 * @return a SingleID; never returns NULL. Returned object always

	809 * has 'filter' field of NULL.

	810 */

	811 TransliteratorIDParser::SingleID*

	812 TransliteratorIDParser::specsToID(const Specs* specs, int32_t dir) {

	813 UnicodeString canonID;

	814 UnicodeString basicID;

	815 UnicodeString basicPrefix;

	816 if (specs != NULL) {

	817 UnicodeString buf;

	818 if (dir == FORWARD) {

	819 if (specs->sawSource) {

	820 buf.append(specs->source).append(TARGET_SEP);

	821 } else {

	822 basicPrefix = specs->source;

	823 basicPrefix.append(TARGET_SEP);

	824 }

	825 buf.append(specs->target);

	826 } else {

	827 buf.append(specs->target).append(TARGET_SEP).append(specs->source);

	828 }

	829 if (specs->variant.length() != 0) {

	830 buf.append(VARIANT_SEP).append(specs->variant);

	831 }

	832 basicID = basicPrefix;

	833 basicID.append(buf);

	834 if (specs->filter.length() != 0) {

	835 buf.insert(0, specs->filter);

	836 }

	837 canonID = buf;

	838 }

	839 return new SingleID(canonID, basicID);

	840 }

	841

	842 /**

	843 * Given a Specs object, return a SingleID representing the

	844 * special inverse of that ID. If there is no special inverse

	845 * then return NULL.

	846 * @return a SingleID or NULL. Returned object always has

	847 * 'filter' field of NULL.

	848 */

	849 TransliteratorIDParser::SingleID*

	850 TransliteratorIDParser::specsToSpecialInverse(const Specs& specs, UErrorCode &st atus) {

	851 if (0!=specs.source.caseCompare(ANY, U_FOLD_CASE_DEFAULT)) {

	852 return NULL;

	853 }

	854 init(status);

	855

	856 UnicodeString* inverseTarget;

	857

	858 umtx_lock(&LOCK);

	859 inverseTarget = (UnicodeString*) SPECIAL_INVERSES->get(specs.target);

	860 umtx_unlock(&LOCK);

	861

	862 if (inverseTarget != NULL) {

	863 // If the original ID contained "Any-" then make the

	864 // special inverse "Any-Foo"; otherwise make it "Foo".

	865 // So "Any-NFC" => "Any-NFD" but "NFC" => "NFD".

	866 UnicodeString buf;

	867 if (specs.filter.length() != 0) {

	868 buf.append(specs.filter);

	869 }

	870 if (specs.sawSource) {

	871 buf.append(ANY).append(TARGET_SEP);

	872 }

	873 buf.append(*inverseTarget);

	874

	875 UnicodeString basicID(ANY);

	876 basicID.append(TARGET_SEP).append(*inverseTarget);

	877

	878 if (specs.variant.length() != 0) {

	879 buf.append(VARIANT_SEP).append(specs.variant);

	880 basicID.append(VARIANT_SEP).append(specs.variant);

	881 }

	882 return new SingleID(buf, basicID);

	883 }

	884 return NULL;

	885 }

	886

	887 /**

	888 * Glue method to get around access problems in C++. This would

	889 * ideally be inline but we want to avoid a circular header

	890 * dependency.

	891 */

	892 Transliterator* TransliteratorIDParser::createBasicInstance(const UnicodeString& id, const UnicodeString* canonID) {

	893 return Transliterator::createBasicInstance(id, canonID);

	894 }

	895

	896 /**

	897 * Initialize static memory.

	898 */

	899 void TransliteratorIDParser::init(UErrorCode &status) {

	900 if (SPECIAL_INVERSES != NULL) {

	901 return;

	902 }

	903

	904 Hashtable* special_inverses = new Hashtable(TRUE, status);

	905 // Null pointer check

	906 if (special_inverses == NULL) {

	907 status = U_MEMORY_ALLOCATION_ERROR;

	908 return;

	909 }

	910 special_inverses->setValueDeleter(uhash_deleteUnicodeString);

	911

	912 umtx_lock(&LOCK);

	913 if (SPECIAL_INVERSES == NULL) {

	914 SPECIAL_INVERSES = special_inverses;

	915 special_inverses = NULL;

	916 }

	917 umtx_unlock(&LOCK);

	918 delete special_inverses; /null instance/

	919

	920 ucln_i18n_registerCleanup(UCLN_I18N_TRANSLITERATOR, utrans_transliterator_cl eanup);

	921 }

	922

	923 /**

	924 * Free static memory.

	925 */

	926 void TransliteratorIDParser::cleanup() {

	927 if (SPECIAL_INVERSES) {

	928 delete SPECIAL_INVERSES;

	929 SPECIAL_INVERSES = NULL;

	930 }

	931 umtx_destroy(&LOCK);

	932 }

	933

	934 U_NAMESPACE_END

	935

	936 #endif /* #if !UCONFIG_NO_TRANSLITERATION */

	937

	938 //eof

OLD	NEW

« no previous file with comments | « icu46/source/i18n/tridpars.h ('k') | icu46/source/i18n/tzrule.cpp » ('j') | no next file with comments »