icu46/source/common/ustrcase.c - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/common/ustrcase.c

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 *******************************************************************************

	3 *

	4 * Copyright (C) 2001-2010, International Business Machines

	5 * Corporation and others. All Rights Reserved.

	6 *

	7 *******************************************************************************

	8 * file name: ustrcase.c

	9 * encoding: US-ASCII

	10 * tab size: 8 (not used)

	11 * indentation:4

	12 *

	13 * created on: 2002feb20

	14 * created by: Markus W. Scherer

	15 *

	16 * Implementation file for string casing C API functions.

	17 * Uses functions from uchar.c for basic functionality that requires access

	18 * to the Unicode Character Database (uprops.dat).

	19 */

	20

	21 #include "unicode/utypes.h"

	22 #include "unicode/uloc.h"

	23 #include "unicode/ustring.h"

	24 #include "unicode/ucasemap.h"

	25 #include "unicode/ubrk.h"

	26 #include "cmemory.h"

	27 #include "ucase.h"

	28 #include "ustr_imp.h"

	29

	30 /* string casing ------------------------------------------------------------ */

	31

	32 /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */

	33 static U_INLINE int32_t

	34 appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,

	35 int32_t result, const UChar *s) {

	36 UChar32 c;

	37 int32_t length;

	38

	39 /* decode the result */

	40 if(result<0) {

	41 /* (not) original code point */

	42 c=~result;

	43 length=-1;

	44 } else if(result<=UCASE_MAX_STRING_LENGTH) {

	45 c=U_SENTINEL;

	46 length=result;

	47 } else {

	48 c=result;

	49 length=-1;

	50 }

	51

	52 if(destIndex<destCapacity) {

	53 /* append the result */

	54 if(length<0) {

	55 /* code point */

	56 UBool isError=FALSE;

	57 U16_APPEND(dest, destIndex, destCapacity, c, isError);

	58 if(isError) {

	59 /* overflow, nothing written */

	60 destIndex+=U16_LENGTH(c);

	61 }

	62 } else {

	63 /* string */

	64 if((destIndex+length)<=destCapacity) {

	65 while(length>0) {

	66 dest[destIndex++]=*s++;

	67 --length;

	68 }

	69 } else {

	70 /* overflow */

	71 destIndex+=length;

	72 }

	73 }

	74 } else {

	75 /* preflight */

	76 if(length<0) {

	77 destIndex+=U16_LENGTH(c);

	78 } else {

	79 destIndex+=length;

	80 }

	81 }

	82 return destIndex;

	83 }

	84

	85 static UChar32 U_CALLCONV

	86 utf16_caseContextIterator(void *context, int8_t dir) {

	87 UCaseContext csc=(UCaseContext )context;

	88 UChar32 c;

	89

	90 if(dir<0) {

	91 /* reset for backward iteration */

	92 csc->index=csc->cpStart;

	93 csc->dir=dir;

	94 } else if(dir>0) {

	95 /* reset for forward iteration */

	96 csc->index=csc->cpLimit;

	97 csc->dir=dir;

	98 } else {

	99 /* continue current iteration direction */

	100 dir=csc->dir;

	101 }

	102

	103 if(dir<0) {

	104 if(csc->start<csc->index) {

	105 U16_PREV((const UChar *)csc->p, csc->start, csc->index, c);

	106 return c;

	107 }

	108 } else {

	109 if(csc->index<csc->limit) {

	110 U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c);

	111 return c;

	112 }

	113 }

	114 return U_SENTINEL;

	115 }

	116

	117 /*

	118 * Case-maps [srcStart..srcLimit[ but takes

	119 * context [0..srcLength[ into account.

	120 */

	121 static int32_t

	122 _caseMap(const UCaseMap csm, UCaseMapFull map,

	123 UChar *dest, int32_t destCapacity,

	124 const UChar src, UCaseContext csc,

	125 int32_t srcStart, int32_t srcLimit,

	126 UErrorCode *pErrorCode) {

	127 const UChar *s;

	128 UChar32 c, c2 = 0;

	129 int32_t srcIndex, destIndex;

	130 int32_t locCache;

	131

	132 locCache=csm->locCache;

	133

	134 /* case mapping loop */

	135 srcIndex=srcStart;

	136 destIndex=0;

	137 while(srcIndex<srcLimit) {

	138 csc->cpStart=srcIndex;

	139 U16_NEXT(src, srcIndex, srcLimit, c);

	140 csc->cpLimit=srcIndex;

	141 c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &loc Cache);

	142 if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING _LENGTH<c && (c2=c)<=0xffff)) {

	143 /* fast path version of appendResult() for BMP results */

	144 dest[destIndex++]=(UChar)c2;

	145 } else {

	146 destIndex=appendResult(dest, destIndex, destCapacity, c, s);

	147 }

	148 }

	149

	150 if(destIndex>destCapacity) {

	151 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	152 }

	153 return destIndex;

	154 }

	155

	156 static void

	157 setTempCaseMapLocale(UCaseMap csm, const char locale, UErrorCode *pErrorCode) {

	158 /*

	159 * We could call ucasemap_setLocale(), but here we really only care about

	160 * the initial language subtag, we need not return the real string via

	161 * ucasemap_getLocale(), and we don't care about only getting "x" from

	162 * "x-some-thing" etc.

	163 *

	164 * We ignore locales with a longer-than-3 initial subtag.

	165 *

	166 * We also do not fill in the locCache because it is rarely used,

	167 * and not worth setting unless we reuse it for many case mapping operations .

	168 * (That's why UCaseMap was created.)

	169 */

	170 int i;

	171 char c;

	172

	173 /* the internal functions require locale!=NULL */

	174 if(locale==NULL) {

	175 locale=uloc_getDefault();

	176 }

	177 for(i=0; i<4 && (c=locale[i])!=0 && c!='-' && c!='_'; ++i) {

	178 csm->locale[i]=c;

	179 }

	180 if(i<=3) {

	181 csm->locale[i]=0; /* Up to 3 non-separator characters. */

	182 } else {

	183 csm->locale[0]=0; /* Longer-than-3 initial subtag: Ignore. */

	184 }

	185 }

	186

	187 /*

	188 * Set parameters on an empty UCaseMap, for UCaseMap-less API functions.

	189 * Do this fast because it is called with every function call.

	190 */

	191 static U_INLINE void

	192 setTempCaseMap(UCaseMap csm, const char locale, UErrorCode *pErrorCode) {

	193 if(csm->csp==NULL) {

	194 csm->csp=ucase_getSingleton();

	195 }

	196 if(locale!=NULL && locale[0]==0) {

	197 csm->locale[0]=0;

	198 } else {

	199 setTempCaseMapLocale(csm, locale, pErrorCode);

	200 }

	201 }

	202

	203 #if !UCONFIG_NO_BREAK_ITERATION

	204

	205 /*

	206 * Internal titlecasing function.

	207 */

	208 static int32_t

	209 _toTitle(UCaseMap *csm,

	210 UChar *dest, int32_t destCapacity,

	211 const UChar src, UCaseContext csc,

	212 int32_t srcLength,

	213 UErrorCode *pErrorCode) {

	214 const UChar *s;

	215 UChar32 c;

	216 int32_t prev, titleStart, titleLimit, idx, destIndex, length;

	217 UBool isFirstIndex;

	218

	219 if(csm->iter!=NULL) {

	220 ubrk_setText(csm->iter, src, srcLength, pErrorCode);

	221 } else {

	222 csm->iter=ubrk_open(UBRK_WORD, csm->locale,

	223 src, srcLength,

	224 pErrorCode);

	225 }

	226 if(U_FAILURE(*pErrorCode)) {

	227 return 0;

	228 }

	229

	230 /* set up local variables */

	231 destIndex=0;

	232 prev=0;

	233 isFirstIndex=TRUE;

	234

	235 /* titlecasing loop */

	236 while(prev<srcLength) {

	237 /* find next index where to titlecase */

	238 if(isFirstIndex) {

	239 isFirstIndex=FALSE;

	240 idx=ubrk_first(csm->iter);

	241 } else {

	242 idx=ubrk_next(csm->iter);

	243 }

	244 if(idx==UBRK_DONE \|\| idx>srcLength) {

	245 idx=srcLength;

	246 }

	247

	248 /*

	249 * Unicode 4 & 5 section 3.13 Default Case Operations:

	250 *

	251 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standar d Annex

	252 * #29, "Text Boundaries." Between each pair of word boundaries, find th e first

	253 * cased character F. If F exists, map F to default_title(F); then map e ach

	254 * subsequent character C to default_lower(C).

	255 *

	256 * In this implementation, segment [prev..index[ into 3 parts:

	257 * a) uncased characters (copy as-is) [prev..titleStart[

	258 * b) first case letter (titlecase) [titleStart..titleLimit[

	259 * c) subsequent characters (lowercase) [titleLimit..ind ex[

	260 */

	261 if(prev<idx) {

	262 /* find and copy uncased characters [prev..titleStart[ */

	263 titleStart=titleLimit=prev;

	264 U16_NEXT(src, titleLimit, idx, c);

	265 if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE== ucase_getType(csm->csp, c)) {

	266 /* Adjust the titlecasing index (titleStart) to the next cased c haracter. */

	267 for(;;) {

	268 titleStart=titleLimit;

	269 if(titleLimit==idx) {

	270 /*

	271 * only uncased characters in [prev..index[

	272 * stop with titleStart==titleLimit==index

	273 */

	274 break;

	275 }

	276 U16_NEXT(src, titleLimit, idx, c);

	277 if(UCASE_NONE!=ucase_getType(csm->csp, c)) {

	278 break; /* cased letter at [titleStart..titleLimit[ */

	279 }

	280 }

	281 length=titleStart-prev;

	282 if(length>0) {

	283 if((destIndex+length)<=destCapacity) {

	284 uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UC HAR);

	285 }

	286 destIndex+=length;

	287 }

	288 }

	289

	290 if(titleStart<titleLimit) {

	291 /* titlecase c which is from [titleStart..titleLimit[ */

	292 csc->cpStart=titleStart;

	293 csc->cpLimit=titleLimit;

	294 c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &csm->locCache);

	295 destIndex=appendResult(dest, destIndex, destCapacity, c, s);

	296

	297 /* Special case Dutch IJ titlecasing */

	298 if ( titleStart+1 < idx &&

	299 ucase_getCaseLocale(csm->locale,&csm->locCache) == UCASE_LO C_DUTCH &&

	300 ( src[titleStart] == (UChar32) 0x0049 \|\| src[titleStart] == (UChar32) 0x0069 ) &&

	301 ( src[titleStart+1] == (UChar32) 0x004A \|\| src[titleStart+1 ] == (UChar32) 0x006A )) {

	302 c=(UChar32) 0x004A;

	303 destIndex=appendResult(dest, destIndex, destCapacity , c, s);

	304 titleLimit++;

	305 }

	306

	307 /* lowercase [titleLimit..index[ */

	308 if(titleLimit<idx) {

	309 if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) {

	310 /* Normal operation: Lowercase the rest of the word. */

	311 destIndex+=

	312 _caseMap(

	313 csm, ucase_toFullLower,

	314 dest+destIndex, destCapacity-destIndex,

	315 src, csc,

	316 titleLimit, idx,

	317 pErrorCode);

	318 } else {

	319 /* Optionally just copy the rest of the word unchanged. */

	320 length=idx-titleLimit;

	321 if((destIndex+length)<=destCapacity) {

	322 uprv_memcpy(dest+destIndex, src+titleLimit, length*U _SIZEOF_UCHAR);

	323 }

	324 destIndex+=length;

	325 }

	326 }

	327 }

	328 }

	329

	330 prev=idx;

	331 }

	332

	333 if(destIndex>destCapacity) {

	334 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	335 }

	336 return destIndex;

	337 }

	338

	339 #endif

	340

	341 /* functions available in the common library (for unistr_case.cpp) */

	342

	343 U_CFUNC int32_t

	344 ustr_toLower(const UCaseProps *csp,

	345 UChar *dest, int32_t destCapacity,

	346 const UChar *src, int32_t srcLength,

	347 const char *locale,

	348 UErrorCode *pErrorCode) {

	349 UCaseMap csm={ NULL };

	350 UCaseContext csc={ NULL };

	351

	352 csm.csp=csp;

	353 setTempCaseMap(&csm, locale, pErrorCode);

	354 csc.p=(void *)src;

	355 csc.limit=srcLength;

	356

	357 return _caseMap(&csm, ucase_toFullLower,

	358 dest, destCapacity,

	359 src, &csc, 0, srcLength,

	360 pErrorCode);

	361 }

	362

	363 U_CFUNC int32_t

	364 ustr_toUpper(const UCaseProps *csp,

	365 UChar *dest, int32_t destCapacity,

	366 const UChar *src, int32_t srcLength,

	367 const char *locale,

	368 UErrorCode *pErrorCode) {

	369 UCaseMap csm={ NULL };

	370 UCaseContext csc={ NULL };

	371

	372 csm.csp=csp;

	373 setTempCaseMap(&csm, locale, pErrorCode);

	374 csc.p=(void *)src;

	375 csc.limit=srcLength;

	376

	377 return _caseMap(&csm, ucase_toFullUpper,

	378 dest, destCapacity,

	379 src, &csc, 0, srcLength,

	380 pErrorCode);

	381 }

	382

	383 #if !UCONFIG_NO_BREAK_ITERATION

	384

	385 U_CFUNC int32_t

	386 ustr_toTitle(const UCaseProps *csp,

	387 UChar *dest, int32_t destCapacity,

	388 const UChar *src, int32_t srcLength,

	389 UBreakIterator *titleIter,

	390 const char *locale, uint32_t options,

	391 UErrorCode *pErrorCode) {

	392 UCaseMap csm={ NULL };

	393 UCaseContext csc={ NULL };

	394 int32_t length;

	395

	396 csm.csp=csp;

	397 csm.iter=titleIter;

	398 csm.options=options;

	399 setTempCaseMap(&csm, locale, pErrorCode);

	400 csc.p=(void *)src;

	401 csc.limit=srcLength;

	402

	403 length=_toTitle(&csm,

	404 dest, destCapacity,

	405 src, &csc, srcLength,

	406 pErrorCode);

	407 if(titleIter==NULL && csm.iter!=NULL) {

	408 ubrk_close(csm.iter);

	409 }

	410 return length;

	411 }

	412

	413 #endif

	414

	415 U_CFUNC int32_t

	416 ustr_foldCase(const UCaseProps *csp,

	417 UChar *dest, int32_t destCapacity,

	418 const UChar *src, int32_t srcLength,

	419 uint32_t options,

	420 UErrorCode *pErrorCode) {

	421 int32_t srcIndex, destIndex;

	422

	423 const UChar *s;

	424 UChar32 c, c2 = 0;

	425

	426 /* case mapping loop */

	427 srcIndex=destIndex=0;

	428 while(srcIndex<srcLength) {

	429 U16_NEXT(src, srcIndex, srcLength, c);

	430 c=ucase_toFullFolding(csp, c, &s, options);

	431 if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING _LENGTH<c && (c2=c)<=0xffff)) {

	432 /* fast path version of appendResult() for BMP results */

	433 dest[destIndex++]=(UChar)c2;

	434 } else {

	435 destIndex=appendResult(dest, destIndex, destCapacity, c, s);

	436 }

	437 }

	438

	439 if(destIndex>destCapacity) {

	440 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	441 }

	442 return destIndex;

	443 }

	444

	445 /*

	446 * Implement argument checking and buffer handling

	447 * for string case mapping as a common function.

	448 */

	449

	450 /* common internal function for public API functions */

	451

	452 static int32_t

	453 caseMap(const UCaseMap *csm,

	454 UChar *dest, int32_t destCapacity,

	455 const UChar *src, int32_t srcLength,

	456 int32_t toWhichCase,

	457 UErrorCode *pErrorCode) {

	458 UChar buffer[300];

	459 UChar *temp;

	460

	461 int32_t destLength;

	462

	463 /* check argument values */

	464 if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {

	465 return 0;

	466 }

	467 if( destCapacity<0 \|\|

	468 (dest==NULL && destCapacity>0) \|\|

	469 src==NULL \|\|

	470 srcLength<-1

	471 ) {

	472 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	473 return 0;

	474 }

	475

	476 /* get the string length */

	477 if(srcLength==-1) {

	478 srcLength=u_strlen(src);

	479 }

	480

	481 /* check for overlapping source and destination */

	482 if( dest!=NULL &&

	483 ((src>=dest && src<(dest+destCapacity)) \|\|

	484 (dest>=src && dest<(src+srcLength)))

	485 ) {

	486 /* overlap: provide a temporary destination buffer and later copy the re sult */

	487 if(destCapacity<=(sizeof(buffer)/U_SIZEOF_UCHAR)) {

	488 /* the stack buffer is large enough */

	489 temp=buffer;

	490 } else {

	491 /* allocate a buffer */

	492 temp=(UChar )uprv_malloc(destCapacityU_SIZEOF_UCHAR);

	493 if(temp==NULL) {

	494 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;

	495 return 0;

	496 }

	497 }

	498 } else {

	499 temp=dest;

	500 }

	501

	502 destLength=0;

	503

	504 if(toWhichCase==FOLD_CASE) {

	505 destLength=ustr_foldCase(csm->csp, temp, destCapacity, src, srcLength,

	506 csm->options, pErrorCode);

	507 } else {

	508 UCaseContext csc={ NULL };

	509

	510 csc.p=(void *)src;

	511 csc.limit=srcLength;

	512

	513 if(toWhichCase==TO_LOWER) {

	514 destLength=_caseMap(csm, ucase_toFullLower,

	515 temp, destCapacity,

	516 src, &csc,

	517 0, srcLength,

	518 pErrorCode);

	519 } else if(toWhichCase==TO_UPPER) {

	520 destLength=_caseMap(csm, ucase_toFullUpper,

	521 temp, destCapacity,

	522 src, &csc,

	523 0, srcLength,

	524 pErrorCode);

	525 } else /* if(toWhichCase==TO_TITLE) */ {

	526 #if UCONFIG_NO_BREAK_ITERATION

	527 *pErrorCode=U_UNSUPPORTED_ERROR;

	528 #else

	529 /* UCaseMap is actually non-const in toTitle() APIs. */

	530 destLength=_toTitle((UCaseMap *)csm, temp, destCapacity,

	531 src, &csc, srcLength,

	532 pErrorCode);

	533 #endif

	534 }

	535 }

	536 if(temp!=dest) {

	537 /* copy the result string to the destination buffer */

	538 if(destLength>0) {

	539 int32_t copyLength= destLength<=destCapacity ? destLength : destCapa city;

	540 if(copyLength>0) {

	541 uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR);

	542 }

	543 }

	544 if(temp!=buffer) {

	545 uprv_free(temp);

	546 }

	547 }

	548

	549 return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);

	550 }

	551

	552 /* public API functions */

	553

	554 U_CAPI int32_t U_EXPORT2

	555 u_strToLower(UChar *dest, int32_t destCapacity,

	556 const UChar *src, int32_t srcLength,

	557 const char *locale,

	558 UErrorCode *pErrorCode) {

	559 UCaseMap csm={ NULL };

	560 setTempCaseMap(&csm, locale, pErrorCode);

	561 return caseMap(&csm,

	562 dest, destCapacity,

	563 src, srcLength,

	564 TO_LOWER, pErrorCode);

	565 }

	566

	567 U_CAPI int32_t U_EXPORT2

	568 u_strToUpper(UChar *dest, int32_t destCapacity,

	569 const UChar *src, int32_t srcLength,

	570 const char *locale,

	571 UErrorCode *pErrorCode) {

	572 UCaseMap csm={ NULL };

	573 setTempCaseMap(&csm, locale, pErrorCode);

	574 return caseMap(&csm,

	575 dest, destCapacity,

	576 src, srcLength,

	577 TO_UPPER, pErrorCode);

	578 }

	579

	580 #if !UCONFIG_NO_BREAK_ITERATION

	581

	582 U_CAPI int32_t U_EXPORT2

	583 u_strToTitle(UChar *dest, int32_t destCapacity,

	584 const UChar *src, int32_t srcLength,

	585 UBreakIterator *titleIter,

	586 const char *locale,

	587 UErrorCode *pErrorCode) {

	588 UCaseMap csm={ NULL };

	589 int32_t length;

	590

	591 csm.iter=titleIter;

	592 setTempCaseMap(&csm, locale, pErrorCode);

	593 length=caseMap(&csm,

	594 dest, destCapacity,

	595 src, srcLength,

	596 TO_TITLE, pErrorCode);

	597 if(titleIter==NULL && csm.iter!=NULL) {

	598 ubrk_close(csm.iter);

	599 }

	600 return length;

	601 }

	602

	603 U_CAPI int32_t U_EXPORT2

	604 ucasemap_toTitle(UCaseMap *csm,

	605 UChar *dest, int32_t destCapacity,

	606 const UChar *src, int32_t srcLength,

	607 UErrorCode *pErrorCode) {

	608 return caseMap(csm,

	609 dest, destCapacity,

	610 src, srcLength,

	611 TO_TITLE, pErrorCode);

	612 }

	613

	614 #endif

	615

	616 U_CAPI int32_t U_EXPORT2

	617 u_strFoldCase(UChar *dest, int32_t destCapacity,

	618 const UChar *src, int32_t srcLength,

	619 uint32_t options,

	620 UErrorCode *pErrorCode) {

	621 UCaseMap csm={ NULL };

	622 csm.csp=ucase_getSingleton();

	623 csm.options=options;

	624 return caseMap(&csm,

	625 dest, destCapacity,

	626 src, srcLength,

	627 FOLD_CASE, pErrorCode);

	628 }

	629

	630 /* case-insensitive string comparisons -------------------------------------- */

	631

	632 /*

	633 * This function is a copy of unorm_cmpEquivFold() minus the parts for

	634 * canonical equivalence.

	635 * Keep the functions in sync, and see there for how this works.

	636 * The duplication is for modularization:

	637 * It makes caseless (but not canonical caseless) matches independent of

	638 * the normalization code.

	639 */

	640

	641 /* stack element for previous-level source/decomposition pointers */

	642 struct CmpEquivLevel {

	643 const UChar start, s, *limit;

	644 };

	645 typedef struct CmpEquivLevel CmpEquivLevel;

	646

	647 /* internal function */

	648 U_CFUNC int32_t

	649 u_strcmpFold(const UChar *s1, int32_t length1,

	650 const UChar *s2, int32_t length2,

	651 uint32_t options,

	652 UErrorCode *pErrorCode) {

	653 const UCaseProps *csp;

	654

	655 /* current-level start/limit - s1/s2 as current */

	656 const UChar start1, start2, limit1, limit2;

	657

	658 /* case folding variables */

	659 const UChar *p;

	660 int32_t length;

	661

	662 /* stacks of previous-level start/current/limit */

	663 CmpEquivLevel stack1[2], stack2[2];

	664

	665 /* case folding buffers, only use current-level start/limit */

	666 UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];

	667

	668 /* track which is the current level per string */

	669 int32_t level1, level2;

	670

	671 /* current code units, and code points for lookups */

	672 UChar32 c1, c2, cp1, cp2;

	673

	674 /* no argument error checking because this itself is not an API */

	675

	676 /*

	677 * assume that at least the option U_COMPARE_IGNORE_CASE is set

	678 * otherwise this function would have to behave exactly as uprv_strCompare()

	679 */

	680 csp=ucase_getSingleton();

	681 if(U_FAILURE(*pErrorCode)) {

	682 return 0;

	683 }

	684

	685 /* initialize */

	686 start1=s1;

	687 if(length1==-1) {

	688 limit1=NULL;

	689 } else {

	690 limit1=s1+length1;

	691 }

	692

	693 start2=s2;

	694 if(length2==-1) {

	695 limit2=NULL;

	696 } else {

	697 limit2=s2+length2;

	698 }

	699

	700 level1=level2=0;

	701 c1=c2=-1;

	702

	703 /* comparison loop */

	704 for(;;) {

	705 /*

	706 * here a code unit value of -1 means "get another code unit"

	707 * below it will mean "this source is finished"

	708 */

	709

	710 if(c1<0) {

	711 /* get next code unit from string 1, post-increment */

	712 for(;;) {

	713 if(s1==limit1 \|\| ((c1=*s1)==0 && (limit1==NULL \|\| (options&_STRN CMP_STYLE)))) {

	714 if(level1==0) {

	715 c1=-1;

	716 break;

	717 }

	718 } else {

	719 ++s1;

	720 break;

	721 }

	722

	723 /* reached end of level buffer, pop one level */

	724 do {

	725 --level1;

	726 start1=stack1[level1].start;

	727 } while(start1==NULL);

	728 s1=stack1[level1].s;

	729 limit1=stack1[level1].limit;

	730 }

	731 }

	732

	733 if(c2<0) {

	734 /* get next code unit from string 2, post-increment */

	735 for(;;) {

	736 if(s2==limit2 \|\| ((c2=*s2)==0 && (limit2==NULL \|\| (options&_STRN CMP_STYLE)))) {

	737 if(level2==0) {

	738 c2=-1;

	739 break;

	740 }

	741 } else {

	742 ++s2;

	743 break;

	744 }

	745

	746 /* reached end of level buffer, pop one level */

	747 do {

	748 --level2;

	749 start2=stack2[level2].start;

	750 } while(start2==NULL);

	751 s2=stack2[level2].s;

	752 limit2=stack2[level2].limit;

	753 }

	754 }

	755

	756 /*

	757 * compare c1 and c2

	758 * either variable c1, c2 is -1 only if the corresponding string is fini shed

	759 */

	760 if(c1==c2) {

	761 if(c1<0) {

	762 return 0; /* c1==c2==-1 indicating end of strings */

	763 }

	764 c1=c2=-1; /* make us fetch new code units */

	765 continue;

	766 } else if(c1<0) {

	767 return -1; /* string 1 ends before string 2 */

	768 } else if(c2<0) {

	769 return 1; /* string 2 ends before string 1 */

	770 }

	771 /* c1!=c2 && c1>=0 && c2>=0 */

	772

	773 /* get complete code points for c1, c2 for lookups if either is a surrog ate */

	774 cp1=c1;

	775 if(U_IS_SURROGATE(c1)) {

	776 UChar c;

	777

	778 if(U_IS_SURROGATE_LEAD(c1)) {

	779 if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {

	780 /* advance ++s1; only below if cp1 decomposes/case-folds */

	781 cp1=U16_GET_SUPPLEMENTARY(c1, c);

	782 }

	783 } else /* isTrail(c1) */ {

	784 if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {

	785 cp1=U16_GET_SUPPLEMENTARY(c, c1);

	786 }

	787 }

	788 }

	789

	790 cp2=c2;

	791 if(U_IS_SURROGATE(c2)) {

	792 UChar c;

	793

	794 if(U_IS_SURROGATE_LEAD(c2)) {

	795 if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {

	796 /* advance ++s2; only below if cp2 decomposes/case-folds */

	797 cp2=U16_GET_SUPPLEMENTARY(c2, c);

	798 }

	799 } else /* isTrail(c2) */ {

	800 if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {

	801 cp2=U16_GET_SUPPLEMENTARY(c, c2);

	802 }

	803 }

	804 }

	805

	806 /*

	807 * go down one level for each string

	808 * continue with the main loop as soon as there is a real change

	809 */

	810

	811 if( level1==0 &&

	812 (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0

	813 ) {

	814 /* cp1 case-folds to the code point "length" or to p[length] */

	815 if(U_IS_SURROGATE(c1)) {

	816 if(U_IS_SURROGATE_LEAD(c1)) {

	817 /* advance beyond source surrogate pair if it case-folds */

	818 ++s1;

	819 } else /* isTrail(c1) */ {

	820 /*

	821 * we got a supplementary code point when hitting its trail surrogate,

	822 * therefore the lead surrogate must have been the same as i n the other string;

	823 * compare this decomposition with the lead surrogate in the other string

	824 * remember that this simulates bulk text replacement:

	825 * the decomposition would replace the entire code point

	826 */

	827 --s2;

	828 c2=*(s2-1);

	829 }

	830 }

	831

	832 /* push current level pointers */

	833 stack1[0].start=start1;

	834 stack1[0].s=s1;

	835 stack1[0].limit=limit1;

	836 ++level1;

	837

	838 /* copy the folding result to fold1[] */

	839 if(length<=UCASE_MAX_STRING_LENGTH) {

	840 u_memcpy(fold1, p, length);

	841 } else {

	842 int32_t i=0;

	843 U16_APPEND_UNSAFE(fold1, i, length);

	844 length=i;

	845 }

	846

	847 /* set next level pointers to case folding */

	848 start1=s1=fold1;

	849 limit1=fold1+length;

	850

	851 /* get ready to read from decomposition, continue with loop */

	852 c1=-1;

	853 continue;

	854 }

	855

	856 if( level2==0 &&

	857 (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0

	858 ) {

	859 /* cp2 case-folds to the code point "length" or to p[length] */

	860 if(U_IS_SURROGATE(c2)) {

	861 if(U_IS_SURROGATE_LEAD(c2)) {

	862 /* advance beyond source surrogate pair if it case-folds */

	863 ++s2;

	864 } else /* isTrail(c2) */ {

	865 /*

	866 * we got a supplementary code point when hitting its trail surrogate,

	867 * therefore the lead surrogate must have been the same as i n the other string;

	868 * compare this decomposition with the lead surrogate in the other string

	869 * remember that this simulates bulk text replacement:

	870 * the decomposition would replace the entire code point

	871 */

	872 --s1;

	873 c1=*(s1-1);

	874 }

	875 }

	876

	877 /* push current level pointers */

	878 stack2[0].start=start2;

	879 stack2[0].s=s2;

	880 stack2[0].limit=limit2;

	881 ++level2;

	882

	883 /* copy the folding result to fold2[] */

	884 if(length<=UCASE_MAX_STRING_LENGTH) {

	885 u_memcpy(fold2, p, length);

	886 } else {

	887 int32_t i=0;

	888 U16_APPEND_UNSAFE(fold2, i, length);

	889 length=i;

	890 }

	891

	892 /* set next level pointers to case folding */

	893 start2=s2=fold2;

	894 limit2=fold2+length;

	895

	896 /* get ready to read from decomposition, continue with loop */

	897 c2=-1;

	898 continue;

	899 }

	900

	901 /*

	902 * no decomposition/case folding, max level for both sides:

	903 * return difference result

	904 *

	905 * code point order comparison must not just return cp1-cp2

	906 * because when single surrogates are present then the surrogate pairs

	907 * that formed cp1 and cp2 may be from different string indexes

	908 *

	909 * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units

	910 * c1=d800 cp1=10001 c2=dc00 cp2=10000

	911 * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }

	912 *

	913 * therefore, use same fix-up as in ustring.c/uprv_strCompare()

	914 * except: uprv_strCompare() fetches c=s while this functions fetches c =s++

	915 * so we have slightly different pointer/start/limit comparisons here

	916 */

	917

	918 if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {

	919 /* subtract 0x2800 from BMP code points to make them smaller than su pplementary ones */

	920 if(

	921 (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) \|\|

	922 (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))

	923 ) {

	924 /* part of a surrogate pair, leave >=d800 */

	925 } else {

	926 /* BMP code point - may be surrogate code point - make <d800 */

	927 c1-=0x2800;

	928 }

	929

	930 if(

	931 (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) \|\|

	932 (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))

	933 ) {

	934 /* part of a surrogate pair, leave >=d800 */

	935 } else {

	936 /* BMP code point - may be surrogate code point - make <d800 */

	937 c2-=0x2800;

	938 }

	939 }

	940

	941 return c1-c2;

	942 }

	943 }

	944

	945 /* public API functions */

	946

	947 U_CAPI int32_t U_EXPORT2

	948 u_strCaseCompare(const UChar *s1, int32_t length1,

	949 const UChar *s2, int32_t length2,

	950 uint32_t options,

	951 UErrorCode *pErrorCode) {

	952 /* argument checking */

	953 if(pErrorCode==0 \|\| U_FAILURE(*pErrorCode)) {

	954 return 0;

	955 }

	956 if(s1==NULL \|\| length1<-1 \|\| s2==NULL \|\| length2<-1) {

	957 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	958 return 0;

	959 }

	960 return u_strcmpFold(s1, length1, s2, length2,

	961 options\|U_COMPARE_IGNORE_CASE,

	962 pErrorCode);

	963 }

	964

	965 U_CAPI int32_t U_EXPORT2

	966 u_strcasecmp(const UChar s1, const UChar s2, uint32_t options) {

	967 UErrorCode errorCode=U_ZERO_ERROR;

	968 return u_strcmpFold(s1, -1, s2, -1,

	969 options\|U_COMPARE_IGNORE_CASE,

	970 &errorCode);

	971 }

	972

	973 U_CAPI int32_t U_EXPORT2

	974 u_memcasecmp(const UChar s1, const UChar s2, int32_t length, uint32_t options) {

	975 UErrorCode errorCode=U_ZERO_ERROR;

	976 return u_strcmpFold(s1, length, s2, length,

	977 options\|U_COMPARE_IGNORE_CASE,

	978 &errorCode);

	979 }

	980

	981 U_CAPI int32_t U_EXPORT2

	982 u_strncasecmp(const UChar s1, const UChar s2, int32_t n, uint32_t options) {

	983 UErrorCode errorCode=U_ZERO_ERROR;

	984 return u_strcmpFold(s1, n, s2, n,

	985 options\|(U_COMPARE_IGNORE_CASE\|_STRNCMP_STYLE),

	986 &errorCode);

	987 }

OLD	NEW

« no previous file with comments | « icu46/source/common/ustr_wcs.c ('k') | icu46/source/common/ustrenum.h » ('j') | no next file with comments »