icu46/source/common/utext.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/common/utext.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 *******************************************************************************

	3 *

	4 * Copyright (C) 2005-2010, International Business Machines

	5 * Corporation and others. All Rights Reserved.

	6 *

	7 *******************************************************************************

	8 * file name: utext.cpp

	9 * encoding: US-ASCII

	10 * tab size: 8 (not used)

	11 * indentation:4

	12 *

	13 * created on: 2005apr12

	14 * created by: Markus W. Scherer

	15 */

	16

	17 #include "unicode/utypes.h"

	18 #include "unicode/ustring.h"

	19 #include "unicode/unistr.h"

	20 #include "unicode/chariter.h"

	21 #include "unicode/utext.h"

	22 #include "ustr_imp.h"

	23 #include "cmemory.h"

	24 #include "cstring.h"

	25 #include "uassert.h"

	26 #include "putilimp.h"

	27

	28 U_NAMESPACE_USE

	29

	30 #define I32_FLAG(bitIndex) ((int32_t)1<<(bitIndex))

	31

	32

	33 static UBool

	34 utext_access(UText *ut, int64_t index, UBool forward) {

	35 return ut->pFuncs->access(ut, index, forward);

	36 }

	37

	38

	39

	40 U_CAPI UBool U_EXPORT2

	41 utext_moveIndex32(UText *ut, int32_t delta) {

	42 UChar32 c;

	43 if (delta > 0) {

	44 do {

	45 if(ut->chunkOffset>=ut->chunkLength && !utext_access(ut, ut->chunkNa tiveLimit, TRUE)) {

	46 return FALSE;

	47 }

	48 c = ut->chunkContents[ut->chunkOffset];

	49 if (U16_IS_SURROGATE(c)) {

	50 c = utext_next32(ut);

	51 if (c == U_SENTINEL) {

	52 return FALSE;

	53 }

	54 } else {

	55 ut->chunkOffset++;

	56 }

	57 } while(--delta>0);

	58

	59 } else if (delta<0) {

	60 do {

	61 if(ut->chunkOffset<=0 && !utext_access(ut, ut->chunkNativeStart, FAL SE)) {

	62 return FALSE;

	63 }

	64 c = ut->chunkContents[ut->chunkOffset-1];

	65 if (U16_IS_SURROGATE(c)) {

	66 c = utext_previous32(ut);

	67 if (c == U_SENTINEL) {

	68 return FALSE;

	69 }

	70 } else {

	71 ut->chunkOffset--;

	72 }

	73 } while(++delta<0);

	74 }

	75

	76 return TRUE;

	77 }

	78

	79

	80 U_CAPI int64_t U_EXPORT2

	81 utext_nativeLength(UText *ut) {

	82 return ut->pFuncs->nativeLength(ut);

	83 }

	84

	85

	86 U_CAPI UBool U_EXPORT2

	87 utext_isLengthExpensive(const UText *ut) {

	88 UBool r = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENS IVE)) != 0;

	89 return r;

	90 }

	91

	92

	93 U_CAPI int64_t U_EXPORT2

	94 utext_getNativeIndex(const UText *ut) {

	95 if(ut->chunkOffset <= ut->nativeIndexingLimit) {

	96 return ut->chunkNativeStart+ut->chunkOffset;

	97 } else {

	98 return ut->pFuncs->mapOffsetToNative(ut);

	99 }

	100 }

	101

	102

	103 U_CAPI void U_EXPORT2

	104 utext_setNativeIndex(UText *ut, int64_t index) {

	105 if(index<ut->chunkNativeStart \|\| index>=ut->chunkNativeLimit) {

	106 // The desired position is outside of the current chunk.

	107 // Access the new position. Assume a forward iteration from here,

	108 // which will also be optimimum for a single random access.

	109 // Reverse iterations may suffer slightly.

	110 ut->pFuncs->access(ut, index, TRUE);

	111 } else if((int32_t)(index - ut->chunkNativeStart) <= ut->nativeIndexingLimit ) {

	112 // utf-16 indexing.

	113 ut->chunkOffset=(int32_t)(index-ut->chunkNativeStart);

	114 } else {

	115 ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index);

	116 }

	117 // The convention is that the index must always be on a code point boundary.

	118 // Adjust the index position if it is in the middle of a surrogate pair.

	119 if (ut->chunkOffset<ut->chunkLength) {

	120 UChar c= ut->chunkContents[ut->chunkOffset];

	121 if (UTF16_IS_TRAIL(c)) {

	122 if (ut->chunkOffset==0) {

	123 ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE);

	124 }

	125 if (ut->chunkOffset>0) {

	126 UChar lead = ut->chunkContents[ut->chunkOffset-1];

	127 if (UTF16_IS_LEAD(lead)) {

	128 ut->chunkOffset--;

	129 }

	130 }

	131 }

	132 }

	133 }

	134

	135

	136

	137 U_CAPI int64_t U_EXPORT2

	138 utext_getPreviousNativeIndex(UText *ut) {

	139 //

	140 // Fast-path the common case.

	141 // Common means current position is not at the beginning of a chunk

	142 // and the preceding character is not supplementary.

	143 //

	144 int32_t i = ut->chunkOffset - 1;

	145 int64_t result;

	146 if (i >= 0) {

	147 UChar c = ut->chunkContents[i];

	148 if (U16_IS_TRAIL(c) == FALSE) {

	149 if (i <= ut->nativeIndexingLimit) {

	150 result = ut->chunkNativeStart + i;

	151 } else {

	152 ut->chunkOffset = i;

	153 result = ut->pFuncs->mapOffsetToNative(ut);

	154 ut->chunkOffset++;

	155 }

	156 return result;

	157 }

	158 }

	159

	160 // If at the start of text, simply return 0.

	161 if (ut->chunkOffset==0 && ut->chunkNativeStart==0) {

	162 return 0;

	163 }

	164

	165 // Harder, less common cases. We are at a chunk boundary, or on a surrogate .

	166 // Keep it simple, use other functions to handle the edges.

	167 //

	168 utext_previous32(ut);

	169 result = UTEXT_GETNATIVEINDEX(ut);

	170 utext_next32(ut);

	171 return result;

	172 }

	173

	174

	175 //

	176 // utext_current32. Get the UChar32 at the current position.

	177 // UText iteration position is always on a code point boundar y,

	178 // never on the trail half of a surrogate pair.

	179 //

	180 U_CAPI UChar32 U_EXPORT2

	181 utext_current32(UText *ut) {

	182 UChar32 c;

	183 if (ut->chunkOffset==ut->chunkLength) {

	184 // Current position is just off the end of the chunk.

	185 if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) {

	186 // Off the end of the text.

	187 return U_SENTINEL;

	188 }

	189 }

	190

	191 c = ut->chunkContents[ut->chunkOffset];

	192 if (U16_IS_LEAD(c) == FALSE) {

	193 // Normal, non-supplementary case.

	194 return c;

	195 }

	196

	197 //

	198 // Possible supplementary char.

	199 //

	200 UChar32 trail = 0;

	201 UChar32 supplementaryC = c;

	202 if ((ut->chunkOffset+1) < ut->chunkLength) {

	203 // The trail surrogate is in the same chunk.

	204 trail = ut->chunkContents[ut->chunkOffset+1];

	205 } else {

	206 // The trail surrogate is in a different chunk.

	207 // Because we must maintain the iteration position, we need to switc h forward

	208 // into the new chunk, get the trail surrogate, then revert the chun k back to the

	209 // original one.

	210 // An edge case to be careful of: the entire text may end with an u npaired

	211 // leading surrogate. The attempt to access the trail will fail, but

	212 // the original position before the unpaired lead still needs to be restored.

	213 int64_t nativePosition = ut->chunkNativeLimit;

	214 int32_t originalOffset = ut->chunkOffset;

	215 if (ut->pFuncs->access(ut, nativePosition, TRUE)) {

	216 trail = ut->chunkContents[ut->chunkOffset];

	217 }

	218 UBool r = ut->pFuncs->access(ut, nativePosition, FALSE); // reverse ite ration flag loads preceding chunk

	219 U_ASSERT(r==TRUE);

	220 ut->chunkOffset = originalOffset;

	221 if(!r) {

	222 return U_SENTINEL;

	223 }

	224 }

	225

	226 if (U16_IS_TRAIL(trail)) {

	227 supplementaryC = U16_GET_SUPPLEMENTARY(c, trail);

	228 }

	229 return supplementaryC;

	230

	231 }

	232

	233

	234 U_CAPI UChar32 U_EXPORT2

	235 utext_char32At(UText *ut, int64_t nativeIndex) {

	236 UChar32 c = U_SENTINEL;

	237

	238 // Fast path the common case.

	239 if (nativeIndex>=ut->chunkNativeStart && nativeIndex < ut->chunkNativeStart + ut->nativeIndexingLimit) {

	240 ut->chunkOffset = (int32_t)(nativeIndex - ut->chunkNativeStart);

	241 c = ut->chunkContents[ut->chunkOffset];

	242 if (U16_IS_SURROGATE(c) == FALSE) {

	243 return c;

	244 }

	245 }

	246

	247

	248 utext_setNativeIndex(ut, nativeIndex);

	249 if (nativeIndex>=ut->chunkNativeStart && ut->chunkOffset<ut->chunkLength) {

	250 c = ut->chunkContents[ut->chunkOffset];

	251 if (U16_IS_SURROGATE(c)) {

	252 // For surrogates, let current32() deal with the complications

	253 // of supplementaries that may span chunk boundaries.

	254 c = utext_current32(ut);

	255 }

	256 }

	257 return c;

	258 }

	259

	260

	261 U_CAPI UChar32 U_EXPORT2

	262 utext_next32(UText *ut) {

	263 UChar32 c;

	264

	265 if (ut->chunkOffset >= ut->chunkLength) {

	266 if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) {

	267 return U_SENTINEL;

	268 }

	269 }

	270

	271 c = ut->chunkContents[ut->chunkOffset++];

	272 if (U16_IS_LEAD(c) == FALSE) {

	273 // Normal case, not supplementary.

	274 // (A trail surrogate seen here is just returned as is, as a surrogate value.

	275 // It cannot be part of a pair.)

	276 return c;

	277 }

	278

	279 if (ut->chunkOffset >= ut->chunkLength) {

	280 if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) {

	281 // c is an unpaired lead surrogate at the end of the text.

	282 // return it as it is.

	283 return c;

	284 }

	285 }

	286 UChar32 trail = ut->chunkContents[ut->chunkOffset];

	287 if (U16_IS_TRAIL(trail) == FALSE) {

	288 // c was an unpaired lead surrogate, not at the end of the text.

	289 // return it as it is (unpaired). Iteration position is on the

	290 // following character, possibly in the next chunk, where the

	291 // trail surrogate would have been if it had existed.

	292 return c;

	293 }

	294

	295 UChar32 supplementary = U16_GET_SUPPLEMENTARY(c, trail);

	296 ut->chunkOffset++; // move iteration position over the trail surrogate.

	297 return supplementary;

	298 }

	299

	300

	301 U_CAPI UChar32 U_EXPORT2

	302 utext_previous32(UText *ut) {

	303 UChar32 c;

	304

	305 if (ut->chunkOffset <= 0) {

	306 if (ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE) == FALSE) {

	307 return U_SENTINEL;

	308 }

	309 }

	310 ut->chunkOffset--;

	311 c = ut->chunkContents[ut->chunkOffset];

	312 if (U16_IS_TRAIL(c) == FALSE) {

	313 // Normal case, not supplementary.

	314 // (A lead surrogate seen here is just returned as is, as a surrogate value.

	315 // It cannot be part of a pair.)

	316 return c;

	317 }

	318

	319 if (ut->chunkOffset <= 0) {

	320 if (ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE) == FALSE) {

	321 // c is an unpaired trail surrogate at the start of the text.

	322 // return it as it is.

	323 return c;

	324 }

	325 }

	326

	327 UChar32 lead = ut->chunkContents[ut->chunkOffset-1];

	328 if (U16_IS_LEAD(lead) == FALSE) {

	329 // c was an unpaired trail surrogate, not at the end of the text.

	330 // return it as it is (unpaired). Iteration position is at c

	331 return c;

	332 }

	333

	334 UChar32 supplementary = U16_GET_SUPPLEMENTARY(lead, c);

	335 ut->chunkOffset--; // move iteration position over the lead surrogate.

	336 return supplementary;

	337 }

	338

	339

	340

	341 U_CAPI UChar32 U_EXPORT2

	342 utext_next32From(UText *ut, int64_t index) {

	343 UChar32 c = U_SENTINEL;

	344

	345 if(index<ut->chunkNativeStart \|\| index>=ut->chunkNativeLimit) {

	346 // Desired position is outside of the current chunk.

	347 if(!ut->pFuncs->access(ut, index, TRUE)) {

	348 // no chunk available here

	349 return U_SENTINEL;

	350 }

	351 } else if (index - ut->chunkNativeStart <= (int64_t)ut->nativeIndexingLimit ) {

	352 // Desired position is in chunk, with direct 1:1 native to UTF16 indexin g

	353 ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);

	354 } else {

	355 // Desired position is in chunk, with non-UTF16 indexing.

	356 ut->chunkOffset = ut->pFuncs->mapNativeIndexToUTF16(ut, index);

	357 }

	358

	359 c = ut->chunkContents[ut->chunkOffset++];

	360 if (U16_IS_SURROGATE(c)) {

	361 // Surrogates. Many edge cases. Use other functions that already

	362 // deal with the problems.

	363 utext_setNativeIndex(ut, index);

	364 c = utext_next32(ut);

	365 }

	366 return c;

	367 }

	368

	369

	370 U_CAPI UChar32 U_EXPORT2

	371 utext_previous32From(UText *ut, int64_t index) {

	372 //

	373 // Return the character preceding the specified index.

	374 // Leave the iteration position at the start of the character that was retu rned.

	375 //

	376 UChar32 cPrev; // The character preceding cCurr, which is what we wil l return.

	377

	378 // Address the chunk containg the position preceding the incoming index

	379 // A tricky edge case:

	380 // We try to test the requested native index against the chunkNativeStart to determine

	381 // whether the character preceding the one at the index is in the current chunk.

	382 // BUT, this test can fail with UTF-8 (or any other multibyte encoding), when the

	383 // requested index is on something other than the first position of the f irst char.

	384 //

	385 if(index<=ut->chunkNativeStart \|\| index>ut->chunkNativeLimit) {

	386 // Requested native index is outside of the current chunk.

	387 if(!ut->pFuncs->access(ut, index, FALSE)) {

	388 // no chunk available here

	389 return U_SENTINEL;

	390 }

	391 } else if(index - ut->chunkNativeStart <= (int64_t)ut->nativeIndexingLimit) {

	392 // Direct UTF-16 indexing.

	393 ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);

	394 } else {

	395 ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index);

	396 if (ut->chunkOffset==0 && !ut->pFuncs->access(ut, index, FALSE)) {

	397 // no chunk available here

	398 return U_SENTINEL;

	399 }

	400 }

	401

	402 //

	403 // Simple case with no surrogates.

	404 //

	405 ut->chunkOffset--;

	406 cPrev = ut->chunkContents[ut->chunkOffset];

	407

	408 if (U16_IS_SURROGATE(cPrev)) {

	409 // Possible supplementary. Many edge cases.

	410 // Let other functions do the heavy lifting.

	411 utext_setNativeIndex(ut, index);

	412 cPrev = utext_previous32(ut);

	413 }

	414 return cPrev;

	415 }

	416

	417

	418 U_CAPI int32_t U_EXPORT2

	419 utext_extract(UText *ut,

	420 int64_t start, int64_t limit,

	421 UChar *dest, int32_t destCapacity,

	422 UErrorCode *status) {

	423 return ut->pFuncs->extract(ut, start, limit, dest, destCapacity , status);

	424 }

	425

	426

	427

	428 U_CAPI UBool U_EXPORT2

	429 utext_equals(const UText a, const UText b) {

	430 if (a==NULL \|\| b==NULL \|\|

	431 a->magic != UTEXT_MAGIC \|\|

	432 b->magic != UTEXT_MAGIC) {

	433 // Null or invalid arguments don't compare equal to anything.

	434 return FALSE;

	435 }

	436

	437 if (a->pFuncs != b->pFuncs) {

	438 // Different types of text providers.

	439 return FALSE;

	440 }

	441

	442 if (a->context != b->context) {

	443 // Different sources (different strings)

	444 return FALSE;

	445 }

	446 if (utext_getNativeIndex(a) != utext_getNativeIndex(b)) {

	447 // Different current position in the string.

	448 return FALSE;

	449 }

	450

	451 return TRUE;

	452 }

	453

	454 U_CAPI int32_t U_EXPORT2

	455 utext_compare(UText *s1, int32_t length1,

	456 UText *s2, int32_t length2) {

	457 UChar32 c1 = 0, c2 = 0;

	458

	459 if(length1<0 && length2<0) {

	460 /* strcmp style, go until end of string */

	461 for(;;) {

	462 c1 = UTEXT_NEXT32(s1);

	463 c2 = UTEXT_NEXT32(s2);

	464 if(c1 != c2) {

	465 break;

	466 } else if(c1 == U_SENTINEL) {

	467 return 0;

	468 }

	469 }

	470 } else {

	471 if(length1 < 0) {

	472 length1 = INT32_MIN;

	473 } else if (length2 < 0) {

	474 length2 = INT32_MIN;

	475 }

	476

	477 /* memcmp/UnicodeString style, both length-specified */

	478 while((length1 > 0 \|\| length1 == INT32_MIN) && (length2 > 0 \|\| length2 = = INT32_MIN)) {

	479 c1 = UTEXT_NEXT32(s1);

	480 c2 = UTEXT_NEXT32(s2);

	481

	482 if(c1 != c2) {

	483 break;

	484 } else if(c1 == U_SENTINEL) {

	485 return 0;

	486 }

	487

	488 if (length1 != INT32_MIN) {

	489 length1 -= 1;

	490 }

	491 if (length2 != INT32_MIN) {

	492 length2 -= 1;

	493 }

	494 }

	495

	496 if(length1 <= 0 && length1 != INT32_MIN) {

	497 if(length2 <= 0) {

	498 return 0;

	499 } else {

	500 return -1;

	501 }

	502 } else if(length2 <= 0 && length2 != INT32_MIN) {

	503 if (length1 <= 0) {

	504 return 0;

	505 } else {

	506 return 1;

	507 }

	508 }

	509 }

	510

	511 return (int32_t)c1-(int32_t)c2;

	512 }

	513

	514 U_CAPI int32_t U_EXPORT2

	515 utext_compareNativeLimit(UText *s1, int64_t limit1,

	516 UText *s2, int64_t limit2) {

	517 UChar32 c1, c2;

	518

	519 if(limit1<0 && limit2<0) {

	520 /* strcmp style, go until end of string */

	521 for(;;) {

	522 c1 = UTEXT_NEXT32(s1);

	523 c2 = UTEXT_NEXT32(s2);

	524 if(c1 != c2) {

	525 return (int32_t)c1-(int32_t)c2;

	526 } else if(c1 == U_SENTINEL) {

	527 return 0;

	528 }

	529 }

	530 } else {

	531 /* memcmp/UnicodeString style, both length-specified */

	532 int64_t index1 = (limit1 >= 0 ? UTEXT_GETNATIVEINDEX(s1) : 0);

	533 int64_t index2 = (limit2 >= 0 ? UTEXT_GETNATIVEINDEX(s2) : 0);

	534

	535 while((limit1 < 0 \|\| index1 < limit1) && (limit2 < 0 \|\| index2 < limit2) ) {

	536 c1 = UTEXT_NEXT32(s1);

	537 c2 = UTEXT_NEXT32(s2);

	538

	539 if(c1 != c2) {

	540 return (int32_t)c1-(int32_t)c2;

	541 } else if(c1 == U_SENTINEL) {

	542 return 0;

	543 }

	544

	545 if (limit1 >= 0) {

	546 index1 = UTEXT_GETNATIVEINDEX(s1);

	547 }

	548 if (limit2 >= 0) {

	549 index2 = UTEXT_GETNATIVEINDEX(s2);

	550 }

	551 }

	552

	553 if(limit1 >= 0 && index1 >= limit1) {

	554 if(index2 >= limit2) {

	555 return 0;

	556 } else {

	557 return -1;

	558 }

	559 } else {

	560 if(index1 >= limit1) {

	561 return 0;

	562 } else {

	563 return 1;

	564 }

	565 }

	566 }

	567 }

	568

	569 U_CAPI int32_t U_EXPORT2

	570 utext_caseCompare(UText *s1, int32_t length1,

	571 UText *s2, int32_t length2,

	572 uint32_t options, UErrorCode *pErrorCode) {

	573 const UCaseProps *csp;

	574

	575 /* case folding variables */

	576 const UChar *p;

	577 int32_t length;

	578

	579 /* case folding buffers, only use current-level start/limit */

	580 UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];

	581 int32_t foldOffset1, foldOffset2, foldLength1, foldLength2;

	582

	583 /* current code points */

	584 UChar32 c1, c2;

	585 uint8_t cLength1, cLength2;

	586

	587 /* argument checking */

	588 if(U_FAILURE(*pErrorCode)) {

	589 return 0;

	590 }

	591 if(s1==NULL \|\| s2==NULL) {

	592 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	593 return 0;

	594 }

	595

	596 csp=ucase_getSingleton();

	597

	598 /* for variable-length strings */

	599 if(length1 < 0) {

	600 length1 = INT32_MIN;

	601 }

	602 if (length2 < 0) {

	603 length2 = INT32_MIN;

	604 }

	605

	606 /* initialize */

	607 foldOffset1 = foldOffset2 = foldLength1 = foldLength2 = 0;

	608

	609 /* comparison loop */

	610 while((foldOffset1 < foldLength1 \|\| length1 > 0 \|\| length1 == INT32_MIN) &&

	611 (foldOffset2 < foldLength2 \|\| length2 > 0 \|\| length2 == INT32_MIN)) {

	612 if(foldOffset1 < foldLength1) {

	613 U16_NEXT_UNSAFE(fold1, foldOffset1, c1);

	614 cLength1 = 0;

	615 } else {

	616 c1 = UTEXT_NEXT32(s1);

	617 if (c1 != U_SENTINEL) {

	618 cLength1 = U16_LENGTH(c1);

	619

	620 length = ucase_toFullFolding(csp, c1, &p, options);

	621 if(length >= 0) {

	622 if(length <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not c orrectly handle 0-length folded-case strings

	623 u_memcpy(fold1, p, length);

	624 foldOffset1 = 0;

	625 foldLength1 = length;

	626 U16_NEXT_UNSAFE(fold1, foldOffset1, c1);

	627 } else {

	628 c1 = length;

	629 }

	630 }

	631 }

	632

	633 if(length1 != INT32_MIN) {

	634 length1 -= 1;

	635 }

	636 }

	637

	638 if(foldOffset2 < foldLength2) {

	639 U16_NEXT_UNSAFE(fold2, foldOffset2, c2);

	640 cLength2 = 0;

	641 } else {

	642 c2 = UTEXT_NEXT32(s2);

	643 if (c2 != U_SENTINEL) {

	644 cLength2 = U16_LENGTH(c2);

	645

	646 length = ucase_toFullFolding(csp, c2, &p, options);

	647 if(length >= 0) {

	648 if(length <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not c orrectly handle 0-length folded-case strings

	649 u_memcpy(fold2, p, length);

	650 foldOffset2 = 0;

	651 foldLength2 = length;

	652 U16_NEXT_UNSAFE(fold2, foldOffset2, c2);

	653 } else {

	654 c2 = length;

	655 }

	656 }

	657 } else if(c1 == U_SENTINEL) {

	658 return 0; // end of both strings at once

	659 }

	660

	661 if(length2 != INT32_MIN) {

	662 length2 -= 1;

	663 }

	664 }

	665

	666 if(c1 != c2) {

	667 return (int32_t)c1-(int32_t)c2;

	668 }

	669 }

	670

	671 /* By now at least one of the strings is out of characters */

	672 length1 += foldLength1 - foldOffset1;

	673 length2 += foldLength2 - foldOffset2;

	674

	675 if(length1 <= 0 && length1 != INT32_MIN) {

	676 if(length2 <= 0) {

	677 return 0;

	678 } else {

	679 return -1;

	680 }

	681 } else {

	682 if (length1 <= 0) {

	683 return 0;

	684 } else {

	685 return 1;

	686 }

	687 }

	688 }

	689

	690 U_CAPI int32_t U_EXPORT2

	691 utext_caseCompareNativeLimit(UText *s1, int64_t limit1,

	692 UText *s2, int64_t limit2,

	693 uint32_t options, UErrorCode *pErrorCode) {

	694 const UCaseProps *csp;

	695

	696 /* case folding variables */

	697 const UChar *p;

	698 int32_t length;

	699

	700 /* case folding buffers, only use current-level start/limit */

	701 UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];

	702 int32_t foldOffset1, foldOffset2, foldLength1, foldLength2;

	703

	704 /* current code points */

	705 UChar32 c1, c2;

	706

	707 /* native indexes into s1 and s2 */

	708 int64_t index1, index2;

	709

	710 /* argument checking */

	711 if(U_FAILURE(*pErrorCode)) {

	712 return 0;

	713 }

	714 if(s1==NULL \|\| s2==NULL) {

	715 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	716 return 0;

	717 }

	718

	719 csp=ucase_getSingleton();

	720

	721 /* initialize */

	722 index1 = (limit1 >= 0 ? UTEXT_GETNATIVEINDEX(s1) : 0);

	723 index2 = (limit2 >= 0 ? UTEXT_GETNATIVEINDEX(s2) : 0);

	724

	725 foldOffset1 = foldOffset2 = foldLength1 = foldLength2 = 0;

	726

	727 /* comparison loop */

	728 while((foldOffset1 < foldLength1 \|\| limit1 < 0 \|\| index1 < limit1) &&

	729 (foldOffset2 < foldLength2 \|\| limit2 < 0 \|\| index2 < limit2)) {

	730 if(foldOffset1 < foldLength1) {

	731 U16_NEXT_UNSAFE(fold1, foldOffset1, c1);

	732 } else {

	733 c1 = UTEXT_NEXT32(s1);

	734 if (c1 != U_SENTINEL) {

	735 length = ucase_toFullFolding(csp, c1, &p, options);

	736 if(length >= 0) {

	737 if(length <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not c orrectly handle 0-length folded-case strings

	738 u_memcpy(fold1, p, length);

	739 foldOffset1 = 0;

	740 foldLength1 = length;

	741 U16_NEXT_UNSAFE(fold1, foldOffset1, c1);

	742 } else {

	743 c1 = length;

	744 }

	745 }

	746 }

	747

	748 if (limit1 >= 0) {

	749 index1 = UTEXT_GETNATIVEINDEX(s1);

	750 }

	751 }

	752

	753 if(foldOffset2 < foldLength2) {

	754 U16_NEXT_UNSAFE(fold2, foldOffset2, c2);

	755 } else {

	756 c2 = UTEXT_NEXT32(s2);

	757 if (c2 != U_SENTINEL) {

	758 length = ucase_toFullFolding(csp, c2, &p, options);

	759 if(length >= 0) {

	760 if(length <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not c orrectly handle 0-length folded-case strings

	761 u_memcpy(fold2, p, length);

	762 foldOffset2 = 0;

	763 foldLength2 = length;

	764 U16_NEXT_UNSAFE(fold2, foldOffset2, c2);

	765 } else {

	766 c2 = length;

	767 }

	768 }

	769 } else if(c1 == U_SENTINEL) {

	770 return 0;

	771 }

	772

	773 if (limit2 >= 0) {

	774 index2 = UTEXT_GETNATIVEINDEX(s2);

	775 }

	776 }

	777

	778 if(c1 != c2) {

	779 return (int32_t)c1-(int32_t)c2;

	780 }

	781 }

	782

	783 /* By now at least one of the strings is out of characters */

	784 index1 -= foldLength1 - foldOffset1;

	785 index2 -= foldLength2 - foldOffset2;

	786

	787 if(limit1 >= 0 && index1 >= limit1) {

	788 if(index2 >= limit2) {

	789 return 0;

	790 } else {

	791 return -1;

	792 }

	793 } else {

	794 if(index1 >= limit1) {

	795 return 0;

	796 } else {

	797 return 1;

	798 }

	799 }

	800 }

	801

	802

	803 U_CAPI UBool U_EXPORT2

	804 utext_isWritable(const UText *ut)

	805 {

	806 UBool b = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) != 0;

	807 return b;

	808 }

	809

	810

	811 U_CAPI void U_EXPORT2

	812 utext_freeze(UText *ut) {

	813 // Zero out the WRITABLE flag.

	814 ut->providerProperties &= ~(I32_FLAG(UTEXT_PROVIDER_WRITABLE));

	815 }

	816

	817

	818 U_CAPI UBool U_EXPORT2

	819 utext_hasMetaData(const UText *ut)

	820 {

	821 UBool b = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA)) != 0;

	822 return b;

	823 }

	824

	825

	826

	827 U_CAPI int32_t U_EXPORT2

	828 utext_replace(UText *ut,

	829 int64_t nativeStart, int64_t nativeLimit,

	830 const UChar *replacementText, int32_t replacementLength,

	831 UErrorCode *status)

	832 {

	833 if (U_FAILURE(*status)) {

	834 return 0;

	835 }

	836 if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) == 0) {

	837 *status = U_NO_WRITE_PERMISSION;

	838 return 0;

	839 }

	840 int32_t i = ut->pFuncs->replace(ut, nativeStart, nativeLimit, replacementTex t, replacementLength, status);

	841 return i;

	842 }

	843

	844 U_CAPI void U_EXPORT2

	845 utext_copy(UText *ut,

	846 int64_t nativeStart, int64_t nativeLimit,

	847 int64_t destIndex,

	848 UBool move,

	849 UErrorCode *status)

	850 {

	851 if (U_FAILURE(*status)) {

	852 return;

	853 }

	854 if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) == 0) {

	855 *status = U_NO_WRITE_PERMISSION;

	856 return;

	857 }

	858 ut->pFuncs->copy(ut, nativeStart, nativeLimit, destIndex, move, status);

	859 }

	860

	861

	862

	863 U_CAPI UText * U_EXPORT2

	864 utext_clone(UText dest, const UText src, UBool deep, UBool readOnly, UErrorCod e *status) {

	865 UText *result;

	866 result = src->pFuncs->clone(dest, src, deep, status);

	867 if (readOnly) {

	868 utext_freeze(result);

	869 }

	870 return result;

	871 }

	872

	873

	874

	875 //------------------------------------------------------------------------------

	876 //

	877 // UText common functions implementation

	878 //

	879 //------------------------------------------------------------------------------

	880

	//
	//  Bit values stored in UText.flags
	//
	enum {
	    // Set:   ICU allocated this UText struct on the heap.
	    // Clear: the caller provided the storage for the UText.
	    UTEXT_HEAP_ALLOCATED       = 1,

	    // Set:   ICU allocated the extra storage as a separate heap block.
	    // Clear: there is no separate allocation.  Either no extra storage
	    //        was requested, or it is appended to the end of the main
	    //        UText storage.
	    UTEXT_EXTRA_HEAP_ALLOCATED = 2,

	    // Set:   this UText is currently open.
	    // Clear: this UText is not open.
	    UTEXT_OPEN                 = 4
	};

	897

	898

	899 //

	900 // Extended form of a UText. The purpose is to aid in computing the total size required

	901 // when a provider asks for a UText to be allocated with extra storage.

	902

	903 struct ExtendedUText {

	904 UText ut;

	905 UAlignedMemory extension;

	906 };

	907

	908 static const UText emptyText = UTEXT_INITIALIZER;

	909

	910 U_CAPI UText * U_EXPORT2

	911 utext_setup(UText ut, int32_t extraSpace, UErrorCode status) {

	912 if (U_FAILURE(*status)) {

	913 return ut;

	914 }

	915

	916 if (ut == NULL) {

	917 // We need to heap-allocate storage for the new UText

	918 int32_t spaceRequired = sizeof(UText);

	919 if (extraSpace > 0) {

	920 spaceRequired = sizeof(ExtendedUText) + extraSpace - sizeof(UAligned Memory);

	921 }

	922 ut = (UText *)uprv_malloc(spaceRequired);

	923 if (ut == NULL) {

	924 *status = U_MEMORY_ALLOCATION_ERROR;

	925 return NULL;

	926 } else {

	927 *ut = emptyText;

	928 ut->flags \|= UTEXT_HEAP_ALLOCATED;

	929 if (spaceRequired>0) {

	930 ut->extraSize = extraSpace;

	931 ut->pExtra = &((ExtendedUText *)ut)->extension;

	932 }

	933 }

	934 } else {

	935 // We have been supplied with an already existing UText.

	936 // Verify that it really appears to be a UText.

	937 if (ut->magic != UTEXT_MAGIC) {

	938 *status = U_ILLEGAL_ARGUMENT_ERROR;

	939 return ut;

	940 }

	941 // If the ut is already open and there's a provider supplied close

	942 // function, call it.

	943 if ((ut->flags & UTEXT_OPEN) && ut->pFuncs->close != NULL) {

	944 ut->pFuncs->close(ut);

	945 }

	946 ut->flags &= ~UTEXT_OPEN;

	947

	948 // If extra space was requested by our caller, check whether

	949 // sufficient already exists, and allocate new if needed.

	950 if (extraSpace > ut->extraSize) {

	951 // Need more space. If there is existing separately allocated space ,

	952 // delete it first, then allocate new space.

	953 if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) {

	954 uprv_free(ut->pExtra);

	955 ut->extraSize = 0;

	956 }

	957 ut->pExtra = uprv_malloc(extraSpace);

	958 if (ut->pExtra == NULL) {

	959 *status = U_MEMORY_ALLOCATION_ERROR;

	960 } else {

	961 ut->extraSize = extraSpace;

	962 ut->flags \|= UTEXT_EXTRA_HEAP_ALLOCATED;

	963 }

	964 }

	965 }

	966 if (U_SUCCESS(*status)) {

	967 ut->flags \|= UTEXT_OPEN;

	968

	969 // Initialize all remaining fields of the UText.

	970 //

	971 ut->context = NULL;

	972 ut->chunkContents = NULL;

	973 ut->p = NULL;

	974 ut->q = NULL;

	975 ut->r = NULL;

	976 ut->a = 0;

	977 ut->b = 0;

	978 ut->c = 0;

	979 ut->chunkOffset = 0;

	980 ut->chunkLength = 0;

	981 ut->chunkNativeStart = 0;

	982 ut->chunkNativeLimit = 0;

	983 ut->nativeIndexingLimit = 0;

	984 ut->providerProperties = 0;

	985 ut->privA = 0;

	986 ut->privB = 0;

	987 ut->privC = 0;

	988 ut->privP = NULL;

	989 if (ut->pExtra!=NULL && ut->extraSize>0)

	990 uprv_memset(ut->pExtra, 0, ut->extraSize);

	991

	992 }

	993 return ut;

	994 }

	995

	996

	997 U_CAPI UText * U_EXPORT2

	998 utext_close(UText *ut) {

	999 if (ut==NULL \|\|

	1000 ut->magic != UTEXT_MAGIC \|\|

	1001 (ut->flags & UTEXT_OPEN) == 0)

	1002 {

	1003 // The supplied ut is not an open UText.

	1004 // Do nothing.

	1005 return ut;

	1006 }

	1007

	1008 // If the provider gave us a close function, call it now.

	1009 // This will clean up anything allocated specifically by the provider.

	1010 if (ut->pFuncs->close != NULL) {

	1011 ut->pFuncs->close(ut);

	1012 }

	1013 ut->flags &= ~UTEXT_OPEN;

	1014

	1015 // If we (the framework) allocated the UText or subsidiary storage,

	1016 // delete it.

	1017 if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) {

	1018 uprv_free(ut->pExtra);

	1019 ut->pExtra = NULL;

	1020 ut->flags &= ~UTEXT_EXTRA_HEAP_ALLOCATED;

	1021 ut->extraSize = 0;

	1022 }

	1023

	1024 // Zero out function table of the closed UText. This is a defensive move,

	1025 // inteded to cause applications that inadvertantly use a closed

	1026 // utext to crash with null pointer errors.

	1027 ut->pFuncs = NULL;

	1028

	1029 if (ut->flags & UTEXT_HEAP_ALLOCATED) {

	1030 // This UText was allocated by UText setup. We need to free it.

	1031 // Clear magic, so we can detect if the user messes up and immediately

	1032 // tries to reopen another UText using the deleted storage.

	1033 ut->magic = 0;

	1034 uprv_free(ut);

	1035 ut = NULL;

	1036 }

	1037 return ut;

	1038 }

	1039

	1040

	1041

	1042

	1043 //

	1044 // invalidateChunk Reset a chunk to have no contents, so that the next call

	1045 // to access will cause new data to load.

	1046 // This is needed when copy/move/replace operate directly on t he

	1047 // backing text, potentially putting it out of sync with the

	1048 // contents in the chunk.

	1049 //

	1050 static void

	1051 invalidateChunk(UText *ut) {

	1052 ut->chunkLength = 0;

	1053 ut->chunkNativeLimit = 0;

	1054 ut->chunkNativeStart = 0;

	1055 ut->chunkOffset = 0;

	1056 ut->nativeIndexingLimit = 0;

	1057 }

	1058

	//
	// pinIndex   Pin a native index parameter into range.
	//            A negative index becomes 0; otherwise an index greater than
	//            limit becomes limit.  The 64 bit value is updated in place,
	//            and its truncation to 32 bits is returned as a convenience
	//            for providers that don't need 64 bits.
	//
	static int32_t
	pinIndex(int64_t &index, int64_t limit)
	{
	    index = (index < 0) ? 0
	          : (index > limit) ? limit
	          : index;
	    return (int32_t)index;
	}

	1073

	1074

	1075 U_CDECL_BEGIN

	1076

	1077 //

	1078 // Pointer relocation function,

	1079 // a utility used by shallow clone.

	1080 // Adjust a pointer that refers to something within one UText (the source)

	1081 // to refer to the same relative offset within a another UText (the target)

	1082 //

	1083 static void adjustPointer(UText dest, const void destPtr, const UText src) {

	1084 // convert all pointers to (char *) so that byte address arithmetic will wor k.

	1085 char dptr = (char )*destPtr;

	1086 char dUText = (char )dest;

	1087 char sUText = (char )src;

	1088

	1089 if (dptr >= (char )src->pExtra && dptr < ((char)src->pExtra)+src->extraSiz e) {

	1090 // target ptr was to something within the src UText's pExtra storage.

	1091 // relocate it into the target UText's pExtra region.

	1092 destPtr = ((char )dest->pExtra) + (dptr - (char *)src->pExtra);

	1093 } else if (dptr>=sUText && dptr < sUText+src->sizeOfStruct) {

	1094 // target ptr was pointing to somewhere within the source UText itself.

	1095 // Move it to the same offset within the target UText.

	1096 *destPtr = dUText + (dptr-sUText);

	1097 }

	1098 }

	1099

	1100

	1101 //

	1102 // Clone. This is a generic copy-the-utext-by-value clone function that can be

	1103 // used as-is with some utext types, and as a helper by other clones.

	1104 //

	1105 static UText * U_CALLCONV

	1106 shallowTextClone(UText * dest, const UText * src, UErrorCode * status) {

	1107 if (U_FAILURE(*status)) {

	1108 return NULL;

	1109 }

	1110 int32_t srcExtraSize = src->extraSize;

	1111

	1112 //

	1113 // Use the generic text_setup to allocate storage if required.

	1114 //

	1115 dest = utext_setup(dest, srcExtraSize, status);

	1116 if (U_FAILURE(*status)) {

	1117 return dest;

	1118 }

	1119

	1120 //

	1121 // flags (how the UText was allocated) and the pointer to the

	1122 // extra storage must retain the values in the cloned utext that

	1123 // were set up by utext_setup. Save them separately before

	1124 // copying the whole struct.

	1125 //

	1126 void *destExtra = dest->pExtra;

	1127 int32_t flags = dest->flags;

	1128

	1129

	1130 //

	1131 // Copy the whole UText struct by value.

	1132 // Any "Extra" storage is copied also.

	1133 //

	1134 int sizeToCopy = src->sizeOfStruct;

	1135 if (sizeToCopy > dest->sizeOfStruct) {

	1136 sizeToCopy = dest->sizeOfStruct;

	1137 }

	1138 uprv_memcpy(dest, src, sizeToCopy);

	1139 dest->pExtra = destExtra;

	1140 dest->flags = flags;

	1141 if (srcExtraSize > 0) {

	1142 uprv_memcpy(dest->pExtra, src->pExtra, srcExtraSize);

	1143 }

	1144

	1145 //

	1146 // Relocate any pointers in the target that refer to the UText itself

	1147 // to point to the cloned copy rather than the original source.

	1148 //

	1149 adjustPointer(dest, &dest->context, src);

	1150 adjustPointer(dest, &dest->p, src);

	1151 adjustPointer(dest, &dest->q, src);

	1152 adjustPointer(dest, &dest->r, src);

	1153 adjustPointer(dest, (const void **)&dest->chunkContents, src);

	1154

	1155 return dest;

	1156 }

	1157

	1158

	1159 U_CDECL_END

	1160

	1161

	1162

	1163 //------------------------------------------------------------------------------

	1164 //

	1165 // UText implementation for UTF-8 char * strings (read-only)

	1166 // Limitation: string length must be <= 0x7fffffff in length.

	1167 // (length must fit in an int32_t variable)

	1168 //

	1169 // Use of UText data members:

	1170 // context pointer to UTF-8 string

	1171 // utext.b is the input string length (bytes).

	1172 // utext.c Length scanned so far in string

	1173 // (for optimizing finding length of zero terminated strings.)

	1174 // utext.p pointer to the current buffer

	1175 // utext.q pointer to the other buffer.

	1176 //

	1177 //------------------------------------------------------------------------------

	1178

	1179 // Chunk size.

	1180 // Must be less than 85, because of byte mapping from UChar indexes to nativ e indexes.

	1181 // Worst case is three native bytes to one UChar. (Supplemenaries are 4 nat ive bytes

	1182 // to two UChars.)

	1183 //

	1184 enum { UTF8_TEXT_CHUNK_SIZE=32 };

	1185

	1186 //

	1187 // UTF8Buf Two of these structs will be set up in the UText's extra allocated s pace.

	1188 // Each contains the UChar chunk buffer, the to and from native maps, a nd

	1189 // header info.

	1190 //

	1191 // because backwards iteration fills the buffers starting at the end and

	1192 // working towards the front, the filled part of the buffers may not begin

	1193 // at the start of the available storage for the buffers.

	1194 //

	1195 // Buffer size is one bigger than the specified UTF8_TEXT_CHUNK_SIZE to allo w for

	1196 // the last character added being a supplementary, and thus requiring a surr ogate

	1197 // pair. Doing this is simpler than checking for the edge case.

	1198 //

	1199

	1200 struct UTF8Buf {

	1201 int32_t bufNativeStart; // Native index of first ch ar in UChar buf

	1202 int32_t bufNativeLimit; // Native index following l ast char in buf.

	1203 int32_t bufStartIdx; // First filled position in buf.

	1204 int32_t bufLimitIdx; // Limit of filled range in buf.

	1205 int32_t bufNILimit; // Limit of native indexing part of buf

	1206 int32_t toUCharsMapStart; // Native index correspondi ng to

	1207 // mapToUChars[0].

	1208 // Set to bufNativeStart when filling forwards.

	1209 // Set to computed value when filling backwards.

	1210

	1211 UChar buf[UTF8_TEXT_CHUNK_SIZE+4]; // The UChar buffer. Requi res one extra position beyond the

	1212 // the chunk size, to all ow for surrogate at the end.

	1213 // Length must be identic al to mapToNative array, below,

	1214 // because of the way ind exing works when the array is

	1215 // filled backwards durin g a reverse iteration. Thus,

	1216 // the additional extra s ize.

	1217 uint8_t mapToNative[UTF8_TEXT_CHUNK_SIZE+4]; // map UChar index in buf t o

	1218 // native offset from bufN ativeStart.

	1219 // Requires two extra slot s,

	1220 // one for a supplementa ry starting in the last normal position,

	1221 // and one for an entry for the buffer limit position.

	1222 uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from b ufNativeStart to

	1223 // correspoding offset in filled part of buf.

	1224 int32_t align;

	1225 };

	1226

	1227 U_CDECL_BEGIN

	1228

	1229 //

	1230 // utf8TextLength

	1231 //

	1232 // Get the length of the string. If we don't already know it,

	1233 // we'll need to scan for the trailing nul.

	1234 //

	1235 static int64_t U_CALLCONV

	1236 utf8TextLength(UText *ut) {

	1237 if (ut->b < 0) {

	1238 // Zero terminated string, and we haven't scanned to the end yet.

	1239 // Scan it now.

	1240 const char r = (const char )ut->context + ut->c;

	1241 while (*r != 0) {

	1242 r++;

	1243 }

	1244 if ((r - (const char *)ut->context) < 0x7fffffff) {

	1245 ut->b = (int32_t)(r - (const char *)ut->context);

	1246 } else {

	1247 // Actual string was bigger (more than 2 gig) than we

	1248 // can handle. Clip it to 2 GB.

	1249 ut->b = 0x7fffffff;

	1250 }

	1251 ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);

	1252 }

	1253 return ut->b;

	1254 }

	1255

	1256

	1257

	1258

	1259

	1260

	1261 static UBool U_CALLCONV

	1262 utf8TextAccess(UText *ut, int64_t index, UBool forward) {

	1263 //

	1264 // Apologies to those who are allergic to goto statements.

	1265 // Consider each goto to a labelled block to be the equivalent of

	1266 // call the named block as if it were a function();

	1267 // return;

	1268 //

	1269 const uint8_t s8=(const uint8_t )ut->context;

	1270 UTF8Buf *u8b = NULL;

	1271 int32_t length = ut->b; // Length of original utf-8

	1272 int32_t ix= (int32_t)index; // Requested index, trimmed to 32 bits.

	1273 int32_t mapIndex = 0;

	1274 if (index<0) {

	1275 ix=0;

	1276 } else if (index > 0x7fffffff) {

	1277 // Strings with 64 bit lengths not supported by this UTF-8 provider.

	1278 ix = 0x7fffffff;

	1279 }

	1280

	1281 // Pin requested index to the string length.

	1282 if (ix>length) {

	1283 if (length>=0) {

	1284 ix=length;

	1285 } else if (ix>=ut->c) {

	1286 // Zero terminated string, and requested index is beyond

	1287 // the region that has already been scanned.

	1288 // Scan up to either the end of the string or to the

	1289 // requested position, whichever comes first.

	1290 while (ut->c<ix && s8[ut->c]!=0) {

	1291 ut->c++;

	1292 }

	1293 // TODO: support for null terminated string length > 32 bits.

	1294 if (s8[ut->c] == 0) {

	1295 // We just found the actual length of the string.

	1296 // Trim the requested index back to that.

	1297 ix = ut->c;

	1298 ut->b = ut->c;

	1299 length = ut->c;

	1300 ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXP ENSIVE);

	1301 }

	1302 }

	1303 }

	1304

	1305 //

	1306 // Dispatch to the appropriate action for a forward iteration request.

	1307 //

	1308 if (forward) {

	1309 if (ix==ut->chunkNativeLimit) {

	1310 // Check for normal sequential iteration cases first.

	1311 if (ix==length) {

	1312 // Just reached end of string

	1313 // Don't swap buffers, but do set the

	1314 // current buffer position.

	1315 ut->chunkOffset = ut->chunkLength;

	1316 return FALSE;

	1317 } else {

	1318 // End of current buffer.

	1319 // check whether other buffer already has what we need.

	1320 UTF8Buf altB = (UTF8Buf )ut->q;

	1321 if (ix>=altB->bufNativeStart && ix<altB->bufNativeLimit) {

	1322 goto swapBuffers;

	1323 }

	1324 }

	1325 }

	1326

	1327 // A random access. Desired index could be in either or niether buf.

	1328 // For optimizing the order of testing, first check for the index

	1329 // being in the other buffer. This will be the case for uses that

	1330 // move back and forth over a fairly limited range

	1331 {

	1332 u8b = (UTF8Buf *)ut->q; // the alternate buffer

	1333 if (ix>=u8b->bufNativeStart && ix<u8b->bufNativeLimit) {

	1334 // Requested index is in the other buffer.

	1335 goto swapBuffers;

	1336 }

	1337 if (ix == length) {

	1338 // Requested index is end-of-string.

	1339 // (this is the case of randomly seeking to the end.

	1340 // The case of iterating off the end is handled earlier.)

	1341 if (ix == ut->chunkNativeLimit) {

	1342 // Current buffer extends up to the end of the string.

	1343 // Leave it as the current buffer.

	1344 ut->chunkOffset = ut->chunkLength;

	1345 return FALSE;

	1346 }

	1347 if (ix == u8b->bufNativeLimit) {

	1348 // Alternate buffer extends to the end of string.

	1349 // Swap it in as the current buffer.

	1350 goto swapBuffersAndFail;

	1351 }

	1352

	1353 // Neither existing buffer extends to the end of the string.

	1354 goto makeStubBuffer;

	1355 }

	1356

	1357 if (ix<ut->chunkNativeStart \|\| ix>=ut->chunkNativeLimit) {

	1358 // Requested index is in neither buffer.

	1359 goto fillForward;

	1360 }

	1361

	1362 // Requested index is in this buffer.

	1363 u8b = (UTF8Buf *)ut->p; // the current buffer

	1364 mapIndex = ix - u8b->toUCharsMapStart;

	1365 ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;

	1366 return TRUE;

	1367

	1368 }

	1369 }

	1370

	1371

	1372 //

	1373 // Dispatch to the appropriate action for a

	1374 // Backwards Diretion iteration request.

	1375 //

	1376 if (ix==ut->chunkNativeStart) {

	1377 // Check for normal sequential iteration cases first.

	1378 if (ix==0) {

	1379 // Just reached the start of string

	1380 // Don't swap buffers, but do set the

	1381 // current buffer position.

	1382 ut->chunkOffset = 0;

	1383 return FALSE;

	1384 } else {

	1385 // Start of current buffer.

	1386 // check whether other buffer already has what we need.

	1387 UTF8Buf altB = (UTF8Buf )ut->q;

	1388 if (ix>altB->bufNativeStart && ix<=altB->bufNativeLimit) {

	1389 goto swapBuffers;

	1390 }

	1391 }

	1392 }

	1393

	1394 // A random access. Desired index could be in either or niether buf.

	1395 // For optimizing the order of testing,

	1396 // Most likely case: in the other buffer.

	1397 // Second most likely: in neither buffer.

	1398 // Unlikely, but must work: in the current buffer.

	1399 u8b = (UTF8Buf *)ut->q; // the alternate buffer

	1400 if (ix>u8b->bufNativeStart && ix<=u8b->bufNativeLimit) {

	1401 // Requested index is in the other buffer.

	1402 goto swapBuffers;

	1403 }

	1404 // Requested index is start-of-string.

	1405 // (this is the case of randomly seeking to the start.

	1406 // The case of iterating off the start is handled earlier.)

	1407 if (ix==0) {

	1408 if (u8b->bufNativeStart==0) {

	1409 // Alternate buffer contains the data for the start string.

	1410 // Make it be the current buffer.

	1411 goto swapBuffersAndFail;

	1412 } else {

	1413 // Request for data before the start of string,

	1414 // neither buffer is usable.

	1415 // set up a zero-length buffer.

	1416 goto makeStubBuffer;

	1417 }

	1418 }

	1419

	1420 if (ix<=ut->chunkNativeStart \|\| ix>ut->chunkNativeLimit) {

	1421 // Requested index is in neither buffer.

	1422 goto fillReverse;

	1423 }

	1424

	1425 // Requested index is in this buffer.

	1426 // Set the utf16 buffer index.

	1427 u8b = (UTF8Buf *)ut->p;

	1428 mapIndex = ix - u8b->toUCharsMapStart;

	1429 ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;

	1430 if (ut->chunkOffset==0) {

	1431 // This occurs when the first character in the text is

	1432 // a multi-byte UTF-8 char, and the requested index is to

	1433 // one of the trailing bytes. Because there is no preceding ,

	1434 // character, this access fails. We can't pick up on the

	1435 // situation sooner because the requested index is not zero.

	1436 return FALSE;

	1437 } else {

	1438 return TRUE;

	1439 }

	1440

	1441

	1442

	1443 swapBuffers:

	1444 // The alternate buffer (ut->q) has the string data that was requested.

	1445 // Swap the primary and alternate buffers, and set the

	1446 // chunk index into the new primary buffer.

	1447 {

	1448 u8b = (UTF8Buf *)ut->q;

	1449 ut->q = ut->p;

	1450 ut->p = u8b;

	1451 ut->chunkContents = &u8b->buf[u8b->bufStartIdx];

	1452 ut->chunkLength = u8b->bufLimitIdx - u8b->bufStartIdx;

	1453 ut->chunkNativeStart = u8b->bufNativeStart;

	1454 ut->chunkNativeLimit = u8b->bufNativeLimit;

	1455 ut->nativeIndexingLimit = u8b->bufNILimit;

	1456

	1457 // Index into the (now current) chunk

	1458 // Use the map to set the chunk index. It's more trouble than it's wort h

	1459 // to check whether native indexing can be used.

	1460 U_ASSERT(ix>=u8b->bufNativeStart);

	1461 U_ASSERT(ix<=u8b->bufNativeLimit);

	1462 mapIndex = ix - u8b->toUCharsMapStart;

	1463 U_ASSERT(mapIndex>=0);

	1464 U_ASSERT(mapIndex<(int32_t)sizeof(u8b->mapToUChars));

	1465 ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;

	1466

	1467 return TRUE;

	1468 }

	1469

	1470

	1471 swapBuffersAndFail:

	1472 // We got a request for either the start or end of the string,

	1473 // with iteration continuing in the out-of-bounds direction.

	1474 // The alternate buffer already contains the data up to the

	1475 // start/end.

	1476 // Swap the buffers, then return failure, indicating that we couldn't

	1477 // make things correct for continuing the iteration in the requested

	1478 // direction. The position & buffer are correct should the

	1479 // user decide to iterate in the opposite direction.

	1480 u8b = (UTF8Buf *)ut->q;

	1481 ut->q = ut->p;

	1482 ut->p = u8b;

	1483 ut->chunkContents = &u8b->buf[u8b->bufStartIdx];

	1484 ut->chunkLength = u8b->bufLimitIdx - u8b->bufStartIdx;

	1485 ut->chunkNativeStart = u8b->bufNativeStart;

	1486 ut->chunkNativeLimit = u8b->bufNativeLimit;

	1487 ut->nativeIndexingLimit = u8b->bufNILimit;

	1488

	1489 // Index into the (now current) chunk

	1490 // For this function (swapBuffersAndFail), the requested index

	1491 // will always be at either the start or end of the chunk.

	1492 if (ix==u8b->bufNativeLimit) {

	1493 ut->chunkOffset = ut->chunkLength;

	1494 } else {

	1495 ut->chunkOffset = 0;

	1496 U_ASSERT(ix == u8b->bufNativeStart);

	1497 }

	1498 return FALSE;

	1499

	1500 makeStubBuffer:

	1501 // The user has done a seek/access past the start or end

	1502 // of the string. Rather than loading data that is likely

	1503 // to never be used, just set up a zero-length buffer at

	1504 // the position.

	1505 u8b = (UTF8Buf *)ut->q;

	1506 u8b->bufNativeStart = ix;

	1507 u8b->bufNativeLimit = ix;

	1508 u8b->bufStartIdx = 0;

	1509 u8b->bufLimitIdx = 0;

	1510 u8b->bufNILimit = 0;

	1511 u8b->toUCharsMapStart = ix;

	1512 u8b->mapToNative[0] = 0;

	1513 u8b->mapToUChars[0] = 0;

	1514 goto swapBuffersAndFail;

	1515

	1516

	1517

	1518 fillForward:

	1519 {

	1520 // Move the incoming index to a code point boundary.

	1521 U8_SET_CP_START(s8, 0, ix);

	1522

	1523 // Swap the UText buffers.

	1524 // We want to fill what was previously the alternate buffer,

	1525 // and make what was the current buffer be the new alternate.

	1526 UTF8Buf u8b = (UTF8Buf )ut->q;

	1527 ut->q = ut->p;

	1528 ut->p = u8b;

	1529

	1530 int32_t strLen = ut->b;

	1531 UBool nulTerminated = FALSE;

	1532 if (strLen < 0) {

	1533 strLen = 0x7fffffff;

	1534 nulTerminated = TRUE;

	1535 }

	1536

	1537 UChar *buf = u8b->buf;

	1538 uint8_t *mapToNative = u8b->mapToNative;

	1539 uint8_t *mapToUChars = u8b->mapToUChars;

	1540 int32_t destIx = 0;

	1541 int32_t srcIx = ix;

	1542 UBool seenNonAscii = FALSE;

	1543 UChar32 c = 0;

	1544

	1545 // Fill the chunk buffer and mapping arrays.

	1546 while (destIx<UTF8_TEXT_CHUNK_SIZE) {

	1547 c = s8[srcIx];

	1548 if (c>0 && c<0x80) {

	1549 // Special case ASCII range for speed.

	1550 // zero is excluded to simplify bounds checking.

	1551 buf[destIx] = (UChar)c;

	1552 mapToNative[destIx] = (uint8_t)(srcIx - ix);

	1553 mapToUChars[srcIx-ix] = (uint8_t)destIx;

	1554 srcIx++;

	1555 destIx++;

	1556 } else {

	1557 // General case, handle everything.

	1558 if (seenNonAscii == FALSE) {

	1559 seenNonAscii = TRUE;

	1560 u8b->bufNILimit = destIx;

	1561 }

	1562

	1563 int32_t cIx = srcIx;

	1564 int32_t dIx = destIx;

	1565 int32_t dIxSaved = destIx;

	1566 U8_NEXT(s8, srcIx, strLen, c);

	1567 if (c==0 && nulTerminated) {

	1568 srcIx--;

	1569 break;

	1570 }

	1571 if (c<0) {

	1572 // Illegal UTF-8. Replace with sub character.

	1573 c = 0x0fffd;

	1574 }

	1575

	1576 U16_APPEND_UNSAFE(buf, destIx, c);

	1577 do {

	1578 mapToNative[dIx++] = (uint8_t)(cIx - ix);

	1579 } while (dIx < destIx);

	1580

	1581 do {

	1582 mapToUChars[cIx++ - ix] = (uint8_t)dIxSaved;

	1583 } while (cIx < srcIx);

	1584 }

	1585 if (srcIx>=strLen) {

	1586 break;

	1587 }

	1588

	1589 }

	1590

	1591 // store Native <--> Chunk Map entries for the end of the buffer.

	1592 // There is no actual character here, but the index position is valid .

	1593 mapToNative[destIx] = (uint8_t)(srcIx - ix);

	1594 mapToUChars[srcIx - ix] = (uint8_t)destIx;

	1595

	1596 // fill in Buffer descriptor

	1597 u8b->bufNativeStart = ix;

	1598 u8b->bufNativeLimit = srcIx;

	1599 u8b->bufStartIdx = 0;

	1600 u8b->bufLimitIdx = destIx;

	1601 if (seenNonAscii == FALSE) {

	1602 u8b->bufNILimit = destIx;

	1603 }

	1604 u8b->toUCharsMapStart = u8b->bufNativeStart;

	1605

	1606 // Set UText chunk to refer to this buffer.

	1607 ut->chunkContents = buf;

	1608 ut->chunkOffset = 0;

	1609 ut->chunkLength = u8b->bufLimitIdx;

	1610 ut->chunkNativeStart = u8b->bufNativeStart;

	1611 ut->chunkNativeLimit = u8b->bufNativeLimit;

	1612 ut->nativeIndexingLimit = u8b->bufNILimit;

	1613

	1614 // For zero terminated strings, keep track of the maximum point

	1615 // scanned so far.

	1616 if (nulTerminated && srcIx>ut->c) {

	1617 ut->c = srcIx;

	1618 if (c==0) {

	1619 // We scanned to the end.

	1620 // Remember the actual length.

	1621 ut->b = srcIx;

	1622 ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXP ENSIVE);

	1623 }

	1624 }

	1625 return TRUE;

	1626 }

	1627

	1628

	1629 fillReverse:

	1630 {

	1631 // Move the incoming index to a code point boundary.

	1632 // Can only do this if the incoming index is somewhere in the interior o f the string.

	1633 // If index is at the end, there is no character there to look at.

	1634 if (ix != ut->b) {

	1635 U8_SET_CP_START(s8, 0, ix);

	1636 }

	1637

	1638 // Swap the UText buffers.

	1639 // We want to fill what was previously the alternate buffer,

	1640 // and make what was the current buffer be the new alternate.

	1641 UTF8Buf u8b = (UTF8Buf )ut->q;

	1642 ut->q = ut->p;

	1643 ut->p = u8b;

	1644

	1645 UChar *buf = u8b->buf;

	1646 uint8_t *mapToNative = u8b->mapToNative;

	1647 uint8_t *mapToUChars = u8b->mapToUChars;

	1648 int32_t toUCharsMapStart = ix - (UTF8_TEXT_CHUNK_SIZE*3 + 1);

	1649 int32_t destIx = UTF8_TEXT_CHUNK_SIZE+2; // Start in the overflow reg ion

	1650 // at end of buffer to lea ve room

	1651 // for a surrogate pair at the

	1652 // buffer start.

	1653 int32_t srcIx = ix;

	1654 int32_t bufNILimit = destIx;

	1655 UChar32 c;

	1656

	1657 // Map to/from Native Indexes, fill in for the position at the end of

	1658 // the buffer.

	1659 //

	1660 mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);

	1661 mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;

	1662

	1663 // Fill the chunk buffer

	1664 // Work backwards, filling from the end of the buffer towards the front.

	1665 //

	1666 while (destIx>2 && (srcIx - toUCharsMapStart > 5) && (srcIx > 0)) {

	1667 srcIx--;

	1668 destIx--;

	1669

	1670 // Get last byte of the UTF-8 character

	1671 c = s8[srcIx];

	1672 if (c<0x80) {

	1673 // Special case ASCII range for speed.

	1674 buf[destIx] = (UChar)c;

	1675 mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;

	1676 mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);

	1677 } else {

	1678 // General case, handle everything non-ASCII.

	1679

	1680 int32_t sIx = srcIx; // ix of last byte of multi-byte u8 char

	1681

	1682 // Get the full character from the UTF8 string.

	1683 // use code derived from tbe macros in utf.8

	1684 // Leaves srcIx pointing at the first byte of the UTF-8 char.

	1685 //

	1686 if (c<=0xbf) {

	1687 c=utf8_prevCharSafeBody(s8, 0, &srcIx, c, -1);

	1688 // leaves srcIx at first byte of the multi-byte char.

	1689 } else {

	1690 c=0x0fffd;

	1691 }

	1692

	1693 // Store the character in UTF-16 buffer.

	1694 if (c<0x10000) {

	1695 buf[destIx] = (UChar)c;

	1696 mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);

	1697 } else {

	1698 buf[destIx] = U16_TRAIL(c);

	1699 mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);

	1700 buf[--destIx] = U16_LEAD(c);

	1701 mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);

	1702 }

	1703

	1704 // Fill in the map from native indexes to UChars buf index.

	1705 do {

	1706 mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx;

	1707 } while (sIx >= srcIx);

	1708

	1709 // Set native indexing limit to be the current position.

	1710 // We are processing a non-ascii, non-native-indexing char now ;

	1711 // the limit will be here if the rest of the chars to be

	1712 // added to this buffer are ascii.

	1713 bufNILimit = destIx;

	1714 }

	1715 }

	1716 u8b->bufNativeStart = srcIx;

	1717 u8b->bufNativeLimit = ix;

	1718 u8b->bufStartIdx = destIx;

	1719 u8b->bufLimitIdx = UTF8_TEXT_CHUNK_SIZE+2;

	1720 u8b->bufNILimit = bufNILimit - u8b->bufStartIdx;

	1721 u8b->toUCharsMapStart = toUCharsMapStart;

	1722

	1723 ut->chunkContents = &buf[u8b->bufStartIdx];

	1724 ut->chunkLength = u8b->bufLimitIdx - u8b->bufStartIdx;

	1725 ut->chunkOffset = ut->chunkLength;

	1726 ut->chunkNativeStart = u8b->bufNativeStart;

	1727 ut->chunkNativeLimit = u8b->bufNativeLimit;

	1728 ut->nativeIndexingLimit = u8b->bufNILimit;

	1729 return TRUE;

	1730 }

	1731

	1732 }

	1733

	1734

	1735

	1736 //

	1737 // This is a slightly modified copy of u_strFromUTF8,

	1738 // Inserts a Replacement Char rather than failing on invalid UTF-8

	1739 // Removes unnecessary features.

	1740 //

	1741 static UChar*

	1742 utext_strFromUTF8(UChar *dest,

	1743 int32_t destCapacity,

	1744 int32_t *pDestLength,

	1745 const char* src,

	1746 int32_t srcLength, // required. NUL terminated not support ed.

	1747 UErrorCode *pErrorCode

	1748 )

	1749 {

	1750

	1751 UChar *pDest = dest;

	1752 UChar *pDestLimit = dest+destCapacity;

	1753 UChar32 ch=0;

	1754 int32_t index = 0;

	1755 int32_t reqLength = 0;

	1756 uint8_t* pSrc = (uint8_t*) src;

	1757

	1758

	1759 while((index < srcLength)&&(pDest<pDestLimit)){

	1760 ch = pSrc[index++];

	1761 if(ch <=0x7f){

	1762 *pDest++=(UChar)ch;

	1763 }else{

	1764 ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -1);

	1765 if(ch<0){

	1766 ch = 0xfffd;

	1767 }

	1768 if(U_IS_BMP(ch)){

	1769 *(pDest++)=(UChar)ch;

	1770 }else{

	1771 *(pDest++)=UTF16_LEAD(ch);

	1772 if(pDest<pDestLimit){

	1773 *(pDest++)=UTF16_TRAIL(ch);

	1774 }else{

	1775 reqLength++;

	1776 break;

	1777 }

	1778 }

	1779 }

	1780 }

	1781 /* donot fill the dest buffer just count the UChars needed */

	1782 while(index < srcLength){

	1783 ch = pSrc[index++];

	1784 if(ch <= 0x7f){

	1785 reqLength++;

	1786 }else{

	1787 ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -1);

	1788 if(ch<0){

	1789 ch = 0xfffd;

	1790 }

	1791 reqLength+=U16_LENGTH(ch);

	1792 }

	1793 }

	1794

	1795 reqLength+=(int32_t)(pDest - dest);

	1796

	1797 if(pDestLength){

	1798 *pDestLength = reqLength;

	1799 }

	1800

	1801 /* Terminate the buffer */

	1802 u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);

	1803

	1804 return dest;

	1805 }

	1806

	1807

	1808

	1809 static int32_t U_CALLCONV

	1810 utf8TextExtract(UText *ut,

	1811 int64_t start, int64_t limit,

	1812 UChar *dest, int32_t destCapacity,

	1813 UErrorCode *pErrorCode) {

	1814 if(U_FAILURE(*pErrorCode)) {

	1815 return 0;

	1816 }

	1817 if(destCapacity<0 \|\| (dest==NULL && destCapacity>0)) {

	1818 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	1819 return 0;

	1820 }

	1821 int32_t length = ut->b;

	1822 int32_t start32 = pinIndex(start, length);

	1823 int32_t limit32 = pinIndex(limit, length);

	1824

	1825 if(start32>limit32) {

	1826 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;

	1827 return 0;

	1828 }

	1829

	1830

	1831 // adjust the incoming indexes to land on code point boundaries if needed.

	1832 // adjust by no more than three, because that is the largest number of tr ail bytes

	1833 // in a well formed UTF8 character.

	1834 const uint8_t buf = (const uint8_t )ut->context;

	1835 int i;

	1836 if (start32 < ut->chunkNativeLimit) {

	1837 for (i=0; i<3; i++) {

	1838 if (U8_IS_SINGLE(buf[start32]) \|\| U8_IS_LEAD(buf[start32]) \|\| start3 2==0) {

	1839 break;

	1840 }

	1841 start32--;

	1842 }

	1843 }

	1844

	1845 if (limit32 < ut->chunkNativeLimit) {

	1846 for (i=0; i<3; i++) {

	1847 if (U8_IS_SINGLE(buf[limit32]) \|\| U8_IS_LEAD(buf[limit32]) \|\| limit3 2==0) {

	1848 break;

	1849 }

	1850 limit32--;

	1851 }

	1852 }

	1853

	1854 // Do the actual extract.

	1855 int32_t destLength=0;

	1856 utext_strFromUTF8(dest, destCapacity, &destLength,

	1857 (const char *)ut->context+start32, limit32-start32,

	1858 pErrorCode);

	1859 utf8TextAccess(ut, limit32, TRUE);

	1860 return destLength;

	1861 }

	1862

	1863 //

	1864 // utf8TextMapOffsetToNative

	1865 //

	1866 // Map a chunk (UTF-16) offset to a native index.

	1867 static int64_t U_CALLCONV

	1868 utf8TextMapOffsetToNative(const UText *ut) {

	1869 //

	1870 UTF8Buf u8b = (UTF8Buf )ut->p;

	1871 U_ASSERT(ut->chunkOffset>ut->nativeIndexingLimit && ut->chunkOffset<=ut->chu nkLength);

	1872 int32_t nativeOffset = u8b->mapToNative[ut->chunkOffset + u8b->bufStartIdx] + u8b->toUCharsMapStart;

	1873 U_ASSERT(nativeOffset >= ut->chunkNativeStart && nativeOffset <= ut->chunkNa tiveLimit);

	1874 return nativeOffset;

	1875 }

	1876

	1877 //

	1878 // Map a native index to the corrsponding chunk offset

	1879 //

	1880 static int32_t U_CALLCONV

	1881 utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) {

	1882 U_ASSERT(index64 <= 0x7fffffff);

	1883 int32_t index = (int32_t)index64;

	1884 UTF8Buf u8b = (UTF8Buf )ut->p;

	1885 U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit);

	1886 U_ASSERT(index<=ut->chunkNativeLimit);

	1887 int32_t mapIndex = index - u8b->toUCharsMapStart;

	1888 int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;

	1889 U_ASSERT(offset>=0 && offset<=ut->chunkLength);

	1890 return offset;

	1891 }

	1892

	1893 static UText * U_CALLCONV

	1894 utf8TextClone(UText dest, const UText src, UBool deep, UErrorCode *status)

	1895 {

	1896 // First do a generic shallow clone. Does everything needed for the UText s truct itself.

	1897 dest = shallowTextClone(dest, src, status);

	1898

	1899 // For deep clones, make a copy of the string.

	1900 // The copied storage is owned by the newly created clone.

	1901 //

	1902 // TODO: There is an isssue with using utext_nativeLength().

	1903 // That function is non-const in cases where the input was NUL termin ated

	1904 // and the length has not yet been determined.

	1905 // This function (clone()) is const.

	1906 // There potentially a thread safety issue lurking here.

	1907 //

	1908 if (deep && U_SUCCESS(*status)) {

	1909 int32_t len = (int32_t)utext_nativeLength((UText *)src);

	1910 char copyStr = (char )uprv_malloc(len+1);

	1911 if (copyStr == NULL) {

	1912 *status = U_MEMORY_ALLOCATION_ERROR;

	1913 } else {

	1914 uprv_memcpy(copyStr, src->context, len+1);

	1915 dest->context = copyStr;

	1916 dest->providerProperties \|= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);

	1917 }

	1918 }

	1919 return dest;

	1920 }

	1921

	1922

	1923 static void U_CALLCONV

	1924 utf8TextClose(UText *ut) {

	1925 // Most of the work of close is done by the generic UText framework close.

	1926 // All that needs to be done here is to delete the UTF8 string if the UText

	1927 // owns it. This occurs if the UText was created by cloning.

	1928 if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) {

	1929 char s = (char )ut->context;

	1930 uprv_free(s);

	1931 ut->context = NULL;

	1932 }

	1933 }

	1934

	1935 U_CDECL_END

	1936

	1937

	1938 static const struct UTextFuncs utf8Funcs =

	1939 {

	1940 sizeof(UTextFuncs),

	1941 0, 0, 0, // Reserved alignment padding

	1942 utf8TextClone,

	1943 utf8TextLength,

	1944 utf8TextAccess,

	1945 utf8TextExtract,

	1946 NULL, /* replace*/

	1947 NULL, /* copy */

	1948 utf8TextMapOffsetToNative,

	1949 utf8TextMapIndexToUTF16,

	1950 utf8TextClose,

	1951 NULL, // spare 1

	1952 NULL, // spare 2

	1953 NULL // spare 3

	1954 };

	1955

	1956

	1957 static const char gEmptyString[] = {0};

	1958

	1959 U_CAPI UText * U_EXPORT2

	1960 utext_openUTF8(UText ut, const char s, int64_t length, UErrorCode *status) {

	1961 if(U_FAILURE(*status)) {

	1962 return NULL;

	1963 }

	1964 if(s==NULL && length==0) {

	1965 s = gEmptyString;

	1966 }

	1967

	1968 if(s==NULL \|\| length<-1 \|\| length>INT32_MAX) {

	1969 *status=U_ILLEGAL_ARGUMENT_ERROR;

	1970 return NULL;

	1971 }

	1972

	1973 ut = utext_setup(ut, sizeof(UTF8Buf) * 2, status);

	1974 if (U_FAILURE(*status)) {

	1975 return ut;

	1976 }

	1977

	1978 ut->pFuncs = &utf8Funcs;

	1979 ut->context = s;

	1980 ut->b = (int32_t)length;

	1981 ut->c = (int32_t)length;

	1982 if (ut->c < 0) {

	1983 ut->c = 0;

	1984 ut->providerProperties \|= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);

	1985 }

	1986 ut->p = ut->pExtra;

	1987 ut->q = (char *)ut->pExtra + sizeof(UTF8Buf);

	1988 return ut;

	1989

	1990 }

	1991

	1992

	1993

	1994

	1995

	1996

	1997

	1998

	1999 //------------------------------------------------------------------------------

	2000 //

	2001 // UText implementation wrapper for Replaceable (read/write)

	2002 //

	2003 // Use of UText data members:

	2004 // context pointer to Replaceable.

	2005 // p unused; ownership of the Replaceable is recorded by the UTEXT_PROVIDER_OWNS_TEXT flag.

	2006 //

	2007 //------------------------------------------------------------------------------

	2008

	2009

	2010

	2011 // minimum chunk size for this implementation: 3

	2012 // to allow for possible trimming for code point boundaries

	2013 enum { REP_TEXT_CHUNK_SIZE=10 };

	2014

	2015 struct ReplExtra {

	2016 /*

	2017 * Chunk UChars.

	2018 * +1 to simplify filling with surrogate pair at the end.

	2019 */

	2020 UChar s[REP_TEXT_CHUNK_SIZE+1];

	2021 };

	2022

	2023

	2024 U_CDECL_BEGIN

	2025

	2026 static UText * U_CALLCONV

	2027 repTextClone(UText dest, const UText src, UBool deep, UErrorCode *status) {

	2028 // First do a generic shallow clone. Does everything needed for the UText s truct itself.

	2029 dest = shallowTextClone(dest, src, status);

	2030

	2031 // For deep clones, make a copy of the Replaceable.

	2032 // The copied Replaceable storage is owned by the newly created UText clone .

	2033 // A non-NULL pointer in UText.p is the signal to the close() function to d elete

	2034 // it.

	2035 //

	2036 if (deep && U_SUCCESS(*status)) {

	2037 const Replaceable replSrc = (const Replaceable )src->context;

	2038 dest->context = replSrc->clone();

	2039 dest->providerProperties \|= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);

	2040

	2041 // with deep clone, the copy is writable, even when the source is not.

	2042 dest->providerProperties \|= I32_FLAG(UTEXT_PROVIDER_WRITABLE);

	2043 }

	2044 return dest;

	2045 }

	2046

	2047

	2048 static void U_CALLCONV

	2049 repTextClose(UText *ut) {

	2050 // Most of the work of close is done by the generic UText framework close.

	2051 // All that needs to be done here is delete the Replaceable if the UText

	2052 // owns it. This occurs if the UText was created by cloning.

	2053 if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) {

	2054 Replaceable rep = (Replaceable )ut->context;

	2055 delete rep;

	2056 ut->context = NULL;

	2057 }

	2058 }

	2059

	2060

	2061 static int64_t U_CALLCONV

	2062 repTextLength(UText *ut) {

	2063 const Replaceable replSrc = (const Replaceable )ut->context;

	2064 int32_t len = replSrc->length();

	2065 return len;

	2066 }

	2067

	2068

	2069 static UBool U_CALLCONV

	2070 repTextAccess(UText *ut, int64_t index, UBool forward) {

	2071 const Replaceable rep=(const Replaceable )ut->context;

	2072 int32_t length=rep->length(); // Full length of the input text (bigger tha n a chunk)

	2073

	2074 // clip the requested index to the limits of the text.

	2075 int32_t index32 = pinIndex(index, length);

	2076 U_ASSERT(index<=INT32_MAX);

	2077

	2078

	2079 /*

	2080 * Compute start/limit boundaries around index, for a segment of text

	2081 * to be extracted.

	2082 * To allow for the possibility that our user gave an index to the trailing

	2083 * half of a surrogate pair, we must request one extra preceding UChar when

	2084 * going in the forward direction. This will ensure that the buffer has the

	2085 * entire code point at the specified index.

	2086 */

	2087 if(forward) {

	2088

	2089 if (index32>=ut->chunkNativeStart && index32<ut->chunkNativeLimit) {

	2090 // Buffer already contains the requested position.

	2091 ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);

	2092 return TRUE;

	2093 }

	2094 if (index32>=length && ut->chunkNativeLimit==length) {

	2095 // Request for end of string, and buffer already extends up to it.

	2096 // Can't get the data, but don't change the buffer.

	2097 ut->chunkOffset = length - (int32_t)ut->chunkNativeStart;

	2098 return FALSE;

	2099 }

	2100

	2101 ut->chunkNativeLimit = index + REP_TEXT_CHUNK_SIZE - 1;

	2102 // Going forward, so we want to have the buffer with stuff at and beyond

	2103 // the requested index. The -1 gets us one code point before the

	2104 // requested index also, to handle the case of the index being on

	2105 // a trail surrogate of a surrogate pair.

	2106 if(ut->chunkNativeLimit > length) {

	2107 ut->chunkNativeLimit = length;

	2108 }

	2109 // unless buffer ran off end, start is index-1.

	2110 ut->chunkNativeStart = ut->chunkNativeLimit - REP_TEXT_CHUNK_SIZE;

	2111 if(ut->chunkNativeStart < 0) {

	2112 ut->chunkNativeStart = 0;

	2113 }

	2114 } else {

	2115 // Reverse iteration. Fill buffer with data preceding the requested ind ex.

	2116 if (index32>ut->chunkNativeStart && index32<=ut->chunkNativeLimit) {

	2117 // Requested position already in buffer.

	2118 ut->chunkOffset = index32 - (int32_t)ut->chunkNativeStart;

	2119 return TRUE;

	2120 }

	2121 if (index32==0 && ut->chunkNativeStart==0) {

	2122 // Request for start, buffer already begins at start.

	2123 // No data, but keep the buffer as is.

	2124 ut->chunkOffset = 0;

	2125 return FALSE;

	2126 }

	2127

	2128 // Figure out the bounds of the chunk to extract for reverse iteration.

	2129 // Need to worry about chunk not splitting surrogate pairs, and while st ill

	2130 // containing the data we need.

	2131 // Fix by requesting a chunk that includes an extra UChar at the end.

	2132 // If this turns out to be a lead surrogate, we can lop it off and still have

	2133 // the data we wanted.

	2134 ut->chunkNativeStart = index32 + 1 - REP_TEXT_CHUNK_SIZE;

	2135 if (ut->chunkNativeStart < 0) {

	2136 ut->chunkNativeStart = 0;

	2137 }

	2138

	2139 ut->chunkNativeLimit = index32 + 1;

	2140 if (ut->chunkNativeLimit > length) {

	2141 ut->chunkNativeLimit = length;

	2142 }

	2143 }

	2144

	2145 // Extract the new chunk of text from the Replaceable source.

	2146 ReplExtra ex = (ReplExtra )ut->pExtra;

	2147 // UnicodeString with its buffer a writable alias to the chunk buffer

	2148 UnicodeString buffer(ex->s, 0 /buffer length/, REP_TEXT_CHUNK_SIZE /buffe r capacity/);

	2149 rep->extractBetween((int32_t)ut->chunkNativeStart, (int32_t)ut->chunkNativeL imit, buffer);

	2150

	2151 ut->chunkContents = ex->s;

	2152 ut->chunkLength = (int32_t)(ut->chunkNativeLimit - ut->chunkNativeStart);

	2153 ut->chunkOffset = (int32_t)(index32 - ut->chunkNativeStart);

	2154

	2155 // Surrogate pairs from the input text must not span chunk boundaries.

	2156 // If end of chunk could be the start of a surrogate, trim it off.

	2157 if (ut->chunkNativeLimit < length &&

	2158 U16_IS_LEAD(ex->s[ut->chunkLength-1])) {

	2159 ut->chunkLength--;

	2160 ut->chunkNativeLimit--;

	2161 if (ut->chunkOffset > ut->chunkLength) {

	2162 ut->chunkOffset = ut->chunkLength;

	2163 }

	2164 }

	2165

	2166 // if the first UChar in the chunk could be the trailing half of a surrogate pair,

	2167 // trim it off.

	2168 if(ut->chunkNativeStart>0 && U16_IS_TRAIL(ex->s[0])) {

	2169 ++(ut->chunkContents);

	2170 ++(ut->chunkNativeStart);

	2171 --(ut->chunkLength);

	2172 --(ut->chunkOffset);

	2173 }

	2174

	2175 // adjust the index/chunkOffset to a code point boundary

	2176 U16_SET_CP_START(ut->chunkContents, 0, ut->chunkOffset);

	2177

	2178 // Use fast indexing for get/setNativeIndex()

	2179 ut->nativeIndexingLimit = ut->chunkLength;

	2180

	2181 return TRUE;

	2182 }

	2183

	2184

	2185

	2186 static int32_t U_CALLCONV

	2187 repTextExtract(UText *ut,

	2188 int64_t start, int64_t limit,

	2189 UChar *dest, int32_t destCapacity,

	2190 UErrorCode *status) {

	2191 const Replaceable rep=(const Replaceable )ut->context;

	2192 int32_t length=rep->length();

	2193

	2194 if(U_FAILURE(*status)) {

	2195 return 0;

	2196 }

	2197 if(destCapacity<0 \|\| (dest==NULL && destCapacity>0)) {

	2198 *status=U_ILLEGAL_ARGUMENT_ERROR;

	2199 }

	2200 if(start>limit) {

	2201 *status=U_INDEX_OUTOFBOUNDS_ERROR;

	2202 return 0;

	2203 }

	2204

	2205 int32_t start32 = pinIndex(start, length);

	2206 int32_t limit32 = pinIndex(limit, length);

	2207

	2208 // adjust start, limit if they point to trail half of surrogates

	2209 if (start32<length && U16_IS_TRAIL(rep->charAt(start32)) &&

	2210 U_IS_SUPPLEMENTARY(rep->char32At(start32))){

	2211 start32--;

	2212 }

	2213 if (limit32<length && U16_IS_TRAIL(rep->charAt(limit32)) &&

	2214 U_IS_SUPPLEMENTARY(rep->char32At(limit32))){

	2215 limit32--;

	2216 }

	2217

	2218 length=limit32-start32;

	2219 if(length>destCapacity) {

	2220 limit32 = start32 + destCapacity;

	2221 }

	2222 UnicodeString buffer(dest, 0, destCapacity); // writable alias

	2223 rep->extractBetween(start32, limit32, buffer);

	2224 repTextAccess(ut, limit32, TRUE);

	2225

	2226 return u_terminateUChars(dest, destCapacity, length, status);

	2227 }

	2228

	2229 static int32_t U_CALLCONV

	2230 repTextReplace(UText *ut,

	2231 int64_t start, int64_t limit,

	2232 const UChar *src, int32_t length,

	2233 UErrorCode *status) {

	2234 Replaceable rep=(Replaceable )ut->context;

	2235 int32_t oldLength;

	2236

	2237 if(U_FAILURE(*status)) {

	2238 return 0;

	2239 }

	2240 if(src==NULL && length!=0) {

	2241 *status=U_ILLEGAL_ARGUMENT_ERROR;

	2242 return 0;

	2243 }

	2244 oldLength=rep->length(); // will subtract from new length

	2245 if(start>limit ) {

	2246 *status=U_INDEX_OUTOFBOUNDS_ERROR;

	2247 return 0;

	2248 }

	2249

	2250 int32_t start32 = pinIndex(start, oldLength);

	2251 int32_t limit32 = pinIndex(limit, oldLength);

	2252

	2253 // Snap start & limit to code point boundaries.

	2254 if (start32<oldLength && U16_IS_TRAIL(rep->charAt(start32)) &&

	2255 start32>0 && U16_IS_LEAD(rep->charAt(start32-1)))

	2256 {

	2257 start32--;

	2258 }

	2259 if (limit32<oldLength && U16_IS_LEAD(rep->charAt(limit32-1)) &&

	2260 U16_IS_TRAIL(rep->charAt(limit32)))

	2261 {

	2262 limit32++;

	2263 }

	2264

	2265 // Do the actual replace operation using methods of the Replaceable class

	2266 UnicodeString replStr((UBool)(length<0), src, length); // read-only alias

	2267 rep->handleReplaceBetween(start32, limit32, replStr);

	2268 int32_t newLength = rep->length();

	2269 int32_t lengthDelta = newLength - oldLength;

	2270

	2271 // Is the UText chunk buffer OK?

	2272 if (ut->chunkNativeLimit > start32) {

	2273 // this replace operation may have impacted the current chunk.

	2274 // invalidate it, which will force a reload on the next access.

	2275 invalidateChunk(ut);

	2276 }

	2277

	2278 // set the iteration position to the end of the newly inserted replacement t ext.

	2279 int32_t newIndexPos = limit32 + lengthDelta;

	2280 repTextAccess(ut, newIndexPos, TRUE);

	2281

	2282 return lengthDelta;

	2283 }

	2284

	2285

	2286 static void U_CALLCONV

	2287 repTextCopy(UText *ut,

	2288 int64_t start, int64_t limit,

	2289 int64_t destIndex,

	2290 UBool move,

	2291 UErrorCode *status)

	2292 {

	2293 Replaceable rep=(Replaceable )ut->context;

	2294 int32_t length=rep->length();

	2295

	2296 if(U_FAILURE(*status)) {

	2297 return;

	2298 }

	2299 if (start>limit \|\| (start<destIndex && destIndex<limit))

	2300 {

	2301 *status=U_INDEX_OUTOFBOUNDS_ERROR;

	2302 return;

	2303 }

	2304

	2305 int32_t start32 = pinIndex(start, length);

	2306 int32_t limit32 = pinIndex(limit, length);

	2307 int32_t destIndex32 = pinIndex(destIndex, length);

	2308

	2309 // TODO: snap input parameters to code point boundaries.

	2310

	2311 if(move) {

	2312 // move: copy to destIndex, then replace original with nothing

	2313 int32_t segLength=limit32-start32;

	2314 rep->copy(start32, limit32, destIndex32);

	2315 if(destIndex32<start32) {

	2316 start32+=segLength;

	2317 limit32+=segLength;

	2318 }

	2319 rep->handleReplaceBetween(start32, limit32, UnicodeString());

	2320 } else {

	2321 // copy

	2322 rep->copy(start32, limit32, destIndex32);

	2323 }

	2324

	2325 // If the change to the text touched the region in the chunk buffer,

	2326 // invalidate the buffer.

	2327 int32_t firstAffectedIndex = destIndex32;

	2328 if (move && start32<firstAffectedIndex) {

	2329 firstAffectedIndex = start32;

	2330 }

	2331 if (firstAffectedIndex < ut->chunkNativeLimit) {

	2332 // changes may have affected range covered by the chunk

	2333 invalidateChunk(ut);

	2334 }

	2335

	2336 // Put iteration position at the newly inserted (moved) block,

	2337 int32_t nativeIterIndex = destIndex32 + limit32 - start32;

	2338 if (move && destIndex32>start32) {

	2339 // moved a block of text towards the end of the string.

	2340 nativeIterIndex = destIndex32;

	2341 }

	2342

	2343 // Set position, reload chunk if needed.

	2344 repTextAccess(ut, nativeIterIndex, TRUE);

	2345 }

	2346

	2347 static const struct UTextFuncs repFuncs =

	2348 {

	2349 sizeof(UTextFuncs),

	2350 0, 0, 0, // Reserved alignment padding

	2351 repTextClone,

	2352 repTextLength,

	2353 repTextAccess,

	2354 repTextExtract,

	2355 repTextReplace,

	2356 repTextCopy,

	2357 NULL, // MapOffsetToNative,

	2358 NULL, // MapIndexToUTF16,

	2359 repTextClose,

	2360 NULL, // spare 1

	2361 NULL, // spare 2

	2362 NULL // spare 3

	2363 };

	2364

	2365

	2366 U_CAPI UText * U_EXPORT2

	2367 utext_openReplaceable(UText ut, Replaceable rep, UErrorCode *status)

	2368 {

	2369 if(U_FAILURE(*status)) {

	2370 return NULL;

	2371 }

	2372 if(rep==NULL) {

	2373 *status=U_ILLEGAL_ARGUMENT_ERROR;

	2374 return NULL;

	2375 }

	2376 ut = utext_setup(ut, sizeof(ReplExtra), status);

	2377

	2378 ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_WRITABLE);

	2379 if(rep->hasMetaData()) {

	2380 ut->providerProperties \|=I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA);

	2381 }

	2382

	2383 ut->pFuncs = &repFuncs;

	2384 ut->context = rep;

	2385 return ut;

	2386 }

	2387

	2388 U_CDECL_END

	2389

	2390

	2391

	2392

	2393

	2394

	2395

	2396

	2397 //------------------------------------------------------------------------------

	2398 //

	2399 // UText implementation for UnicodeString (read/write) and

	2400 // for const UnicodeString (read only)

	2401 // (same implementation, only the flags are different)

	2402 //

	2403 // Use of UText data members:

	2404 // context pointer to UnicodeString

	2405 // p unused; whether this UText owns the string, and so must delete it

	2406 // on close(), is recorded by the UTEXT_PROVIDER_OWNS_TEXT flag.

	2407 //

	2408 //------------------------------------------------------------------------------

	2409

	2410 U_CDECL_BEGIN

	2411

	2412

	2413 static UText * U_CALLCONV

	2414 unistrTextClone(UText dest, const UText src, UBool deep, UErrorCode *status) {

	2415 // First do a generic shallow clone. Does everything needed for the UText s truct itself.

	2416 dest = shallowTextClone(dest, src, status);

	2417

	2418 // For deep clones, make a copy of the UnicodeSring.

	2419 // The copied UnicodeString storage is owned by the newly created UText clo ne.

	2420 // A non-NULL pointer in UText.p is the signal to the close() function to d elete

	2421 // the UText.

	2422 //

	2423 if (deep && U_SUCCESS(*status)) {

	2424 const UnicodeString srcString = (const UnicodeString )src->context;

	2425 dest->context = new UnicodeString(*srcString);

	2426 dest->providerProperties \|= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);

	2427

	2428 // with deep clone, the copy is writable, even when the source is not.

	2429 dest->providerProperties \|= I32_FLAG(UTEXT_PROVIDER_WRITABLE);

	2430 }

	2431 return dest;

	2432 }

	2433

	2434 static void U_CALLCONV

	2435 unistrTextClose(UText *ut) {

	2436 // Most of the work of close is done by the generic UText framework close.

	2437 // All that needs to be done here is delete the UnicodeString if the UText

	2438 // owns it. This occurs if the UText was created by cloning.

	2439 if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) {

	2440 UnicodeString str = (UnicodeString )ut->context;

	2441 delete str;

	2442 ut->context = NULL;

	2443 }

	2444 }

	2445

	2446

	2447 static int64_t U_CALLCONV

	2448 unistrTextLength(UText *t) {

	2449 return ((const UnicodeString *)t->context)->length();

	2450 }

	2451

	2452

	2453 static UBool U_CALLCONV

	2454 unistrTextAccess(UText *ut, int64_t index, UBool forward) {

	2455 int32_t length = ut->chunkLength;

	2456 ut->chunkOffset = pinIndex(index, length);

	2457

	2458 // Check whether request is at the start or end

	2459 UBool retVal = (forward && index<length) \|\| (!forward && index>0);

	2460 return retVal;

	2461 }

	2462

	2463

	2464

	2465 static int32_t U_CALLCONV

	2466 unistrTextExtract(UText *t,

	2467 int64_t start, int64_t limit,

	2468 UChar *dest, int32_t destCapacity,

	2469 UErrorCode *pErrorCode) {

	2470 const UnicodeString us=(const UnicodeString )t->context;

	2471 int32_t length=us->length();

	2472

	2473 if(U_FAILURE(*pErrorCode)) {

	2474 return 0;

	2475 }

	2476 if(destCapacity<0 \|\| (dest==NULL && destCapacity>0)) {

	2477 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	2478 }

	2479 if(start<0 \|\| start>limit) {

	2480 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;

	2481 return 0;

	2482 }

	2483

	2484 int32_t start32 = start<length ? us->getChar32Start((int32_t)start) : length ;

	2485 int32_t limit32 = limit<length ? us->getChar32Start((int32_t)limit) : length ;

	2486

	2487 length=limit32-start32;

	2488 if (destCapacity>0 && dest!=NULL) {

	2489 int32_t trimmedLength = length;

	2490 if(trimmedLength>destCapacity) {

	2491 trimmedLength=destCapacity;

	2492 }

	2493 us->extract(start32, trimmedLength, dest);

	2494 t->chunkOffset = start32+trimmedLength;

	2495 } else {

	2496 t->chunkOffset = start32;

	2497 }

	2498 u_terminateUChars(dest, destCapacity, length, pErrorCode);

	2499 return length;

	2500 }

	2501

	2502 static int32_t U_CALLCONV

	2503 unistrTextReplace(UText *ut,

	2504 int64_t start, int64_t limit,

	2505 const UChar *src, int32_t length,

	2506 UErrorCode *pErrorCode) {

	2507 UnicodeString us=(UnicodeString )ut->context;

	2508 int32_t oldLength;

	2509

	2510 if(U_FAILURE(*pErrorCode)) {

	2511 return 0;

	2512 }

	2513 if(src==NULL && length!=0) {

	2514 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	2515 }

	2516 if(start>limit) {

	2517 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;

	2518 return 0;

	2519 }

	2520 oldLength=us->length();

	2521 int32_t start32 = pinIndex(start, oldLength);

	2522 int32_t limit32 = pinIndex(limit, oldLength);

	2523 if (start32 < oldLength) {

	2524 start32 = us->getChar32Start(start32);

	2525 }

	2526 if (limit32 < oldLength) {

	2527 limit32 = us->getChar32Start(limit32);

	2528 }

	2529

	2530 // replace

	2531 us->replace(start32, limit32-start32, src, length);

	2532 int32_t newLength = us->length();

	2533

	2534 // Update the chunk description.

	2535 ut->chunkContents = us->getBuffer();

	2536 ut->chunkLength = newLength;

	2537 ut->chunkNativeLimit = newLength;

	2538 ut->nativeIndexingLimit = newLength;

	2539

	2540 // Set iteration position to the point just following the newly inserted tex t.

	2541 int32_t lengthDelta = newLength - oldLength;

	2542 ut->chunkOffset = limit32 + lengthDelta;

	2543

	2544 return lengthDelta;

	2545 }

	2546

	2547 static void U_CALLCONV

	2548 unistrTextCopy(UText *ut,

	2549 int64_t start, int64_t limit,

	2550 int64_t destIndex,

	2551 UBool move,

	2552 UErrorCode *pErrorCode) {

	2553 UnicodeString us=(UnicodeString )ut->context;

	2554 int32_t length=us->length();

	2555

	2556 if(U_FAILURE(*pErrorCode)) {

	2557 return;

	2558 }

	2559 int32_t start32 = pinIndex(start, length);

	2560 int32_t limit32 = pinIndex(limit, length);

	2561 int32_t destIndex32 = pinIndex(destIndex, length);

	2562

	2563 if( start32>limit32 \|\| (start32<destIndex32 && destIndex32<limit32)) {

	2564 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;

	2565 return;

	2566 }

	2567

	2568 if(move) {

	2569 // move: copy to destIndex, then replace original with nothing

	2570 int32_t segLength=limit32-start32;

	2571 us->copy(start32, limit32, destIndex32);

	2572 if(destIndex32<start32) {

	2573 start32+=segLength;

	2574 }

	2575 us->replace(start32, segLength, NULL, 0);

	2576 } else {

	2577 // copy

	2578 us->copy(start32, limit32, destIndex32);

	2579 }

	2580

	2581 // update chunk description, set iteration position.

	2582 ut->chunkContents = us->getBuffer();

	2583 if (move==FALSE) {

	2584 // copy operation, string length grows

	2585 ut->chunkLength += limit32-start32;

	2586 ut->chunkNativeLimit = ut->chunkLength;

	2587 ut->nativeIndexingLimit = ut->chunkLength;

	2588 }

	2589

	2590 // Iteration position to end of the newly inserted text.

	2591 ut->chunkOffset = destIndex32+limit32-start32;

	2592 if (move && destIndex32>start32) {

	2593 ut->chunkOffset = destIndex32;

	2594 }

	2595

	2596 }

	2597

	2598 static const struct UTextFuncs unistrFuncs =

	2599 {

	2600 sizeof(UTextFuncs),

	2601 0, 0, 0, // Reserved alignment padding

	2602 unistrTextClone,

	2603 unistrTextLength,

	2604 unistrTextAccess,

	2605 unistrTextExtract,

	2606 unistrTextReplace,

	2607 unistrTextCopy,

	2608 NULL, // MapOffsetToNative,

	2609 NULL, // MapIndexToUTF16,

	2610 unistrTextClose,

	2611 NULL, // spare 1

	2612 NULL, // spare 2

	2613 NULL // spare 3

	2614 };

	2615

	2616

	2617

	2618 U_CDECL_END

	2619

	2620

	2621 U_CAPI UText * U_EXPORT2

	2622 utext_openUnicodeString(UText ut, UnicodeString s, UErrorCode *status) {

	2623 // TODO: use openConstUnicodeString, then add in the differences.

	2624 //

	2625 ut = utext_setup(ut, 0, status);

	2626 if (U_SUCCESS(*status)) {

	2627 ut->pFuncs = &unistrFuncs;

	2628 ut->context = s;

	2629 ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS)\|

	2630 I32_FLAG(UTEXT_PROVIDER_WRITABLE);

	2631

	2632 ut->chunkContents = s->getBuffer();

	2633 ut->chunkLength = s->length();

	2634 ut->chunkNativeStart = 0;

	2635 ut->chunkNativeLimit = ut->chunkLength;

	2636 ut->nativeIndexingLimit = ut->chunkLength;

	2637 }

	2638 return ut;

	2639 }

	2640

	2641

	2642

	2643 U_CAPI UText * U_EXPORT2

	2644 utext_openConstUnicodeString(UText ut, const UnicodeString s, UErrorCode *stat us) {

	2645 ut = utext_setup(ut, 0, status);

	2646 // note: use the standard (writable) function table for UnicodeString.

	2647 // The flag settings disable writing, so having the functions in

	2648 // the table is harmless.

	2649 if (U_SUCCESS(*status)) {

	2650 ut->pFuncs = &unistrFuncs;

	2651 ut->context = s;

	2652 ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS);

	2653 ut->chunkContents = s->getBuffer();

	2654 ut->chunkLength = s->length();

	2655 ut->chunkNativeStart = 0;

	2656 ut->chunkNativeLimit = ut->chunkLength;

	2657 ut->nativeIndexingLimit = ut->chunkLength;

	2658 }

	2659 return ut;

	2660 }

	2661

	2662 //------------------------------------------------------------------------------

	2663 //

	2664 // UText implementation for const UChar * strings

	2665 //

	2666 // Use of UText data members:

	2667 // context pointer to the const UChar * string

	2668 // a length. -1 if not yet known.

	2669 //

	2670 // TODO: support 64 bit lengths.

	2671 //

	2672 //------------------------------------------------------------------------------

	2673

	2674 U_CDECL_BEGIN

	2675

	2676

	2677 static UText * U_CALLCONV

	2678 ucstrTextClone(UText dest, const UText src, UBool deep, UErrorCode * status) {

	2679 // First do a generic shallow clone.

	2680 dest = shallowTextClone(dest, src, status);

	2681

	2682 // For deep clones, make a copy of the string.

	2683 // The copied storage is owned by the newly created clone.

	2684 // A non-NULL pointer in UText.p is the signal to the close() function to d elete

	2685 // it.

	2686 //

	2687 if (deep && U_SUCCESS(*status)) {

	2688 U_ASSERT(utext_nativeLength(dest) < INT32_MAX);

	2689 int32_t len = (int32_t)utext_nativeLength(dest);

	2690

	2691 // The cloned string IS going to be NUL terminated, whether or not the o riginal was.

	2692 const UChar srcStr = (const UChar )src->context;

	2693 UChar copyStr = (UChar )uprv_malloc((len+1) * sizeof(UChar));

	2694 if (copyStr == NULL) {

	2695 *status = U_MEMORY_ALLOCATION_ERROR;

	2696 } else {

	2697 int64_t i;

	2698 for (i=0; i<len; i++) {

	2699 copyStr[i] = srcStr[i];

	2700 }

	2701 copyStr[len] = 0;

	2702 dest->context = copyStr;

	2703 dest->providerProperties \|= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);

	2704 }

	2705 }

	2706 return dest;

	2707 }

	2708

	2709

	2710 static void U_CALLCONV

	2711 ucstrTextClose(UText *ut) {

	2712 // Most of the work of close is done by the generic UText framework close.

	2713 // All that needs to be done here is delete the string if the UText

	2714 // owns it. This occurs if the UText was created by cloning.

	2715 if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) {

	2716 UChar s = (UChar )ut->context;

	2717 uprv_free(s);

	2718 ut->context = NULL;

	2719 }

	2720 }

	2721

	2722

	2723

	2724 static int64_t U_CALLCONV

	2725 ucstrTextLength(UText *ut) {

	2726 if (ut->a < 0) {

	2727 // null terminated, we don't yet know the length. Scan for it.

	2728 // Access is not convenient for doing this

	2729 // because the current interation postion can't be changed.

	2730 const UChar str = (const UChar )ut->context;

	2731 for (;;) {

	2732 if (str[ut->chunkNativeLimit] == 0) {

	2733 break;

	2734 }

	2735 ut->chunkNativeLimit++;

	2736 }

	2737 ut->a = ut->chunkNativeLimit;

	2738 ut->chunkLength = (int32_t)ut->chunkNativeLimit;

	2739 ut->nativeIndexingLimit = ut->chunkLength;

	2740 ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);

	2741 }

	2742 return ut->a;

	2743 }

	2744

	2745

	2746 static UBool U_CALLCONV

	2747 ucstrTextAccess(UText *ut, int64_t index, UBool forward) {

	2748 const UChar str = (const UChar )ut->context;

	2749

	2750 // pin the requested index to the bounds of the string,

	2751 // and set current iteration position.

	2752 if (index<0) {

	2753 index = 0;

	2754 } else if (index < ut->chunkNativeLimit) {

	2755 // The request data is within the chunk as it is known so far.

	2756 // Put index on a code point boundary.

	2757 U16_SET_CP_START(str, 0, index);

	2758 } else if (ut->a >= 0) {

	2759 // We know the length of this string, and the user is requesting somethi ng

	2760 // at or beyond the length. Pin the requested index to the length.

	2761 index = ut->a;

	2762 } else {

	2763 // Null terminated string, length not yet known, and the requested index

	2764 // is beyond where we have scanned so far.

	2765 // Scan to 32 UChars beyond the requested index. The strategy here is

	2766 // to avoid fully scanning a long string when the caller only wants to

	2767 // see a few characters at its beginning.

	2768 int32_t scanLimit = (int32_t)index + 32;

	2769 if ((index + 32)>INT32_MAX \|\| (index + 32)<0 ) { // note: int64 expres sion

	2770 scanLimit = INT32_MAX;

	2771 }

	2772

	2773 int32_t chunkLimit = (int32_t)ut->chunkNativeLimit;

	2774 for (; chunkLimit<scanLimit; chunkLimit++) {

	2775 if (str[chunkLimit] == 0) {

	2776 // We found the end of the string. Remember it, pin the request ed index to it,

	2777 // and bail out of here.

	2778 ut->a = chunkLimit;

	2779 ut->chunkLength = chunkLimit;

	2780 ut->nativeIndexingLimit = chunkLimit;

	2781 if (index >= chunkLimit) {

	2782 index = chunkLimit;

	2783 } else {

	2784 U16_SET_CP_START(str, 0, index);

	2785 }

	2786

	2787 ut->chunkNativeLimit = chunkLimit;

	2788 ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXP ENSIVE);

	2789 goto breakout;

	2790 }

	2791 }

	2792 // We scanned through the next batch of UChars without finding the end.

	2793 U16_SET_CP_START(str, 0, index);

	2794 if (chunkLimit == INT32_MAX) {

	2795 // Scanned to the limit of a 32 bit length.

	2796 // Forceably trim the overlength string back so length fits in int32

	2797 // TODO: add support for 64 bit strings.

	2798 ut->a = chunkLimit;

	2799 ut->chunkLength = chunkLimit;

	2800 ut->nativeIndexingLimit = chunkLimit;

	2801 if (index > chunkLimit) {

	2802 index = chunkLimit;

	2803 }

	2804 ut->chunkNativeLimit = chunkLimit;

	2805 ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSI VE);

	2806 } else {

	2807 // The endpoint of a chunk must not be left in the middle of a surro gate pair.

	2808 // If the current end is on a lead surrogate, back the end up by one .

	2809 // It doesn't matter if the end char happens to be an unpaired surro gate,

	2810 // and it's simpler not to worry about it.

	2811 if (U16_IS_LEAD(str[chunkLimit-1])) {

	2812 --chunkLimit;

	2813 }

	2814 // Null-terminated chunk with end still unknown.

	2815 // Update the chunk length to reflect what has been scanned thus far .

	2816 // That the full length is still unknown is (still) flagged by

	2817 // ut->a being < 0.

	2818 ut->chunkNativeLimit = chunkLimit;

	2819 ut->nativeIndexingLimit = chunkLimit;

	2820 ut->chunkLength = chunkLimit;

	2821 }

	2822

	2823 }

	2824 breakout:

	2825 U_ASSERT(index<=INT32_MAX);

	2826 ut->chunkOffset = (int32_t)index;

	2827

	2828 // Check whether request is at the start or end

	2829 UBool retVal = (forward && index<ut->chunkNativeLimit) \|\| (!forward && index >0);

	2830 return retVal;

	2831 }

	2832

	2833

	2834

	2835 static int32_t U_CALLCONV

	2836 ucstrTextExtract(UText *ut,

	2837 int64_t start, int64_t limit,

	2838 UChar *dest, int32_t destCapacity,

	2839 UErrorCode *pErrorCode)

	2840 {

	2841 if(U_FAILURE(*pErrorCode)) {

	2842 return 0;

	2843 }

	2844 if(destCapacity<0 \|\| (dest==NULL && destCapacity>0) \|\| start>limit) {

	2845 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	2846 return 0;

	2847 }

	2848

	2849 const UChar s=(const UChar )ut->context;

	2850 int32_t si, di;

	2851

	2852 int32_t start32;

	2853 int32_t limit32;

	2854

	2855 // Access the start. Does two things we need:

	2856 // Pins 'start' to the length of the string, if it came in out-of-bounds.

	2857 // Snaps 'start' to the beginning of a code point.

	2858 ucstrTextAccess(ut, start, TRUE);

	2859 U_ASSERT(start <= INT32_MAX);

	2860 start32 = (int32_t)start;

	2861

	2862 int32_t strLength=(int32_t)ut->a;

	2863 if (strLength >= 0) {

	2864 limit32 = pinIndex(limit, strLength);

	2865 } else {

	2866 limit32 = pinIndex(limit, INT32_MAX);

	2867 }

	2868

	2869 di = 0;

	2870 for (si=start32; si<limit32; si++) {

	2871 if (strLength<0 && s[si]==0) {

	2872 // Just hit the end of a null-terminated string.

	2873 ut->a = si; // set string length for this UText

	2874 ut->chunkNativeLimit = si;

	2875 ut->chunkLength = si;

	2876 ut->nativeIndexingLimit = si;

	2877 strLength = si;

	2878 break;

	2879 }

	2880 if (di<destCapacity) {

	2881 // only store if there is space.

	2882 dest[di] = s[si];

	2883 } else {

	2884 if (strLength>=0) {

	2885 // We have filled the destination buffer, and the string length is known.

	2886 // Cut the loop short. There is no need to scan string termina tion.

	2887 di = limit32 - start32;

	2888 si = limit32;

	2889 break;

	2890 }

	2891 }

	2892 di++;

	2893 }

	2894

	2895 // If the limit index points to a lead surrogate of a pair,

	2896 // add the corresponding trail surrogate to the destination.

	2897 if (si>0 && U16_IS_LEAD(s[si-1]) &&

	2898 ((si<strLength \|\| strLength<0) && U16_IS_TRAIL(s[si])))

	2899 {

	2900 if (di<destCapacity) {

	2901 // store only if there is space in the output buffer.

	2902 dest[di++] = s[si++];

	2903 }

	2904 }

	2905

	2906 // Put iteration position at the point just following the extracted text

	2907 ut->chunkOffset = uprv_min(strLength, start32 + destCapacity);

	2908

	2909 // Add a terminating NUL if space in the buffer permits,

	2910 // and set the error status as required.

	2911 u_terminateUChars(dest, destCapacity, di, pErrorCode);

	2912 return di;

	2913 }

	2914

	2915 static const struct UTextFuncs ucstrFuncs =

	2916 {

	2917 sizeof(UTextFuncs),

	2918 0, 0, 0, // Reserved alignment padding

	2919 ucstrTextClone,

	2920 ucstrTextLength,

	2921 ucstrTextAccess,

	2922 ucstrTextExtract,

	2923 NULL, // Replace

	2924 NULL, // Copy

	2925 NULL, // MapOffsetToNative,

	2926 NULL, // MapIndexToUTF16,

	2927 ucstrTextClose,

	2928 NULL, // spare 1

	2929 NULL, // spare 2

	2930 NULL, // spare 3

	2931 };

	2932

	2933 U_CDECL_END

	2934

	2935 static const UChar gEmptyUString[] = {0};

	2936

	2937 U_CAPI UText * U_EXPORT2

	2938 utext_openUChars(UText ut, const UChar s, int64_t length, UErrorCode *status) {

	2939 if (U_FAILURE(*status)) {

	2940 return NULL;

	2941 }

	2942 if(s==NULL && length==0) {

	2943 s = gEmptyUString;

	2944 }

	2945 if (s==NULL \|\| length < -1 \|\| length>INT32_MAX) {

	2946 *status = U_ILLEGAL_ARGUMENT_ERROR;

	2947 return NULL;

	2948 }

	2949 ut = utext_setup(ut, 0, status);

	2950 if (U_SUCCESS(*status)) {

	2951 ut->pFuncs = &ucstrFuncs;

	2952 ut->context = s;

	2953 ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS);

	2954 if (length==-1) {

	2955 ut->providerProperties \|= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIV E);

	2956 }

	2957 ut->a = length;

	2958 ut->chunkContents = s;

	2959 ut->chunkNativeStart = 0;

	2960 ut->chunkNativeLimit = length>=0? length : 0;

	2961 ut->chunkLength = (int32_t)ut->chunkNativeLimit;

	2962 ut->chunkOffset = 0;

	2963 ut->nativeIndexingLimit = ut->chunkLength;

	2964 }

	2965 return ut;

	2966 }

	2967

	2968

	2969 //------------------------------------------------------------------------------

	2970 //

	2971 // UText implementation for text from ICU CharacterIterators

	2972 //

	2973 // Use of UText data members:

	2974 // context pointer to the CharacterIterator

	2975 // a length of the full text.

	2976 // p pointer to buffer 1

	2977 // b start index of local buffer 1 contents

	2978 // q pointer to buffer 2

	2979 // c start index of local buffer 2 contents

	2980 // r pointer to the character iterator if the UText owns it.

	2981 // Null otherwise.

	2982 //

	2983 //------------------------------------------------------------------------------

	2984 #define CIBufSize 16

	2985

	2986 U_CDECL_BEGIN

	2987 static void U_CALLCONV

	2988 charIterTextClose(UText *ut) {

	2989 // Most of the work of close is done by the generic UText framework close.

	2990 // All that needs to be done here is delete the CharacterIterator if the UTe xt

	2991 // owns it. This occurs if the UText was created by cloning.

	2992 CharacterIterator ci = (CharacterIterator )ut->r;

	2993 delete ci;

	2994 ut->r = NULL;

	2995 }

	2996

	2997 static int64_t U_CALLCONV

	2998 charIterTextLength(UText *ut) {

	2999 return (int32_t)ut->a;

	3000 }

	3001

	3002 static UBool U_CALLCONV

	3003 charIterTextAccess(UText *ut, int64_t index, UBool forward) {

	3004 CharacterIterator ci = (CharacterIterator )ut->context;

	3005

	3006 int32_t clippedIndex = (int32_t)index;

	3007 if (clippedIndex<0) {

	3008 clippedIndex=0;

	3009 } else if (clippedIndex>=ut->a) {

	3010 clippedIndex=(int32_t)ut->a;

	3011 }

	3012 int32_t neededIndex = clippedIndex;

	3013 if (!forward && neededIndex>0) {

	3014 // reverse iteration, want the position just before what was asked for.

	3015 neededIndex--;

	3016 } else if (forward && neededIndex==ut->a && neededIndex>0) {

	3017 // Forward iteration, don't ask for something past the end of the text.

	3018 neededIndex--;

	3019 }

	3020

	3021 // Find the native index of the start of the buffer containing what we want.

	3022 neededIndex -= neededIndex % CIBufSize;

	3023

	3024 UChar *buf = NULL;

	3025 UBool needChunkSetup = TRUE;

	3026 int i;

	3027 if (ut->chunkNativeStart == neededIndex) {

	3028 // The buffer we want is already the current chunk.

	3029 needChunkSetup = FALSE;

	3030 } else if (ut->b == neededIndex) {

	3031 // The first buffer (buffer p) has what we need.

	3032 buf = (UChar *)ut->p;

	3033 } else if (ut->c == neededIndex) {

	3034 // The second buffer (buffer q) has what we need.

	3035 buf = (UChar *)ut->q;

	3036 } else {

	3037 // Neither buffer already has what we need.

	3038 // Load new data from the character iterator.

	3039 // Use the buf that is not the current buffer.

	3040 buf = (UChar *)ut->p;

	3041 if (ut->p == ut->chunkContents) {

	3042 buf = (UChar *)ut->q;

	3043 }

	3044 ci->setIndex(neededIndex);

	3045 for (i=0; i<CIBufSize; i++) {

	3046 buf[i] = ci->nextPostInc();

	3047 if (i+neededIndex > ut->a) {

	3048 break;

	3049 }

	3050 }

	3051 }

	3052

	3053 // We have a buffer with the data we need.

	3054 // Set it up as the current chunk, if it wasn't already.

	3055 if (needChunkSetup) {

	3056 ut->chunkContents = buf;

	3057 ut->chunkLength = CIBufSize;

	3058 ut->chunkNativeStart = neededIndex;

	3059 ut->chunkNativeLimit = neededIndex + CIBufSize;

	3060 if (ut->chunkNativeLimit > ut->a) {

	3061 ut->chunkNativeLimit = ut->a;

	3062 ut->chunkLength = (int32_t)(ut->chunkNativeLimit)-(int32_t)(ut->chu nkNativeStart);

	3063 }

	3064 ut->nativeIndexingLimit = ut->chunkLength;

	3065 U_ASSERT(ut->chunkOffset>=0 && ut->chunkOffset<=CIBufSize);

	3066 }

	3067 ut->chunkOffset = clippedIndex - (int32_t)ut->chunkNativeStart;

	3068 UBool success = (forward? ut->chunkOffset<ut->chunkLength : ut->chunkOffset> 0);

	3069 return success;

	3070 }

	3071

	3072 static UText * U_CALLCONV

	3073 charIterTextClone(UText dest, const UText src, UBool deep, UErrorCode * status ) {

	3074 if (U_FAILURE(*status)) {

	3075 return NULL;

	3076 }

	3077

	3078 if (deep) {

	3079 // There is no CharacterIterator API for cloning the underlying text sto rage.

	3080 *status = U_UNSUPPORTED_ERROR;

	3081 return NULL;

	3082 } else {

	3083 CharacterIterator srcCI =(CharacterIterator )src->context;

	3084 srcCI = srcCI->clone();

	3085 dest = utext_openCharacterIterator(dest, srcCI, status);

	3086 // cast off const on getNativeIndex.

	3087 // For CharacterIterator based UTexts, this is safe, the operation is const.

	3088 int64_t ix = utext_getNativeIndex((UText *)src);

	3089 utext_setNativeIndex(dest, ix);

	3090 dest->r = srcCI; // flags that this UText owns the CharacterIterator

	3091 }

	3092 return dest;

	3093 }

	3094

	3095 static int32_t U_CALLCONV

	3096 charIterTextExtract(UText *ut,

	3097 int64_t start, int64_t limit,

	3098 UChar *dest, int32_t destCapacity,

	3099 UErrorCode *status)

	3100 {

	3101 if(U_FAILURE(*status)) {

	3102 return 0;

	3103 }

	3104 if(destCapacity<0 \|\| (dest==NULL && destCapacity>0) \|\| start>limit) {

	3105 *status=U_ILLEGAL_ARGUMENT_ERROR;

	3106 return 0;

	3107 }

	3108 int32_t length = (int32_t)ut->a;

	3109 int32_t start32 = pinIndex(start, length);

	3110 int32_t limit32 = pinIndex(limit, length);

	3111 int32_t desti = 0;

	3112 int32_t srci;

	3113 int32_t copyLimit;

	3114

	3115 CharacterIterator ci = (CharacterIterator )ut->context;

	3116 ci->setIndex32(start32); // Moves ix to lead of surrogate pair, if needed.

	3117 srci = ci->getIndex();

	3118 copyLimit = srci;

	3119 while (srci<limit32) {

	3120 UChar32 c = ci->next32PostInc();

	3121 int32_t len = U16_LENGTH(c);

	3122 if (desti+len <= destCapacity) {

	3123 U16_APPEND_UNSAFE(dest, desti, c);

	3124 copyLimit = srci+len;

	3125 } else {

	3126 desti += len;

	3127 *status = U_BUFFER_OVERFLOW_ERROR;

	3128 }

	3129 srci += len;

	3130 }

	3131

	3132 charIterTextAccess(ut, copyLimit, TRUE);

	3133

	3134 u_terminateUChars(dest, destCapacity, desti, status);

	3135 return desti;

	3136 }

	3137

	3138 static const struct UTextFuncs charIterFuncs =

	3139 {

	3140 sizeof(UTextFuncs),

	3141 0, 0, 0, // Reserved alignment padding

	3142 charIterTextClone,

	3143 charIterTextLength,

	3144 charIterTextAccess,

	3145 charIterTextExtract,

	3146 NULL, // Replace

	3147 NULL, // Copy

	3148 NULL, // MapOffsetToNative,

	3149 NULL, // MapIndexToUTF16,

	3150 charIterTextClose,

	3151 NULL, // spare 1

	3152 NULL, // spare 2

	3153 NULL // spare 3

	3154 };

	3155 U_CDECL_END

	3156

	3157

	3158 U_CAPI UText * U_EXPORT2

	3159 utext_openCharacterIterator(UText ut, CharacterIterator ci, UErrorCode *status ) {

	3160 if (U_FAILURE(*status)) {

	3161 return NULL;

	3162 }

	3163

	3164 if (ci->startIndex() > 0) {

	3165 // No support for CharacterIterators that do not start indexing from zer o.

	3166 *status = U_UNSUPPORTED_ERROR;

	3167 return NULL;

	3168 }

	3169

	3170 // Extra space in UText for 2 buffers of CIBufSize UChars each.

	3171 int32_t extraSpace = 2 * CIBufSize * sizeof(UChar);

	3172 ut = utext_setup(ut, extraSpace, status);

	3173 if (U_SUCCESS(*status)) {

	3174 ut->pFuncs = &charIterFuncs;

	3175 ut->context = ci;

	3176 ut->providerProperties = 0;

	3177 ut->a = ci->endIndex(); // Length of text

	3178 ut->p = ut->pExtra; // First buffer

	3179 ut->b = -1; // Native index of fir st buffer contents

	3180 ut->q = (UChar*)ut->pExtra+CIBufSize; // Second buff er

	3181 ut->c = -1; // Native index of sec ond buffer contents

	3182

	3183 // Initialize current chunk contents to be empty.

	3184 // First access will fault something in.

	3185 // Note: The initial nativeStart and chunkOffset must sum to zero

	3186 // so that getNativeIndex() will correctly compute to zero

	3187 // if no call to Access() has ever been made. They can't be bo th

	3188 // zero without Access() thinking that the chunk is valid.

	3189 ut->chunkContents = (UChar *)ut->p;

	3190 ut->chunkNativeStart = -1;

	3191 ut->chunkOffset = 1;

	3192 ut->chunkNativeLimit = 0;

	3193 ut->chunkLength = 0;

	3194 ut->nativeIndexingLimit = ut->chunkOffset; // enables native indexing

	3195 }

	3196 return ut;

	3197 }

	3198

	3199

	3200

OLD	NEW

« no previous file with comments | « icu46/source/common/ustrtrns.c ('k') | icu46/source/common/utf_impl.c » ('j') | no next file with comments »