Index: icu46/source/common/utext.cpp |
=================================================================== |
--- icu46/source/common/utext.cpp (revision 0) |
+++ icu46/source/common/utext.cpp (revision 0) |
@@ -0,0 +1,3200 @@ |
+/* |
+******************************************************************************* |
+* |
+* Copyright (C) 2005-2010, International Business Machines |
+* Corporation and others. All Rights Reserved. |
+* |
+******************************************************************************* |
+* file name: utext.cpp |
+* encoding: US-ASCII |
+* tab size: 8 (not used) |
+* indentation:4 |
+* |
+* created on: 2005apr12 |
+* created by: Markus W. Scherer |
+*/ |
+ |
+#include "unicode/utypes.h" |
+#include "unicode/ustring.h" |
+#include "unicode/unistr.h" |
+#include "unicode/chariter.h" |
+#include "unicode/utext.h" |
+#include "ustr_imp.h" |
+#include "cmemory.h" |
+#include "cstring.h" |
+#include "uassert.h" |
+#include "putilimp.h" |
+ |
+U_NAMESPACE_USE |
+ |
+#define I32_FLAG(bitIndex) ((int32_t)1<<(bitIndex)) |
+ |
+ |
+static UBool |
+utext_access(UText *ut, int64_t index, UBool forward) { |
+ return ut->pFuncs->access(ut, index, forward); |
+} |
+ |
+ |
+ |
+U_CAPI UBool U_EXPORT2 |
+utext_moveIndex32(UText *ut, int32_t delta) { |
+ UChar32 c; |
+ if (delta > 0) { |
+ do { |
+ if(ut->chunkOffset>=ut->chunkLength && !utext_access(ut, ut->chunkNativeLimit, TRUE)) { |
+ return FALSE; |
+ } |
+ c = ut->chunkContents[ut->chunkOffset]; |
+ if (U16_IS_SURROGATE(c)) { |
+ c = utext_next32(ut); |
+ if (c == U_SENTINEL) { |
+ return FALSE; |
+ } |
+ } else { |
+ ut->chunkOffset++; |
+ } |
+ } while(--delta>0); |
+ |
+ } else if (delta<0) { |
+ do { |
+ if(ut->chunkOffset<=0 && !utext_access(ut, ut->chunkNativeStart, FALSE)) { |
+ return FALSE; |
+ } |
+ c = ut->chunkContents[ut->chunkOffset-1]; |
+ if (U16_IS_SURROGATE(c)) { |
+ c = utext_previous32(ut); |
+ if (c == U_SENTINEL) { |
+ return FALSE; |
+ } |
+ } else { |
+ ut->chunkOffset--; |
+ } |
+ } while(++delta<0); |
+ } |
+ |
+ return TRUE; |
+} |
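+// Usage sketch (hypothetical caller): advance an already-open UText by two |
+// code points and read the code point at the new position; utext_moveIndex32 |
+// returns FALSE if it runs off either end of the text. |
+// |
+//     if (utext_moveIndex32(ut, 2)) { |
+//         UChar32 c = utext_current32(ut);    // code point at the new position |
+//     } |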
+ |
+ |
+U_CAPI int64_t U_EXPORT2 |
+utext_nativeLength(UText *ut) { |
+ return ut->pFuncs->nativeLength(ut); |
+} |
+ |
+ |
+U_CAPI UBool U_EXPORT2 |
+utext_isLengthExpensive(const UText *ut) { |
+ UBool r = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE)) != 0; |
+ return r; |
+} |
+ |
+ |
+U_CAPI int64_t U_EXPORT2 |
+utext_getNativeIndex(const UText *ut) { |
+ if(ut->chunkOffset <= ut->nativeIndexingLimit) { |
+ return ut->chunkNativeStart+ut->chunkOffset; |
+ } else { |
+ return ut->pFuncs->mapOffsetToNative(ut); |
+ } |
+} |
+ |
+ |
+U_CAPI void U_EXPORT2 |
+utext_setNativeIndex(UText *ut, int64_t index) { |
+ if(index<ut->chunkNativeStart || index>=ut->chunkNativeLimit) { |
+ // The desired position is outside of the current chunk. |
+ // Access the new position. Assume a forward iteration from here, |
+        // which will also be optimum for a single random access. |
+ // Reverse iterations may suffer slightly. |
+ ut->pFuncs->access(ut, index, TRUE); |
+ } else if((int32_t)(index - ut->chunkNativeStart) <= ut->nativeIndexingLimit) { |
+ // utf-16 indexing. |
+ ut->chunkOffset=(int32_t)(index-ut->chunkNativeStart); |
+ } else { |
+ ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index); |
+ } |
+ // The convention is that the index must always be on a code point boundary. |
+ // Adjust the index position if it is in the middle of a surrogate pair. |
+ if (ut->chunkOffset<ut->chunkLength) { |
+ UChar c= ut->chunkContents[ut->chunkOffset]; |
+ if (UTF16_IS_TRAIL(c)) { |
+ if (ut->chunkOffset==0) { |
+ ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE); |
+ } |
+ if (ut->chunkOffset>0) { |
+ UChar lead = ut->chunkContents[ut->chunkOffset-1]; |
+ if (UTF16_IS_LEAD(lead)) { |
+ ut->chunkOffset--; |
+ } |
+ } |
+ } |
+ } |
+} |
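+// Illustration (assumed UTF-16 backing text): if the text holds the single |
+// supplementary character U+10000 as the surrogate pair { D800, DC00 } followed |
+// by 'A', then utext_setNativeIndex(ut, 1) lands on the trail surrogate and is |
+// adjusted back, so a subsequent utext_getNativeIndex(ut) returns 0. |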
+ |
+ |
+ |
+U_CAPI int64_t U_EXPORT2 |
+utext_getPreviousNativeIndex(UText *ut) { |
+ // |
+ // Fast-path the common case. |
+ // Common means current position is not at the beginning of a chunk |
+ // and the preceding character is not supplementary. |
+ // |
+ int32_t i = ut->chunkOffset - 1; |
+ int64_t result; |
+ if (i >= 0) { |
+ UChar c = ut->chunkContents[i]; |
+ if (U16_IS_TRAIL(c) == FALSE) { |
+ if (i <= ut->nativeIndexingLimit) { |
+ result = ut->chunkNativeStart + i; |
+ } else { |
+ ut->chunkOffset = i; |
+ result = ut->pFuncs->mapOffsetToNative(ut); |
+ ut->chunkOffset++; |
+ } |
+ return result; |
+ } |
+ } |
+ |
+ // If at the start of text, simply return 0. |
+ if (ut->chunkOffset==0 && ut->chunkNativeStart==0) { |
+ return 0; |
+ } |
+ |
+ // Harder, less common cases. We are at a chunk boundary, or on a surrogate. |
+ // Keep it simple, use other functions to handle the edges. |
+ // |
+ utext_previous32(ut); |
+ result = UTEXT_GETNATIVEINDEX(ut); |
+ utext_next32(ut); |
+ return result; |
+} |
+ |
+ |
+// |
+// utext_current32. Get the UChar32 at the current position. |
+// UText iteration position is always on a code point boundary, |
+// never on the trail half of a surrogate pair. |
+// |
+U_CAPI UChar32 U_EXPORT2 |
+utext_current32(UText *ut) { |
+ UChar32 c; |
+ if (ut->chunkOffset==ut->chunkLength) { |
+ // Current position is just off the end of the chunk. |
+ if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) { |
+ // Off the end of the text. |
+ return U_SENTINEL; |
+ } |
+ } |
+ |
+ c = ut->chunkContents[ut->chunkOffset]; |
+ if (U16_IS_LEAD(c) == FALSE) { |
+ // Normal, non-supplementary case. |
+ return c; |
+ } |
+ |
+ // |
+ // Possible supplementary char. |
+ // |
+ UChar32 trail = 0; |
+ UChar32 supplementaryC = c; |
+ if ((ut->chunkOffset+1) < ut->chunkLength) { |
+ // The trail surrogate is in the same chunk. |
+ trail = ut->chunkContents[ut->chunkOffset+1]; |
+ } else { |
+ // The trail surrogate is in a different chunk. |
+ // Because we must maintain the iteration position, we need to switch forward |
+ // into the new chunk, get the trail surrogate, then revert the chunk back to the |
+ // original one. |
+ // An edge case to be careful of: the entire text may end with an unpaired |
+ // leading surrogate. The attempt to access the trail will fail, but |
+ // the original position before the unpaired lead still needs to be restored. |
+ int64_t nativePosition = ut->chunkNativeLimit; |
+ int32_t originalOffset = ut->chunkOffset; |
+ if (ut->pFuncs->access(ut, nativePosition, TRUE)) { |
+ trail = ut->chunkContents[ut->chunkOffset]; |
+ } |
+ UBool r = ut->pFuncs->access(ut, nativePosition, FALSE); // reverse iteration flag loads preceding chunk |
+ U_ASSERT(r==TRUE); |
+ ut->chunkOffset = originalOffset; |
+ if(!r) { |
+ return U_SENTINEL; |
+ } |
+ } |
+ |
+ if (U16_IS_TRAIL(trail)) { |
+ supplementaryC = U16_GET_SUPPLEMENTARY(c, trail); |
+ } |
+ return supplementaryC; |
+ |
+} |
+ |
+ |
+U_CAPI UChar32 U_EXPORT2 |
+utext_char32At(UText *ut, int64_t nativeIndex) { |
+ UChar32 c = U_SENTINEL; |
+ |
+ // Fast path the common case. |
+ if (nativeIndex>=ut->chunkNativeStart && nativeIndex < ut->chunkNativeStart + ut->nativeIndexingLimit) { |
+ ut->chunkOffset = (int32_t)(nativeIndex - ut->chunkNativeStart); |
+ c = ut->chunkContents[ut->chunkOffset]; |
+ if (U16_IS_SURROGATE(c) == FALSE) { |
+ return c; |
+ } |
+ } |
+ |
+ |
+ utext_setNativeIndex(ut, nativeIndex); |
+ if (nativeIndex>=ut->chunkNativeStart && ut->chunkOffset<ut->chunkLength) { |
+ c = ut->chunkContents[ut->chunkOffset]; |
+ if (U16_IS_SURROGATE(c)) { |
+ // For surrogates, let current32() deal with the complications |
+ // of supplementaries that may span chunk boundaries. |
+ c = utext_current32(ut); |
+ } |
+ } |
+ return c; |
+} |
+ |
+ |
+U_CAPI UChar32 U_EXPORT2 |
+utext_next32(UText *ut) { |
+ UChar32 c; |
+ |
+ if (ut->chunkOffset >= ut->chunkLength) { |
+ if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) { |
+ return U_SENTINEL; |
+ } |
+ } |
+ |
+ c = ut->chunkContents[ut->chunkOffset++]; |
+ if (U16_IS_LEAD(c) == FALSE) { |
+ // Normal case, not supplementary. |
+ // (A trail surrogate seen here is just returned as is, as a surrogate value. |
+ // It cannot be part of a pair.) |
+ return c; |
+ } |
+ |
+ if (ut->chunkOffset >= ut->chunkLength) { |
+ if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) { |
+ // c is an unpaired lead surrogate at the end of the text. |
+ // return it as it is. |
+ return c; |
+ } |
+ } |
+ UChar32 trail = ut->chunkContents[ut->chunkOffset]; |
+ if (U16_IS_TRAIL(trail) == FALSE) { |
+ // c was an unpaired lead surrogate, not at the end of the text. |
+ // return it as it is (unpaired). Iteration position is on the |
+ // following character, possibly in the next chunk, where the |
+ // trail surrogate would have been if it had existed. |
+ return c; |
+ } |
+ |
+ UChar32 supplementary = U16_GET_SUPPLEMENTARY(c, trail); |
+ ut->chunkOffset++; // move iteration position over the trail surrogate. |
+ return supplementary; |
+} |
+ |
+ |
+U_CAPI UChar32 U_EXPORT2 |
+utext_previous32(UText *ut) { |
+ UChar32 c; |
+ |
+ if (ut->chunkOffset <= 0) { |
+ if (ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE) == FALSE) { |
+ return U_SENTINEL; |
+ } |
+ } |
+ ut->chunkOffset--; |
+ c = ut->chunkContents[ut->chunkOffset]; |
+ if (U16_IS_TRAIL(c) == FALSE) { |
+ // Normal case, not supplementary. |
+ // (A lead surrogate seen here is just returned as is, as a surrogate value. |
+ // It cannot be part of a pair.) |
+ return c; |
+ } |
+ |
+ if (ut->chunkOffset <= 0) { |
+ if (ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE) == FALSE) { |
+ // c is an unpaired trail surrogate at the start of the text. |
+ // return it as it is. |
+ return c; |
+ } |
+ } |
+ |
+ UChar32 lead = ut->chunkContents[ut->chunkOffset-1]; |
+ if (U16_IS_LEAD(lead) == FALSE) { |
+        // c was an unpaired trail surrogate, not at the start of the text. |
+ // return it as it is (unpaired). Iteration position is at c |
+ return c; |
+ } |
+ |
+ UChar32 supplementary = U16_GET_SUPPLEMENTARY(lead, c); |
+ ut->chunkOffset--; // move iteration position over the lead surrogate. |
+ return supplementary; |
+} |
+ |
+ |
+ |
+U_CAPI UChar32 U_EXPORT2 |
+utext_next32From(UText *ut, int64_t index) { |
+ UChar32 c = U_SENTINEL; |
+ |
+ if(index<ut->chunkNativeStart || index>=ut->chunkNativeLimit) { |
+ // Desired position is outside of the current chunk. |
+ if(!ut->pFuncs->access(ut, index, TRUE)) { |
+ // no chunk available here |
+ return U_SENTINEL; |
+ } |
+ } else if (index - ut->chunkNativeStart <= (int64_t)ut->nativeIndexingLimit) { |
+ // Desired position is in chunk, with direct 1:1 native to UTF16 indexing |
+ ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart); |
+ } else { |
+ // Desired position is in chunk, with non-UTF16 indexing. |
+ ut->chunkOffset = ut->pFuncs->mapNativeIndexToUTF16(ut, index); |
+ } |
+ |
+ c = ut->chunkContents[ut->chunkOffset++]; |
+ if (U16_IS_SURROGATE(c)) { |
+ // Surrogates. Many edge cases. Use other functions that already |
+ // deal with the problems. |
+ utext_setNativeIndex(ut, index); |
+ c = utext_next32(ut); |
+ } |
+ return c; |
+} |
+ |
+ |
+U_CAPI UChar32 U_EXPORT2 |
+utext_previous32From(UText *ut, int64_t index) { |
+ // |
+ // Return the character preceding the specified index. |
+ // Leave the iteration position at the start of the character that was returned. |
+ // |
+ UChar32 cPrev; // The character preceding cCurr, which is what we will return. |
+ |
+    // Address the chunk containing the position preceding the incoming index |
+ // A tricky edge case: |
+ // We try to test the requested native index against the chunkNativeStart to determine |
+ // whether the character preceding the one at the index is in the current chunk. |
+    // BUT, this test can fail with UTF-8 (or any other multibyte encoding) when the |
+    // requested index is on something other than the first position of its character. |
+ // |
+ if(index<=ut->chunkNativeStart || index>ut->chunkNativeLimit) { |
+ // Requested native index is outside of the current chunk. |
+ if(!ut->pFuncs->access(ut, index, FALSE)) { |
+ // no chunk available here |
+ return U_SENTINEL; |
+ } |
+ } else if(index - ut->chunkNativeStart <= (int64_t)ut->nativeIndexingLimit) { |
+ // Direct UTF-16 indexing. |
+ ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart); |
+ } else { |
+ ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index); |
+ if (ut->chunkOffset==0 && !ut->pFuncs->access(ut, index, FALSE)) { |
+ // no chunk available here |
+ return U_SENTINEL; |
+ } |
+ } |
+ |
+ // |
+ // Simple case with no surrogates. |
+ // |
+ ut->chunkOffset--; |
+ cPrev = ut->chunkContents[ut->chunkOffset]; |
+ |
+ if (U16_IS_SURROGATE(cPrev)) { |
+ // Possible supplementary. Many edge cases. |
+ // Let other functions do the heavy lifting. |
+ utext_setNativeIndex(ut, index); |
+ cPrev = utext_previous32(ut); |
+ } |
+ return cPrev; |
+} |
+ |
+ |
+U_CAPI int32_t U_EXPORT2 |
+utext_extract(UText *ut, |
+ int64_t start, int64_t limit, |
+ UChar *dest, int32_t destCapacity, |
+ UErrorCode *status) { |
+ return ut->pFuncs->extract(ut, start, limit, dest, destCapacity, status); |
+ } |
+ |
+ |
+ |
+U_CAPI UBool U_EXPORT2 |
+utext_equals(const UText *a, const UText *b) { |
+ if (a==NULL || b==NULL || |
+ a->magic != UTEXT_MAGIC || |
+ b->magic != UTEXT_MAGIC) { |
+ // Null or invalid arguments don't compare equal to anything. |
+ return FALSE; |
+ } |
+ |
+ if (a->pFuncs != b->pFuncs) { |
+ // Different types of text providers. |
+ return FALSE; |
+ } |
+ |
+ if (a->context != b->context) { |
+ // Different sources (different strings) |
+ return FALSE; |
+ } |
+ if (utext_getNativeIndex(a) != utext_getNativeIndex(b)) { |
+ // Different current position in the string. |
+ return FALSE; |
+ } |
+ |
+ return TRUE; |
+} |
+ |
+U_CAPI int32_t U_EXPORT2 |
+utext_compare(UText *s1, int32_t length1, |
+ UText *s2, int32_t length2) { |
+ UChar32 c1 = 0, c2 = 0; |
+ |
+ if(length1<0 && length2<0) { |
+ /* strcmp style, go until end of string */ |
+ for(;;) { |
+ c1 = UTEXT_NEXT32(s1); |
+ c2 = UTEXT_NEXT32(s2); |
+ if(c1 != c2) { |
+ break; |
+ } else if(c1 == U_SENTINEL) { |
+ return 0; |
+ } |
+ } |
+ } else { |
+ if(length1 < 0) { |
+ length1 = INT32_MIN; |
+ } else if (length2 < 0) { |
+ length2 = INT32_MIN; |
+ } |
+ |
+ /* memcmp/UnicodeString style, both length-specified */ |
+ while((length1 > 0 || length1 == INT32_MIN) && (length2 > 0 || length2 == INT32_MIN)) { |
+ c1 = UTEXT_NEXT32(s1); |
+ c2 = UTEXT_NEXT32(s2); |
+ |
+ if(c1 != c2) { |
+ break; |
+ } else if(c1 == U_SENTINEL) { |
+ return 0; |
+ } |
+ |
+ if (length1 != INT32_MIN) { |
+ length1 -= 1; |
+ } |
+ if (length2 != INT32_MIN) { |
+ length2 -= 1; |
+ } |
+ } |
+ |
+ if(length1 <= 0 && length1 != INT32_MIN) { |
+ if(length2 <= 0) { |
+ return 0; |
+ } else { |
+ return -1; |
+ } |
+ } else if(length2 <= 0 && length2 != INT32_MIN) { |
+ if (length1 <= 0) { |
+ return 0; |
+ } else { |
+ return 1; |
+ } |
+ } |
+ } |
+ |
+ return (int32_t)c1-(int32_t)c2; |
+} |
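+// Semantics sketch: utext_compare(s1, -1, s2, -1) behaves like a code-point-wise |
+// strcmp starting from each text's current iteration position; a negative length |
+// means "compare through the end of the text", while a non-negative length limits |
+// the comparison to that many code points. |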
+ |
+U_CAPI int32_t U_EXPORT2 |
+utext_compareNativeLimit(UText *s1, int64_t limit1, |
+ UText *s2, int64_t limit2) { |
+ UChar32 c1, c2; |
+ |
+ if(limit1<0 && limit2<0) { |
+ /* strcmp style, go until end of string */ |
+ for(;;) { |
+ c1 = UTEXT_NEXT32(s1); |
+ c2 = UTEXT_NEXT32(s2); |
+ if(c1 != c2) { |
+ return (int32_t)c1-(int32_t)c2; |
+ } else if(c1 == U_SENTINEL) { |
+ return 0; |
+ } |
+ } |
+ } else { |
+ /* memcmp/UnicodeString style, both length-specified */ |
+ int64_t index1 = (limit1 >= 0 ? UTEXT_GETNATIVEINDEX(s1) : 0); |
+ int64_t index2 = (limit2 >= 0 ? UTEXT_GETNATIVEINDEX(s2) : 0); |
+ |
+ while((limit1 < 0 || index1 < limit1) && (limit2 < 0 || index2 < limit2)) { |
+ c1 = UTEXT_NEXT32(s1); |
+ c2 = UTEXT_NEXT32(s2); |
+ |
+ if(c1 != c2) { |
+ return (int32_t)c1-(int32_t)c2; |
+ } else if(c1 == U_SENTINEL) { |
+ return 0; |
+ } |
+ |
+ if (limit1 >= 0) { |
+ index1 = UTEXT_GETNATIVEINDEX(s1); |
+ } |
+ if (limit2 >= 0) { |
+ index2 = UTEXT_GETNATIVEINDEX(s2); |
+ } |
+ } |
+ |
+ if(limit1 >= 0 && index1 >= limit1) { |
+ if(index2 >= limit2) { |
+ return 0; |
+ } else { |
+ return -1; |
+ } |
+ } else { |
+ if(index1 >= limit1) { |
+ return 0; |
+ } else { |
+ return 1; |
+ } |
+ } |
+ } |
+} |
+ |
+U_CAPI int32_t U_EXPORT2 |
+utext_caseCompare(UText *s1, int32_t length1, |
+ UText *s2, int32_t length2, |
+ uint32_t options, UErrorCode *pErrorCode) { |
+ const UCaseProps *csp; |
+ |
+ /* case folding variables */ |
+ const UChar *p; |
+ int32_t length; |
+ |
+ /* case folding buffers, only use current-level start/limit */ |
+ UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1]; |
+ int32_t foldOffset1, foldOffset2, foldLength1, foldLength2; |
+ |
+ /* current code points */ |
+ UChar32 c1, c2; |
+ uint8_t cLength1, cLength2; |
+ |
+ /* argument checking */ |
+ if(U_FAILURE(*pErrorCode)) { |
+ return 0; |
+ } |
+ if(s1==NULL || s2==NULL) { |
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+ return 0; |
+ } |
+ |
+ csp=ucase_getSingleton(); |
+ |
+ /* for variable-length strings */ |
+ if(length1 < 0) { |
+ length1 = INT32_MIN; |
+ } |
+ if (length2 < 0) { |
+ length2 = INT32_MIN; |
+ } |
+ |
+ /* initialize */ |
+ foldOffset1 = foldOffset2 = foldLength1 = foldLength2 = 0; |
+ |
+ /* comparison loop */ |
+ while((foldOffset1 < foldLength1 || length1 > 0 || length1 == INT32_MIN) && |
+ (foldOffset2 < foldLength2 || length2 > 0 || length2 == INT32_MIN)) { |
+ if(foldOffset1 < foldLength1) { |
+ U16_NEXT_UNSAFE(fold1, foldOffset1, c1); |
+ cLength1 = 0; |
+ } else { |
+ c1 = UTEXT_NEXT32(s1); |
+ if (c1 != U_SENTINEL) { |
+ cLength1 = U16_LENGTH(c1); |
+ |
+ length = ucase_toFullFolding(csp, c1, &p, options); |
+ if(length >= 0) { |
+ if(length <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not correctly handle 0-length folded-case strings |
+ u_memcpy(fold1, p, length); |
+ foldOffset1 = 0; |
+ foldLength1 = length; |
+ U16_NEXT_UNSAFE(fold1, foldOffset1, c1); |
+ } else { |
+ c1 = length; |
+ } |
+ } |
+ } |
+ |
+ if(length1 != INT32_MIN) { |
+ length1 -= 1; |
+ } |
+ } |
+ |
+ if(foldOffset2 < foldLength2) { |
+ U16_NEXT_UNSAFE(fold2, foldOffset2, c2); |
+ cLength2 = 0; |
+ } else { |
+ c2 = UTEXT_NEXT32(s2); |
+ if (c2 != U_SENTINEL) { |
+ cLength2 = U16_LENGTH(c2); |
+ |
+ length = ucase_toFullFolding(csp, c2, &p, options); |
+ if(length >= 0) { |
+ if(length <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not correctly handle 0-length folded-case strings |
+ u_memcpy(fold2, p, length); |
+ foldOffset2 = 0; |
+ foldLength2 = length; |
+ U16_NEXT_UNSAFE(fold2, foldOffset2, c2); |
+ } else { |
+ c2 = length; |
+ } |
+ } |
+ } else if(c1 == U_SENTINEL) { |
+ return 0; // end of both strings at once |
+ } |
+ |
+ if(length2 != INT32_MIN) { |
+ length2 -= 1; |
+ } |
+ } |
+ |
+ if(c1 != c2) { |
+ return (int32_t)c1-(int32_t)c2; |
+ } |
+ } |
+ |
+ /* By now at least one of the strings is out of characters */ |
+ length1 += foldLength1 - foldOffset1; |
+ length2 += foldLength2 - foldOffset2; |
+ |
+ if(length1 <= 0 && length1 != INT32_MIN) { |
+ if(length2 <= 0) { |
+ return 0; |
+ } else { |
+ return -1; |
+ } |
+ } else { |
+ if (length1 <= 0) { |
+ return 0; |
+ } else { |
+ return 1; |
+ } |
+ } |
+} |
+ |
+U_CAPI int32_t U_EXPORT2 |
+utext_caseCompareNativeLimit(UText *s1, int64_t limit1, |
+ UText *s2, int64_t limit2, |
+ uint32_t options, UErrorCode *pErrorCode) { |
+ const UCaseProps *csp; |
+ |
+ /* case folding variables */ |
+ const UChar *p; |
+ int32_t length; |
+ |
+ /* case folding buffers, only use current-level start/limit */ |
+ UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1]; |
+ int32_t foldOffset1, foldOffset2, foldLength1, foldLength2; |
+ |
+ /* current code points */ |
+ UChar32 c1, c2; |
+ |
+ /* native indexes into s1 and s2 */ |
+ int64_t index1, index2; |
+ |
+ /* argument checking */ |
+ if(U_FAILURE(*pErrorCode)) { |
+ return 0; |
+ } |
+ if(s1==NULL || s2==NULL) { |
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+ return 0; |
+ } |
+ |
+ csp=ucase_getSingleton(); |
+ |
+ /* initialize */ |
+ index1 = (limit1 >= 0 ? UTEXT_GETNATIVEINDEX(s1) : 0); |
+ index2 = (limit2 >= 0 ? UTEXT_GETNATIVEINDEX(s2) : 0); |
+ |
+ foldOffset1 = foldOffset2 = foldLength1 = foldLength2 = 0; |
+ |
+ /* comparison loop */ |
+ while((foldOffset1 < foldLength1 || limit1 < 0 || index1 < limit1) && |
+ (foldOffset2 < foldLength2 || limit2 < 0 || index2 < limit2)) { |
+ if(foldOffset1 < foldLength1) { |
+ U16_NEXT_UNSAFE(fold1, foldOffset1, c1); |
+ } else { |
+ c1 = UTEXT_NEXT32(s1); |
+ if (c1 != U_SENTINEL) { |
+ length = ucase_toFullFolding(csp, c1, &p, options); |
+ if(length >= 0) { |
+ if(length <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not correctly handle 0-length folded-case strings |
+ u_memcpy(fold1, p, length); |
+ foldOffset1 = 0; |
+ foldLength1 = length; |
+ U16_NEXT_UNSAFE(fold1, foldOffset1, c1); |
+ } else { |
+ c1 = length; |
+ } |
+ } |
+ } |
+ |
+ if (limit1 >= 0) { |
+ index1 = UTEXT_GETNATIVEINDEX(s1); |
+ } |
+ } |
+ |
+ if(foldOffset2 < foldLength2) { |
+ U16_NEXT_UNSAFE(fold2, foldOffset2, c2); |
+ } else { |
+ c2 = UTEXT_NEXT32(s2); |
+ if (c2 != U_SENTINEL) { |
+ length = ucase_toFullFolding(csp, c2, &p, options); |
+ if(length >= 0) { |
+ if(length <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not correctly handle 0-length folded-case strings |
+ u_memcpy(fold2, p, length); |
+ foldOffset2 = 0; |
+ foldLength2 = length; |
+ U16_NEXT_UNSAFE(fold2, foldOffset2, c2); |
+ } else { |
+ c2 = length; |
+ } |
+ } |
+ } else if(c1 == U_SENTINEL) { |
+ return 0; |
+ } |
+ |
+ if (limit2 >= 0) { |
+ index2 = UTEXT_GETNATIVEINDEX(s2); |
+ } |
+ } |
+ |
+ if(c1 != c2) { |
+ return (int32_t)c1-(int32_t)c2; |
+ } |
+ } |
+ |
+ /* By now at least one of the strings is out of characters */ |
+ index1 -= foldLength1 - foldOffset1; |
+ index2 -= foldLength2 - foldOffset2; |
+ |
+ if(limit1 >= 0 && index1 >= limit1) { |
+ if(index2 >= limit2) { |
+ return 0; |
+ } else { |
+ return -1; |
+ } |
+ } else { |
+ if(index1 >= limit1) { |
+ return 0; |
+ } else { |
+ return 1; |
+ } |
+ } |
+} |
+ |
+ |
+U_CAPI UBool U_EXPORT2 |
+utext_isWritable(const UText *ut) |
+{ |
+ UBool b = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) != 0; |
+ return b; |
+} |
+ |
+ |
+U_CAPI void U_EXPORT2 |
+utext_freeze(UText *ut) { |
+ // Zero out the WRITABLE flag. |
+ ut->providerProperties &= ~(I32_FLAG(UTEXT_PROVIDER_WRITABLE)); |
+} |
+ |
+ |
+U_CAPI UBool U_EXPORT2 |
+utext_hasMetaData(const UText *ut) |
+{ |
+ UBool b = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA)) != 0; |
+ return b; |
+} |
+ |
+ |
+ |
+U_CAPI int32_t U_EXPORT2 |
+utext_replace(UText *ut, |
+ int64_t nativeStart, int64_t nativeLimit, |
+ const UChar *replacementText, int32_t replacementLength, |
+ UErrorCode *status) |
+{ |
+ if (U_FAILURE(*status)) { |
+ return 0; |
+ } |
+ if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) == 0) { |
+ *status = U_NO_WRITE_PERMISSION; |
+ return 0; |
+ } |
+ int32_t i = ut->pFuncs->replace(ut, nativeStart, nativeLimit, replacementText, replacementLength, status); |
+ return i; |
+} |
+ |
+U_CAPI void U_EXPORT2 |
+utext_copy(UText *ut, |
+ int64_t nativeStart, int64_t nativeLimit, |
+ int64_t destIndex, |
+ UBool move, |
+ UErrorCode *status) |
+{ |
+ if (U_FAILURE(*status)) { |
+ return; |
+ } |
+ if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) == 0) { |
+ *status = U_NO_WRITE_PERMISSION; |
+ return; |
+ } |
+ ut->pFuncs->copy(ut, nativeStart, nativeLimit, destIndex, move, status); |
+} |
+ |
+ |
+ |
+U_CAPI UText * U_EXPORT2 |
+utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status) { |
+ UText *result; |
+ result = src->pFuncs->clone(dest, src, deep, status); |
+ if (readOnly) { |
+ utext_freeze(result); |
+ } |
+ return result; |
+} |
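+// Usage sketch (hypothetical caller): make an independent, read-only copy of an |
+// open UText.  For the providers in this file a deep clone owns its own copy of |
+// the text, and readOnly=TRUE freezes the result by clearing its WRITABLE property. |
+// |
+//     UErrorCode status = U_ZERO_ERROR; |
+//     UText *copy = utext_clone(NULL, ut, TRUE /*deep*/, TRUE /*readOnly*/, &status); |
+//     if (U_SUCCESS(status)) { |
+//         /* ... use copy ... */ |
+//     } |
+//     utext_close(copy); |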
+ |
+ |
+ |
+//------------------------------------------------------------------------------ |
+// |
+// UText common functions implementation |
+// |
+//------------------------------------------------------------------------------ |
+ |
+// |
+// UText.flags bit definitions |
+// |
+enum { |
+ UTEXT_HEAP_ALLOCATED = 1, // 1 if ICU has allocated this UText struct on the heap. |
+ // 0 if caller provided storage for the UText. |
+ |
+ UTEXT_EXTRA_HEAP_ALLOCATED = 2, // 1 if ICU has allocated extra storage as a separate |
+ // heap block. |
+ // 0 if there is no separate allocation. Either no extra |
+ // storage was requested, or it is appended to the end |
+ // of the main UText storage. |
+ |
+ UTEXT_OPEN = 4 // 1 if this UText is currently open |
+ // 0 if this UText is not open. |
+}; |
+ |
+ |
+// |
+// Extended form of a UText. The purpose is to aid in computing the total size required |
+// when a provider asks for a UText to be allocated with extra storage. |
+ |
+struct ExtendedUText { |
+ UText ut; |
+ UAlignedMemory extension; |
+}; |
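+// When extra storage is requested, utext_setup() below sizes the heap block as |
+// sizeof(ExtendedUText) + extraSpace - sizeof(UAlignedMemory), so the extension |
+// area starts at an aligned offset without over-allocating for the placeholder |
+// UAlignedMemory member. |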
+ |
+static const UText emptyText = UTEXT_INITIALIZER; |
+ |
+U_CAPI UText * U_EXPORT2 |
+utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status) { |
+ if (U_FAILURE(*status)) { |
+ return ut; |
+ } |
+ |
+ if (ut == NULL) { |
+ // We need to heap-allocate storage for the new UText |
+ int32_t spaceRequired = sizeof(UText); |
+ if (extraSpace > 0) { |
+ spaceRequired = sizeof(ExtendedUText) + extraSpace - sizeof(UAlignedMemory); |
+ } |
+ ut = (UText *)uprv_malloc(spaceRequired); |
+ if (ut == NULL) { |
+ *status = U_MEMORY_ALLOCATION_ERROR; |
+ return NULL; |
+ } else { |
+ *ut = emptyText; |
+ ut->flags |= UTEXT_HEAP_ALLOCATED; |
+ if (spaceRequired>0) { |
+ ut->extraSize = extraSpace; |
+ ut->pExtra = &((ExtendedUText *)ut)->extension; |
+ } |
+ } |
+ } else { |
+ // We have been supplied with an already existing UText. |
+ // Verify that it really appears to be a UText. |
+ if (ut->magic != UTEXT_MAGIC) { |
+ *status = U_ILLEGAL_ARGUMENT_ERROR; |
+ return ut; |
+ } |
+ // If the ut is already open and there's a provider supplied close |
+ // function, call it. |
+ if ((ut->flags & UTEXT_OPEN) && ut->pFuncs->close != NULL) { |
+ ut->pFuncs->close(ut); |
+ } |
+ ut->flags &= ~UTEXT_OPEN; |
+ |
+ // If extra space was requested by our caller, check whether |
+ // sufficient already exists, and allocate new if needed. |
+ if (extraSpace > ut->extraSize) { |
+ // Need more space. If there is existing separately allocated space, |
+ // delete it first, then allocate new space. |
+ if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) { |
+ uprv_free(ut->pExtra); |
+ ut->extraSize = 0; |
+ } |
+ ut->pExtra = uprv_malloc(extraSpace); |
+ if (ut->pExtra == NULL) { |
+ *status = U_MEMORY_ALLOCATION_ERROR; |
+ } else { |
+ ut->extraSize = extraSpace; |
+ ut->flags |= UTEXT_EXTRA_HEAP_ALLOCATED; |
+ } |
+ } |
+ } |
+ if (U_SUCCESS(*status)) { |
+ ut->flags |= UTEXT_OPEN; |
+ |
+ // Initialize all remaining fields of the UText. |
+ // |
+ ut->context = NULL; |
+ ut->chunkContents = NULL; |
+ ut->p = NULL; |
+ ut->q = NULL; |
+ ut->r = NULL; |
+ ut->a = 0; |
+ ut->b = 0; |
+ ut->c = 0; |
+ ut->chunkOffset = 0; |
+ ut->chunkLength = 0; |
+ ut->chunkNativeStart = 0; |
+ ut->chunkNativeLimit = 0; |
+ ut->nativeIndexingLimit = 0; |
+ ut->providerProperties = 0; |
+ ut->privA = 0; |
+ ut->privB = 0; |
+ ut->privC = 0; |
+ ut->privP = NULL; |
+ if (ut->pExtra!=NULL && ut->extraSize>0) |
+ uprv_memset(ut->pExtra, 0, ut->extraSize); |
+ |
+ } |
+ return ut; |
+} |
+ |
+ |
+U_CAPI UText * U_EXPORT2 |
+utext_close(UText *ut) { |
+ if (ut==NULL || |
+ ut->magic != UTEXT_MAGIC || |
+ (ut->flags & UTEXT_OPEN) == 0) |
+ { |
+ // The supplied ut is not an open UText. |
+ // Do nothing. |
+ return ut; |
+ } |
+ |
+ // If the provider gave us a close function, call it now. |
+ // This will clean up anything allocated specifically by the provider. |
+ if (ut->pFuncs->close != NULL) { |
+ ut->pFuncs->close(ut); |
+ } |
+ ut->flags &= ~UTEXT_OPEN; |
+ |
+ // If we (the framework) allocated the UText or subsidiary storage, |
+ // delete it. |
+ if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) { |
+ uprv_free(ut->pExtra); |
+ ut->pExtra = NULL; |
+ ut->flags &= ~UTEXT_EXTRA_HEAP_ALLOCATED; |
+ ut->extraSize = 0; |
+ } |
+ |
+ // Zero out function table of the closed UText. This is a defensive move, |
+    // intended to cause applications that inadvertently use a closed |
+ // utext to crash with null pointer errors. |
+ ut->pFuncs = NULL; |
+ |
+ if (ut->flags & UTEXT_HEAP_ALLOCATED) { |
+ // This UText was allocated by UText setup. We need to free it. |
+ // Clear magic, so we can detect if the user messes up and immediately |
+ // tries to reopen another UText using the deleted storage. |
+ ut->magic = 0; |
+ uprv_free(ut); |
+ ut = NULL; |
+ } |
+ return ut; |
+} |
+ |
+ |
+ |
+ |
+// |
+// invalidateChunk Reset a chunk to have no contents, so that the next call |
+// to access will cause new data to load. |
+// This is needed when copy/move/replace operate directly on the |
+// backing text, potentially putting it out of sync with the |
+// contents in the chunk. |
+// |
+static void |
+invalidateChunk(UText *ut) { |
+ ut->chunkLength = 0; |
+ ut->chunkNativeLimit = 0; |
+ ut->chunkNativeStart = 0; |
+ ut->chunkOffset = 0; |
+ ut->nativeIndexingLimit = 0; |
+} |
+ |
+// |
+// pinIndex Do range pinning on a native index parameter. |
+// 64 bit pinning is done in place. |
+// 32 bit truncated result is returned as a convenience for |
+// use in providers that don't need 64 bits. |
+static int32_t |
+pinIndex(int64_t &index, int64_t limit) { |
+ if (index<0) { |
+ index = 0; |
+ } else if (index > limit) { |
+ index = limit; |
+ } |
+ return (int32_t)index; |
+} |
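+// Example: with a native length (limit) of 10, pinIndex(index, 10) clamps an |
+// incoming index of -3 to 0 and an incoming index of 42 to 10, modifying the |
+// 64-bit index in place and returning the clamped value as an int32_t. |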
+ |
+ |
+U_CDECL_BEGIN |
+ |
+// |
+// Pointer relocation function, |
+// a utility used by shallow clone. |
+// Adjust a pointer that refers to something within one UText (the source) |
+// to refer to the same relative offset within another UText (the target) |
+// |
+static void adjustPointer(UText *dest, const void **destPtr, const UText *src) { |
+ // convert all pointers to (char *) so that byte address arithmetic will work. |
+ char *dptr = (char *)*destPtr; |
+ char *dUText = (char *)dest; |
+ char *sUText = (char *)src; |
+ |
+ if (dptr >= (char *)src->pExtra && dptr < ((char*)src->pExtra)+src->extraSize) { |
+ // target ptr was to something within the src UText's pExtra storage. |
+ // relocate it into the target UText's pExtra region. |
+ *destPtr = ((char *)dest->pExtra) + (dptr - (char *)src->pExtra); |
+ } else if (dptr>=sUText && dptr < sUText+src->sizeOfStruct) { |
+ // target ptr was pointing to somewhere within the source UText itself. |
+ // Move it to the same offset within the target UText. |
+ *destPtr = dUText + (dptr-sUText); |
+ } |
+} |
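+// Example: after the struct copy in shallowTextClone() below, a UTF-8 UText's |
+// dest->p and dest->q still point into the source's pExtra buffers; |
+// adjustPointer() rebases them to the same offsets within dest->pExtra so the |
+// clone iterates over its own chunk buffers. |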
+ |
+ |
+// |
+// Clone. This is a generic copy-the-utext-by-value clone function that can be |
+// used as-is with some utext types, and as a helper by other clones. |
+// |
+static UText * U_CALLCONV |
+shallowTextClone(UText * dest, const UText * src, UErrorCode * status) { |
+ if (U_FAILURE(*status)) { |
+ return NULL; |
+ } |
+ int32_t srcExtraSize = src->extraSize; |
+ |
+ // |
+    // Use the generic utext_setup to allocate storage if required. |
+ // |
+ dest = utext_setup(dest, srcExtraSize, status); |
+ if (U_FAILURE(*status)) { |
+ return dest; |
+ } |
+ |
+ // |
+ // flags (how the UText was allocated) and the pointer to the |
+ // extra storage must retain the values in the cloned utext that |
+ // were set up by utext_setup. Save them separately before |
+ // copying the whole struct. |
+ // |
+ void *destExtra = dest->pExtra; |
+ int32_t flags = dest->flags; |
+ |
+ |
+ // |
+ // Copy the whole UText struct by value. |
+ // Any "Extra" storage is copied also. |
+ // |
+ int sizeToCopy = src->sizeOfStruct; |
+ if (sizeToCopy > dest->sizeOfStruct) { |
+ sizeToCopy = dest->sizeOfStruct; |
+ } |
+ uprv_memcpy(dest, src, sizeToCopy); |
+ dest->pExtra = destExtra; |
+ dest->flags = flags; |
+ if (srcExtraSize > 0) { |
+ uprv_memcpy(dest->pExtra, src->pExtra, srcExtraSize); |
+ } |
+ |
+ // |
+ // Relocate any pointers in the target that refer to the UText itself |
+ // to point to the cloned copy rather than the original source. |
+ // |
+ adjustPointer(dest, &dest->context, src); |
+ adjustPointer(dest, &dest->p, src); |
+ adjustPointer(dest, &dest->q, src); |
+ adjustPointer(dest, &dest->r, src); |
+ adjustPointer(dest, (const void **)&dest->chunkContents, src); |
+ |
+ return dest; |
+} |
+ |
+ |
+U_CDECL_END |
+ |
+ |
+ |
+//------------------------------------------------------------------------------ |
+// |
+// UText implementation for UTF-8 char * strings (read-only) |
+// Limitation:  string length must be <= 0x7fffffff |
+//              (the length must fit in an int32_t variable). |
+// |
+// Use of UText data members: |
+// context pointer to UTF-8 string |
+// utext.b is the input string length (bytes). |
+// utext.c Length scanned so far in string |
+// (for optimizing finding length of zero terminated strings.) |
+// utext.p pointer to the current buffer |
+// utext.q pointer to the other buffer. |
+// |
+//------------------------------------------------------------------------------ |
+ |
+// Chunk size. |
+// Must be less than 85, because of byte mapping from UChar indexes to native indexes. |
+// Worst case is three native bytes to one UChar.  (Supplementaries are 4 native bytes |
+// to two UChars.) |
+// |
+enum { UTF8_TEXT_CHUNK_SIZE=32 }; |
+ |
+// |
+// UTF8Buf Two of these structs will be set up in the UText's extra allocated space. |
+// Each contains the UChar chunk buffer, the to and from native maps, and |
+// header info. |
+// |
+// because backwards iteration fills the buffers starting at the end and |
+// working towards the front, the filled part of the buffers may not begin |
+// at the start of the available storage for the buffers. |
+// |
+// Buffer size is one bigger than the specified UTF8_TEXT_CHUNK_SIZE to allow for |
+// the last character added being a supplementary, and thus requiring a surrogate |
+// pair. Doing this is simpler than checking for the edge case. |
+// |
+ |
+struct UTF8Buf { |
+ int32_t bufNativeStart; // Native index of first char in UChar buf |
+ int32_t bufNativeLimit; // Native index following last char in buf. |
+ int32_t bufStartIdx; // First filled position in buf. |
+ int32_t bufLimitIdx; // Limit of filled range in buf. |
+ int32_t bufNILimit; // Limit of native indexing part of buf |
+ int32_t toUCharsMapStart; // Native index corresponding to |
+ // mapToUChars[0]. |
+ // Set to bufNativeStart when filling forwards. |
+ // Set to computed value when filling backwards. |
+ |
+ UChar buf[UTF8_TEXT_CHUNK_SIZE+4]; // The UChar buffer. Requires one extra position beyond the |
+                                                     //   chunk size, to allow for a surrogate at the end. |
+ // Length must be identical to mapToNative array, below, |
+ // because of the way indexing works when the array is |
+ // filled backwards during a reverse iteration. Thus, |
+ // the additional extra size. |
+ uint8_t mapToNative[UTF8_TEXT_CHUNK_SIZE+4]; // map UChar index in buf to |
+ // native offset from bufNativeStart. |
+ // Requires two extra slots, |
+ // one for a supplementary starting in the last normal position, |
+ // and one for an entry for the buffer limit position. |
+ uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to |
+                                                     //   corresponding offset in filled part of buf. |
+ int32_t align; |
+}; |
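+// Illustration (assumed input, forward fill from native index 0): for the bytes |
+// 41 E2 82 AC 42 ('A', U+20AC, 'B' in UTF-8), buf holds { 0x0041, 0x20AC, 0x0042 }, |
+// mapToNative begins { 0, 1, 4, 5 } (one entry per UChar plus a limit entry), and |
+// mapToUChars begins { 0, 1, 1, 1, 2, 3 } (one entry per native byte plus a limit |
+// entry), so every byte of the three-byte U+20AC sequence maps back to the single |
+// UChar that holds it. |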
+ |
+U_CDECL_BEGIN |
+ |
+// |
+// utf8TextLength |
+// |
+// Get the length of the string. If we don't already know it, |
+// we'll need to scan for the trailing nul. |
+// |
+static int64_t U_CALLCONV |
+utf8TextLength(UText *ut) { |
+ if (ut->b < 0) { |
+ // Zero terminated string, and we haven't scanned to the end yet. |
+ // Scan it now. |
+ const char *r = (const char *)ut->context + ut->c; |
+ while (*r != 0) { |
+ r++; |
+ } |
+ if ((r - (const char *)ut->context) < 0x7fffffff) { |
+ ut->b = (int32_t)(r - (const char *)ut->context); |
+ } else { |
+ // Actual string was bigger (more than 2 gig) than we |
+ // can handle. Clip it to 2 GB. |
+ ut->b = 0x7fffffff; |
+ } |
+ ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); |
+ } |
+ return ut->b; |
+} |
+ |
+ |
+ |
+ |
+ |
+ |
+static UBool U_CALLCONV |
+utf8TextAccess(UText *ut, int64_t index, UBool forward) { |
+ // |
+ // Apologies to those who are allergic to goto statements. |
+ // Consider each goto to a labelled block to be the equivalent of |
+ // call the named block as if it were a function(); |
+ // return; |
+ // |
+ const uint8_t *s8=(const uint8_t *)ut->context; |
+ UTF8Buf *u8b = NULL; |
+ int32_t length = ut->b; // Length of original utf-8 |
+ int32_t ix= (int32_t)index; // Requested index, trimmed to 32 bits. |
+ int32_t mapIndex = 0; |
+ if (index<0) { |
+ ix=0; |
+ } else if (index > 0x7fffffff) { |
+ // Strings with 64 bit lengths not supported by this UTF-8 provider. |
+ ix = 0x7fffffff; |
+ } |
+ |
+ // Pin requested index to the string length. |
+ if (ix>length) { |
+ if (length>=0) { |
+ ix=length; |
+ } else if (ix>=ut->c) { |
+ // Zero terminated string, and requested index is beyond |
+ // the region that has already been scanned. |
+ // Scan up to either the end of the string or to the |
+ // requested position, whichever comes first. |
+ while (ut->c<ix && s8[ut->c]!=0) { |
+ ut->c++; |
+ } |
+ // TODO: support for null terminated string length > 32 bits. |
+ if (s8[ut->c] == 0) { |
+ // We just found the actual length of the string. |
+ // Trim the requested index back to that. |
+ ix = ut->c; |
+ ut->b = ut->c; |
+ length = ut->c; |
+ ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); |
+ } |
+ } |
+ } |
+ |
+ // |
+ // Dispatch to the appropriate action for a forward iteration request. |
+ // |
+ if (forward) { |
+ if (ix==ut->chunkNativeLimit) { |
+ // Check for normal sequential iteration cases first. |
+ if (ix==length) { |
+ // Just reached end of string |
+ // Don't swap buffers, but do set the |
+ // current buffer position. |
+ ut->chunkOffset = ut->chunkLength; |
+ return FALSE; |
+ } else { |
+ // End of current buffer. |
+ // check whether other buffer already has what we need. |
+ UTF8Buf *altB = (UTF8Buf *)ut->q; |
+ if (ix>=altB->bufNativeStart && ix<altB->bufNativeLimit) { |
+ goto swapBuffers; |
+ } |
+ } |
+ } |
+ |
+        // A random access.  Desired index could be in either or neither buf. |
+        // For optimizing the order of testing, first check for the index |
+        // being in the other buffer.  This will be the case for uses that |
+        // move back and forth over a fairly limited range. |
+ { |
+ u8b = (UTF8Buf *)ut->q; // the alternate buffer |
+ if (ix>=u8b->bufNativeStart && ix<u8b->bufNativeLimit) { |
+ // Requested index is in the other buffer. |
+ goto swapBuffers; |
+ } |
+ if (ix == length) { |
+ // Requested index is end-of-string. |
+ // (this is the case of randomly seeking to the end. |
+ // The case of iterating off the end is handled earlier.) |
+ if (ix == ut->chunkNativeLimit) { |
+ // Current buffer extends up to the end of the string. |
+ // Leave it as the current buffer. |
+ ut->chunkOffset = ut->chunkLength; |
+ return FALSE; |
+ } |
+ if (ix == u8b->bufNativeLimit) { |
+ // Alternate buffer extends to the end of string. |
+ // Swap it in as the current buffer. |
+ goto swapBuffersAndFail; |
+ } |
+ |
+ // Neither existing buffer extends to the end of the string. |
+ goto makeStubBuffer; |
+ } |
+ |
+ if (ix<ut->chunkNativeStart || ix>=ut->chunkNativeLimit) { |
+ // Requested index is in neither buffer. |
+ goto fillForward; |
+ } |
+ |
+ // Requested index is in this buffer. |
+ u8b = (UTF8Buf *)ut->p; // the current buffer |
+ mapIndex = ix - u8b->toUCharsMapStart; |
+ ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; |
+ return TRUE; |
+ |
+ } |
+ } |
+ |
+ |
+ // |
+ // Dispatch to the appropriate action for a |
+    // Backwards Direction iteration request. |
+ // |
+ if (ix==ut->chunkNativeStart) { |
+ // Check for normal sequential iteration cases first. |
+ if (ix==0) { |
+ // Just reached the start of string |
+ // Don't swap buffers, but do set the |
+ // current buffer position. |
+ ut->chunkOffset = 0; |
+ return FALSE; |
+ } else { |
+ // Start of current buffer. |
+ // check whether other buffer already has what we need. |
+ UTF8Buf *altB = (UTF8Buf *)ut->q; |
+ if (ix>altB->bufNativeStart && ix<=altB->bufNativeLimit) { |
+ goto swapBuffers; |
+ } |
+ } |
+ } |
+ |
+    // A random access.  Desired index could be in either or neither buf. |
+ // For optimizing the order of testing, |
+ // Most likely case: in the other buffer. |
+ // Second most likely: in neither buffer. |
+ // Unlikely, but must work: in the current buffer. |
+ u8b = (UTF8Buf *)ut->q; // the alternate buffer |
+ if (ix>u8b->bufNativeStart && ix<=u8b->bufNativeLimit) { |
+ // Requested index is in the other buffer. |
+ goto swapBuffers; |
+ } |
+ // Requested index is start-of-string. |
+ // (this is the case of randomly seeking to the start. |
+ // The case of iterating off the start is handled earlier.) |
+ if (ix==0) { |
+ if (u8b->bufNativeStart==0) { |
+ // Alternate buffer contains the data for the start string. |
+ // Make it be the current buffer. |
+ goto swapBuffersAndFail; |
+ } else { |
+ // Request for data before the start of string, |
+ // neither buffer is usable. |
+ // set up a zero-length buffer. |
+ goto makeStubBuffer; |
+ } |
+ } |
+ |
+ if (ix<=ut->chunkNativeStart || ix>ut->chunkNativeLimit) { |
+ // Requested index is in neither buffer. |
+ goto fillReverse; |
+ } |
+ |
+ // Requested index is in this buffer. |
+ // Set the utf16 buffer index. |
+ u8b = (UTF8Buf *)ut->p; |
+ mapIndex = ix - u8b->toUCharsMapStart; |
+ ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; |
+ if (ut->chunkOffset==0) { |
+ // This occurs when the first character in the text is |
+ // a multi-byte UTF-8 char, and the requested index is to |
+        // one of the trailing bytes.  Because there is no preceding |
+        // character, this access fails.  We can't pick up on the |
+ // situation sooner because the requested index is not zero. |
+ return FALSE; |
+ } else { |
+ return TRUE; |
+ } |
+ |
+ |
+ |
+swapBuffers: |
+ // The alternate buffer (ut->q) has the string data that was requested. |
+ // Swap the primary and alternate buffers, and set the |
+ // chunk index into the new primary buffer. |
+ { |
+ u8b = (UTF8Buf *)ut->q; |
+ ut->q = ut->p; |
+ ut->p = u8b; |
+ ut->chunkContents = &u8b->buf[u8b->bufStartIdx]; |
+ ut->chunkLength = u8b->bufLimitIdx - u8b->bufStartIdx; |
+ ut->chunkNativeStart = u8b->bufNativeStart; |
+ ut->chunkNativeLimit = u8b->bufNativeLimit; |
+ ut->nativeIndexingLimit = u8b->bufNILimit; |
+ |
+ // Index into the (now current) chunk |
+ // Use the map to set the chunk index. It's more trouble than it's worth |
+ // to check whether native indexing can be used. |
+ U_ASSERT(ix>=u8b->bufNativeStart); |
+ U_ASSERT(ix<=u8b->bufNativeLimit); |
+ mapIndex = ix - u8b->toUCharsMapStart; |
+ U_ASSERT(mapIndex>=0); |
+ U_ASSERT(mapIndex<(int32_t)sizeof(u8b->mapToUChars)); |
+ ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; |
+ |
+ return TRUE; |
+ } |
+ |
+ |
+swapBuffersAndFail: |
+ // We got a request for either the start or end of the string, |
+ // with iteration continuing in the out-of-bounds direction. |
+ // The alternate buffer already contains the data up to the |
+ // start/end. |
+ // Swap the buffers, then return failure, indicating that we couldn't |
+ // make things correct for continuing the iteration in the requested |
+ // direction. The position & buffer are correct should the |
+ // user decide to iterate in the opposite direction. |
+ u8b = (UTF8Buf *)ut->q; |
+ ut->q = ut->p; |
+ ut->p = u8b; |
+ ut->chunkContents = &u8b->buf[u8b->bufStartIdx]; |
+ ut->chunkLength = u8b->bufLimitIdx - u8b->bufStartIdx; |
+ ut->chunkNativeStart = u8b->bufNativeStart; |
+ ut->chunkNativeLimit = u8b->bufNativeLimit; |
+ ut->nativeIndexingLimit = u8b->bufNILimit; |
+ |
+ // Index into the (now current) chunk |
+ // For this function (swapBuffersAndFail), the requested index |
+ // will always be at either the start or end of the chunk. |
+ if (ix==u8b->bufNativeLimit) { |
+ ut->chunkOffset = ut->chunkLength; |
+ } else { |
+ ut->chunkOffset = 0; |
+ U_ASSERT(ix == u8b->bufNativeStart); |
+ } |
+ return FALSE; |
+ |
+makeStubBuffer: |
+ // The user has done a seek/access past the start or end |
+ // of the string. Rather than loading data that is likely |
+ // to never be used, just set up a zero-length buffer at |
+ // the position. |
+ u8b = (UTF8Buf *)ut->q; |
+ u8b->bufNativeStart = ix; |
+ u8b->bufNativeLimit = ix; |
+ u8b->bufStartIdx = 0; |
+ u8b->bufLimitIdx = 0; |
+ u8b->bufNILimit = 0; |
+ u8b->toUCharsMapStart = ix; |
+ u8b->mapToNative[0] = 0; |
+ u8b->mapToUChars[0] = 0; |
+ goto swapBuffersAndFail; |
+ |
+ |
+ |
+fillForward: |
+ { |
+ // Move the incoming index to a code point boundary. |
+ U8_SET_CP_START(s8, 0, ix); |
+ |
+ // Swap the UText buffers. |
+ // We want to fill what was previously the alternate buffer, |
+ // and make what was the current buffer be the new alternate. |
+ UTF8Buf *u8b = (UTF8Buf *)ut->q; |
+ ut->q = ut->p; |
+ ut->p = u8b; |
+ |
+ int32_t strLen = ut->b; |
+ UBool nulTerminated = FALSE; |
+ if (strLen < 0) { |
+ strLen = 0x7fffffff; |
+ nulTerminated = TRUE; |
+ } |
+ |
+ UChar *buf = u8b->buf; |
+ uint8_t *mapToNative = u8b->mapToNative; |
+ uint8_t *mapToUChars = u8b->mapToUChars; |
+ int32_t destIx = 0; |
+ int32_t srcIx = ix; |
+ UBool seenNonAscii = FALSE; |
+ UChar32 c = 0; |
+ |
+ // Fill the chunk buffer and mapping arrays. |
+ while (destIx<UTF8_TEXT_CHUNK_SIZE) { |
+ c = s8[srcIx]; |
+ if (c>0 && c<0x80) { |
+ // Special case ASCII range for speed. |
+ // zero is excluded to simplify bounds checking. |
+ buf[destIx] = (UChar)c; |
+ mapToNative[destIx] = (uint8_t)(srcIx - ix); |
+ mapToUChars[srcIx-ix] = (uint8_t)destIx; |
+ srcIx++; |
+ destIx++; |
+ } else { |
+ // General case, handle everything. |
+ if (seenNonAscii == FALSE) { |
+ seenNonAscii = TRUE; |
+ u8b->bufNILimit = destIx; |
+ } |
+ |
+ int32_t cIx = srcIx; |
+ int32_t dIx = destIx; |
+ int32_t dIxSaved = destIx; |
+ U8_NEXT(s8, srcIx, strLen, c); |
+ if (c==0 && nulTerminated) { |
+ srcIx--; |
+ break; |
+ } |
+ if (c<0) { |
+ // Illegal UTF-8. Replace with sub character. |
+ c = 0x0fffd; |
+ } |
+ |
+ U16_APPEND_UNSAFE(buf, destIx, c); |
+ do { |
+ mapToNative[dIx++] = (uint8_t)(cIx - ix); |
+ } while (dIx < destIx); |
+ |
+ do { |
+ mapToUChars[cIx++ - ix] = (uint8_t)dIxSaved; |
+ } while (cIx < srcIx); |
+ } |
+ if (srcIx>=strLen) { |
+ break; |
+ } |
+ |
+ } |
+ |
+ // store Native <--> Chunk Map entries for the end of the buffer. |
+ // There is no actual character here, but the index position is valid. |
+ mapToNative[destIx] = (uint8_t)(srcIx - ix); |
+ mapToUChars[srcIx - ix] = (uint8_t)destIx; |
+ |
+ // fill in Buffer descriptor |
+ u8b->bufNativeStart = ix; |
+ u8b->bufNativeLimit = srcIx; |
+ u8b->bufStartIdx = 0; |
+ u8b->bufLimitIdx = destIx; |
+ if (seenNonAscii == FALSE) { |
+ u8b->bufNILimit = destIx; |
+ } |
+ u8b->toUCharsMapStart = u8b->bufNativeStart; |
+ |
+ // Set UText chunk to refer to this buffer. |
+ ut->chunkContents = buf; |
+ ut->chunkOffset = 0; |
+ ut->chunkLength = u8b->bufLimitIdx; |
+ ut->chunkNativeStart = u8b->bufNativeStart; |
+ ut->chunkNativeLimit = u8b->bufNativeLimit; |
+ ut->nativeIndexingLimit = u8b->bufNILimit; |
+ |
+ // For zero terminated strings, keep track of the maximum point |
+ // scanned so far. |
+ if (nulTerminated && srcIx>ut->c) { |
+ ut->c = srcIx; |
+ if (c==0) { |
+ // We scanned to the end. |
+ // Remember the actual length. |
+ ut->b = srcIx; |
+ ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); |
+ } |
+ } |
+ return TRUE; |
+ } |
+ |
+ |
+fillReverse: |
+ { |
+ // Move the incoming index to a code point boundary. |
+ // Can only do this if the incoming index is somewhere in the interior of the string. |
+ // If index is at the end, there is no character there to look at. |
+ if (ix != ut->b) { |
+ U8_SET_CP_START(s8, 0, ix); |
+ } |
+ |
+ // Swap the UText buffers. |
+ // We want to fill what was previously the alternate buffer, |
+ // and make what was the current buffer be the new alternate. |
+ UTF8Buf *u8b = (UTF8Buf *)ut->q; |
+ ut->q = ut->p; |
+ ut->p = u8b; |
+ |
+ UChar *buf = u8b->buf; |
+ uint8_t *mapToNative = u8b->mapToNative; |
+ uint8_t *mapToUChars = u8b->mapToUChars; |
+ int32_t toUCharsMapStart = ix - (UTF8_TEXT_CHUNK_SIZE*3 + 1); |
+ int32_t destIx = UTF8_TEXT_CHUNK_SIZE+2; // Start in the overflow region |
+ // at end of buffer to leave room |
+ // for a surrogate pair at the |
+ // buffer start. |
+ int32_t srcIx = ix; |
+ int32_t bufNILimit = destIx; |
+ UChar32 c; |
+ |
+ // Map to/from Native Indexes, fill in for the position at the end of |
+ // the buffer. |
+ // |
+ mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); |
+ mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx; |
+ |
+ // Fill the chunk buffer |
+ // Work backwards, filling from the end of the buffer towards the front. |
+ // |
+ while (destIx>2 && (srcIx - toUCharsMapStart > 5) && (srcIx > 0)) { |
+ srcIx--; |
+ destIx--; |
+ |
+ // Get last byte of the UTF-8 character |
+ c = s8[srcIx]; |
+ if (c<0x80) { |
+ // Special case ASCII range for speed. |
+ buf[destIx] = (UChar)c; |
+ mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx; |
+ mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); |
+ } else { |
+ // General case, handle everything non-ASCII. |
+ |
+ int32_t sIx = srcIx; // ix of last byte of multi-byte u8 char |
+ |
+ // Get the full character from the UTF8 string. |
+                // use code derived from the macros in utf8.h |
+ // Leaves srcIx pointing at the first byte of the UTF-8 char. |
+ // |
+ if (c<=0xbf) { |
+ c=utf8_prevCharSafeBody(s8, 0, &srcIx, c, -1); |
+ // leaves srcIx at first byte of the multi-byte char. |
+ } else { |
+ c=0x0fffd; |
+ } |
+ |
+ // Store the character in UTF-16 buffer. |
+ if (c<0x10000) { |
+ buf[destIx] = (UChar)c; |
+ mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); |
+ } else { |
+ buf[destIx] = U16_TRAIL(c); |
+ mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); |
+ buf[--destIx] = U16_LEAD(c); |
+ mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); |
+ } |
+ |
+ // Fill in the map from native indexes to UChars buf index. |
+ do { |
+ mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx; |
+ } while (sIx >= srcIx); |
+ |
+ // Set native indexing limit to be the current position. |
+ // We are processing a non-ascii, non-native-indexing char now; |
+ // the limit will be here if the rest of the chars to be |
+ // added to this buffer are ascii. |
+ bufNILimit = destIx; |
+ } |
+ } |
+ u8b->bufNativeStart = srcIx; |
+ u8b->bufNativeLimit = ix; |
+ u8b->bufStartIdx = destIx; |
+ u8b->bufLimitIdx = UTF8_TEXT_CHUNK_SIZE+2; |
+ u8b->bufNILimit = bufNILimit - u8b->bufStartIdx; |
+ u8b->toUCharsMapStart = toUCharsMapStart; |
+ |
+ ut->chunkContents = &buf[u8b->bufStartIdx]; |
+ ut->chunkLength = u8b->bufLimitIdx - u8b->bufStartIdx; |
+ ut->chunkOffset = ut->chunkLength; |
+ ut->chunkNativeStart = u8b->bufNativeStart; |
+ ut->chunkNativeLimit = u8b->bufNativeLimit; |
+ ut->nativeIndexingLimit = u8b->bufNILimit; |
+ return TRUE; |
+ } |
+ |
+} |
+ |
+ |
+ |
+// |
+// This is a slightly modified copy of u_strFromUTF8, |
+// Inserts a Replacement Char rather than failing on invalid UTF-8 |
+// Removes unnecessary features. |
+// |
+static UChar* |
+utext_strFromUTF8(UChar *dest, |
+ int32_t destCapacity, |
+ int32_t *pDestLength, |
+ const char* src, |
+ int32_t srcLength, // required. NUL terminated not supported. |
+ UErrorCode *pErrorCode |
+ ) |
+{ |
+ |
+ UChar *pDest = dest; |
+ UChar *pDestLimit = dest+destCapacity; |
+ UChar32 ch=0; |
+ int32_t index = 0; |
+ int32_t reqLength = 0; |
+ uint8_t* pSrc = (uint8_t*) src; |
+ |
+ |
+ while((index < srcLength)&&(pDest<pDestLimit)){ |
+ ch = pSrc[index++]; |
+ if(ch <=0x7f){ |
+ *pDest++=(UChar)ch; |
+ }else{ |
+ ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -1); |
+ if(ch<0){ |
+ ch = 0xfffd; |
+ } |
+ if(U_IS_BMP(ch)){ |
+ *(pDest++)=(UChar)ch; |
+ }else{ |
+ *(pDest++)=UTF16_LEAD(ch); |
+ if(pDest<pDestLimit){ |
+ *(pDest++)=UTF16_TRAIL(ch); |
+ }else{ |
+ reqLength++; |
+ break; |
+ } |
+ } |
+ } |
+ } |
+    /* do not fill the dest buffer, just count the UChars needed */ |
+ while(index < srcLength){ |
+ ch = pSrc[index++]; |
+ if(ch <= 0x7f){ |
+ reqLength++; |
+ }else{ |
+ ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -1); |
+ if(ch<0){ |
+ ch = 0xfffd; |
+ } |
+ reqLength+=U16_LENGTH(ch); |
+ } |
+ } |
+ |
+ reqLength+=(int32_t)(pDest - dest); |
+ |
+ if(pDestLength){ |
+ *pDestLength = reqLength; |
+ } |
+ |
+ /* Terminate the buffer */ |
+ u_terminateUChars(dest,destCapacity,reqLength,pErrorCode); |
+ |
+ return dest; |
+} |
+ |
+ |
+ |
+static int32_t U_CALLCONV |
+utf8TextExtract(UText *ut, |
+ int64_t start, int64_t limit, |
+ UChar *dest, int32_t destCapacity, |
+ UErrorCode *pErrorCode) { |
+ if(U_FAILURE(*pErrorCode)) { |
+ return 0; |
+ } |
+ if(destCapacity<0 || (dest==NULL && destCapacity>0)) { |
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+ return 0; |
+ } |
+ int32_t length = ut->b; |
+ int32_t start32 = pinIndex(start, length); |
+ int32_t limit32 = pinIndex(limit, length); |
+ |
+ if(start32>limit32) { |
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
+ return 0; |
+ } |
+ |
+ |
+ // adjust the incoming indexes to land on code point boundaries if needed. |
+ // adjust by no more than three, because that is the largest number of trail bytes |
+ // in a well formed UTF8 character. |
+ const uint8_t *buf = (const uint8_t *)ut->context; |
+ int i; |
+ if (start32 < ut->chunkNativeLimit) { |
+ for (i=0; i<3; i++) { |
+ if (U8_IS_SINGLE(buf[start32]) || U8_IS_LEAD(buf[start32]) || start32==0) { |
+ break; |
+ } |
+ start32--; |
+ } |
+ } |
+ |
+ if (limit32 < ut->chunkNativeLimit) { |
+ for (i=0; i<3; i++) { |
+ if (U8_IS_SINGLE(buf[limit32]) || U8_IS_LEAD(buf[limit32]) || limit32==0) { |
+ break; |
+ } |
+ limit32--; |
+ } |
+ } |
+ |
+ // Do the actual extract. |
+ int32_t destLength=0; |
+ utext_strFromUTF8(dest, destCapacity, &destLength, |
+ (const char *)ut->context+start32, limit32-start32, |
+ pErrorCode); |
+ utf8TextAccess(ut, limit32, TRUE); |
+ return destLength; |
+} |
+ |
+// |
+// utf8TextMapOffsetToNative |
+// |
+// Map a chunk (UTF-16) offset to a native index. |
+static int64_t U_CALLCONV |
+utf8TextMapOffsetToNative(const UText *ut) { |
+ // |
+ UTF8Buf *u8b = (UTF8Buf *)ut->p; |
+ U_ASSERT(ut->chunkOffset>ut->nativeIndexingLimit && ut->chunkOffset<=ut->chunkLength); |
+ int32_t nativeOffset = u8b->mapToNative[ut->chunkOffset + u8b->bufStartIdx] + u8b->toUCharsMapStart; |
+ U_ASSERT(nativeOffset >= ut->chunkNativeStart && nativeOffset <= ut->chunkNativeLimit); |
+ return nativeOffset; |
+} |
+ |
+// |
+// Map a native index to the corresponding chunk offset |
+// |
+static int32_t U_CALLCONV |
+utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) { |
+ U_ASSERT(index64 <= 0x7fffffff); |
+ int32_t index = (int32_t)index64; |
+ UTF8Buf *u8b = (UTF8Buf *)ut->p; |
+ U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit); |
+ U_ASSERT(index<=ut->chunkNativeLimit); |
+ int32_t mapIndex = index - u8b->toUCharsMapStart; |
+ int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; |
+ U_ASSERT(offset>=0 && offset<=ut->chunkLength); |
+ return offset; |
+} |
+ |
+static UText * U_CALLCONV |
+utf8TextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) |
+{ |
+ // First do a generic shallow clone. Does everything needed for the UText struct itself. |
+ dest = shallowTextClone(dest, src, status); |
+ |
+ // For deep clones, make a copy of the string. |
+ // The copied storage is owned by the newly created clone. |
+ // |
+    // TODO: There is an issue with using utext_nativeLength(). |
+ // That function is non-const in cases where the input was NUL terminated |
+ // and the length has not yet been determined. |
+ // This function (clone()) is const. |
+    // There is potentially a thread safety issue lurking here. |
+ // |
+ if (deep && U_SUCCESS(*status)) { |
+ int32_t len = (int32_t)utext_nativeLength((UText *)src); |
+ char *copyStr = (char *)uprv_malloc(len+1); |
+ if (copyStr == NULL) { |
+ *status = U_MEMORY_ALLOCATION_ERROR; |
+ } else { |
+ uprv_memcpy(copyStr, src->context, len+1); |
+ dest->context = copyStr; |
+ dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT); |
+ } |
+ } |
+ return dest; |
+} |
+ |
+ |
+static void U_CALLCONV |
+utf8TextClose(UText *ut) { |
+ // Most of the work of close is done by the generic UText framework close. |
+ // All that needs to be done here is to delete the UTF8 string if the UText |
+ // owns it. This occurs if the UText was created by cloning. |
+ if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) { |
+ char *s = (char *)ut->context; |
+ uprv_free(s); |
+ ut->context = NULL; |
+ } |
+} |
+ |
+U_CDECL_END |
+ |
+ |
+static const struct UTextFuncs utf8Funcs = |
+{ |
+ sizeof(UTextFuncs), |
+ 0, 0, 0, // Reserved alignment padding |
+ utf8TextClone, |
+ utf8TextLength, |
+ utf8TextAccess, |
+ utf8TextExtract, |
+ NULL, /* replace*/ |
+ NULL, /* copy */ |
+ utf8TextMapOffsetToNative, |
+ utf8TextMapIndexToUTF16, |
+ utf8TextClose, |
+ NULL, // spare 1 |
+ NULL, // spare 2 |
+ NULL // spare 3 |
+}; |
+ |
+ |
+static const char gEmptyString[] = {0}; |
+ |
+U_CAPI UText * U_EXPORT2 |
+utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status) { |
+ if(U_FAILURE(*status)) { |
+ return NULL; |
+ } |
+ if(s==NULL && length==0) { |
+ s = gEmptyString; |
+ } |
+ |
+ if(s==NULL || length<-1 || length>INT32_MAX) { |
+ *status=U_ILLEGAL_ARGUMENT_ERROR; |
+ return NULL; |
+ } |
+ |
+ ut = utext_setup(ut, sizeof(UTF8Buf) * 2, status); |
+ if (U_FAILURE(*status)) { |
+ return ut; |
+ } |
+ |
+ ut->pFuncs = &utf8Funcs; |
+ ut->context = s; |
+ ut->b = (int32_t)length; |
+ ut->c = (int32_t)length; |
+ if (ut->c < 0) { |
+ ut->c = 0; |
+ ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); |
+ } |
+ ut->p = ut->pExtra; |
+ ut->q = (char *)ut->pExtra + sizeof(UTF8Buf); |
+ return ut; |
+ |
+} |
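+ |
+//  Illustrative use of utext_openUTF8 (a sketch only, not part of the library build; |
+//  it relies only on the public utext.h API that this file implements): |
+// |
+//      UErrorCode status = U_ZERO_ERROR; |
+//      const char *s8 = "caf\xC3\xA9";              // "caf" followed by U+00E9, in UTF-8 |
+//      UText ut = UTEXT_INITIALIZER; |
+//      utext_openUTF8(&ut, s8, -1, &status);        // -1: NUL terminated |
+//      for (UChar32 c = utext_next32From(&ut, 0); |
+//              c != U_SENTINEL; c = utext_next32(&ut)) { |
+//          // one code point per pass; the final character arrives as U+00E9 |
+//      } |
+//      utext_close(&ut); |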
+ |
+ |
+ |
+ |
+ |
+ |
+ |
+ |
+//------------------------------------------------------------------------------ |
+// |
+// UText implementation wrapper for Replaceable (read/write) |
+// |
+// Use of UText data members: |
+// context pointer to Replaceable. |
+// p pointer to Replaceable if it is owned by the UText. |
+// |
+//------------------------------------------------------------------------------ |
+ |
+ |
+ |
+// Minimum chunk size for this implementation: 3, |
+//    to allow for possible trimming so that chunks end on code point boundaries. |
+enum { REP_TEXT_CHUNK_SIZE=10 }; |
+ |
+struct ReplExtra { |
+ /* |
+ * Chunk UChars. |
+ * +1 to simplify filling with surrogate pair at the end. |
+ */ |
+ UChar s[REP_TEXT_CHUNK_SIZE+1]; |
+}; |
+ |
+ |
+U_CDECL_BEGIN |
+ |
+static UText * U_CALLCONV |
+repTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) { |
+ // First do a generic shallow clone. Does everything needed for the UText struct itself. |
+ dest = shallowTextClone(dest, src, status); |
+ |
+ // For deep clones, make a copy of the Replaceable. |
+ // The copied Replaceable storage is owned by the newly created UText clone. |
+    //      Setting the UTEXT_PROVIDER_OWNS_TEXT flag is the signal to the close() |
+    //      function to delete it. |
+ // |
+ if (deep && U_SUCCESS(*status)) { |
+ const Replaceable *replSrc = (const Replaceable *)src->context; |
+ dest->context = replSrc->clone(); |
+ dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT); |
+ |
+ // with deep clone, the copy is writable, even when the source is not. |
+ dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE); |
+ } |
+ return dest; |
+} |
+ |
+ |
+static void U_CALLCONV |
+repTextClose(UText *ut) { |
+ // Most of the work of close is done by the generic UText framework close. |
+ // All that needs to be done here is delete the Replaceable if the UText |
+ // owns it. This occurs if the UText was created by cloning. |
+ if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) { |
+ Replaceable *rep = (Replaceable *)ut->context; |
+ delete rep; |
+ ut->context = NULL; |
+ } |
+} |
+ |
+ |
+static int64_t U_CALLCONV |
+repTextLength(UText *ut) { |
+ const Replaceable *replSrc = (const Replaceable *)ut->context; |
+ int32_t len = replSrc->length(); |
+ return len; |
+} |
+ |
+ |
+static UBool U_CALLCONV |
+repTextAccess(UText *ut, int64_t index, UBool forward) { |
+ const Replaceable *rep=(const Replaceable *)ut->context; |
+ int32_t length=rep->length(); // Full length of the input text (bigger than a chunk) |
+ |
+ // clip the requested index to the limits of the text. |
+ int32_t index32 = pinIndex(index, length); |
+ U_ASSERT(index<=INT32_MAX); |
+ |
+ |
+ /* |
+ * Compute start/limit boundaries around index, for a segment of text |
+ * to be extracted. |
+ * To allow for the possibility that our user gave an index to the trailing |
+ * half of a surrogate pair, we must request one extra preceding UChar when |
+ * going in the forward direction. This will ensure that the buffer has the |
+ * entire code point at the specified index. |
+ */ |
+ if(forward) { |
+ |
+ if (index32>=ut->chunkNativeStart && index32<ut->chunkNativeLimit) { |
+ // Buffer already contains the requested position. |
+ ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart); |
+ return TRUE; |
+ } |
+ if (index32>=length && ut->chunkNativeLimit==length) { |
+ // Request for end of string, and buffer already extends up to it. |
+ // Can't get the data, but don't change the buffer. |
+ ut->chunkOffset = length - (int32_t)ut->chunkNativeStart; |
+ return FALSE; |
+ } |
+ |
+ ut->chunkNativeLimit = index + REP_TEXT_CHUNK_SIZE - 1; |
+ // Going forward, so we want to have the buffer with stuff at and beyond |
+ // the requested index. The -1 gets us one code point before the |
+ // requested index also, to handle the case of the index being on |
+ // a trail surrogate of a surrogate pair. |
+ if(ut->chunkNativeLimit > length) { |
+ ut->chunkNativeLimit = length; |
+ } |
+ // unless buffer ran off end, start is index-1. |
+ ut->chunkNativeStart = ut->chunkNativeLimit - REP_TEXT_CHUNK_SIZE; |
+ if(ut->chunkNativeStart < 0) { |
+ ut->chunkNativeStart = 0; |
+ } |
+ } else { |
+ // Reverse iteration. Fill buffer with data preceding the requested index. |
+ if (index32>ut->chunkNativeStart && index32<=ut->chunkNativeLimit) { |
+ // Requested position already in buffer. |
+ ut->chunkOffset = index32 - (int32_t)ut->chunkNativeStart; |
+ return TRUE; |
+ } |
+ if (index32==0 && ut->chunkNativeStart==0) { |
+ // Request for start, buffer already begins at start. |
+ // No data, but keep the buffer as is. |
+ ut->chunkOffset = 0; |
+ return FALSE; |
+ } |
+ |
+ // Figure out the bounds of the chunk to extract for reverse iteration. |
+        //    Take care that the chunk does not split a surrogate pair, while still |
+        //    containing the data we need. |
+ // Fix by requesting a chunk that includes an extra UChar at the end. |
+ // If this turns out to be a lead surrogate, we can lop it off and still have |
+ // the data we wanted. |
+ ut->chunkNativeStart = index32 + 1 - REP_TEXT_CHUNK_SIZE; |
+ if (ut->chunkNativeStart < 0) { |
+ ut->chunkNativeStart = 0; |
+ } |
+ |
+ ut->chunkNativeLimit = index32 + 1; |
+ if (ut->chunkNativeLimit > length) { |
+ ut->chunkNativeLimit = length; |
+ } |
+ } |
+ |
+ // Extract the new chunk of text from the Replaceable source. |
+ ReplExtra *ex = (ReplExtra *)ut->pExtra; |
+ // UnicodeString with its buffer a writable alias to the chunk buffer |
+ UnicodeString buffer(ex->s, 0 /*buffer length*/, REP_TEXT_CHUNK_SIZE /*buffer capacity*/); |
+ rep->extractBetween((int32_t)ut->chunkNativeStart, (int32_t)ut->chunkNativeLimit, buffer); |
+ |
+ ut->chunkContents = ex->s; |
+ ut->chunkLength = (int32_t)(ut->chunkNativeLimit - ut->chunkNativeStart); |
+ ut->chunkOffset = (int32_t)(index32 - ut->chunkNativeStart); |
+ |
+ // Surrogate pairs from the input text must not span chunk boundaries. |
+ // If end of chunk could be the start of a surrogate, trim it off. |
+ if (ut->chunkNativeLimit < length && |
+ U16_IS_LEAD(ex->s[ut->chunkLength-1])) { |
+ ut->chunkLength--; |
+ ut->chunkNativeLimit--; |
+ if (ut->chunkOffset > ut->chunkLength) { |
+ ut->chunkOffset = ut->chunkLength; |
+ } |
+ } |
+ |
+ // if the first UChar in the chunk could be the trailing half of a surrogate pair, |
+ // trim it off. |
+ if(ut->chunkNativeStart>0 && U16_IS_TRAIL(ex->s[0])) { |
+ ++(ut->chunkContents); |
+ ++(ut->chunkNativeStart); |
+ --(ut->chunkLength); |
+ --(ut->chunkOffset); |
+ } |
+ |
+ // adjust the index/chunkOffset to a code point boundary |
+ U16_SET_CP_START(ut->chunkContents, 0, ut->chunkOffset); |
+ |
+ // Use fast indexing for get/setNativeIndex() |
+ ut->nativeIndexingLimit = ut->chunkLength; |
+ |
+ return TRUE; |
+} |
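+ |
+//  Note (illustrative): with REP_TEXT_CHUNK_SIZE==10, a chunk that would otherwise |
+//  end just after the lead surrogate of a pair is trimmed to 9 UChars by the code |
+//  above, and a chunk that would begin on a trail surrogate drops its first UChar, |
+//  so a surrogate pair from the source text is never split across chunk boundaries. |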
+ |
+ |
+ |
+static int32_t U_CALLCONV |
+repTextExtract(UText *ut, |
+ int64_t start, int64_t limit, |
+ UChar *dest, int32_t destCapacity, |
+ UErrorCode *status) { |
+ const Replaceable *rep=(const Replaceable *)ut->context; |
+ int32_t length=rep->length(); |
+ |
+ if(U_FAILURE(*status)) { |
+ return 0; |
+ } |
+ if(destCapacity<0 || (dest==NULL && destCapacity>0)) { |
+ *status=U_ILLEGAL_ARGUMENT_ERROR; |
+ } |
+ if(start>limit) { |
+ *status=U_INDEX_OUTOFBOUNDS_ERROR; |
+ return 0; |
+ } |
+ |
+ int32_t start32 = pinIndex(start, length); |
+ int32_t limit32 = pinIndex(limit, length); |
+ |
+ // adjust start, limit if they point to trail half of surrogates |
+ if (start32<length && U16_IS_TRAIL(rep->charAt(start32)) && |
+ U_IS_SUPPLEMENTARY(rep->char32At(start32))){ |
+ start32--; |
+ } |
+ if (limit32<length && U16_IS_TRAIL(rep->charAt(limit32)) && |
+ U_IS_SUPPLEMENTARY(rep->char32At(limit32))){ |
+ limit32--; |
+ } |
+ |
+ length=limit32-start32; |
+ if(length>destCapacity) { |
+ limit32 = start32 + destCapacity; |
+ } |
+ UnicodeString buffer(dest, 0, destCapacity); // writable alias |
+ rep->extractBetween(start32, limit32, buffer); |
+ repTextAccess(ut, limit32, TRUE); |
+ |
+ return u_terminateUChars(dest, destCapacity, length, status); |
+} |
+ |
+static int32_t U_CALLCONV |
+repTextReplace(UText *ut, |
+ int64_t start, int64_t limit, |
+ const UChar *src, int32_t length, |
+ UErrorCode *status) { |
+ Replaceable *rep=(Replaceable *)ut->context; |
+ int32_t oldLength; |
+ |
+ if(U_FAILURE(*status)) { |
+ return 0; |
+ } |
+ if(src==NULL && length!=0) { |
+ *status=U_ILLEGAL_ARGUMENT_ERROR; |
+ return 0; |
+ } |
+ oldLength=rep->length(); // will subtract from new length |
+ if(start>limit ) { |
+ *status=U_INDEX_OUTOFBOUNDS_ERROR; |
+ return 0; |
+ } |
+ |
+ int32_t start32 = pinIndex(start, oldLength); |
+ int32_t limit32 = pinIndex(limit, oldLength); |
+ |
+ // Snap start & limit to code point boundaries. |
+ if (start32<oldLength && U16_IS_TRAIL(rep->charAt(start32)) && |
+ start32>0 && U16_IS_LEAD(rep->charAt(start32-1))) |
+ { |
+ start32--; |
+ } |
+ if (limit32<oldLength && U16_IS_LEAD(rep->charAt(limit32-1)) && |
+ U16_IS_TRAIL(rep->charAt(limit32))) |
+ { |
+ limit32++; |
+ } |
+ |
+ // Do the actual replace operation using methods of the Replaceable class |
+ UnicodeString replStr((UBool)(length<0), src, length); // read-only alias |
+ rep->handleReplaceBetween(start32, limit32, replStr); |
+ int32_t newLength = rep->length(); |
+ int32_t lengthDelta = newLength - oldLength; |
+ |
+ // Is the UText chunk buffer OK? |
+ if (ut->chunkNativeLimit > start32) { |
+ // this replace operation may have impacted the current chunk. |
+ // invalidate it, which will force a reload on the next access. |
+ invalidateChunk(ut); |
+ } |
+ |
+ // set the iteration position to the end of the newly inserted replacement text. |
+ int32_t newIndexPos = limit32 + lengthDelta; |
+ repTextAccess(ut, newIndexPos, TRUE); |
+ |
+ return lengthDelta; |
+} |
+ |
+ |
+static void U_CALLCONV |
+repTextCopy(UText *ut, |
+ int64_t start, int64_t limit, |
+ int64_t destIndex, |
+ UBool move, |
+ UErrorCode *status) |
+{ |
+ Replaceable *rep=(Replaceable *)ut->context; |
+ int32_t length=rep->length(); |
+ |
+ if(U_FAILURE(*status)) { |
+ return; |
+ } |
+ if (start>limit || (start<destIndex && destIndex<limit)) |
+ { |
+ *status=U_INDEX_OUTOFBOUNDS_ERROR; |
+ return; |
+ } |
+ |
+ int32_t start32 = pinIndex(start, length); |
+ int32_t limit32 = pinIndex(limit, length); |
+ int32_t destIndex32 = pinIndex(destIndex, length); |
+ |
+ // TODO: snap input parameters to code point boundaries. |
+ |
+ if(move) { |
+ // move: copy to destIndex, then replace original with nothing |
+ int32_t segLength=limit32-start32; |
+ rep->copy(start32, limit32, destIndex32); |
+ if(destIndex32<start32) { |
+ start32+=segLength; |
+ limit32+=segLength; |
+ } |
+ rep->handleReplaceBetween(start32, limit32, UnicodeString()); |
+ } else { |
+ // copy |
+ rep->copy(start32, limit32, destIndex32); |
+ } |
+ |
+ // If the change to the text touched the region in the chunk buffer, |
+ // invalidate the buffer. |
+ int32_t firstAffectedIndex = destIndex32; |
+ if (move && start32<firstAffectedIndex) { |
+ firstAffectedIndex = start32; |
+ } |
+ if (firstAffectedIndex < ut->chunkNativeLimit) { |
+ // changes may have affected range covered by the chunk |
+ invalidateChunk(ut); |
+ } |
+ |
+ // Put iteration position at the newly inserted (moved) block, |
+ int32_t nativeIterIndex = destIndex32 + limit32 - start32; |
+ if (move && destIndex32>start32) { |
+ // moved a block of text towards the end of the string. |
+ nativeIterIndex = destIndex32; |
+ } |
+ |
+ // Set position, reload chunk if needed. |
+ repTextAccess(ut, nativeIterIndex, TRUE); |
+} |
+ |
+static const struct UTextFuncs repFuncs = |
+{ |
+ sizeof(UTextFuncs), |
+ 0, 0, 0, // Reserved alignment padding |
+ repTextClone, |
+ repTextLength, |
+ repTextAccess, |
+ repTextExtract, |
+ repTextReplace, |
+ repTextCopy, |
+ NULL, // MapOffsetToNative, |
+ NULL, // MapIndexToUTF16, |
+ repTextClose, |
+ NULL, // spare 1 |
+ NULL, // spare 2 |
+ NULL // spare 3 |
+}; |
+ |
+ |
+U_CAPI UText * U_EXPORT2 |
+utext_openReplaceable(UText *ut, Replaceable *rep, UErrorCode *status) |
+{ |
+ if(U_FAILURE(*status)) { |
+ return NULL; |
+ } |
+ if(rep==NULL) { |
+ *status=U_ILLEGAL_ARGUMENT_ERROR; |
+ return NULL; |
+ } |
+ ut = utext_setup(ut, sizeof(ReplExtra), status); |
+ |
+ ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_WRITABLE); |
+ if(rep->hasMetaData()) { |
+ ut->providerProperties |=I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA); |
+ } |
+ |
+ ut->pFuncs = &repFuncs; |
+ ut->context = rep; |
+ return ut; |
+} |
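+ |
+//  Illustrative use of utext_openReplaceable (a sketch only; UnicodeString is used |
+//  here simply because it derives from Replaceable -- any Replaceable would do): |
+// |
+//      UErrorCode status = U_ZERO_ERROR; |
+//      UnicodeString rep(UNICODE_STRING_SIMPLE("abc")); |
+//      UText ut = UTEXT_INITIALIZER; |
+//      utext_openReplaceable(&ut, &rep, &status); |
+//      UChar xy[] = { 0x58, 0x59 };                 // "XY" |
+//      utext_replace(&ut, 1, 2, xy, 2, &status);    // "abc" -> "aXYc" |
+//      utext_close(&ut);                            // rep itself is not deleted |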
+ |
+U_CDECL_END |
+ |
+ |
+ |
+ |
+ |
+ |
+ |
+ |
+//------------------------------------------------------------------------------ |
+// |
+// UText implementation for UnicodeString (read/write) and |
+// for const UnicodeString (read only) |
+// (same implementation, only the flags are different) |
+// |
+// Use of UText data members: |
+// context pointer to UnicodeString |
+// p pointer to UnicodeString IF this UText owns the string |
+// and it must be deleted on close(). NULL otherwise. |
+// |
+//------------------------------------------------------------------------------ |
+ |
+U_CDECL_BEGIN |
+ |
+ |
+static UText * U_CALLCONV |
+unistrTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) { |
+ // First do a generic shallow clone. Does everything needed for the UText struct itself. |
+ dest = shallowTextClone(dest, src, status); |
+ |
+    //    For deep clones, make a copy of the UnicodeString. |
+    //      The copied UnicodeString storage is owned by the newly created UText clone. |
+    //      Setting the UTEXT_PROVIDER_OWNS_TEXT flag is the signal to the close() |
+    //      function to delete the copied UnicodeString. |
+ // |
+ if (deep && U_SUCCESS(*status)) { |
+ const UnicodeString *srcString = (const UnicodeString *)src->context; |
+ dest->context = new UnicodeString(*srcString); |
+ dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT); |
+ |
+ // with deep clone, the copy is writable, even when the source is not. |
+ dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE); |
+ } |
+ return dest; |
+} |
+ |
+static void U_CALLCONV |
+unistrTextClose(UText *ut) { |
+ // Most of the work of close is done by the generic UText framework close. |
+ // All that needs to be done here is delete the UnicodeString if the UText |
+ // owns it. This occurs if the UText was created by cloning. |
+ if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) { |
+ UnicodeString *str = (UnicodeString *)ut->context; |
+ delete str; |
+ ut->context = NULL; |
+ } |
+} |
+ |
+ |
+static int64_t U_CALLCONV |
+unistrTextLength(UText *t) { |
+ return ((const UnicodeString *)t->context)->length(); |
+} |
+ |
+ |
+static UBool U_CALLCONV |
+unistrTextAccess(UText *ut, int64_t index, UBool forward) { |
+ int32_t length = ut->chunkLength; |
+ ut->chunkOffset = pinIndex(index, length); |
+ |
+ // Check whether request is at the start or end |
+ UBool retVal = (forward && index<length) || (!forward && index>0); |
+ return retVal; |
+} |
+ |
+ |
+ |
+static int32_t U_CALLCONV |
+unistrTextExtract(UText *t, |
+ int64_t start, int64_t limit, |
+ UChar *dest, int32_t destCapacity, |
+ UErrorCode *pErrorCode) { |
+ const UnicodeString *us=(const UnicodeString *)t->context; |
+ int32_t length=us->length(); |
+ |
+ if(U_FAILURE(*pErrorCode)) { |
+ return 0; |
+ } |
+ if(destCapacity<0 || (dest==NULL && destCapacity>0)) { |
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+ } |
+ if(start<0 || start>limit) { |
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
+ return 0; |
+ } |
+ |
+ int32_t start32 = start<length ? us->getChar32Start((int32_t)start) : length; |
+ int32_t limit32 = limit<length ? us->getChar32Start((int32_t)limit) : length; |
+ |
+ length=limit32-start32; |
+ if (destCapacity>0 && dest!=NULL) { |
+ int32_t trimmedLength = length; |
+ if(trimmedLength>destCapacity) { |
+ trimmedLength=destCapacity; |
+ } |
+ us->extract(start32, trimmedLength, dest); |
+ t->chunkOffset = start32+trimmedLength; |
+ } else { |
+ t->chunkOffset = start32; |
+ } |
+ u_terminateUChars(dest, destCapacity, length, pErrorCode); |
+ return length; |
+} |
+ |
+static int32_t U_CALLCONV |
+unistrTextReplace(UText *ut, |
+ int64_t start, int64_t limit, |
+ const UChar *src, int32_t length, |
+ UErrorCode *pErrorCode) { |
+ UnicodeString *us=(UnicodeString *)ut->context; |
+ int32_t oldLength; |
+ |
+ if(U_FAILURE(*pErrorCode)) { |
+ return 0; |
+ } |
+ if(src==NULL && length!=0) { |
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+ } |
+ if(start>limit) { |
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
+ return 0; |
+ } |
+ oldLength=us->length(); |
+ int32_t start32 = pinIndex(start, oldLength); |
+ int32_t limit32 = pinIndex(limit, oldLength); |
+ if (start32 < oldLength) { |
+ start32 = us->getChar32Start(start32); |
+ } |
+ if (limit32 < oldLength) { |
+ limit32 = us->getChar32Start(limit32); |
+ } |
+ |
+ // replace |
+ us->replace(start32, limit32-start32, src, length); |
+ int32_t newLength = us->length(); |
+ |
+ // Update the chunk description. |
+ ut->chunkContents = us->getBuffer(); |
+ ut->chunkLength = newLength; |
+ ut->chunkNativeLimit = newLength; |
+ ut->nativeIndexingLimit = newLength; |
+ |
+ // Set iteration position to the point just following the newly inserted text. |
+ int32_t lengthDelta = newLength - oldLength; |
+ ut->chunkOffset = limit32 + lengthDelta; |
+ |
+ return lengthDelta; |
+} |
+ |
+static void U_CALLCONV |
+unistrTextCopy(UText *ut, |
+ int64_t start, int64_t limit, |
+ int64_t destIndex, |
+ UBool move, |
+ UErrorCode *pErrorCode) { |
+ UnicodeString *us=(UnicodeString *)ut->context; |
+ int32_t length=us->length(); |
+ |
+ if(U_FAILURE(*pErrorCode)) { |
+ return; |
+ } |
+ int32_t start32 = pinIndex(start, length); |
+ int32_t limit32 = pinIndex(limit, length); |
+ int32_t destIndex32 = pinIndex(destIndex, length); |
+ |
+ if( start32>limit32 || (start32<destIndex32 && destIndex32<limit32)) { |
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
+ return; |
+ } |
+ |
+ if(move) { |
+ // move: copy to destIndex, then replace original with nothing |
+ int32_t segLength=limit32-start32; |
+ us->copy(start32, limit32, destIndex32); |
+ if(destIndex32<start32) { |
+ start32+=segLength; |
+ } |
+ us->replace(start32, segLength, NULL, 0); |
+ } else { |
+ // copy |
+ us->copy(start32, limit32, destIndex32); |
+ } |
+ |
+ // update chunk description, set iteration position. |
+ ut->chunkContents = us->getBuffer(); |
+ if (move==FALSE) { |
+ // copy operation, string length grows |
+ ut->chunkLength += limit32-start32; |
+ ut->chunkNativeLimit = ut->chunkLength; |
+ ut->nativeIndexingLimit = ut->chunkLength; |
+ } |
+ |
+ // Iteration position to end of the newly inserted text. |
+ ut->chunkOffset = destIndex32+limit32-start32; |
+ if (move && destIndex32>start32) { |
+ ut->chunkOffset = destIndex32; |
+ } |
+ |
+} |
+ |
+static const struct UTextFuncs unistrFuncs = |
+{ |
+ sizeof(UTextFuncs), |
+ 0, 0, 0, // Reserved alignment padding |
+ unistrTextClone, |
+ unistrTextLength, |
+ unistrTextAccess, |
+ unistrTextExtract, |
+ unistrTextReplace, |
+ unistrTextCopy, |
+ NULL, // MapOffsetToNative, |
+ NULL, // MapIndexToUTF16, |
+ unistrTextClose, |
+ NULL, // spare 1 |
+ NULL, // spare 2 |
+ NULL // spare 3 |
+}; |
+ |
+ |
+ |
+U_CDECL_END |
+ |
+ |
+U_CAPI UText * U_EXPORT2 |
+utext_openUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status) { |
+ // TODO: use openConstUnicodeString, then add in the differences. |
+ // |
+ ut = utext_setup(ut, 0, status); |
+ if (U_SUCCESS(*status)) { |
+ ut->pFuncs = &unistrFuncs; |
+ ut->context = s; |
+ ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS)| |
+ I32_FLAG(UTEXT_PROVIDER_WRITABLE); |
+ |
+ ut->chunkContents = s->getBuffer(); |
+ ut->chunkLength = s->length(); |
+ ut->chunkNativeStart = 0; |
+ ut->chunkNativeLimit = ut->chunkLength; |
+ ut->nativeIndexingLimit = ut->chunkLength; |
+ } |
+ return ut; |
+} |
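+ |
+//  Illustrative use (a sketch only): wrap a writable UnicodeString and edit it |
+//  through the UText API. |
+// |
+//      UErrorCode status = U_ZERO_ERROR; |
+//      UnicodeString s(UNICODE_STRING_SIMPLE("hello")); |
+//      UText ut = UTEXT_INITIALIZER; |
+//      utext_openUnicodeString(&ut, &s, &status); |
+//      UChar bang = 0x21;                           // '!' |
+//      utext_replace(&ut, 5, 5, &bang, 1, &status); // s becomes "hello!" |
+//      utext_close(&ut);                            // s itself is not deleted |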
+ |
+ |
+ |
+U_CAPI UText * U_EXPORT2 |
+utext_openConstUnicodeString(UText *ut, const UnicodeString *s, UErrorCode *status) { |
+ ut = utext_setup(ut, 0, status); |
+ // note: use the standard (writable) function table for UnicodeString. |
+ // The flag settings disable writing, so having the functions in |
+ // the table is harmless. |
+ if (U_SUCCESS(*status)) { |
+ ut->pFuncs = &unistrFuncs; |
+ ut->context = s; |
+ ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS); |
+ ut->chunkContents = s->getBuffer(); |
+ ut->chunkLength = s->length(); |
+ ut->chunkNativeStart = 0; |
+ ut->chunkNativeLimit = ut->chunkLength; |
+ ut->nativeIndexingLimit = ut->chunkLength; |
+ } |
+ return ut; |
+} |
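+ |
+//  The const variant is for read-only use (again, a sketch only): |
+// |
+//      UErrorCode status = U_ZERO_ERROR; |
+//      const UnicodeString cs(UNICODE_STRING_SIMPLE("hello")); |
+//      UText ut = UTEXT_INITIALIZER; |
+//      utext_openConstUnicodeString(&ut, &cs, &status); |
+//      UChar32 first = utext_next32From(&ut, 0);    // U+0068, 'h' |
+//      utext_close(&ut); |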
+ |
+//------------------------------------------------------------------------------ |
+// |
+// UText implementation for const UChar * strings |
+// |
+// Use of UText data members: |
+//         context    pointer to the UChar string |
+// a length. -1 if not yet known. |
+// |
+// TODO: support 64 bit lengths. |
+// |
+//------------------------------------------------------------------------------ |
+ |
+U_CDECL_BEGIN |
+ |
+ |
+static UText * U_CALLCONV |
+ucstrTextClone(UText *dest, const UText * src, UBool deep, UErrorCode * status) { |
+ // First do a generic shallow clone. |
+ dest = shallowTextClone(dest, src, status); |
+ |
+ // For deep clones, make a copy of the string. |
+ // The copied storage is owned by the newly created clone. |
+    //    Setting the UTEXT_PROVIDER_OWNS_TEXT flag is the signal to the close() |
+    //    function to delete it. |
+ // |
+ if (deep && U_SUCCESS(*status)) { |
+ U_ASSERT(utext_nativeLength(dest) < INT32_MAX); |
+ int32_t len = (int32_t)utext_nativeLength(dest); |
+ |
+ // The cloned string IS going to be NUL terminated, whether or not the original was. |
+ const UChar *srcStr = (const UChar *)src->context; |
+ UChar *copyStr = (UChar *)uprv_malloc((len+1) * sizeof(UChar)); |
+ if (copyStr == NULL) { |
+ *status = U_MEMORY_ALLOCATION_ERROR; |
+ } else { |
+ int64_t i; |
+ for (i=0; i<len; i++) { |
+ copyStr[i] = srcStr[i]; |
+ } |
+ copyStr[len] = 0; |
+ dest->context = copyStr; |
+ dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT); |
+ } |
+ } |
+ return dest; |
+} |
+ |
+ |
+static void U_CALLCONV |
+ucstrTextClose(UText *ut) { |
+ // Most of the work of close is done by the generic UText framework close. |
+ // All that needs to be done here is delete the string if the UText |
+ // owns it. This occurs if the UText was created by cloning. |
+ if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) { |
+ UChar *s = (UChar *)ut->context; |
+ uprv_free(s); |
+ ut->context = NULL; |
+ } |
+} |
+ |
+ |
+ |
+static int64_t U_CALLCONV |
+ucstrTextLength(UText *ut) { |
+ if (ut->a < 0) { |
+        // NUL terminated, and we don't yet know the length.  Scan for it. |
+        //    Access is not convenient for doing this |
+        //    because the current iteration position can't be changed. |
+ const UChar *str = (const UChar *)ut->context; |
+ for (;;) { |
+ if (str[ut->chunkNativeLimit] == 0) { |
+ break; |
+ } |
+ ut->chunkNativeLimit++; |
+ } |
+ ut->a = ut->chunkNativeLimit; |
+ ut->chunkLength = (int32_t)ut->chunkNativeLimit; |
+ ut->nativeIndexingLimit = ut->chunkLength; |
+ ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); |
+ } |
+ return ut->a; |
+} |
+ |
+ |
+static UBool U_CALLCONV |
+ucstrTextAccess(UText *ut, int64_t index, UBool forward) { |
+ const UChar *str = (const UChar *)ut->context; |
+ |
+ // pin the requested index to the bounds of the string, |
+ // and set current iteration position. |
+ if (index<0) { |
+ index = 0; |
+ } else if (index < ut->chunkNativeLimit) { |
+ // The request data is within the chunk as it is known so far. |
+ // Put index on a code point boundary. |
+ U16_SET_CP_START(str, 0, index); |
+ } else if (ut->a >= 0) { |
+ // We know the length of this string, and the user is requesting something |
+ // at or beyond the length. Pin the requested index to the length. |
+ index = ut->a; |
+ } else { |
+ // Null terminated string, length not yet known, and the requested index |
+ // is beyond where we have scanned so far. |
+ // Scan to 32 UChars beyond the requested index. The strategy here is |
+ // to avoid fully scanning a long string when the caller only wants to |
+ // see a few characters at its beginning. |
+ int32_t scanLimit = (int32_t)index + 32; |
+ if ((index + 32)>INT32_MAX || (index + 32)<0 ) { // note: int64 expression |
+ scanLimit = INT32_MAX; |
+ } |
+ |
+ int32_t chunkLimit = (int32_t)ut->chunkNativeLimit; |
+ for (; chunkLimit<scanLimit; chunkLimit++) { |
+ if (str[chunkLimit] == 0) { |
+ // We found the end of the string. Remember it, pin the requested index to it, |
+ // and bail out of here. |
+ ut->a = chunkLimit; |
+ ut->chunkLength = chunkLimit; |
+ ut->nativeIndexingLimit = chunkLimit; |
+ if (index >= chunkLimit) { |
+ index = chunkLimit; |
+ } else { |
+ U16_SET_CP_START(str, 0, index); |
+ } |
+ |
+ ut->chunkNativeLimit = chunkLimit; |
+ ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); |
+ goto breakout; |
+ } |
+ } |
+ // We scanned through the next batch of UChars without finding the end. |
+ U16_SET_CP_START(str, 0, index); |
+ if (chunkLimit == INT32_MAX) { |
+ // Scanned to the limit of a 32 bit length. |
+            //  Forcibly trim the over-length string back so that the length fits in an int32. |
+ // TODO: add support for 64 bit strings. |
+ ut->a = chunkLimit; |
+ ut->chunkLength = chunkLimit; |
+ ut->nativeIndexingLimit = chunkLimit; |
+ if (index > chunkLimit) { |
+ index = chunkLimit; |
+ } |
+ ut->chunkNativeLimit = chunkLimit; |
+ ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); |
+ } else { |
+ // The endpoint of a chunk must not be left in the middle of a surrogate pair. |
+ // If the current end is on a lead surrogate, back the end up by one. |
+ // It doesn't matter if the end char happens to be an unpaired surrogate, |
+ // and it's simpler not to worry about it. |
+ if (U16_IS_LEAD(str[chunkLimit-1])) { |
+ --chunkLimit; |
+ } |
+ // Null-terminated chunk with end still unknown. |
+ // Update the chunk length to reflect what has been scanned thus far. |
+ // That the full length is still unknown is (still) flagged by |
+ // ut->a being < 0. |
+ ut->chunkNativeLimit = chunkLimit; |
+ ut->nativeIndexingLimit = chunkLimit; |
+ ut->chunkLength = chunkLimit; |
+ } |
+ |
+ } |
+breakout: |
+ U_ASSERT(index<=INT32_MAX); |
+ ut->chunkOffset = (int32_t)index; |
+ |
+ // Check whether request is at the start or end |
+ UBool retVal = (forward && index<ut->chunkNativeLimit) || (!forward && index>0); |
+ return retVal; |
+} |
+ |
+ |
+ |
+static int32_t U_CALLCONV |
+ucstrTextExtract(UText *ut, |
+ int64_t start, int64_t limit, |
+ UChar *dest, int32_t destCapacity, |
+ UErrorCode *pErrorCode) |
+{ |
+ if(U_FAILURE(*pErrorCode)) { |
+ return 0; |
+ } |
+ if(destCapacity<0 || (dest==NULL && destCapacity>0) || start>limit) { |
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+ return 0; |
+ } |
+ |
+ const UChar *s=(const UChar *)ut->context; |
+ int32_t si, di; |
+ |
+ int32_t start32; |
+ int32_t limit32; |
+ |
+ // Access the start. Does two things we need: |
+ // Pins 'start' to the length of the string, if it came in out-of-bounds. |
+ // Snaps 'start' to the beginning of a code point. |
+ ucstrTextAccess(ut, start, TRUE); |
+ U_ASSERT(start <= INT32_MAX); |
+ start32 = (int32_t)start; |
+ |
+ int32_t strLength=(int32_t)ut->a; |
+ if (strLength >= 0) { |
+ limit32 = pinIndex(limit, strLength); |
+ } else { |
+ limit32 = pinIndex(limit, INT32_MAX); |
+ } |
+ |
+ di = 0; |
+ for (si=start32; si<limit32; si++) { |
+ if (strLength<0 && s[si]==0) { |
+ // Just hit the end of a null-terminated string. |
+ ut->a = si; // set string length for this UText |
+ ut->chunkNativeLimit = si; |
+ ut->chunkLength = si; |
+ ut->nativeIndexingLimit = si; |
+ strLength = si; |
+ break; |
+ } |
+ if (di<destCapacity) { |
+ // only store if there is space. |
+ dest[di] = s[si]; |
+ } else { |
+ if (strLength>=0) { |
+ // We have filled the destination buffer, and the string length is known. |
+                //  Cut the loop short.  There is no need to scan for the string terminator. |
+ di = limit32 - start32; |
+ si = limit32; |
+ break; |
+ } |
+ } |
+ di++; |
+ } |
+ |
+ // If the limit index points to a lead surrogate of a pair, |
+ // add the corresponding trail surrogate to the destination. |
+ if (si>0 && U16_IS_LEAD(s[si-1]) && |
+ ((si<strLength || strLength<0) && U16_IS_TRAIL(s[si]))) |
+ { |
+ if (di<destCapacity) { |
+ // store only if there is space in the output buffer. |
+ dest[di++] = s[si++]; |
+ } |
+ } |
+ |
+ // Put iteration position at the point just following the extracted text |
+ ut->chunkOffset = uprv_min(strLength, start32 + destCapacity); |
+ |
+ // Add a terminating NUL if space in the buffer permits, |
+ // and set the error status as required. |
+ u_terminateUChars(dest, destCapacity, di, pErrorCode); |
+ return di; |
+} |
+ |
+static const struct UTextFuncs ucstrFuncs = |
+{ |
+ sizeof(UTextFuncs), |
+ 0, 0, 0, // Reserved alignment padding |
+ ucstrTextClone, |
+ ucstrTextLength, |
+ ucstrTextAccess, |
+ ucstrTextExtract, |
+ NULL, // Replace |
+ NULL, // Copy |
+ NULL, // MapOffsetToNative, |
+ NULL, // MapIndexToUTF16, |
+ ucstrTextClose, |
+ NULL, // spare 1 |
+ NULL, // spare 2 |
+ NULL, // spare 3 |
+}; |
+ |
+U_CDECL_END |
+ |
+static const UChar gEmptyUString[] = {0}; |
+ |
+U_CAPI UText * U_EXPORT2 |
+utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status) { |
+ if (U_FAILURE(*status)) { |
+ return NULL; |
+ } |
+ if(s==NULL && length==0) { |
+ s = gEmptyUString; |
+ } |
+ if (s==NULL || length < -1 || length>INT32_MAX) { |
+ *status = U_ILLEGAL_ARGUMENT_ERROR; |
+ return NULL; |
+ } |
+ ut = utext_setup(ut, 0, status); |
+ if (U_SUCCESS(*status)) { |
+ ut->pFuncs = &ucstrFuncs; |
+ ut->context = s; |
+ ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS); |
+ if (length==-1) { |
+ ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); |
+ } |
+ ut->a = length; |
+ ut->chunkContents = s; |
+ ut->chunkNativeStart = 0; |
+ ut->chunkNativeLimit = length>=0? length : 0; |
+ ut->chunkLength = (int32_t)ut->chunkNativeLimit; |
+ ut->chunkOffset = 0; |
+ ut->nativeIndexingLimit = ut->chunkLength; |
+ } |
+ return ut; |
+} |
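+ |
+//  Illustrative use (a sketch only): a NUL terminated UChar string opened with |
+//  length -1 is scanned lazily; asking for the native length forces the scan. |
+// |
+//      UErrorCode status = U_ZERO_ERROR; |
+//      static const UChar hi[] = { 0x68, 0x69, 0 }; // "hi", NUL terminated |
+//      UText ut = UTEXT_INITIALIZER; |
+//      utext_openUChars(&ut, hi, -1, &status); |
+//      int64_t len = utext_nativeLength(&ut);       // 2; also clears the |
+//                                                   // LENGTH_IS_EXPENSIVE property |
+//      utext_close(&ut); |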
+ |
+ |
+//------------------------------------------------------------------------------ |
+// |
+// UText implementation for text from ICU CharacterIterators |
+// |
+// Use of UText data members: |
+// context pointer to the CharacterIterator |
+// a length of the full text. |
+// p pointer to buffer 1 |
+// b start index of local buffer 1 contents |
+// q pointer to buffer 2 |
+// c start index of local buffer 2 contents |
+// r pointer to the character iterator if the UText owns it. |
+// Null otherwise. |
+// |
+//------------------------------------------------------------------------------ |
+#define CIBufSize 16 |
+ |
+U_CDECL_BEGIN |
+static void U_CALLCONV |
+charIterTextClose(UText *ut) { |
+ // Most of the work of close is done by the generic UText framework close. |
+ // All that needs to be done here is delete the CharacterIterator if the UText |
+ // owns it. This occurs if the UText was created by cloning. |
+ CharacterIterator *ci = (CharacterIterator *)ut->r; |
+ delete ci; |
+ ut->r = NULL; |
+} |
+ |
+static int64_t U_CALLCONV |
+charIterTextLength(UText *ut) { |
+ return (int32_t)ut->a; |
+} |
+ |
+static UBool U_CALLCONV |
+charIterTextAccess(UText *ut, int64_t index, UBool forward) { |
+ CharacterIterator *ci = (CharacterIterator *)ut->context; |
+ |
+ int32_t clippedIndex = (int32_t)index; |
+ if (clippedIndex<0) { |
+ clippedIndex=0; |
+ } else if (clippedIndex>=ut->a) { |
+ clippedIndex=(int32_t)ut->a; |
+ } |
+ int32_t neededIndex = clippedIndex; |
+ if (!forward && neededIndex>0) { |
+ // reverse iteration, want the position just before what was asked for. |
+ neededIndex--; |
+ } else if (forward && neededIndex==ut->a && neededIndex>0) { |
+ // Forward iteration, don't ask for something past the end of the text. |
+ neededIndex--; |
+ } |
+ |
+ // Find the native index of the start of the buffer containing what we want. |
+ neededIndex -= neededIndex % CIBufSize; |
+ |
+ UChar *buf = NULL; |
+ UBool needChunkSetup = TRUE; |
+ int i; |
+ if (ut->chunkNativeStart == neededIndex) { |
+ // The buffer we want is already the current chunk. |
+ needChunkSetup = FALSE; |
+ } else if (ut->b == neededIndex) { |
+ // The first buffer (buffer p) has what we need. |
+ buf = (UChar *)ut->p; |
+ } else if (ut->c == neededIndex) { |
+ // The second buffer (buffer q) has what we need. |
+ buf = (UChar *)ut->q; |
+ } else { |
+ // Neither buffer already has what we need. |
+ // Load new data from the character iterator. |
+ // Use the buf that is not the current buffer. |
+ buf = (UChar *)ut->p; |
+ if (ut->p == ut->chunkContents) { |
+ buf = (UChar *)ut->q; |
+ } |
+ ci->setIndex(neededIndex); |
+ for (i=0; i<CIBufSize; i++) { |
+ buf[i] = ci->nextPostInc(); |
+ if (i+neededIndex > ut->a) { |
+ break; |
+ } |
+ } |
+ } |
+ |
+ // We have a buffer with the data we need. |
+ // Set it up as the current chunk, if it wasn't already. |
+ if (needChunkSetup) { |
+ ut->chunkContents = buf; |
+ ut->chunkLength = CIBufSize; |
+ ut->chunkNativeStart = neededIndex; |
+ ut->chunkNativeLimit = neededIndex + CIBufSize; |
+ if (ut->chunkNativeLimit > ut->a) { |
+ ut->chunkNativeLimit = ut->a; |
+ ut->chunkLength = (int32_t)(ut->chunkNativeLimit)-(int32_t)(ut->chunkNativeStart); |
+ } |
+ ut->nativeIndexingLimit = ut->chunkLength; |
+ U_ASSERT(ut->chunkOffset>=0 && ut->chunkOffset<=CIBufSize); |
+ } |
+ ut->chunkOffset = clippedIndex - (int32_t)ut->chunkNativeStart; |
+ UBool success = (forward? ut->chunkOffset<ut->chunkLength : ut->chunkOffset>0); |
+ return success; |
+} |
+ |
+static UText * U_CALLCONV |
+charIterTextClone(UText *dest, const UText *src, UBool deep, UErrorCode * status) { |
+ if (U_FAILURE(*status)) { |
+ return NULL; |
+ } |
+ |
+ if (deep) { |
+ // There is no CharacterIterator API for cloning the underlying text storage. |
+ *status = U_UNSUPPORTED_ERROR; |
+ return NULL; |
+ } else { |
+ CharacterIterator *srcCI =(CharacterIterator *)src->context; |
+ srcCI = srcCI->clone(); |
+ dest = utext_openCharacterIterator(dest, srcCI, status); |
+        // Cast off const on getNativeIndex. |
+        //   For CharacterIterator based UTexts this is safe; the operation is logically const. |
+ int64_t ix = utext_getNativeIndex((UText *)src); |
+ utext_setNativeIndex(dest, ix); |
+ dest->r = srcCI; // flags that this UText owns the CharacterIterator |
+ } |
+ return dest; |
+} |
+ |
+static int32_t U_CALLCONV |
+charIterTextExtract(UText *ut, |
+ int64_t start, int64_t limit, |
+ UChar *dest, int32_t destCapacity, |
+ UErrorCode *status) |
+{ |
+ if(U_FAILURE(*status)) { |
+ return 0; |
+ } |
+ if(destCapacity<0 || (dest==NULL && destCapacity>0) || start>limit) { |
+ *status=U_ILLEGAL_ARGUMENT_ERROR; |
+ return 0; |
+ } |
+ int32_t length = (int32_t)ut->a; |
+ int32_t start32 = pinIndex(start, length); |
+ int32_t limit32 = pinIndex(limit, length); |
+ int32_t desti = 0; |
+ int32_t srci; |
+ int32_t copyLimit; |
+ |
+ CharacterIterator *ci = (CharacterIterator *)ut->context; |
+ ci->setIndex32(start32); // Moves ix to lead of surrogate pair, if needed. |
+ srci = ci->getIndex(); |
+ copyLimit = srci; |
+ while (srci<limit32) { |
+ UChar32 c = ci->next32PostInc(); |
+ int32_t len = U16_LENGTH(c); |
+ if (desti+len <= destCapacity) { |
+ U16_APPEND_UNSAFE(dest, desti, c); |
+ copyLimit = srci+len; |
+ } else { |
+ desti += len; |
+ *status = U_BUFFER_OVERFLOW_ERROR; |
+ } |
+ srci += len; |
+ } |
+ |
+ charIterTextAccess(ut, copyLimit, TRUE); |
+ |
+ u_terminateUChars(dest, destCapacity, desti, status); |
+ return desti; |
+} |
+ |
+static const struct UTextFuncs charIterFuncs = |
+{ |
+ sizeof(UTextFuncs), |
+ 0, 0, 0, // Reserved alignment padding |
+ charIterTextClone, |
+ charIterTextLength, |
+ charIterTextAccess, |
+ charIterTextExtract, |
+ NULL, // Replace |
+ NULL, // Copy |
+ NULL, // MapOffsetToNative, |
+ NULL, // MapIndexToUTF16, |
+ charIterTextClose, |
+ NULL, // spare 1 |
+ NULL, // spare 2 |
+ NULL // spare 3 |
+}; |
+U_CDECL_END |
+ |
+ |
+U_CAPI UText * U_EXPORT2 |
+utext_openCharacterIterator(UText *ut, CharacterIterator *ci, UErrorCode *status) { |
+ if (U_FAILURE(*status)) { |
+ return NULL; |
+ } |
+ |
+ if (ci->startIndex() > 0) { |
+ // No support for CharacterIterators that do not start indexing from zero. |
+ *status = U_UNSUPPORTED_ERROR; |
+ return NULL; |
+ } |
+ |
+ // Extra space in UText for 2 buffers of CIBufSize UChars each. |
+ int32_t extraSpace = 2 * CIBufSize * sizeof(UChar); |
+ ut = utext_setup(ut, extraSpace, status); |
+ if (U_SUCCESS(*status)) { |
+ ut->pFuncs = &charIterFuncs; |
+ ut->context = ci; |
+ ut->providerProperties = 0; |
+ ut->a = ci->endIndex(); // Length of text |
+ ut->p = ut->pExtra; // First buffer |
+ ut->b = -1; // Native index of first buffer contents |
+ ut->q = (UChar*)ut->pExtra+CIBufSize; // Second buffer |
+ ut->c = -1; // Native index of second buffer contents |
+ |
+ // Initialize current chunk contents to be empty. |
+ // First access will fault something in. |
+ // Note: The initial nativeStart and chunkOffset must sum to zero |
+ // so that getNativeIndex() will correctly compute to zero |
+        //   if no call to Access() has ever been made.  They can't both be |
+        //   zero, or Access() would think that the chunk is valid. |
+ ut->chunkContents = (UChar *)ut->p; |
+ ut->chunkNativeStart = -1; |
+ ut->chunkOffset = 1; |
+ ut->chunkNativeLimit = 0; |
+ ut->chunkLength = 0; |
+ ut->nativeIndexingLimit = ut->chunkOffset; // enables native indexing |
+ } |
+ return ut; |
+} |
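+ |
+//  Illustrative use (a sketch only; StringCharacterIterator lives in |
+//  unicode/schriter.h, which this file does not itself include): |
+// |
+//      UErrorCode status = U_ZERO_ERROR; |
+//      UnicodeString s(UNICODE_STRING_SIMPLE("example")); |
+//      StringCharacterIterator ci(s); |
+//      UText ut = UTEXT_INITIALIZER; |
+//      utext_openCharacterIterator(&ut, &ci, &status); |
+//      UChar32 c = utext_next32From(&ut, 0);        // U+0065, 'e' |
+//      utext_close(&ut);                            // ci is not deleted; only a |
+//                                                   // cloned UText owns its iterator |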
+ |
+ |
+ |
Property changes on: icu46/source/common/utext.cpp |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |