icu46/source/common/unistr.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/common/unistr.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 ******************************************************************************

	3 * Copyright (C) 1999-2010, International Business Machines Corporation and *

	4 * others. All Rights Reserved. *

	5 ******************************************************************************

	6 *

	7 * File unistr.cpp

	8 *

	9 * Modification History:

	10 *

	11 * Date Name Description

	12 * 09/25/98 stephen Creation.

	13 * 04/20/99 stephen Overhauled per 4/16 code review.

	14 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX

	15 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from

	16 * Replaceable.

	17 * 06/25/01 grhoten Removed the dependency on iostream

	18 ******************************************************************************

	19 */

	20

	21 #include "unicode/utypes.h"

	22 #include "unicode/putil.h"

	23 #include "cstring.h"

	24 #include "cmemory.h"

	25 #include "unicode/ustring.h"

	26 #include "unicode/unistr.h"

	27 #include "uhash.h"

	28 #include "ustr_imp.h"

	29 #include "umutex.h"

	30

	31 #if 0

	32

	33 #if U_IOSTREAM_SOURCE >= 199711

	34 #include <iostream>

	35 using namespace std;

	36 #elif U_IOSTREAM_SOURCE >= 198506

	37 #include <iostream.h>

	38 #endif

	39

	40 //DEBUGGING

	41 void

	42 print(const UnicodeString& s,

	43 const char *name)

	44 {

	45 UChar c;

	46 cout << name << ":\|";

	47 for(int i = 0; i < s.length(); ++i) {

	48 c = s[i];

	49 if(c>= 0x007E \|\| c < 0x0020)

	50 cout << "[0x" << hex << s[i] << "]";

	51 else

	52 cout << (char) s[i];

	53 }

	54 cout << '\|' << endl;

	55 }

	56

	57 void

	58 print(const UChar *s,

	59 int32_t len,

	60 const char *name)

	61 {

	62 UChar c;

	63 cout << name << ":\|";

	64 for(int i = 0; i < len; ++i) {

	65 c = s[i];

	66 if(c>= 0x007E \|\| c < 0x0020)

	67 cout << "[0x" << hex << s[i] << "]";

	68 else

	69 cout << (char) s[i];

	70 }

	71 cout << '\|' << endl;

	72 }

	73 // END DEBUGGING

	74 #endif

	75

	76 // Local function definitions for now

	77

	78 // need to copy areas that may overlap

	79 static

	80 inline void

	81 us_arrayCopy(const UChar *src, int32_t srcStart,

	82 UChar *dst, int32_t dstStart, int32_t count)

	83 {

	84 if(count>0) {

	85 uprv_memmove(dst+dstStart, src+srcStart, (size_t)(countsizeof(src)));

	86 }

	87 }

	88

	89 // u_unescapeAt() callback to get a UChar from a UnicodeString

	90 U_CDECL_BEGIN

	91 static UChar U_CALLCONV

	92 UnicodeString_charAt(int32_t offset, void *context) {

	93 return ((U_NAMESPACE_QUALIFIER UnicodeString*) context)->charAt(offset);

	94 }

	95 U_CDECL_END

	96

	97 U_NAMESPACE_BEGIN

	98

	99 /* The Replaceable virtual destructor can't be defined in the header

	100 due to how AIX works with multiple definitions of virtual functions.

	101 */

	102 Replaceable::~Replaceable() {}

	103 Replaceable::Replaceable() {}

	104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)

	105

	106 UnicodeString U_EXPORT2

	107 operator+ (const UnicodeString &s1, const UnicodeString &s2) {

	108 return

	109 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).

	110 append(s1).

	111 append(s2);

	112 }

	113

	114 //========================================

	115 // Reference Counting functions, put at top of file so that optimizing compilers

	116 // have a chance to automatically inline.

	117 //========================================

	118

	119 void

	120 UnicodeString::addRef()

	121 { umtx_atomic_inc((int32_t *)fUnion.fFields.fArray - 1);}

	122

	123 int32_t

	124 UnicodeString::removeRef()

	125 { return umtx_atomic_dec((int32_t *)fUnion.fFields.fArray - 1);}

	126

	127 int32_t

	128 UnicodeString::refCount() const

	129 {

	130 umtx_lock(NULL);

	131 // Note: without the lock to force a memory barrier, we might see a very

	132 // stale value on some multi-processor systems.

	133 int32_t count = ((int32_t )fUnion.fFields.fArray - 1);

	134 umtx_unlock(NULL);

	135 return count;

	136 }

	137

	138 void

	139 UnicodeString::releaseArray() {

	140 if((fFlags & kRefCounted) && removeRef() == 0) {

	141 uprv_free((int32_t *)fUnion.fFields.fArray - 1);

	142 }

	143 }

	144

	145

	146

	147 //========================================

	148 // Constructors

	149 //========================================

	150 UnicodeString::UnicodeString()

	151 : fShortLength(0),

	152 fFlags(kShortString)

	153 {}

	154

	155 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)

	156 : fShortLength(0),

	157 fFlags(0)

	158 {

	159 if(count <= 0 \|\| (uint32_t)c > 0x10ffff) {

	160 // just allocate and do not do anything else

	161 allocate(capacity);

	162 } else {

	163 // count > 0, allocate and fill the new string with count c's

	164 int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount;

	165 if(capacity < length) {

	166 capacity = length;

	167 }

	168 if(allocate(capacity)) {

	169 UChar *array = getArrayStart();

	170 int32_t i = 0;

	171

	172 // fill the new string with c

	173 if(unitCount == 1) {

	174 // fill with length UChars

	175 while(i < length) {

	176 array[i++] = (UChar)c;

	177 }

	178 } else {

	179 // get the code units for c

	180 UChar units[UTF_MAX_CHAR_LENGTH];

	181 UTF_APPEND_CHAR_UNSAFE(units, i, c);

	182

	183 // now it must be i==unitCount

	184 i = 0;

	185

	186 // for Unicode, unitCount can only be 1, 2, 3, or 4

	187 // 1 is handled above

	188 while(i < length) {

	189 int32_t unitIdx = 0;

	190 while(unitIdx < unitCount) {

	191 array[i++]=units[unitIdx++];

	192 }

	193 }

	194 }

	195 }

	196 setLength(length);

	197 }

	198 }

	199

	200 UnicodeString::UnicodeString(UChar ch)

	201 : fShortLength(1),

	202 fFlags(kShortString)

	203 {

	204 fUnion.fStackBuffer[0] = ch;

	205 }

	206

	207 UnicodeString::UnicodeString(UChar32 ch)

	208 : fShortLength(0),

	209 fFlags(kShortString)

	210 {

	211 int32_t i = 0;

	212 UBool isError = FALSE;

	213 U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);

	214 fShortLength = (int8_t)i;

	215 }

	216

	217 UnicodeString::UnicodeString(const UChar *text)

	218 : fShortLength(0),

	219 fFlags(kShortString)

	220 {

	221 doReplace(0, 0, text, 0, -1);

	222 }

	223

	224 UnicodeString::UnicodeString(const UChar *text,

	225 int32_t textLength)

	226 : fShortLength(0),

	227 fFlags(kShortString)

	228 {

	229 doReplace(0, 0, text, 0, textLength);

	230 }

	231

	232 UnicodeString::UnicodeString(UBool isTerminated,

	233 const UChar *text,

	234 int32_t textLength)

	235 : fShortLength(0),

	236 fFlags(kReadonlyAlias)

	237 {

	238 if(text == NULL) {

	239 // treat as an empty string, do not alias

	240 setToEmpty();

	241 } else if(textLength < -1 \|\|

	242 (textLength == -1 && !isTerminated) \|\|

	243 (textLength >= 0 && isTerminated && text[textLength] != 0)

	244 ) {

	245 setToBogus();

	246 } else {

	247 if(textLength == -1) {

	248 // text is terminated, or else it would have failed the above test

	249 textLength = u_strlen(text);

	250 }

	251 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLeng th);

	252 }

	253 }

	254

	255 UnicodeString::UnicodeString(UChar *buff,

	256 int32_t buffLength,

	257 int32_t buffCapacity)

	258 : fShortLength(0),

	259 fFlags(kWritableAlias)

	260 {

	261 if(buff == NULL) {

	262 // treat as an empty string, do not alias

	263 setToEmpty();

	264 } else if(buffLength < -1 \|\| buffCapacity < 0 \|\| buffLength > buffCapacity) {

	265 setToBogus();

	266 } else {

	267 if(buffLength == -1) {

	268 // fLength = u_strlen(buff); but do not look beyond buffCapacity

	269 const UChar p = buff, limit = buff + buffCapacity;

	270 while(p != limit && *p != 0) {

	271 ++p;

	272 }

	273 buffLength = (int32_t)(p - buff);

	274 }

	275 setArray(buff, buffLength, buffCapacity);

	276 }

	277 }

	278

	279 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)

	280 : fShortLength(0),

	281 fFlags(kShortString)

	282 {

	283 if(src==NULL) {

	284 // treat as an empty string

	285 } else {

	286 if(length<0) {

	287 length=(int32_t)uprv_strlen(src);

	288 }

	289 if(cloneArrayIfNeeded(length, length, FALSE)) {

	290 u_charsToUChars(src, getArrayStart(), length);

	291 setLength(length);

	292 } else {

	293 setToBogus();

	294 }

	295 }

	296 }

	297

	298 #if U_CHARSET_IS_UTF8

	299

	300 UnicodeString::UnicodeString(const char *codepageData)

	301 : fShortLength(0),

	302 fFlags(kShortString) {

	303 if(codepageData != 0) {

	304 setToUTF8(codepageData);

	305 }

	306 }

	307

	308 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)

	309 : fShortLength(0),

	310 fFlags(kShortString) {

	311 // if there's nothing to convert, do nothing

	312 if(codepageData == 0 \|\| dataLength == 0 \|\| dataLength < -1) {

	313 return;

	314 }

	315 if(dataLength == -1) {

	316 dataLength = (int32_t)uprv_strlen(codepageData);

	317 }

	318 setToUTF8(StringPiece(codepageData, dataLength));

	319 }

	320

	321 // else see unistr_cnv.cpp

	322 #endif

	323

	324 UnicodeString::UnicodeString(const UnicodeString& that)

	325 : Replaceable(),

	326 fShortLength(0),

	327 fFlags(kShortString)

	328 {

	329 copyFrom(that);

	330 }

	331

	332 UnicodeString::UnicodeString(const UnicodeString& that,

	333 int32_t srcStart)

	334 : Replaceable(),

	335 fShortLength(0),

	336 fFlags(kShortString)

	337 {

	338 setTo(that, srcStart);

	339 }

	340

	341 UnicodeString::UnicodeString(const UnicodeString& that,

	342 int32_t srcStart,

	343 int32_t srcLength)

	344 : Replaceable(),

	345 fShortLength(0),

	346 fFlags(kShortString)

	347 {

	348 setTo(that, srcStart, srcLength);

	349 }

	350

	351 // Replaceable base class clone() default implementation, does not clone

	352 Replaceable *

	353 Replaceable::clone() const {

	354 return NULL;

	355 }

	356

	357 // UnicodeString overrides clone() with a real implementation

	358 Replaceable *

	359 UnicodeString::clone() const {

	360 return new UnicodeString(*this);

	361 }

	362

	363 //========================================

	364 // array allocation

	365 //========================================

	366

	367 UBool

	368 UnicodeString::allocate(int32_t capacity) {

	369 if(capacity <= US_STACKBUF_SIZE) {

	370 fFlags = kShortString;

	371 } else {

	372 // count bytes for the refCounter and the string capacity, and

	373 // round up to a multiple of 16; then divide by 4 and allocate int32_t's

	374 // to be safely aligned for the refCount

	375 // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedB uffer()

	376 int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHA R + 15) & ~15) >> 2);

	377 int32_t array = (int32_t) uprv_malloc( sizeof(int32_t) * words );

	378 if(array != 0) {

	379 // set initial refCount and point behind the refCount

	380 *array++ = 1;

	381

	382 // have fArray point to the first UChar

	383 fUnion.fFields.fArray = (UChar *)array;

	384 fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SI ZEOF_UCHAR));

	385 fFlags = kLongString;

	386 } else {

	387 fShortLength = 0;

	388 fUnion.fFields.fArray = 0;

	389 fUnion.fFields.fCapacity = 0;

	390 fFlags = kIsBogus;

	391 return FALSE;

	392 }

	393 }

	394 return TRUE;

	395 }

	396

	397 //========================================

	398 // Destructor

	399 //========================================

	400 UnicodeString::~UnicodeString()

	401 {

	402 releaseArray();

	403 }

	404

	405 //========================================

	406 // Factory methods

	407 //========================================

	408

	409 UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {

	410 UnicodeString result;

	411 result.setToUTF8(utf8);

	412 return result;

	413 }

	414

	415 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {

	416 UnicodeString result;

	417 int32_t capacity;

	418 // Most UTF-32 strings will be BMP-only and result in a same-length

	419 // UTF-16 string. We overestimate the capacity just slightly,

	420 // just in case there are a few supplementary characters.

	421 if(length <= US_STACKBUF_SIZE) {

	422 capacity = US_STACKBUF_SIZE;

	423 } else {

	424 capacity = length + (length >> 4) + 4;

	425 }

	426 do {

	427 UChar *utf16 = result.getBuffer(capacity);

	428 int32_t length16;

	429 UErrorCode errorCode = U_ZERO_ERROR;

	430 u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,

	431 utf32, length,

	432 0xfffd, // Substitution character.

	433 NULL, // Don't care about number of substitutions.

	434 &errorCode);

	435 result.releaseBuffer(length16);

	436 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {

	437 capacity = length16 + 1; // +1 for the terminating NUL.

	438 continue;

	439 } else if(U_FAILURE(errorCode)) {

	440 result.setToBogus();

	441 }

	442 break;

	443 } while(TRUE);

	444 return result;

	445 }

	446

	447 //========================================

	448 // Assignment

	449 //========================================

	450

	451 UnicodeString &

	452 UnicodeString::operator=(const UnicodeString &src) {

	453 return copyFrom(src);

	454 }

	455

	456 UnicodeString &

	457 UnicodeString::fastCopyFrom(const UnicodeString &src) {

	458 return copyFrom(src, TRUE);

	459 }

	460

	461 UnicodeString &

	462 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {

	463 // if assigning to ourselves, do nothing

	464 if(this == 0 \|\| this == &src) {

	465 return *this;

	466 }

	467

	468 // is the right side bogus?

	469 if(&src == 0 \|\| src.isBogus()) {

	470 setToBogus();

	471 return *this;

	472 }

	473

	474 // delete the current contents

	475 releaseArray();

	476

	477 if(src.isEmpty()) {

	478 // empty string - use the stack buffer

	479 setToEmpty();

	480 return *this;

	481 }

	482

	483 // we always copy the length

	484 int32_t srcLength = src.length();

	485 setLength(srcLength);

	486

	487 // fLength>0 and not an "open" src.getBuffer(minCapacity)

	488 switch(src.fFlags) {

	489 case kShortString:

	490 // short string using the stack buffer, do the same

	491 fFlags = kShortString;

	492 uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZE OF_UCHAR);

	493 break;

	494 case kLongString:

	495 // src uses a refCounted string buffer, use that buffer with refCount

	496 // src is const, use a cast - we don't really change it

	497 ((UnicodeString &)src).addRef();

	498 // copy all fields, share the reference-counted buffer

	499 fUnion.fFields.fArray = src.fUnion.fFields.fArray;

	500 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;

	501 fFlags = src.fFlags;

	502 break;

	503 case kReadonlyAlias:

	504 if(fastCopy) {

	505 // src is a readonly alias, do the same

	506 // -> maintain the readonly alias as such

	507 fUnion.fFields.fArray = src.fUnion.fFields.fArray;

	508 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;

	509 fFlags = src.fFlags;

	510 break;

	511 }

	512 // else if(!fastCopy) fall through to case kWritableAlias

	513 // -> allocate a new buffer and copy the contents

	514 case kWritableAlias:

	515 // src is a writable alias; we make a copy of that instead

	516 if(allocate(srcLength)) {

	517 uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCH AR);

	518 break;

	519 }

	520 // if there is not enough memory, then fall through to setting to bogus

	521 default:

	522 // if src is bogus, set ourselves to bogus

	523 // do not call setToBogus() here because fArray and fFlags are not consisten t here

	524 fShortLength = 0;

	525 fUnion.fFields.fArray = 0;

	526 fUnion.fFields.fCapacity = 0;

	527 fFlags = kIsBogus;

	528 break;

	529 }

	530

	531 return *this;

	532 }

	533

	534 //========================================

	535 // Miscellaneous operations

	536 //========================================

	537

	538 UnicodeString UnicodeString::unescape() const {

	539 UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with ca pacity

	540 const UChar *array = getBuffer();

	541 int32_t len = length();

	542 int32_t prev = 0;

	543 for (int32_t i=0;;) {

	544 if (i == len) {

	545 result.append(array, prev, len - prev);

	546 break;

	547 }

	548 if (array[i++] == 0x5C /'\\'/) {

	549 result.append(array, prev, (i - 1) - prev);

	550 UChar32 c = unescapeAt(i); // advances i

	551 if (c < 0) {

	552 result.remove(); // return empty string

	553 break; // invalid escape sequence

	554 }

	555 result.append(c);

	556 prev = i;

	557 }

	558 }

	559 return result;

	560 }

	561

	562 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {

	563 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);

	564 }

	565

	566 //========================================

	567 // Read-only implementation

	568 //========================================

	569 int8_t

	570 UnicodeString::doCompare( int32_t start,

	571 int32_t length,

	572 const UChar *srcChars,

	573 int32_t srcStart,

	574 int32_t srcLength) const

	575 {

	576 // compare illegal string values

	577 // treat const UChar *srcChars==NULL as an empty string

	578 if(isBogus()) {

	579 return -1;

	580 }

	581

	582 // pin indices to legal values

	583 pinIndices(start, length);

	584

	585 if(srcChars == NULL) {

	586 srcStart = srcLength = 0;

	587 }

	588

	589 // get the correct pointer

	590 const UChar *chars = getArrayStart();

	591

	592 chars += start;

	593 srcChars += srcStart;

	594

	595 int32_t minLength;

	596 int8_t lengthResult;

	597

	598 // get the srcLength if necessary

	599 if(srcLength < 0) {

	600 srcLength = u_strlen(srcChars + srcStart);

	601 }

	602

	603 // are we comparing different lengths?

	604 if(length != srcLength) {

	605 if(length < srcLength) {

	606 minLength = length;

	607 lengthResult = -1;

	608 } else {

	609 minLength = srcLength;

	610 lengthResult = 1;

	611 }

	612 } else {

	613 minLength = length;

	614 lengthResult = 0;

	615 }

	616

	617 /*

	618 * note that uprv_memcmp() returns an int but we return an int8_t;

	619 * we need to take care not to truncate the result -

	620 * one way to do this is to right-shift the value to

	621 * move the sign bit into the lower 8 bits and making sure that this

	622 * does not become 0 itself

	623 */

	624

	625 if(minLength > 0 && chars != srcChars) {

	626 int32_t result;

	627

	628 # if U_IS_BIG_ENDIAN

	629 // big-endian: byte comparison works

	630 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));

	631 if(result != 0) {

	632 return (int8_t)(result >> 15 \| 1);

	633 }

	634 # else

	635 // little-endian: compare UChar units

	636 do {

	637 result = ((int32_t)(chars++) - (int32_t)(srcChars++));

	638 if(result != 0) {

	639 return (int8_t)(result >> 15 \| 1);

	640 }

	641 } while(--minLength > 0);

	642 # endif

	643 }

	644 return lengthResult;

	645 }

	646

	647 /* String compare in code point order - doCompare() compares in code unit order. */

	648 int8_t

	649 UnicodeString::doCompareCodePointOrder(int32_t start,

	650 int32_t length,

	651 const UChar *srcChars,

	652 int32_t srcStart,

	653 int32_t srcLength) const

	654 {

	655 // compare illegal string values

	656 // treat const UChar *srcChars==NULL as an empty string

	657 if(isBogus()) {

	658 return -1;

	659 }

	660

	661 // pin indices to legal values

	662 pinIndices(start, length);

	663

	664 if(srcChars == NULL) {

	665 srcStart = srcLength = 0;

	666 }

	667

	668 int32_t diff = uprv_strCompare(getArrayStart() + start, length, srcChars + src Start, srcLength, FALSE, TRUE);

	669 /* translate the 32-bit result into an 8-bit one */

	670 if(diff!=0) {

	671 return (int8_t)(diff >> 15 \| 1);

	672 } else {

	673 return 0;

	674 }

	675 }

	676

	677 int32_t

	678 UnicodeString::getLength() const {

	679 return length();

	680 }

	681

	682 UChar

	683 UnicodeString::getCharAt(int32_t offset) const {

	684 return charAt(offset);

	685 }

	686

	687 UChar32

	688 UnicodeString::getChar32At(int32_t offset) const {

	689 return char32At(offset);

	690 }

	691

	692 int32_t

	693 UnicodeString::countChar32(int32_t start, int32_t length) const {

	694 pinIndices(start, length);

	695 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL

	696 return u_countChar32(getArrayStart()+start, length);

	697 }

	698

	699 UBool

	700 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {

	701 pinIndices(start, length);

	702 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL

	703 return u_strHasMoreChar32Than(getArrayStart()+start, length, number);

	704 }

	705

	706 int32_t

	707 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {

	708 // pin index

	709 int32_t len = length();

	710 if(index<0) {

	711 index=0;

	712 } else if(index>len) {

	713 index=len;

	714 }

	715

	716 const UChar *array = getArrayStart();

	717 if(delta>0) {

	718 UTF_FWD_N(array, index, len, delta);

	719 } else {

	720 UTF_BACK_N(array, 0, index, -delta);

	721 }

	722

	723 return index;

	724 }

	725

	726 void

	727 UnicodeString::doExtract(int32_t start,

	728 int32_t length,

	729 UChar *dst,

	730 int32_t dstStart) const

	731 {

	732 // pin indices to legal values

	733 pinIndices(start, length);

	734

	735 // do not copy anything if we alias dst itself

	736 const UChar *array = getArrayStart();

	737 if(array + start != dst + dstStart) {

	738 us_arrayCopy(array, start, dst, dstStart, length);

	739 }

	740 }

	741

	742 int32_t

	743 UnicodeString::extract(UChar *dest, int32_t destCapacity,

	744 UErrorCode &errorCode) const {

	745 int32_t len = length();

	746 if(U_SUCCESS(errorCode)) {

	747 if(isBogus() \|\| destCapacity<0 \|\| (destCapacity>0 && dest==0)) {

	748 errorCode=U_ILLEGAL_ARGUMENT_ERROR;

	749 } else {

	750 const UChar *array = getArrayStart();

	751 if(len>0 && len<=destCapacity && array!=dest) {

	752 uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);

	753 }

	754 return u_terminateUChars(dest, destCapacity, len, &errorCode);

	755 }

	756 }

	757

	758 return len;

	759 }

	760

	761 int32_t

	762 UnicodeString::extract(int32_t start,

	763 int32_t length,

	764 char *target,

	765 int32_t targetCapacity,

	766 enum EInvariant) const

	767 {

	768 // if the arguments are illegal, then do nothing

	769 if(targetCapacity < 0 \|\| (targetCapacity > 0 && target == NULL)) {

	770 return 0;

	771 }

	772

	773 // pin the indices to legal values

	774 pinIndices(start, length);

	775

	776 if(length <= targetCapacity) {

	777 u_UCharsToChars(getArrayStart() + start, target, length);

	778 }

	779 UErrorCode status = U_ZERO_ERROR;

	780 return u_terminateChars(target, targetCapacity, length, &status);

	781 }

	782

	783 UnicodeString

	784 UnicodeString::tempSubString(int32_t start, int32_t len) const {

	785 pinIndices(start, len);

	786 const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer

	787 if(array==NULL) {

	788 array=fUnion.fStackBuffer; // anything not NULL because that would make an empty string

	789 len=-2; // bogus result string

	790 }

	791 return UnicodeString(FALSE, array + start, len);

	792 }

	793

	794 int32_t

	795 UnicodeString::toUTF8(int32_t start, int32_t len,

	796 char *target, int32_t capacity) const {

	797 pinIndices(start, len);

	798 int32_t length8;

	799 UErrorCode errorCode = U_ZERO_ERROR;

	800 u_strToUTF8WithSub(target, capacity, &length8,

	801 getBuffer() + start, len,

	802 0xFFFD, // Standard substitution character.

	803 NULL, // Don't care about number of substitutions.

	804 &errorCode);

	805 return length8;

	806 }

	807

	808 #if U_CHARSET_IS_UTF8

	809

	810 int32_t

	811 UnicodeString::extract(int32_t start, int32_t len,

	812 char *target, uint32_t dstSize) const {

	813 // if the arguments are illegal, then do nothing

	814 if(/dstSize < 0 \|\| /(dstSize > 0 && target == 0)) {

	815 return 0;

	816 }

	817 return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0 x7fffffff);

	818 }

	819

	820 // else see unistr_cnv.cpp

	821 #endif

	822

	823 void

	824 UnicodeString::extractBetween(int32_t start,

	825 int32_t limit,

	826 UnicodeString& target) const {

	827 pinIndex(start);

	828 pinIndex(limit);

	829 doExtract(start, limit - start, target);

	830 }

	831

	832 // When converting from UTF-16 to UTF-8, the result will have at most 3 times

	833 // as many bytes as the source has UChars.

	834 // The "worst cases" are writing systems like Indic, Thai and CJK with

	835 // 3:1 bytes:UChars.

	836 void

	837 UnicodeString::toUTF8(ByteSink &sink) const {

	838 int32_t length16 = length();

	839 if(length16 != 0) {

	840 char stackBuffer[1024];

	841 int32_t capacity = (int32_t)sizeof(stackBuffer);

	842 UBool utf8IsOwned = FALSE;

	843 char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,

	844 3*length16,

	845 stackBuffer, capacity,

	846 &capacity);

	847 int32_t length8 = 0;

	848 UErrorCode errorCode = U_ZERO_ERROR;

	849 u_strToUTF8WithSub(utf8, capacity, &length8,

	850 getBuffer(), length16,

	851 0xFFFD, // Standard substitution character.

	852 NULL, // Don't care about number of substitutions.

	853 &errorCode);

	854 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {

	855 utf8 = (char *)uprv_malloc(length8);

	856 if(utf8 != NULL) {

	857 utf8IsOwned = TRUE;

	858 errorCode = U_ZERO_ERROR;

	859 u_strToUTF8WithSub(utf8, length8, &length8,

	860 getBuffer(), length16,

	861 0xFFFD, // Standard substitution character.

	862 NULL, // Don't care about number of substitutions.

	863 &errorCode);

	864 } else {

	865 errorCode = U_MEMORY_ALLOCATION_ERROR;

	866 }

	867 }

	868 if(U_SUCCESS(errorCode)) {

	869 sink.Append(utf8, length8);

	870 sink.Flush();

	871 }

	872 if(utf8IsOwned) {

	873 uprv_free(utf8);

	874 }

	875 }

	876 }

	877

	878 int32_t

	879 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {

	880 int32_t length32=0;

	881 if(U_SUCCESS(errorCode)) {

	882 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.

	883 u_strToUTF32WithSub(utf32, capacity, &length32,

	884 getBuffer(), length(),

	885 0xfffd, // Substitution character.

	886 NULL, // Don't care about number of substitutions.

	887 &errorCode);

	888 }

	889 return length32;

	890 }

	891

	892 int32_t

	893 UnicodeString::indexOf(const UChar *srcChars,

	894 int32_t srcStart,

	895 int32_t srcLength,

	896 int32_t start,

	897 int32_t length) const

	898 {

	899 if(isBogus() \|\| srcChars == 0 \|\| srcStart < 0 \|\| srcLength == 0) {

	900 return -1;

	901 }

	902

	903 // UnicodeString does not find empty substrings

	904 if(srcLength < 0 && srcChars[srcStart] == 0) {

	905 return -1;

	906 }

	907

	908 // get the indices within bounds

	909 pinIndices(start, length);

	910

	911 // find the first occurrence of the substring

	912 const UChar *array = getArrayStart();

	913 const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart , srcLength);

	914 if(match == NULL) {

	915 return -1;

	916 } else {

	917 return (int32_t)(match - array);

	918 }

	919 }

	920

	921 int32_t

	922 UnicodeString::doIndexOf(UChar c,

	923 int32_t start,

	924 int32_t length) const

	925 {

	926 // pin indices

	927 pinIndices(start, length);

	928

	929 // find the first occurrence of c

	930 const UChar *array = getArrayStart();

	931 const UChar *match = u_memchr(array + start, c, length);

	932 if(match == NULL) {

	933 return -1;

	934 } else {

	935 return (int32_t)(match - array);

	936 }

	937 }

	938

	939 int32_t

	940 UnicodeString::doIndexOf(UChar32 c,

	941 int32_t start,

	942 int32_t length) const {

	943 // pin indices

	944 pinIndices(start, length);

	945

	946 // find the first occurrence of c

	947 const UChar *array = getArrayStart();

	948 const UChar *match = u_memchr32(array + start, c, length);

	949 if(match == NULL) {

	950 return -1;

	951 } else {

	952 return (int32_t)(match - array);

	953 }

	954 }

	955

	956 int32_t

	957 UnicodeString::lastIndexOf(const UChar *srcChars,

	958 int32_t srcStart,

	959 int32_t srcLength,

	960 int32_t start,

	961 int32_t length) const

	962 {

	963 if(isBogus() \|\| srcChars == 0 \|\| srcStart < 0 \|\| srcLength == 0) {

	964 return -1;

	965 }

	966

	967 // UnicodeString does not find empty substrings

	968 if(srcLength < 0 && srcChars[srcStart] == 0) {

	969 return -1;

	970 }

	971

	972 // get the indices within bounds

	973 pinIndices(start, length);

	974

	975 // find the last occurrence of the substring

	976 const UChar *array = getArrayStart();

	977 const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);

	978 if(match == NULL) {

	979 return -1;

	980 } else {

	981 return (int32_t)(match - array);

	982 }

	983 }

	984

	985 int32_t

	986 UnicodeString::doLastIndexOf(UChar c,

	987 int32_t start,

	988 int32_t length) const

	989 {

	990 if(isBogus()) {

	991 return -1;

	992 }

	993

	994 // pin indices

	995 pinIndices(start, length);

	996

	997 // find the last occurrence of c

	998 const UChar *array = getArrayStart();

	999 const UChar *match = u_memrchr(array + start, c, length);

	1000 if(match == NULL) {

	1001 return -1;

	1002 } else {

	1003 return (int32_t)(match - array);

	1004 }

	1005 }

	1006

	1007 int32_t

	1008 UnicodeString::doLastIndexOf(UChar32 c,

	1009 int32_t start,

	1010 int32_t length) const {

	1011 // pin indices

	1012 pinIndices(start, length);

	1013

	1014 // find the last occurrence of c

	1015 const UChar *array = getArrayStart();

	1016 const UChar *match = u_memrchr32(array + start, c, length);

	1017 if(match == NULL) {

	1018 return -1;

	1019 } else {

	1020 return (int32_t)(match - array);

	1021 }

	1022 }

	1023

	1024 //========================================

	1025 // Write implementation

	1026 //========================================

	1027

	1028 UnicodeString&

	1029 UnicodeString::findAndReplace(int32_t start,

	1030 int32_t length,

	1031 const UnicodeString& oldText,

	1032 int32_t oldStart,

	1033 int32_t oldLength,

	1034 const UnicodeString& newText,

	1035 int32_t newStart,

	1036 int32_t newLength)

	1037 {

	1038 if(isBogus() \|\| oldText.isBogus() \|\| newText.isBogus()) {

	1039 return *this;

	1040 }

	1041

	1042 pinIndices(start, length);

	1043 oldText.pinIndices(oldStart, oldLength);

	1044 newText.pinIndices(newStart, newLength);

	1045

	1046 if(oldLength == 0) {

	1047 return *this;

	1048 }

	1049

	1050 while(length > 0 && length >= oldLength) {

	1051 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);

	1052 if(pos < 0) {

	1053 // no more oldText's here: done

	1054 break;

	1055 } else {

	1056 // we found oldText, replace it by newText and go beyond it

	1057 replace(pos, oldLength, newText, newStart, newLength);

	1058 length -= pos + oldLength - start;

	1059 start = pos + newLength;

	1060 }

	1061 }

	1062

	1063 return *this;

	1064 }

	1065

	1066

	1067 void

	1068 UnicodeString::setToBogus()

	1069 {

	1070 releaseArray();

	1071

	1072 fShortLength = 0;

	1073 fUnion.fFields.fArray = 0;

	1074 fUnion.fFields.fCapacity = 0;

	1075 fFlags = kIsBogus;

	1076 }

	1077

	1078 // turn a bogus string into an empty one

	1079 void

	1080 UnicodeString::unBogus() {

	1081 if(fFlags & kIsBogus) {

	1082 setToEmpty();

	1083 }

	1084 }

	1085

	1086 // setTo() analogous to the readonly-aliasing constructor with the same signatur e

	1087 UnicodeString &

	1088 UnicodeString::setTo(UBool isTerminated,

	1089 const UChar *text,

	1090 int32_t textLength)

	1091 {

	1092 if(fFlags & kOpenGetBuffer) {

	1093 // do not modify a string that has an "open" getBuffer(minCapacity)

	1094 return *this;

	1095 }

	1096

	1097 if(text == NULL) {

	1098 // treat as an empty string, do not alias

	1099 releaseArray();

	1100 setToEmpty();

	1101 return *this;

	1102 }

	1103

	1104 if( textLength < -1 \|\|

	1105 (textLength == -1 && !isTerminated) \|\|

	1106 (textLength >= 0 && isTerminated && text[textLength] != 0)

	1107 ) {

	1108 setToBogus();

	1109 return *this;

	1110 }

	1111

	1112 releaseArray();

	1113

	1114 if(textLength == -1) {

	1115 // text is terminated, or else it would have failed the above test

	1116 textLength = u_strlen(text);

	1117 }

	1118 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength );

	1119

	1120 fFlags = kReadonlyAlias;

	1121 return *this;

	1122 }

	1123

	1124 // setTo() analogous to the writable-aliasing constructor with the same signatur e

	1125 UnicodeString &

	1126 UnicodeString::setTo(UChar *buffer,

	1127 int32_t buffLength,

	1128 int32_t buffCapacity) {

	1129 if(fFlags & kOpenGetBuffer) {

	1130 // do not modify a string that has an "open" getBuffer(minCapacity)

	1131 return *this;

	1132 }

	1133

	1134 if(buffer == NULL) {

	1135 // treat as an empty string, do not alias

	1136 releaseArray();

	1137 setToEmpty();

	1138 return *this;

	1139 }

	1140

	1141 if(buffLength < -1 \|\| buffCapacity < 0 \|\| buffLength > buffCapacity) {

	1142 setToBogus();

	1143 return *this;

	1144 } else if(buffLength == -1) {

	1145 // buffLength = u_strlen(buff); but do not look beyond buffCapacity

	1146 const UChar p = buffer, limit = buffer + buffCapacity;

	1147 while(p != limit && *p != 0) {

	1148 ++p;

	1149 }

	1150 buffLength = (int32_t)(p - buffer);

	1151 }

	1152

	1153 releaseArray();

	1154

	1155 setArray(buffer, buffLength, buffCapacity);

	1156 fFlags = kWritableAlias;

	1157 return *this;

	1158 }

	1159

	1160 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {

	1161 unBogus();

	1162 int32_t length = utf8.length();

	1163 int32_t capacity;

	1164 // The UTF-16 string will be at most as long as the UTF-8 string.

	1165 if(length <= US_STACKBUF_SIZE) {

	1166 capacity = US_STACKBUF_SIZE;

	1167 } else {

	1168 capacity = length + 1; // +1 for the terminating NUL.

	1169 }

	1170 UChar *utf16 = getBuffer(capacity);

	1171 int32_t length16;

	1172 UErrorCode errorCode = U_ZERO_ERROR;

	1173 u_strFromUTF8WithSub(utf16, getCapacity(), &length16,

	1174 utf8.data(), length,

	1175 0xfffd, // Substitution character.

	1176 NULL, // Don't care about number of substitutions.

	1177 &errorCode);

	1178 releaseBuffer(length16);

	1179 if(U_FAILURE(errorCode)) {

	1180 setToBogus();

	1181 }

	1182 return *this;

	1183 }

	1184

	1185 UnicodeString&

	1186 UnicodeString::setCharAt(int32_t offset,

	1187 UChar c)

	1188 {

	1189 int32_t len = length();

	1190 if(cloneArrayIfNeeded() && len > 0) {

	1191 if(offset < 0) {

	1192 offset = 0;

	1193 } else if(offset >= len) {

	1194 offset = len - 1;

	1195 }

	1196

	1197 getArrayStart()[offset] = c;

	1198 }

	1199 return *this;

	1200 }

	1201

	1202 UnicodeString&

	1203 UnicodeString::doReplace( int32_t start,

	1204 int32_t length,

	1205 const UnicodeString& src,

	1206 int32_t srcStart,

	1207 int32_t srcLength)

	1208 {

	1209 if(!src.isBogus()) {

	1210 // pin the indices to legal values

	1211 src.pinIndices(srcStart, srcLength);

	1212

	1213 // get the characters from src

	1214 // and replace the range in ourselves with them

	1215 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);

	1216 } else {

	1217 // remove the range

	1218 return doReplace(start, length, 0, 0, 0);

	1219 }

	1220 }

	1221

	1222 UnicodeString&

	1223 UnicodeString::doReplace(int32_t start,

	1224 int32_t length,

	1225 const UChar *srcChars,

	1226 int32_t srcStart,

	1227 int32_t srcLength)

	1228 {

	1229 if(!isWritable()) {

	1230 return *this;

	1231 }

	1232

	1233 int32_t oldLength = this->length();

	1234

	1235 // optimize (read-only alias).remove(0, start) and .remove(start, end)

	1236 if((fFlags&kBufferIsReadonly) && srcLength == 0) {

	1237 if(start == 0) {

	1238 // remove prefix by adjusting the array pointer

	1239 pinIndex(length);

	1240 fUnion.fFields.fArray += length;

	1241 fUnion.fFields.fCapacity -= length;

	1242 setLength(oldLength - length);

	1243 return *this;

	1244 } else {

	1245 pinIndex(start);

	1246 if(length >= (oldLength - start)) {

	1247 // remove suffix by reducing the length (like truncate())

	1248 setLength(start);

	1249 fUnion.fFields.fCapacity = start; // not NUL-terminated any more

	1250 return *this;

	1251 }

	1252 }

	1253 }

	1254

	1255 if(srcChars == 0) {

	1256 srcStart = srcLength = 0;

	1257 } else if(srcLength < 0) {

	1258 // get the srcLength if necessary

	1259 srcLength = u_strlen(srcChars + srcStart);

	1260 }

	1261

	1262 // calculate the size of the string after the replace

	1263 int32_t newSize;

	1264

	1265 // optimize append() onto a large-enough, owned string

	1266 if(start >= oldLength) {

	1267 newSize = oldLength + srcLength;

	1268 if(newSize <= getCapacity() && isBufferWritable()) {

	1269 us_arrayCopy(srcChars, srcStart, getArrayStart(), oldLength, srcLength);

	1270 setLength(newSize);

	1271 return *this;

	1272 } else {

	1273 // pin the indices to legal values

	1274 start = oldLength;

	1275 length = 0;

	1276 }

	1277 } else {

	1278 // pin the indices to legal values

	1279 pinIndices(start, length);

	1280

	1281 newSize = oldLength - length + srcLength;

	1282 }

	1283

	1284 // the following may change fArray but will not copy the current contents;

	1285 // therefore we need to keep the current fArray

	1286 UChar oldStackBuffer[US_STACKBUF_SIZE];

	1287 UChar *oldArray;

	1288 if((fFlags&kUsingStackBuffer) && (newSize > US_STACKBUF_SIZE)) {

	1289 // copy the stack buffer contents because it will be overwritten with

	1290 // fUnion.fFields values

	1291 u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);

	1292 oldArray = oldStackBuffer;

	1293 } else {

	1294 oldArray = getArrayStart();

	1295 }

	1296

	1297 // clone our array and allocate a bigger array if needed

	1298 int32_t *bufferToDelete = 0;

	1299 if(!cloneArrayIfNeeded(newSize, newSize + (newSize >> 2) + kGrowSize,

	1300 FALSE, &bufferToDelete)

	1301 ) {

	1302 return *this;

	1303 }

	1304

	1305 // now do the replace

	1306

	1307 UChar *newArray = getArrayStart();

	1308 if(newArray != oldArray) {

	1309 // if fArray changed, then we need to copy everything except what will chang e

	1310 us_arrayCopy(oldArray, 0, newArray, 0, start);

	1311 us_arrayCopy(oldArray, start + length,

	1312 newArray, start + srcLength,

	1313 oldLength - (start + length));

	1314 } else if(length != srcLength) {

	1315 // fArray did not change; copy only the portion that isn't changing, leaving a hole

	1316 us_arrayCopy(oldArray, start + length,

	1317 newArray, start + srcLength,

	1318 oldLength - (start + length));

	1319 }

	1320

	1321 // now fill in the hole with the new string

	1322 us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);

	1323

	1324 setLength(newSize);

	1325

	1326 // delayed delete in case srcChars == fArray when we started, and

	1327 // to keep oldArray alive for the above operations

	1328 if (bufferToDelete) {

	1329 uprv_free(bufferToDelete);

	1330 }

	1331

	1332 return *this;

	1333 }

	1334

	1335 /**

	1336 * Replaceable API

	1337 */

	1338 void

	1339 UnicodeString::handleReplaceBetween(int32_t start,

	1340 int32_t limit,

	1341 const UnicodeString& text) {

	1342 replaceBetween(start, limit, text);

	1343 }

	1344

	1345 /**

	1346 * Replaceable API

	1347 */

	1348 void

	1349 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {

	1350 if (limit <= start) {

	1351 return; // Nothing to do; avoid bogus malloc call

	1352 }

	1353 UChar* text = (UChar) uprv_malloc( sizeof(UChar) (limit - start) );

	1354 // Check to make sure text is not null.

	1355 if (text != NULL) {

	1356 extractBetween(start, limit, text, 0);

	1357 insert(dest, text, 0, limit - start);

	1358 uprv_free(text);

	1359 }

	1360 }

	1361

	1362 /**

	1363 * Replaceable API

	1364 *

	1365 * NOTE: This is for the Replaceable class. There is no rep.cpp,

	1366 * so we implement this function here.

	1367 */

	1368 UBool Replaceable::hasMetaData() const {

	1369 return TRUE;

	1370 }

	1371

	1372 /**

	1373 * Replaceable API

	1374 */

	1375 UBool UnicodeString::hasMetaData() const {

	1376 return FALSE;

	1377 }

	1378

	1379 UnicodeString&

	1380 UnicodeString::doReverse(int32_t start, int32_t length) {

	1381 if(length <= 1 \|\| !cloneArrayIfNeeded()) {

	1382 return *this;

	1383 }

	1384

	1385 // pin the indices to legal values

	1386 pinIndices(start, length);

	1387 if(length <= 1) { // pinIndices() might have shrunk the length

	1388 return *this;

	1389 }

	1390

	1391 UChar *left = getArrayStart() + start;

	1392 UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2)

	1393 UChar swap;

	1394 UBool hasSupplementary = FALSE;

	1395

	1396 // Before the loop we know left<right because length>=2.

	1397 do {

	1398 hasSupplementary \|= (UBool)U16_IS_LEAD(swap = *left);

	1399 hasSupplementary \|= (UBool)U16_IS_LEAD(left++ = right);

	1400 *right-- = swap;

	1401 } while(left < right);

	1402 // Make sure to test the middle code unit of an odd-length string.

	1403 // Redundant if the length is even.

	1404 hasSupplementary \|= (UBool)U16_IS_LEAD(*left);

	1405

	1406 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */

	1407 if(hasSupplementary) {

	1408 UChar swap2;

	1409

	1410 left = getArrayStart() + start;

	1411 right = left + length - 1; // -1 so that we can look at *(left+1) if left<ri ght

	1412 while(left < right) {

	1413 if(U16_IS_TRAIL(swap = left) && U16_IS_LEAD(swap2 = (left + 1))) {

	1414 *left++ = swap2;

	1415 *left++ = swap;

	1416 } else {

	1417 ++left;

	1418 }

	1419 }

	1420 }

	1421

	1422 return *this;

	1423 }

	1424

	1425 UBool

	1426 UnicodeString::padLeading(int32_t targetLength,

	1427 UChar padChar)

	1428 {

	1429 int32_t oldLength = length();

	1430 if(oldLength >= targetLength \|\| !cloneArrayIfNeeded(targetLength)) {

	1431 return FALSE;

	1432 } else {

	1433 // move contents up by padding width

	1434 UChar *array = getArrayStart();

	1435 int32_t start = targetLength - oldLength;

	1436 us_arrayCopy(array, 0, array, start, oldLength);

	1437

	1438 // fill in padding character

	1439 while(--start >= 0) {

	1440 array[start] = padChar;

	1441 }

	1442 setLength(targetLength);

	1443 return TRUE;

	1444 }

	1445 }

	1446

	1447 UBool

	1448 UnicodeString::padTrailing(int32_t targetLength,

	1449 UChar padChar)

	1450 {

	1451 int32_t oldLength = length();

	1452 if(oldLength >= targetLength \|\| !cloneArrayIfNeeded(targetLength)) {

	1453 return FALSE;

	1454 } else {

	1455 // fill in padding character

	1456 UChar *array = getArrayStart();

	1457 int32_t length = targetLength;

	1458 while(--length >= oldLength) {

	1459 array[length] = padChar;

	1460 }

	1461 setLength(targetLength);

	1462 return TRUE;

	1463 }

	1464 }

	1465

	1466 //========================================

	1467 // Hashing

	1468 //========================================

	1469 int32_t

	1470 UnicodeString::doHashCode() const

	1471 {

	1472 /* Delegate hash computation to uhash. This makes UnicodeString

	1473 * hashing consistent with UChar* hashing. */

	1474 int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length());

	1475 if (hashCode == kInvalidHashCode) {

	1476 hashCode = kEmptyHashCode;

	1477 }

	1478 return hashCode;

	1479 }

	1480

	1481 //========================================

	1482 // External Buffer

	1483 //========================================

	1484

	1485 UChar *

	1486 UnicodeString::getBuffer(int32_t minCapacity) {

	1487 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {

	1488 fFlags\|=kOpenGetBuffer;

	1489 fShortLength=0;

	1490 return getArrayStart();

	1491 } else {

	1492 return 0;

	1493 }

	1494 }

	1495

	1496 void

	1497 UnicodeString::releaseBuffer(int32_t newLength) {

	1498 if(fFlags&kOpenGetBuffer && newLength>=-1) {

	1499 // set the new fLength

	1500 int32_t capacity=getCapacity();

	1501 if(newLength==-1) {

	1502 // the new length is the string length, capped by fCapacity

	1503 const UChar array=getArrayStart(), p=array, *limit=array+capacity;

	1504 while(p<limit && *p!=0) {

	1505 ++p;

	1506 }

	1507 newLength=(int32_t)(p-array);

	1508 } else if(newLength>capacity) {

	1509 newLength=capacity;

	1510 }

	1511 setLength(newLength);

	1512 fFlags&=~kOpenGetBuffer;

	1513 }

	1514 }

	1515

	1516 //========================================

	1517 // Miscellaneous

	1518 //========================================

	1519 UBool

	1520 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,

	1521 int32_t growCapacity,

	1522 UBool doCopyArray,

	1523 int32_t **pBufferToDelete,

	1524 UBool forceClone) {

	1525 // default parameters need to be static, therefore

	1526 // the defaults are -1 to have convenience defaults

	1527 if(newCapacity == -1) {

	1528 newCapacity = getCapacity();

	1529 }

	1530

	1531 // while a getBuffer(minCapacity) is "open",

	1532 // prevent any modifications of the string by returning FALSE here

	1533 // if the string is bogus, then only an assignment or similar can revive it

	1534 if(!isWritable()) {

	1535 return FALSE;

	1536 }

	1537

	1538 /*

	1539 * We need to make a copy of the array if

	1540 * the buffer is read-only, or

	1541 * the buffer is refCounted (shared), and refCount>1, or

	1542 * the buffer is too small.

	1543 * Return FALSE if memory could not be allocated.

	1544 */

	1545 if(forceClone \|\|

	1546 fFlags & kBufferIsReadonly \|\|

	1547 (fFlags & kRefCounted && refCount() > 1) \|\|

	1548 newCapacity > getCapacity()

	1549 ) {

	1550 // check growCapacity for default value and use of the stack buffer

	1551 if(growCapacity == -1) {

	1552 growCapacity = newCapacity;

	1553 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE ) {

	1554 growCapacity = US_STACKBUF_SIZE;

	1555 }

	1556

	1557 // save old values

	1558 UChar oldStackBuffer[US_STACKBUF_SIZE];

	1559 UChar *oldArray;

	1560 uint8_t flags = fFlags;

	1561

	1562 if(flags&kUsingStackBuffer) {

	1563 if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {

	1564 // copy the stack buffer contents because it will be overwritten with

	1565 // fUnion.fFields values

	1566 us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);

	1567 oldArray = oldStackBuffer;

	1568 } else {

	1569 oldArray = 0; // no need to copy from stack buffer to itself

	1570 }

	1571 } else {

	1572 oldArray = fUnion.fFields.fArray;

	1573 }

	1574

	1575 // allocate a new array

	1576 if(allocate(growCapacity) \|\|

	1577 (newCapacity < growCapacity && allocate(newCapacity))

	1578 ) {

	1579 if(doCopyArray && oldArray != 0) {

	1580 // copy the contents

	1581 // do not copy more than what fits - it may be smaller than before

	1582 int32_t minLength = length();

	1583 newCapacity = getCapacity();

	1584 if(newCapacity < minLength) {

	1585 minLength = newCapacity;

	1586 setLength(minLength);

	1587 }

	1588 us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);

	1589 } else {

	1590 fShortLength = 0;

	1591 }

	1592

	1593 // release the old array

	1594 if(flags & kRefCounted) {

	1595 // the array is refCounted; decrement and release if 0

	1596 int32_t pRefCount = ((int32_t )oldArray - 1);

	1597 if(umtx_atomic_dec(pRefCount) == 0) {

	1598 if(pBufferToDelete == 0) {

	1599 uprv_free(pRefCount);

	1600 } else {

	1601 // the caller requested to delete it himself

	1602 *pBufferToDelete = pRefCount;

	1603 }

	1604 }

	1605 }

	1606 } else {

	1607 // not enough memory for growCapacity and not even for the smaller newCapa city

	1608 // reset the old values for setToBogus() to release the array

	1609 if(!(flags&kUsingStackBuffer)) {

	1610 fUnion.fFields.fArray = oldArray;

	1611 }

	1612 fFlags = flags;

	1613 setToBogus();

	1614 return FALSE;

	1615 }

	1616 }

	1617 return TRUE;

	1618 }

	1619 U_NAMESPACE_END

	1620

	#ifdef U_STATIC_IMPLEMENTATION
	/*
	 * Never meant to be called. Its only purpose is to get the virtual
	 * vector deleting destructor defined within unistr.cpp.
	 * The vector deleting destructor is already a part of UObject,
	 * but defining it here makes sure that it is included with this object file,
	 * which keeps static library dependencies to a minimum.
	 */
	static void uprv_UnicodeStringDummy(void) {
	    U_NAMESPACE_USE
	    UnicodeString *dummyArray = new UnicodeString[2];
	    delete [] dummyArray;
	}
	#endif

OLD	NEW

« no previous file with comments | « icu46/source/common/unisetspan.cpp ('k') | icu46/source/common/unistr_case.cpp » ('j') | no next file with comments »