icu46/source/common/unicode/unistr.h - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/common/unicode/unistr.h

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 **********************************************************************

	3 * Copyright (C) 1998-2010, International Business Machines

	4 * Corporation and others. All Rights Reserved.

	5 **********************************************************************

	6 *

	7 * File unistr.h

	8 *

	9 * Modification History:

	10 *

	11 * Date Name Description

	12 * 09/25/98 stephen Creation.

	13 * 11/11/98 stephen Changed per 11/9 code review.

	14 * 04/20/99 stephen Overhauled per 4/16 code review.

	15 * 11/18/99 aliu Made to inherit from Replaceable. Added method

	16 * handleReplaceBetween(); other methods unchanged.

	17 * 06/25/01 grhoten Remove dependency on iostream.

	18 ******************************************************************************

	19 */

	20

	21 #ifndef UNISTR_H

	22 #define UNISTR_H

	23

	24 /**

	25 * \file

	26 * \brief C++ API: Unicode String

	27 */

	28

	29 #include "unicode/utypes.h"

	30 #include "unicode/rep.h"

	31 #include "unicode/std_string.h"

	32 #include "unicode/stringpiece.h"

	33 #include "unicode/bytestream.h"

	34

	35 struct UConverter; // unicode/ucnv.h

	36 class StringThreadTest;

	37

	38 #ifndef U_COMPARE_CODE_POINT_ORDER

	39 /* see also ustring.h and unorm.h */

	40 /**

	41 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:

	42 * Compare strings in code point order instead of code unit order.

	43 * @stable ICU 2.2

	44 */

	45 #define U_COMPARE_CODE_POINT_ORDER 0x8000

	46 #endif

	47

	48 #ifndef USTRING_H

	49 /**

	50 * \ingroup ustring_ustrlen

	51 */

	52 U_STABLE int32_t U_EXPORT2

	53 u_strlen(const UChar *s);

	54 #endif

	55

	56 U_NAMESPACE_BEGIN

	57

	58 class Locale; // unicode/locid.h

	59 class StringCharacterIterator;

	60 class BreakIterator; // unicode/brkiter.h

	61

	62 /* The <iostream> include has been moved to unicode/ustream.h */

	63

	64 /**

	65 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor

	66 * which constructs a Unicode string from an invariant-character char * string.

	67 * About invariant characters see utypes.h.

	68 * This constructor has no runtime dependency on conversion code and is

	69 * therefore recommended over ones taking a charset name string

	70 * (where the empty string "" indicates invariant-character conversion).

	71 *

	72 * @stable ICU 3.2

	73 */

	74 #define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant

	75

	76 /**

	77 * Unicode String literals in C++.

	78 * Dependent on the platform properties, different UnicodeString

	79 * constructors should be used to create a UnicodeString object from

	80 * a string literal.

	81 * The macros are defined for maximum performance.

	82 * They work only for strings that contain "invariant characters", i.e.,

	83 * only latin letters, digits, and some punctuation.

	84 * See utypes.h for details.

	85 *

	86 * The string parameter must be a C string literal.

	87 * The length of the string, not including the terminating

	88 * <code>NUL</code>, must be specified as a constant.

	89 * The U_STRING_DECL macro should be invoked exactly once for one

	90 * such string variable before it is used.

	91 * @stable ICU 2.0

	92 */

	93 #if defined(U_DECLARE_UTF16)

	94 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)

	95 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))

	96 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)

	97 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY

	98 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)

	99 #else

	100 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)

	101 #endif

	102

	103 /**

	104 * Unicode String literals in C++.

	105 * Dependent on the platform properties, different UnicodeString

	106 * constructors should be used to create a UnicodeString object from

	107 * a string literal.

	108 * The macros are defined for improved performance.

	109 * They work only for strings that contain "invariant characters", i.e.,

	110 * only latin letters, digits, and some punctuation.

	111 * See utypes.h for details.

	112 *

	113 * The string parameter must be a C string literal.

	114 * @stable ICU 2.0

	115 */

	116 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)

	117

	118 /**

	119 * UnicodeString is a string class that stores Unicode characters directly and provides

	120 * similar functionality as the Java String and StringBuffer classes.

	121 * It is a concrete implementation of the abstract class Replaceable (for transliteration).

	122 *

	123 * The UnicodeString class is not suitable for subclassing.

	124 *

	125 * <p>For an overview of Unicode strings in C and C++ see the

	126 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>

	127 *

	128 * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.

	129 * A Unicode character may be stored with either one code unit

	130 * (the most common case) or with a matched pair of special code units

	131 * ("surrogates"). The data type for code units is UChar.

	132 * For single-character handling, a Unicode character code <em>point</em> is a value

	133 * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>

	134 *

	135 * <p>Indexes and offsets into and lengths of strings always count code units, not code points.

	136 * This is the same as with multi-byte char* strings in traditional string handling.

	137 * Operations on partial strings typically do not test for code point boundaries.

	138 * If necessary, the user needs to take care of such boundaries by testing for the code unit

	139 * values or by using functions like

	140 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()

	141 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>

	142 *

	143 * UnicodeString methods are more lenient with regard to input parameter values

	144 * than other ICU APIs. In particular:

	145 * - If indexes are out of bounds for a UnicodeString object

	146 * (<0 or >length()) then they are "pinned" to the nearest boundary.

	147 * - If primitive string pointer values (e.g., const UChar * or char *)

	148 * for input strings are NULL, then those input string parameters are treated

	149 * as if they pointed to an empty string.

	150 * However, this is <em>not</em> the case for char * parameters for charset names

	151 * or other IDs.

	152 * - Most UnicodeString methods do not take a UErrorCode parameter because

	153 * there are usually very few opportunities for failure other than a shortage

	154 * of memory, error codes in low-level C++ string methods would be inconvenient,

	155 * and the error code as the last parameter (ICU convention) would prevent

	156 * the use of default parameter values.

	157 * Instead, such methods set the UnicodeString into a "bogus" state

	158 * (see isBogus()) if an error occurs.

	159 *

	160 * In string comparisons, two UnicodeString objects that are both "bogus"

	161 * compare equal (to be transitive and prevent endless loops in sorting),

	162 * and a "bogus" string compares less than any non-"bogus" one.

	163 *

	164 * Const UnicodeString methods are thread-safe. Multiple threads can use

	165 * const methods on the same UnicodeString object simultaneously,

	166 * but non-const methods must not be called concurrently (in multiple threads)

	167 * with any other (const or non-const) methods.

	168 *

	169 * Similarly, const UnicodeString & parameters are thread-safe.

	170 * One object may be passed in as such a parameter concurrently in multiple threads.

	171 * This includes the const UnicodeString & parameters for

	172 * copy construction, assignment, and cloning.

	173 *

	174 * <p>UnicodeString uses several storage methods.

	175 * String contents can be stored inside the UnicodeString object itself,

	176 * in an allocated and shared buffer, or in an outside buffer that is "aliased".

	177 * Most of this is done transparently, but careful aliasing in particular provides

	178 * significant performance improvements.

	179 * Also, the internal buffer is accessible via special functions.

	180 * For details see the

	181 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>

	182 *

	183 * @see utf.h

	184 * @see CharacterIterator

	185 * @stable ICU 2.0

	186 */

	187 class U_COMMON_API UnicodeString : public Replaceable

	188 {

	189 public:

	190

	191 /**

	192 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor

	193 * which constructs a Unicode string from an invariant-character char * string.

	194 * Use the macro US_INV instead of the full qualification for this value.

	195 *

	196 * @see US_INV

	197 * @stable ICU 3.2

	198 */

	199 enum EInvariant {

	200 /**

	201 * @see EInvariant

	202 * @stable ICU 3.2

	203 */

	204 kInvariant

	205 };

	206

	207 //========================================

	208 // Read-only operations

	209 //========================================

	210

	211 /* Comparison - bitwise only - for international comparison use collation */

	212

	213 /**

	214 * Equality operator. Performs only bitwise comparison.

	215 * @param text The UnicodeString to compare to this one.

	216 * @return TRUE if <TT>text</TT> contains the same characters as this one,

	217 * FALSE otherwise.

	218 * @stable ICU 2.0

	219 */

	220 inline UBool operator== (const UnicodeString& text) const;

	221

	222 /**

	223 * Inequality operator. Performs only bitwise comparison.

	224 * @param text The UnicodeString to compare to this one.

	225 * @return FALSE if <TT>text</TT> contains the same characters as this one,

	226 * TRUE otherwise.

	227 * @stable ICU 2.0

	228 */

	229 inline UBool operator!= (const UnicodeString& text) const;

	230

	231 /**

	232 * Greater than operator. Performs only bitwise comparison.

	233 * @param text The UnicodeString to compare to this one.

	234 * @return TRUE if the characters in this are bitwise

	235 * greater than the characters in <code>text</code>, FALSE otherwise

	236 * @stable ICU 2.0

	237 */

	238 inline UBool operator> (const UnicodeString& text) const;

	239

	240 /**

	241 * Less than operator. Performs only bitwise comparison.

	242 * @param text The UnicodeString to compare to this one.

	243 * @return TRUE if the characters in this are bitwise

	244 * less than the characters in <code>text</code>, FALSE otherwise

	245 * @stable ICU 2.0

	246 */

	247 inline UBool operator< (const UnicodeString& text) const;

	248

	249 /**

	250 * Greater than or equal operator. Performs only bitwise comparison.

	251 * @param text The UnicodeString to compare to this one.

	252 * @return TRUE if the characters in this are bitwise

	253 * greater than or equal to the characters in <code>text</code>, FALSE otherwise

	254 * @stable ICU 2.0

	255 */

	256 inline UBool operator>= (const UnicodeString& text) const;

	257

	258 /**

	259 * Less than or equal operator. Performs only bitwise comparison.

	260 * @param text The UnicodeString to compare to this one.

	261 * @return TRUE if the characters in this are bitwise

	262 * less than or equal to the characters in <code>text</code>, FALSE otherwise

	263 * @stable ICU 2.0

	264 */

	265 inline UBool operator<= (const UnicodeString& text) const;

	266

	267 /**

	268 * Compare the characters bitwise in this UnicodeString to

	269 * the characters in <code>text</code>.

	270 * @param text The UnicodeString to compare to this one.

	271 * @return The result of bitwise character comparison: 0 if this

	272 * contains the same characters as <code>text</code>, -1 if the characters in

	273 * this are bitwise less than the characters in <code>text</code>, +1 if the

	274 * characters in this are bitwise greater than the characters

	275 * in <code>text</code>.

	276 * @stable ICU 2.0

	277 */

	278 inline int8_t compare(const UnicodeString& text) const;

	279

	280 /**

	281 * Compare the characters bitwise in the range

	282 * [<TT>start</TT>, <TT>start + length</TT>) with the characters

	283 * in <TT>text</TT>

	284 * @param start the offset at which the compare operation begins

	285 * @param length the number of characters of text to compare.

	286 * @param text the other text to be compared against this string.

	287 * @return The result of bitwise character comparison: 0 if this

	288 * contains the same characters as <code>text</code>, -1 if the characters in

	289 * this are bitwise less than the characters in <code>text</code>, +1 if the

	290 * characters in this are bitwise greater than the characters

	291 * in <code>text</code>.

	292 * @stable ICU 2.0

	293 */

	294 inline int8_t compare(int32_t start,

	295 int32_t length,

	296 const UnicodeString& text) const;

	297

	298 /**

	299 * Compare the characters bitwise in the range

	300 * [<TT>start</TT>, <TT>start + length</TT>) with the characters

	301 * in <TT>srcText</TT> in the range

	302 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

	303 * @param start the offset at which the compare operation begins

	304 * @param length the number of characters in this to compare.

	305 * @param srcText the text to be compared

	306 * @param srcStart the offset into <TT>srcText</TT> to start comparison

	307 * @param srcLength the number of characters in <TT>srcText</TT> to compare

	308 * @return The result of bitwise character comparison: 0 if this

	309 * contains the same characters as <code>srcText</code>, -1 if the characters in

	310 * this are bitwise less than the characters in <code>srcText</code>, +1 if the

	311 * characters in this are bitwise greater than the characters

	312 * in <code>srcText</code>.

	313 * @stable ICU 2.0

	314 */

	315 inline int8_t compare(int32_t start,

	316 int32_t length,

	317 const UnicodeString& srcText,

	318 int32_t srcStart,

	319 int32_t srcLength) const;

	320

	321 /**

	322 * Compare the characters bitwise in this UnicodeString with the first

	323 * <TT>srcLength</TT> characters in <TT>srcChars</TT>.

	324 * @param srcChars The characters to compare to this UnicodeString.

	325 * @param srcLength the number of characters in <TT>srcChars</TT> to compare

	326 * @return The result of bitwise character comparison: 0 if this

	327 * contains the same characters as <code>srcChars</code>, -1 if the characters in

	328 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the

	329 * characters in this are bitwise greater than the characters

	330 * in <code>srcChars</code>.

	331 * @stable ICU 2.0

	332 */

	333 inline int8_t compare(const UChar *srcChars,

	334 int32_t srcLength) const;

	335

	336 /**

	337 * Compare the characters bitwise in the range

	338 * [<TT>start</TT>, <TT>start + length</TT>) with the first

	339 * <TT>length</TT> characters in <TT>srcChars</TT>

	340 * @param start the offset at which the compare operation begins

	341 * @param length the number of characters to compare.

	342 * @param srcChars the characters to be compared

	343 * @return The result of bitwise character comparison: 0 if this

	344 * contains the same characters as <code>srcChars</code>, -1 if the characters in

	345 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the

	346 * characters in this are bitwise greater than the characters

	347 * in <code>srcChars</code>.

	348 * @stable ICU 2.0

	349 */

	350 inline int8_t compare(int32_t start,

	351 int32_t length,

	352 const UChar *srcChars) const;

	353

	354 /**

	355 * Compare the characters bitwise in the range

	356 * [<TT>start</TT>, <TT>start + length</TT>) with the characters

	357 * in <TT>srcChars</TT> in the range

	358 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

	359 * @param start the offset at which the compare operation begins

	360 * @param length the number of characters in this to compare

	361 * @param srcChars the characters to be compared

	362 * @param srcStart the offset into <TT>srcChars</TT> to start comparison

	363 * @param srcLength the number of characters in <TT>srcChars</TT> to compare

	364 * @return The result of bitwise character comparison: 0 if this

	365 * contains the same characters as <code>srcChars</code>, -1 if the characters in

	366 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the

	367 * characters in this are bitwise greater than the characters

	368 * in <code>srcChars</code>.

	369 * @stable ICU 2.0

	370 */

	371 inline int8_t compare(int32_t start,

	372 int32_t length,

	373 const UChar *srcChars,

	374 int32_t srcStart,

	375 int32_t srcLength) const;

	376

	377 /**

	378 * Compare the characters bitwise in the range

	379 * [<TT>start</TT>, <TT>limit</TT>) with the characters

	380 * in <TT>srcText</TT> in the range

	381 * [<TT>srcStart</TT>, <TT>srcLimit</TT>).

	382 * @param start the offset at which the compare operation begins

	383 * @param limit the offset immediately following the compare operation

	384 * @param srcText the text to be compared

	385 * @param srcStart the offset into <TT>srcText</TT> to start comparison

	386 * @param srcLimit the offset into <TT>srcText</TT> to limit comparison

	387 * @return The result of bitwise character comparison: 0 if this

	388 * contains the same characters as <code>srcText</code>, -1 if the characters in

	389 * this are bitwise less than the characters in <code>srcText</code>, +1 if the

	390 * characters in this are bitwise greater than the characters

	391 * in <code>srcText</code>.

	392 * @stable ICU 2.0

	393 */

	394 inline int8_t compareBetween(int32_t start,

	395 int32_t limit,

	396 const UnicodeString& srcText,

	397 int32_t srcStart,

	398 int32_t srcLimit) const;

	399

	400 /**

	401 * Compare two Unicode strings in code point order.

	402 * The result may be different from the results of compare(), operator<, etc.

	403 * if supplementary characters are present:

	404 *

	405 * In UTF-16, supplementary characters (with code points U+10000 and above) are

	406 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

	407 * which means that they compare as less than some other BMP characters like U+feff.

	408 * This function compares Unicode strings in code point order.

	409 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

	410 *

	411 * @param text Another string to compare this one to.

	412 * @return a negative/zero/positive integer corresponding to whether

	413 * this string is less than/equal to/greater than the second one

	414 * in code point order

	415 * @stable ICU 2.0

	416 */

	417 inline int8_t compareCodePointOrder(const UnicodeString& text) const;

	418

	419 /**

	420 * Compare two Unicode strings in code point order.

	421 * The result may be different from the results of compare(), operator<, etc.

	422 * if supplementary characters are present:

	423 *

	424 * In UTF-16, supplementary characters (with code points U+10000 and above) are

	425 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

	426 * which means that they compare as less than some other BMP characters like U+feff.

	427 * This function compares Unicode strings in code point order.

	428 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

	429 *

	430 * @param start The start offset in this string at which the compare operation begins.

	431 * @param length The number of code units from this string to compare.

	432 * @param srcText Another string to compare this one to.

	433 * @return a negative/zero/positive integer corresponding to whether

	434 * this string is less than/equal to/greater than the second one

	435 * in code point order

	436 * @stable ICU 2.0

	437 */

	438 inline int8_t compareCodePointOrder(int32_t start,

	439 int32_t length,

	440 const UnicodeString& srcText) const;

	441

	442 /**

	443 * Compare two Unicode strings in code point order.

	444 * The result may be different from the results of compare(), operator<, etc.

	445 * if supplementary characters are present:

	446 *

	447 * In UTF-16, supplementary characters (with code points U+10000 and above) are

	448 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

	449 * which means that they compare as less than some other BMP characters like U+feff.

	450 * This function compares Unicode strings in code point order.

	451 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

	452 *

	453 * @param start The start offset in this string at which the compare operation begins.

	454 * @param length The number of code units from this string to compare.

	455 * @param srcText Another string to compare this one to.

	456 * @param srcStart The start offset in that string at which the compare operation begins.

	457 * @param srcLength The number of code units from that string to compare.

	458 * @return a negative/zero/positive integer corresponding to whether

	459 * this string is less than/equal to/greater than the second one

	460 * in code point order

	461 * @stable ICU 2.0

	462 */

	463 inline int8_t compareCodePointOrder(int32_t start,

	464 int32_t length,

	465 const UnicodeString& srcText,

	466 int32_t srcStart,

	467 int32_t srcLength) const;

	468

	469 /**

	470 * Compare two Unicode strings in code point order.

	471 * The result may be different from the results of compare(), operator<, etc.

	472 * if supplementary characters are present:

	473 *

	474 * In UTF-16, supplementary characters (with code points U+10000 and above) are

	475 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

	476 * which means that they compare as less than some other BMP characters like U+feff.

	477 * This function compares Unicode strings in code point order.

	478 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

	479 *

	480 * @param srcChars A pointer to another string to compare this one to.

	481 * @param srcLength The number of code units from that string to compare.

	482 * @return a negative/zero/positive integer corresponding to whether

	483 * this string is less than/equal to/greater than the second one

	484 * in code point order

	485 * @stable ICU 2.0

	486 */

	487 inline int8_t compareCodePointOrder(const UChar *srcChars,

	488 int32_t srcLength) const;

	489

	490 /**

	491 * Compare two Unicode strings in code point order.

	492 * The result may be different from the results of compare(), operator<, etc.

	493 * if supplementary characters are present:

	494 *

	495 * In UTF-16, supplementary characters (with code points U+10000 and above) are

	496 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

	497 * which means that they compare as less than some other BMP characters like U+feff.

	498 * This function compares Unicode strings in code point order.

	499 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

	500 *

	501 * @param start The start offset in this string at which the compare operation begins.

	502 * @param length The number of code units from this string to compare.

	503 * @param srcChars A pointer to another string to compare this one to.

	504 * @return a negative/zero/positive integer corresponding to whether

	505 * this string is less than/equal to/greater than the second one

	506 * in code point order

	507 * @stable ICU 2.0

	508 */

	509 inline int8_t compareCodePointOrder(int32_t start,

	510 int32_t length,

	511 const UChar *srcChars) const;

	512

	513 /**

	514 * Compare two Unicode strings in code point order.

	515 * The result may be different from the results of compare(), operator<, etc.

	516 * if supplementary characters are present:

	517 *

	518 * In UTF-16, supplementary characters (with code points U+10000 and above) are

	519 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

	520 * which means that they compare as less than some other BMP characters like U+feff.

	521 * This function compares Unicode strings in code point order.

	522 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

	523 *

	524 * @param start The start offset in this string at which the compare operation begins.

	525 * @param length The number of code units from this string to compare.

	526 * @param srcChars A pointer to another string to compare this one to.

	527 * @param srcStart The start offset in that string at which the compare operation begins.

	528 * @param srcLength The number of code units from that string to compare.

	529 * @return a negative/zero/positive integer corresponding to whether

	530 * this string is less than/equal to/greater than the second one

	531 * in code point order

	532 * @stable ICU 2.0

	533 */

	534 inline int8_t compareCodePointOrder(int32_t start,

	535 int32_t length,

	536 const UChar *srcChars,

	537 int32_t srcStart,

	538 int32_t srcLength) const;

	539

	540 /**

	541 * Compare two Unicode strings in code point order.

	542 * The result may be different from the results of compare(), operator<, etc.

	543 * if supplementary characters are present:

	544 *

	545 * In UTF-16, supplementary characters (with code points U+10000 and above) are

	546 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

	547 * which means that they compare as less than some other BMP characters like U+feff.

	548 * This function compares Unicode strings in code point order.

	549 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

	550 *

	551 * @param start The start offset in this string at which the compare operation begins.

	552 * @param limit The offset after the last code unit from this string to compare.

	553 * @param srcText Another string to compare this one to.

	554 * @param srcStart The start offset in that string at which the compare operation begins.

	555 * @param srcLimit The offset after the last code unit from that string to compare.

	556 * @return a negative/zero/positive integer corresponding to whether

	557 * this string is less than/equal to/greater than the second one

	558 * in code point order

	559 * @stable ICU 2.0

	560 */

	561 inline int8_t compareCodePointOrderBetween(int32_t start,

	562 int32_t limit,

	563 const UnicodeString& srcText,

	564 int32_t srcStart,

	565 int32_t srcLimit) const;

	566

	567 /**

	568 * Compare two strings case-insensitively using full case folding.

	569 * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).

	570 *

	571 * @param text Another string to compare this one to.

	572 * @param options A bit set of options:

	573 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:

	574 * Comparison in code unit order with default case folding.

	575 *

	576 * - U_COMPARE_CODE_POINT_ORDER

	577 * Set to choose code point order instead of code unit order

	578 * (see u_strCompare for details).

	579 *

	580 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I

	581 *

	582 * @return A negative, zero, or positive integer indicating the comparison result.

	583 * @stable ICU 2.0

	584 */

	585 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;

	586

	587 /**

	588 * Compare two strings case-insensitively using full case folding.

	589 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).

	590 *

	591 * @param start The start offset in this string at which the compare operation begins.

	592 * @param length The number of code units from this string to compare.

	593 * @param srcText Another string to compare this one to.

	594 * @param options A bit set of options:

	595 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:

	596 * Comparison in code unit order with default case folding.

	597 *

	598 * - U_COMPARE_CODE_POINT_ORDER

	599 * Set to choose code point order instead of code unit order

	600 * (see u_strCompare for details).

	601 *

	602 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I

	603 *

	604 * @return A negative, zero, or positive integer indicating the comparison result.

	605 * @stable ICU 2.0

	606 */

	607 inline int8_t caseCompare(int32_t start,

	608 int32_t length,

	609 const UnicodeString& srcText,

	610 uint32_t options) const;

	611

	612 /**

	613 * Compare two strings case-insensitively using full case folding.

	614 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).

	615 *

	616 * @param start The start offset in this string at which the compare operation begins.

	617 * @param length The number of code units from this string to compare.

	618 * @param srcText Another string to compare this one to.

	619 * @param srcStart The start offset in that string at which the compare operation begins.

	620 * @param srcLength The number of code units from that string to compare.

	621 * @param options A bit set of options:

	622 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:

	623 * Comparison in code unit order with default case folding.

	624 *

	625 * - U_COMPARE_CODE_POINT_ORDER

	626 * Set to choose code point order instead of code unit order

	627 * (see u_strCompare for details).

	628 *

	629 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I

	630 *

	631 * @return A negative, zero, or positive integer indicating the comparison result.

	632 * @stable ICU 2.0

	633 */

	634 inline int8_t caseCompare(int32_t start,

	635 int32_t length,

	636 const UnicodeString& srcText,

	637 int32_t srcStart,

	638 int32_t srcLength,

	639 uint32_t options) const;

	640

	641 /**

	642 * Compare two strings case-insensitively using full case folding.

	643 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).

	644 *

	645 * @param srcChars A pointer to another string to compare this one to.

	646 * @param srcLength The number of code units from that string to compare.

	647 * @param options A bit set of options:

	648 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:

	649 * Comparison in code unit order with default case folding.

	650 *

	651 * - U_COMPARE_CODE_POINT_ORDER

	652 * Set to choose code point order instead of code unit order

	653 * (see u_strCompare for details).

	654 *

	655 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I

	656 *

	657 * @return A negative, zero, or positive integer indicating the comparison result.

	658 * @stable ICU 2.0

	659 */

	660 inline int8_t caseCompare(const UChar *srcChars,

	661 int32_t srcLength,

	662 uint32_t options) const;

	663

	664 /**

	665 * Compare two strings case-insensitively using full case folding.

	666 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).

	667 *

	668 * @param start The start offset in this string at which the compare operation begins.

	669 * @param length The number of code units from this string to compare.

	670 * @param srcChars A pointer to another string to compare this one to.

	671 * @param options A bit set of options:

	672 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:

	673 * Comparison in code unit order with default case folding.

	674 *

	675 * - U_COMPARE_CODE_POINT_ORDER

	676 * Set to choose code point order instead of code unit order

	677 * (see u_strCompare for details).

	678 *

	679 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I

	680 *

	681 * @return A negative, zero, or positive integer indicating the comparison result.

	682 * @stable ICU 2.0

	683 */

	684 inline int8_t caseCompare(int32_t start,

	685 int32_t length,

	686 const UChar *srcChars,

	687 uint32_t options) const;

	688

	689 /**

	690 * Compare two strings case-insensitively using full case folding.

	691 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).

	692 *

	693 * @param start The start offset in this string at which the compare operation begins.

	694 * @param length The number of code units from this string to compare.

	695 * @param srcChars A pointer to another string to compare this one to.

	696 * @param srcStart The start offset in that string at which the compare operation begins.

	697 * @param srcLength The number of code units from that string to compare.

	698 * @param options A bit set of options:

	699 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:

	700 * Comparison in code unit order with default case folding.

	701 *

	702 * - U_COMPARE_CODE_POINT_ORDER

	703 * Set to choose code point order instead of code unit order

	704 * (see u_strCompare for details).

	705 *

	706 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I

	707 *

	708 * @return A negative, zero, or positive integer indicating the comparison result.

	709 * @stable ICU 2.0

	710 */

	711 inline int8_t caseCompare(int32_t start,

	712 int32_t length,

	713 const UChar *srcChars,

	714 int32_t srcStart,

	715 int32_t srcLength,

	716 uint32_t options) const;

	717

	718 /**

	719 * Compare two strings case-insensitively using full case folding.

	720 * This is equivalent to this->foldCase(options).compareBetween(text.foldCase(options)).

	721 *

	722 * @param start The start offset in this string at which the compare operation begins.

	723 * @param limit The offset after the last code unit from this string to compare.

	724 * @param srcText Another string to compare this one to.

	725 * @param srcStart The start offset in that string at which the compare operation begins.

	726 * @param srcLimit The offset after the last code unit from that string to compare.

	727 * @param options A bit set of options:

	728 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:

	729 * Comparison in code unit order with default case folding.

	730 *

	731 * - U_COMPARE_CODE_POINT_ORDER

	732 * Set to choose code point order instead of code unit order

	733 * (see u_strCompare for details).

	734 *

	735 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I

	736 *

	737 * @return A negative, zero, or positive integer indicating the comparison result.

	738 * @stable ICU 2.0

	739 */

	740 inline int8_t caseCompareBetween(int32_t start,

	741 int32_t limit,

	742 const UnicodeString& srcText,

	743 int32_t srcStart,

	744 int32_t srcLimit,

	745 uint32_t options) const;

	746

	747 /**

	748 * Determine if this starts with the characters in <TT>text</TT>

	749 * @param text The text to match.

	750 * @return TRUE if this starts with the characters in <TT>text</TT>,

	751 * FALSE otherwise

	752 * @stable ICU 2.0

	753 */

	754 inline UBool startsWith(const UnicodeString& text) const;

	755

	756 /**

	757 * Determine if this starts with the characters in <TT>srcText</TT>

	758 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

	759 * @param srcText The text to match.

	760 * @param srcStart the offset into <TT>srcText</TT> to start matching

	761 * @param srcLength the number of characters in <TT>srcText</TT> to match

	762 * @return TRUE if this starts with the characters in <TT>srcText</TT>,

	763 * FALSE otherwise

	764 * @stable ICU 2.0

	765 */

	766 inline UBool startsWith(const UnicodeString& srcText,

	767 int32_t srcStart,

	768 int32_t srcLength) const;

	769

	770 /**

	771 * Determine if this starts with the characters in <TT>srcChars</TT>

	772 * @param srcChars The characters to match.

	773 * @param srcLength the number of characters in <TT>srcChars</TT>

	774 * @return TRUE if this starts with the characters in <TT>srcChars</TT>,

	775 * FALSE otherwise

	776 * @stable ICU 2.0

	777 */

	778 inline UBool startsWith(const UChar *srcChars,

	779 int32_t srcLength) const;

	780

	781 /**

	782 * Determine if this starts with the characters in <TT>srcChars</TT>

	783 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

	784 * @param srcChars The characters to match.

	785 * @param srcStart the offset into <TT>srcChars</TT> to start matching

	786 * @param srcLength the number of characters in <TT>srcChars</TT> to match

	787 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise

	788 * @stable ICU 2.0

	789 */

	790 inline UBool startsWith(const UChar *srcChars,

	791 int32_t srcStart,

	792 int32_t srcLength) const;

	793

	794 /**

	795 * Determine if this ends with the characters in <TT>text</TT>

	796 * @param text The text to match.

	797 * @return TRUE if this ends with the characters in <TT>text</TT>,

	798 * FALSE otherwise

	799 * @stable ICU 2.0

	800 */

	801 inline UBool endsWith(const UnicodeString& text) const;

	802

	803 /**

	804 * Determine if this ends with the characters in <TT>srcText</TT>

	805 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

	806 * @param srcText The text to match.

	807 * @param srcStart the offset into <TT>srcText</TT> to start matching

	808 * @param srcLength the number of characters in <TT>srcText</TT> to match

	809 * @return TRUE if this ends with the characters in <TT>srcText</TT>,

	810 * FALSE otherwise

	811 * @stable ICU 2.0

	812 */

	813 inline UBool endsWith(const UnicodeString& srcText,

	814 int32_t srcStart,

	815 int32_t srcLength) const;

	816

	817 /**

	818 * Determine if this ends with the characters in <TT>srcChars</TT>

	819 * @param srcChars The characters to match.

	820 * @param srcLength the number of characters in <TT>srcChars</TT>

	821 * @return TRUE if this ends with the characters in <TT>srcChars</TT>,

	822 * FALSE otherwise

	823 * @stable ICU 2.0

	824 */

	825 inline UBool endsWith(const UChar *srcChars,

	826 int32_t srcLength) const;

	827

	828 /**

	829 * Determine if this ends with the characters in <TT>srcChars</TT>

	830 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

	831 * @param srcChars The characters to match.

	832 * @param srcStart the offset into <TT>srcChars</TT> to start matching

	833 * @param srcLength the number of characters in <TT>srcChars</TT> to match

	834 * @return TRUE if this ends with the characters in <TT>srcChars</TT>,

	835 * FALSE otherwise

	836 * @stable ICU 2.0

	837 */

	838 inline UBool endsWith(const UChar *srcChars,

	839 int32_t srcStart,

	840 int32_t srcLength) const;

	841

	842

	843 /* Searching - bitwise only */

	844

	845 /**

	846 * Locate in this the first occurrence of the characters in <TT>text</TT>,

	847 * using bitwise comparison.

	848 * @param text The text to search for.

	849 * @return The offset into this of the start of <TT>text</TT>,

	850 * or -1 if not found.

	851 * @stable ICU 2.0

	852 */

	853 inline int32_t indexOf(const UnicodeString& text) const;

	854

	855 /**

	856 * Locate in this the first occurrence of the characters in <TT>text</TT>

	857 * starting at offset <TT>start</TT>, using bitwise comparison.

	858 * @param text The text to search for.

	859 * @param start The offset at which searching will start.

	860 * @return The offset into this of the start of <TT>text</TT>,

	861 * or -1 if not found.

	862 * @stable ICU 2.0

	863 */

	864 inline int32_t indexOf(const UnicodeString& text,

	865 int32_t start) const;

	866

	867 /**

	868 * Locate in this the first occurrence in the range

	869 * [<TT>start</TT>, <TT>start + length</TT>) of the characters

	870 * in <TT>text</TT>, using bitwise comparison.

	871 * @param text The text to search for.

	872 * @param start The offset at which searching will start.

	873 * @param length The number of characters to search

	874 * @return The offset into this of the start of <TT>text</TT>,

	875 * or -1 if not found.

	876 * @stable ICU 2.0

	877 */

	878 inline int32_t indexOf(const UnicodeString& text,

	879 int32_t start,

	880 int32_t length) const;

	881

	882 /**

	883 * Locate in this the first occurrence in the range

	884 * [<TT>start</TT>, <TT>start + length</TT>) of the characters

	885 * in <TT>srcText</TT> in the range

	886 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),

	887 * using bitwise comparison.

	888 * @param srcText The text to search for.

	889 * @param srcStart the offset into <TT>srcText</TT> at which

	890 * to start matching

	891 * @param srcLength the number of characters in <TT>srcText</TT> to match

	892 * @param start the offset into this at which to start matching

	893 * @param length the number of characters in this to search

	894 * @return The offset into this of the start of <TT>srcText</TT>,

	895 * or -1 if not found.

	896 * @stable ICU 2.0

	897 */

	898 inline int32_t indexOf(const UnicodeString& srcText,

	899 int32_t srcStart,

	900 int32_t srcLength,

	901 int32_t start,

	902 int32_t length) const;

	903

	904 /**

	905 * Locate in this the first occurrence of the characters in

	906 * <TT>srcChars</TT>

	907 * starting at offset <TT>start</TT>, using bitwise comparison.

	908 * @param srcChars The text to search for.

	909 * @param srcLength the number of characters in <TT>srcChars</TT> to match

	910 * @param start the offset into this at which to start matching

	911 * @return The offset into this of the start of <TT>srcChars</TT>,

	912 * or -1 if not found.

	913 * @stable ICU 2.0

	914 */

	915 inline int32_t indexOf(const UChar *srcChars,

	916 int32_t srcLength,

	917 int32_t start) const;

	918

	919 /**

	920 * Locate in this the first occurrence in the range

	921 * [<TT>start</TT>, <TT>start + length</TT>) of the characters

	922 * in <TT>srcChars</TT>, using bitwise comparison.

	923 * @param srcChars The text to search for.

	924 * @param srcLength the number of characters in <TT>srcChars</TT>

	925 * @param start The offset at which searching will start.

	926 * @param length The number of characters to search

	927 * @return The offset into this of the start of <TT>srcChars</TT>,

	928 * or -1 if not found.

	929 * @stable ICU 2.0

	930 */

	931 inline int32_t indexOf(const UChar *srcChars,

	932 int32_t srcLength,

	933 int32_t start,

	934 int32_t length) const;

	935

	936 /**

	937 * Locate in this the first occurrence in the range

	938 * [<TT>start</TT>, <TT>start + length</TT>) of the characters

	939 * in <TT>srcChars</TT> in the range

	940 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),

	941 * using bitwise comparison.

	942 * @param srcChars The text to search for.

	943 * @param srcStart the offset into <TT>srcChars</TT> at which

	944 * to start matching

	945 * @param srcLength the number of characters in <TT>srcChars</TT> to match

	946 * @param start the offset into this at which to start matching

	947 * @param length the number of characters in this to search

	948 * @return The offset into this of the start of <TT>srcChars</TT>,

	949 * or -1 if not found.

	950 * @stable ICU 2.0

	951 */

	952 int32_t indexOf(const UChar *srcChars,

	953 int32_t srcStart,

	954 int32_t srcLength,

	955 int32_t start,

	956 int32_t length) const;

	957

	958 /**

	959 * Locate in this the first occurrence of the BMP code point <code>c</code>,

	960 * using bitwise comparison.

	961 * @param c The code unit to search for.

	962 * @return The offset into this of <TT>c</TT>, or -1 if not found.

	963 * @stable ICU 2.0

	964 */

	965 inline int32_t indexOf(UChar c) const;

	966

	967 /**

	968 * Locate in this the first occurrence of the code point <TT>c</TT>,

	969 * using bitwise comparison.

	970 *

	971 * @param c The code point to search for.

	972 * @return The offset into this of <TT>c</TT>, or -1 if not found.

	973 * @stable ICU 2.0

	974 */

	975 inline int32_t indexOf(UChar32 c) const;

	976

	977 /**

	978 * Locate in this the first occurrence of the BMP code point <code>c</code>,

	979 * starting at offset <TT>start</TT>, using bitwise comparison.

	980 * @param c The code unit to search for.

	981 * @param start The offset at which searching will start.

	982 * @return The offset into this of <TT>c</TT>, or -1 if not found.

	983 * @stable ICU 2.0

	984 */

	985 inline int32_t indexOf(UChar c,

	986 int32_t start) const;

	987

	988 /**

	989 * Locate in this the first occurrence of the code point <TT>c</TT>

	990 * starting at offset <TT>start</TT>, using bitwise comparison.

	991 *

	992 * @param c The code point to search for.

	993 * @param start The offset at which searching will start.

	994 * @return The offset into this of <TT>c</TT>, or -1 if not found.

	995 * @stable ICU 2.0

	996 */

	997 inline int32_t indexOf(UChar32 c,

	998 int32_t start) const;

	999

	1000 /**

	1001 * Locate in this the first occurrence of the BMP code point <code>c</code>

	1002 * in the range [<TT>start</TT>, <TT>start + length</TT>),

	1003 * using bitwise comparison.

	1004 * @param c The code unit to search for.

	1005 * @param start the offset into this at which to start matching

	1006 * @param length the number of characters in this to search

	1007 * @return The offset into this of <TT>c</TT>, or -1 if not found.

	1008 * @stable ICU 2.0

	1009 */

	1010 inline int32_t indexOf(UChar c,

	1011 int32_t start,

	1012 int32_t length) const;

	1013

	1014 /**

	1015 * Locate in this the first occurrence of the code point <TT>c</TT>

	1016 * in the range [<TT>start</TT>, <TT>start + length</TT>),

	1017 * using bitwise comparison.

	1018 *

	1019 * @param c The code point to search for.

	1020 * @param start the offset into this at which to start matching

	1021 * @param length the number of characters in this to search

	1022 * @return The offset into this of <TT>c</TT>, or -1 if not found.

	1023 * @stable ICU 2.0

	1024 */

	1025 inline int32_t indexOf(UChar32 c,

	1026 int32_t start,

	1027 int32_t length) const;

	1028

	1029 /**

	1030 * Locate in this the last occurrence of the characters in <TT>text</TT>,

	1031 * using bitwise comparison.

	1032 * @param text The text to search for.

	1033 * @return The offset into this of the start of <TT>text</TT>,

	1034 * or -1 if not found.

	1035 * @stable ICU 2.0

	1036 */

	1037 inline int32_t lastIndexOf(const UnicodeString& text) const;

	1038

	1039 /**

	1040 * Locate in this the last occurrence of the characters in <TT>text</TT>

	1041 * starting at offset <TT>start</TT>, using bitwise comparison.

	1042 * @param text The text to search for.

	1043 * @param start The offset at which searching will start.

	1044 * @return The offset into this of the start of <TT>text</TT>,

	1045 * or -1 if not found.

	1046 * @stable ICU 2.0

	1047 */

	1048 inline int32_t lastIndexOf(const UnicodeString& text,

	1049 int32_t start) const;

	1050

	1051 /**

	1052 * Locate in this the last occurrence in the range

	1053 * [<TT>start</TT>, <TT>start + length</TT>) of the characters

	1054 * in <TT>text</TT>, using bitwise comparison.

	1055 * @param text The text to search for.

	1056 * @param start The offset at which searching will start.

	1057 * @param length The number of characters to search

	1058 * @return The offset into this of the start of <TT>text</TT>,

	1059 * or -1 if not found.

	1060 * @stable ICU 2.0

	1061 */

	1062 inline int32_t lastIndexOf(const UnicodeString& text,

	1063 int32_t start,

	1064 int32_t length) const;

	1065

	1066 /**

	1067 * Locate in this the last occurrence in the range

	1068 * [<TT>start</TT>, <TT>start + length</TT>) of the characters

	1069 * in <TT>srcText</TT> in the range

	1070 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),

	1071 * using bitwise comparison.

	1072 * @param srcText The text to search for.

	1073 * @param srcStart the offset into <TT>srcText</TT> at which

	1074 * to start matching

	1075 * @param srcLength the number of characters in <TT>srcText</TT> to match

	1076 * @param start the offset into this at which to start matching

	1077 * @param length the number of characters in this to search

	1078 * @return The offset into this of the start of <TT>srcText</TT>,

	1079 * or -1 if not found.

	1080 * @stable ICU 2.0

	1081 */

	1082 inline int32_t lastIndexOf(const UnicodeString& srcText,

	1083 int32_t srcStart,

	1084 int32_t srcLength,

	1085 int32_t start,

	1086 int32_t length) const;

	1087

	1088 /**

	1089 * Locate in this the last occurrence of the characters in <TT>srcChars</TT>

	1090 * starting at offset <TT>start</TT>, using bitwise comparison.

	1091 * @param srcChars The text to search for.

	1092 * @param srcLength the number of characters in <TT>srcChars</TT> to match

	1093 * @param start the offset into this at which to start matching

	1094 * @return The offset into this of the start of <TT>srcChars</TT>,

	1095 * or -1 if not found.

	1096 * @stable ICU 2.0

	1097 */

	1098 inline int32_t lastIndexOf(const UChar *srcChars,

	1099 int32_t srcLength,

	1100 int32_t start) const;

	1101

	1102 /**

	1103 * Locate in this the last occurrence in the range

	1104 * [<TT>start</TT>, <TT>start + length</TT>) of the characters

	1105 * in <TT>srcChars</TT>, using bitwise comparison.

	1106 * @param srcChars The text to search for.

	1107 * @param srcLength the number of characters in <TT>srcChars</TT>

	1108 * @param start The offset at which searching will start.

	1109 * @param length The number of characters to search

	1110 * @return The offset into this of the start of <TT>srcChars</TT>,

	1111 * or -1 if not found.

	1112 * @stable ICU 2.0

	1113 */

	1114 inline int32_t lastIndexOf(const UChar *srcChars,

	1115 int32_t srcLength,

	1116 int32_t start,

	1117 int32_t length) const;

	1118

	1119 /**

	1120 * Locate in this the last occurrence in the range

	1121 * [<TT>start</TT>, <TT>start + length</TT>) of the characters

	1122 * in <TT>srcChars</TT> in the range

	1123 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),

	1124 * using bitwise comparison.

	1125 * @param srcChars The text to search for.

	1126 * @param srcStart the offset into <TT>srcChars</TT> at which

	1127 * to start matching

	1128 * @param srcLength the number of characters in <TT>srcChars</TT> to match

	1129 * @param start the offset into this at which to start matching

	1130 * @param length the number of characters in this to search

	1131 * @return The offset into this of the start of <TT>srcChars</TT>,

	1132 * or -1 if not found.

	1133 * @stable ICU 2.0

	1134 */

	1135 int32_t lastIndexOf(const UChar *srcChars,

	1136 int32_t srcStart,

	1137 int32_t srcLength,

	1138 int32_t start,

	1139 int32_t length) const;

	1140

	1141 /**

	1142 * Locate in this the last occurrence of the BMP code point <code>c</code>,

	1143 * using bitwise comparison.

	1144 * @param c The code unit to search for.

	1145 * @return The offset into this of <TT>c</TT>, or -1 if not found.

	1146 * @stable ICU 2.0

	1147 */

	1148 inline int32_t lastIndexOf(UChar c) const;

	1149

	1150 /**

	1151 * Locate in this the last occurrence of the code point <TT>c</TT>,

	1152 * using bitwise comparison.

	1153 *

	1154 * @param c The code point to search for.

	1155 * @return The offset into this of <TT>c</TT>, or -1 if not found.

	1156 * @stable ICU 2.0

	1157 */

	1158 inline int32_t lastIndexOf(UChar32 c) const;

	1159

	1160 /**

	1161 * Locate in this the last occurrence of the BMP code point <code>c</code>

	1162 * starting at offset <TT>start</TT>, using bitwise comparison.

	1163 * @param c The code unit to search for.

	1164 * @param start The offset at which searching will start.

	1165 * @return The offset into this of <TT>c</TT>, or -1 if not found.

	1166 * @stable ICU 2.0

	1167 */

	1168 inline int32_t lastIndexOf(UChar c,

	1169 int32_t start) const;

	1170

	1171 /**

	1172 * Locate in this the last occurrence of the code point <TT>c</TT>

	1173 * starting at offset <TT>start</TT>, using bitwise comparison.

	1174 *

	1175 * @param c The code point to search for.

	1176 * @param start The offset at which searching will start.

	1177 * @return The offset into this of <TT>c</TT>, or -1 if not found.

	1178 * @stable ICU 2.0

	1179 */

	1180 inline int32_t lastIndexOf(UChar32 c,

	1181 int32_t start) const;

	1182

	1183 /**

	1184 * Locate in this the last occurrence of the BMP code point <code>c</code>

	1185 * in the range [<TT>start</TT>, <TT>start + length</TT>),

	1186 * using bitwise comparison.

	1187 * @param c The code unit to search for.

	1188 * @param start the offset into this at which to start matching

	1189 * @param length the number of characters in this to search

	1190 * @return The offset into this of <TT>c</TT>, or -1 if not found.

	1191 * @stable ICU 2.0

	1192 */

	1193 inline int32_t lastIndexOf(UChar c,

	1194 int32_t start,

	1195 int32_t length) const;

	1196

	1197 /**

	1198 * Locate in this the last occurrence of the code point <TT>c</TT>

	1199 * in the range [<TT>start</TT>, <TT>start + length</TT>),

	1200 * using bitwise comparison.

	1201 *

	1202 * @param c The code point to search for.

	1203 * @param start the offset into this at which to start matching

	1204 * @param length the number of characters in this to search

	1205 * @return The offset into this of <TT>c</TT>, or -1 if not found.

	1206 * @stable ICU 2.0

	1207 */

	1208 inline int32_t lastIndexOf(UChar32 c,

	1209 int32_t start,

	1210 int32_t length) const;

	1211

	1212

	1213 /* Character access */

	1214

	1215 /**

	1216 * Return the code unit at offset <tt>offset</tt>.

	1217 * If the offset is not valid (0..length()-1) then U+ffff is returned.

	1218 * @param offset a valid offset into the text

	1219 * @return the code unit at offset <tt>offset</tt>

	1220 * or 0xffff if the offset is not valid for this string

	1221 * @stable ICU 2.0

	1222 */

	1223 inline UChar charAt(int32_t offset) const;

	1224

	1225 /**

	1226 * Return the code unit at offset <tt>offset</tt>.

	1227 * If the offset is not valid (0..length()-1) then U+ffff is returned.

	1228 * @param offset a valid offset into the text

	1229 * @return the code unit at offset <tt>offset</tt>

	1230 * @stable ICU 2.0

	1231 */

	1232 inline UChar operator[] (int32_t offset) const;

	1233

	1234 /**

	1235 * Return the code point that contains the code unit

	1236 * at offset <tt>offset</tt>.

	1237 * If the offset is not valid (0..length()-1) then U+ffff is returned.

	1238 * @param offset a valid offset into the text

	1239 * that indicates the text offset of any of the code units

	1240 * that will be assembled into a code point (21-bit value) and returned

	1241 * @return the code point of text at <tt>offset</tt>

	1242 * or 0xffff if the offset is not valid for this string

	1243 * @stable ICU 2.0

	1244 */

	1245 inline UChar32 char32At(int32_t offset) const;

	1246

	1247 /**

	1248 * Adjust a random-access offset so that

	1249 * it points to the beginning of a Unicode character.

	1250 * The offset that is passed in points to

	1251 * any code unit of a code point,

	1252 * while the returned offset will point to the first code unit

	1253 * of the same code point.

	1254 * In UTF-16, if the input offset points to a second surrogate

	1255 * of a surrogate pair, then the returned offset will point

	1256 * to the first surrogate.

	1257 * @param offset a valid offset into one code point of the text

	1258 * @return offset of the first code unit of the same code point

	1259 * @see U16_SET_CP_START

	1260 * @stable ICU 2.0

	1261 */

	1262 inline int32_t getChar32Start(int32_t offset) const;

	1263

	1264 /**

	1265 * Adjust a random-access offset so that

	1266 * it points behind a Unicode character.

	1267 * The offset that is passed in points behind

	1268 * any code unit of a code point,

	1269 * while the returned offset will point behind the last code unit

	1270 * of the same code point.

	1271 * In UTF-16, if the input offset points behind the first surrogate

	1272 * (i.e., to the second surrogate)

	1273 * of a surrogate pair, then the returned offset will point

	1274 * behind the second surrogate (i.e., to the first code unit after the pair).

	1275 * @param offset a valid offset after any code unit of a code point of the text

	1276 * @return offset of the first code unit after the same code point

	1277 * @see U16_SET_CP_LIMIT

	1278 * @stable ICU 2.0

	1279 */

	1280 inline int32_t getChar32Limit(int32_t offset) const;

	1281

	1282 /**

	1283 * Move the code unit index along the string by delta code points.

	1284 * Interpret the input index as a code unit-based offset into the string,

	1285 * move the index forward or backward by delta code points, and

	1286 * return the resulting index.

	1287 * The input index should point to the first code unit of a code point,

	1288 * if there is more than one.

	1289 *

	1290 * Both input and output indexes are code unit-based as for all

	1291 * string indexes/offsets in ICU (and other libraries, like MBCS char*).

	1292 * If delta<0 then the index is moved backward (toward the start of the string).

	1293 * If delta>0 then the index is moved forward (toward the end of the string).

	1294 *

	1295 * This behaves like CharacterIterator::move32(delta, kCurrent).

	1296 *

	1297 * Behavior for out-of-bounds indexes:

	1298 * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,

	1299 * if the input index<0 then it is pinned to 0;

	1300 * if it is index>length() then it is pinned to length().

	1301 * Afterwards, the index is moved by <code>delta</code> code points

	1302 * forward or backward,

	1303 * but no further backward than to 0 and no further forward than to length().

	1304 * The resulting index return value will be in between 0 and length(), inclusively.

	1305 *

	1306 * Examples:

	1307 * <pre>

	1308 * // s has code points 'a' U+10000 'b' U+10ffff U+2029

	1309 * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();

	1310 *

	1311 * // initial index: position of U+10000

	1312 * int32_t index=1;

	1313 *

	1314 * // the following examples will all result in index==4, position of U+10ffff

	1315 *

	1316 * // skip 2 code points from some position in the string

	1317 * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'

	1318 *

	1319 * // go to the 3rd code point from the start of s (0-based)

	1320 * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'

	1321 *

	1322 * // go to the next-to-last code point of s

	1323 * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff

	1324 * </pre>

	1325 *

	1326 * @param index input code unit index

	1327 * @param delta (signed) code point count to move the index forward or backward

	1328 * in the string

	1329 * @return the resulting code unit index

	1330 * @stable ICU 2.0

	1331 */

	1332 int32_t moveIndex32(int32_t index, int32_t delta) const;

	1333

	1334 /* Substring extraction */

	1335

	1336 /**

	1337 * Copy the characters in the range

	1338 * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,

	1339 * beginning at <tt>dstStart</tt>.

	1340 * If the string aliases to <code>dst</code> itself as an external buffer,

	1341 * then extract() will not copy the contents.

	1342 *

	1343 * @param start offset of first character which will be copied into the array

	1344 * @param length the number of characters to extract

	1345 * @param dst array in which to copy characters. The length of <tt>dst</tt>

	1346 * must be at least (<tt>dstStart + length</tt>).

	1347 * @param dstStart the offset in <TT>dst</TT> where the first character

	1348 * will be extracted

	1349 * @stable ICU 2.0

	1350 */

	1351 inline void extract(int32_t start,

	1352 int32_t length,

	1353 UChar *dst,

	1354 int32_t dstStart = 0) const;

	1355

	1356 /**

	1357 * Copy the contents of the string into dest.

	1358 * This is a convenience function that

	1359 * checks if there is enough space in dest,

	1360 * extracts the entire string if possible,

	1361 * and NUL-terminates dest if possible.

	1362 *

	1363 * If the string fits into dest but cannot be NUL-terminated

	1364 * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.

	1365 * If the string itself does not fit into dest

	1366 * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.

	1367 *

	1368 * If the string aliases to <code>dest</code> itself as an external buffer,

	1369 * then extract() will not copy the contents.

	1370 *

	1371 * @param dest Destination string buffer.

	1372 * @param destCapacity Number of UChars available at dest.

	1373 * @param errorCode ICU error code.

	1374 * @return length()

	1375 * @stable ICU 2.0

	1376 */

	1377 int32_t

	1378 extract(UChar *dest, int32_t destCapacity,

	1379 UErrorCode &errorCode) const;

	1380

	1381 /**

	1382 * Copy the characters in the range

	1383 * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString

	1384 * <tt>target</tt>.

	1385 * @param start offset of first character which will be copied

	1386 * @param length the number of characters to extract

	1387 * @param target UnicodeString into which to copy characters.

	1388 * The result is delivered only through this parameter; the function itself returns void.

	1389 * @stable ICU 2.0

	1390 */

	1391 inline void extract(int32_t start,

	1392 int32_t length,

	1393 UnicodeString& target) const;

	1394

	1395 /**

	1396 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)

	1397 * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.

	1398 * @param start offset of first character which will be copied into the array

	1399 * @param limit offset immediately following the last character to be copied

	1400 * @param dst array in which to copy characters. The length of <tt>dst</tt>

	1401 * must be at least (<tt>dstStart + (limit - start)</tt>).

	1402 * @param dstStart the offset in <TT>dst</TT> where the first character

	1403 * will be extracted

	1404 * @stable ICU 2.0

	1405 */

	1406 inline void extractBetween(int32_t start,

	1407 int32_t limit,

	1408 UChar *dst,

	1409 int32_t dstStart = 0) const;

	1410

	1411 /**

	1412 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)

	1413 * into the UnicodeString <tt>target</tt>. Replaceable API.

	1414 * @param start offset of first character which will be copied

	1415 * @param limit offset immediately following the last character to be copied

	1416 * @param target UnicodeString into which to copy characters.

	1417 * The result is delivered only through this parameter; the function itself returns void.

	1418 * @stable ICU 2.0

	1419 */

	1420 virtual void extractBetween(int32_t start,

	1421 int32_t limit,

	1422 UnicodeString& target) const;

	1423

	1424 /**

	1425 * Copy the characters in the range

	1426 * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters.

	1427 * All characters must be invariant (see utypes.h).

	1428 * Use US_INV as the last, signature-distinguishing parameter.

	1429 *

	1430 * This function does not write any more than <code>targetLength</code>

	1431 * characters but returns the length of the entire output string

	1432 * so that one can allocate a larger buffer and call the function again

	1433 * if necessary.

	1434 * The output string is NUL-terminated if possible.

	1435 *

	1436 * @param start offset of first character which will be copied

	1437 * @param startLength the number of characters to extract

	1438 * @param target the target buffer for extraction, can be NULL

	1439 * if targetCapacity is 0

	1440 * @param targetCapacity the length of the target buffer

	1441 * @param inv Signature-distinguishing parameter, use US_INV.

	1442 * @return the output string length, not including the terminating NUL

	1443 * @stable ICU 3.2

	1444 */

	1445 int32_t extract(int32_t start,

	1446 int32_t startLength,

	1447 char *target,

	1448 int32_t targetCapacity,

	1449 enum EInvariant inv) const;

	1450

	1451 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION

	1452

	1453 /**

	1454 * Copy the characters in the range

	1455 * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters

	1456 * in the platform's default codepage.

	1457 * This function does not write any more than <code>targetLength</code>

	1458 * characters but returns the length of the entire output string

	1459 * so that one can allocate a larger buffer and call the function again

	1460 * if necessary.

	1461 * The output string is NUL-terminated if possible.

	1462 *

	1463 * @param start offset of first character which will be copied

	1464 * @param startLength the number of characters to extract

	1465 * @param target the target buffer for extraction

	1466 * @param targetLength the length of the target buffer

	1467 * If <TT>target</TT> is NULL, then the number of bytes required for

	1468 * <TT>target</TT> is returned.

	1469 * @return the output string length, not including the terminating NUL

	1470 * @stable ICU 2.0

	1471 */

	1472 int32_t extract(int32_t start,

	1473 int32_t startLength,

	1474 char *target,

	1475 uint32_t targetLength) const;

	1476

	1477 #endif

	1478

	1479 #if !UCONFIG_NO_CONVERSION

	1480

	1481 /**

	1482 * Copy the characters in the range

	1483 * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters

	1484 * in a specified codepage.

	1485 * The output string is NUL-terminated.

	1486 *

	1487 * Recommendation: For invariant-character strings use

	1488 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const

	1489 * because it avoids object code dependencies of UnicodeString on

	1490 * the conversion code.

	1491 *

	1492 * @param start offset of first character which will be copied

	1493 * @param startLength the number of characters to extract

	1494 * @param target the target buffer for extraction

	1495 * @param codepage the desired codepage for the characters. 0 has

	1496 * the special meaning of the default codepage

	1497 * If <code>codepage</code> is an empty string (<code>""</code>),

	1498 * then a simple conversion is performed on the codepage-invariant

	1499 * subset ("invariant characters") of the platform encoding. See utypes.h.

	1500 * If <TT>target</TT> is NULL, then the number of bytes required for

	1501 * <TT>target</TT> is returned. It is assumed that the target is big enough

	1502 * to fit all of the characters.

	1503 * @return the output string length, not including the terminating NUL

	1504 * @stable ICU 2.0

	1505 */

	1506 inline int32_t extract(int32_t start,

	1507 int32_t startLength,

	1508 char *target,

	1509 const char *codepage = 0) const;

	1510

	1511 /**

	1512 * Copy the characters in the range

	1513 * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters

	1514 * in a specified codepage.

	1515 * This function does not write any more than <code>targetLength</code>

	1516 * characters but returns the length of the entire output string

	1517 * so that one can allocate a larger buffer and call the function again

	1518 * if necessary.

	1519 * The output string is NUL-terminated if possible.

	1520 *

	1521 * Recommendation: For invariant-character strings use

	1522 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const

	1523 * because it avoids object code dependencies of UnicodeString on

	1524 * the conversion code.

	1525 *

	1526 * @param start offset of first character which will be copied

	1527 * @param startLength the number of characters to extract

	1528 * @param target the target buffer for extraction

	1529 * @param targetLength the length of the target buffer

	1530 * @param codepage the desired codepage for the characters. 0 has

	1531 * the special meaning of the default codepage

	1532 * If <code>codepage</code> is an empty string (<code>""</code>),

	1533 * then a simple conversion is performed on the codepage-invariant

	1534 * subset ("invariant characters") of the platform encoding. See utypes.h.

	1535 * If <TT>target</TT> is NULL, then the number of bytes required for

	1536 * <TT>target</TT> is returned.

	1537 * @return the output string length, not including the terminating NUL

	1538 * @stable ICU 2.0

	1539 */

	1540 int32_t extract(int32_t start,

	1541 int32_t startLength,

	1542 char *target,

	1543 uint32_t targetLength,

	1544 const char *codepage) const;

	1545

	1546 /**

	1547 * Convert the UnicodeString into a codepage string using an existing UConverter.

	1548 * The output string is NUL-terminated if possible.

	1549 *

	1550 * This function avoids the overhead of opening and closing a converter if

	1551 * multiple strings are extracted.

	1552 *

	1553 * @param dest destination string buffer, can be NULL if destCapacity==0

	1554 * @param destCapacity the number of chars available at dest

	1555 * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),

	1556 * or NULL for the default converter

	1557 * @param errorCode normal ICU error code

	1558 * @return the length of the output string, not counting the terminating NUL;

	1559 * if the length is greater than destCapacity, then the string will not fit

	1560 * and a buffer of the indicated length would need to be passed in

	1561 * @stable ICU 2.0

	1562 */

	1563 int32_t extract(char *dest, int32_t destCapacity,

	1564 UConverter *cnv,

	1565 UErrorCode &errorCode) const;

	1566

	1567 #endif

	1568

	1569 /**

	1570 * Create a temporary substring for the specified range.

	1571 * Unlike the substring constructor and setTo() functions,

	1572 * the object returned here will be a read-only alias (using getBuffer())

	1573 * rather than copying the text.

	1574 * As a result, this substring operation is much faster but requires

	1575 * that the original string not be modified or deleted during the lifetime

	1576 * of the returned substring object.

	1577 * @param start offset of the first character visible in the substring

	1578 * @param length length of the substring

	1579 * @return a read-only alias UnicodeString object for the substring

	1580 * @stable ICU 4.4

	1581 */

	1582 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;

	1583

	1584 /**

	1585 * Create a temporary substring for the specified range.

	1586 * Same as tempSubString(start, length) except that the substring range

	1587 * is specified as a (start, limit) pair (with an exclusive limit index)

	1588 * rather than a (start, length) pair.

	1589 * @param start offset of the first character visible in the substring

	1590 * @param limit offset immediately following the last character visible in the substring

	1591 * @return a read-only alias UnicodeString object for the substring

	1592 * @stable ICU 4.4

	1593 */

	1594 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;

	1595

	1596 /**

	1597 * Convert the UnicodeString to UTF-8 and write the result

	1598 * to a ByteSink. This is called by toUTF8String().

	1599 * Unpaired surrogates are replaced with U+FFFD.

	1600 * Calls u_strToUTF8WithSub().

	1601 *

	1602 * @param sink A ByteSink to which the UTF-8 version of the string is written.

	1603 * sink.Flush() is called at the end.

	1604 * @stable ICU 4.2

	1605 * @see toUTF8String

	1606 */

	1607 void toUTF8(ByteSink &sink) const;

	1608

	1609 #if U_HAVE_STD_STRING

	1610

	1611 /**

	1612 * Convert the UnicodeString to UTF-8 and append the result

	1613 * to a standard string.

	1614 * Unpaired surrogates are replaced with U+FFFD.

	1615 * Calls toUTF8().

	1616 *

	1617 * @param result A standard string (or a compatible object)

	1618 * to which the UTF-8 version of the string is appended.

	1619 * @return The string object.

	1620 * @stable ICU 4.2

	1621 * @see toUTF8

	1622 */

	1623 template<typename StringClass>

	1624 StringClass &toUTF8String(StringClass &result) const {

	1625 StringByteSink<StringClass> sbs(&result); // byte sink constructed over the caller's string object

	1626 toUTF8(sbs); // writes the UTF-8 form of this string to the sink (see toUTF8(ByteSink &))

	1627 return result; // hand back the same object that was passed in

	1628 }

	1629

	1630 #endif

	1631

	1632 /**

	1633 * Convert the UnicodeString to UTF-32.

	1634 * Unpaired surrogates are replaced with U+FFFD.

	1635 * Calls u_strToUTF32WithSub().

	1636 *

	1637 * @param utf32 destination string buffer, can be NULL if capacity==0

	1638 * @param capacity the number of UChar32s available at utf32

	1639 * @param errorCode Standard ICU error code. Its input value must

	1640 * pass the U_SUCCESS() test, or else the function returns

	1641 * immediately. Check for U_FAILURE() on output or use with

	1642 * function chaining. (See User Guide for details.)

	1643 * @return The length of the UTF-32 string.

	1644 * @see fromUTF32

	1645 * @stable ICU 4.2

	1646 */

	1647 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const ;

	1648

	1649 /* Length operations */

	1650

	1651 /**

	1652 * Return the length of the UnicodeString object.

	1653 * The length is the number of UChar code units in the UnicodeString.

	1654 * If you want the number of code points, please use countChar32().

	1655 * @return the length of the UnicodeString object

	1656 * @see countChar32

	1657 * @stable ICU 2.0

	1658 */

	1659 inline int32_t length(void) const;

	1660

	1661 /**

	1662 * Count Unicode code points in the length UChar code units of the string.

	1663 * A code point may occupy either one or two UChar code units.

	1664 * Counting code points involves reading all code units.

	1665 *

	1666 * This function is basically the inverse of moveIndex32().

	1667 *

	1668 * @param start the index of the first code unit to check

	1669 * @param length the number of UChar code units to check

	1670 * @return the number of code points in the specified code units

	1671 * @see length

	1672 * @stable ICU 2.0

	1673 */

	1674 int32_t

	1675 countChar32(int32_t start=0, int32_t length=INT32_MAX) const;

	1676

	1677 /**

	1678 * Check if the length UChar code units of the string

	1679 * contain more Unicode code points than a certain number.

	1680 * This is more efficient than counting all code points in this part of the string

	1681 * and comparing that number with a threshold.

	1682 * This function may not need to scan the string at all if the length

	1683 * falls within a certain range, and

	1684 * never needs to count more than 'number+1' code points.

	1685 * Logically equivalent to (countChar32(start, length)>number).

	1686 * A Unicode code point may occupy either one or two UChar code units.

	1687 *

	1688 * @param start the index of the first code unit to check (0 for the entire string)

	1689 * @param length the number of UChar code units to check

	1690 * (use INT32_MAX for the entire string; remember that start/length

	1691 * values are pinned)

	1692 * @param number The number of code points in the (sub)string is compared against

	1693 * the 'number' parameter.

	1694 * @return Boolean value for whether the string contains more Unicode code points

	1695 * than 'number'. Same as (u_countChar32(s, length)>number).

	1696 * @see countChar32

	1697 * @see u_strHasMoreChar32Than

	1698 * @stable ICU 2.4

	1699 */

	1700 UBool

	1701 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;

	1702

	1703 /**

	1704 * Determine if this string is empty.

	1705 * @return TRUE if this string contains 0 characters, FALSE otherwise.

	1706 * @stable ICU 2.0

	1707 */

	1708 inline UBool isEmpty(void) const;

	1709

	1710 /**

	1711 * Return the capacity of the internal buffer of the UnicodeString object.

	1712 * This is useful together with the getBuffer functions.

	1713 * See there for details.

	1714 *

	1715 * @return the number of UChars available in the internal buffer

	1716 * @see getBuffer

	1717 * @stable ICU 2.0

	1718 */

	1719 inline int32_t getCapacity(void) const;

	1720

	1721 /* Other operations */

	1722

	1723 /**

	1724 * Generate a hash code for this object.

	1725 * @return The hash code of this UnicodeString.

	1726 * @stable ICU 2.0

	1727 */

	1728 inline int32_t hashCode(void) const;

	1729

	1730 /**

	1731 * Determine if this object contains a valid string.

	1732 * A bogus string has no value. It is different from an empty string,

	1733 * although in both cases isEmpty() returns TRUE and length() returns 0.

	1734 * setToBogus() and isBogus() can be used to indicate that no string value is available.

	1735 * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and

	1736 * length() returns 0.

	1737 *

	1738 * @return TRUE if the string is valid, FALSE otherwise

	1739 * @see setToBogus()

	1740 * @stable ICU 2.0

	1741 */

	1742 inline UBool isBogus(void) const;

	1743

	1744

	1745 //========================================

	1746 // Write operations

	1747 //========================================

	1748

	1749 /* Assignment operations */

	1750

	1751 /**

	1752 * Assignment operator. Replace the characters in this UnicodeString

	1753 * with the characters from <TT>srcText</TT>.

	1754 * @param srcText The text containing the characters to replace

	1755 * @return a reference to this

	1756 * @stable ICU 2.0

	1757 */

	1758 UnicodeString &operator=(const UnicodeString &srcText);

	1759

	1760 /**

	1761 * Almost the same as the assignment operator.

	1762 * Replace the characters in this UnicodeString

	1763 * with the characters from <code>srcText</code>.

	1764 *

	1765 * This function works the same for all strings except for ones that

	1766 * are readonly aliases.

	1767 * Starting with ICU 2.4, the assignment operator and the copy constructor

	1768 * allocate a new buffer and copy the buffer contents even for readonly aliases.

	1769 * This function implements the old, more efficient but less safe behavior

	1770 * of making this string also a readonly alias to the same buffer.

	1771 * The fastCopyFrom function must be used only if it is known that the lifetime of

	1772 * this UnicodeString does not exceed the lifetime of the aliased buffer

	1773 * including its contents, for example for strings from resource bundles

	1774 * or aliases to string constants.

	1775 *

	1776 * @param src The text containing the characters to replace.

	1777 * @return a reference to this

	1778 * @stable ICU 2.4

	1779 */

	1780 UnicodeString &fastCopyFrom(const UnicodeString &src);

	1781

	1782 /**

	1783 * Assignment operator. Replace the characters in this UnicodeString

	1784 * with the code unit <TT>ch</TT>.

	1785 * @param ch the code unit to replace

	1786 * @return a reference to this

	1787 * @stable ICU 2.0

	1788 */

	1789 inline UnicodeString& operator= (UChar ch);

	1790

	1791 /**

	1792 * Assignment operator. Replace the characters in this UnicodeString

	1793 * with the code point <TT>ch</TT>.

	1794 * @param ch the code point to replace

	1795 * @return a reference to this

	1796 * @stable ICU 2.0

	1797 */

	1798 inline UnicodeString& operator= (UChar32 ch);

	1799

	1800 /**

	1801 * Set the text in the UnicodeString object to the characters

	1802 * in <TT>srcText</TT> in the range

	1803 * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).

	1804 * <TT>srcText</TT> is not modified.

	1805 * @param srcText the source for the new characters

	1806 * @param srcStart the offset into <TT>srcText</TT> where new characters

	1807 * will be obtained

	1808 * @return a reference to this

	1809 * @stable ICU 2.2

	1810 */

	1811 inline UnicodeString& setTo(const UnicodeString& srcText,

	1812 int32_t srcStart);

	1813

	1814 /**

	1815 * Set the text in the UnicodeString object to the characters

	1816 * in <TT>srcText</TT> in the range

	1817 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

	1818 * <TT>srcText</TT> is not modified.

	1819 * @param srcText the source for the new characters

	1820 * @param srcStart the offset into <TT>srcText</TT> where new characters

	1821 * will be obtained

	1822 * @param srcLength the number of characters in <TT>srcText</TT> in the

	1823 * replace string.

	1824 * @return a reference to this

	1825 * @stable ICU 2.0

	1826 */

	1827 inline UnicodeString& setTo(const UnicodeString& srcText,

	1828 int32_t srcStart,

	1829 int32_t srcLength);

	1830

	1831 /**

	1832 * Set the text in the UnicodeString object to the characters in

	1833 * <TT>srcText</TT>.

	1834 * <TT>srcText</TT> is not modified.

	1835 * @param srcText the source for the new characters

	1836 * @return a reference to this

	1837 * @stable ICU 2.0

	1838 */

	1839 inline UnicodeString& setTo(const UnicodeString& srcText);

	1840

	1841 /**

	1842 * Set the characters in the UnicodeString object to the characters

	1843 * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.

	1844 * @param srcChars the source for the new characters

	1845 * @param srcLength the number of Unicode characters in srcChars.

	1846 * @return a reference to this

	1847 * @stable ICU 2.0

	1848 */

	1849 inline UnicodeString& setTo(const UChar *srcChars,

	1850 int32_t srcLength);

	1851

	1852 /**

	1853 * Set the characters in the UnicodeString object to the code unit

	1854 * <TT>srcChar</TT>.

	1855 * @param srcChar the code unit which becomes the UnicodeString's character

	1856 * content

	1857 * @return a reference to this

	1858 * @stable ICU 2.0

	1859 */

	1860 UnicodeString& setTo(UChar srcChar);

	1861

	1862 /**

	1863 * Set the characters in the UnicodeString object to the code point

	1864 * <TT>srcChar</TT>.

	1865 * @param srcChar the code point which becomes the UnicodeString's character

	1866 * content

	1867 * @return a reference to this

	1868 * @stable ICU 2.0

	1869 */

	1870 UnicodeString& setTo(UChar32 srcChar);

	1871

	1872 /**

	1873 * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.

	1874 * The text will be used for the UnicodeString object, but

	1875 * it will not be released when the UnicodeString is destroyed.

	1876 * This has copy-on-write semantics:

	1877 * When the string is modified, then the buffer is first copied into

	1878 * newly allocated memory.

	1879 * The aliased buffer is never modified.

	1880 * In an assignment to another UnicodeString with the copy constructor or the assignment operator, the text will be copied;

	1881 * with fastCopyFrom(), the text will be aliased again, so that both strings then alias the same readonly-text.

	1882 *

	1883 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.

	1884 * This must be true if <code>textLength==-1</code>.

	1885 * @param text The characters to alias for the UnicodeString.

	1886 * @param textLength The number of Unicode characters in <code>text</code> to alias.

	1887 * If -1, then this function will determine the length

	1888 * by calling <code>u_strlen()</code>.

	1889 * @return a reference to this

	1890 * @stable ICU 2.0

	1891 */

	1892 UnicodeString &setTo(UBool isTerminated,

	1893 const UChar *text,

	1894 int32_t textLength);

	1895

	1896 /**

	1897 * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.

	1898 * The text will be used for the UnicodeString object, but

	1899 * it will not be released when the UnicodeString is destroyed.

	1900 * This has write-through semantics:

	1901 * For as long as the capacity of the buffer is sufficient, write operations

	1902 * will directly affect the buffer. When more capacity is necessary, then

	1903 * a new buffer will be allocated and the contents copied as with regularly

	1904 * constructed strings.

	1905 * In an assignment to another UnicodeString, the buffer will be copied.

	1906 * The extract(UChar *dst) function detects whether the dst pointer is the same

	1907 * as the string buffer itself and will in this case not copy the contents.

	1908 *

	1909 * @param buffer The characters to alias for the UnicodeString.

	1910 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.

	1911 * @param buffCapacity The size of <code>buffer</code> in UChars.

	1912 * @return a reference to this

	1913 * @stable ICU 2.0

	1914 */

	1915 UnicodeString &setTo(UChar *buffer,

	1916 int32_t buffLength,

	1917 int32_t buffCapacity);

	1918

	1919 /**

	1920 * Make this UnicodeString object invalid.

	1921 * The string will test TRUE with isBogus().

	1922 *

	1923 * A bogus string has no value. It is different from an empty string.

	1924 * It can be used to indicate that no string value is available.

	1925 * getBuffer() and getTerminatedBuffer() return NULL, and

	1926 * length() returns 0.

	1927 *

	1928 * This utility function is used throughout the UnicodeString

	1929 * implementation to indicate that a UnicodeString operation failed,

	1930 * and may be used in other functions,

	1931 * especially but not exclusively when such functions do not

	1932 * take a UErrorCode for simplicity.

	1933 *

	1934 * The following methods, and no others, will clear a string object's bogus flag:

	1935 * - remove()

	1936 * - remove(0, INT32_MAX)

	1937 * - truncate(0)

	1938 * - operator=() (assignment operator)

	1939 * - setTo(...)

	1940 *

	1941 * The simplest way to turn a bogus string into an empty one

	1942 * is to use the remove() function.

	1943 * Examples for other functions that are equivalent to "set to empty string":

	1944 * \code

	1945 * if(s.isBogus()) {

	1946 * s.remove(); // set to an empty string (remove all), or

	1947 * s.remove(0, INT32_MAX); // set to an empty string (remove all), or

	1948 * s.truncate(0); // set to an empty string (complete truncation), or

	1949 * s=UnicodeString(); // assign an empty string, or

	1950 * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or

	1951 * static const UChar nul=0;

	1952 * s.setTo(&nul, 0); // set to an empty C Unicode string

	1953 * }

	1954 * \endcode

	1955 *

	1956 * @see isBogus()

	1957 * @stable ICU 2.0

	1958 */

	1959 void setToBogus();

	1960

	1961 /**

	1962 * Set the character at the specified offset to the specified character.

	1963 * @param offset A valid offset into the text of the character to set

	1964 * @param ch The new character

	1965 * @return A reference to this

	1966 * @stable ICU 2.0

	1967 */

	1968 UnicodeString& setCharAt(int32_t offset,

	1969 UChar ch);

	1970

	1971

	1972 /* Append operations */

	1973

	1974 /**

	1975 * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString

	1976 * object.

	1977 * @param ch the code unit to be appended

	1978 * @return a reference to this

	1979 * @stable ICU 2.0

	1980 */

	1981 inline UnicodeString& operator+= (UChar ch);

	1982

	1983 /**

	1984 * Append operator. Append the code point <TT>ch</TT> to the UnicodeString

	1985 * object.

	1986 * @param ch the code point to be appended

	1987 * @return a reference to this

	1988 * @stable ICU 2.0

	1989 */

	1990 inline UnicodeString& operator+= (UChar32 ch);

	1991

	1992 /**

	1993 * Append operator. Append the characters in <TT>srcText</TT> to the

	1994 * UnicodeString object. <TT>srcText</TT> is

	1995 * not modified.

	1996 * @param srcText the source for the new characters

	1997 * @return a reference to this

	1998 * @stable ICU 2.0

	1999 */

	2000 inline UnicodeString& operator+= (const UnicodeString& srcText);

	2001

	2002 /**

	2003 * Append the characters

	2004 * in <TT>srcText</TT> in the range

	2005 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the

	2006 * UnicodeString object. <TT>srcText</TT>

	2007 * is not modified.

	2008 * @param srcText the source for the new characters

	2009 * @param srcStart the offset into <TT>srcText</TT> where new characters

	2010 * will be obtained

	2011 * @param srcLength the number of characters in <TT>srcText</TT> in

	2012 * the append string

	2013 * @return a reference to this

	2014 * @stable ICU 2.0

	2015 */

	2016 inline UnicodeString& append(const UnicodeString& srcText,

	2017 int32_t srcStart,

	2018 int32_t srcLength);

	2019

	2020 /**

	2021 * Append the characters in <TT>srcText</TT> to the UnicodeString object.

	2022 * <TT>srcText</TT> is not modified.

	2023 * @param srcText the source for the new characters

	2024 * @return a reference to this

	2025 * @stable ICU 2.0

	2026 */

	2027 inline UnicodeString& append(const UnicodeString& srcText);

	2028

	2029 /**

	2030 * Append the characters in <TT>srcChars</TT> in the range

	2031 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString

	2032 * object.

	2033 * <TT>srcChars</TT> is not modified.

	2034 * @param srcChars the source for the new characters

	2035 * @param srcStart the offset into <TT>srcChars</TT> where new characters

	2036 * will be obtained

	2037 * @param srcLength the number of characters in <TT>srcChars</TT> in

	2038 * the append string

	2039 * @return a reference to this

	2040 * @stable ICU 2.0

	2041 */

	2042 inline UnicodeString& append(const UChar *srcChars,

	2043 int32_t srcStart,

	2044 int32_t srcLength);

	2045

	2046 /**

	2047 * Append the characters in <TT>srcChars</TT> to the UnicodeString object.

	2048 * <TT>srcChars</TT> is not modified.

	2049 * @param srcChars the source for the new characters

	2050 * @param srcLength the number of Unicode characters in <TT>srcChars</TT>

	2051 * @return a reference to this

	2052 * @stable ICU 2.0

	2053 */

	2054 inline UnicodeString& append(const UChar *srcChars,

	2055 int32_t srcLength);

	2056

	2057 /**

	2058 * Append the code unit <TT>srcChar</TT> to the UnicodeString object.

	2059 * @param srcChar the code unit to append

	2060 * @return a reference to this

	2061 * @stable ICU 2.0

	2062 */

	2063 inline UnicodeString& append(UChar srcChar);

	2064

	2065 /**

	2066 * Append the code point <TT>srcChar</TT> to the UnicodeString object.

	2067 * @param srcChar the code point to append

	2068 * @return a reference to this

	2069 * @stable ICU 2.0

	2070 */

	2071 inline UnicodeString& append(UChar32 srcChar);

	2072

	2073

	2074 /* Insert operations */

	2075

	2076 /**

	2077 * Insert the characters in <TT>srcText</TT> in the range

	2078 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString

	2079 * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.

	2080 * @param start the offset where the insertion begins

	2081 * @param srcText the source for the new characters

	2082 * @param srcStart the offset into <TT>srcText</TT> where new characters

	2083 * will be obtained

	2084 * @param srcLength the number of characters in <TT>srcText</TT> in

	2085 * the insert string

	2086 * @return a reference to this

	2087 * @stable ICU 2.0

	2088 */

	2089 inline UnicodeString& insert(int32_t start,

	2090 const UnicodeString& srcText,

	2091 int32_t srcStart,

	2092 int32_t srcLength);

	2093

	2094 /**

	2095 * Insert the characters in <TT>srcText</TT> into the UnicodeString object

	2096 * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.

	2097 * @param start the offset where the insertion begins

	2098 * @param srcText the source for the new characters

	2099 * @return a reference to this

	2100 * @stable ICU 2.0

	2101 */

	2102 inline UnicodeString& insert(int32_t start,

	2103 const UnicodeString& srcText);

	2104

	2105 /**

	2106 * Insert the characters in <TT>srcChars</TT> in the range

	2107 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString

	2108 * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.

	2109 * @param start the offset at which the insertion begins

	2110 * @param srcChars the source for the new characters

	2111 * @param srcStart the offset into <TT>srcChars</TT> where new characters

	2112 * will be obtained

	2113 * @param srcLength the number of characters in <TT>srcChars</TT>

	2114 * in the insert string

	2115 * @return a reference to this

	2116 * @stable ICU 2.0

	2117 */

	2118 inline UnicodeString& insert(int32_t start,

	2119 const UChar *srcChars,

	2120 int32_t srcStart,

	2121 int32_t srcLength);

	2122

	2123 /**

	2124 * Insert the characters in <TT>srcChars</TT> into the UnicodeString object

	2125 * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.

	2126 * @param start the offset where the insertion begins

	2127 * @param srcChars the source for the new characters

	2128 * @param srcLength the number of Unicode characters in srcChars.

	2129 * @return a reference to this

	2130 * @stable ICU 2.0

	2131 */

	2132 inline UnicodeString& insert(int32_t start,

	2133 const UChar *srcChars,

	2134 int32_t srcLength);

	2135

	2136 /**

	2137 * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at

	2138 * offset <TT>start</TT>.

	2139 * @param start the offset at which the insertion occurs

	2140 * @param srcChar the code unit to insert

	2141 * @return a reference to this

	2142 * @stable ICU 2.0

	2143 */

	2144 inline UnicodeString& insert(int32_t start,

	2145 UChar srcChar);

	2146

	2147 /**

	2148 * Insert the code point <TT>srcChar</TT> into the UnicodeString object at

	2149 * offset <TT>start</TT>.

	2150 * @param start the offset at which the insertion occurs

	2151 * @param srcChar the code point to insert

	2152 * @return a reference to this

	2153 * @stable ICU 2.0

	2154 */

	2155 inline UnicodeString& insert(int32_t start,

	2156 UChar32 srcChar);

	2157

	2158

	2159 /* Replace operations */

	2160

	2161 /**

	2162 * Replace the characters in the range

	2163 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in

	2164 * <TT>srcText</TT> in the range

	2165 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

	2166 * <TT>srcText</TT> is not modified.

	2167 * @param start the offset at which the replace operation begins

	2168 * @param length the number of characters to replace. The character at

	2169 * <TT>start + length</TT> is not modified.

	2170 * @param srcText the source for the new characters

	2171 * @param srcStart the offset into <TT>srcText</TT> where new characters

	2172 * will be obtained

	2173 * @param srcLength the number of characters in <TT>srcText</TT> in

	2174 * the replace string

	2175 * @return a reference to this

	2176 * @stable ICU 2.0

	2177 */

	2178 UnicodeString& replace(int32_t start,

	2179 int32_t length,

	2180 const UnicodeString& srcText,

	2181 int32_t srcStart,

	2182 int32_t srcLength);

	2183

	2184 /**

	2185 * Replace the characters in the range

	2186 * [<TT>start</TT>, <TT>start + length</TT>)

	2187 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is

	2188 * not modified.

	2189 * @param start the offset at which the replace operation begins

	2190 * @param length the number of characters to replace. The character at

	2191 * <TT>start + length</TT> is not modified.

	2192 * @param srcText the source for the new characters

	2193 * @return a reference to this

	2194 * @stable ICU 2.0

	2195 */

	2196 UnicodeString& replace(int32_t start,

	2197 int32_t length,

	2198 const UnicodeString& srcText);

	2199

	2200 /**

	2201 * Replace the characters in the range

	2202 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in

	2203 * <TT>srcChars</TT> in the range

	2204 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>

	2205 * is not modified.

	2206 * @param start the offset at which the replace operation begins

	2207 * @param length the number of characters to replace. The character at

	2208 * <TT>start + length</TT> is not modified.

	2209 * @param srcChars the source for the new characters

	2210 * @param srcStart the offset into <TT>srcChars</TT> where new characters

	2211 * will be obtained

	2212 * @param srcLength the number of characters in <TT>srcChars</TT>

	2213 * in the replace string

	2214 * @return a reference to this

	2215 * @stable ICU 2.0

	2216 */

	2217 UnicodeString& replace(int32_t start,

	2218 int32_t length,

	2219 const UChar *srcChars,

	2220 int32_t srcStart,

	2221 int32_t srcLength);

	2222

	2223 /**

	2224 * Replace the characters in the range

	2225 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in

	2226 * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.

	2227 * @param start the offset at which the replace operation begins

	2228 * @param length number of characters to replace. The character at

	2229 * <TT>start + length</TT> is not modified.

	2230 * @param srcChars the source for the new characters

	2231 * @param srcLength the number of Unicode characters in srcChars

	2232 * @return a reference to this

	2233 * @stable ICU 2.0

	2234 */

	2235 inline UnicodeString& replace(int32_t start,

	2236 int32_t length,

	2237 const UChar *srcChars,

	2238 int32_t srcLength);

	2239

	2240 /**

	2241 * Replace the characters in the range

	2242 * [<TT>start</TT>, <TT>start + length</TT>) with the code unit

	2243 * <TT>srcChar</TT>.

	2244 * @param start the offset at which the replace operation begins

	2245 * @param length the number of characters to replace. The character at

	2246 * <TT>start + length</TT> is not modified.

	2247 * @param srcChar the new code unit

	2248 * @return a reference to this

	2249 * @stable ICU 2.0

	2250 */

	2251 inline UnicodeString& replace(int32_t start,

	2252 int32_t length,

	2253 UChar srcChar);

	2254

	2255 /**

	2256 * Replace the characters in the range

	2257 * [<TT>start</TT>, <TT>start + length</TT>) with the code point

	2258 * <TT>srcChar</TT>.

	2259 * @param start the offset at which the replace operation begins

	2260 * @param length the number of characters to replace. The character at

	2261 * <TT>start + length</TT> is not modified.

	2262 * @param srcChar the new code point

	2263 * @return a reference to this

	2264 * @stable ICU 2.0

	2265 */

	2266 inline UnicodeString& replace(int32_t start,

	2267 int32_t length,

	2268 UChar32 srcChar);

	2269

	2270 /**

	2271 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)

	2272 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.

	2273 * @param start the offset at which the replace operation begins

	2274 * @param limit the offset immediately following the replace range

	2275 * @param srcText the source for the new characters

	2276 * @return a reference to this

	2277 * @stable ICU 2.0

	2278 */

	2279 inline UnicodeString& replaceBetween(int32_t start,

	2280 int32_t limit,

	2281 const UnicodeString& srcText);

	2282

	2283 /**

	2284 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)

	2285 * with the characters in <TT>srcText</TT> in the range

	2286 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.

	2287 * @param start the offset at which the replace operation begins

	2288 * @param limit the offset immediately following the replace range

	2289 * @param srcText the source for the new characters

	2290 * @param srcStart the offset into <TT>srcText</TT> where new characters

	2291 * will be obtained

	2292 * @param srcLimit the offset immediately following the range to copy

	2293 * in <TT>srcText</TT>

	2294 * @return a reference to this

	2295 * @stable ICU 2.0

	2296 */

	2297 inline UnicodeString& replaceBetween(int32_t start,

	2298 int32_t limit,

	2299 const UnicodeString& srcText,

	2300 int32_t srcStart,

	2301 int32_t srcLimit);

	2302

	2303 /**

	2304 * Replace a substring of this object with the given text.

	2305 * @param start the beginning index, inclusive; <code>0 <= start

	2306 * <= limit</code>.

	2307 * @param limit the ending index, exclusive; <code>start <= limit

	2308 * <= length()</code>.

	2309 * @param text the text to replace characters <code>start</code>

	2310 * to <code>limit - 1</code>

	2311 * @stable ICU 2.0

	2312 */

	2313 virtual void handleReplaceBetween(int32_t start,

	2314 int32_t limit,

	2315 const UnicodeString& text);

	2316

	2317 /**

	2318 * Replaceable API

	2319 * @return TRUE if it has MetaData

	2320 * @stable ICU 2.4

	2321 */

	2322 virtual UBool hasMetaData() const;

	2323

	2324 /**

	2325 * Copy a substring of this object, retaining attribute (out-of-band)

	2326 * information. This method is used to duplicate or reorder substrings.

	2327 * The destination index must not overlap the source range.

	2328 *

	2329 * @param start the beginning index, inclusive; <code>0 <= start <=

	2330 * limit</code>.

	2331 * @param limit the ending index, exclusive; <code>start <= limit <=

	2332 * length()</code>.

	2333 * @param dest the destination index. The characters from

	2334 * <code>start..limit-1</code> will be copied to <code>dest</code>.

	2335 * Implementations of this method may assume that <code>dest <= start ||

	2336 * dest >= limit</code>.

	2337 * @stable ICU 2.0

	2338 */

	2339 virtual void copy(int32_t start, int32_t limit, int32_t dest);

	2340

	2341 /* Search and replace operations */

	2342

	2343 /**

	2344 * Replace all occurrences of characters in oldText with the characters

	2345 * in newText

	2346 * @param oldText the text containing the search text

	2347 * @param newText the text containing the replacement text

	2348 * @return a reference to this

	2349 * @stable ICU 2.0

	2350 */

	2351 inline UnicodeString& findAndReplace(const UnicodeString& oldText,

	2352 const UnicodeString& newText);

	2353

	2354 /**

	2355 * Replace all occurrences of characters in oldText with characters

	2356 * in newText

	2357 * in the range [<TT>start</TT>, <TT>start + length</TT>).

	2358 * @param start the start of the range in which replace will be performed

	2359 * @param length the length of the range in which replace will be performed

	2360 * @param oldText the text containing the search text

	2361 * @param newText the text containing the replacement text

	2362 * @return a reference to this

	2363 * @stable ICU 2.0

	2364 */

	2365 inline UnicodeString& findAndReplace(int32_t start,

	2366 int32_t length,

	2367 const UnicodeString& oldText,

	2368 const UnicodeString& newText);

	2369

	2370 /**

	2371 * Replace all occurrences of characters in oldText in the range

	2372 * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters

	2373 * in newText in the range

	2374 * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)

	2375 * in the range [<TT>start</TT>, <TT>start + length</TT>).

	2376 * @param start the start of the range in which replace will be performed

	2377 * @param length the length of the range in which replace will be performed

	2378 * @param oldText the text containing the search text

	2379 * @param oldStart the start of the search range in <TT>oldText</TT>

	2380 * @param oldLength the length of the search range in <TT>oldText</TT>

	2381 * @param newText the text containing the replacement text

	2382 * @param newStart the start of the replacement range in <TT>newText</TT>

	2383 * @param newLength the length of the replacement range in <TT>newText</TT>

	2384 * @return a reference to this

	2385 * @stable ICU 2.0

	2386 */

	2387 UnicodeString& findAndReplace(int32_t start,

	2388 int32_t length,

	2389 const UnicodeString& oldText,

	2390 int32_t oldStart,

	2391 int32_t oldLength,

	2392 const UnicodeString& newText,

	2393 int32_t newStart,

	2394 int32_t newLength);

	2395

	2396

	2397 /* Remove operations */

	2398

	2399 /**

	2400 * Remove all characters from the UnicodeString object.

	2401 * @return a reference to this

	2402 * @stable ICU 2.0

	2403 */

	2404 inline UnicodeString& remove(void);

	2405

	2406 /**

	2407 * Remove the characters in the range

	2408 * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.

	2409 * @param start the offset of the first character to remove

	2410 * @param length the number of characters to remove

	2411 * @return a reference to this

	2412 * @stable ICU 2.0

	2413 */

	2414 inline UnicodeString& remove(int32_t start,

	2415 int32_t length = (int32_t)INT32_MAX);

	2416

	2417 /**

	2418 * Remove the characters in the range

	2419 * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.

	2420 * @param start the offset of the first character to remove

	2421 * @param limit the offset immediately following the range to remove

	2422 * @return a reference to this

	2423 * @stable ICU 2.0

	2424 */

	2425 inline UnicodeString& removeBetween(int32_t start,

	2426 int32_t limit = (int32_t)INT32_MAX);

	2427

	2428 /**

	2429 * Retain only the characters in the range

	2430 * [<code>start</code>, <code>limit</code>) from the UnicodeString object.

	2431 * Removes characters before <code>start</code> and at and after <code>limit</code>.

	2432 * @param start the offset of the first character to retain

	2433 * @param limit the offset immediately following the range to retain

	2434 * @return a reference to this

	2435 * @stable ICU 4.4

	2436 */

	2437 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);

	2438

	2439 /* Length operations */

	2440

	2441 /**

	2442 * Pad the start of this UnicodeString with the character <TT>padChar</TT>.

	2443 * If the length of this UnicodeString is less than targetLength,

	2444 * targetLength - length() copies of padChar will be added to the

	2445 * beginning of this UnicodeString.

	2446 * @param targetLength the desired length of the string

	2447 * @param padChar the character to use for padding. Defaults to

	2448 * space (U+0020)

	2449 * @return TRUE if the text was padded, FALSE otherwise.

	2450 * @stable ICU 2.0

	2451 */

	2452 UBool padLeading(int32_t targetLength,

	2453 UChar padChar = 0x0020);

	2454

	2455 /**

	2456 * Pad the end of this UnicodeString with the character <TT>padChar</TT>.

	2457 * If the length of this UnicodeString is less than targetLength,

	2458 * targetLength - length() copies of padChar will be added to the

	2459 * end of this UnicodeString.

	2460 * @param targetLength the desired length of the string

	2461 * @param padChar the character to use for padding. Defaults to

	2462 * space (U+0020)

	2463 * @return TRUE if the text was padded, FALSE otherwise.

	2464 * @stable ICU 2.0

	2465 */

	2466 UBool padTrailing(int32_t targetLength,

	2467 UChar padChar = 0x0020);

	2468

	2469 /**

	2470 * Truncate this UnicodeString to the <TT>targetLength</TT>.

	2471 * @param targetLength the desired length of this UnicodeString.

	2472 * @return TRUE if the text was truncated, FALSE otherwise

	2473 * @stable ICU 2.0

	2474 */

	2475 inline UBool truncate(int32_t targetLength);

	2476

	2477 /**

	2478 * Trims leading and trailing whitespace from this UnicodeString.

	2479 * @return a reference to this

	2480 * @stable ICU 2.0

	2481 */

	2482 UnicodeString& trim(void);

	2483

	2484

	2485 /* Miscellaneous operations */

	2486

	2487 /**

	2488 * Reverse this UnicodeString in place.

	2489 * @return a reference to this

	2490 * @stable ICU 2.0

	2491 */

	2492 inline UnicodeString& reverse(void);

	2493

	2494 /**

	2495 * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in

	2496 * this UnicodeString.

	2497 * @param start the start of the range to reverse

	2498 * @param length the number of characters to reverse

	2499 * @return a reference to this

	2500 * @stable ICU 2.0

	2501 */

	2502 inline UnicodeString& reverse(int32_t start,

	2503 int32_t length);

	2504

	2505 /**

	2506 * Convert the characters in this to UPPER CASE following the conventions of

	2507 * the default locale.

	2508 * @return A reference to this.

	2509 * @stable ICU 2.0

	2510 */

	2511 UnicodeString& toUpper(void);

	2512

	2513 /**

	2514 * Convert the characters in this to UPPER CASE following the conventions of

	2515 * a specific locale.

	2516 * @param locale The locale containing the conventions to use.

	2517 * @return A reference to this.

	2518 * @stable ICU 2.0

	2519 */

	2520 UnicodeString& toUpper(const Locale& locale);

	2521

	2522 /**

	2523 * Convert the characters in this to lower case following the conventions of

	2524 * the default locale.

	2525 * @return A reference to this.

	2526 * @stable ICU 2.0

	2527 */

	2528 UnicodeString& toLower(void);

	2529

	2530 /**

	2531 * Convert the characters in this to lower case following the conventions of

	2532 * a specific locale.

	2533 * @param locale The locale containing the conventions to use.

	2534 * @return A reference to this.

	2535 * @stable ICU 2.0

	2536 */

	2537 UnicodeString& toLower(const Locale& locale);

	2538

	2539 #if !UCONFIG_NO_BREAK_ITERATION

	2540

	2541 /**

	2542 * Titlecase this string, convenience function using the default locale.

	2543 *

	2544 * Casing is locale-dependent and context-sensitive.

	2545 * Titlecasing uses a break iterator to find the first characters of words

	2546 * that are to be titlecased. It titlecases those characters and lowercases

	2547 * all others.

	2548 *

	2549 * The titlecase break iterator can be provided to customize for arbitrary

	2550 * styles, using rules and dictionaries beyond the standard iterators.

	2551 * It may be more efficient to always provide an iterator to avoid

	2552 * opening and closing one for each string.

	2553 * The standard titlecase iterator for the root locale implements the

	2554 * algorithm of Unicode TR 21.

	2555 *

	2556 * This function uses only the setText(), first() and next() methods of the

	2557 * provided break iterator.

	2558 *

	2559 * @param titleIter A break iterator to find the first characters of words

	2560 * that are to be titlecased.

	2561 * If none is provided (0), then a standard titlecase

	2562 * break iterator is opened.

	2563 * Otherwise the provided iterator is set to the string's text.

	2564 * @return A reference to this.

	2565 * @stable ICU 2.1

	2566 */

	2567 UnicodeString &toTitle(BreakIterator *titleIter);

	2568

	2569 /**

	2570 * Titlecase this string.

	2571 *

	2572 * Casing is locale-dependent and context-sensitive.

	2573 * Titlecasing uses a break iterator to find the first characters of words

	2574 * that are to be titlecased. It titlecases those characters and lowercases

	2575 * all others.

	2576 *

	2577 * The titlecase break iterator can be provided to customize for arbitrary

	2578 * styles, using rules and dictionaries beyond the standard iterators.

	2579 * It may be more efficient to always provide an iterator to avoid

	2580 * opening and closing one for each string.

	2581 * The standard titlecase iterator for the root locale implements the

	2582 * algorithm of Unicode TR 21.

	2583 *

	2584 * This function uses only the setText(), first() and next() methods of the

	2585 * provided break iterator.

	2586 *

	2587 * @param titleIter A break iterator to find the first characters of words

	2588 * that are to be titlecased.

	2589 * If none is provided (0), then a standard titlecase

	2590 * break iterator is opened.

	2591 * Otherwise the provided iterator is set to the string's text.

	2592 * @param locale The locale to consider.

	2593 * @return A reference to this.

	2594 * @stable ICU 2.1

	2595 */

	2596 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);

	2597

	2598 /**

	2599 * Titlecase this string, with options.

	2600 *

	2601 * Casing is locale-dependent and context-sensitive.

	2602 * Titlecasing uses a break iterator to find the first characters of words

	2603 * that are to be titlecased. It titlecases those characters and lowercases

	2604 * all others. (This can be modified with options.)

	2605 *

	2606 * The titlecase break iterator can be provided to customize for arbitrary

	2607 * styles, using rules and dictionaries beyond the standard iterators.

	2608 * It may be more efficient to always provide an iterator to avoid

	2609 * opening and closing one for each string.

	2610 * The standard titlecase iterator for the root locale implements the

	2611 * algorithm of Unicode TR 21.

	2612 *

	2613 * This function uses only the setText(), first() and next() methods of the

	2614 * provided break iterator.

	2615 *

	2616 * @param titleIter A break iterator to find the first characters of words

	2617 * that are to be titlecased.

	2618 * If none is provided (0), then a standard titlecase

	2619 * break iterator is opened.

	2620 * Otherwise the provided iterator is set to the string's text.

	2621 * @param locale The locale to consider.

	2622 * @param options Options bit set, see ucasemap_open().

	2623 * @return A reference to this.

	2624 * @see U_TITLECASE_NO_LOWERCASE

	2625 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT

	2626 * @see ucasemap_open

	2627 * @stable ICU 3.8

	2628 */

	2629 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);

	2630

	2631 #endif

	2632

	2633 /**

	2634 * Case-fold the characters in this string.

	2635 * Case-folding is locale-independent and not context-sensitive,

	2636 * but there is an option for whether to include or exclude mappings for dotted I

	2637 * and dotless i that are marked with 'I' in CaseFolding.txt.

	2638 * The result may be longer or shorter than the original.

	2639 *

	2640 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I

	2641 * @return A reference to this.

	2642 * @stable ICU 2.0

	2643 */

	2644 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);

	2645

	2646 //========================================

	2647 // Access to the internal buffer

	2648 //========================================

	2649

	2650 /**

	2651 * Get a read/write pointer to the internal buffer.

	2652 * The buffer is guaranteed to be large enough for at least minCapacity UChars,

	2653 * writable, and is still owned by the UnicodeString object.

	2654 * Calls to getBuffer(minCapacity) must not be nested, and

	2655 * must be matched with calls to releaseBuffer(newLength).

	2656 * If the string buffer was read-only or shared,

	2657 * then it will be reallocated and copied.

	2658 *

	2659 * An attempted nested call will return 0, and will not further modify the

	2660 * state of the UnicodeString object.

	2661 * It also returns 0 if the string is bogus.

	2662 *

	2663 * The actual capacity of the string buffer may be larger than minCapacity.

	2664 * getCapacity() returns the actual capacity.

	2665 * For many operations, the full capacity should be used to avoid reallocations.

	2666 *

	2667 * While the buffer is "open" between getBuffer(minCapacity)

	2668 * and releaseBuffer(newLength), the following applies:

	2669 * - The string length is set to 0.

	2670 * - Any read API call on the UnicodeString object will behave like on a 0-length string.

	2671 * - Any write API call on the UnicodeString object is disallowed and will have no effect.

	2672 * - You can read from and write to the returned buffer.

	2673 * - The previous string contents will still be in the buffer;

	2674 * if you want to use it, then you need to call length() before getBuffer(minCapacity).

	2675 * If the length() was greater than minCapacity, then any contents after minCapacity

	2676 * may be lost.

	2677 * The buffer contents is not NUL-terminated by getBuffer().

	2678 * If length()<getCapacity() then you can terminate it by writing a NUL

	2679 * at index length().

	2680 * - You must call releaseBuffer(newLength) before and in order to

	2681 * return to normal UnicodeString operation.

	2682 *

	2683 * @param minCapacity the minimum number of UChars that are to be available

	2684 * in the buffer, starting at the returned pointer;

	2685 * default to the current string capacity if minCapacity==-1

	2686 * @return a writable pointer to the internal string buffer,

	2687 * or 0 if an error occurs (nested calls, out of memory)

	2688 *

	2689 * @see releaseBuffer

	2690 * @see getTerminatedBuffer()

	2691 * @stable ICU 2.0

	2692 */

	2693 UChar *getBuffer(int32_t minCapacity);

	2694

	2695 /**

	2696 * Release a read/write buffer on a UnicodeString object with an

	2697 * "open" getBuffer(minCapacity).

	2698 * This function must be called in a matched pair with getBuffer(minCapacity).

	2699 * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".

	2700 *

	2701 * It will set the string length to newLength, at most to the current capacity.

	2702 * If newLength==-1 then it will set the length according to the

	2703 * first NUL in the buffer, or to the capacity if there is no NUL.

	2704 *

	2705 * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.

	2706 *

	2707 * @param newLength the new length of the UnicodeString object;

	2708 * defaults to the current capacity if newLength is greater than that;

	2709 * if newLength==-1, it defaults to u_strlen(buffer) but not more than

	2710 * the current capacity of the string

	2711 *

	2712 * @see getBuffer(int32_t minCapacity)

	2713 * @stable ICU 2.0

	2714 */

	2715 void releaseBuffer(int32_t newLength=-1);

	2716

	2717 /**

	2718 * Get a read-only pointer to the internal buffer.

	2719 * This can be called at any time on a valid UnicodeString.

	2720 *

	2721 * It returns 0 if the string is bogus, or

	2722 * during an "open" getBuffer(minCapacity).

	2723 *

	2724 * It can be called as many times as desired.

	2725 * The pointer that it returns will remain valid until the UnicodeString object is modified,

	2726 * at which time the pointer is semantically invalidated and must not be used any more.

	2727 *

	2728 * The capacity of the buffer can be determined with getCapacity().

	2729 * The part after length() may or may not be initialized and valid,

	2730 * depending on the history of the UnicodeString object.

	2731 *

	2732 * The buffer contents is (probably) not NUL-terminated.

	2733 * You can check if it is with

	2734 * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.

	2735 * (See getTerminatedBuffer().)

	2736 *

	2737 * The buffer may reside in read-only memory. Its contents must not

	2738 * be modified.

	2739 *

	2740 * @return a read-only pointer to the internal string buffer,

	2741 * or 0 if the string is empty or bogus

	2742 *

	2743 * @see getBuffer(int32_t minCapacity)

	2744 * @see getTerminatedBuffer()

	2745 * @stable ICU 2.0

	2746 */

	2747 inline const UChar *getBuffer() const;

	2748

	2749 /**

	2750 * Get a read-only pointer to the internal buffer,

	2751 * making sure that it is NUL-terminated.

	2752 * This can be called at any time on a valid UnicodeString.

	2753 *

	2754 * It returns 0 if the string is bogus, or

	2755 * during an "open" getBuffer(minCapacity), or if the buffer cannot

	2756 * be NUL-terminated (because memory allocation failed).

	2757 *

	2758 * It can be called as many times as desired.

	2759 * The pointer that it returns will remain valid until the UnicodeString object is modified,

	2760 * at which time the pointer is semantically invalidated and must not be used any more.

	2761 *

	2762 * The capacity of the buffer can be determined with getCapacity().

	2763 * The part after length()+1 may or may not be initialized and valid,

	2764 * depending on the history of the UnicodeString object.

	2765 *

	2766 * The buffer contents is guaranteed to be NUL-terminated.

	2767 * getTerminatedBuffer() may reallocate the buffer if a terminating NUL

	2768 * is written.

	2769 * For this reason, this function is not const, unlike getBuffer().

	2770 * Note that a UnicodeString may also contain NUL characters as part of its contents.

	2771 *

	2772 * The buffer may reside in read-only memory. Its contents must not

	2773 * be modified.

	2774 *

	2775 * @return a read-only pointer to the internal string buffer,

	2776 * or 0 if the string is empty or bogus

	2777 *

	2778 * @see getBuffer(int32_t minCapacity)

	2779 * @see getBuffer()

	2780 * @stable ICU 2.2

	2781 */

	2782 inline const UChar *getTerminatedBuffer();

	2783

	2784 //========================================

	2785 // Constructors

	2786 //========================================

	2787

	2788 /** Construct an empty UnicodeString.

	2789 * @stable ICU 2.0

	2790 */

	2791 UnicodeString();

	2792

	2793 /**

	2794 * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars

	2795 * @param capacity the number of UChars this UnicodeString should hold

	2796 * before a resize is necessary; if count is greater than 0 and count

	2797 * code points c take up more space than capacity, then capacity is adjusted

	2798 * accordingly.

	2799 * @param c is used to initially fill the string

	2800 * @param count specifies how many code points c are to be written in the

	2801 * string

	2802 * @stable ICU 2.0

	2803 */

	2804 UnicodeString(int32_t capacity, UChar32 c, int32_t count);

	2805

	2806 /**

	2807 * Single UChar (code unit) constructor.

	2808 * @param ch the character to place in the UnicodeString

	2809 * @stable ICU 2.0

	2810 */

	2811 UnicodeString(UChar ch);

	2812

	2813 /**

	2814 * Single UChar32 (code point) constructor.

	2815 * @param ch the character to place in the UnicodeString

	2816 * @stable ICU 2.0

	2817 */

	2818 UnicodeString(UChar32 ch);

	2819

	2820 /**

	2821 * UChar* constructor.

	2822 * @param text The characters to place in the UnicodeString. <TT>text</TT>

	2823 * must be NUL (U+0000) terminated.

	2824 * @stable ICU 2.0

	2825 */

	2826 UnicodeString(const UChar *text);

	2827

	2828 /**

	2829 * UChar* constructor.

	2830 * @param text The characters to place in the UnicodeString.

	2831 * @param textLength The number of Unicode characters in <TT>text</TT>

	2832 * to copy.

	2833 * @stable ICU 2.0

	2834 */

	2835 UnicodeString(const UChar *text,

	2836 int32_t textLength);

	2837

	2838 /**

	2839 * Readonly-aliasing UChar* constructor.

	2840 * The text will be used for the UnicodeString object, but

	2841 * it will not be released when the UnicodeString is destroyed.

	2842 * This has copy-on-write semantics:

	2843 * When the string is modified, then the buffer is first copied into

	2844 * newly allocated memory.

	2845 * The aliased buffer is never modified.

	2846 * In an assignment to another UnicodeString, the text will be aliased again,

	2847 * so that both strings then alias the same readonly-text.

	2848 *

	2849 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.

	2850 * This must be true if <code>textLength==-1</code>.

	2851 * @param text The characters to alias for the UnicodeString.

	2852 * @param textLength The number of Unicode characters in <code>text</code> to alias.

	2853 * If -1, then this constructor will determine the length

	2854 * by calling <code>u_strlen()</code>.

	2855 * @stable ICU 2.0

	2856 */

	2857 UnicodeString(UBool isTerminated,

	2858 const UChar *text,

	2859 int32_t textLength);

	2860

	2861 /**

	2862 * Writable-aliasing UChar* constructor.

	2863 * The text will be used for the UnicodeString object, but

	2864 * it will not be released when the UnicodeString is destroyed.

	2865 * This has write-through semantics:

	2866 * For as long as the capacity of the buffer is sufficient, write operations

	2867 * will directly affect the buffer. When more capacity is necessary, then

	2868 * a new buffer will be allocated and the contents copied as with regularly

	2869 * constructed strings.

	2870 * In an assignment to another UnicodeString, the buffer will be copied.

	2871 * The extract(UChar *dst) function detects whether the dst pointer is the same

	2872 * as the string buffer itself and will in this case not copy the contents.

	2873 *

	2874 * @param buffer The characters to alias for the UnicodeString.

	2875 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.

	2876 * @param buffCapacity The size of <code>buffer</code> in UChars.

	2877 * @stable ICU 2.0

	2878 */

	2879 UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);

	2880

	2881 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION

	2882

	2883 /**

	2884 * char* constructor.

	2885 * @param codepageData an array of bytes, null-terminated,

	2886 * in the platform's default codepage.

	2887 * @stable ICU 2.0

	2888 */

	2889 UnicodeString(const char *codepageData);

	2890

	2891 /**

	2892 * char* constructor.

	2893 * @param codepageData an array of bytes in the platform's default codepage.

	2894 * @param dataLength The number of bytes in <TT>codepageData</TT>.

	2895 * @stable ICU 2.0

	2896 */

	2897 UnicodeString(const char *codepageData, int32_t dataLength);

	2898

	2899 #endif

	2900

	2901 #if !UCONFIG_NO_CONVERSION

	2902

	2903 /**

	2904 * char* constructor.

	2905 * @param codepageData an array of bytes, null-terminated

	2906 * @param codepage the encoding of <TT>codepageData</TT>. The special

	2907 * value 0 for <TT>codepage</TT> indicates that the text is in the

	2908 * platform's default codepage.

	2909 *

	2910 * If <code>codepage</code> is an empty string (<code>""</code>),

	2911 * then a simple conversion is performed on the codepage-invariant

	2912 * subset ("invariant characters") of the platform encoding. See utypes.h.

	2913 * Recommendation: For invariant-character strings use the constructor

	2914 * UnicodeString(const char *src, int32_t length, enum EInvariant inv)

	2915 * because it avoids object code dependencies of UnicodeString on

	2916 * the conversion code.

	2917 *

	2918 * @stable ICU 2.0

	2919 */

	2920 UnicodeString(const char *codepageData, const char *codepage);

	2921

	2922 /**

	2923 * char* constructor.

	2924 * @param codepageData an array of bytes.

	2925 * @param dataLength The number of bytes in <TT>codepageData</TT>.

	2926 * @param codepage the encoding of <TT>codepageData</TT>. The special

	2927 * value 0 for <TT>codepage</TT> indicates that the text is in the

	2928 * platform's default codepage.

	2929 * If <code>codepage</code> is an empty string (<code>""</code>),

	2930 * then a simple conversion is performed on the codepage-invariant

	2931 * subset ("invariant characters") of the platform encoding. See utypes.h.

	2932 * Recommendation: For invariant-character strings use the constructor

	2933 * UnicodeString(const char *src, int32_t length, enum EInvariant inv)

	2934 * because it avoids object code dependencies of UnicodeString on

	2935 * the conversion code.

	2936 *

	2937 * @stable ICU 2.0

	2938 */

	2939 UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);

	2940

	2941 /**

	2942 * char * / UConverter constructor.

	2943 * This constructor uses an existing UConverter object to

	2944 * convert the codepage string to Unicode and construct a UnicodeString

	2945 * from that.

	2946 *

	2947 * The converter is reset at first.

	2948 * If the error code indicates a failure before this constructor is called,

	2949 * or if an error occurs during conversion or construction,

	2950 * then the string will be bogus.

	2951 *

	2952 * This function avoids the overhead of opening and closing a converter if

	2953 * multiple strings are constructed.

	2954 *

	2955 * @param src input codepage string

	2956 * @param srcLength length of the input string, can be -1 for NUL-terminated strings

	2957 * @param cnv converter object (ucnv_resetToUnicode() will be called),

	2958 * can be NULL for the default converter

	2959 * @param errorCode normal ICU error code

	2960 * @stable ICU 2.0

	2961 */

	2962 UnicodeString(

	2963 const char *src, int32_t srcLength,

	2964 UConverter *cnv,

	2965 UErrorCode &errorCode);

	2966

	2967 #endif

	2968

	2969 /**

	2970 * Constructs a Unicode string from an invariant-character char * string.

	2971 * About invariant characters see utypes.h.

	2972 * This constructor has no runtime dependency on conversion code and is

	2973 * therefore recommended over ones taking a charset name string

	2974 * (where the empty string "" indicates invariant-character conversion).

	2975 *

	2976 * Use the macro US_INV as the third, signature-distinguishing parameter.

	2977 *

	2978 * For example:

	2979 * \code

	2980 * void fn(const char *s) {

	2981 * UnicodeString ustr(s, -1, US_INV);

	2982 * // use ustr ...

	2983 * }

	2984 * \endcode

	2985 *

	2986 * @param src String using only invariant characters.

	2987 * @param length Length of src, or -1 if NUL-terminated.

	2988 * @param inv Signature-distinguishing parameter, use US_INV.

	2989 *

	2990 * @see US_INV

	2991 * @stable ICU 3.2

	2992 */

	2993 UnicodeString(const char *src, int32_t length, enum EInvariant inv);

	2994

	2995

	2996 /**

	2997 * Copy constructor.

	2998 * @param that The UnicodeString object to copy.

	2999 * @stable ICU 2.0

	3000 */

	3001 UnicodeString(const UnicodeString& that);

	3002

	3003 /**

	3004 * 'Substring' constructor from tail of source string.

	3005 * @param src The UnicodeString object to copy.

	3006 * @param srcStart The offset into <tt>src</tt> at which to start copying.

	3007 * @stable ICU 2.2

	3008 */

	3009 UnicodeString(const UnicodeString& src, int32_t srcStart);

	3010

	3011 /**

	3012 * 'Substring' constructor from subrange of source string.

	3013 * @param src The UnicodeString object to copy.

	3014 * @param srcStart The offset into <tt>src</tt> at which to start copying.

	3015 * @param srcLength The number of characters from <tt>src</tt> to copy.

	3016 * @stable ICU 2.2

	3017 */

	3018 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);

	3019

	3020 /**

	3021 * Clone this object, an instance of a subclass of Replaceable.

	3022 * Clones can be used concurrently in multiple threads.

	3023 * If a subclass does not implement clone(), or if an error occurs,

	3024 * then NULL is returned.

	3025 * The clone functions in all subclasses return a pointer to a Replaceable

	3026 * because some compilers do not support covariant (same-as-this)

	3027 * return types; cast to the appropriate subclass if necessary.

	3028 * The caller must delete the clone.

	3029 *

	3030 * @return a clone of this object

	3031 *

	3032 * @see Replaceable::clone

	3033 * @see getDynamicClassID

	3034 * @stable ICU 2.6

	3035 */

	3036 virtual Replaceable *clone() const;

	3037

	3038 /** Destructor.

	3039 * @stable ICU 2.0

	3040 */

	3041 virtual ~UnicodeString();

	3042

	3043 /**

	3044 * Create a UnicodeString from a UTF-8 string.

	3045 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.

	3046 * Calls u_strFromUTF8WithSub().

	3047 *

	3048 * @param utf8 UTF-8 input string.

	3049 * Note that a StringPiece can be implicitly constructed

	3050 * from a std::string or a NUL-terminated const char * string.

	3051 * @return A UnicodeString with equivalent UTF-16 contents.

	3052 * @see toUTF8

	3053 * @see toUTF8String

	3054 * @stable ICU 4.2

	3055 */

	3056 static UnicodeString fromUTF8(const StringPiece &utf8);

	3057

	3058 /**

	3059 * Create a UnicodeString from a UTF-32 string.

	3060 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.

	3061 * Calls u_strFromUTF32WithSub().

	3062 *

	3063 * @param utf32 UTF-32 input string. Must not be NULL.

	3064 * @param length Length of the input string, or -1 if NUL-terminated.

	3065 * @return A UnicodeString with equivalent UTF-16 contents.

	3066 * @see toUTF32

	3067 * @stable ICU 4.2

	3068 */

	3069 static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);

	3070

	3071 /* Miscellaneous operations */

	3072

	3073 /**

	3074 * Unescape a string of characters and return a string containing

	3075 * the result. The following escape sequences are recognized:

	3076 *

	3077 * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]

	3078 * \\Uhhhhhhhh 8 hex digits

	3079 * \\xhh 1-2 hex digits

	3080 * \\ooo 1-3 octal digits; o in [0-7]

	3081 * \\cX control-X; X is masked with 0x1F

	3082 *

	3083 * as well as the standard ANSI C escapes:

	3084 *

	3085 * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,

	3086 * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,

	3087 * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C

	3088 *

	3089 * Anything else following a backslash is generically escaped. For

	3090 * example, "[a\\-z]" returns "[a-z]".

	3091 *

	3092 * If an escape sequence is ill-formed, this method returns an empty

	3093 * string. An example of an ill-formed sequence is "\\u" followed by

	3094 * fewer than 4 hex digits.

	3095 *

	3096 * This function is similar to u_unescape() but not identical to it.

	3097 * The latter takes a source char*, so it does escape recognition

	3098 * and also invariant conversion.

	3099 *

	3100 * @return a string with backslash escapes interpreted, or an

	3101 * empty string on error.

	3102 * @see UnicodeString#unescapeAt()

	3103 * @see u_unescape()

	3104 * @see u_unescapeAt()

	3105 * @stable ICU 2.0

	3106 */

	3107 UnicodeString unescape() const;

	3108

	3109 /**

	3110 * Unescape a single escape sequence and return the represented

	3111 * character. See unescape() for a listing of the recognized escape

	3112 * sequences. The character at offset-1 is assumed (without

	3113 * checking) to be a backslash. If the escape sequence is

	3114 * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is

	3115 * returned.

	3116 *

	3117 * @param offset an input output parameter. On input, it is the

	3118 * offset into this string where the escape sequence is located,

	3119 * after the initial backslash. On output, it is advanced after the

	3120 * last character parsed. On error, it is not advanced at all.

	3121 * @return the character represented by the escape sequence at

	3122 * offset, or (UChar32)0xFFFFFFFF on error.

	3123 * @see UnicodeString#unescape()

	3124 * @see u_unescape()

	3125 * @see u_unescapeAt()

	3126 * @stable ICU 2.0

	3127 */

	3128 UChar32 unescapeAt(int32_t &offset) const;

	3129

	3130 /**

	3131 * ICU "poor man's RTTI", returns a UClassID for this class.

	3132 *

	3133 * @stable ICU 2.2

	3134 */

	3135 static UClassID U_EXPORT2 getStaticClassID();

	3136

	3137 /**

	3138 * ICU "poor man's RTTI", returns a UClassID for the actual class.

	3139 *

	3140 * @stable ICU 2.2

	3141 */

	3142 virtual UClassID getDynamicClassID() const;

	3143

	3144 //========================================

	3145 // Implementation methods

	3146 //========================================

	3147

	3148 protected:

	3149 /**

	3150 * Implement Replaceable::getLength() (see jitterbug 1027).

	3151 * @stable ICU 2.4

	3152 */

	3153 virtual int32_t getLength() const;

	3154

	3155 /**

	3156 * The change in Replaceable to use virtual getCharAt() allows

	3157 * UnicodeString::charAt() to be inline again (see jitterbug 709).

	3158 * @stable ICU 2.4

	3159 */

	3160 virtual UChar getCharAt(int32_t offset) const;

	3161

	3162 /**

	3163 * The change in Replaceable to use virtual getChar32At() allows

	3164 * UnicodeString::char32At() to be inline again (see jitterbug 709).

	3165 * @stable ICU 2.4

	3166 */

	3167 virtual UChar32 getChar32At(int32_t offset) const;

	3168

	3169 private:

	3170 // For char* constructors. Could be made public.

	3171 UnicodeString &setToUTF8(const StringPiece &utf8);

	3172 // For extract(char*).

	3173 // We could make a toUTF8(target, capacity, errorCode) public but not

	3174 // this version: New API will be cleaner if we make callers create substrings

	3175 // rather than having start+length on every method,

	3176 // and it should take a UErrorCode&.

	3177 int32_t

	3178 toUTF8(int32_t start, int32_t len,

	3179 char *target, int32_t capacity) const;

	3180

	3181

	3182 inline int8_t

	3183 doCompare(int32_t start,

	3184 int32_t length,

	3185 const UnicodeString& srcText,

	3186 int32_t srcStart,

	3187 int32_t srcLength) const;

	3188

	3189 int8_t doCompare(int32_t start,

	3190 int32_t length,

	3191 const UChar *srcChars,

	3192 int32_t srcStart,

	3193 int32_t srcLength) const;

	3194

	3195 inline int8_t

	3196 doCompareCodePointOrder(int32_t start,

	3197 int32_t length,

	3198 const UnicodeString& srcText,

	3199 int32_t srcStart,

	3200 int32_t srcLength) const;

	3201

	3202 int8_t doCompareCodePointOrder(int32_t start,

	3203 int32_t length,

	3204 const UChar *srcChars,

	3205 int32_t srcStart,

	3206 int32_t srcLength) const;

	3207

	3208 inline int8_t

	3209 doCaseCompare(int32_t start,

	3210 int32_t length,

	3211 const UnicodeString &srcText,

	3212 int32_t srcStart,

	3213 int32_t srcLength,

	3214 uint32_t options) const;

	3215

	3216 int8_t

	3217 doCaseCompare(int32_t start,

	3218 int32_t length,

	3219 const UChar *srcChars,

	3220 int32_t srcStart,

	3221 int32_t srcLength,

	3222 uint32_t options) const;

	3223

	3224 int32_t doIndexOf(UChar c,

	3225 int32_t start,

	3226 int32_t length) const;

	3227

	3228 int32_t doIndexOf(UChar32 c,

	3229 int32_t start,

	3230 int32_t length) const;

	3231

	3232 int32_t doLastIndexOf(UChar c,

	3233 int32_t start,

	3234 int32_t length) const;

	3235

	3236 int32_t doLastIndexOf(UChar32 c,

	3237 int32_t start,

	3238 int32_t length) const;

	3239

	3240 void doExtract(int32_t start,

	3241 int32_t length,

	3242 UChar *dst,

	3243 int32_t dstStart) const;

	3244

	3245 inline void doExtract(int32_t start,

	3246 int32_t length,

	3247 UnicodeString& target) const;

	3248

	3249 inline UChar doCharAt(int32_t offset) const;

	3250

	3251 UnicodeString& doReplace(int32_t start,

	3252 int32_t length,

	3253 const UnicodeString& srcText,

	3254 int32_t srcStart,

	3255 int32_t srcLength);

	3256

	3257 UnicodeString& doReplace(int32_t start,

	3258 int32_t length,

	3259 const UChar *srcChars,

	3260 int32_t srcStart,

	3261 int32_t srcLength);

	3262

	3263 UnicodeString& doReverse(int32_t start,

	3264 int32_t length);

	3265

	3266 // calculate hash code

	3267 int32_t doHashCode(void) const;

	3268

	3269 // get pointer to start of array

	3270 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function

	3271 inline UChar* getArrayStart(void);

	3272 inline const UChar* getArrayStart(void) const;

	3273

	3274 // A UnicodeString object (not necessarily its current buffer)

	3275 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).

	3276 inline UBool isWritable() const;

	3277

	3278 // Is the current buffer writable?

	3279 inline UBool isBufferWritable() const;

	3280

	3281 // None of the following does releaseArray().

	3282 inline void setLength(int32_t len); // sets only fShortLength and fLength

	3283 inline void setToEmpty(); // sets fFlags=kShortString

	3284 inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags

	3285

	3286 // allocate the array; result may be fStackBuffer

	3287 // sets refCount to 1 if appropriate

	3288 // sets fArray, fCapacity, and fFlags

	3289 // returns boolean for success or failure

	3290 UBool allocate(int32_t capacity);

	3291

	3292 // release the array if owned

	3293 void releaseArray(void);

	3294

	3295 // turn a bogus string into an empty one

	3296 void unBogus();

	3297

	3298 // implements assignment operator, copy constructor, and fastCopyFrom()

	3299 UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);

	3300

	3301 // Pin start and limit to acceptable values.

	3302 inline void pinIndex(int32_t& start) const;

	3303 inline void pinIndices(int32_t& start,

	3304 int32_t& length) const;

	3305

	3306 #if !UCONFIG_NO_CONVERSION

	3307

	3308 /* Internal extract() using UConverter. */

	3309 int32_t doExtract(int32_t start, int32_t length,

	3310 char *dest, int32_t destCapacity,

	3311 UConverter *cnv,

	3312 UErrorCode &errorCode) const;

	3313

	3314 /*

	3315 * Real constructor for converting from codepage data.

	3316 * It assumes that it is called with !fRefCounted.

	3317 *

	3318 * If <code>codepage==0</code>, then the default converter

	3319 * is used for the platform encoding.

	3320 * If <code>codepage</code> is an empty string (<code>""</code>),

	3321 * then a simple conversion is performed on the codepage-invariant

	3322 * subset ("invariant characters") of the platform encoding. See utypes.h.

	3323 */

	3324 void doCodepageCreate(const char *codepageData,

	3325 int32_t dataLength,

	3326 const char *codepage);

	3327

	3328 /*

	3329 * Worker function for creating a UnicodeString from

	3330 * a codepage string using a UConverter.

	3331 */

	3332 void

	3333 doCodepageCreate(const char *codepageData,

	3334 int32_t dataLength,

	3335 UConverter *converter,

	3336 UErrorCode &status);

	3337

	3338 #endif

	3339

	3340 /*

	3341 * This function is called when write access to the array

	3342 * is necessary.

	3343 *

	3344 * We need to make a copy of the array if

	3345 * the buffer is read-only, or

	3346 * the buffer is refCounted (shared), and refCount>1, or

	3347 * the buffer is too small.

	3348 *

	3349 * Return FALSE if memory could not be allocated.

	3350 */

	3351 UBool cloneArrayIfNeeded(int32_t newCapacity = -1,

	3352 int32_t growCapacity = -1,

	3353 UBool doCopyArray = TRUE,

	3354 int32_t **pBufferToDelete = 0,

	3355 UBool forceClone = FALSE);

	3356

	3357 // common function for case mappings

	3358 UnicodeString &

	3359 caseMap(BreakIterator *titleIter,

	3360 const char *locale,

	3361 uint32_t options,

	3362 int32_t toWhichCase);

	3363

	3364 // ref counting

	3365 void addRef(void);

	3366 int32_t removeRef(void);

	3367 int32_t refCount(void) const;

	3368

	3369 // constants

	3370 enum {

	3371 // Set the stack buffer size so that sizeof(UnicodeString) is a multiple of sizeof(pointer):

	3372 // 32-bit pointers: 4+1+1+13*2 = 32 bytes

	3373 // 64-bit pointers: 8+1+1+15*2 = 40 bytes

	3374 US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for small strings

	3375 kInvalidUChar=0xffff, // invalid UChar index

	3376 kGrowSize=128, // grow size for this buffer

	3377 kInvalidHashCode=0, // invalid hash code

	3378 kEmptyHashCode=1, // hash code for empty string

	3379

	3380 // bit flag values for fFlags

	3381 kIsBogus=1, // this string is bogus, i.e., not valid or NULL

	3382 kUsingStackBuffer=2,// fArray==fStackBuffer

	3383 kRefCounted=4, // there is a refCount field before the characters in fArray

	3384 kBufferIsReadonly=8,// do not write to this buffer

	3385 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),

	3386 // and releaseBuffer(newLength) must be called

	3387

	3388 // combined values for convenience

	3389 kShortString=kUsingStackBuffer, // characters are stored in fStackBuffer

	3390 kLongString=kRefCounted, // characters are in fArray, preceded by a refCount field

	3391 kReadonlyAlias=kBufferIsReadonly, // buffer must not be written to

	3392 kWritableAlias=0 // no flag bits set

	3393 };

	3394

	3395 friend class StringThreadTest;

	3396

	3397 union StackBufferOrFields; // forward declaration necessary before friend declaration

	3398 friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion

	3399

	3400 /*

	3401 * The following are all the class fields that are stored

	3402 * in each UnicodeString object.

	3403 * Note that UnicodeString has virtual functions,

	3404 * therefore there is an implicit vtable pointer

	3405 * as the first real field.

	3406 * The fields should be aligned such that no padding is

	3407 * necessary, mostly by having larger types first.

	3408 * On 32-bit machines, the size should be 32 bytes,

	3409 * on 64-bit machines (8-byte pointers), it should be 40 bytes.

	3410 */

	3411 // (implicit) *vtable;

	3412 int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength

	3413 uint8_t fFlags; // bit flags: see constants above

	3414 union StackBufferOrFields {

	3415 // fStackBuffer is used iff (fFlags&kUsingStackBuffer)

	3416 // else fFields is used

	3417 UChar fStackBuffer [US_STACKBUF_SIZE]; // buffer for small strings

	3418 struct {

	3419 uint16_t fPadding; // align the following field at 8B (32b pointers) or 12B (64b)

	3420 int32_t fLength; // number of characters in fArray if >127; else undefined

	3421 UChar *fArray; // the Unicode data (aligned at 12B (32b pointers) or 16B (64b))

	3422 int32_t fCapacity; // sizeof fArray

	3423 } fFields;

	3424 } fUnion;

	3425 };

	3426

	3427 /**

	3428 * Create a new UnicodeString with the concatenation of two others.

	3429 *

	3430 * @param s1 The first string to be copied to the new one.

	3431 * @param s2 The second string to be copied to the new one, after s1.

	3432 * @return UnicodeString(s1).append(s2)

	3433 * @stable ICU 2.8

	3434 */

	3435 U_COMMON_API UnicodeString U_EXPORT2

	3436 operator+ (const UnicodeString &s1, const UnicodeString &s2);

	3437

	3438 //========================================

	3439 // Inline members

	3440 //========================================

	3441

	3442 //========================================

	3443 // Privates

	3444 //========================================

	3445

	3446 inline void

	3447 UnicodeString::pinIndex(int32_t& start) const

	3448 {

	3449 // clamp start in place to the range [0, length()]

	3450 if(start < 0) {

	3451 start = 0;

	3452 } else if(start > length()) {

	3453 start = length();

	3454 }

	3455 }

	3456

	3457 inline void

	3458 UnicodeString::pinIndices(int32_t& start,

	3459 int32_t& _length) const

	3460 {

	3461 // clamp start to [0, length()] and _length to [0, length() - start], both in place

	3462 int32_t len = length();

	3463 if(start < 0) {

	3464 start = 0;

	3465 } else if(start > len) {

	3466 start = len;

	3467 }

	3468 if(_length < 0) {

	3469 _length = 0;

	3470 } else if(_length > (len - start)) { // start is already pinned, so len - start >= 0

	3471 _length = (len - start);

	3472 }

	3473 }

	3474

	3475 inline UChar*

	3476 UnicodeString::getArrayStart() // fStackBuffer when kUsingStackBuffer is set, otherwise fFields.fArray

	3477 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }

	3478

	3479 inline const UChar*

	3480 UnicodeString::getArrayStart() const // fStackBuffer when kUsingStackBuffer is set, otherwise fFields.fArray

	3481 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }

	3482

	3483 //========================================

	3484 // Read-only implementation methods

	3485 //========================================

	3486 inline int32_t

	3487 UnicodeString::length() const // a negative fShortLength means the real length is in fFields.fLength

	3488 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }

	3489

	3490 inline int32_t

	3491 UnicodeString::getCapacity() const // US_STACKBUF_SIZE for stack-buffer strings, otherwise fFields.fCapacity

	3492 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }

	3493

	3494 inline int32_t

	3495 UnicodeString::hashCode() const

	3496 { return doHashCode(); }

	3497

	3498 inline UBool

	3499 UnicodeString::isBogus() const

	3500 { return (UBool)(fFlags & kIsBogus); }

	3501

	3502 inline UBool

	3503 UnicodeString::isWritable() const // TRUE unless the string is bogus or has an open getBuffer(minCapacity)

	3504 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }

	3505

	3506 inline UBool

	3507 UnicodeString::isBufferWritable() const

	3508 {

	3509 return (UBool)(

	3510 !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && // not open, not bogus, not read-only

	3511 (!(fFlags&kRefCounted) || refCount()==1)); // and either not ref-counted or refCount is exactly 1

	3512 }

	3513

	3514 inline const UChar *

	3515 UnicodeString::getBuffer() const {

	3516 if(fFlags&(kIsBogus|kOpenGetBuffer)) { // no buffer is handed out while bogus or while getBuffer(minCapacity) is open

	3517 return 0;

	3518 } else if(fFlags&kUsingStackBuffer) {

	3519 return fUnion.fStackBuffer;

	3520 } else {

	3521 return fUnion.fFields.fArray;

	3522 }

	3523 }

	3524

	3525 //========================================

	3526 // Read-only alias methods

	3527 //========================================

	3528 inline int8_t

	3529 UnicodeString::doCompare(int32_t start,

	3530 int32_t thisLength,

	3531 const UnicodeString& srcText,

	3532 int32_t srcStart,

	3533 int32_t srcLength) const

	3534 {

	3535 if(srcText.isBogus()) {

	3536 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise

	3537 } else {

	3538 srcText.pinIndices(srcStart, srcLength); // clamp the source range to srcText's bounds

	3539 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);

	3540 }

	3541 }

	3542

	3543 inline UBool

	3544 UnicodeString::operator== (const UnicodeString& text) const

	3545 {

	3546 if(isBogus()) {

	3547 return text.isBogus(); // a bogus string equals only another bogus string

	3548 } else {

	3549 int32_t len = length(), textLength = text.length();

	3550 return

	3551 !text.isBogus() &&

	3552 len == textLength && // lengths are checked before the contents are compared

	3553 doCompare(0, len, text, 0, textLength) == 0;

	3554 }

	3555 }

	3556

	3557 inline UBool

	3558 UnicodeString::operator!= (const UnicodeString& text) const

	3559 { return (! operator==(text)); }

	3560

	3561 inline UBool

	3562 UnicodeString::operator> (const UnicodeString& text) const

	3563 { return doCompare(0, length(), text, 0, text.length()) == 1; }

	3564

	3565 inline UBool

	3566 UnicodeString::operator< (const UnicodeString& text) const

	3567 { return doCompare(0, length(), text, 0, text.length()) == -1; }

	3568

	3569 inline UBool

	3570 UnicodeString::operator>= (const UnicodeString& text) const

	3571 { return doCompare(0, length(), text, 0, text.length()) != -1; }

	3572

	3573 inline UBool

	3574 UnicodeString::operator<= (const UnicodeString& text) const

	3575 { return doCompare(0, length(), text, 0, text.length()) != 1; }

	3576

	3577 inline int8_t

	3578 UnicodeString::compare(const UnicodeString& text) const

	3579 { return doCompare(0, length(), text, 0, text.length()); }

	3580

	3581 inline int8_t

	3582 UnicodeString::compare(int32_t start,

	3583 int32_t _length,

	3584 const UnicodeString& srcText) const

	3585 { return doCompare(start, _length, srcText, 0, srcText.length()); }

	3586

	3587 inline int8_t

	3588 UnicodeString::compare(const UChar *srcChars,

	3589 int32_t srcLength) const

	3590 { return doCompare(0, length(), srcChars, 0, srcLength); }

	3591

	3592 inline int8_t

	3593 UnicodeString::compare(int32_t start,

	3594 int32_t _length,

	3595 const UnicodeString& srcText,

	3596 int32_t srcStart,

	3597 int32_t srcLength) const

	3598 { return doCompare(start, _length, srcText, srcStart, srcLength); }

	3599

	3600 inline int8_t

	3601 UnicodeString::compare(int32_t start,

	3602 int32_t _length,

	3603 const UChar *srcChars) const

	3604 { return doCompare(start, _length, srcChars, 0, _length); }

	3605

	3606 inline int8_t

	3607 UnicodeString::compare(int32_t start,

	3608 int32_t _length,

	3609 const UChar *srcChars,

	3610 int32_t srcStart,

	3611 int32_t srcLength) const

	3612 { return doCompare(start, _length, srcChars, srcStart, srcLength); }

	3613

	3614 inline int8_t

	3615 UnicodeString::compareBetween(int32_t start,

	3616 int32_t limit,

	3617 const UnicodeString& srcText,

	3618 int32_t srcStart,

	3619 int32_t srcLimit) const

	3620 { return doCompare(start, limit - start,

	3621 srcText, srcStart, srcLimit - srcStart); }

	3622

	3623 inline int8_t

	3624 UnicodeString::doCompareCodePointOrder(int32_t start,

	3625 int32_t thisLength,

	3626 const UnicodeString& srcText,

	3627 int32_t srcStart,

	3628 int32_t srcLength) const

	3629 {

	3630 if(srcText.isBogus()) {

	3631 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise

	3632 } else {

	3633 srcText.pinIndices(srcStart, srcLength); // clamp the source range to srcText's bounds

	3634 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);

	3635 }

	3636 }

	3637

	3638 inline int8_t

	3639 UnicodeString::compareCodePointOrder(const UnicodeString& text) const

	3640 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }

	3641

	3642 inline int8_t

	3643 UnicodeString::compareCodePointOrder(int32_t start,

	3644 int32_t _length,

	3645 const UnicodeString& srcText) const

	3646 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }

	3647

	3648 inline int8_t

	3649 UnicodeString::compareCodePointOrder(const UChar *srcChars,

	3650 int32_t srcLength) const

	3651 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }

	3652

	3653 inline int8_t

	3654 UnicodeString::compareCodePointOrder(int32_t start,

	3655 int32_t _length,

	3656 const UnicodeString& srcText,

	3657 int32_t srcStart,

	3658 int32_t srcLength) const

	3659 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }

	3660

	3661 inline int8_t

	3662 UnicodeString::compareCodePointOrder(int32_t start,

	3663 int32_t _length,

	3664 const UChar *srcChars) const

	3665 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }

	3666

	3667 inline int8_t

	3668 UnicodeString::compareCodePointOrder(int32_t start,

	3669 int32_t _length,

	3670 const UChar *srcChars,

	3671 int32_t srcStart,

	3672 int32_t srcLength) const

	3673 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }

	3674

	3675 inline int8_t

	3676 UnicodeString::compareCodePointOrderBetween(int32_t start,

	3677 int32_t limit,

	3678 const UnicodeString& srcText,

	3679 int32_t srcStart,

	3680 int32_t srcLimit) const

	3681 { return doCompareCodePointOrder(start, limit - start,

	3682 srcText, srcStart, srcLimit - srcStart); }

	3683

	3684 inline int8_t

	3685 UnicodeString::doCaseCompare(int32_t start,

	3686 int32_t thisLength,

	3687 const UnicodeString &srcText,

	3688 int32_t srcStart,

	3689 int32_t srcLength,

	3690 uint32_t options) const

	3691 {

	3692 if(srcText.isBogus()) {

	3693 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise

	3694 } else {

	3695 srcText.pinIndices(srcStart, srcLength); // clamp the source range to srcText's bounds

	3696 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);

	3697 }

	3698 }

	3699

	3700 inline int8_t

	3701 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {

	3702 return doCaseCompare(0, length(), text, 0, text.length(), options);

	3703 }

	3704

	3705 inline int8_t

	3706 UnicodeString::caseCompare(int32_t start,

	3707 int32_t _length,

	3708 const UnicodeString &srcText,

	3709 uint32_t options) const {

	3710 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);

	3711 }

	3712

	3713 inline int8_t

	3714 UnicodeString::caseCompare(const UChar *srcChars,

	3715 int32_t srcLength,

	3716 uint32_t options) const {

	3717 return doCaseCompare(0, length(), srcChars, 0, srcLength, options);

	3718 }

	3719

	3720 inline int8_t

	3721 UnicodeString::caseCompare(int32_t start,

	3722 int32_t _length,

	3723 const UnicodeString &srcText,

	3724 int32_t srcStart,

	3725 int32_t srcLength,

	3726 uint32_t options) const {

	3727 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);

	3728 }

	3729

	3730 inline int8_t

	3731 UnicodeString::caseCompare(int32_t start,

	3732 int32_t _length,

	3733 const UChar *srcChars,

	3734 uint32_t options) const {

	3735 return doCaseCompare(start, _length, srcChars, 0, _length, options);

	3736 }

	3737

	3738 inline int8_t

	3739 UnicodeString::caseCompare(int32_t start,

	3740 int32_t _length,

	3741 const UChar *srcChars,

	3742 int32_t srcStart,

	3743 int32_t srcLength,

	3744 uint32_t options) const {

	3745 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);

	3746 }

	3747

	3748 inline int8_t

	3749 UnicodeString::caseCompareBetween(int32_t start,

	3750 int32_t limit,

	3751 const UnicodeString &srcText,

	3752 int32_t srcStart,

	3753 int32_t srcLimit,

	3754 uint32_t options) const { // converts both [start, limit) pairs into start/length pairs

	3755 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);

	3756 }

	3757

	3758 inline int32_t

	3759 UnicodeString::indexOf(const UnicodeString& srcText,

	3760 int32_t srcStart,

	3761 int32_t srcLength,

	3762 int32_t start,

	3763 int32_t _length) const

	3764 {

	3765 if(!srcText.isBogus()) {

	3766 srcText.pinIndices(srcStart, srcLength); // clamp the search-text range to srcText's bounds

	3767 if(srcLength > 0) {

	3768 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);

	3769 }

	3770 }

	3771 return -1; // bogus or empty search text is reported as not found

	3772 }

	3773

	3774 inline int32_t

	3775 UnicodeString::indexOf(const UnicodeString& text) const

	3776 { return indexOf(text, 0, text.length(), 0, length()); }

	3777

	3778 inline int32_t

	3779 UnicodeString::indexOf(const UnicodeString& text,

	3780 int32_t start) const {

	3781 pinIndex(start);

	3782 return indexOf(text, 0, text.length(), start, length() - start);

	3783 }

	3784

	3785 inline int32_t

	3786 UnicodeString::indexOf(const UnicodeString& text,

	3787 int32_t start,

	3788 int32_t _length) const

	3789 { return indexOf(text, 0, text.length(), start, _length); }

	3790

	3791 inline int32_t

	3792 UnicodeString::indexOf(const UChar *srcChars,

	3793 int32_t srcLength,

	3794 int32_t start) const {

	3795 pinIndex(start);

	3796 return indexOf(srcChars, 0, srcLength, start, length() - start);

	3797 }

	3798

	3799 inline int32_t

	3800 UnicodeString::indexOf(const UChar *srcChars,

	3801 int32_t srcLength,

	3802 int32_t start,

	3803 int32_t _length) const

	3804 { return indexOf(srcChars, 0, srcLength, start, _length); }

	3805

	3806 inline int32_t

	3807 UnicodeString::indexOf(UChar c,

	3808 int32_t start,

	3809 int32_t _length) const

	3810 { return doIndexOf(c, start, _length); }

	3811

	3812 inline int32_t

	3813 UnicodeString::indexOf(UChar32 c,

	3814 int32_t start,

	3815 int32_t _length) const

	3816 { return doIndexOf(c, start, _length); }

	3817

	3818 inline int32_t

	3819 UnicodeString::indexOf(UChar c) const

	3820 { return doIndexOf(c, 0, length()); }

	3821

	3822 inline int32_t

	3823 UnicodeString::indexOf(UChar32 c) const

	3824 { return indexOf(c, 0, length()); }

	3825

	3826 inline int32_t

	3827 UnicodeString::indexOf(UChar c,

	3828 int32_t start) const {

	3829 pinIndex(start);

	3830 return doIndexOf(c, start, length() - start);

	3831 }

	3832

	3833 inline int32_t

	3834 UnicodeString::indexOf(UChar32 c,

	3835 int32_t start) const {

	3836 pinIndex(start);

	3837 return indexOf(c, start, length() - start);

	3838 }

	3839

	3840 inline int32_t

	3841 UnicodeString::lastIndexOf(const UChar *srcChars,

	3842 int32_t srcLength,

	3843 int32_t start,

	3844 int32_t _length) const

	3845 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }

	3846

	3847 inline int32_t

	3848 UnicodeString::lastIndexOf(const UChar *srcChars,

	3849 int32_t srcLength,

	3850 int32_t start) const {

	3851 pinIndex(start);

	3852 return lastIndexOf(srcChars, 0, srcLength, start, length() - start);

	3853 }

	3854

	3855 inline int32_t

	3856 UnicodeString::lastIndexOf(const UnicodeString& srcText,

	3857 int32_t srcStart,

	3858 int32_t srcLength,

	3859 int32_t start,

	3860 int32_t _length) const

	3861 {

	3862 if(!srcText.isBogus()) {

	3863 srcText.pinIndices(srcStart, srcLength); // clamp the search-text range to srcText's bounds

	3864 if(srcLength > 0) {

	3865 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);

	3866 }

	3867 }

	3868 return -1; // bogus or empty search text is reported as not found

	3869 }

	3870

	3871 inline int32_t

	3872 UnicodeString::lastIndexOf(const UnicodeString& text,

	3873 int32_t start,

	3874 int32_t _length) const

	3875 { return lastIndexOf(text, 0, text.length(), start, _length); }

	3876

	3877 inline int32_t

	3878 UnicodeString::lastIndexOf(const UnicodeString& text,

	3879 int32_t start) const {

	3880 pinIndex(start);

	3881 return lastIndexOf(text, 0, text.length(), start, length() - start);

	3882 }

	3883

	3884 inline int32_t

	3885 UnicodeString::lastIndexOf(const UnicodeString& text) const

	3886 { return lastIndexOf(text, 0, text.length(), 0, length()); }

	3887

	3888 inline int32_t

	3889 UnicodeString::lastIndexOf(UChar c,

	3890 int32_t start,

	3891 int32_t _length) const

	3892 { return doLastIndexOf(c, start, _length); }

	3893

	3894 inline int32_t

	3895 UnicodeString::lastIndexOf(UChar32 c,

	3896 int32_t start,

	3897 int32_t _length) const {

	3898 return doLastIndexOf(c, start, _length);

	3899 }

	3900

	3901 inline int32_t

	3902 UnicodeString::lastIndexOf(UChar c) const

	3903 { return doLastIndexOf(c, 0, length()); }

	3904

	3905 inline int32_t

	3906 UnicodeString::lastIndexOf(UChar32 c) const {

	3907 return lastIndexOf(c, 0, length());

	3908 }

	3909

	3910 inline int32_t

	3911 UnicodeString::lastIndexOf(UChar c,

	3912 int32_t start) const {

	3913 pinIndex(start);

	3914 return doLastIndexOf(c, start, length() - start);

	3915 }

	3916

	3917 inline int32_t

	3918 UnicodeString::lastIndexOf(UChar32 c,

	3919 int32_t start) const {

	3920 pinIndex(start);

	3921 return lastIndexOf(c, start, length() - start);

	3922 }

	3923

	3924 inline UBool

	3925 UnicodeString::startsWith(const UnicodeString& text) const

	3926 { return compare(0, text.length(), text, 0, text.length()) == 0; }

	3927

	3928 inline UBool

	3929 UnicodeString::startsWith(const UnicodeString& srcText,

	3930 int32_t srcStart,

	3931 int32_t srcLength) const

	3932 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }

	3933

	3934 inline UBool

	3935 UnicodeString::startsWith(const UChar *srcChars,

	3936 int32_t srcLength) const

	3937 { return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; }

	3938

	3939 inline UBool

	3940 UnicodeString::startsWith(const UChar *srcChars,

	3941 int32_t srcStart,

	3942 int32_t srcLength) const

	3943 { return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;}

	3944

	3945 inline UBool

	3946 UnicodeString::endsWith(const UnicodeString& text) const

	3947 { return doCompare(length() - text.length(), text.length(),

	3948 text, 0, text.length()) == 0; }

	3949

	3950 inline UBool

	3951 UnicodeString::endsWith(const UnicodeString& srcText,

	3952 int32_t srcStart,

	3953 int32_t srcLength) const {

	3954 srcText.pinIndices(srcStart, srcLength);

	3955 return doCompare(length() - srcLength, srcLength,

	3956 srcText, srcStart, srcLength) == 0;

	3957 }

	3958

	3959 inline UBool

	3960 UnicodeString::endsWith(const UChar *srcChars,

	3961 int32_t srcLength) const {

	3962 if(srcLength < 0) {

	3963 srcLength = u_strlen(srcChars);

	3964 }

	3965 return doCompare(length() - srcLength, srcLength,

	3966 srcChars, 0, srcLength) == 0;

	3967 }

	3968

	3969 inline UBool

	3970 UnicodeString::endsWith(const UChar *srcChars,

	3971 int32_t srcStart,

	3972 int32_t srcLength) const {

	3973 if(srcLength < 0) {

	3974 srcLength = u_strlen(srcChars + srcStart);

	3975 }

	3976 return doCompare(length() - srcLength, srcLength,

	3977 srcChars, srcStart, srcLength) == 0;

	3978 }

	3979

	3980 //========================================

	3981 // replace

	3982 //========================================

	3983 inline UnicodeString&

	3984 UnicodeString::replace(int32_t start,

	3985 int32_t _length,

	3986 const UnicodeString& srcText)

	3987 { return doReplace(start, _length, srcText, 0, srcText.length()); }

	3988

	3989 inline UnicodeString&

	3990 UnicodeString::replace(int32_t start,

	3991 int32_t _length,

	3992 const UnicodeString& srcText,

	3993 int32_t srcStart,

	3994 int32_t srcLength)

	3995 { return doReplace(start, _length, srcText, srcStart, srcLength); }

	3996

	3997 inline UnicodeString&

	3998 UnicodeString::replace(int32_t start,

	3999 int32_t _length,

	4000 const UChar *srcChars,

	4001 int32_t srcLength)

	4002 { return doReplace(start, _length, srcChars, 0, srcLength); }

	4003

	4004 inline UnicodeString&

	4005 UnicodeString::replace(int32_t start,

	4006 int32_t _length,

	4007 const UChar *srcChars,

	4008 int32_t srcStart,

	4009 int32_t srcLength)

	4010 { return doReplace(start, _length, srcChars, srcStart, srcLength); }

	4011

	4012 inline UnicodeString&

	4013 UnicodeString::replace(int32_t start,

	4014 int32_t _length,

	4015 UChar srcChar)

	4016 { return doReplace(start, _length, &srcChar, 0, 1); }

	4017

	4018 inline UnicodeString&

	4019 UnicodeString::replace(int32_t start,

	4020 int32_t _length,

	4021 UChar32 srcChar) {

	4022 UChar buffer[U16_MAX_LENGTH];

	4023 int32_t count = 0;

	4024 UBool isError = FALSE;

	4025 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);

	4026 return doReplace(start, _length, buffer, 0, count);

	4027 }

	4028

	4029 inline UnicodeString&

	4030 UnicodeString::replaceBetween(int32_t start,

	4031 int32_t limit,

	4032 const UnicodeString& srcText)

	4033 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }

	4034

	4035 inline UnicodeString&

	4036 UnicodeString::replaceBetween(int32_t start,

	4037 int32_t limit,

	4038 const UnicodeString& srcText,

	4039 int32_t srcStart,

	4040 int32_t srcLimit)

	4041 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart) ; }

	4042

	4043 inline UnicodeString&

	4044 UnicodeString::findAndReplace(const UnicodeString& oldText,

	4045 const UnicodeString& newText)

	4046 { return findAndReplace(0, length(), oldText, 0, oldText.length(),

	4047 newText, 0, newText.length()); }

	4048

	4049 inline UnicodeString&

	4050 UnicodeString::findAndReplace(int32_t start,

	4051 int32_t _length,

	4052 const UnicodeString& oldText,

	4053 const UnicodeString& newText)

	4054 { return findAndReplace(start, _length, oldText, 0, oldText.length(),

	4055 newText, 0, newText.length()); }

	4056

	4057 // ============================

	4058 // extract

	4059 // ============================

	4060 inline void

	4061 UnicodeString::doExtract(int32_t start,

	4062 int32_t _length,

	4063 UnicodeString& target) const

	4064 { target.replace(0, target.length(), *this, start, _length); }

	4065

	4066 inline void

	4067 UnicodeString::extract(int32_t start,

	4068 int32_t _length,

	4069 UChar *target,

	4070 int32_t targetStart) const

	4071 { doExtract(start, _length, target, targetStart); }

	4072

	4073 inline void

	4074 UnicodeString::extract(int32_t start,

	4075 int32_t _length,

	4076 UnicodeString& target) const

	4077 { doExtract(start, _length, target); }

	4078

	4079 #if !UCONFIG_NO_CONVERSION

	4080

	4081 inline int32_t

	4082 UnicodeString::extract(int32_t start,

	4083 int32_t _length,

	4084 char *dst,

	4085 const char *codepage) const

	4086

	4087 {

	4088 // This dstSize value will be checked explicitly

	4089 #if defined(__GNUC__)

	4090 // Ticket #7039: Clip length to the maximum valid length to the end of address able memory given the starting address

	4091 // This is only an issue when using GCC and certain optimizations are turned o n.

	4092 return extract(start, _length, dst, dst!=0 ? ((dst >= (char)((size_t)-1) - UI NT32_MAX) ? (((char)UINT32_MAX) - dst) : UINT32_MAX) : 0, codepage);

	4093 #else

	4094 return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);

	4095 #endif

	4096 }

	4097

	4098 #endif

	4099

	4100 inline void

	4101 UnicodeString::extractBetween(int32_t start,

	4102 int32_t limit,

	4103 UChar *dst,

	4104 int32_t dstStart) const {

	4105 pinIndex(start);

	4106 pinIndex(limit);

	4107 doExtract(start, limit - start, dst, dstStart);

	4108 }

	4109

	4110 inline UnicodeString

	4111 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {

	4112 return tempSubString(start, limit - start);

	4113 }

	4114

	4115 inline UChar

	4116 UnicodeString::doCharAt(int32_t offset) const

	4117 {

	4118 if((uint32_t)offset < (uint32_t)length()) {

	4119 return getArrayStart()[offset];

	4120 } else {

	4121 return kInvalidUChar;

	4122 }

	4123 }

	4124

	4125 inline UChar

	4126 UnicodeString::charAt(int32_t offset) const

	4127 { return doCharAt(offset); }

	4128

	4129 inline UChar

	4130 UnicodeString::operator[] (int32_t offset) const

	4131 { return doCharAt(offset); }

	4132

	4133 inline UChar32

	4134 UnicodeString::char32At(int32_t offset) const

	4135 {

	4136 int32_t len = length();

	4137 if((uint32_t)offset < (uint32_t)len) {

	4138 const UChar *array = getArrayStart();

	4139 UChar32 c;

	4140 U16_GET(array, 0, offset, len, c);

	4141 return c;

	4142 } else {

	4143 return kInvalidUChar;

	4144 }

	4145 }

	4146

	4147 inline int32_t

	4148 UnicodeString::getChar32Start(int32_t offset) const {

	4149 if((uint32_t)offset < (uint32_t)length()) {

	4150 const UChar *array = getArrayStart();

	4151 U16_SET_CP_START(array, 0, offset);

	4152 return offset;

	4153 } else {

	4154 return 0;

	4155 }

	4156 }

	4157

	4158 inline int32_t

	4159 UnicodeString::getChar32Limit(int32_t offset) const {

	4160 int32_t len = length();

	4161 if((uint32_t)offset < (uint32_t)len) {

	4162 const UChar *array = getArrayStart();

	4163 U16_SET_CP_LIMIT(array, 0, offset, len);

	4164 return offset;

	4165 } else {

	4166 return len;

	4167 }

	4168 }

	4169

	4170 inline UBool

	4171 UnicodeString::isEmpty() const {

	4172 return fShortLength == 0;

	4173 }

	4174

	4175 //========================================

	4176 // Write implementation methods

	4177 //========================================

	4178 inline void

	4179 UnicodeString::setLength(int32_t len) {

	4180 if(len <= 127) {

	4181 fShortLength = (int8_t)len;

	4182 } else {

	4183 fShortLength = (int8_t)-1;

	4184 fUnion.fFields.fLength = len;

	4185 }

	4186 }

	4187

	4188 inline void

	4189 UnicodeString::setToEmpty() {

	4190 fShortLength = 0;

	4191 fFlags = kShortString;

	4192 }

	4193

	4194 inline void

	4195 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {

	4196 setLength(len);

	4197 fUnion.fFields.fArray = array;

	4198 fUnion.fFields.fCapacity = capacity;

	4199 }

	4200

	4201 inline const UChar *

	4202 UnicodeString::getTerminatedBuffer() {

	4203 if(!isWritable()) {

	4204 return 0;

	4205 } else {

	4206 UChar *array = getArrayStart();

	4207 int32_t len = length();

	4208 if(len < getCapacity() && ((fFlags&kRefCounted) == 0 \|\| refCount() == 1)) {

	4209 /*

	4210 * kRefCounted: Do not write the NUL if the buffer is shared.

	4211 * That is mostly safe, except when the length of one copy was modified

	4212 * without copy-on-write, e.g., via truncate(newLength) or remove(void).

	4213 * Then the NUL would be written into the middle of another copy's string.

	4214 */

	4215 if(!(fFlags&kBufferIsReadonly)) {

	4216 /*

	4217 * We must not write to a readonly buffer, but it is known to be

	4218 * NUL-terminated if len<capacity.

	4219 * A shared, allocated buffer (refCount()>1) must not have its contents

	4220 * modified, but the NUL at [len] is beyond the string contents,

	4221 * and multiple string objects and threads writing the same NUL into the

	4222 * same location is harmless.

	4223 * In all other cases, the buffer is fully writable and it is anyway saf e

	4224 * to write the NUL.

	4225 *

	4226 * Note: An earlier version of this code tested whether there is a NUL

	4227 * at [len] already, but, while safe, it generated lots of warnings from

	4228 * tools like valgrind and Purify.

	4229 */

	4230 array[len] = 0;

	4231 }

	4232 return array;

	4233 } else if(cloneArrayIfNeeded(len+1)) {

	4234 array = getArrayStart();

	4235 array[len] = 0;

	4236 return array;

	4237 } else {

	4238 return 0;

	4239 }

	4240 }

	4241 }

	4242

	4243 inline UnicodeString&

	4244 UnicodeString::operator= (UChar ch)

	4245 { return doReplace(0, length(), &ch, 0, 1); }

	4246

	4247 inline UnicodeString&

	4248 UnicodeString::operator= (UChar32 ch)

	4249 { return replace(0, length(), ch); }

	4250

	4251 inline UnicodeString&

	4252 UnicodeString::setTo(const UnicodeString& srcText,

	4253 int32_t srcStart,

	4254 int32_t srcLength)

	4255 {

	4256 unBogus();

	4257 return doReplace(0, length(), srcText, srcStart, srcLength);

	4258 }

	4259

	4260 inline UnicodeString&

	4261 UnicodeString::setTo(const UnicodeString& srcText,

	4262 int32_t srcStart)

	4263 {

	4264 unBogus();

	4265 srcText.pinIndex(srcStart);

	4266 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);

	4267 }

	4268

	4269 inline UnicodeString&

	4270 UnicodeString::setTo(const UnicodeString& srcText)

	4271 {

	4272 unBogus();

	4273 return doReplace(0, length(), srcText, 0, srcText.length());

	4274 }

	4275

	4276 inline UnicodeString&

	4277 UnicodeString::setTo(const UChar *srcChars,

	4278 int32_t srcLength)

	4279 {

	4280 unBogus();

	4281 return doReplace(0, length(), srcChars, 0, srcLength);

	4282 }

	4283

	4284 inline UnicodeString&

	4285 UnicodeString::setTo(UChar srcChar)

	4286 {

	4287 unBogus();

	4288 return doReplace(0, length(), &srcChar, 0, 1);

	4289 }

	4290

	4291 inline UnicodeString&

	4292 UnicodeString::setTo(UChar32 srcChar)

	4293 {

	4294 unBogus();

	4295 return replace(0, length(), srcChar);

	4296 }

	4297

	4298 inline UnicodeString&

	4299 UnicodeString::append(const UnicodeString& srcText,

	4300 int32_t srcStart,

	4301 int32_t srcLength)

	4302 { return doReplace(length(), 0, srcText, srcStart, srcLength); }

	4303

	4304 inline UnicodeString&

	4305 UnicodeString::append(const UnicodeString& srcText)

	4306 { return doReplace(length(), 0, srcText, 0, srcText.length()); }

	4307

	4308 inline UnicodeString&

	4309 UnicodeString::append(const UChar *srcChars,

	4310 int32_t srcStart,

	4311 int32_t srcLength)

	4312 { return doReplace(length(), 0, srcChars, srcStart, srcLength); }

	4313

	4314 inline UnicodeString&

	4315 UnicodeString::append(const UChar *srcChars,

	4316 int32_t srcLength)

	4317 { return doReplace(length(), 0, srcChars, 0, srcLength); }

	4318

	4319 inline UnicodeString&

	4320 UnicodeString::append(UChar srcChar)

	4321 { return doReplace(length(), 0, &srcChar, 0, 1); }

	4322

	4323 inline UnicodeString&

	4324 UnicodeString::append(UChar32 srcChar) {

	4325 UChar buffer[U16_MAX_LENGTH];

	4326 int32_t _length = 0;

	4327 UBool isError = FALSE;

	4328 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);

	4329 return doReplace(length(), 0, buffer, 0, _length);

	4330 }

	4331

	4332 inline UnicodeString&

	4333 UnicodeString::operator+= (UChar ch)

	4334 { return doReplace(length(), 0, &ch, 0, 1); }

	4335

	4336 inline UnicodeString&

	4337 UnicodeString::operator+= (UChar32 ch) {

	4338 return append(ch);

	4339 }

	4340

	4341 inline UnicodeString&

	4342 UnicodeString::operator+= (const UnicodeString& srcText)

	4343 { return doReplace(length(), 0, srcText, 0, srcText.length()); }

	4344

	4345 inline UnicodeString&

	4346 UnicodeString::insert(int32_t start,

	4347 const UnicodeString& srcText,

	4348 int32_t srcStart,

	4349 int32_t srcLength)

	4350 { return doReplace(start, 0, srcText, srcStart, srcLength); }

	4351

	4352 inline UnicodeString&

	4353 UnicodeString::insert(int32_t start,

	4354 const UnicodeString& srcText)

	4355 { return doReplace(start, 0, srcText, 0, srcText.length()); }

	4356

	4357 inline UnicodeString&

	4358 UnicodeString::insert(int32_t start,

	4359 const UChar *srcChars,

	4360 int32_t srcStart,

	4361 int32_t srcLength)

	4362 { return doReplace(start, 0, srcChars, srcStart, srcLength); }

	4363

	4364 inline UnicodeString&

	4365 UnicodeString::insert(int32_t start,

	4366 const UChar *srcChars,

	4367 int32_t srcLength)

	4368 { return doReplace(start, 0, srcChars, 0, srcLength); }

	4369

	4370 inline UnicodeString&

	4371 UnicodeString::insert(int32_t start,

	4372 UChar srcChar)

	4373 { return doReplace(start, 0, &srcChar, 0, 1); }

	4374

	4375 inline UnicodeString&

	4376 UnicodeString::insert(int32_t start,

	4377 UChar32 srcChar)

	4378 { return replace(start, 0, srcChar); }

	4379

	4380

	4381 inline UnicodeString&

	4382 UnicodeString::remove()

	4383 {

	4384 // remove() of a bogus string makes the string empty and non-bogus

	4385 // we also un-alias a read-only alias to deal with NUL-termination

	4386 // issues with getTerminatedBuffer()

	4387 if(fFlags & (kIsBogus\|kBufferIsReadonly)) {

	4388 setToEmpty();

	4389 } else {

	4390 fShortLength = 0;

	4391 }

	4392 return *this;

	4393 }

	4394

	4395 inline UnicodeString&

	4396 UnicodeString::remove(int32_t start,

	4397 int32_t _length)

	4398 {

	4399 if(start <= 0 && _length == INT32_MAX) {

	4400 // remove(guaranteed everything) of a bogus string makes the string empt y and non-bogus

	4401 return remove();

	4402 }

	4403 return doReplace(start, _length, NULL, 0, 0);

	4404 }

	4405

	4406 inline UnicodeString&

	4407 UnicodeString::removeBetween(int32_t start,

	4408 int32_t limit)

	4409 { return doReplace(start, limit - start, NULL, 0, 0); }

	4410

	4411 inline UnicodeString &

	4412 UnicodeString::retainBetween(int32_t start, int32_t limit) {

	4413 truncate(limit);

	4414 return doReplace(0, start, NULL, 0, 0);

	4415 }

	4416

	4417 inline UBool

	4418 UnicodeString::truncate(int32_t targetLength)

	4419 {

	4420 if(isBogus() && targetLength == 0) {

	4421 // truncate(0) of a bogus string makes the string empty and non-bogus

	4422 unBogus();

	4423 return FALSE;

	4424 } else if((uint32_t)targetLength < (uint32_t)length()) {

	4425 setLength(targetLength);

	4426 if(fFlags&kBufferIsReadonly) {

	4427 fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more

	4428 }

	4429 return TRUE;

	4430 } else {

	4431 return FALSE;

	4432 }

	4433 }

	4434

	4435 inline UnicodeString&

	4436 UnicodeString::reverse()

	4437 { return doReverse(0, length()); }

	4438

	4439 inline UnicodeString&

	4440 UnicodeString::reverse(int32_t start,

	4441 int32_t _length)

	4442 { return doReverse(start, _length); }

	4443

	4444 U_NAMESPACE_END

	4445

	4446 #endif

OLD	NEW

« no previous file with comments | « icu46/source/common/unicode/uniset.h ('k') | icu46/source/common/unicode/unorm.h » ('j') | no next file with comments »