public/common/unicode/unistr.h - Issue 19276009: Move ICU headers part 1

Side by Side Diff: public/common/unicode/unistr.h

Issue 19276009: Move ICU headers part 1 (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/icu46/

Patch Set: Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 /*

2 **********************************************************************

3 * Copyright (C) 1998-2010, International Business Machines

4 * Corporation and others. All Rights Reserved.

5 **********************************************************************

6 *

7 * File unistr.h

8 *

9 * Modification History:

10 *

11 * Date Name Description

12 * 09/25/98 stephen Creation.

13 * 11/11/98 stephen Changed per 11/9 code review.

14 * 04/20/99 stephen Overhauled per 4/16 code review.

15 * 11/18/99 aliu Made to inherit from Replaceable. Added method

16 * handleReplaceBetween(); other methods unchanged.

17 * 06/25/01 grhoten Remove dependency on iostream.

18 ******************************************************************************

19 */

20

21 #ifndef UNISTR_H

22 #define UNISTR_H

23

24 /**

25 * \file

26 * \brief C++ API: Unicode String

27 */

28

29 #include "unicode/utypes.h"

30 #include "unicode/rep.h"

31 #include "unicode/std_string.h"

32 #include "unicode/stringpiece.h"

33 #include "unicode/bytestream.h"

34

35 struct UConverter; // unicode/ucnv.h

36 class StringThreadTest;

37

38 #ifndef U_COMPARE_CODE_POINT_ORDER

39 /* see also ustring.h and unorm.h */

40 /**

41 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:

42 * Compare strings in code point order instead of code unit order.

43 * @stable ICU 2.2

44 */

45 #define U_COMPARE_CODE_POINT_ORDER 0x8000

46 #endif

47

48 #ifndef USTRING_H /* skip this declaration when USTRING_H is already defined (presumably by unicode/ustring.h -- confirm) */

49 /**

50 * \ingroup ustring_ustrlen

51 */

52 U_STABLE int32_t U_EXPORT2

53 u_strlen(const UChar *s);

54 #endif /* USTRING_H */

55

56 U_NAMESPACE_BEGIN

57

58 class Locale; // unicode/locid.h

59 class StringCharacterIterator;

60 class BreakIterator; // unicode/brkiter.h

61

62 /* The <iostream> include has been moved to unicode/ustream.h */

63

64 /**

65 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor

66 * which constructs a Unicode string from an invariant-character char * string.

67 * About invariant characters see utypes.h.

68 * This constructor has no runtime dependency on conversion code and is

69 * therefore recommended over ones taking a charset name string

70 * (where the empty string "" indicates invariant-character conversion).

71 *

72 * @stable ICU 3.2

73 */

74 #define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant

75

76 /**

77 * Unicode String literals in C++.

78 * Dependent on the platform properties, different UnicodeString

79 * constructors should be used to create a UnicodeString object from

80 * a string literal.

81 * The macros are defined for maximum performance.

82 * They work only for strings that contain "invariant characters", i.e.,

83 * only latin letters, digits, and some punctuation.

84 * See utypes.h for details.

85 *

86 * The string parameter must be a C string literal.

87 * The length of the string, not including the terminating

88 * <code>NUL</code>, must be specified as a constant.

89 * The U_STRING_DECL macro should be invoked exactly once for one

90 * such string variable before it is used.

91 * @stable ICU 2.0

92 */

93 #if defined(U_DECLARE_UTF16) /* the platform configuration provides U_DECLARE_UTF16 for the literal */

94 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)

95 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) /* wchar_t is UChar-sized: use an L"..." literal */

96 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)

97 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY /* 1-byte UChar on an ASCII platform: use the char literal itself */

98 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)

99 #else /* otherwise: go through the invariant-character constructor selected by US_INV */

100 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)

101 #endif

102

103 /**

104 * Unicode String literals in C++.

105 * Dependent on the platform properties, different UnicodeString

106 * constructors should be used to create a UnicodeString object from

107 * a string literal.

108 * The macros are defined for improved performance.

109 * They work only for strings that contain "invariant characters", i.e.,

110 * only latin letters, digits, and some punctuation.

111 * See utypes.h for details.

112 *

113 * The string parameter must be a C string literal.

114 * @stable ICU 2.0

115 */

116 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)

117

118 /**

119 * UnicodeString is a string class that stores Unicode characters directly and provides

120 * similar functionality as the Java String and StringBuffer classes.

121 * It is a concrete implementation of the abstract class Replaceable (for transliteration).

122 *

123 * The UnicodeString class is not suitable for subclassing.

124 *

125 * <p>For an overview of Unicode strings in C and C++ see the

126 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>

127 *

128 * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.

129 * A Unicode character may be stored with either one code unit

130 * (the most common case) or with a matched pair of special code units

131 * ("surrogates"). The data type for code units is UChar.

132 * For single-character handling, a Unicode character code <em>point</em> is a value

133 * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>

134 *

135 * <p>Indexes and offsets into and lengths of strings always count code units, not code points.

136 * This is the same as with multi-byte char* strings in traditional string handling.

137 * Operations on partial strings typically do not test for code point boundaries.

138 * If necessary, the user needs to take care of such boundaries by testing for the code unit

139 * values or by using functions like

140 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()

141 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>

142 *

143 * UnicodeString methods are more lenient with regard to input parameter values

144 * than other ICU APIs. In particular:

145 * - If indexes are out of bounds for a UnicodeString object

146 * (<0 or >length()) then they are "pinned" to the nearest boundary.

147 * - If primitive string pointer values (e.g., const UChar * or char *)

148 * for input strings are NULL, then those input string parameters are treated

149 * as if they pointed to an empty string.

150 * However, this is <em>not</em> the case for char * parameters for charset names

151 * or other IDs.

152 * - Most UnicodeString methods do not take a UErrorCode parameter because

153 * there are usually very few opportunities for failure other than a shortage

154 * of memory, error codes in low-level C++ string methods would be inconvenient,

155 * and the error code as the last parameter (ICU convention) would prevent

156 * the use of default parameter values.

157 * Instead, such methods set the UnicodeString into a "bogus" state

158 * (see isBogus()) if an error occurs.

159 *

160 * In string comparisons, two UnicodeString objects that are both "bogus"

161 * compare equal (to be transitive and prevent endless loops in sorting),

162 * and a "bogus" string compares less than any non-"bogus" one.

163 *

164 * Const UnicodeString methods are thread-safe. Multiple threads can use

165 * const methods on the same UnicodeString object simultaneously,

166 * but non-const methods must not be called concurrently (in multiple threads)

167 * with any other (const or non-const) methods.

168 *

169 * Similarly, const UnicodeString & parameters are thread-safe.

170 * One object may be passed in as such a parameter concurrently in multiple threads.

171 * This includes the const UnicodeString & parameters for

172 * copy construction, assignment, and cloning.

173 *

174 * <p>UnicodeString uses several storage methods.

175 * String contents can be stored inside the UnicodeString object itself,

176 * in an allocated and shared buffer, or in an outside buffer that is "aliased".

177 * Most of this is done transparently, but careful aliasing in particular provides

178 * significant performance improvements.

179 * Also, the internal buffer is accessible via special functions.

180 * For details see the

181 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>

182 *

183 * @see utf.h

184 * @see CharacterIterator

185 * @stable ICU 2.0

186 */

187 class U_COMMON_API UnicodeString : public Replaceable

188 {

189 public:

190

191 /**

192 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor

193 * which constructs a Unicode string from an invariant-character char * string.

194 * Use the macro US_INV instead of the full qualification for this value.

195 *

196 * @see US_INV

197 * @stable ICU 3.2

198 */

199 enum EInvariant {

200 /**

201 * @see EInvariant

202 * @stable ICU 3.2

203 */

204 kInvariant

205 };

206

207 //========================================

208 // Read-only operations

209 //========================================

210

211 /* Comparison - bitwise only - for international comparison use collation */

212

213 /**

214 * Equality operator. Performs only bitwise comparison.

215 * @param text The UnicodeString to compare to this one.

216 * @return TRUE if <TT>text</TT> contains the same characters as this one,

217 * FALSE otherwise.

218 * @stable ICU 2.0

219 */

220 inline UBool operator== (const UnicodeString& text) const;

221

222 /**

223 * Inequality operator. Performs only bitwise comparison.

224 * @param text The UnicodeString to compare to this one.

225 * @return FALSE if <TT>text</TT> contains the same characters as this one,

226 * TRUE otherwise.

227 * @stable ICU 2.0

228 */

229 inline UBool operator!= (const UnicodeString& text) const;

230

231 /**

232 * Greater than operator. Performs only bitwise comparison.

233 * @param text The UnicodeString to compare to this one.

234 * @return TRUE if the characters in this are bitwise

235 * greater than the characters in <code>text</code>, FALSE otherwise

236 * @stable ICU 2.0

237 */

238 inline UBool operator> (const UnicodeString& text) const;

239

240 /**

241 * Less than operator. Performs only bitwise comparison.

242 * @param text The UnicodeString to compare to this one.

243 * @return TRUE if the characters in this are bitwise

244 * less than the characters in <code>text</code>, FALSE otherwise

245 * @stable ICU 2.0

246 */

247 inline UBool operator< (const UnicodeString& text) const;

248

249 /**

250 * Greater than or equal operator. Performs only bitwise comparison.

251 * @param text The UnicodeString to compare to this one.

252 * @return TRUE if the characters in this are bitwise

253 * greater than or equal to the characters in <code>text</code>, FALSE otherwise

254 * @stable ICU 2.0

255 */

256 inline UBool operator>= (const UnicodeString& text) const;

257

258 /**

259 * Less than or equal operator. Performs only bitwise comparison.

260 * @param text The UnicodeString to compare to this one.

261 * @return TRUE if the characters in this are bitwise

262 * less than or equal to the characters in <code>text</code>, FALSE otherwise

263 * @stable ICU 2.0

264 */

265 inline UBool operator<= (const UnicodeString& text) const;

266

267 /**

268 * Compare the characters bitwise in this UnicodeString to

269 * the characters in <code>text</code>.

270 * @param text The UnicodeString to compare to this one.

271 * @return The result of bitwise character comparison: 0 if this

272 * contains the same characters as <code>text</code>, -1 if the characters in

273 * this are bitwise less than the characters in <code>text</code>, +1 if the

274 * characters in this are bitwise greater than the characters

275 * in <code>text</code>.

276 * @stable ICU 2.0

277 */

278 inline int8_t compare(const UnicodeString& text) const;

279

280 /**

281 * Compare the characters bitwise in the range

282 * [<TT>start</TT>, <TT>start + length</TT>) with the characters

283 * in <TT>text</TT>

284 * @param start the offset at which the compare operation begins

285 * @param length the number of characters of text to compare.

286 * @param text the other text to be compared against this string.

287 * @return The result of bitwise character comparison: 0 if this

288 * contains the same characters as <code>text</code>, -1 if the characters in

289 * this are bitwise less than the characters in <code>text</code>, +1 if the

290 * characters in this are bitwise greater than the characters

291 * in <code>text</code>.

292 * @stable ICU 2.0

293 */

294 inline int8_t compare(int32_t start,

295 int32_t length,

296 const UnicodeString& text) const;

297

298 /**

299 * Compare the characters bitwise in the range

300 * [<TT>start</TT>, <TT>start + length</TT>) with the characters

301 * in <TT>srcText</TT> in the range

302 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

303 * @param start the offset at which the compare operation begins

304 * @param length the number of characters in this to compare.

305 * @param srcText the text to be compared

306 * @param srcStart the offset into <TT>srcText</TT> to start comparison

307 * @param srcLength the number of characters in <TT>srcText</TT> to compare

308 * @return The result of bitwise character comparison: 0 if this

309 * contains the same characters as <code>srcText</code>, -1 if the characters in

310 * this are bitwise less than the characters in <code>srcText</code>, +1 if the

311 * characters in this are bitwise greater than the characters

312 * in <code>srcText</code>.

313 * @stable ICU 2.0

314 */

315 inline int8_t compare(int32_t start,

316 int32_t length,

317 const UnicodeString& srcText,

318 int32_t srcStart,

319 int32_t srcLength) const;

320

321 /**

322 * Compare the characters bitwise in this UnicodeString with the first

323 * <TT>srcLength</TT> characters in <TT>srcChars</TT>.

324 * @param srcChars The characters to compare to this UnicodeString.

325 * @param srcLength the number of characters in <TT>srcChars</TT> to compare

326 * @return The result of bitwise character comparison: 0 if this

327 * contains the same characters as <code>srcChars</code>, -1 if the characters in

328 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the

329 * characters in this are bitwise greater than the characters

330 * in <code>srcChars</code>.

331 * @stable ICU 2.0

332 */

333 inline int8_t compare(const UChar *srcChars,

334 int32_t srcLength) const;

335

336 /**

337 * Compare the characters bitwise in the range

338 * [<TT>start</TT>, <TT>start + length</TT>) with the first

339 * <TT>length</TT> characters in <TT>srcChars</TT>

340 * @param start the offset at which the compare operation begins

341 * @param length the number of characters to compare.

342 * @param srcChars the characters to be compared

343 * @return The result of bitwise character comparison: 0 if this

344 * contains the same characters as <code>srcChars</code>, -1 if the characters in

345 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the

346 * characters in this are bitwise greater than the characters

347 * in <code>srcChars</code>.

348 * @stable ICU 2.0

349 */

350 inline int8_t compare(int32_t start,

351 int32_t length,

352 const UChar *srcChars) const;

353

354 /**

355 * Compare the characters bitwise in the range

356 * [<TT>start</TT>, <TT>start + length</TT>) with the characters

357 * in <TT>srcChars</TT> in the range

358 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

359 * @param start the offset at which the compare operation begins

360 * @param length the number of characters in this to compare

361 * @param srcChars the characters to be compared

362 * @param srcStart the offset into <TT>srcChars</TT> to start comparison

363 * @param srcLength the number of characters in <TT>srcChars</TT> to compare

364 * @return The result of bitwise character comparison: 0 if this

365 * contains the same characters as <code>srcChars</code>, -1 if the characters in

366 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the

367 * characters in this are bitwise greater than the characters

368 * in <code>srcChars</code>.

369 * @stable ICU 2.0

370 */

371 inline int8_t compare(int32_t start,

372 int32_t length,

373 const UChar *srcChars,

374 int32_t srcStart,

375 int32_t srcLength) const;

376

377 /**

378 * Compare the characters bitwise in the range

379 * [<TT>start</TT>, <TT>limit</TT>) with the characters

380 * in <TT>srcText</TT> in the range

381 * [<TT>srcStart</TT>, <TT>srcLimit</TT>).

382 * @param start the offset at which the compare operation begins

383 * @param limit the offset immediately following the compare operation

384 * @param srcText the text to be compared

385 * @param srcStart the offset into <TT>srcText</TT> to start comparison

386 * @param srcLimit the offset into <TT>srcText</TT> to limit comparison

387 * @return The result of bitwise character comparison: 0 if this

388 * contains the same characters as <code>srcText</code>, -1 if the characters in

389 * this are bitwise less than the characters in <code>srcText</code>, +1 if the

390 * characters in this are bitwise greater than the characters

391 * in <code>srcText</code>.

392 * @stable ICU 2.0

393 */

394 inline int8_t compareBetween(int32_t start,

395 int32_t limit,

396 const UnicodeString& srcText,

397 int32_t srcStart,

398 int32_t srcLimit) const;

399

400 /**

401 * Compare two Unicode strings in code point order.

402 * The result may be different from the results of compare(), operator<, etc.

403 * if supplementary characters are present:

404 *

405 * In UTF-16, supplementary characters (with code points U+10000 and above) are

406 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

407 * which means that they compare as less than some other BMP characters like U+feff.

408 * This function compares Unicode strings in code point order.

409 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

410 *

411 * @param text Another string to compare this one to.

412 * @return a negative/zero/positive integer corresponding to whether

413 * this string is less than/equal to/greater than the second one

414 * in code point order

415 * @stable ICU 2.0

416 */

417 inline int8_t compareCodePointOrder(const UnicodeString& text) const;

418

419 /**

420 * Compare two Unicode strings in code point order.

421 * The result may be different from the results of compare(), operator<, etc.

422 * if supplementary characters are present:

423 *

424 * In UTF-16, supplementary characters (with code points U+10000 and above) are

425 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

426 * which means that they compare as less than some other BMP characters like U+feff.

427 * This function compares Unicode strings in code point order.

428 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

429 *

430 * @param start The start offset in this string at which the compare operation begins.

431 * @param length The number of code units from this string to compare.

432 * @param srcText Another string to compare this one to.

433 * @return a negative/zero/positive integer corresponding to whether

434 * this string is less than/equal to/greater than the second one

435 * in code point order

436 * @stable ICU 2.0

437 */

438 inline int8_t compareCodePointOrder(int32_t start,

439 int32_t length,

440 const UnicodeString& srcText) const;

441

442 /**

443 * Compare two Unicode strings in code point order.

444 * The result may be different from the results of compare(), operator<, etc.

445 * if supplementary characters are present:

446 *

447 * In UTF-16, supplementary characters (with code points U+10000 and above) are

448 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

449 * which means that they compare as less than some other BMP characters like U+feff.

450 * This function compares Unicode strings in code point order.

451 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

452 *

453 * @param start The start offset in this string at which the compare operation begins.

454 * @param length The number of code units from this string to compare.

455 * @param srcText Another string to compare this one to.

456 * @param srcStart The start offset in that string at which the compare operation begins.

457 * @param srcLength The number of code units from that string to compare.

458 * @return a negative/zero/positive integer corresponding to whether

459 * this string is less than/equal to/greater than the second one

460 * in code point order

461 * @stable ICU 2.0

462 */

463 inline int8_t compareCodePointOrder(int32_t start,

464 int32_t length,

465 const UnicodeString& srcText,

466 int32_t srcStart,

467 int32_t srcLength) const;

468

469 /**

470 * Compare two Unicode strings in code point order.

471 * The result may be different from the results of compare(), operator<, etc.

472 * if supplementary characters are present:

473 *

474 * In UTF-16, supplementary characters (with code points U+10000 and above) are

475 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

476 * which means that they compare as less than some other BMP characters like U+feff.

477 * This function compares Unicode strings in code point order.

478 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

479 *

480 * @param srcChars A pointer to another string to compare this one to.

481 * @param srcLength The number of code units from that string to compare.

482 * @return a negative/zero/positive integer corresponding to whether

483 * this string is less than/equal to/greater than the second one

484 * in code point order

485 * @stable ICU 2.0

486 */

487 inline int8_t compareCodePointOrder(const UChar *srcChars,

488 int32_t srcLength) const;

489

490 /**

491 * Compare two Unicode strings in code point order.

492 * The result may be different from the results of compare(), operator<, etc.

493 * if supplementary characters are present:

494 *

495 * In UTF-16, supplementary characters (with code points U+10000 and above) are

496 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

497 * which means that they compare as less than some other BMP characters like U+feff.

498 * This function compares Unicode strings in code point order.

499 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

500 *

501 * @param start The start offset in this string at which the compare operation begins.

502 * @param length The number of code units from this string to compare.

503 * @param srcChars A pointer to another string to compare this one to.

504 * @return a negative/zero/positive integer corresponding to whether

505 * this string is less than/equal to/greater than the second one

506 * in code point order

507 * @stable ICU 2.0

508 */

509 inline int8_t compareCodePointOrder(int32_t start,

510 int32_t length,

511 const UChar *srcChars) const;

512

513 /**

514 * Compare two Unicode strings in code point order.

515 * The result may be different from the results of compare(), operator<, etc.

516 * if supplementary characters are present:

517 *

518 * In UTF-16, supplementary characters (with code points U+10000 and above) are

519 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

520 * which means that they compare as less than some other BMP characters like U+feff.

521 * This function compares Unicode strings in code point order.

522 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

523 *

524 * @param start The start offset in this string at which the compare operation begins.

525 * @param length The number of code units from this string to compare.

526 * @param srcChars A pointer to another string to compare this one to.

527 * @param srcStart The start offset in that string at which the compare operation begins.

528 * @param srcLength The number of code units from that string to compare.

529 * @return a negative/zero/positive integer corresponding to whether

530 * this string is less than/equal to/greater than the second one

531 * in code point order

532 * @stable ICU 2.0

533 */

534 inline int8_t compareCodePointOrder(int32_t start,

535 int32_t length,

536 const UChar *srcChars,

537 int32_t srcStart,

538 int32_t srcLength) const;

539

540 /**

541 * Compare two Unicode strings in code point order.

542 * The result may be different from the results of compare(), operator<, etc.

543 * if supplementary characters are present:

544 *

545 * In UTF-16, supplementary characters (with code points U+10000 and above) are

546 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

547 * which means that they compare as less than some other BMP characters like U+feff.

548 * This function compares Unicode strings in code point order.

549 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

550 *

551 * @param start The start offset in this string at which the compare operation begins.

552 * @param limit The offset after the last code unit from this string to compare.

553 * @param srcText Another string to compare this one to.

554 * @param srcStart The start offset in that string at which the compare operation begins.

555 * @param srcLimit The offset after the last code unit from that string to compare.

556 * @return a negative/zero/positive integer corresponding to whether

557 * this string is less than/equal to/greater than the second one

558 * in code point order

559 * @stable ICU 2.0

560 */

561 inline int8_t compareCodePointOrderBetween(int32_t start,

562 int32_t limit,

563 const UnicodeString& srcText,

564 int32_t srcStart,

565 int32_t srcLimit) const;

566

567 /**

568 * Compare two strings case-insensitively using full case folding.

569 * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).

570 *

571 * @param text Another string to compare this one to.

572 * @param options A bit set of options:

573 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:

574 * Comparison in code unit order with default case folding.

575 *

576 * - U_COMPARE_CODE_POINT_ORDER

577 * Set to choose code point order instead of code unit order

578 * (see u_strCompare for details).

579 *

580 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I

581 *

582 * @return A negative, zero, or positive integer indicating the comparison result.

583 * @stable ICU 2.0

584 */

585 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;

586

587 /**

588 * Compare two strings case-insensitively using full case folding.

589 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).

590 *

591 * @param start The start offset in this string at which the compare operation begins.

592 * @param length The number of code units from this string to compare.

593 * @param srcText Another string to compare this one to.

594 * @param options A bit set of options:

595 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:

596 * Comparison in code unit order with default case folding.

597 *

598 * - U_COMPARE_CODE_POINT_ORDER

599 * Set to choose code point order instead of code unit order

600 * (see u_strCompare for details).

601 *

602 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I

603 *

604 * @return A negative, zero, or positive integer indicating the comparison result.

605 * @stable ICU 2.0

606 */

607 inline int8_t caseCompare(int32_t start,

608 int32_t length,

609 const UnicodeString& srcText,

610 uint32_t options) const;

611

612 /**

613 * Compare two strings case-insensitively using full case folding.

614 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).

615 *

616 * @param start The start offset in this string at which the compare operation begins.

617 * @param length The number of code units from this string to compare.

618 * @param srcText Another string to compare this one to.

619 * @param srcStart The start offset in that string at which the compare operation begins.

620 * @param srcLength The number of code units from that string to compare.

621 * @param options A bit set of options:

622 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:

623 * Comparison in code unit order with default case folding.

624 *

625 * - U_COMPARE_CODE_POINT_ORDER

626 * Set to choose code point order instead of code unit order

627 * (see u_strCompare for details).

628 *

629 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I

630 *

631 * @return A negative, zero, or positive integer indicating the comparison result.

632 * @stable ICU 2.0

633 */

634 inline int8_t caseCompare(int32_t start,

635 int32_t length,

636 const UnicodeString& srcText,

637 int32_t srcStart,

638 int32_t srcLength,

639 uint32_t options) const;

640

641 /**

642 * Compare two strings case-insensitively using full case folding.

643 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).

644 *

645 * @param srcChars A pointer to another string to compare this one to.

646 * @param srcLength The number of code units from that string to compare.

647 * @param options A bit set of options:

648 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:

649 * Comparison in code unit order with default case folding.

650 *

651 * - U_COMPARE_CODE_POINT_ORDER

652 * Set to choose code point order instead of code unit order

653 * (see u_strCompare for details).

654 *

655 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I

656 *

657 * @return A negative, zero, or positive integer indicating the comparison result.

658 * @stable ICU 2.0

659 */

660 inline int8_t caseCompare(const UChar *srcChars,

661 int32_t srcLength,

662 uint32_t options) const;

663

664 /**

665 * Compare two strings case-insensitively using full case folding.

666 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).

667 *

668 * @param start The start offset in this string at which the compare operation begins.

669 * @param length The number of code units from this string to compare.

670 * @param srcChars A pointer to another string to compare this one to.

671 * @param options A bit set of options:

672 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:

673 * Comparison in code unit order with default case folding.

674 *

675 * - U_COMPARE_CODE_POINT_ORDER

676 * Set to choose code point order instead of code unit order

677 * (see u_strCompare for details).

678 *

679 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I

680 *

681 * @return A negative, zero, or positive integer indicating the comparison result.

682 * @stable ICU 2.0

683 */

684 inline int8_t caseCompare(int32_t start,

685 int32_t length,

686 const UChar *srcChars,

687 uint32_t options) const;

688

689 /**

690 * Compare two strings case-insensitively using full case folding.

691 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).

692 *

693 * @param start The start offset in this string at which the compare operation begins.

694 * @param length The number of code units from this string to compare.

695 * @param srcChars A pointer to another string to compare this one to.

696 * @param srcStart The start offset in that string at which the compare operation begins.

697 * @param srcLength The number of code units from that string to compare.

698 * @param options A bit set of options:

699 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:

700 * Comparison in code unit order with default case folding.

701 *

702 * - U_COMPARE_CODE_POINT_ORDER

703 * Set to choose code point order instead of code unit order

704 * (see u_strCompare for details).

705 *

706 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I

707 *

708 * @return A negative, zero, or positive integer indicating the comparison result.

709 * @stable ICU 2.0

710 */

711 inline int8_t caseCompare(int32_t start,

712 int32_t length,

713 const UChar *srcChars,

714 int32_t srcStart,

715 int32_t srcLength,

716 uint32_t options) const;

717

718 /**

719 * Compare two strings case-insensitively using full case folding.

720 * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).

721 *

722 * @param start The start offset in this string at which the compare operation begins.

723 * @param limit The offset after the last code unit from this string to compare.

724 * @param srcText Another string to compare this one to.

725 * @param srcStart The start offset in that string at which the compare operation begins.

726 * @param srcLimit The offset after the last code unit from that string to compare.

727 * @param options A bit set of options:

728 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:

729 * Comparison in code unit order with default case folding.

730 *

731 * - U_COMPARE_CODE_POINT_ORDER

732 * Set to choose code point order instead of code unit order

733 * (see u_strCompare for details).

734 *

735 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I

736 *

737 * @return A negative, zero, or positive integer indicating the comparison result.

738 * @stable ICU 2.0

739 */

740 inline int8_t caseCompareBetween(int32_t start,

741 int32_t limit,

742 const UnicodeString& srcText,

743 int32_t srcStart,

744 int32_t srcLimit,

745 uint32_t options) const;

746

747 /**

748 * Determine if this starts with the characters in <TT>text</TT>

749 * @param text The text to match.

750 * @return TRUE if this starts with the characters in <TT>text</TT>,

751 * FALSE otherwise

752 * @stable ICU 2.0

753 */

754 inline UBool startsWith(const UnicodeString& text) const;

755

756 /**

757 * Determine if this starts with the characters in <TT>srcText</TT>

758 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

759 * @param srcText The text to match.

760 * @param srcStart the offset into <TT>srcText</TT> to start matching

761 * @param srcLength the number of characters in <TT>srcText</TT> to match

762 * @return TRUE if this starts with the characters in <TT>srcText</TT>,

763 * FALSE otherwise

764 * @stable ICU 2.0

765 */

766 inline UBool startsWith(const UnicodeString& srcText,

767 int32_t srcStart,

768 int32_t srcLength) const;

769

770 /**

771 * Determine if this starts with the characters in <TT>srcChars</TT>

772 * @param srcChars The characters to match.

773 * @param srcLength the number of characters in <TT>srcChars</TT>

774 * @return TRUE if this starts with the characters in <TT>srcChars</TT>,

775 * FALSE otherwise

776 * @stable ICU 2.0

777 */

778 inline UBool startsWith(const UChar *srcChars,

779 int32_t srcLength) const;

780

781 /**

782 * Determine if this starts with the characters in <TT>srcChars</TT>

783 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

784 * @param srcChars The characters to match.

785 * @param srcStart the offset into <TT>srcChars</TT> to start matching

786 * @param srcLength the number of characters in <TT>srcChars</TT> to match

787 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise

788 * @stable ICU 2.0

789 */

790 inline UBool startsWith(const UChar *srcChars,

791 int32_t srcStart,

792 int32_t srcLength) const;

793

794 /**

795 * Determine if this ends with the characters in <TT>text</TT>

796 * @param text The text to match.

797 * @return TRUE if this ends with the characters in <TT>text</TT>,

798 * FALSE otherwise

799 * @stable ICU 2.0

800 */

801 inline UBool endsWith(const UnicodeString& text) const;

802

803 /**

804 * Determine if this ends with the characters in <TT>srcText</TT>

805 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

806 * @param srcText The text to match.

807 * @param srcStart the offset into <TT>srcText</TT> to start matching

808 * @param srcLength the number of characters in <TT>srcText</TT> to match

809 * @return TRUE if this ends with the characters in <TT>srcText</TT>,

810 * FALSE otherwise

811 * @stable ICU 2.0

812 */

813 inline UBool endsWith(const UnicodeString& srcText,

814 int32_t srcStart,

815 int32_t srcLength) const;

816

817 /**

818 * Determine if this ends with the characters in <TT>srcChars</TT>

819 * @param srcChars The characters to match.

820 * @param srcLength the number of characters in <TT>srcChars</TT>

821 * @return TRUE if this ends with the characters in <TT>srcChars</TT>,

822 * FALSE otherwise

823 * @stable ICU 2.0

824 */

825 inline UBool endsWith(const UChar *srcChars,

826 int32_t srcLength) const;

827

828 /**

829 * Determine if this ends with the characters in <TT>srcChars</TT>

830 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

831 * @param srcChars The characters to match.

832 * @param srcStart the offset into <TT>srcChars</TT> to start matching

833 * @param srcLength the number of characters in <TT>srcChars</TT> to match

834 * @return TRUE if this ends with the characters in <TT>srcChars</TT>,

835 * FALSE otherwise

836 * @stable ICU 2.0

837 */

838 inline UBool endsWith(const UChar *srcChars,

839 int32_t srcStart,

840 int32_t srcLength) const;

841

842

843 /* Searching - bitwise only */

844

845 /**

846 * Locate in this the first occurrence of the characters in <TT>text</TT>,

847 * using bitwise comparison.

848 * @param text The text to search for.

849 * @return The offset into this of the start of <TT>text</TT>,

850 * or -1 if not found.

851 * @stable ICU 2.0

852 */

853 inline int32_t indexOf(const UnicodeString& text) const;

854

855 /**

856 * Locate in this the first occurrence of the characters in <TT>text</TT>

857 * starting at offset <TT>start</TT>, using bitwise comparison.

858 * @param text The text to search for.

859 * @param start The offset at which searching will start.

860 * @return The offset into this of the start of <TT>text</TT>,

861 * or -1 if not found.

862 * @stable ICU 2.0

863 */

864 inline int32_t indexOf(const UnicodeString& text,

865 int32_t start) const;

866

867 /**

868 * Locate in this the first occurrence in the range

869 * [<TT>start</TT>, <TT>start + length</TT>) of the characters

870 * in <TT>text</TT>, using bitwise comparison.

871 * @param text The text to search for.

872 * @param start The offset at which searching will start.

873 * @param length The number of characters to search

874 * @return The offset into this of the start of <TT>text</TT>,

875 * or -1 if not found.

876 * @stable ICU 2.0

877 */

878 inline int32_t indexOf(const UnicodeString& text,

879 int32_t start,

880 int32_t length) const;

881

882 /**

883 * Locate in this the first occurrence in the range

884 * [<TT>start</TT>, <TT>start + length</TT>) of the characters

885 * in <TT>srcText</TT> in the range

886 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),

887 * using bitwise comparison.

888 * @param srcText The text to search for.

889 * @param srcStart the offset into <TT>srcText</TT> at which

890 * to start matching

891 * @param srcLength the number of characters in <TT>srcText</TT> to match

892 * @param start the offset into this at which to start matching

893 * @param length the number of characters in this to search

894 * @return The offset into this of the start of <TT>srcText</TT>,

895 * or -1 if not found.

896 * @stable ICU 2.0

897 */

898 inline int32_t indexOf(const UnicodeString& srcText,

899 int32_t srcStart,

900 int32_t srcLength,

901 int32_t start,

902 int32_t length) const;

903

904 /**

905 * Locate in this the first occurrence of the characters in

906 * <TT>srcChars</TT>

907 * starting at offset <TT>start</TT>, using bitwise comparison.

908 * @param srcChars The text to search for.

909 * @param srcLength the number of characters in <TT>srcChars</TT> to match

910 * @param start the offset into this at which to start matching

911 * @return The offset into this of the start of <TT>srcChars</TT>,

912 * or -1 if not found.

913 * @stable ICU 2.0

914 */

915 inline int32_t indexOf(const UChar *srcChars,

916 int32_t srcLength,

917 int32_t start) const;

918

919 /**

920 * Locate in this the first occurrence in the range

921 * [<TT>start</TT>, <TT>start + length</TT>) of the characters

922 * in <TT>srcChars</TT>, using bitwise comparison.

923 * @param srcChars The text to search for.

924 * @param srcLength the number of characters in <TT>srcChars</TT>

925 * @param start The offset at which searching will start.

926 * @param length The number of characters to search

927 * @return The offset into this of the start of <TT>srcChars</TT>,

928 * or -1 if not found.

929 * @stable ICU 2.0

930 */

931 inline int32_t indexOf(const UChar *srcChars,

932 int32_t srcLength,

933 int32_t start,

934 int32_t length) const;

935

936 /**

937 * Locate in this the first occurrence in the range

938 * [<TT>start</TT>, <TT>start + length</TT>) of the characters

939 * in <TT>srcChars</TT> in the range

940 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),

941 * using bitwise comparison.

942 * @param srcChars The text to search for.

943 * @param srcStart the offset into <TT>srcChars</TT> at which

944 * to start matching

945 * @param srcLength the number of characters in <TT>srcChars</TT> to match

946 * @param start the offset into this at which to start matching

947 * @param length the number of characters in this to search

948 * @return The offset into this of the start of <TT>srcChars</TT>,

949 * or -1 if not found.

950 * @stable ICU 2.0

951 */

952 int32_t indexOf(const UChar *srcChars,

953 int32_t srcStart,

954 int32_t srcLength,

955 int32_t start,

956 int32_t length) const;

957

958 /**

959 * Locate in this the first occurrence of the BMP code point <code>c</code>,

960 * using bitwise comparison.

961 * @param c The code unit to search for.

962 * @return The offset into this of <TT>c</TT>, or -1 if not found.

963 * @stable ICU 2.0

964 */

965 inline int32_t indexOf(UChar c) const;

966

967 /**

968 * Locate in this the first occurrence of the code point <TT>c</TT>,

969 * using bitwise comparison.

970 *

971 * @param c The code point to search for.

972 * @return The offset into this of <TT>c</TT>, or -1 if not found.

973 * @stable ICU 2.0

974 */

975 inline int32_t indexOf(UChar32 c) const;

976

977 /**

978 * Locate in this the first occurrence of the BMP code point <code>c</code>,

979 * starting at offset <TT>start</TT>, using bitwise comparison.

980 * @param c The code unit to search for.

981 * @param start The offset at which searching will start.

982 * @return The offset into this of <TT>c</TT>, or -1 if not found.

983 * @stable ICU 2.0

984 */

985 inline int32_t indexOf(UChar c,

986 int32_t start) const;

987

988 /**

989 * Locate in this the first occurrence of the code point <TT>c</TT>

990 * starting at offset <TT>start</TT>, using bitwise comparison.

991 *

992 * @param c The code point to search for.

993 * @param start The offset at which searching will start.

994 * @return The offset into this of <TT>c</TT>, or -1 if not found.

995 * @stable ICU 2.0

996 */

997 inline int32_t indexOf(UChar32 c,

998 int32_t start) const;

999

1000 /**

1001 * Locate in this the first occurrence of the BMP code point <code>c</code>

1002 * in the range [<TT>start</TT>, <TT>start + length</TT>),

1003 * using bitwise comparison.

1004 * @param c The code unit to search for.

1005 * @param start the offset into this at which to start matching

1006 * @param length the number of characters in this to search

1007 * @return The offset into this of <TT>c</TT>, or -1 if not found.

1008 * @stable ICU 2.0

1009 */

1010 inline int32_t indexOf(UChar c,

1011 int32_t start,

1012 int32_t length) const;

1013

1014 /**

1015 * Locate in this the first occurrence of the code point <TT>c</TT>

1016 * in the range [<TT>start</TT>, <TT>start + length</TT>),

1017 * using bitwise comparison.

1018 *

1019 * @param c The code point to search for.

1020 * @param start the offset into this at which to start matching

1021 * @param length the number of characters in this to search

1022 * @return The offset into this of <TT>c</TT>, or -1 if not found.

1023 * @stable ICU 2.0

1024 */

1025 inline int32_t indexOf(UChar32 c,

1026 int32_t start,

1027 int32_t length) const;

1028

1029 /**

1030 * Locate in this the last occurrence of the characters in <TT>text</TT>,

1031 * using bitwise comparison.

1032 * @param text The text to search for.

1033 * @return The offset into this of the start of <TT>text</TT>,

1034 * or -1 if not found.

1035 * @stable ICU 2.0

1036 */

1037 inline int32_t lastIndexOf(const UnicodeString& text) const;

1038

1039 /**

1040 * Locate in this the last occurrence of the characters in <TT>text</TT>

1041 * starting at offset <TT>start</TT>, using bitwise comparison.

1042 * @param text The text to search for.

1043 * @param start The offset at which searching will start.

1044 * @return The offset into this of the start of <TT>text</TT>,

1045 * or -1 if not found.

1046 * @stable ICU 2.0

1047 */

1048 inline int32_t lastIndexOf(const UnicodeString& text,

1049 int32_t start) const;

1050

1051 /**

1052 * Locate in this the last occurrence in the range

1053 * [<TT>start</TT>, <TT>start + length</TT>) of the characters

1054 * in <TT>text</TT>, using bitwise comparison.

1055 * @param text The text to search for.

1056 * @param start The offset at which searching will start.

1057 * @param length The number of characters to search

1058 * @return The offset into this of the start of <TT>text</TT>,

1059 * or -1 if not found.

1060 * @stable ICU 2.0

1061 */

1062 inline int32_t lastIndexOf(const UnicodeString& text,

1063 int32_t start,

1064 int32_t length) const;

1065

1066 /**

1067 * Locate in this the last occurrence in the range

1068 * [<TT>start</TT>, <TT>start + length</TT>) of the characters

1069 * in <TT>srcText</TT> in the range

1070 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),

1071 * using bitwise comparison.

1072 * @param srcText The text to search for.

1073 * @param srcStart the offset into <TT>srcText</TT> at which

1074 * to start matching

1075 * @param srcLength the number of characters in <TT>srcText</TT> to match

1076 * @param start the offset into this at which to start matching

1077 * @param length the number of characters in this to search

1078 * @return The offset into this of the start of <TT>srcText</TT>,

1079 * or -1 if not found.

1080 * @stable ICU 2.0

1081 */

1082 inline int32_t lastIndexOf(const UnicodeString& srcText,

1083 int32_t srcStart,

1084 int32_t srcLength,

1085 int32_t start,

1086 int32_t length) const;

1087

1088 /**

1089 * Locate in this the last occurrence of the characters in <TT>srcChars</TT>

1090 * starting at offset <TT>start</TT>, using bitwise comparison.

1091 * @param srcChars The text to search for.

1092 * @param srcLength the number of characters in <TT>srcChars</TT> to match

1093 * @param start the offset into this at which to start matching

1094 * @return The offset into this of the start of <TT>srcChars</TT>,

1095 * or -1 if not found.

1096 * @stable ICU 2.0

1097 */

1098 inline int32_t lastIndexOf(const UChar *srcChars,

1099 int32_t srcLength,

1100 int32_t start) const;

1101

1102 /**

1103 * Locate in this the last occurrence in the range

1104 * [<TT>start</TT>, <TT>start + length</TT>) of the characters

1105 * in <TT>srcChars</TT>, using bitwise comparison.

1106 * @param srcChars The text to search for.

1107 * @param srcLength the number of characters in <TT>srcChars</TT>

1108 * @param start The offset at which searching will start.

1109 * @param length The number of characters to search

1110 * @return The offset into this of the start of <TT>srcChars</TT>,

1111 * or -1 if not found.

1112 * @stable ICU 2.0

1113 */

1114 inline int32_t lastIndexOf(const UChar *srcChars,

1115 int32_t srcLength,

1116 int32_t start,

1117 int32_t length) const;

1118

1119 /**

1120 * Locate in this the last occurrence in the range

1121 * [<TT>start</TT>, <TT>start + length</TT>) of the characters

1122 * in <TT>srcChars</TT> in the range

1123 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),

1124 * using bitwise comparison.

1125 * @param srcChars The text to search for.

1126 * @param srcStart the offset into <TT>srcChars</TT> at which

1127 * to start matching

1128 * @param srcLength the number of characters in <TT>srcChars</TT> to match

1129 * @param start the offset into this at which to start matching

1130 * @param length the number of characters in this to search

1131 * @return The offset into this of the start of <TT>srcChars</TT>,

1132 * or -1 if not found.

1133 * @stable ICU 2.0

1134 */

1135 int32_t lastIndexOf(const UChar *srcChars,

1136 int32_t srcStart,

1137 int32_t srcLength,

1138 int32_t start,

1139 int32_t length) const;

1140

1141 /**

1142 * Locate in this the last occurrence of the BMP code point <code>c</code>,

1143 * using bitwise comparison.

1144 * @param c The code unit to search for.

1145 * @return The offset into this of <TT>c</TT>, or -1 if not found.

1146 * @stable ICU 2.0

1147 */

1148 inline int32_t lastIndexOf(UChar c) const;

1149

1150 /**

1151 * Locate in this the last occurrence of the code point <TT>c</TT>,

1152 * using bitwise comparison.

1153 *

1154 * @param c The code point to search for.

1155 * @return The offset into this of <TT>c</TT>, or -1 if not found.

1156 * @stable ICU 2.0

1157 */

1158 inline int32_t lastIndexOf(UChar32 c) const;

1159

1160 /**

1161 * Locate in this the last occurrence of the BMP code point <code>c</code>

1162 * starting at offset <TT>start</TT>, using bitwise comparison.

1163 * @param c The code unit to search for.

1164 * @param start The offset at which searching will start.

1165 * @return The offset into this of <TT>c</TT>, or -1 if not found.

1166 * @stable ICU 2.0

1167 */

1168 inline int32_t lastIndexOf(UChar c,

1169 int32_t start) const;

1170

1171 /**

1172 * Locate in this the last occurrence of the code point <TT>c</TT>

1173 * starting at offset <TT>start</TT>, using bitwise comparison.

1174 *

1175 * @param c The code point to search for.

1176 * @param start The offset at which searching will start.

1177 * @return The offset into this of <TT>c</TT>, or -1 if not found.

1178 * @stable ICU 2.0

1179 */

1180 inline int32_t lastIndexOf(UChar32 c,

1181 int32_t start) const;

1182

1183 /**

1184 * Locate in this the last occurrence of the BMP code point <code>c</code>

1185 * in the range [<TT>start</TT>, <TT>start + length</TT>),

1186 * using bitwise comparison.

1187 * @param c The code unit to search for.

1188 * @param start the offset into this at which to start matching

1189 * @param length the number of characters in this to search

1190 * @return The offset into this of <TT>c</TT>, or -1 if not found.

1191 * @stable ICU 2.0

1192 */

1193 inline int32_t lastIndexOf(UChar c,

1194 int32_t start,

1195 int32_t length) const;

1196

1197 /**

1198 * Locate in this the last occurrence of the code point <TT>c</TT>

1199 * in the range [<TT>start</TT>, <TT>start + length</TT>),

1200 * using bitwise comparison.

1201 *

1202 * @param c The code point to search for.

1203 * @param start the offset into this at which to start matching

1204 * @param length the number of characters in this to search

1205 * @return The offset into this of <TT>c</TT>, or -1 if not found.

1206 * @stable ICU 2.0

1207 */

1208 inline int32_t lastIndexOf(UChar32 c,

1209 int32_t start,

1210 int32_t length) const;

1211

1212

1213 /* Character access */

1214

1215 /**

1216 * Return the code unit at offset <tt>offset</tt>.

1217 * If the offset is not valid (0..length()-1) then U+ffff is returned.

1218 * @param offset a valid offset into the text

1219 * @return the code unit at offset <tt>offset</tt>

1220 * or 0xffff if the offset is not valid for this string

1221 * @stable ICU 2.0

1222 */

1223 inline UChar charAt(int32_t offset) const;

1224

1225 /**

1226 * Return the code unit at offset <tt>offset</tt>.

1227 * If the offset is not valid (0..length()-1) then U+ffff is returned.

1228 * @param offset a valid offset into the text

1229 * @return the code unit at offset <tt>offset</tt>

1230 * @stable ICU 2.0

1231 */

1232 inline UChar operator[] (int32_t offset) const;

1233

1234 /**

1235 * Return the code point that contains the code unit

1236 * at offset <tt>offset</tt>.

1237 * If the offset is not valid (0..length()-1) then U+ffff is returned.

1238 * @param offset a valid offset into the text

1239 * that indicates the text offset of any of the code units

1240 * that will be assembled into a code point (21-bit value) and returned

1241 * @return the code point of text at <tt>offset</tt>

1242 * or 0xffff if the offset is not valid for this string

1243 * @stable ICU 2.0

1244 */

1245 inline UChar32 char32At(int32_t offset) const;

1246

1247 /**

1248 * Adjust a random-access offset so that

1249 * it points to the beginning of a Unicode character.

1250 * The offset that is passed in points to

1251 * any code unit of a code point,

1252 * while the returned offset will point to the first code unit

1253 * of the same code point.

1254 * In UTF-16, if the input offset points to a second surrogate

1255 * of a surrogate pair, then the returned offset will point

1256 * to the first surrogate.

1257 * @param offset a valid offset into one code point of the text

1258 * @return offset of the first code unit of the same code point

1259 * @see U16_SET_CP_START

1260 * @stable ICU 2.0

1261 */

1262 inline int32_t getChar32Start(int32_t offset) const;

1263

1264 /**

1265 * Adjust a random-access offset so that

1266 * it points behind a Unicode character.

1267 * The offset that is passed in points behind

1268 * any code unit of a code point,

1269 * while the returned offset will point behind the last code unit

1270 * of the same code point.

1271 * In UTF-16, if the input offset points behind the first surrogate

1272 * (i.e., to the second surrogate)

1273 * of a surrogate pair, then the returned offset will point

1274 * behind the second surrogate (i.e., to the first code unit after the pair).

1275 * @param offset a valid offset after any code unit of a code point of the text

1276 * @return offset of the first code unit after the same code point

1277 * @see U16_SET_CP_LIMIT

1278 * @stable ICU 2.0

1279 */

1280 inline int32_t getChar32Limit(int32_t offset) const;

1281

1282 /**

1283 * Move the code unit index along the string by delta code points.

1284 * Interpret the input index as a code unit-based offset into the string,

1285 * move the index forward or backward by delta code points, and

1286 * return the resulting index.

1287 * The input index should point to the first code unit of a code point,

1288 * if there is more than one.

1289 *

1290 * Both input and output indexes are code unit-based as for all

1291 * string indexes/offsets in ICU (and other libraries, like MBCS char*).

1292 * If delta<0 then the index is moved backward (toward the start of the string).

1293 * If delta>0 then the index is moved forward (toward the end of the string).

1294 *

1295 * This behaves like CharacterIterator::move32(delta, kCurrent).

1296 *

1297 * Behavior for out-of-bounds indexes:

1298 * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,

1299 * if the input index<0 then it is pinned to 0;

1300 * if it is index>length() then it is pinned to length().

1301 * Afterwards, the index is moved by <code>delta</code> code points

1302 * forward or backward,

1303 * but no further backward than to 0 and no further forward than to length().

1304 * The resulting index return value will be in between 0 and length(), inclusively.

1305 *

1306 * Examples:

1307 * <pre>

1308 * // s has code points 'a' U+10000 'b' U+10ffff U+2029

1309 * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();

1310 *

1311 * // initial index: position of U+10000

1312 * int32_t index=1;

1313 *

1314 * // the following examples will all result in index==4, position of U+10ffff

1315 *

1316 * // skip 2 code points from some position in the string

1317 * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'

1318 *

1319 * // go to the 3rd code point from the start of s (0-based)

1320 * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'

1321 *

1322 * // go to the next-to-last code point of s

1323 * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff

1324 * </pre>

1325 *

1326 * @param index input code unit index

1327 * @param delta (signed) code point count to move the index forward or backward

1328 * in the string

1329 * @return the resulting code unit index

1330 * @stable ICU 2.0

1331 */

1332 int32_t moveIndex32(int32_t index, int32_t delta) const;

1333

1334 /* Substring extraction */

1335

1336 /**

1337 * Copy the characters in the range

1338 * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,

1339 * beginning at <tt>dstStart</tt>.

1340 * If the string aliases to <code>dst</code> itself as an external buffer,

1341 * then extract() will not copy the contents.

1342 *

1343 * @param start offset of first character which will be copied into the array

1344 * @param length the number of characters to extract

1345 * @param dst array in which to copy characters. The length of <tt>dst</tt>

1346 * must be at least (<tt>dstStart + length</tt>).

1347 * @param dstStart the offset in <TT>dst</TT> where the first character

1348 * will be extracted

1349 * @stable ICU 2.0

1350 */

1351 inline void extract(int32_t start,

1352 int32_t length,

1353 UChar *dst,

1354 int32_t dstStart = 0) const;

1355

1356 /**

1357 * Copy the contents of the string into dest.

1358 * This is a convenience function that

1359 * checks if there is enough space in dest,

1360 * extracts the entire string if possible,

1361 * and NUL-terminates dest if possible.

1362 *

1363 * If the string fits into dest but cannot be NUL-terminated

1364 * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.

1365 * If the string itself does not fit into dest

1366 * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.

1367 *

1368 * If the string aliases to <code>dest</code> itself as an external buffer,

1369 * then extract() will not copy the contents.

1370 *

1371 * @param dest Destination string buffer.

1372 * @param destCapacity Number of UChars available at dest.

1373 * @param errorCode ICU error code.

1374 * @return length()

1375 * @stable ICU 2.0

1376 */

1377 int32_t

1378 extract(UChar *dest, int32_t destCapacity,

1379 UErrorCode &errorCode) const;

1380

1381 /**

1382 * Copy the characters in the range

1383 * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString

1384 * <tt>target</tt>.

1385 * @param start offset of first character which will be copied

1386 * @param length the number of characters to extract

1387 * @param target UnicodeString into which to copy characters.

1388 * The method returns nothing; the result is delivered only through this parameter.

1389 * @stable ICU 2.0

1390 */

1391 inline void extract(int32_t start,

1392 int32_t length,

1393 UnicodeString& target) const;

1394

1395 /**

1396 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)

1397 * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.

1398 * @param start offset of first character which will be copied into the array

1399 * @param limit offset immediately following the last character to be copied

1400 * @param dst array in which to copy characters. The length of <tt>dst</tt>

1401 * must be at least (<tt>dstStart + (limit - start)</tt>).

1402 * @param dstStart the offset in <TT>dst</TT> where the first character

1403 * will be extracted

1404 * @stable ICU 2.0

1405 */

1406 inline void extractBetween(int32_t start,

1407 int32_t limit,

1408 UChar *dst,

1409 int32_t dstStart = 0) const;

1410

1411 /**

1412 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)

1413 * into the UnicodeString <tt>target</tt>. Replaceable API.

1414 * @param start offset of first character which will be copied

1415 * @param limit offset immediately following the last character to be copied

1416 * @param target UnicodeString into which to copy characters.

1417 * The method returns nothing; the result is delivered only through this parameter.

1418 * @stable ICU 2.0

1419 */

1420 virtual void extractBetween(int32_t start,

1421 int32_t limit,

1422 UnicodeString& target) const;

1423

1424 /**

1425 * Copy the characters in the range

1426 * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters.

1427 * All characters must be invariant (see utypes.h).

1428 * Use US_INV as the last, signature-distinguishing parameter.

1429 *

1430 * This function does not write any more than <code>targetCapacity</code>

1431 * characters but returns the length of the entire output string

1432 * so that one can allocate a larger buffer and call the function again

1433 * if necessary.

1434 * The output string is NUL-terminated if possible.

1435 *

1436 * @param start offset of first character which will be copied

1437 * @param startLength the number of characters to extract

1438 * @param target the target buffer for extraction, can be NULL

1439 * if targetCapacity is 0

1440 * @param targetCapacity the length of the target buffer

1441 * @param inv Signature-distinguishing parameter, use US_INV.

1442 * @return the output string length, not including the terminating NUL

1443 * @stable ICU 3.2

1444 */

1445 int32_t extract(int32_t start,

1446 int32_t startLength,

1447 char *target,

1448 int32_t targetCapacity,

1449 enum EInvariant inv) const;

1450

1451 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION

1452

1453 /**

1454 * Copy the characters in the range

1455 * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters

1456 * in the platform's default codepage.

1457 * This function does not write any more than <code>targetLength</code>

1458 * characters but returns the length of the entire output string

1459 * so that one can allocate a larger buffer and call the function again

1460 * if necessary.

1461 * The output string is NUL-terminated if possible.

1462 *

1463 * @param start offset of first character which will be copied

1464 * @param startLength the number of characters to extract

1465 * @param target the target buffer for extraction

1466 * @param targetLength the length of the target buffer

1467 * If <TT>target</TT> is NULL, then the number of bytes required for

1468 * <TT>target</TT> is returned.

1469 * @return the output string length, not including the terminating NUL

1470 * @stable ICU 2.0

1471 */

1472 int32_t extract(int32_t start,

1473 int32_t startLength,

1474 char *target,

1475 uint32_t targetLength) const;

1476

1477 #endif

1478

1479 #if !UCONFIG_NO_CONVERSION

1480

1481 /**

1482 * Copy the characters in the range

1483 * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters

1484 * in a specified codepage.

1485 * The output string is NUL-terminated.

1486 *

1487 * Recommendation: For invariant-character strings use

1488 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const

1489 * because it avoids object code dependencies of UnicodeString on

1490 * the conversion code.

1491 *

1492 * @param start offset of first character which will be copied

1493 * @param startLength the number of characters to extract

1494 * @param target the target buffer for extraction

1495 * @param codepage the desired codepage for the characters. 0 has

1496 * the special meaning of the default codepage

1497 * If <code>codepage</code> is an empty string (<code>""</code>),

1498 * then a simple conversion is performed on the codepage-invariant

1499 * subset ("invariant characters") of the platform encoding. See utypes.h.

1500 * If <TT>target</TT> is NULL, then the number of bytes required for

1501 * <TT>target</TT> is returned. It is assumed that the target is big enough

1502 * to fit all of the characters.

1503 * @return the output string length, not including the terminating NUL

1504 * @stable ICU 2.0

1505 */

1506 inline int32_t extract(int32_t start,

1507 int32_t startLength,

1508 char *target,

1509 const char *codepage = 0) const;

1510

1511 /**

1512 * Copy the characters in the range

1513 * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters

1514 * in a specified codepage.

1515 * This function does not write any more than <code>targetLength</code>

1516 * characters but returns the length of the entire output string

1517 * so that one can allocate a larger buffer and call the function again

1518 * if necessary.

1519 * The output string is NUL-terminated if possible.

1520 *

1521 * Recommendation: For invariant-character strings use

1522 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const

1523 * because it avoids object code dependencies of UnicodeString on

1524 * the conversion code.

1525 *

1526 * @param start offset of first character which will be copied

1527 * @param startLength the number of characters to extract

1528 * @param target the target buffer for extraction

1529 * @param targetLength the length of the target buffer

1530 * @param codepage the desired codepage for the characters. 0 has

1531 * the special meaning of the default codepage

1532 * If <code>codepage</code> is an empty string (<code>""</code>),

1533 * then a simple conversion is performed on the codepage-invariant

1534 * subset ("invariant characters") of the platform encoding. See utypes.h.

1535 * If <TT>target</TT> is NULL, then the number of bytes required for

1536 * <TT>target</TT> is returned.

1537 * @return the output string length, not including the terminating NUL

1538 * @stable ICU 2.0

1539 */

1540 int32_t extract(int32_t start,

1541 int32_t startLength,

1542 char *target,

1543 uint32_t targetLength,

1544 const char *codepage) const;

1545

1546 /**

1547 * Convert the UnicodeString into a codepage string using an existing UConverter.

1548 * The output string is NUL-terminated if possible.

1549 *

1550 * This function avoids the overhead of opening and closing a converter if

1551 * multiple strings are extracted.

1552 *

1553 * @param dest destination string buffer, can be NULL if destCapacity==0

1554 * @param destCapacity the number of chars available at dest

1555 * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),

1556 * or NULL for the default converter

1557 * @param errorCode normal ICU error code

1558 * @return the length of the output string, not counting the terminating NUL;

1559 * if the length is greater than destCapacity, then the string will not fit

1560 * and a buffer of the indicated length would need to be passed in

1561 * @stable ICU 2.0

1562 */

1563 int32_t extract(char *dest, int32_t destCapacity,

1564 UConverter *cnv,

1565 UErrorCode &errorCode) const;

1566

1567 #endif

1568

1569 /**

1570 * Create a temporary substring for the specified range.

1571 * Unlike the substring constructor and setTo() functions,

1572 * the object returned here will be a read-only alias (using getBuffer())

1573 * rather than copying the text.

1574 * As a result, this substring operation is much faster but requires

1575 * that the original string not be modified or deleted during the lifetime

1576 * of the returned substring object.

1577 * @param start offset of the first character visible in the substring

1578 * @param length length of the substring

1579 * @return a read-only alias UnicodeString object for the substring

1580 * @stable ICU 4.4

1581 */

1582 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;

1583

1584 /**

1585 * Create a temporary substring for the specified range.

1586 * Same as tempSubString(start, length) except that the substring range

1587 * is specified as a (start, limit) pair (with an exclusive limit index)

1588 * rather than a (start, length) pair.

1589 * @param start offset of the first character visible in the substring

1590 * @param limit offset immediately following the last character visible in the substring

1591 * @return a read-only alias UnicodeString object for the substring

1592 * @stable ICU 4.4

1593 */

1594 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;

1595

1596 /**

1597 * Convert the UnicodeString to UTF-8 and write the result

1598 * to a ByteSink. This is called by toUTF8String().

1599 * Unpaired surrogates are replaced with U+FFFD.

1600 * Calls u_strToUTF8WithSub().

1601 *

1602 * @param sink A ByteSink to which the UTF-8 version of the string is written.

1603 * sink.Flush() is called at the end.

1604 * @stable ICU 4.2

1605 * @see toUTF8String

1606 */

1607 void toUTF8(ByteSink &sink) const;

1608

1609 #if U_HAVE_STD_STRING

1610

1611 /**

1612 * Convert the UnicodeString to UTF-8 and append the result

1613 * to a standard string.

1614 * Unpaired surrogates are replaced with U+FFFD.

1615 * Calls toUTF8().

1616 *

1617 * @param result A standard string (or a compatible object)

1618 * to which the UTF-8 version of the string is appended.

1619 * @return The string object.

1620 * @stable ICU 4.2

1621 * @see toUTF8

1622 */

1623 template<typename StringClass>

1624 StringClass &toUTF8String(StringClass &result) const {

1625 StringByteSink<StringClass> sbs(&result); // sink constructed over the caller's string

1626 toUTF8(sbs); // emit this string's UTF-8 form through the sink

1627 return result; // hand back the caller's string object

1628 }

1629

1630 #endif

1631

1632 /**

1633 * Convert the UnicodeString to UTF-32.

1634 * Unpaired surrogates are replaced with U+FFFD.

1635 * Calls u_strToUTF32WithSub().

1636 *

1637 * @param utf32 destination string buffer, can be NULL if capacity==0

1638 * @param capacity the number of UChar32s available at utf32

1639 * @param errorCode Standard ICU error code. Its input value must

1640 * pass the U_SUCCESS() test, or else the function returns

1641 * immediately. Check for U_FAILURE() on output or use with

1642 * function chaining. (See User Guide for details.)

1643 * @return The length of the UTF-32 string.

1644 * @see fromUTF32

1645 * @stable ICU 4.2

1646 */

1647 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;

1648

1649 /* Length operations */

1650

1651 /**

1652 * Return the length of the UnicodeString object.

1653 * The length is the number of UChar code units that are in the UnicodeString.

1654 * If you want the number of code points, please use countChar32().

1655 * @return the length of the UnicodeString object

1656 * @see countChar32

1657 * @stable ICU 2.0

1658 */

1659 inline int32_t length(void) const;

1660

1661 /**

1662 * Count Unicode code points in the length UChar code units of the string.

1663 * A code point may occupy either one or two UChar code units.

1664 * Counting code points involves reading all code units.

1665 *

1666 * This function is basically the inverse of moveIndex32().

1667 *

1668 * @param start the index of the first code unit to check

1669 * @param length the number of UChar code units to check

1670 * @return the number of code points in the specified code units

1671 * @see length

1672 * @stable ICU 2.0

1673 */

1674 int32_t

1675 countChar32(int32_t start=0, int32_t length=INT32_MAX) const;

1676

1677 /**

1678 * Check if the length UChar code units of the string

1679 * contain more Unicode code points than a certain number.

1680 * This is more efficient than counting all code points in this part of the string

1681 * and comparing that number with a threshold.

1682 * This function may not need to scan the string at all if the length

1683 * falls within a certain range, and

1684 * never needs to count more than 'number+1' code points.

1685 * Logically equivalent to (countChar32(start, length)>number).

1686 * A Unicode code point may occupy either one or two UChar code units.

1687 *

1688 * @param start the index of the first code unit to check (0 for the entire string)

1689 * @param length the number of UChar code units to check

1690 * (use INT32_MAX for the entire string; remember that start/length

1691 * values are pinned)

1692 * @param number The number of code points in the (sub)string is compared against

1693 * the 'number' parameter.

1694 * @return Boolean value for whether the string contains more Unicode code points

1695 * than 'number'. Same as (u_countChar32(s, length)>number).

1696 * @see countChar32

1697 * @see u_strHasMoreChar32Than

1698 * @stable ICU 2.4

1699 */

1700 UBool

1701 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;

1702

1703 /**

1704 * Determine if this string is empty.

1705 * @return TRUE if this string contains 0 characters, FALSE otherwise.

1706 * @stable ICU 2.0

1707 */

1708 inline UBool isEmpty(void) const;

1709

1710 /**

1711 * Return the capacity of the internal buffer of the UnicodeString object.

1712 * This is useful together with the getBuffer functions.

1713 * See there for details.

1714 *

1715 * @return the number of UChars available in the internal buffer

1716 * @see getBuffer

1717 * @stable ICU 2.0

1718 */

1719 inline int32_t getCapacity(void) const;

1720

1721 /* Other operations */

1722

1723 /**

1724 * Generate a hash code for this object.

1725 * @return The hash code of this UnicodeString.

1726 * @stable ICU 2.0

1727 */

1728 inline int32_t hashCode(void) const;

1729

1730 /**

1731 * Determine if this object contains a valid string.

1732 * A bogus string has no value. It is different from an empty string,

1733 * although in both cases isEmpty() returns TRUE and length() returns 0.

1734 * setToBogus() and isBogus() can be used to indicate that no string value is available.

1735 * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and

1736 * length() returns 0.

1737 *

1738 * @return TRUE if the string is bogus/invalid, FALSE otherwise

1739 * @see setToBogus()

1740 * @stable ICU 2.0

1741 */

1742 inline UBool isBogus(void) const;

1743

1744

1745 //========================================

1746 // Write operations

1747 //========================================

1748

1749 /* Assignment operations */

1750

1751 /**

1752 * Assignment operator. Replace the characters in this UnicodeString

1753 * with the characters from <TT>srcText</TT>.

1754 * @param srcText The text containing the characters to replace

1755 * @return a reference to this

1756 * @stable ICU 2.0

1757 */

1758 UnicodeString &operator=(const UnicodeString &srcText);

1759

1760 /**

1761 * Almost the same as the assignment operator.

1762 * Replace the characters in this UnicodeString

1763 * with the characters from <code>srcText</code>.

1764 *

1765 * This function works the same for all strings except for ones that

1766 * are readonly aliases.

1767 * Starting with ICU 2.4, the assignment operator and the copy constructor

1768 * allocate a new buffer and copy the buffer contents even for readonly aliases.

1769 * This function implements the old, more efficient but less safe behavior

1770 * of making this string also a readonly alias to the same buffer.

1771 * The fastCopyFrom function must be used only if it is known that the lifetime of

1772 * this UnicodeString does not exceed the lifetime of the aliased buffer

1773 * including its contents, for example for strings from resource bundles

1774 * or aliases to string contents.

1775 *

1776 * @param src The text containing the characters to replace.

1777 * @return a reference to this

1778 * @stable ICU 2.4

1779 */

1780 UnicodeString &fastCopyFrom(const UnicodeString &src);

1781

1782 /**

1783 * Assignment operator. Replace the characters in this UnicodeString

1784 * with the code unit <TT>ch</TT>.

1785 * @param ch the code unit to replace

1786 * @return a reference to this

1787 * @stable ICU 2.0

1788 */

1789 inline UnicodeString& operator= (UChar ch);

1790

1791 /**

1792 * Assignment operator. Replace the characters in this UnicodeString

1793 * with the code point <TT>ch</TT>.

1794 * @param ch the code point to replace

1795 * @return a reference to this

1796 * @stable ICU 2.0

1797 */

1798 inline UnicodeString& operator= (UChar32 ch);

1799

1800 /**

1801 * Set the text in the UnicodeString object to the characters

1802 * in <TT>srcText</TT> in the range

1803 * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).

1804 * <TT>srcText</TT> is not modified.

1805 * @param srcText the source for the new characters

1806 * @param srcStart the offset into <TT>srcText</TT> where new characters

1807 * will be obtained

1808 * @return a reference to this

1809 * @stable ICU 2.2

1810 */

1811 inline UnicodeString& setTo(const UnicodeString& srcText,

1812 int32_t srcStart);

1813

1814 /**

1815 * Set the text in the UnicodeString object to the characters

1816 * in <TT>srcText</TT> in the range

1817 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

1818 * <TT>srcText</TT> is not modified.

1819 * @param srcText the source for the new characters

1820 * @param srcStart the offset into <TT>srcText</TT> where new characters

1821 * will be obtained

1822 * @param srcLength the number of characters in <TT>srcText</TT> in the

1823 * replace string.

1824 * @return a reference to this

1825 * @stable ICU 2.0

1826 */

1827 inline UnicodeString& setTo(const UnicodeString& srcText,

1828 int32_t srcStart,

1829 int32_t srcLength);

1830

1831 /**

1832 * Set the text in the UnicodeString object to the characters in

1833 * <TT>srcText</TT>.

1834 * <TT>srcText</TT> is not modified.

1835 * @param srcText the source for the new characters

1836 * @return a reference to this

1837 * @stable ICU 2.0

1838 */

1839 inline UnicodeString& setTo(const UnicodeString& srcText);

1840

1841 /**

1842 * Set the characters in the UnicodeString object to the characters

1843 * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.

1844 * @param srcChars the source for the new characters

1845 * @param srcLength the number of Unicode characters in srcChars.

1846 * @return a reference to this

1847 * @stable ICU 2.0

1848 */

1849 inline UnicodeString& setTo(const UChar *srcChars,

1850 int32_t srcLength);

1851

1852 /**

1853 * Set the characters in the UnicodeString object to the code unit

1854 * <TT>srcChar</TT>.

1855 * @param srcChar the code unit which becomes the UnicodeString's character

1856 * content

1857 * @return a reference to this

1858 * @stable ICU 2.0

1859 */

1860 UnicodeString& setTo(UChar srcChar);

1861

1862 /**

1863 * Set the characters in the UnicodeString object to the code point

1864 * <TT>srcChar</TT>.

1865 * @param srcChar the code point which becomes the UnicodeString's character

1866 * content

1867 * @return a reference to this

1868 * @stable ICU 2.0

1869 */

1870 UnicodeString& setTo(UChar32 srcChar);

1871

1872 /**

1873 * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.

1874 * The text will be used for the UnicodeString object, but

1875 * it will not be released when the UnicodeString is destroyed.

1876 * This has copy-on-write semantics:

1877 * When the string is modified, then the buffer is first copied into

1878 * newly allocated memory.

1879 * The aliased buffer is never modified.

1880 * In an assignment to another UnicodeString, the text will be aliased again,

1881 * so that both strings then alias the same readonly-text.

1882 *

1883 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.

1884 * This must be true if <code>textLength==-1</code>.

1885 * @param text The characters to alias for the UnicodeString.

1886 * @param textLength The number of Unicode characters in <code>text</code> to alias.

1887 * If -1, then this function will determine the length

1888 * by calling <code>u_strlen()</code>.

1889 * @return a reference to this

1890 * @stable ICU 2.0

1891 */

1892 UnicodeString &setTo(UBool isTerminated,

1893 const UChar *text,

1894 int32_t textLength);

1895

1896 /**

1897 * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.

1898 * The text will be used for the UnicodeString object, but

1899 * it will not be released when the UnicodeString is destroyed.

1900 * This has write-through semantics:

1901 * For as long as the capacity of the buffer is sufficient, write operations

1902 * will directly affect the buffer. When more capacity is necessary, then

1903 * a new buffer will be allocated and the contents copied as with regularly

1904 * constructed strings.

1905 * In an assignment to another UnicodeString, the buffer will be copied.

1906 * The extract(UChar *dst) function detects whether the dst pointer is the same

1907 * as the string buffer itself and will in this case not copy the contents.

1908 *

1909 * @param buffer The characters to alias for the UnicodeString.

1910 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.

1911 * @param buffCapacity The size of <code>buffer</code> in UChars.

1912 * @return a reference to this

1913 * @stable ICU 2.0

1914 */

1915 UnicodeString &setTo(UChar *buffer,

1916 int32_t buffLength,

1917 int32_t buffCapacity);

1918

1919 /**

1920 * Make this UnicodeString object invalid.

1921 * The string will test TRUE with isBogus().

1922 *

1923 * A bogus string has no value. It is different from an empty string.

1924 * It can be used to indicate that no string value is available.

1925 * getBuffer() and getTerminatedBuffer() return NULL, and

1926 * length() returns 0.

1927 *

1928 * This utility function is used throughout the UnicodeString

1929 * implementation to indicate that a UnicodeString operation failed,

1930 * and may be used in other functions,

1931 * especially but not exclusively when such functions do not

1932 * take a UErrorCode for simplicity.

1933 *

1934 * The following methods, and no others, will clear a string object's bogus flag:

1935 * - remove()

1936 * - remove(0, INT32_MAX)

1937 * - truncate(0)

1938 * - operator=() (assignment operator)

1939 * - setTo(...)

1940 *

1941 * The simplest way to turn a bogus string into an empty one

1942 * is to use the remove() function.

1943 * Examples for other functions that are equivalent to "set to empty string":

1944 * \code

1945 * if(s.isBogus()) {

1946 * s.remove(); // set to an empty string (remove all), or

1947 * s.remove(0, INT32_MAX); // set to an empty string (remove all), or

1948 * s.truncate(0); // set to an empty string (complete truncation), or

1949 * s=UnicodeString(); // assign an empty string, or

1950 * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or

1951 * static const UChar nul=0;

1952 * s.setTo(&nul, 0); // set to an empty C Unicode string

1953 * }

1954 * \endcode

1955 *

1956 * @see isBogus()

1957 * @stable ICU 2.0

1958 */

1959 void setToBogus();

1960

1961 /**

1962 * Set the character at the specified offset to the specified character.

1963 * @param offset A valid offset into the text of the character to set

1964 * @param ch The new character

1965 * @return A reference to this

1966 * @stable ICU 2.0

1967 */

1968 UnicodeString& setCharAt(int32_t offset,

1969 UChar ch);

1970

1971

1972 /* Append operations */

1973

1974 /**

1975 * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString

1976 * object.

1977 * @param ch the code unit to be appended

1978 * @return a reference to this

1979 * @stable ICU 2.0

1980 */

1981 inline UnicodeString& operator+= (UChar ch);

1982

1983 /**

1984 * Append operator. Append the code point <TT>ch</TT> to the UnicodeString

1985 * object.

1986 * @param ch the code point to be appended

1987 * @return a reference to this

1988 * @stable ICU 2.0

1989 */

1990 inline UnicodeString& operator+= (UChar32 ch);

1991

1992 /**

1993 * Append operator. Append the characters in <TT>srcText</TT> to the

1994 * end of the UnicodeString object. <TT>srcText</TT> is

1995 * not modified.

1996 * @param srcText the source for the new characters

1997 * @return a reference to this

1998 * @stable ICU 2.0

1999 */

2000 inline UnicodeString& operator+= (const UnicodeString& srcText);

2001

2002 /**

2003 * Append the characters

2004 * in <TT>srcText</TT> in the range

2005 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the

2006 * end of the UnicodeString object. <TT>srcText</TT>

2007 * is not modified.

2008 * @param srcText the source for the new characters

2009 * @param srcStart the offset into <TT>srcText</TT> where new characters

2010 * will be obtained

2011 * @param srcLength the number of characters in <TT>srcText</TT> in

2012 * the append string

2013 * @return a reference to this

2014 * @stable ICU 2.0

2015 */

2016 inline UnicodeString& append(const UnicodeString& srcText,

2017 int32_t srcStart,

2018 int32_t srcLength);

2019

2020 /**

2021 * Append the characters in <TT>srcText</TT> to the end of the

2022 * UnicodeString object. <TT>srcText</TT> is not modified.

2023 * @param srcText the source for the new characters

2024 * @return a reference to this

2025 * @stable ICU 2.0

2026 */

2027 inline UnicodeString& append(const UnicodeString& srcText);

2028

2029 /**

2030 * Append the characters in <TT>srcChars</TT> in the range

2031 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString

2032 * object (at its end).

2033 * <TT>srcChars</TT> is not modified.

2034 * @param srcChars the source for the new characters

2035 * @param srcStart the offset into <TT>srcChars</TT> where new characters

2036 * will be obtained

2037 * @param srcLength the number of characters in <TT>srcChars</TT> in

2038 * the append string

2039 * @return a reference to this

2040 * @stable ICU 2.0

2041 */

2042 inline UnicodeString& append(const UChar *srcChars,

2043 int32_t srcStart,

2044 int32_t srcLength);

2045

2046 /**

2047 * Append the characters in <TT>srcChars</TT> to the UnicodeString object

2048 * (at its end). <TT>srcChars</TT> is not modified.

2049 * @param srcChars the source for the new characters

2050 * @param srcLength the number of Unicode characters in <TT>srcChars</TT>

2051 * @return a reference to this

2052 * @stable ICU 2.0

2053 */

2054 inline UnicodeString& append(const UChar *srcChars,

2055 int32_t srcLength);

2056

2057 /**

2058 * Append the code unit <TT>srcChar</TT> to the UnicodeString object.

2059 * @param srcChar the code unit to append

2060 * @return a reference to this

2061 * @stable ICU 2.0

2062 */

2063 inline UnicodeString& append(UChar srcChar);

2064

2065 /**

2066 * Append the code point <TT>srcChar</TT> to the UnicodeString object.

2067 * @param srcChar the code point to append

2068 * @return a reference to this

2069 * @stable ICU 2.0

2070 */

2071 inline UnicodeString& append(UChar32 srcChar);

2072

2073

2074 /* Insert operations */

2075

2076 /**

2077 * Insert the characters in <TT>srcText</TT> in the range

2078 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString

2079 * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.

2080 * @param start the offset where the insertion begins

2081 * @param srcText the source for the new characters

2082 * @param srcStart the offset into <TT>srcText</TT> where new characters

2083 * will be obtained

2084 * @param srcLength the number of characters in <TT>srcText</TT> in

2085 * the insert string

2086 * @return a reference to this

2087 * @stable ICU 2.0

2088 */

2089 inline UnicodeString& insert(int32_t start,

2090 const UnicodeString& srcText,

2091 int32_t srcStart,

2092 int32_t srcLength);

2093

2094 /**

2095 * Insert the characters in <TT>srcText</TT> into the UnicodeString object

2096 * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.

2097 * @param start the offset where the insertion begins

2098 * @param srcText the source for the new characters

2099 * @return a reference to this

2100 * @stable ICU 2.0

2101 */

2102 inline UnicodeString& insert(int32_t start,

2103 const UnicodeString& srcText);

2104

2105 /**

2106 * Insert the characters in <TT>srcChars</TT> in the range

2107 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString

2108 * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.

2109 * @param start the offset at which the insertion begins

2110 * @param srcChars the source for the new characters

2111 * @param srcStart the offset into <TT>srcChars</TT> where new characters

2112 * will be obtained

2113 * @param srcLength the number of characters in <TT>srcChars</TT>

2114 * in the insert string

2115 * @return a reference to this

2116 * @stable ICU 2.0

2117 */

2118 inline UnicodeString& insert(int32_t start,

2119 const UChar *srcChars,

2120 int32_t srcStart,

2121 int32_t srcLength);

2122

2123 /**

2124 * Insert the characters in <TT>srcChars</TT> into the UnicodeString object

2125 * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.

2126 * @param start the offset where the insertion begins

2127 * @param srcChars the source for the new characters

2128 * @param srcLength the number of Unicode characters in srcChars.

2129 * @return a reference to this

2130 * @stable ICU 2.0

2131 */

2132 inline UnicodeString& insert(int32_t start,

2133 const UChar *srcChars,

2134 int32_t srcLength);

2135

2136 /**

2137 * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at

2138 * offset <TT>start</TT>.

2139 * @param start the offset at which the insertion occurs

2140 * @param srcChar the code unit to insert

2141 * @return a reference to this

2142 * @stable ICU 2.0

2143 */

2144 inline UnicodeString& insert(int32_t start,

2145 UChar srcChar);

2146

2147 /**

2148 * Insert the code point <TT>srcChar</TT> into the UnicodeString object at

2149 * offset <TT>start</TT>.

2150 * @param start the offset at which the insertion occurs

2151 * @param srcChar the code point to insert

2152 * @return a reference to this

2153 * @stable ICU 2.0

2154 */

2155 inline UnicodeString& insert(int32_t start,

2156 UChar32 srcChar);

2157

2158

2159 /* Replace operations */

2160

2161 /**

2162 * Replace the characters in the range

2163 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in

2164 * <TT>srcText</TT> in the range

2165 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

2166 * <TT>srcText</TT> is not modified.

2167 * @param start the offset at which the replace operation begins

2168 * @param length the number of characters to replace. The character at

2169 * <TT>start + length</TT> is not modified.

2170 * @param srcText the source for the new characters

2171 * @param srcStart the offset into <TT>srcText</TT> where new characters

2172 * will be obtained

2173 * @param srcLength the number of characters in <TT>srcText</TT> in

2174 * the replace string

2175 * @return a reference to this

2176 * @stable ICU 2.0

2177 */

2178 UnicodeString& replace(int32_t start,

2179 int32_t length,

2180 const UnicodeString& srcText,

2181 int32_t srcStart,

2182 int32_t srcLength);

2183

2184 /**

2185 * Replace the characters in the range

2186 * [<TT>start</TT>, <TT>start + length</TT>)

2187 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is

2188 * not modified.

2189 * @param start the offset at which the replace operation begins

2190 * @param length the number of characters to replace. The character at

2191 * <TT>start + length</TT> is not modified.

2192 * @param srcText the source for the new characters

2193 * @return a reference to this

2194 * @stable ICU 2.0

2195 */

2196 UnicodeString& replace(int32_t start,

2197 int32_t length,

2198 const UnicodeString& srcText);

2199

2200 /**

2201 * Replace the characters in the range

2202 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in

2203 * <TT>srcChars</TT> in the range

2204 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>

2205 * is not modified.

2206 * @param start the offset at which the replace operation begins

2207 * @param length the number of characters to replace. The character at

2208 * <TT>start + length</TT> is not modified.

2209 * @param srcChars the source for the new characters

2210 * @param srcStart the offset into <TT>srcChars</TT> where new characters

2211 * will be obtained

2212 * @param srcLength the number of characters in <TT>srcChars</TT>

2213 * in the replace string

2214 * @return a reference to this

2215 * @stable ICU 2.0

2216 */

2217 UnicodeString& replace(int32_t start,

2218 int32_t length,

2219 const UChar *srcChars,

2220 int32_t srcStart,

2221 int32_t srcLength);

2222

2223 /**

2224 * Replace the characters in the range

2225 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in

2226 * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.

2227 * @param start the offset at which the replace operation begins

2228 * @param length number of characters to replace. The character at

2229 * <TT>start + length</TT> is not modified.

2230 * @param srcChars the source for the new characters

2231 * @param srcLength the number of Unicode characters in srcChars

2232 * @return a reference to this

2233 * @stable ICU 2.0

2234 */

2235 inline UnicodeString& replace(int32_t start,

2236 int32_t length,

2237 const UChar *srcChars,

2238 int32_t srcLength);

2239

2240 /**

2241 * Replace the characters in the range

2242 * [<TT>start</TT>, <TT>start + length</TT>) with the code unit

2243 * <TT>srcChar</TT>.

2244 * @param start the offset at which the replace operation begins

2245 * @param length the number of characters to replace. The character at

2246 * <TT>start + length</TT> is not modified.

2247 * @param srcChar the new code unit

2248 * @return a reference to this

2249 * @stable ICU 2.0

2250 */

2251 inline UnicodeString& replace(int32_t start,

2252 int32_t length,

2253 UChar srcChar);

2254

2255 /**

2256 * Replace the characters in the range

2257 * [<TT>start</TT>, <TT>start + length</TT>) with the code point

2258 * <TT>srcChar</TT>.

2259 * @param start the offset at which the replace operation begins

2260 * @param length the number of characters to replace. The character at

2261 * <TT>start + length</TT> is not modified.

2262 * @param srcChar the new code point

2263 * @return a reference to this

2264 * @stable ICU 2.0

2265 */

2266 inline UnicodeString& replace(int32_t start,

2267 int32_t length,

2268 UChar32 srcChar);

2269

2270 /**

2271 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)

2272 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.

2273 * @param start the offset at which the replace operation begins

2274 * @param limit the offset immediately following the replace range

2275 * @param srcText the source for the new characters

2276 * @return a reference to this

2277 * @stable ICU 2.0

2278 */

2279 inline UnicodeString& replaceBetween(int32_t start,

2280 int32_t limit,

2281 const UnicodeString& srcText);

2282

2283 /**

2284 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)

2285 * with the characters in <TT>srcText</TT> in the range

2286 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.

2287 * @param start the offset at which the replace operation begins

2288 * @param limit the offset immediately following the replace range

2289 * @param srcText the source for the new characters

2290 * @param srcStart the offset into <TT>srcText</TT> where new characters

2291 * will be obtained

2292 * @param srcLimit the offset immediately following the range to copy

2293 * in <TT>srcText</TT>

2294 * @return a reference to this

2295 * @stable ICU 2.0

2296 */

2297 inline UnicodeString& replaceBetween(int32_t start,

2298 int32_t limit,

2299 const UnicodeString& srcText,

2300 int32_t srcStart,

2301 int32_t srcLimit);

2302

2303 /**

2304 * Replace a substring of this object with the given text.

2305 * @param start the beginning index, inclusive; <code>0 <= start

2306 * <= limit</code>.

2307 * @param limit the ending index, exclusive; <code>start <= limit

2308 * <= length()</code>.

2309 * @param text the text to replace characters <code>start</code>

2310 * to <code>limit - 1</code>

2311 * @stable ICU 2.0

2312 */

2313 virtual void handleReplaceBetween(int32_t start,

2314 int32_t limit,

2315 const UnicodeString& text);

2316

2317 /**

2318 * Replaceable API

2319 * @return TRUE if it has MetaData

2320 * @stable ICU 2.4

2321 */

2322 virtual UBool hasMetaData() const;

2323

2324 /**

2325 * Copy a substring of this object, retaining attribute (out-of-band)

2326 * information. This method is used to duplicate or reorder substrings.

2327 * The destination index must not overlap the source range.

2328 *

2329 * @param start the beginning index, inclusive; <code>0 <= start <=

2330 * limit</code>.

2331 * @param limit the ending index, exclusive; <code>start <= limit <=

2332 * length()</code>.

2333 * @param dest the destination index. The characters from

2334 * <code>start..limit-1</code> will be copied to <code>dest</code>.

2335 * Implementations of this method may assume that <code>dest <= start ||

2336 * dest >= limit</code>.

2337 * @stable ICU 2.0

2338 */

2339 virtual void copy(int32_t start, int32_t limit, int32_t dest);

2340

2341 /* Search and replace operations */

2342

2343 /**

2344 * Replace all occurrences of characters in oldText with the characters

2345 * in newText

2346 * @param oldText the text containing the search text

2347 * @param newText the text containing the replacement text

2348 * @return a reference to this

2349 * @stable ICU 2.0

2350 */

2351 inline UnicodeString& findAndReplace(const UnicodeString& oldText,

2352 const UnicodeString& newText);

2353

2354 /**

2355 * Replace all occurrences of characters in oldText with characters

2356 * in newText

2357 * in the range [<TT>start</TT>, <TT>start + length</TT>).

2358 * @param start the start of the range in which replace will be performed

2359 * @param length the length of the range in which replace will be performed

2360 * @param oldText the text containing the search text

2361 * @param newText the text containing the replacement text

2362 * @return a reference to this

2363 * @stable ICU 2.0

2364 */

2365 inline UnicodeString& findAndReplace(int32_t start,

2366 int32_t length,

2367 const UnicodeString& oldText,

2368 const UnicodeString& newText);

2369

2370 /**

2371 * Replace all occurrences of characters in oldText in the range

2372 * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters

2373 * in newText in the range

2374 * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)

2375 * in the range [<TT>start</TT>, <TT>start + length</TT>).

2376 * @param start the start of the range in which replace will be performed

2377 * @param length the length of the range in which replace will be performed

2378 * @param oldText the text containing the search text

2379 * @param oldStart the start of the search range in <TT>oldText</TT>

2380 * @param oldLength the length of the search range in <TT>oldText</TT>

2381 * @param newText the text containing the replacement text

2382 * @param newStart the start of the replacement range in <TT>newText</TT>

2383 * @param newLength the length of the replacement range in <TT>newText</TT>

2384 * @return a reference to this

2385 * @stable ICU 2.0

2386 */

2387 UnicodeString& findAndReplace(int32_t start,

2388 int32_t length,

2389 const UnicodeString& oldText,

2390 int32_t oldStart,

2391 int32_t oldLength,

2392 const UnicodeString& newText,

2393 int32_t newStart,

2394 int32_t newLength);

2395

2396

2397 /* Remove operations */

2398

2399 /**

2400 * Remove all characters from the UnicodeString object.

2401 * @return a reference to this

2402 * @stable ICU 2.0

2403 */

2404 inline UnicodeString& remove(void);

2405

2406 /**

2407 * Remove the characters in the range

2408 * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.

2409 * @param start the offset of the first character to remove

2410 * @param length the number of characters to remove

2411 * @return a reference to this

2412 * @stable ICU 2.0

2413 */

2414 inline UnicodeString& remove(int32_t start,

2415 int32_t length = (int32_t)INT32_MAX);

2416

2417 /**

2418 * Remove the characters in the range

2419 * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.

2420 * @param start the offset of the first character to remove

2421 * @param limit the offset immediately following the range to remove

2422 * @return a reference to this

2423 * @stable ICU 2.0

2424 */

2425 inline UnicodeString& removeBetween(int32_t start,

2426 int32_t limit = (int32_t)INT32_MAX);

2427

2428 /**

2429 * Retain only the characters in the range

2430 * [<code>start</code>, <code>limit</code>) from the UnicodeString object.

2431 * Removes characters before <code>start</code> and at and after <code>limit</code>.

2432 * @param start the offset of the first character to retain

2433 * @param limit the offset immediately following the range to retain

2434 * @return a reference to this

2435 * @stable ICU 4.4

2436 */

2437 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);

2438

2439 /* Length operations */

2440

2441 /**

2442 * Pad the start of this UnicodeString with the character <TT>padChar</TT>.

2443 * If the length of this UnicodeString is less than targetLength,

2444 * targetLength - length() copies of padChar will be added to the

2445 * beginning of this UnicodeString.

2446 * @param targetLength the desired length of the string

2447 * @param padChar the character to use for padding. Defaults to

2448 * space (U+0020)

2449 * @return TRUE if the text was padded, FALSE otherwise.

2450 * @stable ICU 2.0

2451 */

2452 UBool padLeading(int32_t targetLength,

2453 UChar padChar = 0x0020);

2454

2455 /**

2456 * Pad the end of this UnicodeString with the character <TT>padChar</TT>.

2457 * If the length of this UnicodeString is less than targetLength,

2458 * targetLength - length() copies of padChar will be added to the

2459 * end of this UnicodeString.

2460 * @param targetLength the desired length of the string

2461 * @param padChar the character to use for padding. Defaults to

2462 * space (U+0020)

2463 * @return TRUE if the text was padded, FALSE otherwise.

2464 * @stable ICU 2.0

2465 */

2466 UBool padTrailing(int32_t targetLength,

2467 UChar padChar = 0x0020);

2468

2469 /**

2470 * Truncate this UnicodeString to the <TT>targetLength</TT>.

2471 * @param targetLength the desired length of this UnicodeString.

2472 * @return TRUE if the text was truncated, FALSE otherwise

2473 * @stable ICU 2.0

2474 */

2475 inline UBool truncate(int32_t targetLength);

2476

2477 /**

2478 * Trims leading and trailing whitespace from this UnicodeString.

2479 * @return a reference to this

2480 * @stable ICU 2.0

2481 */

2482 UnicodeString& trim(void);

2483

2484

2485 /* Miscellaneous operations */

2486

2487 /**

2488 * Reverse this UnicodeString in place.

2489 * @return a reference to this

2490 * @stable ICU 2.0

2491 */

2492 inline UnicodeString& reverse(void);

2493

2494 /**

2495 * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in

2496 * this UnicodeString.

2497 * @param start the start of the range to reverse

2498 * @param length the number of characters to reverse

2499 * @return a reference to this

2500 * @stable ICU 2.0

2501 */

2502 inline UnicodeString& reverse(int32_t start,

2503 int32_t length);

2504

2505 /**

2506 * Convert the characters in this to UPPER CASE following the conventions of

2507 * the default locale.

2508 * @return A reference to this.

2509 * @stable ICU 2.0

2510 */

2511 UnicodeString& toUpper(void);

2512

2513 /**

2514 * Convert the characters in this to UPPER CASE following the conventions of

2515 * a specific locale.

2516 * @param locale The locale containing the conventions to use.

2517 * @return A reference to this.

2518 * @stable ICU 2.0

2519 */

2520 UnicodeString& toUpper(const Locale& locale);

2521

2522 /**

2523 * Convert the characters in this to lower case following the conventions of

2524 * the default locale.

2525 * @return A reference to this.

2526 * @stable ICU 2.0

2527 */

2528 UnicodeString& toLower(void);

2529

2530 /**

2531 * Convert the characters in this to lower case following the conventions of

2532 * a specific locale.

2533 * @param locale The locale containing the conventions to use.

2534 * @return A reference to this.

2535 * @stable ICU 2.0

2536 */

2537 UnicodeString& toLower(const Locale& locale);

2538

2539 #if !UCONFIG_NO_BREAK_ITERATION

2540

2541 /**

2542 * Titlecase this string, convenience function using the default locale.

2543 *

2544 * Casing is locale-dependent and context-sensitive.

2545 * Titlecasing uses a break iterator to find the first characters of words

2546 * that are to be titlecased. It titlecases those characters and lowercases

2547 * all others.

2548 *

2549 * The titlecase break iterator can be provided to customize for arbitrary

2550 * styles, using rules and dictionaries beyond the standard iterators.

2551 * It may be more efficient to always provide an iterator to avoid

2552 * opening and closing one for each string.

2553 * The standard titlecase iterator for the root locale implements the

2554 * algorithm of Unicode TR 21.

2555 *

2556 * This function uses only the setText(), first() and next() methods of the

2557 * provided break iterator.

2558 *

2559 * @param titleIter A break iterator to find the first characters of words

2560 * that are to be titlecased.

2561 * If none is provided (0), then a standard titlecase

2562 * break iterator is opened.

2563 * Otherwise the provided iterator is set to the string's text.

2564 * @return A reference to this.

2565 * @stable ICU 2.1

2566 */

2567 UnicodeString &toTitle(BreakIterator *titleIter);

2568

2569 /**

2570 * Titlecase this string.

2571 *

2572 * Casing is locale-dependent and context-sensitive.

2573 * Titlecasing uses a break iterator to find the first characters of words

2574 * that are to be titlecased. It titlecases those characters and lowercases

2575 * all others.

2576 *

2577 * The titlecase break iterator can be provided to customize for arbitrary

2578 * styles, using rules and dictionaries beyond the standard iterators.

2579 * It may be more efficient to always provide an iterator to avoid

2580 * opening and closing one for each string.

2581 * The standard titlecase iterator for the root locale implements the

2582 * algorithm of Unicode TR 21.

2583 *

2584 * This function uses only the setText(), first() and next() methods of the

2585 * provided break iterator.

2586 *

2587 * @param titleIter A break iterator to find the first characters of words

2588 * that are to be titlecased.

2589 * If none is provided (0), then a standard titlecase

2590 * break iterator is opened.

2591 * Otherwise the provided iterator is set to the string's text.

2592 * @param locale The locale to consider.

2593 * @return A reference to this.

2594 * @stable ICU 2.1

2595 */

2596 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);

2597

2598 /**

2599 * Titlecase this string, with options.

2600 *

2601 * Casing is locale-dependent and context-sensitive.

2602 * Titlecasing uses a break iterator to find the first characters of words

2603 * that are to be titlecased. It titlecases those characters and lowercases

2604 * all others. (This can be modified with options.)

2605 *

2606 * The titlecase break iterator can be provided to customize for arbitrary

2607 * styles, using rules and dictionaries beyond the standard iterators.

2608 * It may be more efficient to always provide an iterator to avoid

2609 * opening and closing one for each string.

2610 * The standard titlecase iterator for the root locale implements the

2611 * algorithm of Unicode TR 21.

2612 *

2613 * This function uses only the setText(), first() and next() methods of the

2614 * provided break iterator.

2615 *

2616 * @param titleIter A break iterator to find the first characters of words

2617 * that are to be titlecased.

2618 * If none is provided (0), then a standard titlecase

2619 * break iterator is opened.

2620 * Otherwise the provided iterator is set to the string's text.

2621 * @param locale The locale to consider.

2622 * @param options Options bit set, see ucasemap_open().

2623 * @return A reference to this.

2624 * @see U_TITLECASE_NO_LOWERCASE

2625 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT

2626 * @see ucasemap_open

2627 * @stable ICU 3.8

2628 */

2629 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);

2630

2631 #endif

2632

2633 /**

2634 * Case-fold the characters in this string.

2635 * Case-folding is locale-independent and not context-sensitive,

2636 * but there is an option for whether to include or exclude mappings for dotted I

2637 * and dotless i that are marked with 'I' in CaseFolding.txt.

2638 * The result may be longer or shorter than the original.

2639 *

2640 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I

2641 * @return A reference to this.

2642 * @stable ICU 2.0

2643 */

2644 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);

2645

2646 //========================================

2647 // Access to the internal buffer

2648 //========================================

2649

2650 /**

2651 * Get a read/write pointer to the internal buffer.

2652 * The buffer is guaranteed to be large enough for at least minCapacity UChars,

2653 * writable, and is still owned by the UnicodeString object.

2654 * Calls to getBuffer(minCapacity) must not be nested, and

2655 * must be matched with calls to releaseBuffer(newLength).

2656 * If the string buffer was read-only or shared,

2657 * then it will be reallocated and copied.

2658 *

2659 * An attempted nested call will return 0, and will not further modify the

2660 * state of the UnicodeString object.

2661 * It also returns 0 if the string is bogus.

2662 *

2663 * The actual capacity of the string buffer may be larger than minCapacity.

2664 * getCapacity() returns the actual capacity.

2665 * For many operations, the full capacity should be used to avoid reallocations.

2666 *

2667 * While the buffer is "open" between getBuffer(minCapacity)

2668 * and releaseBuffer(newLength), the following applies:

2669 * - The string length is set to 0.

2670 * - Any read API call on the UnicodeString object will behave like on a 0-length string.

2671 * - Any write API call on the UnicodeString object is disallowed and will have no effect.

2672 * - You can read from and write to the returned buffer.

2673 * - The previous string contents will still be in the buffer;

2674 * if you want to use it, then you need to call length() before getBuffer(minCapacity).

2675 * If the length() was greater than minCapacity, then any contents after minCapacity

2676 * may be lost.

2677 * The buffer contents is not NUL-terminated by getBuffer().

2678 * If length()<getCapacity() then you can terminate it by writing a NUL

2679 * at index length().

2680 * - You must call releaseBuffer(newLength) before and in order to

2681 * return to normal UnicodeString operation.

2682 *

2683 * @param minCapacity the minimum number of UChars that are to be available

2684 * in the buffer, starting at the returned pointer;

2685 * default to the current string capacity if minCapacity==-1

2686 * @return a writable pointer to the internal string buffer,

2687 * or 0 if an error occurs (nested calls, out of memory)

2688 *

2689 * @see releaseBuffer

2690 * @see getTerminatedBuffer()

2691 * @stable ICU 2.0

2692 */

2693 UChar *getBuffer(int32_t minCapacity);

2694

2695 /**

2696 * Release a read/write buffer on a UnicodeString object with an

2697 * "open" getBuffer(minCapacity).

2698 * This function must be called in a matched pair with getBuffer(minCapacity).

2699 * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".

2700 *

2701 * It will set the string length to newLength, at most to the current capacity.

2702 * If newLength==-1 then it will set the length according to the

2703 * first NUL in the buffer, or to the capacity if there is no NUL.

2704 *

2705 * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.

2706 *

2707 * @param newLength the new length of the UnicodeString object;

2708 * defaults to the current capacity if newLength is greater than that;

2709 * if newLength==-1, it defaults to u_strlen(buffer) but not more than

2710 * the current capacity of the string

2711 *

2712 * @see getBuffer(int32_t minCapacity)

2713 * @stable ICU 2.0

2714 */

2715 void releaseBuffer(int32_t newLength=-1);

2716

2717 /**

2718 * Get a read-only pointer to the internal buffer.

2719 * This can be called at any time on a valid UnicodeString.

2720 *

2721 * It returns 0 if the string is bogus, or

2722 * during an "open" getBuffer(minCapacity).

2723 *

2724 * It can be called as many times as desired.

2725 * The pointer that it returns will remain valid until the UnicodeString object is modified,

2726 * at which time the pointer is semantically invalidated and must not be used any more.

2727 *

2728 * The capacity of the buffer can be determined with getCapacity().

2729 * The part after length() may or may not be initialized and valid,

2730 * depending on the history of the UnicodeString object.

2731 *

2732 * The buffer contents is (probably) not NUL-terminated.

2733 * You can check if it is with

2734 * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.

2735 * (See getTerminatedBuffer().)

2736 *

2737 * The buffer may reside in read-only memory. Its contents must not

2738 * be modified.

2739 *

2740 * @return a read-only pointer to the internal string buffer,

2741 * or 0 if the string is empty or bogus

2742 *

2743 * @see getBuffer(int32_t minCapacity)

2744 * @see getTerminatedBuffer()

2745 * @stable ICU 2.0

2746 */

2747 inline const UChar *getBuffer() const;

2748

2749 /**

2750 * Get a read-only pointer to the internal buffer,

2751 * making sure that it is NUL-terminated.

2752 * This can be called at any time on a valid UnicodeString.

2753 *

2754 * It returns 0 if the string is bogus, or

2755 * during an "open" getBuffer(minCapacity), or if the buffer cannot

2756 * be NUL-terminated (because memory allocation failed).

2757 *

2758 * It can be called as many times as desired.

2759 * The pointer that it returns will remain valid until the UnicodeString object is modified,

2760 * at which time the pointer is semantically invalidated and must not be used any more.

2761 *

2762 * The capacity of the buffer can be determined with getCapacity().

2763 * The part after length()+1 may or may not be initialized and valid,

2764 * depending on the history of the UnicodeString object.

2765 *

2766 * The buffer contents is guaranteed to be NUL-terminated.

2767 * getTerminatedBuffer() may reallocate the buffer if a terminating NUL

2768 * is written.

2769 * For this reason, this function is not const, unlike getBuffer().

2770 * Note that a UnicodeString may also contain NUL characters as part of its contents.

2771 *

2772 * The buffer may reside in read-only memory. Its contents must not

2773 * be modified.

2774 *

2775 * @return a read-only pointer to the internal string buffer,

2776 * or 0 if the string is empty or bogus

2777 *

2778 * @see getBuffer(int32_t minCapacity)

2779 * @see getBuffer()

2780 * @stable ICU 2.2

2781 */

2782 inline const UChar *getTerminatedBuffer();

2783

2784 //========================================

2785 // Constructors

2786 //========================================

2787

2788 /** Construct an empty UnicodeString.

2789 * @stable ICU 2.0

2790 */

2791 UnicodeString();

2792

2793 /**

2794 * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars

2795 * @param capacity the number of UChars this UnicodeString should hold

2796 * before a resize is necessary; if count is greater than 0 and count

2797 * code points c take up more space than capacity, then capacity is adjusted

2798 * accordingly.

2799 * @param c is used to initially fill the string

2800 * @param count specifies how many code points c are to be written in the

2801 * string

2802 * @stable ICU 2.0

2803 */

2804 UnicodeString(int32_t capacity, UChar32 c, int32_t count);

2805

2806 /**

2807 * Single UChar (code unit) constructor.

2808 * @param ch the character to place in the UnicodeString

2809 * @stable ICU 2.0

2810 */

2811 UnicodeString(UChar ch);

2812

2813 /**

2814 * Single UChar32 (code point) constructor.

2815 * @param ch the character to place in the UnicodeString

2816 * @stable ICU 2.0

2817 */

2818 UnicodeString(UChar32 ch);

2819

2820 /**

2821 * UChar* constructor.

2822 * @param text The characters to place in the UnicodeString. <TT>text</TT>

2823 * must be NULL (U+0000) terminated.

2824 * @stable ICU 2.0

2825 */

2826 UnicodeString(const UChar *text);

2827

2828 /**

2829 * UChar* constructor.

2830 * @param text The characters to place in the UnicodeString.

2831 * @param textLength The number of Unicode characters in <TT>text</TT>

2832 * to copy.

2833 * @stable ICU 2.0

2834 */

2835 UnicodeString(const UChar *text,

2836 int32_t textLength);

2837

2838 /**

2839 * Readonly-aliasing UChar* constructor.

2840 * The text will be used for the UnicodeString object, but

2841 * it will not be released when the UnicodeString is destroyed.

2842 * This has copy-on-write semantics:

2843 * When the string is modified, then the buffer is first copied into

2844 * newly allocated memory.

2845 * The aliased buffer is never modified.

2846 * In an assignment to another UnicodeString, the text will be aliased again,

2847 * so that both strings then alias the same readonly-text.

2848 *

2849 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.

2850 * This must be true if <code>textLength==-1</code>.

2851 * @param text The characters to alias for the UnicodeString.

2852 * @param textLength The number of Unicode characters in <code>text</code> to alias.

2853 * If -1, then this constructor will determine the length

2854 * by calling <code>u_strlen()</code>.

2855 * @stable ICU 2.0

2856 */

2857 UnicodeString(UBool isTerminated,

2858 const UChar *text,

2859 int32_t textLength);

2860

2861 /**

2862 * Writable-aliasing UChar* constructor.

2863 * The text will be used for the UnicodeString object, but

2864 * it will not be released when the UnicodeString is destroyed.

2865 * This has write-through semantics:

2866 * For as long as the capacity of the buffer is sufficient, write operations

2867 * will directly affect the buffer. When more capacity is necessary, then

2868 * a new buffer will be allocated and the contents copied as with regularly

2869 * constructed strings.

2870 * In an assignment to another UnicodeString, the buffer will be copied.

2871 * The extract(UChar *dst) function detects whether the dst pointer is the same

2872 * as the string buffer itself and will in this case not copy the contents.

2873 *

2874 * @param buffer The characters to alias for the UnicodeString.

2875 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.

2876 * @param buffCapacity The size of <code>buffer</code> in UChars.

2877 * @stable ICU 2.0

2878 */

2879 UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);

2880

2881 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION

2882

2883 /**

2884 * char* constructor.

2885 * @param codepageData an array of bytes, null-terminated,

2886 * in the platform's default codepage.

2887 * @stable ICU 2.0

2888 */

2889 UnicodeString(const char *codepageData);

2890

2891 /**

2892 * char* constructor.

2893 * @param codepageData an array of bytes in the platform's default codepage.

2894 * @param dataLength The number of bytes in <TT>codepageData</TT>.

2895 * @stable ICU 2.0

2896 */

2897 UnicodeString(const char *codepageData, int32_t dataLength);

2898

2899 #endif  // U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION

2900

2901 #if !UCONFIG_NO_CONVERSION

2902

2903 /**

2904 * char* constructor.

2905 * @param codepageData an array of bytes, null-terminated

2906 * @param codepage the encoding of <TT>codepageData</TT>. The special

2907 * value 0 for <TT>codepage</TT> indicates that the text is in the

2908 * platform's default codepage.

2909 *

2910 * If <code>codepage</code> is an empty string (<code>""</code>),

2911 * then a simple conversion is performed on the codepage-invariant

2912 * subset ("invariant characters") of the platform encoding. See utypes.h.

2913 * Recommendation: For invariant-character strings use the constructor

2914 * UnicodeString(const char *src, int32_t length, enum EInvariant inv)

2915 * because it avoids object code dependencies of UnicodeString on

2916 * the conversion code.

2917 *

2918 * @stable ICU 2.0

2919 */

2920 UnicodeString(const char *codepageData, const char *codepage);

2921

2922 /**

2923 * char* constructor.

2924 * @param codepageData an array of bytes.

2925 * @param dataLength The number of bytes in <TT>codepageData</TT>.

2926 * @param codepage the encoding of <TT>codepageData</TT>. The special

2927 * value 0 for <TT>codepage</TT> indicates that the text is in the

2928 * platform's default codepage.

2929 * If <code>codepage</code> is an empty string (<code>""</code>),

2930 * then a simple conversion is performed on the codepage-invariant

2931 * subset ("invariant characters") of the platform encoding. See utypes.h.

2932 * Recommendation: For invariant-character strings use the constructor

2933 * UnicodeString(const char *src, int32_t length, enum EInvariant inv)

2934 * because it avoids object code dependencies of UnicodeString on

2935 * the conversion code.

2936 *

2937 * @stable ICU 2.0

2938 */

2939 UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);

2940

2941 /**

2942 * char * / UConverter constructor.

2943 * This constructor uses an existing UConverter object to

2944 * convert the codepage string to Unicode and construct a UnicodeString

2945 * from that.

2946 *

2947 * The converter is reset at first.

2948 * If the error code indicates a failure before this constructor is called,

2949 * or if an error occurs during conversion or construction,

2950 * then the string will be bogus.

2951 *

2952 * This function avoids the overhead of opening and closing a converter if

2953 * multiple strings are constructed.

2954 *

2955 * @param src input codepage string

2956 * @param srcLength length of the input string, can be -1 for NUL-terminated strings

2957 * @param cnv converter object (ucnv_resetToUnicode() will be called),

2958 * can be NULL for the default converter

2959 * @param errorCode normal ICU error code

2960 * @stable ICU 2.0

2961 */

2962 UnicodeString(

2963 const char *src, int32_t srcLength,

2964 UConverter *cnv,

2965 UErrorCode &errorCode);

2966

2967 #endif

2968

2969 /**

2970 * Constructs a Unicode string from an invariant-character char * string.

2971 * About invariant characters see utypes.h.

2972 * This constructor has no runtime dependency on conversion code and is

2973 * therefore recommended over ones taking a charset name string

2974 * (where the empty string "" indicates invariant-character conversion).

2975 *

2976 * Use the macro US_INV as the third, signature-distinguishing parameter.

2977 *

2978 * For example:

2979 * \code

2980 * void fn(const char *s) {

2981 * UnicodeString ustr(s, -1, US_INV);

2982 * // use ustr ...

2983 * }

2984 * \endcode

2985 *

2986 * @param src String using only invariant characters.

2987 * @param length Length of src, or -1 if NUL-terminated.

2988 * @param inv Signature-distinguishing parameter, use US_INV.

2989 *

2990 * @see US_INV

2991 * @stable ICU 3.2

2992 */

2993 UnicodeString(const char *src, int32_t length, enum EInvariant inv);

2994

2995

2996 /**

2997 * Copy constructor.

2998 * @param that The UnicodeString object to copy.

2999 * @stable ICU 2.0

3000 */

3001 UnicodeString(const UnicodeString& that);

3002

3003 /**

3004 * 'Substring' constructor from tail of source string.

3005 * @param src The UnicodeString object to copy.

3006 * @param srcStart The offset into <tt>src</tt> at which to start copying.

3007 * @stable ICU 2.2

3008 */

3009 UnicodeString(const UnicodeString& src, int32_t srcStart);

3010

3011 /**

3012 * 'Substring' constructor from subrange of source string.

3013 * @param src The UnicodeString object to copy.

3014 * @param srcStart The offset into <tt>src</tt> at which to start copying.

3015 * @param srcLength The number of characters from <tt>src</tt> to copy.

3016 * @stable ICU 2.2

3017 */

3018 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);

3019

3020 /**

3021 * Clone this object, an instance of a subclass of Replaceable.

3022 * Clones can be used concurrently in multiple threads.

3023 * If a subclass does not implement clone(), or if an error occurs,

3024 * then NULL is returned.

3025 * The clone functions in all subclasses return a pointer to a Replaceable

3026 * because some compilers do not support covariant (same-as-this)

3027 * return types; cast to the appropriate subclass if necessary.

3028 * The caller must delete the clone.

3029 *

3030 * @return a clone of this object

3031 *

3032 * @see Replaceable::clone

3033 * @see getDynamicClassID

3034 * @stable ICU 2.6

3035 */

3036 virtual Replaceable *clone() const;

3037

3038 /** Destructor.

3039 * @stable ICU 2.0

3040 */

3041 virtual ~UnicodeString();

3042

3043 /**

3044 * Create a UnicodeString from a UTF-8 string.

3045 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.

3046 * Calls u_strFromUTF8WithSub().

3047 *

3048 * @param utf8 UTF-8 input string.

3049 * Note that a StringPiece can be implicitly constructed

3050 * from a std::string or a NUL-terminated const char * string.

3051 * @return A UnicodeString with equivalent UTF-16 contents.

3052 * @see toUTF8

3053 * @see toUTF8String

3054 * @stable ICU 4.2

3055 */

3056 static UnicodeString fromUTF8(const StringPiece &utf8);

3057

3058 /**

3059 * Create a UnicodeString from a UTF-32 string.

3060 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.

3061 * Calls u_strFromUTF32WithSub().

3062 *

3063 * @param utf32 UTF-32 input string. Must not be NULL.

3064 * @param length Length of the input string, or -1 if NUL-terminated.

3065 * @return A UnicodeString with equivalent UTF-16 contents.

3066 * @see toUTF32

3067 * @stable ICU 4.2

3068 */

3069 static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);

3070

3071 /* Miscellaneous operations */

3072

3073 /**

3074 * Unescape a string of characters and return a string containing

3075 * the result. The following escape sequences are recognized:

3076 *

3077 * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]

3078 * \\Uhhhhhhhh 8 hex digits

3079 * \\xhh 1-2 hex digits

3080 * \\ooo 1-3 octal digits; o in [0-7]

3081 * \\cX control-X; X is masked with 0x1F

3082 *

3083 * as well as the standard ANSI C escapes:

3084 *

3085 * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,

3086 * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,

3087 * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C

3088 *

3089 * Anything else following a backslash is generically escaped. For

3090 * example, "[a\\-z]" returns "[a-z]".

3091 *

3092 * If an escape sequence is ill-formed, this method returns an empty

3093 * string. An example of an ill-formed sequence is "\\u" followed by

3094 * fewer than 4 hex digits.

3095 *

3096 * This function is similar to u_unescape() but not identical to it.

3097 * The latter takes a source char*, so it does escape recognition

3098 * and also invariant conversion.

3099 *

3100 * @return a string with backslash escapes interpreted, or an

3101 * empty string on error.

3102 * @see UnicodeString#unescapeAt()

3103 * @see u_unescape()

3104 * @see u_unescapeAt()

3105 * @stable ICU 2.0

3106 */

3107 UnicodeString unescape() const;

3108

3109 /**

3110 * Unescape a single escape sequence and return the represented

3111 * character. See unescape() for a listing of the recognized escape

3112 * sequences. The character at offset-1 is assumed (without

3113 * checking) to be a backslash. If the escape sequence is

3114 * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is

3115 * returned.

3116 *

3117 * @param offset an input output parameter. On input, it is the

3118 * offset into this string where the escape sequence is located,

3119 * after the initial backslash. On output, it is advanced after the

3120 * last character parsed. On error, it is not advanced at all.

3121 * @return the character represented by the escape sequence at

3122 * offset, or (UChar32)0xFFFFFFFF on error.

3123 * @see UnicodeString#unescape()

3124 * @see u_unescape()

3125 * @see u_unescapeAt()

3126 * @stable ICU 2.0

3127 */

3128 UChar32 unescapeAt(int32_t &offset) const;

3129

3130 /**

3131 * ICU "poor man's RTTI", returns a UClassID for this class.

3132 *

3133 * @stable ICU 2.2

3134 */

3135 static UClassID U_EXPORT2 getStaticClassID();

3136

3137 /**

3138 * ICU "poor man's RTTI", returns a UClassID for the actual class.

3139 *

3140 * @stable ICU 2.2

3141 */

3142 virtual UClassID getDynamicClassID() const;

3143

3144 //========================================

3145 // Implementation methods

3146 //========================================

3147

3148 protected:

3149 /**

3150 * Implement Replaceable::getLength() (see jitterbug 1027).

3151 * @stable ICU 2.4

3152 */

3153 virtual int32_t getLength() const;

3154

3155 /**

3156 * The change in Replaceable to use virtual getCharAt() allows

3157 * UnicodeString::charAt() to be inline again (see jitterbug 709).

3158 * @stable ICU 2.4

3159 */

3160 virtual UChar getCharAt(int32_t offset) const;

3161

3162 /**

3163 * The change in Replaceable to use virtual getChar32At() allows

3164 * UnicodeString::char32At() to be inline again (see jitterbug 709).

3165 * @stable ICU 2.4

3166 */

3167 virtual UChar32 getChar32At(int32_t offset) const;

3168

3169 private:

3170 // For char* constructors. Could be made public.

3171 UnicodeString &setToUTF8(const StringPiece &utf8);

3172 // For extract(char*).

3173 // We could make a toUTF8(target, capacity, errorCode) public but not

3174 // this version: New API will be cleaner if we make callers create substrings

3175 // rather than having start+length on every method,

3176 // and it should take a UErrorCode&.

3177 int32_t

3178 toUTF8(int32_t start, int32_t len,

3179 char *target, int32_t capacity) const;

3180

3181

// Code unit order comparison workers behind compare(), compareBetween(),
// startsWith(), endsWith() and the relational operators.
// UnicodeString overload (inline, defined further down in this header):
// if srcText is bogus it returns 0 when this string is bogus too and 1
// otherwise; else it pins srcStart/srcLength to srcText and forwards
// srcText's array to the UChar* overload.
3182 inline int8_t

3183 doCompare(int32_t start,

3184 int32_t length,

3185 const UnicodeString& srcText,

3186 int32_t srcStart,

3187 int32_t srcLength) const;

3188

// UChar* overload: not inline; its definition is not part of this header.
3189 int8_t doCompare(int32_t start,

3190 int32_t length,

3191 const UChar *srcChars,

3192 int32_t srcStart,

3193 int32_t srcLength) const;

3194

// Workers behind compareCodePointOrder() and compareCodePointOrderBetween().
// UnicodeString overload (inline, defined further down in this header):
// if srcText is bogus it returns 0 when this string is bogus too and 1
// otherwise; else it pins srcStart/srcLength to srcText and forwards
// srcText's array to the UChar* overload.
3195 inline int8_t

3196 doCompareCodePointOrder(int32_t start,

3197 int32_t length,

3198 const UnicodeString& srcText,

3199 int32_t srcStart,

3200 int32_t srcLength) const;

3201

// UChar* overload: not inline; its definition is not part of this header.
3202 int8_t doCompareCodePointOrder(int32_t start,

3203 int32_t length,

3204 const UChar *srcChars,

3205 int32_t srcStart,

3206 int32_t srcLength) const;

3207

// Workers behind caseCompare() and caseCompareBetween(); options is passed
// through unchanged by those wrappers.
// UnicodeString overload (inline, defined further down in this header):
// if srcText is bogus it returns 0 when this string is bogus too and 1
// otherwise; else it pins srcStart/srcLength to srcText and forwards
// srcText's array to the UChar* overload.
3208 inline int8_t

3209 doCaseCompare(int32_t start,

3210 int32_t length,

3211 const UnicodeString &srcText,

3212 int32_t srcStart,

3213 int32_t srcLength,

3214 uint32_t options) const;

3215

// UChar* overload: not inline; its definition is not part of this header.
3216 int8_t

3217 doCaseCompare(int32_t start,

3218 int32_t length,

3219 const UChar *srcChars,

3220 int32_t srcStart,

3221 int32_t srcLength,

3222 uint32_t options) const;

3223

// Single-character search workers. The inline indexOf(UChar, ...) and
// indexOf(UChar32, ...) wrappers forward (c, start, length) to these.
// One overload per character type: UChar (code unit) and UChar32 (code point).
3224 int32_t doIndexOf(UChar c,

3225 int32_t start,

3226 int32_t length) const;

3227

3228 int32_t doIndexOf(UChar32 c,

3229 int32_t start,

3230 int32_t length) const;

3231

// Single-character backward-search workers. The inline lastIndexOf(UChar, ...)
// and lastIndexOf(UChar32, ...) wrappers forward (c, start, length) to these.
// One overload per character type: UChar (code unit) and UChar32 (code point).
3232 int32_t doLastIndexOf(UChar c,

3233 int32_t start,

3234 int32_t length) const;

3235

3236 int32_t doLastIndexOf(UChar32 c,

3237 int32_t start,

3238 int32_t length) const;

3239

// Extraction workers behind the public extract() overloads.
// UChar* overload: receives (start, length, dst, dstStart) unchanged from
// extract(start, length, target, targetStart); not defined in this header part.
3240 void doExtract(int32_t start,

3241 int32_t length,

3242 UChar *dst,

3243 int32_t dstStart) const;

3244

// UnicodeString overload (inline, defined further down in this header):
// replaces the entire contents of target with the requested range of this string.
3245 inline void doExtract(int32_t start,

3246 int32_t length,

3247 UnicodeString& target) const;

3248

3249 inline UChar doCharAt(int32_t offset) const;

3250

// Replacement workers: the inline replace() and replaceBetween() wrappers
// all forward here. [start, start+length) designates the range of this string
// that is replaced; (srcStart, srcLength) designates the replacement text
// inside srcText / srcChars. Returns a UnicodeString reference which the
// wrappers return to their callers.
3251 UnicodeString& doReplace(int32_t start,

3252 int32_t length,

3253 const UnicodeString& srcText,

3254 int32_t srcStart,

3255 int32_t srcLength);

3256

3257 UnicodeString& doReplace(int32_t start,

3258 int32_t length,

3259 const UChar *srcChars,

3260 int32_t srcStart,

3261 int32_t srcLength);

3262

3263 UnicodeString& doReverse(int32_t start,

3264 int32_t length);

3265

3266 // calculate hash code

3267 int32_t doHashCode(void) const;

3268

3269 // get pointer to start of array

3270 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function

3271 inline UChar* getArrayStart(void);

3272 inline const UChar* getArrayStart(void) const;

3273

3274 // A UnicodeString object (not necessarily its current buffer)

3275 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).

3276 inline UBool isWritable() const;

3277

3278 // Is the current buffer writable?

3279 inline UBool isBufferWritable() const;

3280

3281 // None of the following does releaseArray().

3282 inline void setLength(int32_t len); // sets only fShortLength and fLength

3283 inline void setToEmpty(); // sets fFlags=kShortString

3284 inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags

3285

3286 // allocate the array; result may be fStackBuffer

3287 // sets refCount to 1 if appropriate

3288 // sets fArray, fCapacity, and fFlags

3289 // returns boolean for success or failure

3290 UBool allocate(int32_t capacity);

3291

3292 // release the array if owned

3293 void releaseArray(void);

3294

3295 // turn a bogus string into an empty one

3296 void unBogus();

3297

3298 // implements assignment operator, copy constructor, and fastCopyFrom()

3299 UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);

3300

3301 // Pin start and limit to acceptable values.

3302 inline void pinIndex(int32_t& start) const;

3303 inline void pinIndices(int32_t& start,

3304 int32_t& length) const;

3305

3306 #if !UCONFIG_NO_CONVERSION

3307

3308 /* Internal extract() using UConverter. */

3309 int32_t doExtract(int32_t start, int32_t length,

3310 char *dest, int32_t destCapacity,

3311 UConverter *cnv,

3312 UErrorCode &errorCode) const;

3313

3314 /*

3315 * Real constructor for converting from codepage data.

3316 * It assumes that it is called with !fRefCounted.

3317 *

3318 * If <code>codepage==0</code>, then the default converter

3319 * is used for the platform encoding.

3320 * If <code>codepage</code> is an empty string (<code>""</code>),

3321 * then a simple conversion is performed on the codepage-invariant

3322 * subset ("invariant characters") of the platform encoding. See utypes.h.

3323 */

3324 void doCodepageCreate(const char *codepageData,

3325 int32_t dataLength,

3326 const char *codepage);

3327

3328 /*

3329 * Worker function for creating a UnicodeString from

3330 * a codepage string using a UConverter.

3331 */

3332 void

3333 doCodepageCreate(const char *codepageData,

3334 int32_t dataLength,

3335 UConverter *converter,

3336 UErrorCode &status);

3337

3338 #endif

3339

3340 /*

3341 * This function is called when write access to the array

3342 * is necessary.

3343 *

3344 * We need to make a copy of the array if

3345 * the buffer is read-only, or

3346 * the buffer is refCounted (shared), and refCount>1, or

3347 * the buffer is too small.

3348 *

3349 * Return FALSE if memory could not be allocated.

3350 */

3351 UBool cloneArrayIfNeeded(int32_t newCapacity = -1,

3352 int32_t growCapacity = -1,

3353 UBool doCopyArray = TRUE,

3354 int32_t **pBufferToDelete = 0,

3355 UBool forceClone = FALSE);

3356

3357 // common function for case mappings

3358 UnicodeString &

3359 caseMap(BreakIterator *titleIter,

3360 const char *locale,

3361 uint32_t options,

3362 int32_t toWhichCase);

3363

3364 // ref counting

3365 void addRef(void);

3366 int32_t removeRef(void);

3367 int32_t refCount(void) const;

3368

3369 // constants

3370 enum {

3371 // Set the stack buffer size so that sizeof(UnicodeString) is a multiple of sizeof(pointer):

3372 // 32-bit pointers: 4+1+1+13*2 = 32 bytes

3373 // 64-bit pointers: 8+1+1+15*2 = 40 bytes

3374 US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for small strings

3375 kInvalidUChar=0xffff, // invalid UChar index

3376 kGrowSize=128, // grow size for this buffer

3377 kInvalidHashCode=0, // invalid hash code

3378 kEmptyHashCode=1, // hash code for empty string

3379

3380 // bit flag values for fFlags

3381 kIsBogus=1, // this string is bogus, i.e., not valid or NULL

3382 kUsingStackBuffer=2,// fArray==fStackBuffer

3383 kRefCounted=4, // there is a refCount field before the characters in fArray

3384 kBufferIsReadonly=8,// do not write to this buffer

3385 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),

3386 // and releaseBuffer(newLength) must be called

3387

3388 // combined values for convenience

3389 kShortString=kUsingStackBuffer,

3390 kLongString=kRefCounted,

3391 kReadonlyAlias=kBufferIsReadonly,

3392 kWritableAlias=0

3393 };

3394

3395 friend class StringThreadTest;

3396

3397 union StackBufferOrFields; // forward declaration necessary before friend declaration

3398 friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion

3399

3400 /*

3401 * The following are all the class fields that are stored

3402 * in each UnicodeString object.

3403 * Note that UnicodeString has virtual functions,

3404 * therefore there is an implicit vtable pointer

3405 * as the first real field.

3406 * The fields should be aligned such that no padding is

3407 * necessary, mostly by having larger types first.

3408 * On 32-bit machines, the size should be 32 bytes,

3409 * on 64-bit machines (8-byte pointers), it should be 40 bytes.

3410 */

3411 // (implicit) *vtable;

3412 int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength

3413 uint8_t fFlags; // bit flags: see constants above

3414 union StackBufferOrFields {

3415 // fStackBuffer is used iff (fFlags&kUsingStackBuffer)

3416 // else fFields is used

3417 UChar fStackBuffer [US_STACKBUF_SIZE]; // buffer for small strings

3418 struct {

3419 uint16_t fPadding; // align the following field at 8B (32b pointers) or 12B (64b)

3420 int32_t fLength; // number of characters in fArray if >127; else undefined

3421 UChar *fArray; // the Unicode data (aligned at 12B (32b pointers) or 16B (64b))

3422 int32_t fCapacity; // sizeof fArray

3423 } fFields;

3424 } fUnion;

3425 };

3426

3427 /**

3428 * Create a new UnicodeString with the concatenation of two others.

3429 *

3430 * @param s1 The first string to be copied to the new one.

3431 * @param s2 The second string to be copied to the new one, after s1.

3432 * @return UnicodeString(s1).append(s2)

3433 * @stable ICU 2.8

3434 */

3435 U_COMMON_API UnicodeString U_EXPORT2

3436 operator+ (const UnicodeString &s1, const UnicodeString &s2);

3437

3438 //========================================

3439 // Inline members

3440 //========================================

3441

3442 //========================================

3443 // Privates

3444 //========================================

3445

3446 inline void

3447 UnicodeString::pinIndex(int32_t& start) const

3448 {

3449 // pin index

3450 if(start < 0) {

3451 start = 0;

3452 } else if(start > length()) {

3453 start = length();

3454 }

3455 }

3456

3457 inline void

3458 UnicodeString::pinIndices(int32_t& start,

3459 int32_t& _length) const

3460 {

3461 // pin indices

3462 int32_t len = length();

3463 if(start < 0) {

3464 start = 0;

3465 } else if(start > len) {

3466 start = len;

3467 }

3468 if(_length < 0) {

3469 _length = 0;

3470 } else if(_length > (len - start)) {

3471 _length = (len - start);

3472 }

3473 }

3474

3475 inline UChar*

3476 UnicodeString::getArrayStart()

3477 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArra y; }

3478

3479 inline const UChar*

3480 UnicodeString::getArrayStart() const

3481 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArra y; }

3482

3483 //========================================

3484 // Read-only implementation methods

3485 //========================================

3486 inline int32_t

3487 UnicodeString::length() const

3488 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }

3489

3490 inline int32_t

3491 UnicodeString::getCapacity() const

3492 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacit y; }

3493

3494 inline int32_t

3495 UnicodeString::hashCode() const

3496 { return doHashCode(); }

3497

3498 inline UBool

3499 UnicodeString::isBogus() const

3500 { return (UBool)(fFlags & kIsBogus); }

3501

3502 inline UBool

3503 UnicodeString::isWritable() const

3504 { return (UBool)!(fFlags&(kOpenGetBuffer\|kIsBogus)); }

3505

3506 inline UBool

3507 UnicodeString::isBufferWritable() const

3508 {

3509 return (UBool)(

3510 !(fFlags&(kOpenGetBuffer\|kIsBogus\|kBufferIsReadonly)) &&

3511 (!(fFlags&kRefCounted) \|\| refCount()==1));

3512 }

3513

3514 inline const UChar *

3515 UnicodeString::getBuffer() const {

3516 if(fFlags&(kIsBogus\|kOpenGetBuffer)) {

3517 return 0;

3518 } else if(fFlags&kUsingStackBuffer) {

3519 return fUnion.fStackBuffer;

3520 } else {

3521 return fUnion.fFields.fArray;

3522 }

3523 }

3524

3525 //========================================

3526 // Read-only alias methods

3527 //========================================

3528 inline int8_t

3529 UnicodeString::doCompare(int32_t start,

3530 int32_t thisLength,

3531 const UnicodeString& srcText,

3532 int32_t srcStart,

3533 int32_t srcLength) const

3534 {

3535 if(srcText.isBogus()) {

3536 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise

3537 } else {

3538 srcText.pinIndices(srcStart, srcLength);

3539 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLe ngth);

3540 }

3541 }

3542

3543 inline UBool

3544 UnicodeString::operator== (const UnicodeString& text) const

3545 {

3546 if(isBogus()) {

3547 return text.isBogus();

3548 } else {

3549 int32_t len = length(), textLength = text.length();

3550 return

3551 !text.isBogus() &&

3552 len == textLength &&

3553 doCompare(0, len, text, 0, textLength) == 0;

3554 }

3555 }

3556

3557 inline UBool

3558 UnicodeString::operator!= (const UnicodeString& text) const

3559 { return (! operator==(text)); }

3560

3561 inline UBool

3562 UnicodeString::operator> (const UnicodeString& text) const

3563 { return doCompare(0, length(), text, 0, text.length()) == 1; }

3564

3565 inline UBool

3566 UnicodeString::operator< (const UnicodeString& text) const

3567 { return doCompare(0, length(), text, 0, text.length()) == -1; }

3568

3569 inline UBool

3570 UnicodeString::operator>= (const UnicodeString& text) const

3571 { return doCompare(0, length(), text, 0, text.length()) != -1; }

3572

3573 inline UBool

3574 UnicodeString::operator<= (const UnicodeString& text) const

3575 { return doCompare(0, length(), text, 0, text.length()) != 1; }

3576

3577 inline int8_t

3578 UnicodeString::compare(const UnicodeString& text) const

3579 { return doCompare(0, length(), text, 0, text.length()); }

3580

3581 inline int8_t

3582 UnicodeString::compare(int32_t start,

3583 int32_t _length,

3584 const UnicodeString& srcText) const

3585 { return doCompare(start, _length, srcText, 0, srcText.length()); }

3586

3587 inline int8_t

3588 UnicodeString::compare(const UChar *srcChars,

3589 int32_t srcLength) const

3590 { return doCompare(0, length(), srcChars, 0, srcLength); }

3591

3592 inline int8_t

3593 UnicodeString::compare(int32_t start,

3594 int32_t _length,

3595 const UnicodeString& srcText,

3596 int32_t srcStart,

3597 int32_t srcLength) const

3598 { return doCompare(start, _length, srcText, srcStart, srcLength); }

3599

3600 inline int8_t

3601 UnicodeString::compare(int32_t start,

3602 int32_t _length,

3603 const UChar *srcChars) const

3604 { return doCompare(start, _length, srcChars, 0, _length); }

3605

3606 inline int8_t

3607 UnicodeString::compare(int32_t start,

3608 int32_t _length,

3609 const UChar *srcChars,

3610 int32_t srcStart,

3611 int32_t srcLength) const

3612 { return doCompare(start, _length, srcChars, srcStart, srcLength); }

3613

3614 inline int8_t

3615 UnicodeString::compareBetween(int32_t start,

3616 int32_t limit,

3617 const UnicodeString& srcText,

3618 int32_t srcStart,

3619 int32_t srcLimit) const

3620 { return doCompare(start, limit - start,

3621 srcText, srcStart, srcLimit - srcStart); }

3622

3623 inline int8_t

3624 UnicodeString::doCompareCodePointOrder(int32_t start,

3625 int32_t thisLength,

3626 const UnicodeString& srcText,

3627 int32_t srcStart,

3628 int32_t srcLength) const

3629 {

3630 if(srcText.isBogus()) {

3631 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise

3632 } else {

3633 srcText.pinIndices(srcStart, srcLength);

3634 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), s rcStart, srcLength);

3635 }

3636 }

3637

3638 inline int8_t

3639 UnicodeString::compareCodePointOrder(const UnicodeString& text) const

3640 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }

3641

3642 inline int8_t

3643 UnicodeString::compareCodePointOrder(int32_t start,

3644 int32_t _length,

3645 const UnicodeString& srcText) const

3646 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }

3647

3648 inline int8_t

3649 UnicodeString::compareCodePointOrder(const UChar *srcChars,

3650 int32_t srcLength) const

3651 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }

3652

3653 inline int8_t

3654 UnicodeString::compareCodePointOrder(int32_t start,

3655 int32_t _length,

3656 const UnicodeString& srcText,

3657 int32_t srcStart,

3658 int32_t srcLength) const

3659 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }

3660

3661 inline int8_t

3662 UnicodeString::compareCodePointOrder(int32_t start,

3663 int32_t _length,

3664 const UChar *srcChars) const

3665 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }

3666

3667 inline int8_t

3668 UnicodeString::compareCodePointOrder(int32_t start,

3669 int32_t _length,

3670 const UChar *srcChars,

3671 int32_t srcStart,

3672 int32_t srcLength) const

3673 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }

3674

3675 inline int8_t

3676 UnicodeString::compareCodePointOrderBetween(int32_t start,

3677 int32_t limit,

3678 const UnicodeString& srcText,

3679 int32_t srcStart,

3680 int32_t srcLimit) const

3681 { return doCompareCodePointOrder(start, limit - start,

3682 srcText, srcStart, srcLimit - srcStart); }

3683

3684 inline int8_t

3685 UnicodeString::doCaseCompare(int32_t start,

3686 int32_t thisLength,

3687 const UnicodeString &srcText,

3688 int32_t srcStart,

3689 int32_t srcLength,

3690 uint32_t options) const

3691 {

3692 if(srcText.isBogus()) {

3693 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise

3694 } else {

3695 srcText.pinIndices(srcStart, srcLength);

3696 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, s rcLength, options);

3697 }

3698 }

3699

3700 inline int8_t

3701 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {

3702 return doCaseCompare(0, length(), text, 0, text.length(), options);

3703 }

3704

3705 inline int8_t

3706 UnicodeString::caseCompare(int32_t start,

3707 int32_t _length,

3708 const UnicodeString &srcText,

3709 uint32_t options) const {

3710 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);

3711 }

3712

3713 inline int8_t

3714 UnicodeString::caseCompare(const UChar *srcChars,

3715 int32_t srcLength,

3716 uint32_t options) const {

3717 return doCaseCompare(0, length(), srcChars, 0, srcLength, options);

3718 }

3719

3720 inline int8_t

3721 UnicodeString::caseCompare(int32_t start,

3722 int32_t _length,

3723 const UnicodeString &srcText,

3724 int32_t srcStart,

3725 int32_t srcLength,

3726 uint32_t options) const {

3727 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);

3728 }

3729

3730 inline int8_t

3731 UnicodeString::caseCompare(int32_t start,

3732 int32_t _length,

3733 const UChar *srcChars,

3734 uint32_t options) const {

3735 return doCaseCompare(start, _length, srcChars, 0, _length, options);

3736 }

3737

3738 inline int8_t

3739 UnicodeString::caseCompare(int32_t start,

3740 int32_t _length,

3741 const UChar *srcChars,

3742 int32_t srcStart,

3743 int32_t srcLength,

3744 uint32_t options) const {

3745 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);

3746 }

3747

3748 inline int8_t

3749 UnicodeString::caseCompareBetween(int32_t start,

3750 int32_t limit,

3751 const UnicodeString &srcText,

3752 int32_t srcStart,

3753 int32_t srcLimit,

3754 uint32_t options) const {

3755 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcSt art, options);

3756 }

3757

3758 inline int32_t

3759 UnicodeString::indexOf(const UnicodeString& srcText,

3760 int32_t srcStart,

3761 int32_t srcLength,

3762 int32_t start,

3763 int32_t _length) const

3764 {

3765 if(!srcText.isBogus()) {

3766 srcText.pinIndices(srcStart, srcLength);

3767 if(srcLength > 0) {

3768 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _lengt h);

3769 }

3770 }

3771 return -1;

3772 }

3773

3774 inline int32_t

3775 UnicodeString::indexOf(const UnicodeString& text) const

3776 { return indexOf(text, 0, text.length(), 0, length()); }

3777

3778 inline int32_t

3779 UnicodeString::indexOf(const UnicodeString& text,

3780 int32_t start) const {

3781 pinIndex(start);

3782 return indexOf(text, 0, text.length(), start, length() - start);

3783 }

3784

3785 inline int32_t

3786 UnicodeString::indexOf(const UnicodeString& text,

3787 int32_t start,

3788 int32_t _length) const

3789 { return indexOf(text, 0, text.length(), start, _length); }

3790

3791 inline int32_t

3792 UnicodeString::indexOf(const UChar *srcChars,

3793 int32_t srcLength,

3794 int32_t start) const {

3795 pinIndex(start);

3796 return indexOf(srcChars, 0, srcLength, start, length() - start);

3797 }

3798

3799 inline int32_t

3800 UnicodeString::indexOf(const UChar *srcChars,

3801 int32_t srcLength,

3802 int32_t start,

3803 int32_t _length) const

3804 { return indexOf(srcChars, 0, srcLength, start, _length); }

3805

3806 inline int32_t

3807 UnicodeString::indexOf(UChar c,

3808 int32_t start,

3809 int32_t _length) const

3810 { return doIndexOf(c, start, _length); }

3811

3812 inline int32_t

3813 UnicodeString::indexOf(UChar32 c,

3814 int32_t start,

3815 int32_t _length) const

3816 { return doIndexOf(c, start, _length); }

3817

3818 inline int32_t

3819 UnicodeString::indexOf(UChar c) const

3820 { return doIndexOf(c, 0, length()); }

3821

3822 inline int32_t

3823 UnicodeString::indexOf(UChar32 c) const

3824 { return indexOf(c, 0, length()); }

3825

3826 inline int32_t

3827 UnicodeString::indexOf(UChar c,

3828 int32_t start) const {

3829 pinIndex(start);

3830 return doIndexOf(c, start, length() - start);

3831 }

3832

3833 inline int32_t

3834 UnicodeString::indexOf(UChar32 c,

3835 int32_t start) const {

3836 pinIndex(start);

3837 return indexOf(c, start, length() - start);

3838 }

3839

3840 inline int32_t

3841 UnicodeString::lastIndexOf(const UChar *srcChars,

3842 int32_t srcLength,

3843 int32_t start,

3844 int32_t _length) const

3845 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }

3846

3847 inline int32_t

3848 UnicodeString::lastIndexOf(const UChar *srcChars,

3849 int32_t srcLength,

3850 int32_t start) const {

3851 pinIndex(start);

3852 return lastIndexOf(srcChars, 0, srcLength, start, length() - start);

3853 }

3854

3855 inline int32_t

3856 UnicodeString::lastIndexOf(const UnicodeString& srcText,

3857 int32_t srcStart,

3858 int32_t srcLength,

3859 int32_t start,

3860 int32_t _length) const

3861 {

3862 if(!srcText.isBogus()) {

3863 srcText.pinIndices(srcStart, srcLength);

3864 if(srcLength > 0) {

3865 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _l ength);

3866 }

3867 }

3868 return -1;

3869 }

3870

3871 inline int32_t

3872 UnicodeString::lastIndexOf(const UnicodeString& text,

3873 int32_t start,

3874 int32_t _length) const

3875 { return lastIndexOf(text, 0, text.length(), start, _length); }

3876

3877 inline int32_t

3878 UnicodeString::lastIndexOf(const UnicodeString& text,

3879 int32_t start) const {

3880 pinIndex(start);

3881 return lastIndexOf(text, 0, text.length(), start, length() - start);

3882 }

3883

3884 inline int32_t

3885 UnicodeString::lastIndexOf(const UnicodeString& text) const

3886 { return lastIndexOf(text, 0, text.length(), 0, length()); }

3887

3888 inline int32_t

3889 UnicodeString::lastIndexOf(UChar c,

3890 int32_t start,

3891 int32_t _length) const

3892 { return doLastIndexOf(c, start, _length); }

3893

3894 inline int32_t

3895 UnicodeString::lastIndexOf(UChar32 c,

3896 int32_t start,

3897 int32_t _length) const {

3898 return doLastIndexOf(c, start, _length);

3899 }

3900

3901 inline int32_t

3902 UnicodeString::lastIndexOf(UChar c) const

3903 { return doLastIndexOf(c, 0, length()); }

3904

3905 inline int32_t

3906 UnicodeString::lastIndexOf(UChar32 c) const {

3907 return lastIndexOf(c, 0, length());

3908 }

3909

3910 inline int32_t

3911 UnicodeString::lastIndexOf(UChar c,

3912 int32_t start) const {

3913 pinIndex(start);

3914 return doLastIndexOf(c, start, length() - start);

3915 }

3916

3917 inline int32_t

3918 UnicodeString::lastIndexOf(UChar32 c,

3919 int32_t start) const {

3920 pinIndex(start);

3921 return lastIndexOf(c, start, length() - start);

3922 }

3923

3924 inline UBool

3925 UnicodeString::startsWith(const UnicodeString& text) const

3926 { return compare(0, text.length(), text, 0, text.length()) == 0; }

3927

3928 inline UBool

3929 UnicodeString::startsWith(const UnicodeString& srcText,

3930 int32_t srcStart,

3931 int32_t srcLength) const

3932 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }

3933

3934 inline UBool

3935 UnicodeString::startsWith(const UChar *srcChars,

3936 int32_t srcLength) const

3937 { return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; }

3938

3939 inline UBool

3940 UnicodeString::startsWith(const UChar *srcChars,

3941 int32_t srcStart,

3942 int32_t srcLength) const

3943 { return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;}

3944

3945 inline UBool

3946 UnicodeString::endsWith(const UnicodeString& text) const

3947 { return doCompare(length() - text.length(), text.length(),

3948 text, 0, text.length()) == 0; }

3949

3950 inline UBool

3951 UnicodeString::endsWith(const UnicodeString& srcText,

3952 int32_t srcStart,

3953 int32_t srcLength) const {

3954 srcText.pinIndices(srcStart, srcLength);

3955 return doCompare(length() - srcLength, srcLength,

3956 srcText, srcStart, srcLength) == 0;

3957 }

3958

3959 inline UBool

3960 UnicodeString::endsWith(const UChar *srcChars,

3961 int32_t srcLength) const {

3962 if(srcLength < 0) {

3963 srcLength = u_strlen(srcChars);

3964 }

3965 return doCompare(length() - srcLength, srcLength,

3966 srcChars, 0, srcLength) == 0;

3967 }

3968

3969 inline UBool

3970 UnicodeString::endsWith(const UChar *srcChars,

3971 int32_t srcStart,

3972 int32_t srcLength) const {

3973 if(srcLength < 0) {

3974 srcLength = u_strlen(srcChars + srcStart);

3975 }

3976 return doCompare(length() - srcLength, srcLength,

3977 srcChars, srcStart, srcLength) == 0;

3978 }

3979

3980 //========================================

3981 // replace

3982 //========================================

3983 inline UnicodeString&

3984 UnicodeString::replace(int32_t start,

3985 int32_t _length,

3986 const UnicodeString& srcText)

3987 { return doReplace(start, _length, srcText, 0, srcText.length()); }

3988

3989 inline UnicodeString&

3990 UnicodeString::replace(int32_t start,

3991 int32_t _length,

3992 const UnicodeString& srcText,

3993 int32_t srcStart,

3994 int32_t srcLength)

3995 { return doReplace(start, _length, srcText, srcStart, srcLength); }

3996

3997 inline UnicodeString&

3998 UnicodeString::replace(int32_t start,

3999 int32_t _length,

4000 const UChar *srcChars,

4001 int32_t srcLength)

4002 { return doReplace(start, _length, srcChars, 0, srcLength); }

4003

4004 inline UnicodeString&

4005 UnicodeString::replace(int32_t start,

4006 int32_t _length,

4007 const UChar *srcChars,

4008 int32_t srcStart,

4009 int32_t srcLength)

4010 { return doReplace(start, _length, srcChars, srcStart, srcLength); }

4011

4012 inline UnicodeString&

4013 UnicodeString::replace(int32_t start,

4014 int32_t _length,

4015 UChar srcChar)

4016 { return doReplace(start, _length, &srcChar, 0, 1); }

4017

4018 inline UnicodeString&

4019 UnicodeString::replace(int32_t start,

4020 int32_t _length,

4021 UChar32 srcChar) {

4022 UChar buffer[U16_MAX_LENGTH];

4023 int32_t count = 0;

4024 UBool isError = FALSE;

4025 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);

4026 (void) isError;

4027 return doReplace(start, _length, buffer, 0, count);

4028 }

4029

4030 inline UnicodeString&

4031 UnicodeString::replaceBetween(int32_t start,

4032 int32_t limit,

4033 const UnicodeString& srcText)

4034 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }

4035

4036 inline UnicodeString&

4037 UnicodeString::replaceBetween(int32_t start,

4038 int32_t limit,

4039 const UnicodeString& srcText,

4040 int32_t srcStart,

4041 int32_t srcLimit)

4042 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart) ; }

4043

4044 inline UnicodeString&

4045 UnicodeString::findAndReplace(const UnicodeString& oldText,

4046 const UnicodeString& newText)

4047 { return findAndReplace(0, length(), oldText, 0, oldText.length(),

4048 newText, 0, newText.length()); }

4049

4050 inline UnicodeString&

4051 UnicodeString::findAndReplace(int32_t start,

4052 int32_t _length,

4053 const UnicodeString& oldText,

4054 const UnicodeString& newText)

4055 { return findAndReplace(start, _length, oldText, 0, oldText.length(),

4056 newText, 0, newText.length()); }

4057

4058 // ============================

4059 // extract

4060 // ============================

4061 inline void

4062 UnicodeString::doExtract(int32_t start,

4063 int32_t _length,

4064 UnicodeString& target) const

4065 { target.replace(0, target.length(), *this, start, _length); }

4066

4067 inline void

4068 UnicodeString::extract(int32_t start,

4069 int32_t _length,

4070 UChar *target,

4071 int32_t targetStart) const

4072 { doExtract(start, _length, target, targetStart); }

4073

4074 inline void

4075 UnicodeString::extract(int32_t start,

4076 int32_t _length,

4077 UnicodeString& target) const

4078 { doExtract(start, _length, target); }

4079

4080 #if !UCONFIG_NO_CONVERSION

4081

4082 inline int32_t

4083 UnicodeString::extract(int32_t start,

4084 int32_t _length,

4085 char *dst,

4086 const char *codepage) const

4087

4088 {

4089 // This dstSize value will be checked explicitly

4090 #if defined(__GNUC__)

4091 // Ticket #7039: Clip length to the maximum valid length to the end of address able memory given the starting address

4092 // This is only an issue when using GCC and certain optimizations are turned o n.

4093 return extract(start, _length, dst, dst!=0 ? ((dst >= (char)((size_t)-1) - UI NT32_MAX) ? static_cast<unsigned int>((((char)UINT32_MAX) - dst)) : UINT32_MAX) : 0, codepage);

4094 #else

4095 return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);

4096 #endif

4097 }

4098

4099 #endif

4100

4101 inline void

4102 UnicodeString::extractBetween(int32_t start,

4103 int32_t limit,

4104 UChar *dst,

4105 int32_t dstStart) const {

4106 pinIndex(start);

4107 pinIndex(limit);

4108 doExtract(start, limit - start, dst, dstStart);

4109 }

4110

4111 inline UnicodeString

4112 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {

4113 return tempSubString(start, limit - start);

4114 }

4115

4116 inline UChar

4117 UnicodeString::doCharAt(int32_t offset) const

4118 {

4119 if((uint32_t)offset < (uint32_t)length()) {

4120 return getArrayStart()[offset];

4121 } else {

4122 return kInvalidUChar;

4123 }

4124 }

4125

4126 inline UChar

4127 UnicodeString::charAt(int32_t offset) const

4128 { return doCharAt(offset); }

4129

4130 inline UChar

4131 UnicodeString::operator[] (int32_t offset) const

4132 { return doCharAt(offset); }

4133

4134 inline UChar32

4135 UnicodeString::char32At(int32_t offset) const

4136 {

4137 int32_t len = length();

4138 if((uint32_t)offset < (uint32_t)len) {

4139 const UChar *array = getArrayStart();

4140 UChar32 c;

4141 U16_GET(array, 0, offset, len, c);

4142 return c;

4143 } else {

4144 return kInvalidUChar;

4145 }

4146 }

4147

4148 inline int32_t

4149 UnicodeString::getChar32Start(int32_t offset) const {

4150 if((uint32_t)offset < (uint32_t)length()) {

4151 const UChar *array = getArrayStart();

4152 U16_SET_CP_START(array, 0, offset);

4153 return offset;

4154 } else {

4155 return 0;

4156 }

4157 }

4158

4159 inline int32_t

4160 UnicodeString::getChar32Limit(int32_t offset) const {

4161 int32_t len = length();

4162 if((uint32_t)offset < (uint32_t)len) {

4163 const UChar *array = getArrayStart();

4164 U16_SET_CP_LIMIT(array, 0, offset, len);

4165 return offset;

4166 } else {

4167 return len;

4168 }

4169 }

4170

4171 inline UBool

4172 UnicodeString::isEmpty() const {

4173 return fShortLength == 0;

4174 }

4175

4176 //========================================

4177 // Write implementation methods

4178 //========================================

4179 inline void

4180 UnicodeString::setLength(int32_t len) {

4181 if(len <= 127) {

4182 fShortLength = (int8_t)len;

4183 } else {

4184 fShortLength = (int8_t)-1;

4185 fUnion.fFields.fLength = len;

4186 }

4187 }

4188

4189 inline void

4190 UnicodeString::setToEmpty() {

4191 fShortLength = 0;

4192 fFlags = kShortString;

4193 }

4194

4195 inline void

4196 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {

4197 setLength(len);

4198 fUnion.fFields.fArray = array;

4199 fUnion.fFields.fCapacity = capacity;

4200 }

4201

4202 inline const UChar *

4203 UnicodeString::getTerminatedBuffer() {

4204 if(!isWritable()) {

4205 return 0;

4206 } else {

4207 UChar *array = getArrayStart();

4208 int32_t len = length();

4209 if(len < getCapacity() && ((fFlags&kRefCounted) == 0 \|\| refCount() == 1)) {

4210 /*

4211 * kRefCounted: Do not write the NUL if the buffer is shared.

4212 * That is mostly safe, except when the length of one copy was modified

4213 * without copy-on-write, e.g., via truncate(newLength) or remove(void).

4214 * Then the NUL would be written into the middle of another copy's string.

4215 */

4216 if(!(fFlags&kBufferIsReadonly)) {

4217 /*

4218 * We must not write to a readonly buffer, but it is known to be

4219 * NUL-terminated if len<capacity.

4220 * A shared, allocated buffer (refCount()>1) must not have its contents

4221 * modified, but the NUL at [len] is beyond the string contents,

4222 * and multiple string objects and threads writing the same NUL into the

4223 * same location is harmless.

4224 * In all other cases, the buffer is fully writable and it is anyway saf e

4225 * to write the NUL.

4226 *

4227 * Note: An earlier version of this code tested whether there is a NUL

4228 * at [len] already, but, while safe, it generated lots of warnings from

4229 * tools like valgrind and Purify.

4230 */

4231 array[len] = 0;

4232 }

4233 return array;

4234 } else if(cloneArrayIfNeeded(len+1)) {

4235 array = getArrayStart();

4236 array[len] = 0;

4237 return array;

4238 } else {

4239 return 0;

4240 }

4241 }

4242 }

4243

4244 inline UnicodeString&

4245 UnicodeString::operator= (UChar ch)

4246 { return doReplace(0, length(), &ch, 0, 1); }

4247

4248 inline UnicodeString&

4249 UnicodeString::operator= (UChar32 ch)

4250 { return replace(0, length(), ch); }

4251

4252 inline UnicodeString&

4253 UnicodeString::setTo(const UnicodeString& srcText,

4254 int32_t srcStart,

4255 int32_t srcLength)

4256 {

4257 unBogus();

4258 return doReplace(0, length(), srcText, srcStart, srcLength);

4259 }

4260

4261 inline UnicodeString&

4262 UnicodeString::setTo(const UnicodeString& srcText,

4263 int32_t srcStart)

4264 {

4265 unBogus();

4266 srcText.pinIndex(srcStart);

4267 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);

4268 }

4269

4270 inline UnicodeString&

4271 UnicodeString::setTo(const UnicodeString& srcText)

4272 {

4273 unBogus();

4274 return doReplace(0, length(), srcText, 0, srcText.length());

4275 }

4276

4277 inline UnicodeString&

4278 UnicodeString::setTo(const UChar *srcChars,

4279 int32_t srcLength)

4280 {

4281 unBogus();

4282 return doReplace(0, length(), srcChars, 0, srcLength);

4283 }

4284

4285 inline UnicodeString&

4286 UnicodeString::setTo(UChar srcChar)

4287 {

4288 unBogus();

4289 return doReplace(0, length(), &srcChar, 0, 1);

4290 }

4291

4292 inline UnicodeString&

4293 UnicodeString::setTo(UChar32 srcChar)

4294 {

4295 unBogus();

4296 return replace(0, length(), srcChar);

4297 }

4298

4299 inline UnicodeString&

4300 UnicodeString::append(const UnicodeString& srcText,

4301 int32_t srcStart,

4302 int32_t srcLength)

4303 { return doReplace(length(), 0, srcText, srcStart, srcLength); }

4304

4305 inline UnicodeString&

4306 UnicodeString::append(const UnicodeString& srcText)

4307 { return doReplace(length(), 0, srcText, 0, srcText.length()); }

4308

4309 inline UnicodeString&

4310 UnicodeString::append(const UChar *srcChars,

4311 int32_t srcStart,

4312 int32_t srcLength)

4313 { return doReplace(length(), 0, srcChars, srcStart, srcLength); }

4314

4315 inline UnicodeString&

4316 UnicodeString::append(const UChar *srcChars,

4317 int32_t srcLength)

4318 { return doReplace(length(), 0, srcChars, 0, srcLength); }

4319

4320 inline UnicodeString&

4321 UnicodeString::append(UChar srcChar)

4322 { return doReplace(length(), 0, &srcChar, 0, 1); }

4323

4324 inline UnicodeString&

4325 UnicodeString::append(UChar32 srcChar) {

4326 UChar buffer[U16_MAX_LENGTH];

4327 int32_t _length = 0;

4328 UBool isError = FALSE;

4329 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);

4330 (void) isError;

4331 return doReplace(length(), 0, buffer, 0, _length);

4332 }

4333

4334 inline UnicodeString&

4335 UnicodeString::operator+= (UChar ch)

4336 { return doReplace(length(), 0, &ch, 0, 1); }

4337

4338 inline UnicodeString&

4339 UnicodeString::operator+= (UChar32 ch) {

4340 return append(ch);

4341 }

4342

4343 inline UnicodeString&

4344 UnicodeString::operator+= (const UnicodeString& srcText)

4345 { return doReplace(length(), 0, srcText, 0, srcText.length()); }

4346

4347 inline UnicodeString&

4348 UnicodeString::insert(int32_t start,

4349 const UnicodeString& srcText,

4350 int32_t srcStart,

4351 int32_t srcLength)

4352 { return doReplace(start, 0, srcText, srcStart, srcLength); }

4353

4354 inline UnicodeString&

4355 UnicodeString::insert(int32_t start,

4356 const UnicodeString& srcText)

4357 { return doReplace(start, 0, srcText, 0, srcText.length()); }

4358

4359 inline UnicodeString&

4360 UnicodeString::insert(int32_t start,

4361 const UChar *srcChars,

4362 int32_t srcStart,

4363 int32_t srcLength)

4364 { return doReplace(start, 0, srcChars, srcStart, srcLength); }

4365

4366 inline UnicodeString&

4367 UnicodeString::insert(int32_t start,

4368 const UChar *srcChars,

4369 int32_t srcLength)

4370 { return doReplace(start, 0, srcChars, 0, srcLength); }

4371

4372 inline UnicodeString&

4373 UnicodeString::insert(int32_t start,

4374 UChar srcChar)

4375 { return doReplace(start, 0, &srcChar, 0, 1); }

4376

4377 inline UnicodeString&

4378 UnicodeString::insert(int32_t start,

4379 UChar32 srcChar)

4380 { return replace(start, 0, srcChar); }

4381

4382

4383 inline UnicodeString&

4384 UnicodeString::remove()

4385 {

4386 // remove() of a bogus string makes the string empty and non-bogus

4387 // we also un-alias a read-only alias to deal with NUL-termination

4388 // issues with getTerminatedBuffer()

4389 if(fFlags & (kIsBogus\|kBufferIsReadonly)) {

4390 setToEmpty();

4391 } else {

4392 fShortLength = 0;

4393 }

4394 return *this;

4395 }

4396

4397 inline UnicodeString&

4398 UnicodeString::remove(int32_t start,

4399 int32_t _length)

4400 {

4401 if(start <= 0 && _length == INT32_MAX) {

4402 // remove(guaranteed everything) of a bogus string makes the string empt y and non-bogus

4403 return remove();

4404 }

4405 return doReplace(start, _length, NULL, 0, 0);

4406 }

4407

4408 inline UnicodeString&

4409 UnicodeString::removeBetween(int32_t start,

4410 int32_t limit)

4411 { return doReplace(start, limit - start, NULL, 0, 0); }

4412

4413 inline UnicodeString &

4414 UnicodeString::retainBetween(int32_t start, int32_t limit) {

4415 truncate(limit);

4416 return doReplace(0, start, NULL, 0, 0);

4417 }

4418

4419 inline UBool

4420 UnicodeString::truncate(int32_t targetLength)

4421 {

4422 if(isBogus() && targetLength == 0) {

4423 // truncate(0) of a bogus string makes the string empty and non-bogus

4424 unBogus();

4425 return FALSE;

4426 } else if((uint32_t)targetLength < (uint32_t)length()) {

4427 setLength(targetLength);

4428 if(fFlags&kBufferIsReadonly) {

4429 fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more

4430 }

4431 return TRUE;

4432 } else {

4433 return FALSE;

4434 }

4435 }

4436

4437 inline UnicodeString&

4438 UnicodeString::reverse()

4439 { return doReverse(0, length()); }

4440

4441 inline UnicodeString&

4442 UnicodeString::reverse(int32_t start,

4443 int32_t _length)

4444 { return doReverse(start, _length); }

4445

4446 U_NAMESPACE_END

4447

4448 #endif

OLD	NEW

« no previous file with comments | « public/common/unicode/uniset.h ('k') | public/common/unicode/unorm.h » ('j') | no next file with comments »