OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * |
| 4 * Copyright (C) 2004-2010, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ******************************************************************************* |
| 8 * file name: utext.h |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * created on: 2004oct06 |
| 14 * created by: Markus W. Scherer |
| 15 */ |
| 16 |
| 17 #ifndef __UTEXT_H__ |
| 18 #define __UTEXT_H__ |
| 19 |
| 20 /** |
| 21 * \file |
| 22 * \brief C API: Abstract Unicode Text API |
| 23 * |
| 24 * The Text Access API provides a means to allow text that is stored in alternat
ive |
| 25 * formats to work with ICU services. ICU normally operates on text that is |
| 26 * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type |
| 27 * UnicodeString for C++ APIs. |
| 28 * |
| 29 * ICU Text Access allows other formats, such as UTF-8 or non-contiguous |
| 30 * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU servic
es. |
| 31 * |
| 32 * There are three general classes of usage for UText: |
| 33 * |
| 34 * Application Level Use. This is the simplest usage - applications would |
| 35 * use one of the utext_open() functions on their input text, and pass |
| 36 * the resulting UText to the desired ICU service. |
| 37 * |
| 38 * Second is usage in ICU Services, such as break iteration, that will need
to |
| 39 * operate on input presented to them as a UText. These implementations |
| 40 * will need to use the iteration and related UText functions to gain |
| 41 * access to the actual text. |
| 42 * |
| 43 * The third class of UText users are "text providers." These are the |
| 44 * UText implementations for the various text storage formats. An applicati
on |
| 45 * or system with a unique text storage format can implement a set of |
| 46 * UText provider functions for that format, which will then allow |
| 47 * ICU services to operate on that format. |
| 48 * |
| 49 * |
| 50 * <em>Iterating over text</em> |
| 51 * |
| 52 * Here is sample code for a forward iteration over the contents of a UText |
| 53 * |
| 54 * \code |
| 55 * UChar32 c; |
| 56 * UText *ut = whatever(); |
| 57 * |
| 58 * for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) { |
| 59 * // do whatever with the codepoint c here. |
| 60 * } |
| 61 * \endcode |
| 62 * |
| 63 * And here is similar code to iterate in the reverse direction, from the end |
| 64 * of the text towards the beginning. |
| 65 * |
| 66 * \code |
| 67 * UChar32 c; |
| 68 * UText *ut = whatever(); |
| 69 * int64_t textLength = utext_nativeLength(ut); |
| 70 * for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut))
{ |
| 71 * // do whatever with the codepoint c here. |
| 72 * } |
| 73 * \endcode |
| 74 * |
| 75 * <em>Characters and Indexing</em> |
| 76 * |
| 77 * Indexing into text by UText functions is nearly always in terms of the native |
| 78 * indexing of the underlying text storage. The storage format could be UTF-8 |
| 79 * or UTF-32, for example. When coding to the UText access API, no assumptions |
| 80 * can be made regarding the size of characters, or how far an index |
| 81 * may move when iterating between characters. |
| 82 * |
| 83 * All indices supplied to UText functions are pinned to the length of the |
| 84 * text. An out-of-bounds index is not considered to be an error, but is |
| 85 * adjusted to be in the range 0 <= index <= length of input text. |
| 86 * |
| 87 * |
| 88 * When an index position is returned from a UText function, it will be |
| 89 * a native index to the underlying text. In the case of multi-unit characters, |
| 90 * it will always refer to the first position of the character, |
| 91 * never to the interior. This is essentially the same thing as saying that |
| 92 * a returned index will always point to a boundary between characters. |
| 93 * |
| 94 * When a native index is supplied to a UText function, all indices that |
| 95 * refer to any part of a multi-unit character representation are considered |
| 96 * to be equivalent. In the case of multi-unit characters, an incoming index |
| 97 * will be logically normalized to refer to the start of the character. |
| 98 * |
| 99 * It is possible to test whether a native index is on a code point boundary |
| 100 * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex(). |
| 101 * If the index is returned unchanged, it was on a code point boundary. If |
| 102 * an adjusted index is returned, the original index referred to the |
| 103 * interior of a character. |
| 104 * |
| 105 * <em>Conventions for calling UText functions</em> |
| 106 * |
| 107 * Most UText access functions have as their first parameter a (UText *) pointer
, |
| 108 * which specifies the UText to be used. Unless otherwise noted, the |
| 109 * pointer must refer to a valid, open UText. Attempting to |
| 110 * use a closed UText or passing a NULL pointer is a programming error and |
| 111 * will produce undefined results or NULL pointer exceptions. |
| 112 * |
| 113 * The utext_open family of functions can either open an existing (closed) |
| 114 * UText, or heap allocate a new UText. Here is sample code for creating |
| 115 * a stack-allocated UText. |
| 116 * |
| 117 * \code |
| 118 * char *s = whatever(); // A utf-8 string |
| 119 * UErrorCode status = U_ZERO_ERROR; |
| 120 * UText ut = UTEXT_INITIALIZER; |
| 121 * utext_openUTF8(&ut, s, -1, &status); |
| 122 * if (U_FAILURE(status)) { |
| 123 * // error handling |
| 124 * } else { |
| 125 * // work with the UText |
| 126 * } |
| 127 * \endcode |
| 128 * |
| 129 * Any existing UText passed to an open function _must_ have been initialized, |
| 130 * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated |
| 131 * by an open function. Passing NULL will cause the open function to |
| 132 * heap-allocate and fully initialize a new UText. |
| 133 * |
| 134 */ |
| 135 |
| 136 |
| 137 |
| 138 #include "unicode/utypes.h" |
| 139 #include "unicode/uchar.h" |
| 140 #if U_SHOW_CPLUSPLUS_API |
| 141 #include "unicode/localpointer.h" |
| 142 #include "unicode/rep.h" |
| 143 #include "unicode/unistr.h" |
| 144 #include "unicode/chariter.h" |
| 145 #endif |
| 146 |
| 147 |
| 148 U_CDECL_BEGIN |
| 149 |
| 150 struct UText; |
| 151 typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */ |
| 152 |
| 153 |
| 154 /*******************************************************************************
******** |
| 155 * |
| 156 * C Functions for creating UText wrappers around various kinds of text string
s. |
| 157 * |
| 158 *******************************************************************************
*********/ |
| 159 |
| 160 |
| 161 /** |
| 162 * Close function for UText instances. |
| 163 * Cleans up, releases any resources being held by an open UText. |
| 164 * <p> |
| 165 * If the UText was originally allocated by one of the utext_open functions, |
| 166 * the storage associated with the utext will also be freed. |
| 167 * If the UText storage originated with the application, as it would with |
| 168 * a local or static instance, the storage will not be deleted. |
| 169 * |
| 170 * An open UText can be reset to refer to a new string by using one of the utex
t_open() |
| 171 * functions without first closing the UText. |
| 172 * |
| 173 * @param ut The UText to be closed. |
| 174 * @return NULL if the UText struct was deleted by the close. If the UText
struct |
| 175 * was originally provided by the caller to the open function, it is |
| 176 * returned by this function, and may be safely used again in |
| 177 * a subsequent utext_open. |
| 178 * |
| 179 * @stable ICU 3.4 |
| 180 */ |
| 181 U_STABLE UText * U_EXPORT2 |
| 182 utext_close(UText *ut); |
| 183 |
| 184 #if U_SHOW_CPLUSPLUS_API |
| 185 |
| 186 U_NAMESPACE_BEGIN |
| 187 |
| 188 /** |
| 189 * \class LocalUTextPointer |
| 190 * "Smart pointer" class, closes a UText via utext_close(). |
| 191 * For most methods see the LocalPointerBase base class. |
| 192 * |
| 193 * @see LocalPointerBase |
| 194 * @see LocalPointer |
| 195 * @stable ICU 4.4 |
| 196 */ |
| 197 U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close); |
| 198 |
| 199 U_NAMESPACE_END |
| 200 |
| 201 #endif |
| 202 |
| 203 /** |
| 204 * Open a read-only UText implementation for UTF-8 strings. |
| 205 * |
| 206 * \htmlonly |
| 207 * Any invalid UTF-8 in the input will be handled in this way: |
| 208 * a sequence of bytes that has the form of a truncated, but otherwise valid, |
| 209 * UTF-8 sequence will be replaced by a single unicode replacement character, \u
FFFD. |
| 210 * Any other illegal bytes will each be replaced by a \uFFFD. |
| 211 * \endhtmlonly |
| 212 * |
| 213 * @param ut Pointer to a UText struct. If NULL, a new UText will be create
d. |
| 214 * If non-NULL, must refer to an initialized UText struct, which w
ill then |
| 215 * be reset to reference the specified UTF-8 string. |
| 216 * @param s A UTF-8 string. Must not be NULL. |
| 217 * @param length The length of the UTF-8 string in bytes, or -1 if the string is |
| 218 * zero terminated. |
| 219 * @param status Errors are returned here. |
| 220 * @return A pointer to the UText. If a pre-allocated UText was provided,
it |
| 221 * will always be used and returned. |
| 222 * @stable ICU 3.4 |
| 223 */ |
| 224 U_STABLE UText * U_EXPORT2 |
| 225 utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status); |
| 226 |
| 227 |
| 228 /** |
| 229 * Open a read-only UText for UChar * string. |
| 230 * |
| 231 * @param ut Pointer to a UText struct. If NULL, a new UText will be create
d. |
| 232 * If non-NULL, must refer to an initialized UText struct, which w
ill then |
| 233 * be reset to reference the specified UChar string. |
| 234 * @param s A UChar (UTF-16) string |
| 235 * @param length The number of UChars in the input string, or -1 if the string i
s |
| 236 * zero terminated. |
| 237 * @param status Errors are returned here. |
| 238 * @return A pointer to the UText. If a pre-allocated UText was provided,
it |
| 239 * will always be used and returned. |
| 240 * @stable ICU 3.4 |
| 241 */ |
| 242 U_STABLE UText * U_EXPORT2 |
| 243 utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status); |
| 244 |
| 245 |
| 246 #if U_SHOW_CPLUSPLUS_API |
| 247 /** |
| 248 * Open a writable UText for a non-const UnicodeString. |
| 249 * |
| 250 * @param ut Pointer to a UText struct. If NULL, a new UText will be creat
ed. |
| 251 * If non-NULL, must refer to an initialized UText struct, which
will then |
| 252 * be reset to reference the specified input string. |
| 253 * @param s A UnicodeString. |
| 254 * @param status Errors are returned here. |
| 255 * @return Pointer to the UText. If a UText was supplied as input, this |
| 256 * will always be used and returned. |
| 257 * @stable ICU 3.4 |
| 258 */ |
| 259 U_STABLE UText * U_EXPORT2 |
| 260 utext_openUnicodeString(UText *ut, U_NAMESPACE_QUALIFIER UnicodeString *s, UErro
rCode *status); |
| 261 |
| 262 |
| 263 /** |
| 264 * Open a UText for a const UnicodeString. The resulting UText will not be wri
table. |
| 265 * |
| 266 * @param ut Pointer to a UText struct. If NULL, a new UText will be created
. |
| 267 * If non-NULL, must refer to an initialized UText struct, which w
ill then |
| 268 * be reset to reference the specified input string. |
| 269 * @param s A const UnicodeString to be wrapped. |
| 270 * @param status Errors are returned here. |
| 271 * @return Pointer to the UText. If a UText was supplied as input, this |
| 272 * will always be used and returned. |
| 273 * @stable ICU 3.4 |
| 274 */ |
| 275 U_STABLE UText * U_EXPORT2 |
| 276 utext_openConstUnicodeString(UText *ut, const U_NAMESPACE_QUALIFIER UnicodeStrin
g *s, UErrorCode *status); |
| 277 |
| 278 |
| 279 /** |
| 280 * Open a writable UText implementation for an ICU Replaceable object. |
| 281 * @param ut Pointer to a UText struct. If NULL, a new UText will be created
. |
| 282 * If non-NULL, must refer to an already existing UText, which wil
l then |
| 283 * be reset to reference the specified replaceable text. |
| 284 * @param rep A Replaceable text object. |
| 285 * @param status Errors are returned here. |
| 286 * @return Pointer to the UText. If a UText was supplied as input, this |
| 287 * will always be used and returned. |
| 288 * @see Replaceable |
| 289 * @stable ICU 3.4 |
| 290 */ |
| 291 U_STABLE UText * U_EXPORT2 |
| 292 utext_openReplaceable(UText *ut, U_NAMESPACE_QUALIFIER Replaceable *rep, UErrorC
ode *status); |
| 293 |
| 294 /** |
| 295 * Open a UText implementation over an ICU CharacterIterator. |
| 296 * @param ut Pointer to a UText struct. If NULL, a new UText will be created
. |
| 297 * If non-NULL, must refer to an already existing UText, which wil
l then |
| 298 * be reset to reference the specified character iterator. |
| 299 * @param ic A Character Iterator. |
| 300 * @param status Errors are returned here. |
| 301 * @return Pointer to the UText. If a UText was supplied as input, this |
| 302 * will always be used and returned. |
| 303 * @see CharacterIterator |
| 304 * @stable ICU 3.4 |
| 305 */ |
| 306 U_STABLE UText * U_EXPORT2 |
| 307 utext_openCharacterIterator(UText *ut, U_NAMESPACE_QUALIFIER CharacterIterator *
ic, UErrorCode *status); |
| 308 |
| 309 #endif |
| 310 |
| 311 |
| 312 /** |
| 313 * Clone a UText. This is much like opening a UText where the source text is
itself |
| 314 * another UText. |
| 315 * |
| 316 * A deep clone will copy both the UText data structures and the underlying te
xt. |
| 317 * The original and cloned UText will operate completely independently; modifi
cations |
| 318 * made to the text in one will not affect the other. Text providers are not |
| 319 * required to support deep clones. The user of clone() must check the status
return |
| 320 * and be prepared to handle failures. |
| 321 * |
| 322 * The standard UText implementations for UTF8, UChar *, UnicodeString and |
| 323 * Replaceable all support deep cloning. |
| 324 * |
| 325 * The UText returned from a deep clone will be writable, assuming that the te
xt |
| 326 * provider is able to support writing, even if the source UText had been made |
| 327 * non-writable by means of utext_freeze(). |
| 328 * |
| 329 * A shallow clone replicates only the UText data structures; it does not make |
| 330 * a copy of the underlying text. Shallow clones can be used as an efficient
way to |
| 331 * have multiple iterators active in a single text string that is not being |
| 332 * modified. |
| 333 * |
| 334 * A shallow clone operation will not fail, barring truly exceptional conditio
ns such |
| 335 * as memory allocation failures. |
| 336 * |
| 337 * Shallow UText clones should be avoided if the UText functions that modify t
he |
| 338 * text are expected to be used, either on the original or the cloned UText. |
| 339 * Any such modifications can cause unpredictable behavior. Read Only |
| 340 * shallow clones provide some protection against errors of this type by |
| 341 * disabling text modification via the cloned UText. |
| 342 * |
| 343 * A shallow clone made with the readOnly parameter == FALSE will preserve the
|
| 344 * utext_isWritable() state of the source object. Note, however, that |
| 345 * write operations must be avoided while more than one UText exists that refe
r |
| 346 * to the same underlying text. |
| 347 * |
| 348 * A UText and its clone may be safely concurrently accessed by separate threa
ds. |
| 349 * This is true for read access only with shallow clones, and for both read an
d |
| 350 * write access with deep clones. |
| 351 * It is the responsibility of the Text Provider to ensure that this thread sa
fety |
| 352 * constraint is met. |
| 353 * |
| 354 * @param dest A UText struct to be filled in with the result of the clone o
peration, |
| 355 * or NULL if the clone function should heap-allocate a new UTex
t struct. |
| 356 * If non-NULL, must refer to an already existing UText, which w
ill then |
| 357 * be reset to become the clone. |
| 358 * @param src The UText to be cloned. |
| 359 * @param deep TRUE to request a deep clone, FALSE for a shallow clone. |
| 360 * @param readOnly TRUE to request that the cloned UText have read only access
to the |
| 361 * underlying text. |
| 362 |
| 363 * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERR
OR |
| 364 * will be returned if the text provider is unable to clone the |
| 365 * original text. |
| 366 * @return The newly created clone, or NULL if the clone operation faile
d. |
| 367 * @stable ICU 3.4 |
| 368 */ |
| 369 U_STABLE UText * U_EXPORT2 |
| 370 utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCod
e *status); |
| 371 |
| 372 |
| 373 /** |
| 374 * Compare two UText objects for equality. |
| 375 * UTexts are equal if they are iterating over the same text, and |
| 376 * have the same iteration position within the text. |
| 377 * If either or both of the parameters are NULL, the comparison is FALSE. |
| 378 * |
| 379 * @param a The first of the two UTexts to compare. |
| 380 * @param b The other UText to be compared. |
| 381 * @return TRUE if the two UTexts are equal. |
| 382 * @stable ICU 3.6 |
| 383 */ |
| 384 U_STABLE UBool U_EXPORT2 |
| 385 utext_equals(const UText *a, const UText *b); |
| 386 |
| 387 |
| 388 /***************************************************************************** |
| 389 * |
| 390 * Functions to work with the text represented by a UText wrapper |
| 391 * |
| 392 *****************************************************************************/ |
| 393 |
| 394 /** |
| 395 * Get the length of the text. Depending on the characteristics |
| 396 * of the underlying text representation, this may be expensive. |
| 397 * @see utext_isLengthExpensive() |
| 398 * |
| 399 * |
| 400 * @param ut the text to be accessed. |
| 401 * @return the length of the text, expressed in native units. |
| 402 * |
| 403 * @stable ICU 3.4 |
| 404 */ |
| 405 U_STABLE int64_t U_EXPORT2 |
| 406 utext_nativeLength(UText *ut); |
| 407 |
| 408 /** |
| 409 * Return TRUE if calculating the length of the text could be expensive. |
| 410 * Finding the length of NUL terminated strings is considered to be expensive. |
| 411 * |
| 412 * Note that the value of this function may change |
| 413 * as the result of other operations on a UText. |
| 414 * Once the length of a string has been discovered, it will no longer |
| 415 * be expensive to report it. |
| 416 * |
| 417 * @param ut the text to be accessed. |
| 418 * @return TRUE if determining the length of the text could be time consuming. |
| 419 * @stable ICU 3.4 |
| 420 */ |
| 421 U_STABLE UBool U_EXPORT2 |
| 422 utext_isLengthExpensive(const UText *ut); |
| 423 |
| 424 /** |
| 425 * Returns the code point at the requested index, |
| 426 * or U_SENTINEL (-1) if it is out of bounds. |
| 427 * |
| 428 * If the specified index points to the interior of a multi-unit |
| 429 * character - one of the trail bytes of a UTF-8 sequence, for example - |
| 430 * the complete code point will be returned. |
| 431 * |
| 432 * The iteration position will be set to the start of the returned code point. |
| 433 * |
| 434 * This function is roughly equivalent to the sequence |
| 435 * utext_setNativeIndex(index); |
| 436 * utext_current32(); |
| 437 * (There is a subtle difference if the index is out of bounds by being less tha
n zero - |
| 438 * utext_setNativeIndex(negative value) sets the index to zero, after which utex
t_current32() |
| 439 * will return the char at zero. utext_char32At(negative index), on the other h
and, will |
| 440 * return the U_SENTINEL value of -1.) |
| 441 * |
| 442 * @param ut the text to be accessed |
| 443 * @param nativeIndex the native index of the character to be accessed. If the
index points |
| 444 * to other than the first unit of a multi-unit character, it will be adj
usted |
| 445 * to the start of the character. |
| 446 * @return the code point at the specified index. |
| 447 * @stable ICU 3.4 |
| 448 */ |
| 449 U_STABLE UChar32 U_EXPORT2 |
| 450 utext_char32At(UText *ut, int64_t nativeIndex); |
| 451 |
| 452 |
| 453 /** |
| 454 * |
| 455 * Get the code point at the current iteration position, |
| 456 * or U_SENTINEL (-1) if the iteration has reached the end of |
| 457 * the input text. |
| 458 * |
| 459 * @param ut the text to be accessed. |
| 460 * @return the Unicode code point at the current iterator position. |
| 461 * @stable ICU 3.4 |
| 462 */ |
| 463 U_STABLE UChar32 U_EXPORT2 |
| 464 utext_current32(UText *ut); |
| 465 |
| 466 |
| 467 /** |
| 468 * Get the code point at the current iteration position of the UText, and |
| 469 * advance the position to the first index following the character. |
| 470 * |
| 471 * If the position is at the end of the text (the index following |
| 472 * the last character, which is also the length of the text), |
| 473 * return U_SENTINEL (-1) and do not advance the index. |
| 474 * |
| 475 * This is a post-increment operation. |
| 476 * |
| 477 * An inline macro version of this function, UTEXT_NEXT32(), |
| 478 * is available for performance critical use. |
| 479 * |
| 480 * @param ut the text to be accessed. |
| 481 * @return the Unicode code point at the iteration position. |
| 482 * @see UTEXT_NEXT32 |
| 483 * @stable ICU 3.4 |
| 484 */ |
| 485 U_STABLE UChar32 U_EXPORT2 |
| 486 utext_next32(UText *ut); |
| 487 |
| 488 |
| 489 /** |
| 490 * Move the iterator position to the character (code point) whose |
| 491 * index precedes the current position, and return that character. |
| 492 * This is a pre-decrement operation. |
| 493 * |
| 494 * If the initial position is at the start of the text (index of 0) |
| 495 * return U_SENTINEL (-1), and leave the position unchanged. |
| 496 * |
| 497 * An inline macro version of this function, UTEXT_PREVIOUS32(), |
| 498 * is available for performance critical use. |
| 499 * |
| 500 * @param ut the text to be accessed. |
| 501 * @return the previous UChar32 code point, or U_SENTINEL (-1) |
| 502 * if the iteration has reached the start of the text. |
| 503 * @see UTEXT_PREVIOUS32 |
| 504 * @stable ICU 3.4 |
| 505 */ |
| 506 U_STABLE UChar32 U_EXPORT2 |
| 507 utext_previous32(UText *ut); |
| 508 |
| 509 |
| 510 /** |
| 511 * Set the iteration index and return the code point at that index. |
| 512 * Leave the iteration index at the start of the following code point. |
| 513 * |
| 514 * This function is the most efficient and convenient way to |
| 515 * begin a forward iteration. The results are identical to those |
| 516 * from the sequence |
| 517 * \code |
| 518 * utext_setNativeIndex(); |
| 519 * utext_next32(); |
| 520 * \endcode |
| 521 * |
| 522 * @param ut the text to be accessed. |
| 523 * @param nativeIndex Iteration index, in the native units of the text provide
r. |
| 524 * @return Code point which starts at or before index, |
| 525 * or U_SENTINEL (-1) if it is out of bounds. |
| 526 * @stable ICU 3.4 |
| 527 */ |
| 528 U_STABLE UChar32 U_EXPORT2 |
| 529 utext_next32From(UText *ut, int64_t nativeIndex); |
| 530 |
| 531 |
| 532 |
| 533 /** |
| 534 * Set the iteration index, and return the code point preceding the |
| 535 * one specified by the initial index. Leave the iteration position |
| 536 * at the start of the returned code point. |
| 537 * |
| 538 * This function is the most efficient and convenient way to |
| 539 * begin a backwards iteration. |
| 540 * |
| 541 * @param ut the text to be accessed. |
| 542 * @param nativeIndex Iteration index in the native units of the text provider. |
| 543 * @return Code point preceding the one at the initial index, |
| 544 * or U_SENTINEL (-1) if it is out of bounds. |
| 545 * |
| 546 * @stable ICU 3.4 |
| 547 */ |
| 548 U_STABLE UChar32 U_EXPORT2 |
| 549 utext_previous32From(UText *ut, int64_t nativeIndex); |
| 550 |
| 551 /** |
| 552 * Get the current iterator position, which can range from 0 to |
| 553 * the length of the text. |
| 554 * The position is a native index into the input text, in whatever format it |
| 555 * may have (possibly UTF-8 for example), and may not always be the same as |
| 556 * the corresponding UChar (UTF-16) index. |
| 557 * The returned position will always be aligned to a code point boundary. |
| 558 * |
| 559 * @param ut the text to be accessed. |
| 560 * @return the current index position, in the native units of the text provider
. |
| 561 * @stable ICU 3.4 |
| 562 */ |
| 563 U_STABLE int64_t U_EXPORT2 |
| 564 utext_getNativeIndex(const UText *ut); |
| 565 |
| 566 /** |
| 567 * Set the current iteration position to the nearest code point |
| 568 * boundary at or preceding the specified index. |
| 569 * The index is in the native units of the original input text. |
| 570 * If the index is out of range, it will be pinned to be within |
| 571 * the range of the input text. |
| 572 * <p> |
| 573 * It will usually be more efficient to begin an iteration |
| 574 * using the functions utext_next32From() or utext_previous32From() |
| 575 * rather than utext_setNativeIndex(). |
| 576 * <p> |
| 577 * Moving the index position to an adjacent character is best done |
| 578 * with utext_next32(), utext_previous32() or utext_moveIndex32(). |
| 579 * Attempting to do direct arithmetic on the index position is |
| 580 * complicated by the fact that the size (in native units) of a |
| 581 * character depends on the underlying representation of the character |
| 582 * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not |
| 583 * easily knowable. |
| 584 * |
| 585 * @param ut the text to be accessed. |
| 586 * @param nativeIndex the native unit index of the new iteration position. |
| 587 * @stable ICU 3.4 |
| 588 */ |
| 589 U_STABLE void U_EXPORT2 |
| 590 utext_setNativeIndex(UText *ut, int64_t nativeIndex); |
| 591 |
| 592 /** |
| 593 * Move the iterator position by delta code points. The number of code points |
| 594 * is a signed number; a negative delta will move the iterator backwards, |
| 595 * towards the start of the text. |
| 596 * <p> |
| 597 * The index is moved by <code>delta</code> code points |
| 598 * forward or backward, but no further backward than to 0 and |
| 599 * no further forward than to utext_nativeLength(). |
| 600 * The resulting index value will be in between 0 and length, inclusive. |
| 601 * |
| 602 * @param ut the text to be accessed. |
| 603 * @param delta the signed number of code points to move the iteration position. |
| 604 * @return TRUE if the position could be moved the requested number of positions
while |
| 605 * staying within the range [0 - text length]. |
| 606 * @stable ICU 3.4 |
| 607 */ |
| 608 U_STABLE UBool U_EXPORT2 |
| 609 utext_moveIndex32(UText *ut, int32_t delta); |
| 610 |
| 611 /** |
| 612 * Get the native index of the character preceding the current position. |
| 613 * If the iteration position is already at the start of the text, zero |
| 614 * is returned. |
| 615 * The value returned is the same as that obtained from the following sequence, |
| 616 * but without the side effect of changing the iteration position. |
| 617 * |
| 618 * \code |
| 619 * UText *ut = whatever(); |
| 620 * ... |
| 621 * utext_previous32(ut); |
| 622 * utext_getNativeIndex(ut); |
| 623 * \endcode |
| 624 * |
| 625 * This function is most useful during forwards iteration, where it will get the |
| 626 * native index of the character most recently returned from utext_next32(). |
| 627 * |
| 628 * @param ut the text to be accessed |
| 629 * @return the native index of the character preceding the current index positi
on, |
| 630 * or zero if the current position is at the start of the text. |
| 631 * @stable ICU 3.6 |
| 632 */ |
| 633 U_STABLE int64_t U_EXPORT2 |
| 634 utext_getPreviousNativeIndex(UText *ut); |
| 635 |
| 636 |
| 637 /** |
| 638 * |
| 639 * Extract text from a UText into a UChar buffer. The range of text to be extra
cted |
| 640 * is specified in the native indices of the UText provider. These may not nece
ssarily |
| 641 * be UTF-16 indices. |
| 642 * <p> |
| 643 * The size (number of 16 bit UChars) of the data to be extracted is returned.
The |
| 644 * full number of UChars is returned, even when the extracted text is truncated |
| 645 * because the specified buffer size is too small. |
| 646 * <p> |
| 647 * The extracted string will (if you are a user) / must (if you are a text provi
der) |
| 648 * be NUL-terminated if there is sufficient space in the destination buffer. Th
is |
| 649 * terminating NUL is not included in the returned length. |
| 650 * <p> |
| 651 * The iteration index is left at the position following the last extracted char
acter. |
| 652 * |
| 653 * @param ut the UText from which to extract data. |
| 654 * @param nativeStart the native index of the first character to extract. |
| 655 * If the specified index is out of range, |
| 656 * it will be pinned to be within 0 <= index <= textLength. |
| 657 * @param nativeLimit the native string index of the position following the las
t |
| 658 * character to extract. If the specified index is out of range, |
| 659 * it will be pinned to be within 0 <= index <= textLength. |
| 660 * nativeLimit must be >= nativeStart. |
| 661 * @param dest the UChar (UTF-16) buffer into which the extracted text is plac
ed |
| 662 * @param destCapacity The size, in UChars, of the destination buffer. May be
zero |
| 663 * for precomputing the required size. |
| 664 * @param status receives any error status. |
| 665 * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the
|
| 666 * buffer was too small. Returns number of UChars for preflighting. |
| 667 * @return Number of UChars in the data to be extracted. Does not include a tra
iling NUL. |
| 668 * |
| 669 * @stable ICU 3.4 |
| 670 */ |
| 671 U_STABLE int32_t U_EXPORT2 |
| 672 utext_extract(UText *ut, |
| 673 int64_t nativeStart, int64_t nativeLimit, |
| 674 UChar *dest, int32_t destCapacity, |
| 675 UErrorCode *status); |
| 676 |
| 677 |
| 678 /** |
| 679 * Compare two UTexts (binary order). The comparison begins at each source text'
s |
| 680 * iteration position. The iteration position of each UText will be left followi
ng |
| 681 * the last character compared. |
| 682 * |
| 683 * The comparison is done in code point order; unlike u_strCompare, you |
| 684 * cannot choose to use code unit order. This is because the characters |
| 685 * in a UText are accessed one code point at a time, and may not be from a UTF-1
6 |
| 686 * context. |
| 687 * |
| 688 * This function works with strings of different explicitly specified lengths |
| 689 * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. |
| 690 * A length argument of -1 signifies that as much of the string should be used a
s |
| 691 * is necessary to compare with the other string. If both length arguments are -
1, |
| 692 * the entire remaining portions of both strings are used. |
| 693 * |
| 694 * @param s1 First source string. |
| 695 * @param length1 Length of first source string in UTF-32 code points. |
| 696 * |
| 697 * @param s2 Second source string. |
| 698 * @param length2 Length of second source string in UTF-32 code points. |
| 699 * |
| 700 * @return <0 or 0 or >0 as usual for string comparisons |
| 701 * |
| 702 * @internal ICU 4.4 technology preview |
| 703 */ |
| 704 U_INTERNAL int32_t U_EXPORT2 |
| 705 utext_compare(UText *s1, int32_t length1, |
| 706 UText *s2, int32_t length2); |
| 707 |
| 708 /** |
| 709 * Compare two UTexts (binary order). The comparison begins at each source text'
s |
| 710 * iteration position. The iteration position of each UText will be left followi
ng |
| 711 * the last character compared. This method differs from utext_compare in that |
| 712 * it accepts native limits rather than lengths for each string. |
| 713 * |
| 714 * The comparison is done in code point order; unlike u_strCompare, you |
| 715 * cannot choose to use code unit order. This is because the characters |
| 716 * in a UText are accessed one code point at a time, and may not be from a UTF-1
6 |
| 717 * context. |
| 718 * |
| 719 * This function works with strings of different explicitly specified lengths |
| 720 * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. |
| 721 * A limit argument of -1 signifies that as much of the string should be used as |
| 722 * is necessary to compare with the other string. If both limit arguments are -1
, |
| 723 * the entire remaining portions of both strings are used. |
| 724 * |
| 725 * @param s1 First source string. |
| 726 * @param limit1 Native index of the last character in the first source string t
o be considered. |
| 727 * |
| 728 * @param s2 Second source string. |
| 729 * @param limit2 Native index of the last character in the second source string
to be considered. |
| 730 * |
| 731 * @return <0 or 0 or >0 as usual for string comparisons |
| 732 * |
| 733 * @internal ICU 4.4 technology preview |
| 734 */ |
| 735 U_INTERNAL int32_t U_EXPORT2 |
| 736 utext_compareNativeLimit(UText *s1, int64_t limit1, |
| 737 UText *s2, int64_t limit2); |
| 738 |
| 739 /** |
| 740 * Compare two UTexts case-insensitively using full case folding. The comparison |
| 741 * begins at each source text's iteration position. The iteration position of ea
ch |
| 742 * UText will be left following the last character compared. |
| 743 * |
| 744 * The comparison is done in code point order; this is because the characters |
| 745 * in a UText are accessed one code point at a time, and may not be from a UTF-1
6 |
| 746 * context. |
| 747 * |
| 748 * This function works with strings of different explicitly specified lengths |
| 749 * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. |
| 750 * A length argument of -1 signifies that as much of the string should be used a
s |
| 751 * is necessary to compare with the other string. If both length arguments are -
1, |
| 752 * the entire remaining portions of both strings are used. |
| 753 * |
| 754 * @param s1 First source string. |
| 755 * @param length1 Length of first source string in UTF-32 code points. |
| 756 * |
| 757 * @param s2 Second source string. |
| 758 * @param length2 Length of second source string in UTF-32 code points. |
| 759 * |
| 760 * @param options A bit set of options: |
| 761 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: |
| 762 * Comparison in code point order with default case folding. |
| 763 * |
| 764 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I |
| 765 * |
| 766 * @param pErrorCode Must be a valid pointer to an error code value, |
| 767 * which must not indicate a failure before the function call. |
| 768 * |
| 769 * @return <0 or 0 or >0 as usual for string comparisons |
| 770 * |
| 771 * @internal ICU 4.4 technology preview |
| 772 */ |
| 773 U_INTERNAL int32_t U_EXPORT2 |
| 774 utext_caseCompare(UText *s1, int32_t length1, |
| 775 UText *s2, int32_t length2, |
| 776 uint32_t options, UErrorCode *pErrorCode); |
| 777 |
| 778 /** |
| 779 * Compare two UTexts case-insensitively using full case folding. The comparison |
| 780 * begins at each source text's iteration position. The iteration position of ea
ch |
| 781 * UText will be left following the last character compared. This method differs
from |
| 782 * utext_caseCompare in that it accepts native limits rather than lengths for ea
ch |
| 783 * string. |
| 784 * |
| 785 * The comparison is done in code point order; this is because the characters |
| 786 * in a UText are accessed one code point at a time, and may not be from a UTF-1
6 |
| 787 * context. |
| 788 * |
| 789 * This function works with strings of different explicitly specified lengths |
| 790 * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. |
| 791 * A limit argument of -1 signifies that as much of the string should be used as |
| 792 * is necessary to compare with the other string. If both limit arguments are -
1, |
| 793 * the entire remaining portions of both strings are used. |
| 794 * |
| 795 * @param s1 First source string. |
| 796 * @param limit1 Native index of the last character in the first source string t
o be considered. |
| 797 * |
| 798 * @param s2 Second source string. |
| 799 * @param limit2 Native index of the last character in the second source string
to be considered. |
| 800 * |
| 801 * @param options A bit set of options: |
| 802 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: |
| 803 * Comparison in code point order with default case folding. |
| 804 * |
| 805 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I |
| 806 * |
| 807 * @param pErrorCode Must be a valid pointer to an error code value, |
| 808 * which must not indicate a failure before the function call. |
| 809 * |
| 810 * @return <0 or 0 or >0 as usual for string comparisons |
| 811 * |
| 812 * @internal ICU 4.4 technology preview |
| 813 */ |
| 814 U_INTERNAL int32_t U_EXPORT2 |
| 815 utext_caseCompareNativeLimit(UText *s1, int64_t limit1, |
| 816 UText *s2, int64_t limit2, |
| 817 uint32_t options, UErrorCode *pErrorCode); |
| 818 |
| 819 |
| 820 /*******************************************************************************
***** |
| 821 * |
| 822 * #define inline versions of selected performance-critical text access functio
ns |
| 823 * Caution: do not use auto increment++ or decrement-- expressions |
| 824 * as parameters to these macros. |
| 825 * |
| 826 * For most use, where there is no extreme performance constraint, the |
| 827 * normal, non-inline functions are a better choice. The resulting cod
e |
| 828 * will be smaller, and, if the need ever arises, easier to debug. |
| 829 * |
| 830 * These are implemented as #defines rather than real functions |
| 831 * because there is no fully portable way to do inline functions in pla
in C. |
| 832 * |
| 833 *******************************************************************************
*****/ |
| 834 |
| 835 /** |
| 836 * Inline version of utext_current32(), for performance-critical situations. |
| 837 * |
| 838 * Get the code point at the current iteration position of the UText. |
| 839 * Returns U_SENTINEL (-1) if the position is at the end of the text. |
| 840 * Evaluates ut more than once; pass an expression without side effects. |
| 841 * |
| 842 * @internal ICU 4.4 technology preview |
| 843 */ |
| 844 #define UTEXT_CURRENT32(ut) \ |
| 845 ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkO
ffset]<0xd800 ? \ |
| 846 ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut)) |
| 847 |
| 848 /** |
| 849 * Inline version of utext_next32(), for performance-critical situations. |
| 850 * |
| 851 * Get the code point at the current iteration position of the UText, and |
| 852 * advance the position to the first index following the character. |
| 853 * This is a post-increment operation. |
| 854 * Returns U_SENTINEL (-1) if the position is at the end of the text. |
| 855 * Evaluates ut more than once; pass an expression without side effects. |
| 856 * |
| 857 * @stable ICU 3.4 |
| 858 */ |
| 859 #define UTEXT_NEXT32(ut) \ |
| 860 ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkO
ffset]<0xd800 ? \ |
| 861 ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut)) |
| 862 |
| 863 /** |
| 864 * Inline version of utext_previous32(), for performance-critical situations. |
| 865 * |
| 866 * Move the iterator position to the character (code point) whose index |
| 867 * precedes the current position, and return that character (pre-decrement). |
| 868 * Returns U_SENTINEL (-1) if the position is at the start of the text. |
| 869 * Evaluates ut more than once; pass an expression without side effects. |
| 870 * |
| 871 * @stable ICU 3.4 |
| 872 */ |
| 873 #define UTEXT_PREVIOUS32(ut) \ |
| 874 ((ut)->chunkOffset > 0 && \ |
| 875 (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \ |
| 876 (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut)) |
| 877 |
| 878 /** |
| 879 * Inline version of utext_getNativeIndex(), for performance-critical situatio
ns. |
| 880 * |
| 881 * Get the current iterator position, which can range from 0 to the length of |
| 882 * the text. Evaluates ut more than once; pass an expression without side effects. |
| 883 * The position is a native index into the input text, in whatever format it |
| 884 * may have (possibly UTF-8 for example), and may not always be the same as |
| 885 * the corresponding UChar (UTF-16) index. |
| 886 * The returned position will always be aligned to a code point boundary. |
| 887 * |
| 888 * @stable ICU 3.6 |
| 889 */ |
| 890 #define UTEXT_GETNATIVEINDEX(ut) \ |
| 891 ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \ |
| 892 (ut)->chunkNativeStart+(ut)->chunkOffset : \ |
| 893 (ut)->pFuncs->mapOffsetToNative(ut)) |
| 894 |
| 895 /** |
| 896 * Inline version of utext_setNativeIndex(), for performance-critical situatio
ns. |
| 897 * |
| 898 * Set the current iteration position to the nearest code point boundary at |
| 899 * or preceding the specified native index; out-of-range indexes are pinned |
| 900 * to the range of the input text. Indexes outside the directly indexable |
| 901 * part of the current chunk, or addressing a trail surrogate, are handed to |
| 902 * utext_setNativeIndex(). Both arguments are evaluated more than once. |
| 903 * |
| 904 * @stable ICU 3.8 |
| 905 */ |
| 906 #define UTEXT_SETNATIVEINDEX(ut, ix) \ |
| 907 { int64_t __offset = (ix) - (ut)->chunkNativeStart; \ |
| 908 if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && \ |
| 909 (ut)->chunkContents[__offset]<0xdc00) { (ut)->chunkOffset=(int32_t)__offset; \ |
| 910 } else { \ |
| 911 utext_setNativeIndex((ut), (ix)); } } |
| 912 |
| 913 |
| 914 |
| 915 /*******************************************************************************
***** |
| 916 * |
| 917 * Functions related to writing or modifying the text. |
| 918 * These will work only with modifiable UTexts. Attempting to |
| 919 * modify a read-only UText will return an error status. |
| 920 * |
| 921 *******************************************************************************
*****/ |
| 922 |
| 923 |
| 924 /** |
| 925 * Return TRUE if the text can be written (modified) with utext_replace() or |
| 926 * utext_copy(). For the text to be writable, the text provider must |
| 927 * be of a type that supports writing and the UText must not be frozen. |
| 928 * |
| 929 * Attempting to modify text when utext_isWritable() is FALSE will fail - |
| 930 * the text will not be modified, and an error will be returned from the functi
on |
| 931 * that attempted the modification. |
| 932 * |
| 933 * @param ut the UText to be tested. |
| 934 * @return TRUE if the text is modifiable. |
| 935 * |
| 936 * @see utext_freeze() |
| 937 * @see utext_replace() |
| 938 * @see utext_copy() |
| 939 * @stable ICU 3.4 |
| 940 * |
| 941 */ |
| 942 U_STABLE UBool U_EXPORT2 |
| 943 utext_isWritable(const UText *ut); |
| 944 |
| 945 |
| 946 /** |
| 947 * Test whether there is meta data associated with the text. |
| 948 * @see Replaceable::hasMetaData() |
| 949 * @see UTEXT_PROVIDER_HAS_META_DATA |
| 950 * @param ut The UText to be tested |
| 951 * @return TRUE if the underlying text includes meta data. |
| 952 * @stable ICU 3.4 |
| 953 */ |
| 954 U_STABLE UBool U_EXPORT2 |
| 955 utext_hasMetaData(const UText *ut); |
| 956 |
| 957 |
| 958 /** |
| 959 * Replace a range of the original text with a replacement text. |
| 960 * |
| 961 * Leaves the current iteration position at the position following the |
| 962 * newly inserted replacement text. |
| 963 * |
| 964 * This function is only available on UText types that support writing, |
| 965 * that is, ones where utext_isWritable() returns TRUE. |
| 966 * |
| 967 * When using this function, there should be only a single UText opened onto the |
| 968 * underlying native text string. Behavior after a replace operation |
| 969 * on a UText is undefined for any other additional UTexts that refer to the |
| 970 * modified string. |
| 971 * |
| 972 * @param ut the UText representing the text to be operated on. |
| 973 * @param nativeStart the native index of the start of the region to be rep
laced |
| 974 * @param nativeLimit the native index of the character following the regio
n to be replaced. |
| 975 * @param replacementText pointer to the replacement text |
| 976 * @param replacementLength length of the replacement text in UChars, or -1 if the text is
NUL terminated. |
| 977 * @param status receives any error status. Possible errors include |
| 978 * U_NO_WRITE_PERMISSION |
| 979 * |
| 980 * @return The signed number of (native) storage units by which |
| 981 * the length of the text expanded or contracted. |
| 982 * |
| 983 * @stable ICU 3.4 |
| 984 */ |
| 985 U_STABLE int32_t U_EXPORT2 |
| 986 utext_replace(UText *ut, |
| 987 int64_t nativeStart, int64_t nativeLimit, |
| 988 const UChar *replacementText, int32_t replacementLength, |
| 989 UErrorCode *status); |
| 990 |
| 991 |
| 992 |
| 993 /** |
| 994 * |
| 995 * Copy or move a substring from one position to another within the text, |
| 996 * while retaining any metadata associated with the text. |
| 997 * This function is used to duplicate or reorder substrings. |
| 998 * The destination index must not overlap the source range. |
| 999 * |
| 1000 * The text to be copied or moved is inserted at destIndex; |
| 1001 * it does not replace or overwrite any existing text. |
| 1002 * |
| 1003 * The iteration position is left following the newly inserted text |
| 1004 * at the destination position. |
| 1005 * |
| 1006 * This function is only available on UText types that support writing, |
| 1007 * that is, ones where utext_isWritable() returns TRUE. |
| 1008 * |
| 1009 * When using this function, there should be only a single UText opened onto the |
| 1010 * underlying native text string. Behavior after a copy operation |
| 1011 * on a UText is undefined for any other additional UTexts that refer to the |
| 1012 * modified string. |
| 1013 * |
| 1014 * @param ut The UText representing the text to be operated on. |
| 1015 * @param nativeStart The native index of the start of the region to be copied
or moved |
| 1016 * @param nativeLimit The native index of the character position following the
region |
| 1017 * to be copied. |
| 1018 * @param destIndex The native destination index to which the source substrin
g is |
| 1019 * copied or moved. |
| 1020 * @param move If TRUE, then the substring is moved, not copied/duplicat
ed. |
| 1021 * @param status receives any error status. Possible errors include U_NO_
WRITE_PERMISSION |
| 1022 * |
| 1023 * @stable ICU 3.4 |
| 1024 */ |
| 1025 U_STABLE void U_EXPORT2 |
| 1026 utext_copy(UText *ut, |
| 1027 int64_t nativeStart, int64_t nativeLimit, |
| 1028 int64_t destIndex, |
| 1029 UBool move, |
| 1030 UErrorCode *status); |
| 1031 |
| 1032 |
| 1033 /** |
| 1034 * <p> |
| 1035 * Freeze a UText. This prevents any modification to the underlying text itse
lf |
| 1036 * by means of functions operating on this UText. |
| 1037 * </p> |
| 1038 * <p> |
| 1039 * Once frozen, a UText can not be unfrozen. The intent is to ensure |
| 1040 * that the text underlying a frozen UText wrapper cannot be modified via th
at UText. |
| 1041 * </p> |
| 1042 * <p> |
| 1043 * Caution: freezing a UText will disable changes made via the specific |
| 1044 * frozen UText wrapper only; it will not have any effect on the ability to |
| 1045 * directly modify the text by bypassing the UText. Any such backdoor modifi
cations |
| 1046 * are always an error while UText access is occurring because the underlying |
| 1047 * text can get out of sync with UText's buffering. |
| 1048 * </p> |
| 1049 * |
| 1050 * @param ut The UText to be frozen. |
| 1051 * @see utext_isWritable() |
| 1052 * @stable ICU 3.6 |
| 1053 */ |
| 1054 U_STABLE void U_EXPORT2 |
| 1055 utext_freeze(UText *ut); |
| 1056 |
| 1057 |
| 1058 /** |
| 1059 * UText provider properties (bit field indexes). |
| 1060 * The values are bit positions, not bit masks. |
| 1061 * @see UText |
| 1062 * @stable ICU 3.4 |
| 1063 */ |
| 1064 enum { |
| 1065 /** |
| 1066 * It is potentially time consuming for the provider to determine the length
of the text. |
| 1067 * @stable ICU 3.4 |
| 1068 */ |
| 1069 UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1, |
| 1070 /** |
| 1071 * Text chunks remain valid and usable until the text object is modified or |
| 1072 * deleted, not just until the next time the access() function is called |
| 1073 * (which is the default). |
| 1074 * @stable ICU 3.4 |
| 1075 */ |
| 1076 UTEXT_PROVIDER_STABLE_CHUNKS = 2, |
| 1077 /** |
| 1078 * The provider supports modifying the text via the replace() and copy() |
| 1079 * functions. |
| 1080 * @see Replaceable |
| 1081 * @stable ICU 3.4 |
| 1082 */ |
| 1083 UTEXT_PROVIDER_WRITABLE = 3, |
| 1084 /** |
| 1085 * There is meta data associated with the text. |
| 1086 * @see Replaceable::hasMetaData() |
| 1087 * @stable ICU 3.4 |
| 1088 */ |
| 1089 UTEXT_PROVIDER_HAS_META_DATA = 4, |
| 1090 /** |
| 1091 * Text provider owns the text storage. |
| 1092 * Generally occurs as the result of a deep clone of the UText. |
| 1093 * When closing the UText, the associated text must |
| 1094 * also be closed/deleted/freed/whatever is appropriate. |
| 1095 * @stable ICU 3.6 |
| 1096 */ |
| 1097 UTEXT_PROVIDER_OWNS_TEXT = 5 |
| 1098 }; |
| 1099 |
| 1100 /** |
| 1101 * Function type declaration for UText.clone(). |
| 1102 * |
| 1103 * clone a UText. Much like opening a UText where the source text is itself |
| 1104 * another UText. |
| 1105 * |
| 1106 * A deep clone will copy both the UText data structures and the underlying te
xt. |
| 1107 * The original and cloned UText will operate completely independently; modifi
cations |
| 1108 * made to the text in one will not affect the other. Text providers are not |
| 1109 * required to support deep clones. The user of clone() must check the status
return |
| 1110 * and be prepared to handle failures. |
| 1111 * |
| 1112 * A shallow clone replicates only the UText data structures; it does not make |
| 1113 * a copy of the underlying text. Shallow clones can be used as an efficient
way to |
| 1114 * have multiple iterators active in a single text string that is not being |
| 1115 * modified. |
| 1116 * |
| 1117 * A shallow clone operation must not fail except for truly exceptional condit
ions such |
| 1118 * as memory allocation failures. |
| 1119 * |
| 1120 * A UText and its clone may be safely concurrently accessed by separate threa
ds. |
| 1121 * This is true for both shallow and deep clones. |
| 1122 * It is the responsibility of the Text Provider to ensure that this thread sa
fety |
| 1123 * constraint is met. |
| 1124 * |
| 1125 * |
| 1126 * @param dest A UText struct to be filled in with the result of the clone o
peration, |
| 1127 * or NULL if the clone function should heap-allocate a new UTex
t struct. |
| 1128 * @param src The UText to be cloned. |
| 1129 * @param deep TRUE to request a deep clone, FALSE for a shallow clone. |
| 1130 * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERR
OR |
| 1131 * should be returned if the text provider is unable to clone th
e |
| 1132 * original text. |
| 1133 * @return The newly created clone, or NULL if the clone operation faile
d. |
| 1134 * |
| 1135 * @stable ICU 3.4 |
| 1136 */ |
| 1137 typedef UText * U_CALLCONV |
| 1138 UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status); |
| 1139 |
| 1140 |
| 1141 /** |
| 1142 * Function type declaration for UText.nativeLength(). |
| 1143 * The length may be expensive to compute (see UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE). |
| 1144 * @param ut the UText to get the length of. |
| 1145 * @return the length, in the native units of the original text string. |
| 1146 * @see UText |
| 1147 * @stable ICU 3.4 |
| 1148 */ |
| 1149 typedef int64_t U_CALLCONV |
| 1150 UTextNativeLength(UText *ut); |
| 1151 |
| 1152 /** |
| 1153 * Function type declaration for UText.access(). Get the description of the tex
t chunk |
| 1154 * containing the text at a requested native index. The UText's iteration |
| 1155 * position will be left at the requested index. If the index is out |
| 1156 * of bounds, the iteration position will be left at the start or end |
| 1157 * of the string, as appropriate. |
| 1158 * |
| 1159 * Chunks must begin and end on code point boundaries. A single code point |
| 1160 * comprised of multiple storage units must never span a chunk boundary. |
| 1161 * |
| 1162 * |
| 1163 * @param ut the UText being accessed. |
| 1164 * @param nativeIndex Requested index of the text to be accessed. |
| 1165 * @param forward If TRUE, then the returned chunk must contain text |
| 1166 * starting from the index, so that start<=index<limit. |
| 1167 * If FALSE, then the returned chunk must contain text |
| 1168 * before the index, so that start<index<=limit. |
| 1169 * @return TRUE if the requested index could be accessed. The chunk |
| 1170 * will contain the requested text. |
| 1171 * FALSE if a chunk cannot be accessed |
| 1172 * (the requested index is out of bounds). |
| 1173 * |
| 1174 * @see UText |
| 1175 * @stable ICU 3.4 |
| 1176 */ |
| 1177 typedef UBool U_CALLCONV |
| 1178 UTextAccess(UText *ut, int64_t nativeIndex, UBool forward); |
| 1179 |
| 1180 /** |
| 1181 * Function type declaration for UText.extract(). |
| 1182 * |
| 1183 * Extract text from a UText into a UChar buffer. The range of text to be extra
cted |
| 1184 * is specified in the native indices of the UText provider. These may not nece
ssarily |
| 1185 * be UTF-16 indices. |
| 1186 * <p> |
| 1187 * The size (number of 16 bit UChars) in the data to be extracted is returned.
The |
| 1188 * full amount is returned, even when the specified buffer size is smaller. |
| 1189 * <p> |
| 1190 * The extracted string will (if you are a user) / must (if you are a text provi
der) |
| 1191 * be NUL-terminated if there is sufficient space in the destination buffer. |
| 1192 * |
| 1193 * @param ut the UText from which to extract data. |
| 1194 * @param nativeStart the native index of the first character to extract. |
| 1195 * @param nativeLimit the native string index of the position following the l
ast |
| 1196 * character to extract. |
| 1197 * @param dest the UChar (UTF-16) buffer into which the extracted text
is placed |
| 1198 * @param destCapacity The size, in UChars, of the destination buffer. May be
zero |
| 1199 * for precomputing the required size. |
| 1200 * @param status receives any error status. |
| 1201 * If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars fo
r |
| 1202 * preflighting. |
| 1203 * @return Number of UChars in the data. Does not include a trailing NUL. |
| 1204 * |
| 1205 * @stable ICU 3.4 |
| 1206 */ |
| 1207 typedef int32_t U_CALLCONV |
| 1208 UTextExtract(UText *ut, |
| 1209 int64_t nativeStart, int64_t nativeLimit, |
| 1210 UChar *dest, int32_t destCapacity, |
| 1211 UErrorCode *status); |
| 1212 |
| 1213 /** |
| 1214 * Function type declaration for UText.replace(). |
| 1215 * |
| 1216 * Replace a range of the original text with a replacement text. |
| 1217 * |
| 1218 * Leaves the current iteration position at the position following the |
| 1219 * newly inserted replacement text. |
| 1220 * |
| 1221 * This function need only be implemented on UText types that support writing. |
| 1222 * |
| 1223 * When using this function, there should be only a single UText opened onto the |
| 1224 * underlying native text string. The function is responsible for updating the |
| 1225 * text chunk within the UText to reflect the updated iteration position, |
| 1226 * taking into account any changes to the underlying string's structure caused |
| 1227 * by the replace operation. |
| 1228 * |
| 1229 * @param ut the UText representing the text to be operated on. |
| 1230 * @param nativeStart the native index of the start of the region to be replaced |
| 1231 * @param nativeLimit the native index of the character following the region to be
replaced. |
| 1232 * @param replacementText pointer to the replacement text |
| 1233 * @param replacmentLength length of the replacement text in UChars, or -1 if th
e text is NUL terminated. |
| 1234 * @param status receives any error status. Possible errors include |
| 1235 * U_NO_WRITE_PERMISSION |
| 1236 * |
| 1237 * @return The signed number of (native) storage units by which |
| 1238 * the length of the text expanded or contracted. |
| 1239 * |
| 1240 * @stable ICU 3.4 |
| 1241 */ |
| 1242 typedef int32_t U_CALLCONV |
| 1243 UTextReplace(UText *ut, |
| 1244 int64_t nativeStart, int64_t nativeLimit, |
| 1245 const UChar *replacementText, int32_t replacmentLength, |
| 1246 UErrorCode *status); |
| 1247 |
| 1248 /** |
| 1249 * Function type declaration for UText.copy(). |
| 1250 * |
| 1251 * Copy or move a substring from one position to another within the text, |
| 1252 * while retaining any metadata associated with the text. |
| 1253 * This function is used to duplicate or reorder substrings. |
| 1254 * The destination index must not overlap the source range. |
| 1255 * |
| 1256 * The text to be copied or moved is inserted at destIndex; |
| 1257 * it does not replace or overwrite any existing text. |
| 1258 * |
| 1259 * This function need only be implemented for UText types that support writing. |
| 1260 * |
| 1261 * When using this function, there should be only a single UText opened onto the |
| 1262 * underlying native text string. The function is responsible for updating the |
| 1263 * text chunk within the UText to reflect the updated iteration position, |
| 1264 * taking into account any changes to the underlying string's structure caused |
| 1265 * by the copy operation. |
| 1266 * |
| 1267 * @param ut The UText representing the text to be operated on. |
| 1268 * @param nativeStart The index of the start of the region to be copied or move
d |
| 1269 * @param nativeLimit The index of the character following the region to be cop
ied or moved. |
| 1270 * @param nativeDest The destination index to which the source substring is co
pied or moved. |
| 1271 * @param move If TRUE, then the substring is moved, not copied/duplicat
ed. |
| 1272 * @param status receives any error status. Possible errors include U_NO_
WRITE_PERMISSION |
| 1273 * |
| 1274 * @stable ICU 3.4 |
| 1275 */ |
| 1276 typedef void U_CALLCONV |
| 1277 UTextCopy(UText *ut, |
| 1278 int64_t nativeStart, int64_t nativeLimit, |
| 1279 int64_t nativeDest, |
| 1280 UBool move, |
| 1281 UErrorCode *status); |
| 1282 |
| 1283 /** |
| 1284 * Function type declaration for UText.mapOffsetToNative(). |
| 1285 * Map from the current UChar offset within the current text chunk to |
| 1286 * the corresponding native index in the original source text. |
| 1287 * |
| 1288 * This is required only for text providers that do not use native UTF-16 indexe
s. |
| 1289 * |
| 1290 * @param ut the UText. |
| 1291 * @return Absolute (native) index corresponding to chunkOffset in the current c
hunk. |
| 1292 * The returned native index should always be on a code point boundary. |
| 1293 * |
| 1294 * @stable ICU 3.4 |
| 1295 */ |
| 1296 typedef int64_t U_CALLCONV |
| 1297 UTextMapOffsetToNative(const UText *ut); |
| 1298 |
| 1299 /** |
| 1300 * Function type declaration for UText.mapNativeIndexToUTF16(). |
| 1301 * Map from a native index to a UChar offset within a text chunk. |
| 1302 * Behavior is undefined if the native index does not fall within the |
| 1303 * current chunk. |
| 1304 * |
| 1305 * This function is required only for text providers that do not use native UTF-
16 indexes. |
| 1306 * |
| 1307 * @param ut The UText containing the text chunk. |
| 1308 * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->
limit. |
| 1309 * @return Chunk-relative UTF-16 offset corresponding to the specifie
d native |
| 1310 * index. |
| 1311 * |
| 1312 * @stable ICU 3.4 |
| 1313 */ |
| 1314 typedef int32_t U_CALLCONV |
| 1315 UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex); |
| 1316 |
| 1317 |
| 1318 /** |
| 1319 * Function type declaration for UText.close(). |
| 1320 * |
| 1321 * A Text Provider close function is only required for provider types that make |
| 1322 * allocations in their open function (or other functions) that must be |
| 1323 * cleaned when the UText is closed. |
| 1324 * |
| 1325 * The allocation of the UText struct itself and any "extra" storage |
| 1326 * associated with the UText is handled by the common UText implementation |
| 1327 * and does not require provider specific cleanup in a close function. |
| 1328 * |
| 1329 * Most UText provider implementations do not need to implement this function. |
| 1330 * |
| 1331 * @param ut A UText object to be closed. |
| 1332 * |
| 1333 * @stable ICU 3.4 |
| 1334 */ |
| 1335 typedef void U_CALLCONV |
| 1336 UTextClose(UText *ut); |
| 1337 |
| 1338 |
| 1339 /** |
| 1340 * (public) Function dispatch table for UText. |
| 1341 * Conceptually very much like a C++ Virtual Function Table. |
| 1342 * This struct defines the organization of the table. |
| 1343 * Each text provider implementation must provide an |
| 1344 * actual table that is initialized with the appropriate functions |
| 1345 * for the type of text being handled. |
| 1346 * @stable ICU 3.6 |
| 1347 */ |
| 1348 struct UTextFuncs { |
| 1349 /** |
| 1350 * (public) Function table size, sizeof(UTextFuncs) |
| 1351 * Intended for use should the table grow to accommodate added |
| 1352 * functions in the future, to allow tests for older format |
| 1353 * function tables that do not contain the extensions. |
| 1354 * |
| 1355 * Fields are placed for optimal alignment on |
| 1356 * 32/64/128-bit-pointer machines, by normally grouping together |
| 1357 * 4 32-bit fields, |
| 1358 * 4 pointers, |
| 1359 * 2 64-bit fields |
| 1360 * in sequence. |
| 1361 * @stable ICU 3.6 |
| 1362 */ |
| 1363 int32_t tableSize; |
| 1364 |
| 1365 /** |
| 1366 * (private) Alignment padding. |
| 1367 * Do not use, reserved for use by the UText framework only. |
| 1368 * @internal |
| 1369 */ |
| 1370 int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserv
ed3; |
| 1371 |
| 1372 |
| 1373 /** |
| 1374 * (public) Function pointer for UTextClone |
| 1375 * |
| 1376 * @see UTextClone |
| 1377 * @stable ICU 3.6 |
| 1378 */ |
| 1379 UTextClone *clone; |
| 1380 |
| 1381 /** |
| 1382 * (public) Function pointer for UTextNativeLength. |
| 1383 * May be expensive to compute! |
| 1384 * |
| 1385 * @see UTextNativeLength |
| 1386 * @stable ICU 3.6 |
| 1387 */ |
| 1388 UTextNativeLength *nativeLength; |
| 1389 |
| 1390 /** |
| 1391 * (public) Function pointer for UTextAccess. |
| 1392 * |
| 1393 * @see UTextAccess |
| 1394 * @stable ICU 3.6 |
| 1395 */ |
| 1396 UTextAccess *access; |
| 1397 |
| 1398 /** |
| 1399 * (public) Function pointer for UTextExtract. |
| 1400 * |
| 1401 * @see UTextExtract |
| 1402 * @stable ICU 3.6 |
| 1403 */ |
| 1404 UTextExtract *extract; |
| 1405 |
| 1406 /** |
| 1407 * (public) Function pointer for UTextReplace. |
| 1408 * |
| 1409 * @see UTextReplace |
| 1410 * @stable ICU 3.6 |
| 1411 */ |
| 1412 UTextReplace *replace; |
| 1413 |
| 1414 /** |
| 1415 * (public) Function pointer for UTextCopy. |
| 1416 * |
| 1417 * @see UTextCopy |
| 1418 * @stable ICU 3.6 |
| 1419 */ |
| 1420 UTextCopy *copy; |
| 1421 |
| 1422 /** |
| 1423 * (public) Function pointer for UTextMapOffsetToNative. |
| 1424 * |
| 1425 * @see UTextMapOffsetToNative |
| 1426 * @stable ICU 3.6 |
| 1427 */ |
| 1428 UTextMapOffsetToNative *mapOffsetToNative; |
| 1429 |
| 1430 /** |
| 1431 * (public) Function pointer for UTextMapNativeIndexToUTF16. |
| 1432 * |
| 1433 * @see UTextMapNativeIndexToUTF16 |
| 1434 * @stable ICU 3.6 |
| 1435 */ |
| 1436 UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16; |
| 1437 |
| 1438 /** |
| 1439 * (public) Function pointer for UTextClose. |
| 1440 * |
| 1441 * @see UTextClose |
| 1442 * @stable ICU 3.6 |
| 1443 */ |
| 1444 UTextClose *close; |
| 1445 |
| 1446 /** |
| 1447 * (private) Spare function pointer |
| 1448 * @internal |
| 1449 */ |
| 1450 UTextClose *spare1; |
| 1451 |
| 1452 /** |
| 1453 * (private) Spare function pointer |
| 1454 * @internal |
| 1455 */ |
| 1456 UTextClose *spare2; |
| 1457 |
| 1458 /** |
| 1459 * (private) Spare function pointer |
| 1460 * @internal |
| 1461 */ |
| 1462 UTextClose *spare3; |
| 1463 |
| 1464 }; |
| 1465 /** |
| 1466 * Type name for the UText function dispatch table, struct UTextFuncs. |
| 1467 * @see UTextFuncs |
| 1468 */ |
| 1469 typedef struct UTextFuncs UTextFuncs; |
| 1470 |
| 1471 /** |
| 1472 * UText struct. Provides the interface between the generic UText access cod
e |
| 1473 * and the UText provider code that works on specific kinds of |
| 1474 * text (UTF-8, noncontiguous UTF-16, whatever.) |
| 1475 * |
| 1476 * Applications that are using predefined types of text provid
ers |
| 1477 * to pass text data to ICU services will have no need to view
the |
| 1478 * internals of the UText structs that they open. |
| 1479 * |
| 1480 * @stable ICU 3.6 |
| 1481 */ |
| 1482 struct UText { |
| 1483 /** |
| 1484 * (private) Magic. Used to help detect when UText functions are hande
d |
| 1485 * invalid or uninitialized UText structs. |
| 1486 * utext_openXYZ() functions take an initialized, |
| 1487 * but not necessarily open, UText struct as an |
| 1488 * optional fill-in parameter. This magic field |
| 1489 * is used to check for that initialization. |
| 1490 * Text provider close functions must NOT clear |
| 1491 * the magic field because that would prevent |
| 1492 * reuse of the UText struct. |
| 1493 * @internal |
| 1494 */ |
| 1495 uint32_t magic; |
| 1496 |
| 1497 |
| 1498 /** |
| 1499 * (private) Flags for managing the allocation and freeing of |
| 1500 * memory associated with this UText. |
| 1501 * @internal |
| 1502 */ |
| 1503 int32_t flags; |
| 1504 |
| 1505 |
| 1506 /** |
| 1507 * Text provider properties. This set of flags is maintained by the |
| 1508 * text provider implementation. |
| 1509 * @stable ICU 3.4 |
| 1510 */ |
| 1511 int32_t providerProperties; |
| 1512 |
| 1513 /** |
| 1514 * (public) sizeOfStruct=sizeof(UText) |
| 1515 * Allows possible backward compatible extension. |
| 1516 * |
| 1517 * @stable ICU 3.4 |
| 1518 */ |
| 1519 int32_t sizeOfStruct; |
| 1520 |
| 1521 /* ------ 16 byte alignment boundary ----------- */ |
| 1522 |
| 1523 |
| 1524 /** |
| 1525 * (protected) Native index of the first character position following |
| 1526 * the current chunk. |
| 1527 * @stable ICU 3.6 |
| 1528 */ |
| 1529 int64_t chunkNativeLimit; |
| 1530 |
| 1531 /** |
| 1532 * (protected) Size in bytes of the extra space (pExtra). |
| 1533 * @stable ICU 3.4 |
| 1534 */ |
| 1535 int32_t extraSize; |
| 1536 |
| 1537 /** |
| 1538 * (protected) The highest chunk offset where native indexing and |
| 1539 * chunk (UTF-16) indexing correspond. For UTF-16 sources, value |
| 1540 * will be equal to chunkLength. |
| 1541 * |
| 1542 * @stable ICU 3.6 |
| 1543 */ |
| 1544 int32_t nativeIndexingLimit; |
| 1545 |
| 1546 /* ---- 16 byte alignment boundary------ */ |
| 1547 |
| 1548 /** |
| 1549 * (protected) Native index of the first character in the text chunk. |
| 1550 * @stable ICU 3.6 |
| 1551 */ |
| 1552 int64_t chunkNativeStart; |
| 1553 |
| 1554 /** |
| 1555 * (protected) Current iteration position within the text chunk (UTF-16 buf
fer). |
| 1556 * This is the index to the character that will be returned by utext_next32
(). |
| 1557 * @stable ICU 3.6 |
| 1558 */ |
| 1559 int32_t chunkOffset; |
| 1560 |
| 1561 /** |
| 1562 * (protected) Length of the text chunk (UTF-16 buffer), in UChars. |
| 1563 * @stable ICU 3.6 |
| 1564 */ |
| 1565 int32_t chunkLength; |
| 1566 |
| 1567 /* ---- 16 byte alignment boundary-- */ |
| 1568 |
| 1569 |
| 1570 /** |
| 1571 * (protected) pointer to a chunk of text in UTF-16 format. |
| 1572 * May refer either to original storage of the source of the text, or |
| 1573 * if conversion was required, to a buffer owned by the UText. |
| 1574 * @stable ICU 3.6 |
| 1575 */ |
| 1576 const UChar *chunkContents; |
| 1577 |
| 1578 /** |
| 1579 * (public) Pointer to Dispatch table for accessing functions for this UText. |
| 1580 * @stable ICU 3.6 |
| 1581 */ |
| 1582 const UTextFuncs *pFuncs; |
| 1583 |
| 1584 /** |
| 1585 * (protected) Pointer to additional space requested by the |
| 1586 * text provider during the utext_open operation. |
| 1587 * @stable ICU 3.4 |
| 1588 */ |
| 1589 void *pExtra; |
| 1590 |
| 1591 /** |
| 1592 * (protected) Pointer to string or text-containing object or similar. |
| 1593 * This is the source of the text that this UText is wrapping, in a format |
| 1594 * that is known to the text provider functions. |
| 1595 * @stable ICU 3.4 |
| 1596 */ |
| 1597 const void *context; |
| 1598 |
| 1599 /* --- 16 byte alignment boundary--- */ |
| 1600 |
| 1601 /** |
| 1602 * (protected) Pointer fields available for use by the text provider. |
| 1603 * Not used by UText common code. |
| 1604 * @stable ICU 3.6 |
| 1605 */ |
| 1606 const void *p; |
| 1607 /** |
| 1608 * (protected) Pointer fields available for use by the text provider. |
| 1609 * Not used by UText common code. |
| 1610 * @stable ICU 3.6 |
| 1611 */ |
| 1612 const void *q; |
| 1613 /** |
| 1614 * (protected) Pointer fields available for use by the text provider. |
| 1615 * Not used by UText common code. |
| 1616 * @stable ICU 3.6 |
| 1617 */ |
| 1618 const void *r; |
| 1619 |
| 1620 /** |
| 1621 * Private field reserved for future use by the UText framework |
| 1622 * itself. This is not to be touched by the text providers. |
| 1623 * @internal ICU 3.4 |
| 1624 */ |
| 1625 void *privP; |
| 1626 |
| 1627 |
| 1628 /* --- 16 byte alignment boundary--- */ |
| 1629 |
| 1630 |
| 1631 /** |
| 1632 * (protected) Integer field reserved for use by the text provider. |
| 1633 * Not used by the UText framework, or by the client (user) of the UText. |
| 1634 * @stable ICU 3.4 |
| 1635 */ |
| 1636 int64_t a; |
| 1637 |
| 1638 /** |
| 1639 * (protected) Integer field reserved for use by the text provider. |
| 1640 * Not used by the UText framework, or by the client (user) of the UText. |
| 1641 * @stable ICU 3.4 |
| 1642 */ |
| 1643 int32_t b; |
| 1644 |
| 1645 /** |
| 1646 * (protected) Integer field reserved for use by the text provider. |
| 1647 * Not used by the UText framework, or by the client (user) of the UText. |
| 1648 * @stable ICU 3.4 |
| 1649 */ |
| 1650 int32_t c; |
| 1651 |
| 1652 /* ---- 16 byte alignment boundary---- */ |
| 1653 |
| 1654 |
| 1655 /** |
| 1656 * Private field reserved for future use by the UText framework |
| 1657 * itself. This is not to be touched by the text providers. |
| 1658 * @internal ICU 3.4 |
| 1659 */ |
| 1660 int64_t privA; |
| 1661 /** |
| 1662 * Private field reserved for future use by the UText framework |
| 1663 * itself. This is not to be touched by the text providers. |
| 1664 * @internal ICU 3.4 |
| 1665 */ |
| 1666 int32_t privB; |
| 1667 /** |
| 1668 * Private field reserved for future use by the UText framework |
| 1669 * itself. This is not to be touched by the text providers. |
| 1670 * @internal ICU 3.4 |
| 1671 */ |
| 1672 int32_t privC; |
| 1673 }; |
| 1674 |
| 1675 |
| 1676 /** |
| 1677 * Common function for use by Text Provider implementations to allocate and/or initialize |
| 1678 * a new UText struct. To be called in the implementation of utext_open() functions. |
| 1679 * If the supplied UText parameter is null, a new UText struct will be allocated on the heap. |
| 1680 * If the supplied UText is already open, the provider's close function will be called |
| 1681 * so that the struct can be reused by the open that is in progress. |
| 1682 * |
| 1683 * @param ut Pointer to a UText struct to be re-used, or null if a new UText |
| 1684 * should be allocated. |
| 1685 * @param extraSpace The amount of additional space to be allocated as part |
| 1686 * of this UText, for use by types of providers that require |
| 1687 * additional storage. |
| 1688 * @param status Errors are returned here. |
| 1689 * @return Pointer to the UText, allocated if necessary, with extra space set up if requested. |
| 1690 * @stable ICU 3.4 |
| 1691 */ |
| 1692 U_STABLE UText * U_EXPORT2 |
| 1693 utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status); |
| 1694 |
| 1695 /** |
| 1696 * @internal |
| 1697 * Value used to help identify correctly initialized UText structs; it is the first value supplied by UTEXT_INITIALIZER. |
| 1698 * Note: although internal, it must be publicly visible so that UTEXT_INITIALIZER can access it. |
| 1699 */ |
| 1700 enum { |
| 1701 UTEXT_MAGIC = 0x345ad82c |
| 1702 }; |
| 1703 |
| 1704 /** |
| 1705 * Initializer to be used with local (stack) instances of a UText struct. |
| 1706 * UText structs must be initialized before passing them to one of the |
| 1707 * utext_open functions. The values are positional; the comment on each row names the UText field(s) that row initializes. |
| 1708 * |
| 1709 * @stable ICU 3.6 |
| 1710 */ |
| 1711 #define UTEXT_INITIALIZER { \ |
| 1712 UTEXT_MAGIC, /* magic */ \ |
| 1713 0, /* flags */ \ |
| 1714 0, /* providerProperties */ \ |
| 1715 sizeof(UText), /* sizeOfStruct */ \ |
| 1716 0, /* chunkNativeLimit */ \ |
| 1717 0, /* extraSize */ \ |
| 1718 0, /* nativeIndexingLimit */ \ |
| 1719 0, /* chunkNativeStart */ \ |
| 1720 0, /* chunkOffset */ \ |
| 1721 0, /* chunkLength */ \ |
| 1722 NULL, /* chunkContents */ \ |
| 1723 NULL, /* pFuncs */ \ |
| 1724 NULL, /* pExtra */ \ |
| 1725 NULL, /* context */ \ |
| 1726 NULL, NULL, NULL, /* p, q, r */ \ |
| 1727 NULL, /* privP */ \ |
| 1728 0, 0, 0, /* a, b, c */ \ |
| 1729 0, 0, 0 /* privA, privB, privC */ \ |
| 1730 } |
| 1731 |
| 1732 |
| 1733 U_CDECL_END |
| 1734 |
| 1735 |
| 1736 |
| 1737 #endif |
OLD | NEW |