| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 ******************************************************************************* | |
| 3 * | |
| 4 * Copyright (C) 2004-2010, International Business Machines | |
| 5 * Corporation and others. All Rights Reserved. | |
| 6 * | |
| 7 ******************************************************************************* | |
| 8 * file name: utext.h | |
| 9 * encoding: US-ASCII | |
| 10 * tab size: 8 (not used) | |
| 11 * indentation:4 | |
| 12 * | |
| 13 * created on: 2004oct06 | |
| 14 * created by: Markus W. Scherer | |
| 15 */ | |
| 16 | |
| 17 #ifndef __UTEXT_H__ | |
| 18 #define __UTEXT_H__ | |
| 19 | |
| 20 /** | |
| 21 * \file | |
| 22 * \brief C API: Abstract Unicode Text API | |
| 23 * | |
| 24 * The Text Access API provides a means to allow text that is stored in alternat
ive | |
| 25 * formats to work with ICU services. ICU normally operates on text that is | |
| 26 * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type | |
| 27 * UnicodeString for C++ APIs. | |
| 28 * | |
| 29 * ICU Text Access allows other formats, such as UTF-8 or non-contiguous | |
| 30 * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU servic
es. | |
| 31 * | |
| 32 * There are three general classes of usage for UText: | |
| 33 * | |
| 34 * Application Level Use. This is the simplest usage - applications would | |
| 35 * use one of the utext_open() functions on their input text, and pass | |
| 36 * the resulting UText to the desired ICU service. | |
| 37 * | |
| 38 * Second is usage in ICU Services, such as break iteration, that will need
to | |
| 39 * operate on input presented to them as a UText. These implementations | |
| 40 * will need to use the iteration and related UText functions to gain | |
| 41 * access to the actual text. | |
| 42 * | |
| 43 * The third class of UText users are "text providers." These are the | |
| 44 * UText implementations for the various text storage formats. An applicati
on | |
| 45 * or system with a unique text storage format can implement a set of | |
| 46 * UText provider functions for that format, which will then allow | |
| 47 * ICU services to operate on that format. | |
| 48 * | |
| 49 * | |
| 50 * <em>Iterating over text</em> | |
| 51 * | |
| 52 * Here is sample code for a forward iteration over the contents of a UText | |
| 53 * | |
| 54 * \code | |
| 55 * UChar32 c; | |
| 56 * UText *ut = whatever(); | |
| 57 * | |
| 58 * for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) { | |
| 59 * // do whatever with the codepoint c here. | |
| 60 * } | |
| 61 * \endcode | |
| 62 * | |
| 63 * And here is similar code to iterate in the reverse direction, from the end | |
| 64 * of the text towards the beginning. | |
| 65 * | |
| 66 * \code | |
| 67 * UChar32 c; | |
| 68 * UText *ut = whatever(); | |
| 69 * int64_t textLength = utext_nativeLength(ut); | |
| 70 * for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut))
{ | |
| 71 * // do whatever with the codepoint c here. | |
| 72 * } | |
| 73 * \endcode | |
| 74 * | |
| 75 * <em>Characters and Indexing</em> | |
| 76 * | |
| 77 * Indexing into text by UText functions is nearly always in terms of the native | |
| 78 * indexing of the underlying text storage. The storage format could be UTF-8 | |
| 79 * or UTF-32, for example. When coding to the UText access API, no assumptions | |
| 80 * can be made regarding the size of characters, or how far an index | |
| 81 * may move when iterating between characters. | |
| 82 * | |
| 83 * All indices supplied to UText functions are pinned to the length of the | |
| 84 * text. An out-of-bounds index is not considered to be an error, but is | |
| 85 * adjusted to be in the range 0 <= index <= length of input text. | |
| 86 * | |
| 87 * | |
| 88 * When an index position is returned from a UText function, it will be | |
| 89 * a native index to the underlying text. In the case of multi-unit characters, | |
| 90 * it will always refer to the first position of the character, | |
| 91 * never to the interior. This is essentially the same thing as saying that | |
| 92 * a returned index will always point to a boundary between characters. | |
| 93 * | |
| 94 * When a native index is supplied to a UText function, all indices that | |
| 95 * refer to any part of a multi-unit character representation are considered | |
| 96 * to be equivalent. In the case of multi-unit characters, an incoming index | |
| 97 * will be logically normalized to refer to the start of the character. | |
| 98 * | |
| 99 * It is possible to test whether a native index is on a code point boundary | |
| 100 * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex(). | |
| 101 * If the index is returned unchanged, it was on a code point boundary. If | |
| 102 * an adjusted index is returned, the original index referred to the | |
| 103 * interior of a character. | |
| 104 * | |
| 105 * <em>Conventions for calling UText functions</em> | |
| 106 * | |
| 107 * Most UText access functions have as their first parameter a (UText *) pointer
, | |
| 108 * which specifies the UText to be used. Unless otherwise noted, the | |
| 109 * pointer must refer to a valid, open UText. Attempting to | |
| 110 * use a closed UText or passing a NULL pointer is a programming error and | |
| 111 * will produce undefined results or NULL pointer exceptions. | |
| 112 * | |
| 113 * The utext_open family of functions can either open an existing (closed) | |
| 114 * UText, or heap allocate a new UText. Here is sample code for creating | |
| 115 * a stack-allocated UText. | |
| 116 * | |
| 117 * \code | |
| 118 * char *s = whatever(); // A utf-8 string | |
| 119 * UErrorCode status = U_ZERO_ERROR; | |
| 120 * UText ut = UTEXT_INITIALIZER; | |
| 121 * utext_openUTF8(&ut, s, -1, &status); | |
| 122 * if (U_FAILURE(status)) { | |
| 123 * // error handling | |
| 124 * } else { | |
| 125 * // work with the UText | |
| 126 * } | |
| 127 * \endcode | |
| 128 * | |
| 129 * Any existing UText passed to an open function _must_ have been initialized, | |
| 130 * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated | |
| 131 * by an open function. Passing NULL will cause the open function to | |
| 132 * heap-allocate and fully initialize a new UText. | |
| 133 * | |
| 134 */ | |
| 135 | |
| 136 | |
| 137 | |
| 138 #include "unicode/utypes.h" | |
| 139 #include "unicode/uchar.h" | |
| 140 #if U_SHOW_CPLUSPLUS_API | |
| 141 #include "unicode/localpointer.h" | |
| 142 #include "unicode/rep.h" | |
| 143 #include "unicode/unistr.h" | |
| 144 #include "unicode/chariter.h" | |
| 145 #endif | |
| 146 | |
| 147 | |
| 148 U_CDECL_BEGIN | |
| 149 | |
| 150 struct UText; | |
| 151 typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */ | |
| 152 | |
| 153 | |
| 154 /*******************************************************************************
******** | |
| 155 * | |
| 156 * C Functions for creating UText wrappers around various kinds of text string
s. | |
| 157 * | |
| 158 *******************************************************************************
*********/ | |
| 159 | |
| 160 | |
| 161 /** | |
| 162 * Close function for UText instances. | |
| 163 * Cleans up, releases any resources being held by an open UText. | |
| 164 * <p> | |
| 165 * If the UText was originally allocated by one of the utext_open functions, | |
| 166 * the storage associated with the utext will also be freed. | |
| 167 * If the UText storage originated with the application, as it would with | |
| 168 * a local or static instance, the storage will not be deleted. | |
| 169 * | |
| 170 * An open UText can be reset to refer to a new string by using one of the utex
t_open() | |
| 171 * functions without first closing the UText. | |
| 172 * | |
| 173 * @param ut The UText to be closed. | |
| 174 * @return NULL if the UText struct was deleted by the close. If the UText
struct | |
| 175 * was originally provided by the caller to the open function, it is | |
| 176 * returned by this function, and may be safely used again in | |
| 177 * a subsequent utext_open. | |
| 178 * | |
| 179 * @stable ICU 3.4 | |
| 180 */ | |
| 181 U_STABLE UText * U_EXPORT2 | |
| 182 utext_close(UText *ut); | |
| 183 | |
| 184 #if U_SHOW_CPLUSPLUS_API | |
| 185 | |
| 186 U_NAMESPACE_BEGIN | |
| 187 | |
| 188 /** | |
| 189 * \class LocalUTextPointer | |
| 190 * "Smart pointer" class, closes a UText via utext_close(). | |
| 191 * For most methods see the LocalPointerBase base class. | |
| 192 * | |
| 193 * @see LocalPointerBase | |
| 194 * @see LocalPointer | |
| 195 * @stable ICU 4.4 | |
| 196 */ | |
| 197 U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close); | |
| 198 | |
| 199 U_NAMESPACE_END | |
| 200 | |
| 201 #endif | |
| 202 | |
| 203 /** | |
| 204 * Open a read-only UText implementation for UTF-8 strings. | |
| 205 * | |
| 206 * \htmlonly | |
| 207 * Any invalid UTF-8 in the input will be handled in this way: | |
| 208 * a sequence of bytes that has the form of a truncated, but otherwise valid, | |
| 209 * UTF-8 sequence will be replaced by a single unicode replacement character, \u
FFFD. | |
| 210 * Any other illegal bytes will each be replaced by a \uFFFD. | |
| 211 * \endhtmlonly | |
| 212 * | |
| 213 * @param ut Pointer to a UText struct. If NULL, a new UText will be create
d. | |
| 214 * If non-NULL, must refer to an initialized UText struct, which w
ill then | |
| 215 * be reset to reference the specified UTF-8 string. | |
| 216 * @param s A UTF-8 string. Must not be NULL. | |
| 217 * @param length The length of the UTF-8 string in bytes, or -1 if the string is | |
| 218 * zero terminated. | |
| 219 * @param status Errors are returned here. | |
| 220 * @return A pointer to the UText. If a pre-allocated UText was provided,
it | |
| 221 * will always be used and returned. | |
| 222 * @stable ICU 3.4 | |
| 223 */ | |
| 224 U_STABLE UText * U_EXPORT2 | |
| 225 utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status); | |
| 226 | |
| 227 | |
| 228 /** | |
| 229 * Open a read-only UText for UChar * string. | |
| 230 * | |
| 231 * @param ut Pointer to a UText struct. If NULL, a new UText will be create
d. | |
| 232 * If non-NULL, must refer to an initialized UText struct, which w
ill then | |
| 233 * be reset to reference the specified UChar string. | |
| 234 * @param s A UChar (UTF-16) string | |
| 235 * @param length The number of UChars in the input string, or -1 if the string i
s | |
| 236 * zero terminated. | |
| 237 * @param status Errors are returned here. | |
| 238 * @return A pointer to the UText. If a pre-allocated UText was provided,
it | |
| 239 * will always be used and returned. | |
| 240 * @stable ICU 3.4 | |
| 241 */ | |
| 242 U_STABLE UText * U_EXPORT2 | |
| 243 utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status); | |
| 244 | |
| 245 | |
| 246 #if U_SHOW_CPLUSPLUS_API | |
| 247 /** | |
| 248 * Open a writable UText for a non-const UnicodeString. | |
| 249 * | |
| 250 * @param ut Pointer to a UText struct. If NULL, a new UText will be creat
ed. | |
| 251 * If non-NULL, must refer to an initialized UText struct, which
will then | |
| 252 * be reset to reference the specified input string. | |
| 253 * @param s A UnicodeString. | |
| 254 * @param status Errors are returned here. | |
| 255 * @return Pointer to the UText. If a UText was supplied as input, this | |
| 256 * will always be used and returned. | |
| 257 * @stable ICU 3.4 | |
| 258 */ | |
| 259 U_STABLE UText * U_EXPORT2 | |
| 260 utext_openUnicodeString(UText *ut, U_NAMESPACE_QUALIFIER UnicodeString *s, UErro
rCode *status); | |
| 261 | |
| 262 | |
| 263 /** | |
| 264 * Open a UText for a const UnicodeString. The resulting UText will not be wri
table. | |
| 265 * | |
| 266 * @param ut Pointer to a UText struct. If NULL, a new UText will be created
. | |
| 267 * If non-NULL, must refer to an initialized UText struct, which w
ill then | |
| 268 * be reset to reference the specified input string. | |
| 269 * @param s A const UnicodeString to be wrapped. | |
| 270 * @param status Errors are returned here. | |
| 271 * @return Pointer to the UText. If a UText was supplied as input, this | |
| 272 * will always be used and returned. | |
| 273 * @stable ICU 3.4 | |
| 274 */ | |
| 275 U_STABLE UText * U_EXPORT2 | |
| 276 utext_openConstUnicodeString(UText *ut, const U_NAMESPACE_QUALIFIER UnicodeStrin
g *s, UErrorCode *status); | |
| 277 | |
| 278 | |
| 279 /** | |
| 280 * Open a writable UText implementation for an ICU Replaceable object. | |
| 281 * @param ut Pointer to a UText struct. If NULL, a new UText will be created
. | |
| 282 * If non-NULL, must refer to an already existing UText, which wil
l then | |
| 283 * be reset to reference the specified replaceable text. | |
| 284 * @param rep A Replaceable text object. | |
| 285 * @param status Errors are returned here. | |
| 286 * @return Pointer to the UText. If a UText was supplied as input, this | |
| 287 * will always be used and returned. | |
| 288 * @see Replaceable | |
| 289 * @stable ICU 3.4 | |
| 290 */ | |
| 291 U_STABLE UText * U_EXPORT2 | |
| 292 utext_openReplaceable(UText *ut, U_NAMESPACE_QUALIFIER Replaceable *rep, UErrorC
ode *status); | |
| 293 | |
| 294 /** | |
| 295 * Open a UText implementation over an ICU CharacterIterator. | |
| 296 * @param ut Pointer to a UText struct. If NULL, a new UText will be created
. | |
| 297 * If non-NULL, must refer to an already existing UText, which wil
l then | |
| 298 * be reset to reference the specified character iterator. | |
| 299 * @param ic A Character Iterator. | |
| 300 * @param status Errors are returned here. | |
| 301 * @return Pointer to the UText. If a UText was supplied as input, this | |
| 302 * will always be used and returned. | |
| 303 * @see CharacterIterator | |
| 304 * @stable ICU 3.4 | |
| 305 */ | |
| 306 U_STABLE UText * U_EXPORT2 | |
| 307 utext_openCharacterIterator(UText *ut, U_NAMESPACE_QUALIFIER CharacterIterator *
ic, UErrorCode *status); | |
| 308 | |
| 309 #endif | |
| 310 | |
| 311 | |
| 312 /** | |
| 313 * Clone a UText. This is much like opening a UText where the source text is
itself | |
| 314 * another UText. | |
| 315 * | |
| 316 * A deep clone will copy both the UText data structures and the underlying te
xt. | |
| 317 * The original and cloned UText will operate completely independently; modifi
cations | |
| 318 * made to the text in one will not affect the other. Text providers are not | |
| 319 * required to support deep clones. The user of clone() must check the status
return | |
| 320 * and be prepared to handle failures. | |
| 321 * | |
| 322 * The standard UText implementations for UTF8, UChar *, UnicodeString and | |
| 323 * Replaceable all support deep cloning. | |
| 324 * | |
| 325 * The UText returned from a deep clone will be writable, assuming that the te
xt | |
| 326 * provider is able to support writing, even if the source UText had been made | |
| 327 * non-writable by means of utext_freeze(). | |
| 328 * | |
| 329 * A shallow clone replicates only the UText data structures; it does not make | |
| 330 * a copy of the underlying text. Shallow clones can be used as an efficient
way to | |
| 331 * have multiple iterators active in a single text string that is not being | |
| 332 * modified. | |
| 333 * | |
| 334 * A shallow clone operation will not fail, barring truly exceptional conditio
ns such | |
| 335 * as memory allocation failures. | |
| 336 * | |
| 337 * Shallow UText clones should be avoided if the UText functions that modify t
he | |
| 338 * text are expected to be used, either on the original or the cloned UText. | |
| 339 * Any such modifications can cause unpredictable behavior. Read Only | |
| 340 * shallow clones provide some protection against errors of this type by | |
| 341 * disabling text modification via the cloned UText. | |
| 342 * | |
| 343 * A shallow clone made with the readOnly parameter == FALSE will preserve the
| |
| 344 * utext_isWritable() state of the source object. Note, however, that | |
| 345 * write operations must be avoided while more than one UText exists that refe
rs | |
| 346 * to the same underlying text. | |
| 347 * | |
| 348 * A UText and its clone may be safely concurrently accessed by separate threa
ds. | |
| 349 * This is true for read access only with shallow clones, and for both read an
d | |
| 350 * write access with deep clones. | |
| 351 * It is the responsibility of the Text Provider to ensure that this thread sa
fety | |
| 352 * constraint is met. | |
| 353 * | |
| 354 * @param dest A UText struct to be filled in with the result of the clone o
peration, | |
| 355 * or NULL if the clone function should heap-allocate a new UTex
t struct. | |
| 356 * If non-NULL, must refer to an already existing UText, which w
ill then | |
| 357 * be reset to become the clone. | |
| 358 * @param src The UText to be cloned. | |
| 359 * @param deep TRUE to request a deep clone, FALSE for a shallow clone. | |
| 360 * @param readOnly TRUE to request that the cloned UText have read only access
to the | |
| 361 * underlying text. | |
| 362 | |
| 363 * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERR
OR | |
| 364 * will be returned if the text provider is unable to clone the | |
| 365 * original text. | |
| 366 * @return The newly created clone, or NULL if the clone operation faile
d. | |
| 367 * @stable ICU 3.4 | |
| 368 */ | |
| 369 U_STABLE UText * U_EXPORT2 | |
| 370 utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCod
e *status); | |
| 371 | |
| 372 | |
| 373 /** | |
| 374 * Compare two UText objects for equality. | |
| 375 * UTexts are equal if they are iterating over the same text, and | |
| 376 * have the same iteration position within the text. | |
| 377 * If either or both of the parameters are NULL, the comparison is FALSE. | |
| 378 * | |
| 379 * @param a The first of the two UTexts to compare. | |
| 380 * @param b The other UText to be compared. | |
| 381 * @return TRUE if the two UTexts are equal. | |
| 382 * @stable ICU 3.6 | |
| 383 */ | |
| 384 U_STABLE UBool U_EXPORT2 | |
| 385 utext_equals(const UText *a, const UText *b); | |
| 386 | |
| 387 | |
| 388 /***************************************************************************** | |
| 389 * | |
| 390 * Functions to work with the text represented by a UText wrapper | |
| 391 * | |
| 392 *****************************************************************************/ | |
| 393 | |
| 394 /** | |
| 395 * Get the length of the text. Depending on the characteristics | |
| 396 * of the underlying text representation, this may be expensive. | |
| 397 * @see utext_isLengthExpensive() | |
| 398 * | |
| 399 * | |
| 400 * @param ut the text to be accessed. | |
| 401 * @return the length of the text, expressed in native units. | |
| 402 * | |
| 403 * @stable ICU 3.4 | |
| 404 */ | |
| 405 U_STABLE int64_t U_EXPORT2 | |
| 406 utext_nativeLength(UText *ut); | |
| 407 | |
| 408 /** | |
| 409 * Return TRUE if calculating the length of the text could be expensive. | |
| 410 * Finding the length of NUL terminated strings is considered to be expensive. | |
| 411 * | |
| 412 * Note that the value of this function may change | |
| 413 * as the result of other operations on a UText. | |
| 414 * Once the length of a string has been discovered, it will no longer | |
| 415 * be expensive to report it. | |
| 416 * | |
| 417 * @param ut the text to be accessed. | |
| 418 * @return TRUE if determining the length of the text could be time consuming. | |
| 419 * @stable ICU 3.4 | |
| 420 */ | |
| 421 U_STABLE UBool U_EXPORT2 | |
| 422 utext_isLengthExpensive(const UText *ut); | |
| 423 | |
| 424 /** | |
| 425 * Returns the code point at the requested index, | |
| 426 * or U_SENTINEL (-1) if it is out of bounds. | |
| 427 * | |
| 428 * If the specified index points to the interior of a multi-unit | |
| 429 * character - one of the trail bytes of a UTF-8 sequence, for example - | |
| 430 * the complete code point will be returned. | |
| 431 * | |
| 432 * The iteration position will be set to the start of the returned code point. | |
| 433 * | |
| 434 * This function is roughly equivalent to the sequence | |
| 435 * utext_setNativeIndex(index); | |
| 436 * utext_current32(); | |
| 437 * (There is a subtle difference if the index is out of bounds by being less tha
n zero - | |
| 438 * utext_setNativeIndex(negative value) sets the index to zero, after which utex
t_current32() | |
| 439 * will return the char at zero. utext_char32At(negative index), on the other h
and, will | |
| 440 * return the U_SENTINEL value of -1.) | |
| 441 * | |
| 442 * @param ut the text to be accessed | |
| 443 * @param nativeIndex the native index of the character to be accessed. If the
index points | |
| 444 * to other than the first unit of a multi-unit character, it will be adj
usted | |
| 445 * to the start of the character. | |
| 446 * @return the code point at the specified index. | |
| 447 * @stable ICU 3.4 | |
| 448 */ | |
| 449 U_STABLE UChar32 U_EXPORT2 | |
| 450 utext_char32At(UText *ut, int64_t nativeIndex); | |
| 451 | |
| 452 | |
| 453 /** | |
| 454 * | |
| 455 * Get the code point at the current iteration position, | |
| 456 * or U_SENTINEL (-1) if the iteration has reached the end of | |
| 457 * the input text. | |
| 458 * | |
| 459 * @param ut the text to be accessed. | |
| 460 * @return the Unicode code point at the current iterator position. | |
| 461 * @stable ICU 3.4 | |
| 462 */ | |
| 463 U_STABLE UChar32 U_EXPORT2 | |
| 464 utext_current32(UText *ut); | |
| 465 | |
| 466 | |
| 467 /** | |
| 468 * Get the code point at the current iteration position of the UText, and | |
| 469 * advance the position to the first index following the character. | |
| 470 * | |
| 471 * If the position is at the end of the text (the index following | |
| 472 * the last character, which is also the length of the text), | |
| 473 * return U_SENTINEL (-1) and do not advance the index. | |
| 474 * | |
| 475 * This is a post-increment operation. | |
| 476 * | |
| 477 * An inline macro version of this function, UTEXT_NEXT32(), | |
| 478 * is available for performance critical use. | |
| 479 * | |
| 480 * @param ut the text to be accessed. | |
| 481 * @return the Unicode code point at the iteration position. | |
| 482 * @see UTEXT_NEXT32 | |
| 483 * @stable ICU 3.4 | |
| 484 */ | |
| 485 U_STABLE UChar32 U_EXPORT2 | |
| 486 utext_next32(UText *ut); | |
| 487 | |
| 488 | |
| 489 /** | |
| 490 * Move the iterator position to the character (code point) whose | |
| 491 * index precedes the current position, and return that character. | |
| 492 * This is a pre-decrement operation. | |
| 493 * | |
| 494 * If the initial position is at the start of the text (index of 0) | |
| 495 * return U_SENTINEL (-1), and leave the position unchanged. | |
| 496 * | |
| 497 * An inline macro version of this function, UTEXT_PREVIOUS32(), | |
| 498 * is available for performance critical use. | |
| 499 * | |
| 500 * @param ut the text to be accessed. | |
| 501 * @return the previous UChar32 code point, or U_SENTINEL (-1) | |
| 502 * if the iteration has reached the start of the text. | |
| 503 * @see UTEXT_PREVIOUS32 | |
| 504 * @stable ICU 3.4 | |
| 505 */ | |
| 506 U_STABLE UChar32 U_EXPORT2 | |
| 507 utext_previous32(UText *ut); | |
| 508 | |
| 509 | |
| 510 /** | |
| 511 * Set the iteration index and return the code point at that index. | |
| 512 * Leave the iteration index at the start of the following code point. | |
| 513 * | |
| 514 * This function is the most efficient and convenient way to | |
| 515 * begin a forward iteration. The results are identical to those | |
| 516 * from the sequence | |
| 517 * \code | |
| 518 * utext_setNativeIndex(); | |
| 519 * utext_next32(); | |
| 520 * \endcode | |
| 521 * | |
| 522 * @param ut the text to be accessed. | |
| 523 * @param nativeIndex Iteration index, in the native units of the text provide
r. | |
| 524 * @return Code point which starts at or before index, | |
| 525 * or U_SENTINEL (-1) if it is out of bounds. | |
| 526 * @stable ICU 3.4 | |
| 527 */ | |
| 528 U_STABLE UChar32 U_EXPORT2 | |
| 529 utext_next32From(UText *ut, int64_t nativeIndex); | |
| 530 | |
| 531 | |
| 532 | |
| 533 /** | |
| 534 * Set the iteration index, and return the code point preceding the | |
| 535 * one specified by the initial index. Leave the iteration position | |
| 536 * at the start of the returned code point. | |
| 537 * | |
| 538 * This function is the most efficient and convenient way to | |
| 539 * begin a backwards iteration. | |
| 540 * | |
| 541 * @param ut the text to be accessed. | |
| 542 * @param nativeIndex Iteration index in the native units of the text provider. | |
| 543 * @return Code point preceding the one at the initial index, | |
| 544 * or U_SENTINEL (-1) if it is out of bounds. | |
| 545 * | |
| 546 * @stable ICU 3.4 | |
| 547 */ | |
| 548 U_STABLE UChar32 U_EXPORT2 | |
| 549 utext_previous32From(UText *ut, int64_t nativeIndex); | |
| 550 | |
| 551 /** | |
| 552 * Get the current iterator position, which can range from 0 to | |
| 553 * the length of the text. | |
| 554 * The position is a native index into the input text, in whatever format it | |
| 555 * may have (possibly UTF-8 for example), and may not always be the same as | |
| 556 * the corresponding UChar (UTF-16) index. | |
| 557 * The returned position will always be aligned to a code point boundary. | |
| 558 * | |
| 559 * @param ut the text to be accessed. | |
| 560 * @return the current index position, in the native units of the text provider
. | |
| 561 * @stable ICU 3.4 | |
| 562 */ | |
| 563 U_STABLE int64_t U_EXPORT2 | |
| 564 utext_getNativeIndex(const UText *ut); | |
| 565 | |
| 566 /** | |
| 567 * Set the current iteration position to the nearest code point | |
| 568 * boundary at or preceding the specified index. | |
| 569 * The index is in the native units of the original input text. | |
| 570 * If the index is out of range, it will be pinned to be within | |
| 571 * the range of the input text. | |
| 572 * <p> | |
| 573 * It will usually be more efficient to begin an iteration | |
| 574 * using the functions utext_next32From() or utext_previous32From() | |
| 575 * rather than utext_setNativeIndex(). | |
| 576 * <p> | |
| 577 * Moving the index position to an adjacent character is best done | |
| 578 * with utext_next32(), utext_previous32() or utext_moveIndex32(). | |
| 579 * Attempting to do direct arithmetic on the index position is | |
| 580 * complicated by the fact that the size (in native units) of a | |
| 581 * character depends on the underlying representation of the character | |
| 582 * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not | |
| 583 * easily knowable. | |
| 584 * | |
| 585 * @param ut the text to be accessed. | |
| 586 * @param nativeIndex the native unit index of the new iteration position. | |
| 587 * @stable ICU 3.4 | |
| 588 */ | |
| 589 U_STABLE void U_EXPORT2 | |
| 590 utext_setNativeIndex(UText *ut, int64_t nativeIndex); | |
| 591 | |
| 592 /** | |
| 593 * Move the iterator position by delta code points. The number of code points | |
| 594 * is a signed number; a negative delta will move the iterator backwards, | |
| 595 * towards the start of the text. | |
| 596 * <p> | |
| 597 * The index is moved by <code>delta</code> code points | |
| 598 * forward or backward, but no further backward than to 0 and | |
| 599 * no further forward than to utext_nativeLength(). | |
| 600 * The resulting index value will be in between 0 and length, inclusive. | |
| 601 * | |
| 602 * @param ut the text to be accessed. | |
| 603 * @param delta the signed number of code points to move the iteration position. | |
| 604 * @return TRUE if the position could be moved the requested number of positions
while | |
| 605 * staying within the range [0 - text length]. | |
| 606 * @stable ICU 3.4 | |
| 607 */ | |
| 608 U_STABLE UBool U_EXPORT2 | |
| 609 utext_moveIndex32(UText *ut, int32_t delta); | |
| 610 | |
| 611 /** | |
| 612 * Get the native index of the character preceding the current position. | |
| 613 * If the iteration position is already at the start of the text, zero | |
| 614 * is returned. | |
| 615 * The value returned is the same as that obtained from the following sequence, | |
| 616 * but without the side effect of changing the iteration position. | |
| 617 * | |
| 618 * \code | |
| 619 * UText *ut = whatever; | |
| 620 * ... | |
| 621 * utext_previous32(ut); | |
| 622 * utext_getNativeIndex(ut); | |
| 623 * \endcode | |
| 624 * | |
| 625 * This function is most useful during forwards iteration, where it will get the | |
| 626 * native index of the character most recently returned from utext_next32(). | |
| 627 * | |
| 628 * @param ut the text to be accessed | |
| 629 * @return the native index of the character preceding the current index positi
on, | |
| 630 * or zero if the current position is at the start of the text. | |
| 631 * @stable ICU 3.6 | |
| 632 */ | |
| 633 U_STABLE int64_t U_EXPORT2 | |
| 634 utext_getPreviousNativeIndex(UText *ut); | |
| 635 | |
| 636 | |
| 637 /** | |
| 638 * | |
| 639 * Extract text from a UText into a UChar buffer. The range of text to be extra
cted | |
| 640 * is specified in the native indices of the UText provider. These may not nece
ssarily | |
| 641 * be UTF-16 indices. | |
| 642 * <p> | |
| 643 * The size (number of 16 bit UChars) of the data to be extracted is returned.
The | |
| 644 * full number of UChars is returned, even when the extracted text is truncated | |
| 645 * because the specified buffer size is too small. | |
| 646 * <p> | |
| 647 * The extracted string will (if you are a user) / must (if you are a text provi
der) | |
| 648 * be NUL-terminated if there is sufficient space in the destination buffer. Th
is | |
| 649 * terminating NUL is not included in the returned length. | |
| 650 * <p> | |
| 651 * The iteration index is left at the position following the last extracted char
acter. | |
| 652 * | |
| 653 * @param ut the UText from which to extract data. | |
| 654 * @param nativeStart the native index of the first character to extract. | |
| 655 * If the specified index is out of range, | |
| 656 * it will be pinned to be within 0 <= index <= textLength | |
| 657 * @param nativeLimit the native string index of the position following the las
t | |
| 658 * character to extract. If the specified index is out of range, | |
| 659 * it will be pinned to be within 0 <= index <= textLength. | |
| 660 * nativeLimit must be >= nativeStart. | |
| 661 * @param dest the UChar (UTF-16) buffer into which the extracted text is plac
ed | |
| 662 * @param destCapacity The size, in UChars, of the destination buffer. May be
zero | |
| 663 * for precomputing the required size. | |
| 664 * @param status receives any error status. | |
| 665 * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the
| |
| 666 * buffer was too small. Returns number of UChars for preflighting. | |
| 667 * @return Number of UChars in the data to be extracted. Does not include a tra
iling NUL. | |
| 668 * | |
| 669 * @stable ICU 3.4 | |
| 670 */ | |
| 671 U_STABLE int32_t U_EXPORT2 | |
| 672 utext_extract(UText *ut, | |
| 673 int64_t nativeStart, int64_t nativeLimit, | |
| 674 UChar *dest, int32_t destCapacity, | |
| 675 UErrorCode *status); | |
| 676 | |
| 677 | |
| 678 /** | |
| 679 * Compare two UTexts (binary order). The comparison begins at each source text'
s | |
| 680 * iteration position. The iteration position of each UText will be left followi
ng | |
| 681 * the last character compared. | |
| 682 * | |
| 683 * The comparison is done in code point order; unlike u_strCompare, you | |
| 684 * cannot choose to use code unit order. This is because the characters | |
| 685 * in a UText are accessed one code point at a time, and may not be from a UTF-1
6 | |
| 686 * context. | |
| 687 * | |
| 688 * This function works with strings of different explicitly specified lengths | |
| 689 * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. | |
| 690 * A length argument of -1 signifies that as much of the string should be used a
s | |
| 691 * is necessary to compare with the other string. If both length arguments are -
1, | |
| 692 * the entire remaining portions of both strings are used. | |
| 693 * | |
| 694 * @param s1 First source string. | |
| 695 * @param length1 Length of first source string in UTF-32 code points. | |
| 696 * | |
| 697 * @param s2 Second source string. | |
| 698 * @param length2 Length of second source string in UTF-32 code points. | |
| 699 * | |
| 700 * @return <0 or 0 or >0 as usual for string comparisons | |
| 701 * | |
| 702 * @internal ICU 4.4 technology preview | |
| 703 */ | |
| 704 U_INTERNAL int32_t U_EXPORT2 | |
| 705 utext_compare(UText *s1, int32_t length1, | |
| 706 UText *s2, int32_t length2); | |
| 707 | |
| 708 /** | |
| 709 * Compare two UTexts (binary order). The comparison begins at each source text'
s | |
| 710 * iteration position. The iteration position of each UText will be left followi
ng | |
| 711 * the last character compared. This method differs from utext_compare in that | |
| 712 * it accepts native limits rather than lengths for each string. | |
| 713 * | |
| 714 * The comparison is done in code point order; unlike u_strCompare, you | |
| 715 * cannot choose to use code unit order. This is because the characters | |
| 716 * in a UText are accessed one code point at a time, and may not be from a UTF-1
6 | |
| 717 * context. | |
| 718 * | |
| 719 * This function works with strings of different explicitly specified lengths | |
| 720 * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. | |
| 721 * A limit argument of -1 signifies that as much of the string should be used as | |
| 722 * is necessary to compare with the other string. If both limit arguments are -1
, | |
| 723 * the entire remaining portions of both strings are used. | |
| 724 * | |
| 725 * @param s1 First source string. | |
| 726 * @param limit1 Native index of the last character in the first source string t
o be considered. | |
| 727 * | |
| 728 * @param s2 Second source string. | |
| 729 * @param limit2 Native index of the last character in the second source string
to be considered. | |
| 730 * | |
| 731 * @return <0 or 0 or >0 as usual for string comparisons | |
| 732 * | |
| 733 * @internal ICU 4.4 technology preview | |
| 734 */ | |
| 735 U_INTERNAL int32_t U_EXPORT2 | |
| 736 utext_compareNativeLimit(UText *s1, int64_t limit1, | |
| 737 UText *s2, int64_t limit2); | |
| 738 | |
| 739 /** | |
| 740 * Compare two UTexts case-insensitively using full case folding. The comparison | |
| 741 * begins at each source text's iteration position. The iteration position of ea
ch | |
| 742 * UText will be left following the last character compared. | |
| 743 * | |
| 744 * The comparison is done in code point order; this is because the characters | |
| 745 * in a UText are accessed one code point at a time, and may not be from a UTF-1
6 | |
| 746 * context. | |
| 747 * | |
| 748 * This function works with strings of different explicitly specified lengths | |
| 749 * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. | |
| 750 * A length argument of -1 signifies that as much of the string should be used a
s | |
| 751 * is necessary to compare with the other string. If both length arguments are -
1, | |
| 752 * the entire remaining portions of both strings are used. | |
| 753 * | |
| 754 * @param s1 First source string. | |
| 755 * @param length1 Length of first source string in UTF-32 code points. | |
| 756 * | |
| 757 * @param s2 Second source string. | |
| 758 * @param length2 Length of second source string in UTF-32 code points. | |
| 759 * | |
| 760 * @param options A bit set of options: | |
| 761 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: | |
| 762 * Comparison in code point order with default case folding. | |
| 763 * | |
| 764 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I | |
| 765 * | |
| 766 * @param pErrorCode Must be a valid pointer to an error code value, | |
| 767 * which must not indicate a failure before the function call. | |
| 768 * | |
| 769 * @return <0 or 0 or >0 as usual for string comparisons | |
| 770 * | |
| 771 * @internal ICU 4.4 technology preview | |
| 772 */ | |
| 773 U_INTERNAL int32_t U_EXPORT2 | |
| 774 utext_caseCompare(UText *s1, int32_t length1, | |
| 775 UText *s2, int32_t length2, | |
| 776 uint32_t options, UErrorCode *pErrorCode); | |
| 777 | |
| 778 /** | |
| 779 * Compare two UTexts case-insensitively using full case folding. The comparison | |
| 780 * begins at each source text's iteration position. The iteration position of ea
ch | |
| 781 * UText will be left following the last character compared. This method differs
from | |
| 782 * utext_caseCompare in that it accepts native limits rather than lengths for ea
ch | |
| 783 * string. | |
| 784 * | |
| 785 * The comparison is done in code point order; this is because the characters | |
| 786 * in a UText are accessed one code point at a time, and may not be from a UTF-1
6 | |
| 787 * context. | |
| 788 * | |
| 789 * This function works with strings of different explicitly specified lengths | |
| 790 * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. | |
| 791 * A limit argument of -1 signifies that as much of the string should be used as | |
| 792 * is necessary to compare with the other string. If both limit arguments are -
1, | |
| 793 * the entire remaining portions of both strings are used. | |
| 794 * | |
| 795 * @param s1 First source string. | |
| 796 * @param limit1 Native index of the last character in the first source string t
o be considered. | |
| 797 * | |
| 798 * @param s2 Second source string. | |
| 799 * @param limit2 Native index of the last character in the second source string
to be considered. | |
| 800 * | |
| 801 * @param options A bit set of options: | |
| 802 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: | |
| 803 * Comparison in code point order with default case folding. | |
| 804 * | |
| 805 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I | |
| 806 * | |
| 807 * @param pErrorCode Must be a valid pointer to an error code value, | |
| 808 * which must not indicate a failure before the function call. | |
| 809 * | |
| 810 * @return <0 or 0 or >0 as usual for string comparisons | |
| 811 * | |
| 812 * @internal ICU 4.4 technology preview | |
| 813 */ | |
| 814 U_INTERNAL int32_t U_EXPORT2 | |
| 815 utext_caseCompareNativeLimit(UText *s1, int64_t limit1, | |
| 816 UText *s2, int64_t limit2, | |
| 817 uint32_t options, UErrorCode *pErrorCode); | |
| 818 | |
| 819 | |
| 820 /*******************************************************************************
***** | |
| 821 * | |
| 822 * #define inline versions of selected performance-critical text access functio
ns | |
| 823 * Caution: do not use auto increment++ or decrement-- expressions | |
| 824 * as parameters to these macros. | |
| 825 * | |
| 826 * For most use, where there is no extreme performance constraint, the | |
| 827 * normal, non-inline functions are a better choice. The resulting cod
e | |
| 828 * will be smaller, and, if the need ever arises, easier to debug. | |
| 829 * | |
| 830 * These are implemented as #defines rather than real functions | |
| 831 * because there is no fully portable way to do inline functions in pla
in C. | |
| 832 * | |
| 833 *******************************************************************************
*****/ | |
| 834 | |
| 835 /** | |
| 836 * inline version of utext_current32(), for performance-critical situations. | |
| 837 * | |
| 838 * Get the code point at the current iteration position of the UText. | |
| 839 * Returns U_SENTINEL (-1) if the position is at the end of the | |
| 840 * text. | |
| 841 * | |
| 842 * @internal ICU 4.4 technology preview | |
| 843 */ | |
| 844 #define UTEXT_CURRENT32(ut) \ | |
| 845 ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkO
ffset]<0xd800 ? \ | |
| 846 ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut)) | |
| 847 | |
| 848 /** | |
| 849 * inline version of utext_next32(), for performance-critical situations. | |
| 850 * | |
| 851 * Get the code point at the current iteration position of the UText, and | |
| 852 * advance the position to the first index following the character. | |
| 853 * This is a post-increment operation. | |
| 854 * Returns U_SENTINEL (-1) if the position is at the end of the | |
| 855 * text. | |
| 856 * | |
| 857 * @stable ICU 3.4 | |
| 858 */ | |
| 859 #define UTEXT_NEXT32(ut) \ | |
| 860 ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkO
ffset]<0xd800 ? \ | |
| 861 ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut)) | |
| 862 | |
| 863 /** | |
| 864 * inline version of utext_previous32(), for performance-critical situations. | |
| 865 * | |
| 866 * Move the iterator position to the character (code point) whose | |
| 867 * index precedes the current position, and return that character. | |
| 868 * This is a pre-decrement operation. | |
| 869 * Returns U_SENTINEL (-1) if the position is at the start of the text. | |
| 870 * | |
| 871 * @stable ICU 3.4 | |
| 872 */ | |
| 873 #define UTEXT_PREVIOUS32(ut) \ | |
| 874 ((ut)->chunkOffset > 0 && \ | |
| 875 (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \ | |
| 876 (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut)) | |
| 877 | |
| 878 /** | |
| 879 * inline version of utext_getNativeIndex(), for performance-critical situatio
ns. | |
| 880 * | |
| 881 * Get the current iterator position, which can range from 0 to | |
| 882 * the length of the text. | |
| 883 * The position is a native index into the input text, in whatever format it | |
| 884 * may have (possibly UTF-8 for example), and may not always be the same as | |
| 885 * the corresponding UChar (UTF-16) index. | |
| 886 * The returned position will always be aligned to a code point boundary. | |
| 887 * | |
| 888 * @stable ICU 3.6 | |
| 889 */ | |
| 890 #define UTEXT_GETNATIVEINDEX(ut) \ | |
| 891 ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \ | |
| 892 (ut)->chunkNativeStart+(ut)->chunkOffset : \ | |
| 893 (ut)->pFuncs->mapOffsetToNative(ut)) | |
| 894 | |
| 895 /** | |
| 896 * inline version of utext_setNativeIndex(), for performance-critical situatio
ns. | |
| 897 * | |
| 898 * Set the current iteration position to the nearest code point | |
| 899 * boundary at or preceding the specified index. | |
| 900 * The index is in the native units of the original input text. | |
| 901 * If the index is out of range, it will be pinned to be within | |
| 902 * the range of the input text. | |
| 903 * | |
| 904 * @stable ICU 3.8 | |
| 905 */ | |
| 906 #define UTEXT_SETNATIVEINDEX(ut, ix) \ | |
| 907 { int64_t __offset = (ix) - (ut)->chunkNativeStart; \ | |
| 908 if (__offset>=0 && __offset<=(int64_t)(ut)->nativeIndexingLimit) { \ | |
| 909 (ut)->chunkOffset=(int32_t)__offset; \ | |
| 910 } else { \ | |
| 911 utext_setNativeIndex((ut), (ix)); } } | |
| 912 | |
| 913 | |
| 914 | |
| 915 /*******************************************************************************
***** | |
| 916 * | |
| 917 * Functions related to writing or modifying the text. | |
| 918 * These will work only with modifiable UTexts. Attempting to | |
| 919 * modify a read-only UText will return an error status. | |
| 920 * | |
| 921 *******************************************************************************
*****/ | |
| 922 | |
| 923 | |
| 924 /** | |
| 925 * Return TRUE if the text can be written (modified) with utext_replace() or | |
| 926 * utext_copy(). For the text to be writable, the text provider must | |
| 927 * be of a type that supports writing and the UText must not be frozen. | |
| 928 * | |
| 929 * Attempting to modify text when utext_isWritable() is FALSE will fail - | |
| 930 * the text will not be modified, and an error will be returned from the functi
on | |
| 931 * that attempted the modification. | |
| 932 * | |
| 933 * @param ut the UText to be tested. | |
| 934 * @return TRUE if the text is modifiable. | |
| 935 * | |
| 936 * @see utext_freeze() | |
| 937 * @see utext_replace() | |
| 938 * @see utext_copy() | |
| 939 * @stable ICU 3.4 | |
| 940 * | |
| 941 */ | |
| 942 U_STABLE UBool U_EXPORT2 | |
| 943 utext_isWritable(const UText *ut); | |
| 944 | |
| 945 | |
| 946 /** | |
| 947 * Test whether there is meta data associated with the text. | |
| 948 * @see Replaceable::hasMetaData() | |
| 949 * | |
| 950 * @param ut The UText to be tested | |
| 951 * @return TRUE if the underlying text includes meta data. | |
| 952 * @stable ICU 3.4 | |
| 953 */ | |
| 954 U_STABLE UBool U_EXPORT2 | |
| 955 utext_hasMetaData(const UText *ut); | |
| 956 | |
| 957 | |
| 958 /** | |
| 959 * Replace a range of the original text with a replacement text. | |
| 960 * | |
| 961 * Leaves the current iteration position at the position following the | |
| 962 * newly inserted replacement text. | |
| 963 * | |
| 964 * This function is only available on UText types that support writing, | |
| 965 * that is, ones where utext_isWritable() returns TRUE. | |
| 966 * | |
| 967 * When using this function, there should be only a single UText opened onto the | |
| 968 * underlying native text string. Behavior after a replace operation | |
| 969 * on a UText is undefined for any other additional UTexts that refer to the | |
| 970 * modified string. | |
| 971 * | |
| 972 * @param ut the UText representing the text to be operated on. | |
| 973 * @param nativeStart the native index of the start of the region to be rep
laced | |
| 974 * @param nativeLimit the native index of the character following the regio
n to be replaced. | |
| 975 * @param replacementText pointer to the replacement text | |
| 976 * @param replacementLength length of the replacement text, or -1 if the text is
NUL terminated. | |
| 977 * @param status receives any error status. Possible errors include | |
| 978 * U_NO_WRITE_PERMISSION | |
| 979 * | |
| 980 * @return The signed number of (native) storage units by which | |
| 981 * the length of the text expanded or contracted. | |
| 982 * | |
| 983 * @stable ICU 3.4 | |
| 984 */ | |
| 985 U_STABLE int32_t U_EXPORT2 | |
| 986 utext_replace(UText *ut, | |
| 987 int64_t nativeStart, int64_t nativeLimit, | |
| 988 const UChar *replacementText, int32_t replacementLength, | |
| 989 UErrorCode *status); | |
| 990 | |
| 991 | |
| 992 | |
| 993 /** | |
| 994 * | |
| 995 * Copy or move a substring from one position to another within the text, | |
| 996 * while retaining any metadata associated with the text. | |
| 997 * This function is used to duplicate or reorder substrings. | |
| 998 * The destination index must not overlap the source range. | |
| 999 * | |
| 1000 * The text to be copied or moved is inserted at destIndex; | |
| 1001 * it does not replace or overwrite any existing text. | |
| 1002 * | |
| 1003 * The iteration position is left following the newly inserted text | |
| 1004 * at the destination position. | |
| 1005 * | |
| 1006 * This function is only available on UText types that support writing, | |
| 1007 * that is, ones where utext_isWritable() returns TRUE. | |
| 1008 * | |
| 1009 * When using this function, there should be only a single UText opened onto the | |
| 1010 * underlying native text string. Behavior after a copy operation | |
| 1011 * on a UText is undefined in any other additional UTexts that refer to the | |
| 1012 * modified string. | |
| 1013 * | |
| 1014 * @param ut The UText representing the text to be operated on. | |
| 1015 * @param nativeStart The native index of the start of the region to be copied
or moved | |
| 1016 * @param nativeLimit The native index of the character position following the
region | |
| 1017 * to be copied. | |
| 1018 * @param destIndex The native destination index to which the source substrin
g is | |
| 1019 * copied or moved. | |
| 1020 * @param move If TRUE, then the substring is moved, not copied/duplicat
ed. | |
| 1021 * @param status receives any error status. Possible errors include U_NO_
WRITE_PERMISSION | |
| 1022 * | |
| 1023 * @stable ICU 3.4 | |
| 1024 */ | |
| 1025 U_STABLE void U_EXPORT2 | |
| 1026 utext_copy(UText *ut, | |
| 1027 int64_t nativeStart, int64_t nativeLimit, | |
| 1028 int64_t destIndex, | |
| 1029 UBool move, | |
| 1030 UErrorCode *status); | |
| 1031 | |
| 1032 | |
| 1033 /** | |
| 1034 * <p> | |
| 1035 * Freeze a UText. This prevents any modification to the underlying text itse
lf | |
| 1036 * by means of functions operating on this UText. | |
| 1037 * </p> | |
| 1038 * <p> | |
| 1039 * Once frozen, a UText can not be unfrozen. The intent is to ensure | |
| 1040 * that the text underlying a frozen UText wrapper cannot be modified via th
at UText. | |
| 1041 * </p> | |
| 1042 * <p> | |
| 1043 * Caution: freezing a UText will disable changes made via the specific | |
| 1044 * frozen UText wrapper only; it will not have any effect on the ability to | |
| 1045 * directly modify the text by bypassing the UText. Any such backdoor modifi
cations | |
| 1046 * are always an error while UText access is occurring because the underlying | |
| 1047 * text can get out of sync with UText's buffering. | |
| 1048 * </p> | |
| 1049 * | |
| 1050 * @param ut The UText to be frozen. | |
| 1051 * @see utext_isWritable() | |
| 1052 * @stable ICU 3.6 | |
| 1053 */ | |
| 1054 U_STABLE void U_EXPORT2 | |
| 1055 utext_freeze(UText *ut); | |
| 1056 | |
| 1057 | |
| 1058 /** | |
| 1059 * UText provider properties (bit field indexes). | |
| 1060 * | |
| 1061 * @see UText | |
| 1062 * @stable ICU 3.4 | |
| 1063 */ | |
| 1064 enum { | |
| 1065 /** | |
| 1066 * It is potentially time consuming for the provider to determine the length
of the text. | |
| 1067 * @stable ICU 3.4 | |
| 1068 */ | |
| 1069 UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1, | |
| 1070 /** | |
| 1071 * Text chunks remain valid and usable until the text object is modified or | |
| 1072 * deleted, not just until the next time the access() function is called | |
| 1073 * (which is the default). | |
| 1074 * @stable ICU 3.4 | |
| 1075 */ | |
| 1076 UTEXT_PROVIDER_STABLE_CHUNKS = 2, | |
| 1077 /** | |
| 1078 * The provider supports modifying the text via the replace() and copy() | |
| 1079 * functions. | |
| 1080 * @see Replaceable | |
| 1081 * @stable ICU 3.4 | |
| 1082 */ | |
| 1083 UTEXT_PROVIDER_WRITABLE = 3, | |
| 1084 /** | |
| 1085 * There is meta data associated with the text. | |
| 1086 * @see Replaceable::hasMetaData() | |
| 1087 * @stable ICU 3.4 | |
| 1088 */ | |
| 1089 UTEXT_PROVIDER_HAS_META_DATA = 4, | |
| 1090 /** | |
| 1091 * Text provider owns the text storage. | |
| 1092 * Generally occurs as the result of a deep clone of the UText. | |
| 1093 * When closing the UText, the associated text must | |
| 1094 * also be closed/deleted/freed/ whatever is appropriate. | |
| 1095 * @stable ICU 3.6 | |
| 1096 */ | |
| 1097 UTEXT_PROVIDER_OWNS_TEXT = 5 | |
| 1098 }; | |
| 1099 | |
| 1100 /** | |
| 1101 * Function type declaration for UText.clone(). | |
| 1102 * | |
| 1103 * clone a UText. Much like opening a UText where the source text is itself | |
| 1104 * another UText. | |
| 1105 * | |
| 1106 * A deep clone will copy both the UText data structures and the underlying te
xt. | |
| 1107 * The original and cloned UText will operate completely independently; modifi
cations | |
| 1108 * made to the text in one will not affect the other. Text providers are not | |
| 1109 * required to support deep clones. The user of clone() must check the status
return | |
| 1110 * and be prepared to handle failures. | |
| 1111 * | |
| 1112 * A shallow clone replicates only the UText data structures; it does not make | |
| 1113 * a copy of the underlying text. Shallow clones can be used as an efficient
way to | |
| 1114 * have multiple iterators active in a single text string that is not being | |
| 1115 * modified. | |
| 1116 * | |
| 1117 * A shallow clone operation must not fail except for truly exceptional condit
ions such | |
| 1118 * as memory allocation failures. | |
| 1119 * | |
| 1120 * A UText and its clone may be safely concurrently accessed by separate threa
ds. | |
| 1121 * This is true for both shallow and deep clones. | |
| 1122 * It is the responsibility of the Text Provider to ensure that this thread sa
fety | |
| 1123 * constraint is met. | |
| 1124 | |
| 1125 * | |
| 1126 * @param dest A UText struct to be filled in with the result of the clone o
peration, | |
| 1127 * or NULL if the clone function should heap-allocate a new UTex
t struct. | |
| 1128 * @param src The UText to be cloned. | |
| 1129 * @param deep TRUE to request a deep clone, FALSE for a shallow clone. | |
| 1130 * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERR
OR | |
| 1131 * should be returned if the text provider is unable to clone th
e | |
| 1132 * original text. | |
| 1133 * @return The newly created clone, or NULL if the clone operation faile
d. | |
| 1134 * | |
| 1135 * @stable ICU 3.4 | |
| 1136 */ | |
| 1137 typedef UText * U_CALLCONV | |
| 1138 UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status); | |
| 1139 | |
| 1140 | |
| 1141 /** | |
| 1142 * Function type declaration for UText.nativeLength(). | |
| 1143 * | |
| 1144 * @param ut the UText to get the length of. | |
| 1145 * @return the length, in the native units of the original text string. | |
| 1146 * @see UText | |
| 1147 * @stable ICU 3.4 | |
| 1148 */ | |
| 1149 typedef int64_t U_CALLCONV | |
| 1150 UTextNativeLength(UText *ut); | |
| 1151 | |
| 1152 /** | |
| 1153 * Function type declaration for UText.access(). Get the description of the tex
t chunk | |
| 1154 * containing the text at a requested native index. The UText's iteration | |
| 1155 * position will be left at the requested index. If the index is out | |
| 1156 * of bounds, the iteration position will be left at the start or end | |
| 1157 * of the string, as appropriate. | |
| 1158 * | |
| 1159 * Chunks must begin and end on code point boundaries. A single code point | |
| 1160 * comprised of multiple storage units must never span a chunk boundary. | |
| 1161 * | |
| 1162 * | |
| 1163 * @param ut the UText being accessed. | |
| 1164 * @param nativeIndex Requested index of the text to be accessed. | |
| 1165 * @param forward If TRUE, then the returned chunk must contain text | |
| 1166 * starting from the index, so that start<=index<limit. | |
| 1167 * If FALSE, then the returned chunk must contain text | |
| 1168 * before the index, so that start<index<=limit. | |
| 1169 * @return True if the requested index could be accessed. The chunk | |
| 1170 * will contain the requested text. | |
| 1171 * False value if a chunk cannot be accessed | |
| 1172 * (the requested index is out of bounds). | |
| 1173 * | |
| 1174 * @see UText | |
| 1175 * @stable ICU 3.4 | |
| 1176 */ | |
| 1177 typedef UBool U_CALLCONV | |
| 1178 UTextAccess(UText *ut, int64_t nativeIndex, UBool forward); | |
| 1179 | |
| 1180 /** | |
| 1181 * Function type declaration for UText.extract(). | |
| 1182 * | |
| 1183 * Extract text from a UText into a UChar buffer. The range of text to be extra
cted | |
| 1184 * is specified in the native indices of the UText provider. These may not nece
ssarily | |
| 1185 * be UTF-16 indices. | |
| 1186 * <p> | |
| 1187 * The size (number of 16 bit UChars) in the data to be extracted is returned.
The | |
| 1188 * full amount is returned, even when the specified buffer size is smaller. | |
| 1189 * <p> | |
| 1190 * The extracted string will (if you are a user) / must (if you are a text provi
der) | |
| 1191 * be NUL-terminated if there is sufficient space in the destination buffer. | |
| 1192 * | |
| 1193 * @param ut the UText from which to extract data. | |
| 1194 * @param nativeStart the native index of the first character to extract. | |
| 1195 * @param nativeLimit the native string index of the position following the l
ast | |
| 1196 * character to extract. | |
| 1197 * @param dest the UChar (UTF-16) buffer into which the extracted text
is placed | |
| 1198 * @param destCapacity The size, in UChars, of the destination buffer. May be
zero | |
| 1199 * for precomputing the required size. | |
| 1200 * @param status receives any error status. | |
| 1201 * If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars fo
r | |
| 1202 * preflighting. | |
| 1203 * @return Number of UChars in the data. Does not include a trailing NUL. | |
| 1204 * | |
| 1205 * @stable ICU 3.4 | |
| 1206 */ | |
| 1207 typedef int32_t U_CALLCONV | |
| 1208 UTextExtract(UText *ut, | |
| 1209 int64_t nativeStart, int64_t nativeLimit, | |
| 1210 UChar *dest, int32_t destCapacity, | |
| 1211 UErrorCode *status); | |
| 1212 | |
| 1213 /** | |
| 1214 * Function type declaration for UText.replace(). | |
| 1215 * | |
| 1216 * Replace a range of the original text with a replacement text. | |
| 1217 * | |
| 1218 * Leaves the current iteration position at the position following the | |
| 1219 * newly inserted replacement text. | |
| 1220 * | |
| 1221 * This function need only be implemented on UText types that support writing. | |
| 1222 * | |
| 1223 * When using this function, there should be only a single UText opened onto the | |
| 1224 * underlying native text string. The function is responsible for updating the | |
| 1225 * text chunk within the UText to reflect the updated iteration position, | |
| 1226 * taking into account any changes to the underlying string's structure caused | |
| 1227 * by the replace operation. | |
| 1228 * | |
| 1229 * @param ut the UText representing the text to be operated on. | |
| 1230 * @param nativeStart the index of the start of the region to be replaced | |
| 1231 * @param nativeLimit the index of the character following the region to be
replaced. | |
| 1232 * @param replacementText pointer to the replacement text | |
| 1233 * @param replacmentLength length of the replacement text in UChars, or -1 if th
e text is NUL terminated. | |
| 1234 * @param status receives any error status. Possible errors include | |
| 1235 * U_NO_WRITE_PERMISSION | |
| 1236 * | |
| 1237 * @return The signed number of (native) storage units by which | |
| 1238 * the length of the text expanded or contracted. | |
| 1239 * | |
| 1240 * @stable ICU 3.4 | |
| 1241 */ | |
| 1242 typedef int32_t U_CALLCONV | |
| 1243 UTextReplace(UText *ut, | |
| 1244 int64_t nativeStart, int64_t nativeLimit, | |
| 1245 const UChar *replacementText, int32_t replacmentLength, | |
| 1246 UErrorCode *status); | |
| 1247 | |
| 1248 /** | |
| 1249 * Function type declaration for UText.copy(). | |
| 1250 * | |
| 1251 * Copy or move a substring from one position to another within the text, | |
| 1252 * while retaining any metadata associated with the text. | |
| 1253 * This function is used to duplicate or reorder substrings. | |
| 1254 * The destination index must not overlap the source range. | |
| 1255 * | |
| 1256 * The text to be copied or moved is inserted at destIndex; | |
| 1257 * it does not replace or overwrite any existing text. | |
| 1258 * | |
| 1259 * This function need only be implemented for UText types that support writing. | |
| 1260 * | |
| 1261 * When using this function, there should be only a single UText opened onto the | |
| 1262 * underlying native text string. The function is responsible for updating the | |
| 1263 * text chunk within the UText to reflect the updated iteration position, | |
| 1264 * taking into account any changes to the underlying string's structure caused | |
| 1265 * by the copy or move operation. | |
| 1266 * | |
| 1267 * @param ut The UText representing the text to be operated on. | |
| 1268 * @param nativeStart The index of the start of the region to be copied or move
d | |
| 1269 * @param nativeLimit The index of the character following the region to be cop
ied or moved. | |
| 1270 * @param nativeDest The destination index to which the source substring is co
pied or moved. | |
| 1271 * @param move If TRUE, then the substring is moved, not copied/duplicat
ed. | |
| 1272 * @param status receives any error status. Possible errors include U_NO_
WRITE_PERMISSION | |
| 1273 * | |
| 1274 * @stable ICU 3.4 | |
| 1275 */ | |
| 1276 typedef void U_CALLCONV | |
| 1277 UTextCopy(UText *ut, | |
| 1278 int64_t nativeStart, int64_t nativeLimit, | |
| 1279 int64_t nativeDest, | |
| 1280 UBool move, | |
| 1281 UErrorCode *status); | |
| 1282 | |
| 1283 /** | |
| 1284 * Function type declaration for UText.mapOffsetToNative(). | |
| 1285 * Map from the current UChar offset within the current text chunk to | |
| 1286 * the corresponding native index in the original source text. | |
| 1287 * | |
| 1288 * This is required only for text providers that do not use native UTF-16 indexe
s. | |
| 1289 * | |
| 1290 * @param ut the UText. | |
| 1291 * @return Absolute (native) index corresponding to chunkOffset in the current c
hunk. | |
| 1292 * The returned native index should always be to a code point boundary. | |
| 1293 * | |
| 1294 * @stable ICU 3.4 | |
| 1295 */ | |
| 1296 typedef int64_t U_CALLCONV | |
| 1297 UTextMapOffsetToNative(const UText *ut); | |
| 1298 | |
| 1299 /** | |
| 1300 * Function type declaration for UText.mapIndexToUTF16(). | |
| 1301 * Map from a native index to a UChar offset within a text chunk. | |
| 1302 * Behavior is undefined if the native index does not fall within the | |
| 1303 * current chunk. | |
| 1304 * | |
| 1305 * This function is required only for text providers that do not use native UTF-
16 indexes. | |
| 1306 * | |
| 1307 * @param ut The UText containing the text chunk. | |
| 1308 * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->
limit. | |
| 1309 * @return Chunk-relative UTF-16 offset corresponding to the specifie
d native | |
| 1310 * index. | |
| 1311 * | |
| 1312 * @stable ICU 3.4 | |
| 1313 */ | |
| 1314 typedef int32_t U_CALLCONV | |
| 1315 UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex); | |
| 1316 | |
| 1317 | |
| 1318 /** | |
| 1319 * Function type declaration for UText.utextClose(). | |
| 1320 * | |
| 1321 * A Text Provider close function is only required for provider types that make | |
| 1322 * allocations in their open function (or other functions) that must be | |
| 1323 * cleaned when the UText is closed. | |
| 1324 * | |
| 1325 * The allocation of the UText struct itself and any "extra" storage | |
| 1326 * associated with the UText is handled by the common UText implementation | |
| 1327 * and does not require provider specific cleanup in a close function. | |
| 1328 * | |
| 1329 * Most UText provider implementations do not need to implement this function. | |
| 1330 * | |
| 1331 * @param ut A UText object to be closed. | |
| 1332 * | |
| 1333 * @stable ICU 3.4 | |
| 1334 */ | |
| 1335 typedef void U_CALLCONV | |
| 1336 UTextClose(UText *ut); | |
| 1337 | |
| 1338 | |
| 1339 /** | |
| 1340 * (public) Function dispatch table for UText. | |
| 1341 * Conceptually very much like a C++ Virtual Function Table. | |
| 1342 * This struct defines the organization of the table. | |
| 1343 * Each text provider implementation must provide an | |
| 1344 * actual table that is initialized with the appropriate functions | |
| 1345 * for the type of text being handled. | |
| 1346 * @stable ICU 3.6 | |
| 1347 */ | |
| 1348 struct UTextFuncs { | |
| 1349 /** | |
| 1350 * (public) Function table size, sizeof(UTextFuncs) | |
| 1351 * Intended for use should the table grow to accommodate added | |
| 1352 * functions in the future, to allow tests for older format | |
| 1353 * function tables that do not contain the extensions. | |
| 1354 * | |
| 1355 * Fields are placed for optimal alignment on | |
| 1356 * 32/64/128-bit-pointer machines, by normally grouping together | |
| 1357 * 4 32-bit fields, | |
| 1358 * 4 pointers, | |
| 1359 * 2 64-bit fields | |
| 1360 * in sequence. | |
| 1361 * @stable ICU 3.6 | |
| 1362 */ | |
| 1363 int32_t tableSize; | |
| 1364 | |
| 1365 /** | |
| 1366 * (private) Alignment padding. | |
| 1367 * Do not use, reserved for use by the UText framework only. | |
| 1368 * @internal | |
| 1369 */ | |
| 1370 int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserv
ed3; | |
| 1371 | |
| 1372 | |
| 1373 /** | |
| 1374 * (public) Function pointer for UTextClone | |
| 1375 * | |
| 1376 * @see UTextClone | |
| 1377 * @stable ICU 3.6 | |
| 1378 */ | |
| 1379 UTextClone *clone; | |
| 1380 | |
| 1381 /** | |
| 1382 * (public) function pointer for UTextLength | |
| 1383 * May be expensive to compute! | |
| 1384 * | |
| 1385 * @see UTextLength | |
| 1386 * @stable ICU 3.6 | |
| 1387 */ | |
| 1388 UTextNativeLength *nativeLength; | |
| 1389 | |
| 1390 /** | |
| 1391 * (public) Function pointer for UTextAccess. | |
| 1392 * | |
| 1393 * @see UTextAccess | |
| 1394 * @stable ICU 3.6 | |
| 1395 */ | |
| 1396 UTextAccess *access; | |
| 1397 | |
| 1398 /** | |
| 1399 * (public) Function pointer for UTextExtract. | |
| 1400 * | |
| 1401 * @see UTextExtract | |
| 1402 * @stable ICU 3.6 | |
| 1403 */ | |
| 1404 UTextExtract *extract; | |
| 1405 | |
| 1406 /** | |
| 1407 * (public) Function pointer for UTextReplace. | |
| 1408 * | |
| 1409 * @see UTextReplace | |
| 1410 * @stable ICU 3.6 | |
| 1411 */ | |
| 1412 UTextReplace *replace; | |
| 1413 | |
| 1414 /** | |
| 1415 * (public) Function pointer for UTextCopy. | |
| 1416 * | |
| 1417 * @see UTextCopy | |
| 1418 * @stable ICU 3.6 | |
| 1419 */ | |
| 1420 UTextCopy *copy; | |
| 1421 | |
| 1422 /** | |
| 1423 * (public) Function pointer for UTextMapOffsetToNative. | |
| 1424 * | |
| 1425 * @see UTextMapOffsetToNative | |
| 1426 * @stable ICU 3.6 | |
| 1427 */ | |
| 1428 UTextMapOffsetToNative *mapOffsetToNative; | |
| 1429 | |
| 1430 /** | |
| 1431 * (public) Function pointer for UTextMapNativeIndexToUTF16. | |
| 1432 * | |
| 1433 * @see UTextMapNativeIndexToUTF16 | |
| 1434 * @stable ICU 3.6 | |
| 1435 */ | |
| 1436 UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16; | |
| 1437 | |
| 1438 /** | |
| 1439 * (public) Function pointer for UTextClose. | |
| 1440 * | |
| 1441 * @see UTextClose | |
| 1442 * @stable ICU 3.6 | |
| 1443 */ | |
| 1444 UTextClose *close; | |
| 1445 | |
| 1446 /** | |
| 1447 * (private) Spare function pointer | |
| 1448 * @internal | |
| 1449 */ | |
| 1450 UTextClose *spare1; | |
| 1451 | |
| 1452 /** | |
| 1453 * (private) Spare function pointer | |
| 1454 * @internal | |
| 1455 */ | |
| 1456 UTextClose *spare2; | |
| 1457 | |
| 1458 /** | |
| 1459 * (private) Spare function pointer | |
| 1460 * @internal | |
| 1461 */ | |
| 1462 UTextClose *spare3; | |
| 1463 | |
| 1464 }; | |
| 1465 /** | |
| 1466 * Typedef so the UText function dispatch table can be named without the struct keyword. | |
| 1467 * @see UTextFuncs | |
| 1468 */ | |
| 1469 typedef struct UTextFuncs UTextFuncs; | |
| 1470 | |
| 1471 /** | |
| 1472 * UText struct. Provides the interface between the generic UText access cod
e | |
| 1473 * and the UText provider code that works on specific kinds of | |
| 1474 * text (UTF-8, noncontiguous UTF-16, whatever.) | |
| 1475 * | |
| 1476 * Applications that are using predefined types of text provid
ers | |
| 1477 * to pass text data to ICU services will have no need to view
the | |
| 1478 * internals of the UText structs that they open. | |
| 1479 * | |
| 1480 * @stable ICU 3.6 | |
| 1481 */ | |
| 1482 struct UText { | |
| 1483 /** | |
| 1484 * (private) Magic. Used to help detect when UText functions are hande
d | |
| 1485 * invalid or uninitialized UText structs. | |
| 1486 * utext_openXYZ() functions take an initialized, | |
| 1487 * but not necessarily open, UText struct as an | |
| 1488 * optional fill-in parameter. This magic field | |
| 1489 * is used to check for that initialization. | |
| 1490 * Text provider close functions must NOT clear | |
| 1491 * the magic field because that would prevent | |
| 1492 * reuse of the UText struct. | |
| 1493 * @internal | |
| 1494 */ | |
| 1495 uint32_t magic; | |
| 1496 | |
| 1497 | |
| 1498 /** | |
| 1499 * (private) Flags for managing the allocation and freeing of | |
| 1500 * memory associated with this UText. | |
| 1501 * @internal | |
| 1502 */ | |
| 1503 int32_t flags; | |
| 1504 | |
| 1505 | |
| 1506 /** | |
| 1507 * Text provider properties. This set of flags is maintained by the | |
| 1508 * text provider implementation. | |
| 1509 * @stable ICU 3.4 | |
| 1510 */ | |
| 1511 int32_t providerProperties; | |
| 1512 | |
| 1513 /** | |
| 1514 * (public) sizeOfStruct=sizeof(UText) | |
| 1515 * Allows possible backward compatible extension. | |
| 1516 * | |
| 1517 * @stable ICU 3.4 | |
| 1518 */ | |
| 1519 int32_t sizeOfStruct; | |
| 1520 | |
| 1521 /* ------ 16 byte alignment boundary ----------- */ | |
| 1522 | |
| 1523 | |
| 1524 /** | |
| 1525 * (protected) Native index of the first character position following | |
| 1526 * the current chunk. | |
| 1527 * @stable ICU 3.6 | |
| 1528 */ | |
| 1529 int64_t chunkNativeLimit; | |
| 1530 | |
| 1531 /** | |
| 1532 * (protected) Size in bytes of the extra space (pExtra). | |
| 1533 * @stable ICU 3.4 | |
| 1534 */ | |
| 1535 int32_t extraSize; | |
| 1536 | |
| 1537 /** | |
| 1538 * (protected) The highest chunk offset where native indexing and | |
| 1539 * chunk (UTF-16) indexing correspond. For UTF-16 sources, value | |
| 1540 * will be equal to chunkLength. | |
| 1541 * | |
| 1542 * @stable ICU 3.6 | |
| 1543 */ | |
| 1544 int32_t nativeIndexingLimit; | |
| 1545 | |
| 1546 /* ---- 16 byte alignment boundary------ */ | |
| 1547 | |
| 1548 /** | |
| 1549 * (protected) Native index of the first character in the text chunk. | |
| 1550 * @stable ICU 3.6 | |
| 1551 */ | |
| 1552 int64_t chunkNativeStart; | |
| 1553 | |
| 1554 /** | |
| 1555 * (protected) Current iteration position within the text chunk (UTF-16 buf
fer). | |
| 1556 * This is the index to the character that will be returned by utext_next32
(). | |
| 1557 * @stable ICU 3.6 | |
| 1558 */ | |
| 1559 int32_t chunkOffset; | |
| 1560 | |
| 1561 /** | |
| 1562 * (protected) Length of the text chunk (UTF-16 buffer), in UChars. | |
| 1563 * @stable ICU 3.6 | |
| 1564 */ | |
| 1565 int32_t chunkLength; | |
| 1566 | |
| 1567 /* ---- 16 byte alignment boundary-- */ | |
| 1568 | |
| 1569 | |
| 1570 /** | |
| 1571 * (protected) pointer to a chunk of text in UTF-16 format. | |
| 1572 * May refer either to original storage of the source of the text, or | |
| 1573 * if conversion was required, to a buffer owned by the UText. | |
| 1574 * @stable ICU 3.6 | |
| 1575 */ | |
| 1576 const UChar *chunkContents; | |
| 1577 | |
| 1578 /** | |
| 1579 * (public) Pointer to Dispatch table for accessing functions for this
UText. | |
| 1580 * @stable ICU 3.6 | |
| 1581 */ | |
| 1582 const UTextFuncs *pFuncs; | |
| 1583 | |
| 1584 /** | |
| 1585 * (protected) Pointer to additional space requested by the | |
| 1586 * text provider during the utext_open operation. | |
| 1587 * @stable ICU 3.4 | |
| 1588 */ | |
| 1589 void *pExtra; | |
| 1590 | |
| 1591 /** | |
| 1592 * (protected) Pointer to string or text-containing object or similar. | |
| 1593 * This is the source of the text that this UText is wrapping, in a format | |
| 1594 * that is known to the text provider functions. | |
| 1595 * @stable ICU 3.4 | |
| 1596 */ | |
| 1597 const void *context; | |
| 1598 | |
| 1599 /* --- 16 byte alignment boundary--- */ | |
| 1600 | |
| 1601 /** | |
| 1602 * (protected) Pointer fields available for use by the text provider. | |
| 1603 * Not used by UText common code. | |
| 1604 * @stable ICU 3.6 | |
| 1605 */ | |
| 1606 const void *p; | |
| 1607 /** | |
| 1608 * (protected) Pointer fields available for use by the text provider. | |
| 1609 * Not used by UText common code. | |
| 1610 * @stable ICU 3.6 | |
| 1611 */ | |
| 1612 const void *q; | |
| 1613 /** | |
| 1614 * (protected) Pointer fields available for use by the text provider. | |
| 1615 * Not used by UText common code. | |
| 1616 * @stable ICU 3.6 | |
| 1617 */ | |
| 1618 const void *r; | |
| 1619 | |
| 1620 /** | |
| 1621 * Private field reserved for future use by the UText framework | |
| 1622 * itself. This is not to be touched by the text providers. | |
| 1623 * @internal ICU 3.4 | |
| 1624 */ | |
| 1625 void *privP; | |
| 1626 | |
| 1627 | |
| 1628 /* --- 16 byte alignment boundary--- */ | |
| 1629 | |
| 1630 | |
| 1631 /** | |
| 1632 * (protected) Integer field reserved for use by the text provider. | |
| 1633 * Not used by the UText framework, or by the client (user) of the UText. | |
| 1634 * @stable ICU 3.4 | |
| 1635 */ | |
| 1636 int64_t a; | |
| 1637 | |
| 1638 /** | |
| 1639 * (protected) Integer field reserved for use by the text provider. | |
| 1640 * Not used by the UText framework, or by the client (user) of the UText. | |
| 1641 * @stable ICU 3.4 | |
| 1642 */ | |
| 1643 int32_t b; | |
| 1644 | |
| 1645 /** | |
| 1646 * (protected) Integer field reserved for use by the text provider. | |
| 1647 * Not used by the UText framework, or by the client (user) of the UText. | |
| 1648 * @stable ICU 3.4 | |
| 1649 */ | |
| 1650 int32_t c; | |
| 1651 | |
| 1652 /* ---- 16 byte alignment boundary---- */ | |
| 1653 | |
| 1654 | |
| 1655 /** | |
| 1656 * Private field reserved for future use by the UText framework | |
| 1657 * itself. This is not to be touched by the text providers. | |
| 1658 * @internal ICU 3.4 | |
| 1659 */ | |
| 1660 int64_t privA; | |
| 1661 /** | |
| 1662 * Private field reserved for future use by the UText framework | |
| 1663 * itself. This is not to be touched by the text providers. | |
| 1664 * @internal ICU 3.4 | |
| 1665 */ | |
| 1666 int32_t privB; | |
| 1667 /** | |
| 1668 * Private field reserved for future use by the UText framework | |
| 1669 * itself. This is not to be touched by the text providers. | |
| 1670 * @internal ICU 3.4 | |
| 1671 */ | |
| 1672 int32_t privC; | |
| 1673 }; | |
| 1674 | |
| 1675 | |
| 1676 /** | |
| 1677 * Common function for use by Text Provider implementations to allocate and/or
initialize | |
| 1678 * a new UText struct. To be called in the implementation of utext_open() func
tions. | |
| 1679 * If the supplied UText parameter is null, a new UText struct will be allocate
d on the heap. | |
| 1680 * If the supplied UText is already open, the provider's close function will be
called | |
| 1681 * so that the struct can be reused by the open that is in progress. | |
| 1682 * | |
| 1683 * @param ut Pointer to a UText struct to be re-used, or null if a new UText | |
| 1684 * should be allocated. | |
| 1685 * @param extraSpace The amount of additional space (presumably in bytes, as for UText.extraSize) to be allocated as part | |
| 1686 * of this UText, for use by types of providers that require | |
| 1687 * additional storage. | |
| 1688 * @param status Errors are returned here. | |
| 1689 * @return pointer to the UText, allocated if necessary, with extra space set up
if requested. | |
| 1690 * @stable ICU 3.4 | |
| 1691 */ | |
| 1692 U_STABLE UText * U_EXPORT2 | |
| 1693 utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status); | |
| 1694 | |
| 1695 /** | |
| 1696 * @internal | |
| 1697 * Value stored in the UText magic field to help identify correctly initialized UText structs. | |
| 1698 * Note: must be publicly visible so that UTEXT_INITIALIZER can access it. | |
| 1699 */ | |
| 1700 enum { | |
| 1701 UTEXT_MAGIC = 0x345ad82c | |
| 1702 }; | |
| 1703 | |
| 1704 /** | |
| 1705 * Initializer to be used with local (stack) instances of a UText | |
| 1706 * struct. UText structs must be initialized before passing | |
| 1707 * them to one of the utext_open functions. | |
| 1708 * | |
| 1709 * @stable ICU 3.6 | |
| 1710 */ | |
| 1711 #define UTEXT_INITIALIZER { \ | |
| 1712 UTEXT_MAGIC, /* magic */ \ | |
| 1713 0, /* flags */ \ | |
| 1714 0, /* providerProperties */ \ | |
| 1715 sizeof(UText), /* sizeOfStruct */ \ | |
| 1716 0, /* chunkNativeLimit */ \ | |
| 1717 0, /* extraSize */ \ | |
| 1718 0, /* nativeIndexingLimit */ \ | |
| 1719 0, /* chunkNativeStart */ \ | |
| 1720 0, /* chunkOffset */ \ | |
| 1721 0, /* chunkLength */ \ | |
| 1722 NULL, /* chunkContents */ \ | |
| 1723 NULL, /* pFuncs */ \ | |
| 1724 NULL, /* pExtra */ \ | |
| 1725 NULL, /* context */ \ | |
| 1726 NULL, NULL, NULL, /* p, q, r */ \ | |
| 1727 NULL, /* privP */ \ | |
| 1728 0, 0, 0, /* a, b, c */ \ | |
| 1729 0, 0, 0 /* privA, privB, privC */ \ | |
| 1730 } | |
| 1731 | |
| 1732 | |
| 1733 U_CDECL_END | |
| 1734 | |
| 1735 | |
| 1736 | |
| 1737 #endif | |
| OLD | NEW |