OLD | NEW |
| (Empty) |
1 /* | |
2 ******************************************************************************* | |
3 * | |
4 * Copyright (C) 2002-2006,2009 International Business Machines | |
5 * Corporation and others. All Rights Reserved. | |
6 * | |
7 ******************************************************************************* | |
8 * file name: uiter.h | |
9 * encoding: US-ASCII | |
10 * tab size: 8 (not used) | |
11 * indentation:4 | |
12 * | |
13 * created on: 2002jan18 | |
14 * created by: Markus W. Scherer | |
15 */ | |
16 | |
17 #ifndef __UITER_H__ | |
18 #define __UITER_H__ | |
19 | |
20 /** | |
21 * \file | |
22 * \brief C API: Unicode Character Iteration | |
23 * | |
24 * @see UCharIterator | |
25 */ | |
26 | |
27 #include "unicode/utypes.h" | |
28 | |
29 #if U_SHOW_CPLUSPLUS_API | |
30 U_NAMESPACE_BEGIN | |
31 | |
32 class CharacterIterator; | |
33 class Replaceable; | |
34 | |
35 U_NAMESPACE_END | |
36 #endif | |
37 | |
38 U_CDECL_BEGIN | |
39 | |
40 struct UCharIterator; | |
41 typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */ | |
42 | |
43 /** | |
44 * Origin constants for UCharIterator.getIndex() and UCharIterator.move(). | |
45 * @see UCharIteratorMove | |
46 * @see UCharIterator | |
47 * @stable ICU 2.1 | |
48 */ | |
49 typedef enum UCharIteratorOrigin { | |
50 UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH | |
51 } UCharIteratorOrigin; | |
52 | |
53 /** Constants for UCharIterator. @stable ICU 2.6 */ | |
54 enum { | |
55 /** | |
56 * Constant value that may be returned by UCharIteratorMove | |
57 * indicating that the final UTF-16 index is not known, but that the move succeeded. | |
58 * This can occur when moving relative to limit or length, or | |
59 * when moving relative to the current index after a setState() | |
60 * when the current UTF-16 index is not known. | |
61 * | |
62 * It would be very inefficient to have to count from the beginning of the text | |
63 * just to get the current/limit/length index after moving relative to it. | |
64 * The actual index can be determined with getIndex(UITER_CURRENT) | |
65 * which will count the UChars if necessary. | |
66 * | |
67 * @stable ICU 2.6 | |
68 */ | |
69 UITER_UNKNOWN_INDEX=-2 | |
70 }; | |
71 | |
72 | |
73 /** | |
74 * Constant for UCharIterator getState() indicating an error or | |
75 * an unknown state. | |
76 * Returned by uiter_getState()/UCharIteratorGetState | |
77 * when an error occurs. | |
78 * Also, some UCharIterator implementations may not be able to return | |
79 * a valid state for each position. This will be clearly documented | |
80 * for each such iterator (none of the public ones here). | |
81 * | |
82 * @stable ICU 2.6 | |
83 */ | |
84 #define UITER_NO_STATE ((uint32_t)0xffffffff) | |
85 | |
86 /** | |
87 * Function type declaration for UCharIterator.getIndex(). | |
88 * | |
89 * Gets the current position, or the start or limit of the | |
90 * iteration range. | |
91 * | |
92 * This function may perform slowly for UITER_CURRENT after setState() was called, | |
93 * or for UITER_LENGTH, because an iterator implementation may have to count | |
94 * UChars if the underlying storage is not UTF-16. | |
95 * | |
96 * @param iter the UCharIterator structure ("this pointer") | |
97 * @param origin get the 0, start, limit, length, or current index | |
98 * @return the requested index, or U_SENTINEL in an error condition | |
99 * | |
100 * @see UCharIteratorOrigin | |
101 * @see UCharIterator | |
102 * @stable ICU 2.1 | |
103 */ | |
104 typedef int32_t U_CALLCONV | |
105 UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); | |
106 | |
107 /** | |
108 * Function type declaration for UCharIterator.move(). | |
109 * | |
110 * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). | |
111 * | |
112 * Moves the current position relative to the start or limit of the | |
113 * iteration range, or relative to the current position itself. | |
114 * The movement is expressed in numbers of code units forward | |
115 * or backward by specifying a positive or negative delta. | |
116 * Out of bounds movement will be pinned to the start or limit. | |
117 * | |
118 * This function may perform slowly for moving relative to UITER_LENGTH | |
119 * because an iterator implementation may have to count the rest of the | |
120 * UChars if the native storage is not UTF-16. | |
121 * | |
122 * When moving relative to the limit or length, or | |
123 * relative to the current position after setState() was called, | |
124 * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient | |
125 * determination of the actual UTF-16 index. | |
126 * The actual index can be determined with getIndex(UITER_CURRENT) | |
127 * which will count the UChars if necessary. | |
128 * See UITER_UNKNOWN_INDEX for details. | |
129 * | |
130 * @param iter the UCharIterator structure ("this pointer") | |
131 * @param delta can be positive, zero, or negative | |
132 * @param origin move relative to the 0, start, limit, length, or current index | |
133 * @return the new index, or U_SENTINEL on an error condition, | |
134 * or UITER_UNKNOWN_INDEX when the index is not known. | |
135 * | |
136 * @see UCharIteratorOrigin | |
137 * @see UCharIterator | |
138 * @see UITER_UNKNOWN_INDEX | |
139 * @stable ICU 2.1 | |
140 */ | |
141 typedef int32_t U_CALLCONV | |
142 UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin
); | |
143 | |
144 /** | |
145 * Function type declaration for UCharIterator.hasNext(). | |
146 * | |
147 * Check if current() and next() can still | |
148 * return another code unit. | |
149 * | |
150 * @param iter the UCharIterator structure ("this pointer") | |
151 * @return boolean value for whether current() and next() can still return another code unit | |
152 * | |
153 * @see UCharIterator | |
154 * @stable ICU 2.1 | |
155 */ | |
156 typedef UBool U_CALLCONV | |
157 UCharIteratorHasNext(UCharIterator *iter); | |
158 | |
159 /** | |
160 * Function type declaration for UCharIterator.hasPrevious(). | |
161 * | |
162 * Check if previous() can still return another code unit. | |
163 * | |
164 * @param iter the UCharIterator structure ("this pointer") | |
165 * @return boolean value for whether previous() can still return another code unit | |
166 * | |
167 * @see UCharIterator | |
168 * @stable ICU 2.1 | |
169 */ | |
170 typedef UBool U_CALLCONV | |
171 UCharIteratorHasPrevious(UCharIterator *iter); | |
172 | |
173 /** | |
174 * Function type declaration for UCharIterator.current(). | |
175 * | |
176 * Return the code unit at the current position, | |
177 * or U_SENTINEL if there is none (index is at the limit). | |
178 * | |
179 * @param iter the UCharIterator structure ("this pointer") | |
180 * @return the current code unit | |
181 * | |
182 * @see UCharIterator | |
183 * @stable ICU 2.1 | |
184 */ | |
185 typedef UChar32 U_CALLCONV | |
186 UCharIteratorCurrent(UCharIterator *iter); | |
187 | |
188 /** | |
189 * Function type declaration for UCharIterator.next(). | |
190 * | |
191 * Return the code unit at the current index and increment | |
192 * the index (post-increment, like s[i++]), | |
193 * or return U_SENTINEL if there is none (index is at the limit). | |
194 * | |
195 * @param iter the UCharIterator structure ("this pointer") | |
196 * @return the current code unit (and post-increment the current index) | |
197 * | |
198 * @see UCharIterator | |
199 * @stable ICU 2.1 | |
200 */ | |
201 typedef UChar32 U_CALLCONV | |
202 UCharIteratorNext(UCharIterator *iter); | |
203 | |
204 /** | |
205 * Function type declaration for UCharIterator.previous(). | |
206 * | |
207 * Decrement the index and return the code unit from there | |
208 * (pre-decrement, like s[--i]), | |
209 * or return U_SENTINEL if there is none (index is at the start). | |
210 * | |
211 * @param iter the UCharIterator structure ("this pointer") | |
212 * @return the previous code unit (after pre-decrementing the current index) | |
213 * | |
214 * @see UCharIterator | |
215 * @stable ICU 2.1 | |
216 */ | |
217 typedef UChar32 U_CALLCONV | |
218 UCharIteratorPrevious(UCharIterator *iter); | |
219 | |
220 /** | |
221 * Function type declaration for UCharIterator.reservedFn(). | |
222 * Reserved for future use. | |
223 * | |
224 * @param iter the UCharIterator structure ("this pointer") | |
225 * @param something some integer argument | |
226 * @return some integer | |
227 * | |
228 * @see UCharIterator | |
229 * @stable ICU 2.1 | |
230 */ | |
231 typedef int32_t U_CALLCONV | |
232 UCharIteratorReserved(UCharIterator *iter, int32_t something); | |
233 | |
234 /** | |
235 * Function type declaration for UCharIterator.getState(). | |
236 * | |
237 * Get the "state" of the iterator in the form of a single 32-bit word. | |
238 * It is recommended that the state value be calculated to be as small as | |
239 * is feasible. For strings with limited lengths, fewer than 32 bits may | |
240 * be sufficient. | |
241 * | |
242 * This is used together with setState()/UCharIteratorSetState | |
243 * to save and restore the iterator position more efficiently than with | |
244 * getIndex()/move(). | |
245 * | |
246 * The iterator state is defined as a uint32_t value because it is designed | |
247 * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state | |
248 * of the character iterator. | |
249 * | |
250 * With some UCharIterator implementations (e.g., UTF-8), | |
251 * getting and setting the UTF-16 index with existing functions | |
252 * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but | |
253 * relatively slow because the iterator has to "walk" from a known index | |
254 * to the requested one. | |
255 * This takes more time the farther it needs to go. | |
256 * | |
257 * An opaque state value allows an iterator implementation to provide | |
258 * an internal index (UTF-8: the source byte array index) for | |
259 * fast, constant-time restoration. | |
260 * | |
261 * After calling setState(), a getIndex(UITER_CURRENT) may be slow because | |
262 * the UTF-16 index may not be restored as well, but the iterator can deliver | |
263 * the correct text contents and move relative to the current position | |
264 * without performance degradation. | |
265 * | |
266 * Some UCharIterator implementations may not be able to return | |
267 * a valid state for each position, in which case they return UITER_NO_STATE instead. | |
268 * This will be clearly documented for each such iterator (none of the public ones here). | |
269 * | |
270 * @param iter the UCharIterator structure ("this pointer") | |
271 * @return the state word | |
272 * | |
273 * @see UCharIterator | |
274 * @see UCharIteratorSetState | |
275 * @see UITER_NO_STATE | |
276 * @stable ICU 2.6 | |
277 */ | |
278 typedef uint32_t U_CALLCONV | |
279 UCharIteratorGetState(const UCharIterator *iter); | |
280 | |
281 /** | |
282 * Function type declaration for UCharIterator.setState(). | |
283 * | |
284 * Restore the "state" of the iterator using a state word from a getState() call. | |
285 * The iterator object need not be the same one as for which getState() was called, | |
286 * but it must be of the same type (set up using the same uiter_setXYZ function) | |
287 * and it must iterate over the same string | |
288 * (binary identical regardless of memory address). | |
289 * For more about the state word see UCharIteratorGetState. | |
290 * | |
291 * After calling setState(), a getIndex(UITER_CURRENT) may be slow because | |
292 * the UTF-16 index may not be restored as well, but the iterator can deliver | |
293 * the correct text contents and move relative to the current position | |
294 * without performance degradation. | |
295 * | |
296 * @param iter the UCharIterator structure ("this pointer") | |
297 * @param state the state word from a getState() call | |
298 * on a same-type, same-string iterator | |
299 * @param pErrorCode Must be a valid pointer to an error code value, | |
300 * which must not indicate a failure before the function call. | |
301 * | |
302 * @see UCharIterator | |
303 * @see UCharIteratorGetState | |
304 * @stable ICU 2.6 | |
305 */ | |
306 typedef void U_CALLCONV | |
307 UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCod
e); | |
308 | |
309 | |
310 /** | |
311 * C API for code unit iteration. | |
312 * This can be used as a C wrapper around | |
313 * CharacterIterator, Replaceable, or implemented using simple strings, etc. | |
314 * | |
315 * There are two roles for using UCharIterator: | |
316 * | |
317 * A "provider" sets the necessary function pointers and controls the "protected" | |
318 * fields of the UCharIterator structure. A "provider" passes a UCharIterator | |
319 * into C APIs that need a UCharIterator as an abstract, flexible string interface. | |
320 * | |
321 * Implementations of such C APIs are "callers" of UCharIterator functions; | |
322 * they only use the "public" function pointers and never access the "protected" | |
323 * fields directly. | |
324 * | |
325 * The current() and next() functions only check the current index against the | |
326 * limit, and previous() only checks the current index against the start, | |
327 * to see if the iterator already reached the end of the iteration range. | |
328 * | |
329 * The assumption - in all iterators - is that the index is moved via the API, | |
330 * which means it won't go out of bounds, or the index is modified by | |
331 * user code that knows enough about the iterator implementation to set valid | |
332 * index values. | |
333 * | |
334 * UCharIterator functions return code unit values 0..0xffff, | |
335 * or U_SENTINEL if the iteration bounds are reached. | |
336 * | |
337 * @stable ICU 2.1 | |
338 */ | |
339 struct UCharIterator { | |
340 /** | |
341 * (protected) Pointer to string or wrapped object or similar. | |
342 * Not used by caller. | |
343 * @stable ICU 2.1 | |
344 */ | |
345 const void *context; | |
346 | |
347 /** | |
348 * (protected) Length of string or similar. | |
349 * Not used by caller. | |
350 * @stable ICU 2.1 | |
351 */ | |
352 int32_t length; | |
353 | |
354 /** | |
355 * (protected) Start index or similar. | |
356 * Not used by caller. | |
357 * @stable ICU 2.1 | |
358 */ | |
359 int32_t start; | |
360 | |
361 /** | |
362 * (protected) Current index or similar. | |
363 * Not used by caller. | |
364 * @stable ICU 2.1 | |
365 */ | |
366 int32_t index; | |
367 | |
368 /** | |
369 * (protected) Limit index or similar. | |
370 * Not used by caller. | |
371 * @stable ICU 2.1 | |
372 */ | |
373 int32_t limit; | |
374 | |
375 /** | |
376 * (protected) Used by UTF-8 iterators and possibly others. | |
377 * @stable ICU 2.1 | |
378 */ | |
379 int32_t reservedField; | |
380 | |
381 /** | |
382 * (public) Returns the current position or the | |
383 * start or limit index of the iteration range. | |
384 * | |
385 * @see UCharIteratorGetIndex | |
386 * @stable ICU 2.1 | |
387 */ | |
388 UCharIteratorGetIndex *getIndex; | |
389 | |
390 /** | |
391 * (public) Moves the current position relative to the start or limit of the | |
392 * iteration range, or relative to the current position itself. | |
393 * The movement is expressed in numbers of code units forward | |
394 * or backward by specifying a positive or negative delta. | |
395 * | |
396 * @see UCharIteratorMove | |
397 * @stable ICU 2.1 | |
398 */ | |
399 UCharIteratorMove *move; | |
400 | |
401 /** | |
402 * (public) Check if current() and next() can still | |
403 * return another code unit. | |
404 * | |
405 * @see UCharIteratorHasNext | |
406 * @stable ICU 2.1 | |
407 */ | |
408 UCharIteratorHasNext *hasNext; | |
409 | |
410 /** | |
411 * (public) Check if previous() can still return another code unit. | |
412 * | |
413 * @see UCharIteratorHasPrevious | |
414 * @stable ICU 2.1 | |
415 */ | |
416 UCharIteratorHasPrevious *hasPrevious; | |
417 | |
418 /** | |
419 * (public) Return the code unit at the current position, | |
420 * or U_SENTINEL if there is none (index is at the limit). | |
421 * | |
422 * @see UCharIteratorCurrent | |
423 * @stable ICU 2.1 | |
424 */ | |
425 UCharIteratorCurrent *current; | |
426 | |
427 /** | |
428 * (public) Return the code unit at the current index and increment | |
429 * the index (post-increment, like s[i++]), | |
430 * or return U_SENTINEL if there is none (index is at the limit). | |
431 * | |
432 * @see UCharIteratorNext | |
433 * @stable ICU 2.1 | |
434 */ | |
435 UCharIteratorNext *next; | |
436 | |
437 /** | |
438 * (public) Decrement the index and return the code unit from there | |
439 * (pre-decrement, like s[--i]), | |
440 * or return U_SENTINEL if there is none (index is at the start). | |
441 * | |
442 * @see UCharIteratorPrevious | |
443 * @stable ICU 2.1 | |
444 */ | |
445 UCharIteratorPrevious *previous; | |
446 | |
447 /** | |
448 * (public) Reserved for future use. Currently NULL. | |
449 * | |
450 * @see UCharIteratorReserved | |
451 * @stable ICU 2.1 | |
452 */ | |
453 UCharIteratorReserved *reservedFn; | |
454 | |
455 /** | |
456 * (public) Return the state of the iterator, to be restored later with setState(). | |
457 * This function pointer is NULL if the iterator does not implement it. | |
458 * | |
459 * @see UCharIteratorGetState | |
460 * @stable ICU 2.6 | |
461 */ | |
462 UCharIteratorGetState *getState; | |
463 | |
464 /** | |
465 * (public) Restore the iterator state from the state word from a call | |
466 * to getState(). | |
467 * This function pointer is NULL if the iterator does not implement it. | |
468 * | |
469 * @see UCharIteratorSetState | |
470 * @stable ICU 2.6 | |
471 */ | |
472 UCharIteratorSetState *setState; | |
473 }; | |
474 | |
475 /** | |
476 * Helper function for UCharIterator to get the code point | |
477 * at the current index. | |
478 * | |
479 * Return the code point that includes the code unit at the current position, | |
480 * or U_SENTINEL if there is none (index is at the limit). | |
481 * If the current code unit is a lead or trail surrogate, | |
482 * then the following or preceding surrogate is used to form | |
483 * the code point value. | |
484 * | |
485 * @param iter the UCharIterator structure ("this pointer") | |
486 * @return the current code point | |
487 * | |
488 * @see UCharIterator | |
489 * @see U16_GET | |
490 * @see UnicodeString::char32At() | |
491 * @stable ICU 2.1 | |
492 */ | |
493 U_STABLE UChar32 U_EXPORT2 | |
494 uiter_current32(UCharIterator *iter); | |
495 | |
496 /** | |
497 * Helper function for UCharIterator to get the next code point. | |
498 * | |
499 * Return the code point at the current index and increment | |
500 * the index (post-increment, like s[i++]), | |
501 * or return U_SENTINEL if there is none (index is at the limit). | |
502 * | |
503 * @param iter the UCharIterator structure ("this pointer") | |
504 * @return the current code point (and post-increment the current index) | |
505 * | |
506 * @see UCharIterator | |
507 * @see U16_NEXT | |
508 * @stable ICU 2.1 | |
509 */ | |
510 U_STABLE UChar32 U_EXPORT2 | |
511 uiter_next32(UCharIterator *iter); | |
512 | |
513 /** | |
514 * Helper function for UCharIterator to get the previous code point. | |
515 * | |
516 * Decrement the index and return the code point from there | |
517 * (pre-decrement, like s[--i]), | |
518 * or return U_SENTINEL if there is none (index is at the start). | |
519 * | |
520 * @param iter the UCharIterator structure ("this pointer") | |
521 * @return the previous code point (after pre-decrementing the current index) | |
522 * | |
523 * @see UCharIterator | |
524 * @see U16_PREV | |
525 * @stable ICU 2.1 | |
526 */ | |
527 U_STABLE UChar32 U_EXPORT2 | |
528 uiter_previous32(UCharIterator *iter); | |
529 | |
530 /** | |
531 * Get the "state" of the iterator in the form of a single 32-bit word. | |
532 * This is a convenience function that calls iter->getState(iter) | |
533 * if iter->getState is not NULL; | |
534 * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. | |
535 * | |
536 * Some UCharIterator implementations may not be able to return | |
537 * a valid state for each position, in which case they return UITER_NO_STATE instead. | |
538 * This will be clearly documented for each such iterator (none of the public ones here). | |
539 * | |
540 * @param iter the UCharIterator structure ("this pointer") | |
541 * @return the state word | |
542 * | |
543 * @see UCharIterator | |
544 * @see UCharIteratorGetState | |
545 * @see UITER_NO_STATE | |
546 * @stable ICU 2.6 | |
547 */ | |
548 U_STABLE uint32_t U_EXPORT2 | |
549 uiter_getState(const UCharIterator *iter); | |
550 | |
551 /** | |
552 * Restore the "state" of the iterator using a state word from a getState() call. | |
553 * This is a convenience function that calls iter->setState(iter, state, pErrorCode) | |
554 * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set. | |
555 * | |
556 * @param iter the UCharIterator structure ("this pointer") | |
557 * @param state the state word from a getState() call | |
558 * on a same-type, same-string iterator | |
559 * @param pErrorCode Must be a valid pointer to an error code value, | |
560 * which must not indicate a failure before the function call. | |
561 * | |
562 * @see UCharIterator | |
563 * @see UCharIteratorSetState | |
564 * @stable ICU 2.6 | |
565 */ | |
566 U_STABLE void U_EXPORT2 | |
567 uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); | |
568 | |
569 /** | |
570 * Set up a UCharIterator to iterate over a string. | |
571 * | |
572 * Sets the UCharIterator function pointers for iteration over the string s | |
573 * with iteration boundaries start=index=0 and length=limit=string length. | |
574 * The "provider" may set the start, index, and limit values at any time | |
575 * within the range 0..length. | |
576 * The length field will be ignored. | |
577 * | |
578 * The string pointer s is set into UCharIterator.context without copying | |
579 * or reallocating the string contents. | |
580 * | |
581 * getState() simply returns the current index. | |
582 * move() will always return the final index. | |
583 * | |
584 * @param iter UCharIterator structure to be set for iteration | |
585 * @param s String to iterate over | |
586 * @param length Length of s, or -1 if NUL-terminated | |
587 * | |
588 * @see UCharIterator | |
589 * @stable ICU 2.1 | |
590 */ | |
591 U_STABLE void U_EXPORT2 | |
592 uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); | |
593 | |
594 /** | |
595 * Set up a UCharIterator to iterate over a UTF-16BE string | |
596 * (byte vector with a big-endian pair of bytes per UChar). | |
597 * | |
598 * Everything works just like with a normal UChar iterator (uiter_setString), | |
599 * except that UChars are assembled from byte pairs, | |
600 * and that the length argument here indicates an even number of bytes. | |
601 * | |
602 * getState() simply returns the current index. | |
603 * move() will always return the final index. | |
604 * | |
605 * @param iter UCharIterator structure to be set for iteration | |
606 * @param s UTF-16BE string to iterate over | |
607 * @param length Length of s as an even number of bytes, or -1 if NUL-terminated | |
608 * (NUL means pair of 0 bytes at even index from s) | |
609 * | |
610 * @see UCharIterator | |
611 * @see uiter_setString | |
612 * @stable ICU 2.6 | |
613 */ | |
614 U_STABLE void U_EXPORT2 | |
615 uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); | |
616 | |
617 /** | |
618 * Set up a UCharIterator to iterate over a UTF-8 string. | |
619 * | |
620 * Sets the UCharIterator function pointers for iteration over the UTF-8 string s | |
621 * with UTF-8 iteration boundaries 0 and length. | |
622 * The implementation counts the UTF-16 index on the fly and | |
623 * lazily evaluates the UTF-16 length of the text. | |
624 * | |
625 * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. | |
626 * When the reservedField is not 0, then it contains a supplementary code point | |
627 * and the UTF-16 index is between the two corresponding surrogates. | |
628 * At that point, the UTF-8 index is behind that code point. | |
629 * | |
630 * The UTF-8 string pointer s is set into UCharIterator.context without copying | |
631 * or reallocating the string contents. | |
632 * | |
633 * getState() returns a state value consisting of | |
634 * - the current UTF-8 source byte index (bits 31..1) | |
635 * - a flag (bit 0) that indicates whether the UChar position is in the middle | |
636 * of a surrogate pair | |
637 * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) | |
638 * | |
639 * getState() cannot also encode the UTF-16 index in the state value. | |
640 * move(relative to limit or length), or | |
641 * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. | |
642 * | |
643 * @param iter UCharIterator structure to be set for iteration | |
644 * @param s UTF-8 string to iterate over | |
645 * @param length Length of s in bytes, or -1 if NUL-terminated | |
646 * | |
647 * @see UCharIterator | |
648 * @stable ICU 2.6 | |
649 */ | |
650 U_STABLE void U_EXPORT2 | |
651 uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); | |
652 | |
653 #if U_SHOW_CPLUSPLUS_API | |
654 | |
655 /** | |
656 * Set up a UCharIterator to wrap around a C++ CharacterIterator. | |
657 * | |
658 * Sets the UCharIterator function pointers for iteration using the | |
659 * CharacterIterator charIter. | |
660 * | |
661 * The CharacterIterator pointer charIter is set into UCharIterator.context | |
662 * without copying or cloning the CharacterIterator object. | |
663 * The other "protected" UCharIterator fields are set to 0 and will be ignored. | |
664 * The iteration index and boundaries are controlled by the CharacterIterator. | |
665 * | |
666 * getState() simply returns the current index. | |
667 * move() will always return the final index. | |
668 * | |
669 * @param iter UCharIterator structure to be set for iteration | |
670 * @param charIter CharacterIterator to wrap | |
671 * | |
672 * @see UCharIterator | |
673 * @stable ICU 2.1 | |
674 */ | |
675 U_STABLE void U_EXPORT2 | |
676 uiter_setCharacterIterator(UCharIterator *iter, U_NAMESPACE_QUALIFIER CharacterI
terator *charIter); | |
677 | |
678 /** | |
679 * Set up a UCharIterator to iterate over a C++ Replaceable. | |
680 * | |
681 * Sets the UCharIterator function pointers for iteration over the | |
682 * Replaceable rep with iteration boundaries start=index=0 and | |
683 * length=limit=rep->length(). | |
684 * The "provider" may set the start, index, and limit values at any time | |
685 * within the range 0..length=rep->length(). | |
686 * The length field will be ignored. | |
687 * | |
688 * The Replaceable pointer rep is set into UCharIterator.context without copying | |
689 * or cloning/reallocating the Replaceable object. | |
690 * | |
691 * getState() simply returns the current index. | |
692 * move() will always return the final index. | |
693 * | |
694 * @param iter UCharIterator structure to be set for iteration | |
695 * @param rep Replaceable to iterate over | |
696 * | |
697 * @see UCharIterator | |
698 * @stable ICU 2.1 | |
699 */ | |
700 U_STABLE void U_EXPORT2 | |
701 uiter_setReplaceable(UCharIterator *iter, const U_NAMESPACE_QUALIFIER Replaceabl
e *rep); | |
702 | |
703 #endif | |
704 | |
705 U_CDECL_END | |
706 | |
707 #endif | |
OLD | NEW |