OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * |
| 4 * Copyright (C) 2003, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ******************************************************************************* |
| 8 * file name: unorm_it.h |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * created on: 2003jan21 |
| 14 * created by: Markus W. Scherer |
| 15 */ |
| 16 |
| 17 #ifndef __UNORM_IT_H__ |
| 18 #define __UNORM_IT_H__ |
| 19 |
| 20 #include "unicode/utypes.h" |
| 21 |
| 22 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION |
| 23 |
| 24 #include "unicode/uiter.h" |
| 25 #include "unicode/unorm.h" |
| 26 |
| 27 /** |
| 28 * Normalizing UCharIterator wrapper. |
| 29 * This internal API basically duplicates the functionality of the C++ Normalizer |
| 30 * but |
| 31 * - it actually implements a character iterator (UCharIterator) |
| 32 * with few restrictions (see unorm_setIter()) |
| 33 * - it supports UCharIterator getState()/setState() |
| 34 * - it uses lower-level APIs and buffers more text and states, |
| 35 * hopefully resulting in higher performance |
| 36 * |
| 37 * Usage example: |
| 38 * \code |
| 39 * function(UCharIterator *srcIter) { |
| 40 * UNormIterator *uni; |
| 41 * UCharIterator *iter; |
| 42 * UErrorCode errorCode; |
| 43 * |
| 44 * errorCode=U_ZERO_ERROR; |
| 45 * uni=unorm_openIter(NULL, 0, &errorCode); |
| 46 * if(U_FAILURE(errorCode)) { |
| 47 * // report error |
| 48 * return; |
| 49 * } |
| 50 * |
| 51 * iter=unorm_setIter(uni, srcIter, UNORM_FCD, &errorCode); |
| 52 * if(U_FAILURE(errorCode)) { |
| 53 * // report error |
| 54 * } else { |
| 55 * // use iter to iterate over the canonically ordered |
| 56 * // version of srcIter's text |
| 57 * uint32_t state; |
| 58 * |
| 59 * ... |
| 60 * |
| 61 * state=uiter_getState(iter); |
| 62 * if(state!=UITER_NO_STATE) { |
| 63 * // use valid state, store it, use iter some more |
| 64 * ... |
| 65 * |
| 66 * // later restore iter to the saved state: |
| 67 * uiter_setState(iter, state, &errorCode); |
| 68 * |
| 69 * ... |
| 70 * } |
| 71 * |
| 72 * ... |
| 73 * } |
| 74 * unorm_closeIter(uni); |
| 75 * } |
| 76 * \endcode |
| 77 * |
| 78 * See also the ICU test suites. |
| 79 * |
| 80 * @internal |
| 81 */ |
| 82 struct UNormIterator; |
| 83 typedef struct UNormIterator UNormIterator; |
| 84 |
| 85 /** |
| 86 * Size in bytes of a stack buffer to hold a UNormIterator, see the stackMem |
| 87 * and stackMemSize parameters of unorm_openIter(). |
| 88 * |
| 89 * @internal |
| 90 */ |
| 91 #define UNORM_ITER_SIZE 1024 |
| 92 |
| 93 /** |
| 94 * Open a normalizing iterator. Must be closed later with unorm_closeIter(). |
| 95 * Use unorm_setIter() to set the source iterator and the normalization mode. |
| 96 * |
| 97 * @param stackMem Pointer to preallocated (stack-allocated) buffer to hold |
| 98 * the UNormIterator if possible; can be NULL. |
| 99 * @param stackMemSize Number of bytes at stackMem; can be 0, |
| 100 * or should be >= UNORM_ITER_SIZE for a non-NULL stackMem. |
| 101 * @param pErrorCode ICU error code |
| 102 * @return an allocated and pre-initialized UNormIterator |
| 103 * @internal |
| 104 */ |
| 105 U_CAPI UNormIterator * U_EXPORT2 |
| 106 unorm_openIter(void *stackMem, int32_t stackMemSize, UErrorCode *pErrorCode); |
| 107 |
| 108 /** |
| 109 * Close a normalizing iterator that was opened with unorm_openIter(). |
| 110 * |
| 111 * @param uni UNormIterator from unorm_openIter() |
| 112 * @internal |
| 113 */ |
| 114 U_CAPI void U_EXPORT2 |
| 115 unorm_closeIter(UNormIterator *uni); |
| 116 |
| 117 /** |
| 118 * Set a UCharIterator and a normalization mode for the normalizing iterator |
| 119 * to wrap. The normalizing iterator will read from the character iterator, |
| 120 * normalize the text, and in turn deliver it with its own wrapper UCharIterator |
| 121 * interface which it returns. |
| 122 * |
| 123 * The source iterator remains at its current position through the unorm_setIter() |
| 124 * call but will be used and moved as soon as |
| 125 * the returned normalizing iterator is. |
| 126 * |
| 127 * The returned interface pointer is valid for as long as the normalizing iterator |
| 128 * is open and until another unorm_setIter() call is made on it. |
| 129 * |
| 130 * The normalizing iterator's UCharIterator interface has the following properties: |
| 131 * - getIndex() and move() will almost always return UITER_UNKNOWN_INDEX |
| 132 * - getState() will return UITER_NO_STATE for unknown states for positions |
| 133 * that are not at normalization boundaries |
| 134 * |
| 135 * @param uni UNormIterator from unorm_openIter() |
| 136 * @param iter The source text UCharIterator to be wrapped. It is aliased into the normalizing iterator. |
| 137 * Must support getState() and setState(). |
| 138 * @param mode The normalization mode. |
| 139 * @param pErrorCode ICU error code |
| 140 * @return an alias to the normalizing iterator's UCharIterator interface |
| 141 * @internal |
| 142 */ |
| 143 U_CAPI UCharIterator * U_EXPORT2 |
| 144 unorm_setIter(UNormIterator *uni, UCharIterator *iter, UNormalizationMode mode,
UErrorCode *pErrorCode); |
| 145 |
| 146 #endif /* uconfig.h switches */ |
| 147 |
| 148 #endif |
OLD | NEW |