OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * Copyright (C) 2001-2009, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ******************************************************************************* |
| 6 * |
| 7 * File ucoleitr.h |
| 8 * |
| 9 * Modification History: |
| 10 * |
| 11 * Date Name Description |
| 12 * 02/15/2001 synwee Modified all methods to process its own function |
| 13 * instead of calling the equivalent c++ api (coleitr.h) |
| 14 *******************************************************************************/ |
| 15 |
| 16 #ifndef UCOLEITR_H |
| 17 #define UCOLEITR_H |
| 18 |
| 19 #include "unicode/utypes.h" |
| 20 |
| 21 #if !UCONFIG_NO_COLLATION |
| 22 |
| 23 /** |
| 24 * This indicates an error has occurred during processing or that there are |
| 25 * no more CEs to be returned. |
| 26 * @stable ICU 2.0 |
| 27 */ |
| 28 #define UCOL_NULLORDER ((int32_t)0xFFFFFFFF) |
| 29 |
| 30 /** |
| 31 * This indicates an error has occurred during processing or there are no more CE
s |
| 32 * to be returned. |
| 33 * |
| 34 * @internal |
| 35 */ |
| 36 #define UCOL_PROCESSED_NULLORDER ((int64_t)U_INT64_MAX) |
| 37 |
| 38 #include "unicode/ucol.h" |
| 39 |
| 40 /** |
| 41 * The UCollationElements struct. |
| 42 * For usage in C programs. |
| 43 * @stable ICU 2.0 |
| 44 */ |
| 45 typedef struct UCollationElements UCollationElements; |
| 46 |
| 47 /** |
| 48 * \file |
| 49 * \brief C API: UCollationElements |
| 50 * |
| 51 * The UCollationElements API is used as an iterator to walk through each |
| 52 * character of an international string. Use the iterator to return the |
| 53 * ordering priority of the positioned character. The ordering priority of a |
| 54 * character, which we refer to as a key, defines how a character is collated |
| 55 * in the given collation object. |
| 56 * For example, consider the following in Spanish: |
| 57 * <pre> |
| 58 * . "ca" -> the first key is key('c') and second key is key('a'). |
| 59 * . "cha" -> the first key is key('ch') and second key is key('a'). |
| 60 * </pre> |
| 61 * And in German, |
| 62 * <pre> |
| 63 * . "<ae ligature>b"-> the first key is key('a'), the second key is key('
e'), and |
| 64 * . the third key is key('b'). |
| 65 * </pre> |
| 66 * <p>Example of the iterator usage: (without error checking) |
| 67 * <pre> |
| 68 * . void CollationElementIterator_Example() |
| 69 * . { |
| 70 * . UChar *s; |
| 71 * . int32_t order, primaryOrder; |
| 72 * . UCollationElements *c; |
| 73 * . UCollator *coll; |
| 74 * . UErrorCode success = U_ZERO_ERROR; |
| 75 * . s=(UChar*)malloc(sizeof(UChar) * (strlen("This is a test")+1) ); |
| 76 * . u_uastrcpy(s, "This is a test"); |
| 77 * . coll = ucol_open(NULL, &success); |
| 78 * . c = ucol_openElements(coll, s, u_strlen(s), &success); |
| 79 * . order = ucol_next(c, &success); |
| 80 * . ucol_reset(c); |
| 81 * . order = ucol_previous(c, &success); |
| 82 * . free(s); |
| 83 * . ucol_closeElements(c); |
| 84 * . ucol_close(coll); |
| 85 * . } |
| 86 * </pre> |
| 87 * <p> |
| 88 * ucol_next() returns the collation order of the next character. |
| 89 * ucol_previous() returns the collation order of the previous character. |
| 90 * The Collation Element Iterator moves only in one direction between calls to |
| 91 * ucol_reset(). That is, ucol_next() and ucol_previous() cannot be intermixed. |
| 92 * Whenever ucol_previous() is to be called after ucol_next() or vice versa, |
| 93 * ucol_reset() has to be called first to reset the status, shifting pointers to |
| 94 * either the end or the start of the string. Hence at the next call of |
| 95 * ucol_previous() or ucol_next(), the first or last collation order will be |
| 96 * returned. If a change of direction is done without a ucol_reset(), the result |
| 97 * is undefined. |
| 98 * The result of a forward iterate (ucol_next) and reversed result of the |
| 99 * backward iterate (ucol_previous) on the same string are equivalent, if |
| 100 * collation orders with the value UCOL_IGNORABLE are ignored. |
| 101 * Character based on the comparison level of the collator. A collation order |
| 102 * consists of primary order, secondary order and tertiary order. The data |
| 103 * type of the collation order is <strong>int32_t</strong>. |
| 104 * |
| 105 * @see UCollator |
| 106 */ |
| 107 |
| 108 /** |
| 109 * Open the collation elements for a string. |
| 110 * |
| 111 * @param coll The collator containing the desired collation rules. |
| 112 * @param text The text to iterate over. |
| 113 * @param textLength The number of characters in text, or -1 if null-terminated |
| 114 * @param status A pointer to an UErrorCode to receive any errors. |
| 115 * @return a struct containing collation element information |
| 116 * @stable ICU 2.0 |
| 117 */ |
| 118 U_STABLE UCollationElements* U_EXPORT2 |
| 119 ucol_openElements(const UCollator *coll, |
| 120 const UChar *text, |
| 121 int32_t textLength, |
| 122 UErrorCode *status); |
| 123 |
| 124 |
| 125 /** |
| 126 * get a hash code for a key... Not very useful! |
| 127 * @param key the given key. |
| 128 * @param length the size of the key array. |
| 129 * @return the hash code. |
| 130 * @stable ICU 2.0 |
| 131 */ |
| 132 U_STABLE int32_t U_EXPORT2 |
| 133 ucol_keyHashCode(const uint8_t* key, int32_t length); |
| 134 |
| 135 /** |
| 136 * Close a UCollationElements. |
| 137 * Once closed, a UCollationElements may no longer be used. |
| 138 * @param elems The UCollationElements to close. |
| 139 * @stable ICU 2.0 |
| 140 */ |
| 141 U_STABLE void U_EXPORT2 |
| 142 ucol_closeElements(UCollationElements *elems); |
| 143 |
| 144 /** |
| 145 * Reset the collation elements to their initial state. |
| 146 * This will move the 'cursor' to the beginning of the text. |
| 147 * Property settings for collation will be reset to the current status. |
| 148 * @param elems The UCollationElements to reset. |
| 149 * @see ucol_next |
| 150 * @see ucol_previous |
| 151 * @stable ICU 2.0 |
| 152 */ |
| 153 U_STABLE void U_EXPORT2 |
| 154 ucol_reset(UCollationElements *elems); |
| 155 |
| 156 /** |
| 157 * Set the collation elements to use implicit ordering for Han |
| 158 * even if they've been tailored. This will also force Hangul |
| 159 * syllables to be ordered by decomposing them to their component |
| 160 * Jamo. |
| 161 * |
| 162 * @param elems The UCollationElements containing the text. |
| 163 * @param status A pointer to a UErrorCode to receive any errors. |
| 164 * |
| 165 * @internal |
| 166 */ |
| 167 U_INTERNAL void U_EXPORT2 |
| 168 ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status); |
| 169 |
| 170 /** |
| 171 * Get the ordering priority of the next collation element in the text. |
| 172 * A single character may contain more than one collation element. |
| 173 * @param elems The UCollationElements containing the text. |
| 174 * @param status A pointer to an UErrorCode to receive any errors. |
| 175 * @return The next collation elements ordering, otherwise returns UCOL_NULLORDER |
| 176 * if an error has occurred or if the end of string has been reached |
| 177 * @stable ICU 2.0 |
| 178 */ |
| 179 U_STABLE int32_t U_EXPORT2 |
| 180 ucol_next(UCollationElements *elems, UErrorCode *status); |
| 181 |
| 182 /** |
| 183 * Get the ordering priority of the previous collation element in the text. |
| 184 * A single character may contain more than one collation element. |
| 185 * Note that internally a stack is used to store buffered collation elements. |
| 186 * It is very rare that the stack will overflow, however if such a case is |
| 187 * encountered, the problem can be solved by increasing the size |
| 188 * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h. |
| 189 * @param elems The UCollationElements containing the text. |
| 190 * @param status A pointer to an UErrorCode to receive any errors. Notably |
| 191 * a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack |
| 192 * buffer has been exhausted. |
| 193 * @return The previous collation elements ordering, otherwise returns |
| 194 * UCOL_NULLORDER if an error has occurred or if the start of string has |
| 195 * been reached. |
| 196 * @stable ICU 2.0 |
| 197 */ |
| 198 U_STABLE int32_t U_EXPORT2 |
| 199 ucol_previous(UCollationElements *elems, UErrorCode *status); |
| 200 |
| 201 /** |
| 202 * Get the processed ordering priority of the next collation element in the text
. |
| 203 * A single character may contain more than one collation element. |
| 204 * |
| 205 * @param elems The UCollationElements containing the text. |
| 206 * @param ixLow a pointer to an int32_t to receive the iterator index before fet
ching the CE. |
| 207 * @param ixHigh a pointer to an int32_t to receive the iterator index after fet
ching the CE. |
| 208 * @param status A pointer to an UErrorCode to receive any errors. |
| 209 * @return The next collation elements ordering, otherwise returns UCOL_PROCESSE
D_NULLORDER |
| 210 * if an error has occurred or if the end of string has been reached |
| 211 * |
| 212 * @internal |
| 213 */ |
| 214 U_INTERNAL int64_t U_EXPORT2 |
| 215 ucol_nextProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHigh, U
ErrorCode *status); |
| 216 |
| 217 /** |
| 218 * Get the processed ordering priority of the previous collation element in the
text. |
| 219 * A single character may contain more than one collation element. |
| 220 * Note that internally a stack is used to store buffered collation elements. |
| 221 * It is very rare that the stack will overflow, however if such a case is |
| 222 * encountered, the problem can be solved by increasing the size |
| 223 * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h. |
| 224 * |
| 225 * @param elems The UCollationElements containing the text. |
| 226 * @param ixLow A pointer to an int32_t to receive the iterator index after fetc
hing the CE |
| 227 * @param ixHigh A pointer to an int32_t to receive the iterator index before f
etching the CE |
| 228 * @param status A pointer to an UErrorCode to receive any errors. Notably |
| 229 * a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack |
| 230 * buffer has been exhausted. |
| 231 * @return The previous collation elements ordering, otherwise returns |
| 232 * UCOL_PROCESSED_NULLORDER if an error has occurred or if the start of |
| 233 * string has been reached. |
| 234 * |
| 235 * @internal |
| 236 */ |
| 237 U_INTERNAL int64_t U_EXPORT2 |
| 238 ucol_previousProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHig
h, UErrorCode *status); |
| 239 |
| 240 /** |
| 241 * Get the maximum length of any expansion sequences that end with the |
| 242 * specified comparison order. |
| 243 * This is useful for .... ? |
| 244 * @param elems The UCollationElements containing the text. |
| 245 * @param order A collation order returned by previous or next. |
| 246 * @return maximum size of the expansion sequences ending with the collation |
| 247 * element or 1 if collation element does not occur at the end of any |
| 248 * expansion sequence |
| 249 * @stable ICU 2.0 |
| 250 */ |
| 251 U_STABLE int32_t U_EXPORT2 |
| 252 ucol_getMaxExpansion(const UCollationElements *elems, int32_t order); |
| 253 |
| 254 /** |
| 255 * Set the text containing the collation elements. |
| 256 * Property settings for collation will remain the same. |
| 257 * In order to reset the iterator to the current collation property settings, |
| 258 * ucol_reset() has to be called. |
| 259 * @param elems The UCollationElements to set. |
| 260 * @param text The source text containing the collation elements. |
| 261 * @param textLength The length of text, or -1 if null-terminated. |
| 262 * @param status A pointer to an UErrorCode to receive any errors. |
| 263 * @see ucol_getText |
| 264 * @stable ICU 2.0 |
| 265 */ |
| 266 U_STABLE void U_EXPORT2 |
| 267 ucol_setText( UCollationElements *elems, |
| 268 const UChar *text, |
| 269 int32_t textLength, |
| 270 UErrorCode *status); |
| 271 |
| 272 /** |
| 273 * Get the offset of the current source character. |
| 274 * This is an offset into the text of the character containing the current |
| 275 * collation elements. |
| 276 * @param elems The UCollationElements to query. |
| 277 * @return The offset of the current source character. |
| 278 * @see ucol_setOffset |
| 279 * @stable ICU 2.0 |
| 280 */ |
| 281 U_STABLE int32_t U_EXPORT2 |
| 282 ucol_getOffset(const UCollationElements *elems); |
| 283 |
| 284 /** |
| 285 * Set the offset of the current source character. |
| 286 * This is an offset into the text of the character to be processed. |
| 287 * Property settings for collation will remain the same. |
| 288 * In order to reset the iterator to the current collation property settings, |
| 289 * ucol_reset() has to be called. |
| 290 * @param elems The UCollationElements to set. |
| 291 * @param offset The desired character offset. |
| 292 * @param status A pointer to an UErrorCode to receive any errors. |
| 293 * @see ucol_getOffset |
| 294 * @stable ICU 2.0 |
| 295 */ |
| 296 U_STABLE void U_EXPORT2 |
| 297 ucol_setOffset(UCollationElements *elems, |
| 298 int32_t offset, |
| 299 UErrorCode *status); |
| 300 |
| 301 /** |
| 302 * Get the primary order of a collation order. |
| 303 * @param order the collation order |
| 304 * @return the primary order of a collation order. |
| 305 * @stable ICU 2.6 |
| 306 */ |
| 307 U_STABLE int32_t U_EXPORT2 |
| 308 ucol_primaryOrder (int32_t order); |
| 309 |
| 310 /** |
| 311 * Get the secondary order of a collation order. |
| 312 * @param order the collation order |
| 313 * @return the secondary order of a collation order. |
| 314 * @stable ICU 2.6 |
| 315 */ |
| 316 U_STABLE int32_t U_EXPORT2 |
| 317 ucol_secondaryOrder (int32_t order); |
| 318 |
| 319 /** |
| 320 * Get the tertiary order of a collation order. |
| 321 * @param order the collation order |
| 322 * @return the tertiary order of a collation order. |
| 323 * @stable ICU 2.6 |
| 324 */ |
| 325 U_STABLE int32_t U_EXPORT2 |
| 326 ucol_tertiaryOrder (int32_t order); |
| 327 |
| 328 #endif /* #if !UCONFIG_NO_COLLATION */ |
| 329 |
| 330 #endif |
OLD | NEW |