OLD | NEW |
1 /* | 1 /* |
2 ****************************************************************************** | 2 ****************************************************************************** |
3 * Copyright (C) 1997-2013, International Business Machines | 3 * Copyright (C) 1997-2014, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ****************************************************************************** | 5 ****************************************************************************** |
6 */ | 6 */ |
7 | 7 |
8 /** | 8 /** |
9 * \file | 9 * \file |
10 * \brief C++ API: Collation Element Iterator. | 10 * \brief C++ API: Collation Element Iterator. |
11 */ | 11 */ |
12 | 12 |
13 /** | 13 /** |
14 * File coleitr.h | 14 * File coleitr.h |
15 * | 15 * |
16 * | |
17 * | |
18 * Created by: Helena Shih | 16 * Created by: Helena Shih |
19 * | 17 * |
20 * Modification History: | 18 * Modification History: |
21 * | 19 * |
22 * Date Name Description | 20 * Date Name Description |
23 * | 21 * |
24 * 8/18/97 helena Added internal API documentation. | 22 * 8/18/97 helena Added internal API documentation. |
25 * 08/03/98 erm Synched with 1.2 version CollationElementIterator.java | 23 * 08/03/98 erm Synched with 1.2 version CollationElementIterator.java |
26 * 12/10/99 aliu Ported Thai collation support from Java. | 24 * 12/10/99 aliu Ported Thai collation support from Java. |
27 * 01/25/01 swquek Modified into a C++ wrapper calling C APIs (ucoliter.h
) | 25 * 01/25/01 swquek Modified into a C++ wrapper calling C APIs (ucoliter.h
) |
28 * 02/19/01 swquek Removed CollationElementsIterator() since it is | 26 * 02/19/01 swquek Removed CollationElementsIterator() since it is |
29 * private constructor and no calls are made to it | 27 * private constructor and no calls are made to it |
| 28 * 2012-2014 markus Rewritten in C++ again. |
30 */ | 29 */ |
31 | 30 |
32 #ifndef COLEITR_H | 31 #ifndef COLEITR_H |
33 #define COLEITR_H | 32 #define COLEITR_H |
34 | 33 |
35 #include "unicode/utypes.h" | 34 #include "unicode/utypes.h" |
36 | 35 |
37 | |
38 #if !UCONFIG_NO_COLLATION | 36 #if !UCONFIG_NO_COLLATION |
39 | 37 |
| 38 #include "unicode/unistr.h" |
40 #include "unicode/uobject.h" | 39 #include "unicode/uobject.h" |
41 #include "unicode/tblcoll.h" | |
42 #include "unicode/ucoleitr.h" | |
43 | 40 |
44 /** | 41 struct UCollationElements; |
45 * The UCollationElements struct. | 42 struct UHashtable; |
46 * For usage in C programs. | |
47 * @stable ICU 2.0 | |
48 */ | |
49 typedef struct UCollationElements UCollationElements; | |
50 | 43 |
51 U_NAMESPACE_BEGIN | 44 U_NAMESPACE_BEGIN |
52 | 45 |
| 46 struct CollationData; |
| 47 |
| 48 class CollationIterator; |
| 49 class RuleBasedCollator; |
| 50 class UCollationPCE; |
| 51 class UVector32; |
| 52 |
53 /** | 53 /** |
54 * The CollationElementIterator class is used as an iterator to walk through | 54 * The CollationElementIterator class is used as an iterator to walk through |
55 * each character of an international string. Use the iterator to return the | 55 * each character of an international string. Use the iterator to return the |
56 * ordering priority of the positioned character. The ordering priority of a | 56 * ordering priority of the positioned character. The ordering priority of a |
57 * character, which we refer to as a key, defines how a character is collated in | 57 * character, which we refer to as a key, defines how a character is collated in |
58 * the given collation object. | 58 * the given collation object. |
59 * For example, consider the following in Spanish: | 59 * For example, consider the following in Slovak and in traditional Spanish colla
tion: |
60 * <pre> | 60 * <pre> |
61 * "ca" -> the first key is key('c') and second key is key('a'). | 61 * "ca" -> the first key is key('c') and second key is key('a'). |
62 * "cha" -> the first key is key('ch') and second key is key('a').</pre> | 62 * "cha" -> the first key is key('ch') and second key is key('a').</pre> |
63 * And in German, | 63 * And in German phonebook collation, |
64 * <pre> \htmlonly "æb"-> the first key is key('a'), the second key
is key('e'), and | 64 * <pre> \htmlonly "æb"-> the first key is key('a'), the second key
is key('e'), and |
65 * the third key is key('b'). \endhtmlonly </pre> | 65 * the third key is key('b'). \endhtmlonly </pre> |
66 * The key of a character, is an integer composed of primary order(short), | 66 * The key of a character, is an integer composed of primary order(short), |
67 * secondary order(char), and tertiary order(char). Java strictly defines the | 67 * secondary order(char), and tertiary order(char). Java strictly defines the |
68 * size and signedness of its primitive data types. Therefore, the static | 68 * size and signedness of its primitive data types. Therefore, the static |
69 * functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return | 69 * functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return |
70 * int32_t to ensure the correctness of the key value. | 70 * int32_t to ensure the correctness of the key value. |
71 * <p>Example of the iterator usage: (without error checking) | 71 * <p>Example of the iterator usage: (without error checking) |
72 * <pre> | 72 * <pre> |
73 * \code | 73 * \code |
(...skipping 22 matching lines...) Expand all Loading... |
96 * and previous() can not be inter-used. Whenever previous() is to be called afte
r | 96 * and previous() can not be inter-used. Whenever previous() is to be called afte
r |
97 * next() or vice versa, reset(), setOffset() or setText() has to be called first | 97 * next() or vice versa, reset(), setOffset() or setText() has to be called first |
98 * to reset the status, shifting pointers to either the end or the start of | 98 * to reset the status, shifting pointers to either the end or the start of |
99 * the string (reset() or setText()), or the specified position (setOffset()). | 99 * the string (reset() or setText()), or the specified position (setOffset()). |
100 * Hence at the next call of next() or previous(), the first or last collation or
der, | 100 * Hence at the next call of next() or previous(), the first or last collation or
der, |
101 * or collation order at the specified position will be returned. If a change o
f | 101 * or collation order at the specified position will be returned. If a change o
f |
102 * direction is done without one of these calls, the result is undefined. | 102 * direction is done without one of these calls, the result is undefined. |
103 * <p> | 103 * <p> |
104 * The result of a forward iterate (next()) and reversed result of the backward | 104 * The result of a forward iterate (next()) and reversed result of the backward |
105 * iterate (previous()) on the same string are equivalent, if collation orders | 105 * iterate (previous()) on the same string are equivalent, if collation orders |
106 * with the value UCOL_IGNORABLE are ignored. | 106 * with the value 0 are ignored. |
107 * Character based on the comparison level of the collator. A collation order | 107 * Character based on the comparison level of the collator. A collation order |
108 * consists of primary order, secondary order and tertiary order. The data | 108 * consists of primary order, secondary order and tertiary order. The data |
109 * type of the collation order is <strong>t_int32</strong>. | 109 * type of the collation order is <strong>int32_t</strong>. |
110 * | 110 * |
111 * Note, CollationElementIterator should not be subclassed. | 111 * Note, CollationElementIterator should not be subclassed. |
112 * @see Collator | 112 * @see Collator |
113 * @see RuleBasedCollator | 113 * @see RuleBasedCollator |
114 * @version 1.8 Jan 16 2001 | 114 * @version 1.8 Jan 16 2001 |
115 */ | 115 */ |
116 class U_I18N_API CollationElementIterator : public UObject { | 116 class U_I18N_API CollationElementIterator U_FINAL : public UObject { |
117 public: | 117 public: |
118 | 118 |
119 // CollationElementIterator public data member -----------------------------
- | 119 // CollationElementIterator public data member -----------------------------
- |
120 | 120 |
121 enum { | 121 enum { |
122 /** | 122 /** |
123 * NULLORDER indicates that an error has occurred while processing | 123 * NULLORDER indicates that an error has occurred while processing |
124 * @stable ICU 2.0 | 124 * @stable ICU 2.0 |
125 */ | 125 */ |
126 NULLORDER = (int32_t)0xffffffff | 126 NULLORDER = (int32_t)0xffffffff |
(...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
276 */ | 276 */ |
277 virtual UClassID getDynamicClassID() const; | 277 virtual UClassID getDynamicClassID() const; |
278 | 278 |
279 /** | 279 /** |
280 * ICU "poor man's RTTI", returns a UClassID for this class. | 280 * ICU "poor man's RTTI", returns a UClassID for this class. |
281 * | 281 * |
282 * @stable ICU 2.2 | 282 * @stable ICU 2.2 |
283 */ | 283 */ |
284 static UClassID U_EXPORT2 getStaticClassID(); | 284 static UClassID U_EXPORT2 getStaticClassID(); |
285 | 285 |
| 286 #ifndef U_HIDE_INTERNAL_API |
| 287 /** @internal */ |
| 288 static inline CollationElementIterator *fromUCollationElements(UCollationEle
ments *uc) { |
| 289 return reinterpret_cast<CollationElementIterator *>(uc); |
| 290 } |
| 291 /** @internal */ |
| 292 static inline const CollationElementIterator *fromUCollationElements(const U
CollationElements *uc) { |
| 293 return reinterpret_cast<const CollationElementIterator *>(uc); |
| 294 } |
| 295 /** @internal */ |
| 296 inline UCollationElements *toUCollationElements() { |
| 297 return reinterpret_cast<UCollationElements *>(this); |
| 298 } |
| 299 /** @internal */ |
| 300 inline const UCollationElements *toUCollationElements() const { |
| 301 return reinterpret_cast<const UCollationElements *>(this); |
| 302 } |
| 303 #endif // U_HIDE_INTERNAL_API |
| 304 |
286 private: | 305 private: |
287 friend class RuleBasedCollator; | 306 friend class RuleBasedCollator; |
| 307 friend class UCollationPCE; |
288 | 308 |
289 /** | 309 /** |
290 * CollationElementIterator constructor. This takes the source string and the
| 310 * CollationElementIterator constructor. This takes the source string and the
|
291 * collation object. The cursor will walk thru the source string based on the
| 311 * collation object. The cursor will walk thru the source string based on the
|
292 * predefined collation rules. If the source string is empty, NULLORDER will | 312 * predefined collation rules. If the source string is empty, NULLORDER will |
293 * be returned on the calls to next(). | 313 * be returned on the calls to next(). |
294 * @param sourceText the source string. | 314 * @param sourceText the source string. |
295 * @param order the collation object. | 315 * @param order the collation object. |
296 * @param status the error code status. | 316 * @param status the error code status. |
297 */ | 317 */ |
298 CollationElementIterator(const UnicodeString& sourceText, | 318 CollationElementIterator(const UnicodeString& sourceText, |
299 const RuleBasedCollator* order, UErrorCode& status); | 319 const RuleBasedCollator* order, UErrorCode& status); |
| 320 // Note: The constructors should take settings & tailoring, not a collator, |
| 321 // to avoid circular dependencies. |
| 322 // However, for operator==() we would need to be able to compare tailoring d
ata for equality |
| 323 // without making CollationData or CollationTailoring depend on TailoredSet. |
| 324 // (See the implementation of RuleBasedCollator::operator==().) |
| 325 // That might require creating an intermediate class that would be used |
| 326 // by both CollationElementIterator and RuleBasedCollator |
| 327 // but only contain the part of RBC== related to data and rules. |
300 | 328 |
301 /** | 329 /** |
302 * CollationElementIterator constructor. This takes the source string and the
| 330 * CollationElementIterator constructor. This takes the source string and the
|
303 * collation object. The cursor will walk thru the source string based on th
e | 331 * collation object. The cursor will walk thru the source string based on th
e |
304 * predefined collation rules. If the source string is empty, NULLORDER will
| 332 * predefined collation rules. If the source string is empty, NULLORDER will
|
305 * be returned on the calls to next(). | 333 * be returned on the calls to next(). |
306 * @param sourceText the source string. | 334 * @param sourceText the source string. |
307 * @param order the collation object. | 335 * @param order the collation object. |
308 * @param status the error code status. | 336 * @param status the error code status. |
309 */ | 337 */ |
310 CollationElementIterator(const CharacterIterator& sourceText, | 338 CollationElementIterator(const CharacterIterator& sourceText, |
311 const RuleBasedCollator* order, UErrorCode& status); | 339 const RuleBasedCollator* order, UErrorCode& status); |
312 | 340 |
313 /** | 341 /** |
314 * Assignment operator | 342 * Assignment operator |
315 * | 343 * |
316 * @param other the object to be copied | 344 * @param other the object to be copied |
317 */ | 345 */ |
318 const CollationElementIterator& | 346 const CollationElementIterator& |
319 operator=(const CollationElementIterator& other); | 347 operator=(const CollationElementIterator& other); |
320 | 348 |
321 CollationElementIterator(); // default constructor not implemented | 349 CollationElementIterator(); // default constructor not implemented |
322 | 350 |
| 351 /** Normalizes dir_=1 (just after setOffset()) to dir_=0 (just after reset()
). */ |
| 352 inline int8_t normalizeDir() const { return dir_ == 1 ? 0 : dir_; } |
| 353 |
| 354 static UHashtable *computeMaxExpansions(const CollationData *data, UErrorCod
e &errorCode); |
| 355 |
| 356 static int32_t getMaxExpansion(const UHashtable *maxExpansions, int32_t orde
r); |
| 357 |
323 // CollationElementIterator private data members ---------------------------
- | 358 // CollationElementIterator private data members ---------------------------
- |
324 | 359 |
| 360 CollationIterator *iter_; // owned |
| 361 const RuleBasedCollator *rbc_; // aliased |
| 362 uint32_t otherHalf_; |
325 /** | 363 /** |
326 * Data wrapper for collation elements | 364 * <0: backwards; 0: just after reset() (previous() begins from end); |
327 */ | 365 * 1: just after setOffset(); >1: forward |
328 UCollationElements *m_data_; | 366 */ |
| 367 int8_t dir_; |
| 368 /** |
| 369 * Stores offsets from expansions and from unsafe-backwards iteration, |
| 370 * so that getOffset() returns intermediate offsets for the CEs |
| 371 * that are consistent with forward iteration. |
| 372 */ |
| 373 UVector32 *offsets_; |
329 | 374 |
330 /** | 375 UnicodeString string_; |
331 * Indicates if m_data_ belongs to this object. | |
332 */ | |
333 UBool isDataOwned_; | |
334 }; | 376 }; |
335 | 377 |
336 // CollationElementIterator inline method defination -------------------------- | 378 // CollationElementIterator inline method definitions -------------------------- |
337 | 379 |
338 /** | |
339 * Get the primary order of a collation order. | |
340 * @param order the collation order | |
341 * @return the primary order of a collation order. | |
342 */ | |
343 inline int32_t CollationElementIterator::primaryOrder(int32_t order) | 380 inline int32_t CollationElementIterator::primaryOrder(int32_t order) |
344 { | 381 { |
345 order &= RuleBasedCollator::PRIMARYORDERMASK; | 382 return (order >> 16) & 0xffff; |
346 return (order >> RuleBasedCollator::PRIMARYORDERSHIFT); | |
347 } | 383 } |
348 | 384 |
349 /** | |
350 * Get the secondary order of a collation order. | |
351 * @param order the collation order | |
352 * @return the secondary order of a collation order. | |
353 */ | |
354 inline int32_t CollationElementIterator::secondaryOrder(int32_t order) | 385 inline int32_t CollationElementIterator::secondaryOrder(int32_t order) |
355 { | 386 { |
356 order = order & RuleBasedCollator::SECONDARYORDERMASK; | 387 return (order >> 8) & 0xff; |
357 return (order >> RuleBasedCollator::SECONDARYORDERSHIFT); | |
358 } | 388 } |
359 | 389 |
360 /** | |
361 * Get the tertiary order of a collation order. | |
362 * @param order the collation order | |
363 * @return the tertiary order of a collation order. | |
364 */ | |
365 inline int32_t CollationElementIterator::tertiaryOrder(int32_t order) | 390 inline int32_t CollationElementIterator::tertiaryOrder(int32_t order) |
366 { | 391 { |
367 return (order &= RuleBasedCollator::TERTIARYORDERMASK); | 392 return order & 0xff; |
368 } | |
369 | |
370 inline int32_t CollationElementIterator::getMaxExpansion(int32_t order) const | |
371 { | |
372 return ucol_getMaxExpansion(m_data_, (uint32_t)order); | |
373 } | 393 } |
374 | 394 |
375 inline UBool CollationElementIterator::isIgnorable(int32_t order) | 395 inline UBool CollationElementIterator::isIgnorable(int32_t order) |
376 { | 396 { |
377 return (primaryOrder(order) == RuleBasedCollator::PRIMIGNORABLE); | 397 return (order & 0xffff0000) == 0; |
378 } | 398 } |
379 | 399 |
380 U_NAMESPACE_END | 400 U_NAMESPACE_END |
381 | 401 |
382 #endif /* #if !UCONFIG_NO_COLLATION */ | 402 #endif /* #if !UCONFIG_NO_COLLATION */ |
383 | 403 |
384 #endif | 404 #endif |
OLD | NEW |