OLD | NEW |
| (Empty) |
1 /* | |
2 ****************************************************************************** | |
3 * Copyright (C) 1997-2008, International Business Machines | |
4 * Corporation and others. All Rights Reserved. | |
5 ****************************************************************************** | |
6 */ | |
7 | |
8 /** | |
9 * \file | |
10 * \brief C++ API: Collation Element Iterator. | |
11 */ | |
12 | |
13 /** | |
14 * File coleitr.h | |
15 * | |
16 * | |
17 * | |
18 * Created by: Helena Shih | |
19 * | |
20 * Modification History: | |
21 * | |
22 * Date Name Description | |
23 * | |
24 * 8/18/97 helena Added internal API documentation. | |
25 * 08/03/98 erm Synched with 1.2 version CollationElementIterator.java | |
26 * 12/10/99 aliu Ported Thai collation support from Java. | |
27 * 01/25/01 swquek Modified into a C++ wrapper calling C APIs (ucoliter.h
) | |
28 * 02/19/01 swquek Removed CollationElementsIterator() since it is | |
29 * private constructor and no calls are made to it | |
30 */ | |
31 | |
32 #ifndef COLEITR_H | |
33 #define COLEITR_H | |
34 | |
35 #include "unicode/utypes.h" | |
36 | |
37 | |
38 #if !UCONFIG_NO_COLLATION | |
39 | |
40 #include "unicode/uobject.h" | |
41 #include "unicode/tblcoll.h" | |
42 #include "unicode/ucoleitr.h" | |
43 | |
44 /** | |
45 * The UCollationElements struct, named here without listing any members. | |
46 * For usage in C programs; CollationElementIterator keeps a pointer to one (m_data_). | |
47 * @stable ICU 2.0 | |
48 */ | |
49 typedef struct UCollationElements UCollationElements; | |
50 | |
51 U_NAMESPACE_BEGIN | |
52 | |
53 /** | |
54 * The CollationElementIterator class is used as an iterator to walk through | |
55 * each character of an international string. Use the iterator to return the | |
56 * ordering priority of the positioned character. The ordering priority of a | |
57 * character, which we refer to as a key, defines how a character is collated in | |
58 * the given collation object. | |
59 * For example, consider the following in Spanish: | |
60 * <pre> | |
61 * "ca" -> the first key is key('c') and second key is key('a'). | |
62 * "cha" -> the first key is key('ch') and second key is key('a').</pre> | |
63 * And in German, | |
64 * <pre> \htmlonly "&#x00E6;b"-> the first key is key('a'), the second key
is key('e'), and | |
65 * the third key is key('b'). \endhtmlonly </pre> | |
66 * The key of a character, is an integer composed of primary order(short), | |
67 * secondary order(char), and tertiary order(char). Java strictly defines the | |
68 * size and signedness of its primitive data types. Therefore, the static | |
69 * functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return | |
70 * int32_t to ensure the correctness of the key value. | |
71 * <p>Example of the iterator usage: (without error checking) | |
72 * <pre> | |
73 * \code | |
74 * void CollationElementIterator_Example() | |
75 * { | |
76 * UnicodeString str = "This is a test"; | |
77 * UErrorCode success = U_ZERO_ERROR; | |
78 * RuleBasedCollator* rbc = | |
79 * (RuleBasedCollator*) RuleBasedCollator::createInstance(success); | |
80 * CollationElementIterator* c = | |
81 * rbc->createCollationElementIterator( str ); | |
82 * int32_t order = c->next(success); | |
83 * c->reset(); | |
84 * order = c->previous(success); | |
85 * delete c; | |
86 * delete rbc; | |
87 * } | |
88 * \endcode | |
89 * </pre> | |
90 * <p> | |
91 * CollationElementIterator::next returns the collation order of the next | |
92 * character based on the comparison level of the collator. | |
93 * CollationElementIterator::previous returns the collation order of the | |
94 * previous character based on the comparison level of the collator. | |
95 * The Collation Element Iterator moves only in one direction between calls to | |
96 * CollationElementIterator::reset. That is, CollationElementIterator::next() | |
97 * and CollationElementIterator::previous cannot be interleaved. Whenever | |
98 * CollationElementIterator::previous is to be called after | |
99 * CollationElementIterator::next() or vice versa, | |
100 * CollationElementIterator::reset has to be called first to reset the status, | |
101 * shifting pointers to either the end or the start of the string. Hence at the | |
102 * next call of CollationElementIterator::previous or | |
103 * CollationElementIterator::next(), the first or last collation order will be | |
104 * returned. | |
105 * If a change of direction is done without a CollationElementIterator::reset(), | |
106 * the result is undefined. | |
107 * The result of a forward iterate (CollationElementIterator::next) and | |
108 * reversed result of the backward iterate (CollationElementIterator::previous) | |
109 * on the same string are equivalent, if collation orders with the value | |
110 * UCOL_IGNORABLE are ignored. | |
111 * Character based on the comparison level of the collator. A collation order | |
112 * consists of primary order, secondary order and tertiary order. The data | |
113 * type of the collation order is <strong>int32_t</strong>. | |
114 * | |
115 * Note, CollationElementIterator should not be subclassed. | |
116 * @see Collator | |
117 * @see RuleBasedCollator | |
118 * @version 1.8 Jan 16 2001 | |
119 */ | |
120 class U_I18N_API CollationElementIterator : public UObject { | |
121 public: | |
122 | |
123 // CollationElementIterator public data member -----------------------------
- | |
124 | |
125 enum { | |
126 /** | |
127 * NULLORDER indicates that an error has occurred while processing | |
128 * @stable ICU 2.0 | |
129 */ | |
130 NULLORDER = (int32_t)0xffffffff | |
131 }; | |
132 | |
133 // CollationElementIterator public constructor/destructor ------------------
- | |
134 | |
135 /** | |
136 * Copy constructor. | |
137 * | |
138 * @param other the object to be copied from | |
139 * @stable ICU 2.0 | |
140 */ | |
141 CollationElementIterator(const CollationElementIterator& other); | |
142 | |
143 /** | |
144 * Destructor | |
145 * @stable ICU 2.0 | |
146 */ | |
147 virtual ~CollationElementIterator(); | |
148 | |
149 // CollationElementIterator public methods ---------------------------------
- | |
150 | |
151 /** | |
152 * Returns true if "other" is the same as "this" | |
153 * | |
154 * @param other the object to be compared | |
155 * @return true if "other" is the same as "this" | |
156 * @stable ICU 2.0 | |
157 */ | |
158 UBool operator==(const CollationElementIterator& other) const; | |
159 | |
160 /** | |
161 * Returns true if "other" is not the same as "this". | |
162 * | |
163 * @param other the object to be compared | |
164 * @return true if "other" is not the same as "this" | |
165 * @stable ICU 2.0 | |
166 */ | |
167 UBool operator!=(const CollationElementIterator& other) const; | |
168 | |
169 /** | |
170 * Resets the cursor to the beginning of the string. | |
171 * @stable ICU 2.0 | |
172 */ | |
173 void reset(void); | |
174 | |
175 /** | |
176 * Gets the ordering priority of the next character in the string. | |
177 * @param status the error code status. | |
178 * @return the next character's ordering. otherwise returns NULLORDER if an | |
179 * error has occurred or if the end of string has been reached | |
180 * @stable ICU 2.0 | |
181 */ | |
182 int32_t next(UErrorCode& status); | |
183 | |
184 /** | |
185 * Get the ordering priority of the previous collation element in the string. | |
186 * @param status the error code status. | |
187 * @return the previous element's ordering. otherwise returns NULLORDER if an
| |
188 * error has occurred or if the start of string has been reached | |
189 * @stable ICU 2.0 | |
190 */ | |
191 int32_t previous(UErrorCode& status); | |
192 | |
193 /** | |
194 * Gets the primary order of a collation order. | |
195 * @param order the collation order | |
196 * @return the primary order of a collation order. | |
197 * @stable ICU 2.0 | |
198 */ | |
199 static inline int32_t primaryOrder(int32_t order); | |
200 | |
201 /** | |
202 * Gets the secondary order of a collation order. | |
203 * @param order the collation order | |
204 * @return the secondary order of a collation order. | |
205 * @stable ICU 2.0 | |
206 */ | |
207 static inline int32_t secondaryOrder(int32_t order); | |
208 | |
209 /** | |
210 * Gets the tertiary order of a collation order. | |
211 * @param order the collation order | |
212 * @return the tertiary order of a collation order. | |
213 * @stable ICU 2.0 | |
214 */ | |
215 static inline int32_t tertiaryOrder(int32_t order); | |
216 | |
217 /** | |
218 * Return the maximum length of any expansion sequences that end with the | |
219 * specified comparison order. | |
220 * @param order a collation order returned by previous or next. | |
221 * @return maximum size of the expansion sequences ending with the collation | |
222 * element or 1 if collation element does not occur at the end of any
| |
223 * expansion sequence | |
224 * @stable ICU 2.0 | |
225 */ | |
226 int32_t getMaxExpansion(int32_t order) const; | |
227 | |
228 /** | |
229 * Gets the comparison order in the desired strength. Ignore the other | |
230 * differences. The resulting comparison order is returned. | |
231 * @param order The order value | |
232 * @stable ICU 2.0 | |
233 */ | |
234 int32_t strengthOrder(int32_t order) const; | |
235 | |
236 /** | |
237 * Sets the source string. | |
238 * @param str the source string. | |
239 * @param status the error code status. | |
240 * @stable ICU 2.0 | |
241 */ | |
242 void setText(const UnicodeString& str, UErrorCode& status); | |
243 | |
244 /** | |
245 * Sets the source string. | |
246 * @param str the source character iterator. | |
247 * @param status the error code status. | |
248 * @stable ICU 2.0 | |
249 */ | |
250 void setText(CharacterIterator& str, UErrorCode& status); | |
251 | |
252 /** | |
253 * Checks if a comparison order is ignorable. | |
254 * @param order the collation order. | |
255 * @return TRUE if a character is ignorable, FALSE otherwise. | |
256 * @stable ICU 2.0 | |
257 */ | |
258 static inline UBool isIgnorable(int32_t order); | |
259 | |
260 /** | |
261 * Gets the offset of the currently processed character in the source string. | |
262 * @return the offset of the character. | |
263 * @stable ICU 2.0 | |
264 */ | |
265 int32_t getOffset(void) const; | |
266 | |
267 /** | |
268 * Sets the offset of the currently processed character in the source string. | |
269 * @param newOffset the new offset. | |
270 * @param status the error code status. | |
271 * (Declared void: no value is returned.) | |
272 * @stable ICU 2.0 | |
273 */ | |
274 void setOffset(int32_t newOffset, UErrorCode& status); | |
275 | |
276 /** | |
277 * ICU "poor man's RTTI", returns a UClassID for the actual class. | |
278 * | |
279 * @stable ICU 2.2 | |
280 */ | |
281 virtual UClassID getDynamicClassID() const; | |
282 | |
283 /** | |
284 * ICU "poor man's RTTI", returns a UClassID for this class. | |
285 * | |
286 * @stable ICU 2.2 | |
287 */ | |
288 static UClassID U_EXPORT2 getStaticClassID(); | |
289 | |
290 protected: | |
291 | |
292 // CollationElementIterator protected constructors -------------------------
- | |
293 /** | |
294 * @stable ICU 2.0 | |
295 */ | |
296 friend class RuleBasedCollator; | |
297 | |
298 /** | |
299 * CollationElementIterator constructor. This takes the source string and the
| |
300 * collation object. The cursor will walk through the source string based on the
| |
301 * predefined collation rules. If the source string is empty, NULLORDER will | |
302 * be returned on the calls to next(). | |
303 * @param sourceText the source string. | |
304 * @param order the collation object. | |
305 * @param status the error code status. | |
306 * @stable ICU 2.0 | |
307 */ | |
308 CollationElementIterator(const UnicodeString& sourceText, | |
309 const RuleBasedCollator* order, UErrorCode& status); | |
310 | |
311 /** | |
312 * CollationElementIterator constructor. This takes the source string and the
| |
313 * collation object. The cursor will walk through the source string based on the
| |
314 * predefined collation rules. If the source string is empty, NULLORDER will
| |
315 * be returned on the calls to next(). | |
316 * @param sourceText the source character iterator. | |
317 * @param order the collation object. | |
318 * @param status the error code status. | |
319 * @stable ICU 2.0 | |
320 */ | |
321 CollationElementIterator(const CharacterIterator& sourceText, | |
322 const RuleBasedCollator* order, UErrorCode& status); | |
323 | |
324 // CollationElementIterator protected methods ------------------------------
- | |
325 | |
326 /** | |
327 * Assignment operator | |
328 * | |
329 * @param other the object to be copied | |
330 * @stable ICU 2.0 | |
331 */ | |
332 const CollationElementIterator& | |
333 operator=(const CollationElementIterator& other); | |
334 | |
335 private: | |
336 CollationElementIterator(); // default constructor not implemented | |
337 | |
338 // CollationElementIterator private data members ---------------------------
- | |
339 | |
340 /** | |
341 * Data wrapper for collation elements | |
342 */ | |
343 UCollationElements *m_data_; | |
344 | |
345 /** | |
346 * Indicates if m_data_ belongs to this object. | |
347 */ | |
348 UBool isDataOwned_; | |
349 | |
350 }; | |
351 | |
352 // CollationElementIterator inline method definition -------------------------- | |
353 | |
354 /** | |
355 * Get the primary order of a collation order. | |
356 * @param order the collation order | |
357 * @return the primary order of a collation order: the order masked with RuleBasedCollator::PRIMARYORDERMASK, then shifted right by RuleBasedCollator::PRIMARYORDERSHIFT. | |
358 */ | |
359 inline int32_t CollationElementIterator::primaryOrder(int32_t order) | |
360 { | |
361 order &= RuleBasedCollator::PRIMARYORDERMASK; | |
362 return (order >> RuleBasedCollator::PRIMARYORDERSHIFT); | |
363 } | |
364 | |
365 /** | |
366 * Get the secondary order of a collation order. | |
367 * @param order the collation order | |
368 * @return the secondary order of a collation order: the order masked with RuleBasedCollator::SECONDARYORDERMASK, then shifted right by RuleBasedCollator::SECONDARYORDERSHIFT. | |
369 */ | |
370 inline int32_t CollationElementIterator::secondaryOrder(int32_t order) | |
371 { | |
372 order = order & RuleBasedCollator::SECONDARYORDERMASK; | |
373 return (order >> RuleBasedCollator::SECONDARYORDERSHIFT); | |
374 } | |
375 | |
376 /** | |
377 * Get the tertiary order of a collation order. | |
378 * @param order the collation order | |
379 * @return the tertiary order of a collation order: the order masked with RuleBasedCollator::TERTIARYORDERMASK (no shift is applied). | |
380 */ | |
381 inline int32_t CollationElementIterator::tertiaryOrder(int32_t order) | |
382 { | |
383 return (order &= RuleBasedCollator::TERTIARYORDERMASK); | |
384 } | |
385 | |
/**
 * Returns the maximum expansion length for a collation order by forwarding
 * to ucol_getMaxExpansion() with this iterator's m_data_.
 * The order is cast to uint32_t before being passed on.
 * @param order a collation order
 * @return the value returned by ucol_getMaxExpansion()
 */
386 inline int32_t CollationElementIterator::getMaxExpansion(int32_t order) const | |
387 { | |
388 return ucol_getMaxExpansion(m_data_, (uint32_t)order); | |
389 } | |
390 | |
/**
 * Checks if a collation order is ignorable: it is when its primary order,
 * as computed by primaryOrder(), equals RuleBasedCollator::PRIMIGNORABLE.
 * Only the primary order is examined here.
 * @param order the collation order
 * @return the result of that equality comparison
 */
391 inline UBool CollationElementIterator::isIgnorable(int32_t order) | |
392 { | |
393 return (primaryOrder(order) == RuleBasedCollator::PRIMIGNORABLE); | |
394 } | |
395 | |
396 U_NAMESPACE_END | |
397 | |
398 #endif /* #if !UCONFIG_NO_COLLATION */ | |
399 | |
400 #endif | |
OLD | NEW |