Index: source/i18n/unicode/coleitr.h |
diff --git a/source/i18n/unicode/coleitr.h b/source/i18n/unicode/coleitr.h |
index b7d95740c12a69ee99e2de82fc5c935f3ccceab3..8c5d0e94f58d4328f21dd331015a69c30dcf9406 100644 |
--- a/source/i18n/unicode/coleitr.h |
+++ b/source/i18n/unicode/coleitr.h |
@@ -1,6 +1,6 @@ |
/* |
****************************************************************************** |
- * Copyright (C) 1997-2013, International Business Machines |
+ * Copyright (C) 1997-2014, International Business Machines |
* Corporation and others. All Rights Reserved. |
****************************************************************************** |
*/ |
@@ -13,8 +13,6 @@ |
/** |
* File coleitr.h |
* |
-* |
-* |
* Created by: Helena Shih |
* |
* Modification History: |
@@ -27,6 +25,7 @@ |
* 01/25/01 swquek Modified into a C++ wrapper calling C APIs (ucoliter.h) |
* 02/19/01 swquek Removed CollationElementsIterator() since it is |
* private constructor and no calls are made to it |
+* 2012-2014 markus Rewritten in C++ again. |
*/ |
#ifndef COLEITR_H |
@@ -34,33 +33,34 @@ |
#include "unicode/utypes.h" |
- |
#if !UCONFIG_NO_COLLATION |
+#include "unicode/unistr.h" |
#include "unicode/uobject.h" |
-#include "unicode/tblcoll.h" |
-#include "unicode/ucoleitr.h" |
-/** |
- * The UCollationElements struct. |
- * For usage in C programs. |
- * @stable ICU 2.0 |
- */ |
-typedef struct UCollationElements UCollationElements; |
+struct UCollationElements; |
+struct UHashtable; |
U_NAMESPACE_BEGIN |
+struct CollationData; |
+ |
+class CollationIterator; |
+class RuleBasedCollator; |
+class UCollationPCE; |
+class UVector32; |
+ |
/** |
* The CollationElementIterator class is used as an iterator to walk through |
* each character of an international string. Use the iterator to return the |
* ordering priority of the positioned character. The ordering priority of a |
* character, which we refer to as a key, defines how a character is collated in |
* the given collation object. |
-* For example, consider the following in Spanish: |
+* For example, consider the following in Slovak and in traditional Spanish collation: |
* <pre> |
* "ca" -> the first key is key('c') and second key is key('a'). |
* "cha" -> the first key is key('ch') and second key is key('a').</pre> |
-* And in German, |
+* And in German phonebook collation, |
* <pre> \htmlonly "æb"-> the first key is key('a'), the second key is key('e'), and |
* the third key is key('b'). \endhtmlonly </pre> |
* The key of a character, is an integer composed of primary order(short), |
@@ -103,17 +103,17 @@ U_NAMESPACE_BEGIN |
* <p> |
* The result of a forward iterate (next()) and reversed result of the backward |
* iterate (previous()) on the same string are equivalent, if collation orders |
-* with the value UCOL_IGNORABLE are ignored. |
+* with the value 0 are ignored. |
* Character based on the comparison level of the collator. A collation order |
* consists of primary order, secondary order and tertiary order. The data |
-* type of the collation order is <strong>t_int32</strong>. |
+* type of the collation order is <strong>int32_t</strong>. |
* |
* Note, CollationElementIterator should not be subclassed. |
* @see Collator |
* @see RuleBasedCollator |
* @version 1.8 Jan 16 2001 |
*/ |
-class U_I18N_API CollationElementIterator : public UObject { |
+class U_I18N_API CollationElementIterator U_FINAL : public UObject { |
public: |
// CollationElementIterator public data member ------------------------------ |
@@ -283,8 +283,28 @@ public: |
*/ |
static UClassID U_EXPORT2 getStaticClassID(); |
+#ifndef U_HIDE_INTERNAL_API |
+ /** Reinterprets uc as a CollationElementIterator pointer; a pure reinterpret_cast with no null or type check. @internal */ |
+ static inline CollationElementIterator *fromUCollationElements(UCollationElements *uc) { |
+ return reinterpret_cast<CollationElementIterator *>(uc); |
+ } |
+ /** Const overload: reinterprets uc as a const CollationElementIterator pointer; a pure reinterpret_cast with no null or type check. @internal */ |
+ static inline const CollationElementIterator *fromUCollationElements(const UCollationElements *uc) { |
+ return reinterpret_cast<const CollationElementIterator *>(uc); |
+ } |
+ /** Reinterprets this iterator as a UCollationElements pointer; a pure reinterpret_cast, nothing is copied or allocated. @internal */ |
+ inline UCollationElements *toUCollationElements() { |
+ return reinterpret_cast<UCollationElements *>(this); |
+ } |
+ /** Const overload: reinterprets this iterator as a const UCollationElements pointer; a pure reinterpret_cast, nothing is copied or allocated. @internal */ |
+ inline const UCollationElements *toUCollationElements() const { |
+ return reinterpret_cast<const UCollationElements *>(this); |
+ } |
+#endif // U_HIDE_INTERNAL_API |
+ |
private: |
friend class RuleBasedCollator; |
+ friend class UCollationPCE; |
/** |
* CollationElementIterator constructor. This takes the source string and the |
@@ -297,6 +317,14 @@ private: |
*/ |
CollationElementIterator(const UnicodeString& sourceText, |
const RuleBasedCollator* order, UErrorCode& status); |
+ // Note: The constructors should take settings & tailoring, not a collator, |
+ // to avoid circular dependencies. |
+ // However, for operator==() we would need to be able to compare tailoring data for equality |
+ // without making CollationData or CollationTailoring depend on TailoredSet. |
+ // (See the implementation of RuleBasedCollator::operator==().) |
+ // That might require creating an intermediate class that would be used |
+ // by both CollationElementIterator and RuleBasedCollator |
+ // but only contain the part of RBC== related to data and rules. |
/** |
* CollationElementIterator constructor. This takes the source string and the |
@@ -320,61 +348,53 @@ private: |
CollationElementIterator(); // default constructor not implemented |
+ /** Returns dir_ with the value 1 (just after setOffset()) mapped to 0 (just after reset()); dir_ itself is not modified. */ |
+ inline int8_t normalizeDir() const { return dir_ == 1 ? 0 : dir_; } |
+ |
+ static UHashtable *computeMaxExpansions(const CollationData *data, UErrorCode &errorCode); |
+ |
+ static int32_t getMaxExpansion(const UHashtable *maxExpansions, int32_t order); |
+ |
// CollationElementIterator private data members ---------------------------- |
+ CollationIterator *iter_; // owned |
+ const RuleBasedCollator *rbc_; // aliased |
+ uint32_t otherHalf_; |
/** |
- * Data wrapper for collation elements |
- */ |
- UCollationElements *m_data_; |
- |
+ * <0: backwards; 0: just after reset() (previous() begins from end); |
+ * 1: just after setOffset(); >1: forward |
+ */ |
+ int8_t dir_; |
/** |
- * Indicates if m_data_ belongs to this object. |
- */ |
- UBool isDataOwned_; |
+ * Stores offsets from expansions and from unsafe-backwards iteration, |
+ * so that getOffset() returns intermediate offsets for the CEs |
+ * that are consistent with forward iteration. |
+ */ |
+ UVector32 *offsets_; |
+ |
+ UnicodeString string_; |
}; |
-// CollationElementIterator inline method defination -------------------------- |
+// CollationElementIterator inline method definitions -------------------------- |
-/** |
-* Get the primary order of a collation order. |
-* @param order the collation order |
-* @return the primary order of a collation order. |
-*/ |
inline int32_t CollationElementIterator::primaryOrder(int32_t order) |
{ |
- order &= RuleBasedCollator::PRIMARYORDERMASK; |
- return (order >> RuleBasedCollator::PRIMARYORDERSHIFT); |
+ return (order >> 16) & 0xffff; /* primary order = bits 31..16 of order; the mask keeps the result in 0..0xffff */ |
} |
-/** |
-* Get the secondary order of a collation order. |
-* @param order the collation order |
-* @return the secondary order of a collation order. |
-*/ |
inline int32_t CollationElementIterator::secondaryOrder(int32_t order) |
{ |
- order = order & RuleBasedCollator::SECONDARYORDERMASK; |
- return (order >> RuleBasedCollator::SECONDARYORDERSHIFT); |
+ return (order >> 8) & 0xff; /* secondary order = bits 15..8 of order, result in 0..0xff */ |
} |
-/** |
-* Get the tertiary order of a collation order. |
-* @param order the collation order |
-* @return the tertiary order of a collation order. |
-*/ |
inline int32_t CollationElementIterator::tertiaryOrder(int32_t order) |
{ |
- return (order &= RuleBasedCollator::TERTIARYORDERMASK); |
-} |
- |
-inline int32_t CollationElementIterator::getMaxExpansion(int32_t order) const |
-{ |
- return ucol_getMaxExpansion(m_data_, (uint32_t)order); |
+ return order & 0xff; /* tertiary order = low 8 bits of order, result in 0..0xff */ |
} |
inline UBool CollationElementIterator::isIgnorable(int32_t order) |
{ |
- return (primaryOrder(order) == RuleBasedCollator::PRIMIGNORABLE); |
+ return (order & 0xffff0000) == 0; /* true when bits 31..16 (the part primaryOrder() extracts) are all zero */ |
} |
U_NAMESPACE_END |