OLD | NEW |
(Empty) | |
/*
*******************************************************************************
* Copyright (C) 2012-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* uitercollationiterator.h
*
* created on: 2012sep23 (from utf16collationiterator.h)
* created by: Markus W. Scherer
*/
| 11 |
| 12 #ifndef __UITERCOLLATIONITERATOR_H__ |
| 13 #define __UITERCOLLATIONITERATOR_H__ |
| 14 |
| 15 #include "unicode/utypes.h" |
| 16 |
| 17 #if !UCONFIG_NO_COLLATION |
| 18 |
| 19 #include "unicode/uiter.h" |
| 20 #include "cmemory.h" |
| 21 #include "collation.h" |
| 22 #include "collationdata.h" |
| 23 #include "normalizer2impl.h" |
| 24 |
| 25 U_NAMESPACE_BEGIN |
| 26 |
| 27 /** |
| 28 * UCharIterator-based collation element and character iterator. |
| 29 * Handles normalized text inline, with length or NUL-terminated. |
| 30 * Unnormalized text is handled by a subclass. |
| 31 */ |
| 32 class U_I18N_API UIterCollationIterator : public CollationIterator { |
| 33 public: |
| 34 UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator
&ui) |
| 35 : CollationIterator(d, numeric), iter(ui) {} |
| 36 |
| 37 virtual ~UIterCollationIterator(); |
| 38 |
| 39 virtual void resetToOffset(int32_t newOffset); |
| 40 |
| 41 virtual int32_t getOffset() const; |
| 42 |
| 43 virtual UChar32 nextCodePoint(UErrorCode &errorCode); |
| 44 |
| 45 virtual UChar32 previousCodePoint(UErrorCode &errorCode); |
| 46 |
| 47 protected: |
| 48 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); |
| 49 |
| 50 virtual UChar handleGetTrailSurrogate(); |
| 51 |
| 52 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); |
| 53 |
| 54 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); |
| 55 |
| 56 UCharIterator &iter; |
| 57 }; |
| 58 |
| 59 /** |
| 60 * Incrementally checks the input text for FCD and normalizes where necessary. |
| 61 */ |
| 62 class U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator { |
| 63 public: |
| 64 FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIte
rator &ui, int32_t startIndex) |
| 65 : UIterCollationIterator(data, numeric, ui), |
| 66 state(ITER_CHECK_FWD), start(startIndex), |
| 67 nfcImpl(data->nfcImpl) {} |
| 68 |
| 69 virtual ~FCDUIterCollationIterator(); |
| 70 |
| 71 virtual void resetToOffset(int32_t newOffset); |
| 72 |
| 73 virtual int32_t getOffset() const; |
| 74 |
| 75 virtual UChar32 nextCodePoint(UErrorCode &errorCode); |
| 76 |
| 77 virtual UChar32 previousCodePoint(UErrorCode &errorCode); |
| 78 |
| 79 protected: |
| 80 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); |
| 81 |
| 82 virtual UChar handleGetTrailSurrogate(); |
| 83 |
| 84 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); |
| 85 |
| 86 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); |
| 87 |
| 88 private: |
| 89 /** |
| 90 * Switches to forward checking if possible. |
| 91 */ |
| 92 void switchToForward(); |
| 93 |
| 94 /** |
| 95 * Extends the FCD text segment forward or normalizes around pos. |
| 96 * @return TRUE if success |
| 97 */ |
| 98 UBool nextSegment(UErrorCode &errorCode); |
| 99 |
| 100 /** |
| 101 * Switches to backward checking. |
| 102 */ |
| 103 void switchToBackward(); |
| 104 |
| 105 /** |
| 106 * Extends the FCD text segment backward or normalizes around pos. |
| 107 * @return TRUE if success |
| 108 */ |
| 109 UBool previousSegment(UErrorCode &errorCode); |
| 110 |
| 111 UBool normalize(const UnicodeString &s, UErrorCode &errorCode); |
| 112 |
| 113 enum State { |
| 114 /** |
| 115 * The input text [start..(iter index)[ passes the FCD check. |
| 116 * Moving forward checks incrementally. |
| 117 * pos & limit are undefined. |
| 118 */ |
| 119 ITER_CHECK_FWD, |
| 120 /** |
| 121 * The input text [(iter index)..limit[ passes the FCD check. |
| 122 * Moving backward checks incrementally. |
| 123 * start & pos are undefined. |
| 124 */ |
| 125 ITER_CHECK_BWD, |
| 126 /** |
| 127 * The input text [start..limit[ passes the FCD check. |
| 128 * pos tracks the current text index. |
| 129 */ |
| 130 ITER_IN_FCD_SEGMENT, |
| 131 /** |
| 132 * The input text [start..limit[ failed the FCD check and was normalized
. |
| 133 * pos tracks the current index in the normalized string. |
| 134 * The text iterator is at the limit index. |
| 135 */ |
| 136 IN_NORM_ITER_AT_LIMIT, |
| 137 /** |
| 138 * The input text [start..limit[ failed the FCD check and was normalized
. |
| 139 * pos tracks the current index in the normalized string. |
| 140 * The text iterator is at the start index. |
| 141 */ |
| 142 IN_NORM_ITER_AT_START |
| 143 }; |
| 144 |
| 145 State state; |
| 146 |
| 147 int32_t start; |
| 148 int32_t pos; |
| 149 int32_t limit; |
| 150 |
| 151 const Normalizer2Impl &nfcImpl; |
| 152 UnicodeString normalized; |
| 153 }; |
| 154 |
| 155 U_NAMESPACE_END |
| 156 |
| 157 #endif // !UCONFIG_NO_COLLATION |
| 158 #endif // __UITERCOLLATIONITERATOR_H__ |
OLD | NEW |