OLD | NEW |
1 /* | 1 /* |
2 ********************************************************************** | 2 ********************************************************************** |
3 * Copyright (C) 2001-2011 IBM and others. All rights reserved. | 3 * Copyright (C) 2001-2014 IBM and others. All rights reserved. |
4 ********************************************************************** | 4 ********************************************************************** |
5 * Date Name Description | 5 * Date Name Description |
6 * 08/13/2001 synwee Creation. | 6 * 08/13/2001 synwee Creation. |
7 ********************************************************************** | 7 ********************************************************************** |
8 */ | 8 */ |
9 #ifndef USRCHIMP_H | 9 #ifndef USRCHIMP_H |
10 #define USRCHIMP_H | 10 #define USRCHIMP_H |
11 | 11 |
12 #include "unicode/utypes.h" | 12 #include "unicode/utypes.h" |
13 | 13 |
14 #if !UCONFIG_NO_COLLATION | 14 #if !UCONFIG_NO_COLLATION |
15 | 15 |
16 #include "unicode/normalizer2.h" | 16 #include "unicode/normalizer2.h" |
17 #include "unicode/ucol.h" | 17 #include "unicode/ucol.h" |
18 #include "unicode/ucoleitr.h" | 18 #include "unicode/ucoleitr.h" |
19 #include "unicode/ubrk.h" | 19 #include "unicode/ubrk.h" |
20 | 20 |
| 21 /* mask off anything but primary order */ |
| 22 #define UCOL_PRIMARYORDERMASK 0xffff0000 |
| 23 /* mask off anything but secondary order */ |
| 24 #define UCOL_SECONDARYORDERMASK 0x0000ff00 |
| 25 /* mask off anything but tertiary order */ |
| 26 #define UCOL_TERTIARYORDERMASK 0x000000ff |
| 27 /* primary order shift */ |
| 28 #define UCOL_PRIMARYORDERSHIFT 16 |
| 29 /* secondary order shift */ |
| 30 #define UCOL_SECONDARYORDERSHIFT 8 |
| 31 |
| 32 #define UCOL_IGNORABLE 0 |
| 33 |
| 34 /* get weights from a CE */ |
| 35 #define UCOL_PRIMARYORDER(order) (((order) >> 16) & 0xffff) |
| 36 #define UCOL_SECONDARYORDER(order) (((order) & UCOL_SECONDARYORDERMASK)>> UCOL_S
ECONDARYORDERSHIFT) |
| 37 #define UCOL_TERTIARYORDER(order) ((order) & UCOL_TERTIARYORDERMASK) |
| 38 |
| 39 #define UCOL_CONTINUATION_MARKER 0xC0 |
| 40 |
| 41 #define isContinuation(CE) (((CE) & UCOL_CONTINUATION_MARKER) == UCOL_CONTINUATI
ON_MARKER) |
| 42 |
| 43 /** |
| 44 * This indicates an error has occurred during processing or there are no more CE
s |
| 45 * to be returned. |
| 46 */ |
| 47 #define UCOL_PROCESSED_NULLORDER ((int64_t)U_INT64_MAX) |
| 48 |
| 49 U_NAMESPACE_BEGIN |
| 50 |
| 51 class CollationElementIterator; |
| 52 class Collator; |
| 53 |
| 54 struct PCEI |
| 55 { |
| 56 uint64_t ce; |
| 57 int32_t low; |
| 58 int32_t high; |
| 59 }; |
| 60 |
| 61 struct PCEBuffer |
| 62 { |
| 63 PCEI defaultBuffer[16]; |
| 64 PCEI *buffer; |
| 65 int32_t bufferIndex; |
| 66 int32_t bufferSize; |
| 67 |
| 68 PCEBuffer(); |
| 69 ~PCEBuffer(); |
| 70 |
| 71 void reset(); |
| 72 UBool empty() const; |
| 73 void put(uint64_t ce, int32_t ixLow, int32_t ixHigh); |
| 74 const PCEI *get(); |
| 75 }; |
| 76 |
| 77 class UCollationPCE : public UMemory { |
| 78 private: |
| 79 PCEBuffer pceBuffer; |
| 80 CollationElementIterator *cei; |
| 81 UCollationStrength strength; |
| 82 UBool toShift; |
| 83 UBool isShifted; |
| 84 uint32_t variableTop; |
| 85 |
| 86 public: |
| 87 UCollationPCE(UCollationElements *elems); |
| 88 UCollationPCE(CollationElementIterator *iter); |
| 89 ~UCollationPCE(); |
| 90 |
| 91 void init(UCollationElements *elems); |
| 92 void init(CollationElementIterator *iter); |
| 93 |
| 94 /** |
| 95 * Get the processed ordering priority of the next collation element in the
text. |
| 96 * A single character may contain more than one collation element. |
| 97 * |
| 98 * @param ixLow a pointer to an int32_t to receive the iterator index before
fetching the CE. |
| 99 * @param ixHigh a pointer to an int32_t to receive the iterator index after
fetching the CE. |
| 100 * @param status A pointer to an UErrorCode to receive any errors. |
| 101 * @return The next collation elements ordering, otherwise returns UCOL_PROC
ESSED_NULLORDER |
| 102 * if an error has occurred or if the end of string has been reached |
| 103 */ |
| 104 int64_t nextProcessed(int32_t *ixLow, int32_t *ixHigh, UErrorCode *status); |
| 105 /** |
| 106 * Get the processed ordering priority of the previous collation element in
the text. |
| 107 * A single character may contain more than one collation element. |
| 108 * |
| 109 * @param ixLow A pointer to an int32_t to receive the iterator index after
fetching the CE |
| 110 * @param ixHigh A pointer to an int32_t to receive the iterator index befo
re fetching the CE |
| 111 * @param status A pointer to an UErrorCode to receive any errors. Notably |
| 112 * a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack |
| 113 * buffer has been exhausted. |
| 114 * @return The previous collation elements ordering, otherwise returns |
| 115 * UCOL_PROCESSED_NULLORDER if an error has occurred or if the start
of |
| 116 * string has been reached. |
| 117 */ |
| 118 int64_t previousProcessed(int32_t *ixLow, int32_t *ixHigh, UErrorCode *statu
s); |
| 119 |
| 120 private: |
| 121 void init(const Collator &coll); |
| 122 uint64_t processCE(uint32_t ce); |
| 123 }; |
| 124 |
| 125 U_NAMESPACE_END |
| 126 |
21 #define INITIAL_ARRAY_SIZE_ 256 | 127 #define INITIAL_ARRAY_SIZE_ 256 |
22 #define MAX_TABLE_SIZE_ 257 | 128 #define MAX_TABLE_SIZE_ 257 |
23 | 129 |
24 struct USearch { | 130 struct USearch { |
25 // required since collation element iterator does not have a getText API | 131 // required since collation element iterator does not have a getText API |
26 const UChar *text; | 132 const UChar *text; |
27 int32_t textLength; // exact length | 133 int32_t textLength; // exact length |
28 UBool isOverlap; | 134 UBool isOverlap; |
29 UBool isCanonicalMatch; | 135 UBool isCanonicalMatch; |
30 int16_t elementComparisonType; | 136 int16_t elementComparisonType; |
31 UBreakIterator *internalBreakIter; //internal character breakiter
ator | 137 UBreakIterator *internalBreakIter; //internal character breakiter
ator |
32 UBreakIterator *breakIter; | 138 UBreakIterator *breakIter; |
33 // value USEARCH_DONE is the default value | 139 // value USEARCH_DONE is the default value |
34 // if we are not at the start of the text or the end of the text, | 140 // if we are not at the start of the text or the end of the text, |
35 // depending on the iteration direction and matchedIndex is USEARCH_DONE | 141 // depending on the iteration direction and matchedIndex is USEARCH_DONE |
36 // it means that we can't find any more matches in that particular direction | 142 // it means that we can't find any more matches in that particular direction |
37 int32_t matchedIndex; | 143 int32_t matchedIndex; |
38 int32_t matchedLength; | 144 int32_t matchedLength; |
39 UBool isForwardSearching; | 145 UBool isForwardSearching; |
40 UBool reset; | 146 UBool reset; |
41 }; | 147 }; |
42 | 148 |
43 struct UPattern { | 149 struct UPattern { |
44 const UChar *text; | 150 const UChar *text; |
45 int32_t textLength; // exact length | 151 int32_t textLength; // exact length |
46 // length required for backwards ce comparison | 152 // length required for backwards ce comparison |
47 int32_t CELength; | 153 int32_t cesLength; |
48 int32_t *CE; | 154 int32_t *ces; |
49 int32_t CEBuffer[INITIAL_ARRAY_SIZE_]; | 155 int32_t cesBuffer[INITIAL_ARRAY_SIZE_]; |
50 int32_t PCELength; | 156 int32_t pcesLength; |
51 int64_t *PCE; | 157 int64_t *pces; |
52 int64_t PCEBuffer[INITIAL_ARRAY_SIZE_]; | 158 int64_t pcesBuffer[INITIAL_ARRAY_SIZE_]; |
53 UBool hasPrefixAccents; | 159 UBool hasPrefixAccents; |
54 UBool hasSuffixAccents; | 160 UBool hasSuffixAccents; |
55 int16_t defaultShiftSize; | 161 int16_t defaultShiftSize; |
56 int16_t shift[MAX_TABLE_SIZE_]; | 162 int16_t shift[MAX_TABLE_SIZE_]; |
57 int16_t backShift[MAX_TABLE_SIZE_]; | 163 int16_t backShift[MAX_TABLE_SIZE_]; |
58 }; | 164 }; |
59 | 165 |
60 struct UStringSearch { | 166 struct UStringSearch { |
61 struct USearch *search; | 167 struct USearch *search; |
62 struct UPattern pattern; | 168 struct UPattern pattern; |
63 const UCollator *collator; | 169 const UCollator *collator; |
64 const icu::Normalizer2 *nfd; | 170 const icu::Normalizer2 *nfd; |
65 // positions within the collation element iterator is used to determine | 171 // positions within the collation element iterator is used to determine |
66 // if we are at the start of the text. | 172 // if we are at the start of the text. |
67 UCollationElements *textIter; | 173 UCollationElements *textIter; |
| 174 icu::UCollationPCE *textProcessedIter; |
68 // utility collation element, used throughout program for temporary | 175 // utility collation element, used throughout program for temporary |
69 // iteration. | 176 // iteration. |
70 UCollationElements *utilIter; | 177 UCollationElements *utilIter; |
71 UBool ownCollator; | 178 UBool ownCollator; |
72 UCollationStrength strength; | 179 UCollationStrength strength; |
73 uint32_t ceMask; | 180 uint32_t ceMask; |
74 uint32_t variableTop; | 181 uint32_t variableTop; |
75 UBool toShift; | 182 UBool toShift; |
76 UChar canonicalPrefixAccents[INITIAL_ARRAY_SIZE_]; | 183 UChar canonicalPrefixAccents[INITIAL_ARRAY_SIZE_]; |
77 UChar canonicalSuffixAccents[INITIAL_ARRAY_SIZE_]; | 184 UChar canonicalSuffixAccents[INITIAL_ARRAY_SIZE_]; |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
131 * @param status error status if any | 238 * @param status error status if any |
132 * @return TRUE if a canonical match is found, FALSE otherwise | 239 * @return TRUE if a canonical match is found, FALSE otherwise |
133 */ | 240 */ |
134 U_CFUNC | 241 U_CFUNC |
135 UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, | 242 UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, |
136 UErrorCode *status); | 243 UErrorCode *status); |
137 | 244 |
138 #endif /* #if !UCONFIG_NO_COLLATION */ | 245 #endif /* #if !UCONFIG_NO_COLLATION */ |
139 | 246 |
140 #endif | 247 #endif |
OLD | NEW |