OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 2013-2014, International Business Machines | 3 * Copyright (C) 2013-2015, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 * collationsettings.h | 6 * collationsettings.h |
7 * | 7 * |
8 * created on: 2013feb07 | 8 * created on: 2013feb07 |
9 * created by: Markus W. Scherer | 9 * created by: Markus W. Scherer |
10 */ | 10 */ |
11 | 11 |
12 #ifndef __COLLATIONSETTINGS_H__ | 12 #ifndef __COLLATIONSETTINGS_H__ |
13 #define __COLLATIONSETTINGS_H__ | 13 #define __COLLATIONSETTINGS_H__ |
14 | 14 |
15 #include "unicode/utypes.h" | 15 #include "unicode/utypes.h" |
16 | 16 |
17 #if !UCONFIG_NO_COLLATION | 17 #if !UCONFIG_NO_COLLATION |
18 | 18 |
19 #include "unicode/ucol.h" | 19 #include "unicode/ucol.h" |
20 #include "collation.h" | 20 #include "collation.h" |
21 #include "sharedobject.h" | 21 #include "sharedobject.h" |
22 #include "umutex.h" | 22 #include "umutex.h" |
23 | 23 |
24 U_NAMESPACE_BEGIN | 24 U_NAMESPACE_BEGIN |
25 | 25 |
| 26 struct CollationData; |
| 27 |
26 /** | 28 /** |
27 * Collation settings/options/attributes. | 29 * Collation settings/options/attributes. |
28 * These are the values that can be changed via API. | 30 * These are the values that can be changed via API. |
29 */ | 31 */ |
30 struct U_I18N_API CollationSettings : public SharedObject { | 32 struct U_I18N_API CollationSettings : public SharedObject { |
31 /** | 33 /** |
32 * Options bit 0: Perform the FCD check on the input text and deliver normalized text. | 34 * Options bit 0: Perform the FCD check on the input text and deliver normalized text. |
33 */ | 35 */ |
34 static const int32_t CHECK_FCD = 1; | 36 static const int32_t CHECK_FCD = 1; |
35 /** | 37 /** |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
96 MAX_VAR_PUNCT, | 98 MAX_VAR_PUNCT, |
97 MAX_VAR_SYMBOL, | 99 MAX_VAR_SYMBOL, |
98 MAX_VAR_CURRENCY | 100 MAX_VAR_CURRENCY |
99 }; | 101 }; |
100 | 102 |
101 CollationSettings() | 103 CollationSettings() |
102 : options((UCOL_DEFAULT_STRENGTH << STRENGTH_SHIFT) | | 104 : options((UCOL_DEFAULT_STRENGTH << STRENGTH_SHIFT) | |
103 (MAX_VAR_PUNCT << MAX_VARIABLE_SHIFT)), | 105 (MAX_VAR_PUNCT << MAX_VARIABLE_SHIFT)), |
104 variableTop(0), | 106 variableTop(0), |
105 reorderTable(NULL), | 107 reorderTable(NULL), |
| 108 minHighNoReorder(0), |
| 109 reorderRanges(NULL), reorderRangesLength(0), |
106 reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0), | 110 reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0), |
107 fastLatinOptions(-1) {} | 111 fastLatinOptions(-1) {} |
108 | 112 |
109 CollationSettings(const CollationSettings &other); | 113 CollationSettings(const CollationSettings &other); |
110 virtual ~CollationSettings(); | 114 virtual ~CollationSettings(); |
111 | 115 |
112 UBool operator==(const CollationSettings &other) const; | 116 UBool operator==(const CollationSettings &other) const; |
113 | 117 |
114 inline UBool operator!=(const CollationSettings &other) const { | 118 inline UBool operator!=(const CollationSettings &other) const { |
115 return !operator==(other); | 119 return !operator==(other); |
116 } | 120 } |
117 | 121 |
118 int32_t hashCode() const; | 122 int32_t hashCode() const; |
119 | 123 |
120 void resetReordering(); | 124 void resetReordering(); |
121 void aliasReordering(const int32_t *codes, int32_t length, const uint8_t *table); | 125 void aliasReordering(const CollationData &data, const int32_t *codes, int32_t length, |
122 UBool setReordering(const int32_t *codes, int32_t length, const uint8_t table[256]); | 126 const uint32_t *ranges, int32_t rangesLength, |
| 127 const uint8_t *table, UErrorCode &errorCode); |
| 128 void setReordering(const CollationData &data, const int32_t *codes, int32_t codesLength, |
| 129 UErrorCode &errorCode); |
| 130 void copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode); |
| 131 |
| 132 inline UBool hasReordering() const { return reorderTable != NULL; } |
| 133 static UBool reorderTableHasSplitBytes(const uint8_t table[256]); |
| 134 inline uint32_t reorder(uint32_t p) const { |
| 135 uint8_t b = reorderTable[p >> 24]; |
| 136 if(b != 0 || p <= Collation::NO_CE_PRIMARY) { |
| 137 return ((uint32_t)b << 24) | (p & 0xffffff); |
| 138 } else { |
| 139 return reorderEx(p); |
| 140 } |
| 141 } |
123 | 142 |
124 void setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode); | 143 void setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode); |
125 | 144 |
126 static int32_t getStrength(int32_t options) { | 145 static int32_t getStrength(int32_t options) { |
127 return options >> STRENGTH_SHIFT; | 146 return options >> STRENGTH_SHIFT; |
128 } | 147 } |
129 | 148 |
130 int32_t getStrength() const { | 149 int32_t getStrength() const { |
131 return getStrength(options); | 150 return getStrength(options); |
132 } | 151 } |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
187 } | 206 } |
188 | 207 |
189 inline UBool isNumeric() const { | 208 inline UBool isNumeric() const { |
190 return (options & NUMERIC) != 0; | 209 return (options & NUMERIC) != 0; |
191 } | 210 } |
192 | 211 |
193 /** CHECK_FCD etc. */ | 212 /** CHECK_FCD etc. */ |
194 int32_t options; | 213 int32_t options; |
195 /** Variable-top primary weight. */ | 214 /** Variable-top primary weight. */ |
196 uint32_t variableTop; | 215 uint32_t variableTop; |
197 /** 256-byte table for reordering permutation of primary lead bytes; NULL if no reordering. */ | 216 /** |
| 217 * 256-byte table for reordering permutation of primary lead bytes; NULL if no reordering. |
| 218 * A 0 entry at a non-zero index means that the primary lead byte is "split" |
| 219 * (there are different offsets for primaries that share that lead byte) |
| 220 * and the reordering offset must be determined via the reorderRanges. |
| 221 */ |
198 const uint8_t *reorderTable; | 222 const uint8_t *reorderTable; |
| 223 /** Limit of last reordered range. 0 if no reordering or no split bytes. */ |
| 224 uint32_t minHighNoReorder; |
| 225 /** |
| 226 * Primary-weight ranges for script reordering, |
| 227 * to be used by reorder(p) for split-reordered primary lead bytes. |
| 228 * |
| 229 * Each entry is a (limit, offset) pair. |
| 230 * The upper 16 bits of the entry are the upper 16 bits of the |
| 231 * exclusive primary limit of a range. |
| 232 * Primaries between the previous limit and this one have their lead bytes |
| 233 * modified by the signed offset (-0xff..+0xff) stored in the lower 16 bits. |
| 234 * |
| 235 * CollationData::makeReorderRanges() writes a full list where the first range |
| 236 * (at least for terminators and separators) has a 0 offset. |
| 237 * The last range has a non-zero offset. |
| 238 * minHighNoReorder is set to the limit of that last range. |
| 239 * |
| 240 * In the settings object, the initial ranges before the first split lead byte |
| 241 * are omitted for efficiency; they are handled by reorder(p) via the reorderTable. |
| 242 * If there are no split-reordered lead bytes, then no ranges are needed. |
| 243 */ |
| 244 const uint32_t *reorderRanges; |
| 245 int32_t reorderRangesLength; |
199 /** Array of reorder codes; ignored if reorderCodesLength == 0. */ | 246 /** Array of reorder codes; ignored if reorderCodesLength == 0. */ |
200 const int32_t *reorderCodes; | 247 const int32_t *reorderCodes; |
201 /** Number of reorder codes; 0 if no reordering. */ | 248 /** Number of reorder codes; 0 if no reordering. */ |
202 int32_t reorderCodesLength; | 249 int32_t reorderCodesLength; |
203 /** | 250 /** |
204 * Capacity of reorderCodes. | 251 * Capacity of reorderCodes. |
205 * If 0, then the table and codes are aliases. | 252 * If 0, then the codes, the ranges, and the table are aliases. |
206 * Otherwise, this object owns the memory via the reorderCodes pointer; | 253 * Otherwise, this object owns the memory via the reorderCodes pointer; |
207 * the table and the codes are in the same memory block, with the codes first. | 254 * the codes, the ranges, and the table are in the same memory block, in that order. |
208 */ | 255 */ |
209 int32_t reorderCodesCapacity; | 256 int32_t reorderCodesCapacity; |
210 | 257 |
211 /** Options for CollationFastLatin. Negative if disabled. */ | 258 /** Options for CollationFastLatin. Negative if disabled. */ |
212 int32_t fastLatinOptions; | 259 int32_t fastLatinOptions; |
213 uint16_t fastLatinPrimaries[0x180]; | 260 uint16_t fastLatinPrimaries[0x180]; |
| 261 |
| 262 private: |
| 263 void setReorderArrays(const int32_t *codes, int32_t codesLength, |
| 264 const uint32_t *ranges, int32_t rangesLength, |
| 265 const uint8_t *table, UErrorCode &errorCode); |
| 266 uint32_t reorderEx(uint32_t p) const; |
214 }; | 267 }; |
215 | 268 |
216 U_NAMESPACE_END | 269 U_NAMESPACE_END |
217 | 270 |
218 #endif // !UCONFIG_NO_COLLATION | 271 #endif // !UCONFIG_NO_COLLATION |
219 #endif // __COLLATIONSETTINGS_H__ | 272 #endif // __COLLATIONSETTINGS_H__ |
OLD | NEW |