OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 2013-2014, International Business Machines | 3 * Copyright (C) 2013-2014, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 * collationrootelements.h | 6 * collationrootelements.h |
7 * | 7 * |
8 * created on: 2013mar01 | 8 * created on: 2013mar01 |
9 * created by: Markus W. Scherer | 9 * created by: Markus W. Scherer |
10 */ | 10 */ |
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
182 int32_t findPrimary(uint32_t p) const; | 182 int32_t findPrimary(uint32_t p) const; |
183 | 183 |
184 /** | 184 /** |
185 * Returns the primary weight after p where index=findPrimary(p). | 185 * Returns the primary weight after p where index=findPrimary(p). |
186 * p must be at least the first root primary. | 186 * p must be at least the first root primary. |
187 */ | 187 */ |
188 uint32_t getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const; | 188 uint32_t getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const; |
189 /** | 189 /** |
190 * Returns the secondary weight after [p, s] where index=findPrimary(p) | 190 * Returns the secondary weight after [p, s] where index=findPrimary(p) |
191 * except use index=0 for p=0. | 191 * except use index=0 for p=0. |
| 192 * |
| 193 * Must return a weight for every root [p, s] as well as for every weight |
| 194 * returned by getSecondaryBefore(). If p!=0 then s can be BEFORE_WEIGHT16. |
| 195 * |
| 196 * Exception: [0, 0] is handled by the CollationBuilder: |
| 197 * Both its lower and upper boundaries are special. |
192 */ | 198 */ |
193 uint32_t getSecondaryAfter(int32_t index, uint32_t s) const; | 199 uint32_t getSecondaryAfter(int32_t index, uint32_t s) const; |
194 /** | 200 /** |
195 * Returns the tertiary weight after [p, s, t] where index=findPrimary(p) | 201 * Returns the tertiary weight after [p, s, t] where index=findPrimary(p) |
196 * except use index=0 for p=0. | 202 * except use index=0 for p=0. |
| 203 * |
| 204 * Must return a weight for every root [p, s, t] as well as for every weight |
| 205 * returned by getTertiaryBefore(). If s!=0 then t can be BEFORE_WEIGHT16. |
| 206 * |
| 207 * Exception: [0, 0, 0] is handled by the CollationBuilder: |
| 208 * Both its lower and upper boundaries are special. |
197 */ | 209 */ |
198 uint32_t getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const; | 210 uint32_t getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const; |
199 | 211 |
200 private: | 212 private: |
201 /** | 213 /** |
| 214 * Returns the first secondary & tertiary weights for p where index=findPrimary(p)+1. |
| 215 */ |
| 216 uint32_t getFirstSecTerForPrimary(int32_t index) const; |
| 217 |
| 218 /** |
202 * Finds the largest index i where elements[i]<=p. | 219 * Finds the largest index i where elements[i]<=p. |
203 * Requires first primary<=p<0xffffff00 (PRIMARY_SENTINEL). | 220 * Requires first primary<=p<0xffffff00 (PRIMARY_SENTINEL). |
204 * Does not require that p is a root collator primary. | 221 * Does not require that p is a root collator primary. |
205 */ | 222 */ |
206 int32_t findP(uint32_t p) const; | 223 int32_t findP(uint32_t p) const; |
207 | 224 |
208 static inline UBool isEndOfPrimaryRange(uint32_t q) { | 225 static inline UBool isEndOfPrimaryRange(uint32_t q) { |
209 return (q & SEC_TER_DELTA_FLAG) == 0 && (q & PRIMARY_STEP_MASK) != 0; | 226 return (q & SEC_TER_DELTA_FLAG) == 0 && (q & PRIMARY_STEP_MASK) != 0; |
210 } | 227 } |
211 | 228 |
212 /** | 229 /** |
213 * Data structure: | 230 * Data structure: |
214 * | 231 * |
215 * The first few entries are indexes, up to elements[IX_FIRST_TERTIARY_INDEX]. | 232 * The first few entries are indexes, up to elements[IX_FIRST_TERTIARY_INDEX]. |
216 * See the comments on the IX_ constants. | 233 * See the comments on the IX_ constants. |
217 * | 234 * |
218 * All other elements are a compact form of the root collator CEs | 235 * All other elements are a compact form of the root collator CEs |
219 * in collation order. | 236 * mostly in collation order. |
220 * | 237 * |
221 * Primary weights have the SEC_TER_DELTA_FLAG flag not set. | 238 * A sequence of one or more root CEs with the same primary weight is stored as |
222 * A primary-weight element by itself represents a root CE | 239 * one element with the primary weight, with the SEC_TER_DELTA_FLAG flag not set, |
223 * with Collation::COMMON_SEC_AND_TER_CE. | 240 * followed by elements with only the secondary/tertiary weights, |
| 241 * each with that flag set. |
| 242 * If the lowest secondary/tertiary combination is Collation::COMMON_SEC_AND_TER_CE, |
| 243 * then the element for that combination is omitted. |
224 * | 244 * |
225 * If there are root CEs with the same primary but other secondary/tertiary weights, | 245 * Note: If the first actual secondary/tertiary combination is higher than |
226 * then for each such CE there is an element with those secondary and tertiary weights, | 246 * Collation::COMMON_SEC_AND_TER_CE (which is unusual), |
227 * and with the SEC_TER_DELTA_FLAG flag set. | 247 * the runtime code will assume anyway that Collation::COMMON_SEC_AND_TER_CE is present. |
228 * | 248 * |
229 * A range of only-primary CEs with a consistent "step" increment | 249 * A range of only-primary CEs with a consistent "step" increment |
230 * from each primary to the next may be stored as a range. | 250 * from each primary to the next may be stored as a range. |
231 * Only the first and last primary are stored, and the last has the step | 251 * Only the first and last primary are stored, and the last has the step |
232 * value in the low bits (PRIMARY_STEP_MASK). | 252 * value in the low bits (PRIMARY_STEP_MASK). |
233 * | 253 * |
234 * A range-end element may also either start a new range or be followed by | 254 * A range-end element may also either start a new range or be followed by |
235 * elements with secondary/tertiary deltas. | 255 * elements with secondary/tertiary deltas. |
236 * | 256 * |
237 * A primary element that is not a range end has zero step bits. | 257 * A primary element that is not a range end has zero step bits. |
238 * | 258 * |
239 * There is no element for the completely ignorable CE (all weights 0). | 259 * There is no element for the completely ignorable CE (all weights 0). |
240 * | 260 * |
241 * Before elements[IX_FIRST_PRIMARY_INDEX], all elements are secondary/tertiary deltas, | 261 * Before elements[IX_FIRST_PRIMARY_INDEX], all elements are secondary/tertiary deltas, |
242 * for all of the ignorable root CEs. | 262 * for all of the ignorable root CEs. |
243 * | 263 * |
244 * There are no elements for unassigned-implicit primary CEs. | 264 * There are no elements for unassigned-implicit primary CEs. |
245 * All primaries stored here are at most 3 bytes long. | 265 * All primaries stored here are at most 3 bytes long. |
246 */ | 266 */ |
247 const uint32_t *elements; | 267 const uint32_t *elements; |
248 int32_t length; | 268 int32_t length; |
249 }; | 269 }; |
250 | 270 |
251 U_NAMESPACE_END | 271 U_NAMESPACE_END |
252 | 272 |
253 #endif // !UCONFIG_NO_COLLATION | 273 #endif // !UCONFIG_NO_COLLATION |
254 #endif // __COLLATIONROOTELEMENTS_H__ | 274 #endif // __COLLATIONROOTELEMENTS_H__ |
OLD | NEW |