OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 2013-2014, International Business Machines | 3 * Copyright (C) 2013-2014, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 * collationbuilder.h | 6 * collationbuilder.h |
7 * | 7 * |
8 * created on: 2013may06 | 8 * created on: 2013may06 |
9 * created by: Markus W. Scherer | 9 * created by: Markus W. Scherer |
10 */ | 10 */ |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
49 UErrorCode &errorCode); | 49 UErrorCode &errorCode); |
50 | 50 |
51 const char *getErrorReason() const { return errorReason; } | 51 const char *getErrorReason() const { return errorReason; } |
52 | 52 |
53 private: | 53 private: |
54 friend class CEFinalizer; | 54 friend class CEFinalizer; |
55 | 55 |
56 /** Implements CollationRuleParser::Sink. */ | 56 /** Implements CollationRuleParser::Sink. */ |
57 virtual void addReset(int32_t strength, const UnicodeString &str, | 57 virtual void addReset(int32_t strength, const UnicodeString &str, |
58 const char *&errorReason, UErrorCode &errorCode); | 58 const char *&errorReason, UErrorCode &errorCode); |
| 59 /** |
| 60 * Returns the secondary or tertiary weight preceding the current node's wei
ght. |
| 61 * node=nodes[index]. |
| 62 */ |
| 63 uint32_t getWeight16Before(int32_t index, int64_t node, int32_t level); |
59 | 64 |
60 int64_t getSpecialResetPosition(const UnicodeString &str, | 65 int64_t getSpecialResetPosition(const UnicodeString &str, |
61 const char *&parserErrorReason, UErrorCode &
errorCode); | 66 const char *&parserErrorReason, UErrorCode &
errorCode); |
62 | 67 |
63 /** Implements CollationRuleParser::Sink. */ | 68 /** Implements CollationRuleParser::Sink. */ |
64 virtual void addRelation(int32_t strength, const UnicodeString &prefix, | 69 virtual void addRelation(int32_t strength, const UnicodeString &prefix, |
65 const UnicodeString &str, const UnicodeString &exte
nsion, | 70 const UnicodeString &str, const UnicodeString &exte
nsion, |
66 const char *&errorReason, UErrorCode &errorCode); | 71 const char *&errorReason, UErrorCode &errorCode); |
67 | 72 |
68 /** | 73 /** |
(...skipping 20 matching lines...) Expand all Loading... |
89 /** | 94 /** |
90 * Inserts a new node into the list, between list-adjacent items. | 95 * Inserts a new node into the list, between list-adjacent items. |
91 * The node's previous and next indexes must not be set yet. | 96 * The node's previous and next indexes must not be set yet. |
92 * @return the new node's index | 97 * @return the new node's index |
93 */ | 98 */ |
94 int32_t insertNodeBetween(int32_t index, int32_t nextIndex, int64_t node, | 99 int32_t insertNodeBetween(int32_t index, int32_t nextIndex, int64_t node, |
95 UErrorCode &errorCode); | 100 UErrorCode &errorCode); |
96 | 101 |
97 /** | 102 /** |
98 * Finds the node which implies or contains a common=05 weight of the given
strength | 103 * Finds the node which implies or contains a common=05 weight of the given
strength |
99 * (secondary or tertiary). | 104 * (secondary or tertiary), if the current node is stronger. |
100 * Skips weaker nodes and tailored nodes if the current node is stronger | 105 * Skips weaker nodes and tailored nodes if the current node is stronger |
101 * and is followed by an explicit-common-weight node. | 106 * and is followed by an explicit-common-weight node. |
102 * Always returns the input index if that node is no stronger than the given
strength. | 107 * Always returns the input index if that node is no stronger than the given
strength. |
103 */ | 108 */ |
104 int32_t findCommonNode(int32_t index, int32_t strength) const; | 109 int32_t findCommonNode(int32_t index, int32_t strength) const; |
105 | 110 |
106 void setCaseBits(const UnicodeString &nfdString, | 111 void setCaseBits(const UnicodeString &nfdString, |
107 const char *&parserErrorReason, UErrorCode &errorCode); | 112 const char *&parserErrorReason, UErrorCode &errorCode); |
108 | 113 |
109 /** Implements CollationRuleParser::Sink. */ | 114 /** Implements CollationRuleParser::Sink. */ |
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
208 ((int32_t)(tempCE32 >> 8) & 0x3f); | 213 ((int32_t)(tempCE32 >> 8) & 0x3f); |
209 } | 214 } |
210 static inline UBool isTempCE32(uint32_t ce32) { | 215 static inline UBool isTempCE32(uint32_t ce32) { |
211 return | 216 return |
212 (ce32 & 0xff) >= 2 && // not a long-primary/long-secondary CE32 | 217 (ce32 & 0xff) >= 2 && // not a long-primary/long-secondary CE32 |
213 6 <= ((ce32 >> 8) & 0xff) && ((ce32 >> 8) & 0xff) <= 0x45; | 218 6 <= ((ce32 >> 8) & 0xff) && ((ce32 >> 8) & 0xff) <= 0x45; |
214 } | 219 } |
215 | 220 |
216 static int32_t ceStrength(int64_t ce); | 221 static int32_t ceStrength(int64_t ce); |
217 | 222 |
218 /** The secondary/tertiary lower limit for tailoring before the common weigh
t. */ | |
219 static const uint32_t BEFORE_WEIGHT16 = Collation::MERGE_SEPARATOR_WEIGHT16; | |
220 | |
221 /** At most 1M nodes, limited by the 20 bits in node bit fields. */ | 223 /** At most 1M nodes, limited by the 20 bits in node bit fields. */ |
222 static const int32_t MAX_INDEX = 0xfffff; | 224 static const int32_t MAX_INDEX = 0xfffff; |
223 /** | 225 /** |
224 * Node bit 6 is set on a primary node if there are tailored nodes | 226 * Node bit 6 is set on a primary node if there are nodes |
225 * with secondary values below the common secondary weight (05), | 227 * with secondary values below the common secondary weight (05). |
226 * from a reset-secondary-before (&[before 2]). | |
227 */ | 228 */ |
228 static const int32_t HAS_BEFORE2 = 0x40; | 229 static const int32_t HAS_BEFORE2 = 0x40; |
229 /** | 230 /** |
230 * Node bit 5 is set on a primary or secondary node if there are tailored no
des | 231 * Node bit 5 is set on a primary or secondary node if there are nodes |
231 * with tertiary values below the common tertiary weight (05), | 232 * with tertiary values below the common tertiary weight (05). |
232 * from a reset-tertiary-before (&[before 3]). | |
233 */ | 233 */ |
234 static const int32_t HAS_BEFORE3 = 0x20; | 234 static const int32_t HAS_BEFORE3 = 0x20; |
235 /** | 235 /** |
236 * Node bit 3 distinguishes a tailored node, which has no weight value, | 236 * Node bit 3 distinguishes a tailored node, which has no weight value, |
237 * from a node with an explicit (root or default) weight. | 237 * from a node with an explicit (root or default) weight. |
238 */ | 238 */ |
239 static const int32_t IS_TAILORED = 8; | 239 static const int32_t IS_TAILORED = 8; |
240 | 240 |
241 static inline int64_t nodeFromWeight32(uint32_t weight32) { | 241 static inline int64_t nodeFromWeight32(uint32_t weight32) { |
242 return (int64_t)weight32 << 32; | 242 return (int64_t)weight32 << 32; |
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
334 * create a difference of a certain strength from the preceding node. | 334 * create a difference of a certain strength from the preceding node. |
335 * | 335 * |
336 * A root node is followed by either | 336 * A root node is followed by either |
337 * - a root/default node of the same strength, or | 337 * - a root/default node of the same strength, or |
338 * - a root/default node of the next-weaker strength, or | 338 * - a root/default node of the next-weaker strength, or |
339 * - a tailored node of the same strength. | 339 * - a tailored node of the same strength. |
340 * | 340 * |
341 * A node of a given strength normally implies "common" weights on weaker le
vels. | 341 * A node of a given strength normally implies "common" weights on weaker le
vels. |
342 * | 342 * |
343 * A node with HAS_BEFORE2 must be immediately followed by | 343 * A node with HAS_BEFORE2 must be immediately followed by |
344 * a secondary node with BEFORE_WEIGHT16, then a secondary tailored node, | 344 * a secondary node with an explicit below-common weight, then a secondary t
ailored node, |
345 * and later an explicit common-secondary node. | 345 * and later an explicit common-secondary node. |
346 * (&[before 2] resets to the BEFORE_WEIGHT16 node so that | 346 * The below-common weight can be a root weight, |
| 347 * or it can be BEFORE_WEIGHT16 for tailoring before an implied common weigh
t |
| 348 * or before the lowest root weight. |
| 349 * (&[before 2] resets to an explicit secondary node so that |
347 * the following addRelation(secondary) tailors right after that. | 350 * the following addRelation(secondary) tailors right after that. |
348 * If we did not have this node and instead were to reset on the primary nod
e, | 351 * If we did not have this node and instead were to reset on the primary nod
e, |
349 * then addRelation(secondary) would skip forward to the COMMON_WEIGHT16
node.) | 352 * then addRelation(secondary) would skip forward to the COMMON_WEIGHT16
node.) |
350 * | 353 * |
351 * All secondary tailored nodes between these two explicit ones | |
352 * will be assigned lower-than-common secondary weights. | |
353 * If the flag is not set, then there are no explicit secondary nodes | 354 * If the flag is not set, then there are no explicit secondary nodes |
354 * with the common or lower weights. | 355 * with the common or lower weights. |
355 * | 356 * |
356 * Same for HAS_BEFORE3 for tertiary nodes and weights. | 357 * Same for HAS_BEFORE3 for tertiary nodes and weights. |
357 * A node must not have both flags set. | 358 * A node must not have both flags set. |
358 * | 359 * |
359 * Tailored CEs are initially represented in a CollationDataBuilder as tempo
rary CEs | 360 * Tailored CEs are initially represented in a CollationDataBuilder as tempo
rary CEs |
360 * which point to stable indexes in this list, | 361 * which point to stable indexes in this list, |
361 * and temporary CEs stored in a CollationDataBuilder only point to tailored
nodes. | 362 * and temporary CEs stored in a CollationDataBuilder only point to tailored
nodes. |
362 * | 363 * |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
397 * in a pointer list so that they can be indexed from temporary CEs, | 398 * in a pointer list so that they can be indexed from temporary CEs, |
398 * and they would require more memory allocations. | 399 * and they would require more memory allocations. |
399 */ | 400 */ |
400 UVector64 nodes; | 401 UVector64 nodes; |
401 }; | 402 }; |
402 | 403 |
403 U_NAMESPACE_END | 404 U_NAMESPACE_END |
404 | 405 |
405 #endif // !UCONFIG_NO_COLLATION | 406 #endif // !UCONFIG_NO_COLLATION |
406 #endif // __COLLATIONBUILDER_H__ | 407 #endif // __COLLATIONBUILDER_H__ |
OLD | NEW |