OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * Copyright (C) 2013-2014, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ******************************************************************************* |
| 6 * collationsettings.h |
| 7 * |
| 8 * created on: 2013feb07 |
| 9 * created by: Markus W. Scherer |
| 10 */ |
| 11 |
| 12 #ifndef __COLLATIONSETTINGS_H__ |
| 13 #define __COLLATIONSETTINGS_H__ |
| 14 |
| 15 #include "unicode/utypes.h" |
| 16 |
| 17 #if !UCONFIG_NO_COLLATION |
| 18 |
| 19 #include "unicode/ucol.h" |
| 20 #include "collation.h" |
| 21 #include "sharedobject.h" |
| 22 #include "umutex.h" |
| 23 |
| 24 U_NAMESPACE_BEGIN |
| 25 |
| 26 /** |
| 27 * Collation settings/options/attributes. |
| 28 * These are the values that can be changed via API. |
| 29 */ |
| 30 struct U_I18N_API CollationSettings : public SharedObject { |
| 31 /** |
| 32 * Options bit 0: Perform the FCD check on the input text and deliver normal
ized text. |
| 33 */ |
| 34 static const int32_t CHECK_FCD = 1; |
| 35 /** |
| 36 * Options bit 1: Numeric collation. |
| 37 * Also known as CODAN = COllate Digits As Numbers. |
| 38 * |
| 39 * Treat digit sequences as numbers with CE sequences in numeric order, |
| 40 * rather than returning a normal CE for each digit. |
| 41 */ |
| 42 static const int32_t NUMERIC = 2; |
| 43 /** |
| 44 * "Shifted" alternate handling, see ALTERNATE_MASK. |
| 45 */ |
| 46 static const int32_t SHIFTED = 4; |
| 47 /** |
| 48 * Options bits 3..2: Alternate-handling mask. 0 for non-ignorable. |
| 49 * Reserve values 8 and 0xc for shift-trimmed and blanked. |
| 50 */ |
| 51 static const int32_t ALTERNATE_MASK = 0xc; |
| 52 /** |
| 53 * Options bits 6..4: The 3-bit maxVariable value bit field is shifted by th
is value. |
| 54 */ |
| 55 static const int32_t MAX_VARIABLE_SHIFT = 4; |
| 56 /** maxVariable options bit mask before shifting. */ |
| 57 static const int32_t MAX_VARIABLE_MASK = 0x70; |
| 58 /** Options bit 7: Reserved/unused/0. */ |
| 59 /** |
| 60 * Options bit 8: Sort uppercase first if caseLevel or caseFirst is on. |
| 61 */ |
| 62 static const int32_t UPPER_FIRST = 0x100; |
| 63 /** |
| 64 * Options bit 9: Keep the case bits in the tertiary weight (they trump othe
r tertiary values) |
| 65 * unless case level is on (when they are *moved* into the separate case lev
el). |
| 66 * By default, the case bits are removed from the tertiary weight (ignored). |
| 67 * |
| 68 * When CASE_FIRST is off, UPPER_FIRST must be off too, corresponding to |
| 69 * the tri-value UCOL_CASE_FIRST attribute: UCOL_OFF vs. UCOL_LOWER_FIRST vs
. UCOL_UPPER_FIRST. |
| 70 */ |
| 71 static const int32_t CASE_FIRST = 0x200; |
| 72 /** |
| 73 * Options bit mask for caseFirst and upperFirst, before shifting. |
| 74 * Same value as caseFirst==upperFirst. |
| 75 */ |
| 76 static const int32_t CASE_FIRST_AND_UPPER_MASK = CASE_FIRST | UPPER_FIRST; |
| 77 /** |
| 78 * Options bit 10: Insert the case level between the secondary and tertiary
levels. |
| 79 */ |
| 80 static const int32_t CASE_LEVEL = 0x400; |
| 81 /** |
| 82 * Options bit 11: Compare secondary weights backwards. ("French secondary") |
| 83 */ |
| 84 static const int32_t BACKWARD_SECONDARY = 0x800; |
| 85 /** |
| 86 * Options bits 15..12: The 4-bit strength value bit field is shifted by thi
s value. |
| 87 * It is the top used bit field in the options. (No need to mask after shift
ing.) |
| 88 */ |
| 89 static const int32_t STRENGTH_SHIFT = 12; |
| 90 /** Strength options bit mask before shifting. */ |
| 91 static const int32_t STRENGTH_MASK = 0xf000; |
| 92 |
| 93 /** maxVariable values */ |
| 94 enum MaxVariable { |
| 95 MAX_VAR_SPACE, |
| 96 MAX_VAR_PUNCT, |
| 97 MAX_VAR_SYMBOL, |
| 98 MAX_VAR_CURRENCY |
| 99 }; |
| 100 |
| 101 CollationSettings() |
| 102 : options((UCOL_DEFAULT_STRENGTH << STRENGTH_SHIFT) | |
| 103 (MAX_VAR_PUNCT << MAX_VARIABLE_SHIFT)), |
| 104 variableTop(0), |
| 105 reorderTable(NULL), |
| 106 reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0)
, |
| 107 fastLatinOptions(-1) {} |
| 108 |
| 109 CollationSettings(const CollationSettings &other); |
| 110 virtual ~CollationSettings(); |
| 111 |
| 112 UBool operator==(const CollationSettings &other) const; |
| 113 |
| 114 inline UBool operator!=(const CollationSettings &other) const { |
| 115 return !operator==(other); |
| 116 } |
| 117 |
| 118 int32_t hashCode() const; |
| 119 |
| 120 void resetReordering(); |
| 121 void aliasReordering(const int32_t *codes, int32_t length, const uint8_t *ta
ble); |
| 122 UBool setReordering(const int32_t *codes, int32_t length, const uint8_t tabl
e[256]); |
| 123 |
| 124 void setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCod
e); |
| 125 |
| 126 static int32_t getStrength(int32_t options) { |
| 127 return options >> STRENGTH_SHIFT; |
| 128 } |
| 129 |
| 130 int32_t getStrength() const { |
| 131 return getStrength(options); |
| 132 } |
| 133 |
| 134 /** Sets the options bit for an on/off attribute. */ |
| 135 void setFlag(int32_t bit, UColAttributeValue value, |
| 136 int32_t defaultOptions, UErrorCode &errorCode); |
| 137 |
| 138 UColAttributeValue getFlag(int32_t bit) const { |
| 139 return ((options & bit) != 0) ? UCOL_ON : UCOL_OFF; |
| 140 } |
| 141 |
| 142 void setCaseFirst(UColAttributeValue value, int32_t defaultOptions, UErrorCo
de &errorCode); |
| 143 |
| 144 UColAttributeValue getCaseFirst() const { |
| 145 int32_t option = options & CASE_FIRST_AND_UPPER_MASK; |
| 146 return (option == 0) ? UCOL_OFF : |
| 147 (option == CASE_FIRST) ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST; |
| 148 } |
| 149 |
| 150 void setAlternateHandling(UColAttributeValue value, |
| 151 int32_t defaultOptions, UErrorCode &errorCode); |
| 152 |
| 153 UColAttributeValue getAlternateHandling() const { |
| 154 return ((options & ALTERNATE_MASK) == 0) ? UCOL_NON_IGNORABLE : UCOL_SHI
FTED; |
| 155 } |
| 156 |
| 157 void setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &error
Code); |
| 158 |
| 159 MaxVariable getMaxVariable() const { |
| 160 return (MaxVariable)((options & MAX_VARIABLE_MASK) >> MAX_VARIABLE_SHIFT
); |
| 161 } |
| 162 |
| 163 /** |
| 164 * Include case bits in the tertiary level if caseLevel=off and caseFirst!=o
ff. |
| 165 */ |
| 166 static inline UBool isTertiaryWithCaseBits(int32_t options) { |
| 167 return (options & (CASE_LEVEL | CASE_FIRST)) == CASE_FIRST; |
| 168 } |
| 169 static uint32_t getTertiaryMask(int32_t options) { |
| 170 // Remove the case bits from the tertiary weight when caseLevel is on or
caseFirst is off. |
| 171 return isTertiaryWithCaseBits(options) ? |
| 172 Collation::CASE_AND_TERTIARY_MASK : Collation::ONLY_TERTIARY_MAS
K; |
| 173 } |
| 174 |
| 175 static UBool sortsTertiaryUpperCaseFirst(int32_t options) { |
| 176 // On tertiary level, consider case bits and sort uppercase first |
| 177 // if caseLevel is off and caseFirst==upperFirst. |
| 178 return (options & (CASE_LEVEL | CASE_FIRST_AND_UPPER_MASK)) == CASE_FIRS
T_AND_UPPER_MASK; |
| 179 } |
| 180 |
| 181 inline UBool dontCheckFCD() const { |
| 182 return (options & CHECK_FCD) == 0; |
| 183 } |
| 184 |
| 185 inline UBool hasBackwardSecondary() const { |
| 186 return (options & BACKWARD_SECONDARY) != 0; |
| 187 } |
| 188 |
| 189 inline UBool isNumeric() const { |
| 190 return (options & NUMERIC) != 0; |
| 191 } |
| 192 |
| 193 /** CHECK_FCD etc. */ |
| 194 int32_t options; |
| 195 /** Variable-top primary weight. */ |
| 196 uint32_t variableTop; |
| 197 /** 256-byte table for reordering permutation of primary lead bytes; NULL if
no reordering. */ |
| 198 const uint8_t *reorderTable; |
| 199 /** Array of reorder codes; ignored if reorderCodesLength == 0. */ |
| 200 const int32_t *reorderCodes; |
| 201 /** Number of reorder codes; 0 if no reordering. */ |
| 202 int32_t reorderCodesLength; |
| 203 /** |
| 204 * Capacity of reorderCodes. |
| 205 * If 0, then the table and codes are aliases. |
| 206 * Otherwise, this object owns the memory via the reorderCodes pointer; |
| 207 * the table and the codes are in the same memory block, with the codes firs
t. |
| 208 */ |
| 209 int32_t reorderCodesCapacity; |
| 210 |
| 211 /** Options for CollationFastLatin. Negative if disabled. */ |
| 212 int32_t fastLatinOptions; |
| 213 uint16_t fastLatinPrimaries[0x180]; |
| 214 }; |
| 215 |
| 216 U_NAMESPACE_END |
| 217 |
| 218 #endif // !UCONFIG_NO_COLLATION |
| 219 #endif // __COLLATIONSETTINGS_H__ |
OLD | NEW |