OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 2013-2014, International Business Machines | 3 * Copyright (C) 2013-2015, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 * collationsettings.cpp | 6 * collationsettings.cpp |
7 * | 7 * |
8 * created on: 2013feb07 | 8 * created on: 2013feb07 |
9 * created by: Markus W. Scherer | 9 * created by: Markus W. Scherer |
10 */ | 10 */ |
11 | 11 |
12 #include "unicode/utypes.h" | 12 #include "unicode/utypes.h" |
13 | 13 |
14 #if !UCONFIG_NO_COLLATION | 14 #if !UCONFIG_NO_COLLATION |
15 | 15 |
16 #include "unicode/ucol.h" | 16 #include "unicode/ucol.h" |
17 #include "cmemory.h" | 17 #include "cmemory.h" |
18 #include "collation.h" | 18 #include "collation.h" |
| 19 #include "collationdata.h" |
19 #include "collationsettings.h" | 20 #include "collationsettings.h" |
20 #include "sharedobject.h" | 21 #include "sharedobject.h" |
21 #include "uassert.h" | 22 #include "uassert.h" |
22 #include "umutex.h" | 23 #include "umutex.h" |
| 24 #include "uvectr32.h" |
23 | 25 |
24 U_NAMESPACE_BEGIN | 26 U_NAMESPACE_BEGIN |
25 | 27 |
26 CollationSettings::CollationSettings(const CollationSettings &other) | 28 CollationSettings::CollationSettings(const CollationSettings &other) |
27 : SharedObject(other), | 29 : SharedObject(other), |
28 options(other.options), variableTop(other.variableTop), | 30 options(other.options), variableTop(other.variableTop), |
29 reorderTable(NULL), | 31 reorderTable(NULL), |
| 32 minHighNoReorder(other.minHighNoReorder), |
| 33 reorderRanges(NULL), reorderRangesLength(0), |
30 reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0), | 34 reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0), |
31 fastLatinOptions(other.fastLatinOptions) { | 35 fastLatinOptions(other.fastLatinOptions) { |
32 int32_t length = other.reorderCodesLength; | 36 UErrorCode errorCode = U_ZERO_ERROR; |
33 if(length == 0) { | 37 copyReorderingFrom(other, errorCode); |
34 U_ASSERT(other.reorderTable == NULL); | |
35 } else { | |
36 U_ASSERT(other.reorderTable != NULL); | |
37 if(other.reorderCodesCapacity == 0) { | |
38 aliasReordering(other.reorderCodes, length, other.reorderTable); | |
39 } else { | |
40 setReordering(other.reorderCodes, length, other.reorderTable); | |
41 } | |
42 } | |
43 if(fastLatinOptions >= 0) { | 38 if(fastLatinOptions >= 0) { |
44 uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLat
inPrimaries)); | 39 uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLat
inPrimaries)); |
45 } | 40 } |
46 } | 41 } |
47 | 42 |
48 CollationSettings::~CollationSettings() { | 43 CollationSettings::~CollationSettings() { |
49 if(reorderCodesCapacity != 0) { | 44 if(reorderCodesCapacity != 0) { |
50 uprv_free(const_cast<int32_t *>(reorderCodes)); | 45 uprv_free(const_cast<int32_t *>(reorderCodes)); |
51 } | 46 } |
52 } | 47 } |
(...skipping 19 matching lines...) Expand all Loading... |
72 } | 67 } |
73 return h; | 68 return h; |
74 } | 69 } |
75 | 70 |
76 void | 71 void |
77 CollationSettings::resetReordering() { | 72 CollationSettings::resetReordering() { |
78 // When we turn off reordering, we want to set a NULL permutation | 73 // When we turn off reordering, we want to set a NULL permutation |
79 // rather than a no-op permutation. | 74 // rather than a no-op permutation. |
80 // Keep the memory via reorderCodes and its capacity. | 75 // Keep the memory via reorderCodes and its capacity. |
81 reorderTable = NULL; | 76 reorderTable = NULL; |
| 77 minHighNoReorder = 0; |
| 78 reorderRangesLength = 0; |
82 reorderCodesLength = 0; | 79 reorderCodesLength = 0; |
83 } | 80 } |
84 | 81 |
85 void | 82 void |
86 CollationSettings::aliasReordering(const int32_t *codes, int32_t length, const u
int8_t *table) { | 83 CollationSettings::aliasReordering(const CollationData &data, const int32_t *cod
es, int32_t length, |
87 if(length == 0) { | 84 const uint32_t *ranges, int32_t rangesLength, |
88 resetReordering(); | 85 const uint8_t *table, UErrorCode &errorCode)
{ |
89 } else { | 86 if(U_FAILURE(errorCode)) { return; } |
| 87 if(table != NULL && |
| 88 (rangesLength == 0 ? |
| 89 !reorderTableHasSplitBytes(table) : |
| 90 rangesLength >= 2 && |
| 91 // The first offset must be 0. The last offset must not be 0
. |
| 92 (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xf
fff) != 0)) { |
90 // We need to release the memory before setting the alias pointer. | 93 // We need to release the memory before setting the alias pointer. |
91 if(reorderCodesCapacity != 0) { | 94 if(reorderCodesCapacity != 0) { |
92 uprv_free(const_cast<int32_t *>(reorderCodes)); | 95 uprv_free(const_cast<int32_t *>(reorderCodes)); |
93 reorderCodesCapacity = 0; | 96 reorderCodesCapacity = 0; |
94 } | 97 } |
95 reorderTable = table; | 98 reorderTable = table; |
96 reorderCodes = codes; | 99 reorderCodes = codes; |
97 reorderCodesLength = length; | 100 reorderCodesLength = length; |
| 101 // Drop ranges before the first split byte. They are reordered by the ta
ble. |
| 102 // This then speeds up reordering of the remaining ranges. |
| 103 int32_t firstSplitByteRangeIndex = 0; |
| 104 while(firstSplitByteRangeIndex < rangesLength && |
| 105 (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) { |
| 106 // The second byte of the primary limit is 0. |
| 107 ++firstSplitByteRangeIndex; |
| 108 } |
| 109 if(firstSplitByteRangeIndex == rangesLength) { |
| 110 U_ASSERT(!reorderTableHasSplitBytes(table)); |
| 111 minHighNoReorder = 0; |
| 112 reorderRanges = NULL; |
| 113 reorderRangesLength = 0; |
| 114 } else { |
| 115 U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0); |
| 116 minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; |
| 117 reorderRanges = ranges + firstSplitByteRangeIndex; |
| 118 reorderRangesLength = rangesLength - firstSplitByteRangeIndex; |
| 119 } |
| 120 return; |
| 121 } |
| 122 // Regenerate missing data. |
| 123 setReordering(data, codes, length, errorCode); |
| 124 } |
| 125 |
| 126 void |
| 127 CollationSettings::setReordering(const CollationData &data, |
| 128 const int32_t *codes, int32_t codesLength, |
| 129 UErrorCode &errorCode) { |
| 130 if(U_FAILURE(errorCode)) { return; } |
| 131 if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NO
NE)) { |
| 132 resetReordering(); |
| 133 return; |
| 134 } |
| 135 UVector32 rangesList(errorCode); |
| 136 data.makeReorderRanges(codes, codesLength, rangesList, errorCode); |
| 137 if(U_FAILURE(errorCode)) { return; } |
| 138 int32_t rangesLength = rangesList.size(); |
| 139 if(rangesLength == 0) { |
| 140 resetReordering(); |
| 141 return; |
| 142 } |
| 143 const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer()
); |
| 144 // ranges[] contains at least two (limit, offset) pairs. |
| 145 // The first offset must be 0. The last offset must not be 0. |
| 146 // Separators (at the low end) and trailing weights (at the high end) |
| 147 // are never reordered. |
| 148 U_ASSERT(rangesLength >= 2); |
| 149 U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) !=
0); |
| 150 minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; |
| 151 |
| 152 // Write the lead byte permutation table. |
| 153 // Set a 0 for each lead byte that has a range boundary in the middle. |
| 154 uint8_t table[256]; |
| 155 int32_t b = 0; |
| 156 int32_t firstSplitByteRangeIndex = -1; |
| 157 for(int32_t i = 0; i < rangesLength; ++i) { |
| 158 uint32_t pair = ranges[i]; |
| 159 int32_t limit1 = (int32_t)(pair >> 24); |
| 160 while(b < limit1) { |
| 161 table[b] = (uint8_t)(b + pair); |
| 162 ++b; |
| 163 } |
| 164 // Check the second byte of the limit. |
| 165 if((pair & 0xff0000) != 0) { |
| 166 table[limit1] = 0; |
| 167 b = limit1 + 1; |
| 168 if(firstSplitByteRangeIndex < 0) { |
| 169 firstSplitByteRangeIndex = i; |
| 170 } |
| 171 } |
| 172 } |
| 173 while(b <= 0xff) { |
| 174 table[b] = (uint8_t)b; |
| 175 ++b; |
| 176 } |
| 177 if(firstSplitByteRangeIndex < 0) { |
| 178 // The lead byte permutation table alone suffices for reordering. |
| 179 rangesLength = 0; |
| 180 } else { |
| 181 // Remove the ranges below the first split byte. |
| 182 ranges += firstSplitByteRangeIndex; |
| 183 rangesLength -= firstSplitByteRangeIndex; |
| 184 } |
| 185 setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode)
; |
| 186 } |
| 187 |
| 188 void |
| 189 CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength, |
| 190 const uint32_t *ranges, int32_t rangesLength
, |
| 191 const uint8_t *table, UErrorCode &errorCode)
{ |
| 192 if(U_FAILURE(errorCode)) { return; } |
| 193 int32_t *ownedCodes; |
| 194 int32_t totalLength = codesLength + rangesLength; |
| 195 U_ASSERT(totalLength > 0); |
| 196 if(totalLength <= reorderCodesCapacity) { |
| 197 ownedCodes = const_cast<int32_t *>(reorderCodes); |
| 198 } else { |
| 199 // Allocate one memory block for the codes, the ranges, and the 16-align
ed table. |
| 200 int32_t capacity = (totalLength + 3) & ~3; // round up to a multiple of
4 ints |
| 201 ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256); |
| 202 if(ownedCodes == NULL) { |
| 203 resetReordering(); |
| 204 errorCode = U_MEMORY_ALLOCATION_ERROR; |
| 205 return; |
| 206 } |
| 207 if(reorderCodesCapacity != 0) { |
| 208 uprv_free(const_cast<int32_t *>(reorderCodes)); |
| 209 } |
| 210 reorderCodes = ownedCodes; |
| 211 reorderCodesCapacity = capacity; |
| 212 } |
| 213 uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256); |
| 214 uprv_memcpy(ownedCodes, codes, codesLength * 4); |
| 215 uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4); |
| 216 reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodes
Capacity); |
| 217 reorderCodesLength = codesLength; |
| 218 reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength; |
| 219 reorderRangesLength = rangesLength; |
| 220 } |
| 221 |
| 222 void |
| 223 CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode
&errorCode) { |
| 224 if(U_FAILURE(errorCode)) { return; } |
| 225 if(!other.hasReordering()) { |
| 226 resetReordering(); |
| 227 return; |
| 228 } |
| 229 minHighNoReorder = other.minHighNoReorder; |
| 230 if(other.reorderCodesCapacity == 0) { |
| 231 // The reorder arrays are aliased to memory-mapped data. |
| 232 reorderTable = other.reorderTable; |
| 233 reorderRanges = other.reorderRanges; |
| 234 reorderRangesLength = other.reorderRangesLength; |
| 235 reorderCodes = other.reorderCodes; |
| 236 reorderCodesLength = other.reorderCodesLength; |
| 237 } else { |
| 238 setReorderArrays(other.reorderCodes, other.reorderCodesLength, |
| 239 other.reorderRanges, other.reorderRangesLength, |
| 240 other.reorderTable, errorCode); |
98 } | 241 } |
99 } | 242 } |
100 | 243 |
101 UBool | 244 UBool |
102 CollationSettings::setReordering(const int32_t *codes, int32_t length, const uin
t8_t table[256]) { | 245 CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) { |
103 if(length == 0) { | 246 U_ASSERT(table[0] == 0); |
104 resetReordering(); | 247 for(int32_t i = 1; i < 256; ++i) { |
105 } else { | 248 if(table[i] == 0) { |
106 uint8_t *ownedTable; | 249 return TRUE; |
107 int32_t *ownedCodes; | |
108 if(length <= reorderCodesCapacity) { | |
109 ownedTable = const_cast<uint8_t *>(reorderTable); | |
110 ownedCodes = const_cast<int32_t *>(reorderCodes); | |
111 } else { | |
112 // Allocate one memory block for the codes and the 16-aligned table. | |
113 int32_t capacity = (length + 3) & ~3; // round up to a multiple of
4 ints | |
114 uint8_t *bytes = (uint8_t *)uprv_malloc(256 + capacity * 4); | |
115 if(bytes == NULL) { return FALSE; } | |
116 if(reorderCodesCapacity != 0) { | |
117 uprv_free(const_cast<int32_t *>(reorderCodes)); | |
118 } | |
119 reorderTable = ownedTable = bytes + capacity * 4; | |
120 reorderCodes = ownedCodes = (int32_t *)bytes; | |
121 reorderCodesCapacity = capacity; | |
122 } | 250 } |
123 uprv_memcpy(ownedTable, table, 256); | |
124 uprv_memcpy(ownedCodes, codes, length * 4); | |
125 reorderCodesLength = length; | |
126 } | 251 } |
127 return TRUE; | 252 return FALSE; |
| 253 } |
| 254 |
| 255 uint32_t |
| 256 CollationSettings::reorderEx(uint32_t p) const { |
| 257 if(p >= minHighNoReorder) { return p; } |
| 258 // Round up p so that its lower 16 bits are >= any offset bits. |
| 259 // Then compare q directly with (limit, offset) pairs. |
| 260 uint32_t q = p | 0xffff; |
| 261 uint32_t r; |
| 262 const uint32_t *ranges = reorderRanges; |
| 263 while(q >= (r = *ranges)) { ++ranges; } |
| 264 return p + (r << 24); |
128 } | 265 } |
129 | 266 |
130 void | 267 void |
131 CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode
&errorCode) { | 268 CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode
&errorCode) { |
132 if(U_FAILURE(errorCode)) { return; } | 269 if(U_FAILURE(errorCode)) { return; } |
133 int32_t noStrength = options & ~STRENGTH_MASK; | 270 int32_t noStrength = options & ~STRENGTH_MASK; |
134 switch(value) { | 271 switch(value) { |
135 case UCOL_PRIMARY: | 272 case UCOL_PRIMARY: |
136 case UCOL_SECONDARY: | 273 case UCOL_SECONDARY: |
137 case UCOL_TERTIARY: | 274 case UCOL_TERTIARY: |
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
229 break; | 366 break; |
230 default: | 367 default: |
231 errorCode = U_ILLEGAL_ARGUMENT_ERROR; | 368 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
232 break; | 369 break; |
233 } | 370 } |
234 } | 371 } |
235 | 372 |
236 U_NAMESPACE_END | 373 U_NAMESPACE_END |
237 | 374 |
238 #endif // !UCONFIG_NO_COLLATION | 375 #endif // !UCONFIG_NO_COLLATION |
OLD | NEW |