OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 1996-2014, International Business Machines | 3 * Copyright (C) 1996-2015, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 * collationcompare.cpp | 6 * collationcompare.cpp |
7 * | 7 * |
8 * created on: 2012feb14 with new and old collation code | 8 * created on: 2012feb14 with new and old collation code |
9 * created by: Markus W. Scherer | 9 * created by: Markus W. Scherer |
10 */ | 10 */ |
11 | 11 |
12 #include "unicode/utypes.h" | 12 #include "unicode/utypes.h" |
13 | 13 |
(...skipping 19 matching lines...) Expand all Loading... |
33 uint32_t variableTop; | 33 uint32_t variableTop; |
34 if((options & CollationSettings::ALTERNATE_MASK) == 0) { | 34 if((options & CollationSettings::ALTERNATE_MASK) == 0) { |
35 variableTop = 0; | 35 variableTop = 0; |
36 } else { | 36 } else { |
37 // +1 so that we can use "<" and primary ignorables test out early. | 37 // +1 so that we can use "<" and primary ignorables test out early. |
38 variableTop = settings.variableTop + 1; | 38 variableTop = settings.variableTop + 1; |
39 } | 39 } |
40 UBool anyVariable = FALSE; | 40 UBool anyVariable = FALSE; |
41 | 41 |
42 // Fetch CEs, compare primaries, store secondary & tertiary weights. | 42 // Fetch CEs, compare primaries, store secondary & tertiary weights. |
43 U_ALIGN_CODE(16); | |
44 for(;;) { | 43 for(;;) { |
45 // We fetch CEs until we get a non-ignorable primary or reach the end. | 44 // We fetch CEs until we get a non-ignorable primary or reach the end. |
46 uint32_t leftPrimary; | 45 uint32_t leftPrimary; |
47 do { | 46 do { |
48 int64_t ce = left.nextCE(errorCode); | 47 int64_t ce = left.nextCE(errorCode); |
49 leftPrimary = (uint32_t)(ce >> 32); | 48 leftPrimary = (uint32_t)(ce >> 32); |
50 if(leftPrimary < variableTop && leftPrimary > Collation::MERGE_SEPAR
ATOR_PRIMARY) { | 49 if(leftPrimary < variableTop && leftPrimary > Collation::MERGE_SEPAR
ATOR_PRIMARY) { |
51 // Variable CE, shift it to quaternary level. | 50 // Variable CE, shift it to quaternary level. |
52 // Ignore all following primary ignorables, and shift further va
riable CEs. | 51 // Ignore all following primary ignorables, and shift further va
riable CEs. |
53 anyVariable = TRUE; | 52 anyVariable = TRUE; |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
88 break; | 87 break; |
89 } | 88 } |
90 } | 89 } |
91 } while(rightPrimary < variableTop && | 90 } while(rightPrimary < variableTop && |
92 rightPrimary > Collation::MERGE_SEPARATOR_PRIMARY); | 91 rightPrimary > Collation::MERGE_SEPARATOR_PRIMARY); |
93 } | 92 } |
94 } while(rightPrimary == 0); | 93 } while(rightPrimary == 0); |
95 | 94 |
96 if(leftPrimary != rightPrimary) { | 95 if(leftPrimary != rightPrimary) { |
97 // Return the primary difference, with script reordering. | 96 // Return the primary difference, with script reordering. |
98 const uint8_t *reorderTable = settings.reorderTable; | 97 if(settings.hasReordering()) { |
99 if (reorderTable != NULL) { | 98 leftPrimary = settings.reorder(leftPrimary); |
100 leftPrimary = Collation::reorder(reorderTable, leftPrimary); | 99 rightPrimary = settings.reorder(rightPrimary); |
101 rightPrimary = Collation::reorder(reorderTable, rightPrimary); | |
102 } | 100 } |
103 return (leftPrimary < rightPrimary) ? UCOL_LESS : UCOL_GREATER; | 101 return (leftPrimary < rightPrimary) ? UCOL_LESS : UCOL_GREATER; |
104 } | 102 } |
105 if(leftPrimary == Collation::NO_CE_PRIMARY) { break; } | 103 if(leftPrimary == Collation::NO_CE_PRIMARY) { break; } |
106 } | 104 } |
107 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; } | 105 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; } |
108 | 106 |
109 // Compare the buffered secondary & tertiary weights. | 107 // Compare the buffered secondary & tertiary weights. |
110 // We might skip the secondary level but continue with the case level | 108 // We might skip the secondary level but continue with the case level |
111 // which is turned on separately. | 109 // which is turned on separately. |
(...skipping 17 matching lines...) Expand all Loading... |
129 } | 127 } |
130 if(leftSecondary == Collation::NO_CE_WEIGHT16) { break; } | 128 if(leftSecondary == Collation::NO_CE_WEIGHT16) { break; } |
131 } | 129 } |
132 } else { | 130 } else { |
133 // The backwards secondary level compares secondary weights backward
s | 131 // The backwards secondary level compares secondary weights backward
s |
134 // within segments separated by the merge separator (U+FFFE, weight
02). | 132 // within segments separated by the merge separator (U+FFFE, weight
02). |
135 int32_t leftStart = 0; | 133 int32_t leftStart = 0; |
136 int32_t rightStart = 0; | 134 int32_t rightStart = 0; |
137 for(;;) { | 135 for(;;) { |
138 // Find the merge separator or the NO_CE terminator. | 136 // Find the merge separator or the NO_CE terminator. |
| 137 uint32_t p; |
139 int32_t leftLimit = leftStart; | 138 int32_t leftLimit = leftStart; |
140 uint32_t leftLower32; | 139 while((p = (uint32_t)(left.getCE(leftLimit) >> 32)) > |
141 while((leftLower32 = (uint32_t)left.getCE(leftLimit)) > | 140 Collation::MERGE_SEPARATOR_PRIMARY || |
142 Collation::MERGE_SEPARATOR_LOWER32 || | 141 p == 0) { |
143 leftLower32 == 0) { | |
144 ++leftLimit; | 142 ++leftLimit; |
145 } | 143 } |
146 int32_t rightLimit = rightStart; | 144 int32_t rightLimit = rightStart; |
147 uint32_t rightLower32; | 145 while((p = (uint32_t)(right.getCE(rightLimit) >> 32)) > |
148 while((rightLower32 = (uint32_t)right.getCE(rightLimit)) > | 146 Collation::MERGE_SEPARATOR_PRIMARY || |
149 Collation::MERGE_SEPARATOR_LOWER32 || | 147 p == 0) { |
150 rightLower32 == 0) { | |
151 ++rightLimit; | 148 ++rightLimit; |
152 } | 149 } |
153 | 150 |
154 // Compare the segments. | 151 // Compare the segments. |
155 int32_t leftIndex = leftLimit; | 152 int32_t leftIndex = leftLimit; |
156 int32_t rightIndex = rightLimit; | 153 int32_t rightIndex = rightLimit; |
157 for(;;) { | 154 for(;;) { |
158 int32_t leftSecondary = 0; | 155 int32_t leftSecondary = 0; |
159 while(leftSecondary == 0 && leftIndex > leftStart) { | 156 while(leftSecondary == 0 && leftIndex > leftStart) { |
160 leftSecondary = ((uint32_t)left.getCE(--leftIndex)) >> 1
6; | 157 leftSecondary = ((uint32_t)left.getCE(--leftIndex)) >> 1
6; |
161 } | 158 } |
162 | 159 |
163 int32_t rightSecondary = 0; | 160 int32_t rightSecondary = 0; |
164 while(rightSecondary == 0 && rightIndex > rightStart) { | 161 while(rightSecondary == 0 && rightIndex > rightStart) { |
165 rightSecondary = ((uint32_t)right.getCE(--rightIndex)) >
> 16; | 162 rightSecondary = ((uint32_t)right.getCE(--rightIndex)) >
> 16; |
166 } | 163 } |
167 | 164 |
168 if(leftSecondary != rightSecondary) { | 165 if(leftSecondary != rightSecondary) { |
169 return (leftSecondary < rightSecondary) ? UCOL_LESS : UC
OL_GREATER; | 166 return (leftSecondary < rightSecondary) ? UCOL_LESS : UC
OL_GREATER; |
170 } | 167 } |
171 if(leftSecondary == 0) { break; } | 168 if(leftSecondary == 0) { break; } |
172 } | 169 } |
173 | 170 |
174 // Did we reach the end of either string? | 171 // Did we reach the end of either string? |
175 // Both strings have the same number of merge separators, | 172 // Both strings have the same number of merge separators, |
176 // or else there would have been a primary-level difference. | 173 // or else there would have been a primary-level difference. |
177 U_ASSERT(left.getCE(leftLimit) == right.getCE(rightLimit)); | 174 U_ASSERT(left.getCE(leftLimit) == right.getCE(rightLimit)); |
178 if(left.getCE(leftLimit) == Collation::NO_CE) { break; } | 175 if(p == Collation::NO_CE_PRIMARY) { break; } |
179 // Skip both merge separators and continue. | 176 // Skip both merge separators and continue. |
180 leftStart = leftLimit + 1; | 177 leftStart = leftLimit + 1; |
181 rightStart = rightLimit + 1; | 178 rightStart = rightLimit + 1; |
182 } | 179 } |
183 } | 180 } |
184 } | 181 } |
185 | 182 |
186 if((options & CollationSettings::CASE_LEVEL) != 0) { | 183 if((options & CollationSettings::CASE_LEVEL) != 0) { |
187 int32_t strength = CollationSettings::getStrength(options); | 184 int32_t strength = CollationSettings::getStrength(options); |
188 int32_t leftIndex = 0; | 185 int32_t leftIndex = 0; |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
269 do { | 266 do { |
270 rightLower32 = (uint32_t)right.getCE(rightIndex++); | 267 rightLower32 = (uint32_t)right.getCE(rightIndex++); |
271 anyQuaternaries |= rightLower32; | 268 anyQuaternaries |= rightLower32; |
272 U_ASSERT((rightLower32 & Collation::ONLY_TERTIARY_MASK) != 0 || | 269 U_ASSERT((rightLower32 & Collation::ONLY_TERTIARY_MASK) != 0 || |
273 (rightLower32 & 0xc0c0) == 0); | 270 (rightLower32 & 0xc0c0) == 0); |
274 rightTertiary = rightLower32 & tertiaryMask; | 271 rightTertiary = rightLower32 & tertiaryMask; |
275 } while(rightTertiary == 0); | 272 } while(rightTertiary == 0); |
276 | 273 |
277 if(leftTertiary != rightTertiary) { | 274 if(leftTertiary != rightTertiary) { |
278 if(CollationSettings::sortsTertiaryUpperCaseFirst(options)) { | 275 if(CollationSettings::sortsTertiaryUpperCaseFirst(options)) { |
279 // Pass through NO_CE and MERGE_SEPARATOR | 276 // Pass through NO_CE and keep real tertiary weights larger than
that. |
280 // and keep real tertiary weights larger than the MERGE_SEPARATO
R. | |
281 // Do not change the artificial uppercase weight of a tertiary C
E (0.0.ut), | 277 // Do not change the artificial uppercase weight of a tertiary C
E (0.0.ut), |
282 // to keep tertiary CEs well-formed. | 278 // to keep tertiary CEs well-formed. |
283 // Their case+tertiary weights must be greater than those of | 279 // Their case+tertiary weights must be greater than those of |
284 // primary and secondary CEs. | 280 // primary and secondary CEs. |
285 if(leftTertiary > Collation::MERGE_SEPARATOR_WEIGHT16) { | 281 if(leftTertiary > Collation::NO_CE_WEIGHT16) { |
286 if(leftLower32 > 0xffff) { | 282 if(leftLower32 > 0xffff) { |
287 leftTertiary ^= 0xc000; | 283 leftTertiary ^= 0xc000; |
288 } else { | 284 } else { |
289 leftTertiary += 0x4000; | 285 leftTertiary += 0x4000; |
290 } | 286 } |
291 } | 287 } |
292 if(rightTertiary > Collation::MERGE_SEPARATOR_WEIGHT16) { | 288 if(rightTertiary > Collation::NO_CE_WEIGHT16) { |
293 if(rightLower32 > 0xffff) { | 289 if(rightLower32 > 0xffff) { |
294 rightTertiary ^= 0xc000; | 290 rightTertiary ^= 0xc000; |
295 } else { | 291 } else { |
296 rightTertiary += 0x4000; | 292 rightTertiary += 0x4000; |
297 } | 293 } |
298 } | 294 } |
299 } | 295 } |
300 return (leftTertiary < rightTertiary) ? UCOL_LESS : UCOL_GREATER; | 296 return (leftTertiary < rightTertiary) ? UCOL_LESS : UCOL_GREATER; |
301 } | 297 } |
302 if(leftTertiary == Collation::NO_CE_WEIGHT16) { break; } | 298 if(leftTertiary == Collation::NO_CE_WEIGHT16) { break; } |
303 } | 299 } |
304 if(CollationSettings::getStrength(options) <= UCOL_TERTIARY) { return UCOL_E
QUAL; } | 300 if(CollationSettings::getStrength(options) <= UCOL_TERTIARY) { return UCOL_E
QUAL; } |
305 | 301 |
306 if(!anyVariable && (anyQuaternaries & 0xc0) == 0) { | 302 if(!anyVariable && (anyQuaternaries & 0xc0) == 0) { |
307 // If there are no "variable" CEs and no non-zero quaternary weights, | 303 // If there are no "variable" CEs and no non-zero quaternary weights, |
308 // then there are no quaternary differences. | 304 // then there are no quaternary differences. |
309 return UCOL_EQUAL; | 305 return UCOL_EQUAL; |
310 } | 306 } |
311 | 307 |
312 leftIndex = 0; | 308 leftIndex = 0; |
313 rightIndex = 0; | 309 rightIndex = 0; |
314 for(;;) { | 310 for(;;) { |
315 uint32_t leftQuaternary; | 311 uint32_t leftQuaternary; |
316 do { | 312 do { |
317 int64_t ce = left.getCE(leftIndex++); | 313 int64_t ce = left.getCE(leftIndex++); |
318 leftQuaternary = (uint32_t)ce & 0xffff; | 314 leftQuaternary = (uint32_t)ce & 0xffff; |
319 if(leftQuaternary == 0) { | 315 if(leftQuaternary <= Collation::NO_CE_WEIGHT16) { |
320 // Variable primary or completely ignorable. | 316 // Variable primary or completely ignorable or NO_CE. |
321 leftQuaternary = (uint32_t)(ce >> 32); | 317 leftQuaternary = (uint32_t)(ce >> 32); |
322 } else if(leftQuaternary <= Collation::MERGE_SEPARATOR_WEIGHT16) { | |
323 // Leave NO_CE or MERGE_SEPARATOR as is. | |
324 } else { | 318 } else { |
325 // Regular CE, not tertiary ignorable. | 319 // Regular CE, not tertiary ignorable. |
326 // Preserve the quaternary weight in bits 7..6. | 320 // Preserve the quaternary weight in bits 7..6. |
327 leftQuaternary |= 0xffffff3f; | 321 leftQuaternary |= 0xffffff3f; |
328 } | 322 } |
329 } while(leftQuaternary == 0); | 323 } while(leftQuaternary == 0); |
330 | 324 |
331 uint32_t rightQuaternary; | 325 uint32_t rightQuaternary; |
332 do { | 326 do { |
333 int64_t ce = right.getCE(rightIndex++); | 327 int64_t ce = right.getCE(rightIndex++); |
334 rightQuaternary = (uint32_t)ce & 0xffff; | 328 rightQuaternary = (uint32_t)ce & 0xffff; |
335 if(rightQuaternary == 0) { | 329 if(rightQuaternary <= Collation::NO_CE_WEIGHT16) { |
336 // Variable primary or completely ignorable. | 330 // Variable primary or completely ignorable or NO_CE. |
337 rightQuaternary = (uint32_t)(ce >> 32); | 331 rightQuaternary = (uint32_t)(ce >> 32); |
338 } else if(rightQuaternary <= Collation::MERGE_SEPARATOR_WEIGHT16) { | |
339 // Leave NO_CE or MERGE_SEPARATOR as is. | |
340 } else { | 332 } else { |
341 // Regular CE, not tertiary ignorable. | 333 // Regular CE, not tertiary ignorable. |
342 // Preserve the quaternary weight in bits 7..6. | 334 // Preserve the quaternary weight in bits 7..6. |
343 rightQuaternary |= 0xffffff3f; | 335 rightQuaternary |= 0xffffff3f; |
344 } | 336 } |
345 } while(rightQuaternary == 0); | 337 } while(rightQuaternary == 0); |
346 | 338 |
347 if(leftQuaternary != rightQuaternary) { | 339 if(leftQuaternary != rightQuaternary) { |
348 // Return the difference, with script reordering. | 340 // Return the difference, with script reordering. |
349 const uint8_t *reorderTable = settings.reorderTable; | 341 if(settings.hasReordering()) { |
350 if (reorderTable != NULL) { | 342 leftQuaternary = settings.reorder(leftQuaternary); |
351 leftQuaternary = Collation::reorder(reorderTable, leftQuaternary
); | 343 rightQuaternary = settings.reorder(rightQuaternary); |
352 rightQuaternary = Collation::reorder(reorderTable, rightQuaterna
ry); | |
353 } | 344 } |
354 return (leftQuaternary < rightQuaternary) ? UCOL_LESS : UCOL_GREATER
; | 345 return (leftQuaternary < rightQuaternary) ? UCOL_LESS : UCOL_GREATER
; |
355 } | 346 } |
356 if(leftQuaternary == Collation::NO_CE_WEIGHT16) { break; } | 347 if(leftQuaternary == Collation::NO_CE_PRIMARY) { break; } |
357 } | 348 } |
358 return UCOL_EQUAL; | 349 return UCOL_EQUAL; |
359 } | 350 } |
360 | 351 |
361 U_NAMESPACE_END | 352 U_NAMESPACE_END |
362 | 353 |
363 #endif // !UCONFIG_NO_COLLATION | 354 #endif // !UCONFIG_NO_COLLATION |
OLD | NEW |