Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(262)

Side by Side Diff: source/i18n/collationcompare.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/i18n/collationbuilder.cpp ('k') | source/i18n/collationdata.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ******************************************************************************* 2 *******************************************************************************
3 * Copyright (C) 1996-2014, International Business Machines 3 * Copyright (C) 1996-2015, International Business Machines
4 * Corporation and others. All Rights Reserved. 4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************* 5 *******************************************************************************
6 * collationcompare.cpp 6 * collationcompare.cpp
7 * 7 *
8 * created on: 2012feb14 with new and old collation code 8 * created on: 2012feb14 with new and old collation code
9 * created by: Markus W. Scherer 9 * created by: Markus W. Scherer
10 */ 10 */
11 11
12 #include "unicode/utypes.h" 12 #include "unicode/utypes.h"
13 13
(...skipping 19 matching lines...) Expand all
33 uint32_t variableTop; 33 uint32_t variableTop;
34 if((options & CollationSettings::ALTERNATE_MASK) == 0) { 34 if((options & CollationSettings::ALTERNATE_MASK) == 0) {
35 variableTop = 0; 35 variableTop = 0;
36 } else { 36 } else {
37 // +1 so that we can use "<" and primary ignorables test out early. 37 // +1 so that we can use "<" and primary ignorables test out early.
38 variableTop = settings.variableTop + 1; 38 variableTop = settings.variableTop + 1;
39 } 39 }
40 UBool anyVariable = FALSE; 40 UBool anyVariable = FALSE;
41 41
42 // Fetch CEs, compare primaries, store secondary & tertiary weights. 42 // Fetch CEs, compare primaries, store secondary & tertiary weights.
43 U_ALIGN_CODE(16);
44 for(;;) { 43 for(;;) {
45 // We fetch CEs until we get a non-ignorable primary or reach the end. 44 // We fetch CEs until we get a non-ignorable primary or reach the end.
46 uint32_t leftPrimary; 45 uint32_t leftPrimary;
47 do { 46 do {
48 int64_t ce = left.nextCE(errorCode); 47 int64_t ce = left.nextCE(errorCode);
49 leftPrimary = (uint32_t)(ce >> 32); 48 leftPrimary = (uint32_t)(ce >> 32);
50 if(leftPrimary < variableTop && leftPrimary > Collation::MERGE_SEPARATOR_PRIMARY) { 49 if(leftPrimary < variableTop && leftPrimary > Collation::MERGE_SEPARATOR_PRIMARY) {
51 // Variable CE, shift it to quaternary level. 50 // Variable CE, shift it to quaternary level.
52 // Ignore all following primary ignorables, and shift further variable CEs. 51 // Ignore all following primary ignorables, and shift further variable CEs.
53 anyVariable = TRUE; 52 anyVariable = TRUE;
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
88 break; 87 break;
89 } 88 }
90 } 89 }
91 } while(rightPrimary < variableTop && 90 } while(rightPrimary < variableTop &&
92 rightPrimary > Collation::MERGE_SEPARATOR_PRIMARY); 91 rightPrimary > Collation::MERGE_SEPARATOR_PRIMARY);
93 } 92 }
94 } while(rightPrimary == 0); 93 } while(rightPrimary == 0);
95 94
96 if(leftPrimary != rightPrimary) { 95 if(leftPrimary != rightPrimary) {
97 // Return the primary difference, with script reordering. 96 // Return the primary difference, with script reordering.
98 const uint8_t *reorderTable = settings.reorderTable; 97 if(settings.hasReordering()) {
99 if (reorderTable != NULL) { 98 leftPrimary = settings.reorder(leftPrimary);
100 leftPrimary = Collation::reorder(reorderTable, leftPrimary); 99 rightPrimary = settings.reorder(rightPrimary);
101 rightPrimary = Collation::reorder(reorderTable, rightPrimary);
102 } 100 }
103 return (leftPrimary < rightPrimary) ? UCOL_LESS : UCOL_GREATER; 101 return (leftPrimary < rightPrimary) ? UCOL_LESS : UCOL_GREATER;
104 } 102 }
105 if(leftPrimary == Collation::NO_CE_PRIMARY) { break; } 103 if(leftPrimary == Collation::NO_CE_PRIMARY) { break; }
106 } 104 }
107 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; } 105 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
108 106
109 // Compare the buffered secondary & tertiary weights. 107 // Compare the buffered secondary & tertiary weights.
110 // We might skip the secondary level but continue with the case level 108 // We might skip the secondary level but continue with the case level
111 // which is turned on separately. 109 // which is turned on separately.
(...skipping 17 matching lines...) Expand all
129 } 127 }
130 if(leftSecondary == Collation::NO_CE_WEIGHT16) { break; } 128 if(leftSecondary == Collation::NO_CE_WEIGHT16) { break; }
131 } 129 }
132 } else { 130 } else {
133 // The backwards secondary level compares secondary weights backwards 131 // The backwards secondary level compares secondary weights backwards
134 // within segments separated by the merge separator (U+FFFE, weight 02). 132 // within segments separated by the merge separator (U+FFFE, weight 02).
135 int32_t leftStart = 0; 133 int32_t leftStart = 0;
136 int32_t rightStart = 0; 134 int32_t rightStart = 0;
137 for(;;) { 135 for(;;) {
138 // Find the merge separator or the NO_CE terminator. 136 // Find the merge separator or the NO_CE terminator.
137 uint32_t p;
139 int32_t leftLimit = leftStart; 138 int32_t leftLimit = leftStart;
140 uint32_t leftLower32; 139 while((p = (uint32_t)(left.getCE(leftLimit) >> 32)) >
141 while((leftLower32 = (uint32_t)left.getCE(leftLimit)) > 140 Collation::MERGE_SEPARATOR_PRIMARY ||
142 Collation::MERGE_SEPARATOR_LOWER32 || 141 p == 0) {
143 leftLower32 == 0) {
144 ++leftLimit; 142 ++leftLimit;
145 } 143 }
146 int32_t rightLimit = rightStart; 144 int32_t rightLimit = rightStart;
147 uint32_t rightLower32; 145 while((p = (uint32_t)(right.getCE(rightLimit) >> 32)) >
148 while((rightLower32 = (uint32_t)right.getCE(rightLimit)) > 146 Collation::MERGE_SEPARATOR_PRIMARY ||
149 Collation::MERGE_SEPARATOR_LOWER32 || 147 p == 0) {
150 rightLower32 == 0) {
151 ++rightLimit; 148 ++rightLimit;
152 } 149 }
153 150
154 // Compare the segments. 151 // Compare the segments.
155 int32_t leftIndex = leftLimit; 152 int32_t leftIndex = leftLimit;
156 int32_t rightIndex = rightLimit; 153 int32_t rightIndex = rightLimit;
157 for(;;) { 154 for(;;) {
158 int32_t leftSecondary = 0; 155 int32_t leftSecondary = 0;
159 while(leftSecondary == 0 && leftIndex > leftStart) { 156 while(leftSecondary == 0 && leftIndex > leftStart) {
160 leftSecondary = ((uint32_t)left.getCE(--leftIndex)) >> 16; 157 leftSecondary = ((uint32_t)left.getCE(--leftIndex)) >> 16;
161 } 158 }
162 159
163 int32_t rightSecondary = 0; 160 int32_t rightSecondary = 0;
164 while(rightSecondary == 0 && rightIndex > rightStart) { 161 while(rightSecondary == 0 && rightIndex > rightStart) {
165 rightSecondary = ((uint32_t)right.getCE(--rightIndex)) >> 16; 162 rightSecondary = ((uint32_t)right.getCE(--rightIndex)) >> 16;
166 } 163 }
167 164
168 if(leftSecondary != rightSecondary) { 165 if(leftSecondary != rightSecondary) {
169 return (leftSecondary < rightSecondary) ? UCOL_LESS : UCOL_GREATER; 166 return (leftSecondary < rightSecondary) ? UCOL_LESS : UCOL_GREATER;
170 } 167 }
171 if(leftSecondary == 0) { break; } 168 if(leftSecondary == 0) { break; }
172 } 169 }
173 170
174 // Did we reach the end of either string? 171 // Did we reach the end of either string?
175 // Both strings have the same number of merge separators, 172 // Both strings have the same number of merge separators,
176 // or else there would have been a primary-level difference. 173 // or else there would have been a primary-level difference.
177 U_ASSERT(left.getCE(leftLimit) == right.getCE(rightLimit)); 174 U_ASSERT(left.getCE(leftLimit) == right.getCE(rightLimit));
178 if(left.getCE(leftLimit) == Collation::NO_CE) { break; } 175 if(p == Collation::NO_CE_PRIMARY) { break; }
179 // Skip both merge separators and continue. 176 // Skip both merge separators and continue.
180 leftStart = leftLimit + 1; 177 leftStart = leftLimit + 1;
181 rightStart = rightLimit + 1; 178 rightStart = rightLimit + 1;
182 } 179 }
183 } 180 }
184 } 181 }
185 182
186 if((options & CollationSettings::CASE_LEVEL) != 0) { 183 if((options & CollationSettings::CASE_LEVEL) != 0) {
187 int32_t strength = CollationSettings::getStrength(options); 184 int32_t strength = CollationSettings::getStrength(options);
188 int32_t leftIndex = 0; 185 int32_t leftIndex = 0;
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
269 do { 266 do {
270 rightLower32 = (uint32_t)right.getCE(rightIndex++); 267 rightLower32 = (uint32_t)right.getCE(rightIndex++);
271 anyQuaternaries |= rightLower32; 268 anyQuaternaries |= rightLower32;
272 U_ASSERT((rightLower32 & Collation::ONLY_TERTIARY_MASK) != 0 || 269 U_ASSERT((rightLower32 & Collation::ONLY_TERTIARY_MASK) != 0 ||
273 (rightLower32 & 0xc0c0) == 0); 270 (rightLower32 & 0xc0c0) == 0);
274 rightTertiary = rightLower32 & tertiaryMask; 271 rightTertiary = rightLower32 & tertiaryMask;
275 } while(rightTertiary == 0); 272 } while(rightTertiary == 0);
276 273
277 if(leftTertiary != rightTertiary) { 274 if(leftTertiary != rightTertiary) {
278 if(CollationSettings::sortsTertiaryUpperCaseFirst(options)) { 275 if(CollationSettings::sortsTertiaryUpperCaseFirst(options)) {
279 // Pass through NO_CE and MERGE_SEPARATOR 276 // Pass through NO_CE and keep real tertiary weights larger than that.
280 // and keep real tertiary weights larger than the MERGE_SEPARATO R.
281 // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut), 277 // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
282 // to keep tertiary CEs well-formed. 278 // to keep tertiary CEs well-formed.
283 // Their case+tertiary weights must be greater than those of 279 // Their case+tertiary weights must be greater than those of
284 // primary and secondary CEs. 280 // primary and secondary CEs.
285 if(leftTertiary > Collation::MERGE_SEPARATOR_WEIGHT16) { 281 if(leftTertiary > Collation::NO_CE_WEIGHT16) {
286 if(leftLower32 > 0xffff) { 282 if(leftLower32 > 0xffff) {
287 leftTertiary ^= 0xc000; 283 leftTertiary ^= 0xc000;
288 } else { 284 } else {
289 leftTertiary += 0x4000; 285 leftTertiary += 0x4000;
290 } 286 }
291 } 287 }
292 if(rightTertiary > Collation::MERGE_SEPARATOR_WEIGHT16) { 288 if(rightTertiary > Collation::NO_CE_WEIGHT16) {
293 if(rightLower32 > 0xffff) { 289 if(rightLower32 > 0xffff) {
294 rightTertiary ^= 0xc000; 290 rightTertiary ^= 0xc000;
295 } else { 291 } else {
296 rightTertiary += 0x4000; 292 rightTertiary += 0x4000;
297 } 293 }
298 } 294 }
299 } 295 }
300 return (leftTertiary < rightTertiary) ? UCOL_LESS : UCOL_GREATER; 296 return (leftTertiary < rightTertiary) ? UCOL_LESS : UCOL_GREATER;
301 } 297 }
302 if(leftTertiary == Collation::NO_CE_WEIGHT16) { break; } 298 if(leftTertiary == Collation::NO_CE_WEIGHT16) { break; }
303 } 299 }
304 if(CollationSettings::getStrength(options) <= UCOL_TERTIARY) { return UCOL_EQUAL; } 300 if(CollationSettings::getStrength(options) <= UCOL_TERTIARY) { return UCOL_EQUAL; }
305 301
306 if(!anyVariable && (anyQuaternaries & 0xc0) == 0) { 302 if(!anyVariable && (anyQuaternaries & 0xc0) == 0) {
307 // If there are no "variable" CEs and no non-zero quaternary weights, 303 // If there are no "variable" CEs and no non-zero quaternary weights,
308 // then there are no quaternary differences. 304 // then there are no quaternary differences.
309 return UCOL_EQUAL; 305 return UCOL_EQUAL;
310 } 306 }
311 307
312 leftIndex = 0; 308 leftIndex = 0;
313 rightIndex = 0; 309 rightIndex = 0;
314 for(;;) { 310 for(;;) {
315 uint32_t leftQuaternary; 311 uint32_t leftQuaternary;
316 do { 312 do {
317 int64_t ce = left.getCE(leftIndex++); 313 int64_t ce = left.getCE(leftIndex++);
318 leftQuaternary = (uint32_t)ce & 0xffff; 314 leftQuaternary = (uint32_t)ce & 0xffff;
319 if(leftQuaternary == 0) { 315 if(leftQuaternary <= Collation::NO_CE_WEIGHT16) {
320 // Variable primary or completely ignorable. 316 // Variable primary or completely ignorable or NO_CE.
321 leftQuaternary = (uint32_t)(ce >> 32); 317 leftQuaternary = (uint32_t)(ce >> 32);
322 } else if(leftQuaternary <= Collation::MERGE_SEPARATOR_WEIGHT16) {
323 // Leave NO_CE or MERGE_SEPARATOR as is.
324 } else { 318 } else {
325 // Regular CE, not tertiary ignorable. 319 // Regular CE, not tertiary ignorable.
326 // Preserve the quaternary weight in bits 7..6. 320 // Preserve the quaternary weight in bits 7..6.
327 leftQuaternary |= 0xffffff3f; 321 leftQuaternary |= 0xffffff3f;
328 } 322 }
329 } while(leftQuaternary == 0); 323 } while(leftQuaternary == 0);
330 324
331 uint32_t rightQuaternary; 325 uint32_t rightQuaternary;
332 do { 326 do {
333 int64_t ce = right.getCE(rightIndex++); 327 int64_t ce = right.getCE(rightIndex++);
334 rightQuaternary = (uint32_t)ce & 0xffff; 328 rightQuaternary = (uint32_t)ce & 0xffff;
335 if(rightQuaternary == 0) { 329 if(rightQuaternary <= Collation::NO_CE_WEIGHT16) {
336 // Variable primary or completely ignorable. 330 // Variable primary or completely ignorable or NO_CE.
337 rightQuaternary = (uint32_t)(ce >> 32); 331 rightQuaternary = (uint32_t)(ce >> 32);
338 } else if(rightQuaternary <= Collation::MERGE_SEPARATOR_WEIGHT16) {
339 // Leave NO_CE or MERGE_SEPARATOR as is.
340 } else { 332 } else {
341 // Regular CE, not tertiary ignorable. 333 // Regular CE, not tertiary ignorable.
342 // Preserve the quaternary weight in bits 7..6. 334 // Preserve the quaternary weight in bits 7..6.
343 rightQuaternary |= 0xffffff3f; 335 rightQuaternary |= 0xffffff3f;
344 } 336 }
345 } while(rightQuaternary == 0); 337 } while(rightQuaternary == 0);
346 338
347 if(leftQuaternary != rightQuaternary) { 339 if(leftQuaternary != rightQuaternary) {
348 // Return the difference, with script reordering. 340 // Return the difference, with script reordering.
349 const uint8_t *reorderTable = settings.reorderTable; 341 if(settings.hasReordering()) {
350 if (reorderTable != NULL) { 342 leftQuaternary = settings.reorder(leftQuaternary);
351 leftQuaternary = Collation::reorder(reorderTable, leftQuaternary); 343 rightQuaternary = settings.reorder(rightQuaternary);
352 rightQuaternary = Collation::reorder(reorderTable, rightQuaternary);
353 } 344 }
354 return (leftQuaternary < rightQuaternary) ? UCOL_LESS : UCOL_GREATER; 345 return (leftQuaternary < rightQuaternary) ? UCOL_LESS : UCOL_GREATER;
355 } 346 }
356 if(leftQuaternary == Collation::NO_CE_WEIGHT16) { break; } 347 if(leftQuaternary == Collation::NO_CE_PRIMARY) { break; }
357 } 348 }
358 return UCOL_EQUAL; 349 return UCOL_EQUAL;
359 } 350 }
360 351
361 U_NAMESPACE_END 352 U_NAMESPACE_END
362 353
363 #endif // !UCONFIG_NO_COLLATION 354 #endif // !UCONFIG_NO_COLLATION
OLDNEW
« no previous file with comments | « source/i18n/collationbuilder.cpp ('k') | source/i18n/collationdata.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698