Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(127)

Side by Side Diff: source/i18n/collationfastlatinbuilder.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/i18n/collationfastlatinbuilder.h ('k') | source/i18n/collationfcd.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ******************************************************************************* 2 *******************************************************************************
3 * Copyright (C) 2013-2014, International Business Machines 3 * Copyright (C) 2013-2015, International Business Machines
4 * Corporation and others. All Rights Reserved. 4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************* 5 *******************************************************************************
6 * collationfastlatinbuilder.cpp 6 * collationfastlatinbuilder.cpp
7 * 7 *
8 * created on: 2013aug09 8 * created on: 2013aug09
9 * created by: Markus W. Scherer 9 * created by: Markus W. Scherer
10 */ 10 */
11 11
12 #define DEBUG_COLLATION_FAST_LATIN_BUILDER 0 // 0 or 1 or 2 12 #define DEBUG_COLLATION_FAST_LATIN_BUILDER 0 // 0 or 1 or 2
13 #if DEBUG_COLLATION_FAST_LATIN_BUILDER 13 #if DEBUG_COLLATION_FAST_LATIN_BUILDER
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after
129 UBool ok = !shortPrimaryOverflow && 129 UBool ok = !shortPrimaryOverflow &&
130 encodeCharCEs(errorCode) && encodeContractions(errorCode); 130 encodeCharCEs(errorCode) && encodeContractions(errorCode);
131 contractionCEs.removeAllElements(); // might reduce heap memory usage 131 contractionCEs.removeAllElements(); // might reduce heap memory usage
132 uniqueCEs.removeAllElements(); 132 uniqueCEs.removeAllElements();
133 return ok; 133 return ok;
134 } 134 }
135 135
136 UBool 136 UBool
137 CollationFastLatinBuilder::loadGroups(const CollationData &data, UErrorCode &errorCode) { 137 CollationFastLatinBuilder::loadGroups(const CollationData &data, UErrorCode &errorCode) {
138 if(U_FAILURE(errorCode)) { return FALSE; } 138 if(U_FAILURE(errorCode)) { return FALSE; }
139 result.append(0); // reserved for version & headerLength 139 headerLength = 1 + NUM_SPECIAL_GROUPS;
140 uint32_t r0 = (CollationFastLatin::VERSION << 8) | headerLength;
141 result.append((UChar)r0);
140 // The first few reordering groups should be special groups 142 // The first few reordering groups should be special groups
141 // (space, punct, ..., digit) followed by Latn, then Grek and other scripts. 143 // (space, punct, ..., digit) followed by Latn, then Grek and other scripts.
142 for(int32_t i = 0;;) { 144 for(int32_t i = 0; i < NUM_SPECIAL_GROUPS; ++i) {
143 if(i >= data.scriptsLength) { 145 lastSpecialPrimaries[i] = data.getLastPrimaryForGroup(UCOL_REORDER_CODE_FIRST + i);
144 // no Latn script 146 if(lastSpecialPrimaries[i] == 0) {
145 errorCode = U_INTERNAL_PROGRAM_ERROR; 147 // missing data
146 return FALSE; 148 return FALSE;
147 } 149 }
148 uint32_t head = data.scripts[i]; 150 result.append(0); // reserve a slot for this group
149 uint32_t lastByte = head & 0xff; // last primary byte in the group 151 }
150 int32_t group = data.scripts[i + 2]; 152
151 if(group == UCOL_REORDER_CODE_DIGIT) { 153 firstDigitPrimary = data.getFirstPrimaryForGroup(UCOL_REORDER_CODE_DIGIT);
152 firstDigitPrimary = (head & 0xff00) << 16; 154 firstLatinPrimary = data.getFirstPrimaryForGroup(USCRIPT_LATIN);
153 headerLength = result.length(); 155 lastLatinPrimary = data.getLastPrimaryForGroup(USCRIPT_LATIN);
154 uint32_t r0 = (CollationFastLatin::VERSION << 8) | headerLength; 156 if(firstDigitPrimary == 0 || firstLatinPrimary == 0) {
155 result.setCharAt(0, (UChar)r0); 157 // missing data
156 } else if(group == USCRIPT_LATIN) { 158 return FALSE;
157 if(firstDigitPrimary == 0) {
158 // no digit group
159 errorCode = U_INTERNAL_PROGRAM_ERROR;
160 return FALSE;
161 }
162 firstLatinPrimary = (head & 0xff00) << 16;
163 lastLatinPrimary = (lastByte << 24) | 0xffffff;
164 break;
165 } else if(firstDigitPrimary == 0) {
166 // a group below digits
167 if(lastByte > 0x7f) {
168 // We only use 7 bits for the last byte of a below-digits group.
169 // This does not warrant an errorCode, but we do not build a fast Latin table.
170 return FALSE;
171 }
172 result.append((UChar)lastByte);
173 }
174 i = i + 2 + data.scripts[i + 1];
175 } 159 }
176 return TRUE; 160 return TRUE;
177 } 161 }
178 162
179 UBool 163 UBool
180 CollationFastLatinBuilder::inSameGroup(uint32_t p, uint32_t q) const { 164 CollationFastLatinBuilder::inSameGroup(uint32_t p, uint32_t q) const {
181 // Both or neither need to be encoded as short primaries, 165 // Both or neither need to be encoded as short primaries,
182 // so that we can test only one and use the same bit mask. 166 // so that we can test only one and use the same bit mask.
183 if(p >= firstShortPrimary) { 167 if(p >= firstShortPrimary) {
184 return q >= firstShortPrimary; 168 return q >= firstShortPrimary;
185 } else if(q >= firstShortPrimary) { 169 } else if(q >= firstShortPrimary) {
186 return FALSE; 170 return FALSE;
187 } 171 }
188 // Both or neither must be potentially-variable, 172 // Both or neither must be potentially-variable,
189 // so that we can test only one and determine if both are variable. 173 // so that we can test only one and determine if both are variable.
190 if(p >= firstDigitPrimary) { 174 uint32_t lastVariablePrimary = lastSpecialPrimaries[NUM_SPECIAL_GROUPS - 1];
191 return q >= firstDigitPrimary; 175 if(p > lastVariablePrimary) {
192 } else if(q >= firstDigitPrimary) { 176 return q > lastVariablePrimary;
177 } else if(q > lastVariablePrimary) {
193 return FALSE; 178 return FALSE;
194 } 179 }
195 // Both will be encoded with long mini primaries. 180 // Both will be encoded with long mini primaries.
196 // They must be in the same special reordering group, 181 // They must be in the same special reordering group,
197 // so that we can test only one and determine if both are variable. 182 // so that we can test only one and determine if both are variable.
198 p >>= 24; // first primary byte
199 q >>= 24;
200 U_ASSERT(p != 0 && q != 0); 183 U_ASSERT(p != 0 && q != 0);
201 U_ASSERT(p <= result[headerLength - 1]); // the loop will terminate 184 for(int32_t i = 0;; ++i) { // will terminate
202 for(int32_t i = 1;; ++i) { 185 uint32_t lastPrimary = lastSpecialPrimaries[i];
203 uint32_t lastByte = result[i]; 186 if(p <= lastPrimary) {
204 if(p <= lastByte) { 187 return q <= lastPrimary;
205 return q <= lastByte; 188 } else if(q <= lastPrimary) {
206 } else if(q <= lastByte) {
207 return FALSE; 189 return FALSE;
208 } 190 }
209 } 191 }
210 } 192 }
211 193
212 void 194 void
213 CollationFastLatinBuilder::resetCEs() { 195 CollationFastLatinBuilder::resetCEs() {
214 contractionCEs.removeAllElements(); 196 contractionCEs.removeAllElements();
215 uniqueCEs.removeAllElements(); 197 uniqueCEs.removeAllElements();
216 shortPrimaryOverflow = FALSE; 198 shortPrimaryOverflow = FALSE;
(...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after
444 426
445 UBool 427 UBool
446 CollationFastLatinBuilder::encodeUniqueCEs(UErrorCode &errorCode) { 428 CollationFastLatinBuilder::encodeUniqueCEs(UErrorCode &errorCode) {
447 if(U_FAILURE(errorCode)) { return FALSE; } 429 if(U_FAILURE(errorCode)) { return FALSE; }
448 uprv_free(miniCEs); 430 uprv_free(miniCEs);
449 miniCEs = (uint16_t *)uprv_malloc(uniqueCEs.size() * 2); 431 miniCEs = (uint16_t *)uprv_malloc(uniqueCEs.size() * 2);
450 if(miniCEs == NULL) { 432 if(miniCEs == NULL) {
451 errorCode = U_MEMORY_ALLOCATION_ERROR; 433 errorCode = U_MEMORY_ALLOCATION_ERROR;
452 return FALSE; 434 return FALSE;
453 } 435 }
454 int32_t group = 1; 436 int32_t group = 0;
455 uint32_t lastGroupByte = result[group]; 437 uint32_t lastGroupPrimary = lastSpecialPrimaries[group];
456 // The lowest unique CE must be at least a secondary CE. 438 // The lowest unique CE must be at least a secondary CE.
457 U_ASSERT(((uint32_t)uniqueCEs.elementAti(0) >> 16) != 0); 439 U_ASSERT(((uint32_t)uniqueCEs.elementAti(0) >> 16) != 0);
458 uint32_t prevPrimary = 0; 440 uint32_t prevPrimary = 0;
459 uint32_t prevSecondary = 0; 441 uint32_t prevSecondary = 0;
460 uint32_t pri = 0; 442 uint32_t pri = 0;
461 uint32_t sec = 0; 443 uint32_t sec = 0;
462 uint32_t ter = CollationFastLatin::COMMON_TER; 444 uint32_t ter = CollationFastLatin::COMMON_TER;
463 for(int32_t i = 0; i < uniqueCEs.size(); ++i) { 445 for(int32_t i = 0; i < uniqueCEs.size(); ++i) {
464 int64_t ce = uniqueCEs.elementAti(i); 446 int64_t ce = uniqueCEs.elementAti(i);
465 // Note: At least one of the p/s/t weights changes from one unique CE to the next. 447 // Note: At least one of the p/s/t weights changes from one unique CE to the next.
466 // (uniqueCEs does not store case bits.) 448 // (uniqueCEs does not store case bits.)
467 uint32_t p = (uint32_t)(ce >> 32); 449 uint32_t p = (uint32_t)(ce >> 32);
468 if(p != prevPrimary) { 450 if(p != prevPrimary) {
469 uint32_t p1 = p >> 24; 451 while(p > lastGroupPrimary) {
470 while(p1 > lastGroupByte) {
471 U_ASSERT(pri <= CollationFastLatin::MAX_LONG); 452 U_ASSERT(pri <= CollationFastLatin::MAX_LONG);
472 // Add the last "long primary" in or before the group 453 // Set the group's header entry to the
473 // into the upper 9 bits of the group entry. 454 // last "long primary" in or before the group.
474 result.setCharAt(group, (UChar)((pri << 4) | lastGroupByte)); 455 result.setCharAt(1 + group, (UChar)pri);
475 if(++group < headerLength) { // group is 1-based 456 if(++group < NUM_SPECIAL_GROUPS) {
476 lastGroupByte = result[group]; 457 lastGroupPrimary = lastSpecialPrimaries[group];
477 } else { 458 } else {
478 lastGroupByte = 0xff; 459 lastGroupPrimary = 0xffffffff;
479 break; 460 break;
480 } 461 }
481 } 462 }
482 if(p < firstShortPrimary) { 463 if(p < firstShortPrimary) {
483 if(pri == 0) { 464 if(pri == 0) {
484 pri = CollationFastLatin::MIN_LONG; 465 pri = CollationFastLatin::MIN_LONG;
485 } else if(pri < CollationFastLatin::MAX_LONG) { 466 } else if(pri < CollationFastLatin::MAX_LONG) {
486 pri += CollationFastLatin::LONG_INC; 467 pri += CollationFastLatin::LONG_INC;
487 } else { 468 } else {
488 #if DEBUG_COLLATION_FAST_LATIN_BUILDER 469 #if DEBUG_COLLATION_FAST_LATIN_BUILDER
(...skipping 236 matching lines...) Expand 10 before | Expand all | Expand 10 after
725 // Secondary CE, or a CE with a short primary, copy the case bits. 706 // Secondary CE, or a CE with a short primary, copy the case bits.
726 case1 = (case1 >> (14 - 3)) + CollationFastLatin::LOWER_CASE; 707 case1 = (case1 >> (14 - 3)) + CollationFastLatin::LOWER_CASE;
727 miniCE1 |= case1; 708 miniCE1 |= case1;
728 } 709 }
729 return (miniCE << 16) | miniCE1; 710 return (miniCE << 16) | miniCE1;
730 } 711 }
731 712
732 U_NAMESPACE_END 713 U_NAMESPACE_END
733 714
734 #endif // !UCONFIG_NO_COLLATION 715 #endif // !UCONFIG_NO_COLLATION
OLDNEW
« no previous file with comments | « source/i18n/collationfastlatinbuilder.h ('k') | source/i18n/collationfcd.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698