Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(28)

Side by Side Diff: source/i18n/collationdata.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unused directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/i18n/collationdata.h ('k') | source/i18n/collationdatabuilder.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 *******************************************************************************
3 * Copyright (C) 2012-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * collationdata.cpp
7 *
8 * created on: 2012jul28
9 * created by: Markus W. Scherer
10 */
11
12 #include "unicode/utypes.h"
13
14 #if !UCONFIG_NO_COLLATION
15
16 #include "unicode/ucol.h"
17 #include "unicode/udata.h"
18 #include "unicode/uscript.h"
19 #include "cmemory.h"
20 #include "collation.h"
21 #include "collationdata.h"
22 #include "uassert.h"
23 #include "utrie2.h"
24
25 U_NAMESPACE_BEGIN
26
27 uint32_t
28 CollationData::getIndirectCE32(uint32_t ce32) const {
29 U_ASSERT(Collation::isSpecialCE32(ce32));
30 int32_t tag = Collation::tagFromCE32(ce32);
31 if(tag == Collation::DIGIT_TAG) {
32 // Fetch the non-numeric-collation CE32.
33 ce32 = ce32s[Collation::indexFromCE32(ce32)];
34 } else if(tag == Collation::LEAD_SURROGATE_TAG) {
35 ce32 = Collation::UNASSIGNED_CE32;
36 } else if(tag == Collation::U0000_TAG) {
37 // Fetch the normal ce32 for U+0000.
38 ce32 = ce32s[0];
39 }
40 return ce32;
41 }
42
43 uint32_t
44 CollationData::getFinalCE32(uint32_t ce32) const {
45 if(Collation::isSpecialCE32(ce32)) {
46 ce32 = getIndirectCE32(ce32);
47 }
48 return ce32;
49 }
50
51 int64_t
52 CollationData::getSingleCE(UChar32 c, UErrorCode &errorCode) const {
53 if(U_FAILURE(errorCode)) { return 0; }
54 // Keep parallel with CollationDataBuilder::getSingleCE().
55 const CollationData *d;
56 uint32_t ce32 = getCE32(c);
57 if(ce32 == Collation::FALLBACK_CE32) {
58 d = base;
59 ce32 = base->getCE32(c);
60 } else {
61 d = this;
62 }
63 while(Collation::isSpecialCE32(ce32)) {
64 switch(Collation::tagFromCE32(ce32)) {
65 case Collation::LATIN_EXPANSION_TAG:
66 case Collation::BUILDER_DATA_TAG:
67 case Collation::PREFIX_TAG:
68 case Collation::CONTRACTION_TAG:
69 case Collation::HANGUL_TAG:
70 case Collation::LEAD_SURROGATE_TAG:
71 errorCode = U_UNSUPPORTED_ERROR;
72 return 0;
73 case Collation::FALLBACK_TAG:
74 case Collation::RESERVED_TAG_3:
75 errorCode = U_INTERNAL_PROGRAM_ERROR;
76 return 0;
77 case Collation::LONG_PRIMARY_TAG:
78 return Collation::ceFromLongPrimaryCE32(ce32);
79 case Collation::LONG_SECONDARY_TAG:
80 return Collation::ceFromLongSecondaryCE32(ce32);
81 case Collation::EXPANSION32_TAG:
82 if(Collation::lengthFromCE32(ce32) == 1) {
83 ce32 = d->ce32s[Collation::indexFromCE32(ce32)];
84 break;
85 } else {
86 errorCode = U_UNSUPPORTED_ERROR;
87 return 0;
88 }
89 case Collation::EXPANSION_TAG: {
90 if(Collation::lengthFromCE32(ce32) == 1) {
91 return d->ces[Collation::indexFromCE32(ce32)];
92 } else {
93 errorCode = U_UNSUPPORTED_ERROR;
94 return 0;
95 }
96 }
97 case Collation::DIGIT_TAG:
98 // Fetch the non-numeric-collation CE32 and continue.
99 ce32 = d->ce32s[Collation::indexFromCE32(ce32)];
100 break;
101 case Collation::U0000_TAG:
102 U_ASSERT(c == 0);
103 // Fetch the normal ce32 for U+0000 and continue.
104 ce32 = d->ce32s[0];
105 break;
106 case Collation::OFFSET_TAG:
107 return d->getCEFromOffsetCE32(c, ce32);
108 case Collation::IMPLICIT_TAG:
109 return Collation::unassignedCEFromCodePoint(c);
110 }
111 }
112 return Collation::ceFromSimpleCE32(ce32);
113 }
114
115 uint32_t
116 CollationData::getFirstPrimaryForGroup(int32_t script) const {
117 int32_t index = findScript(script);
118 if(index < 0) {
119 return 0;
120 }
121 uint32_t head = scripts[index];
122 return (head & 0xff00) << 16;
123 }
124
125 uint32_t
126 CollationData::getLastPrimaryForGroup(int32_t script) const {
127 int32_t index = findScript(script);
128 if(index < 0) {
129 return 0;
130 }
131 uint32_t head = scripts[index];
132 uint32_t lastByte = head & 0xff;
133 return ((lastByte + 1) << 24) - 1;
134 }
135
136 int32_t
137 CollationData::getGroupForPrimary(uint32_t p) const {
138 p >>= 24; // Reordering groups are distinguished by primary lead bytes.
139 for(int32_t i = 0; i < scriptsLength; i = i + 2 + scripts[i + 1]) {
140 uint32_t lastByte = scripts[i] & 0xff;
141 if(p <= lastByte) {
142 return scripts[i + 2];
143 }
144 }
145 return -1;
146 }
147
148 int32_t
149 CollationData::findScript(int32_t script) const {
150 if(script < 0 || 0xffff < script) { return -1; }
151 for(int32_t i = 0; i < scriptsLength;) {
152 int32_t limit = i + 2 + scripts[i + 1];
153 for(int32_t j = i + 2; j < limit; ++j) {
154 if(script == scripts[j]) { return i; }
155 }
156 i = limit;
157 }
158 return -1;
159 }
160
161 int32_t
162 CollationData::getEquivalentScripts(int32_t script,
163 int32_t dest[], int32_t capacity,
164 UErrorCode &errorCode) const {
165 if(U_FAILURE(errorCode)) { return 0; }
166 int32_t i = findScript(script);
167 if(i < 0) { return 0; }
168 int32_t length = scripts[i + 1];
169 U_ASSERT(length != 0);
170 if(length > capacity) {
171 errorCode = U_BUFFER_OVERFLOW_ERROR;
172 return length;
173 }
174 i += 2;
175 dest[0] = scripts[i++];
176 for(int32_t j = 1; j < length; ++j) {
177 script = scripts[i++];
178 // Sorted insertion.
179 for(int32_t k = j;; --k) {
180 // Invariant: dest[k] is free to receive either script or dest[k - 1 ].
181 if(k > 0 && script < dest[k - 1]) {
182 dest[k] = dest[k - 1];
183 } else {
184 dest[k] = script;
185 break;
186 }
187 }
188 }
189 return length;
190 }
191
192 void
193 CollationData::makeReorderTable(const int32_t *reorder, int32_t length,
194 uint8_t table[256], UErrorCode &errorCode) const {
195 if(U_FAILURE(errorCode)) { return; }
196
197 // Initialize the table.
198 // Never reorder special low and high primary lead bytes.
199 int32_t lowByte;
200 for(lowByte = 0; lowByte <= Collation::MERGE_SEPARATOR_BYTE; ++lowByte) {
201 table[lowByte] = lowByte;
202 }
203 // lowByte == 03
204
205 int32_t highByte;
206 for(highByte = 0xff; highByte >= Collation::TRAIL_WEIGHT_BYTE; --highByte) {
207 table[highByte] = highByte;
208 }
209 // highByte == FE
210
211 // Set intermediate bytes to 0 to indicate that they have not been set yet.
212 for(int32_t i = lowByte; i <= highByte; ++i) {
213 table[i] = 0;
214 }
215
216 // Get the set of special reorder codes in the input list.
217 // This supports up to 32 special reorder codes;
218 // it works for data with codes beyond UCOL_REORDER_CODE_LIMIT.
219 uint32_t specials = 0;
220 for(int32_t i = 0; i < length; ++i) {
221 int32_t reorderCode = reorder[i] - UCOL_REORDER_CODE_FIRST;
222 if(0 <= reorderCode && reorderCode <= 31) {
223 specials |= (uint32_t)1 << reorderCode;
224 }
225 }
226
227 // Start the reordering with the special low reorder codes that do not occur in the input.
228 for(int32_t i = 0;; i += 3) {
229 if(scripts[i + 1] != 1) { break; } // Went beyond special single-code r eorder codes.
230 int32_t reorderCode = (int32_t)scripts[i + 2] - UCOL_REORDER_CODE_FIRST;
231 if(reorderCode < 0) { break; } // Went beyond special reorder codes.
232 if((specials & ((uint32_t)1 << reorderCode)) == 0) {
233 int32_t head = scripts[i];
234 int32_t firstByte = head >> 8;
235 int32_t lastByte = head & 0xff;
236 do { table[firstByte++] = lowByte++; } while(firstByte <= lastByte);
237 }
238 }
239
240 // Reorder according to the input scripts, continuing from the bottom of the bytes range.
241 for(int32_t i = 0; i < length;) {
242 int32_t script = reorder[i++];
243 if(script == USCRIPT_UNKNOWN) {
244 // Put the remaining scripts at the top.
245 while(i < length) {
246 script = reorder[--length];
247 if(script == USCRIPT_UNKNOWN || // Must occur at most once.
248 script == UCOL_REORDER_CODE_DEFAULT) {
249 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
250 return;
251 }
252 int32_t index = findScript(script);
253 if(index < 0) { continue; }
254 int32_t head = scripts[index];
255 int32_t firstByte = head >> 8;
256 int32_t lastByte = head & 0xff;
257 if(table[firstByte] != 0) { // Duplicate or equivalent script.
258 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
259 return;
260 }
261 do { table[lastByte--] = highByte--; } while(firstByte <= lastBy te);
262 }
263 break;
264 }
265 if(script == UCOL_REORDER_CODE_DEFAULT) {
266 // The default code must be the only one in the list, and that is ha ndled by the caller.
267 // Otherwise it must not be used.
268 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
269 return;
270 }
271 int32_t index = findScript(script);
272 if(index < 0) { continue; }
273 int32_t head = scripts[index];
274 int32_t firstByte = head >> 8;
275 int32_t lastByte = head & 0xff;
276 if(table[firstByte] != 0) { // Duplicate or equivalent script.
277 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
278 return;
279 }
280 do { table[firstByte++] = lowByte++; } while(firstByte <= lastByte);
281 }
282
283 // Put all remaining scripts into the middle.
284 // Avoid table[0] which must remain 0.
285 for(int32_t i = 1; i <= 0xff; ++i) {
286 if(table[i] == 0) { table[i] = lowByte++; }
287 }
288 U_ASSERT(lowByte == highByte + 1);
289 }
290
291 U_NAMESPACE_END
292
293 #endif // !UCONFIG_NO_COLLATION
OLDNEW
« no previous file with comments | « source/i18n/collationdata.h ('k') | source/i18n/collationdatabuilder.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698