Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(123)

Side by Side Diff: source/i18n/ucoleitr.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unusued directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/i18n/ucol_wgt.cpp ('k') | source/i18n/ucurr.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ****************************************************************************** 2 ******************************************************************************
3 * Copyright (C) 2001-2011, International Business Machines 3 * Copyright (C) 2001-2014, International Business Machines
4 * Corporation and others. All Rights Reserved. 4 * Corporation and others. All Rights Reserved.
5 ****************************************************************************** 5 ******************************************************************************
6 * 6 *
7 * File ucoleitr.cpp 7 * File ucoleitr.cpp
8 * 8 *
9 * Modification History: 9 * Modification History:
10 * 10 *
11 * Date Name Description 11 * Date Name Description
12 * 02/15/2001 synwee Modified all methods to process its own function 12 * 02/15/2001 synwee Modified all methods to process its own function
13 * instead of calling the equivalent c++ api (coleitr.h) 13 * instead of calling the equivalent c++ api (coleitr.h)
14 * 2012-2014 markus Rewritten in C++ again.
14 ******************************************************************************/ 15 ******************************************************************************/
15 16
16 #include "unicode/utypes.h" 17 #include "unicode/utypes.h"
17 18
18 #if !UCONFIG_NO_COLLATION 19 #if !UCONFIG_NO_COLLATION
19 20
21 #include "unicode/coleitr.h"
22 #include "unicode/tblcoll.h"
20 #include "unicode/ucoleitr.h" 23 #include "unicode/ucoleitr.h"
21 #include "unicode/ustring.h" 24 #include "unicode/ustring.h"
22 #include "unicode/sortkey.h" 25 #include "unicode/sortkey.h"
23 #include "unicode/uobject.h" 26 #include "unicode/uobject.h"
24 #include "ucol_imp.h"
25 #include "cmemory.h" 27 #include "cmemory.h"
28 #include "usrchimp.h"
26 29
27 U_NAMESPACE_USE 30 U_NAMESPACE_USE
28 31
29 #define BUFFER_LENGTH 100 32 #define BUFFER_LENGTH 100
30 33
31 #define DEFAULT_BUFFER_SIZE 16 34 #define DEFAULT_BUFFER_SIZE 16
32 #define BUFFER_GROW 8 35 #define BUFFER_GROW 8
33 36
34 #define ARRAY_SIZE(array) (sizeof array / sizeof array[0]) 37 #define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
35 38
36 #define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0]) 39 #define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0])
37 40
38 #define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) 41 #define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
39 42
40 #define GROW_ARRAY(array, newSize) uprv_realloc((void *) (array), (newSize) * si zeof (array)[0]) 43 #define GROW_ARRAY(array, newSize) uprv_realloc((void *) (array), (newSize) * si zeof (array)[0])
41 44
42 #define DELETE_ARRAY(array) uprv_free((void *) (array)) 45 #define DELETE_ARRAY(array) uprv_free((void *) (array))
43 46
44 typedef struct icu::collIterate collIterator;
45
46 struct RCEI 47 struct RCEI
47 { 48 {
48 uint32_t ce; 49 uint32_t ce;
49 int32_t low; 50 int32_t low;
50 int32_t high; 51 int32_t high;
51 }; 52 };
52 53
53 U_NAMESPACE_BEGIN 54 U_NAMESPACE_BEGIN
54 55
55 struct RCEBuffer 56 struct RCEBuffer
56 { 57 {
57 RCEI defaultBuffer[DEFAULT_BUFFER_SIZE]; 58 RCEI defaultBuffer[DEFAULT_BUFFER_SIZE];
58 RCEI *buffer; 59 RCEI *buffer;
59 int32_t bufferIndex; 60 int32_t bufferIndex;
60 int32_t bufferSize; 61 int32_t bufferSize;
61 62
62 RCEBuffer(); 63 RCEBuffer();
63 ~RCEBuffer(); 64 ~RCEBuffer();
64 65
65 UBool empty() const; 66 UBool empty() const;
66 void put(uint32_t ce, int32_t ixLow, int32_t ixHigh); 67 void put(uint32_t ce, int32_t ixLow, int32_t ixHigh);
67 const RCEI *get(); 68 const RCEI *get();
68 }; 69 };
69 70
70 RCEBuffer::RCEBuffer() 71 RCEBuffer::RCEBuffer()
71 { 72 {
72 buffer = defaultBuffer; 73 buffer = defaultBuffer;
73 bufferIndex = 0; 74 bufferIndex = 0;
74 bufferSize = DEFAULT_BUFFER_SIZE; 75 bufferSize = UPRV_LENGTHOF(defaultBuffer);
75 } 76 }
76 77
77 RCEBuffer::~RCEBuffer() 78 RCEBuffer::~RCEBuffer()
78 { 79 {
79 if (buffer != defaultBuffer) { 80 if (buffer != defaultBuffer) {
80 DELETE_ARRAY(buffer); 81 DELETE_ARRAY(buffer);
81 } 82 }
82 } 83 }
83 84
84 UBool RCEBuffer::empty() const 85 UBool RCEBuffer::empty() const
(...skipping 25 matching lines...) Expand all
110 111
111 const RCEI *RCEBuffer::get() 112 const RCEI *RCEBuffer::get()
112 { 113 {
113 if (bufferIndex > 0) { 114 if (bufferIndex > 0) {
114 return &buffer[--bufferIndex]; 115 return &buffer[--bufferIndex];
115 } 116 }
116 117
117 return NULL; 118 return NULL;
118 } 119 }
119 120
120 struct PCEI
121 {
122 uint64_t ce;
123 int32_t low;
124 int32_t high;
125 };
126
127 struct PCEBuffer
128 {
129 PCEI defaultBuffer[DEFAULT_BUFFER_SIZE];
130 PCEI *buffer;
131 int32_t bufferIndex;
132 int32_t bufferSize;
133
134 PCEBuffer();
135 ~PCEBuffer();
136
137 void reset();
138 UBool empty() const;
139 void put(uint64_t ce, int32_t ixLow, int32_t ixHigh);
140 const PCEI *get();
141 };
142
143 PCEBuffer::PCEBuffer() 121 PCEBuffer::PCEBuffer()
144 { 122 {
145 buffer = defaultBuffer; 123 buffer = defaultBuffer;
146 bufferIndex = 0; 124 bufferIndex = 0;
147 bufferSize = DEFAULT_BUFFER_SIZE; 125 bufferSize = UPRV_LENGTHOF(defaultBuffer);
148 } 126 }
149 127
150 PCEBuffer::~PCEBuffer() 128 PCEBuffer::~PCEBuffer()
151 { 129 {
152 if (buffer != defaultBuffer) { 130 if (buffer != defaultBuffer) {
153 DELETE_ARRAY(buffer); 131 DELETE_ARRAY(buffer);
154 } 132 }
155 } 133 }
156 134
157 void PCEBuffer::reset() 135 void PCEBuffer::reset()
(...skipping 30 matching lines...) Expand all
188 166
189 const PCEI *PCEBuffer::get() 167 const PCEI *PCEBuffer::get()
190 { 168 {
191 if (bufferIndex > 0) { 169 if (bufferIndex > 0) {
192 return &buffer[--bufferIndex]; 170 return &buffer[--bufferIndex];
193 } 171 }
194 172
195 return NULL; 173 return NULL;
196 } 174 }
197 175
198 /* 176 UCollationPCE::UCollationPCE(UCollationElements *elems) { init(elems); }
199 * This inherits from UObject so that
200 * it can be allocated by new and the
201 * constructor for PCEBuffer is called.
202 */
203 struct UCollationPCE : public UObject
204 {
205 PCEBuffer pceBuffer;
206 UCollationStrength strength;
207 UBool toShift;
208 UBool isShifted;
209 uint32_t variableTop;
210 177
211 UCollationPCE(UCollationElements *elems); 178 UCollationPCE::UCollationPCE(CollationElementIterator *iter) { init(iter); }
212 ~UCollationPCE();
213 179
214 void init(const UCollator *coll); 180 void UCollationPCE::init(UCollationElements *elems) {
215 181 init(CollationElementIterator::fromUCollationElements(elems));
216 virtual UClassID getDynamicClassID() const;
217 static UClassID getStaticClassID();
218 };
219
220 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UCollationPCE)
221
222 UCollationPCE::UCollationPCE(UCollationElements *elems)
223 {
224 init(elems->iteratordata_.coll);
225 } 182 }
226 183
227 void UCollationPCE::init(const UCollator *coll) 184 void UCollationPCE::init(CollationElementIterator *iter)
185 {
186 cei = iter;
187 init(*iter->rbc_);
188 }
189
190 void UCollationPCE::init(const Collator &coll)
228 { 191 {
229 UErrorCode status = U_ZERO_ERROR; 192 UErrorCode status = U_ZERO_ERROR;
230 193
231 strength = ucol_getStrength(coll); 194 strength = coll.getAttribute(UCOL_STRENGTH, status);
232 toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == U COL_SHIFTED; 195 toShift = coll.getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHI FTED;
233 isShifted = FALSE; 196 isShifted = FALSE;
234 variableTop = coll->variableTopValue << 16; 197 variableTop = coll.getVariableTop(status);
235 } 198 }
236 199
237 UCollationPCE::~UCollationPCE() 200 UCollationPCE::~UCollationPCE()
238 { 201 {
239 // nothing to do 202 // nothing to do
240 } 203 }
241 204
242 205 uint64_t UCollationPCE::processCE(uint32_t ce)
243 U_NAMESPACE_END
244
245
246 inline uint64_t processCE(UCollationElements *elems, uint32_t ce)
247 { 206 {
248 uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0; 207 uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
249 208
250 // This is clean, but somewhat slow... 209 // This is clean, but somewhat slow...
251 // We could apply the mask to ce and then 210 // We could apply the mask to ce and then
252 // just get all three orders... 211 // just get all three orders...
253 switch(elems->pce->strength) { 212 switch(strength) {
254 default: 213 default:
255 tertiary = ucol_tertiaryOrder(ce); 214 tertiary = ucol_tertiaryOrder(ce);
256 /* note fall-through */ 215 /* note fall-through */
257 216
258 case UCOL_SECONDARY: 217 case UCOL_SECONDARY:
259 secondary = ucol_secondaryOrder(ce); 218 secondary = ucol_secondaryOrder(ce);
260 /* note fall-through */ 219 /* note fall-through */
261 220
262 case UCOL_PRIMARY: 221 case UCOL_PRIMARY:
263 primary = ucol_primaryOrder(ce); 222 primary = ucol_primaryOrder(ce);
264 } 223 }
265 224
266 // **** This should probably handle continuations too. **** 225 // **** This should probably handle continuations too. ****
267 // **** That means that we need 24 bits for the primary **** 226 // **** That means that we need 24 bits for the primary ****
268 // **** instead of the 16 that we're currently using. **** 227 // **** instead of the 16 that we're currently using. ****
269 // **** So we can lay out the 64 bits as: 24.12.12.16. **** 228 // **** So we can lay out the 64 bits as: 24.12.12.16. ****
270 // **** Another complication with continuations is that **** 229 // **** Another complication with continuations is that ****
271 // **** the *second* CE is marked as a continuation, so **** 230 // **** the *second* CE is marked as a continuation, so ****
272 // **** we always have to peek ahead to know how long **** 231 // **** we always have to peek ahead to know how long ****
273 // **** the primary is... **** 232 // **** the primary is... ****
274 if ((elems->pce->toShift && elems->pce->variableTop > ce && primary != 0) 233 if ((toShift && variableTop > ce && primary != 0)
275 || (elems->pce->isShifted && primary == 0)) { 234 || (isShifted && primary == 0)) {
276 235
277 if (primary == 0) { 236 if (primary == 0) {
278 return UCOL_IGNORABLE; 237 return UCOL_IGNORABLE;
279 } 238 }
280 239
281 if (elems->pce->strength >= UCOL_QUATERNARY) { 240 if (strength >= UCOL_QUATERNARY) {
282 quaternary = primary; 241 quaternary = primary;
283 } 242 }
284 243
285 primary = secondary = tertiary = 0; 244 primary = secondary = tertiary = 0;
286 elems->pce->isShifted = TRUE; 245 isShifted = TRUE;
287 } else { 246 } else {
288 if (elems->pce->strength >= UCOL_QUATERNARY) { 247 if (strength >= UCOL_QUATERNARY) {
289 quaternary = 0xFFFF; 248 quaternary = 0xFFFF;
290 } 249 }
291 250
292 elems->pce->isShifted = FALSE; 251 isShifted = FALSE;
293 } 252 }
294 253
295 return primary << 48 | secondary << 32 | tertiary << 16 | quaternary; 254 return primary << 48 | secondary << 32 | tertiary << 16 | quaternary;
296 } 255 }
297 256
298 U_CAPI void U_EXPORT2 257 U_NAMESPACE_END
299 uprv_init_pce(const UCollationElements *elems)
300 {
301 if (elems->pce != NULL) {
302 elems->pce->init(elems->iteratordata_.coll);
303 }
304 }
305
306
307 258
308 /* public methods ---------------------------------------------------- */ 259 /* public methods ---------------------------------------------------- */
309 260
310 U_CAPI UCollationElements* U_EXPORT2 261 U_CAPI UCollationElements* U_EXPORT2
311 ucol_openElements(const UCollator *coll, 262 ucol_openElements(const UCollator *coll,
312 const UChar *text, 263 const UChar *text,
313 int32_t textLength, 264 int32_t textLength,
314 UErrorCode *status) 265 UErrorCode *status)
315 { 266 {
316 if (U_FAILURE(*status)) { 267 if (U_FAILURE(*status)) {
317 return NULL; 268 return NULL;
318 } 269 }
270 if (coll == NULL || (text == NULL && textLength != 0)) {
271 *status = U_ILLEGAL_ARGUMENT_ERROR;
272 return NULL;
273 }
274 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
275 if (rbc == NULL) {
276 *status = U_UNSUPPORTED_ERROR; // coll is a Collator but not a RuleBasedCollator
277 return NULL;
278 }
319 279
320 UCollationElements *result = new UCollationElements; 280 UnicodeString s((UBool)(textLength < 0), text, textLength);
321 if (result == NULL) { 281 CollationElementIterator *cei = rbc->createCollationElementIterator(s);
282 if (cei == NULL) {
322 *status = U_MEMORY_ALLOCATION_ERROR; 283 *status = U_MEMORY_ALLOCATION_ERROR;
323 return NULL; 284 return NULL;
324 } 285 }
325 286
326 result->reset_ = TRUE; 287 return cei->toUCollationElements();
327 result->isWritable = FALSE;
328 result->pce = NULL;
329
330 if (text == NULL) {
331 textLength = 0;
332 }
333 uprv_init_collIterate(coll, text, textLength, &result->iteratordata_, status );
334
335 return result;
336 } 288 }
337 289
338 290
339 U_CAPI void U_EXPORT2 291 U_CAPI void U_EXPORT2
340 ucol_closeElements(UCollationElements *elems) 292 ucol_closeElements(UCollationElements *elems)
341 { 293 {
342 » if (elems != NULL) { 294 delete CollationElementIterator::fromUCollationElements(elems);
343 » collIterate *ci = &elems->iteratordata_;
344
345 » if (ci->extendCEs) {
346 » » uprv_free(ci->extendCEs);
347 » }
348
349 » if (ci->offsetBuffer) {
350 » » uprv_free(ci->offsetBuffer);
351 » }
352
353 » if (elems->isWritable && elems->iteratordata_.string != NULL)
354 » {
355 » » uprv_free((UChar *)elems->iteratordata_.string);
356 » }
357
358 » if (elems->pce != NULL) {
359 » » delete elems->pce;
360 » }
361
362 » delete elems;
363 » }
364 } 295 }
365 296
366 U_CAPI void U_EXPORT2 297 U_CAPI void U_EXPORT2
367 ucol_reset(UCollationElements *elems) 298 ucol_reset(UCollationElements *elems)
368 { 299 {
369 collIterate *ci = &(elems->iteratordata_); 300 CollationElementIterator::fromUCollationElements(elems)->reset();
370 elems->reset_ = TRUE;
371 ci->pos = ci->string;
372 if ((ci->flags & UCOL_ITER_HASLEN) == 0 || ci->endp == NULL) {
373 ci->endp = ci->string + u_strlen(ci->string);
374 }
375 ci->CEpos = ci->toReturn = ci->CEs;
376 ci->flags = (ci->flags & UCOL_FORCE_HAN_IMPLICIT) | UCOL_ITER_HASLEN;
377 if (ci->coll->normalizationMode == UCOL_ON) {
378 ci->flags |= UCOL_ITER_NORM;
379 }
380
381 ci->writableBuffer.remove();
382 ci->fcdPosition = NULL;
383
384 //ci->offsetReturn = ci->offsetStore = NULL;
385 » ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
386 }
387
388 U_CAPI void U_EXPORT2
389 ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status)
390 {
391 if (U_FAILURE(*status)) {
392 return;
393 }
394
395 if (elems == NULL) {
396 *status = U_ILLEGAL_ARGUMENT_ERROR;
397 return;
398 }
399
400 elems->iteratordata_.flags |= UCOL_FORCE_HAN_IMPLICIT;
401 } 301 }
402 302
403 U_CAPI int32_t U_EXPORT2 303 U_CAPI int32_t U_EXPORT2
404 ucol_next(UCollationElements *elems, 304 ucol_next(UCollationElements *elems,
405 UErrorCode *status) 305 UErrorCode *status)
406 { 306 {
407 int32_t result;
408 if (U_FAILURE(*status)) { 307 if (U_FAILURE(*status)) {
409 return UCOL_NULLORDER; 308 return UCOL_NULLORDER;
410 } 309 }
411 310
412 elems->reset_ = FALSE; 311 return CollationElementIterator::fromUCollationElements(elems)->next(*status );
413
414 result = (int32_t)ucol_getNextCE(elems->iteratordata_.coll,
415 &elems->iteratordata_,
416 status);
417
418 if (result == UCOL_NO_MORE_CES) {
419 result = UCOL_NULLORDER;
420 }
421 return result;
422 } 312 }
423 313
424 U_CAPI int64_t U_EXPORT2 314 U_NAMESPACE_BEGIN
425 ucol_nextProcessed(UCollationElements *elems, 315
316 int64_t
317 UCollationPCE::nextProcessed(
426 int32_t *ixLow, 318 int32_t *ixLow,
427 int32_t *ixHigh, 319 int32_t *ixHigh,
428 UErrorCode *status) 320 UErrorCode *status)
429 { 321 {
430 const UCollator *coll = elems->iteratordata_.coll;
431 int64_t result = UCOL_IGNORABLE; 322 int64_t result = UCOL_IGNORABLE;
432 uint32_t low = 0, high = 0; 323 uint32_t low = 0, high = 0;
433 324
434 if (U_FAILURE(*status)) { 325 if (U_FAILURE(*status)) {
435 return UCOL_PROCESSED_NULLORDER; 326 return UCOL_PROCESSED_NULLORDER;
436 } 327 }
437 328
438 if (elems->pce == NULL) { 329 pceBuffer.reset();
439 elems->pce = new UCollationPCE(elems);
440 } else {
441 elems->pce->pceBuffer.reset();
442 }
443
444 elems->reset_ = FALSE;
445 330
446 do { 331 do {
447 low = ucol_getOffset(elems); 332 low = cei->getOffset();
448 uint32_t ce = (uint32_t) ucol_getNextCE(coll, &elems->iteratordata_, sta tus); 333 int32_t ce = cei->next(*status);
449 high = ucol_getOffset(elems); 334 high = cei->getOffset();
450 335
451 if (ce == UCOL_NO_MORE_CES) { 336 if (ce == UCOL_NULLORDER) {
452 result = UCOL_PROCESSED_NULLORDER; 337 result = UCOL_PROCESSED_NULLORDER;
453 break; 338 break;
454 } 339 }
455 340
456 result = processCE(elems, ce); 341 result = processCE((uint32_t)ce);
457 } while (result == UCOL_IGNORABLE); 342 } while (result == UCOL_IGNORABLE);
458 343
459 if (ixLow != NULL) { 344 if (ixLow != NULL) {
460 *ixLow = low; 345 *ixLow = low;
461 } 346 }
462 347
463 if (ixHigh != NULL) { 348 if (ixHigh != NULL) {
464 *ixHigh = high; 349 *ixHigh = high;
465 } 350 }
466 351
467 return result; 352 return result;
468 } 353 }
469 354
355 U_NAMESPACE_END
356
470 U_CAPI int32_t U_EXPORT2 357 U_CAPI int32_t U_EXPORT2
471 ucol_previous(UCollationElements *elems, 358 ucol_previous(UCollationElements *elems,
472 UErrorCode *status) 359 UErrorCode *status)
473 { 360 {
474 if(U_FAILURE(*status)) { 361 if(U_FAILURE(*status)) {
475 return UCOL_NULLORDER; 362 return UCOL_NULLORDER;
476 } 363 }
477 else 364 return CollationElementIterator::fromUCollationElements(elems)->previous(*st atus);
478 {
479 int32_t result;
480
481 if (elems->reset_ && (elems->iteratordata_.pos == elems->iteratordata_.s tring)) {
482 if (elems->iteratordata_.endp == NULL) {
483 elems->iteratordata_.endp = elems->iteratordata_.string +
484 u_strlen(elems->iteratordata_.string );
485 elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
486 }
487 elems->iteratordata_.pos = elems->iteratordata_.endp;
488 elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
489 }
490
491 elems->reset_ = FALSE;
492
493 result = (int32_t)ucol_getPrevCE(elems->iteratordata_.coll,
494 &(elems->iteratordata_),
495 status);
496
497 if (result == UCOL_NO_MORE_CES) {
498 result = UCOL_NULLORDER;
499 }
500
501 return result;
502 }
503 } 365 }
504 366
505 U_CAPI int64_t U_EXPORT2 367 U_NAMESPACE_BEGIN
506 ucol_previousProcessed(UCollationElements *elems, 368
369 int64_t
370 UCollationPCE::previousProcessed(
507 int32_t *ixLow, 371 int32_t *ixLow,
508 int32_t *ixHigh, 372 int32_t *ixHigh,
509 UErrorCode *status) 373 UErrorCode *status)
510 { 374 {
511 const UCollator *coll = elems->iteratordata_.coll;
512 int64_t result = UCOL_IGNORABLE; 375 int64_t result = UCOL_IGNORABLE;
513 // int64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
514 // UCollationStrength strength = ucol_getStrength(coll);
515 // UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED;
516 // uint32_t variableTop = coll->variableTopValue;
517 int32_t low = 0, high = 0; 376 int32_t low = 0, high = 0;
518 377
519 if (U_FAILURE(*status)) { 378 if (U_FAILURE(*status)) {
520 return UCOL_PROCESSED_NULLORDER; 379 return UCOL_PROCESSED_NULLORDER;
521 } 380 }
522 381
523 if (elems->reset_ && 382 // pceBuffer.reset();
524 (elems->iteratordata_.pos == elems->iteratordata_.string)) {
525 if (elems->iteratordata_.endp == NULL) {
526 elems->iteratordata_.endp = elems->iteratordata_.string +
527 u_strlen(elems->iteratordata_.string);
528 elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
529 }
530 383
531 elems->iteratordata_.pos = elems->iteratordata_.endp; 384 while (pceBuffer.empty()) {
532 elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
533 }
534
535 if (elems->pce == NULL) {
536 elems->pce = new UCollationPCE(elems);
537 } else {
538 //elems->pce->pceBuffer.reset();
539 }
540
541 elems->reset_ = FALSE;
542
543 while (elems->pce->pceBuffer.empty()) {
544 // buffer raw CEs up to non-ignorable primary 385 // buffer raw CEs up to non-ignorable primary
545 RCEBuffer rceb; 386 RCEBuffer rceb;
546 uint32_t ce; 387 int32_t ce;
547 388
548 // **** do we need to reset rceb, or will it always be empty at this point **** 389 // **** do we need to reset rceb, or will it always be empty at this point ****
549 do { 390 do {
550 high = ucol_getOffset(elems); 391 high = cei->getOffset();
551 ce = ucol_getPrevCE(coll, &elems->iteratordata_, status); 392 ce = cei->previous(*status);
552 low = ucol_getOffset(elems); 393 low = cei->getOffset();
553 394
554 if (ce == UCOL_NO_MORE_CES) { 395 if (ce == UCOL_NULLORDER) {
555 if (! rceb.empty()) { 396 if (! rceb.empty()) {
556 break; 397 break;
557 } 398 }
558 399
559 goto finish; 400 goto finish;
560 } 401 }
561 402
562 rceb.put(ce, low, high); 403 rceb.put((uint32_t)ce, low, high);
563 } while ((ce & UCOL_PRIMARYMASK) == 0); 404 } while ((ce & UCOL_PRIMARYORDERMASK) == 0 || isContinuation(ce));
564 405
565 // process the raw CEs 406 // process the raw CEs
566 while (! rceb.empty()) { 407 while (! rceb.empty()) {
567 const RCEI *rcei = rceb.get(); 408 const RCEI *rcei = rceb.get();
568 409
569 result = processCE(elems, rcei->ce); 410 result = processCE(rcei->ce);
570 411
571 if (result != UCOL_IGNORABLE) { 412 if (result != UCOL_IGNORABLE) {
572 elems->pce->pceBuffer.put(result, rcei->low, rcei->high); 413 pceBuffer.put(result, rcei->low, rcei->high);
573 } 414 }
574 } 415 }
575 } 416 }
576 417
577 finish: 418 finish:
578 if (elems->pce->pceBuffer.empty()) { 419 if (pceBuffer.empty()) {
579 // **** Is -1 the right value for ixLow, ixHigh? **** 420 // **** Is -1 the right value for ixLow, ixHigh? ****
580 if (ixLow != NULL) { 421 if (ixLow != NULL) {
581 *ixLow = -1; 422 *ixLow = -1;
582 } 423 }
583 424
584 if (ixHigh != NULL) { 425 if (ixHigh != NULL) {
585 *ixHigh = -1 426 *ixHigh = -1
586 ; 427 ;
587 } 428 }
588 return UCOL_PROCESSED_NULLORDER; 429 return UCOL_PROCESSED_NULLORDER;
589 } 430 }
590 431
591 const PCEI *pcei = elems->pce->pceBuffer.get(); 432 const PCEI *pcei = pceBuffer.get();
592 433
593 if (ixLow != NULL) { 434 if (ixLow != NULL) {
594 *ixLow = pcei->low; 435 *ixLow = pcei->low;
595 } 436 }
596 437
597 if (ixHigh != NULL) { 438 if (ixHigh != NULL) {
598 *ixHigh = pcei->high; 439 *ixHigh = pcei->high;
599 } 440 }
600 441
601 return pcei->ce; 442 return pcei->ce;
602 } 443 }
603 444
445 U_NAMESPACE_END
446
604 U_CAPI int32_t U_EXPORT2 447 U_CAPI int32_t U_EXPORT2
605 ucol_getMaxExpansion(const UCollationElements *elems, 448 ucol_getMaxExpansion(const UCollationElements *elems,
606 int32_t order) 449 int32_t order)
607 { 450 {
608 uint8_t result; 451 return CollationElementIterator::fromUCollationElements(elems)->getMaxExpans ion(order);
609 452
610 #if 0 453 // TODO: The old code masked the order according to strength and then did a binary search.
611 UCOL_GETMAXEXPANSION(elems->iteratordata_.coll, (uint32_t)order, result); 454 // However this was probably at least partially broken because of the following comment.
612 #else 455 // Still, it might have found a match when this version may not.
613 const UCollator *coll = elems->iteratordata_.coll;
614 const uint32_t *start;
615 const uint32_t *limit;
616 const uint32_t *mid;
617 uint32_t strengthMask = 0;
618 uint32_t mOrder = (uint32_t) order;
619
620 switch (coll->strength)
621 {
622 default:
623 strengthMask |= UCOL_TERTIARYORDERMASK;
624 /* fall through */
625
626 case UCOL_SECONDARY:
627 strengthMask |= UCOL_SECONDARYORDERMASK;
628 /* fall through */
629
630 case UCOL_PRIMARY:
631 strengthMask |= UCOL_PRIMARYORDERMASK;
632 }
633
634 mOrder &= strengthMask;
635 start = (coll)->endExpansionCE;
636 limit = (coll)->lastEndExpansionCE;
637
638 while (start < limit - 1) {
639 mid = start + ((limit - start) >> 1);
640 if (mOrder <= (*mid & strengthMask)) {
641 limit = mid;
642 } else {
643 start = mid;
644 }
645 }
646 456
647 // FIXME: with a masked search, there might be more than one hit, 457 // FIXME: with a masked search, there might be more than one hit,
648 // so we need to look forward and backward from the match to find all 458 // so we need to look forward and backward from the match to find all
649 // of the hits... 459 // of the hits...
650 if ((*start & strengthMask) == mOrder) { 460 }
651 result = *((coll)->expansionCESize + (start - (coll)->endExpansionCE));
652 } else if ((*limit & strengthMask) == mOrder) {
653 result = *(coll->expansionCESize + (limit - coll->endExpansionCE));
654 } else if ((mOrder & 0xFFFF) == 0x00C0) {
655 result = 2;
656 } else {
657 result = 1;
658 }
659 #endif
660 461
661 return result;
662 }
663
664 U_CAPI void U_EXPORT2 462 U_CAPI void U_EXPORT2
665 ucol_setText( UCollationElements *elems, 463 ucol_setText( UCollationElements *elems,
666 const UChar *text, 464 const UChar *text,
667 int32_t textLength, 465 int32_t textLength,
668 UErrorCode *status) 466 UErrorCode *status)
669 { 467 {
670 if (U_FAILURE(*status)) { 468 if (U_FAILURE(*status)) {
671 return; 469 return;
672 } 470 }
673 471
674 if (elems->isWritable && elems->iteratordata_.string != NULL) 472 if ((text == NULL && textLength != 0)) {
675 { 473 *status = U_ILLEGAL_ARGUMENT_ERROR;
676 uprv_free((UChar *)elems->iteratordata_.string); 474 return;
677 } 475 }
678 476 UnicodeString s((UBool)(textLength < 0), text, textLength);
679 if (text == NULL) { 477 return CollationElementIterator::fromUCollationElements(elems)->setText(s, * status);
680 textLength = 0;
681 }
682
683 elems->isWritable = FALSE;
684
685 /* free offset buffer to avoid memory leak before initializing. */
686 ucol_freeOffsetBuffer(&(elems->iteratordata_));
687 /* Ensure that previously allocated extendCEs is freed before setting to NULL. */
688 if (elems->iteratordata_.extendCEs != NULL) {
689 uprv_free(elems->iteratordata_.extendCEs);
690 }
691 uprv_init_collIterate(elems->iteratordata_.coll, text, textLength,
692 &elems->iteratordata_, status);
693
694 elems->reset_ = TRUE;
695 } 478 }
696 479
697 U_CAPI int32_t U_EXPORT2 480 U_CAPI int32_t U_EXPORT2
698 ucol_getOffset(const UCollationElements *elems) 481 ucol_getOffset(const UCollationElements *elems)
699 { 482 {
700 const collIterate *ci = &(elems->iteratordata_); 483 return CollationElementIterator::fromUCollationElements(elems)->getOffset();
701
702 if (ci->offsetRepeatCount > 0 && ci->offsetRepeatValue != 0) {
703 return ci->offsetRepeatValue;
704 }
705
706 if (ci->offsetReturn != NULL) {
707 return *ci->offsetReturn;
708 }
709
710 // while processing characters in normalization buffer getOffset will
711 // return the next non-normalized character.
712 // should be inline with the old implementation since the old codes uses
713 // nextDecomp in normalizer which also decomposes the string till the
714 // first base character is found.
715 if (ci->flags & UCOL_ITER_INNORMBUF) {
716 if (ci->fcdPosition == NULL) {
717 return 0;
718 }
719 return (int32_t)(ci->fcdPosition - ci->string);
720 }
721 else {
722 return (int32_t)(ci->pos - ci->string);
723 }
724 } 484 }
725 485
726 U_CAPI void U_EXPORT2 486 U_CAPI void U_EXPORT2
727 ucol_setOffset(UCollationElements *elems, 487 ucol_setOffset(UCollationElements *elems,
728 int32_t offset, 488 int32_t offset,
729 UErrorCode *status) 489 UErrorCode *status)
730 { 490 {
731 if (U_FAILURE(*status)) { 491 if (U_FAILURE(*status)) {
732 return; 492 return;
733 } 493 }
734 494
735 // this methods will clean up any use of the writable buffer and points to 495 CollationElementIterator::fromUCollationElements(elems)->setOffset(offset, * status);
736 // the original string
737 collIterate *ci = &(elems->iteratordata_);
738 ci->pos = ci->string + offset;
739 ci->CEpos = ci->toReturn = ci->CEs;
740 if (ci->flags & UCOL_ITER_INNORMBUF) {
741 ci->flags = ci->origFlags;
742 }
743 if ((ci->flags & UCOL_ITER_HASLEN) == 0) {
744 ci->endp = ci->string + u_strlen(ci->string);
745 ci->flags |= UCOL_ITER_HASLEN;
746 }
747 ci->fcdPosition = NULL;
748 elems->reset_ = FALSE;
749
750 » ci->offsetReturn = NULL;
751 ci->offsetStore = ci->offsetBuffer;
752 » ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
753 } 496 }
754 497
755 U_CAPI int32_t U_EXPORT2 498 U_CAPI int32_t U_EXPORT2
756 ucol_primaryOrder (int32_t order) 499 ucol_primaryOrder (int32_t order)
757 { 500 {
758 order &= UCOL_PRIMARYMASK; 501 return (order >> 16) & 0xffff;
759 return (order >> UCOL_PRIMARYORDERSHIFT);
760 } 502 }
761 503
762 U_CAPI int32_t U_EXPORT2 504 U_CAPI int32_t U_EXPORT2
763 ucol_secondaryOrder (int32_t order) 505 ucol_secondaryOrder (int32_t order)
764 { 506 {
765 order &= UCOL_SECONDARYMASK; 507 return (order >> 8) & 0xff;
766 return (order >> UCOL_SECONDARYORDERSHIFT);
767 } 508 }
768 509
769 U_CAPI int32_t U_EXPORT2 510 U_CAPI int32_t U_EXPORT2
770 ucol_tertiaryOrder (int32_t order) 511 ucol_tertiaryOrder (int32_t order)
771 { 512 {
772 return (order & UCOL_TERTIARYMASK); 513 return order & 0xff;
773 }
774
775
776 void ucol_freeOffsetBuffer(collIterate *s) {
777 if (s != NULL && s->offsetBuffer != NULL) {
778 uprv_free(s->offsetBuffer);
779 s->offsetBuffer = NULL;
780 s->offsetBufferSize = 0;
781 }
782 } 514 }
783 515
784 #endif /* #if !UCONFIG_NO_COLLATION */ 516 #endif /* #if !UCONFIG_NO_COLLATION */
OLDNEW
« no previous file with comments | « source/i18n/ucol_wgt.cpp ('k') | source/i18n/ucurr.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698