OLD | NEW |
1 /* | 1 /* |
2 ****************************************************************************** | 2 ****************************************************************************** |
3 * Copyright (C) 2001-2011, International Business Machines | 3 * Copyright (C) 2001-2014, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ****************************************************************************** | 5 ****************************************************************************** |
6 * | 6 * |
7 * File ucoleitr.cpp | 7 * File ucoleitr.cpp |
8 * | 8 * |
9 * Modification History: | 9 * Modification History: |
10 * | 10 * |
11 * Date Name Description | 11 * Date Name Description |
12 * 02/15/2001 synwee Modified all methods to process its own function | 12 * 02/15/2001 synwee Modified all methods to process its own function |
13 * instead of calling the equivalent c++ api (coleitr.h) | 13 * instead of calling the equivalent c++ api (coleitr.h) |
| 14 * 2012-2014 markus Rewritten in C++ again. |
14 ******************************************************************************/ | 15 ******************************************************************************/ |
15 | 16 |
16 #include "unicode/utypes.h" | 17 #include "unicode/utypes.h" |
17 | 18 |
18 #if !UCONFIG_NO_COLLATION | 19 #if !UCONFIG_NO_COLLATION |
19 | 20 |
| 21 #include "unicode/coleitr.h" |
| 22 #include "unicode/tblcoll.h" |
20 #include "unicode/ucoleitr.h" | 23 #include "unicode/ucoleitr.h" |
21 #include "unicode/ustring.h" | 24 #include "unicode/ustring.h" |
22 #include "unicode/sortkey.h" | 25 #include "unicode/sortkey.h" |
23 #include "unicode/uobject.h" | 26 #include "unicode/uobject.h" |
24 #include "ucol_imp.h" | |
25 #include "cmemory.h" | 27 #include "cmemory.h" |
| 28 #include "usrchimp.h" |
26 | 29 |
27 U_NAMESPACE_USE | 30 U_NAMESPACE_USE |
28 | 31 |
29 #define BUFFER_LENGTH 100 | 32 #define BUFFER_LENGTH 100 |
30 | 33 |
31 #define DEFAULT_BUFFER_SIZE 16 | 34 #define DEFAULT_BUFFER_SIZE 16 |
32 #define BUFFER_GROW 8 | 35 #define BUFFER_GROW 8 |
33 | 36 |
34 #define ARRAY_SIZE(array) (sizeof array / sizeof array[0]) | 37 #define ARRAY_SIZE(array) (sizeof array / sizeof array[0]) |
35 | 38 |
36 #define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0]) | 39 #define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0]) |
37 | 40 |
38 #define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) | 41 #define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) |
39 | 42 |
40 #define GROW_ARRAY(array, newSize) uprv_realloc((void *) (array), (newSize) * sizeof (array)[0]) | 43 #define GROW_ARRAY(array, newSize) uprv_realloc((void *) (array), (newSize) * sizeof (array)[0]) |
41 | 44 |
42 #define DELETE_ARRAY(array) uprv_free((void *) (array)) | 45 #define DELETE_ARRAY(array) uprv_free((void *) (array)) |
43 | 46 |
44 typedef struct icu::collIterate collIterator; | |
45 | |
46 struct RCEI | 47 struct RCEI |
47 { | 48 { |
48 uint32_t ce; | 49 uint32_t ce; |
49 int32_t low; | 50 int32_t low; |
50 int32_t high; | 51 int32_t high; |
51 }; | 52 }; |
52 | 53 |
53 U_NAMESPACE_BEGIN | 54 U_NAMESPACE_BEGIN |
54 | 55 |
55 struct RCEBuffer | 56 struct RCEBuffer |
56 { | 57 { |
57 RCEI defaultBuffer[DEFAULT_BUFFER_SIZE]; | 58 RCEI defaultBuffer[DEFAULT_BUFFER_SIZE]; |
58 RCEI *buffer; | 59 RCEI *buffer; |
59 int32_t bufferIndex; | 60 int32_t bufferIndex; |
60 int32_t bufferSize; | 61 int32_t bufferSize; |
61 | 62 |
62 RCEBuffer(); | 63 RCEBuffer(); |
63 ~RCEBuffer(); | 64 ~RCEBuffer(); |
64 | 65 |
65 UBool empty() const; | 66 UBool empty() const; |
66 void put(uint32_t ce, int32_t ixLow, int32_t ixHigh); | 67 void put(uint32_t ce, int32_t ixLow, int32_t ixHigh); |
67 const RCEI *get(); | 68 const RCEI *get(); |
68 }; | 69 }; |
69 | 70 |
70 RCEBuffer::RCEBuffer() | 71 RCEBuffer::RCEBuffer() |
71 { | 72 { |
72 buffer = defaultBuffer; | 73 buffer = defaultBuffer; |
73 bufferIndex = 0; | 74 bufferIndex = 0; |
74 bufferSize = DEFAULT_BUFFER_SIZE; | 75 bufferSize = UPRV_LENGTHOF(defaultBuffer); |
75 } | 76 } |
76 | 77 |
77 RCEBuffer::~RCEBuffer() | 78 RCEBuffer::~RCEBuffer() |
78 { | 79 { |
79 if (buffer != defaultBuffer) { | 80 if (buffer != defaultBuffer) { |
80 DELETE_ARRAY(buffer); | 81 DELETE_ARRAY(buffer); |
81 } | 82 } |
82 } | 83 } |
83 | 84 |
84 UBool RCEBuffer::empty() const | 85 UBool RCEBuffer::empty() const |
(...skipping 25 matching lines...) Expand all Loading... |
110 | 111 |
111 const RCEI *RCEBuffer::get() | 112 const RCEI *RCEBuffer::get() |
112 { | 113 { |
113 if (bufferIndex > 0) { | 114 if (bufferIndex > 0) { |
114 return &buffer[--bufferIndex]; | 115 return &buffer[--bufferIndex]; |
115 } | 116 } |
116 | 117 |
117 return NULL; | 118 return NULL; |
118 } | 119 } |
119 | 120 |
120 struct PCEI | |
121 { | |
122 uint64_t ce; | |
123 int32_t low; | |
124 int32_t high; | |
125 }; | |
126 | |
127 struct PCEBuffer | |
128 { | |
129 PCEI defaultBuffer[DEFAULT_BUFFER_SIZE]; | |
130 PCEI *buffer; | |
131 int32_t bufferIndex; | |
132 int32_t bufferSize; | |
133 | |
134 PCEBuffer(); | |
135 ~PCEBuffer(); | |
136 | |
137 void reset(); | |
138 UBool empty() const; | |
139 void put(uint64_t ce, int32_t ixLow, int32_t ixHigh); | |
140 const PCEI *get(); | |
141 }; | |
142 | |
143 PCEBuffer::PCEBuffer() | 121 PCEBuffer::PCEBuffer() |
144 { | 122 { |
145 buffer = defaultBuffer; | 123 buffer = defaultBuffer; |
146 bufferIndex = 0; | 124 bufferIndex = 0; |
147 bufferSize = DEFAULT_BUFFER_SIZE; | 125 bufferSize = UPRV_LENGTHOF(defaultBuffer); |
148 } | 126 } |
149 | 127 |
150 PCEBuffer::~PCEBuffer() | 128 PCEBuffer::~PCEBuffer() |
151 { | 129 { |
152 if (buffer != defaultBuffer) { | 130 if (buffer != defaultBuffer) { |
153 DELETE_ARRAY(buffer); | 131 DELETE_ARRAY(buffer); |
154 } | 132 } |
155 } | 133 } |
156 | 134 |
157 void PCEBuffer::reset() | 135 void PCEBuffer::reset() |
(...skipping 30 matching lines...) Expand all Loading... |
188 | 166 |
189 const PCEI *PCEBuffer::get() | 167 const PCEI *PCEBuffer::get() |
190 { | 168 { |
191 if (bufferIndex > 0) { | 169 if (bufferIndex > 0) { |
192 return &buffer[--bufferIndex]; | 170 return &buffer[--bufferIndex]; |
193 } | 171 } |
194 | 172 |
195 return NULL; | 173 return NULL; |
196 } | 174 } |
197 | 175 |
198 /* | 176 UCollationPCE::UCollationPCE(UCollationElements *elems) { init(elems); } |
199 * This inherits from UObject so that | |
200 * it can be allocated by new and the | |
201 * constructor for PCEBuffer is called. | |
202 */ | |
203 struct UCollationPCE : public UObject | |
204 { | |
205 PCEBuffer pceBuffer; | |
206 UCollationStrength strength; | |
207 UBool toShift; | |
208 UBool isShifted; | |
209 uint32_t variableTop; | |
210 | 177 |
211 UCollationPCE(UCollationElements *elems); | 178 UCollationPCE::UCollationPCE(CollationElementIterator *iter) { init(iter); } |
212 ~UCollationPCE(); | |
213 | 179 |
214 void init(const UCollator *coll); | 180 void UCollationPCE::init(UCollationElements *elems) { |
215 | 181 init(CollationElementIterator::fromUCollationElements(elems)); |
216 virtual UClassID getDynamicClassID() const; | |
217 static UClassID getStaticClassID(); | |
218 }; | |
219 | |
220 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UCollationPCE) | |
221 | |
222 UCollationPCE::UCollationPCE(UCollationElements *elems) | |
223 { | |
224 init(elems->iteratordata_.coll); | |
225 } | 182 } |
226 | 183 |
227 void UCollationPCE::init(const UCollator *coll) | 184 void UCollationPCE::init(CollationElementIterator *iter) |
| 185 { |
| 186 cei = iter; |
| 187 init(*iter->rbc_); |
| 188 } |
| 189 |
| 190 void UCollationPCE::init(const Collator &coll) |
228 { | 191 { |
229 UErrorCode status = U_ZERO_ERROR; | 192 UErrorCode status = U_ZERO_ERROR; |
230 | 193 |
231 strength = ucol_getStrength(coll); | 194 strength = coll.getAttribute(UCOL_STRENGTH, status); |
232 toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED; | 195 toShift = coll.getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED; |
233 isShifted = FALSE; | 196 isShifted = FALSE; |
234 variableTop = coll->variableTopValue << 16; | 197 variableTop = coll.getVariableTop(status); |
235 } | 198 } |
236 | 199 |
237 UCollationPCE::~UCollationPCE() | 200 UCollationPCE::~UCollationPCE() |
238 { | 201 { |
239 // nothing to do | 202 // nothing to do |
240 } | 203 } |
241 | 204 |
242 | 205 uint64_t UCollationPCE::processCE(uint32_t ce) |
243 U_NAMESPACE_END | |
244 | |
245 | |
246 inline uint64_t processCE(UCollationElements *elems, uint32_t ce) | |
247 { | 206 { |
248 uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0; | 207 uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0; |
249 | 208 |
250 // This is clean, but somewhat slow... | 209 // This is clean, but somewhat slow... |
251 // We could apply the mask to ce and then | 210 // We could apply the mask to ce and then |
252 // just get all three orders... | 211 // just get all three orders... |
253 switch(elems->pce->strength) { | 212 switch(strength) { |
254 default: | 213 default: |
255 tertiary = ucol_tertiaryOrder(ce); | 214 tertiary = ucol_tertiaryOrder(ce); |
256 /* note fall-through */ | 215 /* note fall-through */ |
257 | 216 |
258 case UCOL_SECONDARY: | 217 case UCOL_SECONDARY: |
259 secondary = ucol_secondaryOrder(ce); | 218 secondary = ucol_secondaryOrder(ce); |
260 /* note fall-through */ | 219 /* note fall-through */ |
261 | 220 |
262 case UCOL_PRIMARY: | 221 case UCOL_PRIMARY: |
263 primary = ucol_primaryOrder(ce); | 222 primary = ucol_primaryOrder(ce); |
264 } | 223 } |
265 | 224 |
266 // **** This should probably handle continuations too. **** | 225 // **** This should probably handle continuations too. **** |
267 // **** That means that we need 24 bits for the primary **** | 226 // **** That means that we need 24 bits for the primary **** |
268 // **** instead of the 16 that we're currently using. **** | 227 // **** instead of the 16 that we're currently using. **** |
269 // **** So we can lay out the 64 bits as: 24.12.12.16. **** | 228 // **** So we can lay out the 64 bits as: 24.12.12.16. **** |
270 // **** Another complication with continuations is that **** | 229 // **** Another complication with continuations is that **** |
271 // **** the *second* CE is marked as a continuation, so **** | 230 // **** the *second* CE is marked as a continuation, so **** |
272 // **** we always have to peek ahead to know how long **** | 231 // **** we always have to peek ahead to know how long **** |
273 // **** the primary is... **** | 232 // **** the primary is... **** |
274 if ((elems->pce->toShift && elems->pce->variableTop > ce && primary != 0) | 233 if ((toShift && variableTop > ce && primary != 0) |
275 || (elems->pce->isShifted && primary == 0)) { | 234 || (isShifted && primary == 0)) { |
276 | 235 |
277 if (primary == 0) { | 236 if (primary == 0) { |
278 return UCOL_IGNORABLE; | 237 return UCOL_IGNORABLE; |
279 } | 238 } |
280 | 239 |
281 if (elems->pce->strength >= UCOL_QUATERNARY) { | 240 if (strength >= UCOL_QUATERNARY) { |
282 quaternary = primary; | 241 quaternary = primary; |
283 } | 242 } |
284 | 243 |
285 primary = secondary = tertiary = 0; | 244 primary = secondary = tertiary = 0; |
286 elems->pce->isShifted = TRUE; | 245 isShifted = TRUE; |
287 } else { | 246 } else { |
288 if (elems->pce->strength >= UCOL_QUATERNARY) { | 247 if (strength >= UCOL_QUATERNARY) { |
289 quaternary = 0xFFFF; | 248 quaternary = 0xFFFF; |
290 } | 249 } |
291 | 250 |
292 elems->pce->isShifted = FALSE; | 251 isShifted = FALSE; |
293 } | 252 } |
294 | 253 |
295 return primary << 48 | secondary << 32 | tertiary << 16 | quaternary; | 254 return primary << 48 | secondary << 32 | tertiary << 16 | quaternary; |
296 } | 255 } |
297 | 256 |
298 U_CAPI void U_EXPORT2 | 257 U_NAMESPACE_END |
299 uprv_init_pce(const UCollationElements *elems) | |
300 { | |
301 if (elems->pce != NULL) { | |
302 elems->pce->init(elems->iteratordata_.coll); | |
303 } | |
304 } | |
305 | |
306 | |
307 | 258 |
308 /* public methods ---------------------------------------------------- */ | 259 /* public methods ---------------------------------------------------- */ |
309 | 260 |
310 U_CAPI UCollationElements* U_EXPORT2 | 261 U_CAPI UCollationElements* U_EXPORT2 |
311 ucol_openElements(const UCollator *coll, | 262 ucol_openElements(const UCollator *coll, |
312 const UChar *text, | 263 const UChar *text, |
313 int32_t textLength, | 264 int32_t textLength, |
314 UErrorCode *status) | 265 UErrorCode *status) |
315 { | 266 { |
316 if (U_FAILURE(*status)) { | 267 if (U_FAILURE(*status)) { |
317 return NULL; | 268 return NULL; |
318 } | 269 } |
| 270 if (coll == NULL || (text == NULL && textLength != 0)) { |
| 271 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 272 return NULL; |
| 273 } |
| 274 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); |
| 275 if (rbc == NULL) { |
| 276 *status = U_UNSUPPORTED_ERROR; // coll is a Collator but not a RuleBasedCollator |
| 277 return NULL; |
| 278 } |
319 | 279 |
320 UCollationElements *result = new UCollationElements; | 280 UnicodeString s((UBool)(textLength < 0), text, textLength); |
321 if (result == NULL) { | 281 CollationElementIterator *cei = rbc->createCollationElementIterator(s); |
| 282 if (cei == NULL) { |
322 *status = U_MEMORY_ALLOCATION_ERROR; | 283 *status = U_MEMORY_ALLOCATION_ERROR; |
323 return NULL; | 284 return NULL; |
324 } | 285 } |
325 | 286 |
326 result->reset_ = TRUE; | 287 return cei->toUCollationElements(); |
327 result->isWritable = FALSE; | |
328 result->pce = NULL; | |
329 | |
330 if (text == NULL) { | |
331 textLength = 0; | |
332 } | |
333 uprv_init_collIterate(coll, text, textLength, &result->iteratordata_, status); | |
334 | |
335 return result; | |
336 } | 288 } |
337 | 289 |
338 | 290 |
339 U_CAPI void U_EXPORT2 | 291 U_CAPI void U_EXPORT2 |
340 ucol_closeElements(UCollationElements *elems) | 292 ucol_closeElements(UCollationElements *elems) |
341 { | 293 { |
342 » if (elems != NULL) { | 294 delete CollationElementIterator::fromUCollationElements(elems); |
343 » collIterate *ci = &elems->iteratordata_; | |
344 | |
345 » if (ci->extendCEs) { | |
346 » » uprv_free(ci->extendCEs); | |
347 » } | |
348 | |
349 » if (ci->offsetBuffer) { | |
350 » » uprv_free(ci->offsetBuffer); | |
351 » } | |
352 | |
353 » if (elems->isWritable && elems->iteratordata_.string != NULL) | |
354 » { | |
355 » » uprv_free((UChar *)elems->iteratordata_.string); | |
356 » } | |
357 | |
358 » if (elems->pce != NULL) { | |
359 » » delete elems->pce; | |
360 » } | |
361 | |
362 » delete elems; | |
363 » } | |
364 } | 295 } |
365 | 296 |
366 U_CAPI void U_EXPORT2 | 297 U_CAPI void U_EXPORT2 |
367 ucol_reset(UCollationElements *elems) | 298 ucol_reset(UCollationElements *elems) |
368 { | 299 { |
369 collIterate *ci = &(elems->iteratordata_); | 300 CollationElementIterator::fromUCollationElements(elems)->reset(); |
370 elems->reset_ = TRUE; | |
371 ci->pos = ci->string; | |
372 if ((ci->flags & UCOL_ITER_HASLEN) == 0 || ci->endp == NULL) { | |
373 ci->endp = ci->string + u_strlen(ci->string); | |
374 } | |
375 ci->CEpos = ci->toReturn = ci->CEs; | |
376 ci->flags = (ci->flags & UCOL_FORCE_HAN_IMPLICIT) | UCOL_ITER_HASLEN; | |
377 if (ci->coll->normalizationMode == UCOL_ON) { | |
378 ci->flags |= UCOL_ITER_NORM; | |
379 } | |
380 | |
381 ci->writableBuffer.remove(); | |
382 ci->fcdPosition = NULL; | |
383 | |
384 //ci->offsetReturn = ci->offsetStore = NULL; | |
385 » ci->offsetRepeatCount = ci->offsetRepeatValue = 0; | |
386 } | |
387 | |
388 U_CAPI void U_EXPORT2 | |
389 ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status) | |
390 { | |
391 if (U_FAILURE(*status)) { | |
392 return; | |
393 } | |
394 | |
395 if (elems == NULL) { | |
396 *status = U_ILLEGAL_ARGUMENT_ERROR; | |
397 return; | |
398 } | |
399 | |
400 elems->iteratordata_.flags |= UCOL_FORCE_HAN_IMPLICIT; | |
401 } | 301 } |
402 | 302 |
403 U_CAPI int32_t U_EXPORT2 | 303 U_CAPI int32_t U_EXPORT2 |
404 ucol_next(UCollationElements *elems, | 304 ucol_next(UCollationElements *elems, |
405 UErrorCode *status) | 305 UErrorCode *status) |
406 { | 306 { |
407 int32_t result; | |
408 if (U_FAILURE(*status)) { | 307 if (U_FAILURE(*status)) { |
409 return UCOL_NULLORDER; | 308 return UCOL_NULLORDER; |
410 } | 309 } |
411 | 310 |
412 elems->reset_ = FALSE; | 311 return CollationElementIterator::fromUCollationElements(elems)->next(*status); |
413 | |
414 result = (int32_t)ucol_getNextCE(elems->iteratordata_.coll, | |
415 &elems->iteratordata_, | |
416 status); | |
417 | |
418 if (result == UCOL_NO_MORE_CES) { | |
419 result = UCOL_NULLORDER; | |
420 } | |
421 return result; | |
422 } | 312 } |
423 | 313 |
424 U_CAPI int64_t U_EXPORT2 | 314 U_NAMESPACE_BEGIN |
425 ucol_nextProcessed(UCollationElements *elems, | 315 |
| 316 int64_t |
| 317 UCollationPCE::nextProcessed( |
426 int32_t *ixLow, | 318 int32_t *ixLow, |
427 int32_t *ixHigh, | 319 int32_t *ixHigh, |
428 UErrorCode *status) | 320 UErrorCode *status) |
429 { | 321 { |
430 const UCollator *coll = elems->iteratordata_.coll; | |
431 int64_t result = UCOL_IGNORABLE; | 322 int64_t result = UCOL_IGNORABLE; |
432 uint32_t low = 0, high = 0; | 323 uint32_t low = 0, high = 0; |
433 | 324 |
434 if (U_FAILURE(*status)) { | 325 if (U_FAILURE(*status)) { |
435 return UCOL_PROCESSED_NULLORDER; | 326 return UCOL_PROCESSED_NULLORDER; |
436 } | 327 } |
437 | 328 |
438 if (elems->pce == NULL) { | 329 pceBuffer.reset(); |
439 elems->pce = new UCollationPCE(elems); | |
440 } else { | |
441 elems->pce->pceBuffer.reset(); | |
442 } | |
443 | |
444 elems->reset_ = FALSE; | |
445 | 330 |
446 do { | 331 do { |
447 low = ucol_getOffset(elems); | 332 low = cei->getOffset(); |
448 uint32_t ce = (uint32_t) ucol_getNextCE(coll, &elems->iteratordata_, status); | 333 int32_t ce = cei->next(*status); |
449 high = ucol_getOffset(elems); | 334 high = cei->getOffset(); |
450 | 335 |
451 if (ce == UCOL_NO_MORE_CES) { | 336 if (ce == UCOL_NULLORDER) { |
452 result = UCOL_PROCESSED_NULLORDER; | 337 result = UCOL_PROCESSED_NULLORDER; |
453 break; | 338 break; |
454 } | 339 } |
455 | 340 |
456 result = processCE(elems, ce); | 341 result = processCE((uint32_t)ce); |
457 } while (result == UCOL_IGNORABLE); | 342 } while (result == UCOL_IGNORABLE); |
458 | 343 |
459 if (ixLow != NULL) { | 344 if (ixLow != NULL) { |
460 *ixLow = low; | 345 *ixLow = low; |
461 } | 346 } |
462 | 347 |
463 if (ixHigh != NULL) { | 348 if (ixHigh != NULL) { |
464 *ixHigh = high; | 349 *ixHigh = high; |
465 } | 350 } |
466 | 351 |
467 return result; | 352 return result; |
468 } | 353 } |
469 | 354 |
| 355 U_NAMESPACE_END |
| 356 |
470 U_CAPI int32_t U_EXPORT2 | 357 U_CAPI int32_t U_EXPORT2 |
471 ucol_previous(UCollationElements *elems, | 358 ucol_previous(UCollationElements *elems, |
472 UErrorCode *status) | 359 UErrorCode *status) |
473 { | 360 { |
474 if(U_FAILURE(*status)) { | 361 if(U_FAILURE(*status)) { |
475 return UCOL_NULLORDER; | 362 return UCOL_NULLORDER; |
476 } | 363 } |
477 else | 364 return CollationElementIterator::fromUCollationElements(elems)->previous(*status); |
478 { | |
479 int32_t result; | |
480 | |
481 if (elems->reset_ && (elems->iteratordata_.pos == elems->iteratordata_.string)) { | |
482 if (elems->iteratordata_.endp == NULL) { | |
483 elems->iteratordata_.endp = elems->iteratordata_.string + | |
484 u_strlen(elems->iteratordata_.string); | |
485 elems->iteratordata_.flags |= UCOL_ITER_HASLEN; | |
486 } | |
487 elems->iteratordata_.pos = elems->iteratordata_.endp; | |
488 elems->iteratordata_.fcdPosition = elems->iteratordata_.endp; | |
489 } | |
490 | |
491 elems->reset_ = FALSE; | |
492 | |
493 result = (int32_t)ucol_getPrevCE(elems->iteratordata_.coll, | |
494 &(elems->iteratordata_), | |
495 status); | |
496 | |
497 if (result == UCOL_NO_MORE_CES) { | |
498 result = UCOL_NULLORDER; | |
499 } | |
500 | |
501 return result; | |
502 } | |
503 } | 365 } |
504 | 366 |
505 U_CAPI int64_t U_EXPORT2 | 367 U_NAMESPACE_BEGIN |
506 ucol_previousProcessed(UCollationElements *elems, | 368 |
| 369 int64_t |
| 370 UCollationPCE::previousProcessed( |
507 int32_t *ixLow, | 371 int32_t *ixLow, |
508 int32_t *ixHigh, | 372 int32_t *ixHigh, |
509 UErrorCode *status) | 373 UErrorCode *status) |
510 { | 374 { |
511 const UCollator *coll = elems->iteratordata_.coll; | |
512 int64_t result = UCOL_IGNORABLE; | 375 int64_t result = UCOL_IGNORABLE; |
513 // int64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0; | |
514 // UCollationStrength strength = ucol_getStrength(coll); | |
515 // UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED; | |
516 // uint32_t variableTop = coll->variableTopValue; | |
517 int32_t low = 0, high = 0; | 376 int32_t low = 0, high = 0; |
518 | 377 |
519 if (U_FAILURE(*status)) { | 378 if (U_FAILURE(*status)) { |
520 return UCOL_PROCESSED_NULLORDER; | 379 return UCOL_PROCESSED_NULLORDER; |
521 } | 380 } |
522 | 381 |
523 if (elems->reset_ && | 382 // pceBuffer.reset(); |
524 (elems->iteratordata_.pos == elems->iteratordata_.string)) { | |
525 if (elems->iteratordata_.endp == NULL) { | |
526 elems->iteratordata_.endp = elems->iteratordata_.string + | |
527 u_strlen(elems->iteratordata_.string); | |
528 elems->iteratordata_.flags |= UCOL_ITER_HASLEN; | |
529 } | |
530 | 383 |
531 elems->iteratordata_.pos = elems->iteratordata_.endp; | 384 while (pceBuffer.empty()) { |
532 elems->iteratordata_.fcdPosition = elems->iteratordata_.endp; | |
533 } | |
534 | |
535 if (elems->pce == NULL) { | |
536 elems->pce = new UCollationPCE(elems); | |
537 } else { | |
538 //elems->pce->pceBuffer.reset(); | |
539 } | |
540 | |
541 elems->reset_ = FALSE; | |
542 | |
543 while (elems->pce->pceBuffer.empty()) { | |
544 // buffer raw CEs up to non-ignorable primary | 385 // buffer raw CEs up to non-ignorable primary |
545 RCEBuffer rceb; | 386 RCEBuffer rceb; |
546 uint32_t ce; | 387 int32_t ce; |
547 | 388 |
548 // **** do we need to reset rceb, or will it always be empty at this point **** | 389 // **** do we need to reset rceb, or will it always be empty at this point **** |
549 do { | 390 do { |
550 high = ucol_getOffset(elems); | 391 high = cei->getOffset(); |
551 ce = ucol_getPrevCE(coll, &elems->iteratordata_, status); | 392 ce = cei->previous(*status); |
552 low = ucol_getOffset(elems); | 393 low = cei->getOffset(); |
553 | 394 |
554 if (ce == UCOL_NO_MORE_CES) { | 395 if (ce == UCOL_NULLORDER) { |
555 if (! rceb.empty()) { | 396 if (! rceb.empty()) { |
556 break; | 397 break; |
557 } | 398 } |
558 | 399 |
559 goto finish; | 400 goto finish; |
560 } | 401 } |
561 | 402 |
562 rceb.put(ce, low, high); | 403 rceb.put((uint32_t)ce, low, high); |
563 } while ((ce & UCOL_PRIMARYMASK) == 0); | 404 } while ((ce & UCOL_PRIMARYORDERMASK) == 0 || isContinuation(ce)); |
564 | 405 |
565 // process the raw CEs | 406 // process the raw CEs |
566 while (! rceb.empty()) { | 407 while (! rceb.empty()) { |
567 const RCEI *rcei = rceb.get(); | 408 const RCEI *rcei = rceb.get(); |
568 | 409 |
569 result = processCE(elems, rcei->ce); | 410 result = processCE(rcei->ce); |
570 | 411 |
571 if (result != UCOL_IGNORABLE) { | 412 if (result != UCOL_IGNORABLE) { |
572 elems->pce->pceBuffer.put(result, rcei->low, rcei->high); | 413 pceBuffer.put(result, rcei->low, rcei->high); |
573 } | 414 } |
574 } | 415 } |
575 } | 416 } |
576 | 417 |
577 finish: | 418 finish: |
578 if (elems->pce->pceBuffer.empty()) { | 419 if (pceBuffer.empty()) { |
579 // **** Is -1 the right value for ixLow, ixHigh? **** | 420 // **** Is -1 the right value for ixLow, ixHigh? **** |
580 if (ixLow != NULL) { | 421 if (ixLow != NULL) { |
581 *ixLow = -1; | 422 *ixLow = -1; |
582 } | 423 } |
583 | 424 |
584 if (ixHigh != NULL) { | 425 if (ixHigh != NULL) { |
585 *ixHigh = -1 | 426 *ixHigh = -1 |
586 ; | 427 ; |
587 } | 428 } |
588 return UCOL_PROCESSED_NULLORDER; | 429 return UCOL_PROCESSED_NULLORDER; |
589 } | 430 } |
590 | 431 |
591 const PCEI *pcei = elems->pce->pceBuffer.get(); | 432 const PCEI *pcei = pceBuffer.get(); |
592 | 433 |
593 if (ixLow != NULL) { | 434 if (ixLow != NULL) { |
594 *ixLow = pcei->low; | 435 *ixLow = pcei->low; |
595 } | 436 } |
596 | 437 |
597 if (ixHigh != NULL) { | 438 if (ixHigh != NULL) { |
598 *ixHigh = pcei->high; | 439 *ixHigh = pcei->high; |
599 } | 440 } |
600 | 441 |
601 return pcei->ce; | 442 return pcei->ce; |
602 } | 443 } |
603 | 444 |
| 445 U_NAMESPACE_END |
| 446 |
604 U_CAPI int32_t U_EXPORT2 | 447 U_CAPI int32_t U_EXPORT2 |
605 ucol_getMaxExpansion(const UCollationElements *elems, | 448 ucol_getMaxExpansion(const UCollationElements *elems, |
606 int32_t order) | 449 int32_t order) |
607 { | 450 { |
608 uint8_t result; | 451 return CollationElementIterator::fromUCollationElements(elems)->getMaxExpansion(order); |
609 | 452 |
610 #if 0 | 453 // TODO: The old code masked the order according to strength and then did a binary search. |
611 UCOL_GETMAXEXPANSION(elems->iteratordata_.coll, (uint32_t)order, result); | 454 // However this was probably at least partially broken because of the following comment. |
612 #else | 455 // Still, it might have found a match when this version may not. |
613 const UCollator *coll = elems->iteratordata_.coll; | |
614 const uint32_t *start; | |
615 const uint32_t *limit; | |
616 const uint32_t *mid; | |
617 uint32_t strengthMask = 0; | |
618 uint32_t mOrder = (uint32_t) order; | |
619 | |
620 switch (coll->strength) | |
621 { | |
622 default: | |
623 strengthMask |= UCOL_TERTIARYORDERMASK; | |
624 /* fall through */ | |
625 | |
626 case UCOL_SECONDARY: | |
627 strengthMask |= UCOL_SECONDARYORDERMASK; | |
628 /* fall through */ | |
629 | |
630 case UCOL_PRIMARY: | |
631 strengthMask |= UCOL_PRIMARYORDERMASK; | |
632 } | |
633 | |
634 mOrder &= strengthMask; | |
635 start = (coll)->endExpansionCE; | |
636 limit = (coll)->lastEndExpansionCE; | |
637 | |
638 while (start < limit - 1) { | |
639 mid = start + ((limit - start) >> 1); | |
640 if (mOrder <= (*mid & strengthMask)) { | |
641 limit = mid; | |
642 } else { | |
643 start = mid; | |
644 } | |
645 } | |
646 | 456 |
647 // FIXME: with a masked search, there might be more than one hit, | 457 // FIXME: with a masked search, there might be more than one hit, |
648 // so we need to look forward and backward from the match to find all | 458 // so we need to look forward and backward from the match to find all |
649 // of the hits... | 459 // of the hits... |
650 if ((*start & strengthMask) == mOrder) { | 460 } |
651 result = *((coll)->expansionCESize + (start - (coll)->endExpansionCE)); | |
652 } else if ((*limit & strengthMask) == mOrder) { | |
653 result = *(coll->expansionCESize + (limit - coll->endExpansionCE)); | |
654 } else if ((mOrder & 0xFFFF) == 0x00C0) { | |
655 result = 2; | |
656 } else { | |
657 result = 1; | |
658 } | |
659 #endif | |
660 | 461 |
661 return result; | |
662 } | |
663 | |
664 U_CAPI void U_EXPORT2 | 462 U_CAPI void U_EXPORT2 |
665 ucol_setText( UCollationElements *elems, | 463 ucol_setText( UCollationElements *elems, |
666 const UChar *text, | 464 const UChar *text, |
667 int32_t textLength, | 465 int32_t textLength, |
668 UErrorCode *status) | 466 UErrorCode *status) |
669 { | 467 { |
670 if (U_FAILURE(*status)) { | 468 if (U_FAILURE(*status)) { |
671 return; | 469 return; |
672 } | 470 } |
673 | 471 |
674 if (elems->isWritable && elems->iteratordata_.string != NULL) | 472 if ((text == NULL && textLength != 0)) { |
675 { | 473 *status = U_ILLEGAL_ARGUMENT_ERROR; |
676 uprv_free((UChar *)elems->iteratordata_.string); | 474 return; |
677 } | 475 } |
678 | 476 UnicodeString s((UBool)(textLength < 0), text, textLength); |
679 if (text == NULL) { | 477 return CollationElementIterator::fromUCollationElements(elems)->setText(s, *status); |
680 textLength = 0; | |
681 } | |
682 | |
683 elems->isWritable = FALSE; | |
684 | |
685 /* free offset buffer to avoid memory leak before initializing. */ | |
686 ucol_freeOffsetBuffer(&(elems->iteratordata_)); | |
687 /* Ensure that previously allocated extendCEs is freed before setting to NULL. */ | |
688 if (elems->iteratordata_.extendCEs != NULL) { | |
689 uprv_free(elems->iteratordata_.extendCEs); | |
690 } | |
691 uprv_init_collIterate(elems->iteratordata_.coll, text, textLength, | |
692 &elems->iteratordata_, status); | |
693 | |
694 elems->reset_ = TRUE; | |
695 } | 478 } |
696 | 479 |
697 U_CAPI int32_t U_EXPORT2 | 480 U_CAPI int32_t U_EXPORT2 |
698 ucol_getOffset(const UCollationElements *elems) | 481 ucol_getOffset(const UCollationElements *elems) |
699 { | 482 { |
700 const collIterate *ci = &(elems->iteratordata_); | 483 return CollationElementIterator::fromUCollationElements(elems)->getOffset(); |
701 | |
702 if (ci->offsetRepeatCount > 0 && ci->offsetRepeatValue != 0) { | |
703 return ci->offsetRepeatValue; | |
704 } | |
705 | |
706 if (ci->offsetReturn != NULL) { | |
707 return *ci->offsetReturn; | |
708 } | |
709 | |
710 // while processing characters in normalization buffer getOffset will | |
711 // return the next non-normalized character. | |
712 // should be inline with the old implementation since the old codes uses | |
713 // nextDecomp in normalizer which also decomposes the string till the | |
714 // first base character is found. | |
715 if (ci->flags & UCOL_ITER_INNORMBUF) { | |
716 if (ci->fcdPosition == NULL) { | |
717 return 0; | |
718 } | |
719 return (int32_t)(ci->fcdPosition - ci->string); | |
720 } | |
721 else { | |
722 return (int32_t)(ci->pos - ci->string); | |
723 } | |
724 } | 484 } |
725 | 485 |
726 U_CAPI void U_EXPORT2 | 486 U_CAPI void U_EXPORT2 |
727 ucol_setOffset(UCollationElements *elems, | 487 ucol_setOffset(UCollationElements *elems, |
728 int32_t offset, | 488 int32_t offset, |
729 UErrorCode *status) | 489 UErrorCode *status) |
730 { | 490 { |
731 if (U_FAILURE(*status)) { | 491 if (U_FAILURE(*status)) { |
732 return; | 492 return; |
733 } | 493 } |
734 | 494 |
735 // this methods will clean up any use of the writable buffer and points to | 495 CollationElementIterator::fromUCollationElements(elems)->setOffset(offset, *status); |
736 // the original string | |
737 collIterate *ci = &(elems->iteratordata_); | |
738 ci->pos = ci->string + offset; | |
739 ci->CEpos = ci->toReturn = ci->CEs; | |
740 if (ci->flags & UCOL_ITER_INNORMBUF) { | |
741 ci->flags = ci->origFlags; | |
742 } | |
743 if ((ci->flags & UCOL_ITER_HASLEN) == 0) { | |
744 ci->endp = ci->string + u_strlen(ci->string); | |
745 ci->flags |= UCOL_ITER_HASLEN; | |
746 } | |
747 ci->fcdPosition = NULL; | |
748 elems->reset_ = FALSE; | |
749 | |
750 » ci->offsetReturn = NULL; | |
751 ci->offsetStore = ci->offsetBuffer; | |
752 » ci->offsetRepeatCount = ci->offsetRepeatValue = 0; | |
753 } | 496 } |
754 | 497 |
755 U_CAPI int32_t U_EXPORT2 | 498 U_CAPI int32_t U_EXPORT2 |
756 ucol_primaryOrder (int32_t order) | 499 ucol_primaryOrder (int32_t order) |
757 { | 500 { |
758 order &= UCOL_PRIMARYMASK; | 501 return (order >> 16) & 0xffff; |
759 return (order >> UCOL_PRIMARYORDERSHIFT); | |
760 } | 502 } |
761 | 503 |
762 U_CAPI int32_t U_EXPORT2 | 504 U_CAPI int32_t U_EXPORT2 |
763 ucol_secondaryOrder (int32_t order) | 505 ucol_secondaryOrder (int32_t order) |
764 { | 506 { |
765 order &= UCOL_SECONDARYMASK; | 507 return (order >> 8) & 0xff; |
766 return (order >> UCOL_SECONDARYORDERSHIFT); | |
767 } | 508 } |
768 | 509 |
769 U_CAPI int32_t U_EXPORT2 | 510 U_CAPI int32_t U_EXPORT2 |
770 ucol_tertiaryOrder (int32_t order) | 511 ucol_tertiaryOrder (int32_t order) |
771 { | 512 { |
772 return (order & UCOL_TERTIARYMASK); | 513 return order & 0xff; |
773 } | |
774 | |
775 | |
776 void ucol_freeOffsetBuffer(collIterate *s) { | |
777 if (s != NULL && s->offsetBuffer != NULL) { | |
778 uprv_free(s->offsetBuffer); | |
779 s->offsetBuffer = NULL; | |
780 s->offsetBufferSize = 0; | |
781 } | |
782 } | 514 } |
783 | 515 |
784 #endif /* #if !UCONFIG_NO_COLLATION */ | 516 #endif /* #if !UCONFIG_NO_COLLATION */ |
OLD | NEW |