OLD | NEW |
1 /* | 1 /* |
2 ********************************************************************** | 2 ********************************************************************** |
3 * Copyright (C) 2001-2011 IBM and others. All rights reserved. | 3 * Copyright (C) 2001-2014 IBM and others. All rights reserved. |
4 ********************************************************************** | 4 ********************************************************************** |
5 * Date Name Description | 5 * Date Name Description |
6 * 07/02/2001 synwee Creation. | 6 * 07/02/2001 synwee Creation. |
7 ********************************************************************** | 7 ********************************************************************** |
8 */ | 8 */ |
9 | 9 |
10 #include "unicode/utypes.h" | 10 #include "unicode/utypes.h" |
11 | 11 |
12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION | 12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION |
13 | 13 |
14 #include "unicode/usearch.h" | 14 #include "unicode/usearch.h" |
15 #include "unicode/ustring.h" | 15 #include "unicode/ustring.h" |
16 #include "unicode/uchar.h" | 16 #include "unicode/uchar.h" |
17 #include "unicode/utf16.h" | 17 #include "unicode/utf16.h" |
18 #include "normalizer2impl.h" | 18 #include "normalizer2impl.h" |
19 #include "ucol_imp.h" | |
20 #include "usrchimp.h" | 19 #include "usrchimp.h" |
21 #include "cmemory.h" | 20 #include "cmemory.h" |
22 #include "ucln_in.h" | 21 #include "ucln_in.h" |
23 #include "uassert.h" | 22 #include "uassert.h" |
24 #include "ustr_imp.h" | 23 #include "ustr_imp.h" |
25 | 24 |
26 U_NAMESPACE_USE | 25 U_NAMESPACE_USE |
27 | 26 |
28 // don't use Boyer-Moore | 27 // don't use Boyer-Moore |
29 // (and if we decide to turn this on again there are several new TODOs that will
need to be addressed) | 28 // (and if we decide to turn this on again there are several new TODOs that will
need to be addressed) |
30 #define BOYER_MOORE 0 | 29 #define BOYER_MOORE 0 |
31 | 30 |
32 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | |
33 | |
34 // internal definition --------------------------------------------------- | 31 // internal definition --------------------------------------------------- |
35 | 32 |
36 #define LAST_BYTE_MASK_ 0xFF | 33 #define LAST_BYTE_MASK_ 0xFF |
37 #define SECOND_LAST_BYTE_SHIFT_ 8 | 34 #define SECOND_LAST_BYTE_SHIFT_ 8 |
38 #define SUPPLEMENTARY_MIN_VALUE_ 0x10000 | 35 #define SUPPLEMENTARY_MIN_VALUE_ 0x10000 |
39 | 36 |
40 static const Normalizer2Impl *g_nfcImpl = NULL; | 37 static const Normalizer2Impl *g_nfcImpl = NULL; |
41 | 38 |
42 // internal methods ------------------------------------------------- | 39 // internal methods ------------------------------------------------- |
43 | 40 |
44 /** | 41 /** |
45 * Fast collation element iterator setOffset. | 42 * Fast collation element iterator setOffset. |
46 * This function does not check for bounds. | 43 * This function does not check for bounds. |
47 * @param elems collation element iterator | 44 * @param elems collation element iterator |
48 * @param offset offset to set | 45 * @param offset offset to set |
49 */ | 46 */ |
50 static | 47 static |
51 inline void setColEIterOffset(UCollationElements *elems, | 48 inline void setColEIterOffset(UCollationElements *elems, |
52 int32_t offset) | 49 int32_t offset) |
53 { | 50 { |
54 collIterate *ci = &(elems->iteratordata_); | 51 // Note: Not "fast" any more after the 2013 collation rewrite. |
55 ci->pos = ci->string + offset; | 52 // We do not want to expose more internals than necessary. |
56 ci->CEpos = ci->toReturn = ci->extendCEs ? ci->extendCEs : ci->CEs; | 53 UErrorCode status = U_ZERO_ERROR; |
57 if (ci->flags & UCOL_ITER_INNORMBUF) { | 54 ucol_setOffset(elems, offset, &status); |
58 ci->flags = ci->origFlags; | |
59 } | |
60 ci->fcdPosition = NULL; | |
61 | |
62 ci->offsetReturn = NULL; | |
63 ci->offsetStore = ci->offsetBuffer; | |
64 ci->offsetRepeatCount = ci->offsetRepeatValue = 0; | |
65 } | 55 } |
66 | 56 |
67 /** | 57 /** |
68 * Getting the mask for collation strength | 58 * Getting the mask for collation strength |
69 * @param strength collation strength | 59 * @param strength collation strength |
70 * @return collation element mask | 60 * @return collation element mask |
71 */ | 61 */ |
72 static | 62 static |
73 inline uint32_t getMask(UCollationStrength strength) | 63 inline uint32_t getMask(UCollationStrength strength) |
74 { | 64 { |
(...skipping 216 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
291 * @param status output error if any, caller to check status before calling | 281 * @param status output error if any, caller to check status before calling |
292 * method, status assumed to be success when passed in. | 282 * method, status assumed to be success when passed in. |
293 * @return total number of expansions | 283 * @return total number of expansions |
294 */ | 284 */ |
295 static | 285 static |
296 inline uint16_t initializePatternCETable(UStringSearch *strsrch, | 286 inline uint16_t initializePatternCETable(UStringSearch *strsrch, |
297 UErrorCode *status) | 287 UErrorCode *status) |
298 { | 288 { |
299 UPattern *pattern = &(strsrch->pattern); | 289 UPattern *pattern = &(strsrch->pattern); |
300 uint32_t cetablesize = INITIAL_ARRAY_SIZE_; | 290 uint32_t cetablesize = INITIAL_ARRAY_SIZE_; |
301 int32_t *cetable = pattern->CEBuffer; | 291 int32_t *cetable = pattern->cesBuffer; |
302 uint32_t patternlength = pattern->textLength; | 292 uint32_t patternlength = pattern->textLength; |
303 UCollationElements *coleiter = strsrch->utilIter; | 293 UCollationElements *coleiter = strsrch->utilIter; |
304 | 294 |
305 if (coleiter == NULL) { | 295 if (coleiter == NULL) { |
306 coleiter = ucol_openElements(strsrch->collator, pattern->text, | 296 coleiter = ucol_openElements(strsrch->collator, pattern->text, |
307 patternlength, status); | 297 patternlength, status); |
308 // status will be checked in ucol_next(..) later; if it is an | 298 // status will be checked in ucol_next(..) later; if it is an |
309 // error, UCOL_NULLORDER is the result of ucol_next(..) and 0 will be | 299 // error, UCOL_NULLORDER is the result of ucol_next(..) and 0 will be |
310 // returned. | 300 // returned. |
311 strsrch->utilIter = coleiter; | 301 strsrch->utilIter = coleiter; |
312 } | 302 } |
313 else { | 303 else { |
314 uprv_init_collIterate(strsrch->collator, pattern->text, | 304 ucol_setText(coleiter, pattern->text, pattern->textLength, status); |
315 pattern->textLength, | |
316 &coleiter->iteratordata_, | |
317 status); | |
318 } | 305 } |
319 if(U_FAILURE(*status)) { | 306 if(U_FAILURE(*status)) { |
320 return 0; | 307 return 0; |
321 } | 308 } |
322 | 309 |
// NOTE(review): the early return on failure inside the loop below leaves the table pointer freed here unreset in pattern -- confirm it cannot be read or freed again. | // NOTE(review): the early return on failure inside the loop below leaves the table pointer freed here unreset in pattern -- confirm it cannot be read or freed again. |
323 if (pattern->CE != cetable && pattern->CE) { | 310 if (pattern->ces != cetable && pattern->ces) { |
324 uprv_free(pattern->CE); | 311 uprv_free(pattern->ces); |
325 } | 312 } |
326 | 313 |
327 uint16_t offset = 0; | 314 uint16_t offset = 0; |
328 uint16_t result = 0; | 315 uint16_t result = 0; |
329 int32_t ce; | 316 int32_t ce; |
330 | 317 |
331 while ((ce = ucol_next(coleiter, status)) != UCOL_NULLORDER && | 318 while ((ce = ucol_next(coleiter, status)) != UCOL_NULLORDER && |
332 U_SUCCESS(*status)) { | 319 U_SUCCESS(*status)) { |
333 uint32_t newce = getCE(strsrch, ce); | 320 uint32_t newce = getCE(strsrch, ce); |
334 if (newce) { | 321 if (newce) { |
335 int32_t *temp = addTouint32_tArray(cetable, offset, &cetablesize, | 322 int32_t *temp = addTouint32_tArray(cetable, offset, &cetablesize, |
336 newce, | 323 newce, |
337 patternlength - ucol_getOffset(coleiter) + 1, | 324 patternlength - ucol_getOffset(coleiter) + 1, |
338 status); | 325 status); |
339 if (U_FAILURE(*status)) { | 326 if (U_FAILURE(*status)) { |
340 return 0; | 327 return 0; |
341 } | 328 } |
342 offset ++; | 329 offset ++; |
343 if (cetable != temp && cetable != pattern->CEBuffer) { | 330 if (cetable != temp && cetable != pattern->cesBuffer) { |
344 uprv_free(cetable); | 331 uprv_free(cetable); |
345 } | 332 } |
346 cetable = temp; | 333 cetable = temp; |
347 } | 334 } |
348 result += (uint16_t)(ucol_getMaxExpansion(coleiter, ce) - 1); | 335 result += (uint16_t)(ucol_getMaxExpansion(coleiter, ce) - 1); |
349 } | 336 } |
350 | 337 |
351 cetable[offset] = 0; | 338 cetable[offset] = 0; |
352 pattern->CE = cetable; | 339 pattern->ces = cetable; |
353 pattern->CELength = offset; | 340 pattern->cesLength = offset; |
354 | 341 |
355 return result; | 342 return result; |
356 } | 343 } |
357 | 344 |
358 /** | 345 /** |
359 * Initializing the pce table for a pattern. | 346 * Initializing the pce table for a pattern. |
360 * Stores non-ignorable collation keys. | 347 * Stores non-ignorable collation keys. |
361 * Table size will be estimated by the size of the pattern text. Table | 348 * Table size will be estimated by the size of the pattern text. Table |
362 * expansion will be performed as we go along. Adding 1 to ensure that the table | 349 * expansion will be performed as we go along. Adding 1 to ensure that the table |
363 * size definitely increases. | 350 * size definitely increases. |
364 * Internal method, status assumed to be a success. | 351 * Internal method, status assumed to be a success. |
365 * @param strsrch string search data | 352 * @param strsrch string search data |
366 * @param status output error if any, caller to check status before calling | 353 * @param status output error if any, caller to check status before calling |
367 * method, status assumed to be success when passed in. | 354 * method, status assumed to be success when passed in. |
368 * @return total number of expansions; always 0 as written, since result is never updated | 355 * @return total number of expansions; always 0 as written, since result is never updated |
369 */ | 356 */ |
370 static | 357 static |
371 inline uint16_t initializePatternPCETable(UStringSearch *strsrch, | 358 inline uint16_t initializePatternPCETable(UStringSearch *strsrch, |
372 UErrorCode *status) | 359 UErrorCode *status) |
373 { | 360 { |
374 UPattern *pattern = &(strsrch->pattern); | 361 UPattern *pattern = &(strsrch->pattern); |
375 uint32_t pcetablesize = INITIAL_ARRAY_SIZE_; | 362 uint32_t pcetablesize = INITIAL_ARRAY_SIZE_; |
376 int64_t *pcetable = pattern->PCEBuffer; | 363 int64_t *pcetable = pattern->pcesBuffer; |
377 uint32_t patternlength = pattern->textLength; | 364 uint32_t patternlength = pattern->textLength; |
378 UCollationElements *coleiter = strsrch->utilIter; | 365 UCollationElements *coleiter = strsrch->utilIter; |
379 | 366 |
380 if (coleiter == NULL) { | 367 if (coleiter == NULL) { |
381 coleiter = ucol_openElements(strsrch->collator, pattern->text, | 368 coleiter = ucol_openElements(strsrch->collator, pattern->text, |
382 patternlength, status); | 369 patternlength, status); |
383 // status will be checked in ucol_next(..) later; if it is an | 370 // status will be checked in ucol_next(..) later; if it is an |
384 // error, UCOL_NULLORDER is the result of ucol_next(..) and 0 will be | 371 // error, UCOL_NULLORDER is the result of ucol_next(..) and 0 will be |
385 // returned. | 372 // returned. |
386 strsrch->utilIter = coleiter; | 373 strsrch->utilIter = coleiter; |
387 } else { | 374 } else { |
388 uprv_init_collIterate(strsrch->collator, pattern->text, | 375 ucol_setText(coleiter, pattern->text, pattern->textLength, status); |
389 pattern->textLength, | |
390 &coleiter->iteratordata_, | |
391 status); | |
392 } | 376 } |
393 if(U_FAILURE(*status)) { | 377 if(U_FAILURE(*status)) { |
394 return 0; | 378 return 0; |
395 } | 379 } |
396 | 380 |
// NOTE(review): the early return on failure inside the loop below leaves the table pointer freed here unreset in pattern -- confirm it cannot be read or freed again. | // NOTE(review): the early return on failure inside the loop below leaves the table pointer freed here unreset in pattern -- confirm it cannot be read or freed again. |
397 if (pattern->PCE != pcetable && pattern->PCE != NULL) { | 381 if (pattern->pces != pcetable && pattern->pces != NULL) { |
398 uprv_free(pattern->PCE); | 382 uprv_free(pattern->pces); |
399 } | 383 } |
400 | 384 |
401 uint16_t offset = 0; | 385 uint16_t offset = 0; |
402 uint16_t result = 0; | 386 uint16_t result = 0; |
403 int64_t pce; | 387 int64_t pce; |
404 | 388 |
405 uprv_init_pce(coleiter); | 389 icu::UCollationPCE iter(coleiter); |
406 | 390 |
407 // ** Should processed CEs be signed or unsigned? | 391 // ** Should processed CEs be signed or unsigned? |
408 // ** (the rest of the code in this file seems to play fast-and-loose with | 392 // ** (the rest of the code in this file seems to play fast-and-loose with |
409 // ** whether a CE is signed or unsigned. For example, look at routine abov
e this one.) | 393 // ** whether a CE is signed or unsigned. For example, look at routine abov
e this one.) |
410 while ((pce = ucol_nextProcessed(coleiter, NULL, NULL, status)) != UCOL_PROC
ESSED_NULLORDER && | 394 while ((pce = iter.nextProcessed(NULL, NULL, status)) != UCOL_PROCESSED_NULL
ORDER && |
411 U_SUCCESS(*status)) { | 395 U_SUCCESS(*status)) { |
412 int64_t *temp = addTouint64_tArray(pcetable, offset, &pcetablesize, | 396 int64_t *temp = addTouint64_tArray(pcetable, offset, &pcetablesize, |
413 pce, | 397 pce, |
414 patternlength - ucol_getOffset(coleiter) + 1, | 398 patternlength - ucol_getOffset(coleiter) + 1, |
415 status); | 399 status); |
416 | 400 |
417 if (U_FAILURE(*status)) { | 401 if (U_FAILURE(*status)) { |
418 return 0; | 402 return 0; |
419 } | 403 } |
420 | 404 |
421 offset += 1; | 405 offset += 1; |
422 | 406 |
423 if (pcetable != temp && pcetable != pattern->PCEBuffer) { | 407 if (pcetable != temp && pcetable != pattern->pcesBuffer) { |
424 uprv_free(pcetable); | 408 uprv_free(pcetable); |
425 } | 409 } |
426 | 410 |
427 pcetable = temp; | 411 pcetable = temp; |
428 //result += (uint16_t)(ucol_getMaxExpansion(coleiter, ce) - 1); | 412 //result += (uint16_t)(ucol_getMaxExpansion(coleiter, ce) - 1); |
429 } | 413 } |
430 | 414 |
431 pcetable[offset] = 0; | 415 pcetable[offset] = 0; |
432 pattern->PCE = pcetable; | 416 pattern->pces = pcetable; |
433 pattern->PCELength = offset; | 417 pattern->pcesLength = offset; |
434 | 418 |
435 return result; | 419 return result; |
436 } | 420 } |
437 | 421 |
438 /** | 422 /** |
439 * Initializes the pattern struct. | 423 * Initializes the pattern struct. |
440 * Internal method, status assumed to be success. | 424 * Internal method, status assumed to be success. |
441 * @param strsrch UStringSearch data storage | 425 * @param strsrch UStringSearch data storage |
442 * @param status output error if any, caller to check status before calling | 426 * @param status output error if any, caller to check status before calling |
443 * method, status assumed to be success when passed in. | 427 * method, status assumed to be success when passed in. |
444 * @return expansionsize the total expansion size of the pattern | 428 * @return expansionsize the total expansion size of the pattern |
445 */ | 429 */ |
446 static | 430 static |
447 inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status) | 431 inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status) |
448 { | 432 { |
| 433 if (U_FAILURE(*status)) { return 0; } |
449 UPattern *pattern = &(strsrch->pattern); | 434 UPattern *pattern = &(strsrch->pattern); |
450 const UChar *patterntext = pattern->text; | 435 const UChar *patterntext = pattern->text; |
451 int32_t length = pattern->textLength; | 436 int32_t length = pattern->textLength; |
452 int32_t index = 0; | 437 int32_t index = 0; |
453 | 438 |
454 // Since the strength is primary, accents are ignored in the pattern. | 439 // Since the strength is primary, accents are ignored in the pattern. |
455 if (strsrch->strength == UCOL_PRIMARY) { | 440 if (strsrch->strength == UCOL_PRIMARY) { |
456 pattern->hasPrefixAccents = 0; | 441 pattern->hasPrefixAccents = 0; |
457 pattern->hasSuffixAccents = 0; | 442 pattern->hasSuffixAccents = 0; |
458 } else { | 443 } else { |
459 pattern->hasPrefixAccents = getFCD(patterntext, &index, length) >> | 444 pattern->hasPrefixAccents = getFCD(patterntext, &index, length) >> |
460 SECOND_LAST_BYTE_SHIFT_
; | 445 SECOND_LAST_BYTE_SHIFT_
; |
461 index = length; | 446 index = length; |
462 U16_BACK_1(patterntext, 0, index); | 447 U16_BACK_1(patterntext, 0, index); |
463 pattern->hasSuffixAccents = getFCD(patterntext, &index, length) & | 448 pattern->hasSuffixAccents = getFCD(patterntext, &index, length) & |
464 LAST_BYTE_MASK_
; | 449 LAST_BYTE_MASK_
; |
465 } | 450 } |
466 | 451 |
467 // ** HACK ** drop the processed-CE table: free it unless it is the inline buffer, then reset the pointer to NULL | 452 // ** HACK ** drop the processed-CE table: free it unless it is the inline buffer, then reset the pointer to NULL |
468 if (strsrch->pattern.PCE != NULL) { | 453 if (strsrch->pattern.pces != NULL) { |
469 if (strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) { | 454 if (strsrch->pattern.pces != strsrch->pattern.pcesBuffer) { |
470 uprv_free(strsrch->pattern.PCE); | 455 uprv_free(strsrch->pattern.pces); |
471 } | 456 } |
472 | 457 |
473 strsrch->pattern.PCE = NULL; | 458 strsrch->pattern.pces = NULL; |
474 } | 459 } |
475 | 460 |
476 // since initializePattern is an internal method, status is a success. | 461 // since initializePattern is an internal method, status is a success. |
477 return initializePatternCETable(strsrch, status); | 462 return initializePatternCETable(strsrch, status); |
478 } | 463 } |
479 | 464 |
480 /** | 465 /** |
481 * Initializing shift tables, with the default values. | 466 * Initializing shift tables, with the default values. |
482 * If a corresponding default value is 0, the shift table is not set. | 467 * If a corresponding default value is 0, the shift table is not set. |
483 * @param shift table for forwards shift | 468 * @param shift table for forwards shift |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
550 * If pattern has no non-ignorable ce, we return an illegal argument error. | 535 * If pattern has no non-ignorable ce, we return an illegal argument error. |
551 * Internal method, status assumed to be success. | 536 * Internal method, status assumed to be success. |
552 * @param strsrch UStringSearch data storage | 537 * @param strsrch UStringSearch data storage |
553 * @param status for output errors if it occurs, status is assumed to be a | 538 * @param status for output errors if it occurs, status is assumed to be a |
554 * success when it is passed in. | 539 * success when it is passed in. |
555 */ | 540 */ |
// Runs initializePattern(); if that succeeds and the pattern has at least one CE, | // Runs initializePattern(); if that succeeds and the pattern has at least one CE, |
// computes minlength (CE count minus expansion size, or 1 when the CE count is not larger), | // computes minlength (CE count minus expansion size, or 1 when the CE count is not larger), |
// stores it as defaultShiftSize and fills the shift tables via setShiftTable(). | // stores it as defaultShiftSize and fills the shift tables via setShiftTable(). |
// Otherwise defaultShiftSize is set to 0. | // Otherwise defaultShiftSize is set to 0. |
556 static | 541 static |
557 inline void initialize(UStringSearch *strsrch, UErrorCode *status) | 542 inline void initialize(UStringSearch *strsrch, UErrorCode *status) |
558 { | 543 { |
559 int16_t expandlength = initializePattern(strsrch, status); | 544 int16_t expandlength = initializePattern(strsrch, status); |
560 if (U_SUCCESS(*status) && strsrch->pattern.CELength > 0) { | 545 if (U_SUCCESS(*status) && strsrch->pattern.cesLength > 0) { |
561 UPattern *pattern = &strsrch->pattern; | 546 UPattern *pattern = &strsrch->pattern; |
562 int32_t cesize = pattern->CELength; | 547 int32_t cesize = pattern->cesLength; |
563 | 548 |
564 int16_t minlength = cesize > expandlength | 549 int16_t minlength = cesize > expandlength |
565 ? (int16_t)cesize - expandlength : 1; | 550 ? (int16_t)cesize - expandlength : 1; |
566 pattern->defaultShiftSize = minlength; | 551 pattern->defaultShiftSize = minlength; |
567 setShiftTable(pattern->shift, pattern->backShift, pattern->CE, | 552 setShiftTable(pattern->shift, pattern->backShift, pattern->ces, |
568 cesize, expandlength, minlength, minlength); | 553 cesize, expandlength, minlength, minlength); |
569 return; | 554 return; |
570 } | 555 } |
571 strsrch->pattern.defaultShiftSize = 0; | 556 strsrch->pattern.defaultShiftSize = 0; |
572 } | 557 } |
573 | 558 |
574 #if BOYER_MOORE | 559 #if BOYER_MOORE |
575 /** | 560 /** |
576 * Check to make sure that the match length is at the end of the character by | 561 * Check to make sure that the match length is at the end of the character by |
577 * using the breakiterator. | 562 * using the breakiterator. |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
633 ubrk_following(breakiterator, start - 1) == start) && | 618 ubrk_following(breakiterator, start - 1) == start) && |
634 (end == endindex || | 619 (end == endindex || |
635 ubrk_following(breakiterator, end - 1) == end); | 620 ubrk_following(breakiterator, end - 1) == end); |
636 if (result) { | 621 if (result) { |
637 // iterates the individual ces | 622 // iterates the individual ces |
638 UCollationElements *coleiter = strsrch->utilIter; | 623 UCollationElements *coleiter = strsrch->utilIter; |
639 const UChar *text = strsrch->search->text + | 624 const UChar *text = strsrch->search->text + |
640 start; | 625 start; |
641 UErrorCode status = U_ZERO_ERROR; | 626 UErrorCode status = U_ZERO_ERROR; |
642 ucol_setText(coleiter, text, end - start, &status); | 627 ucol_setText(coleiter, text, end - start, &status); |
643 for (int32_t count = 0; count < strsrch->pattern.CELength; | 628 for (int32_t count = 0; count < strsrch->pattern.cesLength; |
644 count ++) { | 629 count ++) { |
645 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status)); | 630 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status)); |
646 if (ce == UCOL_IGNORABLE) { | 631 if (ce == UCOL_IGNORABLE) { |
647 count --; | 632 count --; |
648 continue; | 633 continue; |
649 } | 634 } |
650 if (U_FAILURE(status) || ce != strsrch->pattern.CE[count]) { | 635 if (U_FAILURE(status) || ce != strsrch->pattern.ces[count]) { |
651 return FALSE; | 636 return FALSE; |
652 } | 637 } |
653 } | 638 } |
654 int32_t nextce = ucol_next(coleiter, &status); | 639 int32_t nextce = ucol_next(coleiter, &status); |
655 while (ucol_getOffset(coleiter) == (end - start) | 640 while (ucol_getOffset(coleiter) == (end - start) |
656 && getCE(strsrch, nextce) == UCOL_IGNORABLE) { | 641 && getCE(strsrch, nextce) == UCOL_IGNORABLE) { |
657 nextce = ucol_next(coleiter, &status); | 642 nextce = ucol_next(coleiter, &status); |
658 } | 643 } |
659 if (ucol_getOffset(coleiter) == (end - start) | 644 if (ucol_getOffset(coleiter) == (end - start) |
660 && nextce != UCOL_NULLORDER) { | 645 && nextce != UCOL_NULLORDER) { |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
741 inline int32_t shiftForward(UStringSearch *strsrch, | 726 inline int32_t shiftForward(UStringSearch *strsrch, |
742 int32_t textoffset, | 727 int32_t textoffset, |
743 int32_t ce, | 728 int32_t ce, |
744 int32_t patternceindex) | 729 int32_t patternceindex) |
745 { | 730 { |
746 UPattern *pattern = &(strsrch->pattern); | 731 UPattern *pattern = &(strsrch->pattern); |
747 if (ce != UCOL_NULLORDER) { | 732 if (ce != UCOL_NULLORDER) { |
748 int32_t shift = pattern->shift[hash(ce)]; | 733 int32_t shift = pattern->shift[hash(ce)]; |
749 // this is to adjust for characters in the middle of the | 734 // this is to adjust for characters in the middle of the |
750 // substring for matching that failed. | 735 // substring for matching that failed. |
751 int32_t adjust = pattern->CELength - patternceindex; | 736 int32_t adjust = pattern->cesLength - patternceindex; |
752 if (adjust > 1 && shift >= adjust) { | 737 if (adjust > 1 && shift >= adjust) { |
753 shift -= adjust - 1; | 738 shift -= adjust - 1; |
754 } | 739 } |
755 textoffset += shift; | 740 textoffset += shift; |
756 } | 741 } |
757 else { | 742 else { |
758 textoffset += pattern->defaultShiftSize; | 743 textoffset += pattern->defaultShiftSize; |
759 } | 744 } |
760 | 745 |
761 textoffset = getNextUStringSearchBaseOffset(strsrch, textoffset); | 746 textoffset = getNextUStringSearchBaseOffset(strsrch, textoffset); |
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
875 uprv_free(norm); | 860 uprv_free(norm); |
876 return FALSE; | 861 return FALSE; |
877 } | 862 } |
878 } | 863 } |
879 else { | 864 else { |
880 norm = buffer; | 865 norm = buffer; |
881 } | 866 } |
882 | 867 |
883 UCollationElements *coleiter = strsrch->utilIter; | 868 UCollationElements *coleiter = strsrch->utilIter; |
884 ucol_setText(coleiter, norm, size, status); | 869 ucol_setText(coleiter, norm, size, status); |
885 uint32_t firstce = strsrch->pattern.CE[0]; | 870 uint32_t firstce = strsrch->pattern.ces[0]; |
886 UBool ignorable = TRUE; | 871 UBool ignorable = TRUE; |
887 uint32_t ce = UCOL_IGNORABLE; | 872 uint32_t ce = UCOL_IGNORABLE; |
888 while (U_SUCCESS(*status) && ce != firstce && ce != (uint32_t)UCOL_N
ULLORDER) { | 873 while (U_SUCCESS(*status) && ce != firstce && ce != (uint32_t)UCOL_N
ULLORDER) { |
889 offset = ucol_getOffset(coleiter); | 874 offset = ucol_getOffset(coleiter); |
890 if (ce != firstce && ce != UCOL_IGNORABLE) { | 875 if (ce != firstce && ce != UCOL_IGNORABLE) { |
891 ignorable = FALSE; | 876 ignorable = FALSE; |
892 } | 877 } |
893 ce = ucol_next(coleiter, status); | 878 ce = ucol_next(coleiter, status); |
894 } | 879 } |
895 UChar32 codepoint; | 880 UChar32 codepoint; |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
928 */ | 913 */ |
929 static | 914 static |
930 UBool hasAccentsBeforeMatch(const UStringSearch *strsrch, int32_t start, | 915 UBool hasAccentsBeforeMatch(const UStringSearch *strsrch, int32_t start, |
931 int32_t end) | 916 int32_t end) |
932 { | 917 { |
933 if (strsrch->pattern.hasPrefixAccents) { | 918 if (strsrch->pattern.hasPrefixAccents) { |
934 UCollationElements *coleiter = strsrch->textIter; | 919 UCollationElements *coleiter = strsrch->textIter; |
935 UErrorCode status = U_ZERO_ERROR; | 920 UErrorCode status = U_ZERO_ERROR; |
936 // we have been iterating forwards previously | 921 // we have been iterating forwards previously |
937 uint32_t ignorable = TRUE; | 922 uint32_t ignorable = TRUE; |
938 int32_t firstce = strsrch->pattern.CE[0]; | 923 int32_t firstce = strsrch->pattern.ces[0]; |
939 | 924 |
940 setColEIterOffset(coleiter, start); | 925 setColEIterOffset(coleiter, start); |
941 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status)); | 926 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status)); |
942 if (U_FAILURE(status)) { | 927 if (U_FAILURE(status)) { |
943 return TRUE; | 928 return TRUE; |
944 } | 929 } |
945 while (ce != firstce) { | 930 while (ce != firstce) { |
946 if (ce != UCOL_IGNORABLE) { | 931 if (ce != UCOL_IGNORABLE) { |
947 ignorable = FALSE; | 932 ignorable = FALSE; |
948 } | 933 } |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1010 static | 995 static |
1011 UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start, | 996 UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start, |
1012 int32_t end) | 997 int32_t end) |
1013 { | 998 { |
1014 if (strsrch->pattern.hasSuffixAccents) { | 999 if (strsrch->pattern.hasSuffixAccents) { |
1015 const UChar *text = strsrch->search->text; | 1000 const UChar *text = strsrch->search->text; |
1016 int32_t temp = end; | 1001 int32_t temp = end; |
1017 int32_t textlength = strsrch->search->textLength; | 1002 int32_t textlength = strsrch->search->textLength; |
1018 U16_BACK_1(text, 0, temp); | 1003 U16_BACK_1(text, 0, temp); |
1019 if (getFCD(text, &temp, textlength) & LAST_BYTE_MASK_) { | 1004 if (getFCD(text, &temp, textlength) & LAST_BYTE_MASK_) { |
1020 int32_t firstce = strsrch->pattern.CE[0]; | 1005 int32_t firstce = strsrch->pattern.ces[0]; |
1021 UCollationElements *coleiter = strsrch->textIter; | 1006 UCollationElements *coleiter = strsrch->textIter; |
1022 UErrorCode status = U_ZERO_ERROR; | 1007 UErrorCode status = U_ZERO_ERROR; |
1023 int32_t ce; | 1008 int32_t ce; |
1024 setColEIterOffset(coleiter, start); | 1009 setColEIterOffset(coleiter, start); |
1025 while ((ce = getCE(strsrch, ucol_next(coleiter, &status))) != firstc
e) { | 1010 while ((ce = getCE(strsrch, ucol_next(coleiter, &status))) != firstc
e) { |
1026 if (U_FAILURE(status) || ce == UCOL_NULLORDER) { | 1011 if (U_FAILURE(status) || ce == UCOL_NULLORDER) { |
1027 return TRUE; | 1012 return TRUE; |
1028 } | 1013 } |
1029 } | 1014 } |
1030 int32_t count = 1; | 1015 int32_t count = 1; |
1031 while (count < strsrch->pattern.CELength) { | 1016 while (count < strsrch->pattern.cesLength) { |
1032 if (getCE(strsrch, ucol_next(coleiter, &status)) | 1017 if (getCE(strsrch, ucol_next(coleiter, &status)) |
1033 == UCOL_IGNORABLE) { | 1018 == UCOL_IGNORABLE) { |
1034 // Thai can give an ignorable here. | 1019 // Thai can give an ignorable here. |
1035 count --; | 1020 count --; |
1036 } | 1021 } |
1037 if (U_FAILURE(status)) { | 1022 if (U_FAILURE(status)) { |
1038 return TRUE; | 1023 return TRUE; |
1039 } | 1024 } |
1040 count ++; | 1025 count ++; |
1041 } | 1026 } |
(...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1205 if (U_FAILURE(*status)) { | 1190 if (U_FAILURE(*status)) { |
1206 return FALSE; | 1191 return FALSE; |
1207 } | 1192 } |
1208 if (ucol_getOffset(coleiter) != temp) { | 1193 if (ucol_getOffset(coleiter) != temp) { |
1209 *start = temp; | 1194 *start = temp; |
1210 temp = ucol_getOffset(coleiter); | 1195 temp = ucol_getOffset(coleiter); |
1211 } | 1196 } |
1212 expansion --; | 1197 expansion --; |
1213 } | 1198 } |
1214 | 1199 |
1215 int32_t *patternce = strsrch->pattern.CE; | 1200 int32_t *patternce = strsrch->pattern.ces; |
1216 int32_t patterncelength = strsrch->pattern.CELength; | 1201 int32_t patterncelength = strsrch->pattern.cesLength; |
1217 int32_t count = 0; | 1202 int32_t count = 0; |
1218 while (count < patterncelength) { | 1203 while (count < patterncelength) { |
1219 int32_t ce = getCE(strsrch, ucol_next(coleiter, status)); | 1204 int32_t ce = getCE(strsrch, ucol_next(coleiter, status)); |
1220 if (ce == UCOL_IGNORABLE) { | 1205 if (ce == UCOL_IGNORABLE) { |
1221 continue; | 1206 continue; |
1222 } | 1207 } |
1223 if (expandflag && count == 0 && ucol_getOffset(coleiter) != temp) { | 1208 if (expandflag && count == 0 && ucol_getOffset(coleiter) != temp) { |
1224 *start = temp; | 1209 *start = temp; |
1225 temp = ucol_getOffset(coleiter); | 1210 temp = ucol_getOffset(coleiter); |
1226 } | 1211 } |
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1408 * Running through a collation element iterator to see if the contents matches | 1393 * Running through a collation element iterator to see if the contents matches |
1409 * pattern in string search data | 1394 * pattern in string search data |
1410 * @param strsrch string search data | 1395 * @param strsrch string search data |
1411 * @param coleiter collation element iterator | 1396 * @param coleiter collation element iterator |
1412 * @return TRUE if a match is found, FALSE otherwise | 1397 * @return TRUE if a match is found, FALSE otherwise |
1413 */ | 1398 */ |
1414 static | 1399 static |
1415 inline UBool checkCollationMatch(const UStringSearch *strsrch, | 1400 inline UBool checkCollationMatch(const UStringSearch *strsrch, |
1416 UCollationElements *coleiter) | 1401 UCollationElements *coleiter) |
1417 { | 1402 { |
1418 int patternceindex = strsrch->pattern.CELength; | 1403 int patternceindex = strsrch->pattern.cesLength; |
1419 int32_t *patternce = strsrch->pattern.CE; | 1404 int32_t *patternce = strsrch->pattern.ces; |
1420 UErrorCode status = U_ZERO_ERROR; | 1405 UErrorCode status = U_ZERO_ERROR; |
1421 while (patternceindex > 0) { | 1406 while (patternceindex > 0) { |
1422 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status)); | 1407 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status)); |
1423 if (ce == UCOL_IGNORABLE) { | 1408 if (ce == UCOL_IGNORABLE) { |
1424 continue; | 1409 continue; |
1425 } | 1410 } |
1426 if (U_FAILURE(status) || ce != *patternce) { | 1411 if (U_FAILURE(status) || ce != *patternce) { |
1427 return FALSE; | 1412 return FALSE; |
1428 } | 1413 } |
1429 patternce ++; | 1414 patternce ++; |
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1608 } | 1593 } |
1609 else { | 1594 else { |
1610 safetextlength = u_strlen(strsrch->canonicalSuffixAccents); | 1595 safetextlength = u_strlen(strsrch->canonicalSuffixAccents); |
1611 safetext = strsrch->canonicalSuffixAccents; | 1596 safetext = strsrch->canonicalSuffixAccents; |
1612 } | 1597 } |
1613 | 1598 |
1614 // if status is a failure, ucol_setText does nothing | 1599 // if status is a failure, ucol_setText does nothing |
1615 ucol_setText(coleiter, safetext, safetextlength, status); | 1600 ucol_setText(coleiter, safetext, safetextlength, status); |
1616 // status checked in loop below | 1601 // status checked in loop below |
1617 | 1602 |
1618 int32_t *ce = strsrch->pattern.CE; | 1603 int32_t *ce = strsrch->pattern.ces; |
1619 int32_t celength = strsrch->pattern.CELength; | 1604 int32_t celength = strsrch->pattern.cesLength; |
1620 int ceindex = celength - 1; | 1605 int ceindex = celength - 1; |
1621 UBool isSafe = TRUE; // indication flag for position in safe zone | 1606 UBool isSafe = TRUE; // indication flag for position in safe zone |
1622 | 1607 |
1623 while (ceindex >= 0) { | 1608 while (ceindex >= 0) { |
1624 int32_t textce = ucol_previous(coleiter, status); | 1609 int32_t textce = ucol_previous(coleiter, status); |
1625 if (U_FAILURE(*status)) { | 1610 if (U_FAILURE(*status)) { |
1626 if (isSafe) { | 1611 if (isSafe) { |
1627 cleanUpSafeText(strsrch, safetext, safebuffer); | 1612 cleanUpSafeText(strsrch, safetext, safebuffer); |
1628 } | 1613 } |
1629 return USEARCH_DONE; | 1614 return USEARCH_DONE; |
(...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1848 if (U_FAILURE(*status)) { | 1833 if (U_FAILURE(*status)) { |
1849 return FALSE; | 1834 return FALSE; |
1850 } | 1835 } |
1851 if (ucol_getOffset(coleiter) != temp) { | 1836 if (ucol_getOffset(coleiter) != temp) { |
1852 *start = temp; | 1837 *start = temp; |
1853 temp = ucol_getOffset(coleiter); | 1838 temp = ucol_getOffset(coleiter); |
1854 } | 1839 } |
1855 expansion --; | 1840 expansion --; |
1856 } | 1841 } |
1857 | 1842 |
1858 int32_t *patternce = strsrch->pattern.CE; | 1843 int32_t *patternce = strsrch->pattern.ces; |
1859 int32_t patterncelength = strsrch->pattern.CELength; | 1844 int32_t patterncelength = strsrch->pattern.cesLength; |
1860 int32_t count = 0; | 1845 int32_t count = 0; |
1861 int32_t textlength = strsrch->search->textLength; | 1846 int32_t textlength = strsrch->search->textLength; |
1862 while (count < patterncelength) { | 1847 while (count < patterncelength) { |
1863 int32_t ce = getCE(strsrch, ucol_next(coleiter, status)); | 1848 int32_t ce = getCE(strsrch, ucol_next(coleiter, status)); |
1864 // status checked below, note that if status is a failure | 1849 // status checked below, note that if status is a failure |
1865 // ucol_next returns UCOL_NULLORDER | 1850 // ucol_next returns UCOL_NULLORDER |
1866 if (ce == UCOL_IGNORABLE) { | 1851 if (ce == UCOL_IGNORABLE) { |
1867 continue; | 1852 continue; |
1868 } | 1853 } |
1869 if (expandflag && count == 0 && ucol_getOffset(coleiter) != temp) { | 1854 if (expandflag && count == 0 && ucol_getOffset(coleiter) != temp) { |
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2046 if (U_FAILURE(*status)) { | 2031 if (U_FAILURE(*status)) { |
2047 return FALSE; | 2032 return FALSE; |
2048 } | 2033 } |
2049 if (ucol_getOffset(coleiter) != temp) { | 2034 if (ucol_getOffset(coleiter) != temp) { |
2050 *end = temp; | 2035 *end = temp; |
2051 temp = ucol_getOffset(coleiter); | 2036 temp = ucol_getOffset(coleiter); |
2052 } | 2037 } |
2053 expansion --; | 2038 expansion --; |
2054 } | 2039 } |
2055 | 2040 |
2056 int32_t *patternce = strsrch->pattern.CE; | 2041 int32_t *patternce = strsrch->pattern.ces; |
2057 int32_t patterncelength = strsrch->pattern.CELength; | 2042 int32_t patterncelength = strsrch->pattern.cesLength; |
2058 int32_t count = patterncelength; | 2043 int32_t count = patterncelength; |
2059 while (count > 0) { | 2044 while (count > 0) { |
2060 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status)); | 2045 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status)); |
2061 // status checked below, note that if status is a failure | 2046 // status checked below, note that if status is a failure |
2062 // ucol_previous returns UCOL_NULLORDER | 2047 // ucol_previous returns UCOL_NULLORDER |
2063 if (ce == UCOL_IGNORABLE) { | 2048 if (ce == UCOL_IGNORABLE) { |
2064 continue; | 2049 continue; |
2065 } | 2050 } |
2066 if (expandflag && count == 0 && | 2051 if (expandflag && count == 0 && |
2067 getColElemIterOffset(coleiter, FALSE) != temp) { | 2052 getColElemIterOffset(coleiter, FALSE) != temp) { |
(...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2271 else { | 2256 else { |
2272 safetextlength = u_strlen(strsrch->canonicalPrefixAccents); | 2257 safetextlength = u_strlen(strsrch->canonicalPrefixAccents); |
2273 safetext = strsrch->canonicalPrefixAccents; | 2258 safetext = strsrch->canonicalPrefixAccents; |
2274 } | 2259 } |
2275 | 2260 |
2276 UCollationElements *coleiter = strsrch->utilIter; | 2261 UCollationElements *coleiter = strsrch->utilIter; |
2277 // if status is a failure, ucol_setText does nothing | 2262 // if status is a failure, ucol_setText does nothing |
2278 ucol_setText(coleiter, safetext, safetextlength, status); | 2263 ucol_setText(coleiter, safetext, safetextlength, status); |
2279 // status checked in loop below | 2264 // status checked in loop below |
2280 | 2265 |
2281 int32_t *ce = strsrch->pattern.CE; | 2266 int32_t *ce = strsrch->pattern.ces; |
2282 int32_t celength = strsrch->pattern.CELength; | 2267 int32_t celength = strsrch->pattern.cesLength; |
2283 int ceindex = 0; | 2268 int ceindex = 0; |
2284 UBool isSafe = TRUE; // safe zone indication flag for position | 2269 UBool isSafe = TRUE; // safe zone indication flag for position |
2285 int32_t prefixlength = u_strlen(strsrch->canonicalPrefixAccents); | 2270 int32_t prefixlength = u_strlen(strsrch->canonicalPrefixAccents); |
2286 | 2271 |
2287 while (ceindex < celength) { | 2272 while (ceindex < celength) { |
2288 int32_t textce = ucol_next(coleiter, status); | 2273 int32_t textce = ucol_next(coleiter, status); |
2289 if (U_FAILURE(*status)) { | 2274 if (U_FAILURE(*status)) { |
2290 if (isSafe) { | 2275 if (isSafe) { |
2291 cleanUpSafeText(strsrch, safetext, safebuffer); | 2276 cleanUpSafeText(strsrch, safetext, safebuffer); |
2292 } | 2277 } |
(...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2486 if (U_FAILURE(*status)) { | 2471 if (U_FAILURE(*status)) { |
2487 return FALSE; | 2472 return FALSE; |
2488 } | 2473 } |
2489 if (ucol_getOffset(coleiter) != temp) { | 2474 if (ucol_getOffset(coleiter) != temp) { |
2490 *end = temp; | 2475 *end = temp; |
2491 temp = ucol_getOffset(coleiter); | 2476 temp = ucol_getOffset(coleiter); |
2492 } | 2477 } |
2493 expansion --; | 2478 expansion --; |
2494 } | 2479 } |
2495 | 2480 |
2496 int32_t *patternce = strsrch->pattern.CE; | 2481 int32_t *patternce = strsrch->pattern.ces; |
2497 int32_t patterncelength = strsrch->pattern.CELength; | 2482 int32_t patterncelength = strsrch->pattern.cesLength; |
2498 int32_t count = patterncelength; | 2483 int32_t count = patterncelength; |
2499 while (count > 0) { | 2484 while (count > 0) { |
2500 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status)); | 2485 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status)); |
2501 // status checked below, note that if status is a failure | 2486 // status checked below, note that if status is a failure |
2502 // ucol_previous returns UCOL_NULLORDER | 2487 // ucol_previous returns UCOL_NULLORDER |
2503 if (ce == UCOL_IGNORABLE) { | 2488 if (ce == UCOL_IGNORABLE) { |
2504 continue; | 2489 continue; |
2505 } | 2490 } |
2506 if (expandflag && count == 0 && | 2491 if (expandflag && count == 0 && |
2507 getColElemIterOffset(coleiter, FALSE) != temp) { | 2492 getColElemIterOffset(coleiter, FALSE) != temp) { |
(...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2693 } | 2678 } |
2694 | 2679 |
2695 result->collator = collator; | 2680 result->collator = collator; |
2696 result->strength = ucol_getStrength(collator); | 2681 result->strength = ucol_getStrength(collator); |
2697 result->ceMask = getMask(result->strength); | 2682 result->ceMask = getMask(result->strength); |
2698 result->toShift = | 2683 result->toShift = |
2699 ucol_getAttribute(collator, UCOL_ALTERNATE_HANDLING, status) == | 2684 ucol_getAttribute(collator, UCOL_ALTERNATE_HANDLING, status) == |
2700 UCOL_SHIFTED; | 2685 UCOL_SHIFTED; |
2701 result->variableTop = ucol_getVariableTop(collator, status); | 2686 result->variableTop = ucol_getVariableTop(collator, status); |
2702 | 2687 |
2703 result->nfd = Normalizer2Factory::getNFDInstance(*status); | 2688 result->nfd = Normalizer2::getNFDInstance(*status); |
2704 | 2689 |
2705 if (U_FAILURE(*status)) { | 2690 if (U_FAILURE(*status)) { |
2706 uprv_free(result); | 2691 uprv_free(result); |
2707 return NULL; | 2692 return NULL; |
2708 } | 2693 } |
2709 | 2694 |
2710 result->search = (USearch *)uprv_malloc(sizeof(USearch)); | 2695 result->search = (USearch *)uprv_malloc(sizeof(USearch)); |
2711 if (result->search == NULL) { | 2696 if (result->search == NULL) { |
2712 *status = U_MEMORY_ALLOCATION_ERROR; | 2697 *status = U_MEMORY_ALLOCATION_ERROR; |
2713 uprv_free(result); | 2698 uprv_free(result); |
2714 return NULL; | 2699 return NULL; |
2715 } | 2700 } |
2716 | 2701 |
2717 result->search->text = text; | 2702 result->search->text = text; |
2718 result->search->textLength = textlength; | 2703 result->search->textLength = textlength; |
2719 | 2704 |
2720 result->pattern.text = pattern; | 2705 result->pattern.text = pattern; |
2721 result->pattern.textLength = patternlength; | 2706 result->pattern.textLength = patternlength; |
2722 result->pattern.CE = NULL; | 2707 result->pattern.ces = NULL; |
2723 result->pattern.PCE = NULL; | 2708 result->pattern.pces = NULL; |
2724 | 2709 |
2725 result->search->breakIter = breakiter; | 2710 result->search->breakIter = breakiter; |
2726 #if !UCONFIG_NO_BREAK_ITERATION | 2711 #if !UCONFIG_NO_BREAK_ITERATION |
2727 result->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(result->collator, ULOC_VALID_LOCALE, status), text, textlength, status); | 2712 result->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(result->collator, ULOC_VALID_LOCALE, status), text, textlength, status); |
2728 if (breakiter) { | 2713 if (breakiter) { |
2729 ubrk_setText(breakiter, text, textlength, status); | 2714 ubrk_setText(breakiter, text, textlength, status); |
2730 } | 2715 } |
2731 #endif | 2716 #endif |
2732 | 2717 |
2733 result->ownCollator = FALSE; | 2718 result->ownCollator = FALSE; |
2734 result->search->matchedLength = 0; | 2719 result->search->matchedLength = 0; |
2735 result->search->matchedIndex = USEARCH_DONE; | 2720 result->search->matchedIndex = USEARCH_DONE; |
2736 result->utilIter = NULL; | 2721 result->utilIter = NULL; |
2737 result->textIter = ucol_openElements(collator, text, | 2722 result->textIter = ucol_openElements(collator, text, |
2738 textlength, status); | 2723 textlength, status); |
| 2724 result->textProcessedIter = NULL; |
2739 if (U_FAILURE(*status)) { | 2725 if (U_FAILURE(*status)) { |
2740 usearch_close(result); | 2726 usearch_close(result); |
2741 return NULL; | 2727 return NULL; |
2742 } | 2728 } |
2743 | 2729 |
2744 result->search->isOverlap = FALSE; | 2730 result->search->isOverlap = FALSE; |
2745 result->search->isCanonicalMatch = FALSE; | 2731 result->search->isCanonicalMatch = FALSE; |
2746 result->search->elementComparisonType = 0; | 2732 result->search->elementComparisonType = 0; |
2747 result->search->isForwardSearching = TRUE; | 2733 result->search->isForwardSearching = TRUE; |
2748 result->search->reset = TRUE; | 2734 result->search->reset = TRUE; |
2749 | 2735 |
2750 initialize(result, status); | 2736 initialize(result, status); |
2751 | 2737 |
2752 if (U_FAILURE(*status)) { | 2738 if (U_FAILURE(*status)) { |
2753 usearch_close(result); | 2739 usearch_close(result); |
2754 return NULL; | 2740 return NULL; |
2755 } | 2741 } |
2756 | 2742 |
2757 return result; | 2743 return result; |
2758 } | 2744 } |
2759 return NULL; | 2745 return NULL; |
2760 } | 2746 } |
2761 | 2747 |
2762 U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch) | 2748 U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch) |
2763 { | 2749 { |
2764 if (strsrch) { | 2750 if (strsrch) { |
2765 if (strsrch->pattern.CE != strsrch->pattern.CEBuffer && | 2751 if (strsrch->pattern.ces != strsrch->pattern.cesBuffer && |
2766 strsrch->pattern.CE) { | 2752 strsrch->pattern.ces) { |
2767 uprv_free(strsrch->pattern.CE); | 2753 uprv_free(strsrch->pattern.ces); |
2768 } | 2754 } |
2769 | 2755 |
2770 if (strsrch->pattern.PCE != NULL && | 2756 if (strsrch->pattern.pces != NULL && |
2771 strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) { | 2757 strsrch->pattern.pces != strsrch->pattern.pcesBuffer) { |
2772 uprv_free(strsrch->pattern.PCE); | 2758 uprv_free(strsrch->pattern.pces); |
2773 } | 2759 } |
2774 | 2760 |
| 2761 delete strsrch->textProcessedIter; |
2775 ucol_closeElements(strsrch->textIter); | 2762 ucol_closeElements(strsrch->textIter); |
2776 ucol_closeElements(strsrch->utilIter); | 2763 ucol_closeElements(strsrch->utilIter); |
2777 | 2764 |
2778 if (strsrch->ownCollator && strsrch->collator) { | 2765 if (strsrch->ownCollator && strsrch->collator) { |
2779 ucol_close((UCollator *)strsrch->collator); | 2766 ucol_close((UCollator *)strsrch->collator); |
2780 } | 2767 } |
2781 | 2768 |
2782 #if !UCONFIG_NO_BREAK_ITERATION | 2769 #if !UCONFIG_NO_BREAK_ITERATION |
2783 if (strsrch->search->internalBreakIter) { | 2770 if (strsrch->search->internalBreakIter) { |
2784 ubrk_close(strsrch->search->internalBreakIter); | 2771 ubrk_close(strsrch->search->internalBreakIter); |
2785 } | 2772 } |
2786 #endif | 2773 #endif |
2787 | 2774 |
2788 uprv_free(strsrch->search); | 2775 uprv_free(strsrch->search); |
2789 uprv_free(strsrch); | 2776 uprv_free(strsrch); |
2790 } | 2777 } |
2791 } | 2778 } |
2792 | 2779 |
| 2780 namespace { |
| 2781 |
| 2782 UBool initTextProcessedIter(UStringSearch *strsrch, UErrorCode *status) { |
| 2783 if (U_FAILURE(*status)) { return FALSE; } |
| 2784 if (strsrch->textProcessedIter == NULL) { |
| 2785 strsrch->textProcessedIter = new icu::UCollationPCE(strsrch->textIter); |
| 2786 if (strsrch->textProcessedIter == NULL) { |
| 2787 *status = U_MEMORY_ALLOCATION_ERROR; |
| 2788 return FALSE; |
| 2789 } |
| 2790 } else { |
| 2791 strsrch->textProcessedIter->init(strsrch->textIter); |
| 2792 } |
| 2793 return TRUE; |
| 2794 } |
| 2795 |
| 2796 } |
| 2797 |
2793 // set and get methods -------------------------------------------------- | 2798 // set and get methods -------------------------------------------------- |
2794 | 2799 |
2795 U_CAPI void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch, | 2800 U_CAPI void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch, |
2796 int32_t position, | 2801 int32_t position, |
2797 UErrorCode *status) | 2802 UErrorCode *status) |
2798 { | 2803 { |
2799 if (U_SUCCESS(*status) && strsrch) { | 2804 if (U_SUCCESS(*status) && strsrch) { |
2800 if (isOutOfBounds(strsrch->search->textLength, position)) { | 2805 if (isOutOfBounds(strsrch->search->textLength, position)) { |
2801 *status = U_INDEX_OUTOFBOUNDS_ERROR; | 2806 *status = U_INDEX_OUTOFBOUNDS_ERROR; |
2802 } | 2807 } |
(...skipping 200 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3003 const UCollator *collator, | 3008 const UCollator *collator, |
3004 UErrorCode *status) | 3009 UErrorCode *status) |
3005 { | 3010 { |
3006 if (U_SUCCESS(*status)) { | 3011 if (U_SUCCESS(*status)) { |
3007 if (collator == NULL) { | 3012 if (collator == NULL) { |
3008 *status = U_ILLEGAL_ARGUMENT_ERROR; | 3013 *status = U_ILLEGAL_ARGUMENT_ERROR; |
3009 return; | 3014 return; |
3010 } | 3015 } |
3011 | 3016 |
3012 if (strsrch) { | 3017 if (strsrch) { |
| 3018 delete strsrch->textProcessedIter; |
| 3019 strsrch->textProcessedIter = NULL; |
| 3020 ucol_closeElements(strsrch->textIter); |
| 3021 ucol_closeElements(strsrch->utilIter); |
| 3022 strsrch->textIter = strsrch->utilIter = NULL; |
3013 if (strsrch->ownCollator && (strsrch->collator != collator)) { | 3023 if (strsrch->ownCollator && (strsrch->collator != collator)) { |
3014 ucol_close((UCollator *)strsrch->collator); | 3024 ucol_close((UCollator *)strsrch->collator); |
3015 strsrch->ownCollator = FALSE; | 3025 strsrch->ownCollator = FALSE; |
3016 } | 3026 } |
3017 strsrch->collator = collator; | 3027 strsrch->collator = collator; |
3018 strsrch->strength = ucol_getStrength(collator); | 3028 strsrch->strength = ucol_getStrength(collator); |
3019 strsrch->ceMask = getMask(strsrch->strength); | 3029 strsrch->ceMask = getMask(strsrch->strength); |
3020 #if !UCONFIG_NO_BREAK_ITERATION | 3030 #if !UCONFIG_NO_BREAK_ITERATION |
3021 ubrk_close(strsrch->search->internalBreakIter); | 3031 ubrk_close(strsrch->search->internalBreakIter); |
3022 strsrch->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(collator, ULOC_VALID_LOCALE, status), | 3032 strsrch->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(collator, ULOC_VALID_LOCALE, status), |
3023 strsrch->search->text, strsrch->search->textLength, status); | 3033 strsrch->search->text, strsrch->search->textLength, status); |
3024 #endif | 3034 #endif |
3025 // if status is a failure, ucol_getAttribute returns UCOL_DEFAULT | 3035 // if status is a failure, ucol_getAttribute returns UCOL_DEFAULT |
3026 strsrch->toShift = | 3036 strsrch->toShift = |
3027 ucol_getAttribute(collator, UCOL_ALTERNATE_HANDLING, status) == | 3037 ucol_getAttribute(collator, UCOL_ALTERNATE_HANDLING, status) == |
3028 UCOL_SHIFTED; | 3038 UCOL_SHIFTED; |
3029 // if status is a failure, ucol_getVariableTop returns 0 | 3039 // if status is a failure, ucol_getVariableTop returns 0 |
3030 strsrch->variableTop = ucol_getVariableTop(collator, status); | 3040 strsrch->variableTop = ucol_getVariableTop(collator, status); |
3031 if (U_SUCCESS(*status)) { | 3041 strsrch->textIter = ucol_openElements(collator, |
3032 initialize(strsrch, status); | 3042 strsrch->search->text, |
3033 if (U_SUCCESS(*status)) { | 3043 strsrch->search->textLength, |
3034 /* free offset buffer to avoid memory leak before initializing. */ | 3044 status); |
3035 ucol_freeOffsetBuffer(&(strsrch->textIter->iteratordata_)); | 3045 strsrch->utilIter = ucol_openElements( |
3036 uprv_init_collIterate(collator, strsrch->search->text, | 3046 collator, strsrch->pattern.text, strsrch->pattern.textLength, status); |
3037 strsrch->search->textLength, | 3047 // initialize() _after_ setting the iterators for the new collator. |
3038 &(strsrch->textIter->iteratordata_), | 3048 initialize(strsrch, status); |
3039 status); | |
3040 strsrch->utilIter->iteratordata_.coll = collator; | |
3041 } | |
3042 } | |
3043 } | 3049 } |
3044 | 3050 |
3045 // **** are these calls needed? | 3051 // **** are these calls needed? |
3046 // **** we call uprv_init_pce in initializePatternPCETable | 3052 // **** we call uprv_init_pce in initializePatternPCETable |
3047 // **** and the CEBuffer constructor... | 3053 // **** and the CEIBuffer constructor... |
3048 #if 0 | 3054 #if 0 |
3049 uprv_init_pce(strsrch->textIter); | 3055 uprv_init_pce(strsrch->textIter); |
3050 uprv_init_pce(strsrch->utilIter); | 3056 uprv_init_pce(strsrch->utilIter); |
3051 #endif | 3057 #endif |
3052 } | 3058 } |
3053 } | 3059 } |
3054 | 3060 |
3055 U_CAPI UCollator * U_EXPORT2 usearch_getCollator(const UStringSearch *strsrch) | 3061 U_CAPI UCollator * U_EXPORT2 usearch_getCollator(const UStringSearch *strsrch) |
3056 { | 3062 { |
3057 if (strsrch) { | 3063 if (strsrch) { |
(...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3215 // match is not found. | 3221 // match is not found. |
3216 search->isForwardSearching = TRUE; | 3222 search->isForwardSearching = TRUE; |
3217 if (search->matchedIndex != USEARCH_DONE) { | 3223 if (search->matchedIndex != USEARCH_DONE) { |
3218 // there's no need to set the collation element iterator | 3224 // there's no need to set the collation element iterator |
3219 // the next call to next will set the offset. | 3225 // the next call to next will set the offset. |
3220 return search->matchedIndex; | 3226 return search->matchedIndex; |
3221 } | 3227 } |
3222 } | 3228 } |
3223 | 3229 |
3224 if (U_SUCCESS(*status)) { | 3230 if (U_SUCCESS(*status)) { |
3225 if (strsrch->pattern.CELength == 0) { | 3231 if (strsrch->pattern.cesLength == 0) { |
3226 if (search->matchedIndex == USEARCH_DONE) { | 3232 if (search->matchedIndex == USEARCH_DONE) { |
3227 search->matchedIndex = offset; | 3233 search->matchedIndex = offset; |
3228 } | 3234 } |
3229 else { // moves by codepoints | 3235 else { // moves by codepoints |
3230 U16_FWD_1(search->text, search->matchedIndex, textlength); | 3236 U16_FWD_1(search->text, search->matchedIndex, textlength); |
3231 } | 3237 } |
3232 | 3238 |
3233 search->matchedLength = 0; | 3239 search->matchedLength = 0; |
3234 setColEIterOffset(strsrch->textIter, search->matchedIndex); | 3240 setColEIterOffset(strsrch->textIter, search->matchedIndex); |
3235 // status checked below | 3241 // status checked below |
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3326 // Could check pattern length, but the | 3332 // Could check pattern length, but the |
3327 // linear search will do the right thing | 3333 // linear search will do the right thing |
3328 if (offset == 0 || matchedindex == 0) { | 3334 if (offset == 0 || matchedindex == 0) { |
3329 setMatchNotFound(strsrch); | 3335 setMatchNotFound(strsrch); |
3330 return USEARCH_DONE; | 3336 return USEARCH_DONE; |
3331 } | 3337 } |
3332 #endif | 3338 #endif |
3333 } | 3339 } |
3334 | 3340 |
3335 if (U_SUCCESS(*status)) { | 3341 if (U_SUCCESS(*status)) { |
3336 if (strsrch->pattern.CELength == 0) { | 3342 if (strsrch->pattern.cesLength == 0) { |
3337 search->matchedIndex = | 3343 search->matchedIndex = |
3338 (matchedindex == USEARCH_DONE ? offset : matchedindex); | 3344 (matchedindex == USEARCH_DONE ? offset : matchedindex); |
3339 if (search->matchedIndex == 0) { | 3345 if (search->matchedIndex == 0) { |
3340 setMatchNotFound(strsrch); | 3346 setMatchNotFound(strsrch); |
3341 // status checked below | 3347 // status checked below |
3342 } | 3348 } |
3343 else { // move by codepoints | 3349 else { // move by codepoints |
3344 U16_BACK_1(search->text, 0, search->matchedIndex); | 3350 U16_BACK_1(search->text, 0, search->matchedIndex); |
3345 setColEIterOffset(strsrch->textIter, search->matchedIndex); | 3351 setColEIterOffset(strsrch->textIter, search->matchedIndex); |
3346 // status checked below | 3352 // status checked below |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3409 | 3415 |
3410 // if status is a failure, ucol_getVariableTop returns 0 | 3416 // if status is a failure, ucol_getVariableTop returns 0 |
3411 varTop = ucol_getVariableTop(strsrch->collator, &status); | 3417 varTop = ucol_getVariableTop(strsrch->collator, &status); |
3412 if (strsrch->variableTop != varTop) { | 3418 if (strsrch->variableTop != varTop) { |
3413 strsrch->variableTop = varTop; | 3419 strsrch->variableTop = varTop; |
3414 sameCollAttribute = FALSE; | 3420 sameCollAttribute = FALSE; |
3415 } | 3421 } |
3416 if (!sameCollAttribute) { | 3422 if (!sameCollAttribute) { |
3417 initialize(strsrch, &status); | 3423 initialize(strsrch, &status); |
3418 } | 3424 } |
3419 /* free offset buffer to avoid memory leak before initializing. */ | 3425 ucol_setText(strsrch->textIter, strsrch->search->text, |
3420 ucol_freeOffsetBuffer(&(strsrch->textIter->iteratordata_)); | |
3421 uprv_init_collIterate(strsrch->collator, strsrch->search->text, | |
3422 strsrch->search->textLength, | 3426 strsrch->search->textLength, |
3423 &(strsrch->textIter->iteratordata_), | |
3424 &status); | 3427 &status); |
3425 strsrch->search->matchedLength = 0; | 3428 strsrch->search->matchedLength = 0; |
3426 strsrch->search->matchedIndex = USEARCH_DONE; | 3429 strsrch->search->matchedIndex = USEARCH_DONE; |
3427 strsrch->search->isOverlap = FALSE; | 3430 strsrch->search->isOverlap = FALSE; |
3428 strsrch->search->isCanonicalMatch = FALSE; | 3431 strsrch->search->isCanonicalMatch = FALSE; |
3429 strsrch->search->elementComparisonType = 0; | 3432 strsrch->search->elementComparisonType = 0; |
3430 strsrch->search->isForwardSearching = TRUE; | 3433 strsrch->search->isForwardSearching = TRUE; |
3431 strsrch->search->reset = TRUE; | 3434 strsrch->search->reset = TRUE; |
3432 } | 3435 } |
3433 } | 3436 } |
3434 | 3437 |
3435 // | 3438 // |
3436 // CEI Collation Element + source text index. | 3439 // CEI Collation Element + source text index. |
3437 // These structs are kept in the circular buffer. | 3440 // These structs are kept in the circular buffer. |
3438 // | 3441 // |
3439 struct CEI { | 3442 struct CEI { |
3440 int64_t ce; | 3443 int64_t ce; |
3441 int32_t lowIndex; | 3444 int32_t lowIndex; |
3442 int32_t highIndex; | 3445 int32_t highIndex; |
3443 }; | 3446 }; |
3444 | 3447 |
3445 U_NAMESPACE_BEGIN | 3448 U_NAMESPACE_BEGIN |
3446 | 3449 |
3447 | 3450 namespace { |
3448 // | 3451 // |
3449 // CEBuffer A circular buffer of CEs from the text being searched. | 3452 // CEIBuffer A circular buffer of CEs-with-index from the text being searched. |
3450 // | 3453 // |
3451 #define DEFAULT_CEBUFFER_SIZE 96 | 3454 #define DEFAULT_CEBUFFER_SIZE 96 |
3452 #define CEBUFFER_EXTRA 32 | 3455 #define CEBUFFER_EXTRA 32 |
3453 // Some typical max values to make buffer size more reasonable for asymmetric search. | 3456 // Some typical max values to make buffer size more reasonable for asymmetric search. |
3454 // #8694 is for a better long-term solution to allocation of this buffer. | 3457 // #8694 is for a better long-term solution to allocation of this buffer. |
3455 #define MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L 8 | 3458 #define MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L 8 |
3456 #define MAX_TARGET_IGNORABLES_PER_PAT_OTHER 3 | 3459 #define MAX_TARGET_IGNORABLES_PER_PAT_OTHER 3 |
3457 #define MIGHT_BE_JAMO_L(c) ((c >= 0x1100 && c <= 0x115E) || (c >= 0x3131 && c <= 0x314E) || (c >= 0x3165 && c <= 0x3186)) | 3460 #define MIGHT_BE_JAMO_L(c) ((c >= 0x1100 && c <= 0x115E) || (c >= 0x3131 && c <= 0x314E) || (c >= 0x3165 && c <= 0x3186)) |
3458 struct CEBuffer { | 3461 struct CEIBuffer { |
3459 CEI defBuf[DEFAULT_CEBUFFER_SIZE]; | 3462 CEI defBuf[DEFAULT_CEBUFFER_SIZE]; |
3460 CEI *buf; | 3463 CEI *buf; |
3461 int32_t bufSize; | 3464 int32_t bufSize; |
3462 int32_t firstIx; | 3465 int32_t firstIx; |
3463 int32_t limitIx; | 3466 int32_t limitIx; |
3464 UCollationElements *ceIter; | 3467 UCollationElements *ceIter; |
3465 UStringSearch *strSearch; | 3468 UStringSearch *strSearch; |
3466 | 3469 |
3467 | 3470 |
3468 | 3471 |
3469 CEBuffer(UStringSearch *ss, UErrorCode *status); | 3472 CEIBuffer(UStringSearch *ss, UErrorCode *status); |
3470 ~CEBuffer(); | 3473 ~CEIBuffer(); |
3471 const CEI *get(int32_t index); | 3474 const CEI *get(int32_t index); |
3472 const CEI *getPrevious(int32_t index); | 3475 const CEI *getPrevious(int32_t index); |
3473 }; | 3476 }; |
3474 | 3477 |
3475 | 3478 |
3476 CEBuffer::CEBuffer(UStringSearch *ss, UErrorCode *status) { | 3479 CEIBuffer::CEIBuffer(UStringSearch *ss, UErrorCode *status) { |
3477 buf = defBuf; | 3480 buf = defBuf; |
3478 strSearch = ss; | 3481 strSearch = ss; |
3479 bufSize = ss->pattern.PCELength + CEBUFFER_EXTRA; | 3482 bufSize = ss->pattern.pcesLength + CEBUFFER_EXTRA; |
3480 if (ss->search->elementComparisonType != 0) { | 3483 if (ss->search->elementComparisonType != 0) { |
3481 const UChar * patText = ss->pattern.text; | 3484 const UChar * patText = ss->pattern.text; |
3482 if (patText) { | 3485 if (patText) { |
3483 const UChar * patTextLimit = patText + ss->pattern.textLength; | 3486 const UChar * patTextLimit = patText + ss->pattern.textLength; |
3484 while ( patText < patTextLimit ) { | 3487 while ( patText < patTextLimit ) { |
3485 UChar c = *patText++; | 3488 UChar c = *patText++; |
3486 if (MIGHT_BE_JAMO_L(c)) { | 3489 if (MIGHT_BE_JAMO_L(c)) { |
3487 bufSize += MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L; | 3490 bufSize += MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L; |
3488 } else { | 3491 } else { |
3489 // No check for surrogates, we might allocate slightly more buffer than necessary. | 3492 // No check for surrogates, we might allocate slightly more buffer than necessary. |
3490 bufSize += MAX_TARGET_IGNORABLES_PER_PAT_OTHER; | 3493 bufSize += MAX_TARGET_IGNORABLES_PER_PAT_OTHER; |
3491 } | 3494 } |
3492 } | 3495 } |
3493 } | 3496 } |
3494 } | 3497 } |
3495 ceIter = ss->textIter; | 3498 ceIter = ss->textIter; |
3496 firstIx = 0; | 3499 firstIx = 0; |
3497 limitIx = 0; | 3500 limitIx = 0; |
3498 | 3501 |
3499 uprv_init_pce(ceIter); | 3502 if (!initTextProcessedIter(ss, status)) { return; } |
3500 | 3503 |
3501 if (bufSize>DEFAULT_CEBUFFER_SIZE) { | 3504 if (bufSize>DEFAULT_CEBUFFER_SIZE) { |
3502 buf = (CEI *)uprv_malloc(bufSize * sizeof(CEI)); | 3505 buf = (CEI *)uprv_malloc(bufSize * sizeof(CEI)); |
3503 if (buf == NULL) { | 3506 if (buf == NULL) { |
3504 *status = U_MEMORY_ALLOCATION_ERROR; | 3507 *status = U_MEMORY_ALLOCATION_ERROR; |
3505 } | 3508 } |
3506 } | 3509 } |
3507 } | 3510 } |
3508 | 3511 |
3509 // TODO: add a reset or init function so that allocated | 3512 // TODO: add a reset or init function so that allocated |
3510 // buffers can be retained & reused. | 3513 // buffers can be retained & reused. |
3511 | 3514 |
3512 CEBuffer::~CEBuffer() { | 3515 CEIBuffer::~CEIBuffer() { |
3513 if (buf != defBuf) { | 3516 if (buf != defBuf) { |
3514 uprv_free(buf); | 3517 uprv_free(buf); |
3515 } | 3518 } |
3516 } | 3519 } |
3517 | 3520 |
3518 | 3521 |
3519 // Get the CE with the specified index. | 3522 // Get the CE with the specified index. |
3520 // Index must be in the range | 3523 // Index must be in the range |
3521 // n-history_size < index < n+1 | 3524 // n-history_size < index < n+1 |
3522 // where n is the largest index to have been fetched by some previous call to this function. | 3525 // where n is the largest index to have been fetched by some previous call to this function. |
3523 // The CE value will be UCOL__PROCESSED_NULLORDER at end of input. | 3526 // The CE value will be UCOL__PROCESSED_NULLORDER at end of input. |
3524 // | 3527 // |
3525 const CEI *CEBuffer::get(int32_t index) { | 3528 const CEI *CEIBuffer::get(int32_t index) { |
3526 int i = index % bufSize; | 3529 int i = index % bufSize; |
3527 | 3530 |
3528 if (index>=firstIx && index<limitIx) { | 3531 if (index>=firstIx && index<limitIx) { |
3529 // The request was for an entry already in our buffer. | 3532 // The request was for an entry already in our buffer. |
3530 // Just return it. | 3533 // Just return it. |
3531 return &buf[i]; | 3534 return &buf[i]; |
3532 } | 3535 } |
3533 | 3536 |
3534 // Caller is requesting a new, never accessed before, CE. | 3537 // Caller is requesting a new, never accessed before, CE. |
3535 // Verify that it is the next one in sequence, which is all | 3538 // Verify that it is the next one in sequence, which is all |
3536 // that is allowed. | 3539 // that is allowed. |
3537 if (index != limitIx) { | 3540 if (index != limitIx) { |
3538 U_ASSERT(FALSE); | 3541 U_ASSERT(FALSE); |
3539 | 3542 |
3540 return NULL; | 3543 return NULL; |
3541 } | 3544 } |
3542 | 3545 |
3543 // Manage the circular CE buffer indexing | 3546 // Manage the circular CE buffer indexing |
3544 limitIx++; | 3547 limitIx++; |
3545 | 3548 |
3546 if (limitIx - firstIx >= bufSize) { | 3549 if (limitIx - firstIx >= bufSize) { |
3547 // The buffer is full, knock out the lowest-indexed entry. | 3550 // The buffer is full, knock out the lowest-indexed entry. |
3548 firstIx++; | 3551 firstIx++; |
3549 } | 3552 } |
3550 | 3553 |
3551 UErrorCode status = U_ZERO_ERROR; | 3554 UErrorCode status = U_ZERO_ERROR; |
3552 | 3555 |
3553 buf[i].ce = ucol_nextProcessed(ceIter, &buf[i].lowIndex, &buf[i].highIndex, &status); | 3556 buf[i].ce = strSearch->textProcessedIter->nextProcessed(&buf[i].lowIndex, &buf[i].highIndex, &status); |
3554 | 3557 |
3555 return &buf[i]; | 3558 return &buf[i]; |
3556 } | 3559 } |
3557 | 3560 |
3558 // Get the CE with the specified index; an entry not yet buffered is fetched with previousProcessed. | 3561 // Get the CE with the specified index; an entry not yet buffered is fetched with previousProcessed. |
3559 // Index must be in the range | 3562 // Index must be in the range |
3560 // n-history_size < index < n+1 | 3563 // n-history_size < index < n+1 |
3561 // where n is the largest index to have been fetched by some previous call to
this function. | 3564 // where n is the largest index to have been fetched by some previous call to
this function. |
3562 // The CE value will be UCOL_PROCESSED_NULLORDER at end of input. | 3565 // The CE value will be UCOL_PROCESSED_NULLORDER at end of input. |
3563 // | 3566 // |
// Returns a pointer to the buffer slot holding the CE for index, or NULL when
// index is neither already buffered nor equal to limitIx (the next index in sequence).
3564 const CEI *CEBuffer::getPrevious(int32_t index) { | 3567 const CEI *CEIBuffer::getPrevious(int32_t index) { |
// i is the slot of the circular buffer that corresponds to index.
3565 int i = index % bufSize; | 3568 int i = index % bufSize; |
3566 | 3569 |
3567 if (index>=firstIx && index<limitIx) { | 3570 if (index>=firstIx && index<limitIx) { |
3568 // The request was for an entry already in our buffer. | 3571 // The request was for an entry already in our buffer. |
3569 // Just return it. | 3572 // Just return it. |
3570 return &buf[i]; | 3573 return &buf[i]; |
3571 } | 3574 } |
3572 | 3575 |
3573 // Caller is requesting a new, never accessed before, CE. | 3576 // Caller is requesting a new, never accessed before, CE. |
3574 // Verify that it is the next one in sequence, which is all | 3577 // Verify that it is the next one in sequence, which is all |
3575 // that is allowed. | 3578 // that is allowed. |
3576 if (index != limitIx) { | 3579 if (index != limitIx) { |
// Out-of-sequence request: assert, and return NULL if execution continues past the assertion.
3577 U_ASSERT(FALSE); | 3580 U_ASSERT(FALSE); |
3578 | 3581 |
3579 return NULL; | 3582 return NULL; |
3580 } | 3583 } |
3581 | 3584 |
3582 // Manage the circular CE buffer indexing | 3585 // Manage the circular CE buffer indexing |
3583 limitIx++; | 3586 limitIx++; |
3584 | 3587 |
3585 if (limitIx - firstIx >= bufSize) { | 3588 if (limitIx - firstIx >= bufSize) { |
3586 // The buffer is full, knock out the lowest-indexed entry. | 3589 // The buffer is full, knock out the lowest-indexed entry. |
3587 firstIx++; | 3590 firstIx++; |
3588 } | 3591 } |
3589 | 3592 |
// The status is local to this call: it is handed to the iterator but not examined
// afterwards, so an iterator failure is not reported to the caller.
3590 UErrorCode status = U_ZERO_ERROR; | 3593 UErrorCode status = U_ZERO_ERROR; |
3591 | 3594 |
// Fetch the previous processed CE, together with its low/high indexes, into slot i.
3592 buf[i].ce = ucol_previousProcessed(ceIter, &buf[i].lowIndex, &buf[i].highInd
ex, &status); | 3595 buf[i].ce = strSearch->textProcessedIter->previousProcessed(&buf[i].lowIndex
, &buf[i].highIndex, &status); |
3593 | 3596 |
3594 return &buf[i]; | 3597 return &buf[i]; |
3595 } | 3598 } |
3596 | 3599 |
| 3600 } |
| 3601 |
3597 U_NAMESPACE_END | 3602 U_NAMESPACE_END |
3598 | 3603 |
3599 | 3604 |
3600 // #define USEARCH_DEBUG | 3605 // #define USEARCH_DEBUG |
3601 | 3606 |
3602 #ifdef USEARCH_DEBUG | 3607 #ifdef USEARCH_DEBUG |
3603 #include <stdio.h> | 3608 #include <stdio.h> |
3604 #include <stdlib.h> | 3609 #include <stdlib.h> |
3605 #endif | 3610 #endif |
3606 | 3611 |
(...skipping 201 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3808 { | 3813 { |
3809 if (U_FAILURE(*status)) { | 3814 if (U_FAILURE(*status)) { |
3810 return FALSE; | 3815 return FALSE; |
3811 } | 3816 } |
3812 | 3817 |
3813 // TODO: reject search patterns beginning with a combining char. | 3818 // TODO: reject search patterns beginning with a combining char. |
3814 | 3819 |
3815 #ifdef USEARCH_DEBUG | 3820 #ifdef USEARCH_DEBUG |
3816 if (getenv("USEARCH_DEBUG") != NULL) { | 3821 if (getenv("USEARCH_DEBUG") != NULL) { |
3817 printf("Pattern CEs\n"); | 3822 printf("Pattern CEs\n"); |
3818 for (int ii=0; ii<strsrch->pattern.CELength; ii++) { | 3823 for (int ii=0; ii<strsrch->pattern.cesLength; ii++) { |
3819 printf(" %8x", strsrch->pattern.CE[ii]); | 3824 printf(" %8x", strsrch->pattern.ces[ii]); |
3820 } | 3825 } |
3821 printf("\n"); | 3826 printf("\n"); |
3822 } | 3827 } |
3823 | 3828 |
3824 #endif | 3829 #endif |
3825 // Input parameter sanity check. | 3830 // Input parameter sanity check. |
3826 // TODO: should input indices clip to the text length | 3831 // TODO: should input indices clip to the text length |
3827 // in the same way that UText does. | 3832 // in the same way that UText does. |
3828 if(strsrch->pattern.CELength == 0 || | 3833 if(strsrch->pattern.cesLength == 0 || |
3829 startIdx < 0 || | 3834 startIdx < 0 || |
3830 startIdx > strsrch->search->textLength || | 3835 startIdx > strsrch->search->textLength || |
3831 strsrch->pattern.CE == NULL) { | 3836 strsrch->pattern.ces == NULL) { |
3832 *status = U_ILLEGAL_ARGUMENT_ERROR; | 3837 *status = U_ILLEGAL_ARGUMENT_ERROR; |
3833 return FALSE; | 3838 return FALSE; |
3834 } | 3839 } |
3835 | 3840 |
3836 if (strsrch->pattern.PCE == NULL) { | 3841 if (strsrch->pattern.pces == NULL) { |
3837 initializePatternPCETable(strsrch, status); | 3842 initializePatternPCETable(strsrch, status); |
3838 } | 3843 } |
3839 | 3844 |
3840 ucol_setOffset(strsrch->textIter, startIdx, status); | 3845 ucol_setOffset(strsrch->textIter, startIdx, status); |
3841 CEBuffer ceb(strsrch, status); | 3846 CEIBuffer ceb(strsrch, status); |
3842 | 3847 |
3843 | 3848 |
3844 int32_t targetIx = 0; | 3849 int32_t targetIx = 0; |
3845 const CEI *targetCEI = NULL; | 3850 const CEI *targetCEI = NULL; |
3846 int32_t patIx; | 3851 int32_t patIx; |
3847 UBool found; | 3852 UBool found; |
3848 | 3853 |
3849 int32_t mStart = -1; | 3854 int32_t mStart = -1; |
3850 int32_t mLimit = -1; | 3855 int32_t mLimit = -1; |
3851 int32_t minLimit; | 3856 int32_t minLimit; |
(...skipping 25 matching lines...) Expand all Loading... |
3877 // For targetIx > 0, this ceb.get gets a CE that is as far back in the r
ing buffer | 3882 // For targetIx > 0, this ceb.get gets a CE that is as far back in the r
ing buffer |
3878 // (compared to the last CE fetched for the previous targetIx value) as
we need to go | 3883 // (compared to the last CE fetched for the previous targetIx value) as
we need to go |
3879 // for this targetIx value, so if it is non-NULL then other ceb.get call
s should be OK. | 3884 // for this targetIx value, so if it is non-NULL then other ceb.get call
s should be OK. |
3880 const CEI *firstCEI = ceb.get(targetIx); | 3885 const CEI *firstCEI = ceb.get(targetIx); |
3881 if (firstCEI == NULL) { | 3886 if (firstCEI == NULL) { |
3882 *status = U_INTERNAL_PROGRAM_ERROR; | 3887 *status = U_INTERNAL_PROGRAM_ERROR; |
3883 found = FALSE; | 3888 found = FALSE; |
3884 break; | 3889 break; |
3885 } | 3890 } |
3886 | 3891 |
3887 for (patIx=0; patIx<strsrch->pattern.PCELength; patIx++) { | 3892 for (patIx=0; patIx<strsrch->pattern.pcesLength; patIx++) { |
3888 patCE = strsrch->pattern.PCE[patIx]; | 3893 patCE = strsrch->pattern.pces[patIx]; |
3889 targetCEI = ceb.get(targetIx+patIx+targetIxOffset); | 3894 targetCEI = ceb.get(targetIx+patIx+targetIxOffset); |
3890 // Compare CE from target string with CE from the pattern. | 3895 // Compare CE from target string with CE from the pattern. |
3891 // Note that the target CE will be UCOL_PROCESSED_NULLORDER if we
reach the end of input, | 3896 // Note that the target CE will be UCOL_PROCESSED_NULLORDER if we
reach the end of input, |
3892 // which will fail the compare, below. | 3897 // which will fail the compare, below. |
3893 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsr
ch->search->elementComparisonType); | 3898 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsr
ch->search->elementComparisonType); |
3894 if ( ceMatch == U_CE_NO_MATCH ) { | 3899 if ( ceMatch == U_CE_NO_MATCH ) { |
3895 found = FALSE; | 3900 found = FALSE; |
3896 break; | 3901 break; |
3897 } else if ( ceMatch > U_CE_NO_MATCH ) { | 3902 } else if ( ceMatch > U_CE_NO_MATCH ) { |
3898 if ( ceMatch == U_CE_SKIP_TARG ) { | 3903 if ( ceMatch == U_CE_SKIP_TARG ) { |
3899 // redo with same patCE, next targCE | 3904 // redo with same patCE, next targCE |
3900 patIx--; | 3905 patIx--; |
3901 targetIxOffset++; | 3906 targetIxOffset++; |
3902 } else { // ceMatch == U_CE_SKIP_PATN | 3907 } else { // ceMatch == U_CE_SKIP_PATN |
3903 // redo with same targCE, next patCE | 3908 // redo with same targCE, next patCE |
3904 targetIxOffset--; | 3909 targetIxOffset--; |
3905 } | 3910 } |
3906 } | 3911 } |
3907 } | 3912 } |
3908 targetIxOffset += strsrch->pattern.PCELength; // this is now the offset
in target CE space to end of the match so far | 3913 targetIxOffset += strsrch->pattern.pcesLength; // this is now the offset
in target CE space to end of the match so far |
3909 | 3914 |
3910 if (!found && ((targetCEI == NULL) || (targetCEI->ce != UCOL_PROCESSED_N
ULLORDER))) { | 3915 if (!found && ((targetCEI == NULL) || (targetCEI->ce != UCOL_PROCESSED_N
ULLORDER))) { |
3911 // No match at this targetIx. Try again at the next. | 3916 // No match at this targetIx. Try again at the next. |
3912 continue; | 3917 continue; |
3913 } | 3918 } |
3914 | 3919 |
3915 if (!found) { | 3920 if (!found) { |
3916 // No match at all, we have run off the end of the target text. | 3921 // No match at all, we have run off the end of the target text. |
3917 break; | 3922 break; |
3918 } | 3923 } |
(...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4075 { | 4080 { |
4076 if (U_FAILURE(*status)) { | 4081 if (U_FAILURE(*status)) { |
4077 return FALSE; | 4082 return FALSE; |
4078 } | 4083 } |
4079 | 4084 |
4080 // TODO: reject search patterns beginning with a combining char. | 4085 // TODO: reject search patterns beginning with a combining char. |
4081 | 4086 |
4082 #ifdef USEARCH_DEBUG | 4087 #ifdef USEARCH_DEBUG |
4083 if (getenv("USEARCH_DEBUG") != NULL) { | 4088 if (getenv("USEARCH_DEBUG") != NULL) { |
4084 printf("Pattern CEs\n"); | 4089 printf("Pattern CEs\n"); |
4085 for (int ii=0; ii<strsrch->pattern.CELength; ii++) { | 4090 for (int ii=0; ii<strsrch->pattern.cesLength; ii++) { |
4086 printf(" %8x", strsrch->pattern.CE[ii]); | 4091 printf(" %8x", strsrch->pattern.ces[ii]); |
4087 } | 4092 } |
4088 printf("\n"); | 4093 printf("\n"); |
4089 } | 4094 } |
4090 | 4095 |
4091 #endif | 4096 #endif |
4092 // Input parameter sanity check. | 4097 // Input parameter sanity check. |
4093 // TODO: should input indices clip to the text length | 4098 // TODO: should input indices clip to the text length |
4094 // in the same way that UText does. | 4099 // in the same way that UText does. |
4095 if(strsrch->pattern.CELength == 0 || | 4100 if(strsrch->pattern.cesLength == 0 || |
4096 startIdx < 0 || | 4101 startIdx < 0 || |
4097 startIdx > strsrch->search->textLength || | 4102 startIdx > strsrch->search->textLength || |
4098 strsrch->pattern.CE == NULL) { | 4103 strsrch->pattern.ces == NULL) { |
4099 *status = U_ILLEGAL_ARGUMENT_ERROR; | 4104 *status = U_ILLEGAL_ARGUMENT_ERROR; |
4100 return FALSE; | 4105 return FALSE; |
4101 } | 4106 } |
4102 | 4107 |
4103 if (strsrch->pattern.PCE == NULL) { | 4108 if (strsrch->pattern.pces == NULL) { |
4104 initializePatternPCETable(strsrch, status); | 4109 initializePatternPCETable(strsrch, status); |
4105 } | 4110 } |
4106 | 4111 |
4107 CEBuffer ceb(strsrch, status); | 4112 CEIBuffer ceb(strsrch, status); |
4108 int32_t targetIx = 0; | 4113 int32_t targetIx = 0; |
4109 | 4114 |
4110 /* | 4115 /* |
4111 * Pre-load the buffer with the CE's for the grapheme | 4116 * Pre-load the buffer with the CE's for the grapheme |
4112 * after our starting position so that we're sure that | 4117 * after our starting position so that we're sure that |
4113 * we can look at the CE following the match when we | 4118 * we can look at the CE following the match when we |
4114 * check the match boundaries. | 4119 * check the match boundaries. |
4115 * | 4120 * |
4116 * This will also pre-fetch the first CE that we'll | 4121 * This will also pre-fetch the first CE that we'll |
4117 * consider for the match. | 4122 * consider for the match. |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4158 // for this targetIx value, so if it is non-NULL then other ceb.getPrevi
ous calls should be OK. | 4163 // for this targetIx value, so if it is non-NULL then other ceb.getPrevi
ous calls should be OK. |
4159 const CEI *lastCEI = ceb.getPrevious(targetIx); | 4164 const CEI *lastCEI = ceb.getPrevious(targetIx); |
4160 if (lastCEI == NULL) { | 4165 if (lastCEI == NULL) { |
4161 *status = U_INTERNAL_PROGRAM_ERROR; | 4166 *status = U_INTERNAL_PROGRAM_ERROR; |
4162 found = FALSE; | 4167 found = FALSE; |
4163 break; | 4168 break; |
4164 } | 4169 } |
4165 // Inner loop checks for a match beginning at each | 4170 // Inner loop checks for a match beginning at each |
4166 // position from the outer loop. | 4171 // position from the outer loop. |
4167 int32_t targetIxOffset = 0; | 4172 int32_t targetIxOffset = 0; |
4168 for (patIx = strsrch->pattern.PCELength - 1; patIx >= 0; patIx -= 1) { | 4173 for (patIx = strsrch->pattern.pcesLength - 1; patIx >= 0; patIx -= 1) { |
4169 int64_t patCE = strsrch->pattern.PCE[patIx]; | 4174 int64_t patCE = strsrch->pattern.pces[patIx]; |
4170 | 4175 |
4171 targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength -
1 - patIx + targetIxOffset); | 4176 targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLength -
1 - patIx + targetIxOffset); |
4172 // Compare CE from target string with CE from the pattern. | 4177 // Compare CE from target string with CE from the pattern. |
4173 // Note that the target CE will be UCOL_PROCESSED_NULLORDER if we reach the
end of input, | 4178 // Note that the target CE will be UCOL_PROCESSED_NULLORDER if we reach the
end of input, |
4174 // which will fail the compare, below. | 4179 // which will fail the compare, below. |
4175 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsr
ch->search->elementComparisonType); | 4180 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsr
ch->search->elementComparisonType); |
4176 if ( ceMatch == U_CE_NO_MATCH ) { | 4181 if ( ceMatch == U_CE_NO_MATCH ) { |
4177 found = FALSE; | 4182 found = FALSE; |
4178 break; | 4183 break; |
4179 } else if ( ceMatch > U_CE_NO_MATCH ) { | 4184 } else if ( ceMatch > U_CE_NO_MATCH ) { |
4180 if ( ceMatch == U_CE_SKIP_TARG ) { | 4185 if ( ceMatch == U_CE_SKIP_TARG ) { |
4181 // redo with same patCE, next targCE | 4186 // redo with same patCE, next targCE |
(...skipping 15 matching lines...) Expand all Loading... |
4197 // No match at all, we have run off the end of the target text. | 4202 // No match at all, we have run off the end of the target text. |
4198 break; | 4203 break; |
4199 } | 4204 } |
4200 | 4205 |
4201 | 4206 |
4202 // We have found a match in CE space. | 4207 // We have found a match in CE space. |
4203 // Now determine the bounds in string index space. | 4208 // Now determine the bounds in string index space. |
4204 // There still is a chance of match failure if the CE range does not corresp
ond to | 4209 // There still is a chance of match failure if the CE range does not corresp
ond to |
4205 // an acceptable character range. | 4210 // an acceptable character range. |
4206 // | 4211 // |
4207 const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELen
gth - 1 + targetIxOffset); | 4212 const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLe
ngth - 1 + targetIxOffset); |
4208 mStart = firstCEI->lowIndex; | 4213 mStart = firstCEI->lowIndex; |
4209 | 4214 |
4210 // Check for the start of the match being within a combining sequence. | 4215 // Check for the start of the match being within a combining sequence. |
4211 // This can happen if the pattern itself begins with a combining char,
and | 4216 // This can happen if the pattern itself begins with a combining char,
and |
4212 // the match found combining marks in the target text that were attach
ed | 4217 // the match found combining marks in the target text that were attach
ed |
4213 // to something else. | 4218 // to something else. |
4214 // This type of match should be rejected for not completely consuming
a | 4219 // This type of match should be rejected for not completely consuming
a |
4215 // combining sequence. | 4220 // combining sequence. |
4216 if (!isBreakBoundary(strsrch, mStart)) { | 4221 if (!isBreakBoundary(strsrch, mStart)) { |
4217 found = FALSE; | 4222 found = FALSE; |
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4323 UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status) | 4328 UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status) |
4324 { | 4329 { |
4325 if (U_FAILURE(*status)) { | 4330 if (U_FAILURE(*status)) { |
4326 setMatchNotFound(strsrch); | 4331 setMatchNotFound(strsrch); |
4327 return FALSE; | 4332 return FALSE; |
4328 } | 4333 } |
4329 | 4334 |
4330 #if BOYER_MOORE | 4335 #if BOYER_MOORE |
4331 UCollationElements *coleiter = strsrch->textIter; | 4336 UCollationElements *coleiter = strsrch->textIter; |
4332 int32_t textlength = strsrch->search->textLength; | 4337 int32_t textlength = strsrch->search->textLength; |
4333 int32_t *patternce = strsrch->pattern.CE; | 4338 int32_t *patternce = strsrch->pattern.ces; |
4334 int32_t patterncelength = strsrch->pattern.CELength; | 4339 int32_t patterncelength = strsrch->pattern.cesLength; |
4335 int32_t textoffset = ucol_getOffset(coleiter); | 4340 int32_t textoffset = ucol_getOffset(coleiter); |
4336 | 4341 |
4337 // status used in setting coleiter offset, since offset is checked in | 4342 // status used in setting coleiter offset, since offset is checked in |
4338 // shiftForward before setting the coleiter offset, status never | 4343 // shiftForward before setting the coleiter offset, status never |
4339 // a failure | 4344 // a failure |
4340 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER, | 4345 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER, |
4341 patterncelength); | 4346 patterncelength); |
4342 while (textoffset <= textlength) | 4347 while (textoffset <= textlength) |
4343 { | 4348 { |
4344 uint32_t patternceindex = patterncelength - 1; | 4349 uint32_t patternceindex = patterncelength - 1; |
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4437 UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status) | 4442 UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status) |
4438 { | 4443 { |
4439 if (U_FAILURE(*status)) { | 4444 if (U_FAILURE(*status)) { |
4440 setMatchNotFound(strsrch); | 4445 setMatchNotFound(strsrch); |
4441 return FALSE; | 4446 return FALSE; |
4442 } | 4447 } |
4443 | 4448 |
4444 #if BOYER_MOORE | 4449 #if BOYER_MOORE |
4445 UCollationElements *coleiter = strsrch->textIter; | 4450 UCollationElements *coleiter = strsrch->textIter; |
4446 int32_t textlength = strsrch->search->textLength; | 4451 int32_t textlength = strsrch->search->textLength; |
4447 int32_t *patternce = strsrch->pattern.CE; | 4452 int32_t *patternce = strsrch->pattern.ces; |
4448 int32_t patterncelength = strsrch->pattern.CELength; | 4453 int32_t patterncelength = strsrch->pattern.cesLength; |
4449 int32_t textoffset = ucol_getOffset(coleiter); | 4454 int32_t textoffset = ucol_getOffset(coleiter); |
4450 UBool hasPatternAccents = | 4455 UBool hasPatternAccents = |
4451 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents; | 4456 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents; |
4452 | 4457 |
4453 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER, | 4458 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER, |
4454 patterncelength); | 4459 patterncelength); |
4455 strsrch->canonicalPrefixAccents[0] = 0; | 4460 strsrch->canonicalPrefixAccents[0] = 0; |
4456 strsrch->canonicalSuffixAccents[0] = 0; | 4461 strsrch->canonicalSuffixAccents[0] = 0; |
4457 | 4462 |
4458 while (textoffset <= textlength) | 4463 while (textoffset <= textlength) |
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4551 | 4556 |
4552 UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status) | 4557 UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status) |
4553 { | 4558 { |
4554 if (U_FAILURE(*status)) { | 4559 if (U_FAILURE(*status)) { |
4555 setMatchNotFound(strsrch); | 4560 setMatchNotFound(strsrch); |
4556 return FALSE; | 4561 return FALSE; |
4557 } | 4562 } |
4558 | 4563 |
4559 #if BOYER_MOORE | 4564 #if BOYER_MOORE |
4560 UCollationElements *coleiter = strsrch->textIter; | 4565 UCollationElements *coleiter = strsrch->textIter; |
4561 int32_t *patternce = strsrch->pattern.CE; | 4566 int32_t *patternce = strsrch->pattern.ces; |
4562 int32_t patterncelength = strsrch->pattern.CELength; | 4567 int32_t patterncelength = strsrch->pattern.cesLength; |
4563 int32_t textoffset = ucol_getOffset(coleiter); | 4568 int32_t textoffset = ucol_getOffset(coleiter); |
4564 | 4569 |
4565 // shifting it check for setting offset | 4570 // shifting it check for setting offset |
4566 // if setOffset is called previously or there was no previous match, we | 4571 // if setOffset is called previously or there was no previous match, we |
4567 // leave the offset as it is. | 4572 // leave the offset as it is. |
4568 if (strsrch->search->matchedIndex != USEARCH_DONE) { | 4573 if (strsrch->search->matchedIndex != USEARCH_DONE) { |
4569 textoffset = strsrch->search->matchedIndex; | 4574 textoffset = strsrch->search->matchedIndex; |
4570 } | 4575 } |
4571 | 4576 |
4572 textoffset = reverseShift(strsrch, textoffset, UCOL_NULLORDER, | 4577 textoffset = reverseShift(strsrch, textoffset, UCOL_NULLORDER, |
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4652 return FALSE; | 4657 return FALSE; |
4653 #else | 4658 #else |
4654 int32_t textOffset; | 4659 int32_t textOffset; |
4655 | 4660 |
4656 if (strsrch->search->isOverlap) { | 4661 if (strsrch->search->isOverlap) { |
4657 if (strsrch->search->matchedIndex != USEARCH_DONE) { | 4662 if (strsrch->search->matchedIndex != USEARCH_DONE) { |
4658 textOffset = strsrch->search->matchedIndex + strsrch->search->matche
dLength - 1; | 4663 textOffset = strsrch->search->matchedIndex + strsrch->search->matche
dLength - 1; |
4659 } else { | 4664 } else { |
4660 // move the start position at the end of possible match | 4665 // move the start position at the end of possible match |
4661 initializePatternPCETable(strsrch, status); | 4666 initializePatternPCETable(strsrch, status); |
4662 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.PCELength - 1; nPCE
s++) { | 4667 if (!initTextProcessedIter(strsrch, status)) { |
4663 int64_t pce = ucol_nextProcessed(strsrch->textIter, NULL, NULL,
status); | 4668 setMatchNotFound(strsrch); |
| 4669 return FALSE; |
| 4670 } |
| 4671 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPC
Es++) { |
| 4672 int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NU
LL, status); |
4664 if (pce == UCOL_PROCESSED_NULLORDER) { | 4673 if (pce == UCOL_PROCESSED_NULLORDER) { |
4665 // at the end of the text | 4674 // at the end of the text |
4666 break; | 4675 break; |
4667 } | 4676 } |
4668 } | 4677 } |
4669 if (U_FAILURE(*status)) { | 4678 if (U_FAILURE(*status)) { |
4670 setMatchNotFound(strsrch); | 4679 setMatchNotFound(strsrch); |
4671 return FALSE; | 4680 return FALSE; |
4672 } | 4681 } |
4673 textOffset = ucol_getOffset(strsrch->textIter); | 4682 textOffset = ucol_getOffset(strsrch->textIter); |
(...skipping 19 matching lines...) Expand all Loading... |
4693 UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, | 4702 UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, |
4694 UErrorCode *status) | 4703 UErrorCode *status) |
4695 { | 4704 { |
4696 if (U_FAILURE(*status)) { | 4705 if (U_FAILURE(*status)) { |
4697 setMatchNotFound(strsrch); | 4706 setMatchNotFound(strsrch); |
4698 return FALSE; | 4707 return FALSE; |
4699 } | 4708 } |
4700 | 4709 |
4701 #if BOYER_MOORE | 4710 #if BOYER_MOORE |
4702 UCollationElements *coleiter = strsrch->textIter; | 4711 UCollationElements *coleiter = strsrch->textIter; |
4703 int32_t *patternce = strsrch->pattern.CE; | 4712 int32_t *patternce = strsrch->pattern.ces; |
4704 int32_t patterncelength = strsrch->pattern.CELength; | 4713 int32_t patterncelength = strsrch->pattern.cesLength; |
4705 int32_t textoffset = ucol_getOffset(coleiter); | 4714 int32_t textoffset = ucol_getOffset(coleiter); |
4706 UBool hasPatternAccents = | 4715 UBool hasPatternAccents = |
4707 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents; | 4716 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents; |
4708 | 4717 |
4709 // shifting it check for setting offset | 4718 // shifting it check for setting offset |
4710 // if setOffset is called previously or there was no previous match, we | 4719 // if setOffset is called previously or there was no previous match, we |
4711 // leave the offset as it is. | 4720 // leave the offset as it is. |
4712 if (strsrch->search->matchedIndex != USEARCH_DONE) { | 4721 if (strsrch->search->matchedIndex != USEARCH_DONE) { |
4713 textoffset = strsrch->search->matchedIndex; | 4722 textoffset = strsrch->search->matchedIndex; |
4714 } | 4723 } |
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4801 return FALSE; | 4810 return FALSE; |
4802 #else | 4811 #else |
4803 int32_t textOffset; | 4812 int32_t textOffset; |
4804 | 4813 |
4805 if (strsrch->search->isOverlap) { | 4814 if (strsrch->search->isOverlap) { |
4806 if (strsrch->search->matchedIndex != USEARCH_DONE) { | 4815 if (strsrch->search->matchedIndex != USEARCH_DONE) { |
4807 textOffset = strsrch->search->matchedIndex + strsrch->search->matche
dLength - 1; | 4816 textOffset = strsrch->search->matchedIndex + strsrch->search->matche
dLength - 1; |
4808 } else { | 4817 } else { |
4809 // move the start position at the end of possible match | 4818 // move the start position at the end of possible match |
4810 initializePatternPCETable(strsrch, status); | 4819 initializePatternPCETable(strsrch, status); |
4811 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.PCELength - 1; nPCE
s++) { | 4820 if (!initTextProcessedIter(strsrch, status)) { |
4812 int64_t pce = ucol_nextProcessed(strsrch->textIter, NULL, NULL,
status); | 4821 setMatchNotFound(strsrch); |
| 4822 return FALSE; |
| 4823 } |
| 4824 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPC
Es++) { |
| 4825 int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NU
LL, status); |
4813 if (pce == UCOL_PROCESSED_NULLORDER) { | 4826 if (pce == UCOL_PROCESSED_NULLORDER) { |
4814 // at the end of the text | 4827 // at the end of the text |
4815 break; | 4828 break; |
4816 } | 4829 } |
4817 } | 4830 } |
4818 if (U_FAILURE(*status)) { | 4831 if (U_FAILURE(*status)) { |
4819 setMatchNotFound(strsrch); | 4832 setMatchNotFound(strsrch); |
4820 return FALSE; | 4833 return FALSE; |
4821 } | 4834 } |
4822 textOffset = ucol_getOffset(strsrch->textIter); | 4835 textOffset = ucol_getOffset(strsrch->textIter); |
(...skipping 10 matching lines...) Expand all Loading... |
4833 strsrch->search->matchedLength = end - start; | 4846 strsrch->search->matchedLength = end - start; |
4834 return TRUE; | 4847 return TRUE; |
4835 } else { | 4848 } else { |
4836 setMatchNotFound(strsrch); | 4849 setMatchNotFound(strsrch); |
4837 return FALSE; | 4850 return FALSE; |
4838 } | 4851 } |
4839 #endif | 4852 #endif |
4840 } | 4853 } |
4841 | 4854 |
4842 #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */ | 4855 #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */ |
OLD | NEW |