Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(47)

Side by Side Diff: source/i18n/usearch.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unusued directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/i18n/uregex.cpp ('k') | source/i18n/uspoof.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ********************************************************************** 2 **********************************************************************
3 * Copyright (C) 2001-2011 IBM and others. All rights reserved. 3 * Copyright (C) 2001-2014 IBM and others. All rights reserved.
4 ********************************************************************** 4 **********************************************************************
5 * Date Name Description 5 * Date Name Description
6 * 07/02/2001 synwee Creation. 6 * 07/02/2001 synwee Creation.
7 ********************************************************************** 7 **********************************************************************
8 */ 8 */
9 9
10 #include "unicode/utypes.h" 10 #include "unicode/utypes.h"
11 11
12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION 12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
13 13
14 #include "unicode/usearch.h" 14 #include "unicode/usearch.h"
15 #include "unicode/ustring.h" 15 #include "unicode/ustring.h"
16 #include "unicode/uchar.h" 16 #include "unicode/uchar.h"
17 #include "unicode/utf16.h" 17 #include "unicode/utf16.h"
18 #include "normalizer2impl.h" 18 #include "normalizer2impl.h"
19 #include "ucol_imp.h"
20 #include "usrchimp.h" 19 #include "usrchimp.h"
21 #include "cmemory.h" 20 #include "cmemory.h"
22 #include "ucln_in.h" 21 #include "ucln_in.h"
23 #include "uassert.h" 22 #include "uassert.h"
24 #include "ustr_imp.h" 23 #include "ustr_imp.h"
25 24
26 U_NAMESPACE_USE 25 U_NAMESPACE_USE
27 26
28 // don't use Boyer-Moore 27 // don't use Boyer-Moore
29 // (and if we decide to turn this on again there are several new TODOs that will need to be addressed) 28 // (and if we decide to turn this on again there are several new TODOs that will need to be addressed)
30 #define BOYER_MOORE 0 29 #define BOYER_MOORE 0
31 30
32 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
33
34 // internal definition --------------------------------------------------- 31 // internal definition ---------------------------------------------------
35 32
36 #define LAST_BYTE_MASK_ 0xFF 33 #define LAST_BYTE_MASK_ 0xFF
37 #define SECOND_LAST_BYTE_SHIFT_ 8 34 #define SECOND_LAST_BYTE_SHIFT_ 8
38 #define SUPPLEMENTARY_MIN_VALUE_ 0x10000 35 #define SUPPLEMENTARY_MIN_VALUE_ 0x10000
39 36
40 static const Normalizer2Impl *g_nfcImpl = NULL; 37 static const Normalizer2Impl *g_nfcImpl = NULL;
41 38
42 // internal methods ------------------------------------------------- 39 // internal methods -------------------------------------------------
43 40
44 /** 41 /**
45 * Fast collation element iterator setOffset. 42 * Fast collation element iterator setOffset.
46 * This function does not check for bounds. 43 * This function does not check for bounds.
47 * @param coleiter collation element iterator 44 * @param coleiter collation element iterator
48 * @param offset to set 45 * @param offset to set
49 */ 46 */
50 static 47 static
51 inline void setColEIterOffset(UCollationElements *elems, 48 inline void setColEIterOffset(UCollationElements *elems,
52 int32_t offset) 49 int32_t offset)
53 { 50 {
54 collIterate *ci = &(elems->iteratordata_); 51 // Note: Not "fast" any more after the 2013 collation rewrite.
55 ci->pos = ci->string + offset; 52 // We do not want to expose more internals than necessary.
56 ci->CEpos = ci->toReturn = ci->extendCEs ? ci->extendCEs : ci->CEs; 53 UErrorCode status = U_ZERO_ERROR;
57 if (ci->flags & UCOL_ITER_INNORMBUF) { 54 ucol_setOffset(elems, offset, &status);
58 ci->flags = ci->origFlags;
59 }
60 ci->fcdPosition = NULL;
61
62 ci->offsetReturn = NULL;
63 ci->offsetStore = ci->offsetBuffer;
64 ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
65 } 55 }
66 56
67 /** 57 /**
68 * Getting the mask for collation strength 58 * Getting the mask for collation strength
69 * @param strength collation strength 59 * @param strength collation strength
70 * @return collation element mask 60 * @return collation element mask
71 */ 61 */
72 static 62 static
73 inline uint32_t getMask(UCollationStrength strength) 63 inline uint32_t getMask(UCollationStrength strength)
74 { 64 {
(...skipping 216 matching lines...) Expand 10 before | Expand all | Expand 10 after
291 * @param status output error if any, caller to check status before calling 281 * @param status output error if any, caller to check status before calling
292 * method, status assumed to be success when passed in. 282 * method, status assumed to be success when passed in.
293 * @return total number of expansions 283 * @return total number of expansions
294 */ 284 */
295 static 285 static
296 inline uint16_t initializePatternCETable(UStringSearch *strsrch, 286 inline uint16_t initializePatternCETable(UStringSearch *strsrch,
297 UErrorCode *status) 287 UErrorCode *status)
298 { 288 {
299 UPattern *pattern = &(strsrch->pattern); 289 UPattern *pattern = &(strsrch->pattern);
300 uint32_t cetablesize = INITIAL_ARRAY_SIZE_; 290 uint32_t cetablesize = INITIAL_ARRAY_SIZE_;
301 int32_t *cetable = pattern->CEBuffer; 291 int32_t *cetable = pattern->cesBuffer;
302 uint32_t patternlength = pattern->textLength; 292 uint32_t patternlength = pattern->textLength;
303 UCollationElements *coleiter = strsrch->utilIter; 293 UCollationElements *coleiter = strsrch->utilIter;
304 294
305 if (coleiter == NULL) { 295 if (coleiter == NULL) {
306 coleiter = ucol_openElements(strsrch->collator, pattern->text, 296 coleiter = ucol_openElements(strsrch->collator, pattern->text,
307 patternlength, status); 297 patternlength, status);
308 // status will be checked in ucol_next(..) later and if it is an 298 // status will be checked in ucol_next(..) later and if it is an
309 // error UCOL_NULLORDER the result of ucol_next(..) and 0 will be 299 // error UCOL_NULLORDER the result of ucol_next(..) and 0 will be
310 // returned. 300 // returned.
311 strsrch->utilIter = coleiter; 301 strsrch->utilIter = coleiter;
312 } 302 }
313 else { 303 else {
314 uprv_init_collIterate(strsrch->collator, pattern->text, 304 ucol_setText(coleiter, pattern->text, pattern->textLength, status);
315 pattern->textLength,
316 &coleiter->iteratordata_,
317 status);
318 } 305 }
319 if(U_FAILURE(*status)) { 306 if(U_FAILURE(*status)) {
320 return 0; 307 return 0;
321 } 308 }
322 309
323 if (pattern->CE != cetable && pattern->CE) { 310 if (pattern->ces != cetable && pattern->ces) {
324 uprv_free(pattern->CE); 311 uprv_free(pattern->ces);
325 } 312 }
326 313
327 uint16_t offset = 0; 314 uint16_t offset = 0;
328 uint16_t result = 0; 315 uint16_t result = 0;
329 int32_t ce; 316 int32_t ce;
330 317
331 while ((ce = ucol_next(coleiter, status)) != UCOL_NULLORDER && 318 while ((ce = ucol_next(coleiter, status)) != UCOL_NULLORDER &&
332 U_SUCCESS(*status)) { 319 U_SUCCESS(*status)) {
333 uint32_t newce = getCE(strsrch, ce); 320 uint32_t newce = getCE(strsrch, ce);
334 if (newce) { 321 if (newce) {
335 int32_t *temp = addTouint32_tArray(cetable, offset, &cetablesize, 322 int32_t *temp = addTouint32_tArray(cetable, offset, &cetablesize,
336 newce, 323 newce,
337 patternlength - ucol_getOffset(coleiter) + 1, 324 patternlength - ucol_getOffset(coleiter) + 1,
338 status); 325 status);
339 if (U_FAILURE(*status)) { 326 if (U_FAILURE(*status)) {
340 return 0; 327 return 0;
341 } 328 }
342 offset ++; 329 offset ++;
343 if (cetable != temp && cetable != pattern->CEBuffer) { 330 if (cetable != temp && cetable != pattern->cesBuffer) {
344 uprv_free(cetable); 331 uprv_free(cetable);
345 } 332 }
346 cetable = temp; 333 cetable = temp;
347 } 334 }
348 result += (uint16_t)(ucol_getMaxExpansion(coleiter, ce) - 1); 335 result += (uint16_t)(ucol_getMaxExpansion(coleiter, ce) - 1);
349 } 336 }
350 337
351 cetable[offset] = 0; 338 cetable[offset] = 0;
352 pattern->CE = cetable; 339 pattern->ces = cetable;
353 pattern->CELength = offset; 340 pattern->cesLength = offset;
354 341
355 return result; 342 return result;
356 } 343 }
357 344
358 /** 345 /**
359 * Initializing the pce table for a pattern. 346 * Initializing the pce table for a pattern.
360 * Stores non-ignorable collation keys. 347 * Stores non-ignorable collation keys.
361 * Table size will be estimated by the size of the pattern text. Table 348 * Table size will be estimated by the size of the pattern text. Table
362 * expansion will be perform as we go along. Adding 1 to ensure that the table 349 * expansion will be perform as we go along. Adding 1 to ensure that the table
363 * size definitely increases. 350 * size definitely increases.
364 * Internal method, status assumed to be a success. 351 * Internal method, status assumed to be a success.
365 * @param strsrch string search data 352 * @param strsrch string search data
366 * @param status output error if any, caller to check status before calling 353 * @param status output error if any, caller to check status before calling
367 * method, status assumed to be success when passed in. 354 * method, status assumed to be success when passed in.
368 * @return total number of expansions 355 * @return total number of expansions
369 */ 356 */
370 static 357 static
371 inline uint16_t initializePatternPCETable(UStringSearch *strsrch, 358 inline uint16_t initializePatternPCETable(UStringSearch *strsrch,
372 UErrorCode *status) 359 UErrorCode *status)
373 { 360 {
374 UPattern *pattern = &(strsrch->pattern); 361 UPattern *pattern = &(strsrch->pattern);
375 uint32_t pcetablesize = INITIAL_ARRAY_SIZE_; 362 uint32_t pcetablesize = INITIAL_ARRAY_SIZE_;
376 int64_t *pcetable = pattern->PCEBuffer; 363 int64_t *pcetable = pattern->pcesBuffer;
377 uint32_t patternlength = pattern->textLength; 364 uint32_t patternlength = pattern->textLength;
378 UCollationElements *coleiter = strsrch->utilIter; 365 UCollationElements *coleiter = strsrch->utilIter;
379 366
380 if (coleiter == NULL) { 367 if (coleiter == NULL) {
381 coleiter = ucol_openElements(strsrch->collator, pattern->text, 368 coleiter = ucol_openElements(strsrch->collator, pattern->text,
382 patternlength, status); 369 patternlength, status);
383 // status will be checked in ucol_next(..) later and if it is an 370 // status will be checked in ucol_next(..) later and if it is an
384 // error UCOL_NULLORDER the result of ucol_next(..) and 0 will be 371 // error UCOL_NULLORDER the result of ucol_next(..) and 0 will be
385 // returned. 372 // returned.
386 strsrch->utilIter = coleiter; 373 strsrch->utilIter = coleiter;
387 } else { 374 } else {
388 uprv_init_collIterate(strsrch->collator, pattern->text, 375 ucol_setText(coleiter, pattern->text, pattern->textLength, status);
389 pattern->textLength,
390 &coleiter->iteratordata_,
391 status);
392 } 376 }
393 if(U_FAILURE(*status)) { 377 if(U_FAILURE(*status)) {
394 return 0; 378 return 0;
395 } 379 }
396 380
397 if (pattern->PCE != pcetable && pattern->PCE != NULL) { 381 if (pattern->pces != pcetable && pattern->pces != NULL) {
398 uprv_free(pattern->PCE); 382 uprv_free(pattern->pces);
399 } 383 }
400 384
401 uint16_t offset = 0; 385 uint16_t offset = 0;
402 uint16_t result = 0; 386 uint16_t result = 0;
403 int64_t pce; 387 int64_t pce;
404 388
405 uprv_init_pce(coleiter); 389 icu::UCollationPCE iter(coleiter);
406 390
407 // ** Should processed CEs be signed or unsigned? 391 // ** Should processed CEs be signed or unsigned?
408 // ** (the rest of the code in this file seems to play fast-and-loose with 392 // ** (the rest of the code in this file seems to play fast-and-loose with
409 // ** whether a CE is signed or unsigned. For example, look at routine above this one.) 393 // ** whether a CE is signed or unsigned. For example, look at routine above this one.)
410 while ((pce = ucol_nextProcessed(coleiter, NULL, NULL, status)) != UCOL_PROCESSED_NULLORDER && 394 while ((pce = iter.nextProcessed(NULL, NULL, status)) != UCOL_PROCESSED_NULLORDER &&
411 U_SUCCESS(*status)) { 395 U_SUCCESS(*status)) {
412 int64_t *temp = addTouint64_tArray(pcetable, offset, &pcetablesize, 396 int64_t *temp = addTouint64_tArray(pcetable, offset, &pcetablesize,
413 pce, 397 pce,
414 patternlength - ucol_getOffset(coleiter) + 1, 398 patternlength - ucol_getOffset(coleiter) + 1,
415 status); 399 status);
416 400
417 if (U_FAILURE(*status)) { 401 if (U_FAILURE(*status)) {
418 return 0; 402 return 0;
419 } 403 }
420 404
421 offset += 1; 405 offset += 1;
422 406
423 if (pcetable != temp && pcetable != pattern->PCEBuffer) { 407 if (pcetable != temp && pcetable != pattern->pcesBuffer) {
424 uprv_free(pcetable); 408 uprv_free(pcetable);
425 } 409 }
426 410
427 pcetable = temp; 411 pcetable = temp;
428 //result += (uint16_t)(ucol_getMaxExpansion(coleiter, ce) - 1); 412 //result += (uint16_t)(ucol_getMaxExpansion(coleiter, ce) - 1);
429 } 413 }
430 414
431 pcetable[offset] = 0; 415 pcetable[offset] = 0;
432 pattern->PCE = pcetable; 416 pattern->pces = pcetable;
433 pattern->PCELength = offset; 417 pattern->pcesLength = offset;
434 418
435 return result; 419 return result;
436 } 420 }
437 421
438 /** 422 /**
439 * Initializes the pattern struct. 423 * Initializes the pattern struct.
440 * Internal method, status assumed to be success. 424 * Internal method, status assumed to be success.
441 * @param strsrch UStringSearch data storage 425 * @param strsrch UStringSearch data storage
442 * @param status output error if any, caller to check status before calling 426 * @param status output error if any, caller to check status before calling
443 * method, status assumed to be success when passed in. 427 * method, status assumed to be success when passed in.
444 * @return expansionsize the total expansion size of the pattern 428 * @return expansionsize the total expansion size of the pattern
445 */ 429 */
446 static 430 static
447 inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status) 431 inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status)
448 { 432 {
433 if (U_FAILURE(*status)) { return 0; }
449 UPattern *pattern = &(strsrch->pattern); 434 UPattern *pattern = &(strsrch->pattern);
450 const UChar *patterntext = pattern->text; 435 const UChar *patterntext = pattern->text;
451 int32_t length = pattern->textLength; 436 int32_t length = pattern->textLength;
452 int32_t index = 0; 437 int32_t index = 0;
453 438
454 // Since the strength is primary, accents are ignored in the pattern. 439 // Since the strength is primary, accents are ignored in the pattern.
455 if (strsrch->strength == UCOL_PRIMARY) { 440 if (strsrch->strength == UCOL_PRIMARY) {
456 pattern->hasPrefixAccents = 0; 441 pattern->hasPrefixAccents = 0;
457 pattern->hasSuffixAccents = 0; 442 pattern->hasSuffixAccents = 0;
458 } else { 443 } else {
459 pattern->hasPrefixAccents = getFCD(patterntext, &index, length) >> 444 pattern->hasPrefixAccents = getFCD(patterntext, &index, length) >>
460 SECOND_LAST_BYTE_SHIFT_ ; 445 SECOND_LAST_BYTE_SHIFT_ ;
461 index = length; 446 index = length;
462 U16_BACK_1(patterntext, 0, index); 447 U16_BACK_1(patterntext, 0, index);
463 pattern->hasSuffixAccents = getFCD(patterntext, &index, length) & 448 pattern->hasSuffixAccents = getFCD(patterntext, &index, length) &
464 LAST_BYTE_MASK_ ; 449 LAST_BYTE_MASK_ ;
465 } 450 }
466 451
467 // ** HACK ** 452 // ** HACK **
468 if (strsrch->pattern.PCE != NULL) { 453 if (strsrch->pattern.pces != NULL) {
469 if (strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) { 454 if (strsrch->pattern.pces != strsrch->pattern.pcesBuffer) {
470 uprv_free(strsrch->pattern.PCE); 455 uprv_free(strsrch->pattern.pces);
471 } 456 }
472 457
473 strsrch->pattern.PCE = NULL; 458 strsrch->pattern.pces = NULL;
474 } 459 }
475 460
476 // since intializePattern is an internal method status is a success. 461 // since intializePattern is an internal method status is a success.
477 return initializePatternCETable(strsrch, status); 462 return initializePatternCETable(strsrch, status);
478 } 463 }
479 464
480 /** 465 /**
481 * Initializing shift tables, with the default values. 466 * Initializing shift tables, with the default values.
482 * If a corresponding default value is 0, the shift table is not set. 467 * If a corresponding default value is 0, the shift table is not set.
483 * @param shift table for forwards shift 468 * @param shift table for forwards shift
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
550 * If pattern has no non-ignorable ce, we return a illegal argument error. 535 * If pattern has no non-ignorable ce, we return a illegal argument error.
551 * Internal method, status assumed to be success. 536 * Internal method, status assumed to be success.
552 * @param strsrch UStringSearch data storage 537 * @param strsrch UStringSearch data storage
553 * @param status for output errors if it occurs, status is assumed to be a 538 * @param status for output errors if it occurs, status is assumed to be a
554 * success when it is passed in. 539 * success when it is passed in.
555 */ 540 */
556 static 541 static
557 inline void initialize(UStringSearch *strsrch, UErrorCode *status) 542 inline void initialize(UStringSearch *strsrch, UErrorCode *status)
558 { 543 {
559 int16_t expandlength = initializePattern(strsrch, status); 544 int16_t expandlength = initializePattern(strsrch, status);
560 if (U_SUCCESS(*status) && strsrch->pattern.CELength > 0) { 545 if (U_SUCCESS(*status) && strsrch->pattern.cesLength > 0) {
561 UPattern *pattern = &strsrch->pattern; 546 UPattern *pattern = &strsrch->pattern;
562 int32_t cesize = pattern->CELength; 547 int32_t cesize = pattern->cesLength;
563 548
564 int16_t minlength = cesize > expandlength 549 int16_t minlength = cesize > expandlength
565 ? (int16_t)cesize - expandlength : 1; 550 ? (int16_t)cesize - expandlength : 1;
566 pattern->defaultShiftSize = minlength; 551 pattern->defaultShiftSize = minlength;
567 setShiftTable(pattern->shift, pattern->backShift, pattern->CE, 552 setShiftTable(pattern->shift, pattern->backShift, pattern->ces,
568 cesize, expandlength, minlength, minlength); 553 cesize, expandlength, minlength, minlength);
569 return; 554 return;
570 } 555 }
571 strsrch->pattern.defaultShiftSize = 0; 556 strsrch->pattern.defaultShiftSize = 0;
572 } 557 }
573 558
574 #if BOYER_MOORE 559 #if BOYER_MOORE
575 /** 560 /**
576 * Check to make sure that the match length is at the end of the character by 561 * Check to make sure that the match length is at the end of the character by
577 * using the breakiterator. 562 * using the breakiterator.
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
633 ubrk_following(breakiterator, start - 1) == start) && 618 ubrk_following(breakiterator, start - 1) == start) &&
634 (end == endindex || 619 (end == endindex ||
635 ubrk_following(breakiterator, end - 1) == end); 620 ubrk_following(breakiterator, end - 1) == end);
636 if (result) { 621 if (result) {
637 // iterates the individual ces 622 // iterates the individual ces
638 UCollationElements *coleiter = strsrch->utilIter; 623 UCollationElements *coleiter = strsrch->utilIter;
639 const UChar *text = strsrch->search->text + 624 const UChar *text = strsrch->search->text +
640 start; 625 start;
641 UErrorCode status = U_ZERO_ERROR; 626 UErrorCode status = U_ZERO_ERROR;
642 ucol_setText(coleiter, text, end - start, &status); 627 ucol_setText(coleiter, text, end - start, &status);
643 for (int32_t count = 0; count < strsrch->pattern.CELength; 628 for (int32_t count = 0; count < strsrch->pattern.cesLength;
644 count ++) { 629 count ++) {
645 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status)); 630 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
646 if (ce == UCOL_IGNORABLE) { 631 if (ce == UCOL_IGNORABLE) {
647 count --; 632 count --;
648 continue; 633 continue;
649 } 634 }
650 if (U_FAILURE(status) || ce != strsrch->pattern.CE[count]) { 635 if (U_FAILURE(status) || ce != strsrch->pattern.ces[count]) {
651 return FALSE; 636 return FALSE;
652 } 637 }
653 } 638 }
654 int32_t nextce = ucol_next(coleiter, &status); 639 int32_t nextce = ucol_next(coleiter, &status);
655 while (ucol_getOffset(coleiter) == (end - start) 640 while (ucol_getOffset(coleiter) == (end - start)
656 && getCE(strsrch, nextce) == UCOL_IGNORABLE) { 641 && getCE(strsrch, nextce) == UCOL_IGNORABLE) {
657 nextce = ucol_next(coleiter, &status); 642 nextce = ucol_next(coleiter, &status);
658 } 643 }
659 if (ucol_getOffset(coleiter) == (end - start) 644 if (ucol_getOffset(coleiter) == (end - start)
660 && nextce != UCOL_NULLORDER) { 645 && nextce != UCOL_NULLORDER) {
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
741 inline int32_t shiftForward(UStringSearch *strsrch, 726 inline int32_t shiftForward(UStringSearch *strsrch,
742 int32_t textoffset, 727 int32_t textoffset,
743 int32_t ce, 728 int32_t ce,
744 int32_t patternceindex) 729 int32_t patternceindex)
745 { 730 {
746 UPattern *pattern = &(strsrch->pattern); 731 UPattern *pattern = &(strsrch->pattern);
747 if (ce != UCOL_NULLORDER) { 732 if (ce != UCOL_NULLORDER) {
748 int32_t shift = pattern->shift[hash(ce)]; 733 int32_t shift = pattern->shift[hash(ce)];
749 // this is to adjust for characters in the middle of the 734 // this is to adjust for characters in the middle of the
750 // substring for matching that failed. 735 // substring for matching that failed.
751 int32_t adjust = pattern->CELength - patternceindex; 736 int32_t adjust = pattern->cesLength - patternceindex;
752 if (adjust > 1 && shift >= adjust) { 737 if (adjust > 1 && shift >= adjust) {
753 shift -= adjust - 1; 738 shift -= adjust - 1;
754 } 739 }
755 textoffset += shift; 740 textoffset += shift;
756 } 741 }
757 else { 742 else {
758 textoffset += pattern->defaultShiftSize; 743 textoffset += pattern->defaultShiftSize;
759 } 744 }
760 745
761 textoffset = getNextUStringSearchBaseOffset(strsrch, textoffset); 746 textoffset = getNextUStringSearchBaseOffset(strsrch, textoffset);
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after
875 uprv_free(norm); 860 uprv_free(norm);
876 return FALSE; 861 return FALSE;
877 } 862 }
878 } 863 }
879 else { 864 else {
880 norm = buffer; 865 norm = buffer;
881 } 866 }
882 867
883 UCollationElements *coleiter = strsrch->utilIter; 868 UCollationElements *coleiter = strsrch->utilIter;
884 ucol_setText(coleiter, norm, size, status); 869 ucol_setText(coleiter, norm, size, status);
885 uint32_t firstce = strsrch->pattern.CE[0]; 870 uint32_t firstce = strsrch->pattern.ces[0];
886 UBool ignorable = TRUE; 871 UBool ignorable = TRUE;
887 uint32_t ce = UCOL_IGNORABLE; 872 uint32_t ce = UCOL_IGNORABLE;
888 while (U_SUCCESS(*status) && ce != firstce && ce != (uint32_t)UCOL_NULLORDER) { 873 while (U_SUCCESS(*status) && ce != firstce && ce != (uint32_t)UCOL_NULLORDER) {
889 offset = ucol_getOffset(coleiter); 874 offset = ucol_getOffset(coleiter);
890 if (ce != firstce && ce != UCOL_IGNORABLE) { 875 if (ce != firstce && ce != UCOL_IGNORABLE) {
891 ignorable = FALSE; 876 ignorable = FALSE;
892 } 877 }
893 ce = ucol_next(coleiter, status); 878 ce = ucol_next(coleiter, status);
894 } 879 }
895 UChar32 codepoint; 880 UChar32 codepoint;
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
928 */ 913 */
929 static 914 static
930 UBool hasAccentsBeforeMatch(const UStringSearch *strsrch, int32_t start, 915 UBool hasAccentsBeforeMatch(const UStringSearch *strsrch, int32_t start,
931 int32_t end) 916 int32_t end)
932 { 917 {
933 if (strsrch->pattern.hasPrefixAccents) { 918 if (strsrch->pattern.hasPrefixAccents) {
934 UCollationElements *coleiter = strsrch->textIter; 919 UCollationElements *coleiter = strsrch->textIter;
935 UErrorCode status = U_ZERO_ERROR; 920 UErrorCode status = U_ZERO_ERROR;
936 // we have been iterating forwards previously 921 // we have been iterating forwards previously
937 uint32_t ignorable = TRUE; 922 uint32_t ignorable = TRUE;
938 int32_t firstce = strsrch->pattern.CE[0]; 923 int32_t firstce = strsrch->pattern.ces[0];
939 924
940 setColEIterOffset(coleiter, start); 925 setColEIterOffset(coleiter, start);
941 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status)); 926 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
942 if (U_FAILURE(status)) { 927 if (U_FAILURE(status)) {
943 return TRUE; 928 return TRUE;
944 } 929 }
945 while (ce != firstce) { 930 while (ce != firstce) {
946 if (ce != UCOL_IGNORABLE) { 931 if (ce != UCOL_IGNORABLE) {
947 ignorable = FALSE; 932 ignorable = FALSE;
948 } 933 }
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
1010 static 995 static
1011 UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start, 996 UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start,
1012 int32_t end) 997 int32_t end)
1013 { 998 {
1014 if (strsrch->pattern.hasSuffixAccents) { 999 if (strsrch->pattern.hasSuffixAccents) {
1015 const UChar *text = strsrch->search->text; 1000 const UChar *text = strsrch->search->text;
1016 int32_t temp = end; 1001 int32_t temp = end;
1017 int32_t textlength = strsrch->search->textLength; 1002 int32_t textlength = strsrch->search->textLength;
1018 U16_BACK_1(text, 0, temp); 1003 U16_BACK_1(text, 0, temp);
1019 if (getFCD(text, &temp, textlength) & LAST_BYTE_MASK_) { 1004 if (getFCD(text, &temp, textlength) & LAST_BYTE_MASK_) {
1020 int32_t firstce = strsrch->pattern.CE[0]; 1005 int32_t firstce = strsrch->pattern.ces[0];
1021 UCollationElements *coleiter = strsrch->textIter; 1006 UCollationElements *coleiter = strsrch->textIter;
1022 UErrorCode status = U_ZERO_ERROR; 1007 UErrorCode status = U_ZERO_ERROR;
1023 int32_t ce; 1008 int32_t ce;
1024 setColEIterOffset(coleiter, start); 1009 setColEIterOffset(coleiter, start);
1025 while ((ce = getCE(strsrch, ucol_next(coleiter, &status))) != firstce) { 1010 while ((ce = getCE(strsrch, ucol_next(coleiter, &status))) != firstce) {
1026 if (U_FAILURE(status) || ce == UCOL_NULLORDER) { 1011 if (U_FAILURE(status) || ce == UCOL_NULLORDER) {
1027 return TRUE; 1012 return TRUE;
1028 } 1013 }
1029 } 1014 }
1030 int32_t count = 1; 1015 int32_t count = 1;
1031 while (count < strsrch->pattern.CELength) { 1016 while (count < strsrch->pattern.cesLength) {
1032 if (getCE(strsrch, ucol_next(coleiter, &status)) 1017 if (getCE(strsrch, ucol_next(coleiter, &status))
1033 == UCOL_IGNORABLE) { 1018 == UCOL_IGNORABLE) {
1034 // Thai can give an ignorable here. 1019 // Thai can give an ignorable here.
1035 count --; 1020 count --;
1036 } 1021 }
1037 if (U_FAILURE(status)) { 1022 if (U_FAILURE(status)) {
1038 return TRUE; 1023 return TRUE;
1039 } 1024 }
1040 count ++; 1025 count ++;
1041 } 1026 }
(...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after
1205 if (U_FAILURE(*status)) { 1190 if (U_FAILURE(*status)) {
1206 return FALSE; 1191 return FALSE;
1207 } 1192 }
1208 if (ucol_getOffset(coleiter) != temp) { 1193 if (ucol_getOffset(coleiter) != temp) {
1209 *start = temp; 1194 *start = temp;
1210 temp = ucol_getOffset(coleiter); 1195 temp = ucol_getOffset(coleiter);
1211 } 1196 }
1212 expansion --; 1197 expansion --;
1213 } 1198 }
1214 1199
1215 int32_t *patternce = strsrch->pattern.CE; 1200 int32_t *patternce = strsrch->pattern.ces;
1216 int32_t patterncelength = strsrch->pattern.CELength; 1201 int32_t patterncelength = strsrch->pattern.cesLength;
1217 int32_t count = 0; 1202 int32_t count = 0;
1218 while (count < patterncelength) { 1203 while (count < patterncelength) {
1219 int32_t ce = getCE(strsrch, ucol_next(coleiter, status)); 1204 int32_t ce = getCE(strsrch, ucol_next(coleiter, status));
1220 if (ce == UCOL_IGNORABLE) { 1205 if (ce == UCOL_IGNORABLE) {
1221 continue; 1206 continue;
1222 } 1207 }
1223 if (expandflag && count == 0 && ucol_getOffset(coleiter) != temp) { 1208 if (expandflag && count == 0 && ucol_getOffset(coleiter) != temp) {
1224 *start = temp; 1209 *start = temp;
1225 temp = ucol_getOffset(coleiter); 1210 temp = ucol_getOffset(coleiter);
1226 } 1211 }
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after
1408 * Running through a collation element iterator to see if the contents matches 1393 * Running through a collation element iterator to see if the contents matches
1409 * pattern in string search data 1394 * pattern in string search data
1410 * @param strsrch string search data 1395 * @param strsrch string search data
1411 * @param coleiter collation element iterator 1396 * @param coleiter collation element iterator
1412 * @return TRUE if a match if found, FALSE otherwise 1397 * @return TRUE if a match if found, FALSE otherwise
1413 */ 1398 */
1414 static 1399 static
1415 inline UBool checkCollationMatch(const UStringSearch *strsrch, 1400 inline UBool checkCollationMatch(const UStringSearch *strsrch,
1416 UCollationElements *coleiter) 1401 UCollationElements *coleiter)
1417 { 1402 {
1418 int patternceindex = strsrch->pattern.CELength; 1403 int patternceindex = strsrch->pattern.cesLength;
1419 int32_t *patternce = strsrch->pattern.CE; 1404 int32_t *patternce = strsrch->pattern.ces;
1420 UErrorCode status = U_ZERO_ERROR; 1405 UErrorCode status = U_ZERO_ERROR;
1421 while (patternceindex > 0) { 1406 while (patternceindex > 0) {
1422 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status)); 1407 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
1423 if (ce == UCOL_IGNORABLE) { 1408 if (ce == UCOL_IGNORABLE) {
1424 continue; 1409 continue;
1425 } 1410 }
1426 if (U_FAILURE(status) || ce != *patternce) { 1411 if (U_FAILURE(status) || ce != *patternce) {
1427 return FALSE; 1412 return FALSE;
1428 } 1413 }
1429 patternce ++; 1414 patternce ++;
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after
1608 } 1593 }
1609 else { 1594 else {
1610 safetextlength = u_strlen(strsrch->canonicalSuffixAccents); 1595 safetextlength = u_strlen(strsrch->canonicalSuffixAccents);
1611 safetext = strsrch->canonicalSuffixAccents; 1596 safetext = strsrch->canonicalSuffixAccents;
1612 } 1597 }
1613 1598
1614 // if status is a failure, ucol_setText does nothing 1599 // if status is a failure, ucol_setText does nothing
1615 ucol_setText(coleiter, safetext, safetextlength, status); 1600 ucol_setText(coleiter, safetext, safetextlength, status);
1616 // status checked in loop below 1601 // status checked in loop below
1617 1602
1618 int32_t *ce = strsrch->pattern.CE; 1603 int32_t *ce = strsrch->pattern.ces;
1619 int32_t celength = strsrch->pattern.CELength; 1604 int32_t celength = strsrch->pattern.cesLength;
1620 int ceindex = celength - 1; 1605 int ceindex = celength - 1;
1621 UBool isSafe = TRUE; // indication flag for position in safe zone 1606 UBool isSafe = TRUE; // indication flag for position in safe zone
1622 1607
1623 while (ceindex >= 0) { 1608 while (ceindex >= 0) {
1624 int32_t textce = ucol_previous(coleiter, status); 1609 int32_t textce = ucol_previous(coleiter, status);
1625 if (U_FAILURE(*status)) { 1610 if (U_FAILURE(*status)) {
1626 if (isSafe) { 1611 if (isSafe) {
1627 cleanUpSafeText(strsrch, safetext, safebuffer); 1612 cleanUpSafeText(strsrch, safetext, safebuffer);
1628 } 1613 }
1629 return USEARCH_DONE; 1614 return USEARCH_DONE;
(...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after
1848 if (U_FAILURE(*status)) { 1833 if (U_FAILURE(*status)) {
1849 return FALSE; 1834 return FALSE;
1850 } 1835 }
1851 if (ucol_getOffset(coleiter) != temp) { 1836 if (ucol_getOffset(coleiter) != temp) {
1852 *start = temp; 1837 *start = temp;
1853 temp = ucol_getOffset(coleiter); 1838 temp = ucol_getOffset(coleiter);
1854 } 1839 }
1855 expansion --; 1840 expansion --;
1856 } 1841 }
1857 1842
1858 int32_t *patternce = strsrch->pattern.CE; 1843 int32_t *patternce = strsrch->pattern.ces;
1859 int32_t patterncelength = strsrch->pattern.CELength; 1844 int32_t patterncelength = strsrch->pattern.cesLength;
1860 int32_t count = 0; 1845 int32_t count = 0;
1861 int32_t textlength = strsrch->search->textLength; 1846 int32_t textlength = strsrch->search->textLength;
1862 while (count < patterncelength) { 1847 while (count < patterncelength) {
1863 int32_t ce = getCE(strsrch, ucol_next(coleiter, status)); 1848 int32_t ce = getCE(strsrch, ucol_next(coleiter, status));
1864 // status checked below, note that if status is a failure 1849 // status checked below, note that if status is a failure
1865 // ucol_next returns UCOL_NULLORDER 1850 // ucol_next returns UCOL_NULLORDER
1866 if (ce == UCOL_IGNORABLE) { 1851 if (ce == UCOL_IGNORABLE) {
1867 continue; 1852 continue;
1868 } 1853 }
1869 if (expandflag && count == 0 && ucol_getOffset(coleiter) != temp) { 1854 if (expandflag && count == 0 && ucol_getOffset(coleiter) != temp) {
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after
2046 if (U_FAILURE(*status)) { 2031 if (U_FAILURE(*status)) {
2047 return FALSE; 2032 return FALSE;
2048 } 2033 }
2049 if (ucol_getOffset(coleiter) != temp) { 2034 if (ucol_getOffset(coleiter) != temp) {
2050 *end = temp; 2035 *end = temp;
2051 temp = ucol_getOffset(coleiter); 2036 temp = ucol_getOffset(coleiter);
2052 } 2037 }
2053 expansion --; 2038 expansion --;
2054 } 2039 }
2055 2040
2056 int32_t *patternce = strsrch->pattern.CE; 2041 int32_t *patternce = strsrch->pattern.ces;
2057 int32_t patterncelength = strsrch->pattern.CELength; 2042 int32_t patterncelength = strsrch->pattern.cesLength;
2058 int32_t count = patterncelength; 2043 int32_t count = patterncelength;
2059 while (count > 0) { 2044 while (count > 0) {
2060 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status)); 2045 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
2061 // status checked below, note that if status is a failure 2046 // status checked below, note that if status is a failure
2062 // ucol_previous returns UCOL_NULLORDER 2047 // ucol_previous returns UCOL_NULLORDER
2063 if (ce == UCOL_IGNORABLE) { 2048 if (ce == UCOL_IGNORABLE) {
2064 continue; 2049 continue;
2065 } 2050 }
2066 if (expandflag && count == 0 && 2051 if (expandflag && count == 0 &&
2067 getColElemIterOffset(coleiter, FALSE) != temp) { 2052 getColElemIterOffset(coleiter, FALSE) != temp) {
(...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after
2271 else { 2256 else {
2272 safetextlength = u_strlen(strsrch->canonicalPrefixAccents); 2257 safetextlength = u_strlen(strsrch->canonicalPrefixAccents);
2273 safetext = strsrch->canonicalPrefixAccents; 2258 safetext = strsrch->canonicalPrefixAccents;
2274 } 2259 }
2275 2260
2276 UCollationElements *coleiter = strsrch->utilIter; 2261 UCollationElements *coleiter = strsrch->utilIter;
2277 // if status is a failure, ucol_setText does nothing 2262 // if status is a failure, ucol_setText does nothing
2278 ucol_setText(coleiter, safetext, safetextlength, status); 2263 ucol_setText(coleiter, safetext, safetextlength, status);
2279 // status checked in loop below 2264 // status checked in loop below
2280 2265
2281 int32_t *ce = strsrch->pattern.CE; 2266 int32_t *ce = strsrch->pattern.ces;
2282 int32_t celength = strsrch->pattern.CELength; 2267 int32_t celength = strsrch->pattern.cesLength;
2283 int ceindex = 0; 2268 int ceindex = 0;
2284 UBool isSafe = TRUE; // safe zone indication flag for position 2269 UBool isSafe = TRUE; // safe zone indication flag for position
2285 int32_t prefixlength = u_strlen(strsrch->canonicalPrefixAccents); 2270 int32_t prefixlength = u_strlen(strsrch->canonicalPrefixAccents);
2286 2271
2287 while (ceindex < celength) { 2272 while (ceindex < celength) {
2288 int32_t textce = ucol_next(coleiter, status); 2273 int32_t textce = ucol_next(coleiter, status);
2289 if (U_FAILURE(*status)) { 2274 if (U_FAILURE(*status)) {
2290 if (isSafe) { 2275 if (isSafe) {
2291 cleanUpSafeText(strsrch, safetext, safebuffer); 2276 cleanUpSafeText(strsrch, safetext, safebuffer);
2292 } 2277 }
(...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after
2486 if (U_FAILURE(*status)) { 2471 if (U_FAILURE(*status)) {
2487 return FALSE; 2472 return FALSE;
2488 } 2473 }
2489 if (ucol_getOffset(coleiter) != temp) { 2474 if (ucol_getOffset(coleiter) != temp) {
2490 *end = temp; 2475 *end = temp;
2491 temp = ucol_getOffset(coleiter); 2476 temp = ucol_getOffset(coleiter);
2492 } 2477 }
2493 expansion --; 2478 expansion --;
2494 } 2479 }
2495 2480
2496 int32_t *patternce = strsrch->pattern.CE; 2481 int32_t *patternce = strsrch->pattern.ces;
2497 int32_t patterncelength = strsrch->pattern.CELength; 2482 int32_t patterncelength = strsrch->pattern.cesLength;
2498 int32_t count = patterncelength; 2483 int32_t count = patterncelength;
2499 while (count > 0) { 2484 while (count > 0) {
2500 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status)); 2485 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
2501 // status checked below, note that if status is a failure 2486 // status checked below, note that if status is a failure
2502 // ucol_previous returns UCOL_NULLORDER 2487 // ucol_previous returns UCOL_NULLORDER
2503 if (ce == UCOL_IGNORABLE) { 2488 if (ce == UCOL_IGNORABLE) {
2504 continue; 2489 continue;
2505 } 2490 }
2506 if (expandflag && count == 0 && 2491 if (expandflag && count == 0 &&
2507 getColElemIterOffset(coleiter, FALSE) != temp) { 2492 getColElemIterOffset(coleiter, FALSE) != temp) {
(...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after
2693 } 2678 }
2694 2679
2695 result->collator = collator; 2680 result->collator = collator;
2696 result->strength = ucol_getStrength(collator); 2681 result->strength = ucol_getStrength(collator);
2697 result->ceMask = getMask(result->strength); 2682 result->ceMask = getMask(result->strength);
2698 result->toShift = 2683 result->toShift =
2699 ucol_getAttribute(collator, UCOL_ALTERNATE_HANDLING, status) == 2684 ucol_getAttribute(collator, UCOL_ALTERNATE_HANDLING, status) ==
2700 UCOL_SHIFTED; 2685 UCOL_SHIFTED;
2701 result->variableTop = ucol_getVariableTop(collator, status); 2686 result->variableTop = ucol_getVariableTop(collator, status);
2702 2687
2703 result->nfd = Normalizer2Factory::getNFDInstance(*status); 2688 result->nfd = Normalizer2::getNFDInstance(*status);
2704 2689
2705 if (U_FAILURE(*status)) { 2690 if (U_FAILURE(*status)) {
2706 uprv_free(result); 2691 uprv_free(result);
2707 return NULL; 2692 return NULL;
2708 } 2693 }
2709 2694
2710 result->search = (USearch *)uprv_malloc(sizeof(USearch)); 2695 result->search = (USearch *)uprv_malloc(sizeof(USearch));
2711 if (result->search == NULL) { 2696 if (result->search == NULL) {
2712 *status = U_MEMORY_ALLOCATION_ERROR; 2697 *status = U_MEMORY_ALLOCATION_ERROR;
2713 uprv_free(result); 2698 uprv_free(result);
2714 return NULL; 2699 return NULL;
2715 } 2700 }
2716 2701
2717 result->search->text = text; 2702 result->search->text = text;
2718 result->search->textLength = textlength; 2703 result->search->textLength = textlength;
2719 2704
2720 result->pattern.text = pattern; 2705 result->pattern.text = pattern;
2721 result->pattern.textLength = patternlength; 2706 result->pattern.textLength = patternlength;
2722 result->pattern.CE = NULL; 2707 result->pattern.ces = NULL;
2723 result->pattern.PCE = NULL; 2708 result->pattern.pces = NULL;
2724 2709
2725 result->search->breakIter = breakiter; 2710 result->search->breakIter = breakiter;
2726 #if !UCONFIG_NO_BREAK_ITERATION 2711 #if !UCONFIG_NO_BREAK_ITERATION
2727 result->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLo caleByType(result->collator, ULOC_VALID_LOCALE, status), text, textlength, statu s); 2712 result->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLo caleByType(result->collator, ULOC_VALID_LOCALE, status), text, textlength, statu s);
2728 if (breakiter) { 2713 if (breakiter) {
2729 ubrk_setText(breakiter, text, textlength, status); 2714 ubrk_setText(breakiter, text, textlength, status);
2730 } 2715 }
2731 #endif 2716 #endif
2732 2717
2733 result->ownCollator = FALSE; 2718 result->ownCollator = FALSE;
2734 result->search->matchedLength = 0; 2719 result->search->matchedLength = 0;
2735 result->search->matchedIndex = USEARCH_DONE; 2720 result->search->matchedIndex = USEARCH_DONE;
2736 result->utilIter = NULL; 2721 result->utilIter = NULL;
2737 result->textIter = ucol_openElements(collator, text, 2722 result->textIter = ucol_openElements(collator, text,
2738 textlength, status); 2723 textlength, status);
2724 result->textProcessedIter = NULL;
2739 if (U_FAILURE(*status)) { 2725 if (U_FAILURE(*status)) {
2740 usearch_close(result); 2726 usearch_close(result);
2741 return NULL; 2727 return NULL;
2742 } 2728 }
2743 2729
2744 result->search->isOverlap = FALSE; 2730 result->search->isOverlap = FALSE;
2745 result->search->isCanonicalMatch = FALSE; 2731 result->search->isCanonicalMatch = FALSE;
2746 result->search->elementComparisonType = 0; 2732 result->search->elementComparisonType = 0;
2747 result->search->isForwardSearching = TRUE; 2733 result->search->isForwardSearching = TRUE;
2748 result->search->reset = TRUE; 2734 result->search->reset = TRUE;
2749 2735
2750 initialize(result, status); 2736 initialize(result, status);
2751 2737
2752 if (U_FAILURE(*status)) { 2738 if (U_FAILURE(*status)) {
2753 usearch_close(result); 2739 usearch_close(result);
2754 return NULL; 2740 return NULL;
2755 } 2741 }
2756 2742
2757 return result; 2743 return result;
2758 } 2744 }
2759 return NULL; 2745 return NULL;
2760 } 2746 }
2761 2747
2762 U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch) 2748 U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch)
2763 { 2749 {
2764 if (strsrch) { 2750 if (strsrch) {
2765 if (strsrch->pattern.CE != strsrch->pattern.CEBuffer && 2751 if (strsrch->pattern.ces != strsrch->pattern.cesBuffer &&
2766 strsrch->pattern.CE) { 2752 strsrch->pattern.ces) {
2767 uprv_free(strsrch->pattern.CE); 2753 uprv_free(strsrch->pattern.ces);
2768 } 2754 }
2769 2755
2770 if (strsrch->pattern.PCE != NULL && 2756 if (strsrch->pattern.pces != NULL &&
2771 strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) { 2757 strsrch->pattern.pces != strsrch->pattern.pcesBuffer) {
2772 uprv_free(strsrch->pattern.PCE); 2758 uprv_free(strsrch->pattern.pces);
2773 } 2759 }
2774 2760
2761 delete strsrch->textProcessedIter;
2775 ucol_closeElements(strsrch->textIter); 2762 ucol_closeElements(strsrch->textIter);
2776 ucol_closeElements(strsrch->utilIter); 2763 ucol_closeElements(strsrch->utilIter);
2777 2764
2778 if (strsrch->ownCollator && strsrch->collator) { 2765 if (strsrch->ownCollator && strsrch->collator) {
2779 ucol_close((UCollator *)strsrch->collator); 2766 ucol_close((UCollator *)strsrch->collator);
2780 } 2767 }
2781 2768
2782 #if !UCONFIG_NO_BREAK_ITERATION 2769 #if !UCONFIG_NO_BREAK_ITERATION
2783 if (strsrch->search->internalBreakIter) { 2770 if (strsrch->search->internalBreakIter) {
2784 ubrk_close(strsrch->search->internalBreakIter); 2771 ubrk_close(strsrch->search->internalBreakIter);
2785 } 2772 }
2786 #endif 2773 #endif
2787 2774
2788 uprv_free(strsrch->search); 2775 uprv_free(strsrch->search);
2789 uprv_free(strsrch); 2776 uprv_free(strsrch);
2790 } 2777 }
2791 } 2778 }
2792 2779
2780 namespace {
2781
2782 UBool initTextProcessedIter(UStringSearch *strsrch, UErrorCode *status) {
2783 if (U_FAILURE(*status)) { return FALSE; }
2784 if (strsrch->textProcessedIter == NULL) {
2785 strsrch->textProcessedIter = new icu::UCollationPCE(strsrch->textIter);
2786 if (strsrch->textProcessedIter == NULL) {
2787 *status = U_MEMORY_ALLOCATION_ERROR;
2788 return FALSE;
2789 }
2790 } else {
2791 strsrch->textProcessedIter->init(strsrch->textIter);
2792 }
2793 return TRUE;
2794 }
2795
2796 }
2797
2793 // set and get methods -------------------------------------------------- 2798 // set and get methods --------------------------------------------------
2794 2799
2795 U_CAPI void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch, 2800 U_CAPI void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch,
2796 int32_t position, 2801 int32_t position,
2797 UErrorCode *status) 2802 UErrorCode *status)
2798 { 2803 {
2799 if (U_SUCCESS(*status) && strsrch) { 2804 if (U_SUCCESS(*status) && strsrch) {
2800 if (isOutOfBounds(strsrch->search->textLength, position)) { 2805 if (isOutOfBounds(strsrch->search->textLength, position)) {
2801 *status = U_INDEX_OUTOFBOUNDS_ERROR; 2806 *status = U_INDEX_OUTOFBOUNDS_ERROR;
2802 } 2807 }
(...skipping 200 matching lines...) Expand 10 before | Expand all | Expand 10 after
3003 const UCollator *collator, 3008 const UCollator *collator,
3004 UErrorCode *status) 3009 UErrorCode *status)
3005 { 3010 {
3006 if (U_SUCCESS(*status)) { 3011 if (U_SUCCESS(*status)) {
3007 if (collator == NULL) { 3012 if (collator == NULL) {
3008 *status = U_ILLEGAL_ARGUMENT_ERROR; 3013 *status = U_ILLEGAL_ARGUMENT_ERROR;
3009 return; 3014 return;
3010 } 3015 }
3011 3016
3012 if (strsrch) { 3017 if (strsrch) {
3018 delete strsrch->textProcessedIter;
3019 strsrch->textProcessedIter = NULL;
3020 ucol_closeElements(strsrch->textIter);
3021 ucol_closeElements(strsrch->utilIter);
3022 strsrch->textIter = strsrch->utilIter = NULL;
3013 if (strsrch->ownCollator && (strsrch->collator != collator)) { 3023 if (strsrch->ownCollator && (strsrch->collator != collator)) {
3014 ucol_close((UCollator *)strsrch->collator); 3024 ucol_close((UCollator *)strsrch->collator);
3015 strsrch->ownCollator = FALSE; 3025 strsrch->ownCollator = FALSE;
3016 } 3026 }
3017 strsrch->collator = collator; 3027 strsrch->collator = collator;
3018 strsrch->strength = ucol_getStrength(collator); 3028 strsrch->strength = ucol_getStrength(collator);
3019 strsrch->ceMask = getMask(strsrch->strength); 3029 strsrch->ceMask = getMask(strsrch->strength);
3020 #if !UCONFIG_NO_BREAK_ITERATION 3030 #if !UCONFIG_NO_BREAK_ITERATION
3021 ubrk_close(strsrch->search->internalBreakIter); 3031 ubrk_close(strsrch->search->internalBreakIter);
3022 strsrch->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_ getLocaleByType(collator, ULOC_VALID_LOCALE, status), 3032 strsrch->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_ getLocaleByType(collator, ULOC_VALID_LOCALE, status),
3023 strsrch->search->text, strs rch->search->textLength, status); 3033 strsrch->search->text, strs rch->search->textLength, status);
3024 #endif 3034 #endif
3025 // if status is a failure, ucol_getAttribute returns UCOL_DEFAULT 3035 // if status is a failure, ucol_getAttribute returns UCOL_DEFAULT
3026 strsrch->toShift = 3036 strsrch->toShift =
3027 ucol_getAttribute(collator, UCOL_ALTERNATE_HANDLING, status) == 3037 ucol_getAttribute(collator, UCOL_ALTERNATE_HANDLING, status) ==
3028 UCOL_SHIFTED; 3038 UCOL_SHIFTED;
3029 // if status is a failure, ucol_getVariableTop returns 0 3039 // if status is a failure, ucol_getVariableTop returns 0
3030 strsrch->variableTop = ucol_getVariableTop(collator, status); 3040 strsrch->variableTop = ucol_getVariableTop(collator, status);
3031 if (U_SUCCESS(*status)) { 3041 strsrch->textIter = ucol_openElements(collator,
3032 initialize(strsrch, status); 3042 strsrch->search->text,
3033 if (U_SUCCESS(*status)) { 3043 strsrch->search->textLength,
3034 /* free offset buffer to avoid memory leak before initializing. */ 3044 status);
3035 ucol_freeOffsetBuffer(&(strsrch->textIter->iteratordata_)); 3045 strsrch->utilIter = ucol_openElements(
3036 uprv_init_collIterate(collator, strsrch->search->text, 3046 collator, strsrch->pattern.text, strsrch->pattern.textLength , status);
3037 strsrch->search->textLength, 3047 // initialize() _after_ setting the iterators for the new collator.
3038 &(strsrch->textIter->iteratordata_), 3048 initialize(strsrch, status);
3039 status);
3040 strsrch->utilIter->iteratordata_.coll = collator;
3041 }
3042 }
3043 } 3049 }
3044 3050
3045 // **** are these calls needed? 3051 // **** are these calls needed?
3046 // **** we call uprv_init_pce in initializePatternPCETable 3052 // **** we call uprv_init_pce in initializePatternPCETable
3047 // **** and the CEBuffer constructor... 3053 // **** and the CEIBuffer constructor...
3048 #if 0 3054 #if 0
3049 uprv_init_pce(strsrch->textIter); 3055 uprv_init_pce(strsrch->textIter);
3050 uprv_init_pce(strsrch->utilIter); 3056 uprv_init_pce(strsrch->utilIter);
3051 #endif 3057 #endif
3052 } 3058 }
3053 } 3059 }
3054 3060
3055 U_CAPI UCollator * U_EXPORT2 usearch_getCollator(const UStringSearch *strsrch) 3061 U_CAPI UCollator * U_EXPORT2 usearch_getCollator(const UStringSearch *strsrch)
3056 { 3062 {
3057 if (strsrch) { 3063 if (strsrch) {
(...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after
3215 // match is not found. 3221 // match is not found.
3216 search->isForwardSearching = TRUE; 3222 search->isForwardSearching = TRUE;
3217 if (search->matchedIndex != USEARCH_DONE) { 3223 if (search->matchedIndex != USEARCH_DONE) {
3218 // there's no need to set the collation element iterator 3224 // there's no need to set the collation element iterator
3219 // the next call to next will set the offset. 3225 // the next call to next will set the offset.
3220 return search->matchedIndex; 3226 return search->matchedIndex;
3221 } 3227 }
3222 } 3228 }
3223 3229
3224 if (U_SUCCESS(*status)) { 3230 if (U_SUCCESS(*status)) {
3225 if (strsrch->pattern.CELength == 0) { 3231 if (strsrch->pattern.cesLength == 0) {
3226 if (search->matchedIndex == USEARCH_DONE) { 3232 if (search->matchedIndex == USEARCH_DONE) {
3227 search->matchedIndex = offset; 3233 search->matchedIndex = offset;
3228 } 3234 }
3229 else { // moves by codepoints 3235 else { // moves by codepoints
3230 U16_FWD_1(search->text, search->matchedIndex, textlength); 3236 U16_FWD_1(search->text, search->matchedIndex, textlength);
3231 } 3237 }
3232 3238
3233 search->matchedLength = 0; 3239 search->matchedLength = 0;
3234 setColEIterOffset(strsrch->textIter, search->matchedIndex); 3240 setColEIterOffset(strsrch->textIter, search->matchedIndex);
3235 // status checked below 3241 // status checked below
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
3326 // Could check pattern length, but the 3332 // Could check pattern length, but the
3327 // linear search will do the right thing 3333 // linear search will do the right thing
3328 if (offset == 0 || matchedindex == 0) { 3334 if (offset == 0 || matchedindex == 0) {
3329 setMatchNotFound(strsrch); 3335 setMatchNotFound(strsrch);
3330 return USEARCH_DONE; 3336 return USEARCH_DONE;
3331 } 3337 }
3332 #endif 3338 #endif
3333 } 3339 }
3334 3340
3335 if (U_SUCCESS(*status)) { 3341 if (U_SUCCESS(*status)) {
3336 if (strsrch->pattern.CELength == 0) { 3342 if (strsrch->pattern.cesLength == 0) {
3337 search->matchedIndex = 3343 search->matchedIndex =
3338 (matchedindex == USEARCH_DONE ? offset : matchedindex); 3344 (matchedindex == USEARCH_DONE ? offset : matchedindex);
3339 if (search->matchedIndex == 0) { 3345 if (search->matchedIndex == 0) {
3340 setMatchNotFound(strsrch); 3346 setMatchNotFound(strsrch);
3341 // status checked below 3347 // status checked below
3342 } 3348 }
3343 else { // move by codepoints 3349 else { // move by codepoints
3344 U16_BACK_1(search->text, 0, search->matchedIndex); 3350 U16_BACK_1(search->text, 0, search->matchedIndex);
3345 setColEIterOffset(strsrch->textIter, search->matchedIndex); 3351 setColEIterOffset(strsrch->textIter, search->matchedIndex);
3346 // status checked below 3352 // status checked below
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
3409 3415
3410 // if status is a failure, ucol_getVariableTop returns 0 3416 // if status is a failure, ucol_getVariableTop returns 0
3411 varTop = ucol_getVariableTop(strsrch->collator, &status); 3417 varTop = ucol_getVariableTop(strsrch->collator, &status);
3412 if (strsrch->variableTop != varTop) { 3418 if (strsrch->variableTop != varTop) {
3413 strsrch->variableTop = varTop; 3419 strsrch->variableTop = varTop;
3414 sameCollAttribute = FALSE; 3420 sameCollAttribute = FALSE;
3415 } 3421 }
3416 if (!sameCollAttribute) { 3422 if (!sameCollAttribute) {
3417 initialize(strsrch, &status); 3423 initialize(strsrch, &status);
3418 } 3424 }
3419 /* free offset buffer to avoid memory leak before initializing. */ 3425 ucol_setText(strsrch->textIter, strsrch->search->text,
3420 ucol_freeOffsetBuffer(&(strsrch->textIter->iteratordata_));
3421 uprv_init_collIterate(strsrch->collator, strsrch->search->text,
3422 strsrch->search->textLength, 3426 strsrch->search->textLength,
3423 &(strsrch->textIter->iteratordata_),
3424 &status); 3427 &status);
3425 strsrch->search->matchedLength = 0; 3428 strsrch->search->matchedLength = 0;
3426 strsrch->search->matchedIndex = USEARCH_DONE; 3429 strsrch->search->matchedIndex = USEARCH_DONE;
3427 strsrch->search->isOverlap = FALSE; 3430 strsrch->search->isOverlap = FALSE;
3428 strsrch->search->isCanonicalMatch = FALSE; 3431 strsrch->search->isCanonicalMatch = FALSE;
3429 strsrch->search->elementComparisonType = 0; 3432 strsrch->search->elementComparisonType = 0;
3430 strsrch->search->isForwardSearching = TRUE; 3433 strsrch->search->isForwardSearching = TRUE;
3431 strsrch->search->reset = TRUE; 3434 strsrch->search->reset = TRUE;
3432 } 3435 }
3433 } 3436 }
3434 3437
3435 // 3438 //
3436 // CEI Collation Element + source text index. 3439 // CEI Collation Element + source text index.
3437 // These structs are kept in the circular buffer. 3440 // These structs are kept in the circular buffer.
3438 // 3441 //
3439 struct CEI { 3442 struct CEI {
3440 int64_t ce; 3443 int64_t ce;
3441 int32_t lowIndex; 3444 int32_t lowIndex;
3442 int32_t highIndex; 3445 int32_t highIndex;
3443 }; 3446 };
3444 3447
3445 U_NAMESPACE_BEGIN 3448 U_NAMESPACE_BEGIN
3446 3449
3447 3450 namespace {
3448 // 3451 //
3449 // CEBuffer A circular buffer of CEs from the text being searched. 3452 // CEIBuffer A circular buffer of CEs-with-index from the text being searched.
3450 // 3453 //
3451 #define DEFAULT_CEBUFFER_SIZE 96 3454 #define DEFAULT_CEBUFFER_SIZE 96
3452 #define CEBUFFER_EXTRA 32 3455 #define CEBUFFER_EXTRA 32
3453 // Some typical max values to make buffer size more reasonable for asymmetric search. 3456 // Some typical max values to make buffer size more reasonable for asymmetric search.
3454 // #8694 is for a better long-term solution to allocation of this buffer. 3457 // #8694 is for a better long-term solution to allocation of this buffer.
3455 #define MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L 8 3458 #define MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L 8
3456 #define MAX_TARGET_IGNORABLES_PER_PAT_OTHER 3 3459 #define MAX_TARGET_IGNORABLES_PER_PAT_OTHER 3
3457 #define MIGHT_BE_JAMO_L(c) ((c >= 0x1100 && c <= 0x115E) || (c >= 0x3131 && c <= 0x314E) || (c >= 0x3165 && c <= 0x3186)) 3460 #define MIGHT_BE_JAMO_L(c) ((c >= 0x1100 && c <= 0x115E) || (c >= 0x3131 && c <= 0x314E) || (c >= 0x3165 && c <= 0x3186))
3458 struct CEBuffer { 3461 struct CEIBuffer {
3459 CEI defBuf[DEFAULT_CEBUFFER_SIZE]; 3462 CEI defBuf[DEFAULT_CEBUFFER_SIZE];
3460 CEI *buf; 3463 CEI *buf;
3461 int32_t bufSize; 3464 int32_t bufSize;
3462 int32_t firstIx; 3465 int32_t firstIx;
3463 int32_t limitIx; 3466 int32_t limitIx;
3464 UCollationElements *ceIter; 3467 UCollationElements *ceIter;
3465 UStringSearch *strSearch; 3468 UStringSearch *strSearch;
3466 3469
3467 3470
3468 3471
3469 CEBuffer(UStringSearch *ss, UErrorCode *status); 3472 CEIBuffer(UStringSearch *ss, UErrorCode *status);
3470 ~CEBuffer(); 3473 ~CEIBuffer();
3471 const CEI *get(int32_t index); 3474 const CEI *get(int32_t index);
3472 const CEI *getPrevious(int32_t index); 3475 const CEI *getPrevious(int32_t index);
3473 }; 3476 };
3474 3477
3475 3478
3476 CEBuffer::CEBuffer(UStringSearch *ss, UErrorCode *status) { 3479 CEIBuffer::CEIBuffer(UStringSearch *ss, UErrorCode *status) {
3477 buf = defBuf; 3480 buf = defBuf;
3478 strSearch = ss; 3481 strSearch = ss;
3479 bufSize = ss->pattern.PCELength + CEBUFFER_EXTRA; 3482 bufSize = ss->pattern.pcesLength + CEBUFFER_EXTRA;
3480 if (ss->search->elementComparisonType != 0) { 3483 if (ss->search->elementComparisonType != 0) {
3481 const UChar * patText = ss->pattern.text; 3484 const UChar * patText = ss->pattern.text;
3482 if (patText) { 3485 if (patText) {
3483 const UChar * patTextLimit = patText + ss->pattern.textLength; 3486 const UChar * patTextLimit = patText + ss->pattern.textLength;
3484 while ( patText < patTextLimit ) { 3487 while ( patText < patTextLimit ) {
3485 UChar c = *patText++; 3488 UChar c = *patText++;
3486 if (MIGHT_BE_JAMO_L(c)) { 3489 if (MIGHT_BE_JAMO_L(c)) {
3487 bufSize += MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L; 3490 bufSize += MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L;
3488 } else { 3491 } else {
3489 // No check for surrogates, we might allocate slightly more buffer than necessary. 3492 // No check for surrogates, we might allocate slightly more buffer than necessary.
3490 bufSize += MAX_TARGET_IGNORABLES_PER_PAT_OTHER; 3493 bufSize += MAX_TARGET_IGNORABLES_PER_PAT_OTHER;
3491 } 3494 }
3492 } 3495 }
3493 } 3496 }
3494 } 3497 }
3495 ceIter = ss->textIter; 3498 ceIter = ss->textIter;
3496 firstIx = 0; 3499 firstIx = 0;
3497 limitIx = 0; 3500 limitIx = 0;
3498 3501
3499 uprv_init_pce(ceIter); 3502 if (!initTextProcessedIter(ss, status)) { return; }
3500 3503
3501 if (bufSize>DEFAULT_CEBUFFER_SIZE) { 3504 if (bufSize>DEFAULT_CEBUFFER_SIZE) {
3502 buf = (CEI *)uprv_malloc(bufSize * sizeof(CEI)); 3505 buf = (CEI *)uprv_malloc(bufSize * sizeof(CEI));
3503 if (buf == NULL) { 3506 if (buf == NULL) {
3504 *status = U_MEMORY_ALLOCATION_ERROR; 3507 *status = U_MEMORY_ALLOCATION_ERROR;
3505 } 3508 }
3506 } 3509 }
3507 } 3510 }
3508 3511
3509 // TODO: add a reset or init function so that allocated 3512 // TODO: add a reset or init function so that allocated
3510 // buffers can be retained & reused. 3513 // buffers can be retained & reused.
3511 3514
3512 CEBuffer::~CEBuffer() { 3515 CEIBuffer::~CEIBuffer() {
3513 if (buf != defBuf) { 3516 if (buf != defBuf) {
3514 uprv_free(buf); 3517 uprv_free(buf);
3515 } 3518 }
3516 } 3519 }
3517 3520
3518 3521
3519 // Get the CE with the specified index. 3522 // Get the CE with the specified index.
3520 // Index must be in the range 3523 // Index must be in the range
3521 // n-history_size < index < n+1 3524 // n-history_size < index < n+1
3522 // where n is the largest index to have been fetched by some previous call to this function. 3525 // where n is the largest index to have been fetched by some previous call to this function.
3523 // The CE value will be UCOL_PROCESSED_NULLORDER at end of input. 3526 // The CE value will be UCOL_PROCESSED_NULLORDER at end of input.
3524 // 3527 //
3525 const CEI *CEBuffer::get(int32_t index) { 3528 const CEI *CEIBuffer::get(int32_t index) {
3526 int i = index % bufSize; 3529 int i = index % bufSize;
3527 3530
3528 if (index>=firstIx && index<limitIx) { 3531 if (index>=firstIx && index<limitIx) {
3529 // The request was for an entry already in our buffer. 3532 // The request was for an entry already in our buffer.
3530 // Just return it. 3533 // Just return it.
3531 return &buf[i]; 3534 return &buf[i];
3532 } 3535 }
3533 3536
3534 // Caller is requesting a new, never accessed before, CE. 3537 // Caller is requesting a new, never accessed before, CE.
3535 // Verify that it is the next one in sequence, which is all 3538 // Verify that it is the next one in sequence, which is all
3536 // that is allowed. 3539 // that is allowed.
3537 if (index != limitIx) { 3540 if (index != limitIx) {
3538 U_ASSERT(FALSE); 3541 U_ASSERT(FALSE);
3539 3542
3540 return NULL; 3543 return NULL;
3541 } 3544 }
3542 3545
3543 // Manage the circular CE buffer indexing 3546 // Manage the circular CE buffer indexing
3544 limitIx++; 3547 limitIx++;
3545 3548
3546 if (limitIx - firstIx >= bufSize) { 3549 if (limitIx - firstIx >= bufSize) {
3547 // The buffer is full, knock out the lowest-indexed entry. 3550 // The buffer is full, knock out the lowest-indexed entry.
3548 firstIx++; 3551 firstIx++;
3549 } 3552 }
3550 3553
3551 UErrorCode status = U_ZERO_ERROR; 3554 UErrorCode status = U_ZERO_ERROR;
3552 3555
3553 buf[i].ce = ucol_nextProcessed(ceIter, &buf[i].lowIndex, &buf[i].highIndex, &status); 3556 buf[i].ce = strSearch->textProcessedIter->nextProcessed(&buf[i].lowIndex, &b uf[i].highIndex, &status);
3554 3557
3555 return &buf[i]; 3558 return &buf[i];
3556 } 3559 }
3557 3560
3558 // Get the CE with the specified index. 3561 // Get the CE with the specified index.
3559 // Index must be in the range 3562 // Index must be in the range
3560 // n-history_size < index < n+1 3563 // n-history_size < index < n+1
3561 // where n is the largest index to have been fetched by some previous call to this function. 3564 // where n is the largest index to have been fetched by some previous call to this function.
3562 // The CE value will be UCOL_PROCESSED_NULLORDER at end of input. 3565 // The CE value will be UCOL_PROCESSED_NULLORDER at end of input.
3563 // 3566 //
3564 const CEI *CEBuffer::getPrevious(int32_t index) { 3567 const CEI *CEIBuffer::getPrevious(int32_t index) {
3565 int i = index % bufSize; 3568 int i = index % bufSize;
3566 3569
3567 if (index>=firstIx && index<limitIx) { 3570 if (index>=firstIx && index<limitIx) {
3568 // The request was for an entry already in our buffer. 3571 // The request was for an entry already in our buffer.
3569 // Just return it. 3572 // Just return it.
3570 return &buf[i]; 3573 return &buf[i];
3571 } 3574 }
3572 3575
3573 // Caller is requesting a new, never accessed before, CE. 3576 // Caller is requesting a new, never accessed before, CE.
3574 // Verify that it is the next one in sequence, which is all 3577 // Verify that it is the next one in sequence, which is all
3575 // that is allowed. 3578 // that is allowed.
3576 if (index != limitIx) { 3579 if (index != limitIx) {
3577 U_ASSERT(FALSE); 3580 U_ASSERT(FALSE);
3578 3581
3579 return NULL; 3582 return NULL;
3580 } 3583 }
3581 3584
3582 // Manage the circular CE buffer indexing 3585 // Manage the circular CE buffer indexing
3583 limitIx++; 3586 limitIx++;
3584 3587
3585 if (limitIx - firstIx >= bufSize) { 3588 if (limitIx - firstIx >= bufSize) {
3586 // The buffer is full, knock out the lowest-indexed entry. 3589 // The buffer is full, knock out the lowest-indexed entry.
3587 firstIx++; 3590 firstIx++;
3588 } 3591 }
3589 3592
3590 UErrorCode status = U_ZERO_ERROR; 3593 UErrorCode status = U_ZERO_ERROR;
3591 3594
3592 buf[i].ce = ucol_previousProcessed(ceIter, &buf[i].lowIndex, &buf[i].highInd ex, &status); 3595 buf[i].ce = strSearch->textProcessedIter->previousProcessed(&buf[i].lowIndex , &buf[i].highIndex, &status);
3593 3596
3594 return &buf[i]; 3597 return &buf[i];
3595 } 3598 }
3596 3599
3600 }
3601
3597 U_NAMESPACE_END 3602 U_NAMESPACE_END
3598 3603
3599 3604
3600 // #define USEARCH_DEBUG 3605 // #define USEARCH_DEBUG
3601 3606
3602 #ifdef USEARCH_DEBUG 3607 #ifdef USEARCH_DEBUG
3603 #include <stdio.h> 3608 #include <stdio.h>
3604 #include <stdlib.h> 3609 #include <stdlib.h>
3605 #endif 3610 #endif
3606 3611
(...skipping 201 matching lines...) Expand 10 before | Expand all | Expand 10 after
3808 { 3813 {
3809 if (U_FAILURE(*status)) { 3814 if (U_FAILURE(*status)) {
3810 return FALSE; 3815 return FALSE;
3811 } 3816 }
3812 3817
3813 // TODO: reject search patterns beginning with a combining char. 3818 // TODO: reject search patterns beginning with a combining char.
3814 3819
3815 #ifdef USEARCH_DEBUG 3820 #ifdef USEARCH_DEBUG
3816 if (getenv("USEARCH_DEBUG") != NULL) { 3821 if (getenv("USEARCH_DEBUG") != NULL) {
3817 printf("Pattern CEs\n"); 3822 printf("Pattern CEs\n");
3818 for (int ii=0; ii<strsrch->pattern.CELength; ii++) { 3823 for (int ii=0; ii<strsrch->pattern.cesLength; ii++) {
3819 printf(" %8x", strsrch->pattern.CE[ii]); 3824 printf(" %8x", strsrch->pattern.ces[ii]);
3820 } 3825 }
3821 printf("\n"); 3826 printf("\n");
3822 } 3827 }
3823 3828
3824 #endif 3829 #endif
3825 // Input parameter sanity check. 3830 // Input parameter sanity check.
3826 // TODO: should input indicies clip to the text length 3831 // TODO: should input indicies clip to the text length
3827 // in the same way that UText does. 3832 // in the same way that UText does.
3828 if(strsrch->pattern.CELength == 0 || 3833 if(strsrch->pattern.cesLength == 0 ||
3829 startIdx < 0 || 3834 startIdx < 0 ||
3830 startIdx > strsrch->search->textLength || 3835 startIdx > strsrch->search->textLength ||
3831 strsrch->pattern.CE == NULL) { 3836 strsrch->pattern.ces == NULL) {
3832 *status = U_ILLEGAL_ARGUMENT_ERROR; 3837 *status = U_ILLEGAL_ARGUMENT_ERROR;
3833 return FALSE; 3838 return FALSE;
3834 } 3839 }
3835 3840
3836 if (strsrch->pattern.PCE == NULL) { 3841 if (strsrch->pattern.pces == NULL) {
3837 initializePatternPCETable(strsrch, status); 3842 initializePatternPCETable(strsrch, status);
3838 } 3843 }
3839 3844
3840 ucol_setOffset(strsrch->textIter, startIdx, status); 3845 ucol_setOffset(strsrch->textIter, startIdx, status);
3841 CEBuffer ceb(strsrch, status); 3846 CEIBuffer ceb(strsrch, status);
3842 3847
3843 3848
3844 int32_t targetIx = 0; 3849 int32_t targetIx = 0;
3845 const CEI *targetCEI = NULL; 3850 const CEI *targetCEI = NULL;
3846 int32_t patIx; 3851 int32_t patIx;
3847 UBool found; 3852 UBool found;
3848 3853
3849 int32_t mStart = -1; 3854 int32_t mStart = -1;
3850 int32_t mLimit = -1; 3855 int32_t mLimit = -1;
3851 int32_t minLimit; 3856 int32_t minLimit;
(...skipping 25 matching lines...) Expand all
3877 // For targetIx > 0, this ceb.get gets a CE that is as far back in the ring buffer 3882 // For targetIx > 0, this ceb.get gets a CE that is as far back in the ring buffer
3878 // (compared to the last CE fetched for the previous targetIx value) as we need to go 3883 // (compared to the last CE fetched for the previous targetIx value) as we need to go
3879 // for this targetIx value, so if it is non-NULL then other ceb.get calls should be OK. 3884 // for this targetIx value, so if it is non-NULL then other ceb.get calls should be OK.
3880 const CEI *firstCEI = ceb.get(targetIx); 3885 const CEI *firstCEI = ceb.get(targetIx);
3881 if (firstCEI == NULL) { 3886 if (firstCEI == NULL) {
3882 *status = U_INTERNAL_PROGRAM_ERROR; 3887 *status = U_INTERNAL_PROGRAM_ERROR;
3883 found = FALSE; 3888 found = FALSE;
3884 break; 3889 break;
3885 } 3890 }
3886 3891
3887 for (patIx=0; patIx<strsrch->pattern.PCELength; patIx++) { 3892 for (patIx=0; patIx<strsrch->pattern.pcesLength; patIx++) {
3888 patCE = strsrch->pattern.PCE[patIx]; 3893 patCE = strsrch->pattern.pces[patIx];
3889 targetCEI = ceb.get(targetIx+patIx+targetIxOffset); 3894 targetCEI = ceb.get(targetIx+patIx+targetIxOffset);
3890 // Compare CE from target string with CE from the pattern. 3895 // Compare CE from target string with CE from the pattern.
3891 // Note that the target CE will be UCOL_PROCESSED_NULLORDER if we reach the end of input, 3896 // Note that the target CE will be UCOL_PROCESSED_NULLORDER if we reach the end of input,
3892 // which will fail the compare, below. 3897 // which will fail the compare, below.
3893 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsrch->search->elementComparisonType); 3898 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsrch->search->elementComparisonType);
3894 if ( ceMatch == U_CE_NO_MATCH ) { 3899 if ( ceMatch == U_CE_NO_MATCH ) {
3895 found = FALSE; 3900 found = FALSE;
3896 break; 3901 break;
3897 } else if ( ceMatch > U_CE_NO_MATCH ) { 3902 } else if ( ceMatch > U_CE_NO_MATCH ) {
3898 if ( ceMatch == U_CE_SKIP_TARG ) { 3903 if ( ceMatch == U_CE_SKIP_TARG ) {
3899 // redo with same patCE, next targCE 3904 // redo with same patCE, next targCE
3900 patIx--; 3905 patIx--;
3901 targetIxOffset++; 3906 targetIxOffset++;
3902 } else { // ceMatch == U_CE_SKIP_PATN 3907 } else { // ceMatch == U_CE_SKIP_PATN
3903 // redo with same targCE, next patCE 3908 // redo with same targCE, next patCE
3904 targetIxOffset--; 3909 targetIxOffset--;
3905 } 3910 }
3906 } 3911 }
3907 } 3912 }
3908 targetIxOffset += strsrch->pattern.PCELength; // this is now the offset in target CE space to end of the match so far 3913 targetIxOffset += strsrch->pattern.pcesLength; // this is now the offset in target CE space to end of the match so far
3909 3914
3910 if (!found && ((targetCEI == NULL) || (targetCEI->ce != UCOL_PROCESSED_NULLORDER))) { 3915 if (!found && ((targetCEI == NULL) || (targetCEI->ce != UCOL_PROCESSED_NULLORDER))) {
3911 // No match at this targetIx. Try again at the next. 3916 // No match at this targetIx. Try again at the next.
3912 continue; 3917 continue;
3913 } 3918 }
3914 3919
3915 if (!found) { 3920 if (!found) {
3916 // No match at all, we have run off the end of the target text. 3921 // No match at all, we have run off the end of the target text.
3917 break; 3922 break;
3918 } 3923 }
(...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after
4075 { 4080 {
4076 if (U_FAILURE(*status)) { 4081 if (U_FAILURE(*status)) {
4077 return FALSE; 4082 return FALSE;
4078 } 4083 }
4079 4084
4080 // TODO: reject search patterns beginning with a combining char. 4085 // TODO: reject search patterns beginning with a combining char.
4081 4086
4082 #ifdef USEARCH_DEBUG 4087 #ifdef USEARCH_DEBUG
4083 if (getenv("USEARCH_DEBUG") != NULL) { 4088 if (getenv("USEARCH_DEBUG") != NULL) {
4084 printf("Pattern CEs\n"); 4089 printf("Pattern CEs\n");
4085 for (int ii=0; ii<strsrch->pattern.CELength; ii++) { 4090 for (int ii=0; ii<strsrch->pattern.cesLength; ii++) {
4086 printf(" %8x", strsrch->pattern.CE[ii]); 4091 printf(" %8x", strsrch->pattern.ces[ii]);
4087 } 4092 }
4088 printf("\n"); 4093 printf("\n");
4089 } 4094 }
4090 4095
4091 #endif 4096 #endif
4092 // Input parameter sanity check. 4097 // Input parameter sanity check.
4093 // TODO: should input indicies clip to the text length 4098 // TODO: should input indicies clip to the text length
4094 // in the same way that UText does. 4099 // in the same way that UText does.
4095 if(strsrch->pattern.CELength == 0 || 4100 if(strsrch->pattern.cesLength == 0 ||
4096 startIdx < 0 || 4101 startIdx < 0 ||
4097 startIdx > strsrch->search->textLength || 4102 startIdx > strsrch->search->textLength ||
4098 strsrch->pattern.CE == NULL) { 4103 strsrch->pattern.ces == NULL) {
4099 *status = U_ILLEGAL_ARGUMENT_ERROR; 4104 *status = U_ILLEGAL_ARGUMENT_ERROR;
4100 return FALSE; 4105 return FALSE;
4101 } 4106 }
4102 4107
4103 if (strsrch->pattern.PCE == NULL) { 4108 if (strsrch->pattern.pces == NULL) {
4104 initializePatternPCETable(strsrch, status); 4109 initializePatternPCETable(strsrch, status);
4105 } 4110 }
4106 4111
4107 CEBuffer ceb(strsrch, status); 4112 CEIBuffer ceb(strsrch, status);
4108 int32_t targetIx = 0; 4113 int32_t targetIx = 0;
4109 4114
4110 /* 4115 /*
4111 * Pre-load the buffer with the CE's for the grapheme 4116 * Pre-load the buffer with the CE's for the grapheme
4112 * after our starting position so that we're sure that 4117 * after our starting position so that we're sure that
4113 * we can look at the CE following the match when we 4118 * we can look at the CE following the match when we
4114 * check the match boundaries. 4119 * check the match boundaries.
4115 * 4120 *
4116 * This will also pre-fetch the first CE that we'll 4121 * This will also pre-fetch the first CE that we'll
4117 * consider for the match. 4122 * consider for the match.
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
4158 // for this targetIx value, so if it is non-NULL then other ceb.getPrevious calls should be OK. 4163 // for this targetIx value, so if it is non-NULL then other ceb.getPrevious calls should be OK.
4159 const CEI *lastCEI = ceb.getPrevious(targetIx); 4164 const CEI *lastCEI = ceb.getPrevious(targetIx);
4160 if (lastCEI == NULL) { 4165 if (lastCEI == NULL) {
4161 *status = U_INTERNAL_PROGRAM_ERROR; 4166 *status = U_INTERNAL_PROGRAM_ERROR;
4162 found = FALSE; 4167 found = FALSE;
4163 break; 4168 break;
4164 } 4169 }
4165 // Inner loop checks for a match beginning at each 4170 // Inner loop checks for a match beginning at each
4166 // position from the outer loop. 4171 // position from the outer loop.
4167 int32_t targetIxOffset = 0; 4172 int32_t targetIxOffset = 0;
4168 for (patIx = strsrch->pattern.PCELength - 1; patIx >= 0; patIx -= 1) { 4173 for (patIx = strsrch->pattern.pcesLength - 1; patIx >= 0; patIx -= 1) {
4169 int64_t patCE = strsrch->pattern.PCE[patIx]; 4174 int64_t patCE = strsrch->pattern.pces[patIx];
4170 4175
4171 targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1 - patIx + targetIxOffset); 4176 targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLength - 1 - patIx + targetIxOffset);
4172 // Compare CE from target string with CE from the pattern. 4177 // Compare CE from target string with CE from the pattern.
4173 // Note that the target CE will be UCOL_NULLORDER if we reach the end of input, 4178 // Note that the target CE will be UCOL_NULLORDER if we reach the end of input,
4174 // which will fail the compare, below. 4179 // which will fail the compare, below.
4175 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsrch->search->elementComparisonType); 4180 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsrch->search->elementComparisonType);
4176 if ( ceMatch == U_CE_NO_MATCH ) { 4181 if ( ceMatch == U_CE_NO_MATCH ) {
4177 found = FALSE; 4182 found = FALSE;
4178 break; 4183 break;
4179 } else if ( ceMatch > U_CE_NO_MATCH ) { 4184 } else if ( ceMatch > U_CE_NO_MATCH ) {
4180 if ( ceMatch == U_CE_SKIP_TARG ) { 4185 if ( ceMatch == U_CE_SKIP_TARG ) {
4181 // redo with same patCE, next targCE 4186 // redo with same patCE, next targCE
(...skipping 15 matching lines...) Expand all
4197 // No match at all, we have run off the end of the target text. 4202 // No match at all, we have run off the end of the target text.
4198 break; 4203 break;
4199 } 4204 }
4200 4205
4201 4206
4202 // We have found a match in CE space. 4207 // We have found a match in CE space.
4203 // Now determine the bounds in string index space. 4208 // Now determine the bounds in string index space.
4204 // There still is a chance of match failure if the CE range not correspond to 4209 // There still is a chance of match failure if the CE range not correspond to
4205 // an acceptable character range. 4210 // an acceptable character range.
4206 // 4211 //
4207 const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1 + targetIxOffset); 4212 const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLength - 1 + targetIxOffset);
4208 mStart = firstCEI->lowIndex; 4213 mStart = firstCEI->lowIndex;
4209 4214
4210 // Check for the start of the match being within a combining sequence. 4215 // Check for the start of the match being within a combining sequence.
4211 // This can happen if the pattern itself begins with a combining char, and 4216 // This can happen if the pattern itself begins with a combining char, and
4212 // the match found combining marks in the target text that were attached 4217 // the match found combining marks in the target text that were attached
4213 // to something else. 4218 // to something else.
4214 // This type of match should be rejected for not completely consuming a 4219 // This type of match should be rejected for not completely consuming a
4215 // combining sequence. 4220 // combining sequence.
4216 if (!isBreakBoundary(strsrch, mStart)) { 4221 if (!isBreakBoundary(strsrch, mStart)) {
4217 found = FALSE; 4222 found = FALSE;
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after
4323 UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status) 4328 UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status)
4324 { 4329 {
4325 if (U_FAILURE(*status)) { 4330 if (U_FAILURE(*status)) {
4326 setMatchNotFound(strsrch); 4331 setMatchNotFound(strsrch);
4327 return FALSE; 4332 return FALSE;
4328 } 4333 }
4329 4334
4330 #if BOYER_MOORE 4335 #if BOYER_MOORE
4331 UCollationElements *coleiter = strsrch->textIter; 4336 UCollationElements *coleiter = strsrch->textIter;
4332 int32_t textlength = strsrch->search->textLength; 4337 int32_t textlength = strsrch->search->textLength;
4333 int32_t *patternce = strsrch->pattern.CE; 4338 int32_t *patternce = strsrch->pattern.ces;
4334 int32_t patterncelength = strsrch->pattern.CELength; 4339 int32_t patterncelength = strsrch->pattern.cesLength;
4335 int32_t textoffset = ucol_getOffset(coleiter); 4340 int32_t textoffset = ucol_getOffset(coleiter);
4336 4341
4337 // status used in setting coleiter offset, since offset is checked in 4342 // status used in setting coleiter offset, since offset is checked in
4338 // shiftForward before setting the coleiter offset, status never 4343 // shiftForward before setting the coleiter offset, status never
4339 // a failure 4344 // a failure
4340 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER, 4345 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER,
4341 patterncelength); 4346 patterncelength);
4342 while (textoffset <= textlength) 4347 while (textoffset <= textlength)
4343 { 4348 {
4344 uint32_t patternceindex = patterncelength - 1; 4349 uint32_t patternceindex = patterncelength - 1;
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
4437 UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status) 4442 UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status)
4438 { 4443 {
4439 if (U_FAILURE(*status)) { 4444 if (U_FAILURE(*status)) {
4440 setMatchNotFound(strsrch); 4445 setMatchNotFound(strsrch);
4441 return FALSE; 4446 return FALSE;
4442 } 4447 }
4443 4448
4444 #if BOYER_MOORE 4449 #if BOYER_MOORE
4445 UCollationElements *coleiter = strsrch->textIter; 4450 UCollationElements *coleiter = strsrch->textIter;
4446 int32_t textlength = strsrch->search->textLength; 4451 int32_t textlength = strsrch->search->textLength;
4447 int32_t *patternce = strsrch->pattern.CE; 4452 int32_t *patternce = strsrch->pattern.ces;
4448 int32_t patterncelength = strsrch->pattern.CELength; 4453 int32_t patterncelength = strsrch->pattern.cesLength;
4449 int32_t textoffset = ucol_getOffset(coleiter); 4454 int32_t textoffset = ucol_getOffset(coleiter);
4450 UBool hasPatternAccents = 4455 UBool hasPatternAccents =
4451 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents; 4456 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
4452 4457
4453 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER, 4458 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER,
4454 patterncelength); 4459 patterncelength);
4455 strsrch->canonicalPrefixAccents[0] = 0; 4460 strsrch->canonicalPrefixAccents[0] = 0;
4456 strsrch->canonicalSuffixAccents[0] = 0; 4461 strsrch->canonicalSuffixAccents[0] = 0;
4457 4462
4458 while (textoffset <= textlength) 4463 while (textoffset <= textlength)
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
4551 4556
4552 UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status) 4557 UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
4553 { 4558 {
4554 if (U_FAILURE(*status)) { 4559 if (U_FAILURE(*status)) {
4555 setMatchNotFound(strsrch); 4560 setMatchNotFound(strsrch);
4556 return FALSE; 4561 return FALSE;
4557 } 4562 }
4558 4563
4559 #if BOYER_MOORE 4564 #if BOYER_MOORE
4560 UCollationElements *coleiter = strsrch->textIter; 4565 UCollationElements *coleiter = strsrch->textIter;
4561 int32_t *patternce = strsrch->pattern.CE; 4566 int32_t *patternce = strsrch->pattern.ces;
4562 int32_t patterncelength = strsrch->pattern.CELength; 4567 int32_t patterncelength = strsrch->pattern.cesLength;
4563 int32_t textoffset = ucol_getOffset(coleiter); 4568 int32_t textoffset = ucol_getOffset(coleiter);
4564 4569
4565 // shifting it check for setting offset 4570 // shifting it check for setting offset
4566 // if setOffset is called previously or there was no previous match, we 4571 // if setOffset is called previously or there was no previous match, we
4567 // leave the offset as it is. 4572 // leave the offset as it is.
4568 if (strsrch->search->matchedIndex != USEARCH_DONE) { 4573 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4569 textoffset = strsrch->search->matchedIndex; 4574 textoffset = strsrch->search->matchedIndex;
4570 } 4575 }
4571 4576
4572 textoffset = reverseShift(strsrch, textoffset, UCOL_NULLORDER, 4577 textoffset = reverseShift(strsrch, textoffset, UCOL_NULLORDER,
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
4652 return FALSE; 4657 return FALSE;
4653 #else 4658 #else
4654 int32_t textOffset; 4659 int32_t textOffset;
4655 4660
4656 if (strsrch->search->isOverlap) { 4661 if (strsrch->search->isOverlap) {
4657 if (strsrch->search->matchedIndex != USEARCH_DONE) { 4662 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4658 textOffset = strsrch->search->matchedIndex + strsrch->search->matchedLength - 1; 4663 textOffset = strsrch->search->matchedIndex + strsrch->search->matchedLength - 1;
4659 } else { 4664 } else {
4660 // move the start position at the end of possible match 4665 // move the start position at the end of possible match
4661 initializePatternPCETable(strsrch, status); 4666 initializePatternPCETable(strsrch, status);
4662 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.PCELength - 1; nPCEs++) { 4667 if (!initTextProcessedIter(strsrch, status)) {
4663 int64_t pce = ucol_nextProcessed(strsrch->textIter, NULL, NULL, status); 4668 setMatchNotFound(strsrch);
4669 return FALSE;
4670 }
4671 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) {
4672 int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NULL, status);
4664 if (pce == UCOL_PROCESSED_NULLORDER) { 4673 if (pce == UCOL_PROCESSED_NULLORDER) {
4665 // at the end of the text 4674 // at the end of the text
4666 break; 4675 break;
4667 } 4676 }
4668 } 4677 }
4669 if (U_FAILURE(*status)) { 4678 if (U_FAILURE(*status)) {
4670 setMatchNotFound(strsrch); 4679 setMatchNotFound(strsrch);
4671 return FALSE; 4680 return FALSE;
4672 } 4681 }
4673 textOffset = ucol_getOffset(strsrch->textIter); 4682 textOffset = ucol_getOffset(strsrch->textIter);
(...skipping 19 matching lines...) Expand all
4693 UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, 4702 UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
4694 UErrorCode *status) 4703 UErrorCode *status)
4695 { 4704 {
4696 if (U_FAILURE(*status)) { 4705 if (U_FAILURE(*status)) {
4697 setMatchNotFound(strsrch); 4706 setMatchNotFound(strsrch);
4698 return FALSE; 4707 return FALSE;
4699 } 4708 }
4700 4709
4701 #if BOYER_MOORE 4710 #if BOYER_MOORE
4702 UCollationElements *coleiter = strsrch->textIter; 4711 UCollationElements *coleiter = strsrch->textIter;
4703 int32_t *patternce = strsrch->pattern.CE; 4712 int32_t *patternce = strsrch->pattern.ces;
4704 int32_t patterncelength = strsrch->pattern.CELength; 4713 int32_t patterncelength = strsrch->pattern.cesLength;
4705 int32_t textoffset = ucol_getOffset(coleiter); 4714 int32_t textoffset = ucol_getOffset(coleiter);
4706 UBool hasPatternAccents = 4715 UBool hasPatternAccents =
4707 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents; 4716 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
4708 4717
4709 // shifting it check for setting offset 4718 // shifting it check for setting offset
4710 // if setOffset is called previously or there was no previous match, we 4719 // if setOffset is called previously or there was no previous match, we
4711 // leave the offset as it is. 4720 // leave the offset as it is.
4712 if (strsrch->search->matchedIndex != USEARCH_DONE) { 4721 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4713 textoffset = strsrch->search->matchedIndex; 4722 textoffset = strsrch->search->matchedIndex;
4714 } 4723 }
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after
4801 return FALSE; 4810 return FALSE;
4802 #else 4811 #else
4803 int32_t textOffset; 4812 int32_t textOffset;
4804 4813
4805 if (strsrch->search->isOverlap) { 4814 if (strsrch->search->isOverlap) {
4806 if (strsrch->search->matchedIndex != USEARCH_DONE) { 4815 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4807 textOffset = strsrch->search->matchedIndex + strsrch->search->matchedLength - 1; 4816 textOffset = strsrch->search->matchedIndex + strsrch->search->matchedLength - 1;
4808 } else { 4817 } else {
4809 // move the start position at the end of possible match 4818 // move the start position at the end of possible match
4810 initializePatternPCETable(strsrch, status); 4819 initializePatternPCETable(strsrch, status);
4811 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.PCELength - 1; nPCEs++) { 4820 if (!initTextProcessedIter(strsrch, status)) {
4812 int64_t pce = ucol_nextProcessed(strsrch->textIter, NULL, NULL, status); 4821 setMatchNotFound(strsrch);
4822 return FALSE;
4823 }
4824 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) {
4825 int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NULL, status);
4813 if (pce == UCOL_PROCESSED_NULLORDER) { 4826 if (pce == UCOL_PROCESSED_NULLORDER) {
4814 // at the end of the text 4827 // at the end of the text
4815 break; 4828 break;
4816 } 4829 }
4817 } 4830 }
4818 if (U_FAILURE(*status)) { 4831 if (U_FAILURE(*status)) {
4819 setMatchNotFound(strsrch); 4832 setMatchNotFound(strsrch);
4820 return FALSE; 4833 return FALSE;
4821 } 4834 }
4822 textOffset = ucol_getOffset(strsrch->textIter); 4835 textOffset = ucol_getOffset(strsrch->textIter);
(...skipping 10 matching lines...) Expand all
4833 strsrch->search->matchedLength = end - start; 4846 strsrch->search->matchedLength = end - start;
4834 return TRUE; 4847 return TRUE;
4835 } else { 4848 } else {
4836 setMatchNotFound(strsrch); 4849 setMatchNotFound(strsrch);
4837 return FALSE; 4850 return FALSE;
4838 } 4851 }
4839 #endif 4852 #endif
4840 } 4853 }
4841 4854
4842 #endif /* #if !UCONFIG_NO_COLLATION */ 4855 #endif /* #if !UCONFIG_NO_COLLATION */
OLDNEW
« no previous file with comments | « source/i18n/uregex.cpp ('k') | source/i18n/uspoof.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698