| OLD | NEW |
| 1 /* | 1 /* |
| 2 ********************************************************************** | 2 ********************************************************************** |
| 3 * Copyright (C) 2001-2011 IBM and others. All rights reserved. | 3 * Copyright (C) 2001-2014 IBM and others. All rights reserved. |
| 4 ********************************************************************** | 4 ********************************************************************** |
| 5 * Date Name Description | 5 * Date Name Description |
| 6 * 07/02/2001 synwee Creation. | 6 * 07/02/2001 synwee Creation. |
| 7 ********************************************************************** | 7 ********************************************************************** |
| 8 */ | 8 */ |
| 9 | 9 |
| 10 #include "unicode/utypes.h" | 10 #include "unicode/utypes.h" |
| 11 | 11 |
| 12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION | 12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION |
| 13 | 13 |
| 14 #include "unicode/usearch.h" | 14 #include "unicode/usearch.h" |
| 15 #include "unicode/ustring.h" | 15 #include "unicode/ustring.h" |
| 16 #include "unicode/uchar.h" | 16 #include "unicode/uchar.h" |
| 17 #include "unicode/utf16.h" | 17 #include "unicode/utf16.h" |
| 18 #include "normalizer2impl.h" | 18 #include "normalizer2impl.h" |
| 19 #include "ucol_imp.h" | |
| 20 #include "usrchimp.h" | 19 #include "usrchimp.h" |
| 21 #include "cmemory.h" | 20 #include "cmemory.h" |
| 22 #include "ucln_in.h" | 21 #include "ucln_in.h" |
| 23 #include "uassert.h" | 22 #include "uassert.h" |
| 24 #include "ustr_imp.h" | 23 #include "ustr_imp.h" |
| 25 | 24 |
| 26 U_NAMESPACE_USE | 25 U_NAMESPACE_USE |
| 27 | 26 |
| 28 // don't use Boyer-Moore | 27 // don't use Boyer-Moore |
| 29 // (and if we decide to turn this on again there are several new TODOs that will need to be addressed) | 28 // (and if we decide to turn this on again there are several new TODOs that will need to be addressed) |
| 30 #define BOYER_MOORE 0 | 29 #define BOYER_MOORE 0 |
| 31 | 30 |
| 32 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | |
| 33 | |
| 34 // internal definition --------------------------------------------------- | 31 // internal definition --------------------------------------------------- |
| 35 | 32 |
| 36 #define LAST_BYTE_MASK_ 0xFF | 33 #define LAST_BYTE_MASK_ 0xFF |
| 37 #define SECOND_LAST_BYTE_SHIFT_ 8 | 34 #define SECOND_LAST_BYTE_SHIFT_ 8 |
| 38 #define SUPPLEMENTARY_MIN_VALUE_ 0x10000 | 35 #define SUPPLEMENTARY_MIN_VALUE_ 0x10000 |
| 39 | 36 |
| 40 static const Normalizer2Impl *g_nfcImpl = NULL; | 37 static const Normalizer2Impl *g_nfcImpl = NULL; |
| 41 | 38 |
| 42 // internal methods ------------------------------------------------- | 39 // internal methods ------------------------------------------------- |
| 43 | 40 |
| 44 /** | 41 /** |
| 45 * Fast collation element iterator setOffset. | 42 * Fast collation element iterator setOffset. |
| 46 * This function does not check for bounds. | 43 * This function does not check for bounds. |
| 47 * @param elems collation element iterator | 44 * @param elems collation element iterator |
| 48 * @param offset to set | 45 * @param offset to set |
| 49 */ | 46 */ |
| 50 static | 47 static |
| 51 inline void setColEIterOffset(UCollationElements *elems, | 48 inline void setColEIterOffset(UCollationElements *elems, |
| 52 int32_t offset) | 49 int32_t offset) |
| 53 { | 50 { |
| 54 collIterate *ci = &(elems->iteratordata_); | 51 // Note: Not "fast" any more after the 2013 collation rewrite. |
| 55 ci->pos = ci->string + offset; | 52 // We do not want to expose more internals than necessary. |
| 56 ci->CEpos = ci->toReturn = ci->extendCEs ? ci->extendCEs : ci->CEs; | 53 UErrorCode status = U_ZERO_ERROR; |
| 57 if (ci->flags & UCOL_ITER_INNORMBUF) { | 54 ucol_setOffset(elems, offset, &status); |
| 58 ci->flags = ci->origFlags; | |
| 59 } | |
| 60 ci->fcdPosition = NULL; | |
| 61 | |
| 62 ci->offsetReturn = NULL; | |
| 63 ci->offsetStore = ci->offsetBuffer; | |
| 64 ci->offsetRepeatCount = ci->offsetRepeatValue = 0; | |
| 65 } | 55 } |
| 66 | 56 |
| 67 /** | 57 /** |
| 68 * Getting the mask for collation strength | 58 * Getting the mask for collation strength |
| 69 * @param strength collation strength | 59 * @param strength collation strength |
| 70 * @return collation element mask | 60 * @return collation element mask |
| 71 */ | 61 */ |
| 72 static | 62 static |
| 73 inline uint32_t getMask(UCollationStrength strength) | 63 inline uint32_t getMask(UCollationStrength strength) |
| 74 { | 64 { |
| (...skipping 216 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 291 * @param status output error if any, caller to check status before calling | 281 * @param status output error if any, caller to check status before calling |
| 292 * method, status assumed to be success when passed in. | 282 * method, status assumed to be success when passed in. |
| 293 * @return total number of expansions | 283 * @return total number of expansions |
| 294 */ | 284 */ |
| 295 static | 285 static |
| 296 inline uint16_t initializePatternCETable(UStringSearch *strsrch, | 286 inline uint16_t initializePatternCETable(UStringSearch *strsrch, |
| 297 UErrorCode *status) | 287 UErrorCode *status) |
| 298 { | 288 { |
| 299 UPattern *pattern = &(strsrch->pattern); | 289 UPattern *pattern = &(strsrch->pattern); |
| 300 uint32_t cetablesize = INITIAL_ARRAY_SIZE_; | 290 uint32_t cetablesize = INITIAL_ARRAY_SIZE_; |
| 301 int32_t *cetable = pattern->CEBuffer; | 291 int32_t *cetable = pattern->cesBuffer; |
| 302 uint32_t patternlength = pattern->textLength; | 292 uint32_t patternlength = pattern->textLength; |
| 303 UCollationElements *coleiter = strsrch->utilIter; | 293 UCollationElements *coleiter = strsrch->utilIter; |
| 304 | 294 |
| 305 if (coleiter == NULL) { | 295 if (coleiter == NULL) { |
| 306 coleiter = ucol_openElements(strsrch->collator, pattern->text, | 296 coleiter = ucol_openElements(strsrch->collator, pattern->text, |
| 307 patternlength, status); | 297 patternlength, status); |
| 308 // status will be checked in ucol_next(..) later and if it is an | 298 // status will be checked in ucol_next(..) later and if it is an |
| 309 // error UCOL_NULLORDER the result of ucol_next(..) and 0 will be | 299 // error UCOL_NULLORDER the result of ucol_next(..) and 0 will be |
| 310 // returned. | 300 // returned. |
| 311 strsrch->utilIter = coleiter; | 301 strsrch->utilIter = coleiter; |
| 312 } | 302 } |
| 313 else { | 303 else { |
| 314 uprv_init_collIterate(strsrch->collator, pattern->text, | 304 ucol_setText(coleiter, pattern->text, pattern->textLength, status); |
| 315 pattern->textLength, | |
| 316 &coleiter->iteratordata_, | |
| 317 status); | |
| 318 } | 305 } |
| 319 if(U_FAILURE(*status)) { | 306 if(U_FAILURE(*status)) { |
| 320 return 0; | 307 return 0; |
| 321 } | 308 } |
| 322 | 309 |
| 323 if (pattern->CE != cetable && pattern->CE) { | 310 if (pattern->ces != cetable && pattern->ces) { |
| 324 uprv_free(pattern->CE); | 311 uprv_free(pattern->ces); |
| 325 } | 312 } |
| 326 | 313 |
| 327 uint16_t offset = 0; | 314 uint16_t offset = 0; |
| 328 uint16_t result = 0; | 315 uint16_t result = 0; |
| 329 int32_t ce; | 316 int32_t ce; |
| 330 | 317 |
| 331 while ((ce = ucol_next(coleiter, status)) != UCOL_NULLORDER && | 318 while ((ce = ucol_next(coleiter, status)) != UCOL_NULLORDER && |
| 332 U_SUCCESS(*status)) { | 319 U_SUCCESS(*status)) { |
| 333 uint32_t newce = getCE(strsrch, ce); | 320 uint32_t newce = getCE(strsrch, ce); |
| 334 if (newce) { | 321 if (newce) { |
| 335 int32_t *temp = addTouint32_tArray(cetable, offset, &cetablesize, | 322 int32_t *temp = addTouint32_tArray(cetable, offset, &cetablesize, |
| 336 newce, | 323 newce, |
| 337 patternlength - ucol_getOffset(coleiter) + 1, | 324 patternlength - ucol_getOffset(coleiter) + 1, |
| 338 status); | 325 status); |
| 339 if (U_FAILURE(*status)) { | 326 if (U_FAILURE(*status)) { |
| 340 return 0; | 327 return 0; |
| 341 } | 328 } |
| 342 offset ++; | 329 offset ++; |
| 343 if (cetable != temp && cetable != pattern->CEBuffer) { | 330 if (cetable != temp && cetable != pattern->cesBuffer) { |
| 344 uprv_free(cetable); | 331 uprv_free(cetable); |
| 345 } | 332 } |
| 346 cetable = temp; | 333 cetable = temp; |
| 347 } | 334 } |
| 348 result += (uint16_t)(ucol_getMaxExpansion(coleiter, ce) - 1); | 335 result += (uint16_t)(ucol_getMaxExpansion(coleiter, ce) - 1); |
| 349 } | 336 } |
| 350 | 337 |
| 351 cetable[offset] = 0; | 338 cetable[offset] = 0; |
| 352 pattern->CE = cetable; | 339 pattern->ces = cetable; |
| 353 pattern->CELength = offset; | 340 pattern->cesLength = offset; |
| 354 | 341 |
| 355 return result; | 342 return result; |
| 356 } | 343 } |
| 357 | 344 |
| 358 /** | 345 /** |
| 359 * Initializing the pce table for a pattern. | 346 * Initializing the pce table for a pattern. |
| 360 * Stores non-ignorable collation keys. | 347 * Stores non-ignorable collation keys. |
| 361 * Table size will be estimated by the size of the pattern text. Table | 348 * Table size will be estimated by the size of the pattern text. Table |
| 362 * expansion will be performed as we go along. Adding 1 to ensure that the table | 349 * expansion will be performed as we go along. Adding 1 to ensure that the table |
| 363 * size definitely increases. | 350 * size definitely increases. |
| 364 * Internal method, status assumed to be a success. | 351 * Internal method, status assumed to be a success. |
| 365 * @param strsrch string search data | 352 * @param strsrch string search data |
| 366 * @param status output error if any, caller to check status before calling | 353 * @param status output error if any, caller to check status before calling |
| 367 * method, status assumed to be success when passed in. | 354 * method, status assumed to be success when passed in. |
| 368 * @return total number of expansions | 355 * @return total number of expansions |
| 369 */ | 356 */ |
| 370 static | 357 static |
| 371 inline uint16_t initializePatternPCETable(UStringSearch *strsrch, | 358 inline uint16_t initializePatternPCETable(UStringSearch *strsrch, |
| 372 UErrorCode *status) | 359 UErrorCode *status) |
| 373 { | 360 { |
| 374 UPattern *pattern = &(strsrch->pattern); | 361 UPattern *pattern = &(strsrch->pattern); |
| 375 uint32_t pcetablesize = INITIAL_ARRAY_SIZE_; | 362 uint32_t pcetablesize = INITIAL_ARRAY_SIZE_; |
| 376 int64_t *pcetable = pattern->PCEBuffer; | 363 int64_t *pcetable = pattern->pcesBuffer; |
| 377 uint32_t patternlength = pattern->textLength; | 364 uint32_t patternlength = pattern->textLength; |
| 378 UCollationElements *coleiter = strsrch->utilIter; | 365 UCollationElements *coleiter = strsrch->utilIter; |
| 379 | 366 |
| 380 if (coleiter == NULL) { | 367 if (coleiter == NULL) { |
| 381 coleiter = ucol_openElements(strsrch->collator, pattern->text, | 368 coleiter = ucol_openElements(strsrch->collator, pattern->text, |
| 382 patternlength, status); | 369 patternlength, status); |
| 383 // status will be checked in ucol_next(..) later and if it is an | 370 // status will be checked in ucol_next(..) later and if it is an |
| 384 // error UCOL_NULLORDER the result of ucol_next(..) and 0 will be | 371 // error UCOL_NULLORDER the result of ucol_next(..) and 0 will be |
| 385 // returned. | 372 // returned. |
| 386 strsrch->utilIter = coleiter; | 373 strsrch->utilIter = coleiter; |
| 387 } else { | 374 } else { |
| 388 uprv_init_collIterate(strsrch->collator, pattern->text, | 375 ucol_setText(coleiter, pattern->text, pattern->textLength, status); |
| 389 pattern->textLength, | |
| 390 &coleiter->iteratordata_, | |
| 391 status); | |
| 392 } | 376 } |
| 393 if(U_FAILURE(*status)) { | 377 if(U_FAILURE(*status)) { |
| 394 return 0; | 378 return 0; |
| 395 } | 379 } |
| 396 | 380 |
| 397 if (pattern->PCE != pcetable && pattern->PCE != NULL) { | 381 if (pattern->pces != pcetable && pattern->pces != NULL) { |
| 398 uprv_free(pattern->PCE); | 382 uprv_free(pattern->pces); |
| 399 } | 383 } |
| 400 | 384 |
| 401 uint16_t offset = 0; | 385 uint16_t offset = 0; |
| 402 uint16_t result = 0; | 386 uint16_t result = 0; |
| 403 int64_t pce; | 387 int64_t pce; |
| 404 | 388 |
| 405 uprv_init_pce(coleiter); | 389 icu::UCollationPCE iter(coleiter); |
| 406 | 390 |
| 407 // ** Should processed CEs be signed or unsigned? | 391 // ** Should processed CEs be signed or unsigned? |
| 408 // ** (the rest of the code in this file seems to play fast-and-loose with | 392 // ** (the rest of the code in this file seems to play fast-and-loose with |
| 409 // ** whether a CE is signed or unsigned. For example, look at routine above this one.) | 393 // ** whether a CE is signed or unsigned. For example, look at routine above this one.) |
| 410 while ((pce = ucol_nextProcessed(coleiter, NULL, NULL, status)) != UCOL_PROC
ESSED_NULLORDER && | 394 while ((pce = iter.nextProcessed(NULL, NULL, status)) != UCOL_PROCESSED_NULL
ORDER && |
| 411 U_SUCCESS(*status)) { | 395 U_SUCCESS(*status)) { |
| 412 int64_t *temp = addTouint64_tArray(pcetable, offset, &pcetablesize, | 396 int64_t *temp = addTouint64_tArray(pcetable, offset, &pcetablesize, |
| 413 pce, | 397 pce, |
| 414 patternlength - ucol_getOffset(coleiter) + 1, | 398 patternlength - ucol_getOffset(coleiter) + 1, |
| 415 status); | 399 status); |
| 416 | 400 |
| 417 if (U_FAILURE(*status)) { | 401 if (U_FAILURE(*status)) { |
| 418 return 0; | 402 return 0; |
| 419 } | 403 } |
| 420 | 404 |
| 421 offset += 1; | 405 offset += 1; |
| 422 | 406 |
| 423 if (pcetable != temp && pcetable != pattern->PCEBuffer) { | 407 if (pcetable != temp && pcetable != pattern->pcesBuffer) { |
| 424 uprv_free(pcetable); | 408 uprv_free(pcetable); |
| 425 } | 409 } |
| 426 | 410 |
| 427 pcetable = temp; | 411 pcetable = temp; |
| 428 //result += (uint16_t)(ucol_getMaxExpansion(coleiter, ce) - 1); | 412 //result += (uint16_t)(ucol_getMaxExpansion(coleiter, ce) - 1); |
| 429 } | 413 } |
| 430 | 414 |
| 431 pcetable[offset] = 0; | 415 pcetable[offset] = 0; |
| 432 pattern->PCE = pcetable; | 416 pattern->pces = pcetable; |
| 433 pattern->PCELength = offset; | 417 pattern->pcesLength = offset; |
| 434 | 418 |
| 435 return result; | 419 return result; |
| 436 } | 420 } |
| 437 | 421 |
| 438 /** | 422 /** |
| 439 * Initializes the pattern struct. | 423 * Initializes the pattern struct. |
| 440 * Internal method, status assumed to be success. | 424 * Internal method, status assumed to be success. |
| 441 * @param strsrch UStringSearch data storage | 425 * @param strsrch UStringSearch data storage |
| 442 * @param status output error if any, caller to check status before calling | 426 * @param status output error if any, caller to check status before calling |
| 443 * method, status assumed to be success when passed in. | 427 * method, status assumed to be success when passed in. |
| 444 * @return expansionsize the total expansion size of the pattern | 428 * @return expansionsize the total expansion size of the pattern |
| 445 */ | 429 */ |
| 446 static | 430 static |
| 447 inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status) | 431 inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status) |
| 448 { | 432 { |
| 433 if (U_FAILURE(*status)) { return 0; } |
| 449 UPattern *pattern = &(strsrch->pattern); | 434 UPattern *pattern = &(strsrch->pattern); |
| 450 const UChar *patterntext = pattern->text; | 435 const UChar *patterntext = pattern->text; |
| 451 int32_t length = pattern->textLength; | 436 int32_t length = pattern->textLength; |
| 452 int32_t index = 0; | 437 int32_t index = 0; |
| 453 | 438 |
| 454 // Since the strength is primary, accents are ignored in the pattern. | 439 // Since the strength is primary, accents are ignored in the pattern. |
| 455 if (strsrch->strength == UCOL_PRIMARY) { | 440 if (strsrch->strength == UCOL_PRIMARY) { |
| 456 pattern->hasPrefixAccents = 0; | 441 pattern->hasPrefixAccents = 0; |
| 457 pattern->hasSuffixAccents = 0; | 442 pattern->hasSuffixAccents = 0; |
| 458 } else { | 443 } else { |
| 459 pattern->hasPrefixAccents = getFCD(patterntext, &index, length) >> | 444 pattern->hasPrefixAccents = getFCD(patterntext, &index, length) >> |
| 460 SECOND_LAST_BYTE_SHIFT_
; | 445 SECOND_LAST_BYTE_SHIFT_
; |
| 461 index = length; | 446 index = length; |
| 462 U16_BACK_1(patterntext, 0, index); | 447 U16_BACK_1(patterntext, 0, index); |
| 463 pattern->hasSuffixAccents = getFCD(patterntext, &index, length) & | 448 pattern->hasSuffixAccents = getFCD(patterntext, &index, length) & |
| 464 LAST_BYTE_MASK_
; | 449 LAST_BYTE_MASK_
; |
| 465 } | 450 } |
| 466 | 451 |
| 467 // ** HACK ** | 452 // ** HACK ** |
| 468 if (strsrch->pattern.PCE != NULL) { | 453 if (strsrch->pattern.pces != NULL) { |
| 469 if (strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) { | 454 if (strsrch->pattern.pces != strsrch->pattern.pcesBuffer) { |
| 470 uprv_free(strsrch->pattern.PCE); | 455 uprv_free(strsrch->pattern.pces); |
| 471 } | 456 } |
| 472 | 457 |
| 473 strsrch->pattern.PCE = NULL; | 458 strsrch->pattern.pces = NULL; |
| 474 } | 459 } |
| 475 | 460 |
| 476 // since initializePattern is an internal method status is a success. | 461 // since initializePattern is an internal method status is a success. |
| 477 return initializePatternCETable(strsrch, status); | 462 return initializePatternCETable(strsrch, status); |
| 478 } | 463 } |
| 479 | 464 |
| 480 /** | 465 /** |
| 481 * Initializing shift tables, with the default values. | 466 * Initializing shift tables, with the default values. |
| 482 * If a corresponding default value is 0, the shift table is not set. | 467 * If a corresponding default value is 0, the shift table is not set. |
| 483 * @param shift table for forwards shift | 468 * @param shift table for forwards shift |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 550 * If pattern has no non-ignorable ce, we return an illegal argument error. | 535 * If pattern has no non-ignorable ce, we return an illegal argument error. |
| 551 * Internal method, status assumed to be success. | 536 * Internal method, status assumed to be success. |
| 552 * @param strsrch UStringSearch data storage | 537 * @param strsrch UStringSearch data storage |
| 553 * @param status for output errors if it occurs, status is assumed to be a | 538 * @param status for output errors if it occurs, status is assumed to be a |
| 554 * success when it is passed in. | 539 * success when it is passed in. |
| 555 */ | 540 */ |
| 556 static | 541 static |
| 557 inline void initialize(UStringSearch *strsrch, UErrorCode *status) | 542 inline void initialize(UStringSearch *strsrch, UErrorCode *status) |
| 558 { | 543 { |
| 559 int16_t expandlength = initializePattern(strsrch, status); | 544 int16_t expandlength = initializePattern(strsrch, status); |
| 560 if (U_SUCCESS(*status) && strsrch->pattern.CELength > 0) { | 545 if (U_SUCCESS(*status) && strsrch->pattern.cesLength > 0) { |
| 561 UPattern *pattern = &strsrch->pattern; | 546 UPattern *pattern = &strsrch->pattern; |
| 562 int32_t cesize = pattern->CELength; | 547 int32_t cesize = pattern->cesLength; |
| 563 | 548 |
| 564 int16_t minlength = cesize > expandlength | 549 int16_t minlength = cesize > expandlength |
| 565 ? (int16_t)cesize - expandlength : 1; | 550 ? (int16_t)cesize - expandlength : 1; |
| 566 pattern->defaultShiftSize = minlength; | 551 pattern->defaultShiftSize = minlength; |
| 567 setShiftTable(pattern->shift, pattern->backShift, pattern->CE, | 552 setShiftTable(pattern->shift, pattern->backShift, pattern->ces, |
| 568 cesize, expandlength, minlength, minlength); | 553 cesize, expandlength, minlength, minlength); |
| 569 return; | 554 return; |
| 570 } | 555 } |
| 571 strsrch->pattern.defaultShiftSize = 0; | 556 strsrch->pattern.defaultShiftSize = 0; |
| 572 } | 557 } |
| 573 | 558 |
| 574 #if BOYER_MOORE | 559 #if BOYER_MOORE |
| 575 /** | 560 /** |
| 576 * Check to make sure that the match length is at the end of the character by | 561 * Check to make sure that the match length is at the end of the character by |
| 577 * using the breakiterator. | 562 * using the breakiterator. |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 633 ubrk_following(breakiterator, start - 1) == start) && | 618 ubrk_following(breakiterator, start - 1) == start) && |
| 634 (end == endindex || | 619 (end == endindex || |
| 635 ubrk_following(breakiterator, end - 1) == end); | 620 ubrk_following(breakiterator, end - 1) == end); |
| 636 if (result) { | 621 if (result) { |
| 637 // iterates the individual ces | 622 // iterates the individual ces |
| 638 UCollationElements *coleiter = strsrch->utilIter; | 623 UCollationElements *coleiter = strsrch->utilIter; |
| 639 const UChar *text = strsrch->search->text + | 624 const UChar *text = strsrch->search->text + |
| 640 start; | 625 start; |
| 641 UErrorCode status = U_ZERO_ERROR; | 626 UErrorCode status = U_ZERO_ERROR; |
| 642 ucol_setText(coleiter, text, end - start, &status); | 627 ucol_setText(coleiter, text, end - start, &status); |
| 643 for (int32_t count = 0; count < strsrch->pattern.CELength; | 628 for (int32_t count = 0; count < strsrch->pattern.cesLength; |
| 644 count ++) { | 629 count ++) { |
| 645 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status)); | 630 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status)); |
| 646 if (ce == UCOL_IGNORABLE) { | 631 if (ce == UCOL_IGNORABLE) { |
| 647 count --; | 632 count --; |
| 648 continue; | 633 continue; |
| 649 } | 634 } |
| 650 if (U_FAILURE(status) || ce != strsrch->pattern.CE[count]) { | 635 if (U_FAILURE(status) || ce != strsrch->pattern.ces[count]) { |
| 651 return FALSE; | 636 return FALSE; |
| 652 } | 637 } |
| 653 } | 638 } |
| 654 int32_t nextce = ucol_next(coleiter, &status); | 639 int32_t nextce = ucol_next(coleiter, &status); |
| 655 while (ucol_getOffset(coleiter) == (end - start) | 640 while (ucol_getOffset(coleiter) == (end - start) |
| 656 && getCE(strsrch, nextce) == UCOL_IGNORABLE) { | 641 && getCE(strsrch, nextce) == UCOL_IGNORABLE) { |
| 657 nextce = ucol_next(coleiter, &status); | 642 nextce = ucol_next(coleiter, &status); |
| 658 } | 643 } |
| 659 if (ucol_getOffset(coleiter) == (end - start) | 644 if (ucol_getOffset(coleiter) == (end - start) |
| 660 && nextce != UCOL_NULLORDER) { | 645 && nextce != UCOL_NULLORDER) { |
| (...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 741 inline int32_t shiftForward(UStringSearch *strsrch, | 726 inline int32_t shiftForward(UStringSearch *strsrch, |
| 742 int32_t textoffset, | 727 int32_t textoffset, |
| 743 int32_t ce, | 728 int32_t ce, |
| 744 int32_t patternceindex) | 729 int32_t patternceindex) |
| 745 { | 730 { |
| 746 UPattern *pattern = &(strsrch->pattern); | 731 UPattern *pattern = &(strsrch->pattern); |
| 747 if (ce != UCOL_NULLORDER) { | 732 if (ce != UCOL_NULLORDER) { |
| 748 int32_t shift = pattern->shift[hash(ce)]; | 733 int32_t shift = pattern->shift[hash(ce)]; |
| 749 // this is to adjust for characters in the middle of the | 734 // this is to adjust for characters in the middle of the |
| 750 // substring for matching that failed. | 735 // substring for matching that failed. |
| 751 int32_t adjust = pattern->CELength - patternceindex; | 736 int32_t adjust = pattern->cesLength - patternceindex; |
| 752 if (adjust > 1 && shift >= adjust) { | 737 if (adjust > 1 && shift >= adjust) { |
| 753 shift -= adjust - 1; | 738 shift -= adjust - 1; |
| 754 } | 739 } |
| 755 textoffset += shift; | 740 textoffset += shift; |
| 756 } | 741 } |
| 757 else { | 742 else { |
| 758 textoffset += pattern->defaultShiftSize; | 743 textoffset += pattern->defaultShiftSize; |
| 759 } | 744 } |
| 760 | 745 |
| 761 textoffset = getNextUStringSearchBaseOffset(strsrch, textoffset); | 746 textoffset = getNextUStringSearchBaseOffset(strsrch, textoffset); |
| (...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 875 uprv_free(norm); | 860 uprv_free(norm); |
| 876 return FALSE; | 861 return FALSE; |
| 877 } | 862 } |
| 878 } | 863 } |
| 879 else { | 864 else { |
| 880 norm = buffer; | 865 norm = buffer; |
| 881 } | 866 } |
| 882 | 867 |
| 883 UCollationElements *coleiter = strsrch->utilIter; | 868 UCollationElements *coleiter = strsrch->utilIter; |
| 884 ucol_setText(coleiter, norm, size, status); | 869 ucol_setText(coleiter, norm, size, status); |
| 885 uint32_t firstce = strsrch->pattern.CE[0]; | 870 uint32_t firstce = strsrch->pattern.ces[0]; |
| 886 UBool ignorable = TRUE; | 871 UBool ignorable = TRUE; |
| 887 uint32_t ce = UCOL_IGNORABLE; | 872 uint32_t ce = UCOL_IGNORABLE; |
| 888 while (U_SUCCESS(*status) && ce != firstce && ce != (uint32_t)UCOL_N
ULLORDER) { | 873 while (U_SUCCESS(*status) && ce != firstce && ce != (uint32_t)UCOL_N
ULLORDER) { |
| 889 offset = ucol_getOffset(coleiter); | 874 offset = ucol_getOffset(coleiter); |
| 890 if (ce != firstce && ce != UCOL_IGNORABLE) { | 875 if (ce != firstce && ce != UCOL_IGNORABLE) { |
| 891 ignorable = FALSE; | 876 ignorable = FALSE; |
| 892 } | 877 } |
| 893 ce = ucol_next(coleiter, status); | 878 ce = ucol_next(coleiter, status); |
| 894 } | 879 } |
| 895 UChar32 codepoint; | 880 UChar32 codepoint; |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 928 */ | 913 */ |
| 929 static | 914 static |
| 930 UBool hasAccentsBeforeMatch(const UStringSearch *strsrch, int32_t start, | 915 UBool hasAccentsBeforeMatch(const UStringSearch *strsrch, int32_t start, |
| 931 int32_t end) | 916 int32_t end) |
| 932 { | 917 { |
| 933 if (strsrch->pattern.hasPrefixAccents) { | 918 if (strsrch->pattern.hasPrefixAccents) { |
| 934 UCollationElements *coleiter = strsrch->textIter; | 919 UCollationElements *coleiter = strsrch->textIter; |
| 935 UErrorCode status = U_ZERO_ERROR; | 920 UErrorCode status = U_ZERO_ERROR; |
| 936 // we have been iterating forwards previously | 921 // we have been iterating forwards previously |
| 937 uint32_t ignorable = TRUE; | 922 uint32_t ignorable = TRUE; |
| 938 int32_t firstce = strsrch->pattern.CE[0]; | 923 int32_t firstce = strsrch->pattern.ces[0]; |
| 939 | 924 |
| 940 setColEIterOffset(coleiter, start); | 925 setColEIterOffset(coleiter, start); |
| 941 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status)); | 926 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status)); |
| 942 if (U_FAILURE(status)) { | 927 if (U_FAILURE(status)) { |
| 943 return TRUE; | 928 return TRUE; |
| 944 } | 929 } |
| 945 while (ce != firstce) { | 930 while (ce != firstce) { |
| 946 if (ce != UCOL_IGNORABLE) { | 931 if (ce != UCOL_IGNORABLE) { |
| 947 ignorable = FALSE; | 932 ignorable = FALSE; |
| 948 } | 933 } |
| (...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1010 static | 995 static |
| 1011 UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start, | 996 UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start, |
| 1012 int32_t end) | 997 int32_t end) |
| 1013 { | 998 { |
| 1014 if (strsrch->pattern.hasSuffixAccents) { | 999 if (strsrch->pattern.hasSuffixAccents) { |
| 1015 const UChar *text = strsrch->search->text; | 1000 const UChar *text = strsrch->search->text; |
| 1016 int32_t temp = end; | 1001 int32_t temp = end; |
| 1017 int32_t textlength = strsrch->search->textLength; | 1002 int32_t textlength = strsrch->search->textLength; |
| 1018 U16_BACK_1(text, 0, temp); | 1003 U16_BACK_1(text, 0, temp); |
| 1019 if (getFCD(text, &temp, textlength) & LAST_BYTE_MASK_) { | 1004 if (getFCD(text, &temp, textlength) & LAST_BYTE_MASK_) { |
| 1020 int32_t firstce = strsrch->pattern.CE[0]; | 1005 int32_t firstce = strsrch->pattern.ces[0]; |
| 1021 UCollationElements *coleiter = strsrch->textIter; | 1006 UCollationElements *coleiter = strsrch->textIter; |
| 1022 UErrorCode status = U_ZERO_ERROR; | 1007 UErrorCode status = U_ZERO_ERROR; |
| 1023 int32_t ce; | 1008 int32_t ce; |
| 1024 setColEIterOffset(coleiter, start); | 1009 setColEIterOffset(coleiter, start); |
| 1025 while ((ce = getCE(strsrch, ucol_next(coleiter, &status))) != firstc
e) { | 1010 while ((ce = getCE(strsrch, ucol_next(coleiter, &status))) != firstc
e) { |
| 1026 if (U_FAILURE(status) || ce == UCOL_NULLORDER) { | 1011 if (U_FAILURE(status) || ce == UCOL_NULLORDER) { |
| 1027 return TRUE; | 1012 return TRUE; |
| 1028 } | 1013 } |
| 1029 } | 1014 } |
| 1030 int32_t count = 1; | 1015 int32_t count = 1; |
| 1031 while (count < strsrch->pattern.CELength) { | 1016 while (count < strsrch->pattern.cesLength) { |
| 1032 if (getCE(strsrch, ucol_next(coleiter, &status)) | 1017 if (getCE(strsrch, ucol_next(coleiter, &status)) |
| 1033 == UCOL_IGNORABLE) { | 1018 == UCOL_IGNORABLE) { |
| 1034 // Thai can give an ignorable here. | 1019 // Thai can give an ignorable here. |
| 1035 count --; | 1020 count --; |
| 1036 } | 1021 } |
| 1037 if (U_FAILURE(status)) { | 1022 if (U_FAILURE(status)) { |
| 1038 return TRUE; | 1023 return TRUE; |
| 1039 } | 1024 } |
| 1040 count ++; | 1025 count ++; |
| 1041 } | 1026 } |
| (...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1205 if (U_FAILURE(*status)) { | 1190 if (U_FAILURE(*status)) { |
| 1206 return FALSE; | 1191 return FALSE; |
| 1207 } | 1192 } |
| 1208 if (ucol_getOffset(coleiter) != temp) { | 1193 if (ucol_getOffset(coleiter) != temp) { |
| 1209 *start = temp; | 1194 *start = temp; |
| 1210 temp = ucol_getOffset(coleiter); | 1195 temp = ucol_getOffset(coleiter); |
| 1211 } | 1196 } |
| 1212 expansion --; | 1197 expansion --; |
| 1213 } | 1198 } |
| 1214 | 1199 |
| 1215 int32_t *patternce = strsrch->pattern.CE; | 1200 int32_t *patternce = strsrch->pattern.ces; |
| 1216 int32_t patterncelength = strsrch->pattern.CELength; | 1201 int32_t patterncelength = strsrch->pattern.cesLength; |
| 1217 int32_t count = 0; | 1202 int32_t count = 0; |
| 1218 while (count < patterncelength) { | 1203 while (count < patterncelength) { |
| 1219 int32_t ce = getCE(strsrch, ucol_next(coleiter, status)); | 1204 int32_t ce = getCE(strsrch, ucol_next(coleiter, status)); |
| 1220 if (ce == UCOL_IGNORABLE) { | 1205 if (ce == UCOL_IGNORABLE) { |
| 1221 continue; | 1206 continue; |
| 1222 } | 1207 } |
| 1223 if (expandflag && count == 0 && ucol_getOffset(coleiter) != temp) { | 1208 if (expandflag && count == 0 && ucol_getOffset(coleiter) != temp) { |
| 1224 *start = temp; | 1209 *start = temp; |
| 1225 temp = ucol_getOffset(coleiter); | 1210 temp = ucol_getOffset(coleiter); |
| 1226 } | 1211 } |
| (...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1408 * Running through a collation element iterator to see if the contents matches | 1393 * Running through a collation element iterator to see if the contents matches |
| 1409 * pattern in string search data | 1394 * pattern in string search data |
| 1410 * @param strsrch string search data | 1395 * @param strsrch string search data |
| 1411 * @param coleiter collation element iterator | 1396 * @param coleiter collation element iterator |
| 1412 * @return TRUE if a match is found, FALSE otherwise | 1397 * @return TRUE if a match is found, FALSE otherwise |
| 1413 */ | 1398 */ |
| 1414 static | 1399 static |
| 1415 inline UBool checkCollationMatch(const UStringSearch *strsrch, | 1400 inline UBool checkCollationMatch(const UStringSearch *strsrch, |
| 1416 UCollationElements *coleiter) | 1401 UCollationElements *coleiter) |
| 1417 { | 1402 { |
| 1418 int patternceindex = strsrch->pattern.CELength; | 1403 int patternceindex = strsrch->pattern.cesLength; |
| 1419 int32_t *patternce = strsrch->pattern.CE; | 1404 int32_t *patternce = strsrch->pattern.ces; |
| 1420 UErrorCode status = U_ZERO_ERROR; | 1405 UErrorCode status = U_ZERO_ERROR; |
| 1421 while (patternceindex > 0) { | 1406 while (patternceindex > 0) { |
| 1422 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status)); | 1407 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status)); |
| 1423 if (ce == UCOL_IGNORABLE) { | 1408 if (ce == UCOL_IGNORABLE) { |
| 1424 continue; | 1409 continue; |
| 1425 } | 1410 } |
| 1426 if (U_FAILURE(status) || ce != *patternce) { | 1411 if (U_FAILURE(status) || ce != *patternce) { |
| 1427 return FALSE; | 1412 return FALSE; |
| 1428 } | 1413 } |
| 1429 patternce ++; | 1414 patternce ++; |
| (...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1608 } | 1593 } |
| 1609 else { | 1594 else { |
| 1610 safetextlength = u_strlen(strsrch->canonicalSuffixAccents); | 1595 safetextlength = u_strlen(strsrch->canonicalSuffixAccents); |
| 1611 safetext = strsrch->canonicalSuffixAccents; | 1596 safetext = strsrch->canonicalSuffixAccents; |
| 1612 } | 1597 } |
| 1613 | 1598 |
| 1614 // if status is a failure, ucol_setText does nothing | 1599 // if status is a failure, ucol_setText does nothing |
| 1615 ucol_setText(coleiter, safetext, safetextlength, status); | 1600 ucol_setText(coleiter, safetext, safetextlength, status); |
| 1616 // status checked in loop below | 1601 // status checked in loop below |
| 1617 | 1602 |
| 1618 int32_t *ce = strsrch->pattern.CE; | 1603 int32_t *ce = strsrch->pattern.ces; |
| 1619 int32_t celength = strsrch->pattern.CELength; | 1604 int32_t celength = strsrch->pattern.cesLength; |
| 1620 int ceindex = celength - 1; | 1605 int ceindex = celength - 1; |
| 1621 UBool isSafe = TRUE; // indication flag for position in safe zone | 1606 UBool isSafe = TRUE; // indication flag for position in safe zone |
| 1622 | 1607 |
| 1623 while (ceindex >= 0) { | 1608 while (ceindex >= 0) { |
| 1624 int32_t textce = ucol_previous(coleiter, status); | 1609 int32_t textce = ucol_previous(coleiter, status); |
| 1625 if (U_FAILURE(*status)) { | 1610 if (U_FAILURE(*status)) { |
| 1626 if (isSafe) { | 1611 if (isSafe) { |
| 1627 cleanUpSafeText(strsrch, safetext, safebuffer); | 1612 cleanUpSafeText(strsrch, safetext, safebuffer); |
| 1628 } | 1613 } |
| 1629 return USEARCH_DONE; | 1614 return USEARCH_DONE; |
| (...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1848 if (U_FAILURE(*status)) { | 1833 if (U_FAILURE(*status)) { |
| 1849 return FALSE; | 1834 return FALSE; |
| 1850 } | 1835 } |
| 1851 if (ucol_getOffset(coleiter) != temp) { | 1836 if (ucol_getOffset(coleiter) != temp) { |
| 1852 *start = temp; | 1837 *start = temp; |
| 1853 temp = ucol_getOffset(coleiter); | 1838 temp = ucol_getOffset(coleiter); |
| 1854 } | 1839 } |
| 1855 expansion --; | 1840 expansion --; |
| 1856 } | 1841 } |
| 1857 | 1842 |
| 1858 int32_t *patternce = strsrch->pattern.CE; | 1843 int32_t *patternce = strsrch->pattern.ces; |
| 1859 int32_t patterncelength = strsrch->pattern.CELength; | 1844 int32_t patterncelength = strsrch->pattern.cesLength; |
| 1860 int32_t count = 0; | 1845 int32_t count = 0; |
| 1861 int32_t textlength = strsrch->search->textLength; | 1846 int32_t textlength = strsrch->search->textLength; |
| 1862 while (count < patterncelength) { | 1847 while (count < patterncelength) { |
| 1863 int32_t ce = getCE(strsrch, ucol_next(coleiter, status)); | 1848 int32_t ce = getCE(strsrch, ucol_next(coleiter, status)); |
| 1864 // status checked below, note that if status is a failure | 1849 // status checked below, note that if status is a failure |
| 1865 // ucol_next returns UCOL_NULLORDER | 1850 // ucol_next returns UCOL_NULLORDER |
| 1866 if (ce == UCOL_IGNORABLE) { | 1851 if (ce == UCOL_IGNORABLE) { |
| 1867 continue; | 1852 continue; |
| 1868 } | 1853 } |
| 1869 if (expandflag && count == 0 && ucol_getOffset(coleiter) != temp) { | 1854 if (expandflag && count == 0 && ucol_getOffset(coleiter) != temp) { |
| (...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2046 if (U_FAILURE(*status)) { | 2031 if (U_FAILURE(*status)) { |
| 2047 return FALSE; | 2032 return FALSE; |
| 2048 } | 2033 } |
| 2049 if (ucol_getOffset(coleiter) != temp) { | 2034 if (ucol_getOffset(coleiter) != temp) { |
| 2050 *end = temp; | 2035 *end = temp; |
| 2051 temp = ucol_getOffset(coleiter); | 2036 temp = ucol_getOffset(coleiter); |
| 2052 } | 2037 } |
| 2053 expansion --; | 2038 expansion --; |
| 2054 } | 2039 } |
| 2055 | 2040 |
| 2056 int32_t *patternce = strsrch->pattern.CE; | 2041 int32_t *patternce = strsrch->pattern.ces; |
| 2057 int32_t patterncelength = strsrch->pattern.CELength; | 2042 int32_t patterncelength = strsrch->pattern.cesLength; |
| 2058 int32_t count = patterncelength; | 2043 int32_t count = patterncelength; |
| 2059 while (count > 0) { | 2044 while (count > 0) { |
| 2060 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status)); | 2045 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status)); |
| 2061 // status checked below, note that if status is a failure | 2046 // status checked below, note that if status is a failure |
| 2062 // ucol_previous returns UCOL_NULLORDER | 2047 // ucol_previous returns UCOL_NULLORDER |
| 2063 if (ce == UCOL_IGNORABLE) { | 2048 if (ce == UCOL_IGNORABLE) { |
| 2064 continue; | 2049 continue; |
| 2065 } | 2050 } |
| 2066 if (expandflag && count == 0 && | 2051 if (expandflag && count == 0 && |
| 2067 getColElemIterOffset(coleiter, FALSE) != temp) { | 2052 getColElemIterOffset(coleiter, FALSE) != temp) { |
| (...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2271 else { | 2256 else { |
| 2272 safetextlength = u_strlen(strsrch->canonicalPrefixAccents); | 2257 safetextlength = u_strlen(strsrch->canonicalPrefixAccents); |
| 2273 safetext = strsrch->canonicalPrefixAccents; | 2258 safetext = strsrch->canonicalPrefixAccents; |
| 2274 } | 2259 } |
| 2275 | 2260 |
| 2276 UCollationElements *coleiter = strsrch->utilIter; | 2261 UCollationElements *coleiter = strsrch->utilIter; |
| 2277 // if status is a failure, ucol_setText does nothing | 2262 // if status is a failure, ucol_setText does nothing |
| 2278 ucol_setText(coleiter, safetext, safetextlength, status); | 2263 ucol_setText(coleiter, safetext, safetextlength, status); |
| 2279 // status checked in loop below | 2264 // status checked in loop below |
| 2280 | 2265 |
| 2281 int32_t *ce = strsrch->pattern.CE; | 2266 int32_t *ce = strsrch->pattern.ces; |
| 2282 int32_t celength = strsrch->pattern.CELength; | 2267 int32_t celength = strsrch->pattern.cesLength; |
| 2283 int ceindex = 0; | 2268 int ceindex = 0; |
| 2284 UBool isSafe = TRUE; // safe zone indication flag for position | 2269 UBool isSafe = TRUE; // safe zone indication flag for position |
| 2285 int32_t prefixlength = u_strlen(strsrch->canonicalPrefixAccents); | 2270 int32_t prefixlength = u_strlen(strsrch->canonicalPrefixAccents); |
| 2286 | 2271 |
| 2287 while (ceindex < celength) { | 2272 while (ceindex < celength) { |
| 2288 int32_t textce = ucol_next(coleiter, status); | 2273 int32_t textce = ucol_next(coleiter, status); |
| 2289 if (U_FAILURE(*status)) { | 2274 if (U_FAILURE(*status)) { |
| 2290 if (isSafe) { | 2275 if (isSafe) { |
| 2291 cleanUpSafeText(strsrch, safetext, safebuffer); | 2276 cleanUpSafeText(strsrch, safetext, safebuffer); |
| 2292 } | 2277 } |
| (...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2486 if (U_FAILURE(*status)) { | 2471 if (U_FAILURE(*status)) { |
| 2487 return FALSE; | 2472 return FALSE; |
| 2488 } | 2473 } |
| 2489 if (ucol_getOffset(coleiter) != temp) { | 2474 if (ucol_getOffset(coleiter) != temp) { |
| 2490 *end = temp; | 2475 *end = temp; |
| 2491 temp = ucol_getOffset(coleiter); | 2476 temp = ucol_getOffset(coleiter); |
| 2492 } | 2477 } |
| 2493 expansion --; | 2478 expansion --; |
| 2494 } | 2479 } |
| 2495 | 2480 |
| 2496 int32_t *patternce = strsrch->pattern.CE; | 2481 int32_t *patternce = strsrch->pattern.ces; |
| 2497 int32_t patterncelength = strsrch->pattern.CELength; | 2482 int32_t patterncelength = strsrch->pattern.cesLength; |
| 2498 int32_t count = patterncelength; | 2483 int32_t count = patterncelength; |
| 2499 while (count > 0) { | 2484 while (count > 0) { |
| 2500 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status)); | 2485 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status)); |
| 2501 // status checked below, note that if status is a failure | 2486 // status checked below, note that if status is a failure |
| 2502 // ucol_previous returns UCOL_NULLORDER | 2487 // ucol_previous returns UCOL_NULLORDER |
| 2503 if (ce == UCOL_IGNORABLE) { | 2488 if (ce == UCOL_IGNORABLE) { |
| 2504 continue; | 2489 continue; |
| 2505 } | 2490 } |
| 2506 if (expandflag && count == 0 && | 2491 if (expandflag && count == 0 && |
| 2507 getColElemIterOffset(coleiter, FALSE) != temp) { | 2492 getColElemIterOffset(coleiter, FALSE) != temp) { |
| (...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2693 } | 2678 } |
| 2694 | 2679 |
| 2695 result->collator = collator; | 2680 result->collator = collator; |
| 2696 result->strength = ucol_getStrength(collator); | 2681 result->strength = ucol_getStrength(collator); |
| 2697 result->ceMask = getMask(result->strength); | 2682 result->ceMask = getMask(result->strength); |
| 2698 result->toShift = | 2683 result->toShift = |
| 2699 ucol_getAttribute(collator, UCOL_ALTERNATE_HANDLING, status) == | 2684 ucol_getAttribute(collator, UCOL_ALTERNATE_HANDLING, status) == |
| 2700 UCOL_SHIFTED; | 2685 UCOL_SHIFTED; |
| 2701 result->variableTop = ucol_getVariableTop(collator, status); | 2686 result->variableTop = ucol_getVariableTop(collator, status); |
| 2702 | 2687 |
| 2703 result->nfd = Normalizer2Factory::getNFDInstance(*status); | 2688 result->nfd = Normalizer2::getNFDInstance(*status); |
| 2704 | 2689 |
| 2705 if (U_FAILURE(*status)) { | 2690 if (U_FAILURE(*status)) { |
| 2706 uprv_free(result); | 2691 uprv_free(result); |
| 2707 return NULL; | 2692 return NULL; |
| 2708 } | 2693 } |
| 2709 | 2694 |
| 2710 result->search = (USearch *)uprv_malloc(sizeof(USearch)); | 2695 result->search = (USearch *)uprv_malloc(sizeof(USearch)); |
| 2711 if (result->search == NULL) { | 2696 if (result->search == NULL) { |
| 2712 *status = U_MEMORY_ALLOCATION_ERROR; | 2697 *status = U_MEMORY_ALLOCATION_ERROR; |
| 2713 uprv_free(result); | 2698 uprv_free(result); |
| 2714 return NULL; | 2699 return NULL; |
| 2715 } | 2700 } |
| 2716 | 2701 |
| 2717 result->search->text = text; | 2702 result->search->text = text; |
| 2718 result->search->textLength = textlength; | 2703 result->search->textLength = textlength; |
| 2719 | 2704 |
| 2720 result->pattern.text = pattern; | 2705 result->pattern.text = pattern; |
| 2721 result->pattern.textLength = patternlength; | 2706 result->pattern.textLength = patternlength; |
| 2722 result->pattern.CE = NULL; | 2707 result->pattern.ces = NULL; |
| 2723 result->pattern.PCE = NULL; | 2708 result->pattern.pces = NULL; |
| 2724 | 2709 |
| 2725 result->search->breakIter = breakiter; | 2710 result->search->breakIter = breakiter; |
| 2726 #if !UCONFIG_NO_BREAK_ITERATION | 2711 #if !UCONFIG_NO_BREAK_ITERATION |
| 2727 result->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(result->collator, ULOC_VALID_LOCALE, status), text, textlength, status); | 2712 result->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(result->collator, ULOC_VALID_LOCALE, status), text, textlength, status); |
| 2728 if (breakiter) { | 2713 if (breakiter) { |
| 2729 ubrk_setText(breakiter, text, textlength, status); | 2714 ubrk_setText(breakiter, text, textlength, status); |
| 2730 } | 2715 } |
| 2731 #endif | 2716 #endif |
| 2732 | 2717 |
| 2733 result->ownCollator = FALSE; | 2718 result->ownCollator = FALSE; |
| 2734 result->search->matchedLength = 0; | 2719 result->search->matchedLength = 0; |
| 2735 result->search->matchedIndex = USEARCH_DONE; | 2720 result->search->matchedIndex = USEARCH_DONE; |
| 2736 result->utilIter = NULL; | 2721 result->utilIter = NULL; |
| 2737 result->textIter = ucol_openElements(collator, text, | 2722 result->textIter = ucol_openElements(collator, text, |
| 2738 textlength, status); | 2723 textlength, status); |
| 2724 result->textProcessedIter = NULL; |
| 2739 if (U_FAILURE(*status)) { | 2725 if (U_FAILURE(*status)) { |
| 2740 usearch_close(result); | 2726 usearch_close(result); |
| 2741 return NULL; | 2727 return NULL; |
| 2742 } | 2728 } |
| 2743 | 2729 |
| 2744 result->search->isOverlap = FALSE; | 2730 result->search->isOverlap = FALSE; |
| 2745 result->search->isCanonicalMatch = FALSE; | 2731 result->search->isCanonicalMatch = FALSE; |
| 2746 result->search->elementComparisonType = 0; | 2732 result->search->elementComparisonType = 0; |
| 2747 result->search->isForwardSearching = TRUE; | 2733 result->search->isForwardSearching = TRUE; |
| 2748 result->search->reset = TRUE; | 2734 result->search->reset = TRUE; |
| 2749 | 2735 |
| 2750 initialize(result, status); | 2736 initialize(result, status); |
| 2751 | 2737 |
| 2752 if (U_FAILURE(*status)) { | 2738 if (U_FAILURE(*status)) { |
| 2753 usearch_close(result); | 2739 usearch_close(result); |
| 2754 return NULL; | 2740 return NULL; |
| 2755 } | 2741 } |
| 2756 | 2742 |
| 2757 return result; | 2743 return result; |
| 2758 } | 2744 } |
| 2759 return NULL; | 2745 return NULL; |
| 2760 } | 2746 } |
| 2761 | 2747 |
| 2762 U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch) | 2748 U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch) |
| 2763 { | 2749 { |
| 2764 if (strsrch) { | 2750 if (strsrch) { |
| 2765 if (strsrch->pattern.CE != strsrch->pattern.CEBuffer && | 2751 if (strsrch->pattern.ces != strsrch->pattern.cesBuffer && |
| 2766 strsrch->pattern.CE) { | 2752 strsrch->pattern.ces) { |
| 2767 uprv_free(strsrch->pattern.CE); | 2753 uprv_free(strsrch->pattern.ces); |
| 2768 } | 2754 } |
| 2769 | 2755 |
| 2770 if (strsrch->pattern.PCE != NULL && | 2756 if (strsrch->pattern.pces != NULL && |
| 2771 strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) { | 2757 strsrch->pattern.pces != strsrch->pattern.pcesBuffer) { |
| 2772 uprv_free(strsrch->pattern.PCE); | 2758 uprv_free(strsrch->pattern.pces); |
| 2773 } | 2759 } |
| 2774 | 2760 |
| 2761 delete strsrch->textProcessedIter; |
| 2775 ucol_closeElements(strsrch->textIter); | 2762 ucol_closeElements(strsrch->textIter); |
| 2776 ucol_closeElements(strsrch->utilIter); | 2763 ucol_closeElements(strsrch->utilIter); |
| 2777 | 2764 |
| 2778 if (strsrch->ownCollator && strsrch->collator) { | 2765 if (strsrch->ownCollator && strsrch->collator) { |
| 2779 ucol_close((UCollator *)strsrch->collator); | 2766 ucol_close((UCollator *)strsrch->collator); |
| 2780 } | 2767 } |
| 2781 | 2768 |
| 2782 #if !UCONFIG_NO_BREAK_ITERATION | 2769 #if !UCONFIG_NO_BREAK_ITERATION |
| 2783 if (strsrch->search->internalBreakIter) { | 2770 if (strsrch->search->internalBreakIter) { |
| 2784 ubrk_close(strsrch->search->internalBreakIter); | 2771 ubrk_close(strsrch->search->internalBreakIter); |
| 2785 } | 2772 } |
| 2786 #endif | 2773 #endif |
| 2787 | 2774 |
| 2788 uprv_free(strsrch->search); | 2775 uprv_free(strsrch->search); |
| 2789 uprv_free(strsrch); | 2776 uprv_free(strsrch); |
| 2790 } | 2777 } |
| 2791 } | 2778 } |
| 2792 | 2779 |
| 2780 namespace { |
| 2781 |
| 2782 UBool initTextProcessedIter(UStringSearch *strsrch, UErrorCode *status) { |
| 2783 if (U_FAILURE(*status)) { return FALSE; } |
| 2784 if (strsrch->textProcessedIter == NULL) { |
| 2785 strsrch->textProcessedIter = new icu::UCollationPCE(strsrch->textIter); |
| 2786 if (strsrch->textProcessedIter == NULL) { |
| 2787 *status = U_MEMORY_ALLOCATION_ERROR; |
| 2788 return FALSE; |
| 2789 } |
| 2790 } else { |
| 2791 strsrch->textProcessedIter->init(strsrch->textIter); |
| 2792 } |
| 2793 return TRUE; |
| 2794 } |
| 2795 |
| 2796 } |
| 2797 |
| 2793 // set and get methods -------------------------------------------------- | 2798 // set and get methods -------------------------------------------------- |
| 2794 | 2799 |
| 2795 U_CAPI void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch, | 2800 U_CAPI void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch, |
| 2796 int32_t position, | 2801 int32_t position, |
| 2797 UErrorCode *status) | 2802 UErrorCode *status) |
| 2798 { | 2803 { |
| 2799 if (U_SUCCESS(*status) && strsrch) { | 2804 if (U_SUCCESS(*status) && strsrch) { |
| 2800 if (isOutOfBounds(strsrch->search->textLength, position)) { | 2805 if (isOutOfBounds(strsrch->search->textLength, position)) { |
| 2801 *status = U_INDEX_OUTOFBOUNDS_ERROR; | 2806 *status = U_INDEX_OUTOFBOUNDS_ERROR; |
| 2802 } | 2807 } |
| (...skipping 200 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3003 const UCollator *collator, | 3008 const UCollator *collator, |
| 3004 UErrorCode *status) | 3009 UErrorCode *status) |
| 3005 { | 3010 { |
| 3006 if (U_SUCCESS(*status)) { | 3011 if (U_SUCCESS(*status)) { |
| 3007 if (collator == NULL) { | 3012 if (collator == NULL) { |
| 3008 *status = U_ILLEGAL_ARGUMENT_ERROR; | 3013 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 3009 return; | 3014 return; |
| 3010 } | 3015 } |
| 3011 | 3016 |
| 3012 if (strsrch) { | 3017 if (strsrch) { |
| 3018 delete strsrch->textProcessedIter; |
| 3019 strsrch->textProcessedIter = NULL; |
| 3020 ucol_closeElements(strsrch->textIter); |
| 3021 ucol_closeElements(strsrch->utilIter); |
| 3022 strsrch->textIter = strsrch->utilIter = NULL; |
| 3013 if (strsrch->ownCollator && (strsrch->collator != collator)) { | 3023 if (strsrch->ownCollator && (strsrch->collator != collator)) { |
| 3014 ucol_close((UCollator *)strsrch->collator); | 3024 ucol_close((UCollator *)strsrch->collator); |
| 3015 strsrch->ownCollator = FALSE; | 3025 strsrch->ownCollator = FALSE; |
| 3016 } | 3026 } |
| 3017 strsrch->collator = collator; | 3027 strsrch->collator = collator; |
| 3018 strsrch->strength = ucol_getStrength(collator); | 3028 strsrch->strength = ucol_getStrength(collator); |
| 3019 strsrch->ceMask = getMask(strsrch->strength); | 3029 strsrch->ceMask = getMask(strsrch->strength); |
| 3020 #if !UCONFIG_NO_BREAK_ITERATION | 3030 #if !UCONFIG_NO_BREAK_ITERATION |
| 3021 ubrk_close(strsrch->search->internalBreakIter); | 3031 ubrk_close(strsrch->search->internalBreakIter); |
| 3022 strsrch->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(collator, ULOC_VALID_LOCALE, status), | 3032 strsrch->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(collator, ULOC_VALID_LOCALE, status), |
| 3023 strsrch->search->text, strsrch->search->textLength, status); | 3033 strsrch->search->text, strsrch->search->textLength, status); |
| 3024 #endif | 3034 #endif |
| 3025 // if status is a failure, ucol_getAttribute returns UCOL_DEFAULT | 3035 // if status is a failure, ucol_getAttribute returns UCOL_DEFAULT |
| 3026 strsrch->toShift = | 3036 strsrch->toShift = |
| 3027 ucol_getAttribute(collator, UCOL_ALTERNATE_HANDLING, status) == | 3037 ucol_getAttribute(collator, UCOL_ALTERNATE_HANDLING, status) == |
| 3028 UCOL_SHIFTED; | 3038 UCOL_SHIFTED; |
| 3029 // if status is a failure, ucol_getVariableTop returns 0 | 3039 // if status is a failure, ucol_getVariableTop returns 0 |
| 3030 strsrch->variableTop = ucol_getVariableTop(collator, status); | 3040 strsrch->variableTop = ucol_getVariableTop(collator, status); |
| 3031 if (U_SUCCESS(*status)) { | 3041 strsrch->textIter = ucol_openElements(collator, |
| 3032 initialize(strsrch, status); | 3042 strsrch->search->text, |
| 3033 if (U_SUCCESS(*status)) { | 3043 strsrch->search->textLength, |
| 3034 /* free offset buffer to avoid memory leak before initializing. */ | 3044 status); |
| 3035 ucol_freeOffsetBuffer(&(strsrch->textIter->iteratordata_)); | 3045 strsrch->utilIter = ucol_openElements( |
| 3036 uprv_init_collIterate(collator, strsrch->search->text, | 3046 collator, strsrch->pattern.text, strsrch->pattern.textLength, status); |
| 3037 strsrch->search->textLength, | 3047 // initialize() _after_ setting the iterators for the new collator. |
| 3038 &(strsrch->textIter->iteratordata_), | 3048 initialize(strsrch, status); |
| 3039 status); | |
| 3040 strsrch->utilIter->iteratordata_.coll = collator; | |
| 3041 } | |
| 3042 } | |
| 3043 } | 3049 } |
| 3044 | 3050 |
| 3045 // **** are these calls needed? | 3051 // **** are these calls needed? |
| 3046 // **** we call uprv_init_pce in initializePatternPCETable | 3052 // **** we call uprv_init_pce in initializePatternPCETable |
| 3047 // **** and the CEBuffer constructor... | 3053 // **** and the CEIBuffer constructor... |
| 3048 #if 0 | 3054 #if 0 |
| 3049 uprv_init_pce(strsrch->textIter); | 3055 uprv_init_pce(strsrch->textIter); |
| 3050 uprv_init_pce(strsrch->utilIter); | 3056 uprv_init_pce(strsrch->utilIter); |
| 3051 #endif | 3057 #endif |
| 3052 } | 3058 } |
| 3053 } | 3059 } |
| 3054 | 3060 |
| 3055 U_CAPI UCollator * U_EXPORT2 usearch_getCollator(const UStringSearch *strsrch) | 3061 U_CAPI UCollator * U_EXPORT2 usearch_getCollator(const UStringSearch *strsrch) |
| 3056 { | 3062 { |
| 3057 if (strsrch) { | 3063 if (strsrch) { |
| (...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3215 // match is not found. | 3221 // match is not found. |
| 3216 search->isForwardSearching = TRUE; | 3222 search->isForwardSearching = TRUE; |
| 3217 if (search->matchedIndex != USEARCH_DONE) { | 3223 if (search->matchedIndex != USEARCH_DONE) { |
| 3218 // there's no need to set the collation element iterator | 3224 // there's no need to set the collation element iterator |
| 3219 // the next call to next will set the offset. | 3225 // the next call to next will set the offset. |
| 3220 return search->matchedIndex; | 3226 return search->matchedIndex; |
| 3221 } | 3227 } |
| 3222 } | 3228 } |
| 3223 | 3229 |
| 3224 if (U_SUCCESS(*status)) { | 3230 if (U_SUCCESS(*status)) { |
| 3225 if (strsrch->pattern.CELength == 0) { | 3231 if (strsrch->pattern.cesLength == 0) { |
| 3226 if (search->matchedIndex == USEARCH_DONE) { | 3232 if (search->matchedIndex == USEARCH_DONE) { |
| 3227 search->matchedIndex = offset; | 3233 search->matchedIndex = offset; |
| 3228 } | 3234 } |
| 3229 else { // moves by codepoints | 3235 else { // moves by codepoints |
| 3230 U16_FWD_1(search->text, search->matchedIndex, textlength); | 3236 U16_FWD_1(search->text, search->matchedIndex, textlength); |
| 3231 } | 3237 } |
| 3232 | 3238 |
| 3233 search->matchedLength = 0; | 3239 search->matchedLength = 0; |
| 3234 setColEIterOffset(strsrch->textIter, search->matchedIndex); | 3240 setColEIterOffset(strsrch->textIter, search->matchedIndex); |
| 3235 // status checked below | 3241 // status checked below |
| (...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3326 // Could check pattern length, but the | 3332 // Could check pattern length, but the |
| 3327 // linear search will do the right thing | 3333 // linear search will do the right thing |
| 3328 if (offset == 0 || matchedindex == 0) { | 3334 if (offset == 0 || matchedindex == 0) { |
| 3329 setMatchNotFound(strsrch); | 3335 setMatchNotFound(strsrch); |
| 3330 return USEARCH_DONE; | 3336 return USEARCH_DONE; |
| 3331 } | 3337 } |
| 3332 #endif | 3338 #endif |
| 3333 } | 3339 } |
| 3334 | 3340 |
| 3335 if (U_SUCCESS(*status)) { | 3341 if (U_SUCCESS(*status)) { |
| 3336 if (strsrch->pattern.CELength == 0) { | 3342 if (strsrch->pattern.cesLength == 0) { |
| 3337 search->matchedIndex = | 3343 search->matchedIndex = |
| 3338 (matchedindex == USEARCH_DONE ? offset : matchedindex); | 3344 (matchedindex == USEARCH_DONE ? offset : matchedindex); |
| 3339 if (search->matchedIndex == 0) { | 3345 if (search->matchedIndex == 0) { |
| 3340 setMatchNotFound(strsrch); | 3346 setMatchNotFound(strsrch); |
| 3341 // status checked below | 3347 // status checked below |
| 3342 } | 3348 } |
| 3343 else { // move by codepoints | 3349 else { // move by codepoints |
| 3344 U16_BACK_1(search->text, 0, search->matchedIndex); | 3350 U16_BACK_1(search->text, 0, search->matchedIndex); |
| 3345 setColEIterOffset(strsrch->textIter, search->matchedIndex); | 3351 setColEIterOffset(strsrch->textIter, search->matchedIndex); |
| 3346 // status checked below | 3352 // status checked below |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3409 | 3415 |
| 3410 // if status is a failure, ucol_getVariableTop returns 0 | 3416 // if status is a failure, ucol_getVariableTop returns 0 |
| 3411 varTop = ucol_getVariableTop(strsrch->collator, &status); | 3417 varTop = ucol_getVariableTop(strsrch->collator, &status); |
| 3412 if (strsrch->variableTop != varTop) { | 3418 if (strsrch->variableTop != varTop) { |
| 3413 strsrch->variableTop = varTop; | 3419 strsrch->variableTop = varTop; |
| 3414 sameCollAttribute = FALSE; | 3420 sameCollAttribute = FALSE; |
| 3415 } | 3421 } |
| 3416 if (!sameCollAttribute) { | 3422 if (!sameCollAttribute) { |
| 3417 initialize(strsrch, &status); | 3423 initialize(strsrch, &status); |
| 3418 } | 3424 } |
| 3419 /* free offset buffer to avoid memory leak before initializing. */ | 3425 ucol_setText(strsrch->textIter, strsrch->search->text, |
| 3420 ucol_freeOffsetBuffer(&(strsrch->textIter->iteratordata_)); | |
| 3421 uprv_init_collIterate(strsrch->collator, strsrch->search->text, | |
| 3422 strsrch->search->textLength, | 3426 strsrch->search->textLength, |
| 3423 &(strsrch->textIter->iteratordata_), | |
| 3424 &status); | 3427 &status); |
| 3425 strsrch->search->matchedLength = 0; | 3428 strsrch->search->matchedLength = 0; |
| 3426 strsrch->search->matchedIndex = USEARCH_DONE; | 3429 strsrch->search->matchedIndex = USEARCH_DONE; |
| 3427 strsrch->search->isOverlap = FALSE; | 3430 strsrch->search->isOverlap = FALSE; |
| 3428 strsrch->search->isCanonicalMatch = FALSE; | 3431 strsrch->search->isCanonicalMatch = FALSE; |
| 3429 strsrch->search->elementComparisonType = 0; | 3432 strsrch->search->elementComparisonType = 0; |
| 3430 strsrch->search->isForwardSearching = TRUE; | 3433 strsrch->search->isForwardSearching = TRUE; |
| 3431 strsrch->search->reset = TRUE; | 3434 strsrch->search->reset = TRUE; |
| 3432 } | 3435 } |
| 3433 } | 3436 } |
| 3434 | 3437 |
| 3435 // | 3438 // |
| 3436 // CEI Collation Element + source text index. | 3439 // CEI Collation Element + source text index. |
| 3437 // These structs are kept in the circular buffer. | 3440 // These structs are kept in the circular buffer. |
| 3438 // | 3441 // |
| 3439 struct CEI { | 3442 struct CEI { |
| 3440 int64_t ce; | 3443 int64_t ce; |
| 3441 int32_t lowIndex; | 3444 int32_t lowIndex; |
| 3442 int32_t highIndex; | 3445 int32_t highIndex; |
| 3443 }; | 3446 }; |
| 3444 | 3447 |
| 3445 U_NAMESPACE_BEGIN | 3448 U_NAMESPACE_BEGIN |
| 3446 | 3449 |
| 3447 | 3450 namespace { |
| 3448 // | 3451 // |
| 3449 // CEBuffer A circular buffer of CEs from the text being searched. | 3452 // CEIBuffer A circular buffer of CEs-with-index from the text being searched. |
| 3450 // | 3453 // |
| 3451 #define DEFAULT_CEBUFFER_SIZE 96 | 3454 #define DEFAULT_CEBUFFER_SIZE 96 |
| 3452 #define CEBUFFER_EXTRA 32 | 3455 #define CEBUFFER_EXTRA 32 |
| 3453 // Some typical max values to make buffer size more reasonable for asymmetric search. | 3456 // Some typical max values to make buffer size more reasonable for asymmetric search. |
| 3454 // #8694 is for a better long-term solution to allocation of this buffer. | 3457 // #8694 is for a better long-term solution to allocation of this buffer. |
| 3455 #define MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L 8 | 3458 #define MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L 8 |
| 3456 #define MAX_TARGET_IGNORABLES_PER_PAT_OTHER 3 | 3459 #define MAX_TARGET_IGNORABLES_PER_PAT_OTHER 3 |
| 3457 #define MIGHT_BE_JAMO_L(c) ((c >= 0x1100 && c <= 0x115E) || (c >= 0x3131 && c <= 0x314E) || (c >= 0x3165 && c <= 0x3186)) | 3460 #define MIGHT_BE_JAMO_L(c) ((c >= 0x1100 && c <= 0x115E) || (c >= 0x3131 && c <= 0x314E) || (c >= 0x3165 && c <= 0x3186)) |
| 3458 struct CEBuffer { | 3461 struct CEIBuffer { |
| 3459 CEI defBuf[DEFAULT_CEBUFFER_SIZE]; | 3462 CEI defBuf[DEFAULT_CEBUFFER_SIZE]; |
| 3460 CEI *buf; | 3463 CEI *buf; |
| 3461 int32_t bufSize; | 3464 int32_t bufSize; |
| 3462 int32_t firstIx; | 3465 int32_t firstIx; |
| 3463 int32_t limitIx; | 3466 int32_t limitIx; |
| 3464 UCollationElements *ceIter; | 3467 UCollationElements *ceIter; |
| 3465 UStringSearch *strSearch; | 3468 UStringSearch *strSearch; |
| 3466 | 3469 |
| 3467 | 3470 |
| 3468 | 3471 |
| 3469 CEBuffer(UStringSearch *ss, UErrorCode *status); | 3472 CEIBuffer(UStringSearch *ss, UErrorCode *status); |
| 3470 ~CEBuffer(); | 3473 ~CEIBuffer(); |
| 3471 const CEI *get(int32_t index); | 3474 const CEI *get(int32_t index); |
| 3472 const CEI *getPrevious(int32_t index); | 3475 const CEI *getPrevious(int32_t index); |
| 3473 }; | 3476 }; |
| 3474 | 3477 |
| 3475 | 3478 |
| 3476 CEBuffer::CEBuffer(UStringSearch *ss, UErrorCode *status) { | 3479 CEIBuffer::CEIBuffer(UStringSearch *ss, UErrorCode *status) { |
| 3477 buf = defBuf; | 3480 buf = defBuf; |
| 3478 strSearch = ss; | 3481 strSearch = ss; |
| 3479 bufSize = ss->pattern.PCELength + CEBUFFER_EXTRA; | 3482 bufSize = ss->pattern.pcesLength + CEBUFFER_EXTRA; |
| 3480 if (ss->search->elementComparisonType != 0) { | 3483 if (ss->search->elementComparisonType != 0) { |
| 3481 const UChar * patText = ss->pattern.text; | 3484 const UChar * patText = ss->pattern.text; |
| 3482 if (patText) { | 3485 if (patText) { |
| 3483 const UChar * patTextLimit = patText + ss->pattern.textLength; | 3486 const UChar * patTextLimit = patText + ss->pattern.textLength; |
| 3484 while ( patText < patTextLimit ) { | 3487 while ( patText < patTextLimit ) { |
| 3485 UChar c = *patText++; | 3488 UChar c = *patText++; |
| 3486 if (MIGHT_BE_JAMO_L(c)) { | 3489 if (MIGHT_BE_JAMO_L(c)) { |
| 3487 bufSize += MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L; | 3490 bufSize += MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L; |
| 3488 } else { | 3491 } else { |
| 3489 // No check for surrogates, we might allocate slightly more buffer than necessary. | 3492 // No check for surrogates, we might allocate slightly more buffer than necessary. |
| 3490 bufSize += MAX_TARGET_IGNORABLES_PER_PAT_OTHER; | 3493 bufSize += MAX_TARGET_IGNORABLES_PER_PAT_OTHER; |
| 3491 } | 3494 } |
| 3492 } | 3495 } |
| 3493 } | 3496 } |
| 3494 } | 3497 } |
| 3495 ceIter = ss->textIter; | 3498 ceIter = ss->textIter; |
| 3496 firstIx = 0; | 3499 firstIx = 0; |
| 3497 limitIx = 0; | 3500 limitIx = 0; |
| 3498 | 3501 |
| 3499 uprv_init_pce(ceIter); | 3502 if (!initTextProcessedIter(ss, status)) { return; } |
| 3500 | 3503 |
| 3501 if (bufSize>DEFAULT_CEBUFFER_SIZE) { | 3504 if (bufSize>DEFAULT_CEBUFFER_SIZE) { |
| 3502 buf = (CEI *)uprv_malloc(bufSize * sizeof(CEI)); | 3505 buf = (CEI *)uprv_malloc(bufSize * sizeof(CEI)); |
| 3503 if (buf == NULL) { | 3506 if (buf == NULL) { |
| 3504 *status = U_MEMORY_ALLOCATION_ERROR; | 3507 *status = U_MEMORY_ALLOCATION_ERROR; |
| 3505 } | 3508 } |
| 3506 } | 3509 } |
| 3507 } | 3510 } |
| 3508 | 3511 |
| 3509 // TODO: add a reset or init function so that allocated | 3512 // TODO: add a reset or init function so that allocated |
| 3510 // buffers can be retained & reused. | 3513 // buffers can be retained & reused. |
| 3511 | 3514 |
| 3512 CEBuffer::~CEBuffer() { | 3515 CEIBuffer::~CEIBuffer() { |
| 3513 if (buf != defBuf) { | 3516 if (buf != defBuf) { |
| 3514 uprv_free(buf); | 3517 uprv_free(buf); |
| 3515 } | 3518 } |
| 3516 } | 3519 } |
| 3517 | 3520 |
| 3518 | 3521 |
| 3519 // Get the CE with the specified index. | 3522 // Get the CE with the specified index. |
| 3520 // Index must be in the range | 3523 // Index must be in the range |
| 3521 // n-history_size < index < n+1 | 3524 // n-history_size < index < n+1 |
| 3522 // where n is the largest index to have been fetched by some previous call to this function. | 3525 // where n is the largest index to have been fetched by some previous call to this function. |
| 3523 // The CE value will be UCOL__PROCESSED_NULLORDER at end of input. | 3526 // The CE value will be UCOL__PROCESSED_NULLORDER at end of input. |
| 3524 // | 3527 // |
| 3525 const CEI *CEBuffer::get(int32_t index) { | 3528 const CEI *CEIBuffer::get(int32_t index) { |
| 3526 int i = index % bufSize; | 3529 int i = index % bufSize; |
| 3527 | 3530 |
| 3528 if (index>=firstIx && index<limitIx) { | 3531 if (index>=firstIx && index<limitIx) { |
| 3529 // The request was for an entry already in our buffer. | 3532 // The request was for an entry already in our buffer. |
| 3530 // Just return it. | 3533 // Just return it. |
| 3531 return &buf[i]; | 3534 return &buf[i]; |
| 3532 } | 3535 } |
| 3533 | 3536 |
| 3534 // Caller is requesting a new, never accessed before, CE. | 3537 // Caller is requesting a new, never accessed before, CE. |
| 3535 // Verify that it is the next one in sequence, which is all | 3538 // Verify that it is the next one in sequence, which is all |
| 3536 // that is allowed. | 3539 // that is allowed. |
| 3537 if (index != limitIx) { | 3540 if (index != limitIx) { |
| 3538 U_ASSERT(FALSE); | 3541 U_ASSERT(FALSE); |
| 3539 | 3542 |
| 3540 return NULL; | 3543 return NULL; |
| 3541 } | 3544 } |
| 3542 | 3545 |
| 3543 // Manage the circular CE buffer indexing | 3546 // Manage the circular CE buffer indexing |
| 3544 limitIx++; | 3547 limitIx++; |
| 3545 | 3548 |
| 3546 if (limitIx - firstIx >= bufSize) { | 3549 if (limitIx - firstIx >= bufSize) { |
| 3547 // The buffer is full, knock out the lowest-indexed entry. | 3550 // The buffer is full, knock out the lowest-indexed entry. |
| 3548 firstIx++; | 3551 firstIx++; |
| 3549 } | 3552 } |
| 3550 | 3553 |
| 3551 UErrorCode status = U_ZERO_ERROR; | 3554 UErrorCode status = U_ZERO_ERROR; |
| 3552 | 3555 |
| 3553 buf[i].ce = ucol_nextProcessed(ceIter, &buf[i].lowIndex, &buf[i].highIndex,
&status); | 3556 buf[i].ce = strSearch->textProcessedIter->nextProcessed(&buf[i].lowIndex, &b
uf[i].highIndex, &status); |
| 3554 | 3557 |
| 3555 return &buf[i]; | 3558 return &buf[i]; |
| 3556 } | 3559 } |
| 3557 | 3560 |
| 3558 // Get the CE with the specified index. | 3561 // Get the CE with the specified index. |
| 3559 // Index must be in the range | 3562 // Index must be in the range |
| 3560 // n-history_size < index < n+1 | 3563 // n-history_size < index < n+1 |
| 3561 // where n is the largest index to have been fetched by some previous call to
this function. | 3564 // where n is the largest index to have been fetched by some previous call to
this function. |
| 3562 // The CE value will be UCOL__PROCESSED_NULLORDER at end of input. | 3565 // The CE value will be UCOL__PROCESSED_NULLORDER at end of input. |
| 3563 // | 3566 // |
| 3564 const CEI *CEBuffer::getPrevious(int32_t index) { | 3567 const CEI *CEIBuffer::getPrevious(int32_t index) { |
| 3565 int i = index % bufSize; | 3568 int i = index % bufSize; |
| 3566 | 3569 |
| 3567 if (index>=firstIx && index<limitIx) { | 3570 if (index>=firstIx && index<limitIx) { |
| 3568 // The request was for an entry already in our buffer. | 3571 // The request was for an entry already in our buffer. |
| 3569 // Just return it. | 3572 // Just return it. |
| 3570 return &buf[i]; | 3573 return &buf[i]; |
| 3571 } | 3574 } |
| 3572 | 3575 |
| 3573 // Caller is requesting a new, never accessed before, CE. | 3576 // Caller is requesting a new, never accessed before, CE. |
| 3574 // Verify that it is the next one in sequence, which is all | 3577 // Verify that it is the next one in sequence, which is all |
| 3575 // that is allowed. | 3578 // that is allowed. |
| 3576 if (index != limitIx) { | 3579 if (index != limitIx) { |
| 3577 U_ASSERT(FALSE); | 3580 U_ASSERT(FALSE); |
| 3578 | 3581 |
| 3579 return NULL; | 3582 return NULL; |
| 3580 } | 3583 } |
| 3581 | 3584 |
| 3582 // Manage the circular CE buffer indexing | 3585 // Manage the circular CE buffer indexing |
| 3583 limitIx++; | 3586 limitIx++; |
| 3584 | 3587 |
| 3585 if (limitIx - firstIx >= bufSize) { | 3588 if (limitIx - firstIx >= bufSize) { |
| 3586 // The buffer is full, knock out the lowest-indexed entry. | 3589 // The buffer is full, knock out the lowest-indexed entry. |
| 3587 firstIx++; | 3590 firstIx++; |
| 3588 } | 3591 } |
| 3589 | 3592 |
| 3590 UErrorCode status = U_ZERO_ERROR; | 3593 UErrorCode status = U_ZERO_ERROR; |
| 3591 | 3594 |
| 3592 buf[i].ce = ucol_previousProcessed(ceIter, &buf[i].lowIndex, &buf[i].highInd
ex, &status); | 3595 buf[i].ce = strSearch->textProcessedIter->previousProcessed(&buf[i].lowIndex
, &buf[i].highIndex, &status); |
| 3593 | 3596 |
| 3594 return &buf[i]; | 3597 return &buf[i]; |
| 3595 } | 3598 } |
| 3596 | 3599 |
| 3600 } |
| 3601 |
| 3597 U_NAMESPACE_END | 3602 U_NAMESPACE_END |
| 3598 | 3603 |
| 3599 | 3604 |
| 3600 // #define USEARCH_DEBUG | 3605 // #define USEARCH_DEBUG |
| 3601 | 3606 |
| 3602 #ifdef USEARCH_DEBUG | 3607 #ifdef USEARCH_DEBUG |
| 3603 #include <stdio.h> | 3608 #include <stdio.h> |
| 3604 #include <stdlib.h> | 3609 #include <stdlib.h> |
| 3605 #endif | 3610 #endif |
| 3606 | 3611 |
| (...skipping 201 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3808 { | 3813 { |
| 3809 if (U_FAILURE(*status)) { | 3814 if (U_FAILURE(*status)) { |
| 3810 return FALSE; | 3815 return FALSE; |
| 3811 } | 3816 } |
| 3812 | 3817 |
| 3813 // TODO: reject search patterns beginning with a combining char. | 3818 // TODO: reject search patterns beginning with a combining char. |
| 3814 | 3819 |
| 3815 #ifdef USEARCH_DEBUG | 3820 #ifdef USEARCH_DEBUG |
| 3816 if (getenv("USEARCH_DEBUG") != NULL) { | 3821 if (getenv("USEARCH_DEBUG") != NULL) { |
| 3817 printf("Pattern CEs\n"); | 3822 printf("Pattern CEs\n"); |
| 3818 for (int ii=0; ii<strsrch->pattern.CELength; ii++) { | 3823 for (int ii=0; ii<strsrch->pattern.cesLength; ii++) { |
| 3819 printf(" %8x", strsrch->pattern.CE[ii]); | 3824 printf(" %8x", strsrch->pattern.ces[ii]); |
| 3820 } | 3825 } |
| 3821 printf("\n"); | 3826 printf("\n"); |
| 3822 } | 3827 } |
| 3823 | 3828 |
| 3824 #endif | 3829 #endif |
| 3825 // Input parameter sanity check. | 3830 // Input parameter sanity check. |
| 3826 // TODO: should input indicies clip to the text length | 3831 // TODO: should input indicies clip to the text length |
| 3827 // in the same way that UText does. | 3832 // in the same way that UText does. |
| 3828 if(strsrch->pattern.CELength == 0 || | 3833 if(strsrch->pattern.cesLength == 0 || |
| 3829 startIdx < 0 || | 3834 startIdx < 0 || |
| 3830 startIdx > strsrch->search->textLength || | 3835 startIdx > strsrch->search->textLength || |
| 3831 strsrch->pattern.CE == NULL) { | 3836 strsrch->pattern.ces == NULL) { |
| 3832 *status = U_ILLEGAL_ARGUMENT_ERROR; | 3837 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 3833 return FALSE; | 3838 return FALSE; |
| 3834 } | 3839 } |
| 3835 | 3840 |
| 3836 if (strsrch->pattern.PCE == NULL) { | 3841 if (strsrch->pattern.pces == NULL) { |
| 3837 initializePatternPCETable(strsrch, status); | 3842 initializePatternPCETable(strsrch, status); |
| 3838 } | 3843 } |
| 3839 | 3844 |
| 3840 ucol_setOffset(strsrch->textIter, startIdx, status); | 3845 ucol_setOffset(strsrch->textIter, startIdx, status); |
| 3841 CEBuffer ceb(strsrch, status); | 3846 CEIBuffer ceb(strsrch, status); |
| 3842 | 3847 |
| 3843 | 3848 |
| 3844 int32_t targetIx = 0; | 3849 int32_t targetIx = 0; |
| 3845 const CEI *targetCEI = NULL; | 3850 const CEI *targetCEI = NULL; |
| 3846 int32_t patIx; | 3851 int32_t patIx; |
| 3847 UBool found; | 3852 UBool found; |
| 3848 | 3853 |
| 3849 int32_t mStart = -1; | 3854 int32_t mStart = -1; |
| 3850 int32_t mLimit = -1; | 3855 int32_t mLimit = -1; |
| 3851 int32_t minLimit; | 3856 int32_t minLimit; |
| (...skipping 25 matching lines...) Expand all Loading... |
| 3877 // For targetIx > 0, this ceb.get gets a CE that is as far back in the r
ing buffer | 3882 // For targetIx > 0, this ceb.get gets a CE that is as far back in the r
ing buffer |
| 3878 // (compared to the last CE fetched for the previous targetIx value) as
we need to go | 3883 // (compared to the last CE fetched for the previous targetIx value) as
we need to go |
| 3879 // for this targetIx value, so if it is non-NULL then other ceb.get call
s should be OK. | 3884 // for this targetIx value, so if it is non-NULL then other ceb.get call
s should be OK. |
| 3880 const CEI *firstCEI = ceb.get(targetIx); | 3885 const CEI *firstCEI = ceb.get(targetIx); |
| 3881 if (firstCEI == NULL) { | 3886 if (firstCEI == NULL) { |
| 3882 *status = U_INTERNAL_PROGRAM_ERROR; | 3887 *status = U_INTERNAL_PROGRAM_ERROR; |
| 3883 found = FALSE; | 3888 found = FALSE; |
| 3884 break; | 3889 break; |
| 3885 } | 3890 } |
| 3886 | 3891 |
| 3887 for (patIx=0; patIx<strsrch->pattern.PCELength; patIx++) { | 3892 for (patIx=0; patIx<strsrch->pattern.pcesLength; patIx++) { |
| 3888 patCE = strsrch->pattern.PCE[patIx]; | 3893 patCE = strsrch->pattern.pces[patIx]; |
| 3889 targetCEI = ceb.get(targetIx+patIx+targetIxOffset); | 3894 targetCEI = ceb.get(targetIx+patIx+targetIxOffset); |
| 3890 // Compare CE from target string with CE from the pattern. | 3895 // Compare CE from target string with CE from the pattern. |
| 3891 // Note that the target CE will be UCOL_PROCESSED_NULLORDER if we
reach the end of input, | 3896 // Note that the target CE will be UCOL_PROCESSED_NULLORDER if we
reach the end of input, |
| 3892 // which will fail the compare, below. | 3897 // which will fail the compare, below. |
| 3893 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsr
ch->search->elementComparisonType); | 3898 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsr
ch->search->elementComparisonType); |
| 3894 if ( ceMatch == U_CE_NO_MATCH ) { | 3899 if ( ceMatch == U_CE_NO_MATCH ) { |
| 3895 found = FALSE; | 3900 found = FALSE; |
| 3896 break; | 3901 break; |
| 3897 } else if ( ceMatch > U_CE_NO_MATCH ) { | 3902 } else if ( ceMatch > U_CE_NO_MATCH ) { |
| 3898 if ( ceMatch == U_CE_SKIP_TARG ) { | 3903 if ( ceMatch == U_CE_SKIP_TARG ) { |
| 3899 // redo with same patCE, next targCE | 3904 // redo with same patCE, next targCE |
| 3900 patIx--; | 3905 patIx--; |
| 3901 targetIxOffset++; | 3906 targetIxOffset++; |
| 3902 } else { // ceMatch == U_CE_SKIP_PATN | 3907 } else { // ceMatch == U_CE_SKIP_PATN |
| 3903 // redo with same targCE, next patCE | 3908 // redo with same targCE, next patCE |
| 3904 targetIxOffset--; | 3909 targetIxOffset--; |
| 3905 } | 3910 } |
| 3906 } | 3911 } |
| 3907 } | 3912 } |
| 3908 targetIxOffset += strsrch->pattern.PCELength; // this is now the offset
in target CE space to end of the match so far | 3913 targetIxOffset += strsrch->pattern.pcesLength; // this is now the offset
in target CE space to end of the match so far |
| 3909 | 3914 |
| 3910 if (!found && ((targetCEI == NULL) || (targetCEI->ce != UCOL_PROCESSED_N
ULLORDER))) { | 3915 if (!found && ((targetCEI == NULL) || (targetCEI->ce != UCOL_PROCESSED_N
ULLORDER))) { |
| 3911 // No match at this targetIx. Try again at the next. | 3916 // No match at this targetIx. Try again at the next. |
| 3912 continue; | 3917 continue; |
| 3913 } | 3918 } |
| 3914 | 3919 |
| 3915 if (!found) { | 3920 if (!found) { |
| 3916 // No match at all, we have run off the end of the target text. | 3921 // No match at all, we have run off the end of the target text. |
| 3917 break; | 3922 break; |
| 3918 } | 3923 } |
| (...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4075 { | 4080 { |
| 4076 if (U_FAILURE(*status)) { | 4081 if (U_FAILURE(*status)) { |
| 4077 return FALSE; | 4082 return FALSE; |
| 4078 } | 4083 } |
| 4079 | 4084 |
| 4080 // TODO: reject search patterns beginning with a combining char. | 4085 // TODO: reject search patterns beginning with a combining char. |
| 4081 | 4086 |
| 4082 #ifdef USEARCH_DEBUG | 4087 #ifdef USEARCH_DEBUG |
| 4083 if (getenv("USEARCH_DEBUG") != NULL) { | 4088 if (getenv("USEARCH_DEBUG") != NULL) { |
| 4084 printf("Pattern CEs\n"); | 4089 printf("Pattern CEs\n"); |
| 4085 for (int ii=0; ii<strsrch->pattern.CELength; ii++) { | 4090 for (int ii=0; ii<strsrch->pattern.cesLength; ii++) { |
| 4086 printf(" %8x", strsrch->pattern.CE[ii]); | 4091 printf(" %8x", strsrch->pattern.ces[ii]); |
| 4087 } | 4092 } |
| 4088 printf("\n"); | 4093 printf("\n"); |
| 4089 } | 4094 } |
| 4090 | 4095 |
| 4091 #endif | 4096 #endif |
| 4092 // Input parameter sanity check. | 4097 // Input parameter sanity check. |
| 4093 // TODO: should input indicies clip to the text length | 4098 // TODO: should input indicies clip to the text length |
| 4094 // in the same way that UText does. | 4099 // in the same way that UText does. |
| 4095 if(strsrch->pattern.CELength == 0 || | 4100 if(strsrch->pattern.cesLength == 0 || |
| 4096 startIdx < 0 || | 4101 startIdx < 0 || |
| 4097 startIdx > strsrch->search->textLength || | 4102 startIdx > strsrch->search->textLength || |
| 4098 strsrch->pattern.CE == NULL) { | 4103 strsrch->pattern.ces == NULL) { |
| 4099 *status = U_ILLEGAL_ARGUMENT_ERROR; | 4104 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 4100 return FALSE; | 4105 return FALSE; |
| 4101 } | 4106 } |
| 4102 | 4107 |
| 4103 if (strsrch->pattern.PCE == NULL) { | 4108 if (strsrch->pattern.pces == NULL) { |
| 4104 initializePatternPCETable(strsrch, status); | 4109 initializePatternPCETable(strsrch, status); |
| 4105 } | 4110 } |
| 4106 | 4111 |
| 4107 CEBuffer ceb(strsrch, status); | 4112 CEIBuffer ceb(strsrch, status); |
| 4108 int32_t targetIx = 0; | 4113 int32_t targetIx = 0; |
| 4109 | 4114 |
| 4110 /* | 4115 /* |
| 4111 * Pre-load the buffer with the CE's for the grapheme | 4116 * Pre-load the buffer with the CE's for the grapheme |
| 4112 * after our starting position so that we're sure that | 4117 * after our starting position so that we're sure that |
| 4113 * we can look at the CE following the match when we | 4118 * we can look at the CE following the match when we |
| 4114 * check the match boundaries. | 4119 * check the match boundaries. |
| 4115 * | 4120 * |
| 4116 * This will also pre-fetch the first CE that we'll | 4121 * This will also pre-fetch the first CE that we'll |
| 4117 * consider for the match. | 4122 * consider for the match. |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4158 // for this targetIx value, so if it is non-NULL then other ceb.getPrevi
ous calls should be OK. | 4163 // for this targetIx value, so if it is non-NULL then other ceb.getPrevi
ous calls should be OK. |
| 4159 const CEI *lastCEI = ceb.getPrevious(targetIx); | 4164 const CEI *lastCEI = ceb.getPrevious(targetIx); |
| 4160 if (lastCEI == NULL) { | 4165 if (lastCEI == NULL) { |
| 4161 *status = U_INTERNAL_PROGRAM_ERROR; | 4166 *status = U_INTERNAL_PROGRAM_ERROR; |
| 4162 found = FALSE; | 4167 found = FALSE; |
| 4163 break; | 4168 break; |
| 4164 } | 4169 } |
| 4165 // Inner loop checks for a match beginning at each | 4170 // Inner loop checks for a match beginning at each |
| 4166 // position from the outer loop. | 4171 // position from the outer loop. |
| 4167 int32_t targetIxOffset = 0; | 4172 int32_t targetIxOffset = 0; |
| 4168 for (patIx = strsrch->pattern.PCELength - 1; patIx >= 0; patIx -= 1) { | 4173 for (patIx = strsrch->pattern.pcesLength - 1; patIx >= 0; patIx -= 1) { |
| 4169 int64_t patCE = strsrch->pattern.PCE[patIx]; | 4174 int64_t patCE = strsrch->pattern.pces[patIx]; |
| 4170 | 4175 |
| 4171 targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength -
1 - patIx + targetIxOffset); | 4176 targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLength -
1 - patIx + targetIxOffset); |
| 4172 // Compare CE from target string with CE from the pattern. | 4177 // Compare CE from target string with CE from the pattern. |
| 4173 // Note that the target CE will be UCOL_NULLORDER if we reach the
end of input, | 4178 // Note that the target CE will be UCOL_NULLORDER if we reach the
end of input, |
| 4174 // which will fail the compare, below. | 4179 // which will fail the compare, below. |
| 4175 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsr
ch->search->elementComparisonType); | 4180 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsr
ch->search->elementComparisonType); |
| 4176 if ( ceMatch == U_CE_NO_MATCH ) { | 4181 if ( ceMatch == U_CE_NO_MATCH ) { |
| 4177 found = FALSE; | 4182 found = FALSE; |
| 4178 break; | 4183 break; |
| 4179 } else if ( ceMatch > U_CE_NO_MATCH ) { | 4184 } else if ( ceMatch > U_CE_NO_MATCH ) { |
| 4180 if ( ceMatch == U_CE_SKIP_TARG ) { | 4185 if ( ceMatch == U_CE_SKIP_TARG ) { |
| 4181 // redo with same patCE, next targCE | 4186 // redo with same patCE, next targCE |
| (...skipping 15 matching lines...) Expand all Loading... |
| 4197 // No match at all, we have run off the end of the target text. | 4202 // No match at all, we have run off the end of the target text. |
| 4198 break; | 4203 break; |
| 4199 } | 4204 } |
| 4200 | 4205 |
| 4201 | 4206 |
| 4202 // We have found a match in CE space. | 4207 // We have found a match in CE space. |
| 4203 // Now determine the bounds in string index space. | 4208 // Now determine the bounds in string index space. |
| 4204 // There still is a chance of match failure if the CE range not corresp
ond to | 4209 // There still is a chance of match failure if the CE range not corresp
ond to |
| 4205 // an acceptable character range. | 4210 // an acceptable character range. |
| 4206 // | 4211 // |
| 4207 const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELen
gth - 1 + targetIxOffset); | 4212 const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLe
ngth - 1 + targetIxOffset); |
| 4208 mStart = firstCEI->lowIndex; | 4213 mStart = firstCEI->lowIndex; |
| 4209 | 4214 |
| 4210 // Check for the start of the match being within a combining sequence. | 4215 // Check for the start of the match being within a combining sequence. |
| 4211 // This can happen if the pattern itself begins with a combining char,
and | 4216 // This can happen if the pattern itself begins with a combining char,
and |
| 4212 // the match found combining marks in the target text that were attach
ed | 4217 // the match found combining marks in the target text that were attach
ed |
| 4213 // to something else. | 4218 // to something else. |
| 4214 // This type of match should be rejected for not completely consuming
a | 4219 // This type of match should be rejected for not completely consuming
a |
| 4215 // combining sequence. | 4220 // combining sequence. |
| 4216 if (!isBreakBoundary(strsrch, mStart)) { | 4221 if (!isBreakBoundary(strsrch, mStart)) { |
| 4217 found = FALSE; | 4222 found = FALSE; |
| (...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4323 UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status) | 4328 UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status) |
| 4324 { | 4329 { |
| 4325 if (U_FAILURE(*status)) { | 4330 if (U_FAILURE(*status)) { |
| 4326 setMatchNotFound(strsrch); | 4331 setMatchNotFound(strsrch); |
| 4327 return FALSE; | 4332 return FALSE; |
| 4328 } | 4333 } |
| 4329 | 4334 |
| 4330 #if BOYER_MOORE | 4335 #if BOYER_MOORE |
| 4331 UCollationElements *coleiter = strsrch->textIter; | 4336 UCollationElements *coleiter = strsrch->textIter; |
| 4332 int32_t textlength = strsrch->search->textLength; | 4337 int32_t textlength = strsrch->search->textLength; |
| 4333 int32_t *patternce = strsrch->pattern.CE; | 4338 int32_t *patternce = strsrch->pattern.ces; |
| 4334 int32_t patterncelength = strsrch->pattern.CELength; | 4339 int32_t patterncelength = strsrch->pattern.cesLength; |
| 4335 int32_t textoffset = ucol_getOffset(coleiter); | 4340 int32_t textoffset = ucol_getOffset(coleiter); |
| 4336 | 4341 |
| 4337 // status used in setting coleiter offset, since offset is checked in | 4342 // status used in setting coleiter offset, since offset is checked in |
| 4338 // shiftForward before setting the coleiter offset, status never | 4343 // shiftForward before setting the coleiter offset, status never |
| 4339 // a failure | 4344 // a failure |
| 4340 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER, | 4345 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER, |
| 4341 patterncelength); | 4346 patterncelength); |
| 4342 while (textoffset <= textlength) | 4347 while (textoffset <= textlength) |
| 4343 { | 4348 { |
| 4344 uint32_t patternceindex = patterncelength - 1; | 4349 uint32_t patternceindex = patterncelength - 1; |
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4437 UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status) | 4442 UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status) |
| 4438 { | 4443 { |
| 4439 if (U_FAILURE(*status)) { | 4444 if (U_FAILURE(*status)) { |
| 4440 setMatchNotFound(strsrch); | 4445 setMatchNotFound(strsrch); |
| 4441 return FALSE; | 4446 return FALSE; |
| 4442 } | 4447 } |
| 4443 | 4448 |
| 4444 #if BOYER_MOORE | 4449 #if BOYER_MOORE |
| 4445 UCollationElements *coleiter = strsrch->textIter; | 4450 UCollationElements *coleiter = strsrch->textIter; |
| 4446 int32_t textlength = strsrch->search->textLength; | 4451 int32_t textlength = strsrch->search->textLength; |
| 4447 int32_t *patternce = strsrch->pattern.CE; | 4452 int32_t *patternce = strsrch->pattern.ces; |
| 4448 int32_t patterncelength = strsrch->pattern.CELength; | 4453 int32_t patterncelength = strsrch->pattern.cesLength; |
| 4449 int32_t textoffset = ucol_getOffset(coleiter); | 4454 int32_t textoffset = ucol_getOffset(coleiter); |
| 4450 UBool hasPatternAccents = | 4455 UBool hasPatternAccents = |
| 4451 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents; | 4456 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents; |
| 4452 | 4457 |
| 4453 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER, | 4458 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER, |
| 4454 patterncelength); | 4459 patterncelength); |
| 4455 strsrch->canonicalPrefixAccents[0] = 0; | 4460 strsrch->canonicalPrefixAccents[0] = 0; |
| 4456 strsrch->canonicalSuffixAccents[0] = 0; | 4461 strsrch->canonicalSuffixAccents[0] = 0; |
| 4457 | 4462 |
| 4458 while (textoffset <= textlength) | 4463 while (textoffset <= textlength) |
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4551 | 4556 |
| 4552 UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status) | 4557 UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status) |
| 4553 { | 4558 { |
| 4554 if (U_FAILURE(*status)) { | 4559 if (U_FAILURE(*status)) { |
| 4555 setMatchNotFound(strsrch); | 4560 setMatchNotFound(strsrch); |
| 4556 return FALSE; | 4561 return FALSE; |
| 4557 } | 4562 } |
| 4558 | 4563 |
| 4559 #if BOYER_MOORE | 4564 #if BOYER_MOORE |
| 4560 UCollationElements *coleiter = strsrch->textIter; | 4565 UCollationElements *coleiter = strsrch->textIter; |
| 4561 int32_t *patternce = strsrch->pattern.CE; | 4566 int32_t *patternce = strsrch->pattern.ces; |
| 4562 int32_t patterncelength = strsrch->pattern.CELength; | 4567 int32_t patterncelength = strsrch->pattern.cesLength; |
| 4563 int32_t textoffset = ucol_getOffset(coleiter); | 4568 int32_t textoffset = ucol_getOffset(coleiter); |
| 4564 | 4569 |
| 4565 // shifting it check for setting offset | 4570 // shifting it check for setting offset |
| 4566 // if setOffset is called previously or there was no previous match, we | 4571 // if setOffset is called previously or there was no previous match, we |
| 4567 // leave the offset as it is. | 4572 // leave the offset as it is. |
| 4568 if (strsrch->search->matchedIndex != USEARCH_DONE) { | 4573 if (strsrch->search->matchedIndex != USEARCH_DONE) { |
| 4569 textoffset = strsrch->search->matchedIndex; | 4574 textoffset = strsrch->search->matchedIndex; |
| 4570 } | 4575 } |
| 4571 | 4576 |
| 4572 textoffset = reverseShift(strsrch, textoffset, UCOL_NULLORDER, | 4577 textoffset = reverseShift(strsrch, textoffset, UCOL_NULLORDER, |
| (...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4652 return FALSE; | 4657 return FALSE; |
| 4653 #else | 4658 #else |
| 4654 int32_t textOffset; | 4659 int32_t textOffset; |
| 4655 | 4660 |
| 4656 if (strsrch->search->isOverlap) { | 4661 if (strsrch->search->isOverlap) { |
| 4657 if (strsrch->search->matchedIndex != USEARCH_DONE) { | 4662 if (strsrch->search->matchedIndex != USEARCH_DONE) { |
| 4658 textOffset = strsrch->search->matchedIndex + strsrch->search->matche
dLength - 1; | 4663 textOffset = strsrch->search->matchedIndex + strsrch->search->matche
dLength - 1; |
| 4659 } else { | 4664 } else { |
| 4660 // move the start position at the end of possible match | 4665 // move the start position at the end of possible match |
| 4661 initializePatternPCETable(strsrch, status); | 4666 initializePatternPCETable(strsrch, status); |
| 4662 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.PCELength - 1; nPCE
s++) { | 4667 if (!initTextProcessedIter(strsrch, status)) { |
| 4663 int64_t pce = ucol_nextProcessed(strsrch->textIter, NULL, NULL,
status); | 4668 setMatchNotFound(strsrch); |
| 4669 return FALSE; |
| 4670 } |
| 4671 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPC
Es++) { |
| 4672 int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NU
LL, status); |
| 4664 if (pce == UCOL_PROCESSED_NULLORDER) { | 4673 if (pce == UCOL_PROCESSED_NULLORDER) { |
| 4665 // at the end of the text | 4674 // at the end of the text |
| 4666 break; | 4675 break; |
| 4667 } | 4676 } |
| 4668 } | 4677 } |
| 4669 if (U_FAILURE(*status)) { | 4678 if (U_FAILURE(*status)) { |
| 4670 setMatchNotFound(strsrch); | 4679 setMatchNotFound(strsrch); |
| 4671 return FALSE; | 4680 return FALSE; |
| 4672 } | 4681 } |
| 4673 textOffset = ucol_getOffset(strsrch->textIter); | 4682 textOffset = ucol_getOffset(strsrch->textIter); |
| (...skipping 19 matching lines...) Expand all Loading... |
| 4693 UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, | 4702 UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, |
| 4694 UErrorCode *status) | 4703 UErrorCode *status) |
| 4695 { | 4704 { |
| 4696 if (U_FAILURE(*status)) { | 4705 if (U_FAILURE(*status)) { |
| 4697 setMatchNotFound(strsrch); | 4706 setMatchNotFound(strsrch); |
| 4698 return FALSE; | 4707 return FALSE; |
| 4699 } | 4708 } |
| 4700 | 4709 |
| 4701 #if BOYER_MOORE | 4710 #if BOYER_MOORE |
| 4702 UCollationElements *coleiter = strsrch->textIter; | 4711 UCollationElements *coleiter = strsrch->textIter; |
| 4703 int32_t *patternce = strsrch->pattern.CE; | 4712 int32_t *patternce = strsrch->pattern.ces; |
| 4704 int32_t patterncelength = strsrch->pattern.CELength; | 4713 int32_t patterncelength = strsrch->pattern.cesLength; |
| 4705 int32_t textoffset = ucol_getOffset(coleiter); | 4714 int32_t textoffset = ucol_getOffset(coleiter); |
| 4706 UBool hasPatternAccents = | 4715 UBool hasPatternAccents = |
| 4707 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents; | 4716 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents; |
| 4708 | 4717 |
| 4709 // shifting it check for setting offset | 4718 // shifting it check for setting offset |
| 4710 // if setOffset is called previously or there was no previous match, we | 4719 // if setOffset is called previously or there was no previous match, we |
| 4711 // leave the offset as it is. | 4720 // leave the offset as it is. |
| 4712 if (strsrch->search->matchedIndex != USEARCH_DONE) { | 4721 if (strsrch->search->matchedIndex != USEARCH_DONE) { |
| 4713 textoffset = strsrch->search->matchedIndex; | 4722 textoffset = strsrch->search->matchedIndex; |
| 4714 } | 4723 } |
| (...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4801 return FALSE; | 4810 return FALSE; |
| 4802 #else | 4811 #else |
| 4803 int32_t textOffset; | 4812 int32_t textOffset; |
| 4804 | 4813 |
| 4805 if (strsrch->search->isOverlap) { | 4814 if (strsrch->search->isOverlap) { |
| 4806 if (strsrch->search->matchedIndex != USEARCH_DONE) { | 4815 if (strsrch->search->matchedIndex != USEARCH_DONE) { |
| 4807 textOffset = strsrch->search->matchedIndex + strsrch->search->matche
dLength - 1; | 4816 textOffset = strsrch->search->matchedIndex + strsrch->search->matche
dLength - 1; |
| 4808 } else { | 4817 } else { |
| 4809 // move the start position at the end of possible match | 4818 // move the start position at the end of possible match |
| 4810 initializePatternPCETable(strsrch, status); | 4819 initializePatternPCETable(strsrch, status); |
| 4811 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.PCELength - 1; nPCE
s++) { | 4820 if (!initTextProcessedIter(strsrch, status)) { |
| 4812 int64_t pce = ucol_nextProcessed(strsrch->textIter, NULL, NULL,
status); | 4821 setMatchNotFound(strsrch); |
| 4822 return FALSE; |
| 4823 } |
| 4824 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPC
Es++) { |
| 4825 int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NU
LL, status); |
| 4813 if (pce == UCOL_PROCESSED_NULLORDER) { | 4826 if (pce == UCOL_PROCESSED_NULLORDER) { |
| 4814 // at the end of the text | 4827 // at the end of the text |
| 4815 break; | 4828 break; |
| 4816 } | 4829 } |
| 4817 } | 4830 } |
| 4818 if (U_FAILURE(*status)) { | 4831 if (U_FAILURE(*status)) { |
| 4819 setMatchNotFound(strsrch); | 4832 setMatchNotFound(strsrch); |
| 4820 return FALSE; | 4833 return FALSE; |
| 4821 } | 4834 } |
| 4822 textOffset = ucol_getOffset(strsrch->textIter); | 4835 textOffset = ucol_getOffset(strsrch->textIter); |
| (...skipping 10 matching lines...) Expand all Loading... |
| 4833 strsrch->search->matchedLength = end - start; | 4846 strsrch->search->matchedLength = end - start; |
| 4834 return TRUE; | 4847 return TRUE; |
| 4835 } else { | 4848 } else { |
| 4836 setMatchNotFound(strsrch); | 4849 setMatchNotFound(strsrch); |
| 4837 return FALSE; | 4850 return FALSE; |
| 4838 } | 4851 } |
| 4839 #endif | 4852 #endif |
| 4840 } | 4853 } |
| 4841 | 4854 |
| 4842 #endif /* #if !UCONFIG_NO_COLLATION */ | 4855 #endif /* #if !UCONFIG_NO_COLLATION */ |
| OLD | NEW |