| OLD | NEW |
| 1 /* | 1 /* |
| 2 ********************************************************************** | 2 ********************************************************************** |
| 3 * Copyright (C) 2001-2014 IBM and others. All rights reserved. | 3 * Copyright (C) 2001-2015 IBM and others. All rights reserved. |
| 4 ********************************************************************** | 4 ********************************************************************** |
| 5 * Date Name Description | 5 * Date Name Description |
| 6 * 07/02/2001 synwee Creation. | 6 * 07/02/2001 synwee Creation. |
| 7 ********************************************************************** | 7 ********************************************************************** |
| 8 */ | 8 */ |
| 9 | 9 |
| 10 #include "unicode/utypes.h" | 10 #include "unicode/utypes.h" |
| 11 | 11 |
| 12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION | 12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION |
| 13 | 13 |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 68 return UCOL_PRIMARYORDERMASK; | 68 return UCOL_PRIMARYORDERMASK; |
| 69 case UCOL_SECONDARY: | 69 case UCOL_SECONDARY: |
| 70 return UCOL_SECONDARYORDERMASK | UCOL_PRIMARYORDERMASK; | 70 return UCOL_SECONDARYORDERMASK | UCOL_PRIMARYORDERMASK; |
| 71 default: | 71 default: |
| 72 return UCOL_TERTIARYORDERMASK | UCOL_SECONDARYORDERMASK | | 72 return UCOL_TERTIARYORDERMASK | UCOL_SECONDARYORDERMASK | |
| 73 UCOL_PRIMARYORDERMASK; | 73 UCOL_PRIMARYORDERMASK; |
| 74 } | 74 } |
| 75 } | 75 } |
| 76 | 76 |
| 77 /** | 77 /** |
| 78 * This is to squeeze the 21bit ces into a 256 table | 78 * @param ce 32-bit collation element |
| 79 * @param ce collation element | 79 * @return hash code |
| 80 * @return collapsed version of the collation element | |
| 81 */ | 80 */ |
| 82 static | 81 static |
| 83 inline int hash(uint32_t ce) | 82 inline int hashFromCE32(uint32_t ce) |
| 84 { | 83 { |
| 85 // the old value UCOL_PRIMARYORDER(ce) % MAX_TABLE_SIZE_ does not work | 84 int hc = (int)( |
| 86 // well with the new collation where most of the latin 1 characters | 85 ((((((ce >> 24) * 37) + |
| 87 // are of the value xx000xxx. their hashes will most of the time be 0 | 86 (ce >> 16)) * 37) + |
| 88 // to be discussed on the hash algo. | 87 (ce >> 8)) * 37) + |
| 89 return UCOL_PRIMARYORDER(ce) % MAX_TABLE_SIZE_; | 88 ce); |
| 89 hc %= MAX_TABLE_SIZE_; |
| 90 if (hc < 0) { |
| 91 hc += MAX_TABLE_SIZE_; |
| 92 } |
| 93 return hc; |
| 90 } | 94 } |
| 91 | 95 |
| 92 U_CDECL_BEGIN | 96 U_CDECL_BEGIN |
| 93 static UBool U_CALLCONV | 97 static UBool U_CALLCONV |
| 94 usearch_cleanup(void) { | 98 usearch_cleanup(void) { |
| 95 g_nfcImpl = NULL; | 99 g_nfcImpl = NULL; |
| 96 return TRUE; | 100 return TRUE; |
| 97 } | 101 } |
| 98 U_CDECL_END | 102 U_CDECL_END |
| 99 | 103 |
| (...skipping 385 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 485 // the number of ces minus their expansion, since expansions can come | 489 // the number of ces minus their expansion, since expansions can come |
| 486 // from a character. | 490 // from a character. |
| 487 int32_t count; | 491 int32_t count; |
| 488 for (count = 0; count < MAX_TABLE_SIZE_; count ++) { | 492 for (count = 0; count < MAX_TABLE_SIZE_; count ++) { |
| 489 shift[count] = defaultforward; | 493 shift[count] = defaultforward; |
| 490 } | 494 } |
| 491 cesize --; // down to the last index | 495 cesize --; // down to the last index |
| 492 for (count = 0; count < cesize; count ++) { | 496 for (count = 0; count < cesize; count ++) { |
| 493 // number of ces from right of array to the count | 497 // number of ces from right of array to the count |
| 494 int temp = defaultforward - count - 1; | 498 int temp = defaultforward - count - 1; |
| 495 shift[hash(cetable[count])] = temp > 1 ? temp : 1; | 499 shift[hashFromCE32(cetable[count])] = temp > 1 ? temp : 1; |
| 496 } | 500 } |
| 497 shift[hash(cetable[cesize])] = 1; | 501 shift[hashFromCE32(cetable[cesize])] = 1; |
| 498 // for ignorables we just shift by one. see test examples. | 502 // for ignorables we just shift by one. see test examples. |
| 499 shift[hash(0)] = 1; | 503 shift[hashFromCE32(0)] = 1; |
| 500 | 504 |
| 501 for (count = 0; count < MAX_TABLE_SIZE_; count ++) { | 505 for (count = 0; count < MAX_TABLE_SIZE_; count ++) { |
| 502 backshift[count] = defaultbackward; | 506 backshift[count] = defaultbackward; |
| 503 } | 507 } |
| 504 for (count = cesize; count > 0; count --) { | 508 for (count = cesize; count > 0; count --) { |
| 505 // the original value count does not seem to work | 509 // the original value count does not seem to work |
| 506 backshift[hash(cetable[count])] = count > expansionsize ? | 510 backshift[hashFromCE32(cetable[count])] = count > expansionsize ? |
| 507 (int16_t)(count - expansionsize) : 1; | 511 (int16_t)(count - expansionsize) : 1; |
| 508 } | 512 } |
| 509 backshift[hash(cetable[0])] = 1; | 513 backshift[hashFromCE32(cetable[0])] = 1; |
| 510 backshift[hash(0)] = 1; | 514 backshift[hashFromCE32(0)] = 1; |
| 511 } | 515 } |
| 512 | 516 |
| 513 /** | 517 /** |
| 514 * Building of the pattern collation element list and the boyer moore strsrch | 518 * Building of the pattern collation element list and the boyer moore strsrch |
| 515 * table. | 519 * table. |
| 516 * The canonical match will only be performed after the default match fails. | 520 * The canonical match will only be performed after the default match fails. |
| 517 * For both cases we need to remember the size of the composed and decomposed | 521 * For both cases we need to remember the size of the composed and decomposed |
| 518 * versions of the string. Since the Boyer-Moore shift calculations shifts by | 522 * versions of the string. Since the Boyer-Moore shift calculations shifts by |
| 519 * a number of characters in the text and tries to match the pattern from that | 523 * a number of characters in the text and tries to match the pattern from that |
| 520 * offset, the shift value can not be too large in case we miss some | 524 * offset, the shift value can not be too large in case we miss some |
| (...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 723 * @return final offset | 727 * @return final offset |
| 724 */ | 728 */ |
| 725 static | 729 static |
| 726 inline int32_t shiftForward(UStringSearch *strsrch, | 730 inline int32_t shiftForward(UStringSearch *strsrch, |
| 727 int32_t textoffset, | 731 int32_t textoffset, |
| 728 int32_t ce, | 732 int32_t ce, |
| 729 int32_t patternceindex) | 733 int32_t patternceindex) |
| 730 { | 734 { |
| 731 UPattern *pattern = &(strsrch->pattern); | 735 UPattern *pattern = &(strsrch->pattern); |
| 732 if (ce != UCOL_NULLORDER) { | 736 if (ce != UCOL_NULLORDER) { |
| 733 int32_t shift = pattern->shift[hash(ce)]; | 737 int32_t shift = pattern->shift[hashFromCE32(ce)]; |
| 734 // this is to adjust for characters in the middle of the | 738 // this is to adjust for characters in the middle of the |
| 735 // substring for matching that failed. | 739 // substring for matching that failed. |
| 736 int32_t adjust = pattern->cesLength - patternceindex; | 740 int32_t adjust = pattern->cesLength - patternceindex; |
| 737 if (adjust > 1 && shift >= adjust) { | 741 if (adjust > 1 && shift >= adjust) { |
| 738 shift -= adjust - 1; | 742 shift -= adjust - 1; |
| 739 } | 743 } |
| 740 textoffset += shift; | 744 textoffset += shift; |
| 741 } | 745 } |
| 742 else { | 746 else { |
| 743 textoffset += pattern->defaultShiftSize; | 747 textoffset += pattern->defaultShiftSize; |
| (...skipping 1220 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1964 if (strsrch->search->isOverlap) { | 1968 if (strsrch->search->isOverlap) { |
| 1965 if (textoffset != strsrch->search->textLength) { | 1969 if (textoffset != strsrch->search->textLength) { |
| 1966 textoffset --; | 1970 textoffset --; |
| 1967 } | 1971 } |
| 1968 else { | 1972 else { |
| 1969 textoffset -= strsrch->pattern.defaultShiftSize; | 1973 textoffset -= strsrch->pattern.defaultShiftSize; |
| 1970 } | 1974 } |
| 1971 } | 1975 } |
| 1972 else { | 1976 else { |
| 1973 if (ce != UCOL_NULLORDER) { | 1977 if (ce != UCOL_NULLORDER) { |
| 1974 int32_t shift = strsrch->pattern.backShift[hash(ce)]; | 1978 int32_t shift = strsrch->pattern.backShift[hashFromCE32(ce)]; |
| 1975 | 1979 |
| 1976 // this is to adjust for characters in the middle of the substring | 1980 // this is to adjust for characters in the middle of the substring |
| 1977 // for matching that failed. | 1981 // for matching that failed. |
| 1978 int32_t adjust = patternceindex; | 1982 int32_t adjust = patternceindex; |
| 1979 if (adjust > 1 && shift > adjust) { | 1983 if (adjust > 1 && shift > adjust) { |
| 1980 shift -= adjust - 1; | 1984 shift -= adjust - 1; |
| 1981 } | 1985 } |
| 1982 textoffset -= shift; | 1986 textoffset -= shift; |
| 1983 } | 1987 } |
| 1984 else { | 1988 else { |
| (...skipping 1813 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3798 U_CE_MATCH: U_CE_NO_MATCH; | 3802 U_CE_MATCH: U_CE_NO_MATCH; |
| 3799 } | 3803 } |
| 3800 | 3804 |
| 3801 return U_CE_MATCH; | 3805 return U_CE_MATCH; |
| 3802 } | 3806 } |
| 3803 | 3807 |
| 3804 #if BOYER_MOORE | 3808 #if BOYER_MOORE |
| 3805 // TODO: #if BOYER_MOORE, need 32-bit version of compareCE64s | 3809 // TODO: #if BOYER_MOORE, need 32-bit version of compareCE64s |
| 3806 #endif | 3810 #endif |
| 3807 | 3811 |
| 3812 namespace { |
| 3813 |
| 3814 UChar32 codePointAt(const USearch &search, int32_t index) { |
| 3815 if (index < search.textLength) { |
| 3816 UChar32 c; |
| 3817 U16_NEXT(search.text, index, search.textLength, c); |
| 3818 return c; |
| 3819 } |
| 3820 return U_SENTINEL; |
| 3821 } |
| 3822 |
| 3823 UChar32 codePointBefore(const USearch &search, int32_t index) { |
| 3824 if (0 < index) { |
| 3825 UChar32 c; |
| 3826 U16_PREV(search.text, 0, index, c); |
| 3827 return c; |
| 3828 } |
| 3829 return U_SENTINEL; |
| 3830 } |
| 3831 |
| 3832 } // namespace |
| 3833 |
| 3808 U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch, | 3834 U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch, |
| 3809 int32_t startIdx, | 3835 int32_t startIdx, |
| 3810 int32_t *matchStart, | 3836 int32_t *matchStart, |
| 3811 int32_t *matchLimit, | 3837 int32_t *matchLimit, |
| 3812 UErrorCode *status) | 3838 UErrorCode *status) |
| 3813 { | 3839 { |
| 3814 if (U_FAILURE(*status)) { | 3840 if (U_FAILURE(*status)) { |
| 3815 return FALSE; | 3841 return FALSE; |
| 3816 } | 3842 } |
| 3817 | 3843 |
| (...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3991 // Check for the start of the match being within an Collation Element Ex
pansion, | 4017 // Check for the start of the match being within an Collation Element Ex
pansion, |
| 3992 // meaning that the first char of the match is only partially matched. | 4018 // meaning that the first char of the match is only partially matched. |
| 3993 // With exapnsions, the first CE will report the index of the source | 4019 // With exapnsions, the first CE will report the index of the source |
| 3994 // character, and all subsequent (expansions) CEs will report the sour
ce index of the | 4020 // character, and all subsequent (expansions) CEs will report the sour
ce index of the |
| 3995 // _following_ character. | 4021 // _following_ character. |
| 3996 int32_t secondIx = firstCEI->highIndex; | 4022 int32_t secondIx = firstCEI->highIndex; |
| 3997 if (mStart == secondIx) { | 4023 if (mStart == secondIx) { |
| 3998 found = FALSE; | 4024 found = FALSE; |
| 3999 } | 4025 } |
| 4000 | 4026 |
| 4027 // Allow matches to end in the middle of a grapheme cluster if the follo
wing |
| 4028 // conditions are met; this is needed to make prefix search work properl
y in |
| 4029 // Indic, see #11750 |
| 4030 // * the default breakIter is being used |
| 4031 // * the next collation element after this combining sequence |
| 4032 // - has non-zero primary weight |
| 4033 // - corresponds to a separate character following the one at end of t
he current match |
| 4034 // (the second of these conditions, and perhaps both, may be redundant
given the |
| 4035 // subsequent check for normalization boundary; however they are likel
y much faster |
| 4036 // tests in any case) |
| 4037 // * the match limit is a normalization boundary |
| 4038 UBool allowMidclusterMatch = FALSE; |
| 4039 if (strsrch->search->text != NULL && strsrch->search->textLength > maxLi
mit) { |
| 4040 allowMidclusterMatch = |
| 4041 strsrch->search->breakIter == NULL && |
| 4042 nextCEI != NULL && (((nextCEI->ce) >> 32) & 0xFFFF0000UL) !=
0 && |
| 4043 maxLimit >= lastCEI->highIndex && nextCEI->highIndex > maxLi
mit && |
| 4044 (strsrch->nfd->hasBoundaryBefore(codePointAt(*strsrch->searc
h, maxLimit)) || |
| 4045 strsrch->nfd->hasBoundaryAfter(codePointBefore(*strsrch-
>search, maxLimit))); |
| 4046 } |
| 4047 // If those conditions are met, then: |
| 4048 // * do NOT advance the candidate match limit (mLimit) to a break bounda
ry; however |
| 4049 // the match limit may be backed off to a previous break boundary. Thi
s handles |
| 4050 // cases in which mLimit includes target characters that are ignorable
with current |
| 4051 // settings (such as space) and which extend beyond the pattern match. |
| 4052 // * do NOT require that end of the combining sequence not extend beyond
the match in CE space |
| 4053 // * do NOT require that match limit be on a breakIter boundary |
| 4054 |
| 4001 // Advance the match end position to the first acceptable match boundar
y. | 4055 // Advance the match end position to the first acceptable match boundar
y. |
| 4002 // This advances the index over any combining charcters. | 4056 // This advances the index over any combining charcters. |
| 4003 mLimit = maxLimit; | 4057 mLimit = maxLimit; |
| 4004 if (minLimit < maxLimit) { | 4058 if (minLimit < maxLimit) { |
| 4005 // When the last CE's low index is same with its high index, the CE
is likely | 4059 // When the last CE's low index is same with its high index, the CE
is likely |
| 4006 // a part of expansion. In this case, the index is located just afte
r the | 4060 // a part of expansion. In this case, the index is located just afte
r the |
| 4007 // character corresponding to the CEs compared above. If the index i
s right | 4061 // character corresponding to the CEs compared above. If the index i
s right |
| 4008 // at the break boundary, move the position to the next boundary wil
l result | 4062 // at the break boundary, move the position to the next boundary wil
l result |
| 4009 // incorrect match length when there are ignorable characters exist
between | 4063 // incorrect match length when there are ignorable characters exist
between |
| 4010 // the position and the next character produces CE(s). See ticket#84
82. | 4064 // the position and the next character produces CE(s). See ticket#84
82. |
| 4011 if (minLimit == lastCEI->highIndex && isBreakBoundary(strsrch, minLi
mit)) { | 4065 if (minLimit == lastCEI->highIndex && isBreakBoundary(strsrch, minLi
mit)) { |
| 4012 mLimit = minLimit; | 4066 mLimit = minLimit; |
| 4013 } else { | 4067 } else { |
| 4014 int32_t nba = nextBoundaryAfter(strsrch, minLimit); | 4068 int32_t nba = nextBoundaryAfter(strsrch, minLimit); |
| 4015 if (nba >= lastCEI->highIndex) { | 4069 // Note that we can have nba < maxLimit && nba >= minLImit, in w
hich |
| 4070 // case we want to set mLimit to nba regardless of allowMidclust
erMatch |
| 4071 // (i.e. we back off mLimit to the previous breakIterator bounda
ry). |
| 4072 if (nba >= lastCEI->highIndex && (!allowMidclusterMatch || nba <
maxLimit)) { |
| 4016 mLimit = nba; | 4073 mLimit = nba; |
| 4017 } | 4074 } |
| 4018 } | 4075 } |
| 4019 } | 4076 } |
| 4020 | 4077 |
| 4021 #ifdef USEARCH_DEBUG | 4078 #ifdef USEARCH_DEBUG |
| 4022 if (getenv("USEARCH_DEBUG") != NULL) { | 4079 if (getenv("USEARCH_DEBUG") != NULL) { |
| 4023 printf("minLimit, maxLimit, mLimit = %d, %d, %d\n", minLimit, maxLim
it, mLimit); | 4080 printf("minLimit, maxLimit, mLimit = %d, %d, %d\n", minLimit, maxLim
it, mLimit); |
| 4024 } | 4081 } |
| 4025 #endif | 4082 #endif |
| 4026 | 4083 |
| 4027 // If advancing to the end of a combining sequence in character indexing
space | 4084 if (!allowMidclusterMatch) { |
| 4028 // advanced us beyond the end of the match in CE space, reject this ma
tch. | 4085 // If advancing to the end of a combining sequence in character inde
xing space |
| 4029 if (mLimit > maxLimit) { | 4086 // advanced us beyond the end of the match in CE space, reject thi
s match. |
| 4030 found = FALSE; | 4087 if (mLimit > maxLimit) { |
| 4031 } | 4088 found = FALSE; |
| 4089 } |
| 4032 | 4090 |
| 4033 if (!isBreakBoundary(strsrch, mLimit)) { | 4091 if (!isBreakBoundary(strsrch, mLimit)) { |
| 4034 found = FALSE; | 4092 found = FALSE; |
| 4093 } |
| 4035 } | 4094 } |
| 4036 | 4095 |
| 4037 if (! checkIdentical(strsrch, mStart, mLimit)) { | 4096 if (! checkIdentical(strsrch, mStart, mLimit)) { |
| 4038 found = FALSE; | 4097 found = FALSE; |
| 4039 } | 4098 } |
| 4040 | 4099 |
| 4041 if (found) { | 4100 if (found) { |
| 4042 break; | 4101 break; |
| 4043 } | 4102 } |
| 4044 } | 4103 } |
| (...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4241 // 2. The last CE that was part of the match is in an expansion t
hat extends | 4300 // 2. The last CE that was part of the match is in an expansion t
hat extends |
| 4242 // to the first CE after the match. In this case, we reject th
e match. | 4301 // to the first CE after the match. In this case, we reject th
e match. |
| 4243 const CEI *nextCEI = ceb.getPrevious(targetIx - 1); | 4302 const CEI *nextCEI = ceb.getPrevious(targetIx - 1); |
| 4244 | 4303 |
| 4245 if (nextCEI->lowIndex == nextCEI->highIndex && nextCEI->ce != UCOL_P
ROCESSED_NULLORDER) { | 4304 if (nextCEI->lowIndex == nextCEI->highIndex && nextCEI->ce != UCOL_P
ROCESSED_NULLORDER) { |
| 4246 found = FALSE; | 4305 found = FALSE; |
| 4247 } | 4306 } |
| 4248 | 4307 |
| 4249 mLimit = maxLimit = nextCEI->lowIndex; | 4308 mLimit = maxLimit = nextCEI->lowIndex; |
| 4250 | 4309 |
| 4310 // Allow matches to end in the middle of a grapheme cluster if the f
ollowing |
| 4311 // conditions are met; this is needed to make prefix search work pro
perly in |
| 4312 // Indic, see #11750 |
| 4313 // * the default breakIter is being used |
| 4314 // * the next collation element after this combining sequence |
| 4315 // - has non-zero primary weight |
| 4316 // - corresponds to a separate character following the one at end
of the current match |
| 4317 // (the second of these conditions, and perhaps both, may be redun
dant given the |
| 4318 // subsequent check for normalization boundary; however they are l
ikely much faster |
| 4319 // tests in any case) |
| 4320 // * the match limit is a normalization boundary |
| 4321 UBool allowMidclusterMatch = FALSE; |
| 4322 if (strsrch->search->text != NULL && strsrch->search->textLength > m
axLimit) { |
| 4323 allowMidclusterMatch = |
| 4324 strsrch->search->breakIter == NULL && |
| 4325 nextCEI != NULL && (((nextCEI->ce) >> 32) & 0xFFFF0000UL
) != 0 && |
| 4326 maxLimit >= lastCEI->highIndex && nextCEI->highIndex > m
axLimit && |
| 4327 (strsrch->nfd->hasBoundaryBefore(codePointAt(*strsrch->s
earch, maxLimit)) || |
| 4328 strsrch->nfd->hasBoundaryAfter(codePointBefore(*strs
rch->search, maxLimit))); |
| 4329 } |
| 4330 // If those conditions are met, then: |
| 4331 // * do NOT advance the candidate match limit (mLimit) to a break bo
undary; however |
| 4332 // the match limit may be backed off to a previous break boundary.
This handles |
| 4333 // cases in which mLimit includes target characters that are ignor
able with current |
| 4334 // settings (such as space) and which extend beyond the pattern ma
tch. |
| 4335 // * do NOT require that end of the combining sequence not extend be
yond the match in CE space |
| 4336 // * do NOT require that match limit be on a breakIter boundary |
| 4337 |
| 4251 // Advance the match end position to the first acceptable match bou
ndary. | 4338 // Advance the match end position to the first acceptable match bou
ndary. |
| 4252 // This advances the index over any combining charcters. | 4339 // This advances the index over any combining characters. |
| 4253 if (minLimit < maxLimit) { | 4340 if (minLimit < maxLimit) { |
| 4254 int32_t nba = nextBoundaryAfter(strsrch, minLimit); | 4341 int32_t nba = nextBoundaryAfter(strsrch, minLimit); |
| 4255 | 4342 // Note that we can have nba < maxLimit && nba >= minLImit, in w
hich |
| 4256 if (nba >= lastCEI->highIndex) { | 4343 // case we want to set mLimit to nba regardless of allowMidclust
erMatch |
| 4344 // (i.e. we back off mLimit to the previous breakIterator bounda
ry). |
| 4345 if (nba >= lastCEI->highIndex && (!allowMidclusterMatch || nba <
maxLimit)) { |
| 4257 mLimit = nba; | 4346 mLimit = nba; |
| 4258 } | 4347 } |
| 4259 } | 4348 } |
| 4260 | 4349 |
| 4261 // If advancing to the end of a combining sequence in character inde
xing space | 4350 if (!allowMidclusterMatch) { |
| 4262 // advanced us beyond the end of the match in CE space, reject thi
s match. | 4351 // If advancing to the end of a combining sequence in character
indexing space |
| 4263 if (mLimit > maxLimit) { | 4352 // advanced us beyond the end of the match in CE space, reject
this match. |
| 4264 found = FALSE; | 4353 if (mLimit > maxLimit) { |
| 4265 } | 4354 found = FALSE; |
| 4355 } |
| 4266 | 4356 |
| 4267 // Make sure the end of the match is on a break boundary | 4357 // Make sure the end of the match is on a break boundary |
| 4268 if (!isBreakBoundary(strsrch, mLimit)) { | 4358 if (!isBreakBoundary(strsrch, mLimit)) { |
| 4269 found = FALSE; | 4359 found = FALSE; |
| 4360 } |
| 4270 } | 4361 } |
| 4271 | 4362 |
| 4272 } else { | 4363 } else { |
| 4273 // No non-ignorable CEs after this point. | 4364 // No non-ignorable CEs after this point. |
| 4274 // The maximum position is detected by boundary after | 4365 // The maximum position is detected by boundary after |
| 4275 // the last non-ignorable CE. Combining sequence | 4366 // the last non-ignorable CE. Combining sequence |
| 4276 // across the start index will be truncated. | 4367 // across the start index will be truncated. |
| 4277 int32_t nba = nextBoundaryAfter(strsrch, minLimit); | 4368 int32_t nba = nextBoundaryAfter(strsrch, minLimit); |
| 4278 mLimit = maxLimit = (nba > 0) && (startIdx > nba) ? nba : startIdx; | 4369 mLimit = maxLimit = (nba > 0) && (startIdx > nba) ? nba : startIdx; |
| 4279 } | 4370 } |
| (...skipping 566 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4846 strsrch->search->matchedLength = end - start; | 4937 strsrch->search->matchedLength = end - start; |
| 4847 return TRUE; | 4938 return TRUE; |
| 4848 } else { | 4939 } else { |
| 4849 setMatchNotFound(strsrch); | 4940 setMatchNotFound(strsrch); |
| 4850 return FALSE; | 4941 return FALSE; |
| 4851 } | 4942 } |
| 4852 #endif | 4943 #endif |
| 4853 } | 4944 } |
| 4854 | 4945 |
| 4855 #endif /* #if !UCONFIG_NO_COLLATION */ | 4946 #endif /* #if !UCONFIG_NO_COLLATION */ |
| OLD | NEW |