| Index: source/i18n/usearch.cpp
|
| diff --git a/source/i18n/usearch.cpp b/source/i18n/usearch.cpp
|
| index ba463e606c2011950f30154a7821f0d6cf93c20f..96414dbf4d90ab38be5ecad097101c45c79e913f 100644
|
| --- a/source/i18n/usearch.cpp
|
| +++ b/source/i18n/usearch.cpp
|
| @@ -1,6 +1,6 @@
|
| /*
|
| **********************************************************************
|
| -* Copyright (C) 2001-2011 IBM and others. All rights reserved.
|
| +* Copyright (C) 2001-2014 IBM and others. All rights reserved.
|
| **********************************************************************
|
| * Date Name Description
|
| * 07/02/2001 synwee Creation.
|
| @@ -16,7 +16,6 @@
|
| #include "unicode/uchar.h"
|
| #include "unicode/utf16.h"
|
| #include "normalizer2impl.h"
|
| -#include "ucol_imp.h"
|
| #include "usrchimp.h"
|
| #include "cmemory.h"
|
| #include "ucln_in.h"
|
| @@ -29,8 +28,6 @@ U_NAMESPACE_USE
|
| // (and if we decide to turn this on again there are several new TODOs that will need to be addressed)
|
| #define BOYER_MOORE 0
|
|
|
| -#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
| -
|
| // internal definition ---------------------------------------------------
|
|
|
| #define LAST_BYTE_MASK_ 0xFF
|
| @@ -51,17 +48,10 @@ static
|
| inline void setColEIterOffset(UCollationElements *elems,
|
| int32_t offset)
|
| {
|
| - collIterate *ci = &(elems->iteratordata_);
|
| - ci->pos = ci->string + offset;
|
| - ci->CEpos = ci->toReturn = ci->extendCEs ? ci->extendCEs : ci->CEs;
|
| - if (ci->flags & UCOL_ITER_INNORMBUF) {
|
| - ci->flags = ci->origFlags;
|
| - }
|
| - ci->fcdPosition = NULL;
|
| -
|
| - ci->offsetReturn = NULL;
|
| - ci->offsetStore = ci->offsetBuffer;
|
| - ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
|
| + // Note: Not "fast" any more after the 2013 collation rewrite.
|
| + // We do not want to expose more internals than necessary, so the offset is set via ucol_setOffset().
|
| + UErrorCode status = U_ZERO_ERROR; // local only: this helper returns void and takes no status parameter
|
| + ucol_setOffset(elems, offset, &status); // any failure recorded in status is not propagated to the caller
|
| }
|
|
|
| /**
|
| @@ -298,7 +288,7 @@ inline uint16_t initializePatternCETable(UStringSearch *strsrch,
|
| {
|
| UPattern *pattern = &(strsrch->pattern);
|
| uint32_t cetablesize = INITIAL_ARRAY_SIZE_;
|
| - int32_t *cetable = pattern->CEBuffer;
|
| + int32_t *cetable = pattern->cesBuffer;
|
| uint32_t patternlength = pattern->textLength;
|
| UCollationElements *coleiter = strsrch->utilIter;
|
|
|
| @@ -311,17 +301,14 @@ inline uint16_t initializePatternCETable(UStringSearch *strsrch,
|
| strsrch->utilIter = coleiter;
|
| }
|
| else {
|
| - uprv_init_collIterate(strsrch->collator, pattern->text,
|
| - pattern->textLength,
|
| - &coleiter->iteratordata_,
|
| - status);
|
| + ucol_setText(coleiter, pattern->text, pattern->textLength, status);
|
| }
|
| if(U_FAILURE(*status)) {
|
| return 0;
|
| }
|
|
|
| - if (pattern->CE != cetable && pattern->CE) {
|
| - uprv_free(pattern->CE);
|
| + if (pattern->ces != cetable && pattern->ces) {
|
| + uprv_free(pattern->ces);
|
| }
|
|
|
| uint16_t offset = 0;
|
| @@ -340,7 +327,7 @@ inline uint16_t initializePatternCETable(UStringSearch *strsrch,
|
| return 0;
|
| }
|
| offset ++;
|
| - if (cetable != temp && cetable != pattern->CEBuffer) {
|
| + if (cetable != temp && cetable != pattern->cesBuffer) {
|
| uprv_free(cetable);
|
| }
|
| cetable = temp;
|
| @@ -349,8 +336,8 @@ inline uint16_t initializePatternCETable(UStringSearch *strsrch,
|
| }
|
|
|
| cetable[offset] = 0;
|
| - pattern->CE = cetable;
|
| - pattern->CELength = offset;
|
| + pattern->ces = cetable;
|
| + pattern->cesLength = offset;
|
|
|
| return result;
|
| }
|
| @@ -373,7 +360,7 @@ inline uint16_t initializePatternPCETable(UStringSearch *strsrch,
|
| {
|
| UPattern *pattern = &(strsrch->pattern);
|
| uint32_t pcetablesize = INITIAL_ARRAY_SIZE_;
|
| - int64_t *pcetable = pattern->PCEBuffer;
|
| + int64_t *pcetable = pattern->pcesBuffer;
|
| uint32_t patternlength = pattern->textLength;
|
| UCollationElements *coleiter = strsrch->utilIter;
|
|
|
| @@ -385,29 +372,26 @@ inline uint16_t initializePatternPCETable(UStringSearch *strsrch,
|
| // returned.
|
| strsrch->utilIter = coleiter;
|
| } else {
|
| - uprv_init_collIterate(strsrch->collator, pattern->text,
|
| - pattern->textLength,
|
| - &coleiter->iteratordata_,
|
| - status);
|
| + ucol_setText(coleiter, pattern->text, pattern->textLength, status);
|
| }
|
| if(U_FAILURE(*status)) {
|
| return 0;
|
| }
|
|
|
| - if (pattern->PCE != pcetable && pattern->PCE != NULL) {
|
| - uprv_free(pattern->PCE);
|
| + if (pattern->pces != pcetable && pattern->pces != NULL) {
|
| + uprv_free(pattern->pces);
|
| }
|
|
|
| uint16_t offset = 0;
|
| uint16_t result = 0;
|
| int64_t pce;
|
|
|
| - uprv_init_pce(coleiter);
|
| + icu::UCollationPCE iter(coleiter);
|
|
|
| // ** Should processed CEs be signed or unsigned?
|
| // ** (the rest of the code in this file seems to play fast-and-loose with
|
| // ** whether a CE is signed or unsigned. For example, look at routine above this one.)
|
| - while ((pce = ucol_nextProcessed(coleiter, NULL, NULL, status)) != UCOL_PROCESSED_NULLORDER &&
|
| + while ((pce = iter.nextProcessed(NULL, NULL, status)) != UCOL_PROCESSED_NULLORDER &&
|
| U_SUCCESS(*status)) {
|
| int64_t *temp = addTouint64_tArray(pcetable, offset, &pcetablesize,
|
| pce,
|
| @@ -420,7 +404,7 @@ inline uint16_t initializePatternPCETable(UStringSearch *strsrch,
|
|
|
| offset += 1;
|
|
|
| - if (pcetable != temp && pcetable != pattern->PCEBuffer) {
|
| + if (pcetable != temp && pcetable != pattern->pcesBuffer) {
|
| uprv_free(pcetable);
|
| }
|
|
|
| @@ -429,8 +413,8 @@ inline uint16_t initializePatternPCETable(UStringSearch *strsrch,
|
| }
|
|
|
| pcetable[offset] = 0;
|
| - pattern->PCE = pcetable;
|
| - pattern->PCELength = offset;
|
| + pattern->pces = pcetable;
|
| + pattern->pcesLength = offset;
|
|
|
| return result;
|
| }
|
| @@ -446,6 +430,7 @@ inline uint16_t initializePatternPCETable(UStringSearch *strsrch,
|
| static
|
| inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status)
|
| {
|
| + if (U_FAILURE(*status)) { return 0; }
|
| UPattern *pattern = &(strsrch->pattern);
|
| const UChar *patterntext = pattern->text;
|
| int32_t length = pattern->textLength;
|
| @@ -465,12 +450,12 @@ inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status)
|
| }
|
|
|
| // ** HACK **
|
| - if (strsrch->pattern.PCE != NULL) {
|
| - if (strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) {
|
| - uprv_free(strsrch->pattern.PCE);
|
| + if (strsrch->pattern.pces != NULL) {
|
| + if (strsrch->pattern.pces != strsrch->pattern.pcesBuffer) {
|
| + uprv_free(strsrch->pattern.pces);
|
| }
|
|
|
| - strsrch->pattern.PCE = NULL;
|
| + strsrch->pattern.pces = NULL;
|
| }
|
|
|
| // since intializePattern is an internal method status is a success.
|
| @@ -557,14 +542,14 @@ static
|
| inline void initialize(UStringSearch *strsrch, UErrorCode *status)
|
| {
|
| int16_t expandlength = initializePattern(strsrch, status);
|
| - if (U_SUCCESS(*status) && strsrch->pattern.CELength > 0) {
|
| + if (U_SUCCESS(*status) && strsrch->pattern.cesLength > 0) {
|
| UPattern *pattern = &strsrch->pattern;
|
| - int32_t cesize = pattern->CELength;
|
| + int32_t cesize = pattern->cesLength;
|
|
|
| int16_t minlength = cesize > expandlength
|
| ? (int16_t)cesize - expandlength : 1;
|
| pattern->defaultShiftSize = minlength;
|
| - setShiftTable(pattern->shift, pattern->backShift, pattern->CE,
|
| + setShiftTable(pattern->shift, pattern->backShift, pattern->ces,
|
| cesize, expandlength, minlength, minlength);
|
| return;
|
| }
|
| @@ -640,14 +625,14 @@ UBool isBreakUnit(const UStringSearch *strsrch, int32_t start,
|
| start;
|
| UErrorCode status = U_ZERO_ERROR;
|
| ucol_setText(coleiter, text, end - start, &status);
|
| - for (int32_t count = 0; count < strsrch->pattern.CELength;
|
| + for (int32_t count = 0; count < strsrch->pattern.cesLength;
|
| count ++) {
|
| int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
|
| if (ce == UCOL_IGNORABLE) {
|
| count --;
|
| continue;
|
| }
|
| - if (U_FAILURE(status) || ce != strsrch->pattern.CE[count]) {
|
| + if (U_FAILURE(status) || ce != strsrch->pattern.ces[count]) {
|
| return FALSE;
|
| }
|
| }
|
| @@ -748,7 +733,7 @@ inline int32_t shiftForward(UStringSearch *strsrch,
|
| int32_t shift = pattern->shift[hash(ce)];
|
| // this is to adjust for characters in the middle of the
|
| // substring for matching that failed.
|
| - int32_t adjust = pattern->CELength - patternceindex;
|
| + int32_t adjust = pattern->cesLength - patternceindex;
|
| if (adjust > 1 && shift >= adjust) {
|
| shift -= adjust - 1;
|
| }
|
| @@ -882,7 +867,7 @@ UBool checkExtraMatchAccents(const UStringSearch *strsrch, int32_t start,
|
|
|
| UCollationElements *coleiter = strsrch->utilIter;
|
| ucol_setText(coleiter, norm, size, status);
|
| - uint32_t firstce = strsrch->pattern.CE[0];
|
| + uint32_t firstce = strsrch->pattern.ces[0];
|
| UBool ignorable = TRUE;
|
| uint32_t ce = UCOL_IGNORABLE;
|
| while (U_SUCCESS(*status) && ce != firstce && ce != (uint32_t)UCOL_NULLORDER) {
|
| @@ -935,7 +920,7 @@ UBool hasAccentsBeforeMatch(const UStringSearch *strsrch, int32_t start,
|
| UErrorCode status = U_ZERO_ERROR;
|
| // we have been iterating forwards previously
|
| uint32_t ignorable = TRUE;
|
| - int32_t firstce = strsrch->pattern.CE[0];
|
| + int32_t firstce = strsrch->pattern.ces[0];
|
|
|
| setColEIterOffset(coleiter, start);
|
| int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
|
| @@ -1017,7 +1002,7 @@ UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start,
|
| int32_t textlength = strsrch->search->textLength;
|
| U16_BACK_1(text, 0, temp);
|
| if (getFCD(text, &temp, textlength) & LAST_BYTE_MASK_) {
|
| - int32_t firstce = strsrch->pattern.CE[0];
|
| + int32_t firstce = strsrch->pattern.ces[0];
|
| UCollationElements *coleiter = strsrch->textIter;
|
| UErrorCode status = U_ZERO_ERROR;
|
| int32_t ce;
|
| @@ -1028,7 +1013,7 @@ UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start,
|
| }
|
| }
|
| int32_t count = 1;
|
| - while (count < strsrch->pattern.CELength) {
|
| + while (count < strsrch->pattern.cesLength) {
|
| if (getCE(strsrch, ucol_next(coleiter, &status))
|
| == UCOL_IGNORABLE) {
|
| // Thai can give an ignorable here.
|
| @@ -1212,8 +1197,8 @@ UBool checkNextExactContractionMatch(UStringSearch *strsrch,
|
| expansion --;
|
| }
|
|
|
| - int32_t *patternce = strsrch->pattern.CE;
|
| - int32_t patterncelength = strsrch->pattern.CELength;
|
| + int32_t *patternce = strsrch->pattern.ces;
|
| + int32_t patterncelength = strsrch->pattern.cesLength;
|
| int32_t count = 0;
|
| while (count < patterncelength) {
|
| int32_t ce = getCE(strsrch, ucol_next(coleiter, status));
|
| @@ -1415,8 +1400,8 @@ static
|
| inline UBool checkCollationMatch(const UStringSearch *strsrch,
|
| UCollationElements *coleiter)
|
| {
|
| - int patternceindex = strsrch->pattern.CELength;
|
| - int32_t *patternce = strsrch->pattern.CE;
|
| + int patternceindex = strsrch->pattern.cesLength;
|
| + int32_t *patternce = strsrch->pattern.ces;
|
| UErrorCode status = U_ZERO_ERROR;
|
| while (patternceindex > 0) {
|
| int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
|
| @@ -1615,8 +1600,8 @@ int32_t doNextCanonicalSuffixMatch(UStringSearch *strsrch,
|
| ucol_setText(coleiter, safetext, safetextlength, status);
|
| // status checked in loop below
|
|
|
| - int32_t *ce = strsrch->pattern.CE;
|
| - int32_t celength = strsrch->pattern.CELength;
|
| + int32_t *ce = strsrch->pattern.ces;
|
| + int32_t celength = strsrch->pattern.cesLength;
|
| int ceindex = celength - 1;
|
| UBool isSafe = TRUE; // indication flag for position in safe zone
|
|
|
| @@ -1855,8 +1840,8 @@ UBool checkNextCanonicalContractionMatch(UStringSearch *strsrch,
|
| expansion --;
|
| }
|
|
|
| - int32_t *patternce = strsrch->pattern.CE;
|
| - int32_t patterncelength = strsrch->pattern.CELength;
|
| + int32_t *patternce = strsrch->pattern.ces;
|
| + int32_t patterncelength = strsrch->pattern.cesLength;
|
| int32_t count = 0;
|
| int32_t textlength = strsrch->search->textLength;
|
| while (count < patterncelength) {
|
| @@ -2053,8 +2038,8 @@ UBool checkPreviousExactContractionMatch(UStringSearch *strsrch,
|
| expansion --;
|
| }
|
|
|
| - int32_t *patternce = strsrch->pattern.CE;
|
| - int32_t patterncelength = strsrch->pattern.CELength;
|
| + int32_t *patternce = strsrch->pattern.ces;
|
| + int32_t patterncelength = strsrch->pattern.cesLength;
|
| int32_t count = patterncelength;
|
| while (count > 0) {
|
| int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
|
| @@ -2278,8 +2263,8 @@ int32_t doPreviousCanonicalPrefixMatch(UStringSearch *strsrch,
|
| ucol_setText(coleiter, safetext, safetextlength, status);
|
| // status checked in loop below
|
|
|
| - int32_t *ce = strsrch->pattern.CE;
|
| - int32_t celength = strsrch->pattern.CELength;
|
| + int32_t *ce = strsrch->pattern.ces;
|
| + int32_t celength = strsrch->pattern.cesLength;
|
| int ceindex = 0;
|
| UBool isSafe = TRUE; // safe zone indication flag for position
|
| int32_t prefixlength = u_strlen(strsrch->canonicalPrefixAccents);
|
| @@ -2493,8 +2478,8 @@ UBool checkPreviousCanonicalContractionMatch(UStringSearch *strsrch,
|
| expansion --;
|
| }
|
|
|
| - int32_t *patternce = strsrch->pattern.CE;
|
| - int32_t patterncelength = strsrch->pattern.CELength;
|
| + int32_t *patternce = strsrch->pattern.ces;
|
| + int32_t patterncelength = strsrch->pattern.cesLength;
|
| int32_t count = patterncelength;
|
| while (count > 0) {
|
| int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
|
| @@ -2700,7 +2685,7 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
|
| UCOL_SHIFTED;
|
| result->variableTop = ucol_getVariableTop(collator, status);
|
|
|
| - result->nfd = Normalizer2Factory::getNFDInstance(*status);
|
| + result->nfd = Normalizer2::getNFDInstance(*status);
|
|
|
| if (U_FAILURE(*status)) {
|
| uprv_free(result);
|
| @@ -2719,8 +2704,8 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
|
|
|
| result->pattern.text = pattern;
|
| result->pattern.textLength = patternlength;
|
| - result->pattern.CE = NULL;
|
| - result->pattern.PCE = NULL;
|
| + result->pattern.ces = NULL;
|
| + result->pattern.pces = NULL;
|
|
|
| result->search->breakIter = breakiter;
|
| #if !UCONFIG_NO_BREAK_ITERATION
|
| @@ -2736,6 +2721,7 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
|
| result->utilIter = NULL;
|
| result->textIter = ucol_openElements(collator, text,
|
| textlength, status);
|
| + result->textProcessedIter = NULL;
|
| if (U_FAILURE(*status)) {
|
| usearch_close(result);
|
| return NULL;
|
| @@ -2762,16 +2748,17 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
|
| U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch)
|
| {
|
| if (strsrch) {
|
| - if (strsrch->pattern.CE != strsrch->pattern.CEBuffer &&
|
| - strsrch->pattern.CE) {
|
| - uprv_free(strsrch->pattern.CE);
|
| + if (strsrch->pattern.ces != strsrch->pattern.cesBuffer &&
|
| + strsrch->pattern.ces) {
|
| + uprv_free(strsrch->pattern.ces);
|
| }
|
|
|
| - if (strsrch->pattern.PCE != NULL &&
|
| - strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) {
|
| - uprv_free(strsrch->pattern.PCE);
|
| + if (strsrch->pattern.pces != NULL &&
|
| + strsrch->pattern.pces != strsrch->pattern.pcesBuffer) {
|
| + uprv_free(strsrch->pattern.pces);
|
| }
|
|
|
| + delete strsrch->textProcessedIter;
|
| ucol_closeElements(strsrch->textIter);
|
| ucol_closeElements(strsrch->utilIter);
|
|
|
| @@ -2790,6 +2777,24 @@ U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch)
|
| }
|
| }
|
|
|
| +namespace {
|
| +
|
| +UBool initTextProcessedIter(UStringSearch *strsrch, UErrorCode *status) { // makes strsrch->textProcessedIter usable for strsrch->textIter; TRUE on success
|
| + if (U_FAILURE(*status)) { return FALSE; } // nothing is touched when the incoming status is already a failure
|
| + if (strsrch->textProcessedIter == NULL) { // no wrapper object yet: create one for textIter
|
| + strsrch->textProcessedIter = new icu::UCollationPCE(strsrch->textIter);
|
| + if (strsrch->textProcessedIter == NULL) { // allocation yielded NULL
|
| + *status = U_MEMORY_ALLOCATION_ERROR; // report the failure through the caller's status
|
| + return FALSE;
|
| + }
|
| + } else { // a wrapper already exists: keep the object and call init() on it instead of allocating again
|
| + strsrch->textProcessedIter->init(strsrch->textIter);
|
| + }
|
| + return TRUE; // textProcessedIter is non-NULL at this point
|
| +}
|
| +
|
| +}
|
| +
|
| // set and get methods --------------------------------------------------
|
|
|
| U_CAPI void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch,
|
| @@ -3010,6 +3015,11 @@ U_CAPI void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch,
|
| }
|
|
|
| if (strsrch) {
|
| + delete strsrch->textProcessedIter;
|
| + strsrch->textProcessedIter = NULL;
|
| + ucol_closeElements(strsrch->textIter);
|
| + ucol_closeElements(strsrch->utilIter);
|
| + strsrch->textIter = strsrch->utilIter = NULL;
|
| if (strsrch->ownCollator && (strsrch->collator != collator)) {
|
| ucol_close((UCollator *)strsrch->collator);
|
| strsrch->ownCollator = FALSE;
|
| @@ -3028,23 +3038,19 @@ U_CAPI void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch,
|
| UCOL_SHIFTED;
|
| // if status is a failure, ucol_getVariableTop returns 0
|
| strsrch->variableTop = ucol_getVariableTop(collator, status);
|
| - if (U_SUCCESS(*status)) {
|
| - initialize(strsrch, status);
|
| - if (U_SUCCESS(*status)) {
|
| - /* free offset buffer to avoid memory leak before initializing. */
|
| - ucol_freeOffsetBuffer(&(strsrch->textIter->iteratordata_));
|
| - uprv_init_collIterate(collator, strsrch->search->text,
|
| - strsrch->search->textLength,
|
| - &(strsrch->textIter->iteratordata_),
|
| - status);
|
| - strsrch->utilIter->iteratordata_.coll = collator;
|
| - }
|
| - }
|
| + strsrch->textIter = ucol_openElements(collator,
|
| + strsrch->search->text,
|
| + strsrch->search->textLength,
|
| + status);
|
| + strsrch->utilIter = ucol_openElements(
|
| + collator, strsrch->pattern.text, strsrch->pattern.textLength, status);
|
| + // initialize() _after_ setting the iterators for the new collator.
|
| + initialize(strsrch, status);
|
| }
|
|
|
| // **** are these calls needed?
|
| // **** we call uprv_init_pce in initializePatternPCETable
|
| - // **** and the CEBuffer constructor...
|
| + // **** and the CEIBuffer constructor...
|
| #if 0
|
| uprv_init_pce(strsrch->textIter);
|
| uprv_init_pce(strsrch->utilIter);
|
| @@ -3222,7 +3228,7 @@ U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch,
|
| }
|
|
|
| if (U_SUCCESS(*status)) {
|
| - if (strsrch->pattern.CELength == 0) {
|
| + if (strsrch->pattern.cesLength == 0) {
|
| if (search->matchedIndex == USEARCH_DONE) {
|
| search->matchedIndex = offset;
|
| }
|
| @@ -3333,7 +3339,7 @@ U_CAPI int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch,
|
| }
|
|
|
| if (U_SUCCESS(*status)) {
|
| - if (strsrch->pattern.CELength == 0) {
|
| + if (strsrch->pattern.cesLength == 0) {
|
| search->matchedIndex =
|
| (matchedindex == USEARCH_DONE ? offset : matchedindex);
|
| if (search->matchedIndex == 0) {
|
| @@ -3416,11 +3422,8 @@ U_CAPI void U_EXPORT2 usearch_reset(UStringSearch *strsrch)
|
| if (!sameCollAttribute) {
|
| initialize(strsrch, &status);
|
| }
|
| - /* free offset buffer to avoid memory leak before initializing. */
|
| - ucol_freeOffsetBuffer(&(strsrch->textIter->iteratordata_));
|
| - uprv_init_collIterate(strsrch->collator, strsrch->search->text,
|
| + ucol_setText(strsrch->textIter, strsrch->search->text,
|
| strsrch->search->textLength,
|
| - &(strsrch->textIter->iteratordata_),
|
| &status);
|
| strsrch->search->matchedLength = 0;
|
| strsrch->search->matchedIndex = USEARCH_DONE;
|
| @@ -3444,9 +3447,9 @@ struct CEI {
|
|
|
| U_NAMESPACE_BEGIN
|
|
|
| -
|
| +namespace {
|
| //
|
| -// CEBuffer A circular buffer of CEs from the text being searched.
|
| +// CEIBuffer A circular buffer of CEs-with-index from the text being searched.
|
| //
|
| #define DEFAULT_CEBUFFER_SIZE 96
|
| #define CEBUFFER_EXTRA 32
|
| @@ -3455,7 +3458,7 @@ U_NAMESPACE_BEGIN
|
| #define MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L 8
|
| #define MAX_TARGET_IGNORABLES_PER_PAT_OTHER 3
|
| #define MIGHT_BE_JAMO_L(c) ((c >= 0x1100 && c <= 0x115E) || (c >= 0x3131 && c <= 0x314E) || (c >= 0x3165 && c <= 0x3186))
|
| -struct CEBuffer {
|
| +struct CEIBuffer {
|
| CEI defBuf[DEFAULT_CEBUFFER_SIZE];
|
| CEI *buf;
|
| int32_t bufSize;
|
| @@ -3466,17 +3469,17 @@ struct CEBuffer {
|
|
|
|
|
|
|
| - CEBuffer(UStringSearch *ss, UErrorCode *status);
|
| - ~CEBuffer();
|
| + CEIBuffer(UStringSearch *ss, UErrorCode *status);
|
| + ~CEIBuffer();
|
| const CEI *get(int32_t index);
|
| const CEI *getPrevious(int32_t index);
|
| };
|
|
|
|
|
| -CEBuffer::CEBuffer(UStringSearch *ss, UErrorCode *status) {
|
| +CEIBuffer::CEIBuffer(UStringSearch *ss, UErrorCode *status) {
|
| buf = defBuf;
|
| strSearch = ss;
|
| - bufSize = ss->pattern.PCELength + CEBUFFER_EXTRA;
|
| + bufSize = ss->pattern.pcesLength + CEBUFFER_EXTRA;
|
| if (ss->search->elementComparisonType != 0) {
|
| const UChar * patText = ss->pattern.text;
|
| if (patText) {
|
| @@ -3496,7 +3499,7 @@ CEBuffer::CEBuffer(UStringSearch *ss, UErrorCode *status) {
|
| firstIx = 0;
|
| limitIx = 0;
|
|
|
| - uprv_init_pce(ceIter);
|
| + if (!initTextProcessedIter(ss, status)) { return; }
|
|
|
| if (bufSize>DEFAULT_CEBUFFER_SIZE) {
|
| buf = (CEI *)uprv_malloc(bufSize * sizeof(CEI));
|
| @@ -3509,7 +3512,7 @@ CEBuffer::CEBuffer(UStringSearch *ss, UErrorCode *status) {
|
| // TODO: add a reset or init function so that allocated
|
| // buffers can be retained & reused.
|
|
|
| -CEBuffer::~CEBuffer() {
|
| +CEIBuffer::~CEIBuffer() {
|
| if (buf != defBuf) {
|
| uprv_free(buf);
|
| }
|
| @@ -3522,7 +3525,7 @@ CEBuffer::~CEBuffer() {
|
| // where n is the largest index to have been fetched by some previous call to this function.
|
| // The CE value will be UCOL__PROCESSED_NULLORDER at end of input.
|
| //
|
| -const CEI *CEBuffer::get(int32_t index) {
|
| +const CEI *CEIBuffer::get(int32_t index) {
|
| int i = index % bufSize;
|
|
|
| if (index>=firstIx && index<limitIx) {
|
| @@ -3550,7 +3553,7 @@ const CEI *CEBuffer::get(int32_t index) {
|
|
|
| UErrorCode status = U_ZERO_ERROR;
|
|
|
| - buf[i].ce = ucol_nextProcessed(ceIter, &buf[i].lowIndex, &buf[i].highIndex, &status);
|
| + buf[i].ce = strSearch->textProcessedIter->nextProcessed(&buf[i].lowIndex, &buf[i].highIndex, &status);
|
|
|
| return &buf[i];
|
| }
|
| @@ -3561,7 +3564,7 @@ const CEI *CEBuffer::get(int32_t index) {
|
| // where n is the largest index to have been fetched by some previous call to this function.
|
| // The CE value will be UCOL__PROCESSED_NULLORDER at end of input.
|
| //
|
| -const CEI *CEBuffer::getPrevious(int32_t index) {
|
| +const CEI *CEIBuffer::getPrevious(int32_t index) {
|
| int i = index % bufSize;
|
|
|
| if (index>=firstIx && index<limitIx) {
|
| @@ -3589,11 +3592,13 @@ const CEI *CEBuffer::getPrevious(int32_t index) {
|
|
|
| UErrorCode status = U_ZERO_ERROR;
|
|
|
| - buf[i].ce = ucol_previousProcessed(ceIter, &buf[i].lowIndex, &buf[i].highIndex, &status);
|
| + buf[i].ce = strSearch->textProcessedIter->previousProcessed(&buf[i].lowIndex, &buf[i].highIndex, &status);
|
|
|
| return &buf[i];
|
| }
|
|
|
| +}
|
| +
|
| U_NAMESPACE_END
|
|
|
|
|
| @@ -3815,8 +3820,8 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
|
| #ifdef USEARCH_DEBUG
|
| if (getenv("USEARCH_DEBUG") != NULL) {
|
| printf("Pattern CEs\n");
|
| - for (int ii=0; ii<strsrch->pattern.CELength; ii++) {
|
| - printf(" %8x", strsrch->pattern.CE[ii]);
|
| + for (int ii=0; ii<strsrch->pattern.cesLength; ii++) {
|
| + printf(" %8x", strsrch->pattern.ces[ii]);
|
| }
|
| printf("\n");
|
| }
|
| @@ -3825,20 +3830,20 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
|
| // Input parameter sanity check.
|
| // TODO: should input indicies clip to the text length
|
| // in the same way that UText does.
|
| - if(strsrch->pattern.CELength == 0 ||
|
| + if(strsrch->pattern.cesLength == 0 ||
|
| startIdx < 0 ||
|
| startIdx > strsrch->search->textLength ||
|
| - strsrch->pattern.CE == NULL) {
|
| + strsrch->pattern.ces == NULL) {
|
| *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| return FALSE;
|
| }
|
|
|
| - if (strsrch->pattern.PCE == NULL) {
|
| + if (strsrch->pattern.pces == NULL) {
|
| initializePatternPCETable(strsrch, status);
|
| }
|
|
|
| ucol_setOffset(strsrch->textIter, startIdx, status);
|
| - CEBuffer ceb(strsrch, status);
|
| + CEIBuffer ceb(strsrch, status);
|
|
|
|
|
| int32_t targetIx = 0;
|
| @@ -3884,8 +3889,8 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
|
| break;
|
| }
|
|
|
| - for (patIx=0; patIx<strsrch->pattern.PCELength; patIx++) {
|
| - patCE = strsrch->pattern.PCE[patIx];
|
| + for (patIx=0; patIx<strsrch->pattern.pcesLength; patIx++) {
|
| + patCE = strsrch->pattern.pces[patIx];
|
| targetCEI = ceb.get(targetIx+patIx+targetIxOffset);
|
| // Compare CE from target string with CE from the pattern.
|
| // Note that the target CE will be UCOL_PROCESSED_NULLORDER if we reach the end of input,
|
| @@ -3905,7 +3910,7 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
|
| }
|
| }
|
| }
|
| - targetIxOffset += strsrch->pattern.PCELength; // this is now the offset in target CE space to end of the match so far
|
| + targetIxOffset += strsrch->pattern.pcesLength; // this is now the offset in target CE space to end of the match so far
|
|
|
| if (!found && ((targetCEI == NULL) || (targetCEI->ce != UCOL_PROCESSED_NULLORDER))) {
|
| // No match at this targetIx. Try again at the next.
|
| @@ -4082,8 +4087,8 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
|
| #ifdef USEARCH_DEBUG
|
| if (getenv("USEARCH_DEBUG") != NULL) {
|
| printf("Pattern CEs\n");
|
| - for (int ii=0; ii<strsrch->pattern.CELength; ii++) {
|
| - printf(" %8x", strsrch->pattern.CE[ii]);
|
| + for (int ii=0; ii<strsrch->pattern.cesLength; ii++) {
|
| + printf(" %8x", strsrch->pattern.ces[ii]);
|
| }
|
| printf("\n");
|
| }
|
| @@ -4092,19 +4097,19 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
|
| // Input parameter sanity check.
|
| // TODO: should input indicies clip to the text length
|
| // in the same way that UText does.
|
| - if(strsrch->pattern.CELength == 0 ||
|
| + if(strsrch->pattern.cesLength == 0 ||
|
| startIdx < 0 ||
|
| startIdx > strsrch->search->textLength ||
|
| - strsrch->pattern.CE == NULL) {
|
| + strsrch->pattern.ces == NULL) {
|
| *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| return FALSE;
|
| }
|
|
|
| - if (strsrch->pattern.PCE == NULL) {
|
| + if (strsrch->pattern.pces == NULL) {
|
| initializePatternPCETable(strsrch, status);
|
| }
|
|
|
| - CEBuffer ceb(strsrch, status);
|
| + CEIBuffer ceb(strsrch, status);
|
| int32_t targetIx = 0;
|
|
|
| /*
|
| @@ -4165,10 +4170,10 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
|
| // Inner loop checks for a match beginning at each
|
| // position from the outer loop.
|
| int32_t targetIxOffset = 0;
|
| - for (patIx = strsrch->pattern.PCELength - 1; patIx >= 0; patIx -= 1) {
|
| - int64_t patCE = strsrch->pattern.PCE[patIx];
|
| + for (patIx = strsrch->pattern.pcesLength - 1; patIx >= 0; patIx -= 1) {
|
| + int64_t patCE = strsrch->pattern.pces[patIx];
|
|
|
| - targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1 - patIx + targetIxOffset);
|
| + targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLength - 1 - patIx + targetIxOffset);
|
| // Compare CE from target string with CE from the pattern.
|
| // Note that the target CE will be UCOL_NULLORDER if we reach the end of input,
|
| // which will fail the compare, below.
|
| @@ -4204,7 +4209,7 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
|
| // There still is a chance of match failure if the CE range not correspond to
|
| // an acceptable character range.
|
| //
|
| - const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1 + targetIxOffset);
|
| + const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLength - 1 + targetIxOffset);
|
| mStart = firstCEI->lowIndex;
|
|
|
| // Check for the start of the match being within a combining sequence.
|
| @@ -4330,8 +4335,8 @@ UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status)
|
| #if BOYER_MOORE
|
| UCollationElements *coleiter = strsrch->textIter;
|
| int32_t textlength = strsrch->search->textLength;
|
| - int32_t *patternce = strsrch->pattern.CE;
|
| - int32_t patterncelength = strsrch->pattern.CELength;
|
| + int32_t *patternce = strsrch->pattern.ces;
|
| + int32_t patterncelength = strsrch->pattern.cesLength;
|
| int32_t textoffset = ucol_getOffset(coleiter);
|
|
|
| // status used in setting coleiter offset, since offset is checked in
|
| @@ -4444,8 +4449,8 @@ UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status)
|
| #if BOYER_MOORE
|
| UCollationElements *coleiter = strsrch->textIter;
|
| int32_t textlength = strsrch->search->textLength;
|
| - int32_t *patternce = strsrch->pattern.CE;
|
| - int32_t patterncelength = strsrch->pattern.CELength;
|
| + int32_t *patternce = strsrch->pattern.ces;
|
| + int32_t patterncelength = strsrch->pattern.cesLength;
|
| int32_t textoffset = ucol_getOffset(coleiter);
|
| UBool hasPatternAccents =
|
| strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
|
| @@ -4558,8 +4563,8 @@ UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
|
|
|
| #if BOYER_MOORE
|
| UCollationElements *coleiter = strsrch->textIter;
|
| - int32_t *patternce = strsrch->pattern.CE;
|
| - int32_t patterncelength = strsrch->pattern.CELength;
|
| + int32_t *patternce = strsrch->pattern.ces;
|
| + int32_t patterncelength = strsrch->pattern.cesLength;
|
| int32_t textoffset = ucol_getOffset(coleiter);
|
|
|
| // shifting it check for setting offset
|
| @@ -4659,8 +4664,12 @@ UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
|
| } else {
|
| // move the start position at the end of possible match
|
| initializePatternPCETable(strsrch, status);
|
| - for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.PCELength - 1; nPCEs++) {
|
| - int64_t pce = ucol_nextProcessed(strsrch->textIter, NULL, NULL, status);
|
| + if (!initTextProcessedIter(strsrch, status)) {
|
| + setMatchNotFound(strsrch);
|
| + return FALSE;
|
| + }
|
| + for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) {
|
| + int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NULL, status);
|
| if (pce == UCOL_PROCESSED_NULLORDER) {
|
| // at the end of the text
|
| break;
|
| @@ -4700,8 +4709,8 @@ UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
|
|
|
| #if BOYER_MOORE
|
| UCollationElements *coleiter = strsrch->textIter;
|
| - int32_t *patternce = strsrch->pattern.CE;
|
| - int32_t patterncelength = strsrch->pattern.CELength;
|
| + int32_t *patternce = strsrch->pattern.ces;
|
| + int32_t patterncelength = strsrch->pattern.cesLength;
|
| int32_t textoffset = ucol_getOffset(coleiter);
|
| UBool hasPatternAccents =
|
| strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
|
| @@ -4808,8 +4817,12 @@ UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
|
| } else {
|
| // move the start position at the end of possible match
|
| initializePatternPCETable(strsrch, status);
|
| - for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.PCELength - 1; nPCEs++) {
|
| - int64_t pce = ucol_nextProcessed(strsrch->textIter, NULL, NULL, status);
|
| + if (!initTextProcessedIter(strsrch, status)) {
|
| + setMatchNotFound(strsrch);
|
| + return FALSE;
|
| + }
|
| + for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) {
|
| + int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NULL, status);
|
| if (pce == UCOL_PROCESSED_NULLORDER) {
|
| // at the end of the text
|
| break;
|
|
|