Index: icu46/source/i18n/bmsearch.cpp |
=================================================================== |
--- icu46/source/i18n/bmsearch.cpp (revision 0) |
+++ icu46/source/i18n/bmsearch.cpp (revision 0) |
@@ -0,0 +1,817 @@ |
+/* |
+ ****************************************************************************** |
+ * Copyright (C) 1996-2010, International Business Machines * |
+ * Corporation and others. All Rights Reserved. * |
+ ****************************************************************************** |
+ */ |
+ |
+#include "unicode/utypes.h" |
+ |
+#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION |
+ |
+#include "unicode/unistr.h" |
+#include "unicode/putil.h" |
+#include "unicode/usearch.h" |
+ |
+#include "cmemory.h" |
+#include "unicode/coll.h" |
+#include "unicode/tblcoll.h" |
+#include "unicode/coleitr.h" |
+#include "unicode/ucoleitr.h" |
+ |
+#include "unicode/regex.h" // TODO: make conditional on regexp being built. |
+ |
+#include "unicode/uniset.h" |
+#include "unicode/uset.h" |
+#include "unicode/ustring.h" |
+#include "hash.h" |
+#include "uhash.h" |
+#include "ucol_imp.h" |
+#include "normalizer2impl.h" |
+ |
+#include "unicode/colldata.h" |
+#include "unicode/bmsearch.h" |
+ |
+U_NAMESPACE_BEGIN |
+ |
+#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0])) |
+#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) |
+#define DELETE_ARRAY(array) uprv_free((void *) (array)) |
+ |
+ |
+struct CEI |
+{ |
+ uint32_t order; |
+ int32_t lowOffset; |
+ int32_t highOffset; |
+}; |
+ |
+class Target : public UMemory |
+{ |
+public: |
+ Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status); |
+ ~Target(); |
+ |
+ void setTargetString(const UnicodeString *target); |
+ |
+ const CEI *nextCE(int32_t offset); |
+ const CEI *prevCE(int32_t offset); |
+ |
+ int32_t stringLength(); |
+ UChar charAt(int32_t offset); |
+ |
+ UBool isBreakBoundary(int32_t offset); |
+ int32_t nextBreakBoundary(int32_t offset); |
+ int32_t nextSafeBoundary(int32_t offset); |
+ |
+ UBool isIdentical(UnicodeString &pattern, int32_t start, int32_t end); |
+ |
+ void setOffset(int32_t offset); |
+ void setLast(int32_t last); |
+ int32_t getOffset(); |
+ |
+private: |
+ CEI *ceb; |
+ int32_t bufferSize; |
+ int32_t bufferMin; |
+ int32_t bufferMax; |
+ |
+ uint32_t strengthMask; |
+ UCollationStrength strength; |
+ uint32_t variableTop; |
+ UBool toShift; |
+ UCollator *coll; |
+ const Normalizer2 &nfd; |
+ |
+ const UnicodeString *targetString; |
+ const UChar *targetBuffer; |
+ int32_t targetLength; |
+ |
+ UCollationElements *elements; |
+ UBreakIterator *charBreakIterator; |
+}; |
+ |
+Target::Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status) |
+ : bufferSize(0), bufferMin(0), bufferMax(0), |
+ strengthMask(0), strength(UCOL_PRIMARY), variableTop(0), toShift(FALSE), coll(theCollator), |
+ nfd(*Normalizer2Factory::getNFDInstance(status)), |
+ targetString(NULL), targetBuffer(NULL), targetLength(0), elements(NULL), charBreakIterator(NULL) |
+{ |
+ strength = ucol_getStrength(coll); |
+ toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED; |
+ variableTop = ucol_getVariableTop(coll, &status); |
+ |
+ // find the largest expansion |
+ uint8_t maxExpansion = 0; |
+ for (const uint8_t *expansion = coll->expansionCESize; *expansion != 0; expansion += 1) { |
+ if (*expansion > maxExpansion) { |
+ maxExpansion = *expansion; |
+ } |
+ } |
+ |
+ // room for an extra character on each end, plus 4 for safety |
+ bufferSize = patternLength + (2 * maxExpansion) + 4; |
+ |
+ ceb = NEW_ARRAY(CEI, bufferSize); |
+ |
+ if (ceb == NULL) { |
+ status = U_MEMORY_ALLOCATION_ERROR; |
+ return; |
+ } |
+ |
+ if (target != NULL) { |
+ setTargetString(target); |
+ } |
+ |
+ switch (strength) |
+ { |
+ default: |
+ strengthMask |= UCOL_TERTIARYORDERMASK; |
+ /* fall through */ |
+ |
+ case UCOL_SECONDARY: |
+ strengthMask |= UCOL_SECONDARYORDERMASK; |
+ /* fall through */ |
+ |
+ case UCOL_PRIMARY: |
+ strengthMask |= UCOL_PRIMARYORDERMASK; |
+ } |
+} |
+ |
+Target::~Target() |
+{ |
+ ubrk_close(charBreakIterator); |
+ ucol_closeElements(elements); |
+ |
+ DELETE_ARRAY(ceb); |
+} |
+ |
+void Target::setTargetString(const UnicodeString *target) |
+{ |
+ if (charBreakIterator != NULL) { |
+ ubrk_close(charBreakIterator); |
+ ucol_closeElements(elements); |
+ } |
+ |
+ targetString = target; |
+ |
+ if (targetString != NULL) { |
+ UErrorCode status = U_ZERO_ERROR; |
+ |
+ targetBuffer = targetString->getBuffer(); |
+ targetLength = targetString->length(); |
+ |
+ elements = ucol_openElements(coll, target->getBuffer(), target->length(), &status); |
+ ucol_forceHanImplicit(elements, &status); |
+ |
+ charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status), |
+ targetBuffer, targetLength, &status); |
+ } else { |
+ targetBuffer = NULL; |
+ targetLength = 0; |
+ } |
+} |
+ |
+const CEI *Target::nextCE(int32_t offset) |
+{ |
+ UErrorCode status = U_ZERO_ERROR; |
+ int32_t low = -1, high = -1; |
+ uint32_t order; |
+ UBool cont = FALSE; |
+ |
+ if (offset >= bufferMin && offset < bufferMax) { |
+ return &ceb[offset]; |
+ } |
+ |
+ if (bufferMax >= bufferSize || offset != bufferMax) { |
+ return NULL; |
+ } |
+ |
+ do { |
+ low = ucol_getOffset(elements); |
+ order = ucol_next(elements, &status); |
+ high = ucol_getOffset(elements); |
+ |
+ if (order == (uint32_t)UCOL_NULLORDER) { |
+ //high = low = -1; |
+ break; |
+ } |
+ |
+ cont = isContinuation(order); |
+ order &= strengthMask; |
+ |
+ if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) { |
+ if (strength >= UCOL_QUATERNARY) { |
+ order &= UCOL_PRIMARYORDERMASK; |
+ } else { |
+ order = UCOL_IGNORABLE; |
+ } |
+ } |
+ } while (order == UCOL_IGNORABLE); |
+ |
+ if (cont) { |
+ order |= UCOL_CONTINUATION_MARKER; |
+ } |
+ |
+ ceb[offset].order = order; |
+ ceb[offset].lowOffset = low; |
+ ceb[offset].highOffset = high; |
+ |
+ bufferMax += 1; |
+ |
+ return &ceb[offset]; |
+} |
+ |
+const CEI *Target::prevCE(int32_t offset) |
+{ |
+ UErrorCode status = U_ZERO_ERROR; |
+ int32_t low = -1, high = -1; |
+ uint32_t order; |
+ UBool cont = FALSE; |
+ |
+ if (offset >= bufferMin && offset < bufferMax) { |
+ return &ceb[offset]; |
+ } |
+ |
+ if (bufferMax >= bufferSize || offset != bufferMax) { |
+ return NULL; |
+ } |
+ |
+ do { |
+ high = ucol_getOffset(elements); |
+ order = ucol_previous(elements, &status); |
+ low = ucol_getOffset(elements); |
+ |
+ if (order == (uint32_t)UCOL_NULLORDER) { |
+ break; |
+ } |
+ |
+ cont = isContinuation(order); |
+ order &= strengthMask; |
+ |
+ if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) { |
+ if (strength >= UCOL_QUATERNARY) { |
+ order &= UCOL_PRIMARYORDERMASK; |
+ } else { |
+ order = UCOL_IGNORABLE; |
+ } |
+ } |
+ } while (order == UCOL_IGNORABLE); |
+ |
+ bufferMax += 1; |
+ |
+ if (cont) { |
+ order |= UCOL_CONTINUATION_MARKER; |
+ } |
+ |
+ ceb[offset].order = order; |
+ ceb[offset].lowOffset = low; |
+ ceb[offset].highOffset = high; |
+ |
+ return &ceb[offset]; |
+} |
+ |
+int32_t Target::stringLength() |
+{ |
+ if (targetString != NULL) { |
+ return targetLength; |
+ } |
+ |
+ return 0; |
+} |
+ |
+UChar Target::charAt(int32_t offset) |
+{ |
+ if (targetString != NULL) { |
+ return targetBuffer[offset]; |
+ } |
+ |
+ return 0x0000; |
+} |
+ |
+void Target::setOffset(int32_t offset) |
+{ |
+ UErrorCode status = U_ZERO_ERROR; |
+ |
+ bufferMin = 0; |
+ bufferMax = 0; |
+ |
+ ucol_setOffset(elements, offset, &status); |
+} |
+ |
+void Target::setLast(int32_t last) |
+{ |
+ UErrorCode status = U_ZERO_ERROR; |
+ |
+ bufferMin = 0; |
+ bufferMax = 1; |
+ |
+ ceb[0].order = UCOL_NULLORDER; |
+ ceb[0].lowOffset = last; |
+ ceb[0].highOffset = last; |
+ |
+ ucol_setOffset(elements, last, &status); |
+} |
+ |
+int32_t Target::getOffset() |
+{ |
+ return ucol_getOffset(elements); |
+} |
+ |
+UBool Target::isBreakBoundary(int32_t offset) |
+{ |
+ return ubrk_isBoundary(charBreakIterator, offset); |
+} |
+ |
+int32_t Target::nextBreakBoundary(int32_t offset) |
+{ |
+ return ubrk_following(charBreakIterator, offset); |
+} |
+ |
+int32_t Target::nextSafeBoundary(int32_t offset) |
+{ |
+ while (offset < targetLength) { |
+ //UChar ch = charAt(offset); |
+ UChar ch = targetBuffer[offset]; |
+ |
+ if (U_IS_LEAD(ch) || ! ucol_unsafeCP(ch, coll)) { |
+ return offset; |
+ } |
+ |
+ offset += 1; |
+ } |
+ |
+ return targetLength; |
+} |
+ |
+UBool Target::isIdentical(UnicodeString &pattern, int32_t start, int32_t end) |
+{ |
+ if (strength < UCOL_IDENTICAL) { |
+ return TRUE; |
+ } |
+ |
+ // Note: We could use Normalizer::compare() or similar, but for short strings |
+ // which may not be in FCD it might be faster to just NFD them. |
+ UErrorCode status = U_ZERO_ERROR; |
+ UnicodeString t2, p2; |
+ nfd.normalize(UnicodeString(FALSE, targetBuffer + start, end - start), t2, status); |
+ nfd.normalize(pattern, p2, status); |
+ // return FALSE if NFD failed |
+ return U_SUCCESS(status) && t2 == p2; |
+} |
+ |
+#define HASH_TABLE_SIZE 257 |
+ |
+class BadCharacterTable : public UMemory |
+{ |
+public: |
+ BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status); |
+ ~BadCharacterTable(); |
+ |
+ int32_t operator[](uint32_t ce) const; |
+ int32_t getMaxSkip() const; |
+ int32_t minLengthInChars(int32_t index); |
+ |
+private: |
+ static int32_t hash(uint32_t ce); |
+ |
+ int32_t maxSkip; |
+ int32_t badCharacterTable[HASH_TABLE_SIZE]; |
+ |
+ int32_t *minLengthCache; |
+}; |
+ |
+BadCharacterTable::BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status) |
+ : minLengthCache(NULL) |
+{ |
+ int32_t plen = patternCEs.size(); |
+ |
+ // **** need a better way to deal with this **** |
+ if (U_FAILURE(status) || plen == 0) { |
+ return; |
+ } |
+ |
+ int32_t *history = NEW_ARRAY(int32_t, plen); |
+ |
+ if (history == NULL) { |
+ status = U_MEMORY_ALLOCATION_ERROR; |
+ return; |
+ } |
+ |
+ for (int32_t i = 0; i < plen; i += 1) { |
+ history[i] = -1; |
+ } |
+ |
+ minLengthCache = NEW_ARRAY(int32_t, plen + 1); |
+ |
+ if (minLengthCache == NULL) { |
+ DELETE_ARRAY(history); |
+ status = U_MEMORY_ALLOCATION_ERROR; |
+ return; |
+ } |
+ |
+ maxSkip = minLengthCache[0] = data->minLengthInChars(&patternCEs, 0, history); |
+ |
+ for(int32_t j = 0; j < HASH_TABLE_SIZE; j += 1) { |
+ badCharacterTable[j] = maxSkip; |
+ } |
+ |
+ for(int32_t p = 1; p < plen; p += 1) { |
+ minLengthCache[p] = data->minLengthInChars(&patternCEs, p, history); |
+ |
+ // Make sure this entry is not bigger than the previous one. |
+ // Otherwise, we might skip too far in some cases. |
+ if (minLengthCache[p] < 0 || minLengthCache[p] > minLengthCache[p - 1]) { |
+ minLengthCache[p] = minLengthCache[p - 1]; |
+ } |
+ } |
+ |
+ minLengthCache[plen] = 0; |
+ |
+ for(int32_t p = 0; p < plen - 1; p += 1) { |
+ badCharacterTable[hash(patternCEs[p])] = minLengthCache[p + 1]; |
+ } |
+ |
+ DELETE_ARRAY(history); |
+} |
+ |
+BadCharacterTable::~BadCharacterTable() |
+{ |
+ DELETE_ARRAY(minLengthCache); |
+} |
+ |
+int32_t BadCharacterTable::operator[](uint32_t ce) const |
+{ |
+ return badCharacterTable[hash(ce)]; |
+} |
+ |
+int32_t BadCharacterTable::getMaxSkip() const |
+{ |
+ return maxSkip; |
+} |
+ |
+int32_t BadCharacterTable::minLengthInChars(int32_t index) |
+{ |
+ return minLengthCache[index]; |
+} |
+ |
+int32_t BadCharacterTable::hash(uint32_t ce) |
+{ |
+ return UCOL_PRIMARYORDER(ce) % HASH_TABLE_SIZE; |
+} |
+ |
+class GoodSuffixTable : public UMemory |
+{ |
+public: |
+ GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status); |
+ ~GoodSuffixTable(); |
+ |
+ int32_t operator[](int32_t offset) const; |
+ |
+private: |
+ int32_t *goodSuffixTable; |
+}; |
+ |
+GoodSuffixTable::GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status) |
+ : goodSuffixTable(NULL) |
+{ |
+ int32_t patlen = patternCEs.size(); |
+ |
+ // **** need a better way to deal with this **** |
+ if (U_FAILURE(status) || patlen <= 0) { |
+ return; |
+ } |
+ |
+ int32_t *suff = NEW_ARRAY(int32_t, patlen); |
+ int32_t start = patlen - 1, end = - 1; |
+ int32_t maxSkip = badCharacterTable.getMaxSkip(); |
+ |
+ if (suff == NULL) { |
+ status = U_MEMORY_ALLOCATION_ERROR; |
+ return; |
+ } |
+ |
+ // initialze suff |
+ suff[patlen - 1] = patlen; |
+ |
+ for (int32_t i = patlen - 2; i >= 0; i -= 1) { |
+ // (i > start) means we're inside the last suffix match we found |
+ // ((patlen - 1) - end) is how far the end of that match is from end of pattern |
+ // (i - start) is how far we are from start of that match |
+ // (i + (patlen - 1) - end) is index of same character at end of pattern |
+ // so if any suffix match at that character doesn't extend beyond the last match, |
+ // it's the suffix for this character as well |
+ if (i > start && suff[i + patlen - 1 - end] < i - start) { |
+ suff[i] = suff[i + patlen - 1 - end]; |
+ } else { |
+ start = end = i; |
+ |
+ int32_t s = patlen; |
+ |
+ while (start >= 0 && patternCEs[start] == patternCEs[--s]) { |
+ start -= 1; |
+ } |
+ |
+ suff[i] = end - start; |
+ } |
+ } |
+ |
+ // now build goodSuffixTable |
+ goodSuffixTable = NEW_ARRAY(int32_t, patlen); |
+ |
+ if (goodSuffixTable == NULL) { |
+ DELETE_ARRAY(suff); |
+ status = U_MEMORY_ALLOCATION_ERROR; |
+ return; |
+ } |
+ |
+ |
+ // initialize entries to minLengthInChars of the pattern |
+ for (int32_t i = 0; i < patlen; i += 1) { |
+ goodSuffixTable[i] = maxSkip; |
+ } |
+ |
+ int32_t prefix = 0; |
+ |
+ for (int32_t i = patlen - /*1*/ 2; i >= 0; i -= 1) { |
+ if (suff[i] == i + 1) { |
+ // this matching suffix is a prefix of the pattern |
+ int32_t prefixSkip = badCharacterTable.minLengthInChars(i + 1); |
+ |
+ // for any mis-match before this suffix, we should skip |
+ // so that the front of the pattern (i.e. the prefix) |
+ // lines up with the front of the suffix. |
+ // (patlen - 1 - i) is the start of the suffix |
+ while (prefix < patlen - 1 - i) { |
+ // value of maxSkip means never set... |
+ if (goodSuffixTable[prefix] == maxSkip) { |
+ goodSuffixTable[prefix] = prefixSkip; |
+ } |
+ |
+ prefix += 1; |
+ } |
+ } |
+ } |
+ |
+ for (int32_t i = 0; i < patlen - 1; i += 1) { |
+ goodSuffixTable[patlen - 1 - suff[i]] = badCharacterTable.minLengthInChars(i + 1); |
+ } |
+ |
+ DELETE_ARRAY(suff); |
+} |
+ |
+GoodSuffixTable::~GoodSuffixTable() |
+{ |
+ DELETE_ARRAY(goodSuffixTable); |
+} |
+ |
+int32_t GoodSuffixTable::operator[](int32_t offset) const |
+{ |
+ return goodSuffixTable[offset]; |
+} |
+ |
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BoyerMooreSearch) |
+ |
+ |
+UBool BoyerMooreSearch::empty() |
+{ |
+ return patCEs->size() <= 0; |
+} |
+ |
+CollData *BoyerMooreSearch::getData() |
+{ |
+ return data; |
+} |
+ |
+CEList *BoyerMooreSearch::getPatternCEs() |
+{ |
+ return patCEs; |
+} |
+ |
+BadCharacterTable *BoyerMooreSearch::getBadCharacterTable() |
+{ |
+ return badCharacterTable; |
+} |
+ |
+GoodSuffixTable *BoyerMooreSearch::getGoodSuffixTable() |
+{ |
+ return goodSuffixTable; |
+} |
+ |
+BoyerMooreSearch::BoyerMooreSearch(CollData *theData, const UnicodeString &patternString, const UnicodeString *targetString, |
+ UErrorCode &status) |
+ : data(theData), patCEs(NULL), badCharacterTable(NULL), goodSuffixTable(NULL), pattern(patternString), target(NULL) |
+{ |
+ |
+ if (U_FAILURE(status)) { |
+ return; |
+ } |
+ |
+ UCollator *collator = data->getCollator(); |
+ |
+ patCEs = new CEList(collator, patternString, status); |
+ |
+ if (patCEs == NULL || U_FAILURE(status)) { |
+ return; |
+ } |
+ |
+ badCharacterTable = new BadCharacterTable(*patCEs, data, status); |
+ |
+ if (badCharacterTable == NULL || U_FAILURE(status)) { |
+ return; |
+ } |
+ |
+ goodSuffixTable = new GoodSuffixTable(*patCEs, *badCharacterTable, status); |
+ |
+ if (targetString != NULL) { |
+ target = new Target(collator, targetString, patCEs->size(), status); |
+ } |
+} |
+ |
+BoyerMooreSearch::~BoyerMooreSearch() |
+{ |
+ delete target; |
+ delete goodSuffixTable; |
+ delete badCharacterTable; |
+ delete patCEs; |
+} |
+ |
+void BoyerMooreSearch::setTargetString(const UnicodeString *targetString, UErrorCode &status) |
+{ |
+ if (U_FAILURE(status)) { |
+ return; |
+ } |
+ |
+ if (target == NULL) { |
+ target = new Target(data->getCollator(), targetString, patCEs->size(), status); |
+ } else { |
+ target->setTargetString(targetString); |
+ } |
+} |
+ |
+// **** main flow of this code from Laura Werner's "Unicode Text Searching in Java" paper. **** |
+/* |
+ * TODO: |
+ * * deal with trailing (and leading?) ignorables. |
+ * * Adding BoyerMooreSearch object slowed it down. How can we speed it up? |
+ */ |
+UBool BoyerMooreSearch::search(int32_t offset, int32_t &start, int32_t &end) |
+{ |
+ /*UCollator *coll =*/ data->getCollator(); |
+ int32_t plen = patCEs->size(); |
+ int32_t tlen = target->stringLength(); |
+ int32_t maxSkip = badCharacterTable->getMaxSkip(); |
+ int32_t tOffset = offset + maxSkip; |
+ |
+ if (plen <= 0) { |
+ // Searching for a zero length pattern always fails. |
+ start = end = -1; |
+ return FALSE; |
+ } |
+ |
+ while (tOffset <= tlen) { |
+ int32_t pIndex = plen - 1; |
+ int32_t tIndex = 0; |
+ int32_t lIndex = 0; |
+ |
+ if (tOffset < tlen) { |
+ // **** we really want to skip ahead enough to **** |
+ // **** be sure we get at least 1 non-ignorable **** |
+ // **** CE after the end of the pattern. **** |
+ int32_t next = target->nextSafeBoundary(tOffset + 1); |
+ |
+ target->setOffset(next); |
+ |
+ for (lIndex = 0; ; lIndex += 1) { |
+ const CEI *cei = target->prevCE(lIndex); |
+ int32_t low = cei->lowOffset; |
+ int32_t high = cei->highOffset; |
+ |
+ if (high == 0 || (low < high && low <= tOffset)) { |
+ if (low < tOffset) { |
+ while (lIndex >= 0 && target->prevCE(lIndex)->highOffset == high) { |
+ lIndex -= 1; |
+ } |
+ |
+ if (high > tOffset) { |
+ tOffset = high; |
+ } |
+ } |
+ |
+ break; |
+ } |
+ } |
+ } else { |
+ target->setLast(tOffset); |
+ lIndex = 0; |
+ } |
+ |
+ tIndex = ++lIndex; |
+ |
+ // Iterate backward until we hit the beginning of the pattern |
+ while (pIndex >= 0) { |
+ uint32_t pce = (*patCEs)[pIndex]; |
+ const CEI *tcei = target->prevCE(tIndex++); |
+ |
+ |
+ if (tcei->order != pce) { |
+ // There is a mismatch at this position. Decide how far |
+ // over to shift the pattern, then try again. |
+ |
+ int32_t gsOffset = tOffset + (*goodSuffixTable)[pIndex]; |
+#ifdef EXTRA_CAUTIOUS |
+ int32_t old = tOffset; |
+#endif |
+ |
+ tOffset += (*badCharacterTable)[tcei->order] - badCharacterTable->minLengthInChars(pIndex + 1); |
+ |
+ if (gsOffset > tOffset) { |
+ tOffset = gsOffset; |
+ } |
+ |
+#ifdef EXTRA_CAUTIOUS |
+ // Make sure we don't skip backwards... |
+ if (tOffset <= old) { |
+ tOffset = old + 1; |
+ } |
+#endif |
+ |
+ break; |
+ } |
+ |
+ pIndex -= 1; |
+ } |
+ |
+ if (pIndex < 0) { |
+ // We made it back to the beginning of the pattern, |
+ // which means we matched it all. Return the location. |
+ const CEI firstCEI = *target->prevCE(tIndex - 1); |
+ const CEI lastCEI = *target->prevCE(lIndex); |
+ int32_t mStart = firstCEI.lowOffset; |
+ int32_t minLimit = lastCEI.lowOffset; |
+ int32_t maxLimit = lastCEI.highOffset; |
+ int32_t mLimit; |
+ UBool found = TRUE; |
+ |
+ target->setOffset(/*tOffset*/maxLimit); |
+ |
+ const CEI nextCEI = *target->nextCE(0); |
+ |
+ if (nextCEI.lowOffset > maxLimit) { |
+ maxLimit = nextCEI.lowOffset; |
+ } |
+ |
+ if (nextCEI.lowOffset == nextCEI.highOffset && nextCEI.order != (uint32_t)UCOL_NULLORDER) { |
+ found = FALSE; |
+ } |
+ |
+ if (! target->isBreakBoundary(mStart)) { |
+ found = FALSE; |
+ } |
+ |
+ if (firstCEI.lowOffset == firstCEI.highOffset) { |
+ found = FALSE; |
+ } |
+ |
+ mLimit = maxLimit; |
+ if (minLimit < maxLimit) { |
+ int32_t nbb = target->nextBreakBoundary(minLimit); |
+ |
+ if (nbb >= lastCEI.highOffset) { |
+ mLimit = nbb; |
+ } |
+ } |
+ |
+ if (mLimit > maxLimit) { |
+ found = FALSE; |
+ } |
+ |
+ if (! target->isBreakBoundary(mLimit)) { |
+ found = FALSE; |
+ } |
+ |
+ if (! target->isIdentical(pattern, mStart, mLimit)) { |
+ found = FALSE; |
+ } |
+ |
+ if (found) { |
+ start = mStart; |
+ end = mLimit; |
+ |
+ return TRUE; |
+ } |
+ |
+ tOffset += (*goodSuffixTable)[0]; // really? Maybe += 1 or += maxSkip? |
+ } |
+ // Otherwise, we're here because of a mismatch, so keep going.... |
+ } |
+ |
+ // no match |
+ start = -1; |
+ end = -1; |
+ return FALSE; |
+} |
+ |
+U_NAMESPACE_END |
+ |
+#endif // #if !UCONFIG_NO_COLLATION |
Property changes on: icu46/source/i18n/bmsearch.cpp |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |