icu46/source/i18n/bmsearch.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/i18n/bmsearch.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 ******************************************************************************

	3 * Copyright (C) 1996-2010, International Business Machines *

	4 * Corporation and others. All Rights Reserved. *

	5 ******************************************************************************

	6 */

	7

	8 #include "unicode/utypes.h"

	9

	10 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION

	11

	12 #include "unicode/unistr.h"

	13 #include "unicode/putil.h"

	14 #include "unicode/usearch.h"

	15

	16 #include "cmemory.h"

	17 #include "unicode/coll.h"

	18 #include "unicode/tblcoll.h"

	19 #include "unicode/coleitr.h"

	20 #include "unicode/ucoleitr.h"

	21

	22 #include "unicode/regex.h" // TODO: make conditional on regexp being buil t.

	23

	24 #include "unicode/uniset.h"

	25 #include "unicode/uset.h"

	26 #include "unicode/ustring.h"

	27 #include "hash.h"

	28 #include "uhash.h"

	29 #include "ucol_imp.h"

	30 #include "normalizer2impl.h"

	31

	32 #include "unicode/colldata.h"

	33 #include "unicode/bmsearch.h"

	34

	35 U_NAMESPACE_BEGIN

	36

	37 #define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))

	38 #define NEW_ARRAY(type, count) (type ) uprv_malloc((count) sizeof(type))

	39 #define DELETE_ARRAY(array) uprv_free((void *) (array))

	40

	41

	42 struct CEI

	43 {

	44 uint32_t order;

	45 int32_t lowOffset;

	46 int32_t highOffset;

	47 };

	48

	49 class Target : public UMemory

	50 {

	51 public:

	52 Target(UCollator theCollator, const UnicodeString target, int32_t patternL ength, UErrorCode &status);

	53 ~Target();

	54

	55 void setTargetString(const UnicodeString *target);

	56

	57 const CEI *nextCE(int32_t offset);

	58 const CEI *prevCE(int32_t offset);

	59

	60 int32_t stringLength();

	61 UChar charAt(int32_t offset);

	62

	63 UBool isBreakBoundary(int32_t offset);

	64 int32_t nextBreakBoundary(int32_t offset);

	65 int32_t nextSafeBoundary(int32_t offset);

	66

	67 UBool isIdentical(UnicodeString &pattern, int32_t start, int32_t end);

	68

	69 void setOffset(int32_t offset);

	70 void setLast(int32_t last);

	71 int32_t getOffset();

	72

	73 private:

	74 CEI *ceb;

	75 int32_t bufferSize;

	76 int32_t bufferMin;

	77 int32_t bufferMax;

	78

	79 uint32_t strengthMask;

	80 UCollationStrength strength;

	81 uint32_t variableTop;

	82 UBool toShift;

	83 UCollator *coll;

	84 const Normalizer2 &nfd;

	85

	86 const UnicodeString *targetString;

	87 const UChar *targetBuffer;

	88 int32_t targetLength;

	89

	90 UCollationElements *elements;

	91 UBreakIterator *charBreakIterator;

	92 };

	93

	94 Target::Target(UCollator theCollator, const UnicodeString target, int32_t patt ernLength, UErrorCode &status)

	95 : bufferSize(0), bufferMin(0), bufferMax(0),

	96 strengthMask(0), strength(UCOL_PRIMARY), variableTop(0), toShift(FALSE), c oll(theCollator),

	97 nfd(*Normalizer2Factory::getNFDInstance(status)),

	98 targetString(NULL), targetBuffer(NULL), targetLength(0), elements(NULL), c harBreakIterator(NULL)

	99 {

	100 strength = ucol_getStrength(coll);

	101 toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL _SHIFTED;

	102 variableTop = ucol_getVariableTop(coll, &status);

	103

	104 // find the largest expansion

	105 uint8_t maxExpansion = 0;

	106 for (const uint8_t expansion = coll->expansionCESize; expansion != 0; expa nsion += 1) {

	107 if (*expansion > maxExpansion) {

	108 maxExpansion = *expansion;

	109 }

	110 }

	111

	112 // room for an extra character on each end, plus 4 for safety

	113 bufferSize = patternLength + (2 * maxExpansion) + 4;

	114

	115 ceb = NEW_ARRAY(CEI, bufferSize);

	116

	117 if (ceb == NULL) {

	118 status = U_MEMORY_ALLOCATION_ERROR;

	119 return;

	120 }

	121

	122 if (target != NULL) {

	123 setTargetString(target);

	124 }

	125

	126 switch (strength)

	127 {

	128 default:

	129 strengthMask \|= UCOL_TERTIARYORDERMASK;

	130 /* fall through */

	131

	132 case UCOL_SECONDARY:

	133 strengthMask \|= UCOL_SECONDARYORDERMASK;

	134 /* fall through */

	135

	136 case UCOL_PRIMARY:

	137 strengthMask \|= UCOL_PRIMARYORDERMASK;

	138 }

	139 }

	140

	141 Target::~Target()

	142 {

	143 ubrk_close(charBreakIterator);

	144 ucol_closeElements(elements);

	145

	146 DELETE_ARRAY(ceb);

	147 }

	148

	149 void Target::setTargetString(const UnicodeString *target)

	150 {

	151 if (charBreakIterator != NULL) {

	152 ubrk_close(charBreakIterator);

	153 ucol_closeElements(elements);

	154 }

	155

	156 targetString = target;

	157

	158 if (targetString != NULL) {

	159 UErrorCode status = U_ZERO_ERROR;

	160

	161 targetBuffer = targetString->getBuffer();

	162 targetLength = targetString->length();

	163

	164 elements = ucol_openElements(coll, target->getBuffer(), target->length() , &status);

	165 ucol_forceHanImplicit(elements, &status);

	166

	167 charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status),

	168 targetBuffer, targetLength, &status);

	169 } else {

	170 targetBuffer = NULL;

	171 targetLength = 0;

	172 }

	173 }

	174

	175 const CEI *Target::nextCE(int32_t offset)

	176 {

	177 UErrorCode status = U_ZERO_ERROR;

	178 int32_t low = -1, high = -1;

	179 uint32_t order;

	180 UBool cont = FALSE;

	181

	182 if (offset >= bufferMin && offset < bufferMax) {

	183 return &ceb[offset];

	184 }

	185

	186 if (bufferMax >= bufferSize \|\| offset != bufferMax) {

	187 return NULL;

	188 }

	189

	190 do {

	191 low = ucol_getOffset(elements);

	192 order = ucol_next(elements, &status);

	193 high = ucol_getOffset(elements);

	194

	195 if (order == (uint32_t)UCOL_NULLORDER) {

	196 //high = low = -1;

	197 break;

	198 }

	199

	200 cont = isContinuation(order);

	201 order &= strengthMask;

	202

	203 if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) {

	204 if (strength >= UCOL_QUATERNARY) {

	205 order &= UCOL_PRIMARYORDERMASK;

	206 } else {

	207 order = UCOL_IGNORABLE;

	208 }

	209 }

	210 } while (order == UCOL_IGNORABLE);

	211

	212 if (cont) {

	213 order \|= UCOL_CONTINUATION_MARKER;

	214 }

	215

	216 ceb[offset].order = order;

	217 ceb[offset].lowOffset = low;

	218 ceb[offset].highOffset = high;

	219

	220 bufferMax += 1;

	221

	222 return &ceb[offset];

	223 }

	224

	225 const CEI *Target::prevCE(int32_t offset)

	226 {

	227 UErrorCode status = U_ZERO_ERROR;

	228 int32_t low = -1, high = -1;

	229 uint32_t order;

	230 UBool cont = FALSE;

	231

	232 if (offset >= bufferMin && offset < bufferMax) {

	233 return &ceb[offset];

	234 }

	235

	236 if (bufferMax >= bufferSize \|\| offset != bufferMax) {

	237 return NULL;

	238 }

	239

	240 do {

	241 high = ucol_getOffset(elements);

	242 order = ucol_previous(elements, &status);

	243 low = ucol_getOffset(elements);

	244

	245 if (order == (uint32_t)UCOL_NULLORDER) {

	246 break;

	247 }

	248

	249 cont = isContinuation(order);

	250 order &= strengthMask;

	251

	252 if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) {

	253 if (strength >= UCOL_QUATERNARY) {

	254 order &= UCOL_PRIMARYORDERMASK;

	255 } else {

	256 order = UCOL_IGNORABLE;

	257 }

	258 }

	259 } while (order == UCOL_IGNORABLE);

	260

	261 bufferMax += 1;

	262

	263 if (cont) {

	264 order \|= UCOL_CONTINUATION_MARKER;

	265 }

	266

	267 ceb[offset].order = order;

	268 ceb[offset].lowOffset = low;

	269 ceb[offset].highOffset = high;

	270

	271 return &ceb[offset];

	272 }

	273

	274 int32_t Target::stringLength()

	275 {

	276 if (targetString != NULL) {

	277 return targetLength;

	278 }

	279

	280 return 0;

	281 }

	282

	283 UChar Target::charAt(int32_t offset)

	284 {

	285 if (targetString != NULL) {

	286 return targetBuffer[offset];

	287 }

	288

	289 return 0x0000;

	290 }

	291

	292 void Target::setOffset(int32_t offset)

	293 {

	294 UErrorCode status = U_ZERO_ERROR;

	295

	296 bufferMin = 0;

	297 bufferMax = 0;

	298

	299 ucol_setOffset(elements, offset, &status);

	300 }

	301

	302 void Target::setLast(int32_t last)

	303 {

	304 UErrorCode status = U_ZERO_ERROR;

	305

	306 bufferMin = 0;

	307 bufferMax = 1;

	308

	309 ceb[0].order = UCOL_NULLORDER;

	310 ceb[0].lowOffset = last;

	311 ceb[0].highOffset = last;

	312

	313 ucol_setOffset(elements, last, &status);

	314 }

	315

	316 int32_t Target::getOffset()

	317 {

	318 return ucol_getOffset(elements);

	319 }

	320

	321 UBool Target::isBreakBoundary(int32_t offset)

	322 {

	323 return ubrk_isBoundary(charBreakIterator, offset);

	324 }

	325

	326 int32_t Target::nextBreakBoundary(int32_t offset)

	327 {

	328 return ubrk_following(charBreakIterator, offset);

	329 }

	330

	331 int32_t Target::nextSafeBoundary(int32_t offset)

	332 {

	333 while (offset < targetLength) {

	334 //UChar ch = charAt(offset);

	335 UChar ch = targetBuffer[offset];

	336

	337 if (U_IS_LEAD(ch) \|\| ! ucol_unsafeCP(ch, coll)) {

	338 return offset;

	339 }

	340

	341 offset += 1;

	342 }

	343

	344 return targetLength;

	345 }

	346

	347 UBool Target::isIdentical(UnicodeString &pattern, int32_t start, int32_t end)

	348 {

	349 if (strength < UCOL_IDENTICAL) {

	350 return TRUE;

	351 }

	352

	353 // Note: We could use Normalizer::compare() or similar, but for short string s

	354 // which may not be in FCD it might be faster to just NFD them.

	355 UErrorCode status = U_ZERO_ERROR;

	356 UnicodeString t2, p2;

	357 nfd.normalize(UnicodeString(FALSE, targetBuffer + start, end - start), t2, s tatus);

	358 nfd.normalize(pattern, p2, status);

	359 // return FALSE if NFD failed

	360 return U_SUCCESS(status) && t2 == p2;

	361 }

	362

	363 #define HASH_TABLE_SIZE 257

	364

	365 class BadCharacterTable : public UMemory

	366 {

	367 public:

	368 BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status);

	369 ~BadCharacterTable();

	370

	371 int32_t operator[](uint32_t ce) const;

	372 int32_t getMaxSkip() const;

	373 int32_t minLengthInChars(int32_t index);

	374

	375 private:

	376 static int32_t hash(uint32_t ce);

	377

	378 int32_t maxSkip;

	379 int32_t badCharacterTable[HASH_TABLE_SIZE];

	380

	381 int32_t *minLengthCache;

	382 };

	383

	384 BadCharacterTable::BadCharacterTable(CEList &patternCEs, CollData *data, UErrorC ode &status)

	385 : minLengthCache(NULL)

	386 {

	387 int32_t plen = patternCEs.size();

	388

	389 // ** need a better way to deal with this **

	390 if (U_FAILURE(status) \|\| plen == 0) {

	391 return;

	392 }

	393

	394 int32_t *history = NEW_ARRAY(int32_t, plen);

	395

	396 if (history == NULL) {

	397 status = U_MEMORY_ALLOCATION_ERROR;

	398 return;

	399 }

	400

	401 for (int32_t i = 0; i < plen; i += 1) {

	402 history[i] = -1;

	403 }

	404

	405 minLengthCache = NEW_ARRAY(int32_t, plen + 1);

	406

	407 if (minLengthCache == NULL) {

	408 DELETE_ARRAY(history);

	409 status = U_MEMORY_ALLOCATION_ERROR;

	410 return;

	411 }

	412

	413 maxSkip = minLengthCache[0] = data->minLengthInChars(&patternCEs, 0, history );

	414

	415 for(int32_t j = 0; j < HASH_TABLE_SIZE; j += 1) {

	416 badCharacterTable[j] = maxSkip;

	417 }

	418

	419 for(int32_t p = 1; p < plen; p += 1) {

	420 minLengthCache[p] = data->minLengthInChars(&patternCEs, p, history);

	421

	422 // Make sure this entry is not bigger than the previous one.

	423 // Otherwise, we might skip too far in some cases.

	424 if (minLengthCache[p] < 0 \|\| minLengthCache[p] > minLengthCache[p - 1]) {

	425 minLengthCache[p] = minLengthCache[p - 1];

	426 }

	427 }

	428

	429 minLengthCache[plen] = 0;

	430

	431 for(int32_t p = 0; p < plen - 1; p += 1) {

	432 badCharacterTable[hash(patternCEs[p])] = minLengthCache[p + 1];

	433 }

	434

	435 DELETE_ARRAY(history);

	436 }

	437

	438 BadCharacterTable::~BadCharacterTable()

	439 {

	440 DELETE_ARRAY(minLengthCache);

	441 }

	442

	443 int32_t BadCharacterTable::operator[](uint32_t ce) const

	444 {

	445 return badCharacterTable[hash(ce)];

	446 }

	447

	448 int32_t BadCharacterTable::getMaxSkip() const

	449 {

	450 return maxSkip;

	451 }

	452

	453 int32_t BadCharacterTable::minLengthInChars(int32_t index)

	454 {

	455 return minLengthCache[index];

	456 }

	457

	458 int32_t BadCharacterTable::hash(uint32_t ce)

	459 {

	460 return UCOL_PRIMARYORDER(ce) % HASH_TABLE_SIZE;

	461 }

	462

	463 class GoodSuffixTable : public UMemory

	464 {

	465 public:

	466 GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UE rrorCode &status);

	467 ~GoodSuffixTable();

	468

	469 int32_t operator[](int32_t offset) const;

	470

	471 private:

	472 int32_t *goodSuffixTable;

	473 };

	474

	475 GoodSuffixTable::GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badChara cterTable, UErrorCode &status)

	476 : goodSuffixTable(NULL)

	477 {

	478 int32_t patlen = patternCEs.size();

	479

	480 // ** need a better way to deal with this **

	481 if (U_FAILURE(status) \|\| patlen <= 0) {

	482 return;

	483 }

	484

	485 int32_t *suff = NEW_ARRAY(int32_t, patlen);

	486 int32_t start = patlen - 1, end = - 1;

	487 int32_t maxSkip = badCharacterTable.getMaxSkip();

	488

	489 if (suff == NULL) {

	490 status = U_MEMORY_ALLOCATION_ERROR;

	491 return;

	492 }

	493

	494 // initialze suff

	495 suff[patlen - 1] = patlen;

	496

	497 for (int32_t i = patlen - 2; i >= 0; i -= 1) {

	498 // (i > start) means we're inside the last suffix match we found

	499 // ((patlen - 1) - end) is how far the end of that match is from end of pattern

	500 // (i - start) is how far we are from start of that match

	501 // (i + (patlen - 1) - end) is index of same character at end of pattern

	502 // so if any suffix match at that character doesn't extend beyond the la st match,

	503 // it's the suffix for this character as well

	504 if (i > start && suff[i + patlen - 1 - end] < i - start) {

	505 suff[i] = suff[i + patlen - 1 - end];

	506 } else {

	507 start = end = i;

	508

	509 int32_t s = patlen;

	510

	511 while (start >= 0 && patternCEs[start] == patternCEs[--s]) {

	512 start -= 1;

	513 }

	514

	515 suff[i] = end - start;

	516 }

	517 }

	518

	519 // now build goodSuffixTable

	520 goodSuffixTable = NEW_ARRAY(int32_t, patlen);

	521

	522 if (goodSuffixTable == NULL) {

	523 DELETE_ARRAY(suff);

	524 status = U_MEMORY_ALLOCATION_ERROR;

	525 return;

	526 }

	527

	528

	529 // initialize entries to minLengthInChars of the pattern

	530 for (int32_t i = 0; i < patlen; i += 1) {

	531 goodSuffixTable[i] = maxSkip;

	532 }

	533

	534 int32_t prefix = 0;

	535

	536 for (int32_t i = patlen - /1/ 2; i >= 0; i -= 1) {

	537 if (suff[i] == i + 1) {

	538 // this matching suffix is a prefix of the pattern

	539 int32_t prefixSkip = badCharacterTable.minLengthInChars(i + 1);

	540

	541 // for any mis-match before this suffix, we should skip

	542 // so that the front of the pattern (i.e. the prefix)

	543 // lines up with the front of the suffix.

	544 // (patlen - 1 - i) is the start of the suffix

	545 while (prefix < patlen - 1 - i) {

	546 // value of maxSkip means never set...

	547 if (goodSuffixTable[prefix] == maxSkip) {

	548 goodSuffixTable[prefix] = prefixSkip;

	549 }

	550

	551 prefix += 1;

	552 }

	553 }

	554 }

	555

	556 for (int32_t i = 0; i < patlen - 1; i += 1) {

	557 goodSuffixTable[patlen - 1 - suff[i]] = badCharacterTable.minLengthInCha rs(i + 1);

	558 }

	559

	560 DELETE_ARRAY(suff);

	561 }

	562

	563 GoodSuffixTable::~GoodSuffixTable()

	564 {

	565 DELETE_ARRAY(goodSuffixTable);

	566 }

	567

	568 int32_t GoodSuffixTable::operator[](int32_t offset) const

	569 {

	570 return goodSuffixTable[offset];

	571 }

	572

	573 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BoyerMooreSearch)

	574

	575

	576 UBool BoyerMooreSearch::empty()

	577 {

	578 return patCEs->size() <= 0;

	579 }

	580

	581 CollData *BoyerMooreSearch::getData()

	582 {

	583 return data;

	584 }

	585

	586 CEList *BoyerMooreSearch::getPatternCEs()

	587 {

	588 return patCEs;

	589 }

	590

	591 BadCharacterTable *BoyerMooreSearch::getBadCharacterTable()

	592 {

	593 return badCharacterTable;

	594 }

	595

	596 GoodSuffixTable *BoyerMooreSearch::getGoodSuffixTable()

	597 {

	598 return goodSuffixTable;

	599 }

	600

	601 BoyerMooreSearch::BoyerMooreSearch(CollData theData, const UnicodeString &patte rnString, const UnicodeString targetString,

	602 UErrorCode &status)

	603 : data(theData), patCEs(NULL), badCharacterTable(NULL), goodSuffixTable(NULL ), pattern(patternString), target(NULL)

	604 {

	605

	606 if (U_FAILURE(status)) {

	607 return;

	608 }

	609

	610 UCollator *collator = data->getCollator();

	611

	612 patCEs = new CEList(collator, patternString, status);

	613

	614 if (patCEs == NULL \|\| U_FAILURE(status)) {

	615 return;

	616 }

	617

	618 badCharacterTable = new BadCharacterTable(*patCEs, data, status);

	619

	620 if (badCharacterTable == NULL \|\| U_FAILURE(status)) {

	621 return;

	622 }

	623

	624 goodSuffixTable = new GoodSuffixTable(patCEs, badCharacterTable, status);

	625

	626 if (targetString != NULL) {

	627 target = new Target(collator, targetString, patCEs->size(), status);

	628 }

	629 }

	630

	631 BoyerMooreSearch::~BoyerMooreSearch()

	632 {

	633 delete target;

	634 delete goodSuffixTable;

	635 delete badCharacterTable;

	636 delete patCEs;

	637 }

	638

	639 void BoyerMooreSearch::setTargetString(const UnicodeString *targetString, UError Code &status)

	640 {

	641 if (U_FAILURE(status)) {

	642 return;

	643 }

	644

	645 if (target == NULL) {

	646 target = new Target(data->getCollator(), targetString, patCEs->size(), s tatus);

	647 } else {

	648 target->setTargetString(targetString);

	649 }

	650 }

	651

	652 // ** main flow of this code from Laura Werner's "Unicode Text Searching in Ja va" paper. **

	653 /*

	654 * TODO:

	655 * * deal with trailing (and leading?) ignorables.

	656 * * Adding BoyerMooreSearch object slowed it down. How can we speed it up?

	657 */

	658 UBool BoyerMooreSearch::search(int32_t offset, int32_t &start, int32_t &end)

	659 {

	660 /UCollator coll =*/ data->getCollator();

	661 int32_t plen = patCEs->size();

	662 int32_t tlen = target->stringLength();

	663 int32_t maxSkip = badCharacterTable->getMaxSkip();

	664 int32_t tOffset = offset + maxSkip;

	665

	666 if (plen <= 0) {

	667 // Searching for a zero length pattern always fails.

	668 start = end = -1;

	669 return FALSE;

	670 }

	671

	672 while (tOffset <= tlen) {

	673 int32_t pIndex = plen - 1;

	674 int32_t tIndex = 0;

	675 int32_t lIndex = 0;

	676

	677 if (tOffset < tlen) {

	678 // ** we really want to skip ahead enough to **

	679 // ** be sure we get at least 1 non-ignorable **

	680 // ** CE after the end of the pattern. **

	681 int32_t next = target->nextSafeBoundary(tOffset + 1);

	682

	683 target->setOffset(next);

	684

	685 for (lIndex = 0; ; lIndex += 1) {

	686 const CEI *cei = target->prevCE(lIndex);

	687 int32_t low = cei->lowOffset;

	688 int32_t high = cei->highOffset;

	689

	690 if (high == 0 \|\| (low < high && low <= tOffset)) {

	691 if (low < tOffset) {

	692 while (lIndex >= 0 && target->prevCE(lIndex)->highOffset == high) {

	693 lIndex -= 1;

	694 }

	695

	696 if (high > tOffset) {

	697 tOffset = high;

	698 }

	699 }

	700

	701 break;

	702 }

	703 }

	704 } else {

	705 target->setLast(tOffset);

	706 lIndex = 0;

	707 }

	708

	709 tIndex = ++lIndex;

	710

	711 // Iterate backward until we hit the beginning of the pattern

	712 while (pIndex >= 0) {

	713 uint32_t pce = (*patCEs)[pIndex];

	714 const CEI *tcei = target->prevCE(tIndex++);

	715

	716

	717 if (tcei->order != pce) {

	718 // There is a mismatch at this position. Decide how far

	719 // over to shift the pattern, then try again.

	720

	721 int32_t gsOffset = tOffset + (*goodSuffixTable)[pIndex];

	722 #ifdef EXTRA_CAUTIOUS

	723 int32_t old = tOffset;

	724 #endif

	725

	726 tOffset += (*badCharacterTable)[tcei->order] - badCharacterTable ->minLengthInChars(pIndex + 1);

	727

	728 if (gsOffset > tOffset) {

	729 tOffset = gsOffset;

	730 }

	731

	732 #ifdef EXTRA_CAUTIOUS

	733 // Make sure we don't skip backwards...

	734 if (tOffset <= old) {

	735 tOffset = old + 1;

	736 }

	737 #endif

	738

	739 break;

	740 }

	741

	742 pIndex -= 1;

	743 }

	744

	745 if (pIndex < 0) {

	746 // We made it back to the beginning of the pattern,

	747 // which means we matched it all. Return the location.

	748 const CEI firstCEI = *target->prevCE(tIndex - 1);

	749 const CEI lastCEI = *target->prevCE(lIndex);

	750 int32_t mStart = firstCEI.lowOffset;

	751 int32_t minLimit = lastCEI.lowOffset;

	752 int32_t maxLimit = lastCEI.highOffset;

	753 int32_t mLimit;

	754 UBool found = TRUE;

	755

	756 target->setOffset(/tOffset/maxLimit);

	757

	758 const CEI nextCEI = *target->nextCE(0);

	759

	760 if (nextCEI.lowOffset > maxLimit) {

	761 maxLimit = nextCEI.lowOffset;

	762 }

	763

	764 if (nextCEI.lowOffset == nextCEI.highOffset && nextCEI.order != (uin t32_t)UCOL_NULLORDER) {

	765 found = FALSE;

	766 }

	767

	768 if (! target->isBreakBoundary(mStart)) {

	769 found = FALSE;

	770 }

	771

	772 if (firstCEI.lowOffset == firstCEI.highOffset) {

	773 found = FALSE;

	774 }

	775

	776 mLimit = maxLimit;

	777 if (minLimit < maxLimit) {

	778 int32_t nbb = target->nextBreakBoundary(minLimit);

	779

	780 if (nbb >= lastCEI.highOffset) {

	781 mLimit = nbb;

	782 }

	783 }

	784

	785 if (mLimit > maxLimit) {

	786 found = FALSE;

	787 }

	788

	789 if (! target->isBreakBoundary(mLimit)) {

	790 found = FALSE;

	791 }

	792

	793 if (! target->isIdentical(pattern, mStart, mLimit)) {

	794 found = FALSE;

	795 }

	796

	797 if (found) {

	798 start = mStart;

	799 end = mLimit;

	800

	801 return TRUE;

	802 }

	803

	804 tOffset += (*goodSuffixTable)[0]; // really? Maybe += 1 or += maxSki p?

	805 }

	806 // Otherwise, we're here because of a mismatch, so keep going....

	807 }

	808

	809 // no match

	810 start = -1;

	811 end = -1;

	812 return FALSE;

	813 }

	814

	815 U_NAMESPACE_END

	816

	817 #endif // #if !UCONFIG_NO_COLLATION

OLD	NEW

« no previous file with comments | « icu46/source/i18n/bms.cpp ('k') | icu46/source/i18n/bocsu.h » ('j') | no next file with comments »