icu46/source/test/intltest/ssearch.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/test/intltest/ssearch.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 **********************************************************************

	3 * Copyright (C) 2005-2010, International Business Machines

	4 * Corporation and others. All Rights Reserved.

	5 **********************************************************************

	6 */

	7

	8

	9 #include "unicode/utypes.h"

	10

	11 #if !UCONFIG_NO_COLLATION

	12

	13 #include "unicode/unistr.h"

	14 #include "unicode/putil.h"

	15 #include "unicode/usearch.h"

	16

	17 #include "cmemory.h"

	18 #include "unicode/coll.h"

	19 #include "unicode/tblcoll.h"

	20 #include "unicode/coleitr.h"

	21 #include "unicode/ucoleitr.h"

	22

	23 #include "unicode/regex.h" // TODO: make conditional on regexp being buil t.

	24

	25 #include "unicode/uniset.h"

	26 #include "unicode/uset.h"

	27 #include "unicode/ustring.h"

	28 #include "hash.h"

	29 #include "uhash.h"

	30 #include "ucol_imp.h"

	31

	32 #include "intltest.h"

	33 #include "ssearch.h"

	34

	35 #include "unicode/colldata.h"

	36 #include "unicode/bmsearch.h"

	37 #include "unicode/bms.h"

	38

	39 #include "xmlparser.h"

	40 #include "ucbuf.h"

	41

	42 #include <stdlib.h>

	43 #include <string.h>

	44 #include <stdio.h>

	45

	46 char testId[100];

	47

	48 #define TEST_ASSERT(x) {if (!(x)) { \

	49 errln("Failure in file %s, line %d, test ID = \"%s\"", __FILE__, __LINE__, t estId);}}

	50

	51 #define TEST_ASSERT_M(x, m) {if (!(x)) { \

	52 errln("Failure in file %s, line %d. \"%s\"", __FILE__, __LINE__, m);return ;}}

	53

	54 #define TEST_ASSERT_SUCCESS(errcode) {if (U_FAILURE(errcode)) { \

	55 dataerrln("Failure in file %s, line %d, test ID \"%s\", status = \"%s\"", \

	56 __FILE__, __LINE__, testId, u_errorName(errcode));}}

	57

	58 #define ARRAY_SIZE(array) (sizeof array / sizeof array[0])

	59 #define NEW_ARRAY(type, count) (type ) uprv_malloc((count) sizeof(type))

	60 #define DELETE_ARRAY(array) uprv_free((void *) (array))

	61

	62 //---------------------------------------------------------------------------

	63 //

	64 // Test class boilerplate

	65 //

	66 //---------------------------------------------------------------------------

	67 SSearchTest::SSearchTest()

	68 {

	69 }

	70

	71 SSearchTest::~SSearchTest()

	72 {

	73 }

	74

	75 void SSearchTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *params )

	76 {

	77 if (exec) logln("TestSuite SSearchTest: ");

	78 switch (index) {

	79 #if !UCONFIG_NO_BREAK_ITERATION

	80 case 0: name = "searchTest";

	81 if (exec) searchTest();

	82 break;

	83

	84 case 1: name = "offsetTest";

	85 if (exec) offsetTest();

	86 break;

	87

	88 case 2: name = "monkeyTest";

	89 if (exec) monkeyTest(params);

	90 break;

	91

	92 case 3: name = "bmMonkeyTest";

	93 if (exec) bmMonkeyTest(params);

	94 break;

	95

	96 case 4: name = "boyerMooreTest";

	97 if (exec) boyerMooreTest();

	98 break;

	99

	100 case 5: name = "goodSuffixTest";

	101 if (exec) goodSuffixTest();

	102 break;

	103

	104 case 6: name = "searchTime";

	105 if (exec) searchTime();

	106 break;

	107

	108 case 7: name = "bmsTest";

	109 if (exec) bmsTest();

	110 break;

	111

	112 case 8: name = "bmSearchTest";

	113 if (exec) bmSearchTest();

	114 break;

	115

	116 case 9: name = "udhrTest";

	117 if (exec) udhrTest();

	118 break;

	119 case 10: name = "stringListTest";

	120 if (exec) stringListTest();

	121 break;

	122 #endif

	123 default: name = "";

	124 break; //needed to end loop

	125 }

	126 }

	127

	128

	129 #if !UCONFIG_NO_BREAK_ITERATION

	130

	131 #define PATH_BUFFER_SIZE 2048

	132 const char SSearchTest::getPath(char buffer[2048], const char filename) {

	133 UErrorCode status = U_ZERO_ERROR;

	134 const char *testDataDirectory = IntlTest::getSourceTestData(status);

	135

	136 if (U_FAILURE(status) \|\| strlen(testDataDirectory) + strlen(filename) + 1 >= PATH_BUFFER_SIZE) {

	137 errln("ERROR: getPath() failed - %s", u_errorName(status));

	138 return NULL;

	139 }

	140

	141 strcpy(buffer, testDataDirectory);

	142 strcat(buffer, filename);

	143 return buffer;

	144 }

	145

	146

	147 void SSearchTest::searchTest()

	148 {

	149 #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FILE_IO

	150 UErrorCode status = U_ZERO_ERROR;

	151 char path[PATH_BUFFER_SIZE];

	152 const char *testFilePath = getPath(path, "ssearch.xml");

	153

	154 if (testFilePath == NULL) {

	155 return; /* Couldn't get path: error message already output. */

	156 }

	157

	158 LocalPointer<UXMLParser> parser(UXMLParser::createParser(status));

	159 TEST_ASSERT_SUCCESS(status);

	160 LocalPointer<UXMLElement> root(parser->parseFile(testFilePath, status));

	161 TEST_ASSERT_SUCCESS(status);

	162 if (U_FAILURE(status)) {

	163 return;

	164 }

	165

	166 const UnicodeString *debugTestCase = root->getAttribute("debug");

	167 if (debugTestCase != NULL) {

	168 // setenv("USEARCH_DEBUG", "1", 1);

	169 }

	170

	171

	172 const UXMLElement *testCase;

	173 int32_t tc = 0;

	174

	175 while((testCase = root->nextChildElement(tc)) != NULL) {

	176

	177 if (testCase->getTagName().compare("test-case") != 0) {

	178 errln("ssearch, unrecognized XML Element in test file");

	179 continue;

	180 }

	181 const UnicodeString *id = testCase->getAttribute("id");

	182 *testId = 0;

	183 if (id != NULL) {

	184 id->extract(0, id->length(), testId, sizeof(testId), US_INV);

	185 }

	186

	187 // If debugging test case has been specified and this is not it, skip to next.

	188 if (id!=NULL && debugTestCase!=NULL && id != debugTestCase) {

	189 continue;

	190 }

	191 //

	192 // Get the requested collation strength.

	193 // Default is tertiary if the XML attribute is missing from the test case.

	194 //

	195 const UnicodeString *strength = testCase->getAttribute("strength");

	196 UColAttributeValue collatorStrength = UCOL_PRIMARY;

	197 if (strength==NULL) { collatorStrength = UCOL_TERTIARY;}

	198 else if (*strength=="PRIMARY") { collatorStrength = UCOL_PRIMARY;}

	199 else if (*strength=="SECONDARY") { collatorStrength = UCOL_SECONDARY;}

	200 else if (*strength=="TERTIARY") { collatorStrength = UCOL_TERTIARY;}

	201 else if (*strength=="QUATERNARY") { collatorStrength = UCOL_QUATERNARY;}

	202 else if (*strength=="IDENTICAL") { collatorStrength = UCOL_IDENTICAL;}

	203 else {

	204 // Bogus value supplied for strength. Shouldn't happen, even from

	205 // typos, if the XML source has been validated.

	206 // This assert is a little deceiving in that strength can be

	207 // any of the allowed values, not just TERTIARY, but it will

	208 // do the job of getting the error output.

	209 TEST_ASSERT(*strength=="TERTIARY")

	210 }

	211

	212 //

	213 // Get the collator normalization flag. Default is UCOL_OFF.

	214 //

	215 UColAttributeValue normalize = UCOL_OFF;

	216 const UnicodeString *norm = testCase->getAttribute("norm");

	217 TEST_ASSERT (norm==NULL \|\| norm=="ON" \|\| norm=="OFF");

	218 if (norm!=NULL && *norm=="ON") {

	219 normalize = UCOL_ON;

	220 }

	221

	222 //

	223 // Get the alternate_handling flag. Default is UCOL_NON_IGNORABLE.

	224 //

	225 UColAttributeValue alternateHandling = UCOL_NON_IGNORABLE;

	226 const UnicodeString *alt = testCase->getAttribute("alternate_handling");

	227 TEST_ASSERT (alt == NULL \|\| alt == "SHIFTED" \|\| alt == "NON_IGNORABLE" );

	228 if (alt != NULL && *alt == "SHIFTED") {

	229 alternateHandling = UCOL_SHIFTED;

	230 }

	231

	232 const UnicodeString defLocale("en");

	233 char clocale[100];

	234 const UnicodeString *locale = testCase->getAttribute("locale");

	235 if (locale == NULL \|\| locale->length()==0) {

	236 locale = &defLocale;

	237 };

	238 locale->extract(0, locale->length(), clocale, sizeof(clocale), NULL);

	239

	240

	241 UnicodeString text;

	242 UnicodeString target;

	243 UnicodeString pattern;

	244 int32_t expectedMatchStart = -1;

	245 int32_t expectedMatchLimit = -1;

	246 const UXMLElement *n;

	247 int32_t nodeCount = 0;

	248

	249 n = testCase->getChildElement("pattern");

	250 TEST_ASSERT(n != NULL);

	251 if (n==NULL) {

	252 continue;

	253 }

	254 text = n->getText(FALSE);

	255 text = text.unescape();

	256 pattern.append(text);

	257 nodeCount++;

	258

	259 n = testCase->getChildElement("pre");

	260 if (n!=NULL) {

	261 text = n->getText(FALSE);

	262 text = text.unescape();

	263 target.append(text);

	264 nodeCount++;

	265 }

	266

	267 n = testCase->getChildElement("m");

	268 if (n!=NULL) {

	269 expectedMatchStart = target.length();

	270 text = n->getText(FALSE);

	271 text = text.unescape();

	272 target.append(text);

	273 expectedMatchLimit = target.length();

	274 nodeCount++;

	275 }

	276

	277 n = testCase->getChildElement("post");

	278 if (n!=NULL) {

	279 text = n->getText(FALSE);

	280 text = text.unescape();

	281 target.append(text);

	282 nodeCount++;

	283 }

	284

	285 // Check that there weren't extra things in the XML

	286 TEST_ASSERT(nodeCount == testCase->countChildren());

	287

	288 // Open a collator and StringSearch based on the parameters

	289 // obtained from the XML.

	290 //

	291 status = U_ZERO_ERROR;

	292 LocalUCollatorPointer collator(ucol_open(clocale, &status));

	293 ucol_setStrength(collator.getAlias(), collatorStrength);

	294 ucol_setAttribute(collator.getAlias(), UCOL_NORMALIZATION_MODE, normaliz e, &status);

	295 ucol_setAttribute(collator.getAlias(), UCOL_ALTERNATE_HANDLING, alternat eHandling, &status);

	296 LocalUStringSearchPointer uss(usearch_openFromCollator(pattern.getBuffer (), pattern.length(),

	297 target.getBuffer( ), target.length(),

	298 collator.getAlias (),

	299 NULL, // the break iterator

	300 &status));

	301

	302 TEST_ASSERT_SUCCESS(status);

	303 if (U_FAILURE(status)) {

	304 continue;

	305 }

	306

	307 int32_t foundStart = 0;

	308 int32_t foundLimit = 0;

	309 UBool foundMatch;

	310

	311 //

	312 // Do the search, check the match result against the expected results.

	313 //

	314 foundMatch= usearch_search(uss.getAlias(), 0, &foundStart, &foundLimit, &status);

	315 TEST_ASSERT_SUCCESS(status);

	316 if ((foundMatch && expectedMatchStart<0) \|\|

	317 (foundStart != expectedMatchStart) \|\|

	318 (foundLimit != expectedMatchLimit)) {

	319 TEST_ASSERT(FALSE); // ouput generic error position

	320 infoln("Found, expected match start = %d, %d \n"

	321 "Found, expected match limit = %d, %d",

	322 foundStart, expectedMatchStart, foundLimit, expectedMatchLimit);

	323 }

	324

	325 // In case there are other matches...

	326 // (should we only do this if the test case passed?)

	327 while (foundMatch) {

	328 expectedMatchStart = foundStart;

	329 expectedMatchLimit = foundLimit;

	330

	331 foundMatch = usearch_search(uss.getAlias(), foundLimit, &foundStart, &foundLimit, &status);

	332 }

	333

	334 uss.adoptInstead(usearch_openFromCollator(pattern.getBuffer(), pattern.l ength(),

	335 target.getBuffer(), target.length(),

	336 collator.getAlias(),

	337 NULL,

	338 &status));

	339

	340 //

	341 // Do the backwards search, check the match result against the expected results.

	342 //

	343 foundMatch= usearch_searchBackwards(uss.getAlias(), target.length(), &fo undStart, &foundLimit, &status);

	344 TEST_ASSERT_SUCCESS(status);

	345 if ((foundMatch && expectedMatchStart<0) \|\|

	346 (foundStart != expectedMatchStart) \|\|

	347 (foundLimit != expectedMatchLimit)) {

	348 TEST_ASSERT(FALSE); // ouput generic error position

	349 infoln("Found, expected backwards match start = %d, %d \n"

	350 "Found, expected backwards match limit = %d, %d",

	351 foundStart, expectedMatchStart, foundLimit, expectedMatchLimit);

	352 }

	353 }

	354 #endif

	355 }

	356

	357 struct UdhrTestCase

	358 {

	359 const char *locale;

	360 const char *file;

	361 };

	362

	363 void SSearchTest::udhrTest()

	364 {

	365 UErrorCode status = U_ZERO_ERROR;

	366 char path[PATH_BUFFER_SIZE];

	367 const char *udhrPath = getPath(path, "udhr");

	368

	369 if (udhrPath == NULL) {

	370 // couldn't get path: error message already output...

	371 return;

	372 }

	373

	374 UdhrTestCase testCases[] = {

	375 {"en", "udhr_eng.txt"},

	376 {"de", "udhr_deu_1996.txt"},

	377 {"fr", "udhr_fra.txt"},

	378 {"ru", "udhr_rus.txt"},

	379 {"th", "udhr_tha.txt"},

	380 {"ja", "udhr_jpn.txt"},

	381 {"ko", "udhr_kor.txt"},

	382 {"zh", "udhr_cmn_hans.txt"},

	383 {"zh_Hant", "udhr_cmn_hant.txt"}

	384 };

	385

	386 int32_t testCount = ARRAY_SIZE(testCases);

	387

	388 for (int32_t t = 0; t < testCount; t += 1) {

	389 int32_t len = 0;

	390 char *resolvedFileName = NULL;

	391 const char *encoding = NULL;

	392 UCHARBUF *ucharBuf = NULL;

	393

	394 ucbuf_resolveFileName(udhrPath, testCases[t].file, NULL, &len, &status);

	395 resolvedFileName = NEW_ARRAY(char, len);

	396

	397 if(resolvedFileName == NULL){

	398 continue;

	399 }

	400

	401 if(status == U_BUFFER_OVERFLOW_ERROR){

	402 status = U_ZERO_ERROR;

	403 }

	404

	405 ucbuf_resolveFileName(udhrPath, testCases[t].file, resolvedFileName, &le n, &status);

	406 ucharBuf = ucbuf_open(resolvedFileName, &encoding, TRUE, FALSE, &status) ;

	407

	408 DELETE_ARRAY(resolvedFileName);

	409

	410 if(U_FAILURE(status)){

	411 infoln("Could not open the input file %s. Test skipped\n", testCases [t].file);

	412 continue;

	413 }

	414

	415 int32_t targetLen = 0;

	416 const UChar *target = ucbuf_getBuffer(ucharBuf, &targetLen, &status);

	417

	418 /* The first line of the file contains the pattern */

	419 int32_t start = 0, end = 0, plen = 0;

	420

	421 for(end = start; ; end += 1) {

	422 UChar ch = target[end];

	423

	424 if (ch == 0x000A \|\| ch == 0x000D \|\| ch == 0x2028) {

	425 break;

	426 }

	427 }

	428

	429 plen = end - start;

	430

	431 UChar *pattern = NEW_ARRAY(UChar, plen);

	432 for (int32_t i = 0; i < plen; i += 1) {

	433 pattern[i] = target[start++];

	434 }

	435

	436 int32_t offset = 0;

	437 UCollator *coll = ucol_open(testCases[t].locale, &status);

	438 UCD *ucd = NULL;

	439 BMS *bms = NULL;

	440

	441 if (U_FAILURE(status)) {

	442 errln("Could not open collator for %s", testCases[t].locale);

	443 goto delete_collator;

	444 }

	445

	446 ucd = ucd_open(coll, &status);

	447

	448 if (U_FAILURE(status)) {

	449 errln("Could not open CollData object for %s", testCases[t].locale);

	450 goto delete_ucd;

	451 }

	452

	453 bms = bms_open(ucd, pattern, plen, target, targetLen, &status);

	454

	455 if (U_FAILURE(status)) {

	456 errln("Could not open search object for %s", testCases[t].locale);

	457 goto delete_bms;

	458 }

	459

	460 start = end = -1;

	461 while (bms_search(bms, offset, &start, &end)) {

	462 offset = end;

	463 }

	464

	465 if (offset == 0) {

	466 errln("Could not find pattern - locale: %s, file: %s ", testCases[t] .locale, testCases[t].file);

	467 }

	468

	469 delete_bms:

	470 bms_close(bms);

	471

	472 delete_ucd:

	473 ucd_close(ucd);

	474

	475 delete_collator:

	476 ucol_close(coll);

	477

	478 DELETE_ARRAY(pattern);

	479 ucbuf_close(ucharBuf);

	480 }

	481

	482 ucd_flushCache();

	483 }

	484

	485 void SSearchTest::bmSearchTest()

	486 {

	487 #if !UCONFIG_NO_REGULAR_EXPRESSIONS

	488 UErrorCode status = U_ZERO_ERROR;

	489 char path[PATH_BUFFER_SIZE];

	490 const char *testFilePath = getPath(path, "ssearch.xml");

	491

	492 if (testFilePath == NULL) {

	493 return; /* Couldn't get path: error message already output. */

	494 }

	495

	496 UXMLParser *parser = UXMLParser::createParser(status);

	497 TEST_ASSERT_SUCCESS(status);

	498 UXMLElement *root = parser->parseFile(testFilePath, status);

	499 TEST_ASSERT_SUCCESS(status);

	500 if (U_FAILURE(status)) {

	501 return;

	502 }

	503

	504 const UnicodeString *debugTestCase = root->getAttribute("debug");

	505 if (debugTestCase != NULL) {

	506 // setenv("USEARCH_DEBUG", "1", 1);

	507 }

	508

	509

	510 const UXMLElement *testCase;

	511 int32_t tc = 0;

	512

	513 while((testCase = root->nextChildElement(tc)) != NULL) {

	514

	515 if (testCase->getTagName().compare("test-case") != 0) {

	516 errln("ssearch, unrecognized XML Element in test file");

	517 continue;

	518 }

	519 const UnicodeString *id = testCase->getAttribute("id");

	520 *testId = 0;

	521 if (id != NULL) {

	522 id->extract(0, id->length(), testId, sizeof(testId), US_INV);

	523 }

	524

	525 // If debugging test case has been specified and this is not it, skip to next.

	526 if (id!=NULL && debugTestCase!=NULL && id != debugTestCase) {

	527 continue;

	528 }

	529 //

	530 // Get the requested collation strength.

	531 // Default is tertiary if the XML attribute is missing from the test case.

	532 //

	533 const UnicodeString *strength = testCase->getAttribute("strength");

	534 UColAttributeValue collatorStrength = UCOL_PRIMARY;

	535 if (strength==NULL) { collatorStrength = UCOL_TERTIARY;}

	536 else if (*strength=="PRIMARY") { collatorStrength = UCOL_PRIMARY;}

	537 else if (*strength=="SECONDARY") { collatorStrength = UCOL_SECONDARY;}

	538 else if (*strength=="TERTIARY") { collatorStrength = UCOL_TERTIARY;}

	539 else if (*strength=="QUATERNARY") { collatorStrength = UCOL_QUATERNARY;}

	540 else if (*strength=="IDENTICAL") { collatorStrength = UCOL_IDENTICAL;}

	541 else {

	542 // Bogus value supplied for strength. Shouldn't happen, even from

	543 // typos, if the XML source has been validated.

	544 // This assert is a little deceiving in that strength can be

	545 // any of the allowed values, not just TERTIARY, but it will

	546 // do the job of getting the error output.

	547 TEST_ASSERT(*strength=="TERTIARY")

	548 }

	549

	550 //

	551 // Get the collator normalization flag. Default is UCOL_OFF.

	552 //

	553 UColAttributeValue normalize = UCOL_OFF;

	554 const UnicodeString *norm = testCase->getAttribute("norm");

	555 TEST_ASSERT (norm==NULL \|\| norm=="ON" \|\| norm=="OFF");

	556 if (norm!=NULL && *norm=="ON") {

	557 normalize = UCOL_ON;

	558 }

	559

	560 //

	561 // Get the alternate_handling flag. Default is UCOL_NON_IGNORABLE.

	562 //

	563 UColAttributeValue alternateHandling = UCOL_NON_IGNORABLE;

	564 const UnicodeString *alt = testCase->getAttribute("alternate_handling");

	565 TEST_ASSERT (alt == NULL \|\| alt == "SHIFTED" \|\| alt == "NON_IGNORABLE" );

	566 if (alt != NULL && *alt == "SHIFTED") {

	567 alternateHandling = UCOL_SHIFTED;

	568 }

	569

	570 const UnicodeString defLocale("en");

	571 char clocale[100];

	572 const UnicodeString *locale = testCase->getAttribute("locale");

	573 if (locale == NULL \|\| locale->length()==0) {

	574 locale = &defLocale;

	575 };

	576 locale->extract(0, locale->length(), clocale, sizeof(clocale), NULL);

	577

	578

	579 UnicodeString text;

	580 UnicodeString target;

	581 UnicodeString pattern;

	582 int32_t expectedMatchStart = -1;

	583 int32_t expectedMatchLimit = -1;

	584 const UXMLElement *n;

	585 int32_t nodeCount = 0;

	586

	587 n = testCase->getChildElement("pattern");

	588 TEST_ASSERT(n != NULL);

	589 if (n==NULL) {

	590 continue;

	591 }

	592 text = n->getText(FALSE);

	593 text = text.unescape();

	594 pattern.append(text);

	595 nodeCount++;

	596

	597 n = testCase->getChildElement("pre");

	598 if (n!=NULL) {

	599 text = n->getText(FALSE);

	600 text = text.unescape();

	601 target.append(text);

	602 nodeCount++;

	603 }

	604

	605 n = testCase->getChildElement("m");

	606 if (n!=NULL) {

	607 expectedMatchStart = target.length();

	608 text = n->getText(FALSE);

	609 text = text.unescape();

	610 target.append(text);

	611 expectedMatchLimit = target.length();

	612 nodeCount++;

	613 }

	614

	615 n = testCase->getChildElement("post");

	616 if (n!=NULL) {

	617 text = n->getText(FALSE);

	618 text = text.unescape();

	619 target.append(text);

	620 nodeCount++;

	621 }

	622

	623 // Check that there weren't extra things in the XML

	624 TEST_ASSERT(nodeCount == testCase->countChildren());

	625

	626 // Open a collator and StringSearch based on the parameters

	627 // obtained from the XML.

	628 //

	629 status = U_ZERO_ERROR;

	630 UCollator *collator = ucol_open(clocale, &status);

	631 ucol_setStrength(collator, collatorStrength);

	632 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, normalize, &status) ;

	633 ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, alternateHandling, &status);

	634 UCD *ucd = ucd_open(collator, &status);

	635 BMS *bms = bms_open(ucd, pattern.getBuffer(), pattern.length(), target.g etBuffer(), target.length(), &status);

	636

	637 TEST_ASSERT_SUCCESS(status);

	638 if (U_FAILURE(status)) {

	639 bms_close(bms);

	640 ucd_close(ucd);

	641 ucol_close(collator);

	642 continue;

	643 }

	644

	645 int32_t foundStart = 0;

	646 int32_t foundLimit = 0;

	647 UBool foundMatch;

	648

	649 //

	650 // Do the search, check the match result against the expected results.

	651 //

	652 foundMatch = bms_search(bms, 0, &foundStart, &foundLimit);

	653 //TEST_ASSERT_SUCCESS(status);

	654 if ((foundMatch && expectedMatchStart < 0) \|\|

	655 (foundStart != expectedMatchStart) \|\|

	656 (foundLimit != expectedMatchLimit)) {

	657 TEST_ASSERT(FALSE); // ouput generic error position

	658 infoln("Found, expected match start = %d, %d \n"

	659 "Found, expected match limit = %d, %d",

	660 foundStart, expectedMatchStart, foundLimit, expectedMatchLimit);

	661 }

	662

	663 bms_close(bms);

	664 ucd_close(ucd);

	665 ucol_close(collator);

	666 }

	667

	668 ucd_flushCache();

	669 delete root;

	670 delete parser;

	671 #endif

	672 }

	673

	674 struct Order

	675 {

	676 int32_t order;

	677 int32_t lowOffset;

	678 int32_t highOffset;

	679 };

	680

	681 class OrderList

	682 {

	683 public:

	684 OrderList();

	685 OrderList(UCollator *coll, const UnicodeString &string, int32_t stringOffset = 0);

	686 ~OrderList();

	687

	688 int32_t size(void) const;

	689 void add(int32_t order, int32_t low, int32_t high);

	690 const Order *get(int32_t index) const;

	691 int32_t getLowOffset(int32_t index) const;

	692 int32_t getHighOffset(int32_t index) const;

	693 int32_t getOrder(int32_t index) const;

	694 void reverse(void);

	695 UBool compare(const OrderList &other) const;

	696 UBool matchesAt(int32_t offset, const OrderList &other) const;

	697

	698 private:

	699 Order *list;

	700 int32_t listMax;

	701 int32_t listSize;

	702 };

	703

	704 OrderList::OrderList()

	705 : list(NULL), listMax(16), listSize(0)

	706 {

	707 list = new Order[listMax];

	708 }

	709

	710 OrderList::OrderList(UCollator *coll, const UnicodeString &string, int32_t strin gOffset)

	711 : list(NULL), listMax(16), listSize(0)

	712 {

	713 UErrorCode status = U_ZERO_ERROR;

	714 UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), stri ng.length(), &status);

	715 uint32_t strengthMask = 0;

	716 int32_t order, low, high;

	717

	718 switch (ucol_getStrength(coll))

	719 {

	720 default:

	721 strengthMask \|= UCOL_TERTIARYORDERMASK;

	722 /* fall through */

	723

	724 case UCOL_SECONDARY:

	725 strengthMask \|= UCOL_SECONDARYORDERMASK;

	726 /* fall through */

	727

	728 case UCOL_PRIMARY:

	729 strengthMask \|= UCOL_PRIMARYORDERMASK;

	730 }

	731

	732 list = new Order[listMax];

	733

	734 ucol_setOffset(elems, stringOffset, &status);

	735

	736 do {

	737 low = ucol_getOffset(elems);

	738 order = ucol_next(elems, &status);

	739 high = ucol_getOffset(elems);

	740

	741 if (order != UCOL_NULLORDER) {

	742 order &= strengthMask;

	743 }

	744

	745 if (order != UCOL_IGNORABLE) {

	746 add(order, low, high);

	747 }

	748 } while (order != UCOL_NULLORDER);

	749

	750 ucol_closeElements(elems);

	751 }

	752

	753 OrderList::~OrderList()

	754 {

	755 delete[] list;

	756 }

	757

	758 void OrderList::add(int32_t order, int32_t low, int32_t high)

	759 {

	760 if (listSize >= listMax) {

	761 listMax *= 2;

	762

	763 Order *newList = new Order[listMax];

	764

	765 uprv_memcpy(newList, list, listSize * sizeof(Order));

	766 delete[] list;

	767 list = newList;

	768 }

	769

	770 list[listSize].order = order;

	771 list[listSize].lowOffset = low;

	772 list[listSize].highOffset = high;

	773

	774 listSize += 1;

	775 }

	776

	777 const Order *OrderList::get(int32_t index) const

	778 {

	779 if (index >= listSize) {

	780 return NULL;

	781 }

	782

	783 return &list[index];

	784 }

	785

	786 int32_t OrderList::getLowOffset(int32_t index) const

	787 {

	788 const Order *order = get(index);

	789

	790 if (order != NULL) {

	791 return order->lowOffset;

	792 }

	793

	794 return -1;

	795 }

	796

	797 int32_t OrderList::getHighOffset(int32_t index) const

	798 {

	799 const Order *order = get(index);

	800

	801 if (order != NULL) {

	802 return order->highOffset;

	803 }

	804

	805 return -1;

	806 }

	807

	808 int32_t OrderList::getOrder(int32_t index) const

	809 {

	810 const Order *order = get(index);

	811

	812 if (order != NULL) {

	813 return order->order;

	814 }

	815

	816 return UCOL_NULLORDER;

	817 }

	818

	819 int32_t OrderList::size() const

	820 {

	821 return listSize;

	822 }

	823

	824 void OrderList::reverse()

	825 {

	826 for(int32_t f = 0, b = listSize - 1; f < b; f += 1, b -= 1) {

	827 Order swap = list[b];

	828

	829 list[b] = list[f];

	830 list[f] = swap;

	831 }

	832 }

	833

	834 UBool OrderList::compare(const OrderList &other) const

	835 {

	836 if (listSize != other.listSize) {

	837 return FALSE;

	838 }

	839

	840 for(int32_t i = 0; i < listSize; i += 1) {

	841 if (list[i].order != other.list[i].order \|\|

	842 list[i].lowOffset != other.list[i].lowOffset \|\|

	843 list[i].highOffset != other.list[i].highOffset) {

	844 return FALSE;

	845 }

	846 }

	847

	848 return TRUE;

	849 }

	850

	851 UBool OrderList::matchesAt(int32_t offset, const OrderList &other) const

	852 {

	853 // NOTE: sizes include the NULLORDER, which we don't want to compare.

	854 int32_t otherSize = other.size() - 1;

	855

	856 if (listSize - 1 - offset < otherSize) {

	857 return FALSE;

	858 }

	859

	860 for (int32_t i = offset, j = 0; j < otherSize; i += 1, j += 1) {

	861 if (getOrder(i) != other.getOrder(j)) {

	862 return FALSE;

	863 }

	864 }

	865

	866 return TRUE;

	867 }

	868

	869 static char printOffsets(char buffer, OrderList &list)

	870 {

	871 int32_t size = list.size();

	872 char *s = buffer;

	873

	874 for(int32_t i = 0; i < size; i += 1) {

	875 const Order *order = list.get(i);

	876

	877 if (i != 0) {

	878 s += sprintf(s, ", ");

	879 }

	880

	881 s += sprintf(s, "(%d, %d)", order->lowOffset, order->highOffset);

	882 }

	883

	884 return buffer;

	885 }

	886

	887 static char printOrders(char buffer, OrderList &list)

	888 {

	889 int32_t size = list.size();

	890 char *s = buffer;

	891

	892 for(int32_t i = 0; i < size; i += 1) {

	893 const Order *order = list.get(i);

	894

	895 if (i != 0) {

	896 s += sprintf(s, ", ");

	897 }

	898

	899 s += sprintf(s, "%8.8X", order->order);

	900 }

	901

	902 return buffer;

	903 }

	904

	905 void SSearchTest::offsetTest()

	906 {

	907 static const UVersionInfo icu47 = { 4, 7, 0, 0 };

	908 const char *test[] = {

	909 // The sequence \u0FB3\u0F71\u0F71\u0F80 contains a discontiguous

	910 // contraction (\u0FB3\u0F71\u0F80) logically followed by \u0F71.

	911 "\\u1E33\\u0FB3\\u0F71\\u0F71\\u0F80\\uD835\\uDF6C\\u01B0",

	912

	913 "\\ua191\\u16ef\\u2036\\u017a",

	914

	915 #if 0

	916 // This results in a complex interaction between contraction,

	917 // expansion and normalization that confuses the backwards offset fixups .

	918 "\\u0F7F\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85",

	919 #endif

	920

	921 "\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85",

	922 "\\u07E9\\u07EA\\u07F1\\u07F2\\u07F3",

	923

	924 "\\u02FE\\u02FF"

	925 "\\u0300\\u0301\\u0302\\u0303\\u0304\\u0305\\u0306\\u0307\\u0308\\u0309\ \u030A\\u030B\\u030C\\u030D\\u030E\\u030F"

	926 "\\u0310\\u0311\\u0312\\u0313\\u0314\\u0315\\u0316\\u0317\\u0318\\u0319\ \u031A\\u031B\\u031C\\u031D\\u031E\\u031F"

	927 "\\u0320\\u0321\\u0322\\u0323\\u0324\\u0325\\u0326\\u0327\\u0328\\u0329\ \u032A\\u032B\\u032C\\u032D\\u032E\\u032F"

	928 "\\u0330\\u0331\\u0332\\u0333\\u0334\\u0335\\u0336\\u0337\\u0338\\u0339\ \u033A\\u033B\\u033C\\u033D\\u033E\\u033F"

	929 "\\u0340\\u0341\\u0342\\u0343\\u0344\\u0345\\u0346\\u0347\\u0348\\u0349\ \u034A\\u034B\\u034C\\u034D\\u034E", // currently not working, see #8081

	930

	931 "\\u02FE\\u02FF\\u0300\\u0301\\u0302\\u0303\\u0316\\u0317\\u0318", // cu rrently not working, see #8081

	932 "a\\u02FF\\u0301\\u0316", // currently not working, see #8081

	933 "a\\u02FF\\u0316\\u0301",

	934 "a\\u0430\\u0301\\u0316",

	935 "a\\u0430\\u0316\\u0301",

	936 "abc\\u0E41\\u0301\\u0316",

	937 "abc\\u0E41\\u0316\\u0301",

	938 "\\u0E41\\u0301\\u0316",

	939 "\\u0E41\\u0316\\u0301",

	940 "a\\u0301\\u0316",

	941 "a\\u0316\\u0301",

	942 "\\uAC52\\uAC53",

	943 "\\u34CA\\u34CB",

	944 "\\u11ED\\u11EE",

	945 "\\u30C3\\u30D0",

	946 "p\\u00E9ch\\u00E9",

	947 "a\\u0301\\u0325",

	948 "a\\u0300\\u0325",

	949 "a\\u0325\\u0300",

	950 "A\\u0323\\u0300B",

	951 "A\\u0300\\u0323B",

	952 "A\\u0301\\u0323B",

	953 "A\\u0302\\u0301\\u0323B",

	954 "abc",

	955 "ab\\u0300c",

	956 "ab\\u0300\\u0323c",

	957 " \\uD800\\uDC00\\uDC00",

	958 "a\\uD800\\uDC00\\uDC00",

	959 "A\\u0301\\u0301",

	960 "A\\u0301\\u0323",

	961 "A\\u0301\\u0323B",

	962 "B\\u0301\\u0323C",

	963 "A\\u0300\\u0323B",

	964 "\\u0301A\\u0301\\u0301",

	965 "abcd\\r\\u0301",

	966 "p\\u00EAche",

	967 "pe\\u0302che",

	968 };

	969

	970 int32_t testCount = ARRAY_SIZE(test);

	971 UErrorCode status = U_ZERO_ERROR;

	972 RuleBasedCollator col = (RuleBasedCollator ) Collator::createInstance(Loca le::getEnglish(), status);

	973 if (U_FAILURE(status)) {

	974 errcheckln(status, "Failed to create collator in offsetTest! - %s", u_er rorName(status));

	975 return;

	976 }

	977 char buffer[4096]; // A bit of a hack... just happens to be long enough for all the test cases...

	978 // We could allocate one that's the right size by (CE_co unt * 10) + 2

	979 // 10 chars is enough room for 8 hex digits plus ", ". 2 extra chars for "[" and "]"

	980

	981 col->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);

	982

	983 for(int32_t i = 0; i < testCount; i += 1) {

	984 if (!isICUVersionAtLeast(icu47) && i>=4 && i<=6) {

	985 continue; // timebomb until ticket #8080 is resolved

	986 }

	987 UnicodeString ts = CharsToUnicodeString(test[i]);

	988 CollationElementIterator *iter = col->createCollationElementIterator(ts) ;

	989 OrderList forwardList;

	990 OrderList backwardList;

	991 int32_t order, low, high;

	992

	993 do {

	994 low = iter->getOffset();

	995 order = iter->next(status);

	996 high = iter->getOffset();

	997

	998 forwardList.add(order, low, high);

	999 } while (order != CollationElementIterator::NULLORDER);

	1000

	1001 iter->reset();

	1002 iter->setOffset(ts.length(), status);

	1003

	1004 backwardList.add(CollationElementIterator::NULLORDER, iter->getOffset(), iter->getOffset());

	1005

	1006 do {

	1007 high = iter->getOffset();

	1008 order = iter->previous(status);

	1009 low = iter->getOffset();

	1010

	1011 if (order == CollationElementIterator::NULLORDER) {

	1012 break;

	1013 }

	1014

	1015 backwardList.add(order, low, high);

	1016 } while (TRUE);

	1017

	1018 backwardList.reverse();

	1019

	1020 if (forwardList.compare(backwardList)) {

	1021 logln("Works with \"%s\"", test[i]);

	1022 logln("Forward offsets: [%s]", printOffsets(buffer, forwardList));

	1023 // logln("Backward offsets: [%s]", printOffsets(buffer, backwardList));

	1024

	1025 logln("Forward CEs: [%s]", printOrders(buffer, forwardList));

	1026 // logln("Backward CEs: [%s]", printOrders(buffer, backwardList));

	1027

	1028 logln();

	1029 } else {

	1030 errln("Fails with \"%s\"", test[i]);

	1031 infoln("Forward offsets: [%s]", printOffsets(buffer, forwardList));

	1032 infoln("Backward offsets: [%s]", printOffsets(buffer, backwardList)) ;

	1033

	1034 infoln("Forward CEs: [%s]", printOrders(buffer, forwardList));

	1035 infoln("Backward CEs: [%s]", printOrders(buffer, backwardList));

	1036

	1037 infoln();

	1038 }

	1039 delete iter;

	1040 }

	1041 delete col;

	1042 }

	1043

	1044 #if 0

	1045 static UnicodeString &escape(const UnicodeString &string, UnicodeString &buffer)

	1046 {

	1047 for(int32_t i = 0; i < string.length(); i += 1) {

	1048 UChar32 ch = string.char32At(i);

	1049

	1050 if (ch >= 0x0020 && ch <= 0x007F) {

	1051 if (ch == 0x005C) {

	1052 buffer.append("\\\\");

	1053 } else {

	1054 buffer.append(ch);

	1055 }

	1056 } else {

	1057 char cbuffer[12];

	1058

	1059 if (ch <= 0xFFFFL) {

	1060 sprintf(cbuffer, "\\u%4.4X", ch);

	1061 } else {

	1062 sprintf(cbuffer, "\\U%8.8X", ch);

	1063 }

	1064

	1065 buffer.append(cbuffer);

	1066 }

	1067

	1068 if (ch >= 0x10000L) {

	1069 i += 1;

	1070 }

	1071 }

	1072

	1073 return buffer;

	1074 }

	1075 #endif

	1076

	1077 #if 1

	1078

	1079 struct PCE

	1080 {

	1081 uint64_t ce;

	1082 int32_t lowOffset;

	1083 int32_t highOffset;

	1084 };

	1085

	1086 class PCEList

	1087 {

	1088 public:

	1089 PCEList(UCollator *coll, const UnicodeString &string);

	1090 ~PCEList();

	1091

	1092 int32_t size() const;

	1093

	1094 const PCE *get(int32_t index) const;

	1095

	1096 int32_t getLowOffset(int32_t index) const;

	1097 int32_t getHighOffset(int32_t index) const;

	1098 uint64_t getOrder(int32_t index) const;

	1099

	1100 UBool matchesAt(int32_t offset, const PCEList &other) const;

	1101

	1102 uint64_t operator[](int32_t index) const;

	1103

	1104 private:

	1105 void add(uint64_t ce, int32_t low, int32_t high);

	1106

	1107 PCE *list;

	1108 int32_t listMax;

	1109 int32_t listSize;

	1110 };

	1111

	1112 PCEList::PCEList(UCollator *coll, const UnicodeString &string)

	1113 {

	1114 UErrorCode status = U_ZERO_ERROR;

	1115 UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), stri ng.length(), &status);

	1116 uint64_t order;

	1117 int32_t low, high;

	1118

	1119 list = new PCE[listMax];

	1120

	1121 ucol_setOffset(elems, 0, &status);

	1122

	1123 do {

	1124 order = ucol_nextProcessed(elems, &low, &high, &status);

	1125 add(order, low, high);

	1126 } while (order != UCOL_PROCESSED_NULLORDER);

	1127

	1128 ucol_closeElements(elems);

	1129 }

	1130

	1131 PCEList::~PCEList()

	1132 {

	1133 delete[] list;

	1134 }

	1135

	1136 void PCEList::add(uint64_t order, int32_t low, int32_t high)

	1137 {

	1138 if (listSize >= listMax) {

	1139 listMax *= 2;

	1140

	1141 PCE *newList = new PCE[listMax];

	1142

	1143 uprv_memcpy(newList, list, listSize * sizeof(Order));

	1144 delete[] list;

	1145 list = newList;

	1146 }

	1147

	1148 list[listSize].ce = order;

	1149 list[listSize].lowOffset = low;

	1150 list[listSize].highOffset = high;

	1151

	1152 listSize += 1;

	1153 }

	1154

	1155 const PCE *PCEList::get(int32_t index) const

	1156 {

	1157 if (index >= listSize) {

	1158 return NULL;

	1159 }

	1160

	1161 return &list[index];

	1162 }

	1163

	1164 int32_t PCEList::getLowOffset(int32_t index) const

	1165 {

	1166 const PCE *pce = get(index);

	1167

	1168 if (pce != NULL) {

	1169 return pce->lowOffset;

	1170 }

	1171

	1172 return -1;

	1173 }

	1174

	1175 int32_t PCEList::getHighOffset(int32_t index) const

	1176 {

	1177 const PCE *pce = get(index);

	1178

	1179 if (pce != NULL) {

	1180 return pce->highOffset;

	1181 }

	1182

	1183 return -1;

	1184 }

	1185

	1186 uint64_t PCEList::getOrder(int32_t index) const

	1187 {

	1188 const PCE *pce = get(index);

	1189

	1190 if (pce != NULL) {

	1191 return pce->ce;

	1192 }

	1193

	1194 return UCOL_PROCESSED_NULLORDER;

	1195 }

	1196

	1197 int32_t PCEList::size() const

	1198 {

	1199 return listSize;

	1200 }

	1201

	1202 UBool PCEList::matchesAt(int32_t offset, const PCEList &other) const

	1203 {

	1204 // NOTE: sizes include the NULLORDER, which we don't want to compare.

	1205 int32_t otherSize = other.size() - 1;

	1206

	1207 if (listSize - 1 - offset < otherSize) {

	1208 return FALSE;

	1209 }

	1210

	1211 for (int32_t i = offset, j = 0; j < otherSize; i += 1, j += 1) {

	1212 if (getOrder(i) != other.getOrder(j)) {

	1213 return FALSE;

	1214 }

	1215 }

	1216

	1217 return TRUE;

	1218 }

	1219

	1220 uint64_t PCEList::operator[](int32_t index) const

	1221 {

	1222 return getOrder(index);

	1223 }

	1224

	1225 void SSearchTest::boyerMooreTest()

	1226 {

	1227 UErrorCode status = U_ZERO_ERROR;

	1228 UCollator *coll = NULL;

	1229 CollData *data = NULL;

	1230 const CEList* ce = NULL;

	1231 const CEList* ce1 = NULL;

	1232 UnicodeString lp = "fuss";

	1233 UnicodeString sp = "fu\\u00DF";

	1234 BoyerMooreSearch *longPattern = NULL;

	1235 BoyerMooreSearch *shortPattern = NULL;

	1236 UnicodeString targets[] = {"fu\\u00DF", "fu\\u00DFball", "1fu\\u00DFball", "12fu\\u00DFball", "123fu\\u00DFball", "1234fu\\u00DFball",

	1237 "ffu\\u00DF", "fufu\\u00DF", "fusfu\\u00DF",

	1238 "fuss", "ffuss", "fufuss", "fusfuss", "1fuss", " 12fuss", "123fuss", "1234fuss", "fu\\u00DF", "1fu\\u00DF", "12fu\\u00DF", "123fu \\u00DF", "1234fu\\u00DF"};

	1239 int32_t start = -1, end = -1;

	1240

	1241 coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status);

	1242 if (U_FAILURE(status)) {

	1243 errcheckln(status, "Could not open collator. - %s", u_errorName(status)) ;

	1244 return;

	1245 }

	1246

	1247 data = CollData::open(coll, status);

	1248 if (U_FAILURE(status)) {

	1249 errln("Could not open CollData object.");

	1250 goto close_data;

	1251 }

	1252

	1253 data->getDynamicClassID();

	1254 if (U_FAILURE(status)) {

	1255 errln("Could not get dynamic class ID of CollData.");

	1256 goto close_patterns;

	1257 }

	1258

	1259 data->getStaticClassID();

	1260 if (U_FAILURE(status)) {

	1261 errln("Could not get static class ID of CollData.");

	1262 goto close_patterns;

	1263 }

	1264

	1265 longPattern = new BoyerMooreSearch(data, lp.unescape(), NULL, status);

	1266 shortPattern = new BoyerMooreSearch(data, sp.unescape(), NULL, status);

	1267 if (U_FAILURE(status)) {

	1268 errln("Could not create pattern objects.");

	1269 goto close_patterns;

	1270 }

	1271

	1272 longPattern->getBadCharacterTable();

	1273 shortPattern->getBadCharacterTable();

	1274 if (U_FAILURE(status)) {

	1275 errln("Could not get bad character table.");

	1276 goto close_patterns;

	1277 }

	1278

	1279 longPattern->getGoodSuffixTable();

	1280 shortPattern->getGoodSuffixTable();

	1281 if (U_FAILURE(status)) {

	1282 errln("Could not get good suffix table.");

	1283 goto close_patterns;

	1284 }

	1285

	1286 longPattern->getDynamicClassID();

	1287 shortPattern->getDynamicClassID();

	1288 if (U_FAILURE(status)) {

	1289 errln("Could not get dynamic class ID of BoyerMooreSearch.");

	1290 goto close_patterns;

	1291 }

	1292

	1293 longPattern->getStaticClassID();

	1294 shortPattern->getStaticClassID();

	1295 if (U_FAILURE(status)) {

	1296 errln("Could not get static class ID of BoyerMooreSearch.");

	1297 goto close_patterns;

	1298 }

	1299

	1300 longPattern->getData();

	1301 shortPattern->getData();

	1302 if (U_FAILURE(status)) {

	1303 errln("Could not get collate data.");

	1304 goto close_patterns;

	1305 }

	1306

	1307 ce = longPattern->getPatternCEs();

	1308 ce1 = shortPattern->getPatternCEs();

	1309 if (U_FAILURE(status)) {

	1310 errln("Could not get pattern CEs.");

	1311 goto close_patterns;

	1312 }

	1313

	1314 ce->getDynamicClassID();

	1315 ce1->getDynamicClassID();

	1316 if (U_FAILURE(status)) {

	1317 errln("Could not get dynamic class ID of CEList.");

	1318 goto close_patterns;

	1319 }

	1320

	1321 ce->getStaticClassID();

	1322 ce1->getStaticClassID();

	1323 if (U_FAILURE(status)) {

	1324 errln("Could not get static class ID of CEList.");

	1325 goto close_patterns;

	1326 }

	1327

	1328 if(data->minLengthInChars(ce,0) != 3){

	1329 errln("Minimal Length in Characters for 'data' with 'ce' was suppose to give 3.");

	1330 goto close_patterns;

	1331 }

	1332

	1333 if(data->minLengthInChars(ce1,0) != 3){

	1334 errln("Minimal Length in Characters for 'data' with 'ce1' was suppose to give 3.");

	1335 goto close_patterns;

	1336 }

	1337

	1338 for (uint32_t t = 0; t < (sizeof(targets)/sizeof(targets[0])); t += 1) {

	1339 UnicodeString target = targets[t].unescape();

	1340

	1341 longPattern->setTargetString(&target, status);

	1342 if (longPattern->search(0, start, end)) {

	1343 logln("Test %d: found long pattern at [%d, %d].", t, start, end);

	1344 } else {

	1345 errln("Test %d: did not find long pattern.", t);

	1346 }

	1347

	1348 shortPattern->setTargetString(&target, status);

	1349 if (shortPattern->search(0, start, end)) {

	1350 logln("Test %d: found short pattern at [%d, %d].", t, start, end);

	1351 } else {

	1352 errln("Test %d: did not find short pattern.", t);

	1353 }

	1354

	1355 if(longPattern->empty()){

	1356 errln("Test %d: Long pattern should not have been empty.");

	1357 }

	1358

	1359 if(shortPattern->empty()){

	1360 errln("Test %d: Short pattern should not have been empty.");

	1361 }

	1362 }

	1363

	1364 close_patterns:

	1365 delete shortPattern;

	1366 delete longPattern;

	1367

	1368 close_data:

	1369 CollData::close(data);

	1370 ucol_close(coll);

	1371 }

	1372

	1373 void SSearchTest::bmsTest()

	1374 {

	1375 UErrorCode status = U_ZERO_ERROR;

	1376 UCollator *coll = NULL;

	1377 UCD *data = NULL;

	1378 UnicodeString lp = "fuss";

	1379 UnicodeString lpu = lp.unescape();

	1380 UnicodeString sp = "fu\\u00DF";

	1381 UnicodeString spu = sp.unescape();

	1382 BMS *longPattern = NULL;

	1383 BMS *shortPattern = NULL;

	1384 UnicodeString targets[] = {"fu\\u00DF", "fu\\u00DFball", "1fu\\u00DFball", "12fu\\u00DFball", "123fu\\u00DFball", "1234fu\\u00DFball",

	1385 "ffu\\u00DF", "fufu\\u00DF", "fusfu\\u00DF",

	1386 "fuss", "ffuss", "fufuss", "fusfuss", "1fuss", " 12fuss", "123fuss", "1234fuss", "fu\\u00DF", "1fu\\u00DF", "12fu\\u00DF", "123fu \\u00DF", "1234fu\\u00DF"};

	1387 int32_t start = -1, end = -1;

	1388

	1389 coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status);

	1390 if (U_FAILURE(status)) {

	1391 errcheckln(status, "Could not open collator. - %s", u_errorName(status)) ;

	1392 return;

	1393 }

	1394

	1395 data = ucd_open(coll, &status);

	1396 if (U_FAILURE(status)) {

	1397 errln("Could not open CollData object.");

	1398 goto close_data;

	1399 }

	1400

	1401 longPattern = bms_open(data, lpu.getBuffer(), lpu.length(), NULL, 0, &status );

	1402 shortPattern = bms_open(data, spu.getBuffer(), spu.length(), NULL, 0, &statu s);

	1403 if (U_FAILURE(status)) {

	1404 errln("Couldn't open pattern objects.");

	1405 goto close_patterns;

	1406 }

	1407

	1408 for (uint32_t t = 0; t < (sizeof(targets)/sizeof(targets[0])); t += 1) {

	1409 UnicodeString target = targets[t].unescape();

	1410

	1411 bms_setTargetString(longPattern, target.getBuffer(), target.length(), &s tatus);

	1412 if (bms_search(longPattern, 0, &start, &end)) {

	1413 logln("Test %d: found long pattern at [%d, %d].", t, start, end);

	1414 } else {

	1415 errln("Test %d: did not find long pattern.", t);

	1416 }

	1417

	1418 bms_setTargetString(shortPattern, target.getBuffer(), target.length(), & status);

	1419 if (bms_search(shortPattern, 0, &start, &end)) {

	1420 logln("Test %d: found short pattern at [%d, %d].", t, start, end);

	1421 } else {

	1422 errln("Test %d: did not find short pattern.", t);

	1423 }

	1424 }

	1425

	1426 /* Add better coverage for bms code. */

	1427 if(bms_empty(longPattern)) {

	1428 errln("FAIL: longgPattern is empty.");

	1429 }

	1430

	1431 if (!bms_getData(longPattern)) {

	1432 errln("FAIL: bms_getData returned NULL.");

	1433 }

	1434

	1435 if (!ucd_getCollator(data)) {

	1436 errln("FAIL: ucd_getCollator returned NULL.");

	1437 }

	1438

	1439 close_patterns:

	1440 bms_close(shortPattern);

	1441 bms_close(longPattern);

	1442

	1443 close_data:

	1444 ucd_close(data);

	1445 ucd_freeCache();

	1446 ucol_close(coll);

	1447 }

	1448

	1449 void SSearchTest::goodSuffixTest()

	1450 {

	1451 UErrorCode status = U_ZERO_ERROR;

	1452 UCollator *coll = NULL;

	1453 CollData *data = NULL;

	1454 UnicodeString pat = /"gcagagag"/ "fxeld";

	1455 UnicodeString target = /"gcatcgcagagagtatacagtacg"/ "cloveldfxeld";

	1456 BoyerMooreSearch *pattern = NULL;

	1457 int32_t start = -1, end = -1;

	1458

	1459 coll = ucol_open(NULL, &status);

	1460 if (U_FAILURE(status)) {

	1461 errcheckln(status, "Couldn't open collator. - %s", u_errorName(status));

	1462 return;

	1463 }

	1464

	1465 data = CollData::open(coll, status);

	1466 if (U_FAILURE(status)) {

	1467 errln("Couldn't open CollData object.");

	1468 goto close_data;

	1469 }

	1470

	1471 pattern = new BoyerMooreSearch(data, pat, &target, status);

	1472 if (U_FAILURE(status)) {

	1473 errln("Couldn't open pattern object.");

	1474 goto close_pattern;

	1475 }

	1476

	1477 if (pattern->search(0, start, end)) {

	1478 logln("Found pattern at [%d, %d].", start, end);

	1479 } else {

	1480 errln("Did not find pattern.");

	1481 }

	1482

	1483 close_pattern:

	1484 delete pattern;

	1485

	1486 close_data:

	1487 CollData::close(data);

	1488 ucol_close(coll);

	1489 }

	1490

	1491 //

	1492 // searchTime() A quick and dirty performance test for string search.

	1493 // Probably doesn't really belong as part of intltest, but it

	1494 // does check that the search succeeds, and gets the right resu lt,

	1495 // so it serves as a functionality test also.

	1496 //

	1497 // To run as a perf test, up the loop count, select by commenti ng

	1498 // and uncommenting in the code the operation to be measured,

	1499 // rebuild, and measure the running time of this test alone.

	1500 //

	1501 // time LD_LIBRARY_PATH=whatever ./intltest collate/SSearc hTest/searchTime

	1502 //

	1503 void SSearchTest::searchTime() {

	1504 static const char *longishText =

	1505 "Whylom, as olde stories tellen us,\n"

	1506 "Ther was a duk that highte Theseus:\n"

	1507 "Of Athenes he was lord and governour,\n"

	1508 "And in his tyme swich a conquerour,\n"

	1509 "That gretter was ther noon under the sonne.\n"

	1510 "Ful many a riche contree hadde he wonne;\n"

	1511 "What with his wisdom and his chivalrye,\n"

	1512 "He conquered al the regne of Femenye,\n"

	1513 "That whylom was y-cleped Scithia;\n"

	1514 "And weddede the quene Ipolita,\n"

	1515 "And broghte hir hoom with him in his contree\n"

	1516 "With muchel glorie and greet solempnitee,\n"

	1517 "And eek hir yonge suster Emelye.\n"

	1518 "And thus with victorie and with melodye\n"

	1519 "Lete I this noble duk to Athenes ryde,\n"

	1520 "And al his hoost, in armes, him bisyde.\n"

	1521 "And certes, if it nere to long to here,\n"

	1522 "I wolde han told yow fully the manere,\n"

	1523 "How wonnen was the regne of Femenye\n"

	1524 "By Theseus, and by his chivalrye;\n"

	1525 "And of the grete bataille for the nones\n"

	1526 "Bitwixen Athen's and Amazones;\n"

	1527 "And how asseged was Ipolita,\n"

	1528 "The faire hardy quene of Scithia;\n"

	1529 "And of the feste that was at hir weddinge,\n"

	1530 "And of the tempest at hir hoom-cominge;\n"

	1531 "But al that thing I moot as now forbere.\n"

	1532 "I have, God woot, a large feeld to ere,\n"

	1533 "And wayke been the oxen in my plough.\n"

	1534 "The remenant of the tale is long y-nough.\n"

	1535 "I wol nat letten eek noon of this route;\n"

	1536 "Lat every felawe telle his tale aboute,\n"

	1537 "And lat see now who shal the soper winne;\n"

	1538 "And ther I lefte, I wol ageyn biginne.\n"

	1539 "This duk, of whom I make mencioun,\n"

	1540 "When he was come almost unto the toun,\n"

	1541 "In al his wele and in his moste pryde,\n"

	1542 "He was war, as he caste his eye asyde,\n"

	1543 "Wher that ther kneled in the hye weye\n"

	1544 "A companye of ladies, tweye and tweye,\n"

	1545 "Ech after other, clad in clothes blake; \n"

	1546 "But swich a cry and swich a wo they make,\n"

	1547 "That in this world nis creature livinge,\n"

	1548 "That herde swich another weymentinge;\n"

	1549 "And of this cry they nolde never stenten,\n"

	1550 "Til they the reynes of his brydel henten.\n"

	1551 "'What folk ben ye, that at myn hoomcominge\n"

	1552 "Perturben so my feste with cryinge'?\n"

	1553 "Quod Theseus, 'have ye so greet envye\n"

	1554 "Of myn honour, that thus compleyne and crye? \n"

	1555 "Or who hath yow misboden, or offended?\n"

	1556 "And telleth me if it may been amended;\n"

	1557 "And why that ye ben clothed thus in blak'?\n"

	1558 "The eldest lady of hem alle spak,\n"

	1559 "When she hadde swowned with a deedly chere,\n"

	1560 "That it was routhe for to seen and here,\n"

	1561 "And seyde: 'Lord, to whom Fortune hath yiven\n"

	1562 "Victorie, and as a conquerour to liven,\n"

	1563 "Noght greveth us your glorie and your honour;\n"

	1564 "But we biseken mercy and socour.\n"

	1565 "Have mercy on our wo and our distresse.\n"

	1566 "Som drope of pitee, thurgh thy gentilesse,\n"

	1567 "Up-on us wrecched wommen lat thou falle.\n"

	1568 "For certes, lord, ther nis noon of us alle,\n"

	1569 "That she nath been a duchesse or a quene;\n"

	1570 "Now be we caitifs, as it is wel sene:\n"

	1571 "Thanked be Fortune, and hir false wheel,\n"

	1572 "That noon estat assureth to be weel.\n"

	1573 "And certes, lord, t'abyden your presence,\n"

	1574 "Here in the temple of the goddesse Clemence\n"

	1575 "We han ben waytinge al this fourtenight;\n"

	1576 "Now help us, lord, sith it is in thy might.\n"

	1577 "I wrecche, which that wepe and waille thus,\n"

	1578 "Was whylom wyf to king Capaneus,\n"

	1579 "That starf at Thebes, cursed be that day!\n"

	1580 "And alle we, that been in this array,\n"

	1581 "And maken al this lamentacioun,\n"

	1582 "We losten alle our housbondes at that toun,\n"

	1583 "Whyl that the sege ther-aboute lay.\n"

	1584 "And yet now th'olde Creon, weylaway!\n"

	1585 "The lord is now of Thebes the citee, \n"

	1586 "Fulfild of ire and of iniquitee,\n"

	1587 "He, for despyt, and for his tirannye,\n"

	1588 "To do the dede bodyes vileinye,\n"

	1589 "Of alle our lordes, whiche that ben slawe,\n"

	1590 "Hath alle the bodyes on an heep y-drawe,\n"

	1591 "And wol nat suffren hem, by noon assent,\n"

	1592 "Neither to been y-buried nor y-brent,\n"

	1593 "But maketh houndes ete hem in despyt. zet'\n";

	1594

	1595 #define TEST_BOYER_MOORE 1

	1596 const char *cPattern = "maketh houndes ete hem";

	1597 //const char *cPattern = "Whylom";

	1598 //const char *cPattern = "zet";

	1599 const char *testId = "searchTime()"; // for error macros.

	1600 UnicodeString target = longishText;

	1601 UErrorCode status = U_ZERO_ERROR;

	1602

	1603

	1604 LocalUCollatorPointer collator(ucol_open("en", &status));

	1605 CollData *data = CollData::open(collator.getAlias(), status);

	1606 if (U_FAILURE(status) \|\| collator.isNull() \|\| data == NULL) {

	1607 errcheckln(status, "Unable to open UCollator or CollData. - %s", u_error Name(status));

	1608 return;

	1609 }

	1610 //ucol_setStrength(collator.getAlias(), collatorStrength);

	1611 //ucol_setAttribute(collator.getAlias(), UCOL_NORMALIZATION_MODE, normalize, &status);

	1612 UnicodeString uPattern = cPattern;

	1613 #ifndef TEST_BOYER_MOORE

	1614 LocalUStringSearchPointer uss(usearch_openFromCollator(uPattern.getBuffer(), uPattern.length(),

	1615 target.getBuffer(), t arget.length(),

	1616 collator.getAlias(),

	1617 NULL, // the brea k iterator

	1618 &status));

	1619 TEST_ASSERT_SUCCESS(status);

	1620 #else

	1621 BoyerMooreSearch bms(data, uPattern, &target, status);

	1622 TEST_ASSERT_SUCCESS(status);

	1623 #endif

	1624

	1625 // int32_t foundStart;

	1626 // int32_t foundEnd;

	1627 UBool found;

	1628

	1629 // Find the match position usgin strstr

	1630 const char *pm = strstr(longishText, cPattern);

	1631 TEST_ASSERT_M(pm!=NULL, "No pattern match with strstr");

	1632 int32_t refMatchPos = (int32_t)(pm - longishText);

	1633 int32_t icuMatchPos;

	1634 int32_t icuMatchEnd;

	1635 #ifndef TEST_BOYER_MOORE

	1636 usearch_search(uss.getAlias(), 0, &icuMatchPos, &icuMatchEnd, &status);

	1637 TEST_ASSERT_SUCCESS(status);

	1638 #else

	1639 found = bms.search(0, icuMatchPos, icuMatchEnd);

	1640 #endif

	1641 TEST_ASSERT_M(refMatchPos == icuMatchPos, "strstr and icu give different mat ch positions.");

	1642

	1643 int32_t i;

	1644 int32_t j=0;

	1645

	1646 // Try loopcounts around 100000 to some millions, depending on the operation ,

	1647 // to get runtimes of at least several seconds.

	1648 for (i=0; i<10000; i++) {

	1649 #ifndef TEST_BOYER_MOORE

	1650 found = usearch_search(uss.getAlias(), 0, &icuMatchPos, &icuMatchEnd, &s tatus);

	1651 #else

	1652 found = bms.search(0, icuMatchPos, icuMatchEnd);

	1653 #endif

	1654 //TEST_ASSERT_SUCCESS(status);

	1655 //TEST_ASSERT(found);

	1656

	1657 // usearch_setOffset(uss.getAlias(), 0, &status);

	1658 // icuMatchPos = usearch_next(uss.getAlias(), &status);

	1659

	1660 // The i+j stuff is to confuse the optimizer and get it to actually lea ve the

	1661 // call to strstr in place.

	1662 //pm = strstr(longishText+j, cPattern);

	1663 //j = (j + i)%5;

	1664 }

	1665

	1666 printf("%ld, %d\n", pm-longishText, j);

	1667 #ifdef TEST_BOYER_MOORE

	1668 CollData::close(data);

	1669 #endif

	1670 }

	1671 #endif

	1672

	1673 //------------------------------------------------------------------------------ ----------

	1674 //

	1675 // Random Numbers. Similar to standard lib rand() and srand()

	1676 // Not using library to

	1677 // 1. Get same results on all platforms.

	1678 // 2. Get access to current seed, to more easily reproduce failures.

	1679 //

	1680 //------------------------------------------------------------------------------ ---------

	1681 static uint32_t m_seed = 1;

	1682

	1683 static uint32_t m_rand()

	1684 {

	1685 m_seed = m_seed * 1103515245 + 12345;

	1686 return (uint32_t)(m_seed/65536) % 32768;

	1687 }

	1688

	1689 class Monkey

	1690 {

	1691 public:

	1692 virtual void append(UnicodeString &test, UnicodeString &alternate) = 0;

	1693

	1694 protected:

	1695 Monkey();

	1696 virtual ~Monkey();

	1697 };

	1698

	1699 Monkey::Monkey()

	1700 {

	1701 // ook?

	1702 }

	1703

	1704 Monkey::~Monkey()

	1705 {

	1706 // ook?

	1707 }

	1708

	1709 class SetMonkey : public Monkey

	1710 {

	1711 public:

	1712 SetMonkey(const USet *theSet);

	1713 ~SetMonkey();

	1714

	1715 virtual void append(UnicodeString &test, UnicodeString &alternate);

	1716

	1717 private:

	1718 const USet *set;

	1719 };

	1720

	1721 SetMonkey::SetMonkey(const USet *theSet)

	1722 : Monkey(), set(theSet)

	1723 {

	1724 // ook?

	1725 }

	1726

	1727 SetMonkey::~SetMonkey()

	1728 {

	1729 //ook...

	1730 }

	1731

	1732 void SetMonkey::append(UnicodeString &test, UnicodeString &alternate)

	1733 {

	1734 int32_t size = uset_size(set);

	1735 int32_t index = m_rand() % size;

	1736 UChar32 ch = uset_charAt(set, index);

	1737 UnicodeString str(ch);

	1738

	1739 test.append(str);

	1740 alternate.append(str); // flip case, or some junk?

	1741 }

	1742

	1743 class StringSetMonkey : public Monkey

	1744 {

	1745 public:

	1746 StringSetMonkey(const USet theSet, UCollator theCollator, CollData *theCol lData);

	1747 ~StringSetMonkey();

	1748

	1749 void append(UnicodeString &testCase, UnicodeString &alternate);

	1750

	1751 private:

	1752 UnicodeString &generateAlternative(const UnicodeString &testCase, UnicodeStr ing &alternate);

	1753

	1754 const USet *set;

	1755 UCollator *coll;

	1756 CollData *collData;

	1757 };

	1758

	1759 StringSetMonkey::StringSetMonkey(const USet theSet, UCollator theCollator, Col lData *theCollData)

	1760 : Monkey(), set(theSet), coll(theCollator), collData(theCollData)

	1761 {

	1762 // ook.

	1763 }

	1764

	1765 StringSetMonkey::~StringSetMonkey()

	1766 {

	1767 // ook?

	1768 }

	1769

	1770 void StringSetMonkey::append(UnicodeString &testCase, UnicodeString &alternate)

	1771 {

	1772 int32_t itemCount = uset_getItemCount(set), len = 0;

	1773 int32_t index = m_rand() % itemCount;

	1774 UChar32 rangeStart = 0, rangeEnd = 0;

	1775 UChar buffer[16];

	1776 UErrorCode err = U_ZERO_ERROR;

	1777

	1778 len = uset_getItem(set, index, &rangeStart, &rangeEnd, buffer, 16, &err);

	1779

	1780 if (len == 0) {

	1781 int32_t offset = m_rand() % (rangeEnd - rangeStart + 1);

	1782 UChar32 ch = rangeStart + offset;

	1783 UnicodeString str(ch);

	1784

	1785 testCase.append(str);

	1786 generateAlternative(str, alternate);

	1787 } else if (len > 0) {

	1788 // should check that len < 16...

	1789 UnicodeString str(buffer, len);

	1790

	1791 testCase.append(str);

	1792 generateAlternative(str, alternate);

	1793 } else {

	1794 // shouldn't happen...

	1795 }

	1796 }

	1797

	1798 UnicodeString &StringSetMonkey::generateAlternative(const UnicodeString &testCas e, UnicodeString &alternate)

	1799 {

	1800 // find out shortest string for the longest sequence of ces.

	1801 // needs to be refined to use dynamic programming, but will be roughly right

	1802 UErrorCode status = U_ZERO_ERROR;

	1803 CEList ceList(coll, testCase, status);

	1804 UnicodeString alt;

	1805 int32_t offset = 0;

	1806

	1807 if (ceList.size() == 0) {

	1808 return alternate.append(testCase);

	1809 }

	1810

	1811 while (offset < ceList.size()) {

	1812 int32_t ce = ceList.get(offset);

	1813 const StringList *strings = collData->getStringList(ce);

	1814

	1815 if (strings == NULL) {

	1816 return alternate.append(testCase);

	1817 }

	1818

	1819 int32_t stringCount = strings->size();

	1820 int32_t tries = 0;

	1821

	1822 // find random string that generates the same CEList

	1823 const CEList *ceList2 = NULL;

	1824 const UnicodeString *string = NULL;

	1825 UBool matches = FALSE;

	1826

	1827 do {

	1828 int32_t s = m_rand() % stringCount;

	1829

	1830 if (tries++ > stringCount) {

	1831 alternate.append(testCase);

	1832 return alternate;

	1833 }

	1834

	1835 string = strings->get(s);

	1836 ceList2 = collData->getCEList(string);

	1837 matches = ceList.matchesAt(offset, ceList2);

	1838

	1839 if (! matches) {

	1840 collData->freeCEList((CEList *) ceList2);

	1841 }

	1842 } while (! matches);

	1843

	1844 alt.append(*string);

	1845 offset += ceList2->size();

	1846 collData->freeCEList(ceList2);

	1847 }

	1848

	1849 const CEList altCEs(coll, alt, status);

	1850

	1851 if (ceList.matchesAt(0, &altCEs)) {

	1852 return alternate.append(alt);

	1853 }

	1854

	1855 return alternate.append(testCase);

	1856 }

	1857

	1858 static void generateTestCase(UCollator coll, Monkey monkeys[], int32_t monkeyC ount, UnicodeString &testCase, UnicodeString &alternate)

	1859 {

	1860 int32_t pieces = (m_rand() % 4) + 1;

	1861 UErrorCode status = U_ZERO_ERROR;

	1862 UBool matches;

	1863

	1864 do {

	1865 testCase.remove();

	1866 alternate.remove();

	1867 monkeys[0]->append(testCase, alternate);

	1868

	1869 for(int32_t piece = 0; piece < pieces; piece += 1) {

	1870 int32_t monkey = m_rand() % monkeyCount;

	1871

	1872 monkeys[monkey]->append(testCase, alternate);

	1873 }

	1874

	1875 const CEList ceTest(coll, testCase, status);

	1876 const CEList ceAlt(coll, alternate, status);

	1877

	1878 matches = ceTest.matchesAt(0, &ceAlt);

	1879 } while (! matches);

	1880 }

	1881

	1882 //

	1883 // Find the next acceptable boundary following the specified starting index

	1884 // in the target text being searched.

	1885 // TODO: refine what is an acceptable boundary. For the moment,

	1886 // choose the next position not within a combining sequence.

	1887 //

	1888 #if 0

	1889 static int32_t nextBoundaryAfter(const UnicodeString &string, int32_t startIndex ) {

	1890 const UChar *text = string.getBuffer();

	1891 int32_t textLen = string.length();

	1892

	1893 if (startIndex >= textLen) {

	1894 return startIndex;

	1895 }

	1896

	1897 UChar32 c;

	1898 int32_t i = startIndex;

	1899

	1900 U16_NEXT(text, i, textLen, c);

	1901

	1902 // If we are on a control character, stop without looking for combining mark s.

	1903 // Control characters do not combine.

	1904 int32_t gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);

	1905 if (gcProperty==U_GCB_CONTROL \|\| gcProperty==U_GCB_LF \|\| gcProperty==U_GCB_C R) {

	1906 return i;

	1907 }

	1908

	1909 // The initial character was not a control, and can thus accept trailing

	1910 // combining characters. Advance over however many of them there are.

	1911 int32_t indexOfLastCharChecked;

	1912

	1913 for (;;) {

	1914 indexOfLastCharChecked = i;

	1915

	1916 if (i>=textLen) {

	1917 break;

	1918 }

	1919

	1920 U16_NEXT(text, i, textLen, c);

	1921 gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);

	1922

	1923 if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) {

	1924 break;

	1925 }

	1926 }

	1927

	1928 return indexOfLastCharChecked;

	1929 }

	1930 #endif

	1931

	1932 #if 0

	1933 static UBool isInCombiningSequence(const UnicodeString &string, int32_t index) {

	1934 const UChar *text = string.getBuffer();

	1935 int32_t textLen = string.length();

	1936

	1937 if (index>=textLen \|\| index<=0) {

	1938 return FALSE;

	1939 }

	1940

	1941 // If the character at the current index is not a GRAPHEME_EXTEND

	1942 // then we can not be within a combining sequence.

	1943 UChar32 c;

	1944 U16_GET(text, 0, index, textLen, c);

	1945 int32_t gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);

	1946 if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) {

	1947 return FALSE;

	1948 }

	1949

	1950 // We are at a combining mark. If the preceding character is anything

	1951 // except a CONTROL, CR or LF, we are in a combining sequence.

	1952 U16_PREV(text, 0, index, c);

	1953 gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);

	1954

	1955 return !(gcProperty==U_GCB_CONTROL \|\| gcProperty==U_GCB_LF \|\| gcProperty==U_ GCB_CR);

	1956 }

	1957 #endif

	1958

	1959 static UBool simpleSearch(UCollator *coll, const UnicodeString &target, int32_t offset, const UnicodeString &pattern, int32_t &matchStart, int32_t &matchEnd)

	1960 {

	1961 UErrorCode status = U_ZERO_ERROR;

	1962 OrderList targetOrders(coll, target, offset);

	1963 OrderList patternOrders(coll, pattern);

	1964 int32_t targetSize = targetOrders.size() - 1;

	1965 int32_t patternSize = patternOrders.size() - 1;

	1966 UBreakIterator *charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocale ByType(coll, ULOC_VALID_LOCALE, &status),

	1967 target.getBuffer(), target.len gth(), &status);

	1968

	1969 if (patternSize == 0) {

	1970 // Searching for an empty pattern always fails

	1971 matchStart = matchEnd = -1;

	1972 ubrk_close(charBreakIterator);

	1973 return FALSE;

	1974 }

	1975

	1976 matchStart = matchEnd = -1;

	1977

	1978 for(int32_t i = 0; i < targetSize; i += 1) {

	1979 if (targetOrders.matchesAt(i, patternOrders)) {

	1980 int32_t start = targetOrders.getLowOffset(i);

	1981 int32_t maxLimit = targetOrders.getLowOffset(i + patternSize);

	1982 int32_t minLimit = targetOrders.getLowOffset(i + patternSize - 1);

	1983

	1984 // if the low and high offsets of the first CE in

	1985 // the match are the same, it means that the match

	1986 // starts in the middle of an expansion - all but

	1987 // the first CE of the expansion will have the offset

	1988 // of the following character.

	1989 if (start == targetOrders.getHighOffset(i)) {

	1990 continue;

	1991 }

	1992

	1993 // Make sure match starts on a grapheme boundary

	1994 if (! ubrk_isBoundary(charBreakIterator, start)) {

	1995 continue;

	1996 }

	1997

	1998 // If the low and high offsets of the CE after the match

	1999 // are the same, it means that the match ends in the middle

	2000 // of an expansion sequence.

	2001 if (maxLimit == targetOrders.getHighOffset(i + patternSize) &&

	2002 targetOrders.getOrder(i + patternSize) != UCOL_NULLORDER) {

	2003 continue;

	2004 }

	2005

	2006 int32_t mend = maxLimit;

	2007

	2008 // Find the first grapheme break after the character index

	2009 // of the last CE in the match. If it's after character index

	2010 // that's after the last CE in the match, use that index

	2011 // as the end of the match.

	2012 if (minLimit < maxLimit) {

	2013 int32_t nba = ubrk_following(charBreakIterator, minLimit);

	2014

	2015 if (nba >= targetOrders.getHighOffset(i + patternSize - 1)) {

	2016 mend = nba;

	2017 }

	2018 }

	2019

	2020 if (mend > maxLimit) {

	2021 continue;

	2022 }

	2023

	2024 if (! ubrk_isBoundary(charBreakIterator, mend)) {

	2025 continue;

	2026 }

	2027

	2028 matchStart = start;

	2029 matchEnd = mend;

	2030

	2031 ubrk_close(charBreakIterator);

	2032 return TRUE;

	2033 }

	2034 }

	2035

	2036 ubrk_close(charBreakIterator);

	2037 return FALSE;

	2038 }

	2039

	2040 #if !UCONFIG_NO_REGULAR_EXPRESSIONS

	2041 static int32_t getIntParam(UnicodeString name, UnicodeString &params, int32_t d efaultVal) {

	2042 int32_t val = defaultVal;

	2043

	2044 name.append(" = (-?\\d+)");

	2045

	2046 UErrorCode status = U_ZERO_ERROR;

	2047 RegexMatcher m(name, params, 0, status);

	2048

	2049 if (m.find()) {

	2050 // The param exists. Convert the string to an int.

	2051 char valString[100];

	2052 int32_t paramLength = m.end(1, status) - m.start(1, status);

	2053

	2054 if (paramLength >= (int32_t)(sizeof(valString)-1)) {

	2055 paramLength = (int32_t)(sizeof(valString)-2);

	2056 }

	2057

	2058 params.extract(m.start(1, status), paramLength, valString, sizeof(valStr ing));

	2059 val = strtol(valString, NULL, 10);

	2060

	2061 // Delete this parameter from the params string.

	2062 m.reset();

	2063 params = m.replaceFirst("", status);

	2064 }

	2065

	2066 //U_ASSERT(U_SUCCESS(status));

	2067 if (! U_SUCCESS(status)) {

	2068 val = defaultVal;

	2069 }

	2070

	2071 return val;

	2072 }

	2073 #endif

	2074

	2075 #if !UCONFIG_NO_COLLATION

	2076 int32_t SSearchTest::monkeyTestCase(UCollator *coll, const UnicodeString &testCa se, const UnicodeString &pattern, const UnicodeString &altPattern,

	2077 const char name, const char strength, uint 32_t seed)

	2078 {

	2079 UErrorCode status = U_ZERO_ERROR;

	2080 int32_t actualStart = -1, actualEnd = -1;

	2081 //int32_t expectedStart = prefix.length(), expectedEnd = prefix.length() + alt Pattern.length();

	2082 int32_t expectedStart = -1, expectedEnd = -1;

	2083 int32_t notFoundCount = 0;

	2084 LocalUStringSearchPointer uss(usearch_openFromCollator(pattern.getBuffer(), pattern.length(),

	2085 testCase.getBuffer(), testCase.length(),

	2086 coll,

	2087 NULL, // the brea k iterator

	2088 &status));

	2089

	2090 // **** TODO: find all matches, not just first one ****

	2091 simpleSearch(coll, testCase, 0, pattern, expectedStart, expectedEnd);

	2092

	2093 usearch_search(uss.getAlias(), 0, &actualStart, &actualEnd, &status);

	2094

	2095 if (expectedStart >= 0 && (actualStart != expectedStart \|\| actualEnd != expe ctedEnd)) {

	2096 errln("Search for <pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n"

	2097 " strength=%s seed=%d",

	2098 name, expectedStart, expectedEnd, actualStart, actualEnd, strength , seed);

	2099 }

	2100

	2101 if (expectedStart == -1 && actualStart == -1) {

	2102 notFoundCount += 1;

	2103 }

	2104

	2105 // **** TODO: find all matches, not just first one ****

	2106 simpleSearch(coll, testCase, 0, altPattern, expectedStart, expectedEnd);

	2107

	2108 usearch_setPattern(uss.getAlias(), altPattern.getBuffer(), altPattern.length (), &status);

	2109

	2110 usearch_search(uss.getAlias(), 0, &actualStart, &actualEnd, &status);

	2111

	2112 if (expectedStart >= 0 && (actualStart != expectedStart \|\| actualEnd != expe ctedEnd)) {

	2113 errln("Search for <alt_pattern> in <%s> failed: expected [%d, %d], got [ %d, %d]\n"

	2114 " strength=%s seed=%d",

	2115 name, expectedStart, expectedEnd, actualStart, actualEnd, strength , seed);

	2116 }

	2117

	2118 if (expectedStart == -1 && actualStart == -1) {

	2119 notFoundCount += 1;

	2120 }

	2121

	2122 return notFoundCount;

	2123 }

	2124

	2125 static void hexForUnicodeString(const UnicodeString &ustr, char * cbuf, int32_t cbuflen)

	2126 {

	2127 int32_t ustri, ustrlen = ustr.length();

	2128

	2129 for (ustri = 0; ustri < ustrlen; ++ustri) {

	2130 if (cbuflen >= 9 /* format width for single code unit(5) + terminating e llipsis(3) + null(1) */) {

	2131 int len = sprintf(cbuf, " %04X", ustr.charAt(ustri));

	2132 cbuflen -= len;

	2133 cbuf += len;

	2134 } else {

	2135 if (cbuflen >= 4 /* terminating ellipsis(3) + null(1) */) {

	2136 sprintf(cbuf, "...");

	2137 } else if (cbuflen >= 1) {

	2138 cbuf = 0;

	2139 }

	2140 break;

	2141 }

	2142 }

	2143 }

	2144

	2145 int32_t SSearchTest::bmMonkeyTestCase(UCollator *coll, const UnicodeString &test Case, const UnicodeString &pattern, const UnicodeString &altPattern,

	2146 BoyerMooreSearch bms, BoyerMooreSearch abm s,

	2147 const char name, const char strength, uint 32_t seed)

	2148 {

	2149 UErrorCode status = U_ZERO_ERROR;

	2150 int32_t actualStart = -1, actualEnd = -1;

	2151 //int32_t expectedStart = prefix.length(), expectedEnd = prefix.length() + alt Pattern.length();

	2152 int32_t expectedStart = -1, expectedEnd = -1;

	2153 int32_t notFoundCount = 0;

	2154 char hexbuf[128];

	2155

	2156 // **** TODO: find all matches, not just first one ****

	2157 simpleSearch(coll, testCase, 0, pattern, expectedStart, expectedEnd);

	2158

	2159 bms->setTargetString(&testCase, status);

	2160 bms->search(0, actualStart, actualEnd);

	2161

	2162 if (expectedStart >= 0 && (actualStart != expectedStart \|\| actualEnd != expe ctedEnd)) {

	2163 hexForUnicodeString(pattern, hexbuf, sizeof(hexbuf));

	2164 errln("Boyer-Moore Search for <pattern> in <%s> failed: expected [%d, %d ], got [%d, %d]\n"

	2165 " strength=%s seed=%d <pattern>: %s",

	2166 name, expectedStart, expectedEnd, actualStart, actualEnd, strength , seed, hexbuf);

	2167 }

	2168

	2169 if (expectedStart == -1 && actualStart == -1) {

	2170 notFoundCount += 1;

	2171 }

	2172

	2173 // **** TODO: find all matches, not just first one ****

	2174 simpleSearch(coll, testCase, 0, altPattern, expectedStart, expectedEnd);

	2175

	2176 abms->setTargetString(&testCase, status);

	2177 abms->search(0, actualStart, actualEnd);

	2178

	2179 if (expectedStart >= 0 && (actualStart != expectedStart \|\| actualEnd != expe ctedEnd)) {

	2180 hexForUnicodeString(altPattern, hexbuf, sizeof(hexbuf));

	2181 errln("Boyer-Moore Search for <alt_pattern> in <%s> failed: expected [%d , %d], got [%d, %d]\n"

	2182 " strength=%s seed=%d <pattern>: %s",

	2183 name, expectedStart, expectedEnd, actualStart, actualEnd, strength , seed, hexbuf);

	2184 }

	2185

	2186 if (expectedStart == -1 && actualStart == -1) {

	2187 notFoundCount += 1;

	2188 }

	2189

	2190

	2191 return notFoundCount;

	2192 }

	2193 #endif

	2194

	2195 void SSearchTest::monkeyTest(char *params)

	2196 {

	2197 // ook!

	2198 UErrorCode status = U_ZERO_ERROR;

	2199 //UCollator *coll = ucol_open(NULL, &status);

	2200 UCollator *coll = ucol_openFromShortString("S1", FALSE, NULL, &status);

	2201

	2202 if (U_FAILURE(status)) {

	2203 errcheckln(status, "Failed to create collator in MonkeyTest! - %s", u_er rorName(status));

	2204 return;

	2205 }

	2206

	2207 CollData *monkeyData = CollData::open(coll, status);

	2208

	2209 USet *expansions = uset_openEmpty();

	2210 USet *contractions = uset_openEmpty();

	2211

	2212 ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &st atus);

	2213

	2214 U_STRING_DECL(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39) ;

	2215 U_STRING_INIT(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39) ;

	2216 USet *letters = uset_openPattern(letter_pattern, 39, &status);

	2217 SetMonkey letterMonkey(letters);

	2218 StringSetMonkey contractionMonkey(contractions, coll, monkeyData);

	2219 StringSetMonkey expansionMonkey(expansions, coll, monkeyData);

	2220 UnicodeString testCase;

	2221 UnicodeString alternate;

	2222 UnicodeString pattern, altPattern;

	2223 UnicodeString prefix, altPrefix;

	2224 UnicodeString suffix, altSuffix;

	2225

	2226 Monkey *monkeys[] = {

	2227 &letterMonkey,

	2228 &contractionMonkey,

	2229 &expansionMonkey,

	2230 &contractionMonkey,

	2231 &expansionMonkey,

	2232 &contractionMonkey,

	2233 &expansionMonkey,

	2234 &contractionMonkey,

	2235 &expansionMonkey};

	2236 int32_t monkeyCount = sizeof(monkeys) / sizeof(monkeys[0]);

	2237 // int32_t nonMatchCount = 0;

	2238

	2239 UCollationStrength strengths[] = {UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIAR Y};

	2240 const char *strengthNames[] = {"primary", "secondary", "tertiary"};

	2241 int32_t strengthCount = sizeof(strengths) / sizeof(strengths[0]);

	2242 int32_t loopCount = quick? 1000 : 10000;

	2243 int32_t firstStrength = 0;

	2244 int32_t lastStrength = strengthCount - 1; //*/ 0;

	2245

	2246 if (params != NULL) {

	2247 #if !UCONFIG_NO_REGULAR_EXPRESSIONS

	2248 UnicodeString p(params);

	2249

	2250 loopCount = getIntParam("loop", p, loopCount);

	2251 m_seed = getIntParam("seed", p, m_seed);

	2252

	2253 RegexMatcher m(" strength = (primary\|secondary\|tertiary) ", p, 0, st atus);

	2254 if (m.find()) {

	2255 UnicodeString breakType = m.group(1, status);

	2256

	2257 for (int32_t s = 0; s < strengthCount; s += 1) {

	2258 if (breakType == strengthNames[s]) {

	2259 firstStrength = lastStrength = s;

	2260 break;

	2261 }

	2262 }

	2263

	2264 m.reset();

	2265 p = m.replaceFirst("", status);

	2266 }

	2267

	2268 if (RegexMatcher("\\S", p, 0, status).find()) {

	2269 // Each option is stripped out of the option string as it is process ed.

	2270 // All options have been checked. The option string should have bee n completely emptied..

	2271 char buf[100];

	2272 p.extract(buf, sizeof(buf), NULL, status);

	2273 buf[sizeof(buf)-1] = 0;

	2274 errln("Unrecognized or extra parameter: %s\n", buf);

	2275 return;

	2276 }

	2277 #else

	2278 infoln("SSearchTest built with UCONFIG_NO_REGULAR_EXPRESSIONS: ignoring parameters.");

	2279 #endif

	2280 }

	2281

	2282 for(int32_t s = firstStrength; s <= lastStrength; s += 1) {

	2283 int32_t notFoundCount = 0;

	2284

	2285 logln("Setting strength to %s.", strengthNames[s]);

	2286 ucol_setStrength(coll, strengths[s]);

	2287

	2288 // TODO: try alternate prefix and suffix too?

	2289 // TODO: alterntaes are only equal at primary strength. Is this OK?

	2290 for(int32_t t = 0; t < loopCount; t += 1) {

	2291 uint32_t seed = m_seed;

	2292 // int32_t nmc = 0;

	2293

	2294 generateTestCase(coll, monkeys, monkeyCount, pattern, altPattern);

	2295 generateTestCase(coll, monkeys, monkeyCount, prefix, altPrefix);

	2296 generateTestCase(coll, monkeys, monkeyCount, suffix, altSuffix);

	2297

	2298 // pattern

	2299 notFoundCount += monkeyTestCase(coll, pattern, pattern, altPattern, "pattern", strengthNames[s], seed);

	2300

	2301 testCase.remove();

	2302 testCase.append(prefix);

	2303 testCase.append(/alt/pattern);

	2304

	2305 // prefix + pattern

	2306 notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "prefix + pattern", strengthNames[s], seed);

	2307

	2308 testCase.append(suffix);

	2309

	2310 // prefix + pattern + suffix

	2311 notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "prefix + pattern + suffix", strengthNames[s], seed);

	2312

	2313 testCase.remove();

	2314 testCase.append(pattern);

	2315 testCase.append(suffix);

	2316

	2317 // pattern + suffix

	2318 notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "pattern + suffix", strengthNames[s], seed);

	2319 }

	2320

	2321 logln("For strength %s the not found count is %d.", strengthNames[s], not FoundCount);

	2322 }

	2323

	2324 uset_close(contractions);

	2325 uset_close(expansions);

	2326 uset_close(letters);

	2327

	2328 CollData::close(monkeyData);

	2329

	2330 ucol_close(coll);

	2331 }

	2332

	2333 void SSearchTest::bmMonkeyTest(char *params)

	2334 {

	2335 static const UVersionInfo icu47 = { 4, 7, 0, 0 }; // for timebomb

	2336 static const UChar skipChars[] = { 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0 xAAB5, 0xAAB6, 0xAAB9, 0xAABB, 0xAABC, 0 }; // for timebomb

	2337 // ook!

	2338 UErrorCode status = U_ZERO_ERROR;

	2339 UCollator *coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status);

	2340

	2341 if (U_FAILURE(status)) {

	2342 errcheckln(status, "Failed to create collator in MonkeyTest! - %s", u_er rorName(status));

	2343 return;

	2344 }

	2345

	2346 CollData *monkeyData = CollData::open(coll, status);

	2347

	2348 USet *expansions = uset_openEmpty();

	2349 USet *contractions = uset_openEmpty();

	2350

	2351 ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &st atus);

	2352

	2353 U_STRING_DECL(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39) ;

	2354 U_STRING_INIT(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39) ;

	2355 USet *letters = uset_openPattern(letter_pattern, 39, &status);

	2356 SetMonkey letterMonkey(letters);

	2357 StringSetMonkey contractionMonkey(contractions, coll, monkeyData);

	2358 StringSetMonkey expansionMonkey(expansions, coll, monkeyData);

	2359 UnicodeString testCase;

	2360 UnicodeString alternate;

	2361 UnicodeString pattern, altPattern;

	2362 UnicodeString prefix, altPrefix;

	2363 UnicodeString suffix, altSuffix;

	2364

	2365 Monkey *monkeys[] = {

	2366 &letterMonkey,

	2367 &contractionMonkey,

	2368 &expansionMonkey,

	2369 &contractionMonkey,

	2370 &expansionMonkey,

	2371 &contractionMonkey,

	2372 &expansionMonkey,

	2373 &contractionMonkey,

	2374 &expansionMonkey};

	2375 int32_t monkeyCount = sizeof(monkeys) / sizeof(monkeys[0]);

	2376 // int32_t nonMatchCount = 0;

	2377

	2378 UCollationStrength strengths[] = {UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIAR Y};

	2379 const char *strengthNames[] = {"primary", "secondary", "tertiary"};

	2380 int32_t strengthCount = sizeof(strengths) / sizeof(strengths[0]);

	2381 int32_t loopCount = quick? 1000 : 10000;

	2382 int32_t firstStrength = 0;

	2383 int32_t lastStrength = strengthCount - 1; //*/ 0;

	2384

	2385 if (params != NULL) {

	2386 #if !UCONFIG_NO_REGULAR_EXPRESSIONS

	2387 UnicodeString p(params);

	2388

	2389 loopCount = getIntParam("loop", p, loopCount);

	2390 m_seed = getIntParam("seed", p, m_seed);

	2391

	2392 RegexMatcher m(" strength = (primary\|secondary\|tertiary) ", p, 0, st atus);

	2393 if (m.find()) {

	2394 UnicodeString breakType = m.group(1, status);

	2395

	2396 for (int32_t s = 0; s < strengthCount; s += 1) {

	2397 if (breakType == strengthNames[s]) {

	2398 firstStrength = lastStrength = s;

	2399 break;

	2400 }

	2401 }

	2402

	2403 m.reset();

	2404 p = m.replaceFirst("", status);

	2405 }

	2406

	2407 if (RegexMatcher("\\S", p, 0, status).find()) {

	2408 // Each option is stripped out of the option string as it is process ed.

	2409 // All options have been checked. The option string should have bee n completely emptied..

	2410 char buf[100];

	2411 p.extract(buf, sizeof(buf), NULL, status);

	2412 buf[sizeof(buf)-1] = 0;

	2413 errln("Unrecognized or extra parameter: %s\n", buf);

	2414 return;

	2415 }

	2416 #else

	2417 infoln("SSearchTest built with UCONFIG_NO_REGULAR_EXPRESSIONS: ignoring parameters.");

	2418 #endif

	2419 }

	2420

	2421 for(int32_t s = firstStrength; s <= lastStrength; s += 1) {

	2422 int32_t notFoundCount = 0;

	2423

	2424 logln("Setting strength to %s.", strengthNames[s]);

	2425 ucol_setStrength(coll, strengths[s]);

	2426

	2427 CollData *data = CollData::open(coll, status);

	2428

	2429 UnicodeString skipString(skipChars); // for timebomb

	2430 UnicodeSet* skipSet = UnicodeSet::createFromAll(skipString); // for time bomb

	2431 // TODO: try alternate prefix and suffix too?

	2432 // TODO: alterntaes are only equal at primary strength. Is this OK?

	2433 for(int32_t t = 0; t < loopCount; t += 1) {

	2434 uint32_t seed = m_seed;

	2435 // int32_t nmc = 0;

	2436

	2437 generateTestCase(coll, monkeys, monkeyCount, pattern, altPattern);

	2438 generateTestCase(coll, monkeys, monkeyCount, prefix, altPrefix);

	2439 generateTestCase(coll, monkeys, monkeyCount, suffix, altSuffix);

	2440

	2441 if (!isICUVersionAtLeast(icu47) && skipSet->containsSome(pattern)) {

	2442 continue; // timebomb until ticket #8080 is resolved

	2443 }

	2444

	2445 BoyerMooreSearch pat(data, pattern, NULL, status);

	2446 BoyerMooreSearch alt(data, altPattern, NULL, status);

	2447

	2448 // ** need a better way to deal with this **

	2449 #if 0

	2450 if (pat.empty() \|\|

	2451 alt.empty()) {

	2452 continue;

	2453 }

	2454 #endif

	2455

	2456 // pattern

	2457 notFoundCount += bmMonkeyTestCase(coll, pattern, pattern, altPattern , &pat, &alt, "pattern", strengthNames[s], seed);

	2458

	2459 testCase.remove();

	2460 testCase.append(prefix);

	2461 testCase.append(/alt/pattern);

	2462

	2463 // prefix + pattern

	2464 notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPatter n, &pat, &alt, "prefix + pattern", strengthNames[s], seed);

	2465

	2466 testCase.append(suffix);

	2467

	2468 // prefix + pattern + suffix

	2469 notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPatter n, &pat, &alt, "prefix + pattern + suffix", strengthNames[s], seed);

	2470

	2471 testCase.remove();

	2472 testCase.append(pattern);

	2473 testCase.append(suffix);

	2474

	2475 // pattern + suffix

	2476 notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPatter n, &pat, &alt, "pattern + suffix", strengthNames[s], seed);

	2477 }

	2478 delete skipSet; // for timebomb

	2479

	2480 CollData::close(data);

	2481

	2482 logln("For strength %s the not found count is %d.", strengthNames[s], no tFoundCount);

	2483 }

	2484

	2485 uset_close(contractions);

	2486 uset_close(expansions);

	2487 uset_close(letters);

	2488

	2489 CollData::close(monkeyData);

	2490

	2491 ucol_close(coll);

	2492 }

	2493

	2494 void SSearchTest::stringListTest(){

	2495 UErrorCode status = U_ZERO_ERROR;

	2496 StringList *sl = new StringList(status);

	2497 if(U_FAILURE(status)){

	2498 errln("ERROR: stringListTest: Could not start StringList");

	2499 }

	2500

	2501 const UChar chars[] = {

	2502 0x0000

	2503 };

	2504 sl->add(chars, (int32_t) 0, status);

	2505 if(U_FAILURE(status)){

	2506 errln("ERROR: stringListTest: StringList::add");

	2507 }

	2508

	2509 if(sl->getDynamicClassID() != StringList::getStaticClassID()){

	2510 errln("ERROR: stringListTest: getDynamicClassID and getStaticClassID doe s not match");

	2511 }

	2512 delete sl;

	2513 }

	2514

	2515 #endif

	2516

	2517 #endif

OLD	NEW

« no previous file with comments | « icu46/source/test/intltest/ssearch.h ('k') | icu46/source/test/intltest/strcase.cpp » ('j') | no next file with comments »