source/test/intltest/rbbiapts.cpp - Issue 2435373002: Delete source/test

Side by Side Diff: source/test/intltest/rbbiapts.cpp

Issue 2435373002: Delete source/test (Closed)

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 /********************************************************************

2 * Copyright (c) 1999-2014, International Business Machines

3 * Corporation and others. All Rights Reserved.

4 ********************************************************************

5 * Date Name Description

6 * 12/14/99 Madhu Creation.

7 * 01/12/2000 Madhu updated for changed API

8 ********************************************************************/

9

10 #include "unicode/utypes.h"

11

12 #if !UCONFIG_NO_BREAK_ITERATION

13

14 #include "unicode/uchar.h"

15 #include "intltest.h"

16 #include "unicode/rbbi.h"

17 #include "unicode/schriter.h"

18 #include "rbbiapts.h"

19 #include "rbbidata.h"

20 #include "cstring.h"

21 #include "ubrkimpl.h"

22 #include "unicode/locid.h"

23 #include "unicode/ustring.h"

24 #include "unicode/utext.h"

25 #include "cmemory.h"

26 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING

27 #include "unicode/filteredbrk.h"

28 #include <stdio.h> // for sprintf

29 #endif

30 /**

31 * API Test the RuleBasedBreakIterator class

32 */

33

34

// Reports a data error, with file and line, when `status` holds an ICU failure code.
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\
    dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}

// Reports a test error, with file, line and the expression text, when `expr` evaluates to FALSE.
#define TEST_ASSERT(expr) {if ((expr) == FALSE) { \
    errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);}}

40

41 void RBBIAPITest::TestCloneEquals()

42 {

43

44 UErrorCode status=U_ZERO_ERROR;

45 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIte rator::createCharacterInstance(Locale::getDefault(), status);

46 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIte rator::createCharacterInstance(Locale::getDefault(), status);

47 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIte rator::createCharacterInstance(Locale::getDefault(), status);

48 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIte rator::createWordInstance(Locale::getDefault(), status);

49 if(U_FAILURE(status)){

50 errcheckln(status, "Fail : in construction - %s", u_errorName(status));

51 return;

52 }

53

54

55 UnicodeString testString="Testing word break iterators's clone() and equals( )";

56 bi1->setText(testString);

57 bi2->setText(testString);

58 biequal->setText(testString);

59

60 bi3->setText("hello");

61

62 logln((UnicodeString)"Testing equals()");

63

64 logln((UnicodeString)"Testing == and !=");

65 UBool b = (bi1 != biequal);

66 b \|= bi1 == bi2;

67 b \|= bi1 == bi3;

68 if (b) {

69 errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");

70 }

71

72 if(bi2 == biequal \|\| bi2 == bi1 \|\| biequal == bi3)

73 errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed.");

74

75

76 // Quick test of RulesBasedBreakIterator assignment -

77 // Check that

78 // two different iterators are !=

79 // they are == after assignment

80 // source and dest iterator produce the same next() after assignment.

81 // deleting one doesn't disable the other.

82 logln("Testing assignment");

83 RuleBasedBreakIterator bix = (RuleBasedBreakIterator )BreakIterator::creat eLineInstance(Locale::getDefault(), status);

84 if(U_FAILURE(status)){

85 errcheckln(status, "Fail : in construction - %s", u_errorName(status));

86 return;

87 }

88

89 RuleBasedBreakIterator biDefault, biDefault2;

90 if(U_FAILURE(status)){

91 errln((UnicodeString)"FAIL : in construction of default iterator");

92 return;

93 }

94 if (biDefault == *bix) {

95 errln((UnicodeString)"ERROR: iterators should not compare ==");

96 return;

97 }

98 if (biDefault != biDefault2) {

99 errln((UnicodeString)"ERROR: iterators should compare ==");

100 return;

101 }

102

103

104 UnicodeString HelloString("Hello Kitty");

105 bix->setText(HelloString);

106 if (bix == bi2) {

107 errln(UnicodeString("ERROR: strings should not be equal before assignmen t."));

108 }

109 bix = bi2;

110 if (bix != bi2) {

111 errln(UnicodeString("ERROR: strings should be equal before assignment.") );

112 }

113

114 int bixnext = bix->next();

115 int bi2next = bi2->next();

116 if (! (bixnext == bi2next && bixnext == 7)) {

117 errln(UnicodeString("ERROR: iterators behaved differently after assignme nt."));

118 }

119 delete bix;

120 if (bi2->next() != 8) {

121 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy." ));

122 }

123

124

125

126 logln((UnicodeString)"Testing clone()");

127 RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();

128 RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();

129

130 if(bi1clone != bi1 \|\| bi1clone != biequal \|\|

131 bi1clone == bi3 \|\| bi1clone == bi2)

132 errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");

133

134 if(bi2clone == bi1 \|\| bi2clone == biequal \|\|

135 bi2clone == bi3 \|\| bi2clone != bi2)

136 errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");

137

138 if(bi1->getText() != bi1clone->getText() \|\|

139 bi2clone->getText() != bi2->getText() \|\|

140 bi2clone == bi1clone )

141 errln((UnicodeString)"ERROR: RBBI's clone() method failed");

142

143 delete bi1clone;

144 delete bi2clone;

145 delete bi1;

146 delete bi3;

147 delete bi2;

148 delete biequal;

149 }

150

151 void RBBIAPITest::TestBoilerPlate()

152 {

153 UErrorCode status = U_ZERO_ERROR;

154 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);

155 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status) ;

156 if (U_FAILURE(status)) {

157 errcheckln(status, "Creation of break iterator failed %s", u_errorName(s tatus));

158 return;

159 }

160 if(a!=b){

161 errln("Failed: boilerplate method operator!= does not return correct res ults");

162 }

163 // Japanese word break iterators are identical to root with

164 // a dictionary-based break iterator

165 BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),statu s);

166 BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),sta tus);

167 if(c && d){

168 if(c!=d){

169 errln("Failed: boilerplate method operator== does not return correct results");

170 }

171 }else{

172 errln("creation of break iterator failed");

173 }

174 delete a;

175 delete b;

176 delete c;

177 delete d;

178 }

179

180 void RBBIAPITest::TestgetRules()

181 {

182 UErrorCode status=U_ZERO_ERROR;

183

184 RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator: :createCharacterInstance(Locale::getDefault(), status);

185 RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator: :createWordInstance(Locale::getDefault(), status);

186 if(U_FAILURE(status)){

187 errcheckln(status, "FAIL: in construction - %s", u_errorName(status));

188 delete bi1;

189 delete bi2;

190 return;

191 }

192

193

194

195 logln((UnicodeString)"Testing toString()");

196

197 bi1->setText((UnicodeString)"Hello there");

198

199 RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone();

200

201 UnicodeString temp=bi1->getRules();

202 UnicodeString temp2=bi2->getRules();

203 UnicodeString temp3=bi3->getRules();

204 if( temp2.compare(temp3) ==0 \|\| temp.compare(temp2) == 0 \|\| temp.compare(tem p3) != 0)

205 errln((UnicodeString)"ERROR: error in getRules() method");

206

207 delete bi1;

208 delete bi2;

209 delete bi3;

210 }

211 void RBBIAPITest::TestHashCode()

212 {

213 UErrorCode status=U_ZERO_ERROR;

214 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIte rator::createCharacterInstance(Locale::getDefault(), status);

215 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIte rator::createCharacterInstance(Locale::getDefault(), status);

216 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIte rator::createWordInstance(Locale::getDefault(), status);

217 if(U_FAILURE(status)){

218 errcheckln(status, "Fail : in construction - %s", u_errorName(status));

219 delete bi1;

220 delete bi2;

221 delete bi3;

222 return;

223 }

224

225

226 logln((UnicodeString)"Testing hashCode()");

227

228 bi1->setText((UnicodeString)"Hash code");

229 bi2->setText((UnicodeString)"Hash code");

230 bi3->setText((UnicodeString)"Hash code");

231

232 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();

233 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();

234

235 if(bi1->hashCode() != bi1clone->hashCode() \|\| bi1->hashCode() != bi3->hashC ode() \|\|

236 bi1clone->hashCode() != bi3->hashCode() \|\| bi2->hashCode() != bi2clone-> hashCode())

237 errln((UnicodeString)"ERROR: identical objects have different hashcodes" );

238

239 if(bi1->hashCode() == bi2->hashCode() \|\| bi2->hashCode() == bi3->hashCode() \|\|

240 bi1clone->hashCode() == bi2clone->hashCode() \|\| bi1clone->hashCode() == bi2->hashCode())

241 errln((UnicodeString)"ERROR: different objects have same hashcodes");

242

243 delete bi1clone;

244 delete bi2clone;

245 delete bi1;

246 delete bi2;

247 delete bi3;

248

249 }

250 void RBBIAPITest::TestGetSetAdoptText()

251 {

252 logln((UnicodeString)"Testing getText setText ");

253 IcuTestErrorCode status(*this, "TestGetSetAdoptText");

254 UnicodeString str1="first string.";

255 UnicodeString str2="Second string.";

256 LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)Rule BasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));

257 LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)Rule BasedBreakIterator::createWordInstance(Locale::getDefault(), status));

258 if(status.isFailure()){

259 errcheckln(status, "Fail : in construction - %s", status.errorName());

260 return;

261 }

262

263

264 CharacterIterator* text1= new StringCharacterIterator(str1);

265 CharacterIterator* text1Clone = text1->clone();

266 CharacterIterator* text2= new StringCharacterIterator(str2);

267 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // " ond str"

268

269 wordIter1->setText(str1);

270 CharacterIterator *tci = &wordIter1->getText();

271 UnicodeString tstr;

272 tci->getText(tstr);

273 TEST_ASSERT(tstr == str1);

274 if(wordIter1->current() != 0)

275 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\ n");

276

277 wordIter1->next(2);

278

279 wordIter1->setText(str2);

280 if(wordIter1->current() != 0)

281 errln((UnicodeString)"ERROR:2 setText did not reset the iteration positi on to the beginning of the text, it is" + wordIter1->current() + (UnicodeString) "\n");

282

283

284 charIter1->adoptText(text1Clone);

285 TEST_ASSERT(wordIter1->getText() != charIter1->getText());

286 tci = &wordIter1->getText();

287 tci->getText(tstr);

288 TEST_ASSERT(tstr == str2);

289 tci = &charIter1->getText();

290 tci->getText(tstr);

291 TEST_ASSERT(tstr == str1);

292

293

294 LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1-> clone());

295 rb->adoptText(text1);

296 if(rb->getText() != *text1)

297 errln((UnicodeString)"ERROR:1 error in adoptText ");

298 rb->adoptText(text2);

299 if(rb->getText() != *text2)

300 errln((UnicodeString)"ERROR:2 error in adoptText ");

301

302 // Adopt where iterator range is less than the entire orignal source string.

303 // (With the change of the break engine to working with UText internally,

304 // CharacterIterators starting at positions other than zero are not suppo rted)

305 rb->adoptText(text3);

306 TEST_ASSERT(rb->preceding(2) == 0);

307 TEST_ASSERT(rb->following(11) == BreakIterator::DONE);

308 //if(rb->preceding(2) != 3) {

309 // errln((UnicodeString)"ERROR:3 error in adoptText ");

310 //}

311 //if(rb->following(11) != BreakIterator::DONE) {

312 // errln((UnicodeString)"ERROR:4 error in adoptText ");

313 //}

314

315 // UText API

316 //

317 // Quick test to see if UText is working at all.

318 //

319 const char s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; / "hello w orld" in UTF-8 */

320 const char s2 = "\x73\x65\x65\x20\x79\x61"; / "see ya" in UTF-8 */

321 // 012345678901

322

323 status.reset();

324 LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));

325 wordIter1->setText(ut.getAlias(), status);

326 TEST_ASSERT_SUCCESS(status);

327

328 int32_t pos;

329 pos = wordIter1->first();

330 TEST_ASSERT(pos==0);

331 pos = wordIter1->next();

332 TEST_ASSERT(pos==5);

333 pos = wordIter1->next();

334 TEST_ASSERT(pos==6);

335 pos = wordIter1->next();

336 TEST_ASSERT(pos==11);

337 pos = wordIter1->next();

338 TEST_ASSERT(pos==UBRK_DONE);

339

340 status.reset();

341 LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));

342 TEST_ASSERT_SUCCESS(status);

343 wordIter1->setText(ut2.getAlias(), status);

344 TEST_ASSERT_SUCCESS(status);

345

346 pos = wordIter1->first();

347 TEST_ASSERT(pos==0);

348 pos = wordIter1->next();

349 TEST_ASSERT(pos==3);

350 pos = wordIter1->next();

351 TEST_ASSERT(pos==4);

352

353 pos = wordIter1->last();

354 TEST_ASSERT(pos==6);

355 pos = wordIter1->previous();

356 TEST_ASSERT(pos==4);

357 pos = wordIter1->previous();

358 TEST_ASSERT(pos==3);

359 pos = wordIter1->previous();

360 TEST_ASSERT(pos==0);

361 pos = wordIter1->previous();

362 TEST_ASSERT(pos==UBRK_DONE);

363

364 status.reset();

365 UnicodeString sEmpty;

366 LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));

367 wordIter1->getUText(gut2.getAlias(), status);

368 TEST_ASSERT_SUCCESS(status);

369 status.reset();

370 }

371

372

373 void RBBIAPITest::TestIteration()

374 {

375 // This test just verifies that the API is present.

376 // Testing for correct operation of the break rules happens elsewhere.

377

378 UErrorCode status=U_ZERO_ERROR;

379 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterato r::createCharacterInstance(Locale::getDefault(), status);

380 if (U_FAILURE(status) \|\| bi == NULL) {

381 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));

382 }

383 delete bi;

384

385 status=U_ZERO_ERROR;

386 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Lo cale::getDefault(), status);

387 if (U_FAILURE(status) \|\| bi == NULL) {

388 errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status));

389 }

390 delete bi;

391

392 status=U_ZERO_ERROR;

393 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Lo cale::getDefault(), status);

394 if (U_FAILURE(status) \|\| bi == NULL) {

395 errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status));

396 }

397 delete bi;

398

399 status=U_ZERO_ERROR;

400 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstanc e(Locale::getDefault(), status);

401 if (U_FAILURE(status) \|\| bi == NULL) {

402 errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status));

403 }

404 delete bi;

405

406 status=U_ZERO_ERROR;

407 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(L ocale::getDefault(), status);

408 if (U_FAILURE(status) \|\| bi == NULL) {

409 errcheckln(status, "Failure creating Title break iterator. Status = %s" , u_errorName(status));

410 }

411 delete bi;

412

413 status=U_ZERO_ERROR;

414 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstan ce(Locale::getDefault(), status);

415 if (U_FAILURE(status) \|\| bi == NULL) {

416 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));

417 return; // Skip the rest of these tests.

418 }

419

420

421 UnicodeString testString="0123456789";

422 bi->setText(testString);

423

424 int32_t i;

425 i = bi->first();

426 if (i != 0) {

427 errln("Incorrect value from bi->first(). Expected 0, got %d.", i);

428 }

429

430 i = bi->last();

431 if (i != 10) {

432 errln("Incorrect value from bi->last(). Expected 10, got %d", i);

433 }

434

435 //

436 // Previous

437 //

438 bi->last();

439 i = bi->previous();

440 if (i != 9) {

441 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__, i);

442 }

443

444

445 bi->first();

446 i = bi->previous();

447 if (i != BreakIterator::DONE) {

448 errln("Incorrect value from bi->previous() at line %d. Expected DONE, g ot %d", __LINE__, i);

449 }

450

451 //

452 // next()

453 //

454 bi->first();

455 i = bi->next();

456 if (i != 1) {

457 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__, i);

458 }

459

460 bi->last();

461 i = bi->next();

462 if (i != BreakIterator::DONE) {

463 errln("Incorrect value from bi->next() at line %d. Expected DONE, got % d", __LINE__, i);

464 }

465

466

467 //

468 // current()

469 //

470 bi->first();

471 i = bi->current();

472 if (i != 0) {

473 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i);

474 }

475

476 bi->next();

477 i = bi->current();

478 if (i != 1) {

479 errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__, i);

480 }

481

482 bi->last();

483 bi->next();

484 i = bi->current();

485 if (i != 10) {

486 errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__, i);

487 }

488

489 bi->first();

490 bi->previous();

491 i = bi->current();

492 if (i != 0) {

493 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i);

494 }

495

496

497 //

498 // Following()

499 //

500 i = bi->following(4);

501 if (i != 5) {

502 errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__, i);

503 }

504

505 i = bi->following(9);

506 if (i != 10) {

507 errln("Incorrect value from bi->following() at line %d. Expected 10, go t %d", __LINE__, i);

508 }

509

510 i = bi->following(10);

511 if (i != BreakIterator::DONE) {

512 errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__, i);

513 }

514

515

516 //

517 // Preceding

518 //

519 i = bi->preceding(4);

520 if (i != 3) {

521 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__, i);

522 }

523

524 i = bi->preceding(10);

525 if (i != 9) {

526 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__, i);

527 }

528

529 i = bi->preceding(1);

530 if (i != 0) {

531 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__, i);

532 }

533

534 i = bi->preceding(0);

535 if (i != BreakIterator::DONE) {

536 errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__, i);

537 }

538

539

540 //

541 // isBoundary()

542 //

543 bi->first();

544 if (bi->isBoundary(3) != TRUE) {

545 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__, i);

546 }

547 i = bi->current();

548 if (i != 3) {

549 errln("Incorrect value from bi->current() at line %d. Expected 3, got % d", __LINE__, i);

550 }

551

552

553 if (bi->isBoundary(11) != FALSE) {

554 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__, i);

555 }

556 i = bi->current();

557 if (i != 10) {

558 errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__, i);

559 }

560

561 //

562 // next(n)

563 //

564 bi->first();

565 i = bi->next(4);

566 if (i != 4) {

567 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__, i);

568 }

569

570 i = bi->next(6);

571 if (i != 10) {

572 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d" , __LINE__, i);

573 }

574

575 bi->first();

576 i = bi->next(11);

577 if (i != BreakIterator::DONE) {

578 errln("Incorrect value from bi->next() at line %d. Expected BreakIterat or::DONE, got %d", __LINE__, i);

579 }

580

581 delete bi;

582

583 }

584

585

586

587

588

589

590 void RBBIAPITest::TestBuilder() {

591 UnicodeString rulesString1 = "$Letters = [:L:];\n"

592 "$Numbers = [:N:];\n"

593 "$Letters+;\n"

594 "$Numbers+;\n"

595 "[^$Letters $Numbers];\n"

596 "!.*;\n";

597 UnicodeString testString1 = "abc123..abc";

598 // 01234567890

599 int32_t bounds1[] = {0, 3, 6, 7, 8, 11};

600 UErrorCode status=U_ZERO_ERROR;

601 UParseError parseError;

602

603 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parse Error, status);

604 if(U_FAILURE(status)) {

605 dataerrln("Fail : in construction - %s", u_errorName(status));

606 } else {

607 bi->setText(testString1);

608 doBoundaryTest(*bi, testString1, bounds1);

609 }

610 delete bi;

611 }

612

613

614 //

615 // TestQuoteGrouping

616 // Single quotes within rules imply a grouping, so that a modifier

617 // following the quoted text (* or +) applies to all of the quoted chars.

618 //

619 void RBBIAPITest::TestQuoteGrouping() {

620 UnicodeString rulesString1 = "#Here comes the rule...\n"

621 "'$@!';\n" // (\$\@\!)

622 ".;\n";

623

624 UnicodeString testString1 = "$@!$@!X$@!!X";

625 // 0123456789012

626 int32_t bounds1[] = {0, 6, 7, 10, 11, 12};

627 UErrorCode status=U_ZERO_ERROR;

628 UParseError parseError;

629

630 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parse Error, status);

631 if(U_FAILURE(status)) {

632 dataerrln("Fail : in construction - %s", u_errorName(status));

633 } else {

634 bi->setText(testString1);

635 doBoundaryTest(*bi, testString1, bounds1);

636 }

637 delete bi;

638 }

639

640 //

641 // TestRuleStatus

642 // Test word break rule status constants.

643 //

644 void RBBIAPITest::TestRuleStatus() {

645 UChar str[30];

646 //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing

647 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO

648 u_unescape("plain word 123.45 \\u30a1\\u30a2 ",

649 // 012345678901234567 8 9 0

650 // Katakana

651 str, 30);

652 UnicodeString testString1(str);

653 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};

654 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER,

655 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE,

656 UBRK_WORD_IDEO, UBRK_WORD_NONE};

657

658 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WOR D_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,

659 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WOR D_NONE_LIMIT,

660 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};

661

662 UErrorCode status=U_ZERO_ERROR;

663

664 BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);

665 if(U_FAILURE(status)) {

666 errcheckln(status, "Fail : in construction - %s", u_errorName(status));

667 } else {

668 bi->setText(testString1);

669 // First test that the breaks are in the right spots.

670 doBoundaryTest(*bi, testString1, bounds1);

671

672 // Then go back and check tag values

673 int32_t i = 0;

674 int32_t pos, tag;

675 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i ++) {

676 if (pos != bounds1[i]) {

677 errln("FAIL: unexpected word break at postion %d", pos);

678 break;

679 }

680 tag = bi->getRuleStatus();

681 if (tag < tag_lo[i] \|\| tag >= tag_hi[i]) {

682 errln("FAIL: incorrect tag value %d at position %d", tag, pos);

683 break;

684 }

685

686 // Check that we get the same tag values from getRuleStatusVec()

687 int32_t vec[10];

688 int t = bi->getRuleStatusVec(vec, 10, status);

689 TEST_ASSERT_SUCCESS(status);

690 TEST_ASSERT(t==1);

691 TEST_ASSERT(vec[0] == tag);

692 }

693 }

694 delete bi;

695

696 // Now test line break status. This test mostly is to confirm that the sta tus constants

697 // are correctly declared in the header.

698 testString1 = "test line. \n";

699 // break type s s h

700

701 bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);

702 if(U_FAILURE(status)) {

703 errcheckln(status, "failed to create word break iterator. - %s", u_erro rName(status));

704 } else {

705 int32_t i = 0;

706 int32_t pos, tag;

707 UBool success;

708

709 bi->setText(testString1);

710 pos = bi->current();

711 tag = bi->getRuleStatus();

712 for (i=0; i<3; i++) {

713 switch (i) {

714 case 0:

715 success = pos==0 && tag==UBRK_LINE_SOFT; break;

716 case 1:

717 success = pos==5 && tag==UBRK_LINE_SOFT; break;

718 case 2:

719 success = pos==12 && tag==UBRK_LINE_HARD; break;

720 default:

721 success = FALSE; break;

722 }

723 if (success == FALSE) {

724 errln("Fail: incorrect word break status or position. i=%d, po s=%d, tag=%d",

725 i, pos, tag);

726 break;

727 }

728 pos = bi->next();

729 tag = bi->getRuleStatus();

730 }

731 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT \|\|

732 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT \|\|

733 (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT _LIMIT)) {

734 errln("UBRK_LINE_* constants from header are inconsistent.");

735 }

736 }

737 delete bi;

738

739 }

740

741

742 //

743 // TestRuleStatusVec

744 // Test the vector form of break rule status.

745 //

746 void RBBIAPITest::TestRuleStatusVec() {

747 UnicodeString rulesString( "[A-N]{100}; \n"

748 "[a-w]{200}; \n"

749 "[\\p{L}]{300}; \n"

750 "[\\p{N}]{400}; \n"

751 "[0-5]{500}; \n"

752 "!.*;\n", -1, US_INV);

753 UnicodeString testString1 = "Aapz5?";

754 int32_t statusVals[10];

755 int32_t numStatuses;

756 int32_t pos;

757

758 UErrorCode status=U_ZERO_ERROR;

759 UParseError parseError;

760

761 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseE rror, status);

762 if (U_FAILURE(status)) {

763 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__ , u_errorName(status));

764 } else {

765 bi->setText(testString1);

766

767 // A

768 pos = bi->next();

769 TEST_ASSERT(pos==1);

770 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);

771 TEST_ASSERT_SUCCESS(status);

772 TEST_ASSERT(numStatuses == 2);

773 TEST_ASSERT(statusVals[0] == 100);

774 TEST_ASSERT(statusVals[1] == 300);

775

776 // a

777 pos = bi->next();

778 TEST_ASSERT(pos==2);

779 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);

780 TEST_ASSERT_SUCCESS(status);

781 TEST_ASSERT(numStatuses == 2);

782 TEST_ASSERT(statusVals[0] == 200);

783 TEST_ASSERT(statusVals[1] == 300);

784

785 // p

786 pos = bi->next();

787 TEST_ASSERT(pos==3);

788 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);

789 TEST_ASSERT_SUCCESS(status);

790 TEST_ASSERT(numStatuses == 2);

791 TEST_ASSERT(statusVals[0] == 200);

792 TEST_ASSERT(statusVals[1] == 300);

793

794 // z

795 pos = bi->next();

796 TEST_ASSERT(pos==4);

797 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);

798 TEST_ASSERT_SUCCESS(status);

799 TEST_ASSERT(numStatuses == 1);

800 TEST_ASSERT(statusVals[0] == 300);

801

802 // 5

803 pos = bi->next();

804 TEST_ASSERT(pos==5);

805 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);

806 TEST_ASSERT_SUCCESS(status);

807 TEST_ASSERT(numStatuses == 2);

808 TEST_ASSERT(statusVals[0] == 400);

809 TEST_ASSERT(statusVals[1] == 500);

810

811 // ?

812 pos = bi->next();

813 TEST_ASSERT(pos==6);

814 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);

815 TEST_ASSERT_SUCCESS(status);

816 TEST_ASSERT(numStatuses == 1);

817 TEST_ASSERT(statusVals[0] == 0);

818

819 //

820 // Check buffer overflow error handling. Char == A

821 //

822 bi->first();

823 pos = bi->next();

824 TEST_ASSERT(pos==1);

825 memset(statusVals, -1, sizeof(statusVals));

826 numStatuses = bi->getRuleStatusVec(statusVals, 0, status);

827 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);

828 TEST_ASSERT(numStatuses == 2);

829 TEST_ASSERT(statusVals[0] == -1);

830

831 status = U_ZERO_ERROR;

832 memset(statusVals, -1, sizeof(statusVals));

833 numStatuses = bi->getRuleStatusVec(statusVals, 1, status);

834 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);

835 TEST_ASSERT(numStatuses == 2);

836 TEST_ASSERT(statusVals[0] == 100);

837 TEST_ASSERT(statusVals[1] == -1);

838

839 status = U_ZERO_ERROR;

840 memset(statusVals, -1, sizeof(statusVals));

841 numStatuses = bi->getRuleStatusVec(statusVals, 2, status);

842 TEST_ASSERT_SUCCESS(status);

843 TEST_ASSERT(numStatuses == 2);

844 TEST_ASSERT(statusVals[0] == 100);

845 TEST_ASSERT(statusVals[1] == 300);

846 TEST_ASSERT(statusVals[2] == -1);

847 }

848 delete bi;

849

850 }

851

852 //

853 // Bug 2190 Regression test. Builder crash on rule consisting of only a

854 // $variable reference

855 void RBBIAPITest::TestBug2190() {

856 UnicodeString rulesString1 = "$aaa = abcd;\n"

857 "$bbb = $aaa;\n"

858 "$bbb;\n";

859 UnicodeString testString1 = "abcdabcd";

860 // 01234567890

861 int32_t bounds1[] = {0, 4, 8};

862 UErrorCode status=U_ZERO_ERROR;

863 UParseError parseError;

864

865 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parse Error, status);

866 if(U_FAILURE(status)) {

867 dataerrln("Fail : in construction - %s", u_errorName(status));

868 } else {

869 bi->setText(testString1);

870 doBoundaryTest(*bi, testString1, bounds1);

871 }

872 delete bi;

873 }

874

875

876 void RBBIAPITest::TestRegistration() {

877 #if !UCONFIG_NO_SERVICE

878 UErrorCode status = U_ZERO_ERROR;

879 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);

880 // ok to not delete these if we exit because of error?

881 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", sta tus);

882 BreakIterator* root_word = BreakIterator::createWordInstance("", status);

883 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status );

884

885 if (status == U_MISSING_RESOURCE_ERROR \|\| status == U_FILE_ACCESS_ERROR) {

886 dataerrln("Error creating instances of break interactors - %s", u_errorN ame(status));

887

888 delete ja_word;

889 delete ja_char;

890 delete root_word;

891 delete root_char;

892

893 return;

894 }

895

896 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);

897 {

898 #if 0 // With a dictionary based word breaking, ja_word is identical to root.

899 if (ja_word && ja_word == root_word) {

900 errln("japan not different from root");

901 }

902 #endif

903 }

904

905 {

906 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", statu s);

907 UBool fail = TRUE;

908 if(result){

909 fail = result != ja_word;

910 }

911 delete result;

912 if (fail) {

913 errln("bad result for xx_XX/word");

914 }

915 }

916

917 {

918 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);

919 UBool fail = TRUE;

920 if(result){

921 fail = result != ja_char;

922 }

923 delete result;

924 if (fail) {

925 errln("bad result for ja_JP/char");

926 }

927 }

928

929 {

930 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);

931 UBool fail = TRUE;

932 if(result){

933 fail = result != root_char;

934 }

935 delete result;

936 if (fail) {

937 errln("bad result for xx_XX/char");

938 }

939 }

940

941 {

942 StringEnumeration* avail = BreakIterator::getAvailableLocales();

943 UBool found = FALSE;

944 const UnicodeString* p;

945 while ((p = avail->snext(status))) {

946 if (p->compare("xx") == 0) {

947 found = TRUE;

948 break;

949 }

950 }

951 delete avail;

952 if (!found) {

953 errln("did not find test locale");

954 }

955 }

956

957 {

958 UBool unreg = BreakIterator::unregister(key, status);

959 if (!unreg) {

960 errln("unable to unregister");

961 }

962 }

963

964 {

965 BreakIterator* result = BreakIterator::createWordInstance("en_US", statu s);

966 BreakIterator* root = BreakIterator::createWordInstance("", status);

967 UBool fail = TRUE;

968 if(root){

969 fail = root != result;

970 }

971 delete root;

972 delete result;

973 if (fail) {

974 errln("did not get root break");

975 }

976 }

977

978 {

979 StringEnumeration* avail = BreakIterator::getAvailableLocales();

980 UBool found = FALSE;

981 const UnicodeString* p;

982 while ((p = avail->snext(status))) {

983 if (p->compare("xx") == 0) {

984 found = TRUE;

985 break;

986 }

987 }

988 delete avail;

989 if (found) {

990 errln("found test locale");

991 }

992 }

993

994 {

995 int32_t count;

996 UBool foundLocale = FALSE;

997 const Locale *avail = BreakIterator::getAvailableLocales(count);

998 for (int i=0; i<count; i++) {

999 if (avail[i] == Locale::getEnglish()) {

1000 foundLocale = TRUE;

1001 break;

1002 }

1003 }

1004 if (foundLocale == FALSE) {

1005 errln("BreakIterator::getAvailableLocales(&count), failed to find EN .");

1006 }

1007 }

1008

1009

1010 // ja_word was adopted by factory

1011 delete ja_char;

1012 delete root_word;

1013 delete root_char;

1014 #endif

1015 }

1016

1017 void RBBIAPITest::RoundtripRule(const char *dataFile) {

1018 UErrorCode status = U_ZERO_ERROR;

1019 UParseError parseError;

1020 parseError.line = 0;

1021 parseError.offset = 0;

1022 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, & status));

1023 uint32_t length;

1024 const UChar *builtSource;

1025 const uint8_t *rbbiRules;

1026 const uint8_t *builtRules;

1027

1028 if (U_FAILURE(status)) {

1029 errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(statu s));

1030 return;

1031 }

1032

1033 builtRules = (const uint8_t *)udata_getMemory(data.getAlias());

1034 builtSource = (const UChar )(builtRules + ((RBBIDataHeader)builtRules)->fR uleSource);

1035 RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, par seError, status);

1036 if (U_FAILURE(status)) {

1037 errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, colum n %d\n",

1038 u_errorName(status), parseError.line, parseError.offset);

1039 return;

1040 };

1041 rbbiRules = brkItr->getBinaryRules(length);

1042 logln("Comparing \"%s\" len=%d", dataFile, length);

1043 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {

1044 errln("Built rules and rebuilt rules are different %s", dataFile);

1045 return;

1046 }

1047 delete brkItr;

1048 }

1049

1050 void RBBIAPITest::TestRoundtripRules() {

1051 RoundtripRule("word");

1052 RoundtripRule("title");

1053 RoundtripRule("sent");

1054 RoundtripRule("line");

1055 RoundtripRule("char");

1056 if (!quick) {

1057 RoundtripRule("word_POSIX");

1058 }

1059 }

1060

1061 // Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader*

1062 // (these are protected so we access them via a local class RBBIWithProtectedFun ctions).

1063 // This is just a sanity check, not a thorough test (e.g. we don't check that th e

1064 // first delete actually frees rulesCopy).

1065 void RBBIAPITest::TestCreateFromRBBIData() {

1066 // Get some handy RBBIData

1067 const char *brkName = "word"; // or "sent", "line", "char", etc.

1068 UErrorCode status = U_ZERO_ERROR;

1069 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &s tatus));

1070 if ( U_SUCCESS(status) ) {

1071 const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMem ory(data.getAlias());

1072 uint32_t length = builtRules->fLength;

1073 RBBIWithProtectedFunctions * brkItr;

1074

1075 // Try the memory-adopting constructor, need to copy the data first

1076 RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length);

1077 if ( rulesCopy ) {

1078 uprv_memcpy( rulesCopy, builtRules, length );

1079

1080 brkItr = new RBBIWithProtectedFunctions(rulesCopy, status);

1081 if ( U_SUCCESS(status) ) {

1082 delete brkItr; // this should free rulesCopy

1083 } else {

1084 errln("create RuleBasedBreakIterator from RBBIData (adopted): IC U Error \"%s\"\n", u_errorName(status) );

1085 status = U_ZERO_ERROR;// reset for the next test

1086 uprv_free( rulesCopy );

1087 }

1088 }

1089

1090 // Now try the non-adopting constructor

1091 brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFun ctions::kDontAdopt, status);

1092 if ( U_SUCCESS(status) ) {

1093 delete brkItr; // this should NOT attempt to free builtRules

1094 if (builtRules->fLength != length) { // sanity check

1095 errln("create RuleBasedBreakIterator from RBBIData (non-adopted) : delete affects data\n" );

1096 }

1097 } else {

1098 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): IC U Error \"%s\"\n", u_errorName(status) );

1099 }

1100 }

1101

1102 // getBinaryRules() and RuleBasedBreakIterator(uint8_t binaryRules, ...)

1103 //

1104 status = U_ZERO_ERROR;

1105 RuleBasedBreakIterator rb = (RuleBasedBreakIterator )BreakIterator::create WordInstance(Locale::getEnglish(), status);

1106 if (rb == NULL \|\| U_FAILURE(status)) {

1107 dataerrln("Unable to create BreakIterator::createWordInstance (Locale::g etEnglish) - %s", u_errorName(status));

1108 } else {

1109 uint32_t length;

1110 const uint8_t *rules = rb->getBinaryRules(length);

1111 RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length, status);

1112 TEST_ASSERT_SUCCESS(status);

1113 TEST_ASSERT(rb == rb2);

1114 UnicodeString words = "one two three ";

1115 rb2->setText(words);

1116 int wordCounter = 0;

1117 while (rb2->next() != UBRK_DONE) {

1118 wordCounter++;

1119 }

1120 TEST_ASSERT(wordCounter == 6);

1121

1122 status = U_ZERO_ERROR;

1123 RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1 , status);

1124 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);

1125

1126 delete rb;

1127 delete rb2;

1128 delete rb3;

1129 }

1130 }

1131

1132

1133 void RBBIAPITest::TestRefreshInputText() {

1134 /*

1135 * RefreshInput changes out the input of a Break Iterator without

1136 * changing anything else in the iterator's state. Used with Java JNI,

1137 * when Java moves the underlying string storage. This test

1138 * runs BreakIterator::next() repeatedly, moving the text in the middle o f the sequence.

1139 * The right set of boundaries should still be found.

1140 */

1141 UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; / * = " A B C D" */

1142 UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0};

1143 UErrorCode status = U_ZERO_ERROR;

1144 UText ut1 = UTEXT_INITIALIZER;

1145 UText ut2 = UTEXT_INITIALIZER;

1146 RuleBasedBreakIterator bi = (RuleBasedBreakIterator )BreakIterator::create LineInstance(Locale::getEnglish(), status);

1147 TEST_ASSERT_SUCCESS(status);

1148

1149 utext_openUChars(&ut1, testStr, -1, &status);

1150 TEST_ASSERT_SUCCESS(status);

1151

1152 if (U_SUCCESS(status)) {

1153 bi->setText(&ut1, status);

1154 TEST_ASSERT_SUCCESS(status);

1155

1156 /* Line boundaries will occur before each letter in the original string */

1157 TEST_ASSERT(1 == bi->next());

1158 TEST_ASSERT(3 == bi->next());

1159

1160 /* Move the string, kill the original string. */

1161 u_strcpy(movedStr, testStr);

1162 u_memset(testStr, 0x20, u_strlen(testStr));

1163 utext_openUChars(&ut2, movedStr, -1, &status);

1164 TEST_ASSERT_SUCCESS(status);

1165 RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status) ;

1166 TEST_ASSERT_SUCCESS(status);

1167 TEST_ASSERT(bi == returnedBI);

1168

1169 /* Find the following matches, now working in the moved string. */

1170 TEST_ASSERT(5 == bi->next());

1171 TEST_ASSERT(7 == bi->next());

1172 TEST_ASSERT(8 == bi->next());

1173 TEST_ASSERT(UBRK_DONE == bi->next());

1174

1175 utext_close(&ut1);

1176 utext_close(&ut2);

1177 }

1178 delete bi;

1179

1180 }

1181

#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
/**
 * Debug helper: logs the text, then the text with a pilcrow inserted at every
 * boundary the iterator reports (wrapped in lenticular brackets), then the
 * list of boundary offsets.
 *
 * The iterator is advanced with next() until it returns UBRK_DONE, so the
 * caller is expected to have positioned it (the callers in this file call
 * first() beforehand).
 *
 * @param brk   Break iterator to walk; its text is assumed to be ustr.
 * @param ustr  The text the iterator was set up with.
 * @param it    Test object used for logln()/errln() output.
 */
static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
    static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
    it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));

    int32_t *pos = new int32_t[ustr.length()];
    int32_t posCount = 0;

    // calculate breaks up front, so we can print out
    // sans any debugging
    for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
        pos[posCount++] = n;
        if(posCount>=ustr.length()) {
            it.errln("brk count exceeds string length!");
            delete [] pos;  // do not leak the offsets array on the error path
            return;
        }
    }

    // Text with a pilcrow after each segment that ends at a boundary.
    UnicodeString out;
    out.append((UChar)CHSTR);
    int32_t prev = 0;
    for(int32_t i=0;i<posCount;i++) {
        int32_t n=pos[i];
        out.append(ustr.tempSubString(prev,n-prev));
        out.append((UChar)PILCROW);
        prev=n;
    }
    out.append(ustr.tempSubString(prev,ustr.length()-prev));
    out.append((UChar)CHEND);
    it.logln(out);

    // Plain list of the boundary offsets.
    out.remove();
    for(int32_t i=0;i<posCount;i++) {
        char tmp[100];
        sprintf(tmp,"%d ",pos[i]);
        out.append(UnicodeString(tmp));
    }
    it.logln(out);
    delete [] pos;
}
#endif

1222

1223 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {

1224 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BRE AK_ITERATION

1225 UErrorCode status = U_ZERO_ERROR;

1226 LocalPointer<FilteredBreakIteratorBuilder> builder;

1227 LocalPointer<BreakIterator> baseBI;

1228 LocalPointer<BreakIterator> filteredBI;

1229 LocalPointer<BreakIterator> frenchBI;

1230

1231 const UnicodeString text("In the meantime Mr. Weston arrived with his small sh ip, which he had now recovered. Capt. Gorges, who informed the Sgt. here that on e purpose of his going east was to meet with Mr. Weston, took this opportunity t o call him to account for some abuses he had to lay to his charge."); // (Willia m Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - ed ited.

1232 const UnicodeString ABBR_MR("Mr.");

1233 const UnicodeString ABBR_CAPT("Capt.");

1234

1235 {

1236 logln("Constructing empty builder\n");

1237 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));

1238 TEST_ASSERT_SUCCESS(status);

1239

1240 logln("Constructing base BI\n");

1241 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish (), status));

1242 TEST_ASSERT_SUCCESS(status);

1243

1244 logln("Building new BI\n");

1245 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));

1246 TEST_ASSERT_SUCCESS(status);

1247

1248 if (U_SUCCESS(status)) {

1249 logln("Testing:");

1250 filteredBI->setText(text);

1251 TEST_ASSERT(20 == filteredBI->next()); // Mr.

1252 TEST_ASSERT(84 == filteredBI->next()); // recovered.

1253 TEST_ASSERT(90 == filteredBI->next()); // Capt.

1254 TEST_ASSERT(181 == filteredBI->next()); // Mr.

1255 TEST_ASSERT(278 == filteredBI->next()); // charge.

1256 filteredBI->first();

1257 prtbrks(filteredBI.getAlias(), text, *this);

1258 }

1259 }

1260

1261 {

1262 logln("Constructing empty builder\n");

1263 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));

1264 TEST_ASSERT_SUCCESS(status);

1265

1266 if (U_SUCCESS(status)) {

1267 logln("Adding Mr. as an exception\n");

1268 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));

1269 TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // a lready have it

1270 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));

1271 TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it

1272 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));

1273 TEST_ASSERT_SUCCESS(status);

1274

1275 logln("Constructing base BI\n");

1276 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEng lish(), status));

1277 TEST_ASSERT_SUCCESS(status);

1278

1279 logln("Building new BI\n");

1280 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));

1281 TEST_ASSERT_SUCCESS(status);

1282

1283 logln("Testing:");

1284 filteredBI->setText(text);

1285 TEST_ASSERT(84 == filteredBI->next());

1286 TEST_ASSERT(90 == filteredBI->next());// Capt.

1287 TEST_ASSERT(278 == filteredBI->next());

1288 filteredBI->first();

1289 prtbrks(filteredBI.getAlias(), text, *this);

1290 }

1291 }

1292

1293

1294 {

1295 logln("Constructing empty builder\n");

1296 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));

1297 TEST_ASSERT_SUCCESS(status);

1298

1299 if (U_SUCCESS(status)) {

1300 logln("Adding Mr. and Capt as an exception\n");

1301 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));

1302 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));

1303 TEST_ASSERT_SUCCESS(status);

1304

1305 logln("Constructing base BI\n");

1306 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEng lish(), status));

1307 TEST_ASSERT_SUCCESS(status);

1308

1309 logln("Building new BI\n");

1310 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));

1311 TEST_ASSERT_SUCCESS(status);

1312

1313 logln("Testing:");

1314 filteredBI->setText(text);

1315 TEST_ASSERT(84 == filteredBI->next());

1316 TEST_ASSERT(278 == filteredBI->next());

1317 filteredBI->first();

1318 prtbrks(filteredBI.getAlias(), text, *this);

1319 }

1320 }

1321

1322

1323 {

1324 logln("Constructing English builder\n");

1325 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::ge tEnglish(), status));

1326 TEST_ASSERT_SUCCESS(status);

1327

1328 logln("Constructing base BI\n");

1329 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish (), status));

1330 TEST_ASSERT_SUCCESS(status);

1331

1332 if (U_SUCCESS(status)) {

1333 logln("unsuppressing 'Capt'");

1334 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));

1335

1336 logln("Building new BI\n");

1337 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));

1338 TEST_ASSERT_SUCCESS(status);

1339

1340 if(filteredBI.isValid()) {

1341 logln("Testing:");

1342 filteredBI->setText(text);

1343 TEST_ASSERT(84 == filteredBI->next());

1344 TEST_ASSERT(90 == filteredBI->next());

1345 TEST_ASSERT(278 == filteredBI->next());

1346 filteredBI->first();

1347 prtbrks(filteredBI.getAlias(), text, *this);

1348 }

1349 }

1350 }

1351

1352

1353 {

1354 logln("Constructing English builder\n");

1355 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::ge tEnglish(), status));

1356 TEST_ASSERT_SUCCESS(status);

1357

1358 logln("Constructing base BI\n");

1359 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish (), status));

1360 TEST_ASSERT_SUCCESS(status);

1361

1362 if (U_SUCCESS(status)) {

1363 logln("Building new BI\n");

1364 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));

1365 TEST_ASSERT_SUCCESS(status);

1366

1367 if(filteredBI.isValid()) {

1368 logln("Testing:");

1369 filteredBI->setText(text);

1370 TEST_ASSERT(84 == filteredBI->next());

1371 TEST_ASSERT(278 == filteredBI->next());

1372 filteredBI->first();

1373 prtbrks(filteredBI.getAlias(), text, *this);

1374 }

1375 }

1376 }

1377

1378 // reenable once french is in

1379 {

1380 logln("Constructing French builder");

1381 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::ge tFrench(), status));

1382 TEST_ASSERT_SUCCESS(status);

1383

1384 logln("Constructing base BI\n");

1385 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench( ), status));

1386 TEST_ASSERT_SUCCESS(status);

1387

1388 if (U_SUCCESS(status)) {

1389 logln("Building new BI\n");

1390 frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));

1391 TEST_ASSERT_SUCCESS(status);

1392 }

1393

1394 if(frenchBI.isValid()) {

1395 logln("Testing:");

1396 UnicodeString frText("C'est MM. Duval.");

1397 frenchBI->setText(frText);

1398 TEST_ASSERT(16 == frenchBI->next());

1399 TEST_ASSERT(BreakIterator::DONE == frenchBI->next());

1400 frenchBI->first();

1401 prtbrks(frenchBI.getAlias(), frText, *this);

1402 logln("Testing against English:");

1403 filteredBI->setText(frText);

1404 TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI i s english.

1405 TEST_ASSERT(16 == filteredBI->next());

1406 TEST_ASSERT(BreakIterator::DONE == filteredBI->next());

1407 filteredBI->first();

1408 prtbrks(filteredBI.getAlias(), frText, *this);

1409

1410 // Verify ==

1411 TEST_ASSERT_TRUE(frenchBI == frenchBI);

1412 TEST_ASSERT_TRUE(filteredBI != frenchBI);

1413 TEST_ASSERT_TRUE(frenchBI != filteredBI);

1414 } else {

1415 dataerrln("French BI: not valid.");

1416 }

1417 }

1418

1419 #else

1420 logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCON FIG_NO_FILTERED_BREAK_ITERATION");

1421 #endif

1422 }

1423

1424 //---------------------------------------------

1425 // runIndexedTest

1426 //---------------------------------------------

1427

1428 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /par/ )

1429 {

1430 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");

1431 switch (index) {

1432 // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break ;

1433 #if !UCONFIG_NO_FILE_IO

1434 case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break;

1435 case 1: name = "TestgetRules"; if (exec) TestgetRules(); break;

1436 case 2: name = "TestHashCode"; if (exec) TestHashCode(); break;

1437 case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break;

1438 case 4: name = "TestIteration"; if (exec) TestIteration(); break;

1439 #else

1440 case 0: case 1: case 2: case 3: case 4: name = "skip"; break;

1441 #endif

1442 case 5: name = "TestBuilder"; if (exec) TestBuilder(); break;

1443 case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); brea k;

1444 case 7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); brea k;

1445 case 8: name = "TestBug2190"; if (exec) TestBug2190(); break;

1446 #if !UCONFIG_NO_FILE_IO

1447 case 9: name = "TestRegistration"; if (exec) TestRegistration(); break;

1448 case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break;

1449 case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break;

1450 case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); br eak;

1451 case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIDa ta(); break;

1452 #else

1453 case 9: case 10: case 11: case 12: case 13: name = "skip"; break;

1454 #endif

1455 case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText() ; break;

1456

1457 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING

1458 case 15: name = "TestFilteredBreakIteratorBuilder"; if(exec) TestFilteredBre akIteratorBuilder(); break;

1459 #else

1460 case 15: name="skip"; break;

1461 #endif

1462 default: name = ""; break; // needed to end loop

1463 }

1464 }

1465

1466 //---------------------------------------------

1467 //Internal subroutines

1468 //---------------------------------------------

1469

1470 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){

1471 logln((UnicodeString)"testIsBoundary():");

1472 int32_t p = 0;

1473 UBool isB;

1474 for (int32_t i = 0; i < text.length(); i++) {

1475 isB = bi.isBoundary(i);

1476 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);

1477

1478 if (i == boundaries[p]) {

1479 if (!isB)

1480 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");

1481 p++;

1482 }

1483 else {

1484 if (isB)

1485 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");

1486 }

1487 }

1488 }

1489 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotof fset, int32_t expectedOffset, const char* expectedString){

1490 UnicodeString selected;

1491 UnicodeString expected=CharsToUnicodeString(expectedString);

1492

1493 if(gotoffset != expectedOffset)

1494 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeStrin g)" instead of #" + expectedOffset);

1495 if(start <= gotoffset){

1496 testString.extractBetween(start, gotoffset, selected);

1497 }

1498 else{

1499 testString.extractBetween(gotoffset, start, selected);

1500 }

1501 if(selected.compare(expected) != 0)

1502 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));

1503 else

1504 logln(prettify("****selected \"" + selected + "\""));

1505 }

1506

1507 //---------------------------------------------

1508 //RBBIWithProtectedFunctions class functions

1509 //---------------------------------------------

1510

1511 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UEr rorCode &status)

1512 : RuleBasedBreakIterator(data, status)

1513 {

1514 }

1515

1516 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* dat a, enum EDontAdopt, UErrorCode &status)

1517 : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status)

1518 {

1519 }

1520

1521 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */

OLD	NEW

« no previous file with comments | « source/test/intltest/rbbiapts.h ('k') | source/test/intltest/rbbitst.h » ('j') | no next file with comments »