source/test/intltest/convtest.cpp - Issue 2435373002: Delete source/test

Side by Side Diff: source/test/intltest/convtest.cpp

Issue 2435373002: Delete source/test (Closed)

Patch Set: Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 /*

2 *******************************************************************************

3 *

4 * Copyright (C) 2003-2014, International Business Machines

5 * Corporation and others. All Rights Reserved.

6 *

7 *******************************************************************************

8 * file name: convtest.cpp

9 * encoding: US-ASCII

10 * tab size: 8 (not used)

11 * indentation:4

12 *

13 * created on: 2003jul15

14 * created by: Markus W. Scherer

15 *

16 * Test file for data-driven conversion tests.

17 */

18

19 #include "unicode/utypes.h"

20

21 #if !UCONFIG_NO_LEGACY_CONVERSION

22 /*

23 * Note: Turning off all of convtest.cpp if UCONFIG_NO_LEGACY_CONVERSION

24 * is slightly unnecessary - it removes tests for Unicode charsets

25 * like UTF-8 that should work.

26 * However, there is no easy way for the test to detect whether a test case

27 * is for a Unicode charset, so it would be difficult to only exclude those.

28 * Also, regular testing of ICU is done with all modules on, therefore

29 * not testing conversion for a custom configuration like this should be ok.

30 */

31

32 #include "unicode/ucnv.h"

33 #include "unicode/unistr.h"

34 #include "unicode/parsepos.h"

35 #include "unicode/uniset.h"

36 #include "unicode/ustring.h"

37 #include "unicode/ures.h"

38 #include "convtest.h"

39 #include "cmemory.h"

40 #include "unicode/tstdtmod.h"

41 #include <string.h>

42 #include <stdlib.h>

43

// Characters used in the test data to select a converter callback.
// The first character of a test case's "callback" string is compared
// against these values; anything after it is passed on as the callback option.
enum {
    SUB_CB='?',     // substitute callback
    SKIP_CB='0',    // skip callback
    STOP_CB='.',    // stop callback
    ESC_CB='&'      // escape callback
};

51

52 ConversionTest::ConversionTest() {

53 UErrorCode errorCode=U_ZERO_ERROR;

54 utf8Cnv=ucnv_open("UTF-8", &errorCode);

55 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err orCode);

56 if(U_FAILURE(errorCode)) {

57 errln("unable to open UTF-8 converter");

58 }

59 }

60

61 ConversionTest::~ConversionTest() {

62 ucnv_close(utf8Cnv);

63 }

64

65 void

66 ConversionTest::runIndexedTest(int32_t index, UBool exec, const char &name, cha r /par/) {

67 if (exec) logln("TestSuite ConversionTest: ");

68 switch (index) {

69 #if !UCONFIG_NO_FILE_IO

70 case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break;

71 case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break;

72 case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break;

73 case 3: name="TestDefaultIgnorableCallback"; if (exec) TestDefaultIgnora bleCallback(); break;

74 #else

75 case 0:

76 case 1:

77 case 2:

78 case 3: name="skip"; break;

79 #endif

80 case 4: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break ;

81 default: name=""; break; //needed to end loop

82 }

83 }

84

85 // test data interface ----------------------------------------------------- ***

86

87 void

88 ConversionTest::TestToUnicode() {

89 ConversionCase cc;

90 char charset[100], cbopt[4];

91 const char *option;

92 UnicodeString s, unicode;

93 int32_t offsetsLength;

94 UConverterToUCallback callback;

95

96 TestDataModule *dataModule;

97 TestData *testData;

98 const DataMap *testCase;

99 UErrorCode errorCode;

100 int32_t i;

101

102 errorCode=U_ZERO_ERROR;

103 dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode) ;

104 if(U_SUCCESS(errorCode)) {

105 testData=dataModule->createTestData("toUnicode", errorCode);

106 if(U_SUCCESS(errorCode)) {

107 for(i=0; testData->nextCase(testCase, errorCode); ++i) {

108 if(U_FAILURE(errorCode)) {

109 errln("error retrieving conversion/toUnicode test case %d - %s",

110 i, u_errorName(errorCode));

111 errorCode=U_ZERO_ERROR;

112 continue;

113 }

114

115 cc.caseNr=i;

116

117 s=testCase->getString("charset", errorCode);

118 s.extract(0, 0x7fffffff, charset, sizeof(charset), "");

119 cc.charset=charset;

120

121 cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode) ;

122 unicode=testCase->getString("unicode", errorCode);

123 cc.unicode=unicode.getBuffer();

124 cc.unicodeLength=unicode.length();

125

126 offsetsLength=0;

127 cc.offsets=testCase->getIntVector(offsetsLength, "offsets", erro rCode);

128 if(offsetsLength==0) {

129 cc.offsets=NULL;

130 } else if(offsetsLength!=unicode.length()) {

131 errln("toUnicode[%d] unicode[%d] and offsets[%d] must have t he same length",

132 i, unicode.length(), offsetsLength);

133 errorCode=U_ILLEGAL_ARGUMENT_ERROR;

134 }

135

136 cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);

137 cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);

138

139 s=testCase->getString("errorCode", errorCode);

140 if(s==UNICODE_STRING("invalid", 7)) {

141 cc.outErrorCode=U_INVALID_CHAR_FOUND;

142 } else if(s==UNICODE_STRING("illegal", 7)) {

143 cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;

144 } else if(s==UNICODE_STRING("truncated", 9)) {

145 cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;

146 } else if(s==UNICODE_STRING("illesc", 6)) {

147 cc.outErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE;

148 } else if(s==UNICODE_STRING("unsuppesc", 9)) {

149 cc.outErrorCode=U_UNSUPPORTED_ESCAPE_SEQUENCE;

150 } else {

151 cc.outErrorCode=U_ZERO_ERROR;

152 }

153

154 s=testCase->getString("callback", errorCode);

155 s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");

156 cc.cbopt=cbopt;

157 switch(cbopt[0]) {

158 case SUB_CB:

159 callback=UCNV_TO_U_CALLBACK_SUBSTITUTE;

160 break;

161 case SKIP_CB:

162 callback=UCNV_TO_U_CALLBACK_SKIP;

163 break;

164 case STOP_CB:

165 callback=UCNV_TO_U_CALLBACK_STOP;

166 break;

167 case ESC_CB:

168 callback=UCNV_TO_U_CALLBACK_ESCAPE;

169 break;

170 default:

171 callback=NULL;

172 break;

173 }

174 option=callback==NULL ? cbopt : cbopt+1;

175 if(*option==0) {

176 option=NULL;

177 }

178

179 cc.invalidChars=testCase->getBinary(cc.invalidLength, "invalidCh ars", errorCode);

180

181 if(U_FAILURE(errorCode)) {

182 errln("error parsing conversion/toUnicode test case %d - %s" ,

183 i, u_errorName(errorCode));

184 errorCode=U_ZERO_ERROR;

185 } else {

186 logln("TestToUnicode[%d] %s", i, charset);

187 ToUnicodeCase(cc, callback, option);

188 }

189 }

190 delete testData;

191 }

192 delete dataModule;

193 }

194 else {

195 dataerrln("Could not load test conversion data");

196 }

197 }

198

199 void

200 ConversionTest::TestFromUnicode() {

201 ConversionCase cc;

202 char charset[100], cbopt[4];

203 const char *option;

204 UnicodeString s, unicode, invalidUChars;

205 int32_t offsetsLength, index;

206 UConverterFromUCallback callback;

207

208 TestDataModule *dataModule;

209 TestData *testData;

210 const DataMap *testCase;

211 const UChar *p;

212 UErrorCode errorCode;

213 int32_t i, length;

214

215 errorCode=U_ZERO_ERROR;

216 dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode) ;

217 if(U_SUCCESS(errorCode)) {

218 testData=dataModule->createTestData("fromUnicode", errorCode);

219 if(U_SUCCESS(errorCode)) {

220 for(i=0; testData->nextCase(testCase, errorCode); ++i) {

221 if(U_FAILURE(errorCode)) {

222 errln("error retrieving conversion/fromUnicode test case %d - %s",

223 i, u_errorName(errorCode));

224 errorCode=U_ZERO_ERROR;

225 continue;

226 }

227

228 cc.caseNr=i;

229

230 s=testCase->getString("charset", errorCode);

231 s.extract(0, 0x7fffffff, charset, sizeof(charset), "");

232 cc.charset=charset;

233

234 unicode=testCase->getString("unicode", errorCode);

235 cc.unicode=unicode.getBuffer();

236 cc.unicodeLength=unicode.length();

237 cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode) ;

238

239 offsetsLength=0;

240 cc.offsets=testCase->getIntVector(offsetsLength, "offsets", erro rCode);

241 if(offsetsLength==0) {

242 cc.offsets=NULL;

243 } else if(offsetsLength!=cc.bytesLength) {

244 errln("fromUnicode[%d] bytes[%d] and offsets[%d] must have t he same length",

245 i, cc.bytesLength, offsetsLength);

246 errorCode=U_ILLEGAL_ARGUMENT_ERROR;

247 }

248

249 cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);

250 cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);

251

252 s=testCase->getString("errorCode", errorCode);

253 if(s==UNICODE_STRING("invalid", 7)) {

254 cc.outErrorCode=U_INVALID_CHAR_FOUND;

255 } else if(s==UNICODE_STRING("illegal", 7)) {

256 cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;

257 } else if(s==UNICODE_STRING("truncated", 9)) {

258 cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;

259 } else {

260 cc.outErrorCode=U_ZERO_ERROR;

261 }

262

263 s=testCase->getString("callback", errorCode);

264 cc.setSub=0; // default: no subchar

265

266 if((index=s.indexOf((UChar)0))>0) {

267 // read NUL-separated subchar first, if any

268 // copy the subchar from Latin-1 characters

269 // start after the NUL

270 p=s.getTerminatedBuffer();

271 length=index+1;

272 p+=length;

273 length=s.length()-length;

274 if(length<=0 \|\| length>=(int32_t)sizeof(cc.subchar)) {

275 errorCode=U_ILLEGAL_ARGUMENT_ERROR;

276 } else {

277 int32_t j;

278

279 for(j=0; j<length; ++j) {

280 cc.subchar[j]=(char)p[j];

281 }

282 // NUL-terminate the subchar

283 cc.subchar[j]=0;

284 cc.setSub=1;

285 }

286

287 // remove the NUL and subchar from s

288 s.truncate(index);

289 } else if((index=s.indexOf((UChar)0x3d))>0) /* '=' */ {

290 // read a substitution string, separated by an equal sign

291 p=s.getBuffer()+index+1;

292 length=s.length()-(index+1);

293 if(length<0 \|\| length>=UPRV_LENGTHOF(cc.subString)) {

294 errorCode=U_ILLEGAL_ARGUMENT_ERROR;

295 } else {

296 u_memcpy(cc.subString, p, length);

297 // NUL-terminate the subString

298 cc.subString[length]=0;

299 cc.setSub=-1;

300 }

301

302 // remove the equal sign and subString from s

303 s.truncate(index);

304 }

305

306 s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");

307 cc.cbopt=cbopt;

308 switch(cbopt[0]) {

309 case SUB_CB:

310 callback=UCNV_FROM_U_CALLBACK_SUBSTITUTE;

311 break;

312 case SKIP_CB:

313 callback=UCNV_FROM_U_CALLBACK_SKIP;

314 break;

315 case STOP_CB:

316 callback=UCNV_FROM_U_CALLBACK_STOP;

317 break;

318 case ESC_CB:

319 callback=UCNV_FROM_U_CALLBACK_ESCAPE;

320 break;

321 default:

322 callback=NULL;

323 break;

324 }

325 option=callback==NULL ? cbopt : cbopt+1;

326 if(*option==0) {

327 option=NULL;

328 }

329

330 invalidUChars=testCase->getString("invalidUChars", errorCode);

331 cc.invalidUChars=invalidUChars.getBuffer();

332 cc.invalidLength=invalidUChars.length();

333

334 if(U_FAILURE(errorCode)) {

335 errln("error parsing conversion/fromUnicode test case %d - % s",

336 i, u_errorName(errorCode));

337 errorCode=U_ZERO_ERROR;

338 } else {

339 logln("TestFromUnicode[%d] %s", i, charset);

340 FromUnicodeCase(cc, callback, option);

341 }

342 }

343 delete testData;

344 }

345 delete dataModule;

346 }

347 else {

348 dataerrln("Could not load test conversion data");

349 }

350 }

351

352 static const UChar ellipsis[]={ 0x2e, 0x2e, 0x2e };

353

354 void

355 ConversionTest::TestGetUnicodeSet() {

356 char charset[100];

357 UnicodeString s, map, mapnot;

358 int32_t which;

359

360 ParsePosition pos;

361 UnicodeSet cnvSet, mapSet, mapnotSet, diffSet;

362 UnicodeSet *cnvSetPtr = &cnvSet;

363 LocalUConverterPointer cnv;

364

365 TestDataModule *dataModule;

366 TestData *testData;

367 const DataMap *testCase;

368 UErrorCode errorCode;

369 int32_t i;

370

371 errorCode=U_ZERO_ERROR;

372 dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode) ;

373 if(U_SUCCESS(errorCode)) {

374 testData=dataModule->createTestData("getUnicodeSet", errorCode);

375 if(U_SUCCESS(errorCode)) {

376 for(i=0; testData->nextCase(testCase, errorCode); ++i) {

377 if(U_FAILURE(errorCode)) {

378 errln("error retrieving conversion/getUnicodeSet test case % d - %s",

379 i, u_errorName(errorCode));

380 errorCode=U_ZERO_ERROR;

381 continue;

382 }

383

384 s=testCase->getString("charset", errorCode);

385 s.extract(0, 0x7fffffff, charset, sizeof(charset), "");

386

387 map=testCase->getString("map", errorCode);

388 mapnot=testCase->getString("mapnot", errorCode);

389

390 which=testCase->getInt28("which", errorCode);

391

392 if(U_FAILURE(errorCode)) {

393 errln("error parsing conversion/getUnicodeSet test case %d - %s",

394 i, u_errorName(errorCode));

395 errorCode=U_ZERO_ERROR;

396 continue;

397 }

398

399 // test this test case

400 mapSet.clear();

401 mapnotSet.clear();

402

403 pos.setIndex(0);

404 mapSet.applyPattern(map, pos, 0, NULL, errorCode);

405 if(U_FAILURE(errorCode) \|\| pos.getIndex()!=map.length()) {

406 errln("error creating the map set for conversion/getUnicodeS et test case %d - %s\n"

407 " error index %d index %d U+%04x",

408 i, u_errorName(errorCode), pos.getErrorIndex(), pos. getIndex(), map.char32At(pos.getIndex()));

409 errorCode=U_ZERO_ERROR;

410 continue;

411 }

412

413 pos.setIndex(0);

414 mapnotSet.applyPattern(mapnot, pos, 0, NULL, errorCode);

415 if(U_FAILURE(errorCode) \|\| pos.getIndex()!=mapnot.length()) {

416 errln("error creating the mapnot set for conversion/getUnico deSet test case %d - %s\n"

417 " error index %d index %d U+%04x",

418 i, u_errorName(errorCode), pos.getErrorIndex(), pos. getIndex(), mapnot.char32At(pos.getIndex()));

419 errorCode=U_ZERO_ERROR;

420 continue;

421 }

422

423 logln("TestGetUnicodeSet[%d] %s", i, charset);

424

425 cnv.adoptInstead(cnv_open(charset, errorCode));

426 if(U_FAILURE(errorCode)) {

427 errcheckln(errorCode, "error opening \"%s\" for conversion/g etUnicodeSet test case %d - %s",

428 charset, i, u_errorName(errorCode));

429 errorCode=U_ZERO_ERROR;

430 continue;

431 }

432

433 ucnv_getUnicodeSet(cnv.getAlias(), cnvSetPtr->toUSet(), (UConver terUnicodeSet)which, &errorCode);

434

435 if(U_FAILURE(errorCode)) {

436 errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/ge tUnicodeSet test case %d - %s",

437 charset, i, u_errorName(errorCode));

438 errorCode=U_ZERO_ERROR;

439 continue;

440 }

441

442 // are there items that must be in cnvSet but are not?

443 (diffSet=mapSet).removeAll(cnvSet);

444 if(!diffSet.isEmpty()) {

445 diffSet.toPattern(s, TRUE);

446 if(s.length()>100) {

447 s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellip sis));

448 }

449 errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d",

450 charset, i);

451 errln(s);

452 }

453

454 // are there items that must not be in cnvSet but are?

455 (diffSet=mapnotSet).retainAll(cnvSet);

456 if(!diffSet.isEmpty()) {

457 diffSet.toPattern(s, TRUE);

458 if(s.length()>100) {

459 s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellip sis));

460 }

461 errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d",

462 charset, i);

463 errln(s);

464 }

465 }

466 delete testData;

467 }

468 delete dataModule;

469 }

470 else {

471 dataerrln("Could not load test conversion data");

472 }

473 }

474

475 U_CDECL_BEGIN

476 static void U_CALLCONV

477 getUnicodeSetCallback(const void *context,

478 UConverterFromUnicodeArgs * /fromUArgs/,

479 const UChar* /codeUnits/,

480 int32_t /length/,

481 UChar32 codePoint,

482 UConverterCallbackReason reason,

483 UErrorCode *pErrorCode) {

484 if(reason<=UCNV_IRREGULAR) {

485 ((UnicodeSet *)context)->remove(codePoint); // the converter cannot con vert this code point

486 *pErrorCode=U_ZERO_ERROR; // skip

487 } // else ignore the reset, close and clone calls.

488 }

489 U_CDECL_END

490

491 // Compare ucnv_getUnicodeSet() with the set of characters that can be converted .

492 void

493 ConversionTest::TestGetUnicodeSet2() {

494 // Build a string with all code points.

495 UChar32 cpLimit;

496 int32_t s0Length;

497 if(quick) {

498 cpLimit=s0Length=0x10000; // BMP only

499 } else {

500 cpLimit=0x110000;

501 s0Length=0x10000+0x200000; // BMP + surrogate pairs

502 }

503 UChar *s0=new UChar[s0Length];

504 if(s0==NULL) {

505 return;

506 }

507 UChar *s=s0;

508 UChar32 c;

509 UChar c2;

510 // low BMP

511 for(c=0; c<=0xd7ff; ++c) {

512 *s++=(UChar)c;

513 }

514 // trail surrogates

515 for(c=0xdc00; c<=0xdfff; ++c) {

516 *s++=(UChar)c;

517 }

518 // lead surrogates

519 // (after trails so that there is not even one surrogate pair in between)

520 for(c=0xd800; c<=0xdbff; ++c) {

521 *s++=(UChar)c;

522 }

523 // high BMP

524 for(c=0xe000; c<=0xffff; ++c) {

525 *s++=(UChar)c;

526 }

527 // supplementary code points = surrogate pairs

528 if(cpLimit==0x110000) {

529 for(c=0xd800; c<=0xdbff; ++c) {

530 for(c2=0xdc00; c2<=0xdfff; ++c2) {

531 *s++=(UChar)c;

532 *s++=c2;

533 }

534 }

535 }

536

537 static const char *const cnvNames[]={

538 "UTF-8",

539 "UTF-7",

540 "UTF-16",

541 "US-ASCII",

542 "ISO-8859-1",

543 "windows-1252",

544 "Shift-JIS",

545 "ibm-1390", // EBCDIC_STATEFUL table

546 "ibm-16684", // DBCS-only extension table based on EBCDIC_STATEFUL tabl e

547 "HZ",

548 "ISO-2022-JP",

549 "JIS7",

550 "ISO-2022-CN",

551 "ISO-2022-CN-EXT",

552 "LMBCS"

553 };

554 LocalUConverterPointer cnv;

555 char buffer[1024];

556 int32_t i;

557 for(i=0; i<UPRV_LENGTHOF(cnvNames); ++i) {

558 UErrorCode errorCode=U_ZERO_ERROR;

559 cnv.adoptInstead(cnv_open(cnvNames[i], errorCode));

560 if(U_FAILURE(errorCode)) {

561 errcheckln(errorCode, "failed to open converter %s - %s", cnvNames[i ], u_errorName(errorCode));

562 continue;

563 }

564 UnicodeSet expected;

565 ucnv_setFromUCallBack(cnv.getAlias(), getUnicodeSetCallback, &expected, NULL, NULL, &errorCode);

566 if(U_FAILURE(errorCode)) {

567 errln("failed to set the callback on converter %s - %s", cnvNames[i] , u_errorName(errorCode));

568 continue;

569 }

570 UConverterUnicodeSet which;

571 for(which=UCNV_ROUNDTRIP_SET; which<UCNV_SET_COUNT; which=(UConverterUni codeSet)((int)which+1)) {

572 if(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {

573 ucnv_setFallback(cnv.getAlias(), TRUE);

574 }

575 expected.add(0, cpLimit-1);

576 s=s0;

577 UBool flush;

578 do {

579 char *t=buffer;

580 flush=(UBool)(s==s0+s0Length);

581 ucnv_fromUnicode(cnv.getAlias(), &t, buffer+sizeof(buffer), (con st UChar **)&s, s0+s0Length, NULL, flush, &errorCode);

582 if(U_FAILURE(errorCode)) {

583 if(errorCode==U_BUFFER_OVERFLOW_ERROR) {

584 errorCode=U_ZERO_ERROR;

585 continue;

586 } else {

587 break; // unexpected error, should not occur

588 }

589 }

590 } while(!flush);

591 UnicodeSet set;

592 ucnv_getUnicodeSet(cnv.getAlias(), set.toUSet(), which, &errorCode);

593 if(cpLimit<0x110000) {

594 set.remove(cpLimit, 0x10ffff);

595 }

596 if(which==UCNV_ROUNDTRIP_SET) {

597 // ignore PUA code points because they will be converted even if they

598 // are fallbacks and when other fallbacks are turned off,

599 // but ucnv_getUnicodeSet(UCNV_ROUNDTRIP_SET) delivers true roun dtrips

600 expected.remove(0xe000, 0xf8ff);

601 expected.remove(0xf0000, 0xffffd);

602 expected.remove(0x100000, 0x10fffd);

603 set.remove(0xe000, 0xf8ff);

604 set.remove(0xf0000, 0xffffd);

605 set.remove(0x100000, 0x10fffd);

606 }

607 if(set!=expected) {

608 // First try to see if we have different sets because ucnv_getUn icodeSet()

609 // added strings: The above conversion method does not tell us w hat strings might be convertible.

610 // Remove strings from the set and compare again.

611 // Unfortunately, there are no good, direct set methods for find ing out whether there are strings

612 // in the set, nor for enumerating or removing just them.

613 // Intersect all code points with the set. The intersection will not contain strings.

614 UnicodeSet temp(0, 0x10ffff);

615 temp.retainAll(set);

616 set=temp;

617 }

618 if(set!=expected) {

619 UnicodeSet diffSet;

620 UnicodeString out;

621

622 // are there items that must be in the set but are not?

623 (diffSet=expected).removeAll(set);

624 if(!diffSet.isEmpty()) {

625 diffSet.toPattern(out, TRUE);

626 if(out.length()>100) {

627 out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ell ipsis));

628 }

629 errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d",

630 cnvNames[i], which);

631 errln(out);

632 }

633

634 // are there items that must not be in the set but are?

635 (diffSet=set).removeAll(expected);

636 if(!diffSet.isEmpty()) {

637 diffSet.toPattern(out, TRUE);

638 if(out.length()>100) {

639 out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ell ipsis));

640 }

641 errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d",

642 cnvNames[i], which);

643 errln(out);

644 }

645 }

646 }

647 }

648

649 delete [] s0;

650 }

651

652 // Test all codepoints which has the default ignorable Unicode property are igno red if they have no mapping

653 // If there are any failures, the hard coded list (IS_DEFAULT_IGNORABLE_CODE_POI NT) in ucnv_err.c should be updated

654 void

655 ConversionTest::TestDefaultIgnorableCallback() {

656 UErrorCode status = U_ZERO_ERROR;

657 const char *cnv_name = "euc-jp-2007";

658 const char *pattern_ignorable = "[:Default_Ignorable_Code_Point:]";

659 const char *pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]";

660

661 UnicodeSet *set_ignorable = new UnicodeSet(pattern_ignorable, status);

662 if (U_FAILURE(status)) {

663 dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable, u _errorName(status));

664 return;

665 }

666

667 UnicodeSet *set_not_ignorable = new UnicodeSet(pattern_not_ignorable, status );

668 if (U_FAILURE(status)) {

669 dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorabl e, u_errorName(status));

670 return;

671 }

672

673 UConverter *cnv = cnv_open(cnv_name, status);

674 if (U_FAILURE(status)) {

675 dataerrln("Unable to open converter: %s - %s\n", cnv_name, u_errorName(s tatus));

676 return;

677 }

678

679 // set callback for the converter

680 ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL , &status);

681

682 UChar32 input[1];

683 char output[10];

684 int32_t outputLength;

685

686 // test default ignorables are ignored

687 int size = set_ignorable->size();

688 for (int i = 0; i < size; i++) {

689 status = U_ZERO_ERROR;

690 outputLength= 0;

691

692 input[0] = set_ignorable->charAt(i);

693

694 outputLength = ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32 (input, 1).getTerminatedBuffer(), -1, &status);

695 if (U_FAILURE(status) \|\| outputLength != 0) {

696 errln("Ignorable code point: U+%04X not skipped as expected - %s", i nput[0], u_errorName(status));

697 }

698 }

699

700 // test non-ignorables are not ignored

701 size = set_not_ignorable->size();

702 for (int i = 0; i < size; i++) {

703 status = U_ZERO_ERROR;

704 outputLength= 0;

705

706 input[0] = set_not_ignorable->charAt(i);

707

708 if (input[0] == 0) {

709 continue;

710 }

711

712 outputLength = ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32 (input, 1).getTerminatedBuffer(), -1, &status);

713 if (U_FAILURE(status) \|\| outputLength <= 0) {

714 errln("Non-ignorable code point: U+%04X skipped unexpectedly - %s", input[0], u_errorName(status));

715 }

716 }

717

718 ucnv_close(cnv);

719 delete set_not_ignorable;

720 delete set_ignorable;

721 }

722

723 // open testdata or ICU data converter ------------------------------------- ***

724

725 UConverter *

726 ConversionTest::cnv_open(const char *name, UErrorCode &errorCode) {

727 if(name!=NULL && *name=='+') {

728 // Converter names that start with '+' are ignored in ICU4J tests.

729 ++name;

730 }

731 if(name!=NULL && name=='') {

732 /* loadTestData(): set the data directory */

733 return ucnv_openPackage(loadTestData(errorCode), name+1, &errorCode);

734 } else {

735 return ucnv_open(name, &errorCode);

736 }

737 }

738

739 // output helpers ---------------------------------------------------------- ***

740

// Returns the lowercase hexadecimal digit character for a value 0..15.
// Values above 15 are not checked and do not yield a hex digit.
static inline char
hexDigit(uint8_t digit) {
    return digit<=9 ? (char)('0'+digit) : (char)('a'-10+digit);
}

745

746 static char *

747 printBytes(const uint8_t bytes, int32_t length, char out) {

748 uint8_t b;

749

750 if(length>0) {

751 b=*bytes++;

752 --length;

753 *out++=hexDigit((uint8_t)(b>>4));

754 *out++=hexDigit((uint8_t)(b&0xf));

755 }

756

757 while(length>0) {

758 b=*bytes++;

759 --length;

760 *out++=' ';

761 *out++=hexDigit((uint8_t)(b>>4));

762 *out++=hexDigit((uint8_t)(b&0xf));

763 }

764 *out++=0;

765 return out;

766 }

767

768 static char *

769 printUnicode(const UChar unicode, int32_t length, char out) {

770 UChar32 c;

771 int32_t i;

772

773 for(i=0; i<length;) {

774 if(i>0) {

775 *out++=' ';

776 }

777 U16_NEXT(unicode, i, length, c);

778 // write 4..6 digits

779 if(c>=0x100000) {

780 *out++='1';

781 }

782 if(c>=0x10000) {

783 *out++=hexDigit((uint8_t)((c>>16)&0xf));

784 }

785 *out++=hexDigit((uint8_t)((c>>12)&0xf));

786 *out++=hexDigit((uint8_t)((c>>8)&0xf));

787 *out++=hexDigit((uint8_t)((c>>4)&0xf));

788 *out++=hexDigit((uint8_t)(c&0xf));

789 }

790 *out++=0;

791 return out;

792 }

793

// Writes the offsets as fixed-width, two-character fields separated by
// single spaces, followed by a terminating NUL.
// Field forms: "-x" for values below -9, "-1".."-9", " 0".." 9",
// "10".."99", and "xx" for values above 99.
//   offsets - offsets to print; NULL is treated as an empty array
//   length  - number of offsets
//   out     - output buffer; the caller must provide at least 3*length bytes
//             (at least 1 for an empty input) - no bounds checking is done
// Returns a pointer just past the terminating NUL.
static char *
printOffsets(const int32_t *offsets, int32_t length, char *out) {
    int32_t i, o, d;

    if(offsets==NULL) {
        length=0;
    }

    for(i=0; i<length; ++i) {
        if(i>0) {
            *out++=' ';
        }
        o=offsets[i];

        // print all offsets with 2 characters each (-x, -9..99, xx)
        if(o<-9) {
            *out++='-';
            *out++='x';
        } else if(o<0) {
            *out++='-';
            *out++=(char)('0'-o);
        } else if(o<=99) {
            *out++=(d=o/10)==0 ? ' ' : (char)('0'+d);
            *out++=(char)('0'+o%10);
        } else /* o>99 */ {
            *out++='x';
            *out++='x';
        }
    }
    *out++=0;
    return out;
}

826

827 // toUnicode test worker functions ----------------------------------------- ***

828

829 static int32_t

830 stepToUnicode(ConversionCase &cc, UConverter *cnv,

831 UChar *result, int32_t resultCapacity,

832 int32_t resultOffsets, / also resultCapacity */

833 int32_t step,

834 UErrorCode *pErrorCode) {

835 const char source, sourceLimit, *bytesLimit;

836 UChar target, targetLimit, *resultLimit;

837 UBool flush;

838

839 source=(const char *)cc.bytes;

840 target=result;

841 bytesLimit=source+cc.bytesLength;

842 resultLimit=result+resultCapacity;

843

844 if(step>=0) {

845 // call ucnv_toUnicode() with in/out buffers no larger than (step) at a time

846 // move only one buffer (in vs. out) at a time to be extra mean

847 // step==0 performs bulk conversion and generates offsets

848

849 // initialize the partial limits for the loop

850 if(step==0) {

851 // use the entire buffers

852 sourceLimit=bytesLimit;

853 targetLimit=resultLimit;

854 flush=cc.finalFlush;

855 } else {

856 // start with empty partial buffers

857 sourceLimit=source;

858 targetLimit=target;

859 flush=FALSE;

860

861 // output offsets only for bulk conversion

862 resultOffsets=NULL;

863 }

864

865 for(;;) {

866 // resetting the opposite conversion direction must not affect this one

867 ucnv_resetFromUnicode(cnv);

868

869 // convert

870 ucnv_toUnicode(cnv,

871 &target, targetLimit,

872 &source, sourceLimit,

873 resultOffsets,

874 flush, pErrorCode);

875

876 // check pointers and errors

877 if(source>sourceLimit \|\| target>targetLimit) {

878 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

879 break;

880 } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

881 if(target!=targetLimit) {

882 // buffer overflow must only be set when the target is fille d

883 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

884 break;

885 } else if(targetLimit==resultLimit) {

886 // not just a partial overflow

887 break;

888 }

889

890 // the partial target is filled, set a new limit, reset the erro r and continue

891 targetLimit=(resultLimit-target)>=step ? target+step : resultLim it;

892 *pErrorCode=U_ZERO_ERROR;

893 } else if(U_FAILURE(*pErrorCode)) {

894 // some other error occurred, done

895 break;

896 } else {

897 if(source!=sourceLimit) {

898 // when no error occurs, then the input must be consumed

899 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

900 break;

901 }

902

903 if(sourceLimit==bytesLimit) {

904 // we are done

905 break;

906 }

907

908 // the partial conversion succeeded, set a new limit and continu e

909 sourceLimit=(bytesLimit-source)>=step ? source+step : bytesLimit ;

910 flush=(UBool)(cc.finalFlush && sourceLimit==bytesLimit);

911 }

912 }

913 } else /* step<0 */ {

914 /*

915 * step==-1: call only ucnv_getNextUChar()

916 * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar()

917 * if step==-2 or -3, then give ucnv_toUnicode() the whole remaining i nput,

918 * else give it at most (-step-2)/2 bytes

919 */

920 UChar32 c;

921

922 // end the loop by getting an index out of bounds error

923 for(;;) {

924 // resetting the opposite conversion direction must not affect this one

925 ucnv_resetFromUnicode(cnv);

926

927 // convert

928 if((step&1)!=0 /* odd: -1, -3, -5, ... */) {

929 sourceLimit=source; // use sourceLimit not as a real limit

930 // but to remember the pre-getNextUChar sour ce pointer

931 c=ucnv_getNextUChar(cnv, &source, bytesLimit, pErrorCode);

932

933 // check pointers and errors

934 if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) {

935 if(source!=bytesLimit) {

936 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

937 } else {

938 *pErrorCode=U_ZERO_ERROR;

939 }

940 break;

941 } else if(U_FAILURE(*pErrorCode)) {

942 break;

943 }

944 // source may not move if c is from previous overflow

945

946 if(target==resultLimit) {

947 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

948 break;

949 }

950 if(c<=0xffff) {

951 *target++=(UChar)c;

952 } else {

953 *target++=U16_LEAD(c);

954 if(target==resultLimit) {

955 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

956 break;

957 }

958 *target++=U16_TRAIL(c);

959 }

960

961 // alternate between -n-1 and -n but leave -1 alone

962 if(step<-1) {

963 ++step;

964 }

965 } else /* step is even */ {

966 // allow only one UChar output

967 targetLimit=target<resultLimit ? target+1 : resultLimit;

968

969 // as with ucnv_getNextUChar(), we always flush (if we go to byt esLimit)

970 // and never output offsets

971 if(step==-2) {

972 sourceLimit=bytesLimit;

973 } else {

974 sourceLimit=source+(-step-2)/2;

975 if(sourceLimit>bytesLimit) {

976 sourceLimit=bytesLimit;

977 }

978 }

979

980 ucnv_toUnicode(cnv,

981 &target, targetLimit,

982 &source, sourceLimit,

983 NULL, (UBool)(sourceLimit==bytesLimit), pErrorCode);

984

985 // check pointers and errors

986 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

987 if(target!=targetLimit) {

988 // buffer overflow must only be set when the target is f illed

989 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

990 break;

991 } else if(targetLimit==resultLimit) {

992 // not just a partial overflow

993 break;

994 }

995

996 // the partial target is filled, set a new limit and continu e

997 *pErrorCode=U_ZERO_ERROR;

998 } else if(U_FAILURE(*pErrorCode)) {

999 // some other error occurred, done

1000 break;

1001 } else {

1002 if(source!=sourceLimit) {

1003 // when no error occurs, then the input must be consumed

1004 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

1005 break;

1006 }

1007

1008 // we are done (flush==TRUE) but we continue, to get the ind ex out of bounds error above

1009 }

1010

1011 --step;

1012 }

1013 }

1014 }

1015

1016 return (int32_t)(target-result);

1017 }

1018

1019 UBool

1020 ConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback , const char *option) {

1021 // open the converter

1022 IcuTestErrorCode errorCode(*this, "ToUnicodeCase");

1023 LocalUConverterPointer cnv(cnv_open(cc.charset, errorCode));

1024 if(errorCode.isFailure()) {

1025 errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_o pen() failed - %s",

1026 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, er rorCode.errorName());

1027 errorCode.reset();

1028 return FALSE;

1029 }

1030

1031 // set the callback

1032 if(callback!=NULL) {

1033 ucnv_setToUCallBack(cnv.getAlias(), callback, option, NULL, NULL, errorC ode);

1034 if(U_FAILURE(errorCode)) {

1035 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBac k() failed - %s",

1036 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush , u_errorName(errorCode));

1037 return FALSE;

1038 }

1039 }

1040

1041 int32_t resultOffsets[256];

1042 UChar result[256];

1043 int32_t resultLength;

1044 UBool ok;

1045

1046 static const struct {

1047 int32_t step;

1048 const char *name;

1049 } steps[]={

1050 { 0, "bulk" }, // must be first for offsets to be checked

1051 { 1, "step=1" },

1052 { 3, "step=3" },

1053 { 7, "step=7" },

1054 { -1, "getNext" },

1055 { -2, "toU(bulk)+getNext" },

1056 { -3, "getNext+toU(bulk)" },

1057 { -4, "toU(1)+getNext" },

1058 { -5, "getNext+toU(1)" },

1059 { -12, "toU(5)+getNext" },

1060 { -13, "getNext+toU(5)" },

1061 };

1062 int32_t i, step;

1063

1064 ok=TRUE;

1065 for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {

1066 step=steps[i].step;

1067 if(step<0 && !cc.finalFlush) {

1068 // skip ucnv_getNextUChar() if !finalFlush because

1069 // ucnv_getNextUChar() always implies flush

1070 continue;

1071 }

1072 if(step!=0) {

1073 // bulk test is first, then offsets are not checked any more

1074 cc.offsets=NULL;

1075 }

1076 else {

1077 memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets));

1078 }

1079 memset(result, -1, UPRV_LENGTHOF(result));

1080 errorCode.reset();

1081 resultLength=stepToUnicode(cc, cnv.getAlias(),

1082 result, UPRV_LENGTHOF(result),

1083 step==0 ? resultOffsets : NULL,

1084 step, errorCode);

1085 ok=checkToUnicode(

1086 cc, cnv.getAlias(), steps[i].name,

1087 result, resultLength,

1088 cc.offsets!=NULL ? resultOffsets : NULL,

1089 errorCode);

1090 if(errorCode.isFailure() \|\| !cc.finalFlush) {

1091 // reset if an error occurred or we did not flush

1092 // otherwise do nothing to make sure that flushing resets

1093 ucnv_resetToUnicode(cnv.getAlias());

1094 }

1095 if (cc.offsets != NULL && resultOffsets[resultLength] != -1) {

1096 errln("toUnicode[%d](%s) Conversion wrote too much to offsets at ind ex %d",

1097 cc.caseNr, cc.charset, resultLength);

1098 }

1099 if (result[resultLength] != (UChar)-1) {

1100 errln("toUnicode[%d](%s) Conversion wrote too much to result at inde x %d",

1101 cc.caseNr, cc.charset, resultLength);

1102 }

1103 }

1104

1105 // not a real loop, just a convenience for breaking out of the block

1106 while(ok && cc.finalFlush) {

1107 // test ucnv_toUChars()

1108 memset(result, 0, sizeof(result));

1109

1110 errorCode.reset();

1111 resultLength=ucnv_toUChars(cnv.getAlias(),

1112 result, UPRV_LENGTHOF(result),

1113 (const char *)cc.bytes, cc.bytesLength,

1114 errorCode);

1115 ok=checkToUnicode(

1116 cc, cnv.getAlias(), "toUChars",

1117 result, resultLength,

1118 NULL,

1119 errorCode);

1120 if(!ok) {

1121 break;

1122 }

1123

1124 // test preflighting

1125 // keep the correct result for simple checking

1126 errorCode.reset();

1127 resultLength=ucnv_toUChars(cnv.getAlias(),

1128 NULL, 0,

1129 (const char *)cc.bytes, cc.bytesLength,

1130 errorCode);

1131 if(errorCode.get()==U_STRING_NOT_TERMINATED_WARNING \|\| errorCode.get()== U_BUFFER_OVERFLOW_ERROR) {

1132 errorCode.reset();

1133 }

1134 ok=checkToUnicode(

1135 cc, cnv.getAlias(), "preflight toUChars",

1136 result, resultLength,

1137 NULL,

1138 errorCode);

1139 break;

1140 }

1141

1142 errorCode.reset(); // all errors have already been reported

1143 return ok;

1144 }

1145

1146 UBool

1147 ConversionTest::checkToUnicode(ConversionCase &cc, UConverter cnv, const char name,

1148 const UChar *result, int32_t resultLength,

1149 const int32_t *resultOffsets,

1150 UErrorCode resultErrorCode) {

1151 char resultInvalidChars[8];

1152 int8_t resultInvalidLength;

1153 UErrorCode errorCode;

1154

1155 const char *msg;

1156

1157 // reset the message; NULL will mean "ok"

1158 msg=NULL;

1159

1160 errorCode=U_ZERO_ERROR;

1161 resultInvalidLength=sizeof(resultInvalidChars);

1162 ucnv_getInvalidChars(cnv, resultInvalidChars, &resultInvalidLength, &errorCo de);

1163 if(U_FAILURE(errorCode)) {

1164 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidChar s() failed - %s",

1165 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, na me, u_errorName(errorCode));

1166 return FALSE;

1167 }

1168

1169 // check everything that might have gone wrong

1170 if(cc.unicodeLength!=resultLength) {

1171 msg="wrong result length";

1172 } else if(0!=u_memcmp(cc.unicode, result, cc.unicodeLength)) {

1173 msg="wrong result string";

1174 } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.unicod eLengthsizeof(cc.offsets))) {

1175 msg="wrong offsets";

1176 } else if(cc.outErrorCode!=resultErrorCode) {

1177 msg="wrong error code";

1178 } else if(cc.invalidLength!=resultInvalidLength) {

1179 msg="wrong length of last invalid input";

1180 } else if(0!=memcmp(cc.invalidChars, resultInvalidChars, cc.invalidLength)) {

1181 msg="wrong last invalid input";

1182 }

1183

1184 if(msg==NULL) {

1185 return TRUE;

1186 } else {

1187 char buffer[2000]; // one buffer for all strings

1188 char s, bytesString, unicodeString, resultString,

1189 offsetsString, resultOffsetsString,

1190 invalidCharsString, resultInvalidCharsString;

1191

1192 bytesString=s=buffer;

1193 s=printBytes(cc.bytes, cc.bytesLength, bytesString);

1194 s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString=s);

1195 s=printUnicode(result, resultLength, resultString=s);

1196 s=printOffsets(cc.offsets, cc.unicodeLength, offsetsString=s);

1197 s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);

1198 s=printBytes(cc.invalidChars, cc.invalidLength, invalidCharsString=s);

1199 s=printBytes((uint8_t *)resultInvalidChars, resultInvalidLength, resultI nvalidCharsString=s);

1200

1201 if((s-buffer)>(int32_t)sizeof(buffer)) {

1202 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: ch eckToUnicode() test output buffer overflow writing %d chars\n",

1203 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush , name, (int)(s-buffer));

1204 exit(1);

1205 }

1206

1207 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"

1208 " bytes <%s>[%d]\n"

1209 " expected <%s>[%d]\n"

1210 " result <%s>[%d]\n"

1211 " offsets <%s>\n"

1212 " result offsets <%s>\n"

1213 " error code expected %s got %s\n"

1214 " invalidChars expected <%s> got <%s>\n",

1215 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name , msg,

1216 bytesString, cc.bytesLength,

1217 unicodeString, cc.unicodeLength,

1218 resultString, resultLength,

1219 offsetsString,

1220 resultOffsetsString,

1221 u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),

1222 invalidCharsString, resultInvalidCharsString);

1223

1224 return FALSE;

1225 }

1226 }

1227

1228 // fromUnicode test worker functions --------------------------------------- ***

1229

1230 static int32_t

1231 stepFromUTF8(ConversionCase &cc,

1232 UConverter utf8Cnv, UConverter cnv,

1233 char *result, int32_t resultCapacity,

1234 int32_t step,

1235 UErrorCode *pErrorCode) {

1236 const char source, sourceLimit, *utf8Limit;

1237 UChar pivotBuffer[32];

1238 UChar pivotSource, pivotTarget, *pivotLimit;

1239 char target, targetLimit, *resultLimit;

1240 UBool flush;

1241

1242 source=cc.utf8;

1243 pivotSource=pivotTarget=pivotBuffer;

1244 target=result;

1245 utf8Limit=source+cc.utf8Length;

1246 resultLimit=result+resultCapacity;

1247

1248 // call ucnv_convertEx() with in/out buffers no larger than (step) at a time

1249 // move only one buffer (in vs. out) at a time to be extra mean

1250 // step==0 performs bulk conversion

1251

1252 // initialize the partial limits for the loop

1253 if(step==0) {

1254 // use the entire buffers

1255 sourceLimit=utf8Limit;

1256 targetLimit=resultLimit;

1257 flush=cc.finalFlush;

1258

1259 pivotLimit=pivotBuffer+UPRV_LENGTHOF(pivotBuffer);

1260 } else {

1261 // start with empty partial buffers

1262 sourceLimit=source;

1263 targetLimit=target;

1264 flush=FALSE;

1265

1266 // empty pivot is not allowed, make it of length step

1267 pivotLimit=pivotBuffer+step;

1268 }

1269

1270 for(;;) {

1271 // resetting the opposite conversion direction must not affect this one

1272 ucnv_resetFromUnicode(utf8Cnv);

1273 ucnv_resetToUnicode(cnv);

1274

1275 // convert

1276 ucnv_convertEx(cnv, utf8Cnv,

1277 &target, targetLimit,

1278 &source, sourceLimit,

1279 pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,

1280 FALSE, flush, pErrorCode);

1281

1282 // check pointers and errors

1283 if(source>sourceLimit \|\| target>targetLimit) {

1284 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

1285 break;

1286 } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

1287 if(target!=targetLimit) {

1288 // buffer overflow must only be set when the target is filled

1289 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

1290 break;

1291 } else if(targetLimit==resultLimit) {

1292 // not just a partial overflow

1293 break;

1294 }

1295

1296 // the partial target is filled, set a new limit, reset the error an d continue

1297 targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;

1298 *pErrorCode=U_ZERO_ERROR;

1299 } else if(U_FAILURE(*pErrorCode)) {

1300 if(pivotSource==pivotBuffer) {

1301 // toUnicode error, should not occur

1302 // toUnicode errors are tested in cintltst TestConvertExFromUTF8 ()

1303 break;

1304 } else {

1305 // fromUnicode error

1306 // some other error occurred, done

1307 break;

1308 }

1309 } else {

1310 if(source!=sourceLimit) {

1311 // when no error occurs, then the input must be consumed

1312 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

1313 break;

1314 }

1315

1316 if(sourceLimit==utf8Limit) {

1317 // we are done

1318 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {

1319 // ucnv_convertEx() warns about not terminating the output

1320 // but ucnv_fromUnicode() does not and so

1321 // checkFromUnicode() does not expect it

1322 *pErrorCode=U_ZERO_ERROR;

1323 }

1324 break;

1325 }

1326

1327 // the partial conversion succeeded, set a new limit and continue

1328 sourceLimit=(utf8Limit-source)>=step ? source+step : utf8Limit;

1329 flush=(UBool)(cc.finalFlush && sourceLimit==utf8Limit);

1330 }

1331 }

1332

1333 return (int32_t)(target-result);

1334 }

1335

1336 static int32_t

1337 stepFromUnicode(ConversionCase &cc, UConverter *cnv,

1338 char *result, int32_t resultCapacity,

1339 int32_t resultOffsets, / also resultCapacity */

1340 int32_t step,

1341 UErrorCode *pErrorCode) {

1342 const UChar source, sourceLimit, *unicodeLimit;

1343 char target, targetLimit, *resultLimit;

1344 UBool flush;

1345

1346 source=cc.unicode;

1347 target=result;

1348 unicodeLimit=source+cc.unicodeLength;

1349 resultLimit=result+resultCapacity;

1350

1351 // call ucnv_fromUnicode() with in/out buffers no larger than (step) at a ti me

1352 // move only one buffer (in vs. out) at a time to be extra mean

1353 // step==0 performs bulk conversion and generates offsets

1354

1355 // initialize the partial limits for the loop

1356 if(step==0) {

1357 // use the entire buffers

1358 sourceLimit=unicodeLimit;

1359 targetLimit=resultLimit;

1360 flush=cc.finalFlush;

1361 } else {

1362 // start with empty partial buffers

1363 sourceLimit=source;

1364 targetLimit=target;

1365 flush=FALSE;

1366

1367 // output offsets only for bulk conversion

1368 resultOffsets=NULL;

1369 }

1370

1371 for(;;) {

1372 // resetting the opposite conversion direction must not affect this one

1373 ucnv_resetToUnicode(cnv);

1374

1375 // convert

1376 ucnv_fromUnicode(cnv,

1377 &target, targetLimit,

1378 &source, sourceLimit,

1379 resultOffsets,

1380 flush, pErrorCode);

1381

1382 // check pointers and errors

1383 if(source>sourceLimit \|\| target>targetLimit) {

1384 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

1385 break;

1386 } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

1387 if(target!=targetLimit) {

1388 // buffer overflow must only be set when the target is filled

1389 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

1390 break;

1391 } else if(targetLimit==resultLimit) {

1392 // not just a partial overflow

1393 break;

1394 }

1395

1396 // the partial target is filled, set a new limit, reset the error an d continue

1397 targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;

1398 *pErrorCode=U_ZERO_ERROR;

1399 } else if(U_FAILURE(*pErrorCode)) {

1400 // some other error occurred, done

1401 break;

1402 } else {

1403 if(source!=sourceLimit) {

1404 // when no error occurs, then the input must be consumed

1405 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

1406 break;

1407 }

1408

1409 if(sourceLimit==unicodeLimit) {

1410 // we are done

1411 break;

1412 }

1413

1414 // the partial conversion succeeded, set a new limit and continue

1415 sourceLimit=(unicodeLimit-source)>=step ? source+step : unicodeLimit ;

1416 flush=(UBool)(cc.finalFlush && sourceLimit==unicodeLimit);

1417 }

1418 }

1419

1420 return (int32_t)(target-result);

1421 }

1422

1423 UBool

1424 ConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback call back, const char *option) {

1425 UConverter *cnv;

1426 UErrorCode errorCode;

1427

1428 // open the converter

1429 errorCode=U_ZERO_ERROR;

1430 cnv=cnv_open(cc.charset, errorCode);

1431 if(U_FAILURE(errorCode)) {

1432 errcheckln(errorCode, "fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv _open() failed - %s",

1433 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_ errorName(errorCode));

1434 return FALSE;

1435 }

1436 ucnv_resetToUnicode(utf8Cnv);

1437

1438 // set the callback

1439 if(callback!=NULL) {

1440 ucnv_setFromUCallBack(cnv, callback, option, NULL, NULL, &errorCode);

1441 if(U_FAILURE(errorCode)) {

1442 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCal lBack() failed - %s",

1443 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush , u_errorName(errorCode));

1444 ucnv_close(cnv);

1445 return FALSE;

1446 }

1447 }

1448

1449 // set the fallbacks flag

1450 // TODO change with Jitterbug 2401, then add a similar call for toUnicode to o

1451 ucnv_setFallback(cnv, cc.fallbacks);

1452

1453 // set the subchar

1454 int32_t length;

1455

1456 if(cc.setSub>0) {

1457 length=(int32_t)strlen(cc.subchar);

1458 ucnv_setSubstChars(cnv, cc.subchar, (int8_t)length, &errorCode);

1459 if(U_FAILURE(errorCode)) {

1460 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstCha rs() failed - %s",

1461 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush , u_errorName(errorCode));

1462 ucnv_close(cnv);

1463 return FALSE;

1464 }

1465 } else if(cc.setSub<0) {

1466 ucnv_setSubstString(cnv, cc.subString, -1, &errorCode);

1467 if(U_FAILURE(errorCode)) {

1468 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstStr ing() failed - %s",

1469 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush , u_errorName(errorCode));

1470 ucnv_close(cnv);

1471 return FALSE;

1472 }

1473 }

1474

1475 // convert unicode to utf8

1476 char utf8[256];

1477 cc.utf8=utf8;

1478 u_strToUTF8(utf8, UPRV_LENGTHOF(utf8), &cc.utf8Length,

1479 cc.unicode, cc.unicodeLength,

1480 &errorCode);

1481 if(U_FAILURE(errorCode)) {

1482 // skip UTF-8 testing of a string with an unpaired surrogate,

1483 // or of one that's too long

1484 // toUnicode errors are tested in cintltst TestConvertExFromUTF8()

1485 cc.utf8Length=-1;

1486 }

1487

1488 int32_t resultOffsets[256];

1489 char result[256];

1490 int32_t resultLength;

1491 UBool ok;

1492

1493 static const struct {

1494 int32_t step;

1495 const char name, utf8Name;

1496 } steps[]={

1497 { 0, "bulk", "utf8" }, // must be first for offsets to be checked

1498 { 1, "step=1", "utf8 step=1" },

1499 { 3, "step=3", "utf8 step=3" },

1500 { 7, "step=7", "utf8 step=7" }

1501 };

1502 int32_t i, step;

1503

1504 ok=TRUE;

1505 for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {

1506 step=steps[i].step;

1507 memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets));

1508 memset(result, -1, UPRV_LENGTHOF(result));

1509 errorCode=U_ZERO_ERROR;

1510 resultLength=stepFromUnicode(cc, cnv,

1511 result, UPRV_LENGTHOF(result),

1512 step==0 ? resultOffsets : NULL,

1513 step, &errorCode);

1514 ok=checkFromUnicode(

1515 cc, cnv, steps[i].name,

1516 (uint8_t *)result, resultLength,

1517 cc.offsets!=NULL ? resultOffsets : NULL,

1518 errorCode);

1519 if(U_FAILURE(errorCode) \|\| !cc.finalFlush) {

1520 // reset if an error occurred or we did not flush

1521 // otherwise do nothing to make sure that flushing resets

1522 ucnv_resetFromUnicode(cnv);

1523 }

1524 if (resultOffsets[resultLength] != -1) {

1525 errln("fromUnicode[%d](%s) Conversion wrote too much to offsets at i ndex %d",

1526 cc.caseNr, cc.charset, resultLength);

1527 }

1528 if (result[resultLength] != (char)-1) {

1529 errln("fromUnicode[%d](%s) Conversion wrote too much to result at in dex %d",

1530 cc.caseNr, cc.charset, resultLength);

1531 }

1532

1533 // bulk test is first, then offsets are not checked any more

1534 cc.offsets=NULL;

1535

1536 // test direct conversion from UTF-8

1537 if(cc.utf8Length>=0) {

1538 errorCode=U_ZERO_ERROR;

1539 resultLength=stepFromUTF8(cc, utf8Cnv, cnv,

1540 result, UPRV_LENGTHOF(result),

1541 step, &errorCode);

1542 ok=checkFromUnicode(

1543 cc, cnv, steps[i].utf8Name,

1544 (uint8_t *)result, resultLength,

1545 NULL,

1546 errorCode);

1547 if(U_FAILURE(errorCode) \|\| !cc.finalFlush) {

1548 // reset if an error occurred or we did not flush

1549 // otherwise do nothing to make sure that flushing resets

1550 ucnv_resetToUnicode(utf8Cnv);

1551 ucnv_resetFromUnicode(cnv);

1552 }

1553 }

1554 }

1555

1556 // not a real loop, just a convenience for breaking out of the block

1557 while(ok && cc.finalFlush) {

1558 // test ucnv_fromUChars()

1559 memset(result, 0, sizeof(result));

1560

1561 errorCode=U_ZERO_ERROR;

1562 resultLength=ucnv_fromUChars(cnv,

1563 result, UPRV_LENGTHOF(result),

1564 cc.unicode, cc.unicodeLength,

1565 &errorCode);

1566 ok=checkFromUnicode(

1567 cc, cnv, "fromUChars",

1568 (uint8_t *)result, resultLength,

1569 NULL,

1570 errorCode);

1571 if(!ok) {

1572 break;

1573 }

1574

1575 // test preflighting

1576 // keep the correct result for simple checking

1577 errorCode=U_ZERO_ERROR;

1578 resultLength=ucnv_fromUChars(cnv,

1579 NULL, 0,

1580 cc.unicode, cc.unicodeLength,

1581 &errorCode);

1582 if(errorCode==U_STRING_NOT_TERMINATED_WARNING \|\| errorCode==U_BUFFER_OVE RFLOW_ERROR) {

1583 errorCode=U_ZERO_ERROR;

1584 }

1585 ok=checkFromUnicode(

1586 cc, cnv, "preflight fromUChars",

1587 (uint8_t *)result, resultLength,

1588 NULL,

1589 errorCode);

1590 break;

1591 }

1592

1593 ucnv_close(cnv);

1594 return ok;

1595 }

1596

1597 UBool

1598 ConversionTest::checkFromUnicode(ConversionCase &cc, UConverter cnv, const char name,

1599 const uint8_t *result, int32_t resultLength,

1600 const int32_t *resultOffsets,

1601 UErrorCode resultErrorCode) {

1602 UChar resultInvalidUChars[8];

1603 int8_t resultInvalidLength;

1604 UErrorCode errorCode;

1605

1606 const char *msg;

1607

1608 // reset the message; NULL will mean "ok"

1609 msg=NULL;

1610

1611 errorCode=U_ZERO_ERROR;

1612 resultInvalidLength=UPRV_LENGTHOF(resultInvalidUChars);

1613 ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &error Code);

1614 if(U_FAILURE(errorCode)) {

1615 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUC hars() failed - %s",

1616 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, na me, u_errorName(errorCode));

1617 return FALSE;

1618 }

1619

1620 // check everything that might have gone wrong

1621 if(cc.bytesLength!=resultLength) {

1622 msg="wrong result length";

1623 } else if(0!=memcmp(cc.bytes, result, cc.bytesLength)) {

1624 msg="wrong result string";

1625 } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.bytesL engthsizeof(cc.offsets))) {

1626 msg="wrong offsets";

1627 } else if(cc.outErrorCode!=resultErrorCode) {

1628 msg="wrong error code";

1629 } else if(cc.invalidLength!=resultInvalidLength) {

1630 msg="wrong length of last invalid input";

1631 } else if(0!=u_memcmp(cc.invalidUChars, resultInvalidUChars, cc.invalidLengt h)) {

1632 msg="wrong last invalid input";

1633 }

1634

1635 if(msg==NULL) {

1636 return TRUE;

1637 } else {

1638 char buffer[2000]; // one buffer for all strings

1639 char s, unicodeString, bytesString, resultString,

1640 offsetsString, resultOffsetsString,

1641 invalidCharsString, resultInvalidUCharsString;

1642

1643 unicodeString=s=buffer;

1644 s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString);

1645 s=printBytes(cc.bytes, cc.bytesLength, bytesString=s);

1646 s=printBytes(result, resultLength, resultString=s);

1647 s=printOffsets(cc.offsets, cc.bytesLength, offsetsString=s);

1648 s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);

1649 s=printUnicode(cc.invalidUChars, cc.invalidLength, invalidCharsString=s) ;

1650 s=printUnicode(resultInvalidUChars, resultInvalidLength, resultInvalidUC harsString=s);

1651

1652 if((s-buffer)>(int32_t)sizeof(buffer)) {

1653 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkFromUnicode() test output buffer overflow writing %d chars\n",

1654 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush , name, (int)(s-buffer));

1655 exit(1);

1656 }

1657

1658 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"

1659 " unicode <%s>[%d]\n"

1660 " expected <%s>[%d]\n"

1661 " result <%s>[%d]\n"

1662 " offsets <%s>\n"

1663 " result offsets <%s>\n"

1664 " error code expected %s got %s\n"

1665 " invalidChars expected <%s> got <%s>\n",

1666 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name , msg,

1667 unicodeString, cc.unicodeLength,

1668 bytesString, cc.bytesLength,

1669 resultString, resultLength,

1670 offsetsString,

1671 resultOffsetsString,

1672 u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),

1673 invalidCharsString, resultInvalidUCharsString);

1674

1675 return FALSE;

1676 }

1677 }

1678

1679 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

OLD	NEW

« no previous file with comments | « source/test/intltest/convtest.h ('k') | source/test/intltest/cpdtrtst.h » ('j') | no next file with comments »