icu46/source/test/intltest/convtest.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/test/intltest/convtest.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 *******************************************************************************

	3 *

	4 * Copyright (C) 2003-2010, International Business Machines

	5 * Corporation and others. All Rights Reserved.

	6 *

	7 *******************************************************************************

	8 * file name: convtest.cpp

	9 * encoding: US-ASCII

	10 * tab size: 8 (not used)

	11 * indentation:4

	12 *

	13 * created on: 2003jul15

	14 * created by: Markus W. Scherer

	15 *

	16 * Test file for data-driven conversion tests.

	17 */

	18

	19 #include "unicode/utypes.h"

	20

	21 #if !UCONFIG_NO_LEGACY_CONVERSION

	22 /*

	23 * Note: Turning off all of convtest.cpp if UCONFIG_NO_LEGACY_CONVERSION is set

	24 * is slightly unnecessary - it removes tests for Unicode charsets

	25 * like UTF-8 that should work.

	26 * However, there is no easy way for the test to detect whether a test case

	27 * is for a Unicode charset, so it would be difficult to only exclude those.

	28 * Also, regular testing of ICU is done with all modules on, therefore

	29 * not testing conversion for a custom configuration like this should be ok.

	30 */

	31

	32 #include "unicode/ucnv.h"

	33 #include "unicode/unistr.h"

	34 #include "unicode/parsepos.h"

	35 #include "unicode/uniset.h"

	36 #include "unicode/ustring.h"

	37 #include "unicode/ures.h"

	38 #include "convtest.h"

	39 #include "unicode/tstdtmod.h"

	40 #include <string.h>

	41 #include <stdlib.h>

	42

	/*
	 * Number of elements of a fixed-size array, as an int32_t.
	 * The whole expansion is parenthesized so that the cast cannot be
	 * re-associated when the macro is used inside a larger expression.
	 */
	#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

	44

	/*
	 * Characters used in test data for callbacks.
	 * The test functions in this file switch on the first character of a
	 * test case's "callback" string and compare it against these values.
	 */
	enum {
	    SUB_CB='?',     // selects the ..._CALLBACK_SUBSTITUTE callback
	    SKIP_CB='0',    // selects the ..._CALLBACK_SKIP callback
	    STOP_CB='.',    // selects the ..._CALLBACK_STOP callback
	    ESC_CB='&'      // selects the ..._CALLBACK_ESCAPE callback
	};

	52

	53 ConversionTest::ConversionTest() {

	54 UErrorCode errorCode=U_ZERO_ERROR;

	55 utf8Cnv=ucnv_open("UTF-8", &errorCode);

	56 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err orCode);

	57 if(U_FAILURE(errorCode)) {

	58 errln("unable to open UTF-8 converter");

	59 }

	60 }

	61

	62 ConversionTest::~ConversionTest() {

	63 ucnv_close(utf8Cnv);

	64 }

	65

	66 void

	67 ConversionTest::runIndexedTest(int32_t index, UBool exec, const char &name, cha r /par/) {

	68 if (exec) logln("TestSuite ConversionTest: ");

	69 switch (index) {

	70 #if !UCONFIG_NO_FILE_IO

	71 case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break;

	72 case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break;

	73 case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break;

	74 #else

	75 case 0:

	76 case 1:

	77 case 2: name="skip"; break;

	78 #endif

	79 case 3: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break ;

	80 default: name=""; break; //needed to end loop

	81 }

	82 }

	83

	84 // test data interface ----------------------------------------------------- ***

	85

	86 void

	87 ConversionTest::TestToUnicode() {

	88 ConversionCase cc;

	89 char charset[100], cbopt[4];

	90 const char *option;

	91 UnicodeString s, unicode;

	92 int32_t offsetsLength;

	93 UConverterToUCallback callback;

	94

	95 TestDataModule *dataModule;

	96 TestData *testData;

	97 const DataMap *testCase;

	98 UErrorCode errorCode;

	99 int32_t i;

	100

	101 errorCode=U_ZERO_ERROR;

	102 dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode) ;

	103 if(U_SUCCESS(errorCode)) {

	104 testData=dataModule->createTestData("toUnicode", errorCode);

	105 if(U_SUCCESS(errorCode)) {

	106 for(i=0; testData->nextCase(testCase, errorCode); ++i) {

	107 if(U_FAILURE(errorCode)) {

	108 errln("error retrieving conversion/toUnicode test case %d - %s",

	109 i, u_errorName(errorCode));

	110 errorCode=U_ZERO_ERROR;

	111 continue;

	112 }

	113

	114 cc.caseNr=i;

	115

	116 s=testCase->getString("charset", errorCode);

	117 s.extract(0, 0x7fffffff, charset, sizeof(charset), "");

	118 cc.charset=charset;

	119

	120 cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode) ;

	121 unicode=testCase->getString("unicode", errorCode);

	122 cc.unicode=unicode.getBuffer();

	123 cc.unicodeLength=unicode.length();

	124

	125 offsetsLength=0;

	126 cc.offsets=testCase->getIntVector(offsetsLength, "offsets", erro rCode);

	127 if(offsetsLength==0) {

	128 cc.offsets=NULL;

	129 } else if(offsetsLength!=unicode.length()) {

	130 errln("toUnicode[%d] unicode[%d] and offsets[%d] must have t he same length",

	131 i, unicode.length(), offsetsLength);

	132 errorCode=U_ILLEGAL_ARGUMENT_ERROR;

	133 }

	134

	135 cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);

	136 cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);

	137

	138 s=testCase->getString("errorCode", errorCode);

	139 if(s==UNICODE_STRING("invalid", 7)) {

	140 cc.outErrorCode=U_INVALID_CHAR_FOUND;

	141 } else if(s==UNICODE_STRING("illegal", 7)) {

	142 cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;

	143 } else if(s==UNICODE_STRING("truncated", 9)) {

	144 cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;

	145 } else if(s==UNICODE_STRING("illesc", 6)) {

	146 cc.outErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE;

	147 } else if(s==UNICODE_STRING("unsuppesc", 9)) {

	148 cc.outErrorCode=U_UNSUPPORTED_ESCAPE_SEQUENCE;

	149 } else {

	150 cc.outErrorCode=U_ZERO_ERROR;

	151 }

	152

	153 s=testCase->getString("callback", errorCode);

	154 s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");

	155 cc.cbopt=cbopt;

	156 switch(cbopt[0]) {

	157 case SUB_CB:

	158 callback=UCNV_TO_U_CALLBACK_SUBSTITUTE;

	159 break;

	160 case SKIP_CB:

	161 callback=UCNV_TO_U_CALLBACK_SKIP;

	162 break;

	163 case STOP_CB:

	164 callback=UCNV_TO_U_CALLBACK_STOP;

	165 break;

	166 case ESC_CB:

	167 callback=UCNV_TO_U_CALLBACK_ESCAPE;

	168 break;

	169 default:

	170 callback=NULL;

	171 break;

	172 }

	173 option=callback==NULL ? cbopt : cbopt+1;

	174 if(*option==0) {

	175 option=NULL;

	176 }

	177

	178 cc.invalidChars=testCase->getBinary(cc.invalidLength, "invalidCh ars", errorCode);

	179

	180 if(U_FAILURE(errorCode)) {

	181 errln("error parsing conversion/toUnicode test case %d - %s" ,

	182 i, u_errorName(errorCode));

	183 errorCode=U_ZERO_ERROR;

	184 } else {

	185 logln("TestToUnicode[%d] %s", i, charset);

	186 ToUnicodeCase(cc, callback, option);

	187 }

	188 }

	189 delete testData;

	190 }

	191 delete dataModule;

	192 }

	193 else {

	194 dataerrln("Could not load test conversion data");

	195 }

	196 }

	197

	198 void

	199 ConversionTest::TestFromUnicode() {

	200 ConversionCase cc;

	201 char charset[100], cbopt[4];

	202 const char *option;

	203 UnicodeString s, unicode, invalidUChars;

	204 int32_t offsetsLength, index;

	205 UConverterFromUCallback callback;

	206

	207 TestDataModule *dataModule;

	208 TestData *testData;

	209 const DataMap *testCase;

	210 const UChar *p;

	211 UErrorCode errorCode;

	212 int32_t i, length;

	213

	214 errorCode=U_ZERO_ERROR;

	215 dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode) ;

	216 if(U_SUCCESS(errorCode)) {

	217 testData=dataModule->createTestData("fromUnicode", errorCode);

	218 if(U_SUCCESS(errorCode)) {

	219 for(i=0; testData->nextCase(testCase, errorCode); ++i) {

	220 if(U_FAILURE(errorCode)) {

	221 errln("error retrieving conversion/fromUnicode test case %d - %s",

	222 i, u_errorName(errorCode));

	223 errorCode=U_ZERO_ERROR;

	224 continue;

	225 }

	226

	227 cc.caseNr=i;

	228

	229 s=testCase->getString("charset", errorCode);

	230 s.extract(0, 0x7fffffff, charset, sizeof(charset), "");

	231 cc.charset=charset;

	232

	233 unicode=testCase->getString("unicode", errorCode);

	234 cc.unicode=unicode.getBuffer();

	235 cc.unicodeLength=unicode.length();

	236 cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode) ;

	237

	238 offsetsLength=0;

	239 cc.offsets=testCase->getIntVector(offsetsLength, "offsets", erro rCode);

	240 if(offsetsLength==0) {

	241 cc.offsets=NULL;

	242 } else if(offsetsLength!=cc.bytesLength) {

	243 errln("fromUnicode[%d] bytes[%d] and offsets[%d] must have t he same length",

	244 i, cc.bytesLength, offsetsLength);

	245 errorCode=U_ILLEGAL_ARGUMENT_ERROR;

	246 }

	247

	248 cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);

	249 cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);

	250

	251 s=testCase->getString("errorCode", errorCode);

	252 if(s==UNICODE_STRING("invalid", 7)) {

	253 cc.outErrorCode=U_INVALID_CHAR_FOUND;

	254 } else if(s==UNICODE_STRING("illegal", 7)) {

	255 cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;

	256 } else if(s==UNICODE_STRING("truncated", 9)) {

	257 cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;

	258 } else {

	259 cc.outErrorCode=U_ZERO_ERROR;

	260 }

	261

	262 s=testCase->getString("callback", errorCode);

	263 cc.setSub=0; // default: no subchar

	264

	265 if((index=s.indexOf((UChar)0))>0) {

	266 // read NUL-separated subchar first, if any

	267 // copy the subchar from Latin-1 characters

	268 // start after the NUL

	269 p=s.getTerminatedBuffer();

	270 length=index+1;

	271 p+=length;

	272 length=s.length()-length;

	273 if(length<=0 \|\| length>=(int32_t)sizeof(cc.subchar)) {

	274 errorCode=U_ILLEGAL_ARGUMENT_ERROR;

	275 } else {

	276 int32_t j;

	277

	278 for(j=0; j<length; ++j) {

	279 cc.subchar[j]=(char)p[j];

	280 }

	281 // NUL-terminate the subchar

	282 cc.subchar[j]=0;

	283 cc.setSub=1;

	284 }

	285

	286 // remove the NUL and subchar from s

	287 s.truncate(index);

	288 } else if((index=s.indexOf((UChar)0x3d))>0) /* '=' */ {

	289 // read a substitution string, separated by an equal sign

	290 p=s.getBuffer()+index+1;

	291 length=s.length()-(index+1);

	292 if(length<0 \|\| length>=LENGTHOF(cc.subString)) {

	293 errorCode=U_ILLEGAL_ARGUMENT_ERROR;

	294 } else {

	295 u_memcpy(cc.subString, p, length);

	296 // NUL-terminate the subString

	297 cc.subString[length]=0;

	298 cc.setSub=-1;

	299 }

	300

	301 // remove the equal sign and subString from s

	302 s.truncate(index);

	303 }

	304

	305 s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");

	306 cc.cbopt=cbopt;

	307 switch(cbopt[0]) {

	308 case SUB_CB:

	309 callback=UCNV_FROM_U_CALLBACK_SUBSTITUTE;

	310 break;

	311 case SKIP_CB:

	312 callback=UCNV_FROM_U_CALLBACK_SKIP;

	313 break;

	314 case STOP_CB:

	315 callback=UCNV_FROM_U_CALLBACK_STOP;

	316 break;

	317 case ESC_CB:

	318 callback=UCNV_FROM_U_CALLBACK_ESCAPE;

	319 break;

	320 default:

	321 callback=NULL;

	322 break;

	323 }

	324 option=callback==NULL ? cbopt : cbopt+1;

	325 if(*option==0) {

	326 option=NULL;

	327 }

	328

	329 invalidUChars=testCase->getString("invalidUChars", errorCode);

	330 cc.invalidUChars=invalidUChars.getBuffer();

	331 cc.invalidLength=invalidUChars.length();

	332

	333 if(U_FAILURE(errorCode)) {

	334 errln("error parsing conversion/fromUnicode test case %d - % s",

	335 i, u_errorName(errorCode));

	336 errorCode=U_ZERO_ERROR;

	337 } else {

	338 logln("TestFromUnicode[%d] %s", i, charset);

	339 FromUnicodeCase(cc, callback, option);

	340 }

	341 }

	342 delete testData;

	343 }

	344 delete dataModule;

	345 }

	346 else {

	347 dataerrln("Could not load test conversion data");

	348 }

	349 }

	350

	351 static const UChar ellipsis[]={ 0x2e, 0x2e, 0x2e };

	352

	353 void

	354 ConversionTest::TestGetUnicodeSet() {

	355 char charset[100];

	356 UnicodeString s, map, mapnot;

	357 int32_t which;

	358

	359 ParsePosition pos;

	360 UnicodeSet cnvSet, mapSet, mapnotSet, diffSet;

	361 UnicodeSet *cnvSetPtr = &cnvSet;

	362 LocalUConverterPointer cnv;

	363

	364 TestDataModule *dataModule;

	365 TestData *testData;

	366 const DataMap *testCase;

	367 UErrorCode errorCode;

	368 int32_t i;

	369

	370 errorCode=U_ZERO_ERROR;

	371 dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode) ;

	372 if(U_SUCCESS(errorCode)) {

	373 testData=dataModule->createTestData("getUnicodeSet", errorCode);

	374 if(U_SUCCESS(errorCode)) {

	375 for(i=0; testData->nextCase(testCase, errorCode); ++i) {

	376 if(U_FAILURE(errorCode)) {

	377 errln("error retrieving conversion/getUnicodeSet test case % d - %s",

	378 i, u_errorName(errorCode));

	379 errorCode=U_ZERO_ERROR;

	380 continue;

	381 }

	382

	383 s=testCase->getString("charset", errorCode);

	384 s.extract(0, 0x7fffffff, charset, sizeof(charset), "");

	385

	386 map=testCase->getString("map", errorCode);

	387 mapnot=testCase->getString("mapnot", errorCode);

	388

	389 which=testCase->getInt28("which", errorCode);

	390

	391 if(U_FAILURE(errorCode)) {

	392 errln("error parsing conversion/getUnicodeSet test case %d - %s",

	393 i, u_errorName(errorCode));

	394 errorCode=U_ZERO_ERROR;

	395 continue;

	396 }

	397

	398 // test this test case

	399 mapSet.clear();

	400 mapnotSet.clear();

	401

	402 pos.setIndex(0);

	403 mapSet.applyPattern(map, pos, 0, NULL, errorCode);

	404 if(U_FAILURE(errorCode) \|\| pos.getIndex()!=map.length()) {

	405 errln("error creating the map set for conversion/getUnicodeS et test case %d - %s\n"

	406 " error index %d index %d U+%04x",

	407 i, u_errorName(errorCode), pos.getErrorIndex(), pos. getIndex(), map.char32At(pos.getIndex()));

	408 errorCode=U_ZERO_ERROR;

	409 continue;

	410 }

	411

	412 pos.setIndex(0);

	413 mapnotSet.applyPattern(mapnot, pos, 0, NULL, errorCode);

	414 if(U_FAILURE(errorCode) \|\| pos.getIndex()!=mapnot.length()) {

	415 errln("error creating the mapnot set for conversion/getUnico deSet test case %d - %s\n"

	416 " error index %d index %d U+%04x",

	417 i, u_errorName(errorCode), pos.getErrorIndex(), pos. getIndex(), mapnot.char32At(pos.getIndex()));

	418 errorCode=U_ZERO_ERROR;

	419 continue;

	420 }

	421

	422 logln("TestGetUnicodeSet[%d] %s", i, charset);

	423

	424 cnv.adoptInstead(cnv_open(charset, errorCode));

	425 if(U_FAILURE(errorCode)) {

	426 errcheckln(errorCode, "error opening \"%s\" for conversion/g etUnicodeSet test case %d - %s",

	427 charset, i, u_errorName(errorCode));

	428 errorCode=U_ZERO_ERROR;

	429 continue;

	430 }

	431

	432 ucnv_getUnicodeSet(cnv.getAlias(), cnvSetPtr->toUSet(), (UConver terUnicodeSet)which, &errorCode);

	433

	434 if(U_FAILURE(errorCode)) {

	435 errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/ge tUnicodeSet test case %d - %s",

	436 charset, i, u_errorName(errorCode));

	437 errorCode=U_ZERO_ERROR;

	438 continue;

	439 }

	440

	441 // are there items that must be in cnvSet but are not?

	442 (diffSet=mapSet).removeAll(cnvSet);

	443 if(!diffSet.isEmpty()) {

	444 diffSet.toPattern(s, TRUE);

	445 if(s.length()>100) {

	446 s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis)) ;

	447 }

	448 errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d",

	449 charset, i);

	450 errln(s);

	451 }

	452

	453 // are there items that must not be in cnvSet but are?

	454 (diffSet=mapnotSet).retainAll(cnvSet);

	455 if(!diffSet.isEmpty()) {

	456 diffSet.toPattern(s, TRUE);

	457 if(s.length()>100) {

	458 s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis)) ;

	459 }

	460 errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d",

	461 charset, i);

	462 errln(s);

	463 }

	464 }

	465 delete testData;

	466 }

	467 delete dataModule;

	468 }

	469 else {

	470 dataerrln("Could not load test conversion data");

	471 }

	472 }

	473

	474 U_CDECL_BEGIN

	475 static void U_CALLCONV

	476 getUnicodeSetCallback(const void *context,

	477 UConverterFromUnicodeArgs * /fromUArgs/,

	478 const UChar* /codeUnits/,

	479 int32_t /length/,

	480 UChar32 codePoint,

	481 UConverterCallbackReason reason,

	482 UErrorCode *pErrorCode) {

	483 if(reason<=UCNV_IRREGULAR) {

	484 ((UnicodeSet *)context)->remove(codePoint); // the converter cannot con vert this code point

	485 *pErrorCode=U_ZERO_ERROR; // skip

	486 } // else ignore the reset, close and clone calls.

	487 }

	488 U_CDECL_END

	489

	490 // Compare ucnv_getUnicodeSet() with the set of characters that can be converted .

	491 void

	492 ConversionTest::TestGetUnicodeSet2() {

	493 // Build a string with all code points.

	494 UChar32 cpLimit;

	495 int32_t s0Length;

	496 if(quick) {

	497 cpLimit=s0Length=0x10000; // BMP only

	498 } else {

	499 cpLimit=0x110000;

	500 s0Length=0x10000+0x200000; // BMP + surrogate pairs

	501 }

	502 UChar *s0=new UChar[s0Length];

	503 if(s0==NULL) {

	504 return;

	505 }

	506 UChar *s=s0;

	507 UChar32 c;

	508 UChar c2;

	509 // low BMP

	510 for(c=0; c<=0xd7ff; ++c) {

	511 *s++=(UChar)c;

	512 }

	513 // trail surrogates

	514 for(c=0xdc00; c<=0xdfff; ++c) {

	515 *s++=(UChar)c;

	516 }

	517 // lead surrogates

	518 // (after trails so that there is not even one surrogate pair in between)

	519 for(c=0xd800; c<=0xdbff; ++c) {

	520 *s++=(UChar)c;

	521 }

	522 // high BMP

	523 for(c=0xe000; c<=0xffff; ++c) {

	524 *s++=(UChar)c;

	525 }

	526 // supplementary code points = surrogate pairs

	527 if(cpLimit==0x110000) {

	528 for(c=0xd800; c<=0xdbff; ++c) {

	529 for(c2=0xdc00; c2<=0xdfff; ++c2) {

	530 *s++=(UChar)c;

	531 *s++=c2;

	532 }

	533 }

	534 }

	535

	536 static const char *const cnvNames[]={

	537 "UTF-8",

	538 "UTF-7",

	539 "UTF-16",

	540 "US-ASCII",

	541 "ISO-8859-1",

	542 "windows-1252",

	543 "Shift-JIS",

	544 "ibm-1390", // EBCDIC_STATEFUL table

	545 "ibm-16684", // DBCS-only extension table based on EBCDIC_STATEFUL tabl e

	546 "HZ",

	547 "ISO-2022-JP",

	548 "JIS7",

	549 "ISO-2022-CN",

	550 "ISO-2022-CN-EXT",

	551 "LMBCS"

	552 };

	553 LocalUConverterPointer cnv;

	554 char buffer[1024];

	555 int32_t i;

	556 for(i=0; i<LENGTHOF(cnvNames); ++i) {

	557 UErrorCode errorCode=U_ZERO_ERROR;

	558 cnv.adoptInstead(cnv_open(cnvNames[i], errorCode));

	559 if(U_FAILURE(errorCode)) {

	560 errcheckln(errorCode, "failed to open converter %s - %s", cnvNames[i ], u_errorName(errorCode));

	561 continue;

	562 }

	563 UnicodeSet expected;

	564 ucnv_setFromUCallBack(cnv.getAlias(), getUnicodeSetCallback, &expected, NULL, NULL, &errorCode);

	565 if(U_FAILURE(errorCode)) {

	566 errln("failed to set the callback on converter %s - %s", cnvNames[i] , u_errorName(errorCode));

	567 continue;

	568 }

	569 UConverterUnicodeSet which;

	570 for(which=UCNV_ROUNDTRIP_SET; which<UCNV_SET_COUNT; which=(UConverterUni codeSet)((int)which+1)) {

	571 if(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {

	572 ucnv_setFallback(cnv.getAlias(), TRUE);

	573 }

	574 expected.add(0, cpLimit-1);

	575 s=s0;

	576 UBool flush;

	577 do {

	578 char *t=buffer;

	579 flush=(UBool)(s==s0+s0Length);

	580 ucnv_fromUnicode(cnv.getAlias(), &t, buffer+sizeof(buffer), (con st UChar **)&s, s0+s0Length, NULL, flush, &errorCode);

	581 if(U_FAILURE(errorCode)) {

	582 if(errorCode==U_BUFFER_OVERFLOW_ERROR) {

	583 errorCode=U_ZERO_ERROR;

	584 continue;

	585 } else {

	586 break; // unexpected error, should not occur

	587 }

	588 }

	589 } while(!flush);

	590 UnicodeSet set;

	591 ucnv_getUnicodeSet(cnv.getAlias(), set.toUSet(), which, &errorCode);

	592 if(cpLimit<0x110000) {

	593 set.remove(cpLimit, 0x10ffff);

	594 }

	595 if(which==UCNV_ROUNDTRIP_SET) {

	596 // ignore PUA code points because they will be converted even if they

	597 // are fallbacks and when other fallbacks are turned off,

	598 // but ucnv_getUnicodeSet(UCNV_ROUNDTRIP_SET) delivers true roun dtrips

	599 expected.remove(0xe000, 0xf8ff);

	600 expected.remove(0xf0000, 0xffffd);

	601 expected.remove(0x100000, 0x10fffd);

	602 set.remove(0xe000, 0xf8ff);

	603 set.remove(0xf0000, 0xffffd);

	604 set.remove(0x100000, 0x10fffd);

	605 }

	606 if(set!=expected) {

	607 // First try to see if we have different sets because ucnv_getUn icodeSet()

	608 // added strings: The above conversion method does not tell us w hat strings might be convertible.

	609 // Remove strings from the set and compare again.

	610 // Unfortunately, there are no good, direct set methods for find ing out whether there are strings

	611 // in the set, nor for enumerating or removing just them.

	612 // Intersect all code points with the set. The intersection will not contain strings.

	613 UnicodeSet temp(0, 0x10ffff);

	614 temp.retainAll(set);

	615 set=temp;

	616 }

	617 if(set!=expected) {

	618 UnicodeSet diffSet;

	619 UnicodeString out;

	620

	621 // are there items that must be in the set but are not?

	622 (diffSet=expected).removeAll(set);

	623 if(!diffSet.isEmpty()) {

	624 diffSet.toPattern(out, TRUE);

	625 if(out.length()>100) {

	626 out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis ));

	627 }

	628 errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d",

	629 cnvNames[i], which);

	630 errln(out);

	631 }

	632

	633 // are there items that must not be in the set but are?

	634 (diffSet=set).removeAll(expected);

	635 if(!diffSet.isEmpty()) {

	636 diffSet.toPattern(out, TRUE);

	637 if(out.length()>100) {

	638 out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis ));

	639 }

	640 errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d",

	641 cnvNames[i], which);

	642 errln(out);

	643 }

	644 }

	645 }

	646 }

	647

	648 delete [] s0;

	649 }

	650

	651 // open testdata or ICU data converter ------------------------------------- ***

	652

	653 UConverter *

	654 ConversionTest::cnv_open(const char *name, UErrorCode &errorCode) {

	655 if(name!=NULL && name=='') {

	656 /* loadTestData(): set the data directory */

	657 return ucnv_openPackage(loadTestData(errorCode), name+1, &errorCode);

	658 } else if(name!=NULL && *name=='+') {

	659 return ucnv_open((name+1), &errorCode);

	660 } else {

	661 return ucnv_open(name, &errorCode);

	662 }

	663 }

	664

	665 // output helpers ---------------------------------------------------------- ***

	666

	/*
	 * Returns the lowercase hexadecimal character for a value 0..15:
	 * '0'..'9' for 0..9 and 'a'..'f' for 10..15.
	 * Larger values are not checked.
	 */
	static inline char
	hexDigit(uint8_t digit) {
	    return digit<=9 ? (char)('0'+digit) : (char)('a'-10+digit);
	}

	671

	672 static char *

	673 printBytes(const uint8_t bytes, int32_t length, char out) {

	674 uint8_t b;

	675

	676 if(length>0) {

	677 b=*bytes++;

	678 --length;

	679 *out++=hexDigit((uint8_t)(b>>4));

	680 *out++=hexDigit((uint8_t)(b&0xf));

	681 }

	682

	683 while(length>0) {

	684 b=*bytes++;

	685 --length;

	686 *out++=' ';

	687 *out++=hexDigit((uint8_t)(b>>4));

	688 *out++=hexDigit((uint8_t)(b&0xf));

	689 }

	690 *out++=0;

	691 return out;

	692 }

	693

	694 static char *

	695 printUnicode(const UChar unicode, int32_t length, char out) {

	696 UChar32 c;

	697 int32_t i;

	698

	699 for(i=0; i<length;) {

	700 if(i>0) {

	701 *out++=' ';

	702 }

	703 U16_NEXT(unicode, i, length, c);

	704 // write 4..6 digits

	705 if(c>=0x100000) {

	706 *out++='1';

	707 }

	708 if(c>=0x10000) {

	709 *out++=hexDigit((uint8_t)((c>>16)&0xf));

	710 }

	711 *out++=hexDigit((uint8_t)((c>>12)&0xf));

	712 *out++=hexDigit((uint8_t)((c>>8)&0xf));

	713 *out++=hexDigit((uint8_t)((c>>4)&0xf));

	714 *out++=hexDigit((uint8_t)(c&0xf));

	715 }

	716 *out++=0;

	717 return out;

	718 }

	719

	/*
	 * Writes the offsets as two-character fields separated by single
	 * spaces, followed by a terminating NUL.
	 *
	 * Field format: "-x" for values below -9, "-1".."-9" for -1..-9,
	 * a space-padded or two-digit number for 0..99, "xx" for values
	 * above 99.
	 *
	 * offsets  input offsets; NULL is treated as an empty array
	 * length   number of offsets
	 * out      output buffer; the caller must provide at least 3*length bytes
	 *          (1 byte if nothing is printed)
	 *
	 * Returns the pointer just past the terminating NUL.
	 */
	static char *
	printOffsets(const int32_t *offsets, int32_t length, char *out) {
	    int32_t i, o, d;

	    if(offsets==NULL) {
	        length=0;
	    }

	    for(i=0; i<length; ++i) {
	        if(i>0) {
	            *out++=' ';
	        }
	        o=offsets[i];

	        // print all offsets with 2 characters each (-x, -9..99, xx)
	        if(o<-9) {
	            *out++='-';
	            *out++='x';
	        } else if(o<0) {
	            *out++='-';
	            *out++=(char)('0'-o);
	        } else if(o<=99) {
	            *out++=(d=o/10)==0 ? ' ' : (char)('0'+d);
	            *out++=(char)('0'+o%10);
	        } else /* o>99 */ {
	            *out++='x';
	            *out++='x';
	        }
	    }
	    *out++=0;
	    return out;
	}

	752

	753 // toUnicode test worker functions ----------------------------------------- ***

	754

	755 static int32_t

	756 stepToUnicode(ConversionCase &cc, UConverter *cnv,

	757 UChar *result, int32_t resultCapacity,

	758 int32_t resultOffsets, / also resultCapacity */

	759 int32_t step,

	760 UErrorCode *pErrorCode) {

	761 const char source, sourceLimit, *bytesLimit;

	762 UChar target, targetLimit, *resultLimit;

	763 UBool flush;

	764

	765 source=(const char *)cc.bytes;

	766 target=result;

	767 bytesLimit=source+cc.bytesLength;

	768 resultLimit=result+resultCapacity;

	769

	770 if(step>=0) {

	771 // call ucnv_toUnicode() with in/out buffers no larger than (step) at a time

	772 // move only one buffer (in vs. out) at a time to be extra mean

	773 // step==0 performs bulk conversion and generates offsets

	774

	775 // initialize the partial limits for the loop

	776 if(step==0) {

	777 // use the entire buffers

	778 sourceLimit=bytesLimit;

	779 targetLimit=resultLimit;

	780 flush=cc.finalFlush;

	781 } else {

	782 // start with empty partial buffers

	783 sourceLimit=source;

	784 targetLimit=target;

	785 flush=FALSE;

	786

	787 // output offsets only for bulk conversion

	788 resultOffsets=NULL;

	789 }

	790

	791 for(;;) {

	792 // resetting the opposite conversion direction must not affect this one

	793 ucnv_resetFromUnicode(cnv);

	794

	795 // convert

	796 ucnv_toUnicode(cnv,

	797 &target, targetLimit,

	798 &source, sourceLimit,

	799 resultOffsets,

	800 flush, pErrorCode);

	801

	802 // check pointers and errors

	803 if(source>sourceLimit \|\| target>targetLimit) {

	804 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

	805 break;

	806 } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

	807 if(target!=targetLimit) {

	808 // buffer overflow must only be set when the target is fille d

	809 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

	810 break;

	811 } else if(targetLimit==resultLimit) {

	812 // not just a partial overflow

	813 break;

	814 }

	815

	816 // the partial target is filled, set a new limit, reset the erro r and continue

	817 targetLimit=(resultLimit-target)>=step ? target+step : resultLim it;

	818 *pErrorCode=U_ZERO_ERROR;

	819 } else if(U_FAILURE(*pErrorCode)) {

	820 // some other error occurred, done

	821 break;

	822 } else {

	823 if(source!=sourceLimit) {

	824 // when no error occurs, then the input must be consumed

	825 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

	826 break;

	827 }

	828

	829 if(sourceLimit==bytesLimit) {

	830 // we are done

	831 break;

	832 }

	833

	834 // the partial conversion succeeded, set a new limit and continu e

	835 sourceLimit=(bytesLimit-source)>=step ? source+step : bytesLimit ;

	836 flush=(UBool)(cc.finalFlush && sourceLimit==bytesLimit);

	837 }

	838 }

	839 } else /* step<0 */ {

	840 /*

	841 * step==-1: call only ucnv_getNextUChar()

	842 * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar()

	843 * if step==-2 or -3, then give ucnv_toUnicode() the whole remaining i nput,

	844 * else give it at most (-step-2)/2 bytes

	845 */

	846 UChar32 c;

	847

	848 // end the loop by getting an index out of bounds error

	849 for(;;) {

	850 // resetting the opposite conversion direction must not affect this one

	851 ucnv_resetFromUnicode(cnv);

	852

	853 // convert

	854 if((step&1)!=0 /* odd: -1, -3, -5, ... */) {

	855 sourceLimit=source; // use sourceLimit not as a real limit

	856 // but to remember the pre-getNextUChar sour ce pointer

	857 c=ucnv_getNextUChar(cnv, &source, bytesLimit, pErrorCode);

	858

	859 // check pointers and errors

	860 if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) {

	861 if(source!=bytesLimit) {

	862 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

	863 } else {

	864 *pErrorCode=U_ZERO_ERROR;

	865 }

	866 break;

	867 } else if(U_FAILURE(*pErrorCode)) {

	868 break;

	869 }

	870 // source may not move if c is from previous overflow

	871

	872 if(target==resultLimit) {

	873 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	874 break;

	875 }

	876 if(c<=0xffff) {

	877 *target++=(UChar)c;

	878 } else {

	879 *target++=U16_LEAD(c);

	880 if(target==resultLimit) {

	881 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	882 break;

	883 }

	884 *target++=U16_TRAIL(c);

	885 }

	886

	887 // alternate between -n-1 and -n but leave -1 alone

	888 if(step<-1) {

	889 ++step;

	890 }

	891 } else /* step is even */ {

	892 // allow only one UChar output

	893 targetLimit=target<resultLimit ? target+1 : resultLimit;

	894

	895 // as with ucnv_getNextUChar(), we always flush (if we go to byt esLimit)

	896 // and never output offsets

	897 if(step==-2) {

	898 sourceLimit=bytesLimit;

	899 } else {

	900 sourceLimit=source+(-step-2)/2;

	901 if(sourceLimit>bytesLimit) {

	902 sourceLimit=bytesLimit;

	903 }

	904 }

	905

	906 ucnv_toUnicode(cnv,

	907 &target, targetLimit,

	908 &source, sourceLimit,

	909 NULL, (UBool)(sourceLimit==bytesLimit), pErrorCode);

	910

	911 // check pointers and errors

	912 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

	913 if(target!=targetLimit) {

	914 // buffer overflow must only be set when the target is f illed

	915 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

	916 break;

	917 } else if(targetLimit==resultLimit) {

	918 // not just a partial overflow

	919 break;

	920 }

	921

	922 // the partial target is filled, set a new limit and continu e

	923 *pErrorCode=U_ZERO_ERROR;

	924 } else if(U_FAILURE(*pErrorCode)) {

	925 // some other error occurred, done

	926 break;

	927 } else {

	928 if(source!=sourceLimit) {

	929 // when no error occurs, then the input must be consumed

	930 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

	931 break;

	932 }

	933

	934 // we are done (flush==TRUE) but we continue, to get the ind ex out of bounds error above

	935 }

	936

	937 --step;

	938 }

	939 }

	940 }

	941

	942 return (int32_t)(target-result);

	943 }

	944

	945 UBool

	946 ConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback , const char *option) {

	947 // open the converter

	948 IcuTestErrorCode errorCode(*this, "ToUnicodeCase");

	949 LocalUConverterPointer cnv(cnv_open(cc.charset, errorCode));

	950 if(errorCode.isFailure()) {

	951 errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_o pen() failed - %s",

	952 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, er rorCode.errorName());

	953 errorCode.reset();

	954 return FALSE;

	955 }

	956

	957 // set the callback

	958 if(callback!=NULL) {

	959 ucnv_setToUCallBack(cnv.getAlias(), callback, option, NULL, NULL, errorC ode);

	960 if(U_FAILURE(errorCode)) {

	961 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBac k() failed - %s",

	962 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush , u_errorName(errorCode));

	963 return FALSE;

	964 }

	965 }

	966

	967 int32_t resultOffsets[256];

	968 UChar result[256];

	969 int32_t resultLength;

	970 UBool ok;

	971

	972 static const struct {

	973 int32_t step;

	974 const char *name;

	975 } steps[]={

	976 { 0, "bulk" }, // must be first for offsets to be checked

	977 { 1, "step=1" },

	978 { 3, "step=3" },

	979 { 7, "step=7" },

	980 { -1, "getNext" },

	981 { -2, "toU(bulk)+getNext" },

	982 { -3, "getNext+toU(bulk)" },

	983 { -4, "toU(1)+getNext" },

	984 { -5, "getNext+toU(1)" },

	985 { -12, "toU(5)+getNext" },

	986 { -13, "getNext+toU(5)" },

	987 };

	988 int32_t i, step;

	989

	990 ok=TRUE;

	991 for(i=0; i<LENGTHOF(steps) && ok; ++i) {

	992 step=steps[i].step;

	993 if(step<0 && !cc.finalFlush) {

	994 // skip ucnv_getNextUChar() if !finalFlush because

	995 // ucnv_getNextUChar() always implies flush

	996 continue;

	997 }

	998 if(step!=0) {

	999 // bulk test is first, then offsets are not checked any more

	1000 cc.offsets=NULL;

	1001 }

	1002 else {

	1003 memset(resultOffsets, -1, LENGTHOF(resultOffsets));

	1004 }

	1005 memset(result, -1, LENGTHOF(result));

	1006 errorCode.reset();

	1007 resultLength=stepToUnicode(cc, cnv.getAlias(),

	1008 result, LENGTHOF(result),

	1009 step==0 ? resultOffsets : NULL,

	1010 step, errorCode);

	1011 ok=checkToUnicode(

	1012 cc, cnv.getAlias(), steps[i].name,

	1013 result, resultLength,

	1014 cc.offsets!=NULL ? resultOffsets : NULL,

	1015 errorCode);

	1016 if(errorCode.isFailure() \|\| !cc.finalFlush) {

	1017 // reset if an error occurred or we did not flush

	1018 // otherwise do nothing to make sure that flushing resets

	1019 ucnv_resetToUnicode(cnv.getAlias());

	1020 }

	1021 if (cc.offsets != NULL && resultOffsets[resultLength] != -1) {

	1022 errln("toUnicode[%d](%s) Conversion wrote too much to offsets at ind ex %d",

	1023 cc.caseNr, cc.charset, resultLength);

	1024 }

	1025 if (result[resultLength] != (UChar)-1) {

	1026 errln("toUnicode[%d](%s) Conversion wrote too much to result at inde x %d",

	1027 cc.caseNr, cc.charset, resultLength);

	1028 }

	1029 }

	1030

	1031 // not a real loop, just a convenience for breaking out of the block

	1032 while(ok && cc.finalFlush) {

	1033 // test ucnv_toUChars()

	1034 memset(result, 0, sizeof(result));

	1035

	1036 errorCode.reset();

	1037 resultLength=ucnv_toUChars(cnv.getAlias(),

	1038 result, LENGTHOF(result),

	1039 (const char *)cc.bytes, cc.bytesLength,

	1040 errorCode);

	1041 ok=checkToUnicode(

	1042 cc, cnv.getAlias(), "toUChars",

	1043 result, resultLength,

	1044 NULL,

	1045 errorCode);

	1046 if(!ok) {

	1047 break;

	1048 }

	1049

	1050 // test preflighting

	1051 // keep the correct result for simple checking

	1052 errorCode.reset();

	1053 resultLength=ucnv_toUChars(cnv.getAlias(),

	1054 NULL, 0,

	1055 (const char *)cc.bytes, cc.bytesLength,

	1056 errorCode);

	1057 if(errorCode.get()==U_STRING_NOT_TERMINATED_WARNING \|\| errorCode.get()== U_BUFFER_OVERFLOW_ERROR) {

	1058 errorCode.reset();

	1059 }

	1060 ok=checkToUnicode(

	1061 cc, cnv.getAlias(), "preflight toUChars",

	1062 result, resultLength,

	1063 NULL,

	1064 errorCode);

	1065 break;

	1066 }

	1067

	1068 errorCode.reset(); // all errors have already been reported

	1069 return ok;

	1070 }

	1071

	1072 UBool

	1073 ConversionTest::checkToUnicode(ConversionCase &cc, UConverter cnv, const char name,

	1074 const UChar *result, int32_t resultLength,

	1075 const int32_t *resultOffsets,

	1076 UErrorCode resultErrorCode) {

	1077 char resultInvalidChars[8];

	1078 int8_t resultInvalidLength;

	1079 UErrorCode errorCode;

	1080

	1081 const char *msg;

	1082

	1083 // reset the message; NULL will mean "ok"

	1084 msg=NULL;

	1085

	1086 errorCode=U_ZERO_ERROR;

	1087 resultInvalidLength=sizeof(resultInvalidChars);

	1088 ucnv_getInvalidChars(cnv, resultInvalidChars, &resultInvalidLength, &errorCo de);

	1089 if(U_FAILURE(errorCode)) {

	1090 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidChar s() failed - %s",

	1091 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, na me, u_errorName(errorCode));

	1092 return FALSE;

	1093 }

	1094

	1095 // check everything that might have gone wrong

	1096 if(cc.unicodeLength!=resultLength) {

	1097 msg="wrong result length";

	1098 } else if(0!=u_memcmp(cc.unicode, result, cc.unicodeLength)) {

	1099 msg="wrong result string";

	1100 } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.unicod eLengthsizeof(cc.offsets))) {

	1101 msg="wrong offsets";

	1102 } else if(cc.outErrorCode!=resultErrorCode) {

	1103 msg="wrong error code";

	1104 } else if(cc.invalidLength!=resultInvalidLength) {

	1105 msg="wrong length of last invalid input";

	1106 } else if(0!=memcmp(cc.invalidChars, resultInvalidChars, cc.invalidLength)) {

	1107 msg="wrong last invalid input";

	1108 }

	1109

	1110 if(msg==NULL) {

	1111 return TRUE;

	1112 } else {

	1113 char buffer[2000]; // one buffer for all strings

	1114 char s, bytesString, unicodeString, resultString,

	1115 offsetsString, resultOffsetsString,

	1116 invalidCharsString, resultInvalidCharsString;

	1117

	1118 bytesString=s=buffer;

	1119 s=printBytes(cc.bytes, cc.bytesLength, bytesString);

	1120 s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString=s);

	1121 s=printUnicode(result, resultLength, resultString=s);

	1122 s=printOffsets(cc.offsets, cc.unicodeLength, offsetsString=s);

	1123 s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);

	1124 s=printBytes(cc.invalidChars, cc.invalidLength, invalidCharsString=s);

	1125 s=printBytes((uint8_t *)resultInvalidChars, resultInvalidLength, resultI nvalidCharsString=s);

	1126

	1127 if((s-buffer)>(int32_t)sizeof(buffer)) {

	1128 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: ch eckToUnicode() test output buffer overflow writing %d chars\n",

	1129 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush , name, (int)(s-buffer));

	1130 exit(1);

	1131 }

	1132

	1133 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"

	1134 " bytes <%s>[%d]\n"

	1135 " expected <%s>[%d]\n"

	1136 " result <%s>[%d]\n"

	1137 " offsets <%s>\n"

	1138 " result offsets <%s>\n"

	1139 " error code expected %s got %s\n"

	1140 " invalidChars expected <%s> got <%s>\n",

	1141 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name , msg,

	1142 bytesString, cc.bytesLength,

	1143 unicodeString, cc.unicodeLength,

	1144 resultString, resultLength,

	1145 offsetsString,

	1146 resultOffsetsString,

	1147 u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),

	1148 invalidCharsString, resultInvalidCharsString);

	1149

	1150 return FALSE;

	1151 }

	1152 }

	1153

	1154 // fromUnicode test worker functions --------------------------------------- ***

	1155

	1156 static int32_t

	1157 stepFromUTF8(ConversionCase &cc,

	1158 UConverter utf8Cnv, UConverter cnv,

	1159 char *result, int32_t resultCapacity,

	1160 int32_t step,

	1161 UErrorCode *pErrorCode) {

	1162 const char source, sourceLimit, *utf8Limit;

	1163 UChar pivotBuffer[32];

	1164 UChar pivotSource, pivotTarget, *pivotLimit;

	1165 char target, targetLimit, *resultLimit;

	1166 UBool flush;

	1167

	1168 source=cc.utf8;

	1169 pivotSource=pivotTarget=pivotBuffer;

	1170 target=result;

	1171 utf8Limit=source+cc.utf8Length;

	1172 resultLimit=result+resultCapacity;

	1173

	1174 // call ucnv_convertEx() with in/out buffers no larger than (step) at a time

	1175 // move only one buffer (in vs. out) at a time to be extra mean

	1176 // step==0 performs bulk conversion

	1177

	1178 // initialize the partial limits for the loop

	1179 if(step==0) {

	1180 // use the entire buffers

	1181 sourceLimit=utf8Limit;

	1182 targetLimit=resultLimit;

	1183 flush=cc.finalFlush;

	1184

	1185 pivotLimit=pivotBuffer+LENGTHOF(pivotBuffer);

	1186 } else {

	1187 // start with empty partial buffers

	1188 sourceLimit=source;

	1189 targetLimit=target;

	1190 flush=FALSE;

	1191

	1192 // empty pivot is not allowed, make it of length step

	1193 pivotLimit=pivotBuffer+step;

	1194 }

	1195

	1196 for(;;) {

	1197 // resetting the opposite conversion direction must not affect this one

	1198 ucnv_resetFromUnicode(utf8Cnv);

	1199 ucnv_resetToUnicode(cnv);

	1200

	1201 // convert

	1202 ucnv_convertEx(cnv, utf8Cnv,

	1203 &target, targetLimit,

	1204 &source, sourceLimit,

	1205 pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,

	1206 FALSE, flush, pErrorCode);

	1207

	1208 // check pointers and errors

	1209 if(source>sourceLimit \|\| target>targetLimit) {

	1210 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

	1211 break;

	1212 } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

	1213 if(target!=targetLimit) {

	1214 // buffer overflow must only be set when the target is filled

	1215 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

	1216 break;

	1217 } else if(targetLimit==resultLimit) {

	1218 // not just a partial overflow

	1219 break;

	1220 }

	1221

	1222 // the partial target is filled, set a new limit, reset the error an d continue

	1223 targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;

	1224 *pErrorCode=U_ZERO_ERROR;

	1225 } else if(U_FAILURE(*pErrorCode)) {

	1226 if(pivotSource==pivotBuffer) {

	1227 // toUnicode error, should not occur

	1228 // toUnicode errors are tested in cintltst TestConvertExFromUTF8 ()

	1229 break;

	1230 } else {

	1231 // fromUnicode error

	1232 // some other error occurred, done

	1233 break;

	1234 }

	1235 } else {

	1236 if(source!=sourceLimit) {

	1237 // when no error occurs, then the input must be consumed

	1238 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

	1239 break;

	1240 }

	1241

	1242 if(sourceLimit==utf8Limit) {

	1243 // we are done

	1244 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {

	1245 // ucnv_convertEx() warns about not terminating the output

	1246 // but ucnv_fromUnicode() does not and so

	1247 // checkFromUnicode() does not expect it

	1248 *pErrorCode=U_ZERO_ERROR;

	1249 }

	1250 break;

	1251 }

	1252

	1253 // the partial conversion succeeded, set a new limit and continue

	1254 sourceLimit=(utf8Limit-source)>=step ? source+step : utf8Limit;

	1255 flush=(UBool)(cc.finalFlush && sourceLimit==utf8Limit);

	1256 }

	1257 }

	1258

	1259 return (int32_t)(target-result);

	1260 }

	1261

	1262 static int32_t

	1263 stepFromUnicode(ConversionCase &cc, UConverter *cnv,

	1264 char *result, int32_t resultCapacity,

	1265 int32_t resultOffsets, / also resultCapacity */

	1266 int32_t step,

	1267 UErrorCode *pErrorCode) {

	1268 const UChar source, sourceLimit, *unicodeLimit;

	1269 char target, targetLimit, *resultLimit;

	1270 UBool flush;

	1271

	1272 source=cc.unicode;

	1273 target=result;

	1274 unicodeLimit=source+cc.unicodeLength;

	1275 resultLimit=result+resultCapacity;

	1276

	1277 // call ucnv_fromUnicode() with in/out buffers no larger than (step) at a ti me

	1278 // move only one buffer (in vs. out) at a time to be extra mean

	1279 // step==0 performs bulk conversion and generates offsets

	1280

	1281 // initialize the partial limits for the loop

	1282 if(step==0) {

	1283 // use the entire buffers

	1284 sourceLimit=unicodeLimit;

	1285 targetLimit=resultLimit;

	1286 flush=cc.finalFlush;

	1287 } else {

	1288 // start with empty partial buffers

	1289 sourceLimit=source;

	1290 targetLimit=target;

	1291 flush=FALSE;

	1292

	1293 // output offsets only for bulk conversion

	1294 resultOffsets=NULL;

	1295 }

	1296

	1297 for(;;) {

	1298 // resetting the opposite conversion direction must not affect this one

	1299 ucnv_resetToUnicode(cnv);

	1300

	1301 // convert

	1302 ucnv_fromUnicode(cnv,

	1303 &target, targetLimit,

	1304 &source, sourceLimit,

	1305 resultOffsets,

	1306 flush, pErrorCode);

	1307

	1308 // check pointers and errors

	1309 if(source>sourceLimit \|\| target>targetLimit) {

	1310 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

	1311 break;

	1312 } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

	1313 if(target!=targetLimit) {

	1314 // buffer overflow must only be set when the target is filled

	1315 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

	1316 break;

	1317 } else if(targetLimit==resultLimit) {

	1318 // not just a partial overflow

	1319 break;

	1320 }

	1321

	1322 // the partial target is filled, set a new limit, reset the error an d continue

	1323 targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;

	1324 *pErrorCode=U_ZERO_ERROR;

	1325 } else if(U_FAILURE(*pErrorCode)) {

	1326 // some other error occurred, done

	1327 break;

	1328 } else {

	1329 if(source!=sourceLimit) {

	1330 // when no error occurs, then the input must be consumed

	1331 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

	1332 break;

	1333 }

	1334

	1335 if(sourceLimit==unicodeLimit) {

	1336 // we are done

	1337 break;

	1338 }

	1339

	1340 // the partial conversion succeeded, set a new limit and continue

	1341 sourceLimit=(unicodeLimit-source)>=step ? source+step : unicodeLimit ;

	1342 flush=(UBool)(cc.finalFlush && sourceLimit==unicodeLimit);

	1343 }

	1344 }

	1345

	1346 return (int32_t)(target-result);

	1347 }

	1348

	1349 UBool

	1350 ConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback call back, const char *option) {

	1351 UConverter *cnv;

	1352 UErrorCode errorCode;

	1353

	1354 // open the converter

	1355 errorCode=U_ZERO_ERROR;

	1356 cnv=cnv_open(cc.charset, errorCode);

	1357 if(U_FAILURE(errorCode)) {

	1358 errcheckln(errorCode, "fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv _open() failed - %s",

	1359 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_ errorName(errorCode));

	1360 return FALSE;

	1361 }

	1362 ucnv_resetToUnicode(utf8Cnv);

	1363

	1364 // set the callback

	1365 if(callback!=NULL) {

	1366 ucnv_setFromUCallBack(cnv, callback, option, NULL, NULL, &errorCode);

	1367 if(U_FAILURE(errorCode)) {

	1368 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCal lBack() failed - %s",

	1369 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush , u_errorName(errorCode));

	1370 ucnv_close(cnv);

	1371 return FALSE;

	1372 }

	1373 }

	1374

	1375 // set the fallbacks flag

	1376 // TODO change with Jitterbug 2401, then add a similar call for toUnicode to o

	1377 ucnv_setFallback(cnv, cc.fallbacks);

	1378

	1379 // set the subchar

	1380 int32_t length;

	1381

	1382 if(cc.setSub>0) {

	1383 length=(int32_t)strlen(cc.subchar);

	1384 ucnv_setSubstChars(cnv, cc.subchar, (int8_t)length, &errorCode);

	1385 if(U_FAILURE(errorCode)) {

	1386 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstCha rs() failed - %s",

	1387 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush , u_errorName(errorCode));

	1388 ucnv_close(cnv);

	1389 return FALSE;

	1390 }

	1391 } else if(cc.setSub<0) {

	1392 ucnv_setSubstString(cnv, cc.subString, -1, &errorCode);

	1393 if(U_FAILURE(errorCode)) {

	1394 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstStr ing() failed - %s",

	1395 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush , u_errorName(errorCode));

	1396 ucnv_close(cnv);

	1397 return FALSE;

	1398 }

	1399 }

	1400

	1401 // convert unicode to utf8

	1402 char utf8[256];

	1403 cc.utf8=utf8;

	1404 u_strToUTF8(utf8, LENGTHOF(utf8), &cc.utf8Length,

	1405 cc.unicode, cc.unicodeLength,

	1406 &errorCode);

	1407 if(U_FAILURE(errorCode)) {

	1408 // skip UTF-8 testing of a string with an unpaired surrogate,

	1409 // or of one that's too long

	1410 // toUnicode errors are tested in cintltst TestConvertExFromUTF8()

	1411 cc.utf8Length=-1;

	1412 }

	1413

	1414 int32_t resultOffsets[256];

	1415 char result[256];

	1416 int32_t resultLength;

	1417 UBool ok;

	1418

	1419 static const struct {

	1420 int32_t step;

	1421 const char name, utf8Name;

	1422 } steps[]={

	1423 { 0, "bulk", "utf8" }, // must be first for offsets to be checked

	1424 { 1, "step=1", "utf8 step=1" },

	1425 { 3, "step=3", "utf8 step=3" },

	1426 { 7, "step=7", "utf8 step=7" }

	1427 };

	1428 int32_t i, step;

	1429

	1430 ok=TRUE;

	1431 for(i=0; i<LENGTHOF(steps) && ok; ++i) {

	1432 step=steps[i].step;

	1433 memset(resultOffsets, -1, LENGTHOF(resultOffsets));

	1434 memset(result, -1, LENGTHOF(result));

	1435 errorCode=U_ZERO_ERROR;

	1436 resultLength=stepFromUnicode(cc, cnv,

	1437 result, LENGTHOF(result),

	1438 step==0 ? resultOffsets : NULL,

	1439 step, &errorCode);

	1440 ok=checkFromUnicode(

	1441 cc, cnv, steps[i].name,

	1442 (uint8_t *)result, resultLength,

	1443 cc.offsets!=NULL ? resultOffsets : NULL,

	1444 errorCode);

	1445 if(U_FAILURE(errorCode) \|\| !cc.finalFlush) {

	1446 // reset if an error occurred or we did not flush

	1447 // otherwise do nothing to make sure that flushing resets

	1448 ucnv_resetFromUnicode(cnv);

	1449 }

	1450 if (resultOffsets[resultLength] != -1) {

	1451 errln("fromUnicode[%d](%s) Conversion wrote too much to offsets at i ndex %d",

	1452 cc.caseNr, cc.charset, resultLength);

	1453 }

	1454 if (result[resultLength] != (char)-1) {

	1455 errln("fromUnicode[%d](%s) Conversion wrote too much to result at in dex %d",

	1456 cc.caseNr, cc.charset, resultLength);

	1457 }

	1458

	1459 // bulk test is first, then offsets are not checked any more

	1460 cc.offsets=NULL;

	1461

	1462 // test direct conversion from UTF-8

	1463 if(cc.utf8Length>=0) {

	1464 errorCode=U_ZERO_ERROR;

	1465 resultLength=stepFromUTF8(cc, utf8Cnv, cnv,

	1466 result, LENGTHOF(result),

	1467 step, &errorCode);

	1468 ok=checkFromUnicode(

	1469 cc, cnv, steps[i].utf8Name,

	1470 (uint8_t *)result, resultLength,

	1471 NULL,

	1472 errorCode);

	1473 if(U_FAILURE(errorCode) \|\| !cc.finalFlush) {

	1474 // reset if an error occurred or we did not flush

	1475 // otherwise do nothing to make sure that flushing resets

	1476 ucnv_resetToUnicode(utf8Cnv);

	1477 ucnv_resetFromUnicode(cnv);

	1478 }

	1479 }

	1480 }

	1481

	1482 // not a real loop, just a convenience for breaking out of the block

	1483 while(ok && cc.finalFlush) {

	1484 // test ucnv_fromUChars()

	1485 memset(result, 0, sizeof(result));

	1486

	1487 errorCode=U_ZERO_ERROR;

	1488 resultLength=ucnv_fromUChars(cnv,

	1489 result, LENGTHOF(result),

	1490 cc.unicode, cc.unicodeLength,

	1491 &errorCode);

	1492 ok=checkFromUnicode(

	1493 cc, cnv, "fromUChars",

	1494 (uint8_t *)result, resultLength,

	1495 NULL,

	1496 errorCode);

	1497 if(!ok) {

	1498 break;

	1499 }

	1500

	1501 // test preflighting

	1502 // keep the correct result for simple checking

	1503 errorCode=U_ZERO_ERROR;

	1504 resultLength=ucnv_fromUChars(cnv,

	1505 NULL, 0,

	1506 cc.unicode, cc.unicodeLength,

	1507 &errorCode);

	1508 if(errorCode==U_STRING_NOT_TERMINATED_WARNING \|\| errorCode==U_BUFFER_OVE RFLOW_ERROR) {

	1509 errorCode=U_ZERO_ERROR;

	1510 }

	1511 ok=checkFromUnicode(

	1512 cc, cnv, "preflight fromUChars",

	1513 (uint8_t *)result, resultLength,

	1514 NULL,

	1515 errorCode);

	1516 break;

	1517 }

	1518

	1519 ucnv_close(cnv);

	1520 return ok;

	1521 }

	1522

	1523 UBool

	1524 ConversionTest::checkFromUnicode(ConversionCase &cc, UConverter cnv, const char name,

	1525 const uint8_t *result, int32_t resultLength,

	1526 const int32_t *resultOffsets,

	1527 UErrorCode resultErrorCode) {

	1528 UChar resultInvalidUChars[8];

	1529 int8_t resultInvalidLength;

	1530 UErrorCode errorCode;

	1531

	1532 const char *msg;

	1533

	1534 // reset the message; NULL will mean "ok"

	1535 msg=NULL;

	1536

	1537 errorCode=U_ZERO_ERROR;

	1538 resultInvalidLength=LENGTHOF(resultInvalidUChars);

	1539 ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &error Code);

	1540 if(U_FAILURE(errorCode)) {

	1541 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUC hars() failed - %s",

	1542 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, na me, u_errorName(errorCode));

	1543 return FALSE;

	1544 }

	1545

	1546 // check everything that might have gone wrong

	1547 if(cc.bytesLength!=resultLength) {

	1548 msg="wrong result length";

	1549 } else if(0!=memcmp(cc.bytes, result, cc.bytesLength)) {

	1550 msg="wrong result string";

	1551 } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.bytesL engthsizeof(cc.offsets))) {

	1552 msg="wrong offsets";

	1553 } else if(cc.outErrorCode!=resultErrorCode) {

	1554 msg="wrong error code";

	1555 } else if(cc.invalidLength!=resultInvalidLength) {

	1556 msg="wrong length of last invalid input";

	1557 } else if(0!=u_memcmp(cc.invalidUChars, resultInvalidUChars, cc.invalidLengt h)) {

	1558 msg="wrong last invalid input";

	1559 }

	1560

	1561 if(msg==NULL) {

	1562 return TRUE;

	1563 } else {

	1564 char buffer[2000]; // one buffer for all strings

	1565 char s, unicodeString, bytesString, resultString,

	1566 offsetsString, resultOffsetsString,

	1567 invalidCharsString, resultInvalidUCharsString;

	1568

	1569 unicodeString=s=buffer;

	1570 s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString);

	1571 s=printBytes(cc.bytes, cc.bytesLength, bytesString=s);

	1572 s=printBytes(result, resultLength, resultString=s);

	1573 s=printOffsets(cc.offsets, cc.bytesLength, offsetsString=s);

	1574 s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);

	1575 s=printUnicode(cc.invalidUChars, cc.invalidLength, invalidCharsString=s) ;

	1576 s=printUnicode(resultInvalidUChars, resultInvalidLength, resultInvalidUC harsString=s);

	1577

	1578 if((s-buffer)>(int32_t)sizeof(buffer)) {

	1579 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkFromUnicode() test output buffer overflow writing %d chars\n",

	1580 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush , name, (int)(s-buffer));

	1581 exit(1);

	1582 }

	1583

	1584 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"

	1585 " unicode <%s>[%d]\n"

	1586 " expected <%s>[%d]\n"

	1587 " result <%s>[%d]\n"

	1588 " offsets <%s>\n"

	1589 " result offsets <%s>\n"

	1590 " error code expected %s got %s\n"

	1591 " invalidChars expected <%s> got <%s>\n",

	1592 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name , msg,

	1593 unicodeString, cc.unicodeLength,

	1594 bytesString, cc.bytesLength,

	1595 resultString, resultLength,

	1596 offsetsString,

	1597 resultOffsetsString,

	1598 u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),

	1599 invalidCharsString, resultInvalidUCharsString);

	1600

	1601 return FALSE;

	1602 }

	1603 }

	1604

	1605 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

OLD	NEW

« no previous file with comments | « icu46/source/test/intltest/convtest.h ('k') | icu46/source/test/intltest/cpdtrtst.h » ('j') | no next file with comments »