icu46/source/test/intltest/convtest.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Unified Diff: icu46/source/test/intltest/convtest.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu46/source/test/intltest/convtest.cpp

===================================================================

--- icu46/source/test/intltest/convtest.cpp (revision 0)

+++ icu46/source/test/intltest/convtest.cpp (revision 0)

@@ -0,0 +1,1605 @@

+/*

+*******************************************************************************

+* file name: convtest.cpp

+* encoding: US-ASCII

+* tab size: 8 (not used)

+* indentation:4

+* created on: 2003jul15

+* created by: Markus W. Scherer

+* Test file for data-driven conversion tests.

+*/

+#include "unicode/utypes.h"

+#if !UCONFIG_NO_LEGACY_CONVERSION

+/*

+ * Note: Turning off all of convtest.cpp if !UCONFIG_NO_LEGACY_CONVERSION

+ * is slightly unnecessary - it removes tests for Unicode charsets

+ * like UTF-8 that should work.

+ * However, there is no easy way for the test to detect whether a test case

+ * is for a Unicode charset, so it would be difficult to only exclude those.

+ * Also, regular testing of ICU is done with all modules on, therefore

+ * not testing conversion for a custom configuration like this should be ok.

+ */

+#include "unicode/ucnv.h"

+#include "unicode/unistr.h"

+#include "unicode/parsepos.h"

+#include "unicode/uniset.h"

+#include "unicode/ustring.h"

+#include "unicode/ures.h"

+#include "convtest.h"

+#include "unicode/tstdtmod.h"

+#include <string.h>

+#include <stdlib.h>

+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

+enum {

+ // characters used in test data for callbacks

+ SUB_CB='?',

+ SKIP_CB='0',

+ STOP_CB='.',

+ ESC_CB='&'

+};

+ConversionTest::ConversionTest() {

+ UErrorCode errorCode=U_ZERO_ERROR;

+ utf8Cnv=ucnv_open("UTF-8", &errorCode);

+ ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);

+ if(U_FAILURE(errorCode)) {

+ errln("unable to open UTF-8 converter");

+ }

+ConversionTest::~ConversionTest() {

+ ucnv_close(utf8Cnv);

+void

+ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {

+ if (exec) logln("TestSuite ConversionTest: ");

+ switch (index) {

+#if !UCONFIG_NO_FILE_IO

+ case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break;

+ case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break;

+ case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break;

+#else

+ case 0:

+ case 1:

+ case 2: name="skip"; break;

+#endif

+ case 3: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break;

+ default: name=""; break; //needed to end loop

+ }

+// test data interface ----------------------------------------------------- ***

+void

+ConversionTest::TestToUnicode() {

+ ConversionCase cc;

+ char charset[100], cbopt[4];

+ const char *option;

+ UnicodeString s, unicode;

+ int32_t offsetsLength;

+ UConverterToUCallback callback;

+ TestDataModule *dataModule;

+ TestData *testData;

+ const DataMap *testCase;

+ UErrorCode errorCode;

+ int32_t i;

+ errorCode=U_ZERO_ERROR;

+ dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);

+ if(U_SUCCESS(errorCode)) {

+ testData=dataModule->createTestData("toUnicode", errorCode);

+ if(U_SUCCESS(errorCode)) {

+ for(i=0; testData->nextCase(testCase, errorCode); ++i) {

+ if(U_FAILURE(errorCode)) {

+ errln("error retrieving conversion/toUnicode test case %d - %s",

+ i, u_errorName(errorCode));

+ errorCode=U_ZERO_ERROR;

+ continue;

+ }

+ cc.caseNr=i;

+ s=testCase->getString("charset", errorCode);

+ s.extract(0, 0x7fffffff, charset, sizeof(charset), "");

+ cc.charset=charset;

+ cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);

+ unicode=testCase->getString("unicode", errorCode);

+ cc.unicode=unicode.getBuffer();

+ cc.unicodeLength=unicode.length();

+ offsetsLength=0;

+ cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);

+ if(offsetsLength==0) {

+ cc.offsets=NULL;

+ } else if(offsetsLength!=unicode.length()) {

+ errln("toUnicode[%d] unicode[%d] and offsets[%d] must have the same length",

+ i, unicode.length(), offsetsLength);

+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;

+ }

+ cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);

+ cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);

+ s=testCase->getString("errorCode", errorCode);

+ if(s==UNICODE_STRING("invalid", 7)) {

+ cc.outErrorCode=U_INVALID_CHAR_FOUND;

+ } else if(s==UNICODE_STRING("illegal", 7)) {

+ cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;

+ } else if(s==UNICODE_STRING("truncated", 9)) {

+ cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;

+ } else if(s==UNICODE_STRING("illesc", 6)) {

+ cc.outErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE;

+ } else if(s==UNICODE_STRING("unsuppesc", 9)) {

+ cc.outErrorCode=U_UNSUPPORTED_ESCAPE_SEQUENCE;

+ } else {

+ cc.outErrorCode=U_ZERO_ERROR;

+ }

+ s=testCase->getString("callback", errorCode);

+ s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");

+ cc.cbopt=cbopt;

+ switch(cbopt[0]) {

+ case SUB_CB:

+ callback=UCNV_TO_U_CALLBACK_SUBSTITUTE;

+ break;

+ case SKIP_CB:

+ callback=UCNV_TO_U_CALLBACK_SKIP;

+ break;

+ case STOP_CB:

+ callback=UCNV_TO_U_CALLBACK_STOP;

+ break;

+ case ESC_CB:

+ callback=UCNV_TO_U_CALLBACK_ESCAPE;

+ break;

+ default:

+ callback=NULL;

+ break;

+ }

+ option=callback==NULL ? cbopt : cbopt+1;

+ if(*option==0) {

+ option=NULL;

+ }

+ cc.invalidChars=testCase->getBinary(cc.invalidLength, "invalidChars", errorCode);

+ if(U_FAILURE(errorCode)) {

+ errln("error parsing conversion/toUnicode test case %d - %s",

+ i, u_errorName(errorCode));

+ errorCode=U_ZERO_ERROR;

+ } else {

+ logln("TestToUnicode[%d] %s", i, charset);

+ ToUnicodeCase(cc, callback, option);

+ }

+ delete testData;

+ }

+ delete dataModule;

+ }

+ else {

+ dataerrln("Could not load test conversion data");

+ }

+void

+ConversionTest::TestFromUnicode() {

+ ConversionCase cc;

+ char charset[100], cbopt[4];

+ const char *option;

+ UnicodeString s, unicode, invalidUChars;

+ int32_t offsetsLength, index;

+ UConverterFromUCallback callback;

+ TestDataModule *dataModule;

+ TestData *testData;

+ const DataMap *testCase;

+ const UChar *p;

+ UErrorCode errorCode;

+ int32_t i, length;

+ errorCode=U_ZERO_ERROR;

+ dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);

+ if(U_SUCCESS(errorCode)) {

+ testData=dataModule->createTestData("fromUnicode", errorCode);

+ if(U_SUCCESS(errorCode)) {

+ for(i=0; testData->nextCase(testCase, errorCode); ++i) {

+ if(U_FAILURE(errorCode)) {

+ errln("error retrieving conversion/fromUnicode test case %d - %s",

+ i, u_errorName(errorCode));

+ errorCode=U_ZERO_ERROR;

+ continue;

+ }

+ cc.caseNr=i;

+ s=testCase->getString("charset", errorCode);

+ s.extract(0, 0x7fffffff, charset, sizeof(charset), "");

+ cc.charset=charset;

+ unicode=testCase->getString("unicode", errorCode);

+ cc.unicode=unicode.getBuffer();

+ cc.unicodeLength=unicode.length();

+ cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);

+ offsetsLength=0;

+ cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);

+ if(offsetsLength==0) {

+ cc.offsets=NULL;

+ } else if(offsetsLength!=cc.bytesLength) {

+ errln("fromUnicode[%d] bytes[%d] and offsets[%d] must have the same length",

+ i, cc.bytesLength, offsetsLength);

+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;

+ }

+ cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);

+ cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);

+ s=testCase->getString("errorCode", errorCode);

+ if(s==UNICODE_STRING("invalid", 7)) {

+ cc.outErrorCode=U_INVALID_CHAR_FOUND;

+ } else if(s==UNICODE_STRING("illegal", 7)) {

+ cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;

+ } else if(s==UNICODE_STRING("truncated", 9)) {

+ cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;

+ } else {

+ cc.outErrorCode=U_ZERO_ERROR;

+ }

+ s=testCase->getString("callback", errorCode);

+ cc.setSub=0; // default: no subchar

+ if((index=s.indexOf((UChar)0))>0) {

+ // read NUL-separated subchar first, if any

+ // copy the subchar from Latin-1 characters

+ // start after the NUL

+ p=s.getTerminatedBuffer();

+ length=index+1;

+ p+=length;

+ length=s.length()-length;

+ if(length<=0 || length>=(int32_t)sizeof(cc.subchar)) {

+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;

+ } else {

+ int32_t j;

+ for(j=0; j<length; ++j) {

+ cc.subchar[j]=(char)p[j];

+ }

+ // NUL-terminate the subchar

+ cc.subchar[j]=0;

+ cc.setSub=1;

+ }

+ // remove the NUL and subchar from s

+ s.truncate(index);

+ } else if((index=s.indexOf((UChar)0x3d))>0) /* '=' */ {

+ // read a substitution string, separated by an equal sign

+ p=s.getBuffer()+index+1;

+ length=s.length()-(index+1);

+ if(length<0 || length>=LENGTHOF(cc.subString)) {

+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;

+ } else {

+ u_memcpy(cc.subString, p, length);

+ // NUL-terminate the subString

+ cc.subString[length]=0;

+ cc.setSub=-1;

+ }

+ // remove the equal sign and subString from s

+ s.truncate(index);

+ }

+ s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");

+ cc.cbopt=cbopt;

+ switch(cbopt[0]) {

+ case SUB_CB:

+ callback=UCNV_FROM_U_CALLBACK_SUBSTITUTE;

+ break;

+ case SKIP_CB:

+ callback=UCNV_FROM_U_CALLBACK_SKIP;

+ break;

+ case STOP_CB:

+ callback=UCNV_FROM_U_CALLBACK_STOP;

+ break;

+ case ESC_CB:

+ callback=UCNV_FROM_U_CALLBACK_ESCAPE;

+ break;

+ default:

+ callback=NULL;

+ break;

+ }

+ option=callback==NULL ? cbopt : cbopt+1;

+ if(*option==0) {

+ option=NULL;

+ }

+ invalidUChars=testCase->getString("invalidUChars", errorCode);

+ cc.invalidUChars=invalidUChars.getBuffer();

+ cc.invalidLength=invalidUChars.length();

+ if(U_FAILURE(errorCode)) {

+ errln("error parsing conversion/fromUnicode test case %d - %s",

+ i, u_errorName(errorCode));

+ errorCode=U_ZERO_ERROR;

+ } else {

+ logln("TestFromUnicode[%d] %s", i, charset);

+ FromUnicodeCase(cc, callback, option);

+ }

+ delete testData;

+ }

+ delete dataModule;

+ }

+ else {

+ dataerrln("Could not load test conversion data");

+ }

+static const UChar ellipsis[]={ 0x2e, 0x2e, 0x2e };

+void

+ConversionTest::TestGetUnicodeSet() {

+ char charset[100];

+ UnicodeString s, map, mapnot;

+ int32_t which;

+ ParsePosition pos;

+ UnicodeSet cnvSet, mapSet, mapnotSet, diffSet;

+ UnicodeSet *cnvSetPtr = &cnvSet;

+ LocalUConverterPointer cnv;

+ TestDataModule *dataModule;

+ TestData *testData;

+ const DataMap *testCase;

+ UErrorCode errorCode;

+ int32_t i;

+ errorCode=U_ZERO_ERROR;

+ dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);

+ if(U_SUCCESS(errorCode)) {

+ testData=dataModule->createTestData("getUnicodeSet", errorCode);

+ if(U_SUCCESS(errorCode)) {

+ for(i=0; testData->nextCase(testCase, errorCode); ++i) {

+ if(U_FAILURE(errorCode)) {

+ errln("error retrieving conversion/getUnicodeSet test case %d - %s",

+ i, u_errorName(errorCode));

+ errorCode=U_ZERO_ERROR;

+ continue;

+ }

+ s=testCase->getString("charset", errorCode);

+ s.extract(0, 0x7fffffff, charset, sizeof(charset), "");

+ map=testCase->getString("map", errorCode);

+ mapnot=testCase->getString("mapnot", errorCode);

+ which=testCase->getInt28("which", errorCode);

+ if(U_FAILURE(errorCode)) {

+ errln("error parsing conversion/getUnicodeSet test case %d - %s",

+ i, u_errorName(errorCode));

+ errorCode=U_ZERO_ERROR;

+ continue;

+ }

+ // test this test case

+ mapSet.clear();

+ mapnotSet.clear();

+ pos.setIndex(0);

+ mapSet.applyPattern(map, pos, 0, NULL, errorCode);

+ if(U_FAILURE(errorCode) || pos.getIndex()!=map.length()) {

+ errln("error creating the map set for conversion/getUnicodeSet test case %d - %s\n"

+ " error index %d index %d U+%04x",

+ i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), map.char32At(pos.getIndex()));

+ errorCode=U_ZERO_ERROR;

+ continue;

+ }

+ pos.setIndex(0);

+ mapnotSet.applyPattern(mapnot, pos, 0, NULL, errorCode);

+ if(U_FAILURE(errorCode) || pos.getIndex()!=mapnot.length()) {

+ errln("error creating the mapnot set for conversion/getUnicodeSet test case %d - %s\n"

+ " error index %d index %d U+%04x",

+ i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), mapnot.char32At(pos.getIndex()));

+ errorCode=U_ZERO_ERROR;

+ continue;

+ }

+ logln("TestGetUnicodeSet[%d] %s", i, charset);

+ cnv.adoptInstead(cnv_open(charset, errorCode));

+ if(U_FAILURE(errorCode)) {

+ errcheckln(errorCode, "error opening \"%s\" for conversion/getUnicodeSet test case %d - %s",

+ charset, i, u_errorName(errorCode));

+ errorCode=U_ZERO_ERROR;

+ continue;

+ }

+ ucnv_getUnicodeSet(cnv.getAlias(), cnvSetPtr->toUSet(), (UConverterUnicodeSet)which, &errorCode);

+ if(U_FAILURE(errorCode)) {

+ errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/getUnicodeSet test case %d - %s",

+ charset, i, u_errorName(errorCode));

+ errorCode=U_ZERO_ERROR;

+ continue;

+ }

+ // are there items that must be in cnvSet but are not?

+ (diffSet=mapSet).removeAll(cnvSet);

+ if(!diffSet.isEmpty()) {

+ diffSet.toPattern(s, TRUE);

+ if(s.length()>100) {

+ s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));

+ }

+ errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d",

+ charset, i);

+ errln(s);

+ }

+ // are there items that must not be in cnvSet but are?

+ (diffSet=mapnotSet).retainAll(cnvSet);

+ if(!diffSet.isEmpty()) {

+ diffSet.toPattern(s, TRUE);

+ if(s.length()>100) {

+ s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));

+ }

+ errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d",

+ charset, i);

+ errln(s);

+ }

+ delete testData;

+ }

+ delete dataModule;

+ }

+ else {

+ dataerrln("Could not load test conversion data");

+ }

+U_CDECL_BEGIN

+static void U_CALLCONV

+getUnicodeSetCallback(const void *context,

+ UConverterFromUnicodeArgs * /*fromUArgs*/,

+ const UChar* /*codeUnits*/,

+ int32_t /*length*/,

+ UChar32 codePoint,

+ UConverterCallbackReason reason,

+ UErrorCode *pErrorCode) {

+ if(reason<=UCNV_IRREGULAR) {

+ ((UnicodeSet *)context)->remove(codePoint); // the converter cannot convert this code point

+ *pErrorCode=U_ZERO_ERROR; // skip

+ } // else ignore the reset, close and clone calls.

+U_CDECL_END

+// Compare ucnv_getUnicodeSet() with the set of characters that can be converted.

+void

+ConversionTest::TestGetUnicodeSet2() {

+ // Build a string with all code points.

+ UChar32 cpLimit;

+ int32_t s0Length;

+ if(quick) {

+ cpLimit=s0Length=0x10000; // BMP only

+ } else {

+ cpLimit=0x110000;

+ s0Length=0x10000+0x200000; // BMP + surrogate pairs

+ }

+ UChar *s0=new UChar[s0Length];

+ if(s0==NULL) {

+ return;

+ }

+ UChar *s=s0;

+ UChar32 c;

+ UChar c2;

+ // low BMP

+ for(c=0; c<=0xd7ff; ++c) {

+ *s++=(UChar)c;

+ }

+ // trail surrogates

+ for(c=0xdc00; c<=0xdfff; ++c) {

+ *s++=(UChar)c;

+ }

+ // lead surrogates

+ // (after trails so that there is not even one surrogate pair in between)

+ for(c=0xd800; c<=0xdbff; ++c) {

+ *s++=(UChar)c;

+ }

+ // high BMP

+ for(c=0xe000; c<=0xffff; ++c) {

+ *s++=(UChar)c;

+ }

+ // supplementary code points = surrogate pairs

+ if(cpLimit==0x110000) {

+ for(c=0xd800; c<=0xdbff; ++c) {

+ for(c2=0xdc00; c2<=0xdfff; ++c2) {

+ *s++=(UChar)c;

+ *s++=c2;

+ }

+ static const char *const cnvNames[]={

+ "UTF-8",

+ "UTF-7",

+ "UTF-16",

+ "US-ASCII",

+ "ISO-8859-1",

+ "windows-1252",

+ "Shift-JIS",

+ "ibm-1390", // EBCDIC_STATEFUL table

+ "ibm-16684", // DBCS-only extension table based on EBCDIC_STATEFUL table

+ "HZ",

+ "ISO-2022-JP",

+ "JIS7",

+ "ISO-2022-CN",

+ "ISO-2022-CN-EXT",

+ "LMBCS"

+ };

+ LocalUConverterPointer cnv;

+ char buffer[1024];

+ int32_t i;

+ for(i=0; i<LENGTHOF(cnvNames); ++i) {

+ UErrorCode errorCode=U_ZERO_ERROR;

+ cnv.adoptInstead(cnv_open(cnvNames[i], errorCode));

+ if(U_FAILURE(errorCode)) {

+ errcheckln(errorCode, "failed to open converter %s - %s", cnvNames[i], u_errorName(errorCode));

+ continue;

+ }

+ UnicodeSet expected;

+ ucnv_setFromUCallBack(cnv.getAlias(), getUnicodeSetCallback, &expected, NULL, NULL, &errorCode);

+ if(U_FAILURE(errorCode)) {

+ errln("failed to set the callback on converter %s - %s", cnvNames[i], u_errorName(errorCode));

+ continue;

+ }

+ UConverterUnicodeSet which;

+ for(which=UCNV_ROUNDTRIP_SET; which<UCNV_SET_COUNT; which=(UConverterUnicodeSet)((int)which+1)) {

+ if(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {

+ ucnv_setFallback(cnv.getAlias(), TRUE);

+ }

+ expected.add(0, cpLimit-1);

+ s=s0;

+ UBool flush;

+ do {

+ char *t=buffer;

+ flush=(UBool)(s==s0+s0Length);

+ ucnv_fromUnicode(cnv.getAlias(), &t, buffer+sizeof(buffer), (const UChar **)&s, s0+s0Length, NULL, flush, &errorCode);

+ if(U_FAILURE(errorCode)) {

+ if(errorCode==U_BUFFER_OVERFLOW_ERROR) {

+ errorCode=U_ZERO_ERROR;

+ continue;

+ } else {

+ break; // unexpected error, should not occur

+ }

+ } while(!flush);

+ UnicodeSet set;

+ ucnv_getUnicodeSet(cnv.getAlias(), set.toUSet(), which, &errorCode);

+ if(cpLimit<0x110000) {

+ set.remove(cpLimit, 0x10ffff);

+ }

+ if(which==UCNV_ROUNDTRIP_SET) {

+ // ignore PUA code points because they will be converted even if they

+ // are fallbacks and when other fallbacks are turned off,

+ // but ucnv_getUnicodeSet(UCNV_ROUNDTRIP_SET) delivers true roundtrips

+ expected.remove(0xe000, 0xf8ff);

+ expected.remove(0xf0000, 0xffffd);

+ expected.remove(0x100000, 0x10fffd);

+ set.remove(0xe000, 0xf8ff);

+ set.remove(0xf0000, 0xffffd);

+ set.remove(0x100000, 0x10fffd);

+ }

+ if(set!=expected) {

+ // First try to see if we have different sets because ucnv_getUnicodeSet()

+ // added strings: The above conversion method does not tell us what strings might be convertible.

+ // Remove strings from the set and compare again.

+ // Unfortunately, there are no good, direct set methods for finding out whether there are strings

+ // in the set, nor for enumerating or removing just them.

+ // Intersect all code points with the set. The intersection will not contain strings.

+ UnicodeSet temp(0, 0x10ffff);

+ temp.retainAll(set);

+ set=temp;

+ }

+ if(set!=expected) {

+ UnicodeSet diffSet;

+ UnicodeString out;

+ // are there items that must be in the set but are not?

+ (diffSet=expected).removeAll(set);

+ if(!diffSet.isEmpty()) {

+ diffSet.toPattern(out, TRUE);

+ if(out.length()>100) {

+ out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));

+ }

+ errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d",

+ cnvNames[i], which);

+ errln(out);

+ }

+ // are there items that must not be in the set but are?

+ (diffSet=set).removeAll(expected);

+ if(!diffSet.isEmpty()) {

+ diffSet.toPattern(out, TRUE);

+ if(out.length()>100) {

+ out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));

+ }

+ errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d",

+ cnvNames[i], which);

+ errln(out);

+ }

+ delete [] s0;

+// open testdata or ICU data converter ------------------------------------- ***

+UConverter *

+ConversionTest::cnv_open(const char *name, UErrorCode &errorCode) {

+ if(name!=NULL && *name=='*') {

+ /* loadTestData(): set the data directory */

+ return ucnv_openPackage(loadTestData(errorCode), name+1, &errorCode);

+ } else if(name!=NULL && *name=='+') {

+ return ucnv_open((name+1), &errorCode);

+ } else {

+ return ucnv_open(name, &errorCode);

+ }

+// output helpers ---------------------------------------------------------- ***

+static inline char

+hexDigit(uint8_t digit) {

+ return digit<=9 ? (char)('0'+digit) : (char)('a'-10+digit);

+static char *

+printBytes(const uint8_t *bytes, int32_t length, char *out) {

+ uint8_t b;

+ if(length>0) {

+ b=*bytes++;

+ --length;

+ *out++=hexDigit((uint8_t)(b>>4));

+ *out++=hexDigit((uint8_t)(b&0xf));

+ }

+ while(length>0) {

+ b=*bytes++;

+ --length;

+ *out++=' ';

+ *out++=hexDigit((uint8_t)(b>>4));

+ *out++=hexDigit((uint8_t)(b&0xf));

+ }

+ *out++=0;

+ return out;

+static char *

+printUnicode(const UChar *unicode, int32_t length, char *out) {

+ UChar32 c;

+ int32_t i;

+ for(i=0; i<length;) {

+ if(i>0) {

+ *out++=' ';

+ }

+ U16_NEXT(unicode, i, length, c);

+ // write 4..6 digits

+ if(c>=0x100000) {

+ *out++='1';

+ }

+ if(c>=0x10000) {

+ *out++=hexDigit((uint8_t)((c>>16)&0xf));

+ }

+ *out++=hexDigit((uint8_t)((c>>12)&0xf));

+ *out++=hexDigit((uint8_t)((c>>8)&0xf));

+ *out++=hexDigit((uint8_t)((c>>4)&0xf));

+ *out++=hexDigit((uint8_t)(c&0xf));

+ }

+ *out++=0;

+ return out;

+static char *

+printOffsets(const int32_t *offsets, int32_t length, char *out) {

+ int32_t i, o, d;

+ if(offsets==NULL) {

+ length=0;

+ }

+ for(i=0; i<length; ++i) {

+ if(i>0) {

+ *out++=' ';

+ }

+ o=offsets[i];

+ // print all offsets with 2 characters each (-x, -9..99, xx)

+ if(o<-9) {

+ *out++='-';

+ *out++='x';

+ } else if(o<0) {

+ *out++='-';

+ *out++=(char)('0'-o);

+ } else if(o<=99) {

+ *out++=(d=o/10)==0 ? ' ' : (char)('0'+d);

+ *out++=(char)('0'+o%10);

+ } else /* o>99 */ {

+ *out++='x';

+ }

+ *out++=0;

+ return out;

+// toUnicode test worker functions ----------------------------------------- ***

+static int32_t

+stepToUnicode(ConversionCase &cc, UConverter *cnv,

+ UChar *result, int32_t resultCapacity,

+ int32_t *resultOffsets, /* also resultCapacity */

+ int32_t step,

+ UErrorCode *pErrorCode) {

+ const char *source, *sourceLimit, *bytesLimit;

+ UChar *target, *targetLimit, *resultLimit;

+ UBool flush;

+ source=(const char *)cc.bytes;

+ target=result;

+ bytesLimit=source+cc.bytesLength;

+ resultLimit=result+resultCapacity;

+ if(step>=0) {

+ // call ucnv_toUnicode() with in/out buffers no larger than (step) at a time

+ // move only one buffer (in vs. out) at a time to be extra mean

+ // step==0 performs bulk conversion and generates offsets

+ // initialize the partial limits for the loop

+ if(step==0) {

+ // use the entire buffers

+ sourceLimit=bytesLimit;

+ targetLimit=resultLimit;

+ flush=cc.finalFlush;

+ } else {

+ // start with empty partial buffers

+ sourceLimit=source;

+ targetLimit=target;

+ flush=FALSE;

+ // output offsets only for bulk conversion

+ resultOffsets=NULL;

+ }

+ for(;;) {

+ // resetting the opposite conversion direction must not affect this one

+ ucnv_resetFromUnicode(cnv);

+ // convert

+ ucnv_toUnicode(cnv,

+ &target, targetLimit,

+ &source, sourceLimit,

+ resultOffsets,

+ flush, pErrorCode);

+ // check pointers and errors

+ if(source>sourceLimit || target>targetLimit) {

+ *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

+ break;

+ } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

+ if(target!=targetLimit) {

+ // buffer overflow must only be set when the target is filled

+ *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

+ break;

+ } else if(targetLimit==resultLimit) {

+ // not just a partial overflow

+ break;

+ }

+ // the partial target is filled, set a new limit, reset the error and continue

+ targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;

+ *pErrorCode=U_ZERO_ERROR;

+ } else if(U_FAILURE(*pErrorCode)) {

+ // some other error occurred, done

+ break;

+ } else {

+ if(source!=sourceLimit) {

+ // when no error occurs, then the input must be consumed

+ *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

+ break;

+ }

+ if(sourceLimit==bytesLimit) {

+ // we are done

+ break;

+ }

+ // the partial conversion succeeded, set a new limit and continue

+ sourceLimit=(bytesLimit-source)>=step ? source+step : bytesLimit;

+ flush=(UBool)(cc.finalFlush && sourceLimit==bytesLimit);

+ }

+ } else /* step<0 */ {

+ /*

+ * step==-1: call only ucnv_getNextUChar()

+ * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar()

+ * if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input,

+ * else give it at most (-step-2)/2 bytes

+ */

+ UChar32 c;

+ // end the loop by getting an index out of bounds error

+ for(;;) {

+ // resetting the opposite conversion direction must not affect this one

+ ucnv_resetFromUnicode(cnv);

+ // convert

+ if((step&1)!=0 /* odd: -1, -3, -5, ... */) {

+ sourceLimit=source; // use sourceLimit not as a real limit

+ // but to remember the pre-getNextUChar source pointer

+ c=ucnv_getNextUChar(cnv, &source, bytesLimit, pErrorCode);

+ // check pointers and errors

+ if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) {

+ if(source!=bytesLimit) {

+ *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

+ } else {

+ *pErrorCode=U_ZERO_ERROR;

+ }

+ break;

+ } else if(U_FAILURE(*pErrorCode)) {

+ break;

+ }

+ // source may not move if c is from previous overflow

+ if(target==resultLimit) {

+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

+ break;

+ }

+ if(c<=0xffff) {

+ *target++=(UChar)c;

+ } else {

+ *target++=U16_LEAD(c);

+ if(target==resultLimit) {

+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

+ break;

+ }

+ *target++=U16_TRAIL(c);

+ }

+ // alternate between -n-1 and -n but leave -1 alone

+ if(step<-1) {

+ ++step;

+ }

+ } else /* step is even */ {

+ // allow only one UChar output

+ targetLimit=target<resultLimit ? target+1 : resultLimit;

+ // as with ucnv_getNextUChar(), we always flush (if we go to bytesLimit)

+ // and never output offsets

+ if(step==-2) {

+ sourceLimit=bytesLimit;

+ } else {

+ sourceLimit=source+(-step-2)/2;

+ if(sourceLimit>bytesLimit) {

+ sourceLimit=bytesLimit;

+ }

+ ucnv_toUnicode(cnv,

+ &target, targetLimit,

+ &source, sourceLimit,

+ NULL, (UBool)(sourceLimit==bytesLimit), pErrorCode);

+ // check pointers and errors

+ if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

+ if(target!=targetLimit) {

+ // buffer overflow must only be set when the target is filled

+ *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

+ break;

+ } else if(targetLimit==resultLimit) {

+ // not just a partial overflow

+ break;

+ }

+ // the partial target is filled, set a new limit and continue

+ *pErrorCode=U_ZERO_ERROR;

+ } else if(U_FAILURE(*pErrorCode)) {

+ // some other error occurred, done

+ break;

+ } else {

+ if(source!=sourceLimit) {

+ // when no error occurs, then the input must be consumed

+ *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

+ break;

+ }

+ // we are done (flush==TRUE) but we continue, to get the index out of bounds error above

+ }

+ --step;

+ }

+ return (int32_t)(target-result);

+UBool

+ConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback, const char *option) {

+ // open the converter

+ IcuTestErrorCode errorCode(*this, "ToUnicodeCase");

+ LocalUConverterPointer cnv(cnv_open(cc.charset, errorCode));

+ if(errorCode.isFailure()) {

+ errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",

+ cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, errorCode.errorName());

+ errorCode.reset();

+ return FALSE;

+ }

+ // set the callback

+ if(callback!=NULL) {

+ ucnv_setToUCallBack(cnv.getAlias(), callback, option, NULL, NULL, errorCode);

+ if(U_FAILURE(errorCode)) {

+ errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBack() failed - %s",

+ cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));

+ return FALSE;

+ }

+ int32_t resultOffsets[256];

+ UChar result[256];

+ int32_t resultLength;

+ UBool ok;

+ static const struct {

+ int32_t step;

+ const char *name;

+ } steps[]={

+ { 0, "bulk" }, // must be first for offsets to be checked

+ { 1, "step=1" },

+ { 3, "step=3" },

+ { 7, "step=7" },

+ { -1, "getNext" },

+ { -2, "toU(bulk)+getNext" },

+ { -3, "getNext+toU(bulk)" },

+ { -4, "toU(1)+getNext" },

+ { -5, "getNext+toU(1)" },

+ { -12, "toU(5)+getNext" },

+ { -13, "getNext+toU(5)" },

+ };

+ int32_t i, step;

+ ok=TRUE;

+ for(i=0; i<LENGTHOF(steps) && ok; ++i) {

+ step=steps[i].step;

+ if(step<0 && !cc.finalFlush) {

+ // skip ucnv_getNextUChar() if !finalFlush because

+ // ucnv_getNextUChar() always implies flush

+ continue;

+ }

+ if(step!=0) {

+ // bulk test is first, then offsets are not checked any more

+ cc.offsets=NULL;

+ }

+ else {

+ memset(resultOffsets, -1, LENGTHOF(resultOffsets));

+ }

+ memset(result, -1, LENGTHOF(result));

+ errorCode.reset();

+ resultLength=stepToUnicode(cc, cnv.getAlias(),

+ result, LENGTHOF(result),

+ step==0 ? resultOffsets : NULL,

+ step, errorCode);

+ ok=checkToUnicode(

+ cc, cnv.getAlias(), steps[i].name,

+ result, resultLength,

+ cc.offsets!=NULL ? resultOffsets : NULL,

+ errorCode);

+ if(errorCode.isFailure() || !cc.finalFlush) {

+ // reset if an error occurred or we did not flush

+ // otherwise do nothing to make sure that flushing resets

+ ucnv_resetToUnicode(cnv.getAlias());

+ }

+ if (cc.offsets != NULL && resultOffsets[resultLength] != -1) {

+ errln("toUnicode[%d](%s) Conversion wrote too much to offsets at index %d",

+ cc.caseNr, cc.charset, resultLength);

+ }

+ if (result[resultLength] != (UChar)-1) {

+ errln("toUnicode[%d](%s) Conversion wrote too much to result at index %d",

+ cc.caseNr, cc.charset, resultLength);

+ }

+ // not a real loop, just a convenience for breaking out of the block

+ while(ok && cc.finalFlush) {

+ // test ucnv_toUChars()

+ memset(result, 0, sizeof(result));

+ errorCode.reset();

+ resultLength=ucnv_toUChars(cnv.getAlias(),

+ result, LENGTHOF(result),

+ (const char *)cc.bytes, cc.bytesLength,

+ errorCode);

+ ok=checkToUnicode(

+ cc, cnv.getAlias(), "toUChars",

+ result, resultLength,

+ NULL,

+ errorCode);

+ if(!ok) {

+ break;

+ }

+ // test preflighting

+ // keep the correct result for simple checking

+ errorCode.reset();

+ resultLength=ucnv_toUChars(cnv.getAlias(),

+ NULL, 0,

+ (const char *)cc.bytes, cc.bytesLength,

+ errorCode);

+ if(errorCode.get()==U_STRING_NOT_TERMINATED_WARNING || errorCode.get()==U_BUFFER_OVERFLOW_ERROR) {

+ errorCode.reset();

+ }

+ ok=checkToUnicode(

+ cc, cnv.getAlias(), "preflight toUChars",

+ result, resultLength,

+ NULL,

+ errorCode);

+ break;

+ }

+ errorCode.reset(); // all errors have already been reported

+ return ok;

+UBool

+ConversionTest::checkToUnicode(ConversionCase &cc, UConverter *cnv, const char *name,

+ const UChar *result, int32_t resultLength,

+ const int32_t *resultOffsets,

+ UErrorCode resultErrorCode) {

+ char resultInvalidChars[8];

+ int8_t resultInvalidLength;

+ UErrorCode errorCode;

+ const char *msg;

+ // reset the message; NULL will mean "ok"

+ msg=NULL;

+ errorCode=U_ZERO_ERROR;

+ resultInvalidLength=sizeof(resultInvalidChars);

+ ucnv_getInvalidChars(cnv, resultInvalidChars, &resultInvalidLength, &errorCode);

+ if(U_FAILURE(errorCode)) {

+ errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidChars() failed - %s",

+ cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode));

+ return FALSE;

+ }

+ // check everything that might have gone wrong

+ if(cc.unicodeLength!=resultLength) {

+ msg="wrong result length";

+ } else if(0!=u_memcmp(cc.unicode, result, cc.unicodeLength)) {

+ msg="wrong result string";

+ } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.unicodeLength*sizeof(*cc.offsets))) {

+ msg="wrong offsets";

+ } else if(cc.outErrorCode!=resultErrorCode) {

+ msg="wrong error code";

+ } else if(cc.invalidLength!=resultInvalidLength) {

+ msg="wrong length of last invalid input";

+ } else if(0!=memcmp(cc.invalidChars, resultInvalidChars, cc.invalidLength)) {

+ msg="wrong last invalid input";

+ }

+ if(msg==NULL) {

+ return TRUE;

+ } else {

+ char buffer[2000]; // one buffer for all strings

+ char *s, *bytesString, *unicodeString, *resultString,

+ *offsetsString, *resultOffsetsString,

+ *invalidCharsString, *resultInvalidCharsString;

+ bytesString=s=buffer;

+ s=printBytes(cc.bytes, cc.bytesLength, bytesString);

+ s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString=s);

+ s=printUnicode(result, resultLength, resultString=s);

+ s=printOffsets(cc.offsets, cc.unicodeLength, offsetsString=s);

+ s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);

+ s=printBytes(cc.invalidChars, cc.invalidLength, invalidCharsString=s);

+ s=printBytes((uint8_t *)resultInvalidChars, resultInvalidLength, resultInvalidCharsString=s);

+ if((s-buffer)>(int32_t)sizeof(buffer)) {

+ errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkToUnicode() test output buffer overflow writing %d chars\n",

+ cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer));

+ exit(1);

+ }

+ errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"

+ " bytes <%s>[%d]\n"

+ " expected <%s>[%d]\n"

+ " result <%s>[%d]\n"

+ " offsets <%s>\n"

+ " result offsets <%s>\n"

+ " error code expected %s got %s\n"

+ " invalidChars expected <%s> got <%s>\n",

+ cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg,

+ bytesString, cc.bytesLength,

+ unicodeString, cc.unicodeLength,

+ resultString, resultLength,

+ offsetsString,

+ resultOffsetsString,

+ u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),

+ invalidCharsString, resultInvalidCharsString);

+ return FALSE;

+ }

+// fromUnicode test worker functions --------------------------------------- ***

+static int32_t

+stepFromUTF8(ConversionCase &cc,

+ UConverter *utf8Cnv, UConverter *cnv,

+ char *result, int32_t resultCapacity,

+ int32_t step,

+ UErrorCode *pErrorCode) {

+ const char *source, *sourceLimit, *utf8Limit;

+ UChar pivotBuffer[32];

+ UChar *pivotSource, *pivotTarget, *pivotLimit;

+ char *target, *targetLimit, *resultLimit;

+ UBool flush;

+ source=cc.utf8;

+ pivotSource=pivotTarget=pivotBuffer;

+ target=result;

+ utf8Limit=source+cc.utf8Length;

+ resultLimit=result+resultCapacity;

+ // call ucnv_convertEx() with in/out buffers no larger than (step) at a time

+ // move only one buffer (in vs. out) at a time to be extra mean

+ // step==0 performs bulk conversion

+ // initialize the partial limits for the loop

+ if(step==0) {

+ // use the entire buffers

+ sourceLimit=utf8Limit;

+ targetLimit=resultLimit;

+ flush=cc.finalFlush;

+ pivotLimit=pivotBuffer+LENGTHOF(pivotBuffer);

+ } else {

+ // start with empty partial buffers

+ sourceLimit=source;

+ targetLimit=target;

+ flush=FALSE;

+ // empty pivot is not allowed, make it of length step

+ pivotLimit=pivotBuffer+step;

+ }

+ for(;;) {

+ // resetting the opposite conversion direction must not affect this one

+ ucnv_resetFromUnicode(utf8Cnv);

+ ucnv_resetToUnicode(cnv);

+ // convert

+ ucnv_convertEx(cnv, utf8Cnv,

+ &target, targetLimit,

+ &source, sourceLimit,

+ pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,

+ FALSE, flush, pErrorCode);

+ // check pointers and errors

+ if(source>sourceLimit || target>targetLimit) {

+ *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

+ break;

+ } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

+ if(target!=targetLimit) {

+ // buffer overflow must only be set when the target is filled

+ *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

+ break;

+ } else if(targetLimit==resultLimit) {

+ // not just a partial overflow

+ break;

+ }

+ // the partial target is filled, set a new limit, reset the error and continue

+ targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;

+ *pErrorCode=U_ZERO_ERROR;

+ } else if(U_FAILURE(*pErrorCode)) {

+ if(pivotSource==pivotBuffer) {

+ // toUnicode error, should not occur

+ // toUnicode errors are tested in cintltst TestConvertExFromUTF8()

+ break;

+ } else {

+ // fromUnicode error

+ // some other error occurred, done

+ break;

+ }

+ } else {

+ if(source!=sourceLimit) {

+ // when no error occurs, then the input must be consumed

+ *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

+ break;

+ }

+ if(sourceLimit==utf8Limit) {

+ // we are done

+ if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {

+ // ucnv_convertEx() warns about not terminating the output

+ // but ucnv_fromUnicode() does not and so

+ // checkFromUnicode() does not expect it

+ *pErrorCode=U_ZERO_ERROR;

+ }

+ break;

+ }

+ // the partial conversion succeeded, set a new limit and continue

+ sourceLimit=(utf8Limit-source)>=step ? source+step : utf8Limit;

+ flush=(UBool)(cc.finalFlush && sourceLimit==utf8Limit);

+ }

+ return (int32_t)(target-result);

+static int32_t

+stepFromUnicode(ConversionCase &cc, UConverter *cnv,

+ char *result, int32_t resultCapacity,

+ int32_t *resultOffsets, /* also resultCapacity */

+ int32_t step,

+ UErrorCode *pErrorCode) {

+ const UChar *source, *sourceLimit, *unicodeLimit;

+ char *target, *targetLimit, *resultLimit;

+ UBool flush;

+ source=cc.unicode;

+ target=result;

+ unicodeLimit=source+cc.unicodeLength;

+ resultLimit=result+resultCapacity;

+ // call ucnv_fromUnicode() with in/out buffers no larger than (step) at a time

+ // move only one buffer (in vs. out) at a time to be extra mean

+ // step==0 performs bulk conversion and generates offsets

+ // initialize the partial limits for the loop

+ if(step==0) {

+ // use the entire buffers

+ sourceLimit=unicodeLimit;

+ targetLimit=resultLimit;

+ flush=cc.finalFlush;

+ } else {

+ // start with empty partial buffers

+ sourceLimit=source;

+ targetLimit=target;

+ flush=FALSE;

+ // output offsets only for bulk conversion

+ resultOffsets=NULL;

+ }

+ for(;;) {

+ // resetting the opposite conversion direction must not affect this one

+ ucnv_resetToUnicode(cnv);

+ // convert

+ ucnv_fromUnicode(cnv,

+ &target, targetLimit,

+ &source, sourceLimit,

+ resultOffsets,

+ flush, pErrorCode);

+ // check pointers and errors

+ if(source>sourceLimit || target>targetLimit) {

+ *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

+ break;

+ } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

+ if(target!=targetLimit) {

+ // buffer overflow must only be set when the target is filled

+ *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

+ break;

+ } else if(targetLimit==resultLimit) {

+ // not just a partial overflow

+ break;

+ }

+ // the partial target is filled, set a new limit, reset the error and continue

+ targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;

+ *pErrorCode=U_ZERO_ERROR;

+ } else if(U_FAILURE(*pErrorCode)) {

+ // some other error occurred, done

+ break;

+ } else {

+ if(source!=sourceLimit) {

+ // when no error occurs, then the input must be consumed

+ *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

+ break;

+ }

+ if(sourceLimit==unicodeLimit) {

+ // we are done

+ break;

+ }

+ // the partial conversion succeeded, set a new limit and continue

+ sourceLimit=(unicodeLimit-source)>=step ? source+step : unicodeLimit;

+ flush=(UBool)(cc.finalFlush && sourceLimit==unicodeLimit);

+ }

+ return (int32_t)(target-result);

+UBool

+ConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback callback, const char *option) {

+ UConverter *cnv;

+ UErrorCode errorCode;

+ // open the converter

+ errorCode=U_ZERO_ERROR;

+ cnv=cnv_open(cc.charset, errorCode);

+ if(U_FAILURE(errorCode)) {

+ errcheckln(errorCode, "fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",

+ cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));

+ return FALSE;

+ }

+ ucnv_resetToUnicode(utf8Cnv);

+ // set the callback

+ if(callback!=NULL) {

+ ucnv_setFromUCallBack(cnv, callback, option, NULL, NULL, &errorCode);

+ if(U_FAILURE(errorCode)) {

+ errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCallBack() failed - %s",

+ cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));

+ ucnv_close(cnv);

+ return FALSE;

+ }

+ // set the fallbacks flag

+ // TODO change with Jitterbug 2401, then add a similar call for toUnicode too

+ ucnv_setFallback(cnv, cc.fallbacks);

+ // set the subchar

+ int32_t length;

+ if(cc.setSub>0) {

+ length=(int32_t)strlen(cc.subchar);

+ ucnv_setSubstChars(cnv, cc.subchar, (int8_t)length, &errorCode);

+ if(U_FAILURE(errorCode)) {

+ errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstChars() failed - %s",

+ cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));

+ ucnv_close(cnv);

+ return FALSE;

+ }

+ } else if(cc.setSub<0) {

+ ucnv_setSubstString(cnv, cc.subString, -1, &errorCode);

+ if(U_FAILURE(errorCode)) {

+ errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstString() failed - %s",

+ cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));

+ ucnv_close(cnv);

+ return FALSE;

+ }

+ // convert unicode to utf8

+ char utf8[256];

+ cc.utf8=utf8;

+ u_strToUTF8(utf8, LENGTHOF(utf8), &cc.utf8Length,

+ cc.unicode, cc.unicodeLength,

+ &errorCode);

+ if(U_FAILURE(errorCode)) {

+ // skip UTF-8 testing of a string with an unpaired surrogate,

+ // or of one that's too long

+ // toUnicode errors are tested in cintltst TestConvertExFromUTF8()

+ cc.utf8Length=-1;

+ }

+ int32_t resultOffsets[256];

+ char result[256];

+ int32_t resultLength;

+ UBool ok;

+ static const struct {

+ int32_t step;

+ const char *name, *utf8Name;

+ } steps[]={

+ { 0, "bulk", "utf8" }, // must be first for offsets to be checked

+ { 1, "step=1", "utf8 step=1" },

+ { 3, "step=3", "utf8 step=3" },

+ { 7, "step=7", "utf8 step=7" }

+ };

+ int32_t i, step;

+ ok=TRUE;

+ for(i=0; i<LENGTHOF(steps) && ok; ++i) {

+ step=steps[i].step;

+ memset(resultOffsets, -1, LENGTHOF(resultOffsets));

+ memset(result, -1, LENGTHOF(result));

+ errorCode=U_ZERO_ERROR;

+ resultLength=stepFromUnicode(cc, cnv,

+ result, LENGTHOF(result),

+ step==0 ? resultOffsets : NULL,

+ step, &errorCode);

+ ok=checkFromUnicode(

+ cc, cnv, steps[i].name,

+ (uint8_t *)result, resultLength,

+ cc.offsets!=NULL ? resultOffsets : NULL,

+ errorCode);

+ if(U_FAILURE(errorCode) || !cc.finalFlush) {

+ // reset if an error occurred or we did not flush

+ // otherwise do nothing to make sure that flushing resets

+ ucnv_resetFromUnicode(cnv);

+ }

+ if (resultOffsets[resultLength] != -1) {

+ errln("fromUnicode[%d](%s) Conversion wrote too much to offsets at index %d",

+ cc.caseNr, cc.charset, resultLength);

+ }

+ if (result[resultLength] != (char)-1) {

+ errln("fromUnicode[%d](%s) Conversion wrote too much to result at index %d",

+ cc.caseNr, cc.charset, resultLength);

+ }

+ // bulk test is first, then offsets are not checked any more

+ cc.offsets=NULL;

+ // test direct conversion from UTF-8

+ if(cc.utf8Length>=0) {

+ errorCode=U_ZERO_ERROR;

+ resultLength=stepFromUTF8(cc, utf8Cnv, cnv,

+ result, LENGTHOF(result),

+ step, &errorCode);

+ ok=checkFromUnicode(

+ cc, cnv, steps[i].utf8Name,

+ (uint8_t *)result, resultLength,

+ NULL,

+ errorCode);

+ if(U_FAILURE(errorCode) || !cc.finalFlush) {

+ // reset if an error occurred or we did not flush

+ // otherwise do nothing to make sure that flushing resets

+ ucnv_resetToUnicode(utf8Cnv);

+ ucnv_resetFromUnicode(cnv);

+ }

+ // not a real loop, just a convenience for breaking out of the block

+ while(ok && cc.finalFlush) {

+ // test ucnv_fromUChars()

+ memset(result, 0, sizeof(result));

+ errorCode=U_ZERO_ERROR;

+ resultLength=ucnv_fromUChars(cnv,

+ result, LENGTHOF(result),

+ cc.unicode, cc.unicodeLength,

+ &errorCode);

+ ok=checkFromUnicode(

+ cc, cnv, "fromUChars",

+ (uint8_t *)result, resultLength,

+ NULL,

+ errorCode);

+ if(!ok) {

+ break;

+ }

+ // test preflighting

+ // keep the correct result for simple checking

+ errorCode=U_ZERO_ERROR;

+ resultLength=ucnv_fromUChars(cnv,

+ NULL, 0,

+ cc.unicode, cc.unicodeLength,

+ &errorCode);

+ if(errorCode==U_STRING_NOT_TERMINATED_WARNING || errorCode==U_BUFFER_OVERFLOW_ERROR) {

+ errorCode=U_ZERO_ERROR;

+ }

+ ok=checkFromUnicode(

+ cc, cnv, "preflight fromUChars",

+ (uint8_t *)result, resultLength,

+ NULL,

+ errorCode);

+ break;

+ }

+ ucnv_close(cnv);

+ return ok;

+UBool

+ConversionTest::checkFromUnicode(ConversionCase &cc, UConverter *cnv, const char *name,

+ const uint8_t *result, int32_t resultLength,

+ const int32_t *resultOffsets,

+ UErrorCode resultErrorCode) {

+ UChar resultInvalidUChars[8];

+ int8_t resultInvalidLength;

+ UErrorCode errorCode;

+ const char *msg;

+ // reset the message; NULL will mean "ok"

+ msg=NULL;

+ errorCode=U_ZERO_ERROR;

+ resultInvalidLength=LENGTHOF(resultInvalidUChars);

+ ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &errorCode);

+ if(U_FAILURE(errorCode)) {

+ errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s",

+ cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode));

+ return FALSE;

+ }

+ // check everything that might have gone wrong

+ if(cc.bytesLength!=resultLength) {

+ msg="wrong result length";

+ } else if(0!=memcmp(cc.bytes, result, cc.bytesLength)) {

+ msg="wrong result string";

+ } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.bytesLength*sizeof(*cc.offsets))) {

+ msg="wrong offsets";

+ } else if(cc.outErrorCode!=resultErrorCode) {

+ msg="wrong error code";

+ } else if(cc.invalidLength!=resultInvalidLength) {

+ msg="wrong length of last invalid input";

+ } else if(0!=u_memcmp(cc.invalidUChars, resultInvalidUChars, cc.invalidLength)) {

+ msg="wrong last invalid input";

+ }

+ if(msg==NULL) {

+ return TRUE;

+ } else {

+ char buffer[2000]; // one buffer for all strings

+ char *s, *unicodeString, *bytesString, *resultString,

+ *offsetsString, *resultOffsetsString,

+ *invalidCharsString, *resultInvalidUCharsString;

+ unicodeString=s=buffer;

+ s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString);

+ s=printBytes(cc.bytes, cc.bytesLength, bytesString=s);

+ s=printBytes(result, resultLength, resultString=s);

+ s=printOffsets(cc.offsets, cc.bytesLength, offsetsString=s);

+ s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);

+ s=printUnicode(cc.invalidUChars, cc.invalidLength, invalidCharsString=s);

+ s=printUnicode(resultInvalidUChars, resultInvalidLength, resultInvalidUCharsString=s);

+ if((s-buffer)>(int32_t)sizeof(buffer)) {

+ errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkFromUnicode() test output buffer overflow writing %d chars\n",

+ cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer));

+ exit(1);

+ }

+ errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"

+ " unicode <%s>[%d]\n"

+ " expected <%s>[%d]\n"

+ " result <%s>[%d]\n"

+ " offsets <%s>\n"

+ " result offsets <%s>\n"

+ " error code expected %s got %s\n"

+ " invalidChars expected <%s> got <%s>\n",

+ cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg,

+ unicodeString, cc.unicodeLength,

+ bytesString, cc.bytesLength,

+ resultString, resultLength,

+ offsetsString,

+ resultOffsetsString,

+ u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),

+ invalidCharsString, resultInvalidUCharsString);

+ return FALSE;

+ }

+#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

Property changes on: icu46/source/test/intltest/convtest.cpp

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu46/source/test/intltest/convtest.h ('k') | icu46/source/test/intltest/cpdtrtst.h » ('j') | no next file with comments »