source/test/intltest/convtest.cpp - Issue 2435373002: Delete source/test

Unified Diff: source/test/intltest/convtest.cpp

Issue 2435373002: Delete source/test (Closed)

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: source/test/intltest/convtest.cpp

diff --git a/source/test/intltest/convtest.cpp b/source/test/intltest/convtest.cpp

deleted file mode 100644

index 19bc7d520b7450ead294c4cd9d511677fb8fa6e3..0000000000000000000000000000000000000000

--- a/source/test/intltest/convtest.cpp

+++ /dev/null

@@ -1,1679 +0,0 @@

-/*

-*******************************************************************************

-* file name: convtest.cpp

-* encoding: US-ASCII

-* tab size: 8 (not used)

-* indentation:4

-* created on: 2003jul15

-* created by: Markus W. Scherer

-* Test file for data-driven conversion tests.

-*/

-#include "unicode/utypes.h"

-#if !UCONFIG_NO_LEGACY_CONVERSION

-/*

- * Note: Turning off all of convtest.cpp if UCONFIG_NO_LEGACY_CONVERSION is set

- * is slightly unnecessary - it removes tests for Unicode charsets

- * like UTF-8 that should work.

- * However, there is no easy way for the test to detect whether a test case

- * is for a Unicode charset, so it would be difficult to only exclude those.

- * Also, regular testing of ICU is done with all modules on, therefore

- * not testing conversion for a custom configuration like this should be ok.

- */

-#include "unicode/ucnv.h"

-#include "unicode/unistr.h"

-#include "unicode/parsepos.h"

-#include "unicode/uniset.h"

-#include "unicode/ustring.h"

-#include "unicode/ures.h"

-#include "convtest.h"

-#include "cmemory.h"

-#include "unicode/tstdtmod.h"

-#include <string.h>

-#include <stdlib.h>

-enum {

- // characters used in test data for callbacks

- SUB_CB='?',

- SKIP_CB='0',

- STOP_CB='.',

- ESC_CB='&'

-};

-ConversionTest::ConversionTest() {

- UErrorCode errorCode=U_ZERO_ERROR;

- utf8Cnv=ucnv_open("UTF-8", &errorCode);

- ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);

- if(U_FAILURE(errorCode)) {

- errln("unable to open UTF-8 converter");

- }

-ConversionTest::~ConversionTest() {

- ucnv_close(utf8Cnv);

-void

-ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {

- if (exec) logln("TestSuite ConversionTest: ");

- switch (index) {

-#if !UCONFIG_NO_FILE_IO

- case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break;

- case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break;

- case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break;

- case 3: name="TestDefaultIgnorableCallback"; if (exec) TestDefaultIgnorableCallback(); break;

-#else

- case 0:

- case 1:

- case 2:

- case 3: name="skip"; break;

-#endif

- case 4: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break;

- default: name=""; break; //needed to end loop

- }

-// test data interface ----------------------------------------------------- ***

-void

-ConversionTest::TestToUnicode() {

- ConversionCase cc;

- char charset[100], cbopt[4];

- const char *option;

- UnicodeString s, unicode;

- int32_t offsetsLength;

- UConverterToUCallback callback;

- TestDataModule *dataModule;

- TestData *testData;

- const DataMap *testCase;

- UErrorCode errorCode;

- int32_t i;

- errorCode=U_ZERO_ERROR;

- dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);

- if(U_SUCCESS(errorCode)) {

- testData=dataModule->createTestData("toUnicode", errorCode);

- if(U_SUCCESS(errorCode)) {

- for(i=0; testData->nextCase(testCase, errorCode); ++i) {

- if(U_FAILURE(errorCode)) {

- errln("error retrieving conversion/toUnicode test case %d - %s",

- i, u_errorName(errorCode));

- errorCode=U_ZERO_ERROR;

- continue;

- }

- cc.caseNr=i;

- s=testCase->getString("charset", errorCode);

- s.extract(0, 0x7fffffff, charset, sizeof(charset), "");

- cc.charset=charset;

- cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);

- unicode=testCase->getString("unicode", errorCode);

- cc.unicode=unicode.getBuffer();

- cc.unicodeLength=unicode.length();

- offsetsLength=0;

- cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);

- if(offsetsLength==0) {

- cc.offsets=NULL;

- } else if(offsetsLength!=unicode.length()) {

- errln("toUnicode[%d] unicode[%d] and offsets[%d] must have the same length",

- i, unicode.length(), offsetsLength);

- errorCode=U_ILLEGAL_ARGUMENT_ERROR;

- }

- cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);

- cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);

- s=testCase->getString("errorCode", errorCode);

- if(s==UNICODE_STRING("invalid", 7)) {

- cc.outErrorCode=U_INVALID_CHAR_FOUND;

- } else if(s==UNICODE_STRING("illegal", 7)) {

- cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;

- } else if(s==UNICODE_STRING("truncated", 9)) {

- cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;

- } else if(s==UNICODE_STRING("illesc", 6)) {

- cc.outErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE;

- } else if(s==UNICODE_STRING("unsuppesc", 9)) {

- cc.outErrorCode=U_UNSUPPORTED_ESCAPE_SEQUENCE;

- } else {

- cc.outErrorCode=U_ZERO_ERROR;

- }

- s=testCase->getString("callback", errorCode);

- s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");

- cc.cbopt=cbopt;

- switch(cbopt[0]) {

- case SUB_CB:

- callback=UCNV_TO_U_CALLBACK_SUBSTITUTE;

- break;

- case SKIP_CB:

- callback=UCNV_TO_U_CALLBACK_SKIP;

- break;

- case STOP_CB:

- callback=UCNV_TO_U_CALLBACK_STOP;

- break;

- case ESC_CB:

- callback=UCNV_TO_U_CALLBACK_ESCAPE;

- break;

- default:

- callback=NULL;

- break;

- }

- option=callback==NULL ? cbopt : cbopt+1;

- if(*option==0) {

- option=NULL;

- }

- cc.invalidChars=testCase->getBinary(cc.invalidLength, "invalidChars", errorCode);

- if(U_FAILURE(errorCode)) {

- errln("error parsing conversion/toUnicode test case %d - %s",

- i, u_errorName(errorCode));

- errorCode=U_ZERO_ERROR;

- } else {

- logln("TestToUnicode[%d] %s", i, charset);

- ToUnicodeCase(cc, callback, option);

- }

- delete testData;

- }

- delete dataModule;

- }

- else {

- dataerrln("Could not load test conversion data");

- }

-void

-ConversionTest::TestFromUnicode() {

- ConversionCase cc;

- char charset[100], cbopt[4];

- const char *option;

- UnicodeString s, unicode, invalidUChars;

- int32_t offsetsLength, index;

- UConverterFromUCallback callback;

- TestDataModule *dataModule;

- TestData *testData;

- const DataMap *testCase;

- const UChar *p;

- UErrorCode errorCode;

- int32_t i, length;

- errorCode=U_ZERO_ERROR;

- dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);

- if(U_SUCCESS(errorCode)) {

- testData=dataModule->createTestData("fromUnicode", errorCode);

- if(U_SUCCESS(errorCode)) {

- for(i=0; testData->nextCase(testCase, errorCode); ++i) {

- if(U_FAILURE(errorCode)) {

- errln("error retrieving conversion/fromUnicode test case %d - %s",

- i, u_errorName(errorCode));

- errorCode=U_ZERO_ERROR;

- continue;

- }

- cc.caseNr=i;

- s=testCase->getString("charset", errorCode);

- s.extract(0, 0x7fffffff, charset, sizeof(charset), "");

- cc.charset=charset;

- unicode=testCase->getString("unicode", errorCode);

- cc.unicode=unicode.getBuffer();

- cc.unicodeLength=unicode.length();

- cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);

- offsetsLength=0;

- cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);

- if(offsetsLength==0) {

- cc.offsets=NULL;

- } else if(offsetsLength!=cc.bytesLength) {

- errln("fromUnicode[%d] bytes[%d] and offsets[%d] must have the same length",

- i, cc.bytesLength, offsetsLength);

- errorCode=U_ILLEGAL_ARGUMENT_ERROR;

- }

- cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);

- cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);

- s=testCase->getString("errorCode", errorCode);

- if(s==UNICODE_STRING("invalid", 7)) {

- cc.outErrorCode=U_INVALID_CHAR_FOUND;

- } else if(s==UNICODE_STRING("illegal", 7)) {

- cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;

- } else if(s==UNICODE_STRING("truncated", 9)) {

- cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;

- } else {

- cc.outErrorCode=U_ZERO_ERROR;

- }

- s=testCase->getString("callback", errorCode);

- cc.setSub=0; // default: no subchar

- if((index=s.indexOf((UChar)0))>0) {

- // read NUL-separated subchar first, if any

- // copy the subchar from Latin-1 characters

- // start after the NUL

- p=s.getTerminatedBuffer();

- length=index+1;

- p+=length;

- length=s.length()-length;

- if(length<=0 || length>=(int32_t)sizeof(cc.subchar)) {

- errorCode=U_ILLEGAL_ARGUMENT_ERROR;

- } else {

- int32_t j;

- for(j=0; j<length; ++j) {

- cc.subchar[j]=(char)p[j];

- }

- // NUL-terminate the subchar

- cc.subchar[j]=0;

- cc.setSub=1;

- }

- // remove the NUL and subchar from s

- s.truncate(index);

- } else if((index=s.indexOf((UChar)0x3d))>0) /* '=' */ {

- // read a substitution string, separated by an equal sign

- p=s.getBuffer()+index+1;

- length=s.length()-(index+1);

- if(length<0 || length>=UPRV_LENGTHOF(cc.subString)) {

- errorCode=U_ILLEGAL_ARGUMENT_ERROR;

- } else {

- u_memcpy(cc.subString, p, length);

- // NUL-terminate the subString

- cc.subString[length]=0;

- cc.setSub=-1;

- }

- // remove the equal sign and subString from s

- s.truncate(index);

- }

- s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");

- cc.cbopt=cbopt;

- switch(cbopt[0]) {

- case SUB_CB:

- callback=UCNV_FROM_U_CALLBACK_SUBSTITUTE;

- break;

- case SKIP_CB:

- callback=UCNV_FROM_U_CALLBACK_SKIP;

- break;

- case STOP_CB:

- callback=UCNV_FROM_U_CALLBACK_STOP;

- break;

- case ESC_CB:

- callback=UCNV_FROM_U_CALLBACK_ESCAPE;

- break;

- default:

- callback=NULL;

- break;

- }

- option=callback==NULL ? cbopt : cbopt+1;

- if(*option==0) {

- option=NULL;

- }

- invalidUChars=testCase->getString("invalidUChars", errorCode);

- cc.invalidUChars=invalidUChars.getBuffer();

- cc.invalidLength=invalidUChars.length();

- if(U_FAILURE(errorCode)) {

- errln("error parsing conversion/fromUnicode test case %d - %s",

- i, u_errorName(errorCode));

- errorCode=U_ZERO_ERROR;

- } else {

- logln("TestFromUnicode[%d] %s", i, charset);

- FromUnicodeCase(cc, callback, option);

- }

- delete testData;

- }

- delete dataModule;

- }

- else {

- dataerrln("Could not load test conversion data");

- }

-static const UChar ellipsis[]={ 0x2e, 0x2e, 0x2e };

-void

-ConversionTest::TestGetUnicodeSet() {

- char charset[100];

- UnicodeString s, map, mapnot;

- int32_t which;

- ParsePosition pos;

- UnicodeSet cnvSet, mapSet, mapnotSet, diffSet;

- UnicodeSet *cnvSetPtr = &cnvSet;

- LocalUConverterPointer cnv;

- TestDataModule *dataModule;

- TestData *testData;

- const DataMap *testCase;

- UErrorCode errorCode;

- int32_t i;

- errorCode=U_ZERO_ERROR;

- dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);

- if(U_SUCCESS(errorCode)) {

- testData=dataModule->createTestData("getUnicodeSet", errorCode);

- if(U_SUCCESS(errorCode)) {

- for(i=0; testData->nextCase(testCase, errorCode); ++i) {

- if(U_FAILURE(errorCode)) {

- errln("error retrieving conversion/getUnicodeSet test case %d - %s",

- i, u_errorName(errorCode));

- errorCode=U_ZERO_ERROR;

- continue;

- }

- s=testCase->getString("charset", errorCode);

- s.extract(0, 0x7fffffff, charset, sizeof(charset), "");

- map=testCase->getString("map", errorCode);

- mapnot=testCase->getString("mapnot", errorCode);

- which=testCase->getInt28("which", errorCode);

- if(U_FAILURE(errorCode)) {

- errln("error parsing conversion/getUnicodeSet test case %d - %s",

- i, u_errorName(errorCode));

- errorCode=U_ZERO_ERROR;

- continue;

- }

- // test this test case

- mapSet.clear();

- mapnotSet.clear();

- pos.setIndex(0);

- mapSet.applyPattern(map, pos, 0, NULL, errorCode);

- if(U_FAILURE(errorCode) || pos.getIndex()!=map.length()) {

- errln("error creating the map set for conversion/getUnicodeSet test case %d - %s\n"

- " error index %d index %d U+%04x",

- i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), map.char32At(pos.getIndex()));

- errorCode=U_ZERO_ERROR;

- continue;

- }

- pos.setIndex(0);

- mapnotSet.applyPattern(mapnot, pos, 0, NULL, errorCode);

- if(U_FAILURE(errorCode) || pos.getIndex()!=mapnot.length()) {

- errln("error creating the mapnot set for conversion/getUnicodeSet test case %d - %s\n"

- " error index %d index %d U+%04x",

- i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), mapnot.char32At(pos.getIndex()));

- errorCode=U_ZERO_ERROR;

- continue;

- }

- logln("TestGetUnicodeSet[%d] %s", i, charset);

- cnv.adoptInstead(cnv_open(charset, errorCode));

- if(U_FAILURE(errorCode)) {

- errcheckln(errorCode, "error opening \"%s\" for conversion/getUnicodeSet test case %d - %s",

- charset, i, u_errorName(errorCode));

- errorCode=U_ZERO_ERROR;

- continue;

- }

- ucnv_getUnicodeSet(cnv.getAlias(), cnvSetPtr->toUSet(), (UConverterUnicodeSet)which, &errorCode);

- if(U_FAILURE(errorCode)) {

- errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/getUnicodeSet test case %d - %s",

- charset, i, u_errorName(errorCode));

- errorCode=U_ZERO_ERROR;

- continue;

- }

- // are there items that must be in cnvSet but are not?

- (diffSet=mapSet).removeAll(cnvSet);

- if(!diffSet.isEmpty()) {

- diffSet.toPattern(s, TRUE);

- if(s.length()>100) {

- s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));

- }

- errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d",

- charset, i);

- errln(s);

- }

- // are there items that must not be in cnvSet but are?

- (diffSet=mapnotSet).retainAll(cnvSet);

- if(!diffSet.isEmpty()) {

- diffSet.toPattern(s, TRUE);

- if(s.length()>100) {

- s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));

- }

- errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d",

- charset, i);

- errln(s);

- }

- delete testData;

- }

- delete dataModule;

- }

- else {

- dataerrln("Could not load test conversion data");

- }

-U_CDECL_BEGIN

-static void U_CALLCONV

-getUnicodeSetCallback(const void *context,

- UConverterFromUnicodeArgs * /*fromUArgs*/,

- const UChar* /*codeUnits*/,

- int32_t /*length*/,

- UChar32 codePoint,

- UConverterCallbackReason reason,

- UErrorCode *pErrorCode) {

- if(reason<=UCNV_IRREGULAR) {

- ((UnicodeSet *)context)->remove(codePoint); // the converter cannot convert this code point

- *pErrorCode=U_ZERO_ERROR; // skip

- } // else ignore the reset, close and clone calls.

-U_CDECL_END

-// Compare ucnv_getUnicodeSet() with the set of characters that can be converted.

-void

-ConversionTest::TestGetUnicodeSet2() {

- // Build a string with all code points.

- UChar32 cpLimit;

- int32_t s0Length;

- if(quick) {

- cpLimit=s0Length=0x10000; // BMP only

- } else {

- cpLimit=0x110000;

- s0Length=0x10000+0x200000; // BMP + surrogate pairs

- }

- UChar *s0=new UChar[s0Length];

- if(s0==NULL) {

- return;

- }

- UChar *s=s0;

- UChar32 c;

- UChar c2;

- // low BMP

- for(c=0; c<=0xd7ff; ++c) {

- *s++=(UChar)c;

- }

- // trail surrogates

- for(c=0xdc00; c<=0xdfff; ++c) {

- *s++=(UChar)c;

- }

- // lead surrogates

- // (after trails so that there is not even one surrogate pair in between)

- for(c=0xd800; c<=0xdbff; ++c) {

- *s++=(UChar)c;

- }

- // high BMP

- for(c=0xe000; c<=0xffff; ++c) {

- *s++=(UChar)c;

- }

- // supplementary code points = surrogate pairs

- if(cpLimit==0x110000) {

- for(c=0xd800; c<=0xdbff; ++c) {

- for(c2=0xdc00; c2<=0xdfff; ++c2) {

- *s++=(UChar)c;

- *s++=c2;

- }

- static const char *const cnvNames[]={

- "UTF-8",

- "UTF-7",

- "UTF-16",

- "US-ASCII",

- "ISO-8859-1",

- "windows-1252",

- "Shift-JIS",

- "ibm-1390", // EBCDIC_STATEFUL table

- "ibm-16684", // DBCS-only extension table based on EBCDIC_STATEFUL table

- "HZ",

- "ISO-2022-JP",

- "JIS7",

- "ISO-2022-CN",

- "ISO-2022-CN-EXT",

- "LMBCS"

- };

- LocalUConverterPointer cnv;

- char buffer[1024];

- int32_t i;

- for(i=0; i<UPRV_LENGTHOF(cnvNames); ++i) {

- UErrorCode errorCode=U_ZERO_ERROR;

- cnv.adoptInstead(cnv_open(cnvNames[i], errorCode));

- if(U_FAILURE(errorCode)) {

- errcheckln(errorCode, "failed to open converter %s - %s", cnvNames[i], u_errorName(errorCode));

- continue;

- }

- UnicodeSet expected;

- ucnv_setFromUCallBack(cnv.getAlias(), getUnicodeSetCallback, &expected, NULL, NULL, &errorCode);

- if(U_FAILURE(errorCode)) {

- errln("failed to set the callback on converter %s - %s", cnvNames[i], u_errorName(errorCode));

- continue;

- }

- UConverterUnicodeSet which;

- for(which=UCNV_ROUNDTRIP_SET; which<UCNV_SET_COUNT; which=(UConverterUnicodeSet)((int)which+1)) {

- if(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {

- ucnv_setFallback(cnv.getAlias(), TRUE);

- }

- expected.add(0, cpLimit-1);

- s=s0;

- UBool flush;

- do {

- char *t=buffer;

- flush=(UBool)(s==s0+s0Length);

- ucnv_fromUnicode(cnv.getAlias(), &t, buffer+sizeof(buffer), (const UChar **)&s, s0+s0Length, NULL, flush, &errorCode);

- if(U_FAILURE(errorCode)) {

- if(errorCode==U_BUFFER_OVERFLOW_ERROR) {

- errorCode=U_ZERO_ERROR;

- continue;

- } else {

- break; // unexpected error, should not occur

- }

- } while(!flush);

- UnicodeSet set;

- ucnv_getUnicodeSet(cnv.getAlias(), set.toUSet(), which, &errorCode);

- if(cpLimit<0x110000) {

- set.remove(cpLimit, 0x10ffff);

- }

- if(which==UCNV_ROUNDTRIP_SET) {

- // ignore PUA code points because they will be converted even if they

- // are fallbacks and when other fallbacks are turned off,

- // but ucnv_getUnicodeSet(UCNV_ROUNDTRIP_SET) delivers true roundtrips

- expected.remove(0xe000, 0xf8ff);

- expected.remove(0xf0000, 0xffffd);

- expected.remove(0x100000, 0x10fffd);

- set.remove(0xe000, 0xf8ff);

- set.remove(0xf0000, 0xffffd);

- set.remove(0x100000, 0x10fffd);

- }

- if(set!=expected) {

- // First try to see if we have different sets because ucnv_getUnicodeSet()

- // added strings: The above conversion method does not tell us what strings might be convertible.

- // Remove strings from the set and compare again.

- // Unfortunately, there are no good, direct set methods for finding out whether there are strings

- // in the set, nor for enumerating or removing just them.

- // Intersect all code points with the set. The intersection will not contain strings.

- UnicodeSet temp(0, 0x10ffff);

- temp.retainAll(set);

- set=temp;

- }

- if(set!=expected) {

- UnicodeSet diffSet;

- UnicodeString out;

- // are there items that must be in the set but are not?

- (diffSet=expected).removeAll(set);

- if(!diffSet.isEmpty()) {

- diffSet.toPattern(out, TRUE);

- if(out.length()>100) {

- out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));

- }

- errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d",

- cnvNames[i], which);

- errln(out);

- }

- // are there items that must not be in the set but are?

- (diffSet=set).removeAll(expected);

- if(!diffSet.isEmpty()) {

- diffSet.toPattern(out, TRUE);

- if(out.length()>100) {

- out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));

- }

- errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d",

- cnvNames[i], which);

- errln(out);

- }

- delete [] s0;

-// Test that all code points which have the Default_Ignorable_Code_Point Unicode property are ignored if they have no mapping.

-// If there are any failures, the hard-coded list (IS_DEFAULT_IGNORABLE_CODE_POINT) in ucnv_err.c should be updated.

-void

-ConversionTest::TestDefaultIgnorableCallback() {

- UErrorCode status = U_ZERO_ERROR;

- const char *cnv_name = "euc-jp-2007";

- const char *pattern_ignorable = "[:Default_Ignorable_Code_Point:]";

- const char *pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]";

- UnicodeSet *set_ignorable = new UnicodeSet(pattern_ignorable, status);

- if (U_FAILURE(status)) {

- dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable, u_errorName(status));

- return;

- }

- UnicodeSet *set_not_ignorable = new UnicodeSet(pattern_not_ignorable, status);

- if (U_FAILURE(status)) {

- dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorable, u_errorName(status));

- return;

- }

- UConverter *cnv = cnv_open(cnv_name, status);

- if (U_FAILURE(status)) {

- dataerrln("Unable to open converter: %s - %s\n", cnv_name, u_errorName(status));

- return;

- }

- // set callback for the converter

- ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status);

- UChar32 input[1];

- char output[10];

- int32_t outputLength;

- // test default ignorables are ignored

- int size = set_ignorable->size();

- for (int i = 0; i < size; i++) {

- status = U_ZERO_ERROR;

- outputLength= 0;

- input[0] = set_ignorable->charAt(i);

- outputLength = ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);

- if (U_FAILURE(status) || outputLength != 0) {

- errln("Ignorable code point: U+%04X not skipped as expected - %s", input[0], u_errorName(status));

- }

- // test non-ignorables are not ignored

- size = set_not_ignorable->size();

- for (int i = 0; i < size; i++) {

- status = U_ZERO_ERROR;

- outputLength= 0;

- input[0] = set_not_ignorable->charAt(i);

- if (input[0] == 0) {

- continue;

- }

- outputLength = ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);

- if (U_FAILURE(status) || outputLength <= 0) {

- errln("Non-ignorable code point: U+%04X skipped unexpectedly - %s", input[0], u_errorName(status));

- }

- ucnv_close(cnv);

- delete set_not_ignorable;

- delete set_ignorable;

-// open testdata or ICU data converter ------------------------------------- ***

-UConverter *

-ConversionTest::cnv_open(const char *name, UErrorCode &errorCode) {

- if(name!=NULL && *name=='+') {

- // Converter names that start with '+' are ignored in ICU4J tests.

- ++name;

- }

- if(name!=NULL && *name=='*') {

- /* loadTestData(): set the data directory */

- return ucnv_openPackage(loadTestData(errorCode), name+1, &errorCode);

- } else {

- return ucnv_open(name, &errorCode);

- }

-// output helpers ---------------------------------------------------------- ***

-static inline char

-hexDigit(uint8_t digit) {

- return digit<=9 ? (char)('0'+digit) : (char)('a'-10+digit);

-static char *

-printBytes(const uint8_t *bytes, int32_t length, char *out) {

- uint8_t b;

- if(length>0) {

- b=*bytes++;

- --length;

- *out++=hexDigit((uint8_t)(b>>4));

- *out++=hexDigit((uint8_t)(b&0xf));

- }

- while(length>0) {

- b=*bytes++;

- --length;

- *out++=' ';

- *out++=hexDigit((uint8_t)(b>>4));

- *out++=hexDigit((uint8_t)(b&0xf));

- }

- *out++=0;

- return out;

-static char *

-printUnicode(const UChar *unicode, int32_t length, char *out) {

- UChar32 c;

- int32_t i;

- for(i=0; i<length;) {

- if(i>0) {

- *out++=' ';

- }

- U16_NEXT(unicode, i, length, c);

- // write 4..6 digits

- if(c>=0x100000) {

- *out++='1';

- }

- if(c>=0x10000) {

- *out++=hexDigit((uint8_t)((c>>16)&0xf));

- }

- *out++=hexDigit((uint8_t)((c>>12)&0xf));

- *out++=hexDigit((uint8_t)((c>>8)&0xf));

- *out++=hexDigit((uint8_t)((c>>4)&0xf));

- *out++=hexDigit((uint8_t)(c&0xf));

- }

- *out++=0;

- return out;

-static char *

-printOffsets(const int32_t *offsets, int32_t length, char *out) {

- int32_t i, o, d;

- if(offsets==NULL) {

- length=0;

- }

- for(i=0; i<length; ++i) {

- if(i>0) {

- *out++=' ';

- }

- o=offsets[i];

- // print all offsets with 2 characters each (-x, -9..99, xx)

- if(o<-9) {

- *out++='-';

- *out++='x';

- } else if(o<0) {

- *out++='-';

- *out++=(char)('0'-o);

- } else if(o<=99) {

- *out++=(d=o/10)==0 ? ' ' : (char)('0'+d);

- *out++=(char)('0'+o%10);

- } else /* o>99 */ {

- *out++='x';

- }

- *out++=0;

- return out;

-// toUnicode test worker functions ----------------------------------------- ***

-static int32_t

-stepToUnicode(ConversionCase &cc, UConverter *cnv,

- UChar *result, int32_t resultCapacity,

- int32_t *resultOffsets, /* also resultCapacity */

- int32_t step,

- UErrorCode *pErrorCode) {

- const char *source, *sourceLimit, *bytesLimit;

- UChar *target, *targetLimit, *resultLimit;

- UBool flush;

- source=(const char *)cc.bytes;

- target=result;

- bytesLimit=source+cc.bytesLength;

- resultLimit=result+resultCapacity;

- if(step>=0) {

- // call ucnv_toUnicode() with in/out buffers no larger than (step) at a time

- // move only one buffer (in vs. out) at a time to be extra mean

- // step==0 performs bulk conversion and generates offsets

- // initialize the partial limits for the loop

- if(step==0) {

- // use the entire buffers

- sourceLimit=bytesLimit;

- targetLimit=resultLimit;

- flush=cc.finalFlush;

- } else {

- // start with empty partial buffers

- sourceLimit=source;

- targetLimit=target;

- flush=FALSE;

- // output offsets only for bulk conversion

- resultOffsets=NULL;

- }

- for(;;) {

- // resetting the opposite conversion direction must not affect this one

- ucnv_resetFromUnicode(cnv);

- // convert

- ucnv_toUnicode(cnv,

- &target, targetLimit,

- &source, sourceLimit,

- resultOffsets,

- flush, pErrorCode);

- // check pointers and errors

- if(source>sourceLimit || target>targetLimit) {

- *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

- break;

- } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

- if(target!=targetLimit) {

- // buffer overflow must only be set when the target is filled

- *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

- break;

- } else if(targetLimit==resultLimit) {

- // not just a partial overflow

- break;

- }

- // the partial target is filled, set a new limit, reset the error and continue

- targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;

- *pErrorCode=U_ZERO_ERROR;

- } else if(U_FAILURE(*pErrorCode)) {

- // some other error occurred, done

- break;

- } else {

- if(source!=sourceLimit) {

- // when no error occurs, then the input must be consumed

- *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

- break;

- }

- if(sourceLimit==bytesLimit) {

- // we are done

- break;

- }

- // the partial conversion succeeded, set a new limit and continue

- sourceLimit=(bytesLimit-source)>=step ? source+step : bytesLimit;

- flush=(UBool)(cc.finalFlush && sourceLimit==bytesLimit);

- }

- } else /* step<0 */ {

- /*

- * step==-1: call only ucnv_getNextUChar()

- * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar()

- * if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input,

- * else give it at most (-step-2)/2 bytes

- */

- UChar32 c;

- // end the loop by getting an index out of bounds error

- for(;;) {

- // resetting the opposite conversion direction must not affect this one

- ucnv_resetFromUnicode(cnv);

- // convert

- if((step&1)!=0 /* odd: -1, -3, -5, ... */) {

- sourceLimit=source; // use sourceLimit not as a real limit

- // but to remember the pre-getNextUChar source pointer

- c=ucnv_getNextUChar(cnv, &source, bytesLimit, pErrorCode);

- // check pointers and errors

- if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) {

- if(source!=bytesLimit) {

- *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

- } else {

- *pErrorCode=U_ZERO_ERROR;

- }

- break;

- } else if(U_FAILURE(*pErrorCode)) {

- break;

- }

- // source may not move if c is from previous overflow

- if(target==resultLimit) {

- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

- break;

- }

- if(c<=0xffff) {

- *target++=(UChar)c;

- } else {

- *target++=U16_LEAD(c);

- if(target==resultLimit) {

- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

- break;

- }

- *target++=U16_TRAIL(c);

- }

- // alternate between -n-1 and -n but leave -1 alone

- if(step<-1) {

- ++step;

- }

- } else /* step is even */ {

- // allow only one UChar output

- targetLimit=target<resultLimit ? target+1 : resultLimit;

- // as with ucnv_getNextUChar(), we always flush (if we go to bytesLimit)

- // and never output offsets

- if(step==-2) {

- sourceLimit=bytesLimit;

- } else {

- sourceLimit=source+(-step-2)/2;

- if(sourceLimit>bytesLimit) {

- sourceLimit=bytesLimit;

- }

- ucnv_toUnicode(cnv,

- &target, targetLimit,

- &source, sourceLimit,

- NULL, (UBool)(sourceLimit==bytesLimit), pErrorCode);

- // check pointers and errors

- if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

- if(target!=targetLimit) {

- // buffer overflow must only be set when the target is filled

- *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

- break;

- } else if(targetLimit==resultLimit) {

- // not just a partial overflow

- break;

- }

- // the partial target is filled, set a new limit and continue

- *pErrorCode=U_ZERO_ERROR;

- } else if(U_FAILURE(*pErrorCode)) {

- // some other error occurred, done

- break;

- } else {

- if(source!=sourceLimit) {

- // when no error occurs, then the input must be consumed

- *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

- break;

- }

- // we are done (flush==TRUE) but we continue, to get the index out of bounds error above

- }

- --step;

- }

- return (int32_t)(target-result);

-UBool

-ConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback, const char *option) {

- // open the converter

- IcuTestErrorCode errorCode(*this, "ToUnicodeCase");

- LocalUConverterPointer cnv(cnv_open(cc.charset, errorCode));

- if(errorCode.isFailure()) {

- errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",

- cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, errorCode.errorName());

- errorCode.reset();

- return FALSE;

- }

- // set the callback

- if(callback!=NULL) {

- ucnv_setToUCallBack(cnv.getAlias(), callback, option, NULL, NULL, errorCode);

- if(U_FAILURE(errorCode)) {

- errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBack() failed - %s",

- cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));

- return FALSE;

- }

- int32_t resultOffsets[256];

- UChar result[256];

- int32_t resultLength;

- UBool ok;

- static const struct {

- int32_t step;

- const char *name;

- } steps[]={

- { 0, "bulk" }, // must be first for offsets to be checked

- { 1, "step=1" },

- { 3, "step=3" },

- { 7, "step=7" },

- { -1, "getNext" },

- { -2, "toU(bulk)+getNext" },

- { -3, "getNext+toU(bulk)" },

- { -4, "toU(1)+getNext" },

- { -5, "getNext+toU(1)" },

- { -12, "toU(5)+getNext" },

- { -13, "getNext+toU(5)" },

- };

- int32_t i, step;

- ok=TRUE;

- for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {

- step=steps[i].step;

- if(step<0 && !cc.finalFlush) {

- // skip ucnv_getNextUChar() if !finalFlush because

- // ucnv_getNextUChar() always implies flush

- continue;

- }

- if(step!=0) {

- // bulk test is first, then offsets are not checked any more

- cc.offsets=NULL;

- }

- else {

- memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets));

- }

- memset(result, -1, UPRV_LENGTHOF(result));

- errorCode.reset();

- resultLength=stepToUnicode(cc, cnv.getAlias(),

- result, UPRV_LENGTHOF(result),

- step==0 ? resultOffsets : NULL,

- step, errorCode);

- ok=checkToUnicode(

- cc, cnv.getAlias(), steps[i].name,

- result, resultLength,

- cc.offsets!=NULL ? resultOffsets : NULL,

- errorCode);

- if(errorCode.isFailure() || !cc.finalFlush) {

- // reset if an error occurred or we did not flush

- // otherwise do nothing to make sure that flushing resets

- ucnv_resetToUnicode(cnv.getAlias());

- }

- if (cc.offsets != NULL && resultOffsets[resultLength] != -1) {

- errln("toUnicode[%d](%s) Conversion wrote too much to offsets at index %d",

- cc.caseNr, cc.charset, resultLength);

- }

- if (result[resultLength] != (UChar)-1) {

- errln("toUnicode[%d](%s) Conversion wrote too much to result at index %d",

- cc.caseNr, cc.charset, resultLength);

- }

- // not a real loop, just a convenience for breaking out of the block

- while(ok && cc.finalFlush) {

- // test ucnv_toUChars()

- memset(result, 0, sizeof(result));

- errorCode.reset();

- resultLength=ucnv_toUChars(cnv.getAlias(),

- result, UPRV_LENGTHOF(result),

- (const char *)cc.bytes, cc.bytesLength,

- errorCode);

- ok=checkToUnicode(

- cc, cnv.getAlias(), "toUChars",

- result, resultLength,

- NULL,

- errorCode);

- if(!ok) {

- break;

- }

- // test preflighting

- // keep the correct result for simple checking

- errorCode.reset();

- resultLength=ucnv_toUChars(cnv.getAlias(),

- NULL, 0,

- (const char *)cc.bytes, cc.bytesLength,

- errorCode);

- if(errorCode.get()==U_STRING_NOT_TERMINATED_WARNING || errorCode.get()==U_BUFFER_OVERFLOW_ERROR) {

- errorCode.reset();

- }

- ok=checkToUnicode(

- cc, cnv.getAlias(), "preflight toUChars",

- result, resultLength,

- NULL,

- errorCode);

- break;

- }

- errorCode.reset(); // all errors have already been reported

- return ok;

-UBool

-ConversionTest::checkToUnicode(ConversionCase &cc, UConverter *cnv, const char *name,

- const UChar *result, int32_t resultLength,

- const int32_t *resultOffsets,

- UErrorCode resultErrorCode) {

- char resultInvalidChars[8];

- int8_t resultInvalidLength;

- UErrorCode errorCode;

- const char *msg;

- // reset the message; NULL will mean "ok"

- msg=NULL;

- errorCode=U_ZERO_ERROR;

- resultInvalidLength=sizeof(resultInvalidChars);

- ucnv_getInvalidChars(cnv, resultInvalidChars, &resultInvalidLength, &errorCode);

- if(U_FAILURE(errorCode)) {

- errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidChars() failed - %s",

- cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode));

- return FALSE;

- }

- // check everything that might have gone wrong

- if(cc.unicodeLength!=resultLength) {

- msg="wrong result length";

- } else if(0!=u_memcmp(cc.unicode, result, cc.unicodeLength)) {

- msg="wrong result string";

- } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.unicodeLength*sizeof(*cc.offsets))) {

- msg="wrong offsets";

- } else if(cc.outErrorCode!=resultErrorCode) {

- msg="wrong error code";

- } else if(cc.invalidLength!=resultInvalidLength) {

- msg="wrong length of last invalid input";

- } else if(0!=memcmp(cc.invalidChars, resultInvalidChars, cc.invalidLength)) {

- msg="wrong last invalid input";

- }

- if(msg==NULL) {

- return TRUE;

- } else {

- char buffer[2000]; // one buffer for all strings

- char *s, *bytesString, *unicodeString, *resultString,

- *offsetsString, *resultOffsetsString,

- *invalidCharsString, *resultInvalidCharsString;

- bytesString=s=buffer;

- s=printBytes(cc.bytes, cc.bytesLength, bytesString);

- s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString=s);

- s=printUnicode(result, resultLength, resultString=s);

- s=printOffsets(cc.offsets, cc.unicodeLength, offsetsString=s);

- s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);

- s=printBytes(cc.invalidChars, cc.invalidLength, invalidCharsString=s);

- s=printBytes((uint8_t *)resultInvalidChars, resultInvalidLength, resultInvalidCharsString=s);

- if((s-buffer)>(int32_t)sizeof(buffer)) {

- errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkToUnicode() test output buffer overflow writing %d chars\n",

- cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer));

- exit(1);

- }

- errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"

- " bytes <%s>[%d]\n"

- " expected <%s>[%d]\n"

- " result <%s>[%d]\n"

- " offsets <%s>\n"

- " result offsets <%s>\n"

- " error code expected %s got %s\n"

- " invalidChars expected <%s> got <%s>\n",

- cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg,

- bytesString, cc.bytesLength,

- unicodeString, cc.unicodeLength,

- resultString, resultLength,

- offsetsString,

- resultOffsetsString,

- u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),

- invalidCharsString, resultInvalidCharsString);

- return FALSE;

- }

-// fromUnicode test worker functions --------------------------------------- ***

-static int32_t

-stepFromUTF8(ConversionCase &cc,

- UConverter *utf8Cnv, UConverter *cnv,

- char *result, int32_t resultCapacity,

- int32_t step,

- UErrorCode *pErrorCode) {

- const char *source, *sourceLimit, *utf8Limit;

- UChar pivotBuffer[32];

- UChar *pivotSource, *pivotTarget, *pivotLimit;

- char *target, *targetLimit, *resultLimit;

- UBool flush;

- source=cc.utf8;

- pivotSource=pivotTarget=pivotBuffer;

- target=result;

- utf8Limit=source+cc.utf8Length;

- resultLimit=result+resultCapacity;

- // call ucnv_convertEx() with in/out buffers no larger than (step) at a time

- // move only one buffer (in vs. out) at a time to be extra mean

- // step==0 performs bulk conversion

- // initialize the partial limits for the loop

- if(step==0) {

- // use the entire buffers

- sourceLimit=utf8Limit;

- targetLimit=resultLimit;

- flush=cc.finalFlush;

- pivotLimit=pivotBuffer+UPRV_LENGTHOF(pivotBuffer);

- } else {

- // start with empty partial buffers

- sourceLimit=source;

- targetLimit=target;

- flush=FALSE;

- // empty pivot is not allowed, make it of length step

- pivotLimit=pivotBuffer+step;

- }

- for(;;) {

- // resetting the opposite conversion direction must not affect this one

- ucnv_resetFromUnicode(utf8Cnv);

- ucnv_resetToUnicode(cnv);

- // convert

- ucnv_convertEx(cnv, utf8Cnv,

- &target, targetLimit,

- &source, sourceLimit,

- pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,

- FALSE, flush, pErrorCode);

- // check pointers and errors

- if(source>sourceLimit || target>targetLimit) {

- *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

- break;

- } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

- if(target!=targetLimit) {

- // buffer overflow must only be set when the target is filled

- *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

- break;

- } else if(targetLimit==resultLimit) {

- // not just a partial overflow

- break;

- }

- // the partial target is filled, set a new limit, reset the error and continue

- targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;

- *pErrorCode=U_ZERO_ERROR;

- } else if(U_FAILURE(*pErrorCode)) {

- if(pivotSource==pivotBuffer) {

- // toUnicode error, should not occur

- // toUnicode errors are tested in cintltst TestConvertExFromUTF8()

- break;

- } else {

- // fromUnicode error

- // some other error occurred, done

- break;

- }

- } else {

- if(source!=sourceLimit) {

- // when no error occurs, then the input must be consumed

- *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

- break;

- }

- if(sourceLimit==utf8Limit) {

- // we are done

- if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {

- // ucnv_convertEx() warns about not terminating the output

- // but ucnv_fromUnicode() does not and so

- // checkFromUnicode() does not expect it

- *pErrorCode=U_ZERO_ERROR;

- }

- break;

- }

- // the partial conversion succeeded, set a new limit and continue

- sourceLimit=(utf8Limit-source)>=step ? source+step : utf8Limit;

- flush=(UBool)(cc.finalFlush && sourceLimit==utf8Limit);

- }

- return (int32_t)(target-result);

-static int32_t

-stepFromUnicode(ConversionCase &cc, UConverter *cnv,

- char *result, int32_t resultCapacity,

- int32_t *resultOffsets, /* also resultCapacity */

- int32_t step,

- UErrorCode *pErrorCode) {

- const UChar *source, *sourceLimit, *unicodeLimit;

- char *target, *targetLimit, *resultLimit;

- UBool flush;

- source=cc.unicode;

- target=result;

- unicodeLimit=source+cc.unicodeLength;

- resultLimit=result+resultCapacity;

- // call ucnv_fromUnicode() with in/out buffers no larger than (step) at a time

- // move only one buffer (in vs. out) at a time to be extra mean

- // step==0 performs bulk conversion and generates offsets

- // initialize the partial limits for the loop

- if(step==0) {

- // use the entire buffers

- sourceLimit=unicodeLimit;

- targetLimit=resultLimit;

- flush=cc.finalFlush;

- } else {

- // start with empty partial buffers

- sourceLimit=source;

- targetLimit=target;

- flush=FALSE;

- // output offsets only for bulk conversion

- resultOffsets=NULL;

- }

- for(;;) {

- // resetting the opposite conversion direction must not affect this one

- ucnv_resetToUnicode(cnv);

- // convert

- ucnv_fromUnicode(cnv,

- &target, targetLimit,

- &source, sourceLimit,

- resultOffsets,

- flush, pErrorCode);

- // check pointers and errors

- if(source>sourceLimit || target>targetLimit) {

- *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

- break;

- } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

- if(target!=targetLimit) {

- // buffer overflow must only be set when the target is filled

- *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

- break;

- } else if(targetLimit==resultLimit) {

- // not just a partial overflow

- break;

- }

- // the partial target is filled, set a new limit, reset the error and continue

- targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;

- *pErrorCode=U_ZERO_ERROR;

- } else if(U_FAILURE(*pErrorCode)) {

- // some other error occurred, done

- break;

- } else {

- if(source!=sourceLimit) {

- // when no error occurs, then the input must be consumed

- *pErrorCode=U_INTERNAL_PROGRAM_ERROR;

- break;

- }

- if(sourceLimit==unicodeLimit) {

- // we are done

- break;

- }

- // the partial conversion succeeded, set a new limit and continue

- sourceLimit=(unicodeLimit-source)>=step ? source+step : unicodeLimit;

- flush=(UBool)(cc.finalFlush && sourceLimit==unicodeLimit);

- }

- return (int32_t)(target-result);

-UBool

-ConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback callback, const char *option) {

- UConverter *cnv;

- UErrorCode errorCode;

- // open the converter

- errorCode=U_ZERO_ERROR;

- cnv=cnv_open(cc.charset, errorCode);

- if(U_FAILURE(errorCode)) {

- errcheckln(errorCode, "fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",

- cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));

- return FALSE;

- }

- ucnv_resetToUnicode(utf8Cnv);

- // set the callback

- if(callback!=NULL) {

- ucnv_setFromUCallBack(cnv, callback, option, NULL, NULL, &errorCode);

- if(U_FAILURE(errorCode)) {

- errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCallBack() failed - %s",

- cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));

- ucnv_close(cnv);

- return FALSE;

- }

- // set the fallbacks flag

- // TODO change with Jitterbug 2401, then add a similar call for toUnicode too

- ucnv_setFallback(cnv, cc.fallbacks);

- // set the subchar

- int32_t length;

- if(cc.setSub>0) {

- length=(int32_t)strlen(cc.subchar);

- ucnv_setSubstChars(cnv, cc.subchar, (int8_t)length, &errorCode);

- if(U_FAILURE(errorCode)) {

- errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstChars() failed - %s",

- cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));

- ucnv_close(cnv);

- return FALSE;

- }

- } else if(cc.setSub<0) {

- ucnv_setSubstString(cnv, cc.subString, -1, &errorCode);

- if(U_FAILURE(errorCode)) {

- errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstString() failed - %s",

- cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));

- ucnv_close(cnv);

- return FALSE;

- }

- // convert unicode to utf8

- char utf8[256];

- cc.utf8=utf8;

- u_strToUTF8(utf8, UPRV_LENGTHOF(utf8), &cc.utf8Length,

- cc.unicode, cc.unicodeLength,

- &errorCode);

- if(U_FAILURE(errorCode)) {

- // skip UTF-8 testing of a string with an unpaired surrogate,

- // or of one that's too long

- // toUnicode errors are tested in cintltst TestConvertExFromUTF8()

- cc.utf8Length=-1;

- }

- int32_t resultOffsets[256];

- char result[256];

- int32_t resultLength;

- UBool ok;

- static const struct {

- int32_t step;

- const char *name, *utf8Name;

- } steps[]={

- { 0, "bulk", "utf8" }, // must be first for offsets to be checked

- { 1, "step=1", "utf8 step=1" },

- { 3, "step=3", "utf8 step=3" },

- { 7, "step=7", "utf8 step=7" }

- };

- int32_t i, step;

- ok=TRUE;

- for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {

- step=steps[i].step;

- memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets));

- memset(result, -1, UPRV_LENGTHOF(result));

- errorCode=U_ZERO_ERROR;

- resultLength=stepFromUnicode(cc, cnv,

- result, UPRV_LENGTHOF(result),

- step==0 ? resultOffsets : NULL,

- step, &errorCode);

- ok=checkFromUnicode(

- cc, cnv, steps[i].name,

- (uint8_t *)result, resultLength,

- cc.offsets!=NULL ? resultOffsets : NULL,

- errorCode);

- if(U_FAILURE(errorCode) || !cc.finalFlush) {

- // reset if an error occurred or we did not flush

- // otherwise do nothing to make sure that flushing resets

- ucnv_resetFromUnicode(cnv);

- }

- if (resultOffsets[resultLength] != -1) {

- errln("fromUnicode[%d](%s) Conversion wrote too much to offsets at index %d",

- cc.caseNr, cc.charset, resultLength);

- }

- if (result[resultLength] != (char)-1) {

- errln("fromUnicode[%d](%s) Conversion wrote too much to result at index %d",

- cc.caseNr, cc.charset, resultLength);

- }

- // bulk test is first, then offsets are not checked any more

- cc.offsets=NULL;

- // test direct conversion from UTF-8

- if(cc.utf8Length>=0) {

- errorCode=U_ZERO_ERROR;

- resultLength=stepFromUTF8(cc, utf8Cnv, cnv,

- result, UPRV_LENGTHOF(result),

- step, &errorCode);

- ok=checkFromUnicode(

- cc, cnv, steps[i].utf8Name,

- (uint8_t *)result, resultLength,

- NULL,

- errorCode);

- if(U_FAILURE(errorCode) || !cc.finalFlush) {

- // reset if an error occurred or we did not flush

- // otherwise do nothing to make sure that flushing resets

- ucnv_resetToUnicode(utf8Cnv);

- ucnv_resetFromUnicode(cnv);

- }

- // not a real loop, just a convenience for breaking out of the block

- while(ok && cc.finalFlush) {

- // test ucnv_fromUChars()

- memset(result, 0, sizeof(result));

- errorCode=U_ZERO_ERROR;

- resultLength=ucnv_fromUChars(cnv,

- result, UPRV_LENGTHOF(result),

- cc.unicode, cc.unicodeLength,

- &errorCode);

- ok=checkFromUnicode(

- cc, cnv, "fromUChars",

- (uint8_t *)result, resultLength,

- NULL,

- errorCode);

- if(!ok) {

- break;

- }

- // test preflighting

- // keep the correct result for simple checking

- errorCode=U_ZERO_ERROR;

- resultLength=ucnv_fromUChars(cnv,

- NULL, 0,

- cc.unicode, cc.unicodeLength,

- &errorCode);

- if(errorCode==U_STRING_NOT_TERMINATED_WARNING || errorCode==U_BUFFER_OVERFLOW_ERROR) {

- errorCode=U_ZERO_ERROR;

- }

- ok=checkFromUnicode(

- cc, cnv, "preflight fromUChars",

- (uint8_t *)result, resultLength,

- NULL,

- errorCode);

- break;

- }

- ucnv_close(cnv);

- return ok;

-UBool

-ConversionTest::checkFromUnicode(ConversionCase &cc, UConverter *cnv, const char *name,

- const uint8_t *result, int32_t resultLength,

- const int32_t *resultOffsets,

- UErrorCode resultErrorCode) {

- UChar resultInvalidUChars[8];

- int8_t resultInvalidLength;

- UErrorCode errorCode;

- const char *msg;

- // reset the message; NULL will mean "ok"

- msg=NULL;

- errorCode=U_ZERO_ERROR;

- resultInvalidLength=UPRV_LENGTHOF(resultInvalidUChars);

- ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &errorCode);

- if(U_FAILURE(errorCode)) {

- errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s",

- cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode));

- return FALSE;

- }

- // check everything that might have gone wrong

- if(cc.bytesLength!=resultLength) {

- msg="wrong result length";

- } else if(0!=memcmp(cc.bytes, result, cc.bytesLength)) {

- msg="wrong result string";

- } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.bytesLength*sizeof(*cc.offsets))) {

- msg="wrong offsets";

- } else if(cc.outErrorCode!=resultErrorCode) {

- msg="wrong error code";

- } else if(cc.invalidLength!=resultInvalidLength) {

- msg="wrong length of last invalid input";

- } else if(0!=u_memcmp(cc.invalidUChars, resultInvalidUChars, cc.invalidLength)) {

- msg="wrong last invalid input";

- }

- if(msg==NULL) {

- return TRUE;

- } else {

- char buffer[2000]; // one buffer for all strings

- char *s, *unicodeString, *bytesString, *resultString,

- *offsetsString, *resultOffsetsString,

- *invalidCharsString, *resultInvalidUCharsString;

- unicodeString=s=buffer;

- s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString);

- s=printBytes(cc.bytes, cc.bytesLength, bytesString=s);

- s=printBytes(result, resultLength, resultString=s);

- s=printOffsets(cc.offsets, cc.bytesLength, offsetsString=s);

- s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);

- s=printUnicode(cc.invalidUChars, cc.invalidLength, invalidCharsString=s);

- s=printUnicode(resultInvalidUChars, resultInvalidLength, resultInvalidUCharsString=s);

- if((s-buffer)>(int32_t)sizeof(buffer)) {

- errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkFromUnicode() test output buffer overflow writing %d chars\n",

- cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer));

- exit(1);

- }

- errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"

- " unicode <%s>[%d]\n"

- " expected <%s>[%d]\n"

- " result <%s>[%d]\n"

- " offsets <%s>\n"

- " result offsets <%s>\n"

- " error code expected %s got %s\n"

- " invalidChars expected <%s> got <%s>\n",

- cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg,

- unicodeString, cc.unicodeLength,

- bytesString, cc.bytesLength,

- resultString, resultLength,

- offsetsString,

- resultOffsetsString,

- u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),

- invalidCharsString, resultInvalidUCharsString);

- return FALSE;

- }

-#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

« no previous file with comments | « source/test/intltest/convtest.h ('k') | source/test/intltest/cpdtrtst.h » ('j') | no next file with comments »