icu46/source/test/intltest/ucdtest.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/test/intltest/ucdtest.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /********************************************************************

	2 * COPYRIGHT:

	3 * Copyright (c) 1997-2010, International Business Machines Corporation and

	4 * others. All Rights Reserved.

	5 ********************************************************************/

	6

	7 #include "unicode/ustring.h"

	8 #include "unicode/uchar.h"

	9 #include "unicode/uniset.h"

	10 #include "unicode/putil.h"

	11 #include "cstring.h"

	12 #include "hash.h"

	13 #include "normalizer2impl.h"

	14 #include "uparse.h"

	15 #include "ucdtest.h"

	16

	/* Number of elements in a C array, as a signed 32-bit count. */
	#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof(array[0]))

	/*
	 * Property names that are ignored altogether.
	 * The UnicodeTest constructor registers each of them in unknownPropertyNames,
	 * so derivedPropsLineFn() finds them there and does not report them
	 * as unknown property names.
	 */
	static const char *const ignorePropNames[]={
	    "FC_NFKC",
	    "NFD_QC",
	    "NFC_QC",
	    "NFKD_QC",
	    "NFKC_QC",
	    "Expands_On_NFD",
	    "Expands_On_NFC",
	    "Expands_On_NFKD",
	    "Expands_On_NFKC",
	    "NFKC_CF"
	};

	31

	32 UnicodeTest::UnicodeTest()

	33 {

	34 UErrorCode errorCode=U_ZERO_ERROR;

	35 unknownPropertyNames=new U_NAMESPACE_QUALIFIER Hashtable(errorCode);

	36 if(U_FAILURE(errorCode)) {

	37 delete unknownPropertyNames;

	38 unknownPropertyNames=NULL;

	39 }

	40 // Ignore some property names altogether.

	41 for(int32_t i=0; i<LENGTHOF(ignorePropNames); ++i) {

	42 unknownPropertyNames->puti(UnicodeString(ignorePropNames[i], -1, US_INV) , 1, errorCode);

	43 }

	44 }

	45

	46 UnicodeTest::~UnicodeTest()

	47 {

	48 delete unknownPropertyNames;

	49 }

	50

	51 void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /par/ )

	52 {

	53 if (exec) logln("TestSuite UnicodeTest: ");

	54 switch (index) {

	55 case 0: name = "TestAdditionalProperties"; if(exec) TestAdditionalProper ties(); break;

	56 case 1: name = "TestBinaryValues"; if(exec) TestBinaryValues(); break;

	57 case 2: name = "TestConsistency"; if(exec) TestConsistency(); break;

	58 default: name = ""; break; //needed to end loop

	59 }

	60 }

	61

	62 //====================================================

	63 // private data used by the tests

	64 //====================================================

	65

	66 // test DerivedCoreProperties.txt -------------------------------------------

	67

	68 // copied from genprops.c

	69 static int32_t

	70 getTokenIndex(const char const tokens[], int32_t countTokens, const char s) {

	71 const char t, z;

	72 int32_t i, j;

	73

	74 s=u_skipWhitespace(s);

	75 for(i=0; i<countTokens; ++i) {

	76 t=tokens[i];

	77 if(t!=NULL) {

	78 for(j=0;; ++j) {

	79 if(t[j]!=0) {

	80 if(s[j]!=t[j]) {

	81 break;

	82 }

	83 } else {

	84 z=u_skipWhitespace(s+j);

	85 if(z==';' \|\| z==0) {

	86 return i;

	87 } else {

	88 break;

	89 }

	90 }

	91 }

	92 }

	93 }

	94 return -1;

	95 }

	96

	/*
	 * Names of the derived binary properties that are checked.
	 * Entry i here corresponds to entry i of derivedPropsIndex[] and to
	 * derivedProps[i]; TestAdditionalProperties() verifies the array lengths.
	 */
	static const char *const
	derivedPropsNames[]={
	    "Math",
	    "Alphabetic",
	    "Lowercase",
	    "Uppercase",
	    "ID_Start",
	    "ID_Continue",
	    "XID_Start",
	    "XID_Continue",
	    "Default_Ignorable_Code_Point",
	    "Full_Composition_Exclusion",
	    "Grapheme_Extend",
	    "Grapheme_Link", /* Unicode 5 moves this property here from PropList.txt */
	    "Grapheme_Base",
	    "Cased",
	    "Case_Ignorable",
	    "Changes_When_Lowercased",
	    "Changes_When_Uppercased",
	    "Changes_When_Titlecased",
	    "Changes_When_Casefolded",
	    "Changes_When_Casemapped",
	    "Changes_When_NFKC_Casefolded"
	};

	121

	122 static const UProperty

	123 derivedPropsIndex[]={

	124 UCHAR_MATH,

	125 UCHAR_ALPHABETIC,

	126 UCHAR_LOWERCASE,

	127 UCHAR_UPPERCASE,

	128 UCHAR_ID_START,

	129 UCHAR_ID_CONTINUE,

	130 UCHAR_XID_START,

	131 UCHAR_XID_CONTINUE,

	132 UCHAR_DEFAULT_IGNORABLE_CODE_POINT,

	133 UCHAR_FULL_COMPOSITION_EXCLUSION,

	134 UCHAR_GRAPHEME_EXTEND,

	135 UCHAR_GRAPHEME_LINK,

	136 UCHAR_GRAPHEME_BASE,

	137 UCHAR_CASED,

	138 UCHAR_CASE_IGNORABLE,

	139 UCHAR_CHANGES_WHEN_LOWERCASED,

	140 UCHAR_CHANGES_WHEN_UPPERCASED,

	141 UCHAR_CHANGES_WHEN_TITLECASED,

	142 UCHAR_CHANGES_WHEN_CASEFOLDED,

	143 UCHAR_CHANGES_WHEN_CASEMAPPED,

	144 UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED

	145 };

	146

	147 static int32_t numErrors[LENGTHOF(derivedPropsIndex)]={ 0 };

	148

	149 enum { MAX_ERRORS=50 };

	150

	151 U_CFUNC void U_CALLCONV

	152 derivedPropsLineFn(void *context,

	153 char fields[][2], int32_t / fieldCount */,

	154 UErrorCode *pErrorCode)

	155 {

	156 UnicodeTest me=(UnicodeTest )context;

	157 uint32_t start, end;

	158 int32_t i;

	159

	160 u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);

	161 if(U_FAILURE(*pErrorCode)) {

	162 me->errln("UnicodeTest: syntax error in DerivedCoreProperties.txt or Der ivedNormalizationProps.txt field 0 at %s\n", fields[0][0]);

	163 return;

	164 }

	165

	166 /* parse derived binary property name, ignore unknown names */

	167 i=getTokenIndex(derivedPropsNames, LENGTHOF(derivedPropsNames), fields[1][0] );

	168 if(i<0) {

	169 UnicodeString propName(fields[1][0], (int32_t)(fields[1][1]-fields[1][0] ));

	170 propName.trim();

	171 if(me->unknownPropertyNames->find(propName)==NULL) {

	172 UErrorCode errorCode=U_ZERO_ERROR;

	173 me->unknownPropertyNames->puti(propName, 1, errorCode);

	174 me->errln("UnicodeTest warning: unknown property name '%s' in Derive dCoreProperties.txt or DerivedNormalizationProps.txt\n", fields[1][0]);

	175 }

	176 return;

	177 }

	178

	179 me->derivedProps[i].add(start, end);

	180 }

	181

	182 void UnicodeTest::TestAdditionalProperties() {

	183 #if !UCONFIG_NO_NORMALIZATION

	184 // test DerivedCoreProperties.txt and DerivedNormalizationProps.txt

	185 if(LENGTHOF(derivedProps)<LENGTHOF(derivedPropsNames)) {

	186 errln("error: UnicodeTest::derivedProps[] too short, need at least %d Un icodeSets\n",

	187 LENGTHOF(derivedPropsNames));

	188 return;

	189 }

	190 if(LENGTHOF(derivedPropsIndex)!=LENGTHOF(derivedPropsNames)) {

	191 errln("error in ucdtest.cpp: LENGTHOF(derivedPropsIndex)!=LENGTHOF(deriv edPropsNames)\n");

	192 return;

	193 }

	194

	195 char newPath[256];

	196 char backupPath[256];

	197 char *fields[2][2];

	198 UErrorCode errorCode=U_ZERO_ERROR;

	199

	200 /* Look inside ICU_DATA first */

	201 strcpy(newPath, pathToDataDirectory());

	202 strcat(newPath, "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt");

	203

	204 // As a fallback, try to guess where the source data was located

	205 // at the time ICU was built, and look there.

	206 # ifdef U_TOPSRCDIR

	207 strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data");

	208 # else

	209 strcpy(backupPath, loadTestData(errorCode));

	210 strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_ SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");

	211 # endif

	212 strcat(backupPath, U_FILE_SEP_STRING);

	213 strcat(backupPath, "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt");

	214

	215 char *path=newPath;

	216 u_parseDelimitedFile(newPath, ';', fields, 2, derivedPropsLineFn, this, &err orCode);

	217

	218 if(errorCode==U_FILE_ACCESS_ERROR) {

	219 errorCode=U_ZERO_ERROR;

	220 path=backupPath;

	221 u_parseDelimitedFile(backupPath, ';', fields, 2, derivedPropsLineFn, thi s, &errorCode);

	222 }

	223 if(U_FAILURE(errorCode)) {

	224 errln("error parsing DerivedCoreProperties.txt: %s\n", u_errorName(error Code));

	225 return;

	226 }

	227 char *basename=path+strlen(path)-strlen("DerivedCoreProperties.txt");

	228 strcpy(basename, "DerivedNormalizationProps.txt");

	229 u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorC ode);

	230 if(U_FAILURE(errorCode)) {

	231 errln("error parsing DerivedNormalizationProps.txt: %s\n", u_errorName(e rrorCode));

	232 return;

	233 }

	234

	235 // now we have all derived core properties in the UnicodeSets

	236 // run them all through the API

	237 int32_t rangeCount, range;

	238 uint32_t i;

	239 UChar32 start, end;

	240

	241 // test all TRUE properties

	242 for(i=0; i<LENGTHOF(derivedPropsNames); ++i) {

	243 rangeCount=derivedProps[i].getRangeCount();

	244 for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) {

	245 start=derivedProps[i].getRangeStart(range);

	246 end=derivedProps[i].getRangeEnd(range);

	247 for(; start<=end; ++start) {

	248 if(!u_hasBinaryProperty(start, derivedPropsIndex[i])) {

	249 dataerrln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, % s)==FALSE is wrong", start, derivedPropsNames[i]);

	250 if(++numErrors[i]>=MAX_ERRORS) {

	251 dataerrln("Too many errors, moving to the next test");

	252 break;

	253 }

	254 }

	255 }

	256 }

	257 }

	258

	259 // invert all properties

	260 for(i=0; i<LENGTHOF(derivedPropsNames); ++i) {

	261 derivedProps[i].complement();

	262 }

	263

	264 // test all FALSE properties

	265 for(i=0; i<LENGTHOF(derivedPropsNames); ++i) {

	266 rangeCount=derivedProps[i].getRangeCount();

	267 for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) {

	268 start=derivedProps[i].getRangeStart(range);

	269 end=derivedProps[i].getRangeEnd(range);

	270 for(; start<=end; ++start) {

	271 if(u_hasBinaryProperty(start, derivedPropsIndex[i])) {

	272 errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)== TRUE is wrong\n", start, derivedPropsNames[i]);

	273 if(++numErrors[i]>=MAX_ERRORS) {

	274 errln("Too many errors, moving to the next test");

	275 break;

	276 }

	277 }

	278 }

	279 }

	280 }

	281 #endif /* !UCONFIG_NO_NORMALIZATION */

	282 }

	283

	284 void UnicodeTest::TestBinaryValues() {

	285 /*

	286 * Unicode 5.1 explicitly defines binary property value aliases.

	287 * Verify that they are all recognized.

	288 */

	289 UErrorCode errorCode=U_ZERO_ERROR;

	290 UnicodeSet alpha(UNICODE_STRING_SIMPLE("[:Alphabetic:]"), errorCode);

	291 if(U_FAILURE(errorCode)) {

	292 dataerrln("UnicodeSet([:Alphabetic:]) failed - %s", u_errorName(errorCod e));

	293 return;

	294 }

	295

	296 static const char *const falseValues[]={ "N", "No", "F", "False" };

	297 static const char *const trueValues[]={ "Y", "Yes", "T", "True" };

	298 int32_t i;

	299 for(i=0; i<LENGTHOF(falseValues); ++i) {

	300 UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]");

	301 pattern.insert(pattern.length()-2, UnicodeString(falseValues[i], -1, US_ INV));

	302 errorCode=U_ZERO_ERROR;

	303 UnicodeSet set(pattern, errorCode);

	304 if(U_FAILURE(errorCode)) {

	305 errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", falseValues[i], u_errorName(errorCode));

	306 continue;

	307 }

	308 set.complement();

	309 if(set!=alpha) {

	310 errln("UnicodeSet([:Alphabetic=%s:]).complement()!=UnicodeSet([:Alph abetic:])\n", falseValues[i]);

	311 }

	312 }

	313 for(i=0; i<LENGTHOF(trueValues); ++i) {

	314 UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]");

	315 pattern.insert(pattern.length()-2, UnicodeString(trueValues[i], -1, US_I NV));

	316 errorCode=U_ZERO_ERROR;

	317 UnicodeSet set(pattern, errorCode);

	318 if(U_FAILURE(errorCode)) {

	319 errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", trueValues[i], u_errorName(errorCode));

	320 continue;

	321 }

	322 if(set!=alpha) {

	323 errln("UnicodeSet([:Alphabetic=%s:])!=UnicodeSet([:Alphabetic:])\n", trueValues[i]);

	324 }

	325 }

	326 }

	327

	328 void UnicodeTest::TestConsistency() {

	329 #if !UCONFIG_NO_NORMALIZATION

	330 /*

	331 * Test for an example that getCanonStartSet() delivers

	332 * all characters that compose from the input one,

	333 * even in multiple steps.

	334 * For example, the set for "I" (0049) should contain both

	335 * I-diaeresis (00CF) and I-diaeresis-acute (1E2E).

	336 * In general, the set for the middle such character should be a subset

	337 * of the set for the first.

	338 */

	339 IcuTestErrorCode errorCode(*this, "TestConsistency");

	340 const Normalizer2 *nfd=Normalizer2::getInstance(NULL, "nfc", UNORM2_DECOMPOS E, errorCode);

	341 const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(errorCode);

	342 if(errorCode.isFailure()) {

	343 dataerrln("Normalizer2::getInstance(NFD) or Normalizer2Factory::getNFCIm pl() failed - %s\n",

	344 errorCode.errorName());

	345 errorCode.reset();

	346 return;

	347 }

	348

	349 UnicodeSet set1, set2;

	350 if (nfcImpl->getCanonStartSet(0x49, set1)) {

	351 /* enumerate all characters that are plausible to be latin letters */

	352 for(UChar start=0xa0; start<0x2000; ++start) {

	353 UnicodeString decomp=nfd->normalize(UnicodeString(start), errorCode) ;

	354 if(decomp.length()>1 && decomp[0]==0x49) {

	355 set2.add(start);

	356 }

	357 }

	358

	359 if (set1!=set2) {

	360 errln("[canon start set of 0049] != [all c with canon decomp with 00 49]");

	361 }

	362 // This was available in cucdtst.c but the test had to move to intltest

	363 // because the new internal normalization functions are in C++.

	364 //compareUSets(set1, set2,

	365 // "[canon start set of 0049]", "[all c with canon decomp wi th 0049]",

	366 // TRUE);

	367 } else {

	368 errln("NFC.getCanonStartSet() returned FALSE");

	369 }

	370 #endif

	371 }

OLD	NEW

« no previous file with comments | « icu46/source/test/intltest/ucdtest.h ('k') | icu46/source/test/intltest/uobjtest.h » ('j') | no next file with comments »