icu46/source/test/cintltst/cucdtst.c - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/test/cintltst/cucdtst.c

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /********************************************************************

	2 * COPYRIGHT:

	3 * Copyright (c) 1997-2010, International Business Machines Corporation and

	4 * others. All Rights Reserved.

	5 ********************************************************************/

	6 /*******************************************************************************

	7 *

	8 * File CUCDTST.C

	9 *

	10 * Modification History:

	11 * Name Description

	12 * Madhu Katragadda Ported for C API, added tests for string functions

	13 ********************************************************************************

	14 */

	15

	16 #include <string.h>

	17 #include <math.h>

	18 #include <stdlib.h>

	19

	20 #include "unicode/utypes.h"

	21 #include "unicode/uchar.h"

	22 #include "unicode/putil.h"

	23 #include "unicode/ustring.h"

	24 #include "unicode/uloc.h"

	25 #include "unicode/unorm2.h"

	26

	27 #include "cintltst.h"

	28 #include "putilimp.h"

	29 #include "uparse.h"

	30 #include "ucase.h"

	31 #include "ubidi_props.h"

	32 #include "uprops.h"

	33 #include "uset_imp.h"

	34 #include "usc_impl.h"

	35 #include "udatamem.h" /* for testing ucase_openBinary() */

	36 #include "cucdapi.h"

	37

	/*
	 * Number of elements of a true array (not a pointer), as an int32_t.
	 * The whole expansion is parenthesized so that the macro behaves as a single
	 * primary expression wherever it is used (e.g. as the operand of sizeof or
	 * next to postfix operators).
	 */
	#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

	39

	/* prototypes --------------------------------------------------------------- */

	/* character classification and case mapping */
	static void TestUpperLower(void);
	static void TestLetterNumber(void);
	static void TestMisc(void);
	static void TestPOSIX(void);
	static void TestControlPrint(void);
	static void TestIdentifier(void);

	/* UnicodeData.txt, code units/points, names, mirroring, script runs */
	static void TestUnicodeData(void);
	static void TestCodeUnit(void);
	static void TestCodePoint(void);
	static void TestCharLength(void);
	static void TestCharNames(void);
	static void TestMirroring(void);
	static void TestUScriptRunAPI(void);

	/* property APIs and low-level property data */
	static void TestAdditionalProperties(void);
	static void TestNumericProperties(void);
	static void TestPropertyNames(void);
	static void TestPropertyValues(void);
	static void TestConsistency(void);
	static void TestUCase(void);
	static void TestUBiDiProps(void);
	static void TestCaseFolding(void);

	/* internal methods used */
	static int32_t MakeProp(char* str);
	static int32_t MakeDir(char* str);

	67

	68 /* helpers ------------------------------------------------------------------ */

	69

	70 static void

	71 parseUCDFile(const char *filename,

	72 char *fields[][2], int32_t fieldCount,

	73 UParseLineFn lineFn, void context,

	74 UErrorCode *pErrorCode) {

	75 char path[256];

	76 char backupPath[256];

	77

	78 if(U_FAILURE(*pErrorCode)) {

	79 return;

	80 }

	81

	82 /* Look inside ICU_DATA first */

	83 strcpy(path, u_getDataDirectory());

	84 strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);

	85 strcat(path, filename);

	86

	87 /* As a fallback, try to guess where the source data was located

	88 * at the time ICU was built, and look there.

	89 */

	90 strcpy(backupPath, ctest_dataSrcDir());

	91 strcat(backupPath, U_FILE_SEP_STRING);

	92 strcat(backupPath, "unidata" U_FILE_SEP_STRING);

	93 strcat(backupPath, filename);

	94

	95 u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorC ode);

	96 if(*pErrorCode==U_FILE_ACCESS_ERROR) {

	97 *pErrorCode=U_ZERO_ERROR;

	98 u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, contex t, pErrorCode);

	99 }

	100 if(U_FAILURE(*pErrorCode)) {

	101 log_err_status(pErrorCode, "error parsing %s: %s\n", filename, u_errorN ame(pErrorCode));

	102 }

	103 }

	104

	105 /* test data ---------------------------------------------------------------- */

	106

	107 static const UChar LAST_CHAR_CODE_IN_FILE = 0xFFFD;

	108 static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPe PoSmScSkSoPiPf";

	109 static const int32_t tagValues[] =

	110 {

	111 /* Mn */ U_NON_SPACING_MARK,

	112 /* Mc */ U_COMBINING_SPACING_MARK,

	113 /* Me */ U_ENCLOSING_MARK,

	114 /* Nd */ U_DECIMAL_DIGIT_NUMBER,

	115 /* Nl */ U_LETTER_NUMBER,

	116 /* No */ U_OTHER_NUMBER,

	117 /* Zs */ U_SPACE_SEPARATOR,

	118 /* Zl */ U_LINE_SEPARATOR,

	119 /* Zp */ U_PARAGRAPH_SEPARATOR,

	120 /* Cc */ U_CONTROL_CHAR,

	121 /* Cf */ U_FORMAT_CHAR,

	122 /* Cs */ U_SURROGATE,

	123 /* Co */ U_PRIVATE_USE_CHAR,

	124 /* Cn */ U_UNASSIGNED,

	125 /* Lu */ U_UPPERCASE_LETTER,

	126 /* Ll */ U_LOWERCASE_LETTER,

	127 /* Lt */ U_TITLECASE_LETTER,

	128 /* Lm */ U_MODIFIER_LETTER,

	129 /* Lo */ U_OTHER_LETTER,

	130 /* Pc */ U_CONNECTOR_PUNCTUATION,

	131 /* Pd */ U_DASH_PUNCTUATION,

	132 /* Ps */ U_START_PUNCTUATION,

	133 /* Pe */ U_END_PUNCTUATION,

	134 /* Po */ U_OTHER_PUNCTUATION,

	135 /* Sm */ U_MATH_SYMBOL,

	136 /* Sc */ U_CURRENCY_SYMBOL,

	137 /* Sk */ U_MODIFIER_SYMBOL,

	138 /* So */ U_OTHER_SYMBOL,

	139 /* Pi */ U_INITIAL_PUNCTUATION,

	140 /* Pf */ U_FINAL_PUNCTUATION

	141 };

	142

	/* Bidi class abbreviations; each entry is at most 3 characters plus the NUL. */
	static const char dirStrings[][5] = {
	    "L",   "R",   "EN",  "ES",  "ET",
	    "AN",  "CS",  "B",   "S",   "WS",
	    "ON",  "LRE", "LRO", "AL",  "RLE",
	    "RLO", "PDF", "NSM", "BN"
	};

	164

	165 void addUnicodeTest(TestNode** root);

	166

	167 void addUnicodeTest(TestNode** root)

	168 {

	169 addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");

	170 addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");

	171 addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");

	172 addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");

	173 addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");

	174 addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalPrope rties");

	175 addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties" );

	176 addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");

	177 addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");

	178 addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");

	179 addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");

	180 addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");

	181 addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");

	182 addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");

	183 addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");

	184 addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");

	185 addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");

	186 addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensi ons");

	187 addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");

	188 addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");

	189 addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");

	190 addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");

	191 addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");

	192 addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");

	193 addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");

	194 }

	195

	196 /==================================================== /

	197 /* test u_toupper() and u_tolower() */

	198 /==================================================== /

	199 static void TestUpperLower()

	200 {

	201 const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0 x01c9, 0x000c, 0x0000};

	202 const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0 x01c9, 0x000c, 0x0000};

	203 U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);

	204 U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);

	205 int32_t i;

	206

	207 U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);

	208 U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);

	209

	210 /*

	211 Checks LetterLike Symbols which were previously a source of confusion

	212 [Bertrand A. D. 02/04/98]

	213 */

	214 for (i=0x2100;i<0x2138;i++)

	215 {

	216 /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */

	217 if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)

	218 {

	219 if (i != (int)u_tolower(i)) /* itself */

	220 log_err("Failed case conversion with itself: U+%04x\n", i);

	221 if (i != (int)u_toupper(i))

	222 log_err("Failed case conversion with itself: U+%04x\n", i);

	223 }

	224 }

	225

	226 for(i=0; i < u_strlen(upper); i++){

	227 if(u_tolower(upper[i]) != lower[i]){

	228 log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i ], lower[i], u_tolower(upper[i]));

	229 }

	230 }

	231

	232 log_verbose("testing upper lower\n");

	233 for (i = 0; i < 21; i++) {

	234

	235 if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))

	236 {

	237 log_err("Failed isLowerCase test at %c\n", upperTest[i]);

	238 }

	239 else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))

	240 {

	241 log_err("Failed isUpperCase test at %c\n", lowerTest[i]);

	242 }

	243 else if (upperTest[i] != u_tolower(lowerTest[i]))

	244 {

	245 log_err("Failed case conversion from %c To %c :\n", lowerTest[i], u pperTest[i]);

	246 }

	247 else if (lowerTest[i] != u_toupper(upperTest[i]))

	248 {

	249 log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerT est[i]);

	250 }

	251 else if (upperTest[i] != u_tolower(upperTest[i]))

	252 {

	253 log_err("Failed case conversion with itself: %c\n", upperTest[i]);

	254 }

	255 else if (lowerTest[i] != u_toupper(lowerTest[i]))

	256 {

	257 log_err("Failed case conversion with itself: %c\n", lowerTest[i]);

	258 }

	259 }

	260 log_verbose("done testing upper lower\n");

	261

	262 log_verbose("testing u_istitle\n");

	263 {

	264 static const UChar expected[] = {

	265 0x1F88,

	266 0x1F89,

	267 0x1F8A,

	268 0x1F8B,

	269 0x1F8C,

	270 0x1F8D,

	271 0x1F8E,

	272 0x1F8F,

	273 0x1F88,

	274 0x1F89,

	275 0x1F8A,

	276 0x1F8B,

	277 0x1F8C,

	278 0x1F8D,

	279 0x1F8E,

	280 0x1F8F,

	281 0x1F98,

	282 0x1F99,

	283 0x1F9A,

	284 0x1F9B,

	285 0x1F9C,

	286 0x1F9D,

	287 0x1F9E,

	288 0x1F9F,

	289 0x1F98,

	290 0x1F99,

	291 0x1F9A,

	292 0x1F9B,

	293 0x1F9C,

	294 0x1F9D,

	295 0x1F9E,

	296 0x1F9F,

	297 0x1FA8,

	298 0x1FA9,

	299 0x1FAA,

	300 0x1FAB,

	301 0x1FAC,

	302 0x1FAD,

	303 0x1FAE,

	304 0x1FAF,

	305 0x1FA8,

	306 0x1FA9,

	307 0x1FAA,

	308 0x1FAB,

	309 0x1FAC,

	310 0x1FAD,

	311 0x1FAE,

	312 0x1FAF,

	313 0x1FBC,

	314 0x1FBC,

	315 0x1FCC,

	316 0x1FCC,

	317 0x1FFC,

	318 0x1FFC,

	319 };

	320 int32_t num = sizeof(expected)/sizeof(expected[0]);

	321 for(i=0; i<num; i++){

	322 if(!u_istitle(expected[i])){

	323 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n" ,expected[i]);

	324 }

	325 }

	326

	327 }

	328 }

	329

	330 /* compare two sets and verify that their difference or intersection is empty */

	331 static UBool

	332 showADiffB(const USet a, const USet b,

	333 const char a_name, const char b_name,

	334 UBool expect, UBool diffIsError) {

	335 USet *aa;

	336 int32_t i, start, end, length;

	337 UErrorCode errorCode;

	338

	339 /*

	340 * expect:

	341 * TRUE -> a-b should be empty, that is, b should contain all of a

	342 * FALSE -> a&b should be empty, that is, a should contain none of b (and vi ce versa)

	343 */

	344 if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {

	345 return TRUE;

	346 }

	347

	348 /* clone a to aa because a is const */

	349 aa=uset_open(1, 0);

	350 if(aa==NULL) {

	351 /* unusual problem - out of memory? */

	352 return FALSE;

	353 }

	354 uset_addAll(aa, a);

	355

	356 /* compute the set in question */

	357 if(expect) {

	358 /* a-b */

	359 uset_removeAll(aa, b);

	360 } else {

	361 /* a&b */

	362 uset_retainAll(aa, b);

	363 }

	364

	365 /* aa is not empty because of the initial tests above; show its contents */

	366 errorCode=U_ZERO_ERROR;

	367 i=0;

	368 for(;;) {

	369 length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);

	370 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {

	371 break; /* done */

	372 }

	373 if(U_FAILURE(errorCode)) {

	374 log_err("error comparing %s with %s at difference item %d: %s\n",

	375 a_name, b_name, i, u_errorName(errorCode));

	376 break;

	377 }

	378 if(length!=0) {

	379 break; /* done with code points, got a string or -1 */

	380 }

	381

	382 if(diffIsError) {

	383 if(expect) {

	384 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a _name, start, end, b_name);

	385 } else {

	386 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);

	387 }

	388 } else {

	389 if(expect) {

	390 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n" , a_name, start, end, b_name);

	391 } else {

	392 log_verbose("info: %s and %s both contain U+%04x..U+%04x but sho uld not intersect\n", a_name, b_name, start, end);

	393 }

	394 }

	395

	396 ++i;

	397 }

	398

	399 uset_close(aa);

	400 return FALSE;

	401 }

	402

	403 static UBool

	404 showAMinusB(const USet a, const USet b,

	405 const char a_name, const char b_name,

	406 UBool diffIsError) {

	407 return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);

	408 }

	409

	410 static UBool

	411 showAIntersectB(const USet a, const USet b,

	412 const char a_name, const char b_name,

	413 UBool diffIsError) {

	414 return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);

	415 }

	416

	417 static UBool

	418 compareUSets(const USet a, const USet b,

	419 const char a_name, const char b_name,

	420 UBool diffIsError) {

	421 /*

	422 * Use an arithmetic & not a logical && so that both branches

	423 * are always taken and all differences are shown.

	424 */

	425 return

	426 showAMinusB(a, b, a_name, b_name, diffIsError) &

	427 showAMinusB(b, a, b_name, a_name, diffIsError);

	428 }

	429

	430 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */

	431 static void TestLetterNumber()

	432 {

	433 UChar i = 0x0000;

	434

	435 log_verbose("Testing for isalpha\n");

	436 for (i = 0x0041; i < 0x005B; i++) {

	437 if (!u_isalpha(i))

	438 {

	439 log_err("Failed isLetter test at %.4X\n", i);

	440 }

	441 }

	442 for (i = 0x0660; i < 0x066A; i++) {

	443 if (u_isalpha(i))

	444 {

	445 log_err("Failed isLetter test with numbers at %.4X\n", i);

	446 }

	447 }

	448

	449 log_verbose("Testing for isdigit\n");

	450 for (i = 0x0660; i < 0x066A; i++) {

	451 if (!u_isdigit(i))

	452 {

	453 log_verbose("Failed isNumber test at %.4X\n", i);

	454 }

	455 }

	456

	457 log_verbose("Testing for isalnum\n");

	458 for (i = 0x0041; i < 0x005B; i++) {

	459 if (!u_isalnum(i))

	460 {

	461 log_err("Failed isAlNum test at %.4X\n", i);

	462 }

	463 }

	464 for (i = 0x0660; i < 0x066A; i++) {

	465 if (!u_isalnum(i))

	466 {

	467 log_err("Failed isAlNum test at %.4X\n", i);

	468 }

	469 }

	470

	471 {

	472 /*

	473 * The following checks work only starting from Unicode 4.0.

	474 * Check the version number here.

	475 */

	476 static UVersionInfo u401={ 4, 0, 1, 0 };

	477 UVersionInfo version;

	478 u_getUnicodeVersion(version);

	479 if(version[0]<4 \|\| 0==memcmp(version, u401, 4)) {

	480 return;

	481 }

	482 }

	483

	484 {

	485 /*

	486 * Sanity check:

	487 * Verify that exactly the digit characters have decimal digit values.

	488 * This assumption is used in the implementation of u_digit()

	489 * (which checks nt=de)

	490 * compared with the parallel java.lang.Character.digit()

	491 * (which checks Nd).

	492 *

	493 * This was not true in Unicode 3.2 and earlier.

	494 * Unicode 4.0 fixed discrepancies.

	495 * Unicode 4.0.1 re-introduced problems in this area due to an

	496 * unintentionally incomplete last-minute change.

	497 */

	498 U_STRING_DECL(digitsPattern, "[:Nd:]", 6);

	499 U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);

	500

	501 USet digits, decimalValues;

	502 UErrorCode errorCode;

	503

	504 U_STRING_INIT(digitsPattern, "[:Nd:]", 6);

	505 U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);

	506 errorCode=U_ZERO_ERROR;

	507 digits=uset_openPattern(digitsPattern, 6, &errorCode);

	508 decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);

	509

	510 if(U_SUCCESS(errorCode)) {

	511 compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decima l:]", TRUE);

	512 }

	513

	514 uset_close(digits);

	515 uset_close(decimalValues);

	516 }

	517 }

	518

	519 static void testSampleCharProps(UBool propFn(UChar32), const char *propName,

	520 const UChar32 *sampleChars, int32_t sampleCharsL ength,

	521 UBool expected) {

	522 int32_t i;

	523 for (i = 0; i < sampleCharsLength; ++i) {

	524 UBool result = propFn(sampleChars[i]);

	525 if (result != expected) {

	526 log_err("error: character property function %s(U+%04x)=%d is wrong\n ",

	527 propName, sampleChars[i], result);

	528 }

	529 }

	530 }

	531

	532 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_issp ace()), isWhiteSpace(), u_CharDigitValue() */

	533 static void TestMisc()

	534 {

	535 static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x200 5};

	536 static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};

	537 static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};

	538 static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};

	539 static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};

	540 static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};

	541 /* static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8 , 0xFFF0};*/

	542 static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};

	543 static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};

	544 static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};

	545 static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};

	546

	547 static const int32_t sampleDigitValues[] = {0, 2, 3, 5};

	548

	549 uint32_t mask;

	550

	551 int32_t i;

	552 char icuVersion[U_MAX_VERSION_STRING_LENGTH];

	553 UVersionInfo realVersion;

	554

	555 memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);

	556

	557 testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, LENGTHOF(sampleSpa ces), TRUE);

	558 testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, LENGTHOF(sample NonSpaces), FALSE);

	559

	560 testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",

	561 sampleSpaces, LENGTHOF(sampleSpaces), TRUE);

	562 testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",

	563 sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);

	564

	565 testSampleCharProps(u_isWhitespace, "u_isWhitespace",

	566 sampleWhiteSpaces, LENGTHOF(sampleWhiteSpaces), TRUE);

	567 testSampleCharProps(u_isWhitespace, "u_isWhitespace",

	568 sampleNonWhiteSpaces, LENGTHOF(sampleNonWhiteSpaces), FA LSE);

	569

	570 testSampleCharProps(u_isdefined, "u_isdefined",

	571 sampleDefined, LENGTHOF(sampleDefined), TRUE);

	572 testSampleCharProps(u_isdefined, "u_isdefined",

	573 sampleUndefined, LENGTHOF(sampleUndefined), FALSE);

	574

	575 testSampleCharProps(u_isbase, "u_isbase", sampleBase, LENGTHOF(sampleBase), TRUE);

	576 testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, LENGTHOF(sampleNonB ase), FALSE);

	577

	578 testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, LENGTHOF(sampleDig its), TRUE);

	579 testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, LENGTHOF(sample NonDigits), FALSE);

	580

	581 for (i = 0; i < LENGTHOF(sampleDigits); i++) {

	582 if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {

	583 log_err("error: u_charDigitValue(U+04x)=%d != %d\n",

	584 sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDi gitValues[i]);

	585 }

	586 }

	587

	588 /* Tests the ICU version #*/

	589 u_getVersion(realVersion);

	590 u_versionToString(realVersion, icuVersion);

	591 if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)

	592 {

	593 log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERS ION, icuVersion);

	594 }

	595 #if defined(ICU_VERSION)

	596 /* test only happens where we have configure.in with VERSION - sanity check. */

	597 if(strcmp(U_ICU_VERSION, ICU_VERSION))

	598 {

	599 log_err("ICU version mismatch: Header says %s, build environment says %s .\n", U_ICU_VERSION, ICU_VERSION);

	600 }

	601 #endif

	602

	603 /* test U_GC_... */

	604 if(

	605 U_GET_GC_MASK(0x41)!=U_GC_LU_MASK \|\|

	606 U_GET_GC_MASK(0x662)!=U_GC_ND_MASK \|\|

	607 U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK \|\|

	608 U_GET_GC_MASK(0x28)!=U_GC_PS_MASK \|\|

	609 U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK \|\|

	610 U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK

	611 ) {

	612 log_err("error: U_GET_GC_MASK does not work properly\n");

	613 }

	614

	615 mask=0;

	616 mask=(mask&~U_GC_CN_MASK)\|U_GC_CN_MASK;

	617

	618 mask=(mask&~U_GC_LU_MASK)\|U_GC_LU_MASK;

	619 mask=(mask&~U_GC_LL_MASK)\|U_GC_LL_MASK;

	620 mask=(mask&~U_GC_LT_MASK)\|U_GC_LT_MASK;

	621 mask=(mask&~U_GC_LM_MASK)\|U_GC_LM_MASK;

	622 mask=(mask&~U_GC_LO_MASK)\|U_GC_LO_MASK;

	623

	624 mask=(mask&~U_GC_MN_MASK)\|U_GC_MN_MASK;

	625 mask=(mask&~U_GC_ME_MASK)\|U_GC_ME_MASK;

	626 mask=(mask&~U_GC_MC_MASK)\|U_GC_MC_MASK;

	627

	628 mask=(mask&~U_GC_ND_MASK)\|U_GC_ND_MASK;

	629 mask=(mask&~U_GC_NL_MASK)\|U_GC_NL_MASK;

	630 mask=(mask&~U_GC_NO_MASK)\|U_GC_NO_MASK;

	631

	632 mask=(mask&~U_GC_ZS_MASK)\|U_GC_ZS_MASK;

	633 mask=(mask&~U_GC_ZL_MASK)\|U_GC_ZL_MASK;

	634 mask=(mask&~U_GC_ZP_MASK)\|U_GC_ZP_MASK;

	635

	636 mask=(mask&~U_GC_CC_MASK)\|U_GC_CC_MASK;

	637 mask=(mask&~U_GC_CF_MASK)\|U_GC_CF_MASK;

	638 mask=(mask&~U_GC_CO_MASK)\|U_GC_CO_MASK;

	639 mask=(mask&~U_GC_CS_MASK)\|U_GC_CS_MASK;

	640

	641 mask=(mask&~U_GC_PD_MASK)\|U_GC_PD_MASK;

	642 mask=(mask&~U_GC_PS_MASK)\|U_GC_PS_MASK;

	643 mask=(mask&~U_GC_PE_MASK)\|U_GC_PE_MASK;

	644 mask=(mask&~U_GC_PC_MASK)\|U_GC_PC_MASK;

	645 mask=(mask&~U_GC_PO_MASK)\|U_GC_PO_MASK;

	646

	647 mask=(mask&~U_GC_SM_MASK)\|U_GC_SM_MASK;

	648 mask=(mask&~U_GC_SC_MASK)\|U_GC_SC_MASK;

	649 mask=(mask&~U_GC_SK_MASK)\|U_GC_SK_MASK;

	650 mask=(mask&~U_GC_SO_MASK)\|U_GC_SO_MASK;

	651

	652 mask=(mask&~U_GC_PI_MASK)\|U_GC_PI_MASK;

	653 mask=(mask&~U_GC_PF_MASK)\|U_GC_PF_MASK;

	654

	655 if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffff ffff)) {

	656 log_err("error: problems with U_GC_XX_MASK constants\n");

	657 }

	658

	659 mask=0;

	660 mask=(mask&~U_GC_C_MASK)\|U_GC_C_MASK;

	661 mask=(mask&~U_GC_L_MASK)\|U_GC_L_MASK;

	662 mask=(mask&~U_GC_M_MASK)\|U_GC_M_MASK;

	663 mask=(mask&~U_GC_N_MASK)\|U_GC_N_MASK;

	664 mask=(mask&~U_GC_Z_MASK)\|U_GC_Z_MASK;

	665 mask=(mask&~U_GC_P_MASK)\|U_GC_P_MASK;

	666 mask=(mask&~U_GC_S_MASK)\|U_GC_S_MASK;

	667

	668 if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffff ffff)) {

	669 log_err("error: problems with U_GC_Y_MASK constants\n");

	670 }

	671 {

	672 static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x00 35,0x0036,0x0037,0x0038,0x0039 };

	673 for(i=0; i<10; i++){

	674 if(digit[i]!=u_forDigit(i,10)){

	675 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n" ,i,digit[i],u_forDigit(i,10));

	676 }

	677 }

	678 }

	679

	680 /* test u_digit() */

	681 {

	682 static const struct {

	683 UChar32 c;

	684 int8_t radix, value;

	685 } data[]={

	686 /* base 16 */

	687 { 0x0031, 16, 1 },

	688 { 0x0038, 16, 8 },

	689 { 0x0043, 16, 12 },

	690 { 0x0066, 16, 15 },

	691 { 0x00e4, 16, -1 },

	692 { 0x0662, 16, 2 },

	693 { 0x06f5, 16, 5 },

	694 { 0xff13, 16, 3 },

	695 { 0xff41, 16, 10 },

	696

	697 /* base 8 */

	698 { 0x0031, 8, 1 },

	699 { 0x0038, 8, -1 },

	700 { 0x0043, 8, -1 },

	701 { 0x0066, 8, -1 },

	702 { 0x00e4, 8, -1 },

	703 { 0x0662, 8, 2 },

	704 { 0x06f5, 8, 5 },

	705 { 0xff13, 8, 3 },

	706 { 0xff41, 8, -1 },

	707

	708 /* base 36 */

	709 { 0x5a, 36, 35 },

	710 { 0x7a, 36, 35 },

	711 { 0xff3a, 36, 35 },

	712 { 0xff5a, 36, 35 },

	713

	714 /* wrong radix values */

	715 { 0x0031, 1, -1 },

	716 { 0xff3a, 37, -1 }

	717 };

	718

	719 for(i=0; i<LENGTHOF(data); ++i) {

	720 if(u_digit(data[i].c, data[i].radix)!=data[i].value) {

	721 log_err("u_digit(U+%04x, %d)=%d expected %d\n",

	722 data[i].c,

	723 data[i].radix,

	724 u_digit(data[i].c, data[i].radix),

	725 data[i].value);

	726 }

	727 }

	728 }

	729 }

	730

	/* test C/POSIX-style functions --------------------------------------------- */

	/*
	 * Bit flags, one per C/POSIX character class.
	 * Bit k (value 1<<k) belongs to the k-th class, counting from 0.
	 */
	#define ISAL 0x001  /* alpha  */
	#define ISLO 0x002  /* lower  */
	#define ISUP 0x004  /* upper  */

	#define ISDI 0x008  /* digit  */
	#define ISXD 0x010  /* xdigit */

	#define ISAN 0x020  /* alnum  */

	#define ISPU 0x040  /* punct  */
	#define ISGR 0x080  /* graph  */
	#define ISPR 0x100  /* print  */

	#define ISSP 0x200  /* space  */
	#define ISBL 0x400  /* blank  */
	#define ISCN 0x800  /* cntrl  */

	750

	751 /* C/POSIX-style functions, in the same order as the bit flags */

	752 typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);

	753

	754 static const struct {

	755 IsPOSIXClass *fn;

	756 const char *name;

	757 } posixClasses[]={

	758 { u_isalpha, "isalpha" },

	759 { u_islower, "islower" },

	760 { u_isupper, "isupper" },

	761 { u_isdigit, "isdigit" },

	762 { u_isxdigit, "isxdigit" },

	763 { u_isalnum, "isalnum" },

	764 { u_ispunct, "ispunct" },

	765 { u_isgraph, "isgraph" },

	766 { u_isprint, "isprint" },

	767 { u_isspace, "isspace" },

	768 { u_isblank, "isblank" },

	769 { u_iscntrl, "iscntrl" }

	770 };

	771

	772 static const struct {

	773 UChar32 c;

	774 uint32_t posixResults;

	775 } posixData[]={

	776 { 0x0008, ISCN }, /* backspace */

	777 { 0x0009, ISSP\|ISBL\|ISCN }, /* TAB */

	778 { 0x000a, ISSP\| ISCN }, /* LF */

	779 { 0x000c, ISSP\| ISCN }, /* FF */

	780 { 0x000d, ISSP\| ISCN }, /* CR */

	781 { 0x0020, ISPR\|ISSP\|ISBL }, /* space */

	782 { 0x0021, ISPU\|ISGR\|ISPR }, /* ! */

	783 { 0x0033, ISDI\|ISXD\|ISAN\| ISGR\|ISPR }, /* 3 */

	784 { 0x0040, ISPU\|ISGR\|ISPR }, /* @ */

	785 { 0x0041, ISAL\| ISUP\| ISXD\|ISAN\| ISGR\|ISPR }, /* A */

	786 { 0x007a, ISAL\|ISLO\| ISAN\| ISGR\|ISPR }, /* z */

	787 { 0x007b, ISPU\|ISGR\|ISPR }, /* { */

	788 { 0x0085, ISSP\| ISCN }, /* NEL */

	789 { 0x00a0, ISPR\|ISSP\|ISBL }, /* NBSP */

	790 { 0x00a4, ISGR\|ISPR }, /* currency sign */

	791 { 0x00e4, ISAL\|ISLO\| ISAN\| ISGR\|ISPR }, /* a-umlaut */

	792 { 0x0300, ISGR\|ISPR }, /* combining grave */

	793 { 0x0600, ISCN }, /* arabic number sign */

	794 { 0x0627, ISAL\| ISAN\| ISGR\|ISPR }, /* alef */

	795 { 0x0663, ISDI\|ISXD\|ISAN\| ISGR\|ISPR }, /* arabic 3 */

	796 { 0x2002, ISPR\|ISSP\|ISBL }, /* en space */

	797 { 0x2007, ISPR\|ISSP\|ISBL }, /* figure space */

	798 { 0x2009, ISPR\|ISSP\|ISBL }, /* thin space */

	799 { 0x200b, ISCN }, /* ZWSP */

	800 /{ 0x200b, ISPR\|ISSP },/ /* ZWSP / / ZWSP became a control char in 4.0.1*/

	801 { 0x200e, ISCN }, /* LRM */

	802 { 0x2028, ISPR\|ISSP\| ISCN }, /* LS */

	803 { 0x2029, ISPR\|ISSP\| ISCN }, /* PS */

	804 { 0x20ac, ISGR\|ISPR }, /* Euro */

	805 { 0xff15, ISDI\|ISXD\|ISAN\| ISGR\|ISPR }, /* fullwidth 5 */

	806 { 0xff25, ISAL\| ISUP\| ISXD\|ISAN\| ISGR\|ISPR }, /* fullwidth E */

	807 { 0xff35, ISAL\| ISUP\| ISAN\| ISGR\|ISPR }, /* fullwidth U */

	808 { 0xff45, ISAL\|ISLO\| ISXD\|ISAN\| ISGR\|ISPR }, /* fullwidth e */

	809 { 0xff55, ISAL\|ISLO\| ISAN\| ISGR\|ISPR } /* fullwidth u */

	810 };

	811

	812 static void

	813 TestPOSIX() {

	814 uint32_t mask;

	815 int32_t cl, i;

	816 UBool expect;

	817

	818 mask=1;

	819 for(cl=0; cl<12; ++cl) {

	820 for(i=0; i<LENGTHOF(posixData); ++i) {

	821 expect=(UBool)((posixData[i].posixResults&mask)!=0);

	822 if(posixClasses[cl].fn(posixData[i].c)!=expect) {

	823 log_err("u_%s(U+%04x)=%s is wrong\n",

	824 posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "T RUE");

	825 }

	826 }

	827 mask<<=1;

	828 }

	829 }

	830

	831 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */

	832 static void TestControlPrint()

	833 {

	834 const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0 x202b};

	835 const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};

	836 const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};

	837 const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};

	838 UChar32 c;

	839

	840 testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, LENGTHOF(sampleCo ntrol), TRUE);

	841 testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, LENGTHOF(sampl eNonControl), FALSE);

	842

	843 testSampleCharProps(u_isprint, "u_isprint",

	844 samplePrintable, LENGTHOF(samplePrintable), TRUE);

	845 testSampleCharProps(u_isprint, "u_isprint",

	846 sampleNonPrintable, LENGTHOF(sampleNonPrintable), FALSE) ;

	847

	848 /* test all ISO 8 controls */

	849 for(c=0; c<=0x9f; ++c) {

	850 if(c==0x20) {

	851 /* skip ASCII graphic characters and continue with DEL */

	852 c=0x7f;

	853 }

	854 if(!u_iscntrl(c)) {

	855 log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);

	856 }

	857 if(!u_isISOControl(c)) {

	858 log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);

	859 }

	860 if(u_isprint(c)) {

	861 log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);

	862 }

	863 }

	864

	865 /* test all Latin-1 graphic characters */

	866 for(c=0x20; c<=0xff; ++c) {

	867 if(c==0x7f) {

	868 c=0xa0;

	869 } else if(c==0xad) {

	870 /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not p rintable) */

	871 ++c;

	872 }

	873 if(!u_isprint(c)) {

	874 log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n" , c);

	875 }

	876 }

	877 }

	878

	879 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable ()*/

	880 static void TestIdentifier()

	881 {

	882 const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};

	883 const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};

	884 const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};

	885 const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};

	886 const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};

	887 const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};

	888 const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};

	889 const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};

	890 const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};

	891 const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};

	892

	893 testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",

	894 sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);

	895 testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",

	896 sampleNonJavaIDStart, LENGTHOF(sampleNonJavaIDStart), FA LSE);

	897

	898 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",

	899 sampleJavaIDPart, LENGTHOF(sampleJavaIDPart), TRUE);

	900 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",

	901 sampleNonJavaIDPart, LENGTHOF(sampleNonJavaIDPart), FALS E);

	902

	903 /* IDPart should imply IDStart */

	904 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",

	905 sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);

	906

	907 testSampleCharProps(u_isIDStart, "u_isIDStart",

	908 sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TR UE);

	909 testSampleCharProps(u_isIDStart, "u_isIDStart",

	910 sampleNonUnicodeIDStart, LENGTHOF(sampleNonUnicodeIDStar t), FALSE);

	911

	912 testSampleCharProps(u_isIDPart, "u_isIDPart",

	913 sampleUnicodeIDPart, LENGTHOF(sampleUnicodeIDPart), TRUE );

	914 testSampleCharProps(u_isIDPart, "u_isIDPart",

	915 sampleNonUnicodeIDPart, LENGTHOF(sampleNonUnicodeIDPart) , FALSE);

	916

	917 /* IDPart should imply IDStart */

	918 testSampleCharProps(u_isIDPart, "u_isIDPart",

	919 sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TR UE);

	920

	921 testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",

	922 sampleIDIgnore, LENGTHOF(sampleIDIgnore), TRUE);

	923 testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",

	924 sampleNonIDIgnore, LENGTHOF(sampleNonIDIgnore), FALSE);

	925 }

	926

	927 /* for each line of UnicodeData.txt, check some of the properties */

	928 /*

	929 * ### TODO

	930 * This test fails incorrectly if the First or Last code point of a repetitive a rea

	931 * is overridden, which is allowed and is encouraged for the PUAs.

	932 * Currently, this means that both area First/Last and override lines are

	933 * tested against the properties from the API,

	934 * and the area boundary will not match and cause an error.

	935 *

	936 * This function should detect area boundaries and skip them for the test of ind ividual

	937 * code points' properties.

	938 * Then it should check that the areas contain all the same properties except wh ere overridden.

	939 * For this, it would have had to set a flag for which code points were listed e xplicitly.

	940 */

	941 static void U_CALLCONV

	942 unicodeDataLineFn(void *context,

	943 char *fields[][2], int32_t fieldCount,

	944 UErrorCode *pErrorCode)

	945 {

	946 char buffer[100];

	947 char *end;

	948 uint32_t value;

	949 UChar32 c;

	950 int32_t i;

	951 int8_t type;

	952

	953 /* get the character code, field 0 */

	954 c=strtoul(fields[0][0], &end, 16);

	955 if(end<=fields[0][0] \|\| end!=fields[0][1]) {

	956 log_err("error: syntax error in field 0 at %s\n", fields[0][0]);

	957 return;

	958 }

	959 if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {

	960 log_err("error in UnicodeData.txt: code point %lu out of range\n", c);

	961 return;

	962 }

	963

	964 /* get general category, field 2 */

	965 *fields[2][1]=0;

	966 type = (int8_t)tagValues[MakeProp(fields[2][0])];

	967 if(u_charType(c)!=type) {

	968 log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType( c), type);

	969 }

	970 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(t ype)) {

	971 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_C ATEGORY_MASK)!=U_MASK(u_charType())\n", c);

	972 }

	973

	974 /* get canonical combining class, field 3 */

	975 value=strtoul(fields[3][0], &end, 10);

	976 if(end<=fields[3][0] \|\| end!=fields[3][1]) {

	977 log_err("error: syntax error in field 3 at code 0x%lx\n", c);

	978 return;

	979 }

	980 if(value>255) {

	981 log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);

	982 return;

	983 }

	984 #if !UCONFIG_NO_NORMALIZATION

	985 if(value!=u_getCombiningClass(c) \|\| value!=(uint32_t)u_getIntPropertyValue(c , UCHAR_CANONICAL_COMBINING_CLASS)) {

	986 log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);

	987 }

	988 #endif

	989

	990 /* get BiDi category, field 4 */

	991 *fields[4][1]=0;

	992 i=MakeDir(fields[4][0]);

	993 if(i!=u_charDirection(c) \|\| i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {

	994 log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u _charDirection(c), MakeDir(fields[4][0]), fields[4][0]);

	995 }

	996

	997 /* get ISO Comment, field 11 */

	998 *fields[11][1]=0;

	999 i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);

	1000 if(U_FAILURE(*pErrorCode) \|\| 0!=strcmp(fields[11][0], buffer)) {

	1001 log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",

	1002 c, u_errorName(*pErrorCode),

	1003 U_FAILURE(*pErrorCode) ? buffer : "[error]",

	1004 fields[11][0]);

	1005 }

	1006

	1007 /* get uppercase mapping, field 12 */

	1008 if(fields[12][0]!=fields[12][1]) {

	1009 value=strtoul(fields[12][0], &end, 16);

	1010 if(end!=fields[12][1]) {

	1011 log_err("error: syntax error in field 12 at code 0x%lx\n", c);

	1012 return;

	1013 }

	1014 if((UChar32)value!=u_toupper(c)) {

	1015 log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c , u_toupper(c), value);

	1016 }

	1017 } else {

	1018 /* no case mapping: the API must map the code point to itself */

	1019 if(c!=u_toupper(c)) {

	1020 log_err("error: U+%04lx does not have an uppercase mapping but u_tou pper()==U+%04lx\n", c, u_toupper(c));

	1021 }

	1022 }

	1023

	1024 /* get lowercase mapping, field 13 */

	1025 if(fields[13][0]!=fields[13][1]) {

	1026 value=strtoul(fields[13][0], &end, 16);

	1027 if(end!=fields[13][1]) {

	1028 log_err("error: syntax error in field 13 at code 0x%lx\n", c);

	1029 return;

	1030 }

	1031 if((UChar32)value!=u_tolower(c)) {

	1032 log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c , u_tolower(c), value);

	1033 }

	1034 } else {

	1035 /* no case mapping: the API must map the code point to itself */

	1036 if(c!=u_tolower(c)) {

	1037 log_err("error: U+%04lx does not have a lowercase mapping but u_tolo wer()==U+%04lx\n", c, u_tolower(c));

	1038 }

	1039 }

	1040

	1041 /* get titlecase mapping, field 14 */

	1042 if(fields[14][0]!=fields[14][1]) {

	1043 value=strtoul(fields[14][0], &end, 16);

	1044 if(end!=fields[14][1]) {

	1045 log_err("error: syntax error in field 14 at code 0x%lx\n", c);

	1046 return;

	1047 }

	1048 if((UChar32)value!=u_totitle(c)) {

	1049 log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c , u_totitle(c), value);

	1050 }

	1051 } else {

	1052 /* no case mapping: the API must map the code point to itself */

	1053 if(c!=u_totitle(c)) {

	1054 log_err("error: U+%04lx does not have a titlecase mapping but u_toti tle()==U+%04lx\n", c, u_totitle(c));

	1055 }

	1056 }

	1057 }

	1058

	1059 static UBool U_CALLCONV

	1060 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory t ype) {

	1061 static const UChar32 test[][2]={

	1062 {0x41, U_UPPERCASE_LETTER},

	1063 {0x308, U_NON_SPACING_MARK},

	1064 {0xfffe, U_GENERAL_OTHER_TYPES},

	1065 {0xe0041, U_FORMAT_CHAR},

	1066 {0xeffff, U_UNASSIGNED}

	1067 };

	1068

	1069 int32_t i, count;

	1070

	1071 if(0!=strcmp((const char *)context, "a1")) {

	1072 log_err("error: u_enumCharTypes() passes on an incorrect context pointer \n");

	1073 return FALSE;

	1074 }

	1075

	1076 count=LENGTHOF(test);

	1077 for(i=0; i<count; ++i) {

	1078 if(start<=test[i][0] && test[i][0]<limit) {

	1079 if(type!=(UCharCategory)test[i][1]) {

	1080 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ w ith %ld instead of U+%04lx with %ld\n",

	1081 start, limit, (long)type, test[i][0], test[i][1]);

	1082 }

	1083 /* stop at the range that includes the last test code point (increas es code coverage for enumeration) */

	1084 return i==(count-1) ? FALSE : TRUE;

	1085 }

	1086 }

	1087

	1088 if(start>test[count-1][0]) {

	1089 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",

	1090 start, limit, (long)type);

	1091 return FALSE;

	1092 }

	1093

	1094 return TRUE;

	1095 }

	1096

	1097 static UBool U_CALLCONV

	1098 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCatego ry type) {

	1099 /* default Bidi classes for unassigned code points */

	1100 static const int32_t defaultBidi[][2]={ /* { limit, class } */

	1101 { 0x0590, U_LEFT_TO_RIGHT },

	1102 { 0x0600, U_RIGHT_TO_LEFT },

	1103 { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },

	1104 { 0x0900, U_RIGHT_TO_LEFT },

	1105 { 0xFB1D, U_LEFT_TO_RIGHT },

	1106 { 0xFB50, U_RIGHT_TO_LEFT },

	1107 { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },

	1108 { 0xFE70, U_LEFT_TO_RIGHT },

	1109 { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },

	1110 { 0x10800, U_LEFT_TO_RIGHT },

	1111 { 0x11000, U_RIGHT_TO_LEFT },

	1112 { 0x1E800, U_LEFT_TO_RIGHT }, /* new default-R range in Unicode 5.2: U+ 1E800 - U+1EFFF */

	1113 { 0x1F000, U_RIGHT_TO_LEFT },

	1114 { 0x110000, U_LEFT_TO_RIGHT }

	1115 };

	1116

	1117 UChar32 c;

	1118 int32_t i;

	1119 UCharDirection shouldBeDir;

	1120

	1121 /*

	1122 * LineBreak.txt specifies:

	1123 * # - Assigned characters that are not listed explicitly are given the v alue

	1124 * # "AL".

	1125 * # - Unassigned characters are given the value "XX".

	1126 *

	1127 * PUA characters are listed explicitly with "XX".

	1128 * Verify that no assigned character has "XX".

	1129 */

	1130 if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {

	1131 c=start;

	1132 while(c<limit) {

	1133 if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {

	1134 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);

	1135 }

	1136 ++c;

	1137 }

	1138 }

	1139

	1140 /*

	1141 * Verify default Bidi classes.

	1142 * For recent Unicode versions, see UCD.html.

	1143 *

	1144 * For older Unicode versions:

	1145 * See table 3-7 "Bidirectional Character Types" in UAX #9.

	1146 * http://www.unicode.org/reports/tr9/

	1147 *

	1148 * See also DerivedBidiClass.txt for Cn code points!

	1149 *

	1150 * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/reso lved-pri.html)

	1151 * changed some default values.

	1152 * In particular, non-characters and unassigned Default Ignorable Code Point s

	1153 * change from L to BN.

	1154 *

	1155 * UCD.html version 4.0.1 does not yet reflect these changes.

	1156 */

	1157 if(type==U_UNASSIGNED \|\| type==U_PRIVATE_USE_CHAR) {

	1158 /* enumerate the intersections of defaultBidi ranges with [start..limit[ */

	1159 c=start;

	1160 for(i=0; i<LENGTHOF(defaultBidi) && c<limit; ++i) {

	1161 if((int32_t)c<defaultBidi[i][0]) {

	1162 while(c<limit && (int32_t)c<defaultBidi[i][0]) {

	1163 if(U_IS_UNICODE_NONCHAR(c) \|\| u_hasBinaryProperty(c, UCHAR_D EFAULT_IGNORABLE_CODE_POINT)) {

	1164 shouldBeDir=U_BOUNDARY_NEUTRAL;

	1165 } else {

	1166 shouldBeDir=(UCharDirection)defaultBidi[i][1];

	1167 }

	1168

	1169 if( u_charDirection(c)!=shouldBeDir \|\|

	1170 u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir

	1171 ) {

	1172 log_err("error: u_charDirection(unassigned/PUA U+%04lx)= %s should be %s\n",

	1173 c, dirStrings[u_charDirection(c)], dirStrings[should BeDir]);

	1174 }

	1175 ++c;

	1176 }

	1177 }

	1178 }

	1179 }

	1180

	1181 return TRUE;

	1182 }

	1183

	1184 /* tests for several properties */

	1185 static void TestUnicodeData()

	1186 {

	1187 UVersionInfo expectVersionArray;

	1188 UVersionInfo versionArray;

	1189 char *fields[15][2];

	1190 UErrorCode errorCode;

	1191 UChar32 c;

	1192 int8_t type;

	1193

	1194 u_versionFromString(expectVersionArray, U_UNICODE_VERSION);

	1195 u_getUnicodeVersion(versionArray);

	1196 if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)

	1197 {

	1198 log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",

	1199 versionArray[0], versionArray[1], versionArray[2], versionArray[3]);

	1200 }

	1201

	1202 #if defined(ICU_UNICODE_VERSION)

	1203 /* test only happens where we have configure.in with UNICODE_VERSION - sanit y check. */

	1204 if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))

	1205 {

	1206 log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNIC ODE_VERSION " got " ICU_UNICODE_VERSION "\n");

	1207 }

	1208 #endif

	1209

	1210 if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN \|\| u_getIntPropertyV alue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {

	1211 log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \ n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));

	1212 }

	1213

	1214 errorCode=U_ZERO_ERROR;

	1215 parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, NULL, &errorC ode);

	1216 if(U_FAILURE(errorCode)) {

	1217 return; /* if we couldn't parse UnicodeData.txt, we should return */

	1218 }

	1219

	1220 /* sanity check on repeated properties */

	1221 for(c=0xfffe; c<=0x10ffff;) {

	1222 type=u_charType(c);

	1223 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MA SK(type)) {

	1224 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENER AL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);

	1225 }

	1226 if(type!=U_UNASSIGNED) {

	1227 log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c , u_charType(c));

	1228 }

	1229 if((c&0xffff)==0xfffe) {

	1230 ++c;

	1231 } else {

	1232 c+=0xffff;

	1233 }

	1234 }

	1235

	1236 /* test that PUA is not "unassigned" */

	1237 for(c=0xe000; c<=0x10fffd;) {

	1238 type=u_charType(c);

	1239 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MA SK(type)) {

	1240 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENER AL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);

	1241 }

	1242 if(type==U_UNASSIGNED) {

	1243 log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);

	1244 } else if(type!=U_PRIVATE_USE_CHAR) {

	1245 log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);

	1246 }

	1247 if(c==0xf8ff) {

	1248 c=0xf0000;

	1249 } else if(c==0xffffd) {

	1250 c=0x100000;

	1251 } else {

	1252 ++c;

	1253 }

	1254 }

	1255

	1256 /* test u_enumCharTypes() */

	1257 u_enumCharTypes(enumTypeRange, "a1");

	1258

	1259 /* check default properties */

	1260 u_enumCharTypes(enumDefaultsRange, NULL);

	1261 }

	1262

	1263 static void TestCodeUnit(){

	1264 const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xd bff,0xdc00,0xdc02,0xddee,0xdfff,0};

	1265

	1266 int32_t i;

	1267

	1268 for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){

	1269 UChar c=codeunit[i];

	1270 if(i<4){

	1271 if(!(UTF_IS_SINGLE(c)) \|\| (UTF_IS_LEAD(c)) \|\| (UTF_IS_TRAIL(c)) \|\|(U TF_IS_SURROGATE(c))){

	1272 log_err("ERROR: U+%04x is a single", c);

	1273 }

	1274

	1275 }

	1276 if(i >= 4 && i< 8){

	1277 if(!(UTF_IS_LEAD(c)) \|\| UTF_IS_SINGLE(c) \|\| UTF_IS_TRAIL(c) \|\| !(UTF _IS_SURROGATE(c))){

	1278 log_err("ERROR: U+%04x is a first surrogate", c);

	1279 }

	1280 }

	1281 if(i >= 8 && i< 12){

	1282 if(!(UTF_IS_TRAIL(c)) \|\| UTF_IS_SINGLE(c) \|\| UTF_IS_LEAD(c) \|\| !(UTF _IS_SURROGATE(c))){

	1283 log_err("ERROR: U+%04x is a second surrogate", c);

	1284 }

	1285 }

	1286 }

	1287

	1288 }

	1289

	1290 static void TestCodePoint(){

	1291 const UChar32 codePoint[]={

	1292 /surrogate, notvalid(codepoint), not a UnicodeChar, not Error /

	1293 0xd800,

	1294 0xdbff,

	1295 0xdc00,

	1296 0xdfff,

	1297 0xdc04,

	1298 0xd821,

	1299 /not a surrogate, valid, isUnicodeChar , not Error/

	1300 0x20ac,

	1301 0xd7ff,

	1302 0xe000,

	1303 0xe123,

	1304 0x0061,

	1305 0xe065,

	1306 0x20402,

	1307 0x24506,

	1308 0x23456,

	1309 0x20402,

	1310 0x10402,

	1311 0x23456,

	1312 /not a surrogate, not valid, isUnicodeChar, isError /

	1313 0x0015,

	1314 0x009f,

	1315 /not a surrogate, not valid, not isUnicodeChar, isError /

	1316 0xffff,

	1317 0xfffe,

	1318 };

	1319 int32_t i;

	1320 for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){

	1321 UChar32 c=codePoint[i];

	1322 if(i<6){

	1323 if(!UTF_IS_SURROGATE(c) \|\| !U_IS_SURROGATE(c) \|\| !U16_IS_SURROGATE(c )){

	1324 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);

	1325 }

	1326 if(UTF_IS_VALID(c)){

	1327 log_err("ERROR: isValid() failed for U+%04x\n", c);

	1328 }

	1329 if(UTF_IS_UNICODE_CHAR(c) \|\| U_IS_UNICODE_CHAR(c)){

	1330 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);

	1331 }

	1332 if(UTF_IS_ERROR(c)){

	1333 log_err("ERROR: isError() failed for U+%04x\n", c);

	1334 }

	1335 }else if(i >=6 && i<18){

	1336 if(UTF_IS_SURROGATE(c) \|\| U_IS_SURROGATE(c) \|\| U16_IS_SURROGATE(c)){

	1337 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);

	1338 }

	1339 if(!UTF_IS_VALID(c)){

	1340 log_err("ERROR: isValid() failed for U+%04x\n", c);

	1341 }

	1342 if(!UTF_IS_UNICODE_CHAR(c) \|\| !U_IS_UNICODE_CHAR(c)){

	1343 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);

	1344 }

	1345 if(UTF_IS_ERROR(c)){

	1346 log_err("ERROR: isError() failed for U+%04x\n", c);

	1347 }

	1348 }else if(i >=18 && i<20){

	1349 if(UTF_IS_SURROGATE(c) \|\| U_IS_SURROGATE(c) \|\| U16_IS_SURROGATE(c)){

	1350 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);

	1351 }

	1352 if(UTF_IS_VALID(c)){

	1353 log_err("ERROR: isValid() failed for U+%04x\n", c);

	1354 }

	1355 if(!UTF_IS_UNICODE_CHAR(c) \|\| !U_IS_UNICODE_CHAR(c)){

	1356 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);

	1357 }

	1358 if(!UTF_IS_ERROR(c)){

	1359 log_err("ERROR: isError() failed for U+%04x\n", c);

	1360 }

	1361 }

	1362 else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){

	1363 if(UTF_IS_SURROGATE(c) \|\| U_IS_SURROGATE(c) \|\| U16_IS_SURROGATE(c)){

	1364 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);

	1365 }

	1366 if(UTF_IS_VALID(c)){

	1367 log_err("ERROR: isValid() failed for U+%04x\n", c);

	1368 }

	1369 if(UTF_IS_UNICODE_CHAR(c) \|\| U_IS_UNICODE_CHAR(c)){

	1370 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);

	1371 }

	1372 if(!UTF_IS_ERROR(c)){

	1373 log_err("ERROR: isError() failed for U+%04x\n", c);

	1374 }

	1375 }

	1376 }

	1377

	1378 if(

	1379 !U_IS_BMP(0) \|\| !U_IS_BMP(0x61) \|\| !U_IS_BMP(0x20ac) \|\|

	1380 !U_IS_BMP(0xd9da) \|\| !U_IS_BMP(0xdfed) \|\| !U_IS_BMP(0xffff) \|\|

	1381 U_IS_BMP(U_SENTINEL) \|\| U_IS_BMP(0x10000) \|\| U_IS_BMP(0x50005) \|\|

	1382 U_IS_BMP(0x10ffff) \|\| U_IS_BMP(0x110000) \|\| U_IS_BMP(0x7fffffff)

	1383 ) {

	1384 log_err("error with U_IS_BMP()\n");

	1385 }

	1386

	1387 if(

	1388 U_IS_SUPPLEMENTARY(0) \|\| U_IS_SUPPLEMENTARY(0x61) \|\| U_IS_SUPPLEMENTARY( 0x20ac) \|\|

	1389 U_IS_SUPPLEMENTARY(0xd9da) \|\| U_IS_SUPPLEMENTARY(0xdfed) \|\| U_IS_SUPPLEM ENTARY(0xffff) \|\|

	1390 U_IS_SUPPLEMENTARY(U_SENTINEL) \|\| !U_IS_SUPPLEMENTARY(0x10000) \|\| !U_IS_ SUPPLEMENTARY(0x50005) \|\|

	1391 !U_IS_SUPPLEMENTARY(0x10ffff) \|\| U_IS_SUPPLEMENTARY(0x110000) \|\| U_IS_SU PPLEMENTARY(0x7fffffff)

	1392 ) {

	1393 log_err("error with U_IS_SUPPLEMENTARY()\n");

	1394 }

	1395 }

	1396

	1397 static void TestCharLength()

	1398 {

	1399 const int32_t codepoint[]={

	1400 1, 0x0061,

	1401 1, 0xe065,

	1402 1, 0x20ac,

	1403 2, 0x20402,

	1404 2, 0x23456,

	1405 2, 0x24506,

	1406 2, 0x20402,

	1407 2, 0x10402,

	1408 1, 0xd7ff,

	1409 1, 0xe000

	1410 };

	1411

	1412 int32_t i;

	1413 UBool multiple;

	1414 for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+ 2)){

	1415 UChar32 c=codepoint[i+1];

	1416 if(UTF_CHAR_LENGTH(c) != codepoint[i] \|\| U16_LENGTH(c) != codepoint[i]){

	1417 log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], UTF_CHAR_LENGTH(c));

	1418 }

	1419 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);

	1420 if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){

	1421 log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c );

	1422 }

	1423 }

	1424 }

	1425

	1426 /internal functions ----/

	1427 static int32_t MakeProp(char* str)

	1428 {

	1429 int32_t result = 0;

	1430 char* matchPosition =0;

	1431

	1432 matchPosition = strstr(tagStrings, str);

	1433 if (matchPosition == 0)

	1434 {

	1435 log_err("unrecognized type letter ");

	1436 log_err(str);

	1437 }

	1438 else

	1439 result = (int32_t)((matchPosition - tagStrings) / 2);

	1440 return result;

	1441 }

	1442

	1443 static int32_t MakeDir(char* str)

	1444 {

	1445 int32_t pos = 0;

	1446 for (pos = 0; pos < 19; pos++) {

	1447 if (strcmp(str, dirStrings[pos]) == 0) {

	1448 return pos;

	1449 }

	1450 }

	1451 return -1;

	1452 }

	1453

	1454 /* test u_charName() -------------------------------------------------------- */

	1455

	1456 static const struct {

	1457 uint32_t code;

	1458 const char name, oldName, extName, alias;

	1459 } names[]={

	1460 {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"},

	1461 {0x01a2, "LATIN CAPITAL LETTER OI",

	1462 "LATIN CAPITAL LETTER O I",

	1463 "LATIN CAPITAL LETTER OI",

	1464 "LATIN CAPITAL LETTER GHA"},

	1465 {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",

	1466 "LATIN SMALL LETTER DOTLESS J BAR HOOK",

	1467 "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" },

	1468 {0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",

	1469 "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",

	1470 "TIBETAN MARK BKA- SHOG GI MGO RGYAN"},

	1471 {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401" },

	1472 {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED" },

	1473 {0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA" },

	1474 {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH" },

	1475 {0xd800, "", "", "<lead surrogate-D800>" },

	1476 {0xdc00, "", "", "<trail surrogate-DC00>" },

	1477 {0xff08, "FULLWIDTH LEFT PARENTHESIS", "FULLWIDTH OPENING PARENTHESIS", "FUL LWIDTH LEFT PARENTHESIS" },

	1478 {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN" },

	1479 {0xffff, "", "", "<noncharacter-FFFF>" },

	1480 {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",

	1481 "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",

	1482 "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"},

	1483 {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456" }

	1484 };

	1485

	1486 static UBool

	1487 enumCharNamesFn(void *context,

	1488 UChar32 code, UCharNameChoice nameChoice,

	1489 const char *name, int32_t length) {

	1490 int32_t pCount=(int32_t )context;

	1491 const char *expected;

	1492 int i;

	1493

	1494 if(length<=0 \|\| length!=(int32_t)strlen(name)) {

	1495 /* should not be called with an empty string or invalid length */

	1496 log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);

	1497 return TRUE;

	1498 }

	1499

	1500 ++*pCount;

	1501 for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {

	1502 if(code==(UChar32)names[i].code) {

	1503 switch (nameChoice) {

	1504 case U_EXTENDED_CHAR_NAME:

	1505 if(0!=strcmp(name, names[i].extName)) {

	1506 log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);

	1507 }

	1508 break;

	1509 case U_UNICODE_CHAR_NAME:

	1510 if(0!=strcmp(name, names[i].name)) {

	1511 log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code , name, names[i].name);

	1512 }

	1513 break;

	1514 case U_UNICODE_10_CHAR_NAME:

	1515 expected=names[i].oldName;

	1516 if(expected[0]==0 \|\| 0!=strcmp(name, expected)) {

	1517 log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n" , code, name, expected);

	1518 }

	1519 break;

	1520 case U_CHAR_NAME_ALIAS:

	1521 expected=names[i].alias;

	1522 if(expected==NULL \|\| expected[0]==0 \|\| 0!=strcmp(name, expec ted)) {

	1523 log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\ n", code, name, expected);

	1524 }

	1525 break;

	1526 case U_CHAR_NAME_CHOICE_COUNT:

	1527 break;

	1528 }

	1529 break;

	1530 }

	1531 }

	1532 return TRUE;

	1533 }

	1534

	1535 struct enumExtCharNamesContext {

	1536 uint32_t length;

	1537 int32_t last;

	1538 };

	1539

	1540 static UBool

	1541 enumExtCharNamesFn(void *context,

	1542 UChar32 code, UCharNameChoice nameChoice,

	1543 const char *name, int32_t length) {

	1544 struct enumExtCharNamesContext ecncp = (struct enumExtCharNamesContext ) c ontext;

	1545

	1546 if (ecncp->last != (int32_t) code - 1) {

	1547 if (ecncp->last < 0) {

	1548 log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ex t) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1 );

	1549 } else {

	1550 log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);

	1551 }

	1552 }

	1553 ecncp->last = (int32_t) code;

	1554

	1555 if (!*name) {

	1556 log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", c ode);

	1557 }

	1558

	1559 return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);

	1560 }

	1561

	1562 /**

	1563 * This can be made more efficient by moving it into putil.c and having

	1564 * it directly access the ebcdic translation tables.

	1565 * TODO: If we get this method in putil.c, then delete it from here.

	1566 */

	1567 static UChar

	1568 u_charToUChar(char c) {

	1569 UChar uc;

	1570 u_charsToUChars(&c, &uc, 1);

	1571 return uc;

	1572 }

	1573

	1574 static void

	1575 TestCharNames() {

	1576 static char name[80];

	1577 UErrorCode errorCode=U_ZERO_ERROR;

	1578 struct enumExtCharNamesContext extContext;

	1579 const char *expected;

	1580 int32_t length;

	1581 UChar32 c;

	1582 int32_t i;

	1583

	1584 log_verbose("Testing uprv_getMaxCharNameLength()\n");

	1585 length=uprv_getMaxCharNameLength();

	1586 if(length==0) {

	1587 /* no names data available */

	1588 return;

	1589 }

	1590 if(length<83) { /* Unicode 3.2 max char name length */

	1591 log_err("uprv_getMaxCharNameLength()=%d is too short");

	1592 }

	1593 /* ### TODO same tests for max ISO comment length as for max name length */

	1594

	1595 log_verbose("Testing u_charName()\n");

	1596 for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {

	1597 /* modern Unicode character name */

	1598 length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name) , &errorCode);

	1599 if(U_FAILURE(errorCode)) {

	1600 log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(e rrorCode));

	1601 return;

	1602 }

	1603 if(length<0 \|\| 0!=strcmp(name, names[i].name) \|\| length!=(uint16_t)strle n(name)) {

	1604 log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);

	1605 }

	1606

	1607 /* find the modern name */

	1608 if (*names[i].name) {

	1609 c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);

	1610 if(U_FAILURE(errorCode)) {

	1611 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorN ame(errorCode));

	1612 return;

	1613 }

	1614 if(c!=(UChar32)names[i].code) {

	1615 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", name s[i].name, c, names[i].code);

	1616 }

	1617 }

	1618

	1619 /* Unicode 1.0 character name */

	1620 length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(na me), &errorCode);

	1621 if(U_FAILURE(errorCode)) {

	1622 log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_error Name(errorCode));

	1623 return;

	1624 }

	1625 if(length<0 \|\| (length>0 && 0!=strcmp(name, names[i].oldName)) \|\| length !=(uint16_t)strlen(name)) {

	1626 log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothi ng or %s\n", names[i].code, name, length, names[i].oldName);

	1627 }

	1628

	1629 /* find the Unicode 1.0 name if it is stored (length>0 means that we cou ld read it) */

	1630 if(names[i].oldName[0]!=0 /* && length>0 */) {

	1631 c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCod e);

	1632 if(U_FAILURE(errorCode)) {

	1633 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));

	1634 return;

	1635 }

	1636 if(c!=(UChar32)names[i].code) {

	1637 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n" , names[i].oldName, c, names[i].code);

	1638 }

	1639 }

	1640

	1641 /* Unicode character name alias */

	1642 length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);

	1643 if(U_FAILURE(errorCode)) {

	1644 log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_err orName(errorCode));

	1645 return;

	1646 }

	1647 expected=names[i].alias;

	1648 if(expected==NULL) {

	1649 expected="";

	1650 }

	1651 if(length<0 \|\| (length>0 && 0!=strcmp(name, expected)) \|\| length!=(uint1 6_t)strlen(name)) {

	1652 log_err("u_charName(0x%lx - alias) gets %s length %ld instead of not hing or %s\n",

	1653 names[i].code, name, length, expected);

	1654 }

	1655

	1656 /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */

	1657 if(expected[0]!=0 /* && length>0 */) {

	1658 c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);

	1659 if(U_FAILURE(errorCode)) {

	1660 log_err("u_charFromName(%s - alias) error %s\n",

	1661 expected, u_errorName(errorCode));

	1662 return;

	1663 }

	1664 if(c!=(UChar32)names[i].code) {

	1665 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\ n",

	1666 expected, c, names[i].code);

	1667 }

	1668 }

	1669 }

	1670

	1671 /* test u_enumCharNames() */

	1672 length=0;

	1673 errorCode=U_ZERO_ERROR;

	1674 u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &leng th, U_UNICODE_CHAR_NAME, &errorCode);

	1675 if(U_FAILURE(errorCode) \|\| length<94140) {

	1676 log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MI N_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);

	1677 }

	1678

	1679 extContext.length = 0;

	1680 extContext.last = -1;

	1681 errorCode=U_ZERO_ERROR;

	1682 u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &e xtContext, U_EXTENDED_CHAR_NAME, &errorCode);

	1683 if(U_FAILURE(errorCode) \|\| extContext.length<UCHAR_MAX_VALUE + 1) {

	1684 log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld \n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.le ngth);

	1685 }

	1686

	1687 /* test that u_charFromName() uppercases the input name, i.e., works with mi xed-case names (new in 2.0) */

	1688 if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorC ode)) {

	1689 log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") d id not find U+0061 (%s)\n", u_errorName(errorCode));

	1690 }

	1691

	1692 /* Test getCharNameCharacters */

	1693 if(!getTestOption(QUICK_OPTION)) {

	1694 enum { BUFSIZE = 256 };

	1695 UErrorCode ec = U_ZERO_ERROR;

	1696 char buf[BUFSIZE];

	1697 int32_t maxLength;

	1698 UChar32 cp;

	1699 UChar pat[BUFSIZE], dumbPat[BUFSIZE];

	1700 int32_t l1, l2;

	1701 UBool map[256];

	1702 UBool ok;

	1703

	1704 USet* set = uset_open(1, 0); /* empty set */

	1705 USet* dumb = uset_open(1, 0); /* empty set */

	1706

	1707 /*

	1708 * uprv_getCharNameCharacters() will likely return more lowercase

	1709 * letters than actual character names contain because

	1710 * it includes all the characters in lowercased names of

	1711 * general categories, for the full possible set of extended names.

	1712 */

	1713 {

	1714 USetAdder sa={

	1715 NULL,

	1716 uset_add,

	1717 uset_addRange,

	1718 uset_addString,

	1719 NULL /* don't need remove() */

	1720 };

	1721 sa.set=set;

	1722 uprv_getCharNameCharacters(&sa);

	1723 }

	1724

	1725 /* build set the dumb (but sure-fire) way */

	1726 for (i=0; i<256; ++i) {

	1727 map[i] = FALSE;

	1728 }

	1729

	1730 maxLength=0;

	1731 for (cp=0; cp<0x110000; ++cp) {

	1732 int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,

	1733 buf, BUFSIZE, &ec);

	1734 if (U_FAILURE(ec)) {

	1735 log_err("FAIL: u_charName failed when it shouldn't\n");

	1736 uset_close(set);

	1737 uset_close(dumb);

	1738 return;

	1739 }

	1740 if(len>maxLength) {

	1741 maxLength=len;

	1742 }

	1743

	1744 for (i=0; i<len; ++i) {

	1745 if (!map[(uint8_t) buf[i]]) {

	1746 uset_add(dumb, (UChar32)u_charToUChar(buf[i]));

	1747 map[(uint8_t) buf[i]] = TRUE;

	1748 }

	1749 }

	1750

	1751 /* test for leading/trailing whitespace */

	1752 if(buf[0]==' ' \|\| buf[0]=='\t' \|\| buf[len-1]==' ' \|\| buf[len-1]=='\t ') {

	1753 log_err("u_charName(U+%04x) returns a name with leading or trail ing whitespace\n", cp);

	1754 }

	1755 }

	1756

	1757 if(map[(uint8_t)'\t']) {

	1758 log_err("u_charName() returned a name with a TAB for some code point \n", cp);

	1759 }

	1760

	1761 length=uprv_getMaxCharNameLength();

	1762 if(length!=maxLength) {

	1763 log_err("uprv_getMaxCharNameLength()=%d differs from the maximum len gth %d of all extended names\n",

	1764 length, maxLength);

	1765 }

	1766

	1767 /* compare the sets. Where is my uset_equals?!! */

	1768 ok=TRUE;

	1769 for(i=0; i<256; ++i) {

	1770 if(uset_contains(set, i)!=uset_contains(dumb, i)) {

	1771 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !use t_contains(dumb, i)) {

	1772 /* ignore lowercase a-z that are in set but not in dumb */

	1773 ok=TRUE;

	1774 } else {

	1775 ok=FALSE;

	1776 break;

	1777 }

	1778 }

	1779 }

	1780

	1781 l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);

	1782 l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);

	1783 if (U_FAILURE(ec)) {

	1784 log_err("FAIL: uset_toPattern failed when it shouldn't\n");

	1785 uset_close(set);

	1786 uset_close(dumb);

	1787 return;

	1788 }

	1789

	1790 if (l1 >= BUFSIZE) {

	1791 l1 = BUFSIZE-1;

	1792 pat[l1] = 0;

	1793 }

	1794 if (l2 >= BUFSIZE) {

	1795 l2 = BUFSIZE-1;

	1796 dumbPat[l2] = 0;

	1797 }

	1798

	1799 if (!ok) {

	1800 log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",

	1801 aescstrdup(pat, l1), aescstrdup(dumbPat, l2));

	1802 } else if(getTestOption(VERBOSITY_OPTION)) {

	1803 log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescst rdup(pat, l1));

	1804 }

	1805

	1806 uset_close(set);

	1807 uset_close(dumb);

	1808 }

	1809

	1810 /* ### TODO: test error cases and other interesting things */

	1811 }

	1812

	1813 /* test u_isMirrored() and u_charMirror() ----------------------------------- */

	1814

	1815 static void

	1816 TestMirroring() {

	1817 USet *set;

	1818 UErrorCode errorCode;

	1819

	1820 UChar32 start, end, c2, c3;

	1821 int32_t i;

	1822

	1823 U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);

	1824

	1825 U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);

	1826

	1827 log_verbose("Testing u_isMirrored()\n");

	1828 if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_i sMirrored(0x232a) &&

	1829 !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && ! u_isMirrored(0x3400)

	1830 )

	1831 ) {

	1832 log_err("u_isMirrored() does not work correctly\n");

	1833 }

	1834

	1835 log_verbose("Testing u_charMirror()\n");

	1836 if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x 208d)==0x208e && u_charMirror(0x3017)==0x3016 &&

	1837 u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirro r(0x29F5)==0x2215 && /* large delta between the code points */

	1838 u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror( 0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&

	1839 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrig endum6.html */

	1840 u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charM irror(0x301d)==0x301d

	1841 )

	1842 ) {

	1843 log_err("u_charMirror() does not work correctly\n");

	1844 }

	1845

	1846 /* verify that Bidi_Mirroring_Glyph roundtrips */

	1847 errorCode=U_ZERO_ERROR;

	1848 set=uset_openPattern(mirroredPattern, 17, &errorCode);

	1849

	1850 if (U_FAILURE(errorCode)) {

	1851 log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\ n");

	1852 } else {

	1853 for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i ) {

	1854 do {

	1855 c2=u_charMirror(start);

	1856 c3=u_charMirror(c2);

	1857 if(c3!=start) {

	1858 log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx ->U+%04lx\n", (long)start, (long)c2, (long)c3);

	1859 }

	1860 } while(++start<=end);

	1861 }

	1862 }

	1863

	1864 uset_close(set);

	1865 }

	1866

	1867

	1868 struct RunTestData

	1869 {

	1870 const char *runText;

	1871 UScriptCode runCode;

	1872 };

	1873

	1874 typedef struct RunTestData RunTestData;

	1875

	1876 static void

	1877 CheckScriptRuns(UScriptRun scriptRun, int32_t runStarts, const RunTestData *te stData, int32_t nRuns,

	1878 const char *prefix)

	1879 {

	1880 int32_t run, runStart, runLimit;

	1881 UScriptCode runCode;

	1882

	1883 /* iterate over all the runs */

	1884 run = 0;

	1885 while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {

	1886 if (runStart != runStarts[run]) {

	1887 log_err("%s: incorrect start offset for run %d: expected %d, got %d\ n",

	1888 prefix, run, runStarts[run], runStart);

	1889 }

	1890

	1891 if (runLimit != runStarts[run + 1]) {

	1892 log_err("%s: incorrect limit offset for run %d: expected %d, got %d\ n",

	1893 prefix, run, runStarts[run + 1], runLimit);

	1894 }

	1895

	1896 if (runCode != testData[run].runCode) {

	1897 log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\ "\n",

	1898 prefix, run, uscript_getName(testData[run].runCode), uscript_get Name(runCode));

	1899 }

	1900

	1901 run += 1;

	1902

	1903 /* stop when we've seen all the runs we expect to see */

	1904 if (run >= nRuns) {

	1905 break;

	1906 }

	1907 }

	1908

	1909 /* Complain if we didn't see then number of runs we expected */

	1910 if (run != nRuns) {

	1911 log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, r un, nRuns);

	1912 }

	1913 }

	1914

	1915 static void

	1916 TestUScriptRunAPI()

	1917 {

	1918 static const RunTestData testData1[] = {

	1919 {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCR IPT_DEVANAGARI},

	1920 {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARA BIC},

	1921 {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYR ILLIC},

	1922 {"English (", USCRIPT_LATIN},

	1923 {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},

	1924 {") ", USCRIPT_LATIN},

	1925 {"\\u6F22\\u5B75", USCRIPT_HAN},

	1926 {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},

	1927 {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},

	1928 {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}

	1929 };

	1930

	1931 static const RunTestData testData2[] = {

	1932 {"((((((((((abc))))))))))", USCRIPT_LATIN}

	1933 };

	1934

	1935 static const struct {

	1936 const RunTestData *testData;

	1937 int32_t nRuns;

	1938 } testDataEntries[] = {

	1939 {testData1, LENGTHOF(testData1)},

	1940 {testData2, LENGTHOF(testData2)}

	1941 };

	1942

	1943 static const int32_t nTestEntries = LENGTHOF(testDataEntries);

	1944 int32_t testEntry;

	1945

	1946 for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {

	1947 UChar testString[1024];

	1948 int32_t runStarts[256];

	1949 int32_t nTestRuns = testDataEntries[testEntry].nRuns;

	1950 const RunTestData *testData = testDataEntries[testEntry].testData;

	1951

	1952 int32_t run, stringLimit;

	1953 UScriptRun *scriptRun = NULL;

	1954 UErrorCode err;

	1955

	1956 /*

	1957 * Fill in the test string and the runStarts array.

	1958 */

	1959 stringLimit = 0;

	1960 for (run = 0; run < nTestRuns; run += 1) {

	1961 runStarts[run] = stringLimit;

	1962 stringLimit += u_unescape(testData[run].runText, &testString[stringL imit], 1024 - stringLimit);

	1963 /stringLimit -= 1;/

	1964 }

	1965

	1966 /* The limit of the last run */

	1967 runStarts[nTestRuns] = stringLimit;

	1968

	1969 /*

	1970 * Make sure that calling uscript_OpenRun with a NULL text pointer

	1971 * and a non-zero text length returns the correct error.

	1972 */

	1973 err = U_ZERO_ERROR;

	1974 scriptRun = uscript_openRun(NULL, stringLimit, &err);

	1975

	1976 if (err != U_ILLEGAL_ARGUMENT_ERROR) {

	1977 log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instea d of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));

	1978 }

	1979

	1980 if (scriptRun != NULL) {

	1981 log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NUL L result.\n");

	1982 uscript_closeRun(scriptRun);

	1983 }

	1984

	1985 /*

	1986 * Make sure that calling uscript_OpenRun with a non-NULL text pointer

	1987 * and a zero text length returns the correct error.

	1988 */

	1989 err = U_ZERO_ERROR;

	1990 scriptRun = uscript_openRun(testString, 0, &err);

	1991

	1992 if (err != U_ILLEGAL_ARGUMENT_ERROR) {

	1993 log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));

	1994 }

	1995

	1996 if (scriptRun != NULL) {

	1997 log_err("uscript_openRun(testString, 0, &err) returned a non-NULL re sult.\n");

	1998 uscript_closeRun(scriptRun);

	1999 }

	2000

	2001 /*

	2002 * Make sure that calling uscript_openRun with a NULL text pointer

	2003 * and a zero text length doesn't return an error.

	2004 */

	2005 err = U_ZERO_ERROR;

	2006 scriptRun = uscript_openRun(NULL, 0, &err);

	2007

	2008 if (U_FAILURE(err)) {

	2009 log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_erro rName(err));

	2010 }

	2011

	2012 /* Make sure that the empty iterator doesn't find any runs */

	2013 if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {

	2014 log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n ");

	2015 }

	2016

	2017 /*

	2018 * Make sure that calling uscript_setRunText with a NULL text pointer

	2019 * and a non-zero text length returns the correct error.

	2020 */

	2021 err = U_ZERO_ERROR;

	2022 uscript_setRunText(scriptRun, NULL, stringLimit, &err);

	2023

	2024 if (err != U_ILLEGAL_ARGUMENT_ERROR) {

	2025 log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) retu rned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));

	2026 }

	2027

	2028 /*

	2029 * Make sure that calling uscript_OpenRun with a non-NULL text pointer

	2030 * and a zero text length returns the correct error.

	2031 */

	2032 err = U_ZERO_ERROR;

	2033 uscript_setRunText(scriptRun, testString, 0, &err);

	2034

	2035 if (err != U_ILLEGAL_ARGUMENT_ERROR) {

	2036 log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));

	2037 }

	2038

	2039 /*

	2040 * Now call uscript_setRunText on the empty iterator

	2041 * and make sure that it works.

	2042 */

	2043 err = U_ZERO_ERROR;

	2044 uscript_setRunText(scriptRun, testString, stringLimit, &err);

	2045

	2046 if (U_FAILURE(err)) {

	2047 log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(e rr));

	2048 } else {

	2049 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_ setRunText");

	2050 }

	2051

	2052 uscript_closeRun(scriptRun);

	2053

	2054 /*

	2055 * Now open an interator over the testString

	2056 * using uscript_openRun and make sure that it works

	2057 */

	2058 scriptRun = uscript_openRun(testString, stringLimit, &err);

	2059

	2060 if (U_FAILURE(err)) {

	2061 log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err) );

	2062 } else {

	2063 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_ openRun");

	2064 }

	2065

	2066 /* Now reset the iterator, and make sure

	2067 * that it still works.

	2068 */

	2069 uscript_resetRun(scriptRun);

	2070

	2071 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_rese tRun");

	2072

	2073 /* Close the iterator */

	2074 uscript_closeRun(scriptRun);

	2075 }

	2076 }

	2077

	2078 /* test additional, non-core properties */

	2079 static void

	2080 TestAdditionalProperties() {

	2081 /* test data for u_charAge() */

	2082 static const struct {

	2083 UChar32 c;

	2084 UVersionInfo version;

	2085 } charAges[]={

	2086 {0x41, { 1, 1, 0, 0 }},

	2087 {0xffff, { 1, 1, 0, 0 }},

	2088 {0x20ab, { 2, 0, 0, 0 }},

	2089 {0x2fffe, { 2, 0, 0, 0 }},

	2090 {0x20ac, { 2, 1, 0, 0 }},

	2091 {0xfb1d, { 3, 0, 0, 0 }},

	2092 {0x3f4, { 3, 1, 0, 0 }},

	2093 {0x10300, { 3, 1, 0, 0 }},

	2094 {0x220, { 3, 2, 0, 0 }},

	2095 {0xff60, { 3, 2, 0, 0 }}

	2096 };

	2097

	2098 /* test data for u_hasBinaryProperty() */

	2099 static const int32_t

	2100 props[][3]={ /* code point, property, value */

	2101 { 0x0627, UCHAR_ALPHABETIC, TRUE },

	2102 { 0x1034a, UCHAR_ALPHABETIC, TRUE },

	2103 { 0x2028, UCHAR_ALPHABETIC, FALSE },

	2104

	2105 { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },

	2106 { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },

	2107

	2108 { 0x202c, UCHAR_BIDI_CONTROL, TRUE },

	2109 { 0x202f, UCHAR_BIDI_CONTROL, FALSE },

	2110

	2111 { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },

	2112 { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },

	2113

	2114 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrige ndum6.html */

	2115 { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },

	2116 { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },

	2117 { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },

	2118 { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },

	2119

	2120 { 0x058a, UCHAR_DASH, TRUE },

	2121 { 0x007e, UCHAR_DASH, FALSE },

	2122

	2123 { 0x0c4d, UCHAR_DIACRITIC, TRUE },

	2124 { 0x3000, UCHAR_DIACRITIC, FALSE },

	2125

	2126 { 0x0e46, UCHAR_EXTENDER, TRUE },

	2127 { 0x0020, UCHAR_EXTENDER, FALSE },

	2128

	2129 #if !UCONFIG_NO_NORMALIZATION

	2130 { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },

	2131 { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },

	2132 { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },

	2133

	2134 { 0x110a, UCHAR_NFD_INERT, TRUE }, /* Jamo L */

	2135 { 0x0308, UCHAR_NFD_INERT, FALSE },

	2136

	2137 { 0x1164, UCHAR_NFKD_INERT, TRUE }, /* Jamo V */

	2138 { 0x1d79d, UCHAR_NFKD_INERT, FALSE }, /* math compat version of xi */

	2139

	2140 { 0x0021, UCHAR_NFC_INERT, TRUE }, /* ! */

	2141 { 0x0061, UCHAR_NFC_INERT, FALSE }, /* a */

	2142 { 0x00e4, UCHAR_NFC_INERT, FALSE }, /* a-umlaut */

	2143 { 0x0102, UCHAR_NFC_INERT, FALSE }, /* a-breve */

	2144 { 0xac1c, UCHAR_NFC_INERT, FALSE }, /* Hangul LV */

	2145 { 0xac1d, UCHAR_NFC_INERT, TRUE }, /* Hangul LVT */

	2146

	2147 { 0x1d79d, UCHAR_NFKC_INERT, FALSE }, /* math compat version of xi */

	2148 { 0x2a6d6, UCHAR_NFKC_INERT, TRUE }, /* Han, last of CJK ext. B */

	2149

	2150 { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },

	2151 { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },

	2152 { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */

	2153 { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */

	2154 { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */

	2155 { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */

	2156 #endif

	2157

	2158 { 0x0044, UCHAR_HEX_DIGIT, TRUE },

	2159 { 0xff46, UCHAR_HEX_DIGIT, TRUE },

	2160 { 0x0047, UCHAR_HEX_DIGIT, FALSE },

	2161

	2162 { 0x30fb, UCHAR_HYPHEN, TRUE },

	2163 { 0xfe58, UCHAR_HYPHEN, FALSE },

	2164

	2165 { 0x2172, UCHAR_ID_CONTINUE, TRUE },

	2166 { 0x0307, UCHAR_ID_CONTINUE, TRUE },

	2167 { 0x005c, UCHAR_ID_CONTINUE, FALSE },

	2168

	2169 { 0x2172, UCHAR_ID_START, TRUE },

	2170 { 0x007a, UCHAR_ID_START, TRUE },

	2171 { 0x0039, UCHAR_ID_START, FALSE },

	2172

	2173 { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },

	2174 { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },

	2175 { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },

	2176

	2177 { 0x200c, UCHAR_JOIN_CONTROL, TRUE },

	2178 { 0x2029, UCHAR_JOIN_CONTROL, FALSE },

	2179

	2180 { 0x1d7bc, UCHAR_LOWERCASE, TRUE },

	2181 { 0x0345, UCHAR_LOWERCASE, TRUE },

	2182 { 0x0030, UCHAR_LOWERCASE, FALSE },

	2183

	2184 { 0x1d7a9, UCHAR_MATH, TRUE },

	2185 { 0x2135, UCHAR_MATH, TRUE },

	2186 { 0x0062, UCHAR_MATH, FALSE },

	2187

	2188 { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },

	2189 { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },

	2190 { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },

	2191

	2192 { 0x0022, UCHAR_QUOTATION_MARK, TRUE },

	2193 { 0xff62, UCHAR_QUOTATION_MARK, TRUE },

	2194 { 0xd840, UCHAR_QUOTATION_MARK, FALSE },

	2195

	2196 { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },

	2197 { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },

	2198

	2199 { 0x1d44a, UCHAR_UPPERCASE, TRUE },

	2200 { 0x2162, UCHAR_UPPERCASE, TRUE },

	2201 { 0x0345, UCHAR_UPPERCASE, FALSE },

	2202

	2203 { 0x0020, UCHAR_WHITE_SPACE, TRUE },

	2204 { 0x202f, UCHAR_WHITE_SPACE, TRUE },

	2205 { 0x3001, UCHAR_WHITE_SPACE, FALSE },

	2206

	2207 { 0x0711, UCHAR_XID_CONTINUE, TRUE },

	2208 { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },

	2209 { 0x007c, UCHAR_XID_CONTINUE, FALSE },

	2210

	2211 { 0x16ee, UCHAR_XID_START, TRUE },

	2212 { 0x23456, UCHAR_XID_START, TRUE },

	2213 { 0x1d1aa, UCHAR_XID_START, FALSE },

	2214

	2215 /*

	2216 * Version break:

	2217 * The following properties are only supported starting with the

	2218 * Unicode version indicated in the second field.

	2219 */

	2220 { -1, 0x320, 0 },

	2221

	2222 { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },

	2223 { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },

	2224 { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },

	2225

	2226 { 0x0149, UCHAR_DEPRECATED, TRUE }, /* changed in Unicode 5.2 */

	2227 { 0x0341, UCHAR_DEPRECATED, FALSE }, /* changed in Unicode 5.2 */

	2228 { 0xe0041, UCHAR_DEPRECATED, TRUE }, /* changed from Unicode 5 to 5.1 */

	2229 { 0xe0100, UCHAR_DEPRECATED, FALSE },

	2230

	2231 { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },

	2232 { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },

	2233 { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },

	2234 { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE }, /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */

	2235

	2236 { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },

	2237 { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },

	2238 { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE }, /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */

	2239 { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },

	2240

	2241 { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },

	2242 { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },

	2243

	2244 { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },

	2245 { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },

	2246

	2247 { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },

	2248 { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },

	2249

	2250 { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },

	2251 { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },

	2252

	2253 { 0x2e9b, UCHAR_RADICAL, TRUE },

	2254 { 0x4e00, UCHAR_RADICAL, FALSE },

	2255

	2256 { 0x012f, UCHAR_SOFT_DOTTED, TRUE },

	2257 { 0x0049, UCHAR_SOFT_DOTTED, FALSE },

	2258

	2259 { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },

	2260 { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },

	2261

	2262 { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */

	2263

	2264 { 0x002e, UCHAR_S_TERM, TRUE },

	2265 { 0x0061, UCHAR_S_TERM, FALSE },

	2266

	2267 { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },

	2268 { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },

	2269 { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },

	2270 { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },

	2271

	2272 /* enum/integer type properties */

	2273

	2274 /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */

	2275 /* test default Bidi classes for unassigned code points */

	2276 { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

	2277 { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

	2278 { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

	2279 { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Uni code 5.0 */

	2280 { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */

	2281 { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

	2282 { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

	2283 { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

	2284 { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

	2285 { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

	2286 { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

	2287

	2288 { 0x0605, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },

	2289 { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },

	2290 { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },

	2291 { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },

	2292 { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },

	2293 { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },

	2294 { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },

	2295 { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },

	2296

	2297 { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },

	2298 { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },

	2299 { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },

	2300 { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },

	2301 { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },

	2302 { 0x1AFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },

	2303 { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },

	2304 { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },

	2305 { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },

	2306 { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },

	2307 { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },

	2308

	2309 /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in Tes tUnicodeData() */

	2310 { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },

	2311

	2312 { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },

	2313 { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },

	2314 { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },

	2315 { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },

	2316 { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },

	2317 { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },

	2318 { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },

	2319 { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },

	2320 { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },

	2321

	2322 { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },

	2323 { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },

	2324 { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },

	2325 { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },

	2326 { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },

	2327 { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },

	2328 { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },

	2329 { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },

	2330 { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },

	2331 { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },

	2332 { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },

	2333 { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },

	2334 { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },

	2335 { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */

	2336 { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },

	2337 { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },

	2338 { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },

	2339

	2340 /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeD ata() */

	2341 { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },

	2342 { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER }, /* changed in Un icode 5.2 */

	2343

	2344 { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },

	2345 { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },

	2346 { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },

	2347 { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },

	2348 { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },

	2349

	2350 { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },

	2351 { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },

	2352 { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },

	2353 { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },

	2354 { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },

	2355 { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },

	2356 { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },

	2357 { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },

	2358

	2359 /* TestUnicodeData() verifies that no assigned character has "XX" (unkno wn) */

	2360 { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },

	2361 { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },

	2362 { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },

	2363 { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },

	2364 { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },

	2365 { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },

	2366 { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },

	2367 { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },

	2368 { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },

	2369 { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },

	2370 { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },

	2371 { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },

	2372 { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },

	2373 { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },

	2374 { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },

	2375

	2376 /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */

	2377

	2378 /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */

	2379

	2380 { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },

	2381 { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },

	2382 { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },

	2383 { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },

	2384 { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* chang ed in Unicode 5.2 */

	2385 { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* chang ed in Unicode 5.2 */

	2386 { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },

	2387

	2388 { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },

	2389 { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* chang ed in Unicode 5.2 */

	2390 { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* chang ed in Unicode 5.2 */

	2391 { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },

	2392

	2393 { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },

	2394 { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },

	2395 { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },

	2396 { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },

	2397 { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* chang ed in Unicode 5.2 */

	2398 { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* chang ed in Unicode 5.2 */

	2399

	2400 { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },

	2401 { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* chang ed in Unicode 5.2 */

	2402 { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* chang ed in Unicode 5.2 */

	2403 { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },

	2404

	2405 { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },

	2406 { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },

	2407 { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },

	2408 { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },

	2409 { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* chang ed in Unicode 5.2 */

	2410 { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* chang ed in Unicode 5.2 */

	2411 { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },

	2412

	2413 { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },

	2414 { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* chang ed in Unicode 5.2 */

	2415 { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* chang ed in Unicode 5.2 */

	2416 { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },

	2417

	2418 { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },

	2419 { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },

	2420 { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },

	2421 { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },

	2422

	2423 { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },

	2424 { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },

	2425 { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },

	2426 { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },

	2427 { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },

	2428

	2429 { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },

	2430

	2431 { -1, 0x410, 0 }, /* version break for Unicode 4.1 */

	2432

	2433 { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },

	2434 { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },

	2435 { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },

	2436

	2437 { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },

	2438 { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },

	2439 { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },

	2440 { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },

	2441 { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },

	2442

	2443 { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },

	2444 { 0x2c8e, UCHAR_BLOCK, UBLOCK_COPTIC },

	2445 { 0xfe17, UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },

	2446

	2447 { 0x1a00, UCHAR_SCRIPT, USCRIPT_BUGINESE },

	2448 { 0x2cea, UCHAR_SCRIPT, USCRIPT_COPTIC },

	2449 { 0xa82b, UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },

	2450 { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },

	2451

	2452 { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },

	2453 { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },

	2454 { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },

	2455 { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },

	2456 { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },

	2457 { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },

	2458

	2459 { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },

	2460 { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },

	2461 { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },

	2462 { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },

	2463

	2464 { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },

	2465 { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },

	2466 { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },

	2467 { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },

	2468

	2469 { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },

	2470 { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },

	2471 { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },

	2472 { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },

	2473

	2474 { -1, 0x520, 0 }, /* version break for Unicode 5.2 */

	2475

	2476 /* test some script codes >127 */

	2477 { 0xa6e6, UCHAR_SCRIPT, USCRIPT_BAMUM },

	2478 { 0xa4d0, UCHAR_SCRIPT, USCRIPT_LISU },

	2479 { 0x10a7f, UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },

	2480

	2481 { -1, 0x600, 0 }, /* version break for Unicode 6.0 */

	2482

	2483 /* value changed in Unicode 6.0 */

	2484 { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },

	2485

	2486 /* undefined UProperty values */

	2487 { 0x61, 0x4a7, 0 },

	2488 { 0x234bc, 0x15ed, 0 }

	2489 };

	2490

	2491 UVersionInfo version;

	2492 UChar32 c;

	2493 int32_t i, result, uVersion;

	2494 UProperty which;

	2495

	2496 /* what is our Unicode version? */

	2497 u_getUnicodeVersion(version);

	2498 uVersion=((int32_t)version[0]<<8)\|(version[1]<<4)\|version[2]; /* major/minor /update version numbers */

	2499

	2500 u_charAge(0x20, version);

	2501 if(version[0]==0) {

	2502 /* no additional properties available */

	2503 log_err("TestAdditionalProperties: no additional properties available, n ot tested\n");

	2504 return;

	2505 }

	2506

	2507 /* test u_charAge() */

	2508 for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {

	2509 u_charAge(charAges[i].c, version);

	2510 if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {

	2511 log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { % u, %u, %u, %u }\n",

	2512 charAges[i].c,

	2513 version[0], version[1], version[2], version[3],

	2514 charAges[i].version[0], charAges[i].version[1], charAges[i].vers ion[2], charAges[i].version[3]);

	2515 }

	2516 }

	2517

	2518 if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 \|\|

	2519 u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 \|\|

	2520 u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 \|\| /* j2478 */

	2521 u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 \|\| /JB#2410/

	2522 u_getIntPropertyMinValue(0x2345)!=0

	2523 ) {

	2524 log_err("error: u_getIntPropertyMinValue() wrong\n");

	2525 }

	2526 if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {

	2527 log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");

	2528 }

	2529 if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {

	2530 log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");

	2531 }

	2532 if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {

	2533 log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n") ;

	2534 }

	2535 if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_CO UNT-1 ) {

	2536 log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");

	2537 }

	2538 if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {

	2539 log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");

	2540 }

	2541 if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {

	2542 log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");

	2543 }

	2544 if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {

	2545 log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");

	2546 }

	2547 if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {

	2548 log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");

	2549 }

	2550 if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGOR Y_COUNT-1) {

	2551 log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n ");

	2552 }

	2553 if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUN T-1) {

	2554 log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wro ng\n");

	2555 }

	2556 if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_CO UNT-1) {

	2557 log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) w rong\n");

	2558 }

	2559 if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {

	2560 log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n") ;

	2561 }

	2562 if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {

	2563 log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");

	2564 }

	2565 /JB#2410/

	2566 if( u_getIntPropertyMaxValue(0x2345)!=-1) {

	2567 log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");

	2568 }

	2569 if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_CO UNT - 1)) {

	2570 log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong \n");

	2571 }

	2572 if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) != (int32_t) (U_JG_COUNT -1)) {

	2573 log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");

	2574 }

	2575 if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1 )) {

	2576 log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");

	2577 }

	2578 if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUN T -1)) {

	2579 log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n ");

	2580 }

	2581

	2582 /* test u_hasBinaryProperty() and u_getIntPropertyValue() */

	2583 for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {

	2584 const char *whichName;

	2585

	2586 if(props[i][0]<0) {

	2587 /* Unicode version break */

	2588 if(uVersion<props[i][1]) {

	2589 break; /* do not test properties that are not yet supported */

	2590 } else {

	2591 continue; /* skip this row */

	2592 }

	2593 }

	2594

	2595 c=(UChar32)props[i][0];

	2596 which=(UProperty)props[i][1];

	2597 whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);

	2598

	2599 if(which<UCHAR_INT_START) {

	2600 result=u_hasBinaryProperty(c, which);

	2601 if(result!=props[i][2]) {

	2602 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wron g (props[%d]) - (Are you missing data?)\n",

	2603 c, whichName, result, i);

	2604 }

	2605 }

	2606

	2607 result=u_getIntPropertyValue(c, which);

	2608 if(result!=props[i][2]) {

	2609 log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",

	2610 c, whichName, result, props[i][2], i);

	2611 }

	2612

	2613 /* test separate functions, too */

	2614 switch((UProperty)props[i][1]) {

	2615 case UCHAR_ALPHABETIC:

	2616 if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {

	2617 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d]) \n",

	2618 props[i][0], result, i);

	2619 }

	2620 break;

	2621 case UCHAR_LOWERCASE:

	2622 if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {

	2623 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\ n",

	2624 props[i][0], result, i);

	2625 }

	2626 break;

	2627 case UCHAR_UPPERCASE:

	2628 if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {

	2629 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\ n",

	2630 props[i][0], result, i);

	2631 }

	2632 break;

	2633 case UCHAR_WHITE_SPACE:

	2634 if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {

	2635 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d]) \n",

	2636 props[i][0], result, i);

	2637 }

	2638 break;

	2639 default:

	2640 break;

	2641 }

	2642 }

	2643 }

	2644

	2645 static void

	2646 TestNumericProperties(void) {

	2647 /* see UnicodeData.txt, DerivedNumericValues.txt */

	2648 static const struct {

	2649 UChar32 c;

	2650 int32_t type;

	2651 double numValue;

	2652 } values[]={

	2653 { 0x0F33, U_NT_NUMERIC, -1./2. },

	2654 { 0x0C66, U_NT_DECIMAL, 0 },

	2655 { 0x96f6, U_NT_NUMERIC, 0 },

	2656 { 0xa833, U_NT_NUMERIC, 1./16. },

	2657 { 0x2152, U_NT_NUMERIC, 1./10. },

	2658 { 0x2151, U_NT_NUMERIC, 1./9. },

	2659 { 0x1245f, U_NT_NUMERIC, 1./8. },

	2660 { 0x2150, U_NT_NUMERIC, 1./7. },

	2661 { 0x2159, U_NT_NUMERIC, 1./6. },

	2662 { 0x09f6, U_NT_NUMERIC, 3./16. },

	2663 { 0x2155, U_NT_NUMERIC, 1./5. },

	2664 { 0x00BD, U_NT_NUMERIC, 1./2. },

	2665 { 0x0031, U_NT_DECIMAL, 1. },

	2666 { 0x4e00, U_NT_NUMERIC, 1. },

	2667 { 0x58f1, U_NT_NUMERIC, 1. },

	2668 { 0x10320, U_NT_NUMERIC, 1. },

	2669 { 0x0F2B, U_NT_NUMERIC, 3./2. },

	2670 { 0x00B2, U_NT_DIGIT, 2. },

	2671 { 0x5f10, U_NT_NUMERIC, 2. },

	2672 { 0x1813, U_NT_DECIMAL, 3. },

	2673 { 0x5f0e, U_NT_NUMERIC, 3. },

	2674 { 0x2173, U_NT_NUMERIC, 4. },

	2675 { 0x8086, U_NT_NUMERIC, 4. },

	2676 { 0x278E, U_NT_DIGIT, 5. },

	2677 { 0x1D7F2, U_NT_DECIMAL, 6. },

	2678 { 0x247A, U_NT_DIGIT, 7. },

	2679 { 0x7396, U_NT_NUMERIC, 9. },

	2680 { 0x1372, U_NT_NUMERIC, 10. },

	2681 { 0x216B, U_NT_NUMERIC, 12. },

	2682 { 0x16EE, U_NT_NUMERIC, 17. },

	2683 { 0x249A, U_NT_NUMERIC, 19. },

	2684 { 0x303A, U_NT_NUMERIC, 30. },

	2685 { 0x5345, U_NT_NUMERIC, 30. },

	2686 { 0x32B2, U_NT_NUMERIC, 37. },

	2687 { 0x1375, U_NT_NUMERIC, 40. },

	2688 { 0x10323, U_NT_NUMERIC, 50. },

	2689 { 0x0BF1, U_NT_NUMERIC, 100. },

	2690 { 0x964c, U_NT_NUMERIC, 100. },

	2691 { 0x217E, U_NT_NUMERIC, 500. },

	2692 { 0x2180, U_NT_NUMERIC, 1000. },

	2693 { 0x4edf, U_NT_NUMERIC, 1000. },

	2694 { 0x2181, U_NT_NUMERIC, 5000. },

	2695 { 0x137C, U_NT_NUMERIC, 10000. },

	2696 { 0x4e07, U_NT_NUMERIC, 10000. },

	2697 { 0x4ebf, U_NT_NUMERIC, 100000000. },

	2698 { 0x5146, U_NT_NUMERIC, 1000000000000. },

	2699 { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },

	2700 { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },

	2701 { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },

	2702 { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },

	2703 { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },

	2704 { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },

	2705 { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },

	2706 { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }

	2707 };

	2708

	2709 double nv;

	2710 UChar32 c;

	2711 int32_t i, type;

	2712

	2713 for(i=0; i<LENGTHOF(values); ++i) {

	2714 c=values[i].c;

	2715 type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);

	2716 nv=u_getNumericValue(c);

	2717

	2718 if(type!=values[i].type) {

	2719 log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, va lues[i].type);

	2720 }

	2721 if(0.000001 <= fabs(nv - values[i].numValue)) {

	2722 log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, value s[i].numValue);

	2723 }

	2724 }

	2725 }

	2726

	2727 /**

	2728 * Test the property names and property value names API.

	2729 */

	2730 static void

	2731 TestPropertyNames(void) {

	2732 int32_t p, v, choice=0, rev;

	2733 UBool atLeastSomething = FALSE;

	2734

	2735 for (p=0; ; ++p) {

	2736 UProperty propEnum = (UProperty)p;

	2737 UBool sawProp = FALSE;

	2738 if(p > 10 && !atLeastSomething) {

	2739 log_data_err("Never got anything after 10 tries.\nYour data is probabl y fried. Quitting this test\n", p, choice);

	2740 return;

	2741 }

	2742

	2743 for (choice=0; ; ++choice) {

	2744 const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice) choice);

	2745 if (name) {

	2746 if (!sawProp)

	2747 log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);

	2748 log_verbose("%d=\"%s\"", choice, name);

	2749 sawProp = TRUE;

	2750 atLeastSomething = TRUE;

	2751

	2752 /* test reverse mapping */

	2753 rev = u_getPropertyEnum(name);

	2754 if (rev != p) {

	2755 log_err("Property round-trip failure: %d -> %s -> %d\n",

	2756 p, name, rev);

	2757 }

	2758 }

	2759 if (!name && choice>0) break;

	2760 }

	2761 if (sawProp) {

	2762 /* looks like a valid property; check the values */

	2763 const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME );

	2764 int32_t max = 0;

	2765 if (p == UCHAR_CANONICAL_COMBINING_CLASS) {

	2766 max = 255;

	2767 } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {

	2768 /* it's far too slow to iterate all the way up to

	2769 the real max, U_GC_P_MASK */

	2770 max = U_GC_NL_MASK;

	2771 } else if (p == UCHAR_BLOCK) {

	2772 /* UBlockCodes, unlike other values, start at 1 */

	2773 max = 1;

	2774 }

	2775 log_verbose("\n");

	2776 for (v=-1; ; ++v) {

	2777 UBool sawValue = FALSE;

	2778 for (choice=0; ; ++choice) {

	2779 const char* vname = u_getPropertyValueName(propEnum, v, (UPr opertyNameChoice)choice);

	2780 if (vname) {

	2781 if (!sawValue) log_verbose(" %s, value %d:", pname, v);

	2782 log_verbose("%d=\"%s\"", choice, vname);

	2783 sawValue = TRUE;

	2784

	2785 /* test reverse mapping */

	2786 rev = u_getPropertyValueEnum(propEnum, vname);

	2787 if (rev != v) {

	2788 log_err("Value round-trip failure (%s): %d -> %s -> %d\n",

	2789 pname, v, vname, rev);

	2790 }

	2791 }

	2792 if (!vname && choice>0) break;

	2793 }

	2794 if (sawValue) {

	2795 log_verbose("\n");

	2796 }

	2797 if (!sawValue && v>=max) break;

	2798 }

	2799 }

	2800 if (!sawProp) {

	2801 if (p>=UCHAR_STRING_LIMIT) {

	2802 break;

	2803 } else if (p>=UCHAR_DOUBLE_LIMIT) {

	2804 p = UCHAR_STRING_START - 1;

	2805 } else if (p>=UCHAR_MASK_LIMIT) {

	2806 p = UCHAR_DOUBLE_START - 1;

	2807 } else if (p>=UCHAR_INT_LIMIT) {

	2808 p = UCHAR_MASK_START - 1;

	2809 } else if (p>=UCHAR_BINARY_LIMIT) {

	2810 p = UCHAR_INT_START - 1;

	2811 }

	2812 }

	2813 }

	2814 }

	2815

	2816 /**

	2817 * Test the property values API. See JB#2410.

	2818 */

	2819 static void

	2820 TestPropertyValues(void) {

	2821 int32_t i, p, min, max;

	2822 UErrorCode ec;

	2823

	2824 /* Min should be 0 for everything. */

	2825 /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */

	2826 for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {

	2827 UProperty propEnum = (UProperty)p;

	2828 min = u_getIntPropertyMinValue(propEnum);

	2829 if (min != 0) {

	2830 if (p == UCHAR_BLOCK) {

	2831 /* This is okay...for now. See JB#2487.

	2832 TODO Update this for JB#2487. */

	2833 } else {

	2834 const char* name;

	2835 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);

	2836 if (name == NULL)

	2837 name = "<ERROR>";

	2838 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",

	2839 name, min);

	2840 }

	2841 }

	2842 }

	2843

	2844 if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 \|\|

	2845 u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {

	2846 log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK ) is wrong\n");

	2847 }

	2848

	2849 /* Max should be -1 for invalid properties. */

	2850 max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);

	2851 if (max != -1) {

	2852 log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",

	2853 max);

	2854 }

	2855

	2856 /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */

	2857 for (i=0; i<2; ++i) {

	2858 int32_t script;

	2859 const char* desc;

	2860 ec = U_ZERO_ERROR;

	2861 switch (i) {

	2862 case 0:

	2863 script = uscript_getScript(-1, &ec);

	2864 desc = "uscript_getScript(-1)";

	2865 break;

	2866 case 1:

	2867 script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);

	2868 desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";

	2869 break;

	2870 default:

	2871 log_err("Internal test error. Too many scripts\n");

	2872 return;

	2873 }

	2874 /* We don't explicitly test ec. It should be U_FAILURE but it

	2875 isn't documented as such. */

	2876 if (script != (int32_t)USCRIPT_INVALID_CODE) {

	2877 log_err("FAIL: %s = %d, exp. 0\n",

	2878 desc, script);

	2879 }

	2880 }

	2881 }

	2882

	2883 /* various tests for consistency of UCD data and API behavior */

	2884 static void

	2885 TestConsistency() {

	2886 char buffer[300];

	2887 USet set1, set2, set3, set4;

	2888 UErrorCode errorCode;

	2889

	2890 UChar32 start, end;

	2891 int32_t i, length;

	2892

	2893 U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);

	2894 U_STRING_DECL(dashPattern, "[:Dash:]", 8);

	2895 U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);

	2896 U_STRING_DECL(formatPattern, "[:Cf:]", 6);

	2897 U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);

	2898

	2899 U_STRING_DECL(mathBlocksPattern,

	2900 "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Sym bols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathe matical Operators:][:block=Mathematical Alphanumeric Symbols:]]",

	2901 1+32+46+46+45+43+1+1); /* +1 for NUL */

	2902 U_STRING_DECL(mathPattern, "[:Math:]", 8);

	2903 U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);

	2904 U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);

	2905 U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);

	2906

	2907 U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);

	2908 U_STRING_INIT(dashPattern, "[:Dash:]", 8);

	2909 U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);

	2910 U_STRING_INIT(formatPattern, "[:Cf:]", 6);

	2911 U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);

	2912

	2913 U_STRING_INIT(mathBlocksPattern,

	2914 "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Sym bols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathe matical Operators:][:block=Mathematical Alphanumeric Symbols:]]",

	2915 1+32+46+46+45+43+1+1); /* +1 for NUL */

	2916 U_STRING_INIT(mathPattern, "[:Math:]", 8);

	2917 U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);

	2918 U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);

	2919 U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);

	2920

	2921 /*

	2922 * It used to be that UCD.html and its precursors said

	2923 * "Those dashes used to mark connections between pieces of words,

	2924 * plus the Katakana middle dot."

	2925 *

	2926 * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash

	2927 * but not from Hyphen.

	2928 * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.

	2929 * Therefore, do not show errors when testing the Hyphen property.

	2930 */

	2931 log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"

	2932 "known to the UTC and not considered errors.\n");

	2933

	2934 errorCode=U_ZERO_ERROR;

	2935 set1=uset_openPattern(hyphenPattern, 10, &errorCode);

	2936 set2=uset_openPattern(dashPattern, 8, &errorCode);

	2937 if(U_SUCCESS(errorCode)) {

	2938 /* remove the Katakana middle dot(s) from set1 */

	2939 uset_remove(set1, 0x30fb);

	2940 uset_remove(set1, 0xff65); /* halfwidth variant */

	2941 showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);

	2942 } else {

	2943 log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));

	2944 }

	2945

	2946 /* check that Cf is neither Hyphen nor Dash nor Alphabetic */

	2947 set3=uset_openPattern(formatPattern, 6, &errorCode);

	2948 set4=uset_openPattern(alphaPattern, 14, &errorCode);

	2949 if(U_SUCCESS(errorCode)) {

	2950 showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);

	2951 showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);

	2952 showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);

	2953 } else {

	2954 log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missi ng data?)\n", u_errorName(errorCode));

	2955 }

	2956

	2957 uset_close(set1);

	2958 uset_close(set2);

	2959 uset_close(set3);

	2960 uset_close(set4);

	2961

	2962 /*

	2963 * Check that each lowercase character has "small" in its name

	2964 * and not "capital".

	2965 * There are some such characters, some of which seem odd.

	2966 * Use the verbose flag to see these notices.

	2967 */

	2968 errorCode=U_ZERO_ERROR;

	2969 set1=uset_openPattern(lowerPattern, 13, &errorCode);

	2970 if(U_SUCCESS(errorCode)) {

	2971 for(i=0;; ++i) {

	2972 length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);

	2973 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {

	2974 break; /* done */

	2975 }

	2976 if(U_FAILURE(errorCode)) {

	2977 log_err("error iterating over [:Lowercase:] at item %d: %s\n",

	2978 i, u_errorName(errorCode));

	2979 break;

	2980 }

	2981 if(length!=0) {

	2982 break; /* done with code points, got a string or -1 */

	2983 }

	2984

	2985 while(start<=end) {

	2986 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buf fer), &errorCode);

	2987 if(U_FAILURE(errorCode)) {

	2988 log_err("error getting the name of U+%04x - %s\n", start, u_ errorName(errorCode));

	2989 errorCode=U_ZERO_ERROR;

	2990 continue;

	2991 }

	2992 if( (strstr(buffer, "SMALL")==NULL \|\| strstr(buffer, "CAPITAL")! =NULL) &&

	2993 strstr(buffer, "SMALL CAPITAL")==NULL

	2994 ) {

	2995 log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);

	2996 }

	2997 ++start;

	2998 }

	2999 }

	3000 } else {

	3001 log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n ", u_errorName(errorCode));

	3002 }

	3003 uset_close(set1);

	3004

	3005 /* verify that all assigned characters in Math blocks are exactly Math chara cters */

	3006 errorCode=U_ZERO_ERROR;

	3007 set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);

	3008 set2=uset_openPattern(mathPattern, 8, &errorCode);

	3009 set3=uset_openPattern(unassignedPattern, 6, &errorCode);

	3010 if(U_SUCCESS(errorCode)) {

	3011 uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */

	3012 uset_complement(set3); /* assigned characters */

	3013 uset_retainAll(set1, set3); /* [math blocks]&[assigned] */

	3014 compareUSets(set1, set2,

	3015 "[assigned Math block chars]", "[math blocks]&[:Math:]",

	3016 TRUE);

	3017 } else {

	3018 log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Ar e you missing data?)\n", u_errorName(errorCode));

	3019 }

	3020 uset_close(set1);

	3021 uset_close(set2);

	3022 uset_close(set3);

	3023

	3024 /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */

	3025 errorCode=U_ZERO_ERROR;

	3026 set1=uset_openPattern(unknownPattern, 14, &errorCode);

	3027 set2=uset_openPattern(reservedPattern, 20, &errorCode);

	3028 if(U_SUCCESS(errorCode)) {

	3029 compareUSets(set1, set2,

	3030 "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",

	3031 TRUE);

	3032 } else {

	3033 log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));

	3034 }

	3035 uset_close(set1);

	3036 uset_close(set2);

	3037 }

	3038

	3039 /*

	3040 * Starting with ICU4C 3.4, the core Unicode properties files

	3041 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)

	3042 * are hardcoded in the common DLL and therefore not included

	3043 * in the data package any more.

	3044 * Tests requiring these files are disabled so that

	3045 * we need not jump through hoops (like adding snapshots of these files

	3046 * to testdata).

	3047 * See Jitterbug 4497.

	3048 */

	3049 #define HARDCODED_DATA_4497 1

	3050

	3051 /* API coverage for ucase.c */

	3052 static void TestUCase() {

	3053 #if !HARDCODED_DATA_4497

	3054 UDataMemory *pData;

	3055 UCaseProps *csp;

	3056 const UCaseProps *ccsp;

	3057 UErrorCode errorCode;

	3058

	3059 /* coverage for ucase_openBinary() */

	3060 errorCode=U_ZERO_ERROR;

	3061 pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);

	3062 if(U_FAILURE(errorCode)) {

	3063 log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s \n",

	3064 u_errorName(errorCode));

	3065 return;

	3066 }

	3067

	3068 csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);

	3069 if(U_FAILURE(errorCode)) {

	3070 log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",

	3071 u_errorName(errorCode));

	3072 udata_close(pData);

	3073 return;

	3074 }

	3075

	3076 if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */

	3077 log_err("ucase_openBinary() does not seem to return working UCaseProps\n ");

	3078 }

	3079

	3080 ucase_close(csp);

	3081 udata_close(pData);

	3082

	3083 /* coverage for ucase_getDummy() */

	3084 errorCode=U_ZERO_ERROR;

	3085 ccsp=ucase_getDummy(&errorCode);

	3086 if(ucase_tolower(ccsp, 0x41)!=0x41) {

	3087 log_err("ucase_tolower(dummy, A)!=A\n");

	3088 }

	3089 #endif

	3090 }

	3091

	3092 /* API coverage for ubidi_props.c */

	3093 static void TestUBiDiProps() {

	3094 #if !HARDCODED_DATA_4497

	3095 UDataMemory *pData;

	3096 UBiDiProps *bdp;

	3097 const UBiDiProps *cbdp;

	3098 UErrorCode errorCode;

	3099

	3100 /* coverage for ubidi_openBinary() */

	3101 errorCode=U_ZERO_ERROR;

	3102 pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);

	3103 if(U_FAILURE(errorCode)) {

	3104 log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s \n",

	3105 u_errorName(errorCode));

	3106 return;

	3107 }

	3108

	3109 bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);

	3110 if(U_FAILURE(errorCode)) {

	3111 log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",

	3112 u_errorName(errorCode));

	3113 udata_close(pData);

	3114 return;

	3115 }

	3116

	3117 if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */

	3118 log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n ");

	3119 }

	3120

	3121 ubidi_closeProps(bdp);

	3122 udata_close(pData);

	3123

	3124 /* coverage for ubidi_getDummy() */

	3125 errorCode=U_ZERO_ERROR;

	3126 cbdp=ubidi_getDummy(&errorCode);

	3127 if(ubidi_getClass(cbdp, 0x20)!=0) {

	3128 log_err("ubidi_getClass(dummy, space)!=0\n");

	3129 }

	3130 #endif

	3131 }

	3132

	3133 /* test case folding, compare return values with CaseFolding.txt ------------ */

	3134

	/*
	 * Bit set recording which case foldings of a character have been tested
	 * already; CF_ALL is the union of the three individual bits.
	 */
	enum {
	    CF_SIMPLE=1<<0,
	    CF_FULL=1<<1,
	    CF_TURKIC=1<<2,
	    CF_ALL=CF_SIMPLE|CF_FULL|CF_TURKIC
	};

	3142

	3143 static void

	3144 testFold(UChar32 c, int which,

	3145 UChar32 simple, UChar32 turkic,

	3146 const UChar *full, int32_t fullLength,

	3147 const UChar *turkicFull, int32_t turkicFullLength) {

	3148 UChar s[2], t[32];

	3149 UChar32 c2;

	3150 int32_t length, length2;

	3151

	3152 UErrorCode errorCode=U_ZERO_ERROR;

	3153

	3154 length=0;

	3155 U16_APPEND_UNSAFE(s, length, c);

	3156

	3157 if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {

	3158 log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (l ong)c2, (long)simple);

	3159 }

	3160 if((which&CF_FULL)!=0) {

	3161 length2=u_strFoldCase(t, LENGTHOF(t), s, length, 0, &errorCode);

	3162 if(length2!=fullLength \|\| 0!=u_memcmp(t, full, fullLength)) {

	3163 log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);

	3164 }

	3165 }

	3166 if((which&CF_TURKIC)!=0) {

	3167 if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {

	3168 log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);

	3169 }

	3170

	3171 length2=u_strFoldCase(t, LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPE CIAL_I, &errorCode);

	3172 if(length2!=turkicFullLength \|\| 0!=u_memcmp(t, turkicFull, length2)) {

	3173 log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", ( long)c);

	3174 }

	3175 }

	3176 }

	3177

	3178 /* test that c case-folds to itself */

	3179 static void

	3180 testFoldToSelf(UChar32 c, int which) {

	3181 UChar s[2];

	3182 int32_t length;

	3183

	3184 length=0;

	3185 U16_APPEND_UNSAFE(s, length, c);

	3186 testFold(c, which, c, c, s, length, s, length);

	3187 }

	3188

	3189 struct CaseFoldingData {

	3190 USet *notSeen;

	3191 UChar32 prev, prevSimple;

	3192 UChar prevFull[32];

	3193 int32_t prevFullLength;

	3194 int which;

	3195 };

	3196 typedef struct CaseFoldingData CaseFoldingData;

	3197

	3198 static void U_CALLCONV

	3199 caseFoldingLineFn(void *context,

	3200 char *fields[][2], int32_t fieldCount,

	3201 UErrorCode *pErrorCode) {

	3202 CaseFoldingData pData=(CaseFoldingData )context;

	3203 char *end;

	3204 UChar full[32];

	3205 UChar32 c, prev, simple;

	3206 int32_t count;

	3207 int which;

	3208 char status;

	3209

	3210 /* get code point */

	3211 c=(UChar32)strtoul(u_skipWhitespace(fields[0][0]), &end, 16);

	3212 end=(char *)u_skipWhitespace(end);

	3213 if(end<=fields[0][0] \|\| end!=fields[0][1]) {

	3214 log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]) ;

	3215 *pErrorCode=U_PARSE_ERROR;

	3216 return;

	3217 }

	3218

	3219 /* get the status of this mapping */

	3220 status=*u_skipWhitespace(fields[1][0]);

	3221 if(status!='C' && status!='S' && status!='F' && status!='T') {

	3222 log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0 ][0]);

	3223 *pErrorCode=U_PARSE_ERROR;

	3224 return;

	3225 }

	3226

	3227 /* get the mapping */

	3228 count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode) ;

	3229 if(U_FAILURE(*pErrorCode)) {

	3230 log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);

	3231 return;

	3232 }

	3233

	3234 /* there is a simple mapping only if there is exactly one code point (count is in UChars) */

	3235 if(count==0 \|\| count>2 \|\| (count==2 && U16_IS_SINGLE(full[1]))) {

	3236 simple=c;

	3237 }

	3238

	3239 if(c!=(prev=pData->prev)) {

	3240 /*

	3241 * Test remaining mappings for the previous code point.

	3242 * If a turkic folding was not mentioned, then it should fold the same

	3243 * as the regular simple case folding.

	3244 */

	3245 UChar s[2];

	3246 int32_t length;

	3247

	3248 length=0;

	3249 U16_APPEND_UNSAFE(s, length, prev);

	3250 testFold(prev, (~pData->which)&CF_ALL,

	3251 prev, pData->prevSimple,

	3252 s, length,

	3253 pData->prevFull, pData->prevFullLength);

	3254 pData->prev=pData->prevSimple=c;

	3255 length=0;

	3256 U16_APPEND_UNSAFE(pData->prevFull, length, c);

	3257 pData->prevFullLength=length;

	3258 pData->which=0;

	3259 }

	3260

	3261 /*

	3262 * Turn the status into a bit set of case foldings to test.

	3263 * Remember non-Turkic case foldings as defaults for Turkic mode.

	3264 */

	3265 switch(status) {

	3266 case 'C':

	3267 which=CF_SIMPLE\|CF_FULL;

	3268 pData->prevSimple=simple;

	3269 u_memcpy(pData->prevFull, full, count);

	3270 pData->prevFullLength=count;

	3271 break;

	3272 case 'S':

	3273 which=CF_SIMPLE;

	3274 pData->prevSimple=simple;

	3275 break;

	3276 case 'F':

	3277 which=CF_FULL;

	3278 u_memcpy(pData->prevFull, full, count);

	3279 pData->prevFullLength=count;

	3280 break;

	3281 case 'T':

	3282 which=CF_TURKIC;

	3283 break;

	3284 default:

	3285 which=0;

	3286 break; /* won't happen because of test above */

	3287 }

	3288

	3289 testFold(c, which, simple, simple, full, count, full, count);

	3290

	3291 /* remember which case foldings of c have been tested */

	3292 pData->which\|=which;

	3293

	3294 /* remove c from the set of ones not mentioned in CaseFolding.txt */

	3295 uset_remove(pData->notSeen, c);

	3296 }

	3297

	3298 static void

	3299 TestCaseFolding() {

	3300 CaseFoldingData data={ NULL };

	3301 char *fields[3][2];

	3302 UErrorCode errorCode;

	3303

	3304 static char lastLine= (char )"10FFFF; C; 10FFFF;";

	3305

	3306 errorCode=U_ZERO_ERROR;

	3307 /* test BMP & plane 1 - nothing interesting above */

	3308 data.notSeen=uset_open(0, 0x1ffff);

	3309 data.prevFullLength=1; /* length of full case folding of U+0000 */

	3310

	3311 parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorC ode);

	3312 if(U_SUCCESS(errorCode)) {

	3313 int32_t i, start, end;

	3314

	3315 /* add a pseudo-last line to finish testing of the actual last one */

	3316 fields[0][0]=lastLine;

	3317 fields[0][1]=lastLine+6;

	3318 fields[1][0]=lastLine+7;

	3319 fields[1][1]=lastLine+9;

	3320 fields[2][0]=lastLine+10;

	3321 fields[2][1]=lastLine+17;

	3322 caseFoldingLineFn(&data, fields, 3, &errorCode);

	3323

	3324 /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */

	3325 for(i=0;

	3326 0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&

	3327 U_SUCCESS(errorCode);

	3328 ++i

	3329 ) {

	3330 do {

	3331 testFoldToSelf(start, CF_ALL);

	3332 } while(++start<=end);

	3333 }

	3334 }

	3335

	3336 uset_close(data.notSeen);

	3337 }

OLD	NEW

« no previous file with comments | « icu46/source/test/cintltst/cucdapi.c ('k') | icu46/source/test/cintltst/currtest.c » ('j') | no next file with comments »