| OLD | NEW |
| (Empty) |
| 1 /******************************************************************** | |
| 2 * COPYRIGHT: | |
| 3 * Copyright (c) 1997-2015, International Business Machines Corporation and | |
| 4 * others. All Rights Reserved. | |
| 5 ********************************************************************/ | |
| 6 /******************************************************************************* | |
| 7 * | |
| 8 * File CUCDTST.C | |
| 9 * | |
| 10 * Modification History: | |
| 11 * Name Description | |
| 12 * Madhu Katragadda Ported for C API, added tests for string funct
ions | |
| 13 ******************************************************************************** | |
| 14 */ | |
| 15 | |
| 16 #include <string.h> | |
| 17 #include <math.h> | |
| 18 #include <stdlib.h> | |
| 19 | |
| 20 #include "unicode/utypes.h" | |
| 21 #include "unicode/uchar.h" | |
| 22 #include "unicode/putil.h" | |
| 23 #include "unicode/ustring.h" | |
| 24 #include "unicode/uloc.h" | |
| 25 #include "unicode/unorm2.h" | |
| 26 | |
| 27 #include "cintltst.h" | |
| 28 #include "putilimp.h" | |
| 29 #include "uparse.h" | |
| 30 #include "ucase.h" | |
| 31 #include "ubidi_props.h" | |
| 32 #include "uprops.h" | |
| 33 #include "uset_imp.h" | |
| 34 #include "usc_impl.h" | |
| 35 #include "udatamem.h" /* for testing ucase_openBinary() */ | |
| 36 #include "cucdapi.h" | |
| 37 #include "cmemory.h" | |
| 38 | |
| 39 /* prototypes --------------------------------------------------------------- */ | |
| 40 | |
| 41 static void TestUpperLower(void); | |
| 42 static void TestLetterNumber(void); | |
| 43 static void TestMisc(void); | |
| 44 static void TestPOSIX(void); | |
| 45 static void TestControlPrint(void); | |
| 46 static void TestIdentifier(void); | |
| 47 static void TestUnicodeData(void); | |
| 48 static void TestCodeUnit(void); | |
| 49 static void TestCodePoint(void); | |
| 50 static void TestCharLength(void); | |
| 51 static void TestCharNames(void); | |
| 52 static void TestUCharFromNameUnderflow(void); | |
| 53 static void TestMirroring(void); | |
| 54 static void TestUScriptRunAPI(void); | |
| 55 static void TestAdditionalProperties(void); | |
| 56 static void TestNumericProperties(void); | |
| 57 static void TestPropertyNames(void); | |
| 58 static void TestPropertyValues(void); | |
| 59 static void TestConsistency(void); | |
| 60 static void TestUCase(void); | |
| 61 static void TestUBiDiProps(void); | |
| 62 static void TestCaseFolding(void); | |
| 63 | |
| 64 /* internal methods used */ | |
| 65 static int32_t MakeProp(char* str); | |
| 66 static int32_t MakeDir(char* str); | |
| 67 | |
| 68 /* helpers ------------------------------------------------------------------ */ | |
| 69 | |
| 70 static void | |
| 71 parseUCDFile(const char *filename, | |
| 72 char *fields[][2], int32_t fieldCount, | |
| 73 UParseLineFn *lineFn, void *context, | |
| 74 UErrorCode *pErrorCode) { | |
| 75 char path[256]; | |
| 76 char backupPath[256]; | |
| 77 | |
| 78 if(U_FAILURE(*pErrorCode)) { | |
| 79 return; | |
| 80 } | |
| 81 | |
| 82 /* Look inside ICU_DATA first */ | |
| 83 strcpy(path, u_getDataDirectory()); | |
| 84 strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING); | |
| 85 strcat(path, filename); | |
| 86 | |
| 87 /* As a fallback, try to guess where the source data was located | |
| 88 * at the time ICU was built, and look there. | |
| 89 */ | |
| 90 strcpy(backupPath, ctest_dataSrcDir()); | |
| 91 strcat(backupPath, U_FILE_SEP_STRING); | |
| 92 strcat(backupPath, "unidata" U_FILE_SEP_STRING); | |
| 93 strcat(backupPath, filename); | |
| 94 | |
| 95 u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorC
ode); | |
| 96 if(*pErrorCode==U_FILE_ACCESS_ERROR) { | |
| 97 *pErrorCode=U_ZERO_ERROR; | |
| 98 u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, contex
t, pErrorCode); | |
| 99 } | |
| 100 if(U_FAILURE(*pErrorCode)) { | |
| 101 log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorN
ame(*pErrorCode)); | |
| 102 } | |
| 103 } | |
| 104 | |
| 105 /* test data ---------------------------------------------------------------- */ | |
| 106 | |
/*
 * General category lookup tables.
 * tagStrings is the concatenation of two-letter general category
 * abbreviations; tagValues lists the matching U_... category constants in
 * the same order (see the per-entry comments), so the abbreviation at
 * character offset 2*n corresponds to tagValues[n].
 * NOTE(review): presumably consumed by MakeProp() - confirm against its definition.
 */
static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
static const int32_t tagValues[] =
    {
    /* Mn */ U_NON_SPACING_MARK,
    /* Mc */ U_COMBINING_SPACING_MARK,
    /* Me */ U_ENCLOSING_MARK,
    /* Nd */ U_DECIMAL_DIGIT_NUMBER,
    /* Nl */ U_LETTER_NUMBER,
    /* No */ U_OTHER_NUMBER,
    /* Zs */ U_SPACE_SEPARATOR,
    /* Zl */ U_LINE_SEPARATOR,
    /* Zp */ U_PARAGRAPH_SEPARATOR,
    /* Cc */ U_CONTROL_CHAR,
    /* Cf */ U_FORMAT_CHAR,
    /* Cs */ U_SURROGATE,
    /* Co */ U_PRIVATE_USE_CHAR,
    /* Cn */ U_UNASSIGNED,
    /* Lu */ U_UPPERCASE_LETTER,
    /* Ll */ U_LOWERCASE_LETTER,
    /* Lt */ U_TITLECASE_LETTER,
    /* Lm */ U_MODIFIER_LETTER,
    /* Lo */ U_OTHER_LETTER,
    /* Pc */ U_CONNECTOR_PUNCTUATION,
    /* Pd */ U_DASH_PUNCTUATION,
    /* Ps */ U_START_PUNCTUATION,
    /* Pe */ U_END_PUNCTUATION,
    /* Po */ U_OTHER_PUNCTUATION,
    /* Sm */ U_MATH_SYMBOL,
    /* Sc */ U_CURRENCY_SYMBOL,
    /* Sk */ U_MODIFIER_SYMBOL,
    /* So */ U_OTHER_SYMBOL,
    /* Pi */ U_INITIAL_PUNCTUATION,
    /* Pf */ U_FINAL_PUNCTUATION
    };
| 141 | |
/*
 * Short names of bidirectional classes, each stored in a fixed 5-byte slot
 * (at most 3 letters are used here, plus the terminating NUL).
 * NOTE(review): the order presumably matches the UCharDirection enum so that
 * the array index is the enum value - confirm against MakeDir() and uchar.h.
 */
static const char dirStrings[][5] = {
    "L",
    "R",
    "EN",
    "ES",
    "ET",
    "AN",
    "CS",
    "B",
    "S",
    "WS",
    "ON",
    "LRE",
    "LRO",
    "AL",
    "RLE",
    "RLO",
    "PDF",
    "NSM",
    "BN",
    /* new in Unicode 6.3/ICU 52 */
    "FSI",
    "LRI",
    "RLI",
    "PDI"
};
| 168 | |
/* Prototype for the only non-static function defined here. */
void addUnicodeTest(TestNode** root);

/*
 * Registers the character property tests with the test tree at root.
 * Each addTest() call passes one test function together with its path
 * string below "tsutil/cucdtst/".
 * NOTE(review): TestBinaryValues, TestUScriptCodeAPI, TestHasScript,
 * TestGetScriptExtensions and TestScriptMetadataAPI have no prototype in the
 * visible part of this file; presumably they are declared in cucdapi.h - confirm.
 */
void addUnicodeTest(TestNode** root)
{
    addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
    addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
    addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
    addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
    addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
    addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
    addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
    addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
    addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
    addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
    addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
    addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
    addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
    addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
    addTest(root, &TestUCharFromNameUnderflow, "tsutil/cucdtst/TestUCharFromNameUnderflow");
    addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
    addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
    addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
    addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
    addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI");
    addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
    addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
    addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
    addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
    addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
    addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
    addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
}
| 201 | |
| 202 /*==================================================== */ | |
| 203 /* test u_toupper() and u_tolower() */ | |
| 204 /*==================================================== */ | |
| 205 static void TestUpperLower() | |
| 206 { | |
| 207 const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0
x01c9, 0x000c, 0x0000}; | |
| 208 const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0
x01c9, 0x000c, 0x0000}; | |
| 209 U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21); | |
| 210 U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21); | |
| 211 int32_t i; | |
| 212 | |
| 213 U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21); | |
| 214 U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21); | |
| 215 | |
| 216 /* | |
| 217 Checks LetterLike Symbols which were previously a source of confusion | |
| 218 [Bertrand A. D. 02/04/98] | |
| 219 */ | |
| 220 for (i=0x2100;i<0x2138;i++) | |
| 221 { | |
| 222 /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED
CAPITAL F) */ | |
| 223 if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132) | |
| 224 { | |
| 225 if (i != (int)u_tolower(i)) /* itself */ | |
| 226 log_err("Failed case conversion with itself: U+%04x\n", i); | |
| 227 if (i != (int)u_toupper(i)) | |
| 228 log_err("Failed case conversion with itself: U+%04x\n", i); | |
| 229 } | |
| 230 } | |
| 231 | |
| 232 for(i=0; i < u_strlen(upper); i++){ | |
| 233 if(u_tolower(upper[i]) != lower[i]){ | |
| 234 log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i
], lower[i], u_tolower(upper[i])); | |
| 235 } | |
| 236 } | |
| 237 | |
| 238 log_verbose("testing upper lower\n"); | |
| 239 for (i = 0; i < 21; i++) { | |
| 240 | |
| 241 if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i])) | |
| 242 { | |
| 243 log_err("Failed isLowerCase test at %c\n", upperTest[i]); | |
| 244 } | |
| 245 else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i])) | |
| 246 { | |
| 247 log_err("Failed isUpperCase test at %c\n", lowerTest[i]); | |
| 248 } | |
| 249 else if (upperTest[i] != u_tolower(lowerTest[i])) | |
| 250 { | |
| 251 log_err("Failed case conversion from %c To %c :\n", lowerTest[i], u
pperTest[i]); | |
| 252 } | |
| 253 else if (lowerTest[i] != u_toupper(upperTest[i])) | |
| 254 { | |
| 255 log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerT
est[i]); | |
| 256 } | |
| 257 else if (upperTest[i] != u_tolower(upperTest[i])) | |
| 258 { | |
| 259 log_err("Failed case conversion with itself: %c\n", upperTest[i]); | |
| 260 } | |
| 261 else if (lowerTest[i] != u_toupper(lowerTest[i])) | |
| 262 { | |
| 263 log_err("Failed case conversion with itself: %c\n", lowerTest[i]); | |
| 264 } | |
| 265 } | |
| 266 log_verbose("done testing upper lower\n"); | |
| 267 | |
| 268 log_verbose("testing u_istitle\n"); | |
| 269 { | |
| 270 static const UChar expected[] = { | |
| 271 0x1F88, | |
| 272 0x1F89, | |
| 273 0x1F8A, | |
| 274 0x1F8B, | |
| 275 0x1F8C, | |
| 276 0x1F8D, | |
| 277 0x1F8E, | |
| 278 0x1F8F, | |
| 279 0x1F88, | |
| 280 0x1F89, | |
| 281 0x1F8A, | |
| 282 0x1F8B, | |
| 283 0x1F8C, | |
| 284 0x1F8D, | |
| 285 0x1F8E, | |
| 286 0x1F8F, | |
| 287 0x1F98, | |
| 288 0x1F99, | |
| 289 0x1F9A, | |
| 290 0x1F9B, | |
| 291 0x1F9C, | |
| 292 0x1F9D, | |
| 293 0x1F9E, | |
| 294 0x1F9F, | |
| 295 0x1F98, | |
| 296 0x1F99, | |
| 297 0x1F9A, | |
| 298 0x1F9B, | |
| 299 0x1F9C, | |
| 300 0x1F9D, | |
| 301 0x1F9E, | |
| 302 0x1F9F, | |
| 303 0x1FA8, | |
| 304 0x1FA9, | |
| 305 0x1FAA, | |
| 306 0x1FAB, | |
| 307 0x1FAC, | |
| 308 0x1FAD, | |
| 309 0x1FAE, | |
| 310 0x1FAF, | |
| 311 0x1FA8, | |
| 312 0x1FA9, | |
| 313 0x1FAA, | |
| 314 0x1FAB, | |
| 315 0x1FAC, | |
| 316 0x1FAD, | |
| 317 0x1FAE, | |
| 318 0x1FAF, | |
| 319 0x1FBC, | |
| 320 0x1FBC, | |
| 321 0x1FCC, | |
| 322 0x1FCC, | |
| 323 0x1FFC, | |
| 324 0x1FFC, | |
| 325 }; | |
| 326 int32_t num = sizeof(expected)/sizeof(expected[0]); | |
| 327 for(i=0; i<num; i++){ | |
| 328 if(!u_istitle(expected[i])){ | |
| 329 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n"
,expected[i]); | |
| 330 } | |
| 331 } | |
| 332 | |
| 333 } | |
| 334 } | |
| 335 | |
| 336 /* compare two sets and verify that their difference or intersection is empty */ | |
| 337 static UBool | |
| 338 showADiffB(const USet *a, const USet *b, | |
| 339 const char *a_name, const char *b_name, | |
| 340 UBool expect, UBool diffIsError) { | |
| 341 USet *aa; | |
| 342 int32_t i, start, end, length; | |
| 343 UErrorCode errorCode; | |
| 344 | |
| 345 /* | |
| 346 * expect: | |
| 347 * TRUE -> a-b should be empty, that is, b should contain all of a | |
| 348 * FALSE -> a&b should be empty, that is, a should contain none of b (and vi
ce versa) | |
| 349 */ | |
| 350 if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) { | |
| 351 return TRUE; | |
| 352 } | |
| 353 | |
| 354 /* clone a to aa because a is const */ | |
| 355 aa=uset_open(1, 0); | |
| 356 if(aa==NULL) { | |
| 357 /* unusual problem - out of memory? */ | |
| 358 return FALSE; | |
| 359 } | |
| 360 uset_addAll(aa, a); | |
| 361 | |
| 362 /* compute the set in question */ | |
| 363 if(expect) { | |
| 364 /* a-b */ | |
| 365 uset_removeAll(aa, b); | |
| 366 } else { | |
| 367 /* a&b */ | |
| 368 uset_retainAll(aa, b); | |
| 369 } | |
| 370 | |
| 371 /* aa is not empty because of the initial tests above; show its contents */ | |
| 372 errorCode=U_ZERO_ERROR; | |
| 373 i=0; | |
| 374 for(;;) { | |
| 375 length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode); | |
| 376 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { | |
| 377 break; /* done */ | |
| 378 } | |
| 379 if(U_FAILURE(errorCode)) { | |
| 380 log_err("error comparing %s with %s at difference item %d: %s\n", | |
| 381 a_name, b_name, i, u_errorName(errorCode)); | |
| 382 break; | |
| 383 } | |
| 384 if(length!=0) { | |
| 385 break; /* done with code points, got a string or -1 */ | |
| 386 } | |
| 387 | |
| 388 if(diffIsError) { | |
| 389 if(expect) { | |
| 390 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a
_name, start, end, b_name); | |
| 391 } else { | |
| 392 log_err("error: %s and %s both contain U+%04x..U+%04x but should
not intersect\n", a_name, b_name, start, end); | |
| 393 } | |
| 394 } else { | |
| 395 if(expect) { | |
| 396 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n"
, a_name, start, end, b_name); | |
| 397 } else { | |
| 398 log_verbose("info: %s and %s both contain U+%04x..U+%04x but sho
uld not intersect\n", a_name, b_name, start, end); | |
| 399 } | |
| 400 } | |
| 401 | |
| 402 ++i; | |
| 403 } | |
| 404 | |
| 405 uset_close(aa); | |
| 406 return FALSE; | |
| 407 } | |
| 408 | |
| 409 static UBool | |
| 410 showAMinusB(const USet *a, const USet *b, | |
| 411 const char *a_name, const char *b_name, | |
| 412 UBool diffIsError) { | |
| 413 return showADiffB(a, b, a_name, b_name, TRUE, diffIsError); | |
| 414 } | |
| 415 | |
| 416 static UBool | |
| 417 showAIntersectB(const USet *a, const USet *b, | |
| 418 const char *a_name, const char *b_name, | |
| 419 UBool diffIsError) { | |
| 420 return showADiffB(a, b, a_name, b_name, FALSE, diffIsError); | |
| 421 } | |
| 422 | |
| 423 static UBool | |
| 424 compareUSets(const USet *a, const USet *b, | |
| 425 const char *a_name, const char *b_name, | |
| 426 UBool diffIsError) { | |
| 427 /* | |
| 428 * Use an arithmetic & not a logical && so that both branches | |
| 429 * are always taken and all differences are shown. | |
| 430 */ | |
| 431 return | |
| 432 showAMinusB(a, b, a_name, b_name, diffIsError) & | |
| 433 showAMinusB(b, a, b_name, a_name, diffIsError); | |
| 434 } | |
| 435 | |
| 436 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */ | |
| 437 static void TestLetterNumber() | |
| 438 { | |
| 439 UChar i = 0x0000; | |
| 440 | |
| 441 log_verbose("Testing for isalpha\n"); | |
| 442 for (i = 0x0041; i < 0x005B; i++) { | |
| 443 if (!u_isalpha(i)) | |
| 444 { | |
| 445 log_err("Failed isLetter test at %.4X\n", i); | |
| 446 } | |
| 447 } | |
| 448 for (i = 0x0660; i < 0x066A; i++) { | |
| 449 if (u_isalpha(i)) | |
| 450 { | |
| 451 log_err("Failed isLetter test with numbers at %.4X\n", i); | |
| 452 } | |
| 453 } | |
| 454 | |
| 455 log_verbose("Testing for isdigit\n"); | |
| 456 for (i = 0x0660; i < 0x066A; i++) { | |
| 457 if (!u_isdigit(i)) | |
| 458 { | |
| 459 log_verbose("Failed isNumber test at %.4X\n", i); | |
| 460 } | |
| 461 } | |
| 462 | |
| 463 log_verbose("Testing for isalnum\n"); | |
| 464 for (i = 0x0041; i < 0x005B; i++) { | |
| 465 if (!u_isalnum(i)) | |
| 466 { | |
| 467 log_err("Failed isAlNum test at %.4X\n", i); | |
| 468 } | |
| 469 } | |
| 470 for (i = 0x0660; i < 0x066A; i++) { | |
| 471 if (!u_isalnum(i)) | |
| 472 { | |
| 473 log_err("Failed isAlNum test at %.4X\n", i); | |
| 474 } | |
| 475 } | |
| 476 | |
| 477 { | |
| 478 /* | |
| 479 * The following checks work only starting from Unicode 4.0. | |
| 480 * Check the version number here. | |
| 481 */ | |
| 482 static UVersionInfo u401={ 4, 0, 1, 0 }; | |
| 483 UVersionInfo version; | |
| 484 u_getUnicodeVersion(version); | |
| 485 if(version[0]<4 || 0==memcmp(version, u401, 4)) { | |
| 486 return; | |
| 487 } | |
| 488 } | |
| 489 | |
| 490 { | |
| 491 /* | |
| 492 * Sanity check: | |
| 493 * Verify that exactly the digit characters have decimal digit values. | |
| 494 * This assumption is used in the implementation of u_digit() | |
| 495 * (which checks nt=de) | |
| 496 * compared with the parallel java.lang.Character.digit() | |
| 497 * (which checks Nd). | |
| 498 * | |
| 499 * This was not true in Unicode 3.2 and earlier. | |
| 500 * Unicode 4.0 fixed discrepancies. | |
| 501 * Unicode 4.0.1 re-introduced problems in this area due to an | |
| 502 * unintentionally incomplete last-minute change. | |
| 503 */ | |
| 504 U_STRING_DECL(digitsPattern, "[:Nd:]", 6); | |
| 505 U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24); | |
| 506 | |
| 507 USet *digits, *decimalValues; | |
| 508 UErrorCode errorCode; | |
| 509 | |
| 510 U_STRING_INIT(digitsPattern, "[:Nd:]", 6); | |
| 511 U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24); | |
| 512 errorCode=U_ZERO_ERROR; | |
| 513 digits=uset_openPattern(digitsPattern, 6, &errorCode); | |
| 514 decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode); | |
| 515 | |
| 516 if(U_SUCCESS(errorCode)) { | |
| 517 compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decima
l:]", TRUE); | |
| 518 } | |
| 519 | |
| 520 uset_close(digits); | |
| 521 uset_close(decimalValues); | |
| 522 } | |
| 523 } | |
| 524 | |
| 525 static void testSampleCharProps(UBool propFn(UChar32), const char *propName, | |
| 526 const UChar32 *sampleChars, int32_t sampleCharsL
ength, | |
| 527 UBool expected) { | |
| 528 int32_t i; | |
| 529 for (i = 0; i < sampleCharsLength; ++i) { | |
| 530 UBool result = propFn(sampleChars[i]); | |
| 531 if (result != expected) { | |
| 532 log_err("error: character property function %s(U+%04x)=%d is wrong\n
", | |
| 533 propName, sampleChars[i], result); | |
| 534 } | |
| 535 } | |
| 536 } | |
| 537 | |
| 538 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_issp
ace()), isWhiteSpace(), u_CharDigitValue() */ | |
| 539 static void TestMisc() | |
| 540 { | |
| 541 static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x200
5}; | |
| 542 static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74}; | |
| 543 static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e}; | |
| 544 static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd}; | |
| 545 static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2}; | |
| 546 static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B}; | |
| 547 /* static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8
, 0xFFF0};*/ | |
| 548 static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5}; | |
| 549 static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE}; | |
| 550 static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c,
0x000c}; | |
| 551 static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f,
0x85, 0x2007, 0xffef}; | |
| 552 | |
| 553 static const int32_t sampleDigitValues[] = {0, 2, 3, 5}; | |
| 554 | |
| 555 uint32_t mask; | |
| 556 | |
| 557 int32_t i; | |
| 558 char icuVersion[U_MAX_VERSION_STRING_LENGTH]; | |
| 559 UVersionInfo realVersion; | |
| 560 | |
| 561 memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH); | |
| 562 | |
| 563 testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, UPRV_LENGTHOF(samp
leSpaces), TRUE); | |
| 564 testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, UPRV_LENGTHOF(s
ampleNonSpaces), FALSE); | |
| 565 | |
| 566 testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar", | |
| 567 sampleSpaces, UPRV_LENGTHOF(sampleSpaces), TRUE); | |
| 568 testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar", | |
| 569 sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), FALSE); | |
| 570 | |
| 571 testSampleCharProps(u_isWhitespace, "u_isWhitespace", | |
| 572 sampleWhiteSpaces, UPRV_LENGTHOF(sampleWhiteSpaces), TRU
E); | |
| 573 testSampleCharProps(u_isWhitespace, "u_isWhitespace", | |
| 574 sampleNonWhiteSpaces, UPRV_LENGTHOF(sampleNonWhiteSpaces
), FALSE); | |
| 575 | |
| 576 testSampleCharProps(u_isdefined, "u_isdefined", | |
| 577 sampleDefined, UPRV_LENGTHOF(sampleDefined), TRUE); | |
| 578 testSampleCharProps(u_isdefined, "u_isdefined", | |
| 579 sampleUndefined, UPRV_LENGTHOF(sampleUndefined), FALSE); | |
| 580 | |
| 581 testSampleCharProps(u_isbase, "u_isbase", sampleBase, UPRV_LENGTHOF(sampleBa
se), TRUE); | |
| 582 testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, UPRV_LENGTHOF(sampl
eNonBase), FALSE); | |
| 583 | |
| 584 testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, UPRV_LENGTHOF(samp
leDigits), TRUE); | |
| 585 testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, UPRV_LENGTHOF(s
ampleNonDigits), FALSE); | |
| 586 | |
| 587 for (i = 0; i < UPRV_LENGTHOF(sampleDigits); i++) { | |
| 588 if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) { | |
| 589 log_err("error: u_charDigitValue(U+04x)=%d != %d\n", | |
| 590 sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDi
gitValues[i]); | |
| 591 } | |
| 592 } | |
| 593 | |
| 594 /* Tests the ICU version #*/ | |
| 595 u_getVersion(realVersion); | |
| 596 u_versionToString(realVersion, icuVersion); | |
| 597 if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion),
(int32_t)strlen(U_ICU_VERSION))) != 0) | |
| 598 { | |
| 599 log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERS
ION, icuVersion); | |
| 600 } | |
| 601 #if defined(ICU_VERSION) | |
| 602 /* test only happens where we have configure.in with VERSION - sanity check.
*/ | |
| 603 if(strcmp(U_ICU_VERSION, ICU_VERSION)) | |
| 604 { | |
| 605 log_err("ICU version mismatch: Header says %s, build environment says %s
.\n", U_ICU_VERSION, ICU_VERSION); | |
| 606 } | |
| 607 #endif | |
| 608 | |
| 609 /* test U_GC_... */ | |
| 610 if( | |
| 611 U_GET_GC_MASK(0x41)!=U_GC_LU_MASK || | |
| 612 U_GET_GC_MASK(0x662)!=U_GC_ND_MASK || | |
| 613 U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK || | |
| 614 U_GET_GC_MASK(0x28)!=U_GC_PS_MASK || | |
| 615 U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK || | |
| 616 U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK | |
| 617 ) { | |
| 618 log_err("error: U_GET_GC_MASK does not work properly\n"); | |
| 619 } | |
| 620 | |
| 621 mask=0; | |
| 622 mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK; | |
| 623 | |
| 624 mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK; | |
| 625 mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK; | |
| 626 mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK; | |
| 627 mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK; | |
| 628 mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK; | |
| 629 | |
| 630 mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK; | |
| 631 mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK; | |
| 632 mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK; | |
| 633 | |
| 634 mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK; | |
| 635 mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK; | |
| 636 mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK; | |
| 637 | |
| 638 mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK; | |
| 639 mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK; | |
| 640 mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK; | |
| 641 | |
| 642 mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK; | |
| 643 mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK; | |
| 644 mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK; | |
| 645 mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK; | |
| 646 | |
| 647 mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK; | |
| 648 mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK; | |
| 649 mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK; | |
| 650 mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK; | |
| 651 mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK; | |
| 652 | |
| 653 mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK; | |
| 654 mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK; | |
| 655 mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK; | |
| 656 mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK; | |
| 657 | |
| 658 mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK; | |
| 659 mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK; | |
| 660 | |
| 661 if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffff
ffff)) { | |
| 662 log_err("error: problems with U_GC_XX_MASK constants\n"); | |
| 663 } | |
| 664 | |
| 665 mask=0; | |
| 666 mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK; | |
| 667 mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK; | |
| 668 mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK; | |
| 669 mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK; | |
| 670 mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK; | |
| 671 mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK; | |
| 672 mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK; | |
| 673 | |
| 674 if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffff
ffff)) { | |
| 675 log_err("error: problems with U_GC_Y_MASK constants\n"); | |
| 676 } | |
| 677 { | |
| 678 static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x00
35,0x0036,0x0037,0x0038,0x0039 }; | |
| 679 for(i=0; i<10; i++){ | |
| 680 if(digit[i]!=u_forDigit(i,10)){ | |
| 681 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n"
,i,digit[i],u_forDigit(i,10)); | |
| 682 } | |
| 683 } | |
| 684 } | |
| 685 | |
| 686 /* test u_digit() */ | |
| 687 { | |
| 688 static const struct { | |
| 689 UChar32 c; | |
| 690 int8_t radix, value; | |
| 691 } data[]={ | |
| 692 /* base 16 */ | |
| 693 { 0x0031, 16, 1 }, | |
| 694 { 0x0038, 16, 8 }, | |
| 695 { 0x0043, 16, 12 }, | |
| 696 { 0x0066, 16, 15 }, | |
| 697 { 0x00e4, 16, -1 }, | |
| 698 { 0x0662, 16, 2 }, | |
| 699 { 0x06f5, 16, 5 }, | |
| 700 { 0xff13, 16, 3 }, | |
| 701 { 0xff41, 16, 10 }, | |
| 702 | |
| 703 /* base 8 */ | |
| 704 { 0x0031, 8, 1 }, | |
| 705 { 0x0038, 8, -1 }, | |
| 706 { 0x0043, 8, -1 }, | |
| 707 { 0x0066, 8, -1 }, | |
| 708 { 0x00e4, 8, -1 }, | |
| 709 { 0x0662, 8, 2 }, | |
| 710 { 0x06f5, 8, 5 }, | |
| 711 { 0xff13, 8, 3 }, | |
| 712 { 0xff41, 8, -1 }, | |
| 713 | |
| 714 /* base 36 */ | |
| 715 { 0x5a, 36, 35 }, | |
| 716 { 0x7a, 36, 35 }, | |
| 717 { 0xff3a, 36, 35 }, | |
| 718 { 0xff5a, 36, 35 }, | |
| 719 | |
| 720 /* wrong radix values */ | |
| 721 { 0x0031, 1, -1 }, | |
| 722 { 0xff3a, 37, -1 } | |
| 723 }; | |
| 724 | |
| 725 for(i=0; i<UPRV_LENGTHOF(data); ++i) { | |
| 726 if(u_digit(data[i].c, data[i].radix)!=data[i].value) { | |
| 727 log_err("u_digit(U+%04x, %d)=%d expected %d\n", | |
| 728 data[i].c, | |
| 729 data[i].radix, | |
| 730 u_digit(data[i].c, data[i].radix), | |
| 731 data[i].value); | |
| 732 } | |
| 733 } | |
| 734 } | |
| 735 } | |
| 736 | |
| 737 /* test C/POSIX-style functions --------------------------------------------- */ | |
| 738 | |
/* bit flags */
/*
 * One bit per C/POSIX-style classification function.
 * Bit n (value 1<<n) belongs to posixClasses[n] below; TestPOSIX() relies on
 * this by shifting its mask once per table entry.
 */
#define ISAL 1
#define ISLO 2
#define ISUP 4

#define ISDI 8
#define ISXD 0x10

#define ISAN 0x20

#define ISPU 0x40
#define ISGR 0x80
#define ISPR 0x100

#define ISSP 0x200
#define ISBL 0x400
#define ISCN 0x800

/* C/POSIX-style functions, in the same order as the bit flags */
typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);

/* function under test plus the name used in error messages (printed after a "u_" prefix) */
static const struct {
    IsPOSIXClass *fn;
    const char *name;
} posixClasses[]={
    { u_isalpha, "isalpha" },
    { u_islower, "islower" },
    { u_isupper, "isupper" },
    { u_isdigit, "isdigit" },
    { u_isxdigit, "isxdigit" },
    { u_isalnum, "isalnum" },
    { u_ispunct, "ispunct" },
    { u_isgraph, "isgraph" },
    { u_isprint, "isprint" },
    { u_isspace, "isspace" },
    { u_isblank, "isblank" },
    { u_iscntrl, "iscntrl" }
};
| 777 | |
/*
 * Expected results for TestPOSIX(): for each code point c, posixResults is
 * the OR of the IS.. flags of all classification functions that are
 * expected to return TRUE for c; every function whose flag is absent is
 * expected to return FALSE.
 */
static const struct {
    UChar32 c;
    uint32_t posixResults;
} posixData[]={
    { 0x0008,                                                        ISCN },    /* backspace */
    { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
    { 0x000a,                                              ISSP|     ISCN },    /* LF */
    { 0x000c,                                              ISSP|     ISCN },    /* FF */
    { 0x000d,                                              ISSP|     ISCN },    /* CR */
    { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
    { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
    { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
    { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
    { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
    { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
    { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
    { 0x0085,                                              ISSP|     ISCN },    /* NEL */
    { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
    { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
    { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
    { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
    { 0x0600,                                                        ISCN },    /* arabic number sign */
    { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
    { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
    { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
    { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
    { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
    { 0x200b,                                                        ISCN },    /* ZWSP */
    /*{ 0x200b,                                       ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
    { 0x200e,                                                        ISCN },    /* LRM */
    { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
    { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
    { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
    { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
    { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
    { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
    { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
    { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
};
| 817 | |
| 818 static void | |
| 819 TestPOSIX() { | |
| 820 uint32_t mask; | |
| 821 int32_t cl, i; | |
| 822 UBool expect; | |
| 823 | |
| 824 mask=1; | |
| 825 for(cl=0; cl<12; ++cl) { | |
| 826 for(i=0; i<UPRV_LENGTHOF(posixData); ++i) { | |
| 827 expect=(UBool)((posixData[i].posixResults&mask)!=0); | |
| 828 if(posixClasses[cl].fn(posixData[i].c)!=expect) { | |
| 829 log_err("u_%s(U+%04x)=%s is wrong\n", | |
| 830 posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "T
RUE"); | |
| 831 } | |
| 832 } | |
| 833 mask<<=1; | |
| 834 } | |
| 835 } | |
| 836 | |
| 837 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */ | |
| 838 static void TestControlPrint() | |
| 839 { | |
| 840 const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0
x202b}; | |
| 841 const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2}; | |
| 842 const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014}; | |
| 843 const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b}; | |
| 844 UChar32 c; | |
| 845 | |
| 846 testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, UPRV_LENGTHOF(sam
pleControl), TRUE); | |
| 847 testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, UPRV_LENGTHOF(
sampleNonControl), FALSE); | |
| 848 | |
| 849 testSampleCharProps(u_isprint, "u_isprint", | |
| 850 samplePrintable, UPRV_LENGTHOF(samplePrintable), TRUE); | |
| 851 testSampleCharProps(u_isprint, "u_isprint", | |
| 852 sampleNonPrintable, UPRV_LENGTHOF(sampleNonPrintable), F
ALSE); | |
| 853 | |
| 854 /* test all ISO 8 controls */ | |
| 855 for(c=0; c<=0x9f; ++c) { | |
| 856 if(c==0x20) { | |
| 857 /* skip ASCII graphic characters and continue with DEL */ | |
| 858 c=0x7f; | |
| 859 } | |
| 860 if(!u_iscntrl(c)) { | |
| 861 log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c); | |
| 862 } | |
| 863 if(!u_isISOControl(c)) { | |
| 864 log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c); | |
| 865 } | |
| 866 if(u_isprint(c)) { | |
| 867 log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c); | |
| 868 } | |
| 869 } | |
| 870 | |
| 871 /* test all Latin-1 graphic characters */ | |
| 872 for(c=0x20; c<=0xff; ++c) { | |
| 873 if(c==0x7f) { | |
| 874 c=0xa0; | |
| 875 } else if(c==0xad) { | |
| 876 /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not p
rintable) */ | |
| 877 ++c; | |
| 878 } | |
| 879 if(!u_isprint(c)) { | |
| 880 log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n"
, c); | |
| 881 } | |
| 882 } | |
| 883 } | |
| 884 | |
| 885 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable
()*/ | |
| 886 static void TestIdentifier() | |
| 887 { | |
| 888 const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f}; | |
| 889 const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082}; | |
| 890 const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045}; | |
| 891 const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020}; | |
| 892 const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061}; | |
| 893 const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019}; | |
| 894 const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045}; | |
| 895 const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020}; | |
| 896 const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85}; | |
| 897 const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061}; | |
| 898 | |
| 899 testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart", | |
| 900 sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRU
E); | |
| 901 testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart", | |
| 902 sampleNonJavaIDStart, UPRV_LENGTHOF(sampleNonJavaIDStart
), FALSE); | |
| 903 | |
| 904 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart", | |
| 905 sampleJavaIDPart, UPRV_LENGTHOF(sampleJavaIDPart), TRUE)
; | |
| 906 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart", | |
| 907 sampleNonJavaIDPart, UPRV_LENGTHOF(sampleNonJavaIDPart),
FALSE); | |
| 908 | |
| 909 /* IDPart should imply IDStart */ | |
| 910 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart", | |
| 911 sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRU
E); | |
| 912 | |
| 913 testSampleCharProps(u_isIDStart, "u_isIDStart", | |
| 914 sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart
), TRUE); | |
| 915 testSampleCharProps(u_isIDStart, "u_isIDStart", | |
| 916 sampleNonUnicodeIDStart, UPRV_LENGTHOF(sampleNonUnicodeI
DStart), FALSE); | |
| 917 | |
| 918 testSampleCharProps(u_isIDPart, "u_isIDPart", | |
| 919 sampleUnicodeIDPart, UPRV_LENGTHOF(sampleUnicodeIDPart),
TRUE); | |
| 920 testSampleCharProps(u_isIDPart, "u_isIDPart", | |
| 921 sampleNonUnicodeIDPart, UPRV_LENGTHOF(sampleNonUnicodeID
Part), FALSE); | |
| 922 | |
| 923 /* IDPart should imply IDStart */ | |
| 924 testSampleCharProps(u_isIDPart, "u_isIDPart", | |
| 925 sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart
), TRUE); | |
| 926 | |
| 927 testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable", | |
| 928 sampleIDIgnore, UPRV_LENGTHOF(sampleIDIgnore), TRUE); | |
| 929 testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable", | |
| 930 sampleNonIDIgnore, UPRV_LENGTHOF(sampleNonIDIgnore), FAL
SE); | |
| 931 } | |
| 932 | |
| 933 /* for each line of UnicodeData.txt, check some of the properties */ | |
| 934 typedef struct UnicodeDataContext { | |
| 935 #if UCONFIG_NO_NORMALIZATION | |
| 936 const void *dummy; | |
| 937 #else | |
| 938 const UNormalizer2 *nfc; | |
| 939 const UNormalizer2 *nfkc; | |
| 940 #endif | |
| 941 } UnicodeDataContext; | |
| 942 | |
| 943 /* | |
| 944 * ### TODO | |
| 945 * This test fails incorrectly if the First or Last code point of a repetitive a
rea | |
| 946 * is overridden, which is allowed and is encouraged for the PUAs. | |
| 947 * Currently, this means that both area First/Last and override lines are | |
| 948 * tested against the properties from the API, | |
| 949 * and the area boundary will not match and cause an error. | |
| 950 * | |
| 951 * This function should detect area boundaries and skip them for the test of ind
ividual | |
| 952 * code points' properties. | |
| 953 * Then it should check that the areas contain all the same properties except wh
ere overridden. | |
| 954 * For this, it would have had to set a flag for which code points were listed e
xplicitly. | |
| 955 */ | |
| 956 static void U_CALLCONV | |
| 957 unicodeDataLineFn(void *context, | |
| 958 char *fields[][2], int32_t fieldCount, | |
| 959 UErrorCode *pErrorCode) | |
| 960 { | |
| 961 char buffer[100]; | |
| 962 const char *d; | |
| 963 char *end; | |
| 964 uint32_t value; | |
| 965 UChar32 c; | |
| 966 int32_t i; | |
| 967 int8_t type; | |
| 968 int32_t dt; | |
| 969 UChar dm[32], s[32]; | |
| 970 int32_t dmLength, length; | |
| 971 | |
| 972 #if !UCONFIG_NO_NORMALIZATION | |
| 973 const UNormalizer2 *nfc, *nfkc; | |
| 974 #endif | |
| 975 | |
| 976 /* get the character code, field 0 */ | |
| 977 c=strtoul(fields[0][0], &end, 16); | |
| 978 if(end<=fields[0][0] || end!=fields[0][1]) { | |
| 979 log_err("error: syntax error in field 0 at %s\n", fields[0][0]); | |
| 980 return; | |
| 981 } | |
| 982 if((uint32_t)c>=UCHAR_MAX_VALUE + 1) { | |
| 983 log_err("error in UnicodeData.txt: code point %lu out of range\n", c); | |
| 984 return; | |
| 985 } | |
| 986 | |
| 987 /* get general category, field 2 */ | |
| 988 *fields[2][1]=0; | |
| 989 type = (int8_t)tagValues[MakeProp(fields[2][0])]; | |
| 990 if(u_charType(c)!=type) { | |
| 991 log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(
c), type); | |
| 992 } | |
| 993 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(t
ype)) { | |
| 994 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_C
ATEGORY_MASK)!=U_MASK(u_charType())\n", c); | |
| 995 } | |
| 996 | |
| 997 /* get canonical combining class, field 3 */ | |
| 998 value=strtoul(fields[3][0], &end, 10); | |
| 999 if(end<=fields[3][0] || end!=fields[3][1]) { | |
| 1000 log_err("error: syntax error in field 3 at code 0x%lx\n", c); | |
| 1001 return; | |
| 1002 } | |
| 1003 if(value>255) { | |
| 1004 log_err("error in UnicodeData.txt: combining class %lu out of range\n",
value); | |
| 1005 return; | |
| 1006 } | |
| 1007 #if !UCONFIG_NO_NORMALIZATION | |
| 1008 if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c
, UCHAR_CANONICAL_COMBINING_CLASS)) { | |
| 1009 log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c,
u_getCombiningClass(c), value); | |
| 1010 } | |
| 1011 nfkc=((UnicodeDataContext *)context)->nfkc; | |
| 1012 if(value!=unorm2_getCombiningClass(nfkc, c)) { | |
| 1013 log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of
%lu\n", c, unorm2_getCombiningClass(nfkc, c), value); | |
| 1014 } | |
| 1015 #endif | |
| 1016 | |
| 1017 /* get BiDi category, field 4 */ | |
| 1018 *fields[4][1]=0; | |
| 1019 i=MakeDir(fields[4][0]); | |
| 1020 if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) { | |
| 1021 log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u
_charDirection(c), MakeDir(fields[4][0]), fields[4][0]); | |
| 1022 } | |
| 1023 | |
| 1024 /* get Decomposition_Type & Decomposition_Mapping, field 5 */ | |
| 1025 d=NULL; | |
| 1026 if(fields[5][0]==fields[5][1]) { | |
| 1027 /* no decomposition, except UnicodeData.txt omits Hangul syllable decomp
ositions */ | |
| 1028 if(c==0xac00 || c==0xd7a3) { | |
| 1029 dt=U_DT_CANONICAL; | |
| 1030 } else { | |
| 1031 dt=U_DT_NONE; | |
| 1032 } | |
| 1033 } else { | |
| 1034 d=fields[5][0]; | |
| 1035 *fields[5][1]=0; | |
| 1036 dt=UCHAR_INVALID_CODE; | |
| 1037 if(*d=='<') { | |
| 1038 end=strchr(++d, '>'); | |
| 1039 if(end!=NULL) { | |
| 1040 *end=0; | |
| 1041 dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d); | |
| 1042 d=u_skipWhitespace(end+1); | |
| 1043 } | |
| 1044 } else { | |
| 1045 dt=U_DT_CANONICAL; | |
| 1046 } | |
| 1047 } | |
| 1048 if(dt>U_DT_NONE) { | |
| 1049 if(c==0xac00) { | |
| 1050 dm[0]=0x1100; | |
| 1051 dm[1]=0x1161; | |
| 1052 dm[2]=0; | |
| 1053 dmLength=2; | |
| 1054 } else if(c==0xd7a3) { | |
| 1055 dm[0]=0xd788; | |
| 1056 dm[1]=0x11c2; | |
| 1057 dm[2]=0; | |
| 1058 dmLength=2; | |
| 1059 } else { | |
| 1060 dmLength=u_parseString(d, dm, 32, NULL, pErrorCode); | |
| 1061 } | |
| 1062 } else { | |
| 1063 dmLength=-1; | |
| 1064 } | |
| 1065 if(dt<0 || U_FAILURE(*pErrorCode)) { | |
| 1066 log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition
field\n", (long)c); | |
| 1067 return; | |
| 1068 } | |
| 1069 #if !UCONFIG_NO_NORMALIZATION | |
| 1070 i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE); | |
| 1071 if(i!=dt) { | |
| 1072 log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)
==%d instead of %d\n", c, i, dt); | |
| 1073 } | |
| 1074 /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */ | |
| 1075 length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode); | |
| 1076 if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s,
dm))) { | |
| 1077 log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of
%d " | |
| 1078 "or the Decomposition_Mapping is different (%s)\n", | |
| 1079 c, length, dmLength, u_errorName(*pErrorCode)); | |
| 1080 return; | |
| 1081 } | |
| 1082 /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRa
wDecomposition(c). */ | |
| 1083 if(dt!=U_DT_CANONICAL) { | |
| 1084 dmLength=-1; | |
| 1085 } | |
| 1086 nfc=((UnicodeDataContext *)context)->nfc; | |
| 1087 length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode); | |
| 1088 if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s,
dm))) { | |
| 1089 log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of
%d " | |
| 1090 "or the Decomposition_Mapping is different (%s)\n", | |
| 1091 c, length, dmLength, u_errorName(*pErrorCode)); | |
| 1092 return; | |
| 1093 } | |
| 1094 /* recompose */ | |
| 1095 if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCL
USION)) { | |
| 1096 UChar32 a, b, composite; | |
| 1097 i=0; | |
| 1098 U16_NEXT(dm, i, dmLength, a); | |
| 1099 U16_NEXT(dm, i, dmLength, b); | |
| 1100 /* i==dmLength */ | |
| 1101 composite=unorm2_composePair(nfc, a, b); | |
| 1102 if(composite!=c) { | |
| 1103 log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does n
ot compose back (instead U+%04lX)\n", | |
| 1104 (long)c, (long)a, (long)b, (long)composite); | |
| 1105 } | |
| 1106 /* | |
| 1107 * Note: NFKC has fewer round-trip mappings than NFC, | |
| 1108 * so we can't just test unorm2_composePair(nfkc, a, b) here without fur
ther data. | |
| 1109 */ | |
| 1110 } | |
| 1111 #endif | |
| 1112 | |
| 1113 /* get ISO Comment, field 11 */ | |
| 1114 *fields[11][1]=0; | |
| 1115 i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode); | |
| 1116 if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) { | |
| 1117 log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s):
\"%s\" should be \"%s\"\n", | |
| 1118 c, u_errorName(*pErrorCode), | |
| 1119 U_FAILURE(*pErrorCode) ? buffer : "[error]", | |
| 1120 fields[11][0]); | |
| 1121 } | |
| 1122 | |
| 1123 /* get uppercase mapping, field 12 */ | |
| 1124 if(fields[12][0]!=fields[12][1]) { | |
| 1125 value=strtoul(fields[12][0], &end, 16); | |
| 1126 if(end!=fields[12][1]) { | |
| 1127 log_err("error: syntax error in field 12 at code 0x%lx\n", c); | |
| 1128 return; | |
| 1129 } | |
| 1130 if((UChar32)value!=u_toupper(c)) { | |
| 1131 log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c
, u_toupper(c), value); | |
| 1132 } | |
| 1133 } else { | |
| 1134 /* no case mapping: the API must map the code point to itself */ | |
| 1135 if(c!=u_toupper(c)) { | |
| 1136 log_err("error: U+%04lx does not have an uppercase mapping but u_tou
pper()==U+%04lx\n", c, u_toupper(c)); | |
| 1137 } | |
| 1138 } | |
| 1139 | |
| 1140 /* get lowercase mapping, field 13 */ | |
| 1141 if(fields[13][0]!=fields[13][1]) { | |
| 1142 value=strtoul(fields[13][0], &end, 16); | |
| 1143 if(end!=fields[13][1]) { | |
| 1144 log_err("error: syntax error in field 13 at code 0x%lx\n", c); | |
| 1145 return; | |
| 1146 } | |
| 1147 if((UChar32)value!=u_tolower(c)) { | |
| 1148 log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c
, u_tolower(c), value); | |
| 1149 } | |
| 1150 } else { | |
| 1151 /* no case mapping: the API must map the code point to itself */ | |
| 1152 if(c!=u_tolower(c)) { | |
| 1153 log_err("error: U+%04lx does not have a lowercase mapping but u_tolo
wer()==U+%04lx\n", c, u_tolower(c)); | |
| 1154 } | |
| 1155 } | |
| 1156 | |
| 1157 /* get titlecase mapping, field 14 */ | |
| 1158 if(fields[14][0]!=fields[14][1]) { | |
| 1159 value=strtoul(fields[14][0], &end, 16); | |
| 1160 if(end!=fields[14][1]) { | |
| 1161 log_err("error: syntax error in field 14 at code 0x%lx\n", c); | |
| 1162 return; | |
| 1163 } | |
| 1164 if((UChar32)value!=u_totitle(c)) { | |
| 1165 log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c
, u_totitle(c), value); | |
| 1166 } | |
| 1167 } else { | |
| 1168 /* no case mapping: the API must map the code point to itself */ | |
| 1169 if(c!=u_totitle(c)) { | |
| 1170 log_err("error: U+%04lx does not have a titlecase mapping but u_toti
tle()==U+%04lx\n", c, u_totitle(c)); | |
| 1171 } | |
| 1172 } | |
| 1173 } | |
| 1174 | |
| 1175 static UBool U_CALLCONV | |
| 1176 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory t
ype) { | |
| 1177 static const UChar32 test[][2]={ | |
| 1178 {0x41, U_UPPERCASE_LETTER}, | |
| 1179 {0x308, U_NON_SPACING_MARK}, | |
| 1180 {0xfffe, U_GENERAL_OTHER_TYPES}, | |
| 1181 {0xe0041, U_FORMAT_CHAR}, | |
| 1182 {0xeffff, U_UNASSIGNED} | |
| 1183 }; | |
| 1184 | |
| 1185 int32_t i, count; | |
| 1186 | |
| 1187 if(0!=strcmp((const char *)context, "a1")) { | |
| 1188 log_err("error: u_enumCharTypes() passes on an incorrect context pointer
\n"); | |
| 1189 return FALSE; | |
| 1190 } | |
| 1191 | |
| 1192 count=UPRV_LENGTHOF(test); | |
| 1193 for(i=0; i<count; ++i) { | |
| 1194 if(start<=test[i][0] && test[i][0]<limit) { | |
| 1195 if(type!=(UCharCategory)test[i][1]) { | |
| 1196 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ w
ith %ld instead of U+%04lx with %ld\n", | |
| 1197 start, limit, (long)type, test[i][0], test[i][1]); | |
| 1198 } | |
| 1199 /* stop at the range that includes the last test code point (increas
es code coverage for enumeration) */ | |
| 1200 return i==(count-1) ? FALSE : TRUE; | |
| 1201 } | |
| 1202 } | |
| 1203 | |
| 1204 if(start>test[count-1][0]) { | |
| 1205 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld
after it should have stopped\n", | |
| 1206 start, limit, (long)type); | |
| 1207 return FALSE; | |
| 1208 } | |
| 1209 | |
| 1210 return TRUE; | |
| 1211 } | |
| 1212 | |
| 1213 static UBool U_CALLCONV | |
| 1214 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCatego
ry type) { | |
| 1215 /* default Bidi classes for unassigned code points, from the DerivedBidiClas
s.txt header */ | |
| 1216 static const int32_t defaultBidi[][2]={ /* { limit, class } */ | |
| 1217 { 0x0590, U_LEFT_TO_RIGHT }, | |
| 1218 { 0x0600, U_RIGHT_TO_LEFT }, | |
| 1219 { 0x07C0, U_RIGHT_TO_LEFT_ARABIC }, | |
| 1220 { 0x08A0, U_RIGHT_TO_LEFT }, | |
| 1221 { 0x0900, U_RIGHT_TO_LEFT_ARABIC }, /* Unicode 6.1 changes U+08A0..U+08
FF from R to AL */ | |
| 1222 { 0x20A0, U_LEFT_TO_RIGHT }, | |
| 1223 { 0x20D0, U_EUROPEAN_NUMBER_TERMINATOR }, /* Unicode 6.3 changes the cu
rrency symbols block U+20A0..U+20CF to default to ET not L */ | |
| 1224 { 0xFB1D, U_LEFT_TO_RIGHT }, | |
| 1225 { 0xFB50, U_RIGHT_TO_LEFT }, | |
| 1226 { 0xFE00, U_RIGHT_TO_LEFT_ARABIC }, | |
| 1227 { 0xFE70, U_LEFT_TO_RIGHT }, | |
| 1228 { 0xFF00, U_RIGHT_TO_LEFT_ARABIC }, | |
| 1229 { 0x10800, U_LEFT_TO_RIGHT }, | |
| 1230 { 0x11000, U_RIGHT_TO_LEFT }, | |
| 1231 { 0x1E800, U_LEFT_TO_RIGHT }, /* new default-R range in Unicode 5.2: U+
1E800 - U+1EFFF */ | |
| 1232 { 0x1EE00, U_RIGHT_TO_LEFT }, | |
| 1233 { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC }, /* Unicode 6.1 changes U+1EE00..U+
1EEFF from R to AL */ | |
| 1234 { 0x1F000, U_RIGHT_TO_LEFT }, | |
| 1235 { 0x110000, U_LEFT_TO_RIGHT } | |
| 1236 }; | |
| 1237 | |
| 1238 UChar32 c; | |
| 1239 int32_t i; | |
| 1240 UCharDirection shouldBeDir; | |
| 1241 | |
| 1242 /* | |
| 1243 * LineBreak.txt specifies: | |
| 1244 * # - Assigned characters that are not listed explicitly are given the v
alue | |
| 1245 * # "AL". | |
| 1246 * # - Unassigned characters are given the value "XX". | |
| 1247 * | |
| 1248 * PUA characters are listed explicitly with "XX". | |
| 1249 * Verify that no assigned character has "XX". | |
| 1250 */ | |
| 1251 if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) { | |
| 1252 c=start; | |
| 1253 while(c<limit) { | |
| 1254 if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) { | |
| 1255 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c); | |
| 1256 } | |
| 1257 ++c; | |
| 1258 } | |
| 1259 } | |
| 1260 | |
| 1261 /* | |
| 1262 * Verify default Bidi classes. | |
| 1263 * For recent Unicode versions, see UCD.html. | |
| 1264 * | |
| 1265 * For older Unicode versions: | |
| 1266 * See table 3-7 "Bidirectional Character Types" in UAX #9. | |
| 1267 * http://www.unicode.org/reports/tr9/ | |
| 1268 * | |
| 1269 * See also DerivedBidiClass.txt for Cn code points! | |
| 1270 * | |
| 1271 * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/reso
lved-pri.html) | |
| 1272 * changed some default values. | |
| 1273 * In particular, non-characters and unassigned Default Ignorable Code Point
s | |
| 1274 * change from L to BN. | |
| 1275 * | |
| 1276 * UCD.html version 4.0.1 does not yet reflect these changes. | |
| 1277 */ | |
| 1278 if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) { | |
| 1279 /* enumerate the intersections of defaultBidi ranges with [start..limit[
*/ | |
| 1280 c=start; | |
| 1281 for(i=0; i<UPRV_LENGTHOF(defaultBidi) && c<limit; ++i) { | |
| 1282 if((int32_t)c<defaultBidi[i][0]) { | |
| 1283 while(c<limit && (int32_t)c<defaultBidi[i][0]) { | |
| 1284 if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_D
EFAULT_IGNORABLE_CODE_POINT)) { | |
| 1285 shouldBeDir=U_BOUNDARY_NEUTRAL; | |
| 1286 } else { | |
| 1287 shouldBeDir=(UCharDirection)defaultBidi[i][1]; | |
| 1288 } | |
| 1289 | |
| 1290 if( u_charDirection(c)!=shouldBeDir || | |
| 1291 u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir | |
| 1292 ) { | |
| 1293 log_err("error: u_charDirection(unassigned/PUA U+%04lx)=
%s should be %s\n", | |
| 1294 c, dirStrings[u_charDirection(c)], dirStrings[should
BeDir]); | |
| 1295 } | |
| 1296 ++c; | |
| 1297 } | |
| 1298 } | |
| 1299 } | |
| 1300 } | |
| 1301 | |
| 1302 return TRUE; | |
| 1303 } | |
| 1304 | |
| 1305 /* tests for several properties */ | |
| 1306 static void TestUnicodeData() | |
| 1307 { | |
| 1308 UVersionInfo expectVersionArray; | |
| 1309 UVersionInfo versionArray; | |
| 1310 char *fields[15][2]; | |
| 1311 UErrorCode errorCode; | |
| 1312 UChar32 c; | |
| 1313 int8_t type; | |
| 1314 | |
| 1315 UnicodeDataContext context; | |
| 1316 | |
| 1317 u_versionFromString(expectVersionArray, U_UNICODE_VERSION); | |
| 1318 u_getUnicodeVersion(versionArray); | |
| 1319 if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0) | |
| 1320 { | |
| 1321 log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION "
got %d.%d.%d.%d\n", | |
| 1322 versionArray[0], versionArray[1], versionArray[2], versionArray[3]); | |
| 1323 } | |
| 1324 | |
| 1325 #if defined(ICU_UNICODE_VERSION) | |
| 1326 /* test only happens where we have configure.in with UNICODE_VERSION - sanit
y check. */ | |
| 1327 if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION)) | |
| 1328 { | |
| 1329 log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNIC
ODE_VERSION " got " ICU_UNICODE_VERSION "\n"); | |
| 1330 } | |
| 1331 #endif | |
| 1332 | |
| 1333 if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyV
alue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) { | |
| 1334 log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \
n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041)); | |
| 1335 } | |
| 1336 | |
| 1337 errorCode=U_ZERO_ERROR; | |
| 1338 #if !UCONFIG_NO_NORMALIZATION | |
| 1339 context.nfc=unorm2_getNFCInstance(&errorCode); | |
| 1340 context.nfkc=unorm2_getNFKCInstance(&errorCode); | |
| 1341 if(U_FAILURE(errorCode)) { | |
| 1342 log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n",
u_errorName(errorCode)); | |
| 1343 return; | |
| 1344 } | |
| 1345 #endif | |
| 1346 parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &er
rorCode); | |
| 1347 if(U_FAILURE(errorCode)) { | |
| 1348 return; /* if we couldn't parse UnicodeData.txt, we should return */ | |
| 1349 } | |
| 1350 | |
| 1351 /* sanity check on repeated properties */ | |
| 1352 for(c=0xfffe; c<=0x10ffff;) { | |
| 1353 type=u_charType(c); | |
| 1354 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MA
SK(type)) { | |
| 1355 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENER
AL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c); | |
| 1356 } | |
| 1357 if(type!=U_UNASSIGNED) { | |
| 1358 log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c
, u_charType(c)); | |
| 1359 } | |
| 1360 if((c&0xffff)==0xfffe) { | |
| 1361 ++c; | |
| 1362 } else { | |
| 1363 c+=0xffff; | |
| 1364 } | |
| 1365 } | |
| 1366 | |
| 1367 /* test that PUA is not "unassigned" */ | |
| 1368 for(c=0xe000; c<=0x10fffd;) { | |
| 1369 type=u_charType(c); | |
| 1370 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MA
SK(type)) { | |
| 1371 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENER
AL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c); | |
| 1372 } | |
| 1373 if(type==U_UNASSIGNED) { | |
| 1374 log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c); | |
| 1375 } else if(type!=U_PRIVATE_USE_CHAR) { | |
| 1376 log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type); | |
| 1377 } | |
| 1378 if(c==0xf8ff) { | |
| 1379 c=0xf0000; | |
| 1380 } else if(c==0xffffd) { | |
| 1381 c=0x100000; | |
| 1382 } else { | |
| 1383 ++c; | |
| 1384 } | |
| 1385 } | |
| 1386 | |
| 1387 /* test u_enumCharTypes() */ | |
| 1388 u_enumCharTypes(enumTypeRange, "a1"); | |
| 1389 | |
| 1390 /* check default properties */ | |
| 1391 u_enumCharTypes(enumDefaultsRange, NULL); | |
| 1392 } | |
| 1393 | |
| 1394 static void TestCodeUnit(){ | |
| 1395 const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xd
bff,0xdc00,0xdc02,0xddee,0xdfff,0}; | |
| 1396 | |
| 1397 int32_t i; | |
| 1398 | |
| 1399 for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){ | |
| 1400 UChar c=codeunit[i]; | |
| 1401 if(i<4){ | |
| 1402 if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(U
TF_IS_SURROGATE(c))){ | |
| 1403 log_err("ERROR: U+%04x is a single", c); | |
| 1404 } | |
| 1405 | |
| 1406 } | |
| 1407 if(i >= 4 && i< 8){ | |
| 1408 if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF
_IS_SURROGATE(c))){ | |
| 1409 log_err("ERROR: U+%04x is a first surrogate", c); | |
| 1410 } | |
| 1411 } | |
| 1412 if(i >= 8 && i< 12){ | |
| 1413 if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF
_IS_SURROGATE(c))){ | |
| 1414 log_err("ERROR: U+%04x is a second surrogate", c); | |
| 1415 } | |
| 1416 } | |
| 1417 } | |
| 1418 | |
| 1419 } | |
| 1420 | |
| 1421 static void TestCodePoint(){ | |
| 1422 const UChar32 codePoint[]={ | |
| 1423 /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */ | |
| 1424 0xd800, | |
| 1425 0xdbff, | |
| 1426 0xdc00, | |
| 1427 0xdfff, | |
| 1428 0xdc04, | |
| 1429 0xd821, | |
| 1430 /*not a surrogate, valid, isUnicodeChar , not Error*/ | |
| 1431 0x20ac, | |
| 1432 0xd7ff, | |
| 1433 0xe000, | |
| 1434 0xe123, | |
| 1435 0x0061, | |
| 1436 0xe065, | |
| 1437 0x20402, | |
| 1438 0x24506, | |
| 1439 0x23456, | |
| 1440 0x20402, | |
| 1441 0x10402, | |
| 1442 0x23456, | |
| 1443 /*not a surrogate, not valid, isUnicodeChar, isError */ | |
| 1444 0x0015, | |
| 1445 0x009f, | |
| 1446 /*not a surrogate, not valid, not isUnicodeChar, isError */ | |
| 1447 0xffff, | |
| 1448 0xfffe, | |
| 1449 }; | |
| 1450 int32_t i; | |
| 1451 for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){ | |
| 1452 UChar32 c=codePoint[i]; | |
| 1453 if(i<6){ | |
| 1454 if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c
)){ | |
| 1455 log_err("ERROR: isSurrogate() failed for U+%04x\n", c); | |
| 1456 } | |
| 1457 if(UTF_IS_VALID(c)){ | |
| 1458 log_err("ERROR: isValid() failed for U+%04x\n", c); | |
| 1459 } | |
| 1460 if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){ | |
| 1461 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c); | |
| 1462 } | |
| 1463 if(UTF_IS_ERROR(c)){ | |
| 1464 log_err("ERROR: isError() failed for U+%04x\n", c); | |
| 1465 } | |
| 1466 }else if(i >=6 && i<18){ | |
| 1467 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){ | |
| 1468 log_err("ERROR: isSurrogate() failed for U+%04x\n", c); | |
| 1469 } | |
| 1470 if(!UTF_IS_VALID(c)){ | |
| 1471 log_err("ERROR: isValid() failed for U+%04x\n", c); | |
| 1472 } | |
| 1473 if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){ | |
| 1474 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c); | |
| 1475 } | |
| 1476 if(UTF_IS_ERROR(c)){ | |
| 1477 log_err("ERROR: isError() failed for U+%04x\n", c); | |
| 1478 } | |
| 1479 }else if(i >=18 && i<20){ | |
| 1480 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){ | |
| 1481 log_err("ERROR: isSurrogate() failed for U+%04x\n", c); | |
| 1482 } | |
| 1483 if(UTF_IS_VALID(c)){ | |
| 1484 log_err("ERROR: isValid() failed for U+%04x\n", c); | |
| 1485 } | |
| 1486 if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){ | |
| 1487 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c); | |
| 1488 } | |
| 1489 if(!UTF_IS_ERROR(c)){ | |
| 1490 log_err("ERROR: isError() failed for U+%04x\n", c); | |
| 1491 } | |
| 1492 } | |
| 1493 else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){ | |
| 1494 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){ | |
| 1495 log_err("ERROR: isSurrogate() failed for U+%04x\n", c); | |
| 1496 } | |
| 1497 if(UTF_IS_VALID(c)){ | |
| 1498 log_err("ERROR: isValid() failed for U+%04x\n", c); | |
| 1499 } | |
| 1500 if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){ | |
| 1501 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c); | |
| 1502 } | |
| 1503 if(!UTF_IS_ERROR(c)){ | |
| 1504 log_err("ERROR: isError() failed for U+%04x\n", c); | |
| 1505 } | |
| 1506 } | |
| 1507 } | |
| 1508 | |
| 1509 if( | |
| 1510 !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) || | |
| 1511 !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) || | |
| 1512 U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) || | |
| 1513 U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff) | |
| 1514 ) { | |
| 1515 log_err("error with U_IS_BMP()\n"); | |
| 1516 } | |
| 1517 | |
| 1518 if( | |
| 1519 U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(
0x20ac) || | |
| 1520 U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEM
ENTARY(0xffff) || | |
| 1521 U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_
SUPPLEMENTARY(0x50005) || | |
| 1522 !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SU
PPLEMENTARY(0x7fffffff) | |
| 1523 ) { | |
| 1524 log_err("error with U_IS_SUPPLEMENTARY()\n"); | |
| 1525 } | |
| 1526 } | |
| 1527 | |
| 1528 static void TestCharLength() | |
| 1529 { | |
| 1530 const int32_t codepoint[]={ | |
| 1531 1, 0x0061, | |
| 1532 1, 0xe065, | |
| 1533 1, 0x20ac, | |
| 1534 2, 0x20402, | |
| 1535 2, 0x23456, | |
| 1536 2, 0x24506, | |
| 1537 2, 0x20402, | |
| 1538 2, 0x10402, | |
| 1539 1, 0xd7ff, | |
| 1540 1, 0xe000 | |
| 1541 }; | |
| 1542 | |
| 1543 int32_t i; | |
| 1544 UBool multiple; | |
| 1545 for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+
2)){ | |
| 1546 UChar32 c=codepoint[i+1]; | |
| 1547 if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){ | |
| 1548 log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n",
c, codepoint[i], U16_LENGTH(c)); | |
| 1549 } | |
| 1550 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE); | |
| 1551 if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){ | |
| 1552 log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c
); | |
| 1553 } | |
| 1554 } | |
| 1555 } | |
| 1556 | |
| 1557 /*internal functions ----*/ | |
| 1558 static int32_t MakeProp(char* str) | |
| 1559 { | |
| 1560 int32_t result = 0; | |
| 1561 char* matchPosition =0; | |
| 1562 | |
| 1563 matchPosition = strstr(tagStrings, str); | |
| 1564 if (matchPosition == 0) | |
| 1565 { | |
| 1566 log_err("unrecognized type letter "); | |
| 1567 log_err(str); | |
| 1568 } | |
| 1569 else | |
| 1570 result = (int32_t)((matchPosition - tagStrings) / 2); | |
| 1571 return result; | |
| 1572 } | |
| 1573 | |
| 1574 static int32_t MakeDir(char* str) | |
| 1575 { | |
| 1576 int32_t pos = 0; | |
| 1577 for (pos = 0; pos < U_CHAR_DIRECTION_COUNT; pos++) { | |
| 1578 if (strcmp(str, dirStrings[pos]) == 0) { | |
| 1579 return pos; | |
| 1580 } | |
| 1581 } | |
| 1582 return -1; | |
| 1583 } | |
| 1584 | |
| 1585 /* test u_charName() -------------------------------------------------------- */ | |
| 1586 | |
/*
 * Expected character names for the u_charName() tests, one entry per code point:
 * name (modern name), oldName (Unicode 1.0 name; empty in every entry here),
 * extName (extended name) and alias (name alias, NULL where there is none).
 */
static const struct {
    uint32_t code;
    const char *name;
    const char *oldName;
    const char *extName;
    const char *alias;
} names[]={
    { 0x0061,
      "LATIN SMALL LETTER A", "",
      "LATIN SMALL LETTER A",
      NULL },
    { 0x01a2,
      "LATIN CAPITAL LETTER OI", "",
      "LATIN CAPITAL LETTER OI",
      "LATIN CAPITAL LETTER GHA" },
    { 0x0284,
      "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "",
      "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
      NULL },
    { 0x0fd0,
      "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
      "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
      "TIBETAN MARK BKA- SHOG GI MGO RGYAN" },
    { 0x3401,
      "CJK UNIFIED IDEOGRAPH-3401", "",
      "CJK UNIFIED IDEOGRAPH-3401",
      NULL },
    { 0x7fed,
      "CJK UNIFIED IDEOGRAPH-7FED", "",
      "CJK UNIFIED IDEOGRAPH-7FED",
      NULL },
    { 0xac00,
      "HANGUL SYLLABLE GA", "",
      "HANGUL SYLLABLE GA",
      NULL },
    { 0xd7a3,
      "HANGUL SYLLABLE HIH", "",
      "HANGUL SYLLABLE HIH",
      NULL },
    { 0xd800,
      "", "",
      "<lead surrogate-D800>",
      NULL },
    { 0xdc00,
      "", "",
      "<trail surrogate-DC00>",
      NULL },
    { 0xff08,
      "FULLWIDTH LEFT PARENTHESIS", "",
      "FULLWIDTH LEFT PARENTHESIS",
      NULL },
    { 0xffe5,
      "FULLWIDTH YEN SIGN", "",
      "FULLWIDTH YEN SIGN",
      NULL },
    { 0xffff,
      "", "",
      "<noncharacter-FFFF>",
      NULL },
    { 0x1d0c5,
      "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
      "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
      "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS" },
    { 0x23456,
      "CJK UNIFIED IDEOGRAPH-23456", "",
      "CJK UNIFIED IDEOGRAPH-23456",
      NULL }
};
| 1614 | |
| 1615 static UBool | |
| 1616 enumCharNamesFn(void *context, | |
| 1617 UChar32 code, UCharNameChoice nameChoice, | |
| 1618 const char *name, int32_t length) { | |
| 1619 int32_t *pCount=(int32_t *)context; | |
| 1620 const char *expected; | |
| 1621 int i; | |
| 1622 | |
| 1623 if(length<=0 || length!=(int32_t)strlen(name)) { | |
| 1624 /* should not be called with an empty string or invalid length */ | |
| 1625 log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length); | |
| 1626 return TRUE; | |
| 1627 } | |
| 1628 | |
| 1629 ++*pCount; | |
| 1630 for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) { | |
| 1631 if(code==(UChar32)names[i].code) { | |
| 1632 switch (nameChoice) { | |
| 1633 case U_EXTENDED_CHAR_NAME: | |
| 1634 if(0!=strcmp(name, names[i].extName)) { | |
| 1635 log_err("u_enumCharName(0x%lx - Extended)=%s instead of
%s\n", code, name, names[i].extName); | |
| 1636 } | |
| 1637 break; | |
| 1638 case U_UNICODE_CHAR_NAME: | |
| 1639 if(0!=strcmp(name, names[i].name)) { | |
| 1640 log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code
, name, names[i].name); | |
| 1641 } | |
| 1642 break; | |
| 1643 case U_UNICODE_10_CHAR_NAME: | |
| 1644 expected=names[i].oldName; | |
| 1645 if(expected[0]==0 || 0!=strcmp(name, expected)) { | |
| 1646 log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n"
, code, name, expected); | |
| 1647 } | |
| 1648 break; | |
| 1649 case U_CHAR_NAME_ALIAS: | |
| 1650 expected=names[i].alias; | |
| 1651 if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expec
ted)) { | |
| 1652 log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\
n", code, name, expected); | |
| 1653 } | |
| 1654 break; | |
| 1655 case U_CHAR_NAME_CHOICE_COUNT: | |
| 1656 break; | |
| 1657 } | |
| 1658 break; | |
| 1659 } | |
| 1660 } | |
| 1661 return TRUE; | |
| 1662 } | |
| 1663 | |
/* State shared between TestCharNames() and enumExtCharNamesFn(). */
struct enumExtCharNamesContext {
    /* name counter; its address is handed to enumCharNamesFn() as the context */
    uint32_t length;
    /* code point seen by the previous callback; initialized to -1 by the caller */
    int32_t last;
};
| 1668 | |
| 1669 static UBool | |
| 1670 enumExtCharNamesFn(void *context, | |
| 1671 UChar32 code, UCharNameChoice nameChoice, | |
| 1672 const char *name, int32_t length) { | |
| 1673 struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) c
ontext; | |
| 1674 | |
| 1675 if (ecncp->last != (int32_t) code - 1) { | |
| 1676 if (ecncp->last < 0) { | |
| 1677 log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ex
t) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1
); | |
| 1678 } else { | |
| 1679 log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 -
Ext)\n", code); | |
| 1680 } | |
| 1681 } | |
| 1682 ecncp->last = (int32_t) code; | |
| 1683 | |
| 1684 if (!*name) { | |
| 1685 log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", c
ode); | |
| 1686 } | |
| 1687 | |
| 1688 return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length); | |
| 1689 } | |
| 1690 | |
| 1691 /** | |
| 1692 * This can be made more efficient by moving it into putil.c and having | |
| 1693 * it directly access the ebcdic translation tables. | |
| 1694 * TODO: If we get this method in putil.c, then delete it from here. | |
| 1695 */ | |
| 1696 static UChar | |
| 1697 u_charToUChar(char c) { | |
| 1698 UChar uc; | |
| 1699 u_charsToUChars(&c, &uc, 1); | |
| 1700 return uc; | |
| 1701 } | |
| 1702 | |
| 1703 static void | |
| 1704 TestCharNames() { | |
| 1705 static char name[80]; | |
| 1706 UErrorCode errorCode=U_ZERO_ERROR; | |
| 1707 struct enumExtCharNamesContext extContext; | |
| 1708 const char *expected; | |
| 1709 int32_t length; | |
| 1710 UChar32 c; | |
| 1711 int32_t i; | |
| 1712 | |
| 1713 log_verbose("Testing uprv_getMaxCharNameLength()\n"); | |
| 1714 length=uprv_getMaxCharNameLength(); | |
| 1715 if(length==0) { | |
| 1716 /* no names data available */ | |
| 1717 return; | |
| 1718 } | |
| 1719 if(length<83) { /* Unicode 3.2 max char name length */ | |
| 1720 log_err("uprv_getMaxCharNameLength()=%d is too short"); | |
| 1721 } | |
| 1722 /* ### TODO same tests for max ISO comment length as for max name length */ | |
| 1723 | |
| 1724 log_verbose("Testing u_charName()\n"); | |
| 1725 for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) { | |
| 1726 /* modern Unicode character name */ | |
| 1727 length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name)
, &errorCode); | |
| 1728 if(U_FAILURE(errorCode)) { | |
| 1729 log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(e
rrorCode)); | |
| 1730 return; | |
| 1731 } | |
| 1732 if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strle
n(name)) { | |
| 1733 log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n",
names[i].code, name, length, names[i].name); | |
| 1734 } | |
| 1735 | |
| 1736 /* find the modern name */ | |
| 1737 if (*names[i].name) { | |
| 1738 c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode); | |
| 1739 if(U_FAILURE(errorCode)) { | |
| 1740 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorN
ame(errorCode)); | |
| 1741 return; | |
| 1742 } | |
| 1743 if(c!=(UChar32)names[i].code) { | |
| 1744 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", name
s[i].name, c, names[i].code); | |
| 1745 } | |
| 1746 } | |
| 1747 | |
| 1748 /* Unicode 1.0 character name */ | |
| 1749 length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(na
me), &errorCode); | |
| 1750 if(U_FAILURE(errorCode)) { | |
| 1751 log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_error
Name(errorCode)); | |
| 1752 return; | |
| 1753 } | |
| 1754 if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length
!=(uint16_t)strlen(name)) { | |
| 1755 log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothi
ng or %s\n", names[i].code, name, length, names[i].oldName); | |
| 1756 } | |
| 1757 | |
| 1758 /* find the Unicode 1.0 name if it is stored (length>0 means that we cou
ld read it) */ | |
| 1759 if(names[i].oldName[0]!=0 /* && length>0 */) { | |
| 1760 c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCod
e); | |
| 1761 if(U_FAILURE(errorCode)) { | |
| 1762 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName,
u_errorName(errorCode)); | |
| 1763 return; | |
| 1764 } | |
| 1765 if(c!=(UChar32)names[i].code) { | |
| 1766 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n"
, names[i].oldName, c, names[i].code); | |
| 1767 } | |
| 1768 } | |
| 1769 | |
| 1770 /* Unicode character name alias */ | |
| 1771 length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name),
&errorCode); | |
| 1772 if(U_FAILURE(errorCode)) { | |
| 1773 log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_err
orName(errorCode)); | |
| 1774 return; | |
| 1775 } | |
| 1776 expected=names[i].alias; | |
| 1777 if(expected==NULL) { | |
| 1778 expected=""; | |
| 1779 } | |
| 1780 if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint1
6_t)strlen(name)) { | |
| 1781 log_err("u_charName(0x%lx - alias) gets %s length %ld instead of not
hing or %s\n", | |
| 1782 names[i].code, name, length, expected); | |
| 1783 } | |
| 1784 | |
| 1785 /* find the Unicode character name alias if it is stored (length>0 means
that we could read it) */ | |
| 1786 if(expected[0]!=0 /* && length>0 */) { | |
| 1787 c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode); | |
| 1788 if(U_FAILURE(errorCode)) { | |
| 1789 log_err("u_charFromName(%s - alias) error %s\n", | |
| 1790 expected, u_errorName(errorCode)); | |
| 1791 return; | |
| 1792 } | |
| 1793 if(c!=(UChar32)names[i].code) { | |
| 1794 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\
n", | |
| 1795 expected, c, names[i].code); | |
| 1796 } | |
| 1797 } | |
| 1798 } | |
| 1799 | |
| 1800 /* test u_enumCharNames() */ | |
| 1801 length=0; | |
| 1802 errorCode=U_ZERO_ERROR; | |
| 1803 u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &leng
th, U_UNICODE_CHAR_NAME, &errorCode); | |
| 1804 if(U_FAILURE(errorCode) || length<94140) { | |
| 1805 log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MI
N_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length); | |
| 1806 } | |
| 1807 | |
| 1808 extContext.length = 0; | |
| 1809 extContext.last = -1; | |
| 1810 errorCode=U_ZERO_ERROR; | |
| 1811 u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &e
xtContext, U_EXTENDED_CHAR_NAME, &errorCode); | |
| 1812 if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) { | |
| 1813 log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld
\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.le
ngth); | |
| 1814 } | |
| 1815 | |
| 1816 /* test that u_charFromName() uppercases the input name, i.e., works with mi
xed-case names (new in 2.0) */ | |
| 1817 if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorC
ode)) { | |
| 1818 log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") d
id not find U+0061 (%s)\n", u_errorName(errorCode)); | |
| 1819 } | |
| 1820 | |
| 1821 /* Test getCharNameCharacters */ | |
| 1822 if(!getTestOption(QUICK_OPTION)) { | |
| 1823 enum { BUFSIZE = 256 }; | |
| 1824 UErrorCode ec = U_ZERO_ERROR; | |
| 1825 char buf[BUFSIZE]; | |
| 1826 int32_t maxLength; | |
| 1827 UChar32 cp; | |
| 1828 UChar pat[BUFSIZE], dumbPat[BUFSIZE]; | |
| 1829 int32_t l1, l2; | |
| 1830 UBool map[256]; | |
| 1831 UBool ok; | |
| 1832 | |
| 1833 USet* set = uset_open(1, 0); /* empty set */ | |
| 1834 USet* dumb = uset_open(1, 0); /* empty set */ | |
| 1835 | |
| 1836 /* | |
| 1837 * uprv_getCharNameCharacters() will likely return more lowercase | |
| 1838 * letters than actual character names contain because | |
| 1839 * it includes all the characters in lowercased names of | |
| 1840 * general categories, for the full possible set of extended names. | |
| 1841 */ | |
| 1842 { | |
| 1843 USetAdder sa={ | |
| 1844 NULL, | |
| 1845 uset_add, | |
| 1846 uset_addRange, | |
| 1847 uset_addString, | |
| 1848 NULL /* don't need remove() */ | |
| 1849 }; | |
| 1850 sa.set=set; | |
| 1851 uprv_getCharNameCharacters(&sa); | |
| 1852 } | |
| 1853 | |
| 1854 /* build set the dumb (but sure-fire) way */ | |
| 1855 for (i=0; i<256; ++i) { | |
| 1856 map[i] = FALSE; | |
| 1857 } | |
| 1858 | |
| 1859 maxLength=0; | |
| 1860 for (cp=0; cp<0x110000; ++cp) { | |
| 1861 int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME, | |
| 1862 buf, BUFSIZE, &ec); | |
| 1863 if (U_FAILURE(ec)) { | |
| 1864 log_err("FAIL: u_charName failed when it shouldn't\n"); | |
| 1865 uset_close(set); | |
| 1866 uset_close(dumb); | |
| 1867 return; | |
| 1868 } | |
| 1869 if(len>maxLength) { | |
| 1870 maxLength=len; | |
| 1871 } | |
| 1872 | |
| 1873 for (i=0; i<len; ++i) { | |
| 1874 if (!map[(uint8_t) buf[i]]) { | |
| 1875 uset_add(dumb, (UChar32)u_charToUChar(buf[i])); | |
| 1876 map[(uint8_t) buf[i]] = TRUE; | |
| 1877 } | |
| 1878 } | |
| 1879 | |
| 1880 /* test for leading/trailing whitespace */ | |
| 1881 if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t
') { | |
| 1882 log_err("u_charName(U+%04x) returns a name with leading or trail
ing whitespace\n", cp); | |
| 1883 } | |
| 1884 } | |
| 1885 | |
| 1886 if(map[(uint8_t)'\t']) { | |
| 1887 log_err("u_charName() returned a name with a TAB for some code point
\n", cp); | |
| 1888 } | |
| 1889 | |
| 1890 length=uprv_getMaxCharNameLength(); | |
| 1891 if(length!=maxLength) { | |
| 1892 log_err("uprv_getMaxCharNameLength()=%d differs from the maximum len
gth %d of all extended names\n", | |
| 1893 length, maxLength); | |
| 1894 } | |
| 1895 | |
| 1896 /* compare the sets. Where is my uset_equals?!! */ | |
| 1897 ok=TRUE; | |
| 1898 for(i=0; i<256; ++i) { | |
| 1899 if(uset_contains(set, i)!=uset_contains(dumb, i)) { | |
| 1900 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !use
t_contains(dumb, i)) { | |
| 1901 /* ignore lowercase a-z that are in set but not in dumb */ | |
| 1902 ok=TRUE; | |
| 1903 } else { | |
| 1904 ok=FALSE; | |
| 1905 break; | |
| 1906 } | |
| 1907 } | |
| 1908 } | |
| 1909 | |
| 1910 l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec); | |
| 1911 l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec); | |
| 1912 if (U_FAILURE(ec)) { | |
| 1913 log_err("FAIL: uset_toPattern failed when it shouldn't\n"); | |
| 1914 uset_close(set); | |
| 1915 uset_close(dumb); | |
| 1916 return; | |
| 1917 } | |
| 1918 | |
| 1919 if (l1 >= BUFSIZE) { | |
| 1920 l1 = BUFSIZE-1; | |
| 1921 pat[l1] = 0; | |
| 1922 } | |
| 1923 if (l2 >= BUFSIZE) { | |
| 1924 l2 = BUFSIZE-1; | |
| 1925 dumbPat[l2] = 0; | |
| 1926 } | |
| 1927 | |
| 1928 if (!ok) { | |
| 1929 log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s
(too many lowercase a-z are ok)\n", | |
| 1930 aescstrdup(pat, l1), aescstrdup(dumbPat, l2)); | |
| 1931 } else if(getTestOption(VERBOSITY_OPTION)) { | |
| 1932 log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescst
rdup(pat, l1)); | |
| 1933 } | |
| 1934 | |
| 1935 uset_close(set); | |
| 1936 uset_close(dumb); | |
| 1937 } | |
| 1938 | |
| 1939 /* ### TODO: test error cases and other interesting things */ | |
| 1940 } | |
| 1941 | |
| 1942 static void | |
| 1943 TestUCharFromNameUnderflow() { | |
| 1944 // Ticket #10889: Underflow crash when there is no dash. | |
| 1945 UErrorCode errorCode=U_ZERO_ERROR; | |
| 1946 UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<NO BREAK SPACE>", &errorCod
e); | |
| 1947 if(U_SUCCESS(errorCode)) { | |
| 1948 log_err("u_charFromName(<NO BREAK SPACE>) = U+%04x but should fail - %s\
n", c, u_errorName(errorCode)); | |
| 1949 } | |
| 1950 | |
| 1951 // Test related edge cases. | |
| 1952 errorCode=U_ZERO_ERROR; | |
| 1953 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<-00a0>", &errorCode); | |
| 1954 if(U_SUCCESS(errorCode)) { | |
| 1955 log_err("u_charFromName(<-00a0>) = U+%04x but should fail - %s\n", c, u_
errorName(errorCode)); | |
| 1956 } | |
| 1957 | |
| 1958 errorCode=U_ZERO_ERROR; | |
| 1959 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control->", &errorCode); | |
| 1960 if(U_SUCCESS(errorCode)) { | |
| 1961 log_err("u_charFromName(<control->) = U+%04x but should fail - %s\n", c,
u_errorName(errorCode)); | |
| 1962 } | |
| 1963 | |
| 1964 errorCode=U_ZERO_ERROR; | |
| 1965 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control-111111>", &errorCode); | |
| 1966 if(U_SUCCESS(errorCode)) { | |
| 1967 log_err("u_charFromName(<control-111111>) = U+%04x but should fail - %s\
n", c, u_errorName(errorCode)); | |
| 1968 } | |
| 1969 } | |
| 1970 | |
| 1971 /* test u_isMirrored() and u_charMirror() ----------------------------------- */ | |
| 1972 | |
| 1973 static void | |
| 1974 TestMirroring() { | |
| 1975 USet *set; | |
| 1976 UErrorCode errorCode; | |
| 1977 | |
| 1978 UChar32 start, end, c2, c3; | |
| 1979 int32_t i; | |
| 1980 | |
| 1981 U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17); | |
| 1982 | |
| 1983 U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17); | |
| 1984 | |
| 1985 log_verbose("Testing u_isMirrored()\n"); | |
| 1986 if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_i
sMirrored(0x232a) && | |
| 1987 !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !
u_isMirrored(0x3400) | |
| 1988 ) | |
| 1989 ) { | |
| 1990 log_err("u_isMirrored() does not work correctly\n"); | |
| 1991 } | |
| 1992 | |
| 1993 log_verbose("Testing u_charMirror()\n"); | |
| 1994 if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x
208d)==0x208e && u_charMirror(0x3017)==0x3016 && | |
| 1995 u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirro
r(0x29F5)==0x2215 && /* large delta between the code points */ | |
| 1996 u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(
0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab && | |
| 1997 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrig
endum6.html */ | |
| 1998 u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charM
irror(0x301d)==0x301d | |
| 1999 ) | |
| 2000 ) { | |
| 2001 log_err("u_charMirror() does not work correctly\n"); | |
| 2002 } | |
| 2003 | |
| 2004 /* verify that Bidi_Mirroring_Glyph roundtrips */ | |
| 2005 errorCode=U_ZERO_ERROR; | |
| 2006 set=uset_openPattern(mirroredPattern, 17, &errorCode); | |
| 2007 | |
| 2008 if (U_FAILURE(errorCode)) { | |
| 2009 log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\
n"); | |
| 2010 } else { | |
| 2011 for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i
) { | |
| 2012 do { | |
| 2013 c2=u_charMirror(start); | |
| 2014 c3=u_charMirror(c2); | |
| 2015 if(c3!=start) { | |
| 2016 log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx
->U+%04lx\n", (long)start, (long)c2, (long)c3); | |
| 2017 } | |
| 2018 c3=u_getBidiPairedBracket(start); | |
| 2019 if(u_getIntPropertyValue(start, UCHAR_BIDI_PAIRED_BRACKET_TYPE)=
=U_BPT_NONE) { | |
| 2020 if(c3!=start) { | |
| 2021 log_err("u_getBidiPairedBracket(U+%04lx) != self for bpt
(c)==None\n", | |
| 2022 (long)start); | |
| 2023 } | |
| 2024 } else { | |
| 2025 if(c3!=c2) { | |
| 2026 log_err("u_getBidiPairedBracket(U+%04lx) != U+%04lx = bm
g(c)'\n", | |
| 2027 (long)start, (long)c2); | |
| 2028 } | |
| 2029 } | |
| 2030 } while(++start<=end); | |
| 2031 } | |
| 2032 } | |
| 2033 | |
| 2034 uset_close(set); | |
| 2035 } | |
| 2036 | |
| 2037 | |
/* One expected script run used by the UScriptRun tests. */
struct RunTestData
{
    /* text of the run; passed to u_unescape() when the test string is built */
    const char *runText;
    /* script code that uscript_nextRun() is expected to report for this run */
    UScriptCode runCode;
};

typedef struct RunTestData RunTestData;
| 2045 | |
| 2046 static void | |
| 2047 CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *te
stData, int32_t nRuns, | |
| 2048 const char *prefix) | |
| 2049 { | |
| 2050 int32_t run, runStart, runLimit; | |
| 2051 UScriptCode runCode; | |
| 2052 | |
| 2053 /* iterate over all the runs */ | |
| 2054 run = 0; | |
| 2055 while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) { | |
| 2056 if (runStart != runStarts[run]) { | |
| 2057 log_err("%s: incorrect start offset for run %d: expected %d, got %d\
n", | |
| 2058 prefix, run, runStarts[run], runStart); | |
| 2059 } | |
| 2060 | |
| 2061 if (runLimit != runStarts[run + 1]) { | |
| 2062 log_err("%s: incorrect limit offset for run %d: expected %d, got %d\
n", | |
| 2063 prefix, run, runStarts[run + 1], runLimit); | |
| 2064 } | |
| 2065 | |
| 2066 if (runCode != testData[run].runCode) { | |
| 2067 log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\
"\n", | |
| 2068 prefix, run, uscript_getName(testData[run].runCode), uscript_get
Name(runCode)); | |
| 2069 } | |
| 2070 | |
| 2071 run += 1; | |
| 2072 | |
| 2073 /* stop when we've seen all the runs we expect to see */ | |
| 2074 if (run >= nRuns) { | |
| 2075 break; | |
| 2076 } | |
| 2077 } | |
| 2078 | |
| 2079 /* Complain if we didn't see then number of runs we expected */ | |
| 2080 if (run != nRuns) { | |
| 2081 log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, r
un, nRuns); | |
| 2082 } | |
| 2083 } | |
| 2084 | |
| 2085 static void | |
| 2086 TestUScriptRunAPI() | |
| 2087 { | |
| 2088 static const RunTestData testData1[] = { | |
| 2089 {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCR
IPT_DEVANAGARI}, | |
| 2090 {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARA
BIC}, | |
| 2091 {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYR
ILLIC}, | |
| 2092 {"English (", USCRIPT_LATIN}, | |
| 2093 {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI}, | |
| 2094 {") ", USCRIPT_LATIN}, | |
| 2095 {"\\u6F22\\u5B75", USCRIPT_HAN}, | |
| 2096 {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA}, | |
| 2097 {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA}, | |
| 2098 {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET} | |
| 2099 }; | |
| 2100 | |
| 2101 static const RunTestData testData2[] = { | |
| 2102 {"((((((((((abc))))))))))", USCRIPT_LATIN} | |
| 2103 }; | |
| 2104 | |
| 2105 static const struct { | |
| 2106 const RunTestData *testData; | |
| 2107 int32_t nRuns; | |
| 2108 } testDataEntries[] = { | |
| 2109 {testData1, UPRV_LENGTHOF(testData1)}, | |
| 2110 {testData2, UPRV_LENGTHOF(testData2)} | |
| 2111 }; | |
| 2112 | |
| 2113 static const int32_t nTestEntries = UPRV_LENGTHOF(testDataEntries); | |
| 2114 int32_t testEntry; | |
| 2115 | |
| 2116 for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) { | |
| 2117 UChar testString[1024]; | |
| 2118 int32_t runStarts[256]; | |
| 2119 int32_t nTestRuns = testDataEntries[testEntry].nRuns; | |
| 2120 const RunTestData *testData = testDataEntries[testEntry].testData; | |
| 2121 | |
| 2122 int32_t run, stringLimit; | |
| 2123 UScriptRun *scriptRun = NULL; | |
| 2124 UErrorCode err; | |
| 2125 | |
| 2126 /* | |
| 2127 * Fill in the test string and the runStarts array. | |
| 2128 */ | |
| 2129 stringLimit = 0; | |
| 2130 for (run = 0; run < nTestRuns; run += 1) { | |
| 2131 runStarts[run] = stringLimit; | |
| 2132 stringLimit += u_unescape(testData[run].runText, &testString[stringL
imit], 1024 - stringLimit); | |
| 2133 /*stringLimit -= 1;*/ | |
| 2134 } | |
| 2135 | |
| 2136 /* The limit of the last run */ | |
| 2137 runStarts[nTestRuns] = stringLimit; | |
| 2138 | |
| 2139 /* | |
| 2140 * Make sure that calling uscript_OpenRun with a NULL text pointer | |
| 2141 * and a non-zero text length returns the correct error. | |
| 2142 */ | |
| 2143 err = U_ZERO_ERROR; | |
| 2144 scriptRun = uscript_openRun(NULL, stringLimit, &err); | |
| 2145 | |
| 2146 if (err != U_ILLEGAL_ARGUMENT_ERROR) { | |
| 2147 log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instea
d of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err)); | |
| 2148 } | |
| 2149 | |
| 2150 if (scriptRun != NULL) { | |
| 2151 log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NUL
L result.\n"); | |
| 2152 uscript_closeRun(scriptRun); | |
| 2153 } | |
| 2154 | |
| 2155 /* | |
| 2156 * Make sure that calling uscript_OpenRun with a non-NULL text pointer | |
| 2157 * and a zero text length returns the correct error. | |
| 2158 */ | |
| 2159 err = U_ZERO_ERROR; | |
| 2160 scriptRun = uscript_openRun(testString, 0, &err); | |
| 2161 | |
| 2162 if (err != U_ILLEGAL_ARGUMENT_ERROR) { | |
| 2163 log_err("uscript_openRun(testString, 0, &err) returned %s instead of
U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err)); | |
| 2164 } | |
| 2165 | |
| 2166 if (scriptRun != NULL) { | |
| 2167 log_err("uscript_openRun(testString, 0, &err) returned a non-NULL re
sult.\n"); | |
| 2168 uscript_closeRun(scriptRun); | |
| 2169 } | |
| 2170 | |
| 2171 /* | |
| 2172 * Make sure that calling uscript_openRun with a NULL text pointer | |
| 2173 * and a zero text length doesn't return an error. | |
| 2174 */ | |
| 2175 err = U_ZERO_ERROR; | |
| 2176 scriptRun = uscript_openRun(NULL, 0, &err); | |
| 2177 | |
| 2178 if (U_FAILURE(err)) { | |
| 2179 log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_erro
rName(err)); | |
| 2180 } | |
| 2181 | |
| 2182 /* Make sure that the empty iterator doesn't find any runs */ | |
| 2183 if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) { | |
| 2184 log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n
"); | |
| 2185 } | |
| 2186 | |
| 2187 /* | |
| 2188 * Make sure that calling uscript_setRunText with a NULL text pointer | |
| 2189 * and a non-zero text length returns the correct error. | |
| 2190 */ | |
| 2191 err = U_ZERO_ERROR; | |
| 2192 uscript_setRunText(scriptRun, NULL, stringLimit, &err); | |
| 2193 | |
| 2194 if (err != U_ILLEGAL_ARGUMENT_ERROR) { | |
| 2195 log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) retu
rned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err)); | |
| 2196 } | |
| 2197 | |
| 2198 /* | |
| 2199 * Make sure that calling uscript_OpenRun with a non-NULL text pointer | |
| 2200 * and a zero text length returns the correct error. | |
| 2201 */ | |
| 2202 err = U_ZERO_ERROR; | |
| 2203 uscript_setRunText(scriptRun, testString, 0, &err); | |
| 2204 | |
| 2205 if (err != U_ILLEGAL_ARGUMENT_ERROR) { | |
| 2206 log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned
%s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err)); | |
| 2207 } | |
| 2208 | |
| 2209 /* | |
| 2210 * Now call uscript_setRunText on the empty iterator | |
| 2211 * and make sure that it works. | |
| 2212 */ | |
| 2213 err = U_ZERO_ERROR; | |
| 2214 uscript_setRunText(scriptRun, testString, stringLimit, &err); | |
| 2215 | |
| 2216 if (U_FAILURE(err)) { | |
| 2217 log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(e
rr)); | |
| 2218 } else { | |
| 2219 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_
setRunText"); | |
| 2220 } | |
| 2221 | |
| 2222 uscript_closeRun(scriptRun); | |
| 2223 | |
| 2224 /* | |
| 2225 * Now open an interator over the testString | |
| 2226 * using uscript_openRun and make sure that it works | |
| 2227 */ | |
| 2228 scriptRun = uscript_openRun(testString, stringLimit, &err); | |
| 2229 | |
| 2230 if (U_FAILURE(err)) { | |
| 2231 log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err)
); | |
| 2232 } else { | |
| 2233 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_
openRun"); | |
| 2234 } | |
| 2235 | |
| 2236 /* Now reset the iterator, and make sure | |
| 2237 * that it still works. | |
| 2238 */ | |
| 2239 uscript_resetRun(scriptRun); | |
| 2240 | |
| 2241 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_rese
tRun"); | |
| 2242 | |
| 2243 /* Close the iterator */ | |
| 2244 uscript_closeRun(scriptRun); | |
| 2245 } | |
| 2246 } | |
| 2247 | |
| 2248 /* test additional, non-core properties */ | |
| 2249 static void | |
| 2250 TestAdditionalProperties() { | |
| 2251 /* test data for u_charAge() */ | |
| 2252 static const struct { | |
| 2253 UChar32 c; | |
| 2254 UVersionInfo version; | |
| 2255 } charAges[]={ | |
| 2256 {0x41, { 1, 1, 0, 0 }}, | |
| 2257 {0xffff, { 1, 1, 0, 0 }}, | |
| 2258 {0x20ab, { 2, 0, 0, 0 }}, | |
| 2259 {0x2fffe, { 2, 0, 0, 0 }}, | |
| 2260 {0x20ac, { 2, 1, 0, 0 }}, | |
| 2261 {0xfb1d, { 3, 0, 0, 0 }}, | |
| 2262 {0x3f4, { 3, 1, 0, 0 }}, | |
| 2263 {0x10300, { 3, 1, 0, 0 }}, | |
| 2264 {0x220, { 3, 2, 0, 0 }}, | |
| 2265 {0xff60, { 3, 2, 0, 0 }} | |
| 2266 }; | |
| 2267 | |
| 2268 /* test data for u_hasBinaryProperty() */ | |
| 2269 static const int32_t | |
| 2270 props[][3]={ /* code point, property, value */ | |
| 2271 { 0x0627, UCHAR_ALPHABETIC, TRUE }, | |
| 2272 { 0x1034a, UCHAR_ALPHABETIC, TRUE }, | |
| 2273 { 0x2028, UCHAR_ALPHABETIC, FALSE }, | |
| 2274 | |
| 2275 { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE }, | |
| 2276 { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE }, | |
| 2277 | |
| 2278 { 0x202c, UCHAR_BIDI_CONTROL, TRUE }, | |
| 2279 { 0x202f, UCHAR_BIDI_CONTROL, FALSE }, | |
| 2280 | |
| 2281 { 0x003c, UCHAR_BIDI_MIRRORED, TRUE }, | |
| 2282 { 0x003d, UCHAR_BIDI_MIRRORED, FALSE }, | |
| 2283 | |
| 2284 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrige
ndum6.html */ | |
| 2285 { 0x2018, UCHAR_BIDI_MIRRORED, FALSE }, | |
| 2286 { 0x201d, UCHAR_BIDI_MIRRORED, FALSE }, | |
| 2287 { 0x201f, UCHAR_BIDI_MIRRORED, FALSE }, | |
| 2288 { 0x301e, UCHAR_BIDI_MIRRORED, FALSE }, | |
| 2289 | |
| 2290 { 0x058a, UCHAR_DASH, TRUE }, | |
| 2291 { 0x007e, UCHAR_DASH, FALSE }, | |
| 2292 | |
| 2293 { 0x0c4d, UCHAR_DIACRITIC, TRUE }, | |
| 2294 { 0x3000, UCHAR_DIACRITIC, FALSE }, | |
| 2295 | |
| 2296 { 0x0e46, UCHAR_EXTENDER, TRUE }, | |
| 2297 { 0x0020, UCHAR_EXTENDER, FALSE }, | |
| 2298 | |
| 2299 #if !UCONFIG_NO_NORMALIZATION | |
| 2300 { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE }, | |
| 2301 { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE }, | |
| 2302 { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE }, | |
| 2303 | |
| 2304 { 0x110a, UCHAR_NFD_INERT, TRUE }, /* Jamo L */ | |
| 2305 { 0x0308, UCHAR_NFD_INERT, FALSE }, | |
| 2306 | |
| 2307 { 0x1164, UCHAR_NFKD_INERT, TRUE }, /* Jamo V */ | |
| 2308 { 0x1d79d, UCHAR_NFKD_INERT, FALSE }, /* math compat version of xi */ | |
| 2309 | |
| 2310 { 0x0021, UCHAR_NFC_INERT, TRUE }, /* ! */ | |
| 2311 { 0x0061, UCHAR_NFC_INERT, FALSE }, /* a */ | |
| 2312 { 0x00e4, UCHAR_NFC_INERT, FALSE }, /* a-umlaut */ | |
| 2313 { 0x0102, UCHAR_NFC_INERT, FALSE }, /* a-breve */ | |
| 2314 { 0xac1c, UCHAR_NFC_INERT, FALSE }, /* Hangul LV */ | |
| 2315 { 0xac1d, UCHAR_NFC_INERT, TRUE }, /* Hangul LVT */ | |
| 2316 | |
| 2317 { 0x1d79d, UCHAR_NFKC_INERT, FALSE }, /* math compat version of xi */ | |
| 2318 { 0x2a6d6, UCHAR_NFKC_INERT, TRUE }, /* Han, last of CJK ext. B */ | |
| 2319 | |
| 2320 { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE }, | |
| 2321 { 0x0308, UCHAR_SEGMENT_STARTER, FALSE }, | |
| 2322 { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */ | |
| 2323 { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */ | |
| 2324 { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */ | |
| 2325 { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */ | |
| 2326 #endif | |
| 2327 | |
| 2328 { 0x0044, UCHAR_HEX_DIGIT, TRUE }, | |
| 2329 { 0xff46, UCHAR_HEX_DIGIT, TRUE }, | |
| 2330 { 0x0047, UCHAR_HEX_DIGIT, FALSE }, | |
| 2331 | |
| 2332 { 0x30fb, UCHAR_HYPHEN, TRUE }, | |
| 2333 { 0xfe58, UCHAR_HYPHEN, FALSE }, | |
| 2334 | |
| 2335 { 0x2172, UCHAR_ID_CONTINUE, TRUE }, | |
| 2336 { 0x0307, UCHAR_ID_CONTINUE, TRUE }, | |
| 2337 { 0x005c, UCHAR_ID_CONTINUE, FALSE }, | |
| 2338 | |
| 2339 { 0x2172, UCHAR_ID_START, TRUE }, | |
| 2340 { 0x007a, UCHAR_ID_START, TRUE }, | |
| 2341 { 0x0039, UCHAR_ID_START, FALSE }, | |
| 2342 | |
| 2343 { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE }, | |
| 2344 { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE }, | |
| 2345 { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE }, | |
| 2346 | |
| 2347 { 0x200c, UCHAR_JOIN_CONTROL, TRUE }, | |
| 2348 { 0x2029, UCHAR_JOIN_CONTROL, FALSE }, | |
| 2349 | |
| 2350 { 0x1d7bc, UCHAR_LOWERCASE, TRUE }, | |
| 2351 { 0x0345, UCHAR_LOWERCASE, TRUE }, | |
| 2352 { 0x0030, UCHAR_LOWERCASE, FALSE }, | |
| 2353 | |
| 2354 { 0x1d7a9, UCHAR_MATH, TRUE }, | |
| 2355 { 0x2135, UCHAR_MATH, TRUE }, | |
| 2356 { 0x0062, UCHAR_MATH, FALSE }, | |
| 2357 | |
| 2358 { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE }, | |
| 2359 { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE }, | |
| 2360 { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE }, | |
| 2361 | |
| 2362 { 0x0022, UCHAR_QUOTATION_MARK, TRUE }, | |
| 2363 { 0xff62, UCHAR_QUOTATION_MARK, TRUE }, | |
| 2364 { 0xd840, UCHAR_QUOTATION_MARK, FALSE }, | |
| 2365 | |
| 2366 { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE }, | |
| 2367 { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE }, | |
| 2368 | |
| 2369 { 0x1d44a, UCHAR_UPPERCASE, TRUE }, | |
| 2370 { 0x2162, UCHAR_UPPERCASE, TRUE }, | |
| 2371 { 0x0345, UCHAR_UPPERCASE, FALSE }, | |
| 2372 | |
| 2373 { 0x0020, UCHAR_WHITE_SPACE, TRUE }, | |
| 2374 { 0x202f, UCHAR_WHITE_SPACE, TRUE }, | |
| 2375 { 0x3001, UCHAR_WHITE_SPACE, FALSE }, | |
| 2376 | |
| 2377 { 0x0711, UCHAR_XID_CONTINUE, TRUE }, | |
| 2378 { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE }, | |
| 2379 { 0x007c, UCHAR_XID_CONTINUE, FALSE }, | |
| 2380 | |
| 2381 { 0x16ee, UCHAR_XID_START, TRUE }, | |
| 2382 { 0x23456, UCHAR_XID_START, TRUE }, | |
| 2383 { 0x1d1aa, UCHAR_XID_START, FALSE }, | |
| 2384 | |
| 2385 /* | |
| 2386 * Version break: | |
| 2387 * The following properties are only supported starting with the | |
| 2388 * Unicode version indicated in the second field. | |
| 2389 */ | |
| 2390 { -1, 0x320, 0 }, | |
| 2391 | |
| 2392 { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE }, | |
| 2393 { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE }, | |
| 2394 { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE }, | |
| 2395 | |
| 2396 { 0x0149, UCHAR_DEPRECATED, TRUE }, /* changed in Unicode 5.2 */ | |
| 2397 { 0x0341, UCHAR_DEPRECATED, FALSE }, /* changed in Unicode 5.2 */ | |
| 2398 { 0xe0001, UCHAR_DEPRECATED, TRUE }, /* changed from Unicode 5 to
5.1 */ | |
| 2399 { 0xe0100, UCHAR_DEPRECATED, FALSE }, | |
| 2400 | |
| 2401 { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE }, | |
| 2402 { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE }, | |
| 2403 { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE }, | |
| 2404 { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE }, /* changed from Unicode 3.2
to 4 and again from 5 to 5.1 */ | |
| 2405 | |
| 2406 { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE }, | |
| 2407 { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE }, | |
| 2408 { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE }, /* changed from Unicode 3.2
to 4 and again from 5 to 5.1 */ | |
| 2409 { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE }, | |
| 2410 | |
| 2411 { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE }, | |
| 2412 { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE }, | |
| 2413 | |
| 2414 { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE }, | |
| 2415 { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE }, | |
| 2416 | |
| 2417 { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE }, | |
| 2418 { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE }, | |
| 2419 | |
| 2420 { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE }, | |
| 2421 { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE }, | |
| 2422 | |
| 2423 { 0x2e9b, UCHAR_RADICAL, TRUE }, | |
| 2424 { 0x4e00, UCHAR_RADICAL, FALSE }, | |
| 2425 | |
| 2426 { 0x012f, UCHAR_SOFT_DOTTED, TRUE }, | |
| 2427 { 0x0049, UCHAR_SOFT_DOTTED, FALSE }, | |
| 2428 | |
| 2429 { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE }, | |
| 2430 { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE }, | |
| 2431 | |
| 2432 { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */ | |
| 2433 | |
| 2434 { 0x002e, UCHAR_S_TERM, TRUE }, | |
| 2435 { 0x0061, UCHAR_S_TERM, FALSE }, | |
| 2436 | |
| 2437 { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE }, | |
| 2438 { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE }, | |
| 2439 { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE }, | |
| 2440 { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE }, | |
| 2441 | |
| 2442 /* enum/integer type properties */ | |
| 2443 | |
| 2444 /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData()
*/ | |
| 2445 /* test default Bidi classes for unassigned code points */ | |
| 2446 { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, | |
| 2447 { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, | |
| 2448 { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, | |
| 2449 { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Uni
code 5.0 */ | |
| 2450 { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */ | |
| 2451 { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, | |
| 2452 { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, | |
| 2453 { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, | |
| 2454 { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, | |
| 2455 { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, | |
| 2456 { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, | |
| 2457 | |
| 2458 { 0x061d, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, | |
| 2459 { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, | |
| 2460 { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, | |
| 2461 { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, | |
| 2462 { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, | |
| 2463 { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, | |
| 2464 { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, | |
| 2465 | |
| 2466 { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS }, | |
| 2467 { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU }, | |
| 2468 { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS }, | |
| 2469 { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG }, | |
| 2470 { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU }, | |
| 2471 { 0x1CBF, UCHAR_BLOCK, UBLOCK_NO_BLOCK }, | |
| 2472 { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA }, | |
| 2473 { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS }, | |
| 2474 { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK }, | |
| 2475 { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK }, | |
| 2476 { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B }, | |
| 2477 | |
| 2478 /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in Tes
tUnicodeData() */ | |
| 2479 { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 }, | |
| 2480 | |
| 2481 { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK }, | |
| 2482 { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT }, | |
| 2483 { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE }, | |
| 2484 { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL }, | |
| 2485 { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL }, | |
| 2486 { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL }, | |
| 2487 { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL }, | |
| 2488 { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT }, | |
| 2489 { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE }, | |
| 2490 | |
| 2491 { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL }, | |
| 2492 { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW }, | |
| 2493 { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS }, | |
| 2494 { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH }, | |
| 2495 { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, | |
| 2496 { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH }, | |
| 2497 { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, | |
| 2498 { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, | |
| 2499 { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, | |
| 2500 { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS }, | |
| 2501 { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL }, | |
| 2502 { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, | |
| 2503 { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, | |
| 2504 { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W
values in Unicode 4 */ | |
| 2505 { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL }, | |
| 2506 { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS }, | |
| 2507 { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS }, | |
| 2508 | |
| 2509 /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeD
ata() */ | |
| 2510 { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 }, | |
| 2511 { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER }, /* changed in Un
icode 5.2 */ | |
| 2512 | |
| 2513 { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP }, | |
| 2514 { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN }, | |
| 2515 { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH }, | |
| 2516 { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH }, | |
| 2517 { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL }, | |
| 2518 | |
| 2519 { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING }, | |
| 2520 { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING }, | |
| 2521 { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING }, | |
| 2522 { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING }, | |
| 2523 { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING }, | |
| 2524 { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT }, | |
| 2525 { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT }, | |
| 2526 { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT }, | |
| 2527 | |
| 2528 /* TestUnicodeData() verifies that no assigned character has "XX" (unkno
wn) */ | |
| 2529 { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN }, | |
| 2530 { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN }, | |
| 2531 { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION }, | |
| 2532 { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION }, | |
| 2533 { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC }, | |
| 2534 { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC }, | |
| 2535 { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC }, | |
| 2536 { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC }, | |
| 2537 { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE }, | |
| 2538 { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE }, | |
| 2539 { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE }, | |
| 2540 { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION }, | |
| 2541 { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS }, | |
| 2542 { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC }, | |
| 2543 { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC }, | |
| 2544 | |
| 2545 /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */ | |
| 2546 | |
| 2547 /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */ | |
| 2548 | |
| 2549 { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, | |
| 2550 { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, | |
| 2551 { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, | |
| 2552 { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, | |
| 2553 { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* chang
ed in Unicode 5.2 */ | |
| 2554 { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* chang
ed in Unicode 5.2 */ | |
| 2555 { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, | |
| 2556 | |
| 2557 { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, | |
| 2558 { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* chang
ed in Unicode 5.2 */ | |
| 2559 { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* chang
ed in Unicode 5.2 */ | |
| 2560 { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, | |
| 2561 | |
| 2562 { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, | |
| 2563 { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, | |
| 2564 { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, | |
| 2565 { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, | |
| 2566 { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* chang
ed in Unicode 5.2 */ | |
| 2567 { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* chang
ed in Unicode 5.2 */ | |
| 2568 | |
| 2569 { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, | |
| 2570 { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* chang
ed in Unicode 5.2 */ | |
| 2571 { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* chang
ed in Unicode 5.2 */ | |
| 2572 { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, | |
| 2573 | |
| 2574 { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, | |
| 2575 { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, | |
| 2576 { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, | |
| 2577 { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, | |
| 2578 { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* chang
ed in Unicode 5.2 */ | |
| 2579 { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* chang
ed in Unicode 5.2 */ | |
| 2580 { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, | |
| 2581 | |
| 2582 { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, | |
| 2583 { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* chang
ed in Unicode 5.2 */ | |
| 2584 { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* chang
ed in Unicode 5.2 */ | |
| 2585 { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, | |
| 2586 | |
| 2587 { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE }, | |
| 2588 { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE }, | |
| 2589 { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE }, | |
| 2590 { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE }, | |
| 2591 | |
| 2592 { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE }, | |
| 2593 { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE }, | |
| 2594 { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE }, | |
| 2595 { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE }, | |
| 2596 { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE }, | |
| 2597 | |
| 2598 { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, | |
| 2599 | |
| 2600 { -1, 0x410, 0 }, /* version break for Unicode 4.1 */ | |
| 2601 | |
| 2602 { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE }, | |
| 2603 { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE }, | |
| 2604 { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE }, | |
| 2605 | |
| 2606 { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE }, | |
| 2607 { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE }, | |
| 2608 { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE }, | |
| 2609 { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE }, | |
| 2610 { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE }, | |
| 2611 | |
| 2612 { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION }, | |
| 2613 { 0x2c8e, UCHAR_BLOCK, UBLOCK_COPTIC }, | |
| 2614 { 0xfe17, UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS }, | |
| 2615 | |
| 2616 { 0x1a00, UCHAR_SCRIPT, USCRIPT_BUGINESE }, | |
| 2617 { 0x2cea, UCHAR_SCRIPT, USCRIPT_COPTIC }, | |
| 2618 { 0xa82b, UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI }, | |
| 2619 { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN }, | |
| 2620 | |
| 2621 { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 }, | |
| 2622 { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 }, | |
| 2623 { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 }, | |
| 2624 { 0x115f, UCHAR_LINE_BREAK, U_LB_JL }, | |
| 2625 { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT }, | |
| 2626 { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV }, | |
| 2627 | |
| 2628 { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT }, | |
| 2629 { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND }, | |
| 2630 { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL }, | |
| 2631 { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V }, | |
| 2632 | |
| 2633 { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER }, | |
| 2634 { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER }, | |
| 2635 { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC }, | |
| 2636 { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM }, | |
| 2637 | |
| 2638 { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER }, | |
| 2639 { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER }, | |
| 2640 { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE }, | |
| 2641 { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP }, | |
| 2642 | |
| 2643 { -1, 0x520, 0 }, /* version break for Unicode 5.2 */ | |
| 2644 | |
| 2645 /* unassigned code points in new default Bidi R blocks */ | |
| 2646 { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, | |
| 2647 { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, | |
| 2648 | |
| 2649 /* test some script codes >127 */ | |
| 2650 { 0xa6e6, UCHAR_SCRIPT, USCRIPT_BAMUM }, | |
| 2651 { 0xa4d0, UCHAR_SCRIPT, USCRIPT_LISU }, | |
| 2652 { 0x10a7f, UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN }, | |
| 2653 | |
| 2654 { -1, 0x600, 0 }, /* version break for Unicode 6.0 */ | |
| 2655 | |
| 2656 /* value changed in Unicode 6.0 */ | |
| 2657 { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL }, | |
| 2658 | |
| 2659 { -1, 0x610, 0 }, /* version break for Unicode 6.1 */ | |
| 2660 | |
| 2661 /* unassigned code points in new/changed default Bidi AL blocks */ | |
| 2662 { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, | |
| 2663 { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, | |
| 2664 | |
| 2665 { -1, 0x630, 0 }, /* version break for Unicode 6.3 */ | |
| 2666 | |
| 2667 /* unassigned code points in the currency symbols block now default to E
T */ | |
| 2668 { 0x20C0, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR }, | |
| 2669 { 0x20CF, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR }, | |
| 2670 | |
| 2671 /* new property in Unicode 6.3 */ | |
| 2672 { 0x0027, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE }, | |
| 2673 { 0x0028, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN }, | |
| 2674 { 0x0029, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE }, | |
| 2675 { 0xFF5C, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE }, | |
| 2676 { 0xFF5B, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN }, | |
| 2677 { 0xFF5D, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE }, | |
| 2678 | |
| 2679 { -1, 0x700, 0 }, /* version break for Unicode 7.0 */ | |
| 2680 | |
| 2681 /* new character range with Joining_Group values */ | |
| 2682 { 0x10ABF, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP }, | |
| 2683 { 0x10AC0, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_ALEPH }, | |
| 2684 { 0x10AC1, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_BETH }, | |
| 2685 { 0x10AEF, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_HUNDRED }, | |
| 2686 { 0x10AF0, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP }, | |
| 2687 | |
| 2688 /* undefined UProperty values */ | |
| 2689 { 0x61, 0x4a7, 0 }, | |
| 2690 { 0x234bc, 0x15ed, 0 } | |
| 2691 }; | |
| 2692 | |
| 2693 UVersionInfo version; | |
| 2694 UChar32 c; | |
| 2695 int32_t i, result, uVersion; | |
| 2696 UProperty which; | |
| 2697 | |
| 2698 /* what is our Unicode version? */ | |
| 2699 u_getUnicodeVersion(version); | |
| 2700 uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor
/update version numbers */ | |
| 2701 | |
| 2702 u_charAge(0x20, version); | |
| 2703 if(version[0]==0) { | |
| 2704 /* no additional properties available */ | |
| 2705 log_err("TestAdditionalProperties: no additional properties available, n
ot tested\n"); | |
| 2706 return; | |
| 2707 } | |
| 2708 | |
| 2709 /* test u_charAge() */ | |
| 2710 for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) { | |
| 2711 u_charAge(charAges[i].c, version); | |
| 2712 if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) { | |
| 2713 log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %
u, %u, %u, %u }\n", | |
| 2714 charAges[i].c, | |
| 2715 version[0], version[1], version[2], version[3], | |
| 2716 charAges[i].version[0], charAges[i].version[1], charAges[i].vers
ion[2], charAges[i].version[3]); | |
| 2717 } | |
| 2718 } | |
| 2719 | |
| 2720 if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 || | |
| 2721 u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 || | |
| 2722 u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 || /* j2478 */ | |
| 2723 u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/ | |
| 2724 u_getIntPropertyMinValue(0x2345)!=0 | |
| 2725 ) { | |
| 2726 log_err("error: u_getIntPropertyMinValue() wrong\n"); | |
| 2727 } | |
| 2728 if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) { | |
| 2729 log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n"); | |
| 2730 } | |
| 2731 if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) { | |
| 2732 log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n"); | |
| 2733 } | |
| 2734 if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) { | |
| 2735 log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n")
; | |
| 2736 } | |
| 2737 if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_CO
UNT-1 ) { | |
| 2738 log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n"); | |
| 2739 } | |
| 2740 if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) { | |
| 2741 log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n"); | |
| 2742 } | |
| 2743 if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) { | |
| 2744 log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n"); | |
| 2745 } | |
| 2746 if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) { | |
| 2747 log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n"); | |
| 2748 } | |
| 2749 if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) { | |
| 2750 log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n"); | |
| 2751 } | |
| 2752 if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGOR
Y_COUNT-1) { | |
| 2753 log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n
"); | |
| 2754 } | |
| 2755 if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUN
T-1) { | |
| 2756 log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wro
ng\n"); | |
| 2757 } | |
| 2758 if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_CO
UNT-1) { | |
| 2759 log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) w
rong\n"); | |
| 2760 } | |
| 2761 if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) { | |
| 2762 log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n")
; | |
| 2763 } | |
| 2764 if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) { | |
| 2765 log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n"); | |
| 2766 } | |
| 2767 if(u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE)!=(int32_t)U_BPT_
COUNT-1) { | |
| 2768 log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE)
wrong\n"); | |
| 2769 } | |
| 2770 /*JB#2410*/ | |
| 2771 if( u_getIntPropertyMaxValue(0x2345)!=-1) { | |
| 2772 log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n"); | |
| 2773 } | |
| 2774 if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_CO
UNT - 1)) { | |
| 2775 log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong
\n"); | |
| 2776 } | |
| 2777 if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) != (int32_t) (U_JG_COUNT
-1)) { | |
| 2778 log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n"); | |
| 2779 } | |
| 2780 if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1
)) { | |
| 2781 log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n"); | |
| 2782 } | |
| 2783 if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUN
T -1)) { | |
| 2784 log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n
"); | |
| 2785 } | |
| 2786 | |
| 2787 /* test u_hasBinaryProperty() and u_getIntPropertyValue() */ | |
| 2788 for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) { | |
| 2789 const char *whichName; | |
| 2790 | |
| 2791 if(props[i][0]<0) { | |
| 2792 /* Unicode version break */ | |
| 2793 if(uVersion<props[i][1]) { | |
| 2794 break; /* do not test properties that are not yet supported */ | |
| 2795 } else { | |
| 2796 continue; /* skip this row */ | |
| 2797 } | |
| 2798 } | |
| 2799 | |
| 2800 c=(UChar32)props[i][0]; | |
| 2801 which=(UProperty)props[i][1]; | |
| 2802 whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME); | |
| 2803 | |
| 2804 if(which<UCHAR_INT_START) { | |
| 2805 result=u_hasBinaryProperty(c, which); | |
| 2806 if(result!=props[i][2]) { | |
| 2807 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wron
g (props[%d]) - (Are you missing data?)\n", | |
| 2808 c, whichName, result, i); | |
| 2809 } | |
| 2810 } | |
| 2811 | |
| 2812 result=u_getIntPropertyValue(c, which); | |
| 2813 if(result!=props[i][2]) { | |
| 2814 log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong,
should be %d (props[%d]) - (Are you missing data?)\n", | |
| 2815 c, whichName, result, props[i][2], i); | |
| 2816 } | |
| 2817 | |
| 2818 /* test separate functions, too */ | |
| 2819 switch((UProperty)props[i][1]) { | |
| 2820 case UCHAR_ALPHABETIC: | |
| 2821 if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) { | |
| 2822 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])
\n", | |
| 2823 props[i][0], result, i); | |
| 2824 } | |
| 2825 break; | |
| 2826 case UCHAR_LOWERCASE: | |
| 2827 if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) { | |
| 2828 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\
n", | |
| 2829 props[i][0], result, i); | |
| 2830 } | |
| 2831 break; | |
| 2832 case UCHAR_UPPERCASE: | |
| 2833 if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) { | |
| 2834 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\
n", | |
| 2835 props[i][0], result, i); | |
| 2836 } | |
| 2837 break; | |
| 2838 case UCHAR_WHITE_SPACE: | |
| 2839 if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) { | |
| 2840 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])
\n", | |
| 2841 props[i][0], result, i); | |
| 2842 } | |
| 2843 break; | |
| 2844 default: | |
| 2845 break; | |
| 2846 } | |
| 2847 } | |
| 2848 } | |
| 2849 | |
| 2850 static void | |
| 2851 TestNumericProperties(void) { | |
| 2852 /* see UnicodeData.txt, DerivedNumericValues.txt */ | |
| 2853 static const struct { | |
| 2854 UChar32 c; | |
| 2855 int32_t type; | |
| 2856 double numValue; | |
| 2857 } values[]={ | |
| 2858 { 0x0F33, U_NT_NUMERIC, -1./2. }, | |
| 2859 { 0x0C66, U_NT_DECIMAL, 0 }, | |
| 2860 { 0x96f6, U_NT_NUMERIC, 0 }, | |
| 2861 { 0xa833, U_NT_NUMERIC, 1./16. }, | |
| 2862 { 0x2152, U_NT_NUMERIC, 1./10. }, | |
| 2863 { 0x2151, U_NT_NUMERIC, 1./9. }, | |
| 2864 { 0x1245f, U_NT_NUMERIC, 1./8. }, | |
| 2865 { 0x2150, U_NT_NUMERIC, 1./7. }, | |
| 2866 { 0x2159, U_NT_NUMERIC, 1./6. }, | |
| 2867 { 0x09f6, U_NT_NUMERIC, 3./16. }, | |
| 2868 { 0x2155, U_NT_NUMERIC, 1./5. }, | |
| 2869 { 0x00BD, U_NT_NUMERIC, 1./2. }, | |
| 2870 { 0x0031, U_NT_DECIMAL, 1. }, | |
| 2871 { 0x4e00, U_NT_NUMERIC, 1. }, | |
| 2872 { 0x58f1, U_NT_NUMERIC, 1. }, | |
| 2873 { 0x10320, U_NT_NUMERIC, 1. }, | |
| 2874 { 0x0F2B, U_NT_NUMERIC, 3./2. }, | |
| 2875 { 0x00B2, U_NT_DIGIT, 2. }, | |
| 2876 { 0x5f10, U_NT_NUMERIC, 2. }, | |
| 2877 { 0x1813, U_NT_DECIMAL, 3. }, | |
| 2878 { 0x5f0e, U_NT_NUMERIC, 3. }, | |
| 2879 { 0x2173, U_NT_NUMERIC, 4. }, | |
| 2880 { 0x8086, U_NT_NUMERIC, 4. }, | |
| 2881 { 0x278E, U_NT_DIGIT, 5. }, | |
| 2882 { 0x1D7F2, U_NT_DECIMAL, 6. }, | |
| 2883 { 0x247A, U_NT_DIGIT, 7. }, | |
| 2884 { 0x7396, U_NT_NUMERIC, 9. }, | |
| 2885 { 0x1372, U_NT_NUMERIC, 10. }, | |
| 2886 { 0x216B, U_NT_NUMERIC, 12. }, | |
| 2887 { 0x16EE, U_NT_NUMERIC, 17. }, | |
| 2888 { 0x249A, U_NT_NUMERIC, 19. }, | |
| 2889 { 0x303A, U_NT_NUMERIC, 30. }, | |
| 2890 { 0x5345, U_NT_NUMERIC, 30. }, | |
| 2891 { 0x32B2, U_NT_NUMERIC, 37. }, | |
| 2892 { 0x1375, U_NT_NUMERIC, 40. }, | |
| 2893 { 0x10323, U_NT_NUMERIC, 50. }, | |
| 2894 { 0x0BF1, U_NT_NUMERIC, 100. }, | |
| 2895 { 0x964c, U_NT_NUMERIC, 100. }, | |
| 2896 { 0x217E, U_NT_NUMERIC, 500. }, | |
| 2897 { 0x2180, U_NT_NUMERIC, 1000. }, | |
| 2898 { 0x4edf, U_NT_NUMERIC, 1000. }, | |
| 2899 { 0x2181, U_NT_NUMERIC, 5000. }, | |
| 2900 { 0x137C, U_NT_NUMERIC, 10000. }, | |
| 2901 { 0x4e07, U_NT_NUMERIC, 10000. }, | |
| 2902 { 0x12432, U_NT_NUMERIC, 216000. }, | |
| 2903 { 0x12433, U_NT_NUMERIC, 432000. }, | |
| 2904 { 0x4ebf, U_NT_NUMERIC, 100000000. }, | |
| 2905 { 0x5146, U_NT_NUMERIC, 1000000000000. }, | |
| 2906 { -1, U_NT_NONE, U_NO_NUMERIC_VALUE }, | |
| 2907 { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE }, | |
| 2908 { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE }, | |
| 2909 { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE }, | |
| 2910 { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE }, | |
| 2911 { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE }, | |
| 2912 { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE }, | |
| 2913 { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE } | |
| 2914 }; | |
| 2915 | |
| 2916 double nv; | |
| 2917 UChar32 c; | |
| 2918 int32_t i, type; | |
| 2919 | |
| 2920 for(i=0; i<UPRV_LENGTHOF(values); ++i) { | |
| 2921 c=values[i].c; | |
| 2922 type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE); | |
| 2923 nv=u_getNumericValue(c); | |
| 2924 | |
| 2925 if(type!=values[i].type) { | |
| 2926 log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, va
lues[i].type); | |
| 2927 } | |
| 2928 if(0.000001 <= fabs(nv - values[i].numValue)) { | |
| 2929 log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, value
s[i].numValue); | |
| 2930 } | |
| 2931 } | |
| 2932 } | |
| 2933 | |
| 2934 /** | |
| 2935 * Test the property names and property value names API. | |
| 2936 */ | |
| 2937 static void | |
| 2938 TestPropertyNames(void) { | |
| 2939 int32_t p, v, choice=0, rev; | |
| 2940 UBool atLeastSomething = FALSE; | |
| 2941 | |
| 2942 for (p=0; ; ++p) { | |
| 2943 UProperty propEnum = (UProperty)p; | |
| 2944 UBool sawProp = FALSE; | |
| 2945 if(p > 10 && !atLeastSomething) { | |
| 2946 log_data_err("Never got anything after 10 tries.\nYour data is probabl
y fried. Quitting this test\n", p, choice); | |
| 2947 return; | |
| 2948 } | |
| 2949 | |
| 2950 for (choice=0; ; ++choice) { | |
| 2951 const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)
choice); | |
| 2952 if (name) { | |
| 2953 if (!sawProp) | |
| 2954 log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff); | |
| 2955 log_verbose("%d=\"%s\"", choice, name); | |
| 2956 sawProp = TRUE; | |
| 2957 atLeastSomething = TRUE; | |
| 2958 | |
| 2959 /* test reverse mapping */ | |
| 2960 rev = u_getPropertyEnum(name); | |
| 2961 if (rev != p) { | |
| 2962 log_err("Property round-trip failure: %d -> %s -> %d\n", | |
| 2963 p, name, rev); | |
| 2964 } | |
| 2965 } | |
| 2966 if (!name && choice>0) break; | |
| 2967 } | |
| 2968 if (sawProp) { | |
| 2969 /* looks like a valid property; check the values */ | |
| 2970 const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME
); | |
| 2971 int32_t max = 0; | |
| 2972 if (p == UCHAR_CANONICAL_COMBINING_CLASS) { | |
| 2973 max = 255; | |
| 2974 } else if (p == UCHAR_GENERAL_CATEGORY_MASK) { | |
| 2975 /* it's far too slow to iterate all the way up to | |
| 2976 the real max, U_GC_P_MASK */ | |
| 2977 max = U_GC_NL_MASK; | |
| 2978 } else if (p == UCHAR_BLOCK) { | |
| 2979 /* UBlockCodes, unlike other values, start at 1 */ | |
| 2980 max = 1; | |
| 2981 } | |
| 2982 log_verbose("\n"); | |
| 2983 for (v=-1; ; ++v) { | |
| 2984 UBool sawValue = FALSE; | |
| 2985 for (choice=0; ; ++choice) { | |
| 2986 const char* vname = u_getPropertyValueName(propEnum, v, (UPr
opertyNameChoice)choice); | |
| 2987 if (vname) { | |
| 2988 if (!sawValue) log_verbose(" %s, value %d:", pname, v); | |
| 2989 log_verbose("%d=\"%s\"", choice, vname); | |
| 2990 sawValue = TRUE; | |
| 2991 | |
| 2992 /* test reverse mapping */ | |
| 2993 rev = u_getPropertyValueEnum(propEnum, vname); | |
| 2994 if (rev != v) { | |
| 2995 log_err("Value round-trip failure (%s): %d -> %s ->
%d\n", | |
| 2996 pname, v, vname, rev); | |
| 2997 } | |
| 2998 } | |
| 2999 if (!vname && choice>0) break; | |
| 3000 } | |
| 3001 if (sawValue) { | |
| 3002 log_verbose("\n"); | |
| 3003 } | |
| 3004 if (!sawValue && v>=max) break; | |
| 3005 } | |
| 3006 } | |
| 3007 if (!sawProp) { | |
| 3008 if (p>=UCHAR_STRING_LIMIT) { | |
| 3009 break; | |
| 3010 } else if (p>=UCHAR_DOUBLE_LIMIT) { | |
| 3011 p = UCHAR_STRING_START - 1; | |
| 3012 } else if (p>=UCHAR_MASK_LIMIT) { | |
| 3013 p = UCHAR_DOUBLE_START - 1; | |
| 3014 } else if (p>=UCHAR_INT_LIMIT) { | |
| 3015 p = UCHAR_MASK_START - 1; | |
| 3016 } else if (p>=UCHAR_BINARY_LIMIT) { | |
| 3017 p = UCHAR_INT_START - 1; | |
| 3018 } | |
| 3019 } | |
| 3020 } | |
| 3021 } | |
| 3022 | |
| 3023 /** | |
| 3024 * Test the property values API. See JB#2410. | |
| 3025 */ | |
| 3026 static void | |
| 3027 TestPropertyValues(void) { | |
| 3028 int32_t i, p, min, max; | |
| 3029 UErrorCode ec; | |
| 3030 | |
| 3031 /* Min should be 0 for everything. */ | |
| 3032 /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */ | |
| 3033 for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) { | |
| 3034 UProperty propEnum = (UProperty)p; | |
| 3035 min = u_getIntPropertyMinValue(propEnum); | |
| 3036 if (min != 0) { | |
| 3037 if (p == UCHAR_BLOCK) { | |
| 3038 /* This is okay...for now. See JB#2487. | |
| 3039 TODO Update this for JB#2487. */ | |
| 3040 } else { | |
| 3041 const char* name; | |
| 3042 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME); | |
| 3043 if (name == NULL) | |
| 3044 name = "<ERROR>"; | |
| 3045 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n", | |
| 3046 name, min); | |
| 3047 } | |
| 3048 } | |
| 3049 } | |
| 3050 | |
| 3051 if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 || | |
| 3052 u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) { | |
| 3053 log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK
) is wrong\n"); | |
| 3054 } | |
| 3055 | |
| 3056 /* Max should be -1 for invalid properties. */ | |
| 3057 max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE); | |
| 3058 if (max != -1) { | |
| 3059 log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n", | |
| 3060 max); | |
| 3061 } | |
| 3062 | |
| 3063 /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */ | |
| 3064 for (i=0; i<2; ++i) { | |
| 3065 int32_t script; | |
| 3066 const char* desc; | |
| 3067 ec = U_ZERO_ERROR; | |
| 3068 switch (i) { | |
| 3069 case 0: | |
| 3070 script = uscript_getScript(-1, &ec); | |
| 3071 desc = "uscript_getScript(-1)"; | |
| 3072 break; | |
| 3073 case 1: | |
| 3074 script = u_getIntPropertyValue(-1, UCHAR_SCRIPT); | |
| 3075 desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)"; | |
| 3076 break; | |
| 3077 default: | |
| 3078 log_err("Internal test error. Too many scripts\n"); | |
| 3079 return; | |
| 3080 } | |
| 3081 /* We don't explicitly test ec. It should be U_FAILURE but it | |
| 3082 isn't documented as such. */ | |
| 3083 if (script != (int32_t)USCRIPT_INVALID_CODE) { | |
| 3084 log_err("FAIL: %s = %d, exp. 0\n", | |
| 3085 desc, script); | |
| 3086 } | |
| 3087 } | |
| 3088 } | |
| 3089 | |
| 3090 /* various tests for consistency of UCD data and API behavior */ | |
| 3091 static void | |
| 3092 TestConsistency() { | |
| 3093 char buffer[300]; | |
| 3094 USet *set1, *set2, *set3, *set4; | |
| 3095 UErrorCode errorCode; | |
| 3096 | |
| 3097 UChar32 start, end; | |
| 3098 int32_t i, length; | |
| 3099 | |
| 3100 U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10); | |
| 3101 U_STRING_DECL(dashPattern, "[:Dash:]", 8); | |
| 3102 U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13); | |
| 3103 U_STRING_DECL(formatPattern, "[:Cf:]", 6); | |
| 3104 U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14); | |
| 3105 | |
| 3106 U_STRING_DECL(mathBlocksPattern, | |
| 3107 "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Sym
bols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathe
matical Operators:][:block=Mathematical Alphanumeric Symbols:]]", | |
| 3108 214); | |
| 3109 U_STRING_DECL(mathPattern, "[:Math:]", 8); | |
| 3110 U_STRING_DECL(unassignedPattern, "[:Cn:]", 6); | |
| 3111 U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14); | |
| 3112 U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20); | |
| 3113 | |
| 3114 U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10); | |
| 3115 U_STRING_INIT(dashPattern, "[:Dash:]", 8); | |
| 3116 U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13); | |
| 3117 U_STRING_INIT(formatPattern, "[:Cf:]", 6); | |
| 3118 U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14); | |
| 3119 | |
| 3120 U_STRING_INIT(mathBlocksPattern, | |
| 3121 "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Sym
bols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathe
matical Operators:][:block=Mathematical Alphanumeric Symbols:]]", | |
| 3122 214); | |
| 3123 U_STRING_INIT(mathPattern, "[:Math:]", 8); | |
| 3124 U_STRING_INIT(unassignedPattern, "[:Cn:]", 6); | |
| 3125 U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14); | |
| 3126 U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20); | |
| 3127 | |
| 3128 /* | |
| 3129 * It used to be that UCD.html and its precursors said | |
| 3130 * "Those dashes used to mark connections between pieces of words, | |
| 3131 * plus the Katakana middle dot." | |
| 3132 * | |
| 3133 * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash | |
| 3134 * but not from Hyphen. | |
| 3135 * UTC 94 (2003mar) decided to leave it that way and to change UCD.html. | |
| 3136 * Therefore, do not show errors when testing the Hyphen property. | |
| 3137 */ | |
| 3138 log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n" | |
| 3139 "known to the UTC and not considered errors.\n"); | |
| 3140 | |
| 3141 errorCode=U_ZERO_ERROR; | |
| 3142 set1=uset_openPattern(hyphenPattern, 10, &errorCode); | |
| 3143 set2=uset_openPattern(dashPattern, 8, &errorCode); | |
| 3144 if(U_SUCCESS(errorCode)) { | |
| 3145 /* remove the Katakana middle dot(s) from set1 */ | |
| 3146 uset_remove(set1, 0x30fb); | |
| 3147 uset_remove(set1, 0xff65); /* halfwidth variant */ | |
| 3148 showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE); | |
| 3149 } else { | |
| 3150 log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing
data?)\n", u_errorName(errorCode)); | |
| 3151 } | |
| 3152 | |
| 3153 /* check that Cf is neither Hyphen nor Dash nor Alphabetic */ | |
| 3154 set3=uset_openPattern(formatPattern, 6, &errorCode); | |
| 3155 set4=uset_openPattern(alphaPattern, 14, &errorCode); | |
| 3156 if(U_SUCCESS(errorCode)) { | |
| 3157 showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE); | |
| 3158 showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE); | |
| 3159 showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE); | |
| 3160 } else { | |
| 3161 log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missi
ng data?)\n", u_errorName(errorCode)); | |
| 3162 } | |
| 3163 | |
| 3164 uset_close(set1); | |
| 3165 uset_close(set2); | |
| 3166 uset_close(set3); | |
| 3167 uset_close(set4); | |
| 3168 | |
| 3169 /* | |
| 3170 * Check that each lowercase character has "small" in its name | |
| 3171 * and not "capital". | |
| 3172 * There are some such characters, some of which seem odd. | |
| 3173 * Use the verbose flag to see these notices. | |
| 3174 */ | |
| 3175 errorCode=U_ZERO_ERROR; | |
| 3176 set1=uset_openPattern(lowerPattern, 13, &errorCode); | |
| 3177 if(U_SUCCESS(errorCode)) { | |
| 3178 for(i=0;; ++i) { | |
| 3179 length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode); | |
| 3180 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { | |
| 3181 break; /* done */ | |
| 3182 } | |
| 3183 if(U_FAILURE(errorCode)) { | |
| 3184 log_err("error iterating over [:Lowercase:] at item %d: %s\n", | |
| 3185 i, u_errorName(errorCode)); | |
| 3186 break; | |
| 3187 } | |
| 3188 if(length!=0) { | |
| 3189 break; /* done with code points, got a string or -1 */ | |
| 3190 } | |
| 3191 | |
| 3192 while(start<=end) { | |
| 3193 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buf
fer), &errorCode); | |
| 3194 if(U_FAILURE(errorCode)) { | |
| 3195 log_data_err("error getting the name of U+%04x - %s\n", star
t, u_errorName(errorCode)); | |
| 3196 errorCode=U_ZERO_ERROR; | |
| 3197 } | |
| 3198 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!
=NULL) && | |
| 3199 strstr(buffer, "SMALL CAPITAL")==NULL | |
| 3200 ) { | |
| 3201 log_verbose("info: [:Lowercase:] contains U+%04x whose name
does not suggest lowercase: %s\n", start, buffer); | |
| 3202 } | |
| 3203 ++start; | |
| 3204 } | |
| 3205 } | |
| 3206 } else { | |
| 3207 log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n
", u_errorName(errorCode)); | |
| 3208 } | |
| 3209 uset_close(set1); | |
| 3210 | |
| 3211 /* verify that all assigned characters in Math blocks are exactly Math chara
cters */ | |
| 3212 errorCode=U_ZERO_ERROR; | |
| 3213 set1=uset_openPattern(mathBlocksPattern, -1, &errorCode); | |
| 3214 set2=uset_openPattern(mathPattern, 8, &errorCode); | |
| 3215 set3=uset_openPattern(unassignedPattern, 6, &errorCode); | |
| 3216 if(U_SUCCESS(errorCode)) { | |
| 3217 uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */ | |
| 3218 uset_complement(set3); /* assigned characters */ | |
| 3219 uset_retainAll(set1, set3); /* [math blocks]&[assigned] */ | |
| 3220 compareUSets(set1, set2, | |
| 3221 "[assigned Math block chars]", "[math blocks]&[:Math:]", | |
| 3222 TRUE); | |
| 3223 } else { | |
| 3224 log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Ar
e you missing data?)\n", u_errorName(errorCode)); | |
| 3225 } | |
| 3226 uset_close(set1); | |
| 3227 uset_close(set2); | |
| 3228 uset_close(set3); | |
| 3229 | |
| 3230 /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have
script=Unknown */ | |
| 3231 errorCode=U_ZERO_ERROR; | |
| 3232 set1=uset_openPattern(unknownPattern, 14, &errorCode); | |
| 3233 set2=uset_openPattern(reservedPattern, 20, &errorCode); | |
| 3234 if(U_SUCCESS(errorCode)) { | |
| 3235 compareUSets(set1, set2, | |
| 3236 "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]", | |
| 3237 TRUE); | |
| 3238 } else { | |
| 3239 log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s
(Are you missing data?)\n", u_errorName(errorCode)); | |
| 3240 } | |
| 3241 uset_close(set1); | |
| 3242 uset_close(set2); | |
| 3243 } | |
| 3244 | |
/*
 * Starting with ICU4C 3.4, the core Unicode properties files
 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
 * are hardcoded in the common DLL and therefore not included
 * in the data package any more.
 * Tests requiring these files are disabled so that
 * we need not jump through hoops (like adding snapshots of these files
 * to testdata).
 * See Jitterbug 4497.
 */
#define HARDCODED_DATA_4497 1

/*
 * API coverage for ucase.c.
 * The whole body is compiled out while HARDCODED_DATA_4497 is nonzero.
 */
static void TestUCase() {
#if !HARDCODED_DATA_4497
    UErrorCode status=U_ZERO_ERROR;
    UDataMemory *dataMemory;
    UCaseProps *props;
    const UCaseProps *dummyProps;

    /* coverage for ucase_openBinary() */
    dataMemory=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &status);
    if(U_SUCCESS(status)) {
        props=ucase_openBinary((const uint8_t *)dataMemory->pHeader, -1, &status);
        if(U_SUCCESS(status)) {
            /* verify islower(sharp s) */
            if(ucase_getType(props, 0xdf)!=UCASE_LOWER) {
                log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
            }

            ucase_close(props);
            udata_close(dataMemory);

            /* coverage for ucase_getDummy() */
            status=U_ZERO_ERROR;
            dummyProps=ucase_getDummy(&status);
            if(0x41!=ucase_tolower(dummyProps, 0x41)) {
                log_err("ucase_tolower(dummy, A)!=A\n");
            }
        } else {
            log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
                    u_errorName(status));
            udata_close(dataMemory);
        }
    } else {
        log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
                     u_errorName(status));
    }
#endif
}
| 3297 | |
| 3298 /* API coverage for ubidi_props.c */ | |
| 3299 static void TestUBiDiProps() { | |
| 3300 #if !HARDCODED_DATA_4497 | |
| 3301 UDataMemory *pData; | |
| 3302 UBiDiProps *bdp; | |
| 3303 const UBiDiProps *cbdp; | |
| 3304 UErrorCode errorCode; | |
| 3305 | |
| 3306 /* coverage for ubidi_openBinary() */ | |
| 3307 errorCode=U_ZERO_ERROR; | |
| 3308 pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode); | |
| 3309 if(U_FAILURE(errorCode)) { | |
| 3310 log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s
\n", | |
| 3311 u_errorName(errorCode)); | |
| 3312 return; | |
| 3313 } | |
| 3314 | |
| 3315 bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode); | |
| 3316 if(U_FAILURE(errorCode)) { | |
| 3317 log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME
"." UBIDI_DATA_TYPE ": %s\n", | |
| 3318 u_errorName(errorCode)); | |
| 3319 udata_close(pData); | |
| 3320 return; | |
| 3321 } | |
| 3322 | |
| 3323 if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */ | |
| 3324 log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n
"); | |
| 3325 } | |
| 3326 | |
| 3327 ubidi_closeProps(bdp); | |
| 3328 udata_close(pData); | |
| 3329 | |
| 3330 /* coverage for ubidi_getDummy() */ | |
| 3331 errorCode=U_ZERO_ERROR; | |
| 3332 cbdp=ubidi_getDummy(&errorCode); | |
| 3333 if(ubidi_getClass(cbdp, 0x20)!=0) { | |
| 3334 log_err("ubidi_getClass(dummy, space)!=0\n"); | |
| 3335 } | |
| 3336 #endif | |
| 3337 } | |
| 3338 | |
/* test case folding, compare return values with CaseFolding.txt ------------ */

/*
 * Bit flags recording which case foldings of a character have been tested
 * already; CF_ALL is the union of the three individual flags.
 */
enum {
    CF_SIMPLE=1<<0,
    CF_FULL=1<<1,
    CF_TURKIC=1<<2,
    CF_ALL=CF_SIMPLE|CF_FULL|CF_TURKIC
};
| 3348 | |
| 3349 static void | |
| 3350 testFold(UChar32 c, int which, | |
| 3351 UChar32 simple, UChar32 turkic, | |
| 3352 const UChar *full, int32_t fullLength, | |
| 3353 const UChar *turkicFull, int32_t turkicFullLength) { | |
| 3354 UChar s[2], t[32]; | |
| 3355 UChar32 c2; | |
| 3356 int32_t length, length2; | |
| 3357 | |
| 3358 UErrorCode errorCode=U_ZERO_ERROR; | |
| 3359 | |
| 3360 length=0; | |
| 3361 U16_APPEND_UNSAFE(s, length, c); | |
| 3362 | |
| 3363 if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) { | |
| 3364 log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (l
ong)c2, (long)simple); | |
| 3365 } | |
| 3366 if((which&CF_FULL)!=0) { | |
| 3367 length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, 0, &errorCode); | |
| 3368 if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) { | |
| 3369 log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n",
(long)c); | |
| 3370 } | |
| 3371 } | |
| 3372 if((which&CF_TURKIC)!=0) { | |
| 3373 if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) { | |
| 3374 log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c,
(long)c2, (long)simple); | |
| 3375 } | |
| 3376 | |
| 3377 length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUD
E_SPECIAL_I, &errorCode); | |
| 3378 if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) { | |
| 3379 log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (
long)c); | |
| 3380 } | |
| 3381 } | |
| 3382 } | |
| 3383 | |
| 3384 /* test that c case-folds to itself */ | |
| 3385 static void | |
| 3386 testFoldToSelf(UChar32 c, int which) { | |
| 3387 UChar s[2]; | |
| 3388 int32_t length; | |
| 3389 | |
| 3390 length=0; | |
| 3391 U16_APPEND_UNSAFE(s, length, c); | |
| 3392 testFold(c, which, c, c, s, length, s, length); | |
| 3393 } | |
| 3394 | |
| 3395 struct CaseFoldingData { | |
| 3396 USet *notSeen; | |
| 3397 UChar32 prev, prevSimple; | |
| 3398 UChar prevFull[32]; | |
| 3399 int32_t prevFullLength; | |
| 3400 int which; | |
| 3401 }; | |
| 3402 typedef struct CaseFoldingData CaseFoldingData; | |
| 3403 | |
| 3404 static void U_CALLCONV | |
| 3405 caseFoldingLineFn(void *context, | |
| 3406 char *fields[][2], int32_t fieldCount, | |
| 3407 UErrorCode *pErrorCode) { | |
| 3408 CaseFoldingData *pData=(CaseFoldingData *)context; | |
| 3409 char *end; | |
| 3410 UChar full[32]; | |
| 3411 UChar32 c, prev, simple; | |
| 3412 int32_t count; | |
| 3413 int which; | |
| 3414 char status; | |
| 3415 | |
| 3416 /* get code point */ | |
| 3417 const char *s=u_skipWhitespace(fields[0][0]); | |
| 3418 if(0==strncmp(s, "0000..10FFFF", 12)) { | |
| 3419 /* | |
| 3420 * Ignore the line | |
| 3421 * # @missing: 0000..10FFFF; C; <code point> | |
| 3422 * because maps-to-self is already our default, and this line breaks thi
s parser. | |
| 3423 */ | |
| 3424 return; | |
| 3425 } | |
| 3426 c=(UChar32)strtoul(s, &end, 16); | |
| 3427 end=(char *)u_skipWhitespace(end); | |
| 3428 if(end<=fields[0][0] || end!=fields[0][1]) { | |
| 3429 log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0])
; | |
| 3430 *pErrorCode=U_PARSE_ERROR; | |
| 3431 return; | |
| 3432 } | |
| 3433 | |
| 3434 /* get the status of this mapping */ | |
| 3435 status=*u_skipWhitespace(fields[1][0]); | |
| 3436 if(status!='C' && status!='S' && status!='F' && status!='T') { | |
| 3437 log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0
][0]); | |
| 3438 *pErrorCode=U_PARSE_ERROR; | |
| 3439 return; | |
| 3440 } | |
| 3441 | |
| 3442 /* get the mapping */ | |
| 3443 count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode)
; | |
| 3444 if(U_FAILURE(*pErrorCode)) { | |
| 3445 log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]); | |
| 3446 return; | |
| 3447 } | |
| 3448 | |
| 3449 /* there is a simple mapping only if there is exactly one code point (count
is in UChars) */ | |
| 3450 if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) { | |
| 3451 simple=c; | |
| 3452 } | |
| 3453 | |
| 3454 if(c!=(prev=pData->prev)) { | |
| 3455 /* | |
| 3456 * Test remaining mappings for the previous code point. | |
| 3457 * If a turkic folding was not mentioned, then it should fold the same | |
| 3458 * as the regular simple case folding. | |
| 3459 */ | |
| 3460 UChar prevString[2]; | |
| 3461 int32_t length; | |
| 3462 | |
| 3463 length=0; | |
| 3464 U16_APPEND_UNSAFE(prevString, length, prev); | |
| 3465 testFold(prev, (~pData->which)&CF_ALL, | |
| 3466 prev, pData->prevSimple, | |
| 3467 prevString, length, | |
| 3468 pData->prevFull, pData->prevFullLength); | |
| 3469 pData->prev=pData->prevSimple=c; | |
| 3470 length=0; | |
| 3471 U16_APPEND_UNSAFE(pData->prevFull, length, c); | |
| 3472 pData->prevFullLength=length; | |
| 3473 pData->which=0; | |
| 3474 } | |
| 3475 | |
| 3476 /* | |
| 3477 * Turn the status into a bit set of case foldings to test. | |
| 3478 * Remember non-Turkic case foldings as defaults for Turkic mode. | |
| 3479 */ | |
| 3480 switch(status) { | |
| 3481 case 'C': | |
| 3482 which=CF_SIMPLE|CF_FULL; | |
| 3483 pData->prevSimple=simple; | |
| 3484 u_memcpy(pData->prevFull, full, count); | |
| 3485 pData->prevFullLength=count; | |
| 3486 break; | |
| 3487 case 'S': | |
| 3488 which=CF_SIMPLE; | |
| 3489 pData->prevSimple=simple; | |
| 3490 break; | |
| 3491 case 'F': | |
| 3492 which=CF_FULL; | |
| 3493 u_memcpy(pData->prevFull, full, count); | |
| 3494 pData->prevFullLength=count; | |
| 3495 break; | |
| 3496 case 'T': | |
| 3497 which=CF_TURKIC; | |
| 3498 break; | |
| 3499 default: | |
| 3500 which=0; | |
| 3501 break; /* won't happen because of test above */ | |
| 3502 } | |
| 3503 | |
| 3504 testFold(c, which, simple, simple, full, count, full, count); | |
| 3505 | |
| 3506 /* remember which case foldings of c have been tested */ | |
| 3507 pData->which|=which; | |
| 3508 | |
| 3509 /* remove c from the set of ones not mentioned in CaseFolding.txt */ | |
| 3510 uset_remove(pData->notSeen, c); | |
| 3511 } | |
| 3512 | |
| 3513 static void | |
| 3514 TestCaseFolding() { | |
| 3515 CaseFoldingData data={ NULL }; | |
| 3516 char *fields[3][2]; | |
| 3517 UErrorCode errorCode; | |
| 3518 | |
| 3519 static char *lastLine= (char *)"10FFFF; C; 10FFFF;"; | |
| 3520 | |
| 3521 errorCode=U_ZERO_ERROR; | |
| 3522 /* test BMP & plane 1 - nothing interesting above */ | |
| 3523 data.notSeen=uset_open(0, 0x1ffff); | |
| 3524 data.prevFullLength=1; /* length of full case folding of U+0000 */ | |
| 3525 | |
| 3526 parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorC
ode); | |
| 3527 if(U_SUCCESS(errorCode)) { | |
| 3528 int32_t i, start, end; | |
| 3529 | |
| 3530 /* add a pseudo-last line to finish testing of the actual last one */ | |
| 3531 fields[0][0]=lastLine; | |
| 3532 fields[0][1]=lastLine+6; | |
| 3533 fields[1][0]=lastLine+7; | |
| 3534 fields[1][1]=lastLine+9; | |
| 3535 fields[2][0]=lastLine+10; | |
| 3536 fields[2][1]=lastLine+17; | |
| 3537 caseFoldingLineFn(&data, fields, 3, &errorCode); | |
| 3538 | |
| 3539 /* verify that all code points that are not mentioned in CaseFolding.txt
fold to themselves */ | |
| 3540 for(i=0; | |
| 3541 0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode)
&& | |
| 3542 U_SUCCESS(errorCode); | |
| 3543 ++i | |
| 3544 ) { | |
| 3545 do { | |
| 3546 testFoldToSelf(start, CF_ALL); | |
| 3547 } while(++start<=end); | |
| 3548 } | |
| 3549 } | |
| 3550 | |
| 3551 uset_close(data.notSeen); | |
| 3552 } | |
| OLD | NEW |