| OLD | NEW |
| (Empty) |
| 1 /******************************************************************** | |
| 2 * Copyright (c) 1997-2016, International Business Machines Corporation and | |
| 3 * others. All Rights Reserved. | |
| 4 ********************************************************************/ | |
| 5 | |
| 6 #include "unicode/ustring.h" | |
| 7 #include "unicode/uchar.h" | |
| 8 #include "unicode/uniset.h" | |
| 9 #include "unicode/putil.h" | |
| 10 #include "unicode/uscript.h" | |
| 11 #include "cstring.h" | |
| 12 #include "hash.h" | |
| 13 #include "patternprops.h" | |
| 14 #include "normalizer2impl.h" | |
| 15 #include "uparse.h" | |
| 16 #include "ucdtest.h" | |
| 17 | |
// Property names that the data-file parser must not report as unknown.
// The UnicodeTest constructor registers each of them in unknownPropertyNames.
// Both the strings and the pointers are const, like derivedPropsNames[].
static const char *const ignorePropNames[]={
    "FC_NFKC",
    "NFD_QC",
    "NFC_QC",
    "NFKD_QC",
    "NFKC_QC",
    "Expands_On_NFD",
    "Expands_On_NFC",
    "Expands_On_NFKD",
    "Expands_On_NFKC",
    "NFKC_CF"
};
| 30 | |
| 31 UnicodeTest::UnicodeTest() | |
| 32 { | |
| 33 UErrorCode errorCode=U_ZERO_ERROR; | |
| 34 unknownPropertyNames=new U_NAMESPACE_QUALIFIER Hashtable(errorCode); | |
| 35 if(U_FAILURE(errorCode)) { | |
| 36 delete unknownPropertyNames; | |
| 37 unknownPropertyNames=NULL; | |
| 38 } | |
| 39 // Ignore some property names altogether. | |
| 40 for(int32_t i=0; i<UPRV_LENGTHOF(ignorePropNames); ++i) { | |
| 41 unknownPropertyNames->puti(UnicodeString(ignorePropNames[i], -1, US_INV)
, 1, errorCode); | |
| 42 } | |
| 43 } | |
| 44 | |
| 45 UnicodeTest::~UnicodeTest() | |
| 46 { | |
| 47 delete unknownPropertyNames; | |
| 48 } | |
| 49 | |
| 50 void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name,
char* /*par*/ ) | |
| 51 { | |
| 52 if(exec) { | |
| 53 logln("TestSuite UnicodeTest: "); | |
| 54 } | |
| 55 TESTCASE_AUTO_BEGIN; | |
| 56 TESTCASE_AUTO(TestAdditionalProperties); | |
| 57 TESTCASE_AUTO(TestBinaryValues); | |
| 58 TESTCASE_AUTO(TestConsistency); | |
| 59 TESTCASE_AUTO(TestPatternProperties); | |
| 60 TESTCASE_AUTO(TestScriptMetadata); | |
| 61 TESTCASE_AUTO(TestBidiPairedBracketType); | |
| 62 TESTCASE_AUTO(TestEmojiProperties); | |
| 63 TESTCASE_AUTO_END; | |
| 64 } | |
| 65 | |
| 66 //==================================================== | |
| 67 // private data used by the tests | |
| 68 //==================================================== | |
| 69 | |
| 70 // test DerivedCoreProperties.txt ------------------------------------------- | |
| 71 | |
| 72 // copied from genprops.c | |
| 73 static int32_t | |
| 74 getTokenIndex(const char *const tokens[], int32_t countTokens, const char *s) { | |
| 75 const char *t, *z; | |
| 76 int32_t i, j; | |
| 77 | |
| 78 s=u_skipWhitespace(s); | |
| 79 for(i=0; i<countTokens; ++i) { | |
| 80 t=tokens[i]; | |
| 81 if(t!=NULL) { | |
| 82 for(j=0;; ++j) { | |
| 83 if(t[j]!=0) { | |
| 84 if(s[j]!=t[j]) { | |
| 85 break; | |
| 86 } | |
| 87 } else { | |
| 88 z=u_skipWhitespace(s+j); | |
| 89 if(*z==';' || *z==0) { | |
| 90 return i; | |
| 91 } else { | |
| 92 break; | |
| 93 } | |
| 94 } | |
| 95 } | |
| 96 } | |
| 97 } | |
| 98 return -1; | |
| 99 } | |
| 100 | |
// Names of the binary properties that are read from the data files.
// Entry k corresponds to entry k of derivedPropsIndex[]; keep the two
// tables in the same order and of the same length.
static const char *const derivedPropsNames[]={
    "Math", "Alphabetic", "Lowercase", "Uppercase",
    "ID_Start", "ID_Continue", "XID_Start", "XID_Continue",
    "Default_Ignorable_Code_Point",
    "Full_Composition_Exclusion",
    "Grapheme_Extend",
    "Grapheme_Link", /* Unicode 5 moves this property here from PropList.txt */
    "Grapheme_Base",
    "Cased", "Case_Ignorable",
    "Changes_When_Lowercased", "Changes_When_Uppercased",
    "Changes_When_Titlecased", "Changes_When_Casefolded",
    "Changes_When_Casemapped",
    "Changes_When_NFKC_Casefolded"
};
| 125 | |
| 126 static const UProperty | |
| 127 derivedPropsIndex[]={ | |
| 128 UCHAR_MATH, | |
| 129 UCHAR_ALPHABETIC, | |
| 130 UCHAR_LOWERCASE, | |
| 131 UCHAR_UPPERCASE, | |
| 132 UCHAR_ID_START, | |
| 133 UCHAR_ID_CONTINUE, | |
| 134 UCHAR_XID_START, | |
| 135 UCHAR_XID_CONTINUE, | |
| 136 UCHAR_DEFAULT_IGNORABLE_CODE_POINT, | |
| 137 UCHAR_FULL_COMPOSITION_EXCLUSION, | |
| 138 UCHAR_GRAPHEME_EXTEND, | |
| 139 UCHAR_GRAPHEME_LINK, | |
| 140 UCHAR_GRAPHEME_BASE, | |
| 141 UCHAR_CASED, | |
| 142 UCHAR_CASE_IGNORABLE, | |
| 143 UCHAR_CHANGES_WHEN_LOWERCASED, | |
| 144 UCHAR_CHANGES_WHEN_UPPERCASED, | |
| 145 UCHAR_CHANGES_WHEN_TITLECASED, | |
| 146 UCHAR_CHANGES_WHEN_CASEFOLDED, | |
| 147 UCHAR_CHANGES_WHEN_CASEMAPPED, | |
| 148 UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED | |
| 149 }; | |
| 150 | |
| 151 static int32_t numErrors[UPRV_LENGTHOF(derivedPropsIndex)]={ 0 }; | |
| 152 | |
| 153 enum { MAX_ERRORS=50 }; | |
| 154 | |
| 155 U_CFUNC void U_CALLCONV | |
| 156 derivedPropsLineFn(void *context, | |
| 157 char *fields[][2], int32_t /* fieldCount */, | |
| 158 UErrorCode *pErrorCode) | |
| 159 { | |
| 160 UnicodeTest *me=(UnicodeTest *)context; | |
| 161 uint32_t start, end; | |
| 162 int32_t i; | |
| 163 | |
| 164 u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode); | |
| 165 if(U_FAILURE(*pErrorCode)) { | |
| 166 me->errln("UnicodeTest: syntax error in DerivedCoreProperties.txt or Der
ivedNormalizationProps.txt field 0 at %s\n", fields[0][0]); | |
| 167 return; | |
| 168 } | |
| 169 | |
| 170 /* parse derived binary property name, ignore unknown names */ | |
| 171 i=getTokenIndex(derivedPropsNames, UPRV_LENGTHOF(derivedPropsNames), fields[
1][0]); | |
| 172 if(i<0) { | |
| 173 UnicodeString propName(fields[1][0], (int32_t)(fields[1][1]-fields[1][0]
)); | |
| 174 propName.trim(); | |
| 175 if(me->unknownPropertyNames->find(propName)==NULL) { | |
| 176 UErrorCode errorCode=U_ZERO_ERROR; | |
| 177 me->unknownPropertyNames->puti(propName, 1, errorCode); | |
| 178 me->errln("UnicodeTest warning: unknown property name '%s' in Derive
dCoreProperties.txt or DerivedNormalizationProps.txt\n", fields[1][0]); | |
| 179 } | |
| 180 return; | |
| 181 } | |
| 182 | |
| 183 me->derivedProps[i].add(start, end); | |
| 184 } | |
| 185 | |
| 186 void UnicodeTest::TestAdditionalProperties() { | |
| 187 #if !UCONFIG_NO_NORMALIZATION | |
| 188 // test DerivedCoreProperties.txt and DerivedNormalizationProps.txt | |
| 189 if(UPRV_LENGTHOF(derivedProps)<UPRV_LENGTHOF(derivedPropsNames)) { | |
| 190 errln("error: UnicodeTest::derivedProps[] too short, need at least %d Un
icodeSets\n", | |
| 191 UPRV_LENGTHOF(derivedPropsNames)); | |
| 192 return; | |
| 193 } | |
| 194 if(UPRV_LENGTHOF(derivedPropsIndex)!=UPRV_LENGTHOF(derivedPropsNames)) { | |
| 195 errln("error in ucdtest.cpp: UPRV_LENGTHOF(derivedPropsIndex)!=UPRV_LENG
THOF(derivedPropsNames)\n"); | |
| 196 return; | |
| 197 } | |
| 198 | |
| 199 char path[500]; | |
| 200 if(getUnidataPath(path) == NULL) { | |
| 201 errln("unable to find path to source/data/unidata/"); | |
| 202 return; | |
| 203 } | |
| 204 char *basename=strchr(path, 0); | |
| 205 strcpy(basename, "DerivedCoreProperties.txt"); | |
| 206 | |
| 207 char *fields[2][2]; | |
| 208 UErrorCode errorCode=U_ZERO_ERROR; | |
| 209 u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorC
ode); | |
| 210 if(U_FAILURE(errorCode)) { | |
| 211 errln("error parsing DerivedCoreProperties.txt: %s\n", u_errorName(error
Code)); | |
| 212 return; | |
| 213 } | |
| 214 | |
| 215 strcpy(basename, "DerivedNormalizationProps.txt"); | |
| 216 u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorC
ode); | |
| 217 if(U_FAILURE(errorCode)) { | |
| 218 errln("error parsing DerivedNormalizationProps.txt: %s\n", u_errorName(e
rrorCode)); | |
| 219 return; | |
| 220 } | |
| 221 | |
| 222 // now we have all derived core properties in the UnicodeSets | |
| 223 // run them all through the API | |
| 224 int32_t rangeCount, range; | |
| 225 uint32_t i; | |
| 226 UChar32 start, end; | |
| 227 | |
| 228 // test all TRUE properties | |
| 229 for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) { | |
| 230 rangeCount=derivedProps[i].getRangeCount(); | |
| 231 for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) { | |
| 232 start=derivedProps[i].getRangeStart(range); | |
| 233 end=derivedProps[i].getRangeEnd(range); | |
| 234 for(; start<=end; ++start) { | |
| 235 if(!u_hasBinaryProperty(start, derivedPropsIndex[i])) { | |
| 236 dataerrln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %
s)==FALSE is wrong", start, derivedPropsNames[i]); | |
| 237 if(++numErrors[i]>=MAX_ERRORS) { | |
| 238 dataerrln("Too many errors, moving to the next test"); | |
| 239 break; | |
| 240 } | |
| 241 } | |
| 242 } | |
| 243 } | |
| 244 } | |
| 245 | |
| 246 // invert all properties | |
| 247 for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) { | |
| 248 derivedProps[i].complement(); | |
| 249 } | |
| 250 | |
| 251 // test all FALSE properties | |
| 252 for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) { | |
| 253 rangeCount=derivedProps[i].getRangeCount(); | |
| 254 for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) { | |
| 255 start=derivedProps[i].getRangeStart(range); | |
| 256 end=derivedProps[i].getRangeEnd(range); | |
| 257 for(; start<=end; ++start) { | |
| 258 if(u_hasBinaryProperty(start, derivedPropsIndex[i])) { | |
| 259 errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==
TRUE is wrong\n", start, derivedPropsNames[i]); | |
| 260 if(++numErrors[i]>=MAX_ERRORS) { | |
| 261 errln("Too many errors, moving to the next test"); | |
| 262 break; | |
| 263 } | |
| 264 } | |
| 265 } | |
| 266 } | |
| 267 } | |
| 268 #endif /* !UCONFIG_NO_NORMALIZATION */ | |
| 269 } | |
| 270 | |
| 271 void UnicodeTest::TestBinaryValues() { | |
| 272 /* | |
| 273 * Unicode 5.1 explicitly defines binary property value aliases. | |
| 274 * Verify that they are all recognized. | |
| 275 */ | |
| 276 UErrorCode errorCode=U_ZERO_ERROR; | |
| 277 UnicodeSet alpha(UNICODE_STRING_SIMPLE("[:Alphabetic:]"), errorCode); | |
| 278 if(U_FAILURE(errorCode)) { | |
| 279 dataerrln("UnicodeSet([:Alphabetic:]) failed - %s", u_errorName(errorCod
e)); | |
| 280 return; | |
| 281 } | |
| 282 | |
| 283 static const char *const falseValues[]={ "N", "No", "F", "False" }; | |
| 284 static const char *const trueValues[]={ "Y", "Yes", "T", "True" }; | |
| 285 int32_t i; | |
| 286 for(i=0; i<UPRV_LENGTHOF(falseValues); ++i) { | |
| 287 UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]"); | |
| 288 pattern.insert(pattern.length()-2, UnicodeString(falseValues[i], -1, US_
INV)); | |
| 289 errorCode=U_ZERO_ERROR; | |
| 290 UnicodeSet set(pattern, errorCode); | |
| 291 if(U_FAILURE(errorCode)) { | |
| 292 errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", falseValues[i],
u_errorName(errorCode)); | |
| 293 continue; | |
| 294 } | |
| 295 set.complement(); | |
| 296 if(set!=alpha) { | |
| 297 errln("UnicodeSet([:Alphabetic=%s:]).complement()!=UnicodeSet([:Alph
abetic:])\n", falseValues[i]); | |
| 298 } | |
| 299 } | |
| 300 for(i=0; i<UPRV_LENGTHOF(trueValues); ++i) { | |
| 301 UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]"); | |
| 302 pattern.insert(pattern.length()-2, UnicodeString(trueValues[i], -1, US_I
NV)); | |
| 303 errorCode=U_ZERO_ERROR; | |
| 304 UnicodeSet set(pattern, errorCode); | |
| 305 if(U_FAILURE(errorCode)) { | |
| 306 errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", trueValues[i],
u_errorName(errorCode)); | |
| 307 continue; | |
| 308 } | |
| 309 if(set!=alpha) { | |
| 310 errln("UnicodeSet([:Alphabetic=%s:])!=UnicodeSet([:Alphabetic:])\n",
trueValues[i]); | |
| 311 } | |
| 312 } | |
| 313 } | |
| 314 | |
| 315 void UnicodeTest::TestConsistency() { | |
| 316 #if !UCONFIG_NO_NORMALIZATION | |
| 317 /* | |
| 318 * Test for an example that getCanonStartSet() delivers | |
| 319 * all characters that compose from the input one, | |
| 320 * even in multiple steps. | |
| 321 * For example, the set for "I" (0049) should contain both | |
| 322 * I-diaeresis (00CF) and I-diaeresis-acute (1E2E). | |
| 323 * In general, the set for the middle such character should be a subset | |
| 324 * of the set for the first. | |
| 325 */ | |
| 326 IcuTestErrorCode errorCode(*this, "TestConsistency"); | |
| 327 const Normalizer2 *nfd=Normalizer2::getNFDInstance(errorCode); | |
| 328 const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(errorCode); | |
| 329 if(!nfcImpl->ensureCanonIterData(errorCode) || errorCode.isFailure()) { | |
| 330 dataerrln("Normalizer2::getInstance(NFD) or Normalizer2Factory::getNFCIm
pl() failed - %s\n", | |
| 331 errorCode.errorName()); | |
| 332 errorCode.reset(); | |
| 333 return; | |
| 334 } | |
| 335 | |
| 336 UnicodeSet set1, set2; | |
| 337 if (nfcImpl->getCanonStartSet(0x49, set1)) { | |
| 338 /* enumerate all characters that are plausible to be latin letters */ | |
| 339 for(UChar start=0xa0; start<0x2000; ++start) { | |
| 340 UnicodeString decomp=nfd->normalize(UnicodeString(start), errorCode)
; | |
| 341 if(decomp.length()>1 && decomp[0]==0x49) { | |
| 342 set2.add(start); | |
| 343 } | |
| 344 } | |
| 345 | |
| 346 if (set1!=set2) { | |
| 347 errln("[canon start set of 0049] != [all c with canon decomp with 00
49]"); | |
| 348 } | |
| 349 // This was available in cucdtst.c but the test had to move to intltest | |
| 350 // because the new internal normalization functions are in C++. | |
| 351 //compareUSets(set1, set2, | |
| 352 // "[canon start set of 0049]", "[all c with canon decomp wi
th 0049]", | |
| 353 // TRUE); | |
| 354 } else { | |
| 355 errln("NFC.getCanonStartSet() returned FALSE"); | |
| 356 } | |
| 357 #endif | |
| 358 } | |
| 359 | |
| 360 /** | |
| 361 * Test various implementations of Pattern_Syntax & Pattern_White_Space. | |
| 362 */ | |
| 363 void UnicodeTest::TestPatternProperties() { | |
| 364 IcuTestErrorCode errorCode(*this, "TestPatternProperties()"); | |
| 365 UnicodeSet syn_pp; | |
| 366 UnicodeSet syn_prop(UNICODE_STRING_SIMPLE("[:Pattern_Syntax:]"), errorCode); | |
| 367 UnicodeSet syn_list( | |
| 368 "[!-/\\:-@\\[-\\^`\\{-~" | |
| 369 "\\u00A1-\\u00A7\\u00A9\\u00AB\\u00AC\\u00AE\\u00B0\\u00B1\\u00B6\\u00BB
\\u00BF\\u00D7\\u00F7" | |
| 370 "\\u2010-\\u2027\\u2030-\\u203E\\u2041-\\u2053\\u2055-\\u205E\\u2190-\\u
245F\\u2500-\\u2775" | |
| 371 "\\u2794-\\u2BFF\\u2E00-\\u2E7F\\u3001-\\u3003\\u3008-\\u3020\\u3030\\uF
D3E\\uFD3F\\uFE45\\uFE46]", errorCode); | |
| 372 UnicodeSet ws_pp; | |
| 373 UnicodeSet ws_prop(UNICODE_STRING_SIMPLE("[:Pattern_White_Space:]"), errorCo
de); | |
| 374 UnicodeSet ws_list(UNICODE_STRING_SIMPLE("[\\u0009-\\u000D\\ \\u0085\\u200E\
\u200F\\u2028\\u2029]"), errorCode); | |
| 375 UnicodeSet syn_ws_pp; | |
| 376 UnicodeSet syn_ws_prop(syn_prop); | |
| 377 syn_ws_prop.addAll(ws_prop); | |
| 378 for(UChar32 c=0; c<=0xffff; ++c) { | |
| 379 if(PatternProps::isSyntax(c)) { | |
| 380 syn_pp.add(c); | |
| 381 } | |
| 382 if(PatternProps::isWhiteSpace(c)) { | |
| 383 ws_pp.add(c); | |
| 384 } | |
| 385 if(PatternProps::isSyntaxOrWhiteSpace(c)) { | |
| 386 syn_ws_pp.add(c); | |
| 387 } | |
| 388 } | |
| 389 compareUSets(syn_pp, syn_prop, | |
| 390 "PatternProps.isSyntax()", "[:Pattern_Syntax:]", TRUE); | |
| 391 compareUSets(syn_pp, syn_list, | |
| 392 "PatternProps.isSyntax()", "[Pattern_Syntax ranges]", TRUE); | |
| 393 compareUSets(ws_pp, ws_prop, | |
| 394 "PatternProps.isWhiteSpace()", "[:Pattern_White_Space:]", TRUE)
; | |
| 395 compareUSets(ws_pp, ws_list, | |
| 396 "PatternProps.isWhiteSpace()", "[Pattern_White_Space ranges]",
TRUE); | |
| 397 compareUSets(syn_ws_pp, syn_ws_prop, | |
| 398 "PatternProps.isSyntaxOrWhiteSpace()", | |
| 399 "[[:Pattern_Syntax:][:Pattern_White_Space:]]", TRUE); | |
| 400 } | |
| 401 | |
| 402 // So far only minimal port of Java & cucdtst.c compareUSets(). | |
| 403 UBool | |
| 404 UnicodeTest::compareUSets(const UnicodeSet &a, const UnicodeSet &b, | |
| 405 const char *a_name, const char *b_name, | |
| 406 UBool diffIsError) { | |
| 407 UBool same= a==b; | |
| 408 if(!same && diffIsError) { | |
| 409 errln("Sets are different: %s vs. %s\n", a_name, b_name); | |
| 410 } | |
| 411 return same; | |
| 412 } | |
| 413 | |
| 414 namespace { | |
| 415 | |
| 416 /** | |
| 417 * Maps a special script code to the most common script of its encoded character
s. | |
| 418 */ | |
| 419 UScriptCode getCharScript(UScriptCode script) { | |
| 420 switch(script) { | |
| 421 case USCRIPT_SIMPLIFIED_HAN: | |
| 422 case USCRIPT_TRADITIONAL_HAN: | |
| 423 return USCRIPT_HAN; | |
| 424 case USCRIPT_JAPANESE: | |
| 425 return USCRIPT_HIRAGANA; | |
| 426 case USCRIPT_KOREAN: | |
| 427 return USCRIPT_HANGUL; | |
| 428 default: | |
| 429 return script; | |
| 430 } | |
| 431 } | |
| 432 | |
| 433 } // namespace | |
| 434 | |
| 435 void UnicodeTest::TestScriptMetadata() { | |
| 436 IcuTestErrorCode errorCode(*this, "TestScriptMetadata()"); | |
| 437 UnicodeSet rtl("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]", errorCode); | |
| 438 // So far, sample characters are uppercase. | |
| 439 // Georgian is special. | |
| 440 UnicodeSet cased("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]", errorCode); | |
| 441 for(int32_t sci = 0; sci < USCRIPT_CODE_LIMIT; ++sci) { | |
| 442 UScriptCode sc = (UScriptCode)sci; | |
| 443 // Run the test with -v to see which script has failures: | |
| 444 // .../intltest$ make && ./intltest utility/UnicodeTest/TestScriptMetada
ta -v | grep -C 3 FAIL | |
| 445 logln(uscript_getShortName(sc)); | |
| 446 UScriptUsage usage = uscript_getUsage(sc); | |
| 447 UnicodeString sample = uscript_getSampleUnicodeString(sc); | |
| 448 UnicodeSet scriptSet; | |
| 449 scriptSet.applyIntPropertyValue(UCHAR_SCRIPT, sc, errorCode); | |
| 450 if(usage == USCRIPT_USAGE_NOT_ENCODED) { | |
| 451 assertTrue("not encoded, no sample", sample.isEmpty()); | |
| 452 assertFalse("not encoded, not RTL", uscript_isRightToLeft(sc)); | |
| 453 assertFalse("not encoded, not LB letters", uscript_breaksBetweenLett
ers(sc)); | |
| 454 assertFalse("not encoded, not cased", uscript_isCased(sc)); | |
| 455 assertTrue("not encoded, no characters", scriptSet.isEmpty()); | |
| 456 } else { | |
| 457 assertFalse("encoded, has a sample character", sample.isEmpty()); | |
| 458 UChar32 firstChar = sample.char32At(0); | |
| 459 UScriptCode charScript = getCharScript(sc); | |
| 460 assertEquals("script(sample(script))", | |
| 461 (int32_t)charScript, (int32_t)uscript_getScript(firstCh
ar, errorCode)); | |
| 462 assertEquals("RTL vs. set", (UBool)rtl.contains(firstChar), (UBool)u
script_isRightToLeft(sc)); | |
| 463 assertEquals("cased vs. set", (UBool)cased.contains(firstChar), (UBo
ol)uscript_isCased(sc)); | |
| 464 assertEquals("encoded, has characters", (UBool)(sc == charScript), (
UBool)(!scriptSet.isEmpty())); | |
| 465 if(uscript_isRightToLeft(sc)) { | |
| 466 rtl.removeAll(scriptSet); | |
| 467 } | |
| 468 if(uscript_isCased(sc)) { | |
| 469 cased.removeAll(scriptSet); | |
| 470 } | |
| 471 } | |
| 472 } | |
| 473 UnicodeString pattern; | |
| 474 assertEquals("no remaining RTL characters", | |
| 475 UnicodeString("[]"), rtl.toPattern(pattern)); | |
| 476 assertEquals("no remaining cased characters", | |
| 477 UnicodeString("[]"), cased.toPattern(pattern)); | |
| 478 | |
| 479 assertTrue("Hani breaks between letters", uscript_breaksBetweenLetters(USCRI
PT_HAN)); | |
| 480 assertTrue("Thai breaks between letters", uscript_breaksBetweenLetters(USCRI
PT_THAI)); | |
| 481 assertFalse("Latn does not break between letters", uscript_breaksBetweenLett
ers(USCRIPT_LATIN)); | |
| 482 } | |
| 483 | |
| 484 void UnicodeTest::TestBidiPairedBracketType() { | |
| 485 // BidiBrackets-6.3.0.txt says: | |
| 486 // | |
| 487 // The set of code points listed in this file was originally derived | |
| 488 // using the character properties General_Category (gc), Bidi_Class (bc), | |
| 489 // Bidi_Mirrored (Bidi_M), and Bidi_Mirroring_Glyph (bmg), as follows: | |
| 490 // two characters, A and B, form a pair if A has gc=Ps and B has gc=Pe, | |
| 491 // both have bc=ON and Bidi_M=Y, and bmg of A is B. Bidi_Paired_Bracket | |
| 492 // maps A to B and vice versa, and their Bidi_Paired_Bracket_Type | |
| 493 // property values are Open and Close, respectively. | |
| 494 IcuTestErrorCode errorCode(*this, "TestBidiPairedBracketType()"); | |
| 495 UnicodeSet bpt("[:^bpt=n:]", errorCode); | |
| 496 assertTrue("bpt!=None is not empty", !bpt.isEmpty()); | |
| 497 // The following should always be true. | |
| 498 UnicodeSet mirrored("[:Bidi_M:]", errorCode); | |
| 499 UnicodeSet other_neutral("[:bc=ON:]", errorCode); | |
| 500 assertTrue("bpt!=None is a subset of Bidi_M", mirrored.containsAll(bpt)); | |
| 501 assertTrue("bpt!=None is a subset of bc=ON", other_neutral.containsAll(bpt))
; | |
| 502 // The following are true at least initially in Unicode 6.3. | |
| 503 UnicodeSet bpt_open("[:bpt=o:]", errorCode); | |
| 504 UnicodeSet bpt_close("[:bpt=c:]", errorCode); | |
| 505 UnicodeSet ps("[:Ps:]", errorCode); | |
| 506 UnicodeSet pe("[:Pe:]", errorCode); | |
| 507 assertTrue("bpt=Open is a subset of Ps", ps.containsAll(bpt_open)); | |
| 508 assertTrue("bpt=Close is a subset of Pe", pe.containsAll(bpt_close)); | |
| 509 } | |
| 510 | |
| 511 void UnicodeTest::TestEmojiProperties() { | |
| 512 assertFalse("space is not Emoji", u_hasBinaryProperty(0x20, UCHAR_EMOJI)); | |
| 513 assertTrue("shooting star is Emoji", u_hasBinaryProperty(0x1F320, UCHAR_EMOJ
I)); | |
| 514 IcuTestErrorCode errorCode(*this, "TestEmojiProperties()"); | |
| 515 UnicodeSet emoji("[:Emoji:]", errorCode); | |
| 516 assertTrue("lots of Emoji", emoji.size() > 700); | |
| 517 | |
| 518 assertTrue("shooting star is Emoji_Presentation", | |
| 519 u_hasBinaryProperty(0x1F320, UCHAR_EMOJI_PRESENTATION)); | |
| 520 assertTrue("Fitzpatrick 6 is Emoji_Modifier", | |
| 521 u_hasBinaryProperty(0x1F3FF, UCHAR_EMOJI_MODIFIER)); | |
| 522 assertTrue("happy person is Emoji_Modifier_Base", | |
| 523 u_hasBinaryProperty(0x1F64B, UCHAR_EMOJI_MODIFIER_BASE)); | |
| 524 } | |
| OLD | NEW |