| OLD | NEW |
| 1 /* | 1 /* |
| 2 ******************************************************************************** | 2 ******************************************************************************** |
| 3 * Copyright (C) 1999-2013 International Business Machines Corporation and | 3 * Copyright (C) 1999-2014 International Business Machines Corporation and |
| 4 * others. All Rights Reserved. | 4 * others. All Rights Reserved. |
| 5 ******************************************************************************** | 5 ******************************************************************************** |
| 6 * Date Name Description | 6 * Date Name Description |
| 7 * 10/20/99 alan Creation. | 7 * 10/20/99 alan Creation. |
| 8 * 03/22/2000 Madhu Added additional tests | 8 * 03/22/2000 Madhu Added additional tests |
| 9 ******************************************************************************** | 9 ******************************************************************************** |
| 10 */ | 10 */ |
| 11 | 11 |
| 12 #include <stdio.h> | 12 #include <stdio.h> |
| 13 | 13 |
| 14 #include <string.h> | 14 #include <string.h> |
| 15 #include "unicode/utypes.h" | 15 #include "unicode/utypes.h" |
| 16 #include "usettest.h" | 16 #include "usettest.h" |
| 17 #include "unicode/ucnv.h" | 17 #include "unicode/ucnv.h" |
| 18 #include "unicode/uniset.h" | 18 #include "unicode/uniset.h" |
| 19 #include "unicode/uchar.h" | 19 #include "unicode/uchar.h" |
| 20 #include "unicode/usetiter.h" | 20 #include "unicode/usetiter.h" |
| 21 #include "unicode/ustring.h" | 21 #include "unicode/ustring.h" |
| 22 #include "unicode/parsepos.h" | 22 #include "unicode/parsepos.h" |
| 23 #include "unicode/symtable.h" | 23 #include "unicode/symtable.h" |
| 24 #include "unicode/uversion.h" | 24 #include "unicode/uversion.h" |
| 25 #include "hash.h" | 25 #include "hash.h" |
| 26 | 26 |
| 27 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | |
| 28 | |
| 29 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ | 27 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ |
| 30 dataerrln("fail in file \"%s\", line %d: \"%s\"", __FILE__, __LINE__, \ | 28 dataerrln("fail in file \"%s\", line %d: \"%s\"", __FILE__, __LINE__, \ |
| 31 u_errorName(status));}} | 29 u_errorName(status));}} |
| 32 | 30 |
| 33 #define TEST_ASSERT(expr) {if (!(expr)) { \ | 31 #define TEST_ASSERT(expr) {if (!(expr)) { \ |
| 34 dataerrln("fail in file \"%s\", line %d", __FILE__, __LINE__); }} | 32 dataerrln("fail in file \"%s\", line %d", __FILE__, __LINE__); }} |
| 35 | 33 |
| 36 UnicodeString operator+(const UnicodeString& left, const UnicodeSet& set) { | 34 UnicodeString operator+(const UnicodeString& left, const UnicodeSet& set) { |
| 37 UnicodeString pat; | 35 UnicodeString pat; |
| 38 set.toPattern(pat); | 36 set.toPattern(pat); |
| (...skipping 1008 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1047 "\\u0888\\uFDD3\\uFFFE\\U00050005", | 1045 "\\u0888\\uFDD3\\uFFFE\\U00050005", |
| 1048 | 1046 |
| 1049 // Script_Extensions, new in Unicode 6.0 | 1047 // Script_Extensions, new in Unicode 6.0 |
| 1050 "[:scx=Arab:]", | 1048 "[:scx=Arab:]", |
| 1051 "\\u061E\\u061F\\u0620\\u0621\\u063F\\u0640\\u0650\\u065E\\uFDF1\\uFDF2\
\uFDF3", | 1049 "\\u061E\\u061F\\u0620\\u0621\\u063F\\u0640\\u0650\\u065E\\uFDF1\\uFDF2\
\uFDF3", |
| 1052 "\\u061D\\uFDEF\\uFDFE", | 1050 "\\u061D\\uFDEF\\uFDFE", |
| 1053 | 1051 |
| 1054 // U+FDF2 has Script=Arabic and also Arab in its Script_Extensions, | 1052 // U+FDF2 has Script=Arabic and also Arab in its Script_Extensions, |
| 1055 // so scx-sc is missing U+FDF2. | 1053 // so scx-sc is missing U+FDF2. |
| 1056 "[[:Script_Extensions=Arabic:]-[:Arab:]]", | 1054 "[[:Script_Extensions=Arabic:]-[:Arab:]]", |
| 1057 "\\u0640\\u064B\\u0650\\u0655\\uFDFD", | 1055 "\\u0640\\u064B\\u0650\\u0655", |
| 1058 "\\uFDF2" | 1056 "\\uFDF2" |
| 1059 }; | 1057 }; |
| 1060 | 1058 |
| 1061 static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]); | 1059 static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]); |
| 1062 | 1060 |
| 1063 for (int32_t i=0; i<DATA_LEN; i+=3) { | 1061 for (int32_t i=0; i<DATA_LEN; i+=3) { |
| 1064 expectContainment(UnicodeString(DATA[i], -1, US_INV), CharsToUnicodeStri
ng(DATA[i+1]), | 1062 expectContainment(UnicodeString(DATA[i], -1, US_INV), CharsToUnicodeStri
ng(DATA[i+1]), |
| 1065 CharsToUnicodeString(DATA[i+2])); | 1063 CharsToUnicodeString(DATA[i+2])); |
| 1066 } | 1064 } |
| 1067 } | 1065 } |
| (...skipping 1247 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2315 set(normalSet), stringsLength(0), hasSurrogates(FALSE) { | 2313 set(normalSet), stringsLength(0), hasSurrogates(FALSE) { |
| 2316 int32_t size=set.size(); | 2314 int32_t size=set.size(); |
| 2317 if(size>0 && set.charAt(size-1)<0) { | 2315 if(size>0 && set.charAt(size-1)<0) { |
| 2318 // If a set's last element is not a code point, then it must contain
strings. | 2316 // If a set's last element is not a code point, then it must contain
strings. |
| 2319 // Iterate over the set, skip all code point ranges, and cache the s
trings. | 2317 // Iterate over the set, skip all code point ranges, and cache the s
trings. |
| 2320 // Convert them to UTF-8 for spanUTF8(). | 2318 // Convert them to UTF-8 for spanUTF8(). |
| 2321 UnicodeSetIterator iter(set); | 2319 UnicodeSetIterator iter(set); |
| 2322 const UnicodeString *s; | 2320 const UnicodeString *s; |
| 2323 char *s8=utf8; | 2321 char *s8=utf8; |
| 2324 int32_t length8, utf8Count=0; | 2322 int32_t length8, utf8Count=0; |
| 2325 while(iter.nextRange() && stringsLength<LENGTHOF(strings)) { | 2323 while(iter.nextRange() && stringsLength<UPRV_LENGTHOF(strings)) { |
| 2326 if(iter.isString()) { | 2324 if(iter.isString()) { |
| 2327 // Store the pointer to the set's string element | 2325 // Store the pointer to the set's string element |
| 2328 // which we happen to know is a stable pointer. | 2326 // which we happen to know is a stable pointer. |
| 2329 strings[stringsLength]=s=&iter.getString(); | 2327 strings[stringsLength]=s=&iter.getString(); |
| 2330 utf8Count+= | 2328 utf8Count+= |
| 2331 utf8Lengths[stringsLength]=length8= | 2329 utf8Lengths[stringsLength]=length8= |
| 2332 appendUTF8(s->getBuffer(), s->length(), | 2330 appendUTF8(s->getBuffer(), s->length(), |
| 2333 s8, (int32_t)(sizeof(utf8)-utf8Count)); | 2331 s8, (int32_t)(sizeof(utf8)-utf8Count)); |
| 2334 if(length8==0) { | 2332 if(length8==0) { |
| 2335 hasSurrogates=TRUE; // Contains unpaired surrogates. | 2333 hasSurrogates=TRUE; // Contains unpaired surrogates. |
| (...skipping 733 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3069 // Odd-numbered sets are complemented. | 3067 // Odd-numbered sets are complemented. |
| 3070 if((whichSpans&SPAN_COMPLEMENT)==0) { | 3068 if((whichSpans&SPAN_COMPLEMENT)==0) { |
| 3071 continue; | 3069 continue; |
| 3072 } | 3070 } |
| 3073 } | 3071 } |
| 3074 for(type=0;; ++type) { | 3072 for(type=0;; ++type) { |
| 3075 limitsCount=getSpans(*sets[i], (UBool)(i&1), | 3073 limitsCount=getSpans(*sets[i], (UBool)(i&1), |
| 3076 s, length, isUTF16, | 3074 s, length, isUTF16, |
| 3077 whichSpans, | 3075 whichSpans, |
| 3078 type, typeName, | 3076 type, typeName, |
| 3079 limits, LENGTHOF(limits), expectCount); | 3077 limits, UPRV_LENGTHOF(limits), expectCount); |
| 3080 if(typeName[0]==0) { | 3078 if(typeName[0]==0) { |
| 3081 break; // All types tried. | 3079 break; // All types tried. |
| 3082 } | 3080 } |
| 3083 if(limitsCount<0) { | 3081 if(limitsCount<0) { |
| 3084 continue; // Span option filtered out. | 3082 continue; // Span option filtered out. |
| 3085 } | 3083 } |
| 3086 if(expectCount<0) { | 3084 if(expectCount<0) { |
| 3087 expectCount=limitsCount; | 3085 expectCount=limitsCount; |
| 3088 if(limitsCount>LENGTHOF(limits)) { | 3086 if(limitsCount>UPRV_LENGTHOF(limits)) { |
| 3089 errln("FAIL: %s[0x%lx].%s.%s span count=%ld > %ld capacity -
too many spans", | 3087 errln("FAIL: %s[0x%lx].%s.%s span count=%ld > %ld capacity -
too many spans", |
| 3090 testName, (long)index, setNames[i], typeName, (long)li
mitsCount, (long)LENGTHOF(limits)); | 3088 testName, (long)index, setNames[i], typeName, (long)li
mitsCount, (long)UPRV_LENGTHOF(limits)); |
| 3091 return; | 3089 return; |
| 3092 } | 3090 } |
| 3093 memcpy(expectLimits, limits, limitsCount*4); | 3091 memcpy(expectLimits, limits, limitsCount*4); |
| 3094 } else if(limitsCount!=expectCount) { | 3092 } else if(limitsCount!=expectCount) { |
| 3095 errln("FAIL: %s[0x%lx].%s.%s span count=%ld != %ld", | 3093 errln("FAIL: %s[0x%lx].%s.%s span count=%ld != %ld", |
| 3096 testName, (long)index, setNames[i], typeName, (long)limits
Count, (long)expectCount); | 3094 testName, (long)index, setNames[i], typeName, (long)limits
Count, (long)expectCount); |
| 3097 } else { | 3095 } else { |
| 3098 for(j=0; j<limitsCount; ++j) { | 3096 for(j=0; j<limitsCount; ++j) { |
| 3099 if(limits[j]!=expectLimits[j]) { | 3097 if(limits[j]!=expectLimits[j]) { |
| 3100 errln("FAIL: %s[0x%lx].%s.%s span count=%ld limits[%d]=%
ld != %ld", | 3098 errln("FAIL: %s[0x%lx].%s.%s span count=%ld limits[%d]=%
ld != %ld", |
| (...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3273 sets[0]->getSet().contains(0xd800, 0xdfff) : | 3271 sets[0]->getSet().contains(0xd800, 0xdfff) : |
| 3274 sets[0]->getSet().containsNone(0xd800, 0xdfff)) || | 3272 sets[0]->getSet().containsNone(0xd800, 0xdfff)) || |
| 3275 sets[0]->hasStringsWithSurrogates()); | 3273 sets[0]->hasStringsWithSurrogates()); |
| 3276 | 3274 |
| 3277 UChar s[1000]; | 3275 UChar s[1000]; |
| 3278 int32_t length=0; | 3276 int32_t length=0; |
| 3279 uint32_t localWhichSpans; | 3277 uint32_t localWhichSpans; |
| 3280 | 3278 |
| 3281 UChar32 c, first; | 3279 UChar32 c, first; |
| 3282 for(first=c=0;; c=nextCodePoint(c)) { | 3280 for(first=c=0;; c=nextCodePoint(c)) { |
| 3283 if(c>0x10ffff || length>(LENGTHOF(s)-U16_MAX_LENGTH)) { | 3281 if(c>0x10ffff || length>(UPRV_LENGTHOF(s)-U16_MAX_LENGTH)) { |
| 3284 localWhichSpans=whichSpans; | 3282 localWhichSpans=whichSpans; |
| 3285 if(stringContainsUnpairedSurrogate(s, length) && inconsistentSurroga
tes) { | 3283 if(stringContainsUnpairedSurrogate(s, length) && inconsistentSurroga
tes) { |
| 3286 localWhichSpans&=~SPAN_UTF8; | 3284 localWhichSpans&=~SPAN_UTF8; |
| 3287 } | 3285 } |
| 3288 testSpanBothUTFs(sets, s, length, localWhichSpans, testName, first); | 3286 testSpanBothUTFs(sets, s, length, localWhichSpans, testName, first); |
| 3289 if(c>0x10ffff) { | 3287 if(c>0x10ffff) { |
| 3290 break; | 3288 break; |
| 3291 } | 3289 } |
| 3292 length=0; | 3290 length=0; |
| 3293 first=c; | 3291 first=c; |
| (...skipping 15 matching lines...) Expand all Loading... |
| 3309 0xd900, 0xdc05, // unassigned supplementary | 3307 0xd900, 0xdc05, // unassigned supplementary |
| 3310 0xd840, 0xdfff, 0xd860, 0xdffe, // Han supplementary | 3308 0xd840, 0xdfff, 0xd860, 0xdffe, // Han supplementary |
| 3311 0xd7a4, 0xdc05, 0xd900, 0x2028, // unassigned, surrogates in wro
ng order, LS | 3309 0xd7a4, 0xdc05, 0xd900, 0x2028, // unassigned, surrogates in wro
ng order, LS |
| 3312 0 // NUL | 3310 0 // NUL |
| 3313 }; | 3311 }; |
| 3314 | 3312 |
| 3315 if((whichSpans&SPAN_UTF16)==0) { | 3313 if((whichSpans&SPAN_UTF16)==0) { |
| 3316 return; | 3314 return; |
| 3317 } | 3315 } |
| 3318 testSpan(sets, s, -1, TRUE, (whichSpans&~SPAN_UTF8), testName, 0); | 3316 testSpan(sets, s, -1, TRUE, (whichSpans&~SPAN_UTF8), testName, 0); |
| 3319 testSpan(sets, s, LENGTHOF(s)-1, TRUE, (whichSpans&~SPAN_UTF8), testName, 1)
; | 3317 testSpan(sets, s, UPRV_LENGTHOF(s)-1, TRUE, (whichSpans&~SPAN_UTF8), testNam
e, 1); |
| 3320 } | 3318 } |
| 3321 | 3319 |
| 3322 void UnicodeSetTest::testSpanUTF8String(const UnicodeSetWithStrings *sets[4], ui
nt32_t whichSpans, const char *testName) { | 3320 void UnicodeSetTest::testSpanUTF8String(const UnicodeSetWithStrings *sets[4], ui
nt32_t whichSpans, const char *testName) { |
| 3323 static const char s[]={ | 3321 static const char s[]={ |
| 3324 "abc" // Latin | 3322 "abc" // Latin |
| 3325 | 3323 |
| 3326 /* trail byte in lead position */ | 3324 /* trail byte in lead position */ |
| 3327 "\x80" | 3325 "\x80" |
| 3328 | 3326 |
| 3329 " " // space | 3327 " " // space |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3406 /* trail byte in lead position */ | 3404 /* trail byte in lead position */ |
| 3407 "\x80" | 3405 "\x80" |
| 3408 | 3406 |
| 3409 "\xED\x9E\xA4\xE2\x80\xA8" // unassigned, LS, NUL-terminate
d | 3407 "\xED\x9E\xA4\xE2\x80\xA8" // unassigned, LS, NUL-terminate
d |
| 3410 }; | 3408 }; |
| 3411 | 3409 |
| 3412 if((whichSpans&SPAN_UTF8)==0) { | 3410 if((whichSpans&SPAN_UTF8)==0) { |
| 3413 return; | 3411 return; |
| 3414 } | 3412 } |
| 3415 testSpan(sets, s, -1, FALSE, (whichSpans&~SPAN_UTF16), testName, 0); | 3413 testSpan(sets, s, -1, FALSE, (whichSpans&~SPAN_UTF16), testName, 0); |
| 3416 testSpan(sets, s, LENGTHOF(s)-1, FALSE, (whichSpans&~SPAN_UTF16), testName,
1); | 3414 testSpan(sets, s, UPRV_LENGTHOF(s)-1, FALSE, (whichSpans&~SPAN_UTF16), testN
ame, 1); |
| 3417 } | 3415 } |
| 3418 | 3416 |
| 3419 // Take a set of span options and multiply them so that | 3417 // Take a set of span options and multiply them so that |
| 3420 // each portion only has one of the options a, b and c. | 3418 // each portion only has one of the options a, b and c. |
| 3421 // If b==0, then the set of options is just modified with mask and a. | 3419 // If b==0, then the set of options is just modified with mask and a. |
| 3422 // If b!=0 and c==0, then the set of options is just modified with mask, a and b
. | 3420 // If b!=0 and c==0, then the set of options is just modified with mask, a and b
. |
| 3423 static int32_t | 3421 static int32_t |
| 3424 addAlternative(uint32_t whichSpans[], int32_t whichSpansCount, | 3422 addAlternative(uint32_t whichSpans[], int32_t whichSpansCount, |
| 3425 uint32_t mask, uint32_t a, uint32_t b, uint32_t c) { | 3423 uint32_t mask, uint32_t a, uint32_t b, uint32_t c) { |
| 3426 uint32_t s; | 3424 uint32_t s; |
| (...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3608 uint32_t whichSpans[96]={ SPAN_ALL }; | 3606 uint32_t whichSpans[96]={ SPAN_ALL }; |
| 3609 int32_t whichSpansCount=1; | 3607 int32_t whichSpansCount=1; |
| 3610 | 3608 |
| 3611 UnicodeSet *sets[SET_COUNT]={ NULL }; | 3609 UnicodeSet *sets[SET_COUNT]={ NULL }; |
| 3612 const UnicodeSetWithStrings *sets_with_str[SET_COUNT]={ NULL }; | 3610 const UnicodeSetWithStrings *sets_with_str[SET_COUNT]={ NULL }; |
| 3613 | 3611 |
| 3614 char testName[1024]; | 3612 char testName[1024]; |
| 3615 char *testNameLimit=testName; | 3613 char *testNameLimit=testName; |
| 3616 | 3614 |
| 3617 int32_t i, j; | 3615 int32_t i, j; |
| 3618 for(i=0; i<LENGTHOF(testdata); ++i) { | 3616 for(i=0; i<UPRV_LENGTHOF(testdata); ++i) { |
| 3619 const char *s=testdata[i]; | 3617 const char *s=testdata[i]; |
| 3620 if(s[0]=='[') { | 3618 if(s[0]=='[') { |
| 3621 // Create new test sets from this pattern. | 3619 // Create new test sets from this pattern. |
| 3622 for(j=0; j<SET_COUNT; ++j) { | 3620 for(j=0; j<SET_COUNT; ++j) { |
| 3623 delete sets_with_str[j]; | 3621 delete sets_with_str[j]; |
| 3624 delete sets[j]; | 3622 delete sets[j]; |
| 3625 } | 3623 } |
| 3626 UErrorCode errorCode=U_ZERO_ERROR; | 3624 UErrorCode errorCode=U_ZERO_ERROR; |
| 3627 sets[SLOW]=new UnicodeSet(UnicodeString(s, -1, US_INV).unescape(), e
rrorCode); | 3625 sets[SLOW]=new UnicodeSet(UnicodeString(s, -1, US_INV).unescape(), e
rrorCode); |
| 3628 if(U_FAILURE(errorCode)) { | 3626 if(U_FAILURE(errorCode)) { |
| (...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3807 string16=UNICODE_STRING_SIMPLE("abbcdabcdabd"); | 3805 string16=UNICODE_STRING_SIMPLE("abbcdabcdabd"); |
| 3808 s16=string16.getBuffer(); | 3806 s16=string16.getBuffer(); |
| 3809 length16=string16.length(); | 3807 length16=string16.length(); |
| 3810 if( set.spanBack(s16, 12, USET_SPAN_CONTAINED)!=0 || | 3808 if( set.spanBack(s16, 12, USET_SPAN_CONTAINED)!=0 || |
| 3811 set.spanBack(s16, 12, USET_SPAN_SIMPLE)!=6 || | 3809 set.spanBack(s16, 12, USET_SPAN_SIMPLE)!=6 || |
| 3812 set.spanBack(s16, 5, USET_SPAN_SIMPLE)!=0 | 3810 set.spanBack(s16, 5, USET_SPAN_SIMPLE)!=0 |
| 3813 ) { | 3811 ) { |
| 3814 errln("FAIL: UnicodeSet(%s).spanBack(while longest match) returns the wr
ong value", pattern); | 3812 errln("FAIL: UnicodeSet(%s).spanBack(while longest match) returns the wr
ong value", pattern); |
| 3815 } | 3813 } |
| 3816 } | 3814 } |
| OLD | NEW |