OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************** | 2 ******************************************************************************** |
3 * Copyright (C) 1999-2013 International Business Machines Corporation and | 3 * Copyright (C) 1999-2014 International Business Machines Corporation and |
4 * others. All Rights Reserved. | 4 * others. All Rights Reserved. |
5 ******************************************************************************** | 5 ******************************************************************************** |
6 * Date Name Description | 6 * Date Name Description |
7 * 10/20/99 alan Creation. | 7 * 10/20/99 alan Creation. |
8 * 03/22/2000 Madhu Added additional tests | 8 * 03/22/2000 Madhu Added additional tests |
9 ******************************************************************************** | 9 ******************************************************************************** |
10 */ | 10 */ |
11 | 11 |
12 #include <stdio.h> | 12 #include <stdio.h> |
13 | 13 |
14 #include <string.h> | 14 #include <string.h> |
15 #include "unicode/utypes.h" | 15 #include "unicode/utypes.h" |
16 #include "usettest.h" | 16 #include "usettest.h" |
17 #include "unicode/ucnv.h" | 17 #include "unicode/ucnv.h" |
18 #include "unicode/uniset.h" | 18 #include "unicode/uniset.h" |
19 #include "unicode/uchar.h" | 19 #include "unicode/uchar.h" |
20 #include "unicode/usetiter.h" | 20 #include "unicode/usetiter.h" |
21 #include "unicode/ustring.h" | 21 #include "unicode/ustring.h" |
22 #include "unicode/parsepos.h" | 22 #include "unicode/parsepos.h" |
23 #include "unicode/symtable.h" | 23 #include "unicode/symtable.h" |
24 #include "unicode/uversion.h" | 24 #include "unicode/uversion.h" |
25 #include "hash.h" | 25 #include "hash.h" |
26 | 26 |
27 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | |
28 | |
29 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ | 27 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ |
30 dataerrln("fail in file \"%s\", line %d: \"%s\"", __FILE__, __LINE__, \ | 28 dataerrln("fail in file \"%s\", line %d: \"%s\"", __FILE__, __LINE__, \ |
31 u_errorName(status));}} | 29 u_errorName(status));}} |
32 | 30 |
33 #define TEST_ASSERT(expr) {if (!(expr)) { \ | 31 #define TEST_ASSERT(expr) {if (!(expr)) { \ |
34 dataerrln("fail in file \"%s\", line %d", __FILE__, __LINE__); }} | 32 dataerrln("fail in file \"%s\", line %d", __FILE__, __LINE__); }} |
35 | 33 |
36 UnicodeString operator+(const UnicodeString& left, const UnicodeSet& set) { | 34 UnicodeString operator+(const UnicodeString& left, const UnicodeSet& set) { |
37 UnicodeString pat; | 35 UnicodeString pat; |
38 set.toPattern(pat); | 36 set.toPattern(pat); |
(...skipping 1008 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1047 "\\u0888\\uFDD3\\uFFFE\\U00050005", | 1045 "\\u0888\\uFDD3\\uFFFE\\U00050005", |
1048 | 1046 |
1049 // Script_Extensions, new in Unicode 6.0 | 1047 // Script_Extensions, new in Unicode 6.0 |
1050 "[:scx=Arab:]", | 1048 "[:scx=Arab:]", |
1051 "\\u061E\\u061F\\u0620\\u0621\\u063F\\u0640\\u0650\\u065E\\uFDF1\\uFDF2\
\uFDF3", | 1049 "\\u061E\\u061F\\u0620\\u0621\\u063F\\u0640\\u0650\\u065E\\uFDF1\\uFDF2\
\uFDF3", |
1052 "\\u061D\\uFDEF\\uFDFE", | 1050 "\\u061D\\uFDEF\\uFDFE", |
1053 | 1051 |
1054 // U+FDF2 has Script=Arabic and also Arab in its Script_Extensions, | 1052 // U+FDF2 has Script=Arabic and also Arab in its Script_Extensions, |
1055 // so scx-sc is missing U+FDF2. | 1053 // so scx-sc is missing U+FDF2. |
1056 "[[:Script_Extensions=Arabic:]-[:Arab:]]", | 1054 "[[:Script_Extensions=Arabic:]-[:Arab:]]", |
1057 "\\u0640\\u064B\\u0650\\u0655\\uFDFD", | 1055 "\\u0640\\u064B\\u0650\\u0655", |
1058 "\\uFDF2" | 1056 "\\uFDF2" |
1059 }; | 1057 }; |
1060 | 1058 |
1061 static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]); | 1059 static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]); |
1062 | 1060 |
1063 for (int32_t i=0; i<DATA_LEN; i+=3) { | 1061 for (int32_t i=0; i<DATA_LEN; i+=3) { |
1064 expectContainment(UnicodeString(DATA[i], -1, US_INV), CharsToUnicodeStri
ng(DATA[i+1]), | 1062 expectContainment(UnicodeString(DATA[i], -1, US_INV), CharsToUnicodeStri
ng(DATA[i+1]), |
1065 CharsToUnicodeString(DATA[i+2])); | 1063 CharsToUnicodeString(DATA[i+2])); |
1066 } | 1064 } |
1067 } | 1065 } |
(...skipping 1247 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2315 set(normalSet), stringsLength(0), hasSurrogates(FALSE) { | 2313 set(normalSet), stringsLength(0), hasSurrogates(FALSE) { |
2316 int32_t size=set.size(); | 2314 int32_t size=set.size(); |
2317 if(size>0 && set.charAt(size-1)<0) { | 2315 if(size>0 && set.charAt(size-1)<0) { |
2318 // If a set's last element is not a code point, then it must contain
strings. | 2316 // If a set's last element is not a code point, then it must contain
strings. |
2319 // Iterate over the set, skip all code point ranges, and cache the s
trings. | 2317 // Iterate over the set, skip all code point ranges, and cache the s
trings. |
2320 // Convert them to UTF-8 for spanUTF8(). | 2318 // Convert them to UTF-8 for spanUTF8(). |
2321 UnicodeSetIterator iter(set); | 2319 UnicodeSetIterator iter(set); |
2322 const UnicodeString *s; | 2320 const UnicodeString *s; |
2323 char *s8=utf8; | 2321 char *s8=utf8; |
2324 int32_t length8, utf8Count=0; | 2322 int32_t length8, utf8Count=0; |
2325 while(iter.nextRange() && stringsLength<LENGTHOF(strings)) { | 2323 while(iter.nextRange() && stringsLength<UPRV_LENGTHOF(strings)) { |
2326 if(iter.isString()) { | 2324 if(iter.isString()) { |
2327 // Store the pointer to the set's string element | 2325 // Store the pointer to the set's string element |
2328 // which we happen to know is a stable pointer. | 2326 // which we happen to know is a stable pointer. |
2329 strings[stringsLength]=s=&iter.getString(); | 2327 strings[stringsLength]=s=&iter.getString(); |
2330 utf8Count+= | 2328 utf8Count+= |
2331 utf8Lengths[stringsLength]=length8= | 2329 utf8Lengths[stringsLength]=length8= |
2332 appendUTF8(s->getBuffer(), s->length(), | 2330 appendUTF8(s->getBuffer(), s->length(), |
2333 s8, (int32_t)(sizeof(utf8)-utf8Count)); | 2331 s8, (int32_t)(sizeof(utf8)-utf8Count)); |
2334 if(length8==0) { | 2332 if(length8==0) { |
2335 hasSurrogates=TRUE; // Contains unpaired surrogates. | 2333 hasSurrogates=TRUE; // Contains unpaired surrogates. |
(...skipping 733 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3069 // Odd-numbered sets are complemented. | 3067 // Odd-numbered sets are complemented. |
3070 if((whichSpans&SPAN_COMPLEMENT)==0) { | 3068 if((whichSpans&SPAN_COMPLEMENT)==0) { |
3071 continue; | 3069 continue; |
3072 } | 3070 } |
3073 } | 3071 } |
3074 for(type=0;; ++type) { | 3072 for(type=0;; ++type) { |
3075 limitsCount=getSpans(*sets[i], (UBool)(i&1), | 3073 limitsCount=getSpans(*sets[i], (UBool)(i&1), |
3076 s, length, isUTF16, | 3074 s, length, isUTF16, |
3077 whichSpans, | 3075 whichSpans, |
3078 type, typeName, | 3076 type, typeName, |
3079 limits, LENGTHOF(limits), expectCount); | 3077 limits, UPRV_LENGTHOF(limits), expectCount); |
3080 if(typeName[0]==0) { | 3078 if(typeName[0]==0) { |
3081 break; // All types tried. | 3079 break; // All types tried. |
3082 } | 3080 } |
3083 if(limitsCount<0) { | 3081 if(limitsCount<0) { |
3084 continue; // Span option filtered out. | 3082 continue; // Span option filtered out. |
3085 } | 3083 } |
3086 if(expectCount<0) { | 3084 if(expectCount<0) { |
3087 expectCount=limitsCount; | 3085 expectCount=limitsCount; |
3088 if(limitsCount>LENGTHOF(limits)) { | 3086 if(limitsCount>UPRV_LENGTHOF(limits)) { |
3089 errln("FAIL: %s[0x%lx].%s.%s span count=%ld > %ld capacity -
too many spans", | 3087 errln("FAIL: %s[0x%lx].%s.%s span count=%ld > %ld capacity -
too many spans", |
3090 testName, (long)index, setNames[i], typeName, (long)li
mitsCount, (long)LENGTHOF(limits)); | 3088 testName, (long)index, setNames[i], typeName, (long)li
mitsCount, (long)UPRV_LENGTHOF(limits)); |
3091 return; | 3089 return; |
3092 } | 3090 } |
3093 memcpy(expectLimits, limits, limitsCount*4); | 3091 memcpy(expectLimits, limits, limitsCount*4); |
3094 } else if(limitsCount!=expectCount) { | 3092 } else if(limitsCount!=expectCount) { |
3095 errln("FAIL: %s[0x%lx].%s.%s span count=%ld != %ld", | 3093 errln("FAIL: %s[0x%lx].%s.%s span count=%ld != %ld", |
3096 testName, (long)index, setNames[i], typeName, (long)limits
Count, (long)expectCount); | 3094 testName, (long)index, setNames[i], typeName, (long)limits
Count, (long)expectCount); |
3097 } else { | 3095 } else { |
3098 for(j=0; j<limitsCount; ++j) { | 3096 for(j=0; j<limitsCount; ++j) { |
3099 if(limits[j]!=expectLimits[j]) { | 3097 if(limits[j]!=expectLimits[j]) { |
3100 errln("FAIL: %s[0x%lx].%s.%s span count=%ld limits[%d]=%
ld != %ld", | 3098 errln("FAIL: %s[0x%lx].%s.%s span count=%ld limits[%d]=%
ld != %ld", |
(...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3273 sets[0]->getSet().contains(0xd800, 0xdfff) : | 3271 sets[0]->getSet().contains(0xd800, 0xdfff) : |
3274 sets[0]->getSet().containsNone(0xd800, 0xdfff)) || | 3272 sets[0]->getSet().containsNone(0xd800, 0xdfff)) || |
3275 sets[0]->hasStringsWithSurrogates()); | 3273 sets[0]->hasStringsWithSurrogates()); |
3276 | 3274 |
3277 UChar s[1000]; | 3275 UChar s[1000]; |
3278 int32_t length=0; | 3276 int32_t length=0; |
3279 uint32_t localWhichSpans; | 3277 uint32_t localWhichSpans; |
3280 | 3278 |
3281 UChar32 c, first; | 3279 UChar32 c, first; |
3282 for(first=c=0;; c=nextCodePoint(c)) { | 3280 for(first=c=0;; c=nextCodePoint(c)) { |
3283 if(c>0x10ffff || length>(LENGTHOF(s)-U16_MAX_LENGTH)) { | 3281 if(c>0x10ffff || length>(UPRV_LENGTHOF(s)-U16_MAX_LENGTH)) { |
3284 localWhichSpans=whichSpans; | 3282 localWhichSpans=whichSpans; |
3285 if(stringContainsUnpairedSurrogate(s, length) && inconsistentSurroga
tes) { | 3283 if(stringContainsUnpairedSurrogate(s, length) && inconsistentSurroga
tes) { |
3286 localWhichSpans&=~SPAN_UTF8; | 3284 localWhichSpans&=~SPAN_UTF8; |
3287 } | 3285 } |
3288 testSpanBothUTFs(sets, s, length, localWhichSpans, testName, first); | 3286 testSpanBothUTFs(sets, s, length, localWhichSpans, testName, first); |
3289 if(c>0x10ffff) { | 3287 if(c>0x10ffff) { |
3290 break; | 3288 break; |
3291 } | 3289 } |
3292 length=0; | 3290 length=0; |
3293 first=c; | 3291 first=c; |
(...skipping 15 matching lines...) Expand all Loading... |
3309 0xd900, 0xdc05, // unassigned supplementary | 3307 0xd900, 0xdc05, // unassigned supplementary |
3310 0xd840, 0xdfff, 0xd860, 0xdffe, // Han supplementary | 3308 0xd840, 0xdfff, 0xd860, 0xdffe, // Han supplementary |
3311 0xd7a4, 0xdc05, 0xd900, 0x2028, // unassigned, surrogates in wro
ng order, LS | 3309 0xd7a4, 0xdc05, 0xd900, 0x2028, // unassigned, surrogates in wro
ng order, LS |
3312 0 // NUL | 3310 0 // NUL |
3313 }; | 3311 }; |
3314 | 3312 |
3315 if((whichSpans&SPAN_UTF16)==0) { | 3313 if((whichSpans&SPAN_UTF16)==0) { |
3316 return; | 3314 return; |
3317 } | 3315 } |
3318 testSpan(sets, s, -1, TRUE, (whichSpans&~SPAN_UTF8), testName, 0); | 3316 testSpan(sets, s, -1, TRUE, (whichSpans&~SPAN_UTF8), testName, 0); |
3319 testSpan(sets, s, LENGTHOF(s)-1, TRUE, (whichSpans&~SPAN_UTF8), testName, 1)
; | 3317 testSpan(sets, s, UPRV_LENGTHOF(s)-1, TRUE, (whichSpans&~SPAN_UTF8), testNam
e, 1); |
3320 } | 3318 } |
3321 | 3319 |
3322 void UnicodeSetTest::testSpanUTF8String(const UnicodeSetWithStrings *sets[4], ui
nt32_t whichSpans, const char *testName) { | 3320 void UnicodeSetTest::testSpanUTF8String(const UnicodeSetWithStrings *sets[4], ui
nt32_t whichSpans, const char *testName) { |
3323 static const char s[]={ | 3321 static const char s[]={ |
3324 "abc" // Latin | 3322 "abc" // Latin |
3325 | 3323 |
3326 /* trail byte in lead position */ | 3324 /* trail byte in lead position */ |
3327 "\x80" | 3325 "\x80" |
3328 | 3326 |
3329 " " // space | 3327 " " // space |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3406 /* trail byte in lead position */ | 3404 /* trail byte in lead position */ |
3407 "\x80" | 3405 "\x80" |
3408 | 3406 |
3409 "\xED\x9E\xA4\xE2\x80\xA8" // unassigned, LS, NUL-terminate
d | 3407 "\xED\x9E\xA4\xE2\x80\xA8" // unassigned, LS, NUL-terminate
d |
3410 }; | 3408 }; |
3411 | 3409 |
3412 if((whichSpans&SPAN_UTF8)==0) { | 3410 if((whichSpans&SPAN_UTF8)==0) { |
3413 return; | 3411 return; |
3414 } | 3412 } |
3415 testSpan(sets, s, -1, FALSE, (whichSpans&~SPAN_UTF16), testName, 0); | 3413 testSpan(sets, s, -1, FALSE, (whichSpans&~SPAN_UTF16), testName, 0); |
3416 testSpan(sets, s, LENGTHOF(s)-1, FALSE, (whichSpans&~SPAN_UTF16), testName,
1); | 3414 testSpan(sets, s, UPRV_LENGTHOF(s)-1, FALSE, (whichSpans&~SPAN_UTF16), testN
ame, 1); |
3417 } | 3415 } |
3418 | 3416 |
3419 // Take a set of span options and multiply them so that | 3417 // Take a set of span options and multiply them so that |
3420 // each portion only has one of the options a, b and c. | 3418 // each portion only has one of the options a, b and c. |
3421 // If b==0, then the set of options is just modified with mask and a. | 3419 // If b==0, then the set of options is just modified with mask and a. |
3422 // If b!=0 and c==0, then the set of options is just modified with mask, a and b
. | 3420 // If b!=0 and c==0, then the set of options is just modified with mask, a and b
. |
3423 static int32_t | 3421 static int32_t |
3424 addAlternative(uint32_t whichSpans[], int32_t whichSpansCount, | 3422 addAlternative(uint32_t whichSpans[], int32_t whichSpansCount, |
3425 uint32_t mask, uint32_t a, uint32_t b, uint32_t c) { | 3423 uint32_t mask, uint32_t a, uint32_t b, uint32_t c) { |
3426 uint32_t s; | 3424 uint32_t s; |
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3608 uint32_t whichSpans[96]={ SPAN_ALL }; | 3606 uint32_t whichSpans[96]={ SPAN_ALL }; |
3609 int32_t whichSpansCount=1; | 3607 int32_t whichSpansCount=1; |
3610 | 3608 |
3611 UnicodeSet *sets[SET_COUNT]={ NULL }; | 3609 UnicodeSet *sets[SET_COUNT]={ NULL }; |
3612 const UnicodeSetWithStrings *sets_with_str[SET_COUNT]={ NULL }; | 3610 const UnicodeSetWithStrings *sets_with_str[SET_COUNT]={ NULL }; |
3613 | 3611 |
3614 char testName[1024]; | 3612 char testName[1024]; |
3615 char *testNameLimit=testName; | 3613 char *testNameLimit=testName; |
3616 | 3614 |
3617 int32_t i, j; | 3615 int32_t i, j; |
3618 for(i=0; i<LENGTHOF(testdata); ++i) { | 3616 for(i=0; i<UPRV_LENGTHOF(testdata); ++i) { |
3619 const char *s=testdata[i]; | 3617 const char *s=testdata[i]; |
3620 if(s[0]=='[') { | 3618 if(s[0]=='[') { |
3621 // Create new test sets from this pattern. | 3619 // Create new test sets from this pattern. |
3622 for(j=0; j<SET_COUNT; ++j) { | 3620 for(j=0; j<SET_COUNT; ++j) { |
3623 delete sets_with_str[j]; | 3621 delete sets_with_str[j]; |
3624 delete sets[j]; | 3622 delete sets[j]; |
3625 } | 3623 } |
3626 UErrorCode errorCode=U_ZERO_ERROR; | 3624 UErrorCode errorCode=U_ZERO_ERROR; |
3627 sets[SLOW]=new UnicodeSet(UnicodeString(s, -1, US_INV).unescape(), e
rrorCode); | 3625 sets[SLOW]=new UnicodeSet(UnicodeString(s, -1, US_INV).unescape(), e
rrorCode); |
3628 if(U_FAILURE(errorCode)) { | 3626 if(U_FAILURE(errorCode)) { |
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3807 string16=UNICODE_STRING_SIMPLE("abbcdabcdabd"); | 3805 string16=UNICODE_STRING_SIMPLE("abbcdabcdabd"); |
3808 s16=string16.getBuffer(); | 3806 s16=string16.getBuffer(); |
3809 length16=string16.length(); | 3807 length16=string16.length(); |
3810 if( set.spanBack(s16, 12, USET_SPAN_CONTAINED)!=0 || | 3808 if( set.spanBack(s16, 12, USET_SPAN_CONTAINED)!=0 || |
3811 set.spanBack(s16, 12, USET_SPAN_SIMPLE)!=6 || | 3809 set.spanBack(s16, 12, USET_SPAN_SIMPLE)!=6 || |
3812 set.spanBack(s16, 5, USET_SPAN_SIMPLE)!=0 | 3810 set.spanBack(s16, 5, USET_SPAN_SIMPLE)!=0 |
3813 ) { | 3811 ) { |
3814 errln("FAIL: UnicodeSet(%s).spanBack(while longest match) returns the wr
ong value", pattern); | 3812 errln("FAIL: UnicodeSet(%s).spanBack(while longest match) returns the wr
ong value", pattern); |
3815 } | 3813 } |
3816 } | 3814 } |
OLD | NEW |