OLD | NEW |
1 /******************************************************************** | 1 /******************************************************************** |
2 * COPYRIGHT: | 2 * COPYRIGHT: |
3 * Copyright (c) 1997-2013, International Business Machines Corporation and | 3 * Copyright (c) 1997-2014, International Business Machines Corporation and |
4 * others. All Rights Reserved. | 4 * others. All Rights Reserved. |
5 ********************************************************************/ | 5 ********************************************************************/ |
6 | 6 |
7 #include "unicode/ustring.h" | 7 #include "unicode/ustring.h" |
8 #include "unicode/uchar.h" | 8 #include "unicode/uchar.h" |
9 #include "unicode/uniset.h" | 9 #include "unicode/uniset.h" |
10 #include "unicode/putil.h" | 10 #include "unicode/putil.h" |
11 #include "unicode/uscript.h" | 11 #include "unicode/uscript.h" |
12 #include "cstring.h" | 12 #include "cstring.h" |
13 #include "hash.h" | 13 #include "hash.h" |
14 #include "patternprops.h" | 14 #include "patternprops.h" |
15 #include "normalizer2impl.h" | 15 #include "normalizer2impl.h" |
16 #include "uparse.h" | 16 #include "uparse.h" |
17 #include "ucdtest.h" | 17 #include "ucdtest.h" |
18 | 18 |
19 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof(array[0])) | |
20 | |
21 static const char *ignorePropNames[]={ | 19 static const char *ignorePropNames[]={ |
22 "FC_NFKC", | 20 "FC_NFKC", |
23 "NFD_QC", | 21 "NFD_QC", |
24 "NFC_QC", | 22 "NFC_QC", |
25 "NFKD_QC", | 23 "NFKD_QC", |
26 "NFKC_QC", | 24 "NFKC_QC", |
27 "Expands_On_NFD", | 25 "Expands_On_NFD", |
28 "Expands_On_NFC", | 26 "Expands_On_NFC", |
29 "Expands_On_NFKD", | 27 "Expands_On_NFKD", |
30 "Expands_On_NFKC", | 28 "Expands_On_NFKC", |
31 "NFKC_CF" | 29 "NFKC_CF" |
32 }; | 30 }; |
33 | 31 |
34 UnicodeTest::UnicodeTest() | 32 UnicodeTest::UnicodeTest() |
35 { | 33 { |
36 UErrorCode errorCode=U_ZERO_ERROR; | 34 UErrorCode errorCode=U_ZERO_ERROR; |
37 unknownPropertyNames=new U_NAMESPACE_QUALIFIER Hashtable(errorCode); | 35 unknownPropertyNames=new U_NAMESPACE_QUALIFIER Hashtable(errorCode); |
38 if(U_FAILURE(errorCode)) { | 36 if(U_FAILURE(errorCode)) { |
39 delete unknownPropertyNames; | 37 delete unknownPropertyNames; |
40 unknownPropertyNames=NULL; | 38 unknownPropertyNames=NULL; |
41 } | 39 } |
42 // Ignore some property names altogether. | 40 // Ignore some property names altogether. |
43 for(int32_t i=0; i<LENGTHOF(ignorePropNames); ++i) { | 41 for(int32_t i=0; i<UPRV_LENGTHOF(ignorePropNames); ++i) { |
44 unknownPropertyNames->puti(UnicodeString(ignorePropNames[i], -1, US_INV)
, 1, errorCode); | 42 unknownPropertyNames->puti(UnicodeString(ignorePropNames[i], -1, US_INV)
, 1, errorCode); |
45 } | 43 } |
46 } | 44 } |
47 | 45 |
48 UnicodeTest::~UnicodeTest() | 46 UnicodeTest::~UnicodeTest() |
49 { | 47 { |
50 delete unknownPropertyNames; | 48 delete unknownPropertyNames; |
51 } | 49 } |
52 | 50 |
53 void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name,
char* /*par*/ ) | 51 void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name,
char* /*par*/ ) |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
143 UCHAR_CASED, | 141 UCHAR_CASED, |
144 UCHAR_CASE_IGNORABLE, | 142 UCHAR_CASE_IGNORABLE, |
145 UCHAR_CHANGES_WHEN_LOWERCASED, | 143 UCHAR_CHANGES_WHEN_LOWERCASED, |
146 UCHAR_CHANGES_WHEN_UPPERCASED, | 144 UCHAR_CHANGES_WHEN_UPPERCASED, |
147 UCHAR_CHANGES_WHEN_TITLECASED, | 145 UCHAR_CHANGES_WHEN_TITLECASED, |
148 UCHAR_CHANGES_WHEN_CASEFOLDED, | 146 UCHAR_CHANGES_WHEN_CASEFOLDED, |
149 UCHAR_CHANGES_WHEN_CASEMAPPED, | 147 UCHAR_CHANGES_WHEN_CASEMAPPED, |
150 UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED | 148 UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED |
151 }; | 149 }; |
152 | 150 |
153 static int32_t numErrors[LENGTHOF(derivedPropsIndex)]={ 0 }; | 151 static int32_t numErrors[UPRV_LENGTHOF(derivedPropsIndex)]={ 0 }; |
154 | 152 |
155 enum { MAX_ERRORS=50 }; | 153 enum { MAX_ERRORS=50 }; |
156 | 154 |
157 U_CFUNC void U_CALLCONV | 155 U_CFUNC void U_CALLCONV |
158 derivedPropsLineFn(void *context, | 156 derivedPropsLineFn(void *context, |
159 char *fields[][2], int32_t /* fieldCount */, | 157 char *fields[][2], int32_t /* fieldCount */, |
160 UErrorCode *pErrorCode) | 158 UErrorCode *pErrorCode) |
161 { | 159 { |
162 UnicodeTest *me=(UnicodeTest *)context; | 160 UnicodeTest *me=(UnicodeTest *)context; |
163 uint32_t start, end; | 161 uint32_t start, end; |
164 int32_t i; | 162 int32_t i; |
165 | 163 |
166 u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode); | 164 u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode); |
167 if(U_FAILURE(*pErrorCode)) { | 165 if(U_FAILURE(*pErrorCode)) { |
168 me->errln("UnicodeTest: syntax error in DerivedCoreProperties.txt or Der
ivedNormalizationProps.txt field 0 at %s\n", fields[0][0]); | 166 me->errln("UnicodeTest: syntax error in DerivedCoreProperties.txt or Der
ivedNormalizationProps.txt field 0 at %s\n", fields[0][0]); |
169 return; | 167 return; |
170 } | 168 } |
171 | 169 |
172 /* parse derived binary property name, ignore unknown names */ | 170 /* parse derived binary property name, ignore unknown names */ |
173 i=getTokenIndex(derivedPropsNames, LENGTHOF(derivedPropsNames), fields[1][0]
); | 171 i=getTokenIndex(derivedPropsNames, UPRV_LENGTHOF(derivedPropsNames), fields[
1][0]); |
174 if(i<0) { | 172 if(i<0) { |
175 UnicodeString propName(fields[1][0], (int32_t)(fields[1][1]-fields[1][0]
)); | 173 UnicodeString propName(fields[1][0], (int32_t)(fields[1][1]-fields[1][0]
)); |
176 propName.trim(); | 174 propName.trim(); |
177 if(me->unknownPropertyNames->find(propName)==NULL) { | 175 if(me->unknownPropertyNames->find(propName)==NULL) { |
178 UErrorCode errorCode=U_ZERO_ERROR; | 176 UErrorCode errorCode=U_ZERO_ERROR; |
179 me->unknownPropertyNames->puti(propName, 1, errorCode); | 177 me->unknownPropertyNames->puti(propName, 1, errorCode); |
180 me->errln("UnicodeTest warning: unknown property name '%s' in Derive
dCoreProperties.txt or DerivedNormalizationProps.txt\n", fields[1][0]); | 178 me->errln("UnicodeTest warning: unknown property name '%s' in Derive
dCoreProperties.txt or DerivedNormalizationProps.txt\n", fields[1][0]); |
181 } | 179 } |
182 return; | 180 return; |
183 } | 181 } |
184 | 182 |
185 me->derivedProps[i].add(start, end); | 183 me->derivedProps[i].add(start, end); |
186 } | 184 } |
187 | 185 |
188 void UnicodeTest::TestAdditionalProperties() { | 186 void UnicodeTest::TestAdditionalProperties() { |
189 #if !UCONFIG_NO_NORMALIZATION | 187 #if !UCONFIG_NO_NORMALIZATION |
190 // test DerivedCoreProperties.txt and DerivedNormalizationProps.txt | 188 // test DerivedCoreProperties.txt and DerivedNormalizationProps.txt |
191 if(LENGTHOF(derivedProps)<LENGTHOF(derivedPropsNames)) { | 189 if(UPRV_LENGTHOF(derivedProps)<UPRV_LENGTHOF(derivedPropsNames)) { |
192 errln("error: UnicodeTest::derivedProps[] too short, need at least %d Un
icodeSets\n", | 190 errln("error: UnicodeTest::derivedProps[] too short, need at least %d Un
icodeSets\n", |
193 LENGTHOF(derivedPropsNames)); | 191 UPRV_LENGTHOF(derivedPropsNames)); |
194 return; | 192 return; |
195 } | 193 } |
196 if(LENGTHOF(derivedPropsIndex)!=LENGTHOF(derivedPropsNames)) { | 194 if(UPRV_LENGTHOF(derivedPropsIndex)!=UPRV_LENGTHOF(derivedPropsNames)) { |
197 errln("error in ucdtest.cpp: LENGTHOF(derivedPropsIndex)!=LENGTHOF(deriv
edPropsNames)\n"); | 195 errln("error in ucdtest.cpp: UPRV_LENGTHOF(derivedPropsIndex)!=UPRV_LENG
THOF(derivedPropsNames)\n"); |
198 return; | 196 return; |
199 } | 197 } |
200 | 198 |
201 char newPath[256]; | 199 char path[500]; |
202 char backupPath[256]; | 200 if(getUnidataPath(path) == NULL) { |
| 201 errln("unable to find path to source/data/unidata/"); |
| 202 return; |
| 203 } |
| 204 char *basename=strchr(path, 0); |
| 205 strcpy(basename, "DerivedCoreProperties.txt"); |
| 206 |
203 char *fields[2][2]; | 207 char *fields[2][2]; |
204 UErrorCode errorCode=U_ZERO_ERROR; | 208 UErrorCode errorCode=U_ZERO_ERROR; |
205 | 209 u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorC
ode); |
206 /* Look inside ICU_DATA first */ | |
207 strcpy(newPath, pathToDataDirectory()); | |
208 strcat(newPath, "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt"); | |
209 | |
210 // As a fallback, try to guess where the source data was located | |
211 // at the time ICU was built, and look there. | |
212 # ifdef U_TOPSRCDIR | |
213 strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data"); | |
214 # else | |
215 strcpy(backupPath, loadTestData(errorCode)); | |
216 strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_
SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data"); | |
217 # endif | |
218 strcat(backupPath, U_FILE_SEP_STRING); | |
219 strcat(backupPath, "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt"); | |
220 | |
221 char *path=newPath; | |
222 u_parseDelimitedFile(newPath, ';', fields, 2, derivedPropsLineFn, this, &err
orCode); | |
223 | |
224 if(errorCode==U_FILE_ACCESS_ERROR) { | |
225 errorCode=U_ZERO_ERROR; | |
226 path=backupPath; | |
227 u_parseDelimitedFile(backupPath, ';', fields, 2, derivedPropsLineFn, thi
s, &errorCode); | |
228 } | |
229 if(U_FAILURE(errorCode)) { | 210 if(U_FAILURE(errorCode)) { |
230 errln("error parsing DerivedCoreProperties.txt: %s\n", u_errorName(error
Code)); | 211 errln("error parsing DerivedCoreProperties.txt: %s\n", u_errorName(error
Code)); |
231 return; | 212 return; |
232 } | 213 } |
233 char *basename=path+strlen(path)-strlen("DerivedCoreProperties.txt"); | 214 |
234 strcpy(basename, "DerivedNormalizationProps.txt"); | 215 strcpy(basename, "DerivedNormalizationProps.txt"); |
235 u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorC
ode); | 216 u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorC
ode); |
236 if(U_FAILURE(errorCode)) { | 217 if(U_FAILURE(errorCode)) { |
237 errln("error parsing DerivedNormalizationProps.txt: %s\n", u_errorName(e
rrorCode)); | 218 errln("error parsing DerivedNormalizationProps.txt: %s\n", u_errorName(e
rrorCode)); |
238 return; | 219 return; |
239 } | 220 } |
240 | 221 |
241 // now we have all derived core properties in the UnicodeSets | 222 // now we have all derived core properties in the UnicodeSets |
242 // run them all through the API | 223 // run them all through the API |
243 int32_t rangeCount, range; | 224 int32_t rangeCount, range; |
244 uint32_t i; | 225 uint32_t i; |
245 UChar32 start, end; | 226 UChar32 start, end; |
246 | 227 |
247 // test all TRUE properties | 228 // test all TRUE properties |
248 for(i=0; i<LENGTHOF(derivedPropsNames); ++i) { | 229 for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) { |
249 rangeCount=derivedProps[i].getRangeCount(); | 230 rangeCount=derivedProps[i].getRangeCount(); |
250 for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) { | 231 for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) { |
251 start=derivedProps[i].getRangeStart(range); | 232 start=derivedProps[i].getRangeStart(range); |
252 end=derivedProps[i].getRangeEnd(range); | 233 end=derivedProps[i].getRangeEnd(range); |
253 for(; start<=end; ++start) { | 234 for(; start<=end; ++start) { |
254 if(!u_hasBinaryProperty(start, derivedPropsIndex[i])) { | 235 if(!u_hasBinaryProperty(start, derivedPropsIndex[i])) { |
255 dataerrln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %
s)==FALSE is wrong", start, derivedPropsNames[i]); | 236 dataerrln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %
s)==FALSE is wrong", start, derivedPropsNames[i]); |
256 if(++numErrors[i]>=MAX_ERRORS) { | 237 if(++numErrors[i]>=MAX_ERRORS) { |
257 dataerrln("Too many errors, moving to the next test"); | 238 dataerrln("Too many errors, moving to the next test"); |
258 break; | 239 break; |
259 } | 240 } |
260 } | 241 } |
261 } | 242 } |
262 } | 243 } |
263 } | 244 } |
264 | 245 |
265 // invert all properties | 246 // invert all properties |
266 for(i=0; i<LENGTHOF(derivedPropsNames); ++i) { | 247 for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) { |
267 derivedProps[i].complement(); | 248 derivedProps[i].complement(); |
268 } | 249 } |
269 | 250 |
270 // test all FALSE properties | 251 // test all FALSE properties |
271 for(i=0; i<LENGTHOF(derivedPropsNames); ++i) { | 252 for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) { |
272 rangeCount=derivedProps[i].getRangeCount(); | 253 rangeCount=derivedProps[i].getRangeCount(); |
273 for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) { | 254 for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) { |
274 start=derivedProps[i].getRangeStart(range); | 255 start=derivedProps[i].getRangeStart(range); |
275 end=derivedProps[i].getRangeEnd(range); | 256 end=derivedProps[i].getRangeEnd(range); |
276 for(; start<=end; ++start) { | 257 for(; start<=end; ++start) { |
277 if(u_hasBinaryProperty(start, derivedPropsIndex[i])) { | 258 if(u_hasBinaryProperty(start, derivedPropsIndex[i])) { |
278 errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==
TRUE is wrong\n", start, derivedPropsNames[i]); | 259 errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==
TRUE is wrong\n", start, derivedPropsNames[i]); |
279 if(++numErrors[i]>=MAX_ERRORS) { | 260 if(++numErrors[i]>=MAX_ERRORS) { |
280 errln("Too many errors, moving to the next test"); | 261 errln("Too many errors, moving to the next test"); |
281 break; | 262 break; |
(...skipping 13 matching lines...) Expand all Loading... |
295 UErrorCode errorCode=U_ZERO_ERROR; | 276 UErrorCode errorCode=U_ZERO_ERROR; |
296 UnicodeSet alpha(UNICODE_STRING_SIMPLE("[:Alphabetic:]"), errorCode); | 277 UnicodeSet alpha(UNICODE_STRING_SIMPLE("[:Alphabetic:]"), errorCode); |
297 if(U_FAILURE(errorCode)) { | 278 if(U_FAILURE(errorCode)) { |
298 dataerrln("UnicodeSet([:Alphabetic:]) failed - %s", u_errorName(errorCod
e)); | 279 dataerrln("UnicodeSet([:Alphabetic:]) failed - %s", u_errorName(errorCod
e)); |
299 return; | 280 return; |
300 } | 281 } |
301 | 282 |
302 static const char *const falseValues[]={ "N", "No", "F", "False" }; | 283 static const char *const falseValues[]={ "N", "No", "F", "False" }; |
303 static const char *const trueValues[]={ "Y", "Yes", "T", "True" }; | 284 static const char *const trueValues[]={ "Y", "Yes", "T", "True" }; |
304 int32_t i; | 285 int32_t i; |
305 for(i=0; i<LENGTHOF(falseValues); ++i) { | 286 for(i=0; i<UPRV_LENGTHOF(falseValues); ++i) { |
306 UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]"); | 287 UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]"); |
307 pattern.insert(pattern.length()-2, UnicodeString(falseValues[i], -1, US_
INV)); | 288 pattern.insert(pattern.length()-2, UnicodeString(falseValues[i], -1, US_
INV)); |
308 errorCode=U_ZERO_ERROR; | 289 errorCode=U_ZERO_ERROR; |
309 UnicodeSet set(pattern, errorCode); | 290 UnicodeSet set(pattern, errorCode); |
310 if(U_FAILURE(errorCode)) { | 291 if(U_FAILURE(errorCode)) { |
311 errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", falseValues[i],
u_errorName(errorCode)); | 292 errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", falseValues[i],
u_errorName(errorCode)); |
312 continue; | 293 continue; |
313 } | 294 } |
314 set.complement(); | 295 set.complement(); |
315 if(set!=alpha) { | 296 if(set!=alpha) { |
316 errln("UnicodeSet([:Alphabetic=%s:]).complement()!=UnicodeSet([:Alph
abetic:])\n", falseValues[i]); | 297 errln("UnicodeSet([:Alphabetic=%s:]).complement()!=UnicodeSet([:Alph
abetic:])\n", falseValues[i]); |
317 } | 298 } |
318 } | 299 } |
319 for(i=0; i<LENGTHOF(trueValues); ++i) { | 300 for(i=0; i<UPRV_LENGTHOF(trueValues); ++i) { |
320 UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]"); | 301 UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]"); |
321 pattern.insert(pattern.length()-2, UnicodeString(trueValues[i], -1, US_I
NV)); | 302 pattern.insert(pattern.length()-2, UnicodeString(trueValues[i], -1, US_I
NV)); |
322 errorCode=U_ZERO_ERROR; | 303 errorCode=U_ZERO_ERROR; |
323 UnicodeSet set(pattern, errorCode); | 304 UnicodeSet set(pattern, errorCode); |
324 if(U_FAILURE(errorCode)) { | 305 if(U_FAILURE(errorCode)) { |
325 errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", trueValues[i],
u_errorName(errorCode)); | 306 errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", trueValues[i],
u_errorName(errorCode)); |
326 continue; | 307 continue; |
327 } | 308 } |
328 if(set!=alpha) { | 309 if(set!=alpha) { |
329 errln("UnicodeSet([:Alphabetic=%s:])!=UnicodeSet([:Alphabetic:])\n",
trueValues[i]); | 310 errln("UnicodeSet([:Alphabetic=%s:])!=UnicodeSet([:Alphabetic:])\n",
trueValues[i]); |
(...skipping 189 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
519 assertTrue("bpt!=None is a subset of Bidi_M", mirrored.containsAll(bpt)); | 500 assertTrue("bpt!=None is a subset of Bidi_M", mirrored.containsAll(bpt)); |
520 assertTrue("bpt!=None is a subset of bc=ON", other_neutral.containsAll(bpt))
; | 501 assertTrue("bpt!=None is a subset of bc=ON", other_neutral.containsAll(bpt))
; |
521 // The following are true at least initially in Unicode 6.3. | 502 // The following are true at least initially in Unicode 6.3. |
522 UnicodeSet bpt_open("[:bpt=o:]", errorCode); | 503 UnicodeSet bpt_open("[:bpt=o:]", errorCode); |
523 UnicodeSet bpt_close("[:bpt=c:]", errorCode); | 504 UnicodeSet bpt_close("[:bpt=c:]", errorCode); |
524 UnicodeSet ps("[:Ps:]", errorCode); | 505 UnicodeSet ps("[:Ps:]", errorCode); |
525 UnicodeSet pe("[:Pe:]", errorCode); | 506 UnicodeSet pe("[:Pe:]", errorCode); |
526 assertTrue("bpt=Open is a subset of Ps", ps.containsAll(bpt_open)); | 507 assertTrue("bpt=Open is a subset of Ps", ps.containsAll(bpt_open)); |
527 assertTrue("bpt=Close is a subset of Pe", pe.containsAll(bpt_close)); | 508 assertTrue("bpt=Close is a subset of Pe", pe.containsAll(bpt_close)); |
528 } | 509 } |
OLD | NEW |