OLD | NEW |
| (Empty) |
1 /* | |
2 ******************************************************************************* | |
3 * | |
4 * Copyright (C) 2002-2015, International Business Machines | |
5 * Corporation and others. All Rights Reserved. | |
6 * | |
7 ******************************************************************************* | |
8 * file name: cstrcase.c | |
9 * encoding: US-ASCII | |
10 * tab size: 8 (not used) | |
11 * indentation:4 | |
12 * | |
13 * created on: 2002feb21 | |
14 * created by: Markus W. Scherer | |
15 * | |
16 * Test file for string casing C API functions. | |
17 */ | |
18 | |
19 #include <string.h> | |
20 #include "unicode/utypes.h" | |
21 #include "unicode/uchar.h" | |
22 #include "unicode/ustring.h" | |
23 #include "unicode/uloc.h" | |
24 #include "unicode/ubrk.h" | |
25 #include "unicode/ucasemap.h" | |
26 #include "cmemory.h" | |
27 #include "cintltst.h" | |
28 #include "ustr_imp.h" | |
29 | |
30 /* test string case mapping functions --------------------------------------- */ | |
31 | |
32 static void | |
33 TestCaseLower(void) { | |
34 static const UChar | |
35 | |
36 beforeLower[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff
}, | |
37 lowerRoot[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff
}, | |
38 lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff
}; | |
39 | |
40 UChar buffer[32]; | |
41 int32_t length; | |
42 UErrorCode errorCode; | |
43 | |
44 /* lowercase with root locale and separate buffers */ | |
45 buffer[0]=0xabcd; | |
46 errorCode=U_ZERO_ERROR; | |
47 length=u_strToLower(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
48 beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR, | |
49 "", | |
50 &errorCode); | |
51 if( U_FAILURE(errorCode) || | |
52 length!=(sizeof(lowerRoot)/U_SIZEOF_UCHAR) || | |
53 uprv_memcmp(lowerRoot, buffer, length*U_SIZEOF_UCHAR)!=0 || | |
54 buffer[length]!=0 | |
55 ) { | |
56 log_err("error in u_strToLower(root locale)=%ld error=%s string matches:
%s\t\nlowerRoot=%s\t\nbuffer=%s\n", | |
57 length, | |
58 u_errorName(errorCode), | |
59 uprv_memcmp(lowerRoot, buffer, length*U_SIZEOF_UCHAR)==0 && | |
60 buffer[length]==0 ? "yes" : "no", | |
61 aescstrdup(lowerRoot,-1), | |
62 aescstrdup(buffer,-1)); | |
63 } | |
64 | |
65 /* lowercase with turkish locale and in the same buffer */ | |
66 uprv_memcpy(buffer, beforeLower, sizeof(beforeLower)); | |
67 buffer[sizeof(beforeLower)/U_SIZEOF_UCHAR]=0; | |
68 errorCode=U_ZERO_ERROR; | |
69 length=u_strToLower(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
70 buffer, -1, /* implicit srcLength */ | |
71 "tr", | |
72 &errorCode); | |
73 if( U_FAILURE(errorCode) || | |
74 length!=(sizeof(lowerTurkish)/U_SIZEOF_UCHAR) || | |
75 uprv_memcmp(lowerTurkish, buffer, length*U_SIZEOF_UCHAR)!=0 || | |
76 buffer[length]!=0 | |
77 ) { | |
78 log_err("error in u_strToLower(turkish locale)=%ld error=%s string match
es: %s\n", | |
79 length, | |
80 u_errorName(errorCode), | |
81 uprv_memcmp(lowerTurkish, buffer, length*U_SIZEOF_UCHAR)==0 && buffe
r[length]==0 ? "yes" : "no"); | |
82 } | |
83 | |
84 /* test preflighting */ | |
85 buffer[0]=buffer[2]=0xabcd; | |
86 errorCode=U_ZERO_ERROR; | |
87 length=u_strToLower(buffer, 2, /* set destCapacity=2 */ | |
88 beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR, | |
89 "", | |
90 &errorCode); | |
91 if( errorCode!=U_BUFFER_OVERFLOW_ERROR || | |
92 length!=(sizeof(lowerRoot)/U_SIZEOF_UCHAR) || | |
93 uprv_memcmp(lowerRoot, buffer, 2*U_SIZEOF_UCHAR)!=0 || | |
94 buffer[2]!=0xabcd | |
95 ) { | |
96 log_err("error in u_strToLower(root locale preflighting)=%ld error=%s st
ring matches: %s\n", | |
97 length, | |
98 u_errorName(errorCode), | |
99 uprv_memcmp(lowerRoot, buffer, 2*U_SIZEOF_UCHAR)==0 && buffer[2]==0x
abcd ? "yes" : "no"); | |
100 } | |
101 | |
102 /* test error handling */ | |
103 errorCode=U_ZERO_ERROR; | |
104 length=u_strToLower(NULL, sizeof(buffer)/U_SIZEOF_UCHAR, | |
105 beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR, | |
106 "", | |
107 &errorCode); | |
108 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
109 log_err("error in u_strToLower(root locale dest=NULL)=%ld error=%s\n", | |
110 length, | |
111 u_errorName(errorCode)); | |
112 } | |
113 | |
114 buffer[0]=0xabcd; | |
115 errorCode=U_ZERO_ERROR; | |
116 length=u_strToLower(buffer, -1, | |
117 beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR, | |
118 "", | |
119 &errorCode); | |
120 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR || | |
121 buffer[0]!=0xabcd | |
122 ) { | |
123 log_err("error in u_strToLower(root locale destCapacity=-1)=%ld error=%s
buffer[0]==0x%lx\n", | |
124 length, | |
125 u_errorName(errorCode), | |
126 buffer[0]); | |
127 } | |
128 } | |
129 | |
130 static void | |
131 TestCaseUpper(void) { | |
132 static const UChar | |
133 | |
134 beforeUpper[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03,
0xd93f, 0xdfff }, | |
135 upperRoot[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x
46, 0x49, 0xd93f, 0xdfff }, | |
136 upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x
46, 0x49, 0xd93f, 0xdfff }; | |
137 | |
138 UChar buffer[32]; | |
139 int32_t length; | |
140 UErrorCode errorCode; | |
141 | |
142 /* uppercase with root locale and in the same buffer */ | |
143 uprv_memcpy(buffer, beforeUpper, sizeof(beforeUpper)); | |
144 errorCode=U_ZERO_ERROR; | |
145 length=u_strToUpper(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
146 buffer, sizeof(beforeUpper)/U_SIZEOF_UCHAR, | |
147 "", | |
148 &errorCode); | |
149 if( U_FAILURE(errorCode) || | |
150 length!=(sizeof(upperRoot)/U_SIZEOF_UCHAR) || | |
151 uprv_memcmp(upperRoot, buffer, length*U_SIZEOF_UCHAR)!=0 || | |
152 buffer[length]!=0 | |
153 ) { | |
154 log_err("error in u_strToUpper(root locale)=%ld error=%s string matches:
%s\n", | |
155 length, | |
156 u_errorName(errorCode), | |
157 uprv_memcmp(upperRoot, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[l
ength]==0 ? "yes" : "no"); | |
158 } | |
159 | |
160 /* uppercase with turkish locale and separate buffers */ | |
161 buffer[0]=0xabcd; | |
162 errorCode=U_ZERO_ERROR; | |
163 length=u_strToUpper(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
164 beforeUpper, sizeof(beforeUpper)/U_SIZEOF_UCHAR, | |
165 "tr", | |
166 &errorCode); | |
167 if( U_FAILURE(errorCode) || | |
168 length!=(sizeof(upperTurkish)/U_SIZEOF_UCHAR) || | |
169 uprv_memcmp(upperTurkish, buffer, length*U_SIZEOF_UCHAR)!=0 || | |
170 buffer[length]!=0 | |
171 ) { | |
172 log_err("error in u_strToUpper(turkish locale)=%ld error=%s string match
es: %s\n", | |
173 length, | |
174 u_errorName(errorCode), | |
175 uprv_memcmp(upperTurkish, buffer, length*U_SIZEOF_UCHAR)==0 && buffe
r[length]==0 ? "yes" : "no"); | |
176 } | |
177 | |
178 /* test preflighting */ | |
179 errorCode=U_ZERO_ERROR; | |
180 length=u_strToUpper(NULL, 0, | |
181 beforeUpper, sizeof(beforeUpper)/U_SIZEOF_UCHAR, | |
182 "tr", | |
183 &errorCode); | |
184 if( errorCode!=U_BUFFER_OVERFLOW_ERROR || | |
185 length!=(sizeof(upperTurkish)/U_SIZEOF_UCHAR) | |
186 ) { | |
187 log_err("error in u_strToUpper(turkish locale pure preflighting)=%ld err
or=%s\n", | |
188 length, | |
189 u_errorName(errorCode)); | |
190 } | |
191 | |
192 /* test error handling */ | |
193 buffer[0]=0xabcd; | |
194 errorCode=U_ZERO_ERROR; | |
195 length=u_strToUpper(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
196 NULL, sizeof(beforeUpper)/U_SIZEOF_UCHAR, | |
197 "tr", | |
198 &errorCode); | |
199 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR || | |
200 buffer[0]!=0xabcd | |
201 ) { | |
202 log_err("error in u_strToUpper(turkish locale src=NULL)=%ld error=%s buf
fer[0]==0x%lx\n", | |
203 length, | |
204 u_errorName(errorCode), | |
205 buffer[0]); | |
206 } | |
207 | |
208 buffer[0]=0xabcd; | |
209 errorCode=U_ZERO_ERROR; | |
210 length=u_strToUpper(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
211 beforeUpper, -2, | |
212 "tr", | |
213 &errorCode); | |
214 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR || | |
215 buffer[0]!=0xabcd | |
216 ) { | |
217 log_err("error in u_strToUpper(turkish locale srcLength=-2)=%ld error=%s
buffer[0]==0x%lx\n", | |
218 length, | |
219 u_errorName(errorCode), | |
220 buffer[0]); | |
221 } | |
222 } | |
223 | |
224 #if !UCONFIG_NO_BREAK_ITERATION | |
225 | |
226 static void | |
227 TestCaseTitle(void) { | |
228 static const UChar | |
229 | |
230 beforeTitle[]= { 0x61, 0x42, 0x20, 0x69, 0x3c2, 0x20, 0xdf, 0x3c3, 0x
2f, 0xfb03, 0xd93f, 0xdfff }, | |
231 titleWord[]= { 0x41, 0x62, 0x20, 0x49, 0x3c2, 0x20, 0x53, 0x73, 0x3c3, 0x
2f, 0x46, 0x66, 0x69, 0xd93f, 0xdfff }, | |
232 titleChar[]= { 0x41, 0x42, 0x20, 0x49, 0x3a3, 0x20, 0x53, 0x73, 0x3a3, 0x
2f, 0x46, 0x66, 0x69, 0xd93f, 0xdfff }; | |
233 | |
234 UChar buffer[32]; | |
235 UBreakIterator *titleIterChars; | |
236 int32_t length; | |
237 UErrorCode errorCode; | |
238 | |
239 errorCode=U_ZERO_ERROR; | |
240 titleIterChars=ubrk_open(UBRK_CHARACTER, "", beforeTitle, sizeof(beforeTitle
)/U_SIZEOF_UCHAR, &errorCode); | |
241 if(U_FAILURE(errorCode)) { | |
242 log_err_status(errorCode, "error: ubrk_open(UBRK_CHARACTER)->%s\n", u_er
rorName(errorCode)); | |
243 return; | |
244 } | |
245 | |
246 /* titlecase with standard break iterator and in the same buffer */ | |
247 uprv_memcpy(buffer, beforeTitle, sizeof(beforeTitle)); | |
248 errorCode=U_ZERO_ERROR; | |
249 length=u_strToTitle(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
250 buffer, sizeof(beforeTitle)/U_SIZEOF_UCHAR, | |
251 NULL, "", | |
252 &errorCode); | |
253 if( U_FAILURE(errorCode) || | |
254 length!=(sizeof(titleWord)/U_SIZEOF_UCHAR) || | |
255 uprv_memcmp(titleWord, buffer, length*U_SIZEOF_UCHAR)!=0 || | |
256 buffer[length]!=0 | |
257 ) { | |
258 log_err("error in u_strToTitle(standard iterator)=%ld error=%s string ma
tches: %s\n", | |
259 length, | |
260 u_errorName(errorCode), | |
261 uprv_memcmp(titleWord, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[l
ength]==0 ? "yes" : "no"); | |
262 } | |
263 | |
264 /* titlecase with UBRK_CHARACTERS and separate buffers */ | |
265 buffer[0]=0xabcd; | |
266 errorCode=U_ZERO_ERROR; | |
267 length=u_strToTitle(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
268 beforeTitle, sizeof(beforeTitle)/U_SIZEOF_UCHAR, | |
269 titleIterChars, "", | |
270 &errorCode); | |
271 if( U_FAILURE(errorCode) || | |
272 length!=(sizeof(titleChar)/U_SIZEOF_UCHAR) || | |
273 uprv_memcmp(titleChar, buffer, length*U_SIZEOF_UCHAR)!=0 || | |
274 buffer[length]!=0 | |
275 ) { | |
276 log_err("error in u_strToTitle(UBRK_CHARACTERS)=%ld error=%s string matc
hes: %s\n", | |
277 length, | |
278 u_errorName(errorCode), | |
279 uprv_memcmp(titleChar, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[l
ength]==0 ? "yes" : "no"); | |
280 } | |
281 | |
282 /* test preflighting */ | |
283 errorCode=U_ZERO_ERROR; | |
284 length=u_strToTitle(NULL, 0, | |
285 beforeTitle, sizeof(beforeTitle)/U_SIZEOF_UCHAR, | |
286 titleIterChars, "", | |
287 &errorCode); | |
288 if( errorCode!=U_BUFFER_OVERFLOW_ERROR || | |
289 length!=(sizeof(titleChar)/U_SIZEOF_UCHAR) | |
290 ) { | |
291 log_err("error in u_strToTitle(UBRK_CHARACTERS pure preflighting)=%ld er
ror=%s\n", | |
292 length, | |
293 u_errorName(errorCode)); | |
294 } | |
295 | |
296 /* test error handling */ | |
297 buffer[0]=0xabcd; | |
298 errorCode=U_ZERO_ERROR; | |
299 length=u_strToTitle(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
300 NULL, sizeof(beforeTitle)/U_SIZEOF_UCHAR, | |
301 titleIterChars, "", | |
302 &errorCode); | |
303 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR || | |
304 buffer[0]!=0xabcd | |
305 ) { | |
306 log_err("error in u_strToTitle(UBRK_CHARACTERS src=NULL)=%ld error=%s bu
ffer[0]==0x%lx\n", | |
307 length, | |
308 u_errorName(errorCode), | |
309 buffer[0]); | |
310 } | |
311 | |
312 buffer[0]=0xabcd; | |
313 errorCode=U_ZERO_ERROR; | |
314 length=u_strToTitle(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
315 beforeTitle, -2, | |
316 titleIterChars, "", | |
317 &errorCode); | |
318 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR || | |
319 buffer[0]!=0xabcd | |
320 ) { | |
321 log_err("error in u_strToTitle(UBRK_CHARACTERS srcLength=-2)=%ld error=%
s buffer[0]==0x%lx\n", | |
322 length, | |
323 u_errorName(errorCode), | |
324 buffer[0]); | |
325 } | |
326 | |
327 ubrk_close(titleIterChars); | |
328 } | |
329 | |
330 static void | |
331 TestCaseDutchTitle(void) { | |
332 static const UChar | |
333 | |
334 beforeTitle[]= { 0x69, 0x6A, 0x73, 0x73, 0x45, 0x6c, 0x20, 0x69, 0x67, 0x6c
, 0x4f, 0x6f , 0x20 , 0x49, 0x4A, 0x53, 0x53, 0x45, 0x4C }, | |
335 titleRoot[]= { 0x49, 0x6A, 0x73, 0x73, 0x65, 0x6c, 0x20, 0x49, 0x67, 0x6c
, 0x6f, 0x6f , 0x20 , 0x49, 0x6A, 0x73, 0x73, 0x65, 0x6C }, | |
336 titleDutch[]= { 0x49, 0x4A, 0x73, 0x73, 0x65, 0x6c, 0x20, 0x49, 0x67, 0x6c
, 0x6f, 0x6f , 0x20 , 0x49, 0x4A, 0x73, 0x73, 0x65, 0x6C }; | |
337 | |
338 UChar buffer[32]; | |
339 UBreakIterator *titleIterWord; | |
340 int32_t length; | |
341 UErrorCode errorCode; | |
342 | |
343 errorCode=U_ZERO_ERROR; | |
344 titleIterWord=ubrk_open(UBRK_WORD, "", beforeTitle, sizeof(beforeTitle)/U_SI
ZEOF_UCHAR, &errorCode); | |
345 if(U_FAILURE(errorCode)) { | |
346 log_err_status(errorCode, "error: ubrk_open(UBRK_WORD)->%s\n", u_errorNa
me(errorCode)); | |
347 return; | |
348 } | |
349 | |
350 /* titlecase with default locale */ | |
351 buffer[0]=0xabcd; | |
352 errorCode=U_ZERO_ERROR; | |
353 length=u_strToTitle(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
354 beforeTitle, sizeof(beforeTitle)/U_SIZEOF_UCHAR, | |
355 titleIterWord, "", | |
356 &errorCode); | |
357 if( U_FAILURE(errorCode) || | |
358 length!=(sizeof(titleRoot)/U_SIZEOF_UCHAR) || | |
359 uprv_memcmp(titleRoot, buffer, length*U_SIZEOF_UCHAR)!=0 || | |
360 buffer[length]!=0 | |
361 ) { | |
362 char charsOut[21]; | |
363 u_UCharsToChars(buffer,charsOut,sizeof(charsOut)); | |
364 log_err("error in u_strToTitle(UBRK_CHARACTERS)=%ld error=%s root locale
string matches: %s\noutput buffer is {%s}\n", | |
365 length, | |
366 u_errorName(errorCode), | |
367 uprv_memcmp(titleRoot, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[l
ength]==0 ? "yes" : "no", charsOut); | |
368 } | |
369 /* titlecase with Dutch locale */ | |
370 buffer[0]=0xabcd; | |
371 errorCode=U_ZERO_ERROR; | |
372 length=u_strToTitle(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
373 beforeTitle, sizeof(beforeTitle)/U_SIZEOF_UCHAR, | |
374 titleIterWord, "nl", | |
375 &errorCode); | |
376 if( U_FAILURE(errorCode) || | |
377 length!=(sizeof(titleDutch)/U_SIZEOF_UCHAR) || | |
378 uprv_memcmp(titleDutch, buffer, length*U_SIZEOF_UCHAR)!=0 || | |
379 buffer[length]!=0 | |
380 ) { | |
381 char charsOut[21]; | |
382 u_UCharsToChars(buffer,charsOut,sizeof(charsOut)); | |
383 log_err("error in u_strToTitle(UBRK_CHARACTERS)=%ld error=%s dutch local
e string matches: %s\noutput buffer is {%s}\n", | |
384 length, | |
385 u_errorName(errorCode), | |
386 uprv_memcmp(titleDutch, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[
length]==0 ? "yes" : "no", charsOut); | |
387 } | |
388 | |
389 ubrk_close(titleIterWord); | |
390 } | |
391 | |
392 #endif | |
393 | |
394 /* test case folding and case-insensitive string compare -------------------- */ | |
395 | |
396 static void | |
397 TestCaseFolding(void) { | |
398 /* | |
399 * CaseFolding.txt says about i and its cousins: | |
400 * 0049; C; 0069; # LATIN CAPITAL LETTER I | |
401 * 0049; T; 0131; # LATIN CAPITAL LETTER I | |
402 * | |
403 * 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE | |
404 * 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE | |
405 * That's all. | |
406 * See CaseFolding.txt and the Unicode Standard for how to apply the case fo
ldings. | |
407 */ | |
408 static const UChar32 | |
409 simple[]={ | |
410 /* input, default, exclude special i */ | |
411 0x61, 0x61, 0x61, | |
412 0x49, 0x69, 0x131, | |
413 0x130, 0x130, 0x69, | |
414 0x131, 0x131, 0x131, | |
415 0xdf, 0xdf, 0xdf, | |
416 0xfb03, 0xfb03, 0xfb03, | |
417 0x1040e,0x10436,0x10436, | |
418 0x5ffff,0x5ffff,0x5ffff | |
419 }; | |
420 | |
421 static const UChar | |
422 mixed[]= { 0x61, 0x42, 0x130, 0x49, 0x131, 0x3d0, 0xdf
, 0xfb03, 0xd93f, 0xdfff }, | |
423 foldedDefault[]= { 0x61, 0x62, 0x69, 0x307, 0x69, 0x131, 0x3b2, 0x73
, 0x73, 0x66, 0x66, 0x69, 0xd93f, 0xdfff }, | |
424 foldedExcludeSpecialI[]={ 0x61, 0x62, 0x69, 0x131, 0x131, 0x3b2, 0x73
, 0x73, 0x66, 0x66, 0x69, 0xd93f, 0xdfff }; | |
425 | |
426 UVersionInfo unicodeVersion={ 0, 0, 17, 89 }, unicode_3_1={ 3, 1, 0, 0 }; | |
427 | |
428 const UChar32 *p; | |
429 int32_t i; | |
430 | |
431 UChar buffer[32]; | |
432 int32_t length; | |
433 UErrorCode errorCode; | |
434 UBool isUnicode_3_1; | |
435 | |
436 /* if unicodeVersion()>=3.1 then test exclude-special-i cases as well */ | |
437 u_getUnicodeVersion(unicodeVersion); | |
438 isUnicode_3_1= uprv_memcmp(unicodeVersion, unicode_3_1, 4)>=0; | |
439 | |
440 /* test simple case folding */ | |
441 p=simple; | |
442 for(i=0; i<sizeof(simple)/12; p+=3, ++i) { | |
443 if(u_foldCase(p[0], U_FOLD_CASE_DEFAULT)!=p[1]) { | |
444 log_err("error: u_foldCase(0x%04lx, default)=0x%04lx instead of 0x%0
4lx\n", | |
445 p[0], u_foldCase(p[0], U_FOLD_CASE_DEFAULT), p[1]); | |
446 return; | |
447 } | |
448 | |
449 if(isUnicode_3_1 && u_foldCase(p[0], U_FOLD_CASE_EXCLUDE_SPECIAL_I)!=p[2
]) { | |
450 log_err("error: u_foldCase(0x%04lx, exclude special i)=0x%04lx inste
ad of 0x%04lx\n", | |
451 p[0], u_foldCase(p[0], U_FOLD_CASE_EXCLUDE_SPECIAL_I), p[2])
; | |
452 return; | |
453 } | |
454 } | |
455 | |
456 /* test full string case folding with default option and separate buffers */ | |
457 buffer[0]=0xabcd; | |
458 errorCode=U_ZERO_ERROR; | |
459 length=u_strFoldCase(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
460 mixed, sizeof(mixed)/U_SIZEOF_UCHAR, | |
461 U_FOLD_CASE_DEFAULT, | |
462 &errorCode); | |
463 if( U_FAILURE(errorCode) || | |
464 length!=(sizeof(foldedDefault)/U_SIZEOF_UCHAR) || | |
465 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)!=0 || | |
466 buffer[length]!=0 | |
467 ) { | |
468 log_err("error in u_strFoldCase(default)=%ld error=%s string matches: %s
\n", | |
469 length, | |
470 u_errorName(errorCode), | |
471 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)==0 && buff
er[length]==0 ? "yes" : "no"); | |
472 } | |
473 | |
474 /* exclude special i */ | |
475 if(isUnicode_3_1) { | |
476 buffer[0]=0xabcd; | |
477 errorCode=U_ZERO_ERROR; | |
478 length=u_strFoldCase(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
479 mixed, sizeof(mixed)/U_SIZEOF_UCHAR, | |
480 U_FOLD_CASE_EXCLUDE_SPECIAL_I, | |
481 &errorCode); | |
482 if( U_FAILURE(errorCode) || | |
483 length!=(sizeof(foldedExcludeSpecialI)/U_SIZEOF_UCHAR) || | |
484 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)!=0
|| | |
485 buffer[length]!=0 | |
486 ) { | |
487 log_err("error in u_strFoldCase(exclude special i)=%ld error=%s stri
ng matches: %s\n", | |
488 length, | |
489 u_errorName(errorCode), | |
490 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR
)==0 && buffer[length]==0 ? "yes" : "no"); | |
491 } | |
492 } | |
493 | |
494 /* test full string case folding with default option and in the same buffer
*/ | |
495 uprv_memcpy(buffer, mixed, sizeof(mixed)); | |
496 buffer[sizeof(mixed)/U_SIZEOF_UCHAR]=0; | |
497 errorCode=U_ZERO_ERROR; | |
498 length=u_strFoldCase(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
499 buffer, -1, /* implicit srcLength */ | |
500 U_FOLD_CASE_DEFAULT, | |
501 &errorCode); | |
502 if( U_FAILURE(errorCode) || | |
503 length!=(sizeof(foldedDefault)/U_SIZEOF_UCHAR) || | |
504 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)!=0 || | |
505 buffer[length]!=0 | |
506 ) { | |
507 log_err("error in u_strFoldCase(default same buffer)=%ld error=%s string
matches: %s\n", | |
508 length, | |
509 u_errorName(errorCode), | |
510 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)==0 && buff
er[length]==0 ? "yes" : "no"); | |
511 } | |
512 | |
513 /* test full string case folding, exclude special i, in the same buffer */ | |
514 if(isUnicode_3_1) { | |
515 uprv_memcpy(buffer, mixed, sizeof(mixed)); | |
516 errorCode=U_ZERO_ERROR; | |
517 length=u_strFoldCase(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
518 buffer, sizeof(mixed)/U_SIZEOF_UCHAR, | |
519 U_FOLD_CASE_EXCLUDE_SPECIAL_I, | |
520 &errorCode); | |
521 if( U_FAILURE(errorCode) || | |
522 length!=(sizeof(foldedExcludeSpecialI)/U_SIZEOF_UCHAR) || | |
523 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)!=0
|| | |
524 buffer[length]!=0 | |
525 ) { | |
526 log_err("error in u_strFoldCase(exclude special i same buffer)=%ld e
rror=%s string matches: %s\n", | |
527 length, | |
528 u_errorName(errorCode), | |
529 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR
)==0 && buffer[length]==0 ? "yes" : "no"); | |
530 } | |
531 } | |
532 | |
533 /* test preflighting */ | |
534 buffer[0]=buffer[2]=0xabcd; | |
535 errorCode=U_ZERO_ERROR; | |
536 length=u_strFoldCase(buffer, 2, /* set destCapacity=2 */ | |
537 mixed, sizeof(mixed)/U_SIZEOF_UCHAR, | |
538 U_FOLD_CASE_DEFAULT, | |
539 &errorCode); | |
540 if( errorCode!=U_BUFFER_OVERFLOW_ERROR || | |
541 length!=(sizeof(foldedDefault)/U_SIZEOF_UCHAR) || | |
542 uprv_memcmp(foldedDefault, buffer, 2*U_SIZEOF_UCHAR)!=0 || | |
543 buffer[2]!=0xabcd | |
544 ) { | |
545 log_err("error in u_strFoldCase(default preflighting)=%ld error=%s strin
g matches: %s\n", | |
546 length, | |
547 u_errorName(errorCode), | |
548 uprv_memcmp(foldedDefault, buffer, 2*U_SIZEOF_UCHAR)==0 && buffer[2]
==0xabcd ? "yes" : "no"); | |
549 } | |
550 | |
551 errorCode=U_ZERO_ERROR; | |
552 length=u_strFoldCase(NULL, 0, | |
553 mixed, sizeof(mixed)/U_SIZEOF_UCHAR, | |
554 U_FOLD_CASE_DEFAULT, | |
555 &errorCode); | |
556 if( errorCode!=U_BUFFER_OVERFLOW_ERROR || | |
557 length!=(sizeof(foldedDefault)/U_SIZEOF_UCHAR) | |
558 ) { | |
559 log_err("error in u_strFoldCase(default pure preflighting)=%ld error=%s\
n", | |
560 length, | |
561 u_errorName(errorCode)); | |
562 } | |
563 | |
564 /* test error handling */ | |
565 errorCode=U_ZERO_ERROR; | |
566 length=u_strFoldCase(NULL, sizeof(buffer)/U_SIZEOF_UCHAR, | |
567 mixed, sizeof(mixed)/U_SIZEOF_UCHAR, | |
568 U_FOLD_CASE_DEFAULT, | |
569 &errorCode); | |
570 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
571 log_err("error in u_strFoldCase(default dest=NULL)=%ld error=%s\n", | |
572 length, | |
573 u_errorName(errorCode)); | |
574 } | |
575 | |
576 buffer[0]=0xabcd; | |
577 errorCode=U_ZERO_ERROR; | |
578 length=u_strFoldCase(buffer, -1, | |
579 mixed, sizeof(mixed)/U_SIZEOF_UCHAR, | |
580 U_FOLD_CASE_DEFAULT, | |
581 &errorCode); | |
582 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR || | |
583 buffer[0]!=0xabcd | |
584 ) { | |
585 log_err("error in u_strFoldCase(default destCapacity=-1)=%ld error=%s bu
ffer[0]==0x%lx\n", | |
586 length, | |
587 u_errorName(errorCode), | |
588 buffer[0]); | |
589 } | |
590 | |
591 buffer[0]=0xabcd; | |
592 errorCode=U_ZERO_ERROR; | |
593 length=u_strFoldCase(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
594 NULL, sizeof(mixed)/U_SIZEOF_UCHAR, | |
595 U_FOLD_CASE_EXCLUDE_SPECIAL_I, | |
596 &errorCode); | |
597 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR || | |
598 buffer[0]!=0xabcd | |
599 ) { | |
600 log_err("error in u_strFoldCase(exclude special i src=NULL)=%ld error=%s
buffer[0]==0x%lx\n", | |
601 length, | |
602 u_errorName(errorCode), | |
603 buffer[0]); | |
604 } | |
605 | |
606 buffer[0]=0xabcd; | |
607 errorCode=U_ZERO_ERROR; | |
608 length=u_strFoldCase(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
609 mixed, -2, | |
610 U_FOLD_CASE_EXCLUDE_SPECIAL_I, | |
611 &errorCode); | |
612 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR || | |
613 buffer[0]!=0xabcd | |
614 ) { | |
615 log_err("error in u_strFoldCase(exclude special i srcLength=-2)=%ld erro
r=%s buffer[0]==0x%lx\n", | |
616 length, | |
617 u_errorName(errorCode), | |
618 buffer[0]); | |
619 } | |
620 } | |
621 | |
622 static void | |
623 TestCaseCompare(void) { | |
624 static const UChar | |
625 | |
626 mixed[]= { 0x61, 0x42, 0x131, 0x3a3, 0xdf, 0xfb03,
0xd93f, 0xdfff, 0 }, | |
627 otherDefault[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x46, 0x66, 0
x49, 0xd93f, 0xdfff, 0 }, | |
628 otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x66, 0x46, 0
x69, 0xd93f, 0xdfff, 0 }, | |
629 different[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x46, 0x66, 0
x49, 0xd93f, 0xdffd, 0 }; | |
630 | |
631 UVersionInfo unicodeVersion={ 0, 0, 17, 89 }, unicode_3_1={ 3, 1, 0, 0 }; | |
632 | |
633 int32_t result, lenMixed, lenOtherDefault, lenOtherExcludeSpecialI, lenDiffe
rent; | |
634 UErrorCode errorCode; | |
635 UBool isUnicode_3_1; | |
636 | |
637 errorCode=U_ZERO_ERROR; | |
638 | |
639 lenMixed=u_strlen(mixed); | |
640 lenOtherDefault=u_strlen(otherDefault); | |
641 (void)lenOtherDefault; /* Suppress set but not used warning. */ | |
642 lenOtherExcludeSpecialI=u_strlen(otherExcludeSpecialI); | |
643 lenDifferent=u_strlen(different); | |
644 | |
645 /* if unicodeVersion()>=3.1 then test exclude-special-i cases as well */ | |
646 u_getUnicodeVersion(unicodeVersion); | |
647 isUnicode_3_1= uprv_memcmp(unicodeVersion, unicode_3_1, 4)>=0; | |
648 (void)isUnicode_3_1; /* Suppress set but not used warning. */ | |
649 | |
650 /* test u_strcasecmp() */ | |
651 result=u_strcasecmp(mixed, otherDefault, U_FOLD_CASE_DEFAULT); | |
652 if(result!=0) { | |
653 log_err("error: u_strcasecmp(mixed, other, default)=%ld instead of 0\n",
result); | |
654 } | |
655 result=u_strCaseCompare(mixed, -1, otherDefault, -1, U_FOLD_CASE_DEFAULT, &e
rrorCode); | |
656 if(result!=0) { | |
657 log_err("error: u_strCaseCompare(mixed, other, default)=%ld instead of 0
\n", result); | |
658 } | |
659 | |
660 /* test u_strcasecmp() - exclude special i */ | |
661 result=u_strcasecmp(mixed, otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL
_I); | |
662 if(result!=0) { | |
663 log_err("error: u_strcasecmp(mixed, other, exclude special i)=%ld instea
d of 0\n", result); | |
664 } | |
665 result=u_strCaseCompare(mixed, lenMixed, otherExcludeSpecialI, lenOtherExclu
deSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode); | |
666 if(result!=0) { | |
667 log_err("error: u_strCaseCompare(mixed, other, exclude special i)=%ld in
stead of 0\n", result); | |
668 } | |
669 | |
670 /* test u_strcasecmp() */ | |
671 result=u_strcasecmp(mixed, different, U_FOLD_CASE_DEFAULT); | |
672 if(result<=0) { | |
673 log_err("error: u_strcasecmp(mixed, different, default)=%ld instead of p
ositive\n", result); | |
674 } | |
675 result=u_strCaseCompare(mixed, -1, different, lenDifferent, U_FOLD_CASE_DEFA
ULT, &errorCode); | |
676 if(result<=0) { | |
677 log_err("error: u_strCaseCompare(mixed, different, default)=%ld instead
of positive\n", result); | |
678 } | |
679 | |
680 /* test u_strncasecmp() - stop before the sharp s (U+00df) */ | |
681 result=u_strncasecmp(mixed, different, 4, U_FOLD_CASE_DEFAULT); | |
682 if(result!=0) { | |
683 log_err("error: u_strncasecmp(mixed, different, 4, default)=%ld instead
of 0\n", result); | |
684 } | |
685 result=u_strCaseCompare(mixed, 4, different, 4, U_FOLD_CASE_DEFAULT, &errorC
ode); | |
686 if(result!=0) { | |
687 log_err("error: u_strCaseCompare(mixed, 4, different, 4, default)=%ld in
stead of 0\n", result); | |
688 } | |
689 | |
690 /* test u_strncasecmp() - stop in the middle of the sharp s (U+00df) */ | |
691 result=u_strncasecmp(mixed, different, 5, U_FOLD_CASE_DEFAULT); | |
692 if(result<=0) { | |
693 log_err("error: u_strncasecmp(mixed, different, 5, default)=%ld instead
of positive\n", result); | |
694 } | |
695 result=u_strCaseCompare(mixed, 5, different, 5, U_FOLD_CASE_DEFAULT, &errorC
ode); | |
696 if(result<=0) { | |
697 log_err("error: u_strCaseCompare(mixed, 5, different, 5, default)=%ld in
stead of positive\n", result); | |
698 } | |
699 | |
700 /* test u_memcasecmp() - stop before the sharp s (U+00df) */ | |
701 result=u_memcasecmp(mixed, different, 4, U_FOLD_CASE_DEFAULT); | |
702 if(result!=0) { | |
703 log_err("error: u_memcasecmp(mixed, different, 4, default)=%ld instead o
f 0\n", result); | |
704 } | |
705 | |
706 /* test u_memcasecmp() - stop in the middle of the sharp s (U+00df) */ | |
707 result=u_memcasecmp(mixed, different, 5, U_FOLD_CASE_DEFAULT); | |
708 if(result<=0) { | |
709 log_err("error: u_memcasecmp(mixed, different, 5, default)=%ld instead o
f positive\n", result); | |
710 } | |
711 } | |
712 | |
713 /* test UCaseMap ------------------------------------------------------------ */ | |
714 | |
715 /* | |
716 * API test for UCaseMap; | |
717 * test cases for actual case mappings using UCaseMap see | |
718 * intltest utility/UnicodeStringTest/StringCaseTest/TestCasing | |
719 */ | |
720 static void | |
721 TestUCaseMap(void) { | |
722 static const char | |
723 aBc[] ={ 0x61, 0x42, 0x63, 0 }, | |
724 abc[] ={ 0x61, 0x62, 0x63, 0 }, | |
725 ABCg[]={ 0x41, 0x42, 0x43, 0x67, 0 }, | |
726 defg[]={ 0x64, 0x65, 0x66, 0x67, 0 }; | |
727 char utf8Out[8]; | |
728 | |
729 UCaseMap *csm; | |
730 const char *locale; | |
731 uint32_t options; | |
732 int32_t length; | |
733 UErrorCode errorCode; | |
734 | |
735 errorCode=U_ZERO_ERROR; | |
736 csm=ucasemap_open("tur", 0xa5, &errorCode); | |
737 if(U_FAILURE(errorCode)) { | |
738 log_err("ucasemap_open(\"tur\") failed - %s\n", u_errorName(errorCode)); | |
739 return; | |
740 } | |
741 locale=ucasemap_getLocale(csm); | |
742 if(0!=strcmp(locale, "tr")) { | |
743 log_err("ucasemap_getLocale(ucasemap_open(\"tur\"))==%s!=\"tr\"\n", loca
le); | |
744 } | |
745 /* overly long locale IDs get truncated to their language code to avoid unne
cessary allocation */ | |
746 ucasemap_setLocale(csm, "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-d
og", &errorCode); | |
747 locale=ucasemap_getLocale(csm); | |
748 if(0!=strcmp(locale, "i-klingon")) { | |
749 log_err("ucasemap_getLocale(ucasemap_setLocale(\"I-kLInGOn-the-quick-br.
..\"))==%s!=\"i-klingon\"\n", locale); | |
750 } | |
751 | |
752 errorCode=U_ZERO_ERROR; | |
753 options=ucasemap_getOptions(csm); | |
754 if(options!=0xa5) { | |
755 log_err("ucasemap_getOptions(ucasemap_open(0xa5))==0x%lx!=0xa5\n", (long
)options); | |
756 } | |
757 ucasemap_setOptions(csm, 0x333333, &errorCode); | |
758 options=ucasemap_getOptions(csm); | |
759 if(options!=0x333333) { | |
760 log_err("ucasemap_getOptions(ucasemap_setOptions(0x333333))==0x%lx!=0x33
3333\n", (long)options); | |
761 } | |
762 | |
763 /* test case mapping API; not all permutations necessary due to shared imple
mentation code */ | |
764 | |
765 /* NUL terminated source */ | |
766 errorCode=U_ZERO_ERROR; | |
767 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, -1,
&errorCode); | |
768 if(U_FAILURE(errorCode) || length!=3 || 0!=strcmp(abc, utf8Out)) { | |
769 log_err("ucasemap_utf8ToLower(aBc\\0) failed\n"); | |
770 } | |
771 | |
772 /* incoming failure code */ | |
773 errorCode=U_PARSE_ERROR; | |
774 strcpy(utf8Out, defg); | |
775 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, -1,
&errorCode); | |
776 if(errorCode!=U_PARSE_ERROR || 0!=strcmp(defg, utf8Out)) { | |
777 log_err("ucasemap_utf8ToLower(failure) failed\n"); | |
778 } | |
779 | |
780 /* overlapping input & output */ | |
781 errorCode=U_ZERO_ERROR; | |
782 strcpy(utf8Out, aBc); | |
783 length=ucasemap_utf8ToUpper(csm, utf8Out, 2, utf8Out+1, 2, &errorCode); | |
784 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(aBc, utf8Out)) { | |
785 log_err("ucasemap_utf8ToUpper(overlap 1) failed\n"); | |
786 } | |
787 | |
788 /* overlap in the other direction */ | |
789 errorCode=U_ZERO_ERROR; | |
790 strcpy(utf8Out, aBc); | |
791 length=ucasemap_utf8ToUpper(csm, utf8Out+1, 2, utf8Out, 2, &errorCode); | |
792 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(aBc, utf8Out)) { | |
793 log_err("ucasemap_utf8ToUpper(overlap 2) failed\n"); | |
794 } | |
795 | |
796 /* NULL destination */ | |
797 errorCode=U_ZERO_ERROR; | |
798 strcpy(utf8Out, defg); | |
799 length=ucasemap_utf8ToLower(csm, NULL, (int32_t)sizeof(utf8Out), aBc, -1, &e
rrorCode); | |
800 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) { | |
801 log_err("ucasemap_utf8ToLower(dest=NULL) failed\n"); | |
802 } | |
803 | |
804 /* destCapacity<0 */ | |
805 errorCode=U_ZERO_ERROR; | |
806 strcpy(utf8Out, defg); | |
807 length=ucasemap_utf8ToLower(csm, utf8Out, -2, aBc, -1, &errorCode); | |
808 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) { | |
809 log_err("ucasemap_utf8ToLower(destCapacity<0) failed\n"); | |
810 } | |
811 | |
812 /* NULL source */ | |
813 errorCode=U_ZERO_ERROR; | |
814 strcpy(utf8Out, defg); | |
815 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), NULL, -1
, &errorCode); | |
816 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) { | |
817 log_err("ucasemap_utf8ToLower(src=NULL) failed\n"); | |
818 } | |
819 | |
820 /* srcLength<-1 */ | |
821 errorCode=U_ZERO_ERROR; | |
822 strcpy(utf8Out, defg); | |
823 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, -2,
&errorCode); | |
824 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) { | |
825 log_err("ucasemap_utf8ToLower(srcLength<-1) failed\n"); | |
826 } | |
827 | |
828 /* buffer overflow */ | |
829 errorCode=U_ZERO_ERROR; | |
830 strcpy(utf8Out, defg); | |
831 length=ucasemap_utf8ToUpper(csm, utf8Out, 2, aBc, 3, &errorCode); | |
832 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3 || 0!=strcmp(defg+2, utf8
Out+2)) { | |
833 log_err("ucasemap_utf8ToUpper(overflow) failed\n"); | |
834 } | |
835 | |
836 /* dest not terminated (leaves g from defg alone) */ | |
837 errorCode=U_ZERO_ERROR; | |
838 strcpy(utf8Out, defg); | |
839 length=ucasemap_utf8ToUpper(csm, utf8Out, 3, aBc, 3, &errorCode); | |
840 if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=3 || 0!=strcmp(ABCg
, utf8Out)) { | |
841 log_err("ucasemap_utf8ToUpper(overflow) failed\n"); | |
842 } | |
843 | |
844 /* C API coverage for case folding. More thorough test via C++ intltest's St
ringCaseTest::TestCasing(). */ | |
845 errorCode=U_ZERO_ERROR; | |
846 utf8Out[0]=0; | |
847 length=ucasemap_utf8FoldCase(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, 3,
&errorCode); | |
848 if(U_FAILURE(errorCode) || length!=3 || 0!=strcmp(abc, utf8Out)) { | |
849 log_err("ucasemap_utf8FoldCase(aBc) failed\n"); | |
850 } | |
851 | |
852 ucasemap_close(csm); | |
853 } | |
854 | |
855 #if !UCONFIG_NO_BREAK_ITERATION | |
856 | |
857 /* Try titlecasing with options. */ | |
858 static void | |
859 TestUCaseMapToTitle(void) { | |
860 /* "a 'CaT. A 'dOg! 'eTc." where '=U+02BB */ | |
861 /* | |
862 * Note: The sentence BreakIterator does not recognize a '.' | |
863 * as a sentence terminator if it is followed by lowercase. | |
864 * That is why the example has the '!'. | |
865 */ | |
866 static const UChar | |
867 | |
868 beforeTitle[]= { 0x61, 0x20, 0x2bb, 0x43, 0x61, 0x54, 0x2e, 0x20, 0x41,
0x20, 0x2bb, 0x64, 0x4f, 0x67, 0x21, 0x20, 0x2bb, 0x65, 0x54, 0x63, 0x2e }, | |
869 titleWord[]= { 0x41, 0x20, 0x2bb, 0x43, 0x61, 0x74, 0x2e, 0x20, 0x41,
0x20, 0x2bb, 0x44, 0x6f, 0x67, 0x21, 0x20, 0x2bb, 0x45, 0x74, 0x63, 0x2e }, | |
870 titleWordNoAdjust[]={ 0x41, 0x20, 0x2bb, 0x63, 0x61, 0x74, 0x2e, 0x20, 0x41,
0x20, 0x2bb, 0x64, 0x6f, 0x67, 0x21, 0x20, 0x2bb, 0x65, 0x74, 0x63, 0x2e }, | |
871 titleSentNoLower[]= { 0x41, 0x20, 0x2bb, 0x43, 0x61, 0x54, 0x2e, 0x20, 0x41,
0x20, 0x2bb, 0x64, 0x4f, 0x67, 0x21, 0x20, 0x2bb, 0x45, 0x54, 0x63, 0x2e }; | |
872 | |
873 UChar buffer[32]; | |
874 UCaseMap *csm; | |
875 UBreakIterator *sentenceIter; | |
876 const UBreakIterator *iter; | |
877 int32_t length; | |
878 UErrorCode errorCode; | |
879 | |
880 errorCode=U_ZERO_ERROR; | |
881 csm=ucasemap_open("", 0, &errorCode); | |
882 if(U_FAILURE(errorCode)) { | |
883 log_err("ucasemap_open(\"\") failed - %s\n", u_errorName(errorCode)); | |
884 return; | |
885 } | |
886 | |
887 iter=ucasemap_getBreakIterator(csm); | |
888 if(iter!=NULL) { | |
889 log_err("ucasemap_getBreakIterator() returns %p!=NULL before setting any
iterator or titlecasing\n", iter); | |
890 } | |
891 | |
892 /* Use default UBreakIterator: Word breaks. */ | |
893 length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPR
V_LENGTHOF(beforeTitle), &errorCode); | |
894 if( U_FAILURE(errorCode) || | |
895 length!=UPRV_LENGTHOF(titleWord) || | |
896 0!=u_memcmp(buffer, titleWord, length) || | |
897 buffer[length]!=0 | |
898 ) { | |
899 log_err_status(errorCode, "ucasemap_toTitle(default iterator)=%ld failed
- %s\n", (long)length, u_errorName(errorCode)); | |
900 } | |
901 if (U_SUCCESS(errorCode)) { | |
902 iter=ucasemap_getBreakIterator(csm); | |
903 if(iter==NULL) { | |
904 log_err("ucasemap_getBreakIterator() returns NULL after titlecasing\
n"); | |
905 } | |
906 } | |
907 | |
908 /* Try U_TITLECASE_NO_BREAK_ADJUSTMENT. */ | |
909 ucasemap_setOptions(csm, U_TITLECASE_NO_BREAK_ADJUSTMENT, &errorCode); | |
910 if(U_FAILURE(errorCode)) { | |
911 log_err_status(errorCode, "error: ucasemap_setOptions(U_TITLECASE_NO_BRE
AK_ADJUSTMENT) failed - %s\n", u_errorName(errorCode)); | |
912 return; | |
913 } | |
914 | |
915 length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPR
V_LENGTHOF(beforeTitle), &errorCode); | |
916 if( U_FAILURE(errorCode) || | |
917 length!=UPRV_LENGTHOF(titleWordNoAdjust) || | |
918 0!=u_memcmp(buffer, titleWordNoAdjust, length) || | |
919 buffer[length]!=0 | |
920 ) { | |
921 log_err("ucasemap_toTitle(default iterator, no break adjustment)=%ld fai
led - %s\n", (long)length, u_errorName(errorCode)); | |
922 } | |
923 | |
924 /* Set a sentence break iterator. */ | |
925 errorCode=U_ZERO_ERROR; | |
926 sentenceIter=ubrk_open(UBRK_SENTENCE, "", NULL, 0, &errorCode); | |
927 if(U_FAILURE(errorCode)) { | |
928 log_err("error: ubrk_open(UBRK_SENTENCE) failed - %s\n", u_errorName(err
orCode)); | |
929 ucasemap_close(csm); | |
930 return; | |
931 } | |
932 ucasemap_setBreakIterator(csm, sentenceIter, &errorCode); | |
933 if(U_FAILURE(errorCode)) { | |
934 log_err("error: ucasemap_setBreakIterator(sentence iterator) failed - %s
\n", u_errorName(errorCode)); | |
935 ubrk_close(sentenceIter); | |
936 ucasemap_close(csm); | |
937 return; | |
938 } | |
939 iter=ucasemap_getBreakIterator(csm); | |
940 if(iter!=sentenceIter) { | |
941 log_err("ucasemap_getBreakIterator() returns %p!=%p after setting the it
erator\n", iter, sentenceIter); | |
942 } | |
943 | |
944 ucasemap_setOptions(csm, U_TITLECASE_NO_LOWERCASE, &errorCode); | |
945 if(U_FAILURE(errorCode)) { | |
946 log_err("error: ucasemap_setOptions(U_TITLECASE_NO_LOWERCASE) failed - %
s\n", u_errorName(errorCode)); | |
947 return; | |
948 } | |
949 | |
950 /* Use the sentence break iterator with the option. Preflight first. */ | |
951 length=ucasemap_toTitle(csm, NULL, 0, beforeTitle, UPRV_LENGTHOF(beforeTitle
), &errorCode); | |
952 if( errorCode!=U_BUFFER_OVERFLOW_ERROR || | |
953 length!=UPRV_LENGTHOF(titleSentNoLower) | |
954 ) { | |
955 log_err("ucasemap_toTitle(preflight sentence break iterator, no lowercas
ing)=%ld failed - %s\n", (long)length, u_errorName(errorCode)); | |
956 } | |
957 | |
958 errorCode=U_ZERO_ERROR; | |
959 buffer[0]=0; | |
960 length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPR
V_LENGTHOF(beforeTitle), &errorCode); | |
961 if( U_FAILURE(errorCode) || | |
962 length!=UPRV_LENGTHOF(titleSentNoLower) || | |
963 0!=u_memcmp(buffer, titleSentNoLower, length) || | |
964 buffer[length]!=0 | |
965 ) { | |
966 log_err("ucasemap_toTitle(sentence break iterator, no lowercasing)=%ld f
ailed - %s\n", (long)length, u_errorName(errorCode)); | |
967 } | |
968 | |
969 /* UTF-8 C API coverage. More thorough test via C++ intltest's StringCaseTes
t::TestCasing(). */ | |
970 { | |
971 char utf8BeforeTitle[64], utf8TitleSentNoLower[64], utf8[64]; | |
972 int32_t utf8BeforeTitleLength, utf8TitleSentNoLowerLength; | |
973 | |
974 errorCode=U_ZERO_ERROR; | |
975 u_strToUTF8(utf8BeforeTitle, (int32_t)sizeof(utf8BeforeTitle), &utf8Befo
reTitleLength, beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode); | |
976 u_strToUTF8(utf8TitleSentNoLower, (int32_t)sizeof(utf8TitleSentNoLower),
&utf8TitleSentNoLowerLength, titleSentNoLower, UPRV_LENGTHOF(titleSentNoLower),
&errorCode); | |
977 | |
978 length=ucasemap_utf8ToTitle(csm, utf8, (int32_t)sizeof(utf8), utf8Before
Title, utf8BeforeTitleLength, &errorCode); | |
979 if( U_FAILURE(errorCode) || | |
980 length!=utf8TitleSentNoLowerLength || | |
981 0!=uprv_memcmp(utf8, utf8TitleSentNoLower, length) || | |
982 utf8[length]!=0 | |
983 ) { | |
984 log_err("ucasemap_utf8ToTitle(sentence break iterator, no lowercasin
g)=%ld failed - %s\n", (long)length, u_errorName(errorCode)); | |
985 } | |
986 } | |
987 | |
988 ucasemap_close(csm); | |
989 } | |
990 | |
991 #endif | |
992 | |
993 /* Test case for internal API u_caseInsensitivePrefixMatch */ | |
994 static void | |
995 TestUCaseInsensitivePrefixMatch(void) { | |
996 struct { | |
997 const char *s1; | |
998 const char *s2; | |
999 int32_t r1; | |
1000 int32_t r2; | |
1001 } testCases[] = { | |
1002 {"ABC", "ab", 2, 2}, | |
1003 {"ABCD", "abcx", 3, 3}, | |
1004 {"ABC", "xyz", 0, 0}, | |
1005 /* U+00DF LATIN SMALL LETTER SHARP S */ | |
1006 {"A\\u00dfBC", "Ass", 2, 3}, | |
1007 {"Fust", "Fu\\u00dfball", 2, 2}, | |
1008 {"\\u00dfsA", "s\\u00dfB", 2, 2}, | |
1009 {"\\u00dfs", "s\\u00df", 2, 2}, | |
1010 /* U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE */ | |
1011 {"XYZ\\u0130i\\u0307xxx", "xyzi\\u0307\\u0130yyy", 6, 6}, | |
1012 {0, 0, 0, 0} | |
1013 }; | |
1014 int32_t i; | |
1015 | |
1016 for (i = 0; testCases[i].s1 != 0; i++) { | |
1017 UErrorCode sts = U_ZERO_ERROR; | |
1018 UChar u1[64], u2[64]; | |
1019 int32_t matchLen1, matchLen2; | |
1020 | |
1021 u_unescape(testCases[i].s1, u1, 64); | |
1022 u_unescape(testCases[i].s2, u2, 64); | |
1023 | |
1024 u_caseInsensitivePrefixMatch(u1, -1, u2, -1, 0, &matchLen1, &matchLen2,
&sts); | |
1025 if (U_FAILURE(sts)) { | |
1026 log_err("error: %s, s1=%s, s2=%s", u_errorName(sts), testCases[i].s1
, testCases[i].s2); | |
1027 } else if (matchLen1 != testCases[i].r1 || matchLen2 != testCases[i].r2)
{ | |
1028 log_err("s1=%s, s2=%2 / match len1=%d, len2=%d / expected len1=%d, l
en2=%d", | |
1029 testCases[i].s1, testCases[i].s2, | |
1030 matchLen1, matchLen2, | |
1031 testCases[i].r1, testCases[i].r2); | |
1032 } | |
1033 } | |
1034 } | |
1035 | |
1036 void addCaseTest(TestNode** root); | |
1037 | |
1038 void addCaseTest(TestNode** root) { | |
1039 /* cstrcase.c functions, declared in cucdtst.h */ | |
1040 addTest(root, &TestCaseLower, "tsutil/cstrcase/TestCaseLower"); | |
1041 addTest(root, &TestCaseUpper, "tsutil/cstrcase/TestCaseUpper"); | |
1042 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO | |
1043 addTest(root, &TestCaseTitle, "tsutil/cstrcase/TestCaseTitle"); | |
1044 addTest(root, &TestCaseDutchTitle, "tsutil/cstrcase/TestCaseDutchTitle"); | |
1045 #endif | |
1046 addTest(root, &TestCaseFolding, "tsutil/cstrcase/TestCaseFolding"); | |
1047 addTest(root, &TestCaseCompare, "tsutil/cstrcase/TestCaseCompare"); | |
1048 addTest(root, &TestUCaseMap, "tsutil/cstrcase/TestUCaseMap"); | |
1049 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO | |
1050 addTest(root, &TestUCaseMapToTitle, "tsutil/cstrcase/TestUCaseMapToTitle"); | |
1051 #endif | |
1052 addTest(root, &TestUCaseInsensitivePrefixMatch, "tsutil/cstrcase/TestUCaseIn
sensitivePrefixMatch"); | |
1053 } | |
OLD | NEW |