OLD | NEW |
| (Empty) |
1 /******************************************************************** | |
2 * COPYRIGHT: | |
3 * Copyright (c) 1997-2014, International Business Machines Corporation and | |
4 * others. All Rights Reserved. | |
5 ********************************************************************/ | |
6 /*******************************************************************************
* | |
7 * | |
8 * File CNORMTST.C | |
9 * | |
10 * Modification History: | |
11 * Name Description | |
12 * Madhu Katragadda Ported for C API | |
13 * synwee added test for quick check | |
14 * synwee added test for checkFCD | |
15 ********************************************************************************
*/ | |
16 /*tests for u_normalization*/ | |
17 #include "unicode/utypes.h" | |
18 #include "unicode/unorm.h" | |
19 #include "unicode/utf16.h" | |
20 #include "cintltst.h" | |
21 #include "cmemory.h" | |
22 | |
23 #if !UCONFIG_NO_NORMALIZATION | |
24 | |
25 #include <stdlib.h> | |
26 #include <time.h> | |
27 #include "unicode/uchar.h" | |
28 #include "unicode/ustring.h" | |
29 #include "unicode/unorm.h" | |
30 #include "cnormtst.h" | |
31 | |
/* Prototypes for the file-local test functions that addNormTest() registers. */
static void TestAPI(void);
static void TestNormCoverage(void);
static void TestConcatenate(void);
static void TestNextPrevious(void);
static void TestIsNormalized(void);
static void TestFCNFKCClosure(void);
static void TestQuickCheckPerCP(void);
static void TestComposition(void);
static void TestFCD(void);
static void TestGetDecomposition(void);
static void TestGetRawDecomposition(void);
static void TestAppendRestoreMiddle(void);
static void TestGetEasyToUseInstance(void);
66 | |
/*
 * Canonical normalization test cases, written with escaped \uXXXX sequences.
 * Each row is { input, decomposed form, composed form }.
 */
static const char* const canonTests[][3] = {
    { "cat", "cat", "cat" },
    { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" },

    /* D-dot_above */
    { "\\u1e0a", "D\\u0307", "\\u1e0a" },
    /* D dot_above */
    { "D\\u0307", "D\\u0307", "\\u1e0a" },

    /* D-dot_below dot_above */
    { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" },
    /* D-dot_above dot_below */
    { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" },
    /* D dot_below dot_above */
    { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" },

    /* D dot_below cedilla dot_above */
    { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" },
    /* D dot_above ogonek dot_below */
    { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" },

    /* E-macron-grave */
    { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" },
    /* E-macron + grave */
    { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" },
    /* E-grave + macron */
    { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" },

    /* angstrom_sign */
    { "\\u212b", "A\\u030a", "\\u00c5" },
    /* A-ring */
    { "\\u00c5", "A\\u030a", "\\u00c5" },

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },

    /* ga (Katakana) */
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },
    /* ka + ten */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },
    /* hw_ka + hw_ten */
    { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" },
    /* ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" },
    /* hw_ka + ten */
    { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" },
    /* A + grave (U+0300) + grave below (U+0316): the marks get reordered */
    { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" },

    /* empty string */
    { "", "", "" }
};
103 | |
/*
 * Compatibility normalization test cases, written with escaped \uXXXX sequences.
 * Each row is { input, decomposed form, composed form }.
 */
static const char* const compatTests[][3] = {
    { "cat", "cat", "cat" },

    /* Alef-Lamed vs. Alef, Lamed */
    { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" },

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    /* ffi ligature -> f + f + i */
    { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" },

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry IV", "Henry IV" },

    /* ga (Katakana) */
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },
    /* ka + ten */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },

    /* hw_ka + ten */
    { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" },

    /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later */
    /* hw_ka + hw_ten */
    { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },
    /* ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },

    /* empty string */
    { "", "", "" }
};
126 | |
/*
 * FCD test cases: { input, expected FCD result, unused }.
 * Added for testing the below-U+0300 prefix of a NUL-terminated string.
 */
static const char* const fcdTests[][3] = {
    /* D-caron + cedilla */
    { "\\u010e\\u0327", "D\\u0327\\u030c", NULL },
    /* D-caron */
    { "\\u010e", "\\u010e", NULL }
};
132 | |
133 void addNormTest(TestNode** root); | |
134 | |
135 void addNormTest(TestNode** root) | |
136 { | |
137 addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI"); | |
138 addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp"); | |
139 addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp"); | |
140 addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompo
se"); | |
141 addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCom
pose"); | |
142 addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD"); | |
143 addTest(root, &TestNull, "tsnorm/cnormtst/TestNull"); | |
144 addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck"); | |
145 addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP"); | |
146 addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized"); | |
147 addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD"); | |
148 addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage"); | |
149 addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate"); | |
150 addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious"); | |
151 addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure"); | |
152 addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition"); | |
153 addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition")
; | |
154 addTest(root, &TestGetRawDecomposition, "tsnorm/cnormtst/TestGetRawDecomposi
tion"); | |
155 addTest(root, &TestAppendRestoreMiddle, "tsnorm/cnormtst/TestAppendRestoreMi
ddle"); | |
156 addTest(root, &TestGetEasyToUseInstance, "tsnorm/cnormtst/TestGetEasyToUseIn
stance"); | |
157 } | |
158 | |
/*
 * Printable names for UNormalizationMode values, indexed directly by the
 * enum value (modeStrings[mode]).
 *
 * UNormalizationMode starts at UNORM_NONE=1, so index 0 is a placeholder;
 * without it every name would be shifted by one (NFD would be reported as
 * "UNORM_NFKD", and so on).
 */
static const char* const modeStrings[]={
    "?",
    "UNORM_NONE",
    "UNORM_NFD",
    "UNORM_NFKD",
    "UNORM_NFC",
    "UNORM_NFKC",
    "UNORM_FCD",
    "UNORM_MODE_COUNT"
};
168 | |
169 static void TestNormCases(UNormalizationMode mode, | |
170 const char* const cases[][3], int32_t lengthOfCases) { | |
171 int32_t x, neededLen, length2; | |
172 int32_t expIndex= (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1; | |
173 UChar *source=NULL; | |
174 UChar result[16]; | |
175 log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]); | |
176 for(x=0; x < lengthOfCases; x++) | |
177 { | |
178 UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR; | |
179 source=CharsToUChars(cases[x][0]); | |
180 neededLen= unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &
status); | |
181 length2= unorm_normalize(source, -1, mode, 0, NULL, 0, &status2); | |
182 if(neededLen!=length2) { | |
183 log_err("ERROR in unorm_normalize(%s)[%d]: " | |
184 "preflight length/NUL %d!=%d preflight length/srcLength\n", | |
185 modeStrings[mode], (int)x, (int)neededLen, (int)length2); | |
186 } | |
187 if(status==U_BUFFER_OVERFLOW_ERROR) | |
188 { | |
189 status=U_ZERO_ERROR; | |
190 } | |
191 length2=unorm_normalize(source, u_strlen(source), mode, 0, result, UPRV_
LENGTHOF(result), &status); | |
192 if(U_FAILURE(status) || neededLen!=length2) { | |
193 log_data_err("ERROR in unorm_normalize(%s/NUL) at %s: %s - (Are you
missing data?)\n", | |
194 modeStrings[mode], austrdup(source), myErrorName(status
)); | |
195 } else { | |
196 assertEqual(result, cases[x][expIndex], x); | |
197 } | |
198 length2=unorm_normalize(source, -1, mode, 0, result, UPRV_LENGTHOF(resul
t), &status); | |
199 if(U_FAILURE(status) || neededLen!=length2) { | |
200 log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s: %s - (A
re you missing data?)\n", | |
201 modeStrings[mode], austrdup(source), myErrorName(status
)); | |
202 } else { | |
203 assertEqual(result, cases[x][expIndex], x); | |
204 } | |
205 free(source); | |
206 } | |
207 } | |
208 | |
209 void TestDecomp() { | |
210 TestNormCases(UNORM_NFD, canonTests, UPRV_LENGTHOF(canonTests)); | |
211 } | |
212 | |
213 void TestCompatDecomp() { | |
214 TestNormCases(UNORM_NFKD, compatTests, UPRV_LENGTHOF(compatTests)); | |
215 } | |
216 | |
217 void TestCanonDecompCompose() { | |
218 TestNormCases(UNORM_NFC, canonTests, UPRV_LENGTHOF(canonTests)); | |
219 } | |
220 | |
221 void TestCompatDecompCompose() { | |
222 TestNormCases(UNORM_NFKC, compatTests, UPRV_LENGTHOF(compatTests)); | |
223 } | |
224 | |
225 void TestFCD() { | |
226 TestNormCases(UNORM_FCD, fcdTests, UPRV_LENGTHOF(fcdTests)); | |
227 } | |
228 | |
229 static void assertEqual(const UChar* result, const char* expected, int32_t index
) | |
230 { | |
231 UChar *expectedUni = CharsToUChars(expected); | |
232 if(u_strcmp(result, expectedUni)!=0){ | |
233 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n"
, index, expected, | |
234 austrdup(result) ); | |
235 } | |
236 free(expectedUni); | |
237 } | |
238 | |
239 static void TestNull_check(UChar *src, int32_t srcLen, | |
240 UChar *exp, int32_t expLen, | |
241 UNormalizationMode mode, | |
242 const char *name) | |
243 { | |
244 UErrorCode status = U_ZERO_ERROR; | |
245 int32_t len, i; | |
246 | |
247 UChar result[50]; | |
248 | |
249 | |
250 status = U_ZERO_ERROR; | |
251 | |
252 for(i=0;i<50;i++) | |
253 { | |
254 result[i] = 0xFFFD; | |
255 } | |
256 | |
257 len = unorm_normalize(src, srcLen, mode, 0, result, 50, &status); | |
258 | |
259 if(U_FAILURE(status)) { | |
260 log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missin
g data?)\n", name, u_errorName(status)); | |
261 } else if (len != expLen) { | |
262 log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n
", name, expLen, len); | |
263 } | |
264 | |
265 { | |
266 for(i=0;i<len;i++){ | |
267 if(exp[i] != result[i]) { | |
268 log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n", | |
269 name, | |
270 i, | |
271 exp[i], | |
272 result[i]); | |
273 return; | |
274 } | |
275 log_verbose(" %d: \\u%04X\n", i, result[i]); | |
276 } | |
277 } | |
278 | |
279 log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name); | |
280 } | |
281 | |
282 void TestNull() | |
283 { | |
284 | |
285 UChar source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 }; | |
286 int32_t source_comp_len = 4; | |
287 UChar expect_comp[] = { 0x0061, 0x0000, 0x1e0a }; | |
288 int32_t expect_comp_len = 3; | |
289 | |
290 UChar source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 }; | |
291 int32_t source_dcmp_len = 3; | |
292 UChar expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C }; | |
293 int32_t expect_dcmp_len = 5; | |
294 | |
295 TestNull_check(source_comp, | |
296 source_comp_len, | |
297 expect_comp, | |
298 expect_comp_len, | |
299 UNORM_NFC, | |
300 "UNORM_NFC"); | |
301 | |
302 TestNull_check(source_dcmp, | |
303 source_dcmp_len, | |
304 expect_dcmp, | |
305 expect_dcmp_len, | |
306 UNORM_NFD, | |
307 "UNORM_NFD"); | |
308 | |
309 TestNull_check(source_comp, | |
310 source_comp_len, | |
311 expect_comp, | |
312 expect_comp_len, | |
313 UNORM_NFKC, | |
314 "UNORM_NFKC"); | |
315 | |
316 | |
317 } | |
318 | |
319 static void TestQuickCheckResultNO() | |
320 { | |
321 const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C, | |
322 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E}; | |
323 const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB, | |
324 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E}; | |
325 const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE, | |
326 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D}; | |
327 const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE, | |
328 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D}; | |
329 | |
330 | |
331 const int SIZE = 10; | |
332 | |
333 int count = 0; | |
334 UErrorCode error = U_ZERO_ERROR; | |
335 | |
336 for (; count < SIZE; count ++) | |
337 { | |
338 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) != | |
339 UNORM_NO) | |
340 { | |
341 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]); | |
342 return; | |
343 } | |
344 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) != | |
345 UNORM_NO) | |
346 { | |
347 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]); | |
348 return; | |
349 } | |
350 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) != | |
351 UNORM_NO) | |
352 { | |
353 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]); | |
354 return; | |
355 } | |
356 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != | |
357 UNORM_NO) | |
358 { | |
359 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); | |
360 return; | |
361 } | |
362 } | |
363 } | |
364 | |
365 | |
366 static void TestQuickCheckResultYES() | |
367 { | |
368 const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A, | |
369 0x2261, 0x3075, 0x4000, 0x5000, 0xF000}; | |
370 const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500, | |
371 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000}; | |
372 const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB, | |
373 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27}; | |
374 const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000, | |
375 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E}; | |
376 | |
377 const int SIZE = 10; | |
378 int count = 0; | |
379 UErrorCode error = U_ZERO_ERROR; | |
380 | |
381 UChar cp = 0; | |
382 while (cp < 0xA0) | |
383 { | |
384 if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES) | |
385 { | |
386 log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)
\n", cp); | |
387 return; | |
388 } | |
389 if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) != | |
390 UNORM_YES) | |
391 { | |
392 log_err("ERROR in NFC quick check at U+%04x\n", cp); | |
393 return; | |
394 } | |
395 if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES) | |
396 { | |
397 log_data_err("ERROR in NFKD quick check at U+%04x\n", cp); | |
398 return; | |
399 } | |
400 if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) != | |
401 UNORM_YES) | |
402 { | |
403 log_err("ERROR in NFKC quick check at U+%04x\n", cp); | |
404 return; | |
405 } | |
406 cp ++; | |
407 } | |
408 | |
409 for (; count < SIZE; count ++) | |
410 { | |
411 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) != | |
412 UNORM_YES) | |
413 { | |
414 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]); | |
415 return; | |
416 } | |
417 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) | |
418 != UNORM_YES) | |
419 { | |
420 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]); | |
421 return; | |
422 } | |
423 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) != | |
424 UNORM_YES) | |
425 { | |
426 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]); | |
427 return; | |
428 } | |
429 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != | |
430 UNORM_YES) | |
431 { | |
432 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); | |
433 return; | |
434 } | |
435 } | |
436 } | |
437 | |
438 static void TestQuickCheckResultMAYBE() | |
439 { | |
440 const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161, | |
441 0x116A, 0x1173, 0x1175, 0x3099, 0x309A}; | |
442 const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E, | |
443 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099}; | |
444 | |
445 | |
446 const int SIZE = 10; | |
447 | |
448 int count = 0; | |
449 UErrorCode error = U_ZERO_ERROR; | |
450 | |
451 /* NFD and NFKD does not have any MAYBE codepoints */ | |
452 for (; count < SIZE; count ++) | |
453 { | |
454 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) != | |
455 UNORM_MAYBE) | |
456 { | |
457 log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)
\n", CPNFC[count]); | |
458 return; | |
459 } | |
460 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != | |
461 UNORM_MAYBE) | |
462 { | |
463 log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); | |
464 return; | |
465 } | |
466 } | |
467 } | |
468 | |
469 static void TestQuickCheckStringResult() | |
470 { | |
471 int count; | |
472 UChar *d = NULL; | |
473 UChar *c = NULL; | |
474 UErrorCode error = U_ZERO_ERROR; | |
475 | |
476 for (count = 0; count < UPRV_LENGTHOF(canonTests); count ++) | |
477 { | |
478 d = CharsToUChars(canonTests[count][1]); | |
479 c = CharsToUChars(canonTests[count][2]); | |
480 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) != | |
481 UNORM_YES) | |
482 { | |
483 log_data_err("ERROR in NFD quick check for string at count %d - (Are you m
issing data?)\n", count); | |
484 return; | |
485 } | |
486 | |
487 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) == | |
488 UNORM_NO) | |
489 { | |
490 log_err("ERROR in NFC quick check for string at count %d\n", count); | |
491 return; | |
492 } | |
493 | |
494 free(d); | |
495 free(c); | |
496 } | |
497 | |
498 for (count = 0; count < UPRV_LENGTHOF(compatTests); count ++) | |
499 { | |
500 d = CharsToUChars(compatTests[count][1]); | |
501 c = CharsToUChars(compatTests[count][2]); | |
502 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) != | |
503 UNORM_YES) | |
504 { | |
505 log_data_err("ERROR in NFKD quick check for string at count %d\n", count); | |
506 return; | |
507 } | |
508 | |
509 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) != | |
510 UNORM_YES) | |
511 { | |
512 log_err("ERROR in NFKC quick check for string at count %d\n", count); | |
513 return; | |
514 } | |
515 | |
516 free(d); | |
517 free(c); | |
518 } | |
519 } | |
520 | |
/* Runs the four unorm_quickCheck() sub-tests: NO, YES, MAYBE, then whole strings. */
void TestQuickCheck(void)
{
    TestQuickCheckResultNO();
    TestQuickCheckResultYES();
    TestQuickCheckResultMAYBE();
    TestQuickCheckStringResult();
}
528 | |
529 /* | |
530 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_ | |
531 * normalized, and some that are not. | |
532 * Here we pick some specific cases and test the C API. | |
533 */ | |
534 static void TestIsNormalized(void) { | |
535 static const UChar notNFC[][8]={ /* strings that are not in NFC *
/ | |
536 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */ | |
537 { 0xfb1d, 0 }, /* excluded from composition */ | |
538 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */ | |
539 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */ | |
540 }; | |
541 static const UChar notNFKC[][8]={ /* strings that are not in NFKC
*/ | |
542 { 0x1100, 0x1161, 0 }, /* Jamo compose */ | |
543 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */ | |
544 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */ | |
545 }; | |
546 | |
547 int32_t i; | |
548 UErrorCode errorCode; | |
549 | |
550 /* API test */ | |
551 | |
552 /* normal case with length>=0 (length -1 used for special cases below) */ | |
553 errorCode=U_ZERO_ERROR; | |
554 if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(e
rrorCode)) { | |
555 log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missin
g data?)\n", u_errorName(errorCode)); | |
556 } | |
557 | |
558 /* incoming U_FAILURE */ | |
559 errorCode=U_TRUNCATED_CHAR_FOUND; | |
560 (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode); | |
561 if(errorCode!=U_TRUNCATED_CHAR_FOUND) { | |
562 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error c
ode to %s\n", u_errorName(errorCode)); | |
563 } | |
564 | |
565 /* NULL source */ | |
566 errorCode=U_ZERO_ERROR; | |
567 (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode); | |
568 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
569 log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_E
RROR but %s - (Are you missing data?)\n", u_errorName(errorCode)); | |
570 } | |
571 | |
572 /* bad length */ | |
573 errorCode=U_ZERO_ERROR; | |
574 (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode); | |
575 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
576 log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_E
RROR but %s - (Are you missing data?)\n", u_errorName(errorCode)); | |
577 } | |
578 | |
579 /* specific cases */ | |
580 for(i=0; i<UPRV_LENGTHOF(notNFC); ++i) { | |
581 errorCode=U_ZERO_ERROR; | |
582 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE
(errorCode)) { | |
583 log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (
Are you missing data?)\n", i, u_errorName(errorCode)); | |
584 } | |
585 errorCode=U_ZERO_ERROR; | |
586 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILUR
E(errorCode)) { | |
587 log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) -
(Are you missing data?)\n", i, u_errorName(errorCode)); | |
588 } | |
589 } | |
590 for(i=0; i<UPRV_LENGTHOF(notNFKC); ++i) { | |
591 errorCode=U_ZERO_ERROR; | |
592 if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILU
RE(errorCode)) { | |
593 log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) -
(Are you missing data?)\n", i, u_errorName(errorCode)); | |
594 } | |
595 } | |
596 } | |
597 | |
598 void TestCheckFCD() | |
599 { | |
600 UErrorCode status = U_ZERO_ERROR; | |
601 static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x09, | |
602 0x0A}; | |
603 static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
| |
604 0x02B9, 0x0314, 0x0315, 0x0316}; | |
605 static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7, | |
606 0x0050, 0x0730, 0x09EE, 0x1E10}; | |
607 | |
608 static const UChar datastr[][5] = | |
609 { {0x0061, 0x030A, 0x1E05, 0x0302, 0}, | |
610 {0x0061, 0x030A, 0x00E2, 0x0323, 0}, | |
611 {0x0061, 0x0323, 0x00E2, 0x0323, 0}, | |
612 {0x0061, 0x0323, 0x1E05, 0x0302, 0} }; | |
613 static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES}; | |
614 | |
615 static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x6
7, 0x68, 0x69, | |
616 0x6a, | |
617 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8
, 0xe9, | |
618 0xea, | |
619 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x03
06, | |
620 0x0307, 0x0308, 0x0309, 0x030a, | |
621 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x03
26, | |
622 0x0327, 0x0328, 0x0329, 0x032a, | |
623 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e
06, | |
624 0x1e07, 0x1e08, 0x1e09, 0x1e0a}; | |
625 | |
626 int count = 0; | |
627 | |
628 if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES) | |
629 log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_qu
ickCheck is UNORM_YES - (Are you missing data?)\n"); | |
630 if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO) | |
631 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickC
heck is UNORM_NO\n"); | |
632 if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES) | |
633 log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm
_quickCheck is UNORM_YES - (Are you missing data?)\n"); | |
634 | |
635 if (U_FAILURE(status)) | |
636 log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n",
u_errorName(status)); | |
637 | |
638 while (count < 4) | |
639 { | |
640 UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status); | |
641 if (U_FAILURE(status)) { | |
642 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set
%d - (Are you missing data?)\n", count); | |
643 break; | |
644 } | |
645 else { | |
646 if (result[count] != fcdresult) { | |
647 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n",
count, | |
648 result[count]); | |
649 } | |
650 } | |
651 count ++; | |
652 } | |
653 | |
654 /* random checks of long strings */ | |
655 status = U_ZERO_ERROR; | |
656 srand((unsigned)time( NULL )); | |
657 | |
658 for (count = 0; count < 50; count ++) | |
659 { | |
660 int size = 0; | |
661 UBool testresult = UNORM_YES; | |
662 UChar data[20]; | |
663 UChar norm[100]; | |
664 UChar nfd[100]; | |
665 int normsize = 0; | |
666 int nfdsize = 0; | |
667 | |
668 while (size != 19) { | |
669 data[size] = datachar[(rand() * 50) / RAND_MAX]; | |
670 log_verbose("0x%x", data[size]); | |
671 normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0, | |
672 norm + normsize, 100 - normsize, &status);
| |
673 if (U_FAILURE(status)) { | |
674 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data ge
neration - (Are you missing data?)\n"); | |
675 break; | |
676 } | |
677 size ++; | |
678 } | |
679 log_verbose("\n"); | |
680 | |
681 nfdsize = unorm_normalize(data, size, UNORM_NFD, 0, | |
682 nfd, 100, &status); | |
683 if (U_FAILURE(status)) { | |
684 log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalize
d data generation - (Are you missing data?)\n"); | |
685 } | |
686 | |
687 if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) { | |
688 testresult = UNORM_NO; | |
689 } | |
690 if (testresult == UNORM_YES) { | |
691 log_verbose("result UNORM_YES\n"); | |
692 } | |
693 else { | |
694 log_verbose("result UNORM_NO\n"); | |
695 } | |
696 | |
697 if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAIL
URE(status)) { | |
698 log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data -
(Are you missing data?)\n", testresult); | |
699 } | |
700 } | |
701 } | |
702 | |
703 static void | |
704 TestAPI() { | |
705 static const UChar in[]={ 0x68, 0xe4 }; | |
706 UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff }; | |
707 UErrorCode errorCode; | |
708 int32_t length; | |
709 | |
710 /* try preflighting */ | |
711 errorCode=U_ZERO_ERROR; | |
712 length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode); | |
713 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) { | |
714 log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s
- (Are you missing data?)\n", length, u_errorName(errorCode)); | |
715 return; | |
716 } | |
717 | |
718 errorCode=U_ZERO_ERROR; | |
719 length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode); | |
720 if(U_FAILURE(errorCode)) { | |
721 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName
(errorCode)); | |
722 return; | |
723 } | |
724 if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) { | |
725 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+
%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]); | |
726 return; | |
727 } | |
728 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode); | |
729 if(U_FAILURE(errorCode)) { | |
730 log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with
%s\n", (long)length, u_errorName(errorCode)); | |
731 return; | |
732 } | |
733 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode); | |
734 if(U_FAILURE(errorCode)) { | |
735 log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with
%s\n", (long)length, u_errorName(errorCode)); | |
736 return; | |
737 } | |
738 } | |
739 | |
/* test cases to improve test code coverage: code points used by TestNormCoverage() */
enum {
    /* Hangul compatibility Jamo */
    HANGUL_K_KIYEOK      = 0x3131,  /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO         = 0x315d,  /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS = 0x3133,  /* NFKD->Jamo T U+11aa */

    /* conjoining Jamo */
    HANGUL_KIYEOK      = 0x1100,    /* Jamo L U+1100 */
    HANGUL_WEO         = 0x116f,    /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS = 0x11aa,    /* Jamo T U+11aa */

    /* precomposed Hangul syllables */
    HANGUL_AC00     = 0xac00,               /* Hangul syllable = Jamo LV U+ac00 */
    HANGUL_SYLLABLE = 0xac00 + 14*28 + 3,   /* Hangul syllable = U+1100 * U+116f * U+11aa */

    /* supplementary-plane musical symbols */
    MUSICAL_VOID_NOTEHEAD = 0x1d157,
    MUSICAL_HALF_NOTE     = 0x1d15e,    /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM          = 0x1d165,    /* cc=216 */
    MUSICAL_STACCATO      = 0x1d17c     /* cc=220 */
};
758 | |
759 static void | |
760 TestNormCoverage() { | |
761 UChar input[1000], expect[1000], output[1000]; | |
762 UErrorCode errorCode; | |
763 int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLeng
th; | |
764 | |
765 /* create a long and nasty string with NFKC-unsafe characters */ | |
766 inLength=0; | |
767 | |
768 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */ | |
769 input[inLength++]=HANGUL_KIYEOK; | |
770 input[inLength++]=HANGUL_WEO; | |
771 input[inLength++]=HANGUL_KIYEOK_SIOS; | |
772 | |
773 input[inLength++]=HANGUL_KIYEOK; | |
774 input[inLength++]=HANGUL_WEO; | |
775 input[inLength++]=HANGUL_K_KIYEOK_SIOS; | |
776 | |
777 input[inLength++]=HANGUL_KIYEOK; | |
778 input[inLength++]=HANGUL_K_WEO; | |
779 input[inLength++]=HANGUL_KIYEOK_SIOS; | |
780 | |
781 input[inLength++]=HANGUL_KIYEOK; | |
782 input[inLength++]=HANGUL_K_WEO; | |
783 input[inLength++]=HANGUL_K_KIYEOK_SIOS; | |
784 | |
785 input[inLength++]=HANGUL_K_KIYEOK; | |
786 input[inLength++]=HANGUL_WEO; | |
787 input[inLength++]=HANGUL_KIYEOK_SIOS; | |
788 | |
789 input[inLength++]=HANGUL_K_KIYEOK; | |
790 input[inLength++]=HANGUL_WEO; | |
791 input[inLength++]=HANGUL_K_KIYEOK_SIOS; | |
792 | |
793 input[inLength++]=HANGUL_K_KIYEOK; | |
794 input[inLength++]=HANGUL_K_WEO; | |
795 input[inLength++]=HANGUL_KIYEOK_SIOS; | |
796 | |
797 input[inLength++]=HANGUL_K_KIYEOK; | |
798 input[inLength++]=HANGUL_K_WEO; | |
799 input[inLength++]=HANGUL_K_KIYEOK_SIOS; | |
800 | |
801 /* Hangul LV with normal/compatibility Jamo T */ | |
802 input[inLength++]=HANGUL_AC00; | |
803 input[inLength++]=HANGUL_KIYEOK_SIOS; | |
804 | |
805 input[inLength++]=HANGUL_AC00; | |
806 input[inLength++]=HANGUL_K_KIYEOK_SIOS; | |
807 | |
808 /* compatibility Jamo L, V */ | |
809 input[inLength++]=HANGUL_K_KIYEOK; | |
810 input[inLength++]=HANGUL_K_WEO; | |
811 | |
812 hangulPrefixLength=inLength; | |
813 | |
814 input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE); | |
815 input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE); | |
816 for(i=0; i<200; ++i) { | |
817 input[inLength++]=U16_LEAD(MUSICAL_STACCATO); | |
818 input[inLength++]=U16_TRAIL(MUSICAL_STACCATO); | |
819 input[inLength++]=U16_LEAD(MUSICAL_STEM); | |
820 input[inLength++]=U16_TRAIL(MUSICAL_STEM); | |
821 } | |
822 | |
823 /* (compatibility) Jamo L, T do not compose */ | |
824 input[inLength++]=HANGUL_K_KIYEOK; | |
825 input[inLength++]=HANGUL_K_KIYEOK_SIOS; | |
826 | |
827 /* quick checks */ | |
828 errorCode=U_ZERO_ERROR; | |
829 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_F
AILURE(errorCode)) { | |
830 log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (
Are you missing data?)\n", u_errorName(errorCode)); | |
831 } | |
832 errorCode=U_ZERO_ERROR; | |
833 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_
FAILURE(errorCode)) { | |
834 log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) -
(Are you missing data?)\n", u_errorName(errorCode)); | |
835 } | |
836 errorCode=U_ZERO_ERROR; | |
837 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_F
AILURE(errorCode)) { | |
838 log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (
Are you missing data?)\n", u_errorName(errorCode)); | |
839 } | |
840 errorCode=U_ZERO_ERROR; | |
841 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_
FAILURE(errorCode)) { | |
842 log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) -
(Are you missing data?)\n", u_errorName(errorCode)); | |
843 } | |
844 errorCode=U_ZERO_ERROR; | |
845 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_F
AILURE(errorCode)) { | |
846 log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (
Are you missing data?)\n", u_errorName(errorCode)); | |
847 } | |
848 | |
849 /* NFKC */ | |
850 expectLength=0; | |
851 expect[expectLength++]=HANGUL_SYLLABLE; | |
852 | |
853 expect[expectLength++]=HANGUL_SYLLABLE; | |
854 | |
855 expect[expectLength++]=HANGUL_SYLLABLE; | |
856 | |
857 expect[expectLength++]=HANGUL_SYLLABLE; | |
858 | |
859 expect[expectLength++]=HANGUL_SYLLABLE; | |
860 | |
861 expect[expectLength++]=HANGUL_SYLLABLE; | |
862 | |
863 expect[expectLength++]=HANGUL_SYLLABLE; | |
864 | |
865 expect[expectLength++]=HANGUL_SYLLABLE; | |
866 | |
867 expect[expectLength++]=HANGUL_AC00+3; | |
868 | |
869 expect[expectLength++]=HANGUL_AC00+3; | |
870 | |
871 expect[expectLength++]=HANGUL_AC00+14*28; | |
872 | |
873 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD); | |
874 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD); | |
875 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); | |
876 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); | |
877 for(i=0; i<200; ++i) { | |
878 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); | |
879 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); | |
880 } | |
881 for(i=0; i<200; ++i) { | |
882 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO); | |
883 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO); | |
884 } | |
885 | |
886 expect[expectLength++]=HANGUL_KIYEOK; | |
887 expect[expectLength++]=HANGUL_KIYEOK_SIOS; | |
888 | |
889 /* try destination overflow first */ | |
890 errorCode=U_ZERO_ERROR; | |
891 preflightLength=unorm_normalize(input, inLength, | |
892 UNORM_NFKC, 0, | |
893 output, 100, /* too short */ | |
894 &errorCode); | |
895 if(errorCode!=U_BUFFER_OVERFLOW_ERROR) { | |
896 log_data_err("error unorm_normalize(long input, output too short, UNORM_
NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCod
e)); | |
897 } | |
898 | |
899 /* real NFKC */ | |
900 errorCode=U_ZERO_ERROR; | |
901 length=unorm_normalize(input, inLength, | |
902 UNORM_NFKC, 0, | |
903 output, sizeof(output)/U_SIZEOF_UCHAR, | |
904 &errorCode); | |
905 if(U_FAILURE(errorCode)) { | |
906 log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with
%s - (Are you missing data?)\n", u_errorName(errorCode)); | |
907 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) { | |
908 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong re
sult\n"); | |
909 for(i=0; i<length; ++i) { | |
910 if(output[i]!=expect[i]) { | |
911 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i]
, expect[i]); | |
912 break; | |
913 } | |
914 } | |
915 } | |
916 if(length!=preflightLength) { | |
917 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but prefligh
tLength==%ld\n", length, preflightLength); | |
918 } | |
919 | |
920 /* FCD */ | |
921 u_memcpy(expect, input, hangulPrefixLength); | |
922 expectLength=hangulPrefixLength; | |
923 | |
924 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD); | |
925 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD); | |
926 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); | |
927 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); | |
928 for(i=0; i<200; ++i) { | |
929 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); | |
930 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); | |
931 } | |
932 for(i=0; i<200; ++i) { | |
933 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO); | |
934 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO); | |
935 } | |
936 | |
937 expect[expectLength++]=HANGUL_K_KIYEOK; | |
938 expect[expectLength++]=HANGUL_K_KIYEOK_SIOS; | |
939 | |
940 errorCode=U_ZERO_ERROR; | |
941 length=unorm_normalize(input, inLength, | |
942 UNORM_FCD, 0, | |
943 output, sizeof(output)/U_SIZEOF_UCHAR, | |
944 &errorCode); | |
945 if(U_FAILURE(errorCode)) { | |
946 log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %
s - (Are you missing data?)\n", u_errorName(errorCode)); | |
947 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) { | |
948 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong res
ult\n"); | |
949 for(i=0; i<length; ++i) { | |
950 if(output[i]!=expect[i]) { | |
951 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i],
expect[i]); | |
952 break; | |
953 } | |
954 } | |
955 } | |
956 } | |
957 | |
958 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm
.cpp */ | |
959 static void | |
960 TestConcatenate(void) { | |
961 /* "re + 'sume'" */ | |
962 static const UChar | |
963 left[]={ | |
964 0x72, 0x65, 0 | |
965 }, | |
966 right[]={ | |
967 0x301, 0x73, 0x75, 0x6d, 0xe9, 0 | |
968 }, | |
969 expect[]={ | |
970 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0 | |
971 }; | |
972 | |
973 UChar buffer[100]; | |
974 UErrorCode errorCode; | |
975 int32_t length; | |
976 | |
977 /* left with length, right NUL-terminated */ | |
978 errorCode=U_ZERO_ERROR; | |
979 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &err
orCode); | |
980 if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length))
{ | |
981 log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s -
(Are you missing data?)\n", length, u_errorName(errorCode)); | |
982 } | |
983 | |
984 /* preflighting */ | |
985 errorCode=U_ZERO_ERROR; | |
986 length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCo
de); | |
987 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) { | |
988 log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) fail
ed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); | |
989 } | |
990 | |
991 buffer[2]=0x5555; | |
992 errorCode=U_ZERO_ERROR; | |
993 length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &error
Code); | |
994 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) { | |
995 log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) fa
iled with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); | |
996 } | |
997 | |
998 /* enter with U_FAILURE */ | |
999 buffer[2]=0xaaaa; | |
1000 errorCode=U_UNEXPECTED_TOKEN; | |
1001 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &err
orCode); | |
1002 if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) { | |
1003 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length
, u_errorName(errorCode)); | |
1004 } | |
1005 | |
1006 /* illegal arguments */ | |
1007 buffer[2]=0xaaaa; | |
1008 errorCode=U_ZERO_ERROR; | |
1009 length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &err
orCode); | |
1010 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) { | |
1011 log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (
Are you missing data?)\n", length, u_errorName(errorCode)); | |
1012 } | |
1013 | |
1014 errorCode=U_ZERO_ERROR; | |
1015 length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &error
Code); | |
1016 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
1017 log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s -
(Are you missing data?)\n", length, u_errorName(errorCode)); | |
1018 } | |
1019 } | |
1020 | |
/* Separator between expected output pieces in the TestNextPrevious() tables. */
enum { _PLUS=0x2b };
1024 | |
1025 static const char *const _modeString[UNORM_MODE_COUNT]={ | |
1026 "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD" | |
1027 }; | |
1028 | |
1029 static void | |
1030 _testIter(const UChar *src, int32_t srcLength, | |
1031 UCharIterator *iter, UNormalizationMode mode, UBool forward, | |
1032 const UChar *out, int32_t outLength, | |
1033 const int32_t *srcIndexes, int32_t srcIndexesLength) { | |
1034 UChar buffer[4]; | |
1035 const UChar *expect, *outLimit, *in; | |
1036 int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength; | |
1037 UErrorCode errorCode; | |
1038 UBool neededToNormalize, expectNeeded; | |
1039 | |
1040 errorCode=U_ZERO_ERROR; | |
1041 outLimit=out+outLength; | |
1042 if(forward) { | |
1043 expect=out; | |
1044 i=index=0; | |
1045 } else { | |
1046 expect=outLimit; | |
1047 i=srcIndexesLength-2; | |
1048 index=srcLength; | |
1049 } | |
1050 | |
1051 for(;;) { | |
1052 prevIndex=index; | |
1053 if(forward) { | |
1054 if(!iter->hasNext(iter)) { | |
1055 return; | |
1056 } | |
1057 length=unorm_next(iter, | |
1058 buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
1059 mode, 0, | |
1060 (UBool)(out!=NULL), &neededToNormalize, | |
1061 &errorCode); | |
1062 expectIndex=srcIndexes[i+1]; | |
1063 in=src+prevIndex; | |
1064 inLength=expectIndex-prevIndex; | |
1065 | |
1066 if(out!=NULL) { | |
1067 /* get output piece from between plus signs */ | |
1068 expectLength=0; | |
1069 while((expect+expectLength)!=outLimit && expect[expectLength]!=_
PLUS) { | |
1070 ++expectLength; | |
1071 } | |
1072 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength)); | |
1073 } else { | |
1074 expect=in; | |
1075 expectLength=inLength; | |
1076 expectNeeded=FALSE; | |
1077 } | |
1078 } else { | |
1079 if(!iter->hasPrevious(iter)) { | |
1080 return; | |
1081 } | |
1082 length=unorm_previous(iter, | |
1083 buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
1084 mode, 0, | |
1085 (UBool)(out!=NULL), &neededToNormalize, | |
1086 &errorCode); | |
1087 expectIndex=srcIndexes[i]; | |
1088 in=src+expectIndex; | |
1089 inLength=prevIndex-expectIndex; | |
1090 | |
1091 if(out!=NULL) { | |
1092 /* get output piece from between plus signs */ | |
1093 expectLength=0; | |
1094 while(expect!=out && expect[-1]!=_PLUS) { | |
1095 ++expectLength; | |
1096 --expect; | |
1097 } | |
1098 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength)); | |
1099 } else { | |
1100 expect=in; | |
1101 expectLength=inLength; | |
1102 expectNeeded=FALSE; | |
1103 } | |
1104 } | |
1105 index=iter->getIndex(iter, UITER_CURRENT); | |
1106 | |
1107 if(U_FAILURE(errorCode)) { | |
1108 log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s -
(Are you missing data?)\n", | |
1109 forward, _modeString[mode], i, u_errorName(errorCode)); | |
1110 return; | |
1111 } | |
1112 if(expectIndex!=index) { | |
1113 log_err("error unorm iteration (next/previous %d %s): index[%d] wron
g, got %d expected %d\n", | |
1114 forward, _modeString[mode], i, index, expectIndex); | |
1115 return; | |
1116 } | |
1117 if(expectLength!=length) { | |
1118 log_err("error unorm iteration (next/previous %d %s): length[%d] wro
ng, got %d expected %d\n", | |
1119 forward, _modeString[mode], i, length, expectLength); | |
1120 return; | |
1121 } | |
1122 if(0!=u_memcmp(expect, buffer, length)) { | |
1123 log_err("error unorm iteration (next/previous %d %s): output string[
%d] wrong\n", | |
1124 forward, _modeString[mode], i); | |
1125 return; | |
1126 } | |
1127 if(neededToNormalize!=expectNeeded) { | |
1128 } | |
1129 | |
1130 if(forward) { | |
1131 expect+=expectLength+1; /* go after the + */ | |
1132 ++i; | |
1133 } else { | |
1134 --expect; /* go before the + */ | |
1135 --i; | |
1136 } | |
1137 } | |
1138 } | |
1139 | |
1140 static void | |
1141 TestNextPrevious() { | |
1142 static const UChar | |
1143 src[]={ /* input string */ | |
1144 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133 | |
1145 }, | |
1146 nfd[]={ /* + separates expected output pieces */ | |
1147 0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x11
61, _PLUS, 0x3133 | |
1148 }, | |
1149 nfkd[]={ | |
1150 0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x11
61, _PLUS, 0x11aa | |
1151 }, | |
1152 nfc[]={ | |
1153 0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133 | |
1154 }, | |
1155 nfkc[]={ | |
1156 0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03 | |
1157 }, | |
1158 fcd[]={ | |
1159 0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x31
33 | |
1160 }; | |
1161 | |
1162 /* expected iterator indexes in the source string for each iteration piece *
/ | |
1163 static const int32_t | |
1164 nfdIndexes[]={ | |
1165 0, 1, 2, 5, 6, 7 | |
1166 }, | |
1167 nfkdIndexes[]={ | |
1168 0, 1, 2, 5, 6, 7 | |
1169 }, | |
1170 nfcIndexes[]={ | |
1171 0, 1, 2, 5, 6, 7 | |
1172 }, | |
1173 nfkcIndexes[]={ | |
1174 0, 1, 2, 5, 7 | |
1175 }, | |
1176 fcdIndexes[]={ | |
1177 0, 1, 2, 5, 6, 7 | |
1178 }; | |
1179 | |
1180 UCharIterator iter; | |
1181 | |
1182 UChar buffer[4]; | |
1183 int32_t length; | |
1184 | |
1185 UBool neededToNormalize; | |
1186 UErrorCode errorCode; | |
1187 | |
1188 uiter_setString(&iter, src, sizeof(src)/U_SIZEOF_UCHAR); | |
1189 | |
1190 /* test iteration with doNormalize */ | |
1191 iter.index=0; | |
1192 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, nfd, size
of(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4); | |
1193 iter.index=0; | |
1194 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, nfkd, si
zeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4); | |
1195 iter.index=0; | |
1196 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, nfc, size
of(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4); | |
1197 iter.index=0; | |
1198 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, nfkc, si
zeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4); | |
1199 iter.index=0; | |
1200 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, fcd, size
of(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4); | |
1201 | |
1202 iter.index=iter.length; | |
1203 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, nfd, siz
eof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4); | |
1204 iter.index=iter.length; | |
1205 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, nfkd, s
izeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4); | |
1206 iter.index=iter.length; | |
1207 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, nfc, siz
eof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4); | |
1208 iter.index=iter.length; | |
1209 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, nfkc, s
izeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4); | |
1210 iter.index=iter.length; | |
1211 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, fcd, siz
eof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4); | |
1212 | |
1213 /* test iteration without doNormalize */ | |
1214 iter.index=0; | |
1215 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, NULL, 0,
nfdIndexes, sizeof(nfdIndexes)/4); | |
1216 iter.index=0; | |
1217 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, NULL, 0,
nfkdIndexes, sizeof(nfkdIndexes)/4); | |
1218 iter.index=0; | |
1219 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, NULL, 0,
nfcIndexes, sizeof(nfcIndexes)/4); | |
1220 iter.index=0; | |
1221 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, NULL, 0,
nfkcIndexes, sizeof(nfkcIndexes)/4); | |
1222 iter.index=0; | |
1223 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, NULL, 0,
fcdIndexes, sizeof(fcdIndexes)/4); | |
1224 | |
1225 iter.index=iter.length; | |
1226 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, NULL, 0,
nfdIndexes, sizeof(nfdIndexes)/4); | |
1227 iter.index=iter.length; | |
1228 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, NULL, 0
, nfkdIndexes, sizeof(nfkdIndexes)/4); | |
1229 iter.index=iter.length; | |
1230 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, NULL, 0,
nfcIndexes, sizeof(nfcIndexes)/4); | |
1231 iter.index=iter.length; | |
1232 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, NULL, 0
, nfkcIndexes, sizeof(nfkcIndexes)/4); | |
1233 iter.index=iter.length; | |
1234 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, NULL, 0,
fcdIndexes, sizeof(fcdIndexes)/4); | |
1235 | |
1236 /* try without neededToNormalize */ | |
1237 errorCode=U_ZERO_ERROR; | |
1238 buffer[0]=5; | |
1239 iter.index=1; | |
1240 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
1241 UNORM_NFD, 0, TRUE, NULL, | |
1242 &errorCode); | |
1243 if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[
3]) { | |
1244 log_data_err("error unorm_next(without needed) %s - (Are you missing dat
a?)\n", u_errorName(errorCode)); | |
1245 return; | |
1246 } | |
1247 | |
1248 /* preflight */ | |
1249 neededToNormalize=9; | |
1250 iter.index=1; | |
1251 length=unorm_next(&iter, NULL, 0, | |
1252 UNORM_NFD, 0, TRUE, &neededToNormalize, | |
1253 &errorCode); | |
1254 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!
=2) { | |
1255 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCod
e)); | |
1256 return; | |
1257 } | |
1258 | |
1259 errorCode=U_ZERO_ERROR; | |
1260 buffer[0]=buffer[1]=5; | |
1261 neededToNormalize=9; | |
1262 iter.index=1; | |
1263 length=unorm_next(&iter, buffer, 1, | |
1264 UNORM_NFD, 0, TRUE, &neededToNormalize, | |
1265 &errorCode); | |
1266 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!
=2 || buffer[1]!=5) { | |
1267 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode)); | |
1268 return; | |
1269 } | |
1270 | |
1271 /* no iterator */ | |
1272 errorCode=U_ZERO_ERROR; | |
1273 buffer[0]=buffer[1]=5; | |
1274 neededToNormalize=9; | |
1275 iter.index=1; | |
1276 length=unorm_next(NULL, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
1277 UNORM_NFD, 0, TRUE, &neededToNormalize, | |
1278 &errorCode); | |
1279 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
1280 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode)); | |
1281 return; | |
1282 } | |
1283 | |
1284 /* illegal mode */ | |
1285 buffer[0]=buffer[1]=5; | |
1286 neededToNormalize=9; | |
1287 iter.index=1; | |
1288 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
1289 (UNormalizationMode)0, 0, TRUE, &neededToNormalize, | |
1290 &errorCode); | |
1291 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
1292 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode)); | |
1293 return; | |
1294 } | |
1295 | |
1296 /* error coming in */ | |
1297 errorCode=U_MISPLACED_QUANTIFIER; | |
1298 buffer[0]=5; | |
1299 iter.index=1; | |
1300 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
1301 UNORM_NFD, 0, TRUE, NULL, | |
1302 &errorCode); | |
1303 if(errorCode!=U_MISPLACED_QUANTIFIER) { | |
1304 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(err
orCode)); | |
1305 return; | |
1306 } | |
1307 } | |
1308 | |
1309 static void | |
1310 TestFCNFKCClosure(void) { | |
1311 static const struct { | |
1312 UChar32 c; | |
1313 const UChar s[6]; | |
1314 } tests[]={ | |
1315 { 0x00C4, { 0 } }, | |
1316 { 0x00E4, { 0 } }, | |
1317 { 0x037A, { 0x0020, 0x03B9, 0 } }, | |
1318 { 0x03D2, { 0x03C5, 0 } }, | |
1319 { 0x20A8, { 0x0072, 0x0073, 0 } }, | |
1320 { 0x210B, { 0x0068, 0 } }, | |
1321 { 0x210C, { 0x0068, 0 } }, | |
1322 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } }, | |
1323 { 0x2122, { 0x0074, 0x006D, 0 } }, | |
1324 { 0x2128, { 0x007A, 0 } }, | |
1325 { 0x1D5DB, { 0x0068, 0 } }, | |
1326 { 0x1D5ED, { 0x007A, 0 } }, | |
1327 { 0x0061, { 0 } } | |
1328 }; | |
1329 | |
1330 UChar buffer[8]; | |
1331 UErrorCode errorCode; | |
1332 int32_t i, length; | |
1333 | |
1334 for(i=0; i<UPRV_LENGTHOF(tests); ++i) { | |
1335 errorCode=U_ZERO_ERROR; | |
1336 length=u_getFC_NFKC_Closure(tests[i].c, buffer, UPRV_LENGTHOF(buffer), &
errorCode); | |
1337 if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests
[i].s, buffer)) { | |
1338 log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you
missing data?)\n", tests[i].c, u_errorName(errorCode)); | |
1339 } | |
1340 } | |
1341 | |
1342 /* error handling */ | |
1343 errorCode=U_ZERO_ERROR; | |
1344 length=u_getFC_NFKC_Closure(0x5c, NULL, UPRV_LENGTHOF(buffer), &errorCode); | |
1345 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
1346 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(e
rrorCode)); | |
1347 } | |
1348 | |
1349 length=u_getFC_NFKC_Closure(0x5c, buffer, UPRV_LENGTHOF(buffer), &errorCode)
; | |
1350 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
1351 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(e
rrorCode)); | |
1352 } | |
1353 } | |
1354 | |
1355 static void | |
1356 TestQuickCheckPerCP() { | |
1357 UErrorCode errorCode; | |
1358 UChar32 c, lead, trail; | |
1359 UChar s[U16_MAX_LENGTH], nfd[16]; | |
1360 int32_t length, lccc1, lccc2, tccc1, tccc2; | |
1361 int32_t qc1, qc2; | |
1362 | |
1363 if( | |
1364 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES || | |
1365 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES || | |
1366 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE || | |
1367 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE |
| | |
1368 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getInt
PropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) || | |
1369 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIn
tPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) | |
1370 ) { | |
1371 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*
_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n"); | |
1372 } | |
1373 | |
1374 /* | |
1375 * compare the quick check property values for some code points | |
1376 * to the quick check results for checking same-code point strings | |
1377 */ | |
1378 errorCode=U_ZERO_ERROR; | |
1379 c=0; | |
1380 while(c<0x110000) { | |
1381 length=0; | |
1382 U16_APPEND_UNSAFE(s, length, c); | |
1383 | |
1384 qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK); | |
1385 qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode); | |
1386 if(qc1!=qc2) { | |
1387 log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(N
FC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); | |
1388 } | |
1389 | |
1390 qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK); | |
1391 qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode); | |
1392 if(qc1!=qc2) { | |
1393 log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(N
FD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); | |
1394 } | |
1395 | |
1396 qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK); | |
1397 qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode); | |
1398 if(qc1!=qc2) { | |
1399 log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(
NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); | |
1400 } | |
1401 | |
1402 qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK); | |
1403 qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode); | |
1404 if(qc1!=qc2) { | |
1405 log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(
NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); | |
1406 } | |
1407 | |
1408 length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, UPRV_LENGTHOF(nfd),
&errorCode); | |
1409 /* length-length == 0 is used to get around a compiler warning. */ | |
1410 U16_GET(nfd, 0, length-length, length, lead); | |
1411 U16_GET(nfd, 0, length-1, length, trail); | |
1412 | |
1413 lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS); | |
1414 lccc2=u_getCombiningClass(lead); | |
1415 tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS); | |
1416 tccc2=u_getCombiningClass(trail); | |
1417 | |
1418 if(lccc1!=lccc2) { | |
1419 log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningCla
ss(lead) for U+%04x\n", | |
1420 lccc1, lccc2, c); | |
1421 } | |
1422 if(tccc1!=tccc2) { | |
1423 log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningCla
ss(trail) for U+%04x\n", | |
1424 tccc1, tccc2, c); | |
1425 } | |
1426 | |
1427 /* skip some code points */ | |
1428 c=(20*c)/19+1; | |
1429 } | |
1430 } | |
1431 | |
1432 static void | |
1433 TestComposition(void) { | |
1434 static const struct { | |
1435 UNormalizationMode mode; | |
1436 uint32_t options; | |
1437 UChar input[12]; | |
1438 UChar expect[12]; | |
1439 } cases[]={ | |
1440 /* | |
1441 * special cases for UAX #15 bug | |
1442 * see Unicode Corrigendum #5: Normalization Idempotency | |
1443 * at http://unicode.org/versions/corrigendum5.html | |
1444 * (was Public Review Issue #29) | |
1445 */ | |
1446 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x
0300, 0x1161, 0x0327 } }, | |
1447 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x
0300, 0x1161, 0x0327, 0x11a8 } }, | |
1448 { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x
0327, 0x0300, 0x11a8 } }, | |
1449 { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x
0300, 0x0b3e } }, | |
1450 | |
1451 /* TODO: add test cases for UNORM_FCC here (j2151) */ | |
1452 }; | |
1453 | |
1454 UChar output[16]; | |
1455 UErrorCode errorCode; | |
1456 int32_t i, length; | |
1457 | |
1458 for(i=0; i<UPRV_LENGTHOF(cases); ++i) { | |
1459 errorCode=U_ZERO_ERROR; | |
1460 length=unorm_normalize( | |
1461 cases[i].input, -1, | |
1462 cases[i].mode, cases[i].options, | |
1463 output, UPRV_LENGTHOF(output), | |
1464 &errorCode); | |
1465 if( U_FAILURE(errorCode) || | |
1466 length!=u_strlen(cases[i].expect) || | |
1467 0!=u_memcmp(output, cases[i].expect, length) | |
1468 ) { | |
1469 log_data_err("unexpected result for case %d - (Are you missing data?
)\n", i); | |
1470 } | |
1471 } | |
1472 } | |
1473 | |
1474 static void | |
1475 TestGetDecomposition() { | |
1476 UChar decomp[32]; | |
1477 int32_t length; | |
1478 | |
1479 UErrorCode errorCode=U_ZERO_ERROR; | |
1480 const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIG
UOUS, &errorCode); | |
1481 if(U_FAILURE(errorCode)) { | |
1482 log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_
errorName(errorCode)); | |
1483 return; | |
1484 } | |
1485 | |
1486 length=unorm2_getDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &err
orCode); | |
1487 if(U_FAILURE(errorCode) || length>=0) { | |
1488 log_err("unorm2_getDecomposition(fcc, space) failed\n"); | |
1489 } | |
1490 errorCode=U_ZERO_ERROR; | |
1491 length=unorm2_getDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &err
orCode); | |
1492 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308
|| decomp[2]!=0) { | |
1493 log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n"); | |
1494 } | |
1495 errorCode=U_ZERO_ERROR; | |
1496 length=unorm2_getDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &e
rrorCode); | |
1497 if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x11
61 || decomp[2]!=0x11a8 || decomp[3]!=0) { | |
1498 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n")
; | |
1499 } | |
1500 errorCode=U_ZERO_ERROR; | |
1501 length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode); | |
1502 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) { | |
1503 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow f
ailed\n"); | |
1504 } | |
1505 errorCode=U_ZERO_ERROR; | |
1506 length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode); | |
1507 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
1508 log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n"); | |
1509 } | |
1510 errorCode=U_ZERO_ERROR; | |
1511 length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode); | |
1512 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
1513 log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n"); | |
1514 } | |
1515 } | |
1516 | |
1517 static void | |
1518 TestGetRawDecomposition() { | |
1519 UChar decomp[32]; | |
1520 int32_t length; | |
1521 | |
1522 UErrorCode errorCode=U_ZERO_ERROR; | |
1523 const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode); | |
1524 if(U_FAILURE(errorCode)) { | |
1525 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_err
orName(errorCode)); | |
1526 return; | |
1527 } | |
1528 /* | |
1529 * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping v
alues, | |
1530 * without recursive decomposition. | |
1531 */ | |
1532 | |
1533 length=unorm2_getRawDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &
errorCode); | |
1534 if(U_FAILURE(errorCode) || length>=0) { | |
1535 log_err("unorm2_getDecomposition(nfkc, space) failed\n"); | |
1536 } | |
1537 errorCode=U_ZERO_ERROR; | |
1538 length=unorm2_getRawDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &
errorCode); | |
1539 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308
|| decomp[2]!=0) { | |
1540 log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n"); | |
1541 } | |
1542 /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */ | |
1543 errorCode=U_ZERO_ERROR; | |
1544 length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, UPRV_LENGTHOF(decomp),
&errorCode); | |
1545 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301
|| decomp[2]!=0) { | |
1546 log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n"); | |
1547 } | |
1548 /* U+212B ANGSTROM SIGN */ | |
1549 errorCode=U_ZERO_ERROR; | |
1550 length=unorm2_getRawDecomposition(n2, 0x212b, decomp, UPRV_LENGTHOF(decomp),
&errorCode); | |
1551 if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) { | |
1552 log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n"); | |
1553 } | |
1554 errorCode=U_ZERO_ERROR; | |
1555 length=unorm2_getRawDecomposition(n2, 0xac00, decomp, UPRV_LENGTHOF(decomp),
&errorCode); | |
1556 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x11
61 || decomp[2]!=0) { | |
1557 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n"
); | |
1558 } | |
1559 /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */ | |
1560 errorCode=U_ZERO_ERROR; | |
1561 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp),
&errorCode); | |
1562 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11
a8 || decomp[2]!=0) { | |
1563 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n"
); | |
1564 } | |
1565 errorCode=U_ZERO_ERROR; | |
1566 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode); | |
1567 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) { | |
1568 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow
failed\n"); | |
1569 } | |
1570 errorCode=U_ZERO_ERROR; | |
1571 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode); | |
1572 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
1573 log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n"); | |
1574 } | |
1575 errorCode=U_ZERO_ERROR; | |
1576 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode); | |
1577 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
1578 log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n"); | |
1579 } | |
1580 } | |
1581 | |
1582 static void | |
1583 TestAppendRestoreMiddle() { | |
1584 UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 }; /* last chars are 'A' and
'cedilla' NFC */ | |
1585 static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 }; /* first char is 'ri
ng above' NFC */ | |
1586 /* NFC: C5 is 'A with ring above' */ | |
1587 static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0
x66 }; | |
1588 int32_t length; | |
1589 UErrorCode errorCode=U_ZERO_ERROR; | |
1590 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode); | |
1591 if(U_FAILURE(errorCode)) { | |
1592 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_erro
rName(errorCode)); | |
1593 return; | |
1594 } | |
1595 /* | |
1596 * Use length=-1 to fool the estimate of the ReorderingBuffer capacity. | |
1597 * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A> | |
1598 * still fits into a[] but the full result still overflows this capacity. | |
1599 * (Let it modify the destination buffer before reallocating internally.) | |
1600 */ | |
1601 length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode); | |
1602 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=UPRV_LENGTHOF(expected)) { | |
1603 log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)l
ength); | |
1604 return; | |
1605 } | |
1606 /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */ | |
1607 if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[
5]!=0) { | |
1608 log_err("unorm2_append(overflow) modified the first string\n"); | |
1609 return; | |
1610 } | |
1611 errorCode=U_ZERO_ERROR; | |
1612 length=unorm2_append(n2, a, -1, UPRV_LENGTHOF(a), b, -1, &errorCode); | |
1613 if(U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(expected) || 0!=u_memcmp(a,
expected, length)) { | |
1614 log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(erro
rCode), (int)length); | |
1615 return; | |
1616 } | |
1617 } | |
1618 | |
1619 static void | |
1620 TestGetEasyToUseInstance() { | |
1621 static const UChar in[]={ | |
1622 0xA0, /* -> <noBreak> 0020 */ | |
1623 0xC7, 0x301 /* = 1E08 = 0043 0327 0301 */ | |
1624 }; | |
1625 UChar out[32]; | |
1626 int32_t length; | |
1627 | |
1628 UErrorCode errorCode=U_ZERO_ERROR; | |
1629 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode); | |
1630 if(U_FAILURE(errorCode)) { | |
1631 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_erro
rName(errorCode)); | |
1632 return; | |
1633 } | |
1634 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out),
&errorCode); | |
1635 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) { | |
1636 log_err("unorm2_getNFCInstance() did not return an NFC instance (normali
zed length=%d; %s)\n", | |
1637 (int)length, u_errorName(errorCode)); | |
1638 } | |
1639 | |
1640 errorCode=U_ZERO_ERROR; | |
1641 n2=unorm2_getNFDInstance(&errorCode); | |
1642 if(U_FAILURE(errorCode)) { | |
1643 log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_erro
rName(errorCode)); | |
1644 return; | |
1645 } | |
1646 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out),
&errorCode); | |
1647 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[
2]!=0x327 || out[3]!=0x301) { | |
1648 log_err("unorm2_getNFDInstance() did not return an NFD instance (normali
zed length=%d; %s)\n", | |
1649 (int)length, u_errorName(errorCode)); | |
1650 } | |
1651 | |
1652 errorCode=U_ZERO_ERROR; | |
1653 n2=unorm2_getNFKCInstance(&errorCode); | |
1654 if(U_FAILURE(errorCode)) { | |
1655 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_err
orName(errorCode)); | |
1656 return; | |
1657 } | |
1658 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out),
&errorCode); | |
1659 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) { | |
1660 log_err("unorm2_getNFKCInstance() did not return an NFKC instance (norma
lized length=%d; %s)\n", | |
1661 (int)length, u_errorName(errorCode)); | |
1662 } | |
1663 | |
1664 errorCode=U_ZERO_ERROR; | |
1665 n2=unorm2_getNFKDInstance(&errorCode); | |
1666 if(U_FAILURE(errorCode)) { | |
1667 log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_err
orName(errorCode)); | |
1668 return; | |
1669 } | |
1670 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out),
&errorCode); | |
1671 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[
2]!=0x327 || out[3]!=0x301) { | |
1672 log_err("unorm2_getNFKDInstance() did not return an NFKD instance (norma
lized length=%d; %s)\n", | |
1673 (int)length, u_errorName(errorCode)); | |
1674 } | |
1675 | |
1676 errorCode=U_ZERO_ERROR; | |
1677 n2=unorm2_getNFKCCasefoldInstance(&errorCode); | |
1678 if(U_FAILURE(errorCode)) { | |
1679 log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n
", u_errorName(errorCode)); | |
1680 return; | |
1681 } | |
1682 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out),
&errorCode); | |
1683 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) { | |
1684 log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefol
d instance (normalized length=%d; %s)\n", | |
1685 (int)length, u_errorName(errorCode)); | |
1686 } | |
1687 } | |
1688 | |
1689 #endif /* #if !UCONFIG_NO_NORMALIZATION */ | |
OLD | NEW |