OLD | NEW |
(Empty) | |
| 1 /******************************************************************** |
| 2 * COPYRIGHT: |
| 3 * Copyright (c) 1997-2010, International Business Machines Corporation and |
| 4 * others. All Rights Reserved. |
| 5 ********************************************************************/ |
| 6 /*******************************************************************************
* |
| 7 * |
| 8 * File CNORMTST.C |
| 9 * |
| 10 * Modification History: |
| 11 * Name Description |
| 12 * Madhu Katragadda Ported for C API |
| 13 * synwee added test for quick check |
| 14 * synwee added test for checkFCD |
| 15 ********************************************************************************
*/ |
| 16 /*tests for u_normalization*/ |
| 17 #include "unicode/utypes.h" |
| 18 #include "unicode/unorm.h" |
| 19 #include "cintltst.h" |
| 20 |
| 21 #if UCONFIG_NO_NORMALIZATION |
| 22 |
| 23 void addNormTest(TestNode** root) { |
| 24 /* no normalization - nothing to do */ |
| 25 } |
| 26 |
| 27 #else |
| 28 |
| 29 #include <stdlib.h> |
| 30 #include <time.h> |
| 31 #include "unicode/uchar.h" |
| 32 #include "unicode/ustring.h" |
| 33 #include "unicode/unorm.h" |
| 34 #include "cnormtst.h" |
| 35 |
| 36 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof ((array)[0])) |
| 37 |
| 38 static void |
| 39 TestAPI(void); |
| 40 |
| 41 static void |
| 42 TestNormCoverage(void); |
| 43 |
| 44 static void |
| 45 TestConcatenate(void); |
| 46 |
| 47 static void |
| 48 TestNextPrevious(void); |
| 49 |
| 50 static void TestIsNormalized(void); |
| 51 |
| 52 static void |
| 53 TestFCNFKCClosure(void); |
| 54 |
| 55 static void |
| 56 TestQuickCheckPerCP(void); |
| 57 |
| 58 static void |
| 59 TestComposition(void); |
| 60 |
| 61 static void |
| 62 TestFCD(void); |
| 63 |
| 64 static void |
| 65 TestGetDecomposition(void); |
| 66 |
/*
 * Canonical normalization cases. Each row is { input, decomposed, composed },
 * written with backslash-u escapes that are converted via CharsToUChars().
 * TestDecomp (UNORM_NFD) compares against column 1 and
 * TestCanonDecompCompose (UNORM_NFC) compares against column 2.
 */
| 67 static const char* const canonTests[][3] = { |
| 68 /* Input*/ /*Decomposed*/ /*Composed*/ |
| 69 { "cat", "cat", "cat"
}, |
| 70 { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark
", }, |
| 71 |
| 72 { "\\u1e0a", "D\\u0307", "\\u1e0a"
}, /* D-dot_above*/ |
| 73 { "D\\u0307", "D\\u0307", "\\u1e0a"
}, /* D dot_above*/ |
| 74 |
| 75 { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307
" }, /* D-dot_below dot_above*/ |
| 76 { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307
" }, /* D-dot_above dot_below */ |
| 77 { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307"
}, /* D dot_below dot_above */ |
| 78 |
| 79 { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\
u0307" }, /*D dot_below cedilla dot_above*/ |
| 80 { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\
\u0307" }, /* D dot_above ogonek dot_below*/ |
| 81 |
| 82 { "\\u1E14", "E\\u0304\\u0300", "\\u1E14"
}, /* E-macron-grave*/ |
| 83 { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14"
}, /* E-macron + grave*/ |
| 84 { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304
" }, /* E-grave + macron*/ |
| 85 |
| 86 { "\\u212b", "A\\u030a", "\\u00c5"
}, /* angstrom_sign*/ |
| 87 { "\\u00c5", "A\\u030a", "\\u00c5"
}, /* A-ring*/ |
| 88 |
| 89 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin"
}, |
| 90 { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n"
}, |
| 91 |
| 92 { "Henry IV", "Henry IV", "Henry IV"
}, |
| 93 { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163"
}, |
| 94 |
| 95 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC"
}, /* ga (Katakana)*/ |
| 96 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC"
}, /*ka + ten*/ |
| 97 { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E"
}, /* hw_ka + hw_ten*/ |
| 98 { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E"
}, /* ka + hw_ten*/ |
| 99 { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099"
}, /* hw_ka + ten*/ |
| 100 { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316"
}, /* A + U+0300 + U+0316: the two marks are reordered when decomposed */ |
| 101 { "", "", "" } |
| 102 }; |
| 103 |
/*
 * Compatibility normalization cases. Each row is { input, decomposed,
 * composed }, written with backslash-u escapes that are converted via
 * CharsToUChars(). TestCompatDecomp (UNORM_NFKD) compares against column 1
 * and TestCompatDecompCompose (UNORM_NFKC) compares against column 2.
 */
| 104 static const char* const compatTests[][3] = { |
| 105 /* Input*/ /*Decomposed */ /*Compos
ed*/ |
| 106 { "cat", "cat", "cat"
}, |
| 107 |
| 108 { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u
05DC" }, /* Alef-Lamed vs. Alef, Lamed*/ |
| 109 |
| 110 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4f
fin" }, |
| 111 { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4f
fin" }, /* ffi ligature -> f + f + i*/ |
| 112 |
| 113 { "Henry IV", "Henry IV", "Henry I
V" }, |
| 114 { "Henry \\u2163", "Henry IV", "Henry
IV" }, |
| 115 |
| 116 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC"
}, /* ga (Katakana)*/ |
| 117 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC
" }, /*ka + ten*/ |
| 118 |
| 119 { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC
" }, /* hw_ka + ten*/ |
| 120 |
| 121 /*These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/ |
| 122 { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC
" }, /* hw_ka + hw_ten*/ |
| 123 { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC
" }, /* ka + hw_ten*/ |
| 124 { "", "", "" } |
| 125 }; |
| 126 |
/*
 * Cases for TestFCD (UNORM_FCD). Each row is { input, expected output, NULL }.
 * TestNormCases() reads column 1 for every non-composing mode, so the third
 * column is never dereferenced for these rows.
 */
| 127 static const char* const fcdTests[][3] = { |
| 128 /* Added for testing the below-U+0300 prefix of a NUL-terminated string. */ |
| 129 { "\\u010e\\u0327", "D\\u0327\\u030c", NULL }, /* D-caron + cedilla */ |
| 130 { "\\u010e", "\\u010e", NULL } /* D-caron */ |
| 131 }; |
| 132 |
| 133 void addNormTest(TestNode** root); |
| 134 |
| 135 void addNormTest(TestNode** root) |
| 136 { |
| 137 addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI"); |
| 138 addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp"); |
| 139 addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp"); |
| 140 addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompo
se"); |
| 141 addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCom
pose"); |
| 142 addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD"); |
| 143 addTest(root, &TestNull, "tsnorm/cnormtst/TestNull"); |
| 144 addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck"); |
| 145 addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP"); |
| 146 addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized"); |
| 147 addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD"); |
| 148 addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage"); |
| 149 addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate"); |
| 150 addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious"); |
| 151 addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure"); |
| 152 addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition"); |
| 153 addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition")
; |
| 154 } |
| 155 |
/*
 * Printable names of the UNormalizationMode constants, indexed directly by
 * the enum value (see modeStrings[mode] in TestNormCases()).
 * UNormalizationMode starts at UNORM_NONE==1, so index 0 holds a placeholder;
 * without it every logged mode name would be off by one.
 */
static const char* const modeStrings[]={
    "?",
    "UNORM_NONE",
    "UNORM_NFD",
    "UNORM_NFKD",
    "UNORM_NFC",
    "UNORM_NFKC",
    "UNORM_FCD",
    "UNORM_MODE_COUNT"
};
| 165 |
| 166 static void TestNormCases(UNormalizationMode mode, |
| 167 const char* const cases[][3], int32_t lengthOfCases) { |
| 168 int32_t x, neededLen, length2; |
| 169 int32_t expIndex= (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1; |
| 170 UChar *source=NULL; |
| 171 UChar result[16]; |
| 172 log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]); |
| 173 for(x=0; x < lengthOfCases; x++) |
| 174 { |
| 175 UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR; |
| 176 source=CharsToUChars(cases[x][0]); |
| 177 neededLen= unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &
status); |
| 178 length2= unorm_normalize(source, -1, mode, 0, NULL, 0, &status2); |
| 179 if(neededLen!=length2) { |
| 180 log_err("ERROR in unorm_normalize(%s)[%d]: " |
| 181 "preflight length/NUL %d!=%d preflight length/srcLength\n", |
| 182 modeStrings[mode], (int)x, (int)neededLen, (int)length2); |
| 183 } |
| 184 if(status==U_BUFFER_OVERFLOW_ERROR) |
| 185 { |
| 186 status=U_ZERO_ERROR; |
| 187 } |
| 188 length2=unorm_normalize(source, u_strlen(source), mode, 0, result, LENGT
HOF(result), &status); |
| 189 if(U_FAILURE(status) || neededLen!=length2) { |
| 190 log_data_err("ERROR in unorm_normalize(%s/NUL) at %s: %s - (Are you
missing data?)\n", |
| 191 modeStrings[mode], austrdup(source), myErrorName(status
)); |
| 192 } else { |
| 193 assertEqual(result, cases[x][expIndex], x); |
| 194 } |
| 195 length2=unorm_normalize(source, -1, mode, 0, result, LENGTHOF(result), &
status); |
| 196 if(U_FAILURE(status) || neededLen!=length2) { |
| 197 log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s: %s - (A
re you missing data?)\n", |
| 198 modeStrings[mode], austrdup(source), myErrorName(status
)); |
| 199 } else { |
| 200 assertEqual(result, cases[x][expIndex], x); |
| 201 } |
| 202 free(source); |
| 203 } |
| 204 } |
| 205 |
| 206 void TestDecomp() { |
| 207 TestNormCases(UNORM_NFD, canonTests, LENGTHOF(canonTests)); |
| 208 } |
| 209 |
| 210 void TestCompatDecomp() { |
| 211 TestNormCases(UNORM_NFKD, compatTests, LENGTHOF(compatTests)); |
| 212 } |
| 213 |
| 214 void TestCanonDecompCompose() { |
| 215 TestNormCases(UNORM_NFC, canonTests, LENGTHOF(canonTests)); |
| 216 } |
| 217 |
| 218 void TestCompatDecompCompose() { |
| 219 TestNormCases(UNORM_NFKC, compatTests, LENGTHOF(compatTests)); |
| 220 } |
| 221 |
| 222 void TestFCD() { |
| 223 TestNormCases(UNORM_FCD, fcdTests, LENGTHOF(fcdTests)); |
| 224 } |
| 225 |
| 226 static void assertEqual(const UChar* result, const char* expected, int32_t index
) |
| 227 { |
| 228 UChar *expectedUni = CharsToUChars(expected); |
| 229 if(u_strcmp(result, expectedUni)!=0){ |
| 230 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n"
, index, expected, |
| 231 austrdup(result) ); |
| 232 } |
| 233 free(expectedUni); |
| 234 } |
| 235 |
| 236 static void TestNull_check(UChar *src, int32_t srcLen, |
| 237 UChar *exp, int32_t expLen, |
| 238 UNormalizationMode mode, |
| 239 const char *name) |
| 240 { |
| 241 UErrorCode status = U_ZERO_ERROR; |
| 242 int32_t len, i; |
| 243 |
| 244 UChar result[50]; |
| 245 |
| 246 |
| 247 status = U_ZERO_ERROR; |
| 248 |
| 249 for(i=0;i<50;i++) |
| 250 { |
| 251 result[i] = 0xFFFD; |
| 252 } |
| 253 |
| 254 len = unorm_normalize(src, srcLen, mode, 0, result, 50, &status); |
| 255 |
| 256 if(U_FAILURE(status)) { |
| 257 log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missin
g data?)\n", name, u_errorName(status)); |
| 258 } else if (len != expLen) { |
| 259 log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n
", name, expLen, len); |
| 260 } |
| 261 |
| 262 { |
| 263 for(i=0;i<len;i++){ |
| 264 if(exp[i] != result[i]) { |
| 265 log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n", |
| 266 name, |
| 267 i, |
| 268 exp[i], |
| 269 result[i]); |
| 270 return; |
| 271 } |
| 272 log_verbose(" %d: \\u%04X\n", i, result[i]); |
| 273 } |
| 274 } |
| 275 |
| 276 log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name); |
| 277 } |
| 278 |
| 279 void TestNull() |
| 280 { |
| 281 |
| 282 UChar source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 }; |
| 283 int32_t source_comp_len = 4; |
| 284 UChar expect_comp[] = { 0x0061, 0x0000, 0x1e0a }; |
| 285 int32_t expect_comp_len = 3; |
| 286 |
| 287 UChar source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 }; |
| 288 int32_t source_dcmp_len = 3; |
| 289 UChar expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C }; |
| 290 int32_t expect_dcmp_len = 5; |
| 291 |
| 292 TestNull_check(source_comp, |
| 293 source_comp_len, |
| 294 expect_comp, |
| 295 expect_comp_len, |
| 296 UNORM_NFC, |
| 297 "UNORM_NFC"); |
| 298 |
| 299 TestNull_check(source_dcmp, |
| 300 source_dcmp_len, |
| 301 expect_dcmp, |
| 302 expect_dcmp_len, |
| 303 UNORM_NFD, |
| 304 "UNORM_NFD"); |
| 305 |
| 306 TestNull_check(source_comp, |
| 307 source_comp_len, |
| 308 expect_comp, |
| 309 expect_comp_len, |
| 310 UNORM_NFKC, |
| 311 "UNORM_NFKC"); |
| 312 |
| 313 |
| 314 } |
| 315 |
| 316 static void TestQuickCheckResultNO() |
| 317 { |
| 318 const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C, |
| 319 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E}; |
| 320 const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB, |
| 321 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E}; |
| 322 const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE, |
| 323 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D}; |
| 324 const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE, |
| 325 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D}; |
| 326 |
| 327 |
| 328 const int SIZE = 10; |
| 329 |
| 330 int count = 0; |
| 331 UErrorCode error = U_ZERO_ERROR; |
| 332 |
| 333 for (; count < SIZE; count ++) |
| 334 { |
| 335 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) != |
| 336 UNORM_NO) |
| 337 { |
| 338 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]); |
| 339 return; |
| 340 } |
| 341 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) != |
| 342 UNORM_NO) |
| 343 { |
| 344 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]); |
| 345 return; |
| 346 } |
| 347 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) != |
| 348 UNORM_NO) |
| 349 { |
| 350 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]); |
| 351 return; |
| 352 } |
| 353 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != |
| 354 UNORM_NO) |
| 355 { |
| 356 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); |
| 357 return; |
| 358 } |
| 359 } |
| 360 } |
| 361 |
| 362 |
| 363 static void TestQuickCheckResultYES() |
| 364 { |
| 365 const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A, |
| 366 0x2261, 0x3075, 0x4000, 0x5000, 0xF000}; |
| 367 const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500, |
| 368 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000}; |
| 369 const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB, |
| 370 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27}; |
| 371 const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000, |
| 372 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E}; |
| 373 |
| 374 const int SIZE = 10; |
| 375 int count = 0; |
| 376 UErrorCode error = U_ZERO_ERROR; |
| 377 |
| 378 UChar cp = 0; |
| 379 while (cp < 0xA0) |
| 380 { |
| 381 if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES) |
| 382 { |
| 383 log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)
\n", cp); |
| 384 return; |
| 385 } |
| 386 if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) != |
| 387 UNORM_YES) |
| 388 { |
| 389 log_err("ERROR in NFC quick check at U+%04x\n", cp); |
| 390 return; |
| 391 } |
| 392 if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES) |
| 393 { |
| 394 log_err("ERROR in NFKD quick check at U+%04x\n", cp); |
| 395 return; |
| 396 } |
| 397 if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) != |
| 398 UNORM_YES) |
| 399 { |
| 400 log_err("ERROR in NFKC quick check at U+%04x\n", cp); |
| 401 return; |
| 402 } |
| 403 cp ++; |
| 404 } |
| 405 |
| 406 for (; count < SIZE; count ++) |
| 407 { |
| 408 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) != |
| 409 UNORM_YES) |
| 410 { |
| 411 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]); |
| 412 return; |
| 413 } |
| 414 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) |
| 415 != UNORM_YES) |
| 416 { |
| 417 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]); |
| 418 return; |
| 419 } |
| 420 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) != |
| 421 UNORM_YES) |
| 422 { |
| 423 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]); |
| 424 return; |
| 425 } |
| 426 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != |
| 427 UNORM_YES) |
| 428 { |
| 429 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); |
| 430 return; |
| 431 } |
| 432 } |
| 433 } |
| 434 |
| 435 static void TestQuickCheckResultMAYBE() |
| 436 { |
| 437 const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161, |
| 438 0x116A, 0x1173, 0x1175, 0x3099, 0x309A}; |
| 439 const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E, |
| 440 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099}; |
| 441 |
| 442 |
| 443 const int SIZE = 10; |
| 444 |
| 445 int count = 0; |
| 446 UErrorCode error = U_ZERO_ERROR; |
| 447 |
| 448 /* NFD and NFKD does not have any MAYBE codepoints */ |
| 449 for (; count < SIZE; count ++) |
| 450 { |
| 451 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) != |
| 452 UNORM_MAYBE) |
| 453 { |
| 454 log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)
\n", CPNFC[count]); |
| 455 return; |
| 456 } |
| 457 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != |
| 458 UNORM_MAYBE) |
| 459 { |
| 460 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); |
| 461 return; |
| 462 } |
| 463 } |
| 464 } |
| 465 |
| 466 static void TestQuickCheckStringResult() |
| 467 { |
| 468 int count; |
| 469 UChar *d = NULL; |
| 470 UChar *c = NULL; |
| 471 UErrorCode error = U_ZERO_ERROR; |
| 472 |
| 473 for (count = 0; count < LENGTHOF(canonTests); count ++) |
| 474 { |
| 475 d = CharsToUChars(canonTests[count][1]); |
| 476 c = CharsToUChars(canonTests[count][2]); |
| 477 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) != |
| 478 UNORM_YES) |
| 479 { |
| 480 log_data_err("ERROR in NFD quick check for string at count %d - (Are you m
issing data?)\n", count); |
| 481 return; |
| 482 } |
| 483 |
| 484 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) == |
| 485 UNORM_NO) |
| 486 { |
| 487 log_err("ERROR in NFC quick check for string at count %d\n", count); |
| 488 return; |
| 489 } |
| 490 |
| 491 free(d); |
| 492 free(c); |
| 493 } |
| 494 |
| 495 for (count = 0; count < LENGTHOF(compatTests); count ++) |
| 496 { |
| 497 d = CharsToUChars(compatTests[count][1]); |
| 498 c = CharsToUChars(compatTests[count][2]); |
| 499 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) != |
| 500 UNORM_YES) |
| 501 { |
| 502 log_err("ERROR in NFKD quick check for string at count %d\n", count); |
| 503 return; |
| 504 } |
| 505 |
| 506 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) != |
| 507 UNORM_YES) |
| 508 { |
| 509 log_err("ERROR in NFKC quick check for string at count %d\n", count); |
| 510 return; |
| 511 } |
| 512 |
| 513 free(d); |
| 514 free(c); |
| 515 } |
| 516 } |
| 517 |
/*
 * Entry point for the quick-check tests: runs the NO, YES and MAYBE
 * single-code-point checks, then the whole-string checks, in that order.
 */
void TestQuickCheck()
{
    /* per-code-point expectations */
    TestQuickCheckResultNO();
    TestQuickCheckResultYES();
    TestQuickCheckResultMAYBE();

    /* expectations on the strings of the normalization tables */
    TestQuickCheckStringResult();
}
| 525 |
| 526 /* |
| 527 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_ |
| 528 * normalized, and some that are not. |
| 529 * Here we pick some specific cases and test the C API. |
| 530 */ |
| 531 static void TestIsNormalized(void) { |
| 532 static const UChar notNFC[][8]={ /* strings that are not in NFC *
/ |
| 533 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */ |
| 534 { 0xfb1d, 0 }, /* excluded from composition */ |
| 535 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */ |
| 536 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */ |
| 537 }; |
| 538 static const UChar notNFKC[][8]={ /* strings that are not in NFKC
*/ |
| 539 { 0x1100, 0x1161, 0 }, /* Jamo compose */ |
| 540 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */ |
| 541 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */ |
| 542 }; |
| 543 |
| 544 int32_t i; |
| 545 UErrorCode errorCode; |
| 546 |
| 547 /* API test */ |
| 548 |
| 549 /* normal case with length>=0 (length -1 used for special cases below) */ |
| 550 errorCode=U_ZERO_ERROR; |
| 551 if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(e
rrorCode)) { |
| 552 log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missin
g data?)\n", u_errorName(errorCode)); |
| 553 } |
| 554 |
| 555 /* incoming U_FAILURE */ |
| 556 errorCode=U_TRUNCATED_CHAR_FOUND; |
| 557 (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode); |
| 558 if(errorCode!=U_TRUNCATED_CHAR_FOUND) { |
| 559 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error c
ode to %s\n", u_errorName(errorCode)); |
| 560 } |
| 561 |
| 562 /* NULL source */ |
| 563 errorCode=U_ZERO_ERROR; |
| 564 (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode); |
| 565 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { |
| 566 log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_E
RROR but %s - (Are you missing data?)\n", u_errorName(errorCode)); |
| 567 } |
| 568 |
| 569 /* bad length */ |
| 570 errorCode=U_ZERO_ERROR; |
| 571 (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode); |
| 572 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { |
| 573 log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_E
RROR but %s - (Are you missing data?)\n", u_errorName(errorCode)); |
| 574 } |
| 575 |
| 576 /* specific cases */ |
| 577 for(i=0; i<LENGTHOF(notNFC); ++i) { |
| 578 errorCode=U_ZERO_ERROR; |
| 579 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE
(errorCode)) { |
| 580 log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (
Are you missing data?)\n", i, u_errorName(errorCode)); |
| 581 } |
| 582 errorCode=U_ZERO_ERROR; |
| 583 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILUR
E(errorCode)) { |
| 584 log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) -
(Are you missing data?)\n", i, u_errorName(errorCode)); |
| 585 } |
| 586 } |
| 587 for(i=0; i<LENGTHOF(notNFKC); ++i) { |
| 588 errorCode=U_ZERO_ERROR; |
| 589 if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILU
RE(errorCode)) { |
| 590 log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) -
(Are you missing data?)\n", i, u_errorName(errorCode)); |
| 591 } |
| 592 } |
| 593 } |
| 594 |
| 595 void TestCheckFCD() |
| 596 { |
| 597 UErrorCode status = U_ZERO_ERROR; |
| 598 static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x09, |
| 599 0x0A}; |
| 600 static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
|
| 601 0x02B9, 0x0314, 0x0315, 0x0316}; |
| 602 static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7, |
| 603 0x0050, 0x0730, 0x09EE, 0x1E10}; |
| 604 |
| 605 static const UChar datastr[][5] = |
| 606 { {0x0061, 0x030A, 0x1E05, 0x0302, 0}, |
| 607 {0x0061, 0x030A, 0x00E2, 0x0323, 0}, |
| 608 {0x0061, 0x0323, 0x00E2, 0x0323, 0}, |
| 609 {0x0061, 0x0323, 0x1E05, 0x0302, 0} }; |
| 610 static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES}; |
| 611 |
| 612 static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x6
7, 0x68, 0x69, |
| 613 0x6a, |
| 614 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8
, 0xe9, |
| 615 0xea, |
| 616 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x03
06, |
| 617 0x0307, 0x0308, 0x0309, 0x030a, |
| 618 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x03
26, |
| 619 0x0327, 0x0328, 0x0329, 0x032a, |
| 620 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e
06, |
| 621 0x1e07, 0x1e08, 0x1e09, 0x1e0a}; |
| 622 |
| 623 int count = 0; |
| 624 |
| 625 if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES) |
| 626 log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_qu
ickCheck is UNORM_YES - (Are you missing data?)\n"); |
| 627 if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO) |
| 628 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickC
heck is UNORM_NO\n"); |
| 629 if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES) |
| 630 log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm
_quickCheck is UNORM_YES - (Are you missing data?)\n"); |
| 631 |
| 632 if (U_FAILURE(status)) |
| 633 log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n",
u_errorName(status)); |
| 634 |
| 635 while (count < 4) |
| 636 { |
| 637 UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status); |
| 638 if (U_FAILURE(status)) { |
| 639 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set
%d - (Are you missing data?)\n", count); |
| 640 break; |
| 641 } |
| 642 else { |
| 643 if (result[count] != fcdresult) { |
| 644 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n",
count, |
| 645 result[count]); |
| 646 } |
| 647 } |
| 648 count ++; |
| 649 } |
| 650 |
| 651 /* random checks of long strings */ |
| 652 status = U_ZERO_ERROR; |
| 653 srand((unsigned)time( NULL )); |
| 654 |
| 655 for (count = 0; count < 50; count ++) |
| 656 { |
| 657 int size = 0; |
| 658 UBool testresult = UNORM_YES; |
| 659 UChar data[20]; |
| 660 UChar norm[100]; |
| 661 UChar nfd[100]; |
| 662 int normsize = 0; |
| 663 int nfdsize = 0; |
| 664 |
| 665 while (size != 19) { |
| 666 data[size] = datachar[(rand() * 50) / RAND_MAX]; |
| 667 log_verbose("0x%x", data[size]); |
| 668 normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0, |
| 669 norm + normsize, 100 - normsize, &status);
|
| 670 if (U_FAILURE(status)) { |
| 671 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data ge
neration - (Are you missing data?)\n"); |
| 672 break; |
| 673 } |
| 674 size ++; |
| 675 } |
| 676 log_verbose("\n"); |
| 677 |
| 678 nfdsize = unorm_normalize(data, size, UNORM_NFD, 0, |
| 679 nfd, 100, &status); |
| 680 if (U_FAILURE(status)) { |
| 681 log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalize
d data generation - (Are you missing data?)\n"); |
| 682 } |
| 683 |
| 684 if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) { |
| 685 testresult = UNORM_NO; |
| 686 } |
| 687 if (testresult == UNORM_YES) { |
| 688 log_verbose("result UNORM_YES\n"); |
| 689 } |
| 690 else { |
| 691 log_verbose("result UNORM_NO\n"); |
| 692 } |
| 693 |
| 694 if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAIL
URE(status)) { |
| 695 log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data -
(Are you missing data?)\n", testresult); |
| 696 } |
| 697 } |
| 698 } |
| 699 |
| 700 static void |
| 701 TestAPI() { |
| 702 static const UChar in[]={ 0x68, 0xe4 }; |
| 703 UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff }; |
| 704 UErrorCode errorCode; |
| 705 int32_t length; |
| 706 |
| 707 /* try preflighting */ |
| 708 errorCode=U_ZERO_ERROR; |
| 709 length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode); |
| 710 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) { |
| 711 log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s
- (Are you missing data?)\n", length, u_errorName(errorCode)); |
| 712 return; |
| 713 } |
| 714 |
| 715 errorCode=U_ZERO_ERROR; |
| 716 length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode); |
| 717 if(U_FAILURE(errorCode)) { |
| 718 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName
(errorCode)); |
| 719 return; |
| 720 } |
| 721 if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) { |
| 722 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+
%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]); |
| 723 return; |
| 724 } |
| 725 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode); |
| 726 if(U_FAILURE(errorCode)) { |
| 727 log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with
%s\n", (long)length, u_errorName(errorCode)); |
| 728 return; |
| 729 } |
| 730 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode); |
| 731 if(U_FAILURE(errorCode)) { |
| 732 log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with
%s\n", (long)length, u_errorName(errorCode)); |
| 733 return; |
| 734 } |
| 735 } |
| 736 |
/*
 * Code point constants from which TestNormCoverage() builds its input and
 * expected strings: compatibility Jamo, conjoining Jamo, Hangul syllables,
 * and supplementary-plane musical symbols (stored as surrogate pairs there).
 */
| 737 /* test cases to improve test code coverage */ |
| 738 enum { |
| 739 HANGUL_K_KIYEOK=0x3131, /* NFKD->Jamo L U+1100 */ |
| 740 HANGUL_K_WEO=0x315d, /* NFKD->Jamo V U+116f */ |
| 741 HANGUL_K_KIYEOK_SIOS=0x3133, /* NFKD->Jamo T U+11aa */ |
| 742 |
| 743 HANGUL_KIYEOK=0x1100, /* Jamo L U+1100 */ |
| 744 HANGUL_WEO=0x116f, /* Jamo V U+116f */ |
| 745 HANGUL_KIYEOK_SIOS=0x11aa, /* Jamo T U+11aa */ |
| 746 |
| 747 HANGUL_AC00=0xac00, /* Hangul syllable = Jamo LV U+ac00 */ |
| 748 HANGUL_SYLLABLE=0xac00+14*28+3, /* Hangul syllable = U+1100 * U+116f * U+11a
a */ |
| 749 |
| 750 MUSICAL_VOID_NOTEHEAD=0x1d157, |
| 751 MUSICAL_HALF_NOTE=0x1d15e, /* NFC/NFD->Notehead+Stem */ |
| 752 MUSICAL_STEM=0x1d165, /* cc=216 */ |
| 753 MUSICAL_STACCATO=0x1d17c /* cc=220 */ |
| 754 }; |
| 755 |
| 756 static void |
| 757 TestNormCoverage() { |
| 758 UChar input[1000], expect[1000], output[1000]; |
| 759 UErrorCode errorCode; |
| 760 int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLeng
th; |
| 761 |
| 762 /* create a long and nasty string with NFKC-unsafe characters */ |
| 763 inLength=0; |
| 764 |
| 765 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */ |
| 766 input[inLength++]=HANGUL_KIYEOK; |
| 767 input[inLength++]=HANGUL_WEO; |
| 768 input[inLength++]=HANGUL_KIYEOK_SIOS; |
| 769 |
| 770 input[inLength++]=HANGUL_KIYEOK; |
| 771 input[inLength++]=HANGUL_WEO; |
| 772 input[inLength++]=HANGUL_K_KIYEOK_SIOS; |
| 773 |
| 774 input[inLength++]=HANGUL_KIYEOK; |
| 775 input[inLength++]=HANGUL_K_WEO; |
| 776 input[inLength++]=HANGUL_KIYEOK_SIOS; |
| 777 |
| 778 input[inLength++]=HANGUL_KIYEOK; |
| 779 input[inLength++]=HANGUL_K_WEO; |
| 780 input[inLength++]=HANGUL_K_KIYEOK_SIOS; |
| 781 |
| 782 input[inLength++]=HANGUL_K_KIYEOK; |
| 783 input[inLength++]=HANGUL_WEO; |
| 784 input[inLength++]=HANGUL_KIYEOK_SIOS; |
| 785 |
| 786 input[inLength++]=HANGUL_K_KIYEOK; |
| 787 input[inLength++]=HANGUL_WEO; |
| 788 input[inLength++]=HANGUL_K_KIYEOK_SIOS; |
| 789 |
| 790 input[inLength++]=HANGUL_K_KIYEOK; |
| 791 input[inLength++]=HANGUL_K_WEO; |
| 792 input[inLength++]=HANGUL_KIYEOK_SIOS; |
| 793 |
| 794 input[inLength++]=HANGUL_K_KIYEOK; |
| 795 input[inLength++]=HANGUL_K_WEO; |
| 796 input[inLength++]=HANGUL_K_KIYEOK_SIOS; |
| 797 |
| 798 /* Hangul LV with normal/compatibility Jamo T */ |
| 799 input[inLength++]=HANGUL_AC00; |
| 800 input[inLength++]=HANGUL_KIYEOK_SIOS; |
| 801 |
| 802 input[inLength++]=HANGUL_AC00; |
| 803 input[inLength++]=HANGUL_K_KIYEOK_SIOS; |
| 804 |
| 805 /* compatibility Jamo L, V */ |
| 806 input[inLength++]=HANGUL_K_KIYEOK; |
| 807 input[inLength++]=HANGUL_K_WEO; |
| 808 |
| 809 hangulPrefixLength=inLength; |
| 810 |
| 811 input[inLength++]=UTF16_LEAD(MUSICAL_HALF_NOTE); |
| 812 input[inLength++]=UTF16_TRAIL(MUSICAL_HALF_NOTE); |
| 813 for(i=0; i<200; ++i) { |
| 814 input[inLength++]=UTF16_LEAD(MUSICAL_STACCATO); |
| 815 input[inLength++]=UTF16_TRAIL(MUSICAL_STACCATO); |
| 816 input[inLength++]=UTF16_LEAD(MUSICAL_STEM); |
| 817 input[inLength++]=UTF16_TRAIL(MUSICAL_STEM); |
| 818 } |
| 819 |
| 820 /* (compatibility) Jamo L, T do not compose */ |
| 821 input[inLength++]=HANGUL_K_KIYEOK; |
| 822 input[inLength++]=HANGUL_K_KIYEOK_SIOS; |
| 823 |
| 824 /* quick checks */ |
| 825 errorCode=U_ZERO_ERROR; |
| 826 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_F
AILURE(errorCode)) { |
| 827 log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (
Are you missing data?)\n", u_errorName(errorCode)); |
| 828 } |
| 829 errorCode=U_ZERO_ERROR; |
| 830 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_
FAILURE(errorCode)) { |
| 831 log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) -
(Are you missing data?)\n", u_errorName(errorCode)); |
| 832 } |
| 833 errorCode=U_ZERO_ERROR; |
| 834 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_F
AILURE(errorCode)) { |
| 835 log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (
Are you missing data?)\n", u_errorName(errorCode)); |
| 836 } |
| 837 errorCode=U_ZERO_ERROR; |
| 838 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_
FAILURE(errorCode)) { |
| 839 log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) -
(Are you missing data?)\n", u_errorName(errorCode)); |
| 840 } |
| 841 errorCode=U_ZERO_ERROR; |
| 842 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_F
AILURE(errorCode)) { |
| 843 log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (
Are you missing data?)\n", u_errorName(errorCode)); |
| 844 } |
| 845 |
| 846 /* NFKC */ |
| 847 expectLength=0; |
| 848 expect[expectLength++]=HANGUL_SYLLABLE; |
| 849 |
| 850 expect[expectLength++]=HANGUL_SYLLABLE; |
| 851 |
| 852 expect[expectLength++]=HANGUL_SYLLABLE; |
| 853 |
| 854 expect[expectLength++]=HANGUL_SYLLABLE; |
| 855 |
| 856 expect[expectLength++]=HANGUL_SYLLABLE; |
| 857 |
| 858 expect[expectLength++]=HANGUL_SYLLABLE; |
| 859 |
| 860 expect[expectLength++]=HANGUL_SYLLABLE; |
| 861 |
| 862 expect[expectLength++]=HANGUL_SYLLABLE; |
| 863 |
| 864 expect[expectLength++]=HANGUL_AC00+3; |
| 865 |
| 866 expect[expectLength++]=HANGUL_AC00+3; |
| 867 |
| 868 expect[expectLength++]=HANGUL_AC00+14*28; |
| 869 |
| 870 expect[expectLength++]=UTF16_LEAD(MUSICAL_VOID_NOTEHEAD); |
| 871 expect[expectLength++]=UTF16_TRAIL(MUSICAL_VOID_NOTEHEAD); |
| 872 expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM); |
| 873 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM); |
| 874 for(i=0; i<200; ++i) { |
| 875 expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM); |
| 876 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM); |
| 877 } |
| 878 for(i=0; i<200; ++i) { |
| 879 expect[expectLength++]=UTF16_LEAD(MUSICAL_STACCATO); |
| 880 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STACCATO); |
| 881 } |
| 882 |
| 883 expect[expectLength++]=HANGUL_KIYEOK; |
| 884 expect[expectLength++]=HANGUL_KIYEOK_SIOS; |
| 885 |
| 886 /* try destination overflow first */ |
| 887 errorCode=U_ZERO_ERROR; |
| 888 preflightLength=unorm_normalize(input, inLength, |
| 889 UNORM_NFKC, 0, |
| 890 output, 100, /* too short */ |
| 891 &errorCode); |
| 892 if(errorCode!=U_BUFFER_OVERFLOW_ERROR) { |
| 893 log_data_err("error unorm_normalize(long input, output too short, UNORM_
NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCod
e)); |
| 894 } |
| 895 |
| 896 /* real NFKC */ |
| 897 errorCode=U_ZERO_ERROR; |
| 898 length=unorm_normalize(input, inLength, |
| 899 UNORM_NFKC, 0, |
| 900 output, sizeof(output)/U_SIZEOF_UCHAR, |
| 901 &errorCode); |
| 902 if(U_FAILURE(errorCode)) { |
| 903 log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with
%s - (Are you missing data?)\n", u_errorName(errorCode)); |
| 904 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) { |
| 905 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong re
sult\n"); |
| 906 for(i=0; i<length; ++i) { |
| 907 if(output[i]!=expect[i]) { |
| 908 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i]
, expect[i]); |
| 909 break; |
| 910 } |
| 911 } |
| 912 } |
| 913 if(length!=preflightLength) { |
| 914 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but prefligh
tLength==%ld\n", length, preflightLength); |
| 915 } |
| 916 |
| 917 /* FCD */ |
| 918 u_memcpy(expect, input, hangulPrefixLength); |
| 919 expectLength=hangulPrefixLength; |
| 920 |
| 921 expect[expectLength++]=UTF16_LEAD(MUSICAL_VOID_NOTEHEAD); |
| 922 expect[expectLength++]=UTF16_TRAIL(MUSICAL_VOID_NOTEHEAD); |
| 923 expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM); |
| 924 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM); |
| 925 for(i=0; i<200; ++i) { |
| 926 expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM); |
| 927 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM); |
| 928 } |
| 929 for(i=0; i<200; ++i) { |
| 930 expect[expectLength++]=UTF16_LEAD(MUSICAL_STACCATO); |
| 931 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STACCATO); |
| 932 } |
| 933 |
| 934 expect[expectLength++]=HANGUL_K_KIYEOK; |
| 935 expect[expectLength++]=HANGUL_K_KIYEOK_SIOS; |
| 936 |
| 937 errorCode=U_ZERO_ERROR; |
| 938 length=unorm_normalize(input, inLength, |
| 939 UNORM_FCD, 0, |
| 940 output, sizeof(output)/U_SIZEOF_UCHAR, |
| 941 &errorCode); |
| 942 if(U_FAILURE(errorCode)) { |
| 943 log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %
s - (Are you missing data?)\n", u_errorName(errorCode)); |
| 944 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) { |
| 945 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong res
ult\n"); |
| 946 for(i=0; i<length; ++i) { |
| 947 if(output[i]!=expect[i]) { |
| 948 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i],
expect[i]); |
| 949 break; |
| 950 } |
| 951 } |
| 952 } |
| 953 } |
| 954 |
| 955 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm
.cpp */ |
| 956 static void |
| 957 TestConcatenate(void) { |
| 958 /* "re + 'sume'" */ |
| 959 static const UChar |
| 960 left[]={ |
| 961 0x72, 0x65, 0 |
| 962 }, |
| 963 right[]={ |
| 964 0x301, 0x73, 0x75, 0x6d, 0xe9, 0 |
| 965 }, |
| 966 expect[]={ |
| 967 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0 |
| 968 }; |
| 969 |
| 970 UChar buffer[100]; |
| 971 UErrorCode errorCode; |
| 972 int32_t length; |
| 973 |
| 974 /* left with length, right NUL-terminated */ |
| 975 errorCode=U_ZERO_ERROR; |
| 976 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &err
orCode); |
| 977 if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length))
{ |
| 978 log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s -
(Are you missing data?)\n", length, u_errorName(errorCode)); |
| 979 } |
| 980 |
| 981 /* preflighting */ |
| 982 errorCode=U_ZERO_ERROR; |
| 983 length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCo
de); |
| 984 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) { |
| 985 log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) fail
ed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); |
| 986 } |
| 987 |
| 988 buffer[2]=0x5555; |
| 989 errorCode=U_ZERO_ERROR; |
| 990 length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &error
Code); |
| 991 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) { |
| 992 log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) fa
iled with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); |
| 993 } |
| 994 |
| 995 /* enter with U_FAILURE */ |
| 996 buffer[2]=0xaaaa; |
| 997 errorCode=U_UNEXPECTED_TOKEN; |
| 998 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &err
orCode); |
| 999 if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) { |
| 1000 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length
, u_errorName(errorCode)); |
| 1001 } |
| 1002 |
| 1003 /* illegal arguments */ |
| 1004 buffer[2]=0xaaaa; |
| 1005 errorCode=U_ZERO_ERROR; |
| 1006 length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &err
orCode); |
| 1007 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) { |
| 1008 log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (
Are you missing data?)\n", length, u_errorName(errorCode)); |
| 1009 } |
| 1010 |
| 1011 errorCode=U_ZERO_ERROR; |
| 1012 length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &error
Code); |
| 1013 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { |
| 1014 log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s -
(Are you missing data?)\n", length, u_errorName(errorCode)); |
| 1015 } |
| 1016 } |
| 1017 |
| 1018 enum { |
| 1019 _PLUS=0x2b |
| 1020 }; |
| 1021 |
| 1022 static const char *const _modeString[UNORM_MODE_COUNT]={ |
| 1023 "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD" |
| 1024 }; |
| 1025 |
| 1026 static void |
| 1027 _testIter(const UChar *src, int32_t srcLength, |
| 1028 UCharIterator *iter, UNormalizationMode mode, UBool forward, |
| 1029 const UChar *out, int32_t outLength, |
| 1030 const int32_t *srcIndexes, int32_t srcIndexesLength) { |
| 1031 UChar buffer[4]; |
| 1032 const UChar *expect, *outLimit, *in; |
| 1033 int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength; |
| 1034 UErrorCode errorCode; |
| 1035 UBool neededToNormalize, expectNeeded; |
| 1036 |
| 1037 errorCode=U_ZERO_ERROR; |
| 1038 outLimit=out+outLength; |
| 1039 if(forward) { |
| 1040 expect=out; |
| 1041 i=index=0; |
| 1042 } else { |
| 1043 expect=outLimit; |
| 1044 i=srcIndexesLength-2; |
| 1045 index=srcLength; |
| 1046 } |
| 1047 |
| 1048 for(;;) { |
| 1049 prevIndex=index; |
| 1050 if(forward) { |
| 1051 if(!iter->hasNext(iter)) { |
| 1052 return; |
| 1053 } |
| 1054 length=unorm_next(iter, |
| 1055 buffer, sizeof(buffer)/U_SIZEOF_UCHAR, |
| 1056 mode, 0, |
| 1057 (UBool)(out!=NULL), &neededToNormalize, |
| 1058 &errorCode); |
| 1059 expectIndex=srcIndexes[i+1]; |
| 1060 in=src+prevIndex; |
| 1061 inLength=expectIndex-prevIndex; |
| 1062 |
| 1063 if(out!=NULL) { |
| 1064 /* get output piece from between plus signs */ |
| 1065 expectLength=0; |
| 1066 while((expect+expectLength)!=outLimit && expect[expectLength]!=_
PLUS) { |
| 1067 ++expectLength; |
| 1068 } |
| 1069 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength)); |
| 1070 } else { |
| 1071 expect=in; |
| 1072 expectLength=inLength; |
| 1073 expectNeeded=FALSE; |
| 1074 } |
| 1075 } else { |
| 1076 if(!iter->hasPrevious(iter)) { |
| 1077 return; |
| 1078 } |
| 1079 length=unorm_previous(iter, |
| 1080 buffer, sizeof(buffer)/U_SIZEOF_UCHAR, |
| 1081 mode, 0, |
| 1082 (UBool)(out!=NULL), &neededToNormalize, |
| 1083 &errorCode); |
| 1084 expectIndex=srcIndexes[i]; |
| 1085 in=src+expectIndex; |
| 1086 inLength=prevIndex-expectIndex; |
| 1087 |
| 1088 if(out!=NULL) { |
| 1089 /* get output piece from between plus signs */ |
| 1090 expectLength=0; |
| 1091 while(expect!=out && expect[-1]!=_PLUS) { |
| 1092 ++expectLength; |
| 1093 --expect; |
| 1094 } |
| 1095 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength)); |
| 1096 } else { |
| 1097 expect=in; |
| 1098 expectLength=inLength; |
| 1099 expectNeeded=FALSE; |
| 1100 } |
| 1101 } |
| 1102 index=iter->getIndex(iter, UITER_CURRENT); |
| 1103 |
| 1104 if(U_FAILURE(errorCode)) { |
| 1105 log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s -
(Are you missing data?)\n", |
| 1106 forward, _modeString[mode], i, u_errorName(errorCode)); |
| 1107 return; |
| 1108 } |
| 1109 if(expectIndex!=index) { |
| 1110 log_err("error unorm iteration (next/previous %d %s): index[%d] wron
g, got %d expected %d\n", |
| 1111 forward, _modeString[mode], i, index, expectIndex); |
| 1112 return; |
| 1113 } |
| 1114 if(expectLength!=length) { |
| 1115 log_err("error unorm iteration (next/previous %d %s): length[%d] wro
ng, got %d expected %d\n", |
| 1116 forward, _modeString[mode], i, length, expectLength); |
| 1117 return; |
| 1118 } |
| 1119 if(0!=u_memcmp(expect, buffer, length)) { |
| 1120 log_err("error unorm iteration (next/previous %d %s): output string[
%d] wrong\n", |
| 1121 forward, _modeString[mode], i); |
| 1122 return; |
| 1123 } |
| 1124 if(neededToNormalize!=expectNeeded) { |
| 1125 } |
| 1126 |
| 1127 if(forward) { |
| 1128 expect+=expectLength+1; /* go after the + */ |
| 1129 ++i; |
| 1130 } else { |
| 1131 --expect; /* go before the + */ |
| 1132 --i; |
| 1133 } |
| 1134 } |
| 1135 } |
| 1136 |
| 1137 static void |
| 1138 TestNextPrevious() { |
| 1139 static const UChar |
| 1140 src[]={ /* input string */ |
| 1141 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133 |
| 1142 }, |
| 1143 nfd[]={ /* + separates expected output pieces */ |
| 1144 0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x11
61, _PLUS, 0x3133 |
| 1145 }, |
| 1146 nfkd[]={ |
| 1147 0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x11
61, _PLUS, 0x11aa |
| 1148 }, |
| 1149 nfc[]={ |
| 1150 0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133 |
| 1151 }, |
| 1152 nfkc[]={ |
| 1153 0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03 |
| 1154 }, |
| 1155 fcd[]={ |
| 1156 0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x31
33 |
| 1157 }; |
| 1158 |
| 1159 /* expected iterator indexes in the source string for each iteration piece *
/ |
| 1160 static const int32_t |
| 1161 nfdIndexes[]={ |
| 1162 0, 1, 2, 5, 6, 7 |
| 1163 }, |
| 1164 nfkdIndexes[]={ |
| 1165 0, 1, 2, 5, 6, 7 |
| 1166 }, |
| 1167 nfcIndexes[]={ |
| 1168 0, 1, 2, 5, 6, 7 |
| 1169 }, |
| 1170 nfkcIndexes[]={ |
| 1171 0, 1, 2, 5, 7 |
| 1172 }, |
| 1173 fcdIndexes[]={ |
| 1174 0, 1, 2, 5, 6, 7 |
| 1175 }; |
| 1176 |
| 1177 UCharIterator iter; |
| 1178 |
| 1179 UChar buffer[4]; |
| 1180 int32_t length; |
| 1181 |
| 1182 UBool neededToNormalize; |
| 1183 UErrorCode errorCode; |
| 1184 |
| 1185 uiter_setString(&iter, src, sizeof(src)/U_SIZEOF_UCHAR); |
| 1186 |
| 1187 /* test iteration with doNormalize */ |
| 1188 iter.index=0; |
| 1189 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, nfd, size
of(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4); |
| 1190 iter.index=0; |
| 1191 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, nfkd, si
zeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4); |
| 1192 iter.index=0; |
| 1193 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, nfc, size
of(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4); |
| 1194 iter.index=0; |
| 1195 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, nfkc, si
zeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4); |
| 1196 iter.index=0; |
| 1197 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, fcd, size
of(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4); |
| 1198 |
| 1199 iter.index=iter.length; |
| 1200 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, nfd, siz
eof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4); |
| 1201 iter.index=iter.length; |
| 1202 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, nfkd, s
izeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4); |
| 1203 iter.index=iter.length; |
| 1204 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, nfc, siz
eof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4); |
| 1205 iter.index=iter.length; |
| 1206 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, nfkc, s
izeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4); |
| 1207 iter.index=iter.length; |
| 1208 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, fcd, siz
eof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4); |
| 1209 |
| 1210 /* test iteration without doNormalize */ |
| 1211 iter.index=0; |
| 1212 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, NULL, 0,
nfdIndexes, sizeof(nfdIndexes)/4); |
| 1213 iter.index=0; |
| 1214 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, NULL, 0,
nfkdIndexes, sizeof(nfkdIndexes)/4); |
| 1215 iter.index=0; |
| 1216 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, NULL, 0,
nfcIndexes, sizeof(nfcIndexes)/4); |
| 1217 iter.index=0; |
| 1218 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, NULL, 0,
nfkcIndexes, sizeof(nfkcIndexes)/4); |
| 1219 iter.index=0; |
| 1220 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, NULL, 0,
fcdIndexes, sizeof(fcdIndexes)/4); |
| 1221 |
| 1222 iter.index=iter.length; |
| 1223 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, NULL, 0,
nfdIndexes, sizeof(nfdIndexes)/4); |
| 1224 iter.index=iter.length; |
| 1225 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, NULL, 0
, nfkdIndexes, sizeof(nfkdIndexes)/4); |
| 1226 iter.index=iter.length; |
| 1227 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, NULL, 0,
nfcIndexes, sizeof(nfcIndexes)/4); |
| 1228 iter.index=iter.length; |
| 1229 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, NULL, 0
, nfkcIndexes, sizeof(nfkcIndexes)/4); |
| 1230 iter.index=iter.length; |
| 1231 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, NULL, 0,
fcdIndexes, sizeof(fcdIndexes)/4); |
| 1232 |
| 1233 /* try without neededToNormalize */ |
| 1234 errorCode=U_ZERO_ERROR; |
| 1235 buffer[0]=5; |
| 1236 iter.index=1; |
| 1237 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, |
| 1238 UNORM_NFD, 0, TRUE, NULL, |
| 1239 &errorCode); |
| 1240 if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[
3]) { |
| 1241 log_data_err("error unorm_next(without needed) %s - (Are you missing dat
a?)\n", u_errorName(errorCode)); |
| 1242 return; |
| 1243 } |
| 1244 |
| 1245 /* preflight */ |
| 1246 neededToNormalize=9; |
| 1247 iter.index=1; |
| 1248 length=unorm_next(&iter, NULL, 0, |
| 1249 UNORM_NFD, 0, TRUE, &neededToNormalize, |
| 1250 &errorCode); |
| 1251 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!
=2) { |
| 1252 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCod
e)); |
| 1253 return; |
| 1254 } |
| 1255 |
| 1256 errorCode=U_ZERO_ERROR; |
| 1257 buffer[0]=buffer[1]=5; |
| 1258 neededToNormalize=9; |
| 1259 iter.index=1; |
| 1260 length=unorm_next(&iter, buffer, 1, |
| 1261 UNORM_NFD, 0, TRUE, &neededToNormalize, |
| 1262 &errorCode); |
| 1263 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!
=2 || buffer[1]!=5) { |
| 1264 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode)); |
| 1265 return; |
| 1266 } |
| 1267 |
| 1268 /* no iterator */ |
| 1269 errorCode=U_ZERO_ERROR; |
| 1270 buffer[0]=buffer[1]=5; |
| 1271 neededToNormalize=9; |
| 1272 iter.index=1; |
| 1273 length=unorm_next(NULL, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, |
| 1274 UNORM_NFD, 0, TRUE, &neededToNormalize, |
| 1275 &errorCode); |
| 1276 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { |
| 1277 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode)); |
| 1278 return; |
| 1279 } |
| 1280 |
| 1281 /* illegal mode */ |
| 1282 buffer[0]=buffer[1]=5; |
| 1283 neededToNormalize=9; |
| 1284 iter.index=1; |
| 1285 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, |
| 1286 (UNormalizationMode)0, 0, TRUE, &neededToNormalize, |
| 1287 &errorCode); |
| 1288 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { |
| 1289 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode)); |
| 1290 return; |
| 1291 } |
| 1292 |
| 1293 /* error coming in */ |
| 1294 errorCode=U_MISPLACED_QUANTIFIER; |
| 1295 buffer[0]=5; |
| 1296 iter.index=1; |
| 1297 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, |
| 1298 UNORM_NFD, 0, TRUE, NULL, |
| 1299 &errorCode); |
| 1300 if(errorCode!=U_MISPLACED_QUANTIFIER) { |
| 1301 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(err
orCode)); |
| 1302 return; |
| 1303 } |
| 1304 } |
| 1305 |
| 1306 static void |
| 1307 TestFCNFKCClosure(void) { |
| 1308 static const struct { |
| 1309 UChar32 c; |
| 1310 const UChar s[6]; |
| 1311 } tests[]={ |
| 1312 { 0x00C4, { 0 } }, |
| 1313 { 0x00E4, { 0 } }, |
| 1314 { 0x037A, { 0x0020, 0x03B9, 0 } }, |
| 1315 { 0x03D2, { 0x03C5, 0 } }, |
| 1316 { 0x20A8, { 0x0072, 0x0073, 0 } }, |
| 1317 { 0x210B, { 0x0068, 0 } }, |
| 1318 { 0x210C, { 0x0068, 0 } }, |
| 1319 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } }, |
| 1320 { 0x2122, { 0x0074, 0x006D, 0 } }, |
| 1321 { 0x2128, { 0x007A, 0 } }, |
| 1322 { 0x1D5DB, { 0x0068, 0 } }, |
| 1323 { 0x1D5ED, { 0x007A, 0 } }, |
| 1324 { 0x0061, { 0 } } |
| 1325 }; |
| 1326 |
| 1327 UChar buffer[8]; |
| 1328 UErrorCode errorCode; |
| 1329 int32_t i, length; |
| 1330 |
| 1331 for(i=0; i<LENGTHOF(tests); ++i) { |
| 1332 errorCode=U_ZERO_ERROR; |
| 1333 length=u_getFC_NFKC_Closure(tests[i].c, buffer, LENGTHOF(buffer), &error
Code); |
| 1334 if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests
[i].s, buffer)) { |
| 1335 log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you
missing data?)\n", tests[i].c, u_errorName(errorCode)); |
| 1336 } |
| 1337 } |
| 1338 |
| 1339 /* error handling */ |
| 1340 errorCode=U_ZERO_ERROR; |
| 1341 length=u_getFC_NFKC_Closure(0x5c, NULL, LENGTHOF(buffer), &errorCode); |
| 1342 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { |
| 1343 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(e
rrorCode)); |
| 1344 } |
| 1345 |
| 1346 length=u_getFC_NFKC_Closure(0x5c, buffer, LENGTHOF(buffer), &errorCode); |
| 1347 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { |
| 1348 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(e
rrorCode)); |
| 1349 } |
| 1350 } |
| 1351 |
| 1352 static void |
| 1353 TestQuickCheckPerCP() { |
| 1354 UErrorCode errorCode; |
| 1355 UChar32 c, lead, trail; |
| 1356 UChar s[U16_MAX_LENGTH], nfd[16]; |
| 1357 int32_t length, lccc1, lccc2, tccc1, tccc2; |
| 1358 int32_t qc1, qc2; |
| 1359 |
| 1360 if( |
| 1361 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES || |
| 1362 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES || |
| 1363 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE || |
| 1364 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE |
| |
| 1365 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getInt
PropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) || |
| 1366 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIn
tPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) |
| 1367 ) { |
| 1368 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*
_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n"); |
| 1369 } |
| 1370 |
| 1371 /* |
| 1372 * compare the quick check property values for some code points |
| 1373 * to the quick check results for checking same-code point strings |
| 1374 */ |
| 1375 errorCode=U_ZERO_ERROR; |
| 1376 c=0; |
| 1377 while(c<0x110000) { |
| 1378 length=0; |
| 1379 U16_APPEND_UNSAFE(s, length, c); |
| 1380 |
| 1381 qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK); |
| 1382 qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode); |
| 1383 if(qc1!=qc2) { |
| 1384 log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(N
FC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); |
| 1385 } |
| 1386 |
| 1387 qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK); |
| 1388 qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode); |
| 1389 if(qc1!=qc2) { |
| 1390 log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(N
FD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); |
| 1391 } |
| 1392 |
| 1393 qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK); |
| 1394 qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode); |
| 1395 if(qc1!=qc2) { |
| 1396 log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(
NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); |
| 1397 } |
| 1398 |
| 1399 qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK); |
| 1400 qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode); |
| 1401 if(qc1!=qc2) { |
| 1402 log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(
NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); |
| 1403 } |
| 1404 |
| 1405 length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, LENGTHOF(nfd), &err
orCode); |
| 1406 /* length-length == 0 is used to get around a compiler warning. */ |
| 1407 U16_GET(nfd, 0, length-length, length, lead); |
| 1408 U16_GET(nfd, 0, length-1, length, trail); |
| 1409 |
| 1410 lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS); |
| 1411 lccc2=u_getCombiningClass(lead); |
| 1412 tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS); |
| 1413 tccc2=u_getCombiningClass(trail); |
| 1414 |
| 1415 if(lccc1!=lccc2) { |
| 1416 log_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(le
ad) for U+%04x\n", |
| 1417 lccc1, lccc2, c); |
| 1418 } |
| 1419 if(tccc1!=tccc2) { |
| 1420 log_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(tr
ail) for U+%04x\n", |
| 1421 tccc1, tccc2, c); |
| 1422 } |
| 1423 |
| 1424 /* skip some code points */ |
| 1425 c=(20*c)/19+1; |
| 1426 } |
| 1427 } |
| 1428 |
| 1429 static void |
| 1430 TestComposition(void) { |
| 1431 static const struct { |
| 1432 UNormalizationMode mode; |
| 1433 uint32_t options; |
| 1434 UChar input[12]; |
| 1435 UChar expect[12]; |
| 1436 } cases[]={ |
| 1437 /* |
| 1438 * special cases for UAX #15 bug |
| 1439 * see Unicode Corrigendum #5: Normalization Idempotency |
| 1440 * at http://unicode.org/versions/corrigendum5.html |
| 1441 * (was Public Review Issue #29) |
| 1442 */ |
| 1443 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x
0300, 0x1161, 0x0327 } }, |
| 1444 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x
0300, 0x1161, 0x0327, 0x11a8 } }, |
| 1445 { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x
0327, 0x0300, 0x11a8 } }, |
| 1446 { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x
0300, 0x0b3e } }, |
| 1447 |
| 1448 /* TODO: add test cases for UNORM_FCC here (j2151) */ |
| 1449 }; |
| 1450 |
| 1451 UChar output[16]; |
| 1452 UErrorCode errorCode; |
| 1453 int32_t i, length; |
| 1454 |
| 1455 for(i=0; i<LENGTHOF(cases); ++i) { |
| 1456 errorCode=U_ZERO_ERROR; |
| 1457 length=unorm_normalize( |
| 1458 cases[i].input, -1, |
| 1459 cases[i].mode, cases[i].options, |
| 1460 output, LENGTHOF(output), |
| 1461 &errorCode); |
| 1462 if( U_FAILURE(errorCode) || |
| 1463 length!=u_strlen(cases[i].expect) || |
| 1464 0!=u_memcmp(output, cases[i].expect, length) |
| 1465 ) { |
| 1466 log_data_err("unexpected result for case %d - (Are you missing data?
)\n", i); |
| 1467 } |
| 1468 } |
| 1469 } |
| 1470 |
| 1471 static void |
| 1472 TestGetDecomposition() { |
| 1473 UChar decomp[32]; |
| 1474 int32_t length; |
| 1475 |
| 1476 UErrorCode errorCode=U_ZERO_ERROR; |
| 1477 const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIG
UOUS, &errorCode); |
| 1478 if(U_FAILURE(errorCode)) { |
| 1479 log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_
errorName(errorCode)); |
| 1480 return; |
| 1481 } |
| 1482 |
| 1483 length=unorm2_getDecomposition(n2, 0x20, decomp, LENGTHOF(decomp), &errorCod
e); |
| 1484 if(U_FAILURE(errorCode) || length>=0) { |
| 1485 log_err("unorm2_getDecomposition(space) failed\n"); |
| 1486 } |
| 1487 errorCode=U_ZERO_ERROR; |
| 1488 length=unorm2_getDecomposition(n2, 0xe4, decomp, LENGTHOF(decomp), &errorCod
e); |
| 1489 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308
|| decomp[2]!=0) { |
| 1490 log_err("unorm2_getDecomposition(a-umlaut) failed\n"); |
| 1491 } |
| 1492 errorCode=U_ZERO_ERROR; |
| 1493 length=unorm2_getDecomposition(n2, 0xac01, decomp, LENGTHOF(decomp), &errorC
ode); |
| 1494 if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x11
61 || decomp[2]!=0x11a8 || decomp[3]!=0) { |
| 1495 log_err("unorm2_getDecomposition(Hangul syllable U+AC01) failed\n"); |
| 1496 } |
| 1497 errorCode=U_ZERO_ERROR; |
| 1498 length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode); |
| 1499 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) { |
| 1500 log_err("unorm2_getDecomposition(Hangul syllable U+AC01) overflow failed
\n"); |
| 1501 } |
| 1502 errorCode=U_ZERO_ERROR; |
| 1503 length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode); |
| 1504 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { |
| 1505 log_err("unorm2_getDecomposition(capacity<0) failed\n"); |
| 1506 } |
| 1507 errorCode=U_ZERO_ERROR; |
| 1508 length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode); |
| 1509 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { |
| 1510 log_err("unorm2_getDecomposition(decomposition=NULL) failed\n"); |
| 1511 } |
| 1512 } |
| 1513 |
| 1514 #endif /* #if !UCONFIG_NO_NORMALIZATION */ |
OLD | NEW |