| OLD | NEW |
| (Empty) |
| 1 /******************************************************************** | |
| 2 * COPYRIGHT: | |
| 3 * Copyright (c) 1997-2014, International Business Machines Corporation and | |
| 4 * others. All Rights Reserved. | |
| 5 ********************************************************************/ | |
| 6 /*******************************************************************************
* | |
| 7 * | |
| 8 * File CNORMTST.C | |
| 9 * | |
| 10 * Modification History: | |
| 11 * Name Description | |
| 12 * Madhu Katragadda Ported for C API | |
| 13 * synwee added test for quick check | |
| 14 * synwee added test for checkFCD | |
| 15 ********************************************************************************
*/ | |
| 16 /*tests for u_normalization*/ | |
| 17 #include "unicode/utypes.h" | |
| 18 #include "unicode/unorm.h" | |
| 19 #include "unicode/utf16.h" | |
| 20 #include "cintltst.h" | |
| 21 #include "cmemory.h" | |
| 22 | |
| 23 #if !UCONFIG_NO_NORMALIZATION | |
| 24 | |
| 25 #include <stdlib.h> | |
| 26 #include <time.h> | |
| 27 #include "unicode/uchar.h" | |
| 28 #include "unicode/ustring.h" | |
| 29 #include "unicode/unorm.h" | |
| 30 #include "cnormtst.h" | |
| 31 | |
/* Prototypes for the file-local tests that addNormTest() registers. */
static void TestAPI(void);
static void TestNormCoverage(void);
static void TestConcatenate(void);
static void TestNextPrevious(void);
static void TestIsNormalized(void);
static void TestFCNFKCClosure(void);
static void TestQuickCheckPerCP(void);
static void TestComposition(void);
static void TestFCD(void);
static void TestGetDecomposition(void);
static void TestGetRawDecomposition(void);
static void TestAppendRestoreMiddle(void);
static void TestGetEasyToUseInstance(void);
| 66 | |
/*
 * Canonical normalization cases. Each row is
 *   { input, expected decomposed form, expected composed form }
 * written with \uXXXX escapes that are expanded at run time by CharsToUChars().
 * The table ends with an all-empty row, which is tested like any other row.
 */
static const char* const canonTests[][3] = {
    { "cat", "cat", "cat" },
    { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark", },

    /* D with dot above: precomposed and as a sequence */
    { "\\u1e0a", "D\\u0307", "\\u1e0a" },
    { "D\\u0307", "D\\u0307", "\\u1e0a" },

    /* D with dot below and dot above, in three spellings */
    { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" },
    { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" },
    { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" },

    /* D with cedilla, dot below, dot above */
    { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" },
    /* D with dot above, ogonek, dot below */
    { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" },

    /* E with macron and grave */
    { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" },
    { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" },
    /* E-grave followed by macron */
    { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" },

    /* angstrom sign and A-ring */
    { "\\u212b", "A\\u030a", "\\u00c5" },
    { "\\u00c5", "A\\u030a", "\\u00c5" },

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },

    /* Katakana ga, ka + ten, and the halfwidth (hw) combinations */
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },
    { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" },
    { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" },
    { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" },

    /* A + U+0300 + U+0316: the two marks swap places in the decomposed form */
    { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" },

    { "", "", "" }
};
| 103 | |
/*
 * Compatibility normalization cases, laid out like canonTests:
 *   { input, expected decomposed form, expected composed form }
 * with \uXXXX escapes expanded at run time by CharsToUChars().
 */
static const char* const compatTests[][3] = {
    { "cat", "cat", "cat" },

    /* Alef-Lamed ligature vs. Alef, Lamed */
    { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" },

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    /* ffi ligature -> f + f + i */
    { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" },

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry IV", "Henry IV" },

    /* Katakana ga; ka + ten */
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },

    /* halfwidth ka + ten */
    { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" },

    /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later:
       halfwidth ka + halfwidth ten, then ka + halfwidth ten */
    { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },
    { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },

    { "", "", "" }
};
| 126 | |
/*
 * FCD cases: { input, expected FCD result, unused }.
 * Added for testing the below-U+0300 prefix of a NUL-terminated string.
 * The third column is NULL because TestNormCases() only reads column 1
 * for modes other than NFC/NFKC.
 */
static const char* const fcdTests[][3] = {
    /* D-caron + cedilla */
    { "\\u010e\\u0327", "D\\u0327\\u030c", NULL },
    /* D-caron */
    { "\\u010e", "\\u010e", NULL }
};
| 132 | |
| 133 void addNormTest(TestNode** root); | |
| 134 | |
| 135 void addNormTest(TestNode** root) | |
| 136 { | |
| 137 addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI"); | |
| 138 addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp"); | |
| 139 addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp"); | |
| 140 addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompo
se"); | |
| 141 addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCom
pose"); | |
| 142 addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD"); | |
| 143 addTest(root, &TestNull, "tsnorm/cnormtst/TestNull"); | |
| 144 addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck"); | |
| 145 addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP"); | |
| 146 addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized"); | |
| 147 addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD"); | |
| 148 addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage"); | |
| 149 addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate"); | |
| 150 addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious"); | |
| 151 addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure"); | |
| 152 addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition"); | |
| 153 addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition")
; | |
| 154 addTest(root, &TestGetRawDecomposition, "tsnorm/cnormtst/TestGetRawDecomposi
tion"); | |
| 155 addTest(root, &TestAppendRestoreMiddle, "tsnorm/cnormtst/TestAppendRestoreMi
ddle"); | |
| 156 addTest(root, &TestGetEasyToUseInstance, "tsnorm/cnormtst/TestGetEasyToUseIn
stance"); | |
| 157 } | |
| 158 | |
/*
 * Printable names for UNormalizationMode values, indexed directly by the
 * enum value (see TestNormCases(), which logs modeStrings[mode]).
 *
 * ICU's unorm.h numbers the modes starting at UNORM_NONE = 1, so slot 0 is a
 * placeholder; without it every logged name would be shifted by one
 * (for example UNORM_NFD would be reported as "UNORM_NFKD").
 */
static const char* const modeStrings[]={
    "(no mode 0)",
    "UNORM_NONE",
    "UNORM_NFD",
    "UNORM_NFKD",
    "UNORM_NFC",
    "UNORM_NFKC",
    "UNORM_FCD",
    "UNORM_MODE_COUNT"
};
| 168 | |
| 169 static void TestNormCases(UNormalizationMode mode, | |
| 170 const char* const cases[][3], int32_t lengthOfCases) { | |
| 171 int32_t x, neededLen, length2; | |
| 172 int32_t expIndex= (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1; | |
| 173 UChar *source=NULL; | |
| 174 UChar result[16]; | |
| 175 log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]); | |
| 176 for(x=0; x < lengthOfCases; x++) | |
| 177 { | |
| 178 UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR; | |
| 179 source=CharsToUChars(cases[x][0]); | |
| 180 neededLen= unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &
status); | |
| 181 length2= unorm_normalize(source, -1, mode, 0, NULL, 0, &status2); | |
| 182 if(neededLen!=length2) { | |
| 183 log_err("ERROR in unorm_normalize(%s)[%d]: " | |
| 184 "preflight length/NUL %d!=%d preflight length/srcLength\n", | |
| 185 modeStrings[mode], (int)x, (int)neededLen, (int)length2); | |
| 186 } | |
| 187 if(status==U_BUFFER_OVERFLOW_ERROR) | |
| 188 { | |
| 189 status=U_ZERO_ERROR; | |
| 190 } | |
| 191 length2=unorm_normalize(source, u_strlen(source), mode, 0, result, UPRV_
LENGTHOF(result), &status); | |
| 192 if(U_FAILURE(status) || neededLen!=length2) { | |
| 193 log_data_err("ERROR in unorm_normalize(%s/NUL) at %s: %s - (Are you
missing data?)\n", | |
| 194 modeStrings[mode], austrdup(source), myErrorName(status
)); | |
| 195 } else { | |
| 196 assertEqual(result, cases[x][expIndex], x); | |
| 197 } | |
| 198 length2=unorm_normalize(source, -1, mode, 0, result, UPRV_LENGTHOF(resul
t), &status); | |
| 199 if(U_FAILURE(status) || neededLen!=length2) { | |
| 200 log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s: %s - (A
re you missing data?)\n", | |
| 201 modeStrings[mode], austrdup(source), myErrorName(status
)); | |
| 202 } else { | |
| 203 assertEqual(result, cases[x][expIndex], x); | |
| 204 } | |
| 205 free(source); | |
| 206 } | |
| 207 } | |
| 208 | |
| 209 void TestDecomp() { | |
| 210 TestNormCases(UNORM_NFD, canonTests, UPRV_LENGTHOF(canonTests)); | |
| 211 } | |
| 212 | |
| 213 void TestCompatDecomp() { | |
| 214 TestNormCases(UNORM_NFKD, compatTests, UPRV_LENGTHOF(compatTests)); | |
| 215 } | |
| 216 | |
| 217 void TestCanonDecompCompose() { | |
| 218 TestNormCases(UNORM_NFC, canonTests, UPRV_LENGTHOF(canonTests)); | |
| 219 } | |
| 220 | |
| 221 void TestCompatDecompCompose() { | |
| 222 TestNormCases(UNORM_NFKC, compatTests, UPRV_LENGTHOF(compatTests)); | |
| 223 } | |
| 224 | |
| 225 void TestFCD() { | |
| 226 TestNormCases(UNORM_FCD, fcdTests, UPRV_LENGTHOF(fcdTests)); | |
| 227 } | |
| 228 | |
| 229 static void assertEqual(const UChar* result, const char* expected, int32_t index
) | |
| 230 { | |
| 231 UChar *expectedUni = CharsToUChars(expected); | |
| 232 if(u_strcmp(result, expectedUni)!=0){ | |
| 233 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n"
, index, expected, | |
| 234 austrdup(result) ); | |
| 235 } | |
| 236 free(expectedUni); | |
| 237 } | |
| 238 | |
| 239 static void TestNull_check(UChar *src, int32_t srcLen, | |
| 240 UChar *exp, int32_t expLen, | |
| 241 UNormalizationMode mode, | |
| 242 const char *name) | |
| 243 { | |
| 244 UErrorCode status = U_ZERO_ERROR; | |
| 245 int32_t len, i; | |
| 246 | |
| 247 UChar result[50]; | |
| 248 | |
| 249 | |
| 250 status = U_ZERO_ERROR; | |
| 251 | |
| 252 for(i=0;i<50;i++) | |
| 253 { | |
| 254 result[i] = 0xFFFD; | |
| 255 } | |
| 256 | |
| 257 len = unorm_normalize(src, srcLen, mode, 0, result, 50, &status); | |
| 258 | |
| 259 if(U_FAILURE(status)) { | |
| 260 log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missin
g data?)\n", name, u_errorName(status)); | |
| 261 } else if (len != expLen) { | |
| 262 log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n
", name, expLen, len); | |
| 263 } | |
| 264 | |
| 265 { | |
| 266 for(i=0;i<len;i++){ | |
| 267 if(exp[i] != result[i]) { | |
| 268 log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n", | |
| 269 name, | |
| 270 i, | |
| 271 exp[i], | |
| 272 result[i]); | |
| 273 return; | |
| 274 } | |
| 275 log_verbose(" %d: \\u%04X\n", i, result[i]); | |
| 276 } | |
| 277 } | |
| 278 | |
| 279 log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name); | |
| 280 } | |
| 281 | |
| 282 void TestNull() | |
| 283 { | |
| 284 | |
| 285 UChar source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 }; | |
| 286 int32_t source_comp_len = 4; | |
| 287 UChar expect_comp[] = { 0x0061, 0x0000, 0x1e0a }; | |
| 288 int32_t expect_comp_len = 3; | |
| 289 | |
| 290 UChar source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 }; | |
| 291 int32_t source_dcmp_len = 3; | |
| 292 UChar expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C }; | |
| 293 int32_t expect_dcmp_len = 5; | |
| 294 | |
| 295 TestNull_check(source_comp, | |
| 296 source_comp_len, | |
| 297 expect_comp, | |
| 298 expect_comp_len, | |
| 299 UNORM_NFC, | |
| 300 "UNORM_NFC"); | |
| 301 | |
| 302 TestNull_check(source_dcmp, | |
| 303 source_dcmp_len, | |
| 304 expect_dcmp, | |
| 305 expect_dcmp_len, | |
| 306 UNORM_NFD, | |
| 307 "UNORM_NFD"); | |
| 308 | |
| 309 TestNull_check(source_comp, | |
| 310 source_comp_len, | |
| 311 expect_comp, | |
| 312 expect_comp_len, | |
| 313 UNORM_NFKC, | |
| 314 "UNORM_NFKC"); | |
| 315 | |
| 316 | |
| 317 } | |
| 318 | |
| 319 static void TestQuickCheckResultNO() | |
| 320 { | |
| 321 const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C, | |
| 322 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E}; | |
| 323 const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB, | |
| 324 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E}; | |
| 325 const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE, | |
| 326 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D}; | |
| 327 const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE, | |
| 328 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D}; | |
| 329 | |
| 330 | |
| 331 const int SIZE = 10; | |
| 332 | |
| 333 int count = 0; | |
| 334 UErrorCode error = U_ZERO_ERROR; | |
| 335 | |
| 336 for (; count < SIZE; count ++) | |
| 337 { | |
| 338 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) != | |
| 339 UNORM_NO) | |
| 340 { | |
| 341 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]); | |
| 342 return; | |
| 343 } | |
| 344 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) != | |
| 345 UNORM_NO) | |
| 346 { | |
| 347 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]); | |
| 348 return; | |
| 349 } | |
| 350 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) != | |
| 351 UNORM_NO) | |
| 352 { | |
| 353 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]); | |
| 354 return; | |
| 355 } | |
| 356 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != | |
| 357 UNORM_NO) | |
| 358 { | |
| 359 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); | |
| 360 return; | |
| 361 } | |
| 362 } | |
| 363 } | |
| 364 | |
| 365 | |
| 366 static void TestQuickCheckResultYES() | |
| 367 { | |
| 368 const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A, | |
| 369 0x2261, 0x3075, 0x4000, 0x5000, 0xF000}; | |
| 370 const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500, | |
| 371 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000}; | |
| 372 const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB, | |
| 373 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27}; | |
| 374 const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000, | |
| 375 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E}; | |
| 376 | |
| 377 const int SIZE = 10; | |
| 378 int count = 0; | |
| 379 UErrorCode error = U_ZERO_ERROR; | |
| 380 | |
| 381 UChar cp = 0; | |
| 382 while (cp < 0xA0) | |
| 383 { | |
| 384 if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES) | |
| 385 { | |
| 386 log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)
\n", cp); | |
| 387 return; | |
| 388 } | |
| 389 if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) != | |
| 390 UNORM_YES) | |
| 391 { | |
| 392 log_err("ERROR in NFC quick check at U+%04x\n", cp); | |
| 393 return; | |
| 394 } | |
| 395 if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES) | |
| 396 { | |
| 397 log_data_err("ERROR in NFKD quick check at U+%04x\n", cp); | |
| 398 return; | |
| 399 } | |
| 400 if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) != | |
| 401 UNORM_YES) | |
| 402 { | |
| 403 log_err("ERROR in NFKC quick check at U+%04x\n", cp); | |
| 404 return; | |
| 405 } | |
| 406 cp ++; | |
| 407 } | |
| 408 | |
| 409 for (; count < SIZE; count ++) | |
| 410 { | |
| 411 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) != | |
| 412 UNORM_YES) | |
| 413 { | |
| 414 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]); | |
| 415 return; | |
| 416 } | |
| 417 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) | |
| 418 != UNORM_YES) | |
| 419 { | |
| 420 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]); | |
| 421 return; | |
| 422 } | |
| 423 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) != | |
| 424 UNORM_YES) | |
| 425 { | |
| 426 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]); | |
| 427 return; | |
| 428 } | |
| 429 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != | |
| 430 UNORM_YES) | |
| 431 { | |
| 432 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); | |
| 433 return; | |
| 434 } | |
| 435 } | |
| 436 } | |
| 437 | |
| 438 static void TestQuickCheckResultMAYBE() | |
| 439 { | |
| 440 const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161, | |
| 441 0x116A, 0x1173, 0x1175, 0x3099, 0x309A}; | |
| 442 const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E, | |
| 443 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099}; | |
| 444 | |
| 445 | |
| 446 const int SIZE = 10; | |
| 447 | |
| 448 int count = 0; | |
| 449 UErrorCode error = U_ZERO_ERROR; | |
| 450 | |
| 451 /* NFD and NFKD does not have any MAYBE codepoints */ | |
| 452 for (; count < SIZE; count ++) | |
| 453 { | |
| 454 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) != | |
| 455 UNORM_MAYBE) | |
| 456 { | |
| 457 log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)
\n", CPNFC[count]); | |
| 458 return; | |
| 459 } | |
| 460 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != | |
| 461 UNORM_MAYBE) | |
| 462 { | |
| 463 log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]); | |
| 464 return; | |
| 465 } | |
| 466 } | |
| 467 } | |
| 468 | |
| 469 static void TestQuickCheckStringResult() | |
| 470 { | |
| 471 int count; | |
| 472 UChar *d = NULL; | |
| 473 UChar *c = NULL; | |
| 474 UErrorCode error = U_ZERO_ERROR; | |
| 475 | |
| 476 for (count = 0; count < UPRV_LENGTHOF(canonTests); count ++) | |
| 477 { | |
| 478 d = CharsToUChars(canonTests[count][1]); | |
| 479 c = CharsToUChars(canonTests[count][2]); | |
| 480 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) != | |
| 481 UNORM_YES) | |
| 482 { | |
| 483 log_data_err("ERROR in NFD quick check for string at count %d - (Are you m
issing data?)\n", count); | |
| 484 return; | |
| 485 } | |
| 486 | |
| 487 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) == | |
| 488 UNORM_NO) | |
| 489 { | |
| 490 log_err("ERROR in NFC quick check for string at count %d\n", count); | |
| 491 return; | |
| 492 } | |
| 493 | |
| 494 free(d); | |
| 495 free(c); | |
| 496 } | |
| 497 | |
| 498 for (count = 0; count < UPRV_LENGTHOF(compatTests); count ++) | |
| 499 { | |
| 500 d = CharsToUChars(compatTests[count][1]); | |
| 501 c = CharsToUChars(compatTests[count][2]); | |
| 502 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) != | |
| 503 UNORM_YES) | |
| 504 { | |
| 505 log_data_err("ERROR in NFKD quick check for string at count %d\n", count); | |
| 506 return; | |
| 507 } | |
| 508 | |
| 509 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) != | |
| 510 UNORM_YES) | |
| 511 { | |
| 512 log_err("ERROR in NFKC quick check for string at count %d\n", count); | |
| 513 return; | |
| 514 } | |
| 515 | |
| 516 free(d); | |
| 517 free(c); | |
| 518 } | |
| 519 } | |
| 520 | |
| 521 void TestQuickCheck() | |
| 522 { | |
| 523 TestQuickCheckResultNO(); | |
| 524 TestQuickCheckResultYES(); | |
| 525 TestQuickCheckResultMAYBE(); | |
| 526 TestQuickCheckStringResult(); | |
| 527 } | |
| 528 | |
| 529 /* | |
| 530 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_ | |
| 531 * normalized, and some that are not. | |
| 532 * Here we pick some specific cases and test the C API. | |
| 533 */ | |
| 534 static void TestIsNormalized(void) { | |
| 535 static const UChar notNFC[][8]={ /* strings that are not in NFC *
/ | |
| 536 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */ | |
| 537 { 0xfb1d, 0 }, /* excluded from composition */ | |
| 538 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */ | |
| 539 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */ | |
| 540 }; | |
| 541 static const UChar notNFKC[][8]={ /* strings that are not in NFKC
*/ | |
| 542 { 0x1100, 0x1161, 0 }, /* Jamo compose */ | |
| 543 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */ | |
| 544 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */ | |
| 545 }; | |
| 546 | |
| 547 int32_t i; | |
| 548 UErrorCode errorCode; | |
| 549 | |
| 550 /* API test */ | |
| 551 | |
| 552 /* normal case with length>=0 (length -1 used for special cases below) */ | |
| 553 errorCode=U_ZERO_ERROR; | |
| 554 if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(e
rrorCode)) { | |
| 555 log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missin
g data?)\n", u_errorName(errorCode)); | |
| 556 } | |
| 557 | |
| 558 /* incoming U_FAILURE */ | |
| 559 errorCode=U_TRUNCATED_CHAR_FOUND; | |
| 560 (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode); | |
| 561 if(errorCode!=U_TRUNCATED_CHAR_FOUND) { | |
| 562 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error c
ode to %s\n", u_errorName(errorCode)); | |
| 563 } | |
| 564 | |
| 565 /* NULL source */ | |
| 566 errorCode=U_ZERO_ERROR; | |
| 567 (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode); | |
| 568 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
| 569 log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_E
RROR but %s - (Are you missing data?)\n", u_errorName(errorCode)); | |
| 570 } | |
| 571 | |
| 572 /* bad length */ | |
| 573 errorCode=U_ZERO_ERROR; | |
| 574 (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode); | |
| 575 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
| 576 log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_E
RROR but %s - (Are you missing data?)\n", u_errorName(errorCode)); | |
| 577 } | |
| 578 | |
| 579 /* specific cases */ | |
| 580 for(i=0; i<UPRV_LENGTHOF(notNFC); ++i) { | |
| 581 errorCode=U_ZERO_ERROR; | |
| 582 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE
(errorCode)) { | |
| 583 log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (
Are you missing data?)\n", i, u_errorName(errorCode)); | |
| 584 } | |
| 585 errorCode=U_ZERO_ERROR; | |
| 586 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILUR
E(errorCode)) { | |
| 587 log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) -
(Are you missing data?)\n", i, u_errorName(errorCode)); | |
| 588 } | |
| 589 } | |
| 590 for(i=0; i<UPRV_LENGTHOF(notNFKC); ++i) { | |
| 591 errorCode=U_ZERO_ERROR; | |
| 592 if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILU
RE(errorCode)) { | |
| 593 log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) -
(Are you missing data?)\n", i, u_errorName(errorCode)); | |
| 594 } | |
| 595 } | |
| 596 } | |
| 597 | |
| 598 void TestCheckFCD() | |
| 599 { | |
| 600 UErrorCode status = U_ZERO_ERROR; | |
| 601 static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x09, | |
| 602 0x0A}; | |
| 603 static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
| |
| 604 0x02B9, 0x0314, 0x0315, 0x0316}; | |
| 605 static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7, | |
| 606 0x0050, 0x0730, 0x09EE, 0x1E10}; | |
| 607 | |
| 608 static const UChar datastr[][5] = | |
| 609 { {0x0061, 0x030A, 0x1E05, 0x0302, 0}, | |
| 610 {0x0061, 0x030A, 0x00E2, 0x0323, 0}, | |
| 611 {0x0061, 0x0323, 0x00E2, 0x0323, 0}, | |
| 612 {0x0061, 0x0323, 0x1E05, 0x0302, 0} }; | |
| 613 static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES}; | |
| 614 | |
| 615 static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x6
7, 0x68, 0x69, | |
| 616 0x6a, | |
| 617 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8
, 0xe9, | |
| 618 0xea, | |
| 619 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x03
06, | |
| 620 0x0307, 0x0308, 0x0309, 0x030a, | |
| 621 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x03
26, | |
| 622 0x0327, 0x0328, 0x0329, 0x032a, | |
| 623 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e
06, | |
| 624 0x1e07, 0x1e08, 0x1e09, 0x1e0a}; | |
| 625 | |
| 626 int count = 0; | |
| 627 | |
| 628 if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES) | |
| 629 log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_qu
ickCheck is UNORM_YES - (Are you missing data?)\n"); | |
| 630 if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO) | |
| 631 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickC
heck is UNORM_NO\n"); | |
| 632 if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES) | |
| 633 log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm
_quickCheck is UNORM_YES - (Are you missing data?)\n"); | |
| 634 | |
| 635 if (U_FAILURE(status)) | |
| 636 log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n",
u_errorName(status)); | |
| 637 | |
| 638 while (count < 4) | |
| 639 { | |
| 640 UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status); | |
| 641 if (U_FAILURE(status)) { | |
| 642 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set
%d - (Are you missing data?)\n", count); | |
| 643 break; | |
| 644 } | |
| 645 else { | |
| 646 if (result[count] != fcdresult) { | |
| 647 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n",
count, | |
| 648 result[count]); | |
| 649 } | |
| 650 } | |
| 651 count ++; | |
| 652 } | |
| 653 | |
| 654 /* random checks of long strings */ | |
| 655 status = U_ZERO_ERROR; | |
| 656 srand((unsigned)time( NULL )); | |
| 657 | |
| 658 for (count = 0; count < 50; count ++) | |
| 659 { | |
| 660 int size = 0; | |
| 661 UBool testresult = UNORM_YES; | |
| 662 UChar data[20]; | |
| 663 UChar norm[100]; | |
| 664 UChar nfd[100]; | |
| 665 int normsize = 0; | |
| 666 int nfdsize = 0; | |
| 667 | |
| 668 while (size != 19) { | |
| 669 data[size] = datachar[(rand() * 50) / RAND_MAX]; | |
| 670 log_verbose("0x%x", data[size]); | |
| 671 normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0, | |
| 672 norm + normsize, 100 - normsize, &status);
| |
| 673 if (U_FAILURE(status)) { | |
| 674 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data ge
neration - (Are you missing data?)\n"); | |
| 675 break; | |
| 676 } | |
| 677 size ++; | |
| 678 } | |
| 679 log_verbose("\n"); | |
| 680 | |
| 681 nfdsize = unorm_normalize(data, size, UNORM_NFD, 0, | |
| 682 nfd, 100, &status); | |
| 683 if (U_FAILURE(status)) { | |
| 684 log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalize
d data generation - (Are you missing data?)\n"); | |
| 685 } | |
| 686 | |
| 687 if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) { | |
| 688 testresult = UNORM_NO; | |
| 689 } | |
| 690 if (testresult == UNORM_YES) { | |
| 691 log_verbose("result UNORM_YES\n"); | |
| 692 } | |
| 693 else { | |
| 694 log_verbose("result UNORM_NO\n"); | |
| 695 } | |
| 696 | |
| 697 if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAIL
URE(status)) { | |
| 698 log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data -
(Are you missing data?)\n", testresult); | |
| 699 } | |
| 700 } | |
| 701 } | |
| 702 | |
| 703 static void | |
| 704 TestAPI() { | |
| 705 static const UChar in[]={ 0x68, 0xe4 }; | |
| 706 UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff }; | |
| 707 UErrorCode errorCode; | |
| 708 int32_t length; | |
| 709 | |
| 710 /* try preflighting */ | |
| 711 errorCode=U_ZERO_ERROR; | |
| 712 length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode); | |
| 713 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) { | |
| 714 log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s
- (Are you missing data?)\n", length, u_errorName(errorCode)); | |
| 715 return; | |
| 716 } | |
| 717 | |
| 718 errorCode=U_ZERO_ERROR; | |
| 719 length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode); | |
| 720 if(U_FAILURE(errorCode)) { | |
| 721 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName
(errorCode)); | |
| 722 return; | |
| 723 } | |
| 724 if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) { | |
| 725 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+
%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]); | |
| 726 return; | |
| 727 } | |
| 728 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode); | |
| 729 if(U_FAILURE(errorCode)) { | |
| 730 log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with
%s\n", (long)length, u_errorName(errorCode)); | |
| 731 return; | |
| 732 } | |
| 733 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode); | |
| 734 if(U_FAILURE(errorCode)) { | |
| 735 log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with
%s\n", (long)length, u_errorName(errorCode)); | |
| 736 return; | |
| 737 } | |
| 738 } | |
| 739 | |
| 740 /* test cases to improve test code coverage */ | |
/* Code points used by TestNormCoverage() to build its input. */
enum {
    /* Hangul compatibility Jamo and the Jamo their NFKD forms map to */
    HANGUL_K_KIYEOK      = 0x3131,  /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO         = 0x315d,  /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS = 0x3133,  /* NFKD->Jamo T U+11aa */

    /* the conjoining Jamo themselves */
    HANGUL_KIYEOK        = 0x1100,  /* Jamo L */
    HANGUL_WEO           = 0x116f,  /* Jamo V */
    HANGUL_KIYEOK_SIOS   = 0x11aa,  /* Jamo T */

    /* Hangul syllables */
    HANGUL_AC00          = 0xac00,  /* Hangul syllable = Jamo LV U+ac00 */
    /* Hangul syllable = U+1100 * U+116f * U+11aa */
    HANGUL_SYLLABLE      = 0xac00 + 14*28 + 3,

    /* supplementary-plane musical symbols */
    MUSICAL_VOID_NOTEHEAD = 0x1d157,
    MUSICAL_HALF_NOTE     = 0x1d15e,  /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM          = 0x1d165,  /* cc=216 */
    MUSICAL_STACCATO      = 0x1d17c   /* cc=220 */
};
| 758 | |
| 759 static void | |
| 760 TestNormCoverage() { | |
| 761 UChar input[1000], expect[1000], output[1000]; | |
| 762 UErrorCode errorCode; | |
| 763 int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLeng
th; | |
| 764 | |
| 765 /* create a long and nasty string with NFKC-unsafe characters */ | |
| 766 inLength=0; | |
| 767 | |
| 768 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */ | |
| 769 input[inLength++]=HANGUL_KIYEOK; | |
| 770 input[inLength++]=HANGUL_WEO; | |
| 771 input[inLength++]=HANGUL_KIYEOK_SIOS; | |
| 772 | |
| 773 input[inLength++]=HANGUL_KIYEOK; | |
| 774 input[inLength++]=HANGUL_WEO; | |
| 775 input[inLength++]=HANGUL_K_KIYEOK_SIOS; | |
| 776 | |
| 777 input[inLength++]=HANGUL_KIYEOK; | |
| 778 input[inLength++]=HANGUL_K_WEO; | |
| 779 input[inLength++]=HANGUL_KIYEOK_SIOS; | |
| 780 | |
| 781 input[inLength++]=HANGUL_KIYEOK; | |
| 782 input[inLength++]=HANGUL_K_WEO; | |
| 783 input[inLength++]=HANGUL_K_KIYEOK_SIOS; | |
| 784 | |
| 785 input[inLength++]=HANGUL_K_KIYEOK; | |
| 786 input[inLength++]=HANGUL_WEO; | |
| 787 input[inLength++]=HANGUL_KIYEOK_SIOS; | |
| 788 | |
| 789 input[inLength++]=HANGUL_K_KIYEOK; | |
| 790 input[inLength++]=HANGUL_WEO; | |
| 791 input[inLength++]=HANGUL_K_KIYEOK_SIOS; | |
| 792 | |
| 793 input[inLength++]=HANGUL_K_KIYEOK; | |
| 794 input[inLength++]=HANGUL_K_WEO; | |
| 795 input[inLength++]=HANGUL_KIYEOK_SIOS; | |
| 796 | |
| 797 input[inLength++]=HANGUL_K_KIYEOK; | |
| 798 input[inLength++]=HANGUL_K_WEO; | |
| 799 input[inLength++]=HANGUL_K_KIYEOK_SIOS; | |
| 800 | |
| 801 /* Hangul LV with normal/compatibility Jamo T */ | |
| 802 input[inLength++]=HANGUL_AC00; | |
| 803 input[inLength++]=HANGUL_KIYEOK_SIOS; | |
| 804 | |
| 805 input[inLength++]=HANGUL_AC00; | |
| 806 input[inLength++]=HANGUL_K_KIYEOK_SIOS; | |
| 807 | |
| 808 /* compatibility Jamo L, V */ | |
| 809 input[inLength++]=HANGUL_K_KIYEOK; | |
| 810 input[inLength++]=HANGUL_K_WEO; | |
| 811 | |
| 812 hangulPrefixLength=inLength; | |
| 813 | |
| 814 input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE); | |
| 815 input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE); | |
| 816 for(i=0; i<200; ++i) { | |
| 817 input[inLength++]=U16_LEAD(MUSICAL_STACCATO); | |
| 818 input[inLength++]=U16_TRAIL(MUSICAL_STACCATO); | |
| 819 input[inLength++]=U16_LEAD(MUSICAL_STEM); | |
| 820 input[inLength++]=U16_TRAIL(MUSICAL_STEM); | |
| 821 } | |
| 822 | |
| 823 /* (compatibility) Jamo L, T do not compose */ | |
| 824 input[inLength++]=HANGUL_K_KIYEOK; | |
| 825 input[inLength++]=HANGUL_K_KIYEOK_SIOS; | |
| 826 | |
| 827 /* quick checks */ | |
| 828 errorCode=U_ZERO_ERROR; | |
| 829 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_F
AILURE(errorCode)) { | |
| 830 log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (
Are you missing data?)\n", u_errorName(errorCode)); | |
| 831 } | |
| 832 errorCode=U_ZERO_ERROR; | |
| 833 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_
FAILURE(errorCode)) { | |
| 834 log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) -
(Are you missing data?)\n", u_errorName(errorCode)); | |
| 835 } | |
| 836 errorCode=U_ZERO_ERROR; | |
| 837 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_F
AILURE(errorCode)) { | |
| 838 log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (
Are you missing data?)\n", u_errorName(errorCode)); | |
| 839 } | |
| 840 errorCode=U_ZERO_ERROR; | |
| 841 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_
FAILURE(errorCode)) { | |
| 842 log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) -
(Are you missing data?)\n", u_errorName(errorCode)); | |
| 843 } | |
| 844 errorCode=U_ZERO_ERROR; | |
| 845 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_F
AILURE(errorCode)) { | |
| 846 log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (
Are you missing data?)\n", u_errorName(errorCode)); | |
| 847 } | |
| 848 | |
| 849 /* NFKC */ | |
| 850 expectLength=0; | |
| 851 expect[expectLength++]=HANGUL_SYLLABLE; | |
| 852 | |
| 853 expect[expectLength++]=HANGUL_SYLLABLE; | |
| 854 | |
| 855 expect[expectLength++]=HANGUL_SYLLABLE; | |
| 856 | |
| 857 expect[expectLength++]=HANGUL_SYLLABLE; | |
| 858 | |
| 859 expect[expectLength++]=HANGUL_SYLLABLE; | |
| 860 | |
| 861 expect[expectLength++]=HANGUL_SYLLABLE; | |
| 862 | |
| 863 expect[expectLength++]=HANGUL_SYLLABLE; | |
| 864 | |
| 865 expect[expectLength++]=HANGUL_SYLLABLE; | |
| 866 | |
| 867 expect[expectLength++]=HANGUL_AC00+3; | |
| 868 | |
| 869 expect[expectLength++]=HANGUL_AC00+3; | |
| 870 | |
| 871 expect[expectLength++]=HANGUL_AC00+14*28; | |
| 872 | |
| 873 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD); | |
| 874 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD); | |
| 875 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); | |
| 876 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); | |
| 877 for(i=0; i<200; ++i) { | |
| 878 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); | |
| 879 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); | |
| 880 } | |
| 881 for(i=0; i<200; ++i) { | |
| 882 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO); | |
| 883 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO); | |
| 884 } | |
| 885 | |
| 886 expect[expectLength++]=HANGUL_KIYEOK; | |
| 887 expect[expectLength++]=HANGUL_KIYEOK_SIOS; | |
| 888 | |
| 889 /* try destination overflow first */ | |
| 890 errorCode=U_ZERO_ERROR; | |
| 891 preflightLength=unorm_normalize(input, inLength, | |
| 892 UNORM_NFKC, 0, | |
| 893 output, 100, /* too short */ | |
| 894 &errorCode); | |
| 895 if(errorCode!=U_BUFFER_OVERFLOW_ERROR) { | |
| 896 log_data_err("error unorm_normalize(long input, output too short, UNORM_
NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCod
e)); | |
| 897 } | |
| 898 | |
| 899 /* real NFKC */ | |
| 900 errorCode=U_ZERO_ERROR; | |
| 901 length=unorm_normalize(input, inLength, | |
| 902 UNORM_NFKC, 0, | |
| 903 output, sizeof(output)/U_SIZEOF_UCHAR, | |
| 904 &errorCode); | |
| 905 if(U_FAILURE(errorCode)) { | |
| 906 log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with
%s - (Are you missing data?)\n", u_errorName(errorCode)); | |
| 907 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) { | |
| 908 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong re
sult\n"); | |
| 909 for(i=0; i<length; ++i) { | |
| 910 if(output[i]!=expect[i]) { | |
| 911 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i]
, expect[i]); | |
| 912 break; | |
| 913 } | |
| 914 } | |
| 915 } | |
| 916 if(length!=preflightLength) { | |
| 917 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but prefligh
tLength==%ld\n", length, preflightLength); | |
| 918 } | |
| 919 | |
| 920 /* FCD */ | |
| 921 u_memcpy(expect, input, hangulPrefixLength); | |
| 922 expectLength=hangulPrefixLength; | |
| 923 | |
| 924 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD); | |
| 925 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD); | |
| 926 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); | |
| 927 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); | |
| 928 for(i=0; i<200; ++i) { | |
| 929 expect[expectLength++]=U16_LEAD(MUSICAL_STEM); | |
| 930 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM); | |
| 931 } | |
| 932 for(i=0; i<200; ++i) { | |
| 933 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO); | |
| 934 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO); | |
| 935 } | |
| 936 | |
| 937 expect[expectLength++]=HANGUL_K_KIYEOK; | |
| 938 expect[expectLength++]=HANGUL_K_KIYEOK_SIOS; | |
| 939 | |
| 940 errorCode=U_ZERO_ERROR; | |
| 941 length=unorm_normalize(input, inLength, | |
| 942 UNORM_FCD, 0, | |
| 943 output, sizeof(output)/U_SIZEOF_UCHAR, | |
| 944 &errorCode); | |
| 945 if(U_FAILURE(errorCode)) { | |
| 946 log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %
s - (Are you missing data?)\n", u_errorName(errorCode)); | |
| 947 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) { | |
| 948 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong res
ult\n"); | |
| 949 for(i=0; i<length; ++i) { | |
| 950 if(output[i]!=expect[i]) { | |
| 951 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i],
expect[i]); | |
| 952 break; | |
| 953 } | |
| 954 } | |
| 955 } | |
| 956 } | |
| 957 | |
| 958 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm
.cpp */ | |
| 959 static void | |
| 960 TestConcatenate(void) { | |
| 961 /* "re + 'sume'" */ | |
| 962 static const UChar | |
| 963 left[]={ | |
| 964 0x72, 0x65, 0 | |
| 965 }, | |
| 966 right[]={ | |
| 967 0x301, 0x73, 0x75, 0x6d, 0xe9, 0 | |
| 968 }, | |
| 969 expect[]={ | |
| 970 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0 | |
| 971 }; | |
| 972 | |
| 973 UChar buffer[100]; | |
| 974 UErrorCode errorCode; | |
| 975 int32_t length; | |
| 976 | |
| 977 /* left with length, right NUL-terminated */ | |
| 978 errorCode=U_ZERO_ERROR; | |
| 979 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &err
orCode); | |
| 980 if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length))
{ | |
| 981 log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s -
(Are you missing data?)\n", length, u_errorName(errorCode)); | |
| 982 } | |
| 983 | |
| 984 /* preflighting */ | |
| 985 errorCode=U_ZERO_ERROR; | |
| 986 length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCo
de); | |
| 987 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) { | |
| 988 log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) fail
ed with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); | |
| 989 } | |
| 990 | |
| 991 buffer[2]=0x5555; | |
| 992 errorCode=U_ZERO_ERROR; | |
| 993 length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &error
Code); | |
| 994 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) { | |
| 995 log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) fa
iled with %s - (Are you missing data?)\n", length, u_errorName(errorCode)); | |
| 996 } | |
| 997 | |
| 998 /* enter with U_FAILURE */ | |
| 999 buffer[2]=0xaaaa; | |
| 1000 errorCode=U_UNEXPECTED_TOKEN; | |
| 1001 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &err
orCode); | |
| 1002 if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) { | |
| 1003 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length
, u_errorName(errorCode)); | |
| 1004 } | |
| 1005 | |
| 1006 /* illegal arguments */ | |
| 1007 buffer[2]=0xaaaa; | |
| 1008 errorCode=U_ZERO_ERROR; | |
| 1009 length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &err
orCode); | |
| 1010 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) { | |
| 1011 log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (
Are you missing data?)\n", length, u_errorName(errorCode)); | |
| 1012 } | |
| 1013 | |
| 1014 errorCode=U_ZERO_ERROR; | |
| 1015 length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &error
Code); | |
| 1016 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
| 1017 log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s -
(Are you missing data?)\n", length, u_errorName(errorCode)); | |
| 1018 } | |
| 1019 } | |
| 1020 | |
/* Separator (0x2b, the plus sign) between expected output pieces in the TestNextPrevious() tables. */
enum { _PLUS=0x2b };
| 1024 | |
| 1025 static const char *const _modeString[UNORM_MODE_COUNT]={ | |
| 1026 "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD" | |
| 1027 }; | |
| 1028 | |
| 1029 static void | |
| 1030 _testIter(const UChar *src, int32_t srcLength, | |
| 1031 UCharIterator *iter, UNormalizationMode mode, UBool forward, | |
| 1032 const UChar *out, int32_t outLength, | |
| 1033 const int32_t *srcIndexes, int32_t srcIndexesLength) { | |
| 1034 UChar buffer[4]; | |
| 1035 const UChar *expect, *outLimit, *in; | |
| 1036 int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength; | |
| 1037 UErrorCode errorCode; | |
| 1038 UBool neededToNormalize, expectNeeded; | |
| 1039 | |
| 1040 errorCode=U_ZERO_ERROR; | |
| 1041 outLimit=out+outLength; | |
| 1042 if(forward) { | |
| 1043 expect=out; | |
| 1044 i=index=0; | |
| 1045 } else { | |
| 1046 expect=outLimit; | |
| 1047 i=srcIndexesLength-2; | |
| 1048 index=srcLength; | |
| 1049 } | |
| 1050 | |
| 1051 for(;;) { | |
| 1052 prevIndex=index; | |
| 1053 if(forward) { | |
| 1054 if(!iter->hasNext(iter)) { | |
| 1055 return; | |
| 1056 } | |
| 1057 length=unorm_next(iter, | |
| 1058 buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
| 1059 mode, 0, | |
| 1060 (UBool)(out!=NULL), &neededToNormalize, | |
| 1061 &errorCode); | |
| 1062 expectIndex=srcIndexes[i+1]; | |
| 1063 in=src+prevIndex; | |
| 1064 inLength=expectIndex-prevIndex; | |
| 1065 | |
| 1066 if(out!=NULL) { | |
| 1067 /* get output piece from between plus signs */ | |
| 1068 expectLength=0; | |
| 1069 while((expect+expectLength)!=outLimit && expect[expectLength]!=_
PLUS) { | |
| 1070 ++expectLength; | |
| 1071 } | |
| 1072 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength)); | |
| 1073 } else { | |
| 1074 expect=in; | |
| 1075 expectLength=inLength; | |
| 1076 expectNeeded=FALSE; | |
| 1077 } | |
| 1078 } else { | |
| 1079 if(!iter->hasPrevious(iter)) { | |
| 1080 return; | |
| 1081 } | |
| 1082 length=unorm_previous(iter, | |
| 1083 buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
| 1084 mode, 0, | |
| 1085 (UBool)(out!=NULL), &neededToNormalize, | |
| 1086 &errorCode); | |
| 1087 expectIndex=srcIndexes[i]; | |
| 1088 in=src+expectIndex; | |
| 1089 inLength=prevIndex-expectIndex; | |
| 1090 | |
| 1091 if(out!=NULL) { | |
| 1092 /* get output piece from between plus signs */ | |
| 1093 expectLength=0; | |
| 1094 while(expect!=out && expect[-1]!=_PLUS) { | |
| 1095 ++expectLength; | |
| 1096 --expect; | |
| 1097 } | |
| 1098 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength)); | |
| 1099 } else { | |
| 1100 expect=in; | |
| 1101 expectLength=inLength; | |
| 1102 expectNeeded=FALSE; | |
| 1103 } | |
| 1104 } | |
| 1105 index=iter->getIndex(iter, UITER_CURRENT); | |
| 1106 | |
| 1107 if(U_FAILURE(errorCode)) { | |
| 1108 log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s -
(Are you missing data?)\n", | |
| 1109 forward, _modeString[mode], i, u_errorName(errorCode)); | |
| 1110 return; | |
| 1111 } | |
| 1112 if(expectIndex!=index) { | |
| 1113 log_err("error unorm iteration (next/previous %d %s): index[%d] wron
g, got %d expected %d\n", | |
| 1114 forward, _modeString[mode], i, index, expectIndex); | |
| 1115 return; | |
| 1116 } | |
| 1117 if(expectLength!=length) { | |
| 1118 log_err("error unorm iteration (next/previous %d %s): length[%d] wro
ng, got %d expected %d\n", | |
| 1119 forward, _modeString[mode], i, length, expectLength); | |
| 1120 return; | |
| 1121 } | |
| 1122 if(0!=u_memcmp(expect, buffer, length)) { | |
| 1123 log_err("error unorm iteration (next/previous %d %s): output string[
%d] wrong\n", | |
| 1124 forward, _modeString[mode], i); | |
| 1125 return; | |
| 1126 } | |
| 1127 if(neededToNormalize!=expectNeeded) { | |
| 1128 } | |
| 1129 | |
| 1130 if(forward) { | |
| 1131 expect+=expectLength+1; /* go after the + */ | |
| 1132 ++i; | |
| 1133 } else { | |
| 1134 --expect; /* go before the + */ | |
| 1135 --i; | |
| 1136 } | |
| 1137 } | |
| 1138 } | |
| 1139 | |
| 1140 static void | |
| 1141 TestNextPrevious() { | |
| 1142 static const UChar | |
| 1143 src[]={ /* input string */ | |
| 1144 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133 | |
| 1145 }, | |
| 1146 nfd[]={ /* + separates expected output pieces */ | |
| 1147 0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x11
61, _PLUS, 0x3133 | |
| 1148 }, | |
| 1149 nfkd[]={ | |
| 1150 0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x11
61, _PLUS, 0x11aa | |
| 1151 }, | |
| 1152 nfc[]={ | |
| 1153 0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133 | |
| 1154 }, | |
| 1155 nfkc[]={ | |
| 1156 0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03 | |
| 1157 }, | |
| 1158 fcd[]={ | |
| 1159 0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x31
33 | |
| 1160 }; | |
| 1161 | |
| 1162 /* expected iterator indexes in the source string for each iteration piece *
/ | |
| 1163 static const int32_t | |
| 1164 nfdIndexes[]={ | |
| 1165 0, 1, 2, 5, 6, 7 | |
| 1166 }, | |
| 1167 nfkdIndexes[]={ | |
| 1168 0, 1, 2, 5, 6, 7 | |
| 1169 }, | |
| 1170 nfcIndexes[]={ | |
| 1171 0, 1, 2, 5, 6, 7 | |
| 1172 }, | |
| 1173 nfkcIndexes[]={ | |
| 1174 0, 1, 2, 5, 7 | |
| 1175 }, | |
| 1176 fcdIndexes[]={ | |
| 1177 0, 1, 2, 5, 6, 7 | |
| 1178 }; | |
| 1179 | |
| 1180 UCharIterator iter; | |
| 1181 | |
| 1182 UChar buffer[4]; | |
| 1183 int32_t length; | |
| 1184 | |
| 1185 UBool neededToNormalize; | |
| 1186 UErrorCode errorCode; | |
| 1187 | |
| 1188 uiter_setString(&iter, src, sizeof(src)/U_SIZEOF_UCHAR); | |
| 1189 | |
| 1190 /* test iteration with doNormalize */ | |
| 1191 iter.index=0; | |
| 1192 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, nfd, size
of(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4); | |
| 1193 iter.index=0; | |
| 1194 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, nfkd, si
zeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4); | |
| 1195 iter.index=0; | |
| 1196 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, nfc, size
of(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4); | |
| 1197 iter.index=0; | |
| 1198 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, nfkc, si
zeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4); | |
| 1199 iter.index=0; | |
| 1200 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, fcd, size
of(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4); | |
| 1201 | |
| 1202 iter.index=iter.length; | |
| 1203 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, nfd, siz
eof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4); | |
| 1204 iter.index=iter.length; | |
| 1205 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, nfkd, s
izeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4); | |
| 1206 iter.index=iter.length; | |
| 1207 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, nfc, siz
eof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4); | |
| 1208 iter.index=iter.length; | |
| 1209 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, nfkc, s
izeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4); | |
| 1210 iter.index=iter.length; | |
| 1211 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, fcd, siz
eof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4); | |
| 1212 | |
| 1213 /* test iteration without doNormalize */ | |
| 1214 iter.index=0; | |
| 1215 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, NULL, 0,
nfdIndexes, sizeof(nfdIndexes)/4); | |
| 1216 iter.index=0; | |
| 1217 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, NULL, 0,
nfkdIndexes, sizeof(nfkdIndexes)/4); | |
| 1218 iter.index=0; | |
| 1219 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, NULL, 0,
nfcIndexes, sizeof(nfcIndexes)/4); | |
| 1220 iter.index=0; | |
| 1221 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, NULL, 0,
nfkcIndexes, sizeof(nfkcIndexes)/4); | |
| 1222 iter.index=0; | |
| 1223 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, NULL, 0,
fcdIndexes, sizeof(fcdIndexes)/4); | |
| 1224 | |
| 1225 iter.index=iter.length; | |
| 1226 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, NULL, 0,
nfdIndexes, sizeof(nfdIndexes)/4); | |
| 1227 iter.index=iter.length; | |
| 1228 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, NULL, 0
, nfkdIndexes, sizeof(nfkdIndexes)/4); | |
| 1229 iter.index=iter.length; | |
| 1230 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, NULL, 0,
nfcIndexes, sizeof(nfcIndexes)/4); | |
| 1231 iter.index=iter.length; | |
| 1232 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, NULL, 0
, nfkcIndexes, sizeof(nfkcIndexes)/4); | |
| 1233 iter.index=iter.length; | |
| 1234 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, NULL, 0,
fcdIndexes, sizeof(fcdIndexes)/4); | |
| 1235 | |
| 1236 /* try without neededToNormalize */ | |
| 1237 errorCode=U_ZERO_ERROR; | |
| 1238 buffer[0]=5; | |
| 1239 iter.index=1; | |
| 1240 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
| 1241 UNORM_NFD, 0, TRUE, NULL, | |
| 1242 &errorCode); | |
| 1243 if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[
3]) { | |
| 1244 log_data_err("error unorm_next(without needed) %s - (Are you missing dat
a?)\n", u_errorName(errorCode)); | |
| 1245 return; | |
| 1246 } | |
| 1247 | |
| 1248 /* preflight */ | |
| 1249 neededToNormalize=9; | |
| 1250 iter.index=1; | |
| 1251 length=unorm_next(&iter, NULL, 0, | |
| 1252 UNORM_NFD, 0, TRUE, &neededToNormalize, | |
| 1253 &errorCode); | |
| 1254 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!
=2) { | |
| 1255 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCod
e)); | |
| 1256 return; | |
| 1257 } | |
| 1258 | |
| 1259 errorCode=U_ZERO_ERROR; | |
| 1260 buffer[0]=buffer[1]=5; | |
| 1261 neededToNormalize=9; | |
| 1262 iter.index=1; | |
| 1263 length=unorm_next(&iter, buffer, 1, | |
| 1264 UNORM_NFD, 0, TRUE, &neededToNormalize, | |
| 1265 &errorCode); | |
| 1266 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!
=2 || buffer[1]!=5) { | |
| 1267 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode)); | |
| 1268 return; | |
| 1269 } | |
| 1270 | |
| 1271 /* no iterator */ | |
| 1272 errorCode=U_ZERO_ERROR; | |
| 1273 buffer[0]=buffer[1]=5; | |
| 1274 neededToNormalize=9; | |
| 1275 iter.index=1; | |
| 1276 length=unorm_next(NULL, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
| 1277 UNORM_NFD, 0, TRUE, &neededToNormalize, | |
| 1278 &errorCode); | |
| 1279 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
| 1280 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode)); | |
| 1281 return; | |
| 1282 } | |
| 1283 | |
| 1284 /* illegal mode */ | |
| 1285 buffer[0]=buffer[1]=5; | |
| 1286 neededToNormalize=9; | |
| 1287 iter.index=1; | |
| 1288 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
| 1289 (UNormalizationMode)0, 0, TRUE, &neededToNormalize, | |
| 1290 &errorCode); | |
| 1291 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
| 1292 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode)); | |
| 1293 return; | |
| 1294 } | |
| 1295 | |
| 1296 /* error coming in */ | |
| 1297 errorCode=U_MISPLACED_QUANTIFIER; | |
| 1298 buffer[0]=5; | |
| 1299 iter.index=1; | |
| 1300 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR, | |
| 1301 UNORM_NFD, 0, TRUE, NULL, | |
| 1302 &errorCode); | |
| 1303 if(errorCode!=U_MISPLACED_QUANTIFIER) { | |
| 1304 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(err
orCode)); | |
| 1305 return; | |
| 1306 } | |
| 1307 } | |
| 1308 | |
| 1309 static void | |
| 1310 TestFCNFKCClosure(void) { | |
| 1311 static const struct { | |
| 1312 UChar32 c; | |
| 1313 const UChar s[6]; | |
| 1314 } tests[]={ | |
| 1315 { 0x00C4, { 0 } }, | |
| 1316 { 0x00E4, { 0 } }, | |
| 1317 { 0x037A, { 0x0020, 0x03B9, 0 } }, | |
| 1318 { 0x03D2, { 0x03C5, 0 } }, | |
| 1319 { 0x20A8, { 0x0072, 0x0073, 0 } }, | |
| 1320 { 0x210B, { 0x0068, 0 } }, | |
| 1321 { 0x210C, { 0x0068, 0 } }, | |
| 1322 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } }, | |
| 1323 { 0x2122, { 0x0074, 0x006D, 0 } }, | |
| 1324 { 0x2128, { 0x007A, 0 } }, | |
| 1325 { 0x1D5DB, { 0x0068, 0 } }, | |
| 1326 { 0x1D5ED, { 0x007A, 0 } }, | |
| 1327 { 0x0061, { 0 } } | |
| 1328 }; | |
| 1329 | |
| 1330 UChar buffer[8]; | |
| 1331 UErrorCode errorCode; | |
| 1332 int32_t i, length; | |
| 1333 | |
| 1334 for(i=0; i<UPRV_LENGTHOF(tests); ++i) { | |
| 1335 errorCode=U_ZERO_ERROR; | |
| 1336 length=u_getFC_NFKC_Closure(tests[i].c, buffer, UPRV_LENGTHOF(buffer), &
errorCode); | |
| 1337 if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests
[i].s, buffer)) { | |
| 1338 log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you
missing data?)\n", tests[i].c, u_errorName(errorCode)); | |
| 1339 } | |
| 1340 } | |
| 1341 | |
| 1342 /* error handling */ | |
| 1343 errorCode=U_ZERO_ERROR; | |
| 1344 length=u_getFC_NFKC_Closure(0x5c, NULL, UPRV_LENGTHOF(buffer), &errorCode); | |
| 1345 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
| 1346 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(e
rrorCode)); | |
| 1347 } | |
| 1348 | |
| 1349 length=u_getFC_NFKC_Closure(0x5c, buffer, UPRV_LENGTHOF(buffer), &errorCode)
; | |
| 1350 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
| 1351 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(e
rrorCode)); | |
| 1352 } | |
| 1353 } | |
| 1354 | |
| 1355 static void | |
| 1356 TestQuickCheckPerCP() { | |
| 1357 UErrorCode errorCode; | |
| 1358 UChar32 c, lead, trail; | |
| 1359 UChar s[U16_MAX_LENGTH], nfd[16]; | |
| 1360 int32_t length, lccc1, lccc2, tccc1, tccc2; | |
| 1361 int32_t qc1, qc2; | |
| 1362 | |
| 1363 if( | |
| 1364 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES || | |
| 1365 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES || | |
| 1366 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE || | |
| 1367 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE |
| | |
| 1368 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getInt
PropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) || | |
| 1369 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIn
tPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) | |
| 1370 ) { | |
| 1371 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*
_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n"); | |
| 1372 } | |
| 1373 | |
| 1374 /* | |
| 1375 * compare the quick check property values for some code points | |
| 1376 * to the quick check results for checking same-code point strings | |
| 1377 */ | |
| 1378 errorCode=U_ZERO_ERROR; | |
| 1379 c=0; | |
| 1380 while(c<0x110000) { | |
| 1381 length=0; | |
| 1382 U16_APPEND_UNSAFE(s, length, c); | |
| 1383 | |
| 1384 qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK); | |
| 1385 qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode); | |
| 1386 if(qc1!=qc2) { | |
| 1387 log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(N
FC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); | |
| 1388 } | |
| 1389 | |
| 1390 qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK); | |
| 1391 qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode); | |
| 1392 if(qc1!=qc2) { | |
| 1393 log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(N
FD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); | |
| 1394 } | |
| 1395 | |
| 1396 qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK); | |
| 1397 qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode); | |
| 1398 if(qc1!=qc2) { | |
| 1399 log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(
NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); | |
| 1400 } | |
| 1401 | |
| 1402 qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK); | |
| 1403 qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode); | |
| 1404 if(qc1!=qc2) { | |
| 1405 log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(
NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c); | |
| 1406 } | |
| 1407 | |
| 1408 length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, UPRV_LENGTHOF(nfd),
&errorCode); | |
| 1409 /* length-length == 0 is used to get around a compiler warning. */ | |
| 1410 U16_GET(nfd, 0, length-length, length, lead); | |
| 1411 U16_GET(nfd, 0, length-1, length, trail); | |
| 1412 | |
| 1413 lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS); | |
| 1414 lccc2=u_getCombiningClass(lead); | |
| 1415 tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS); | |
| 1416 tccc2=u_getCombiningClass(trail); | |
| 1417 | |
| 1418 if(lccc1!=lccc2) { | |
| 1419 log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningCla
ss(lead) for U+%04x\n", | |
| 1420 lccc1, lccc2, c); | |
| 1421 } | |
| 1422 if(tccc1!=tccc2) { | |
| 1423 log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningCla
ss(trail) for U+%04x\n", | |
| 1424 tccc1, tccc2, c); | |
| 1425 } | |
| 1426 | |
| 1427 /* skip some code points */ | |
| 1428 c=(20*c)/19+1; | |
| 1429 } | |
| 1430 } | |
| 1431 | |
| 1432 static void | |
| 1433 TestComposition(void) { | |
| 1434 static const struct { | |
| 1435 UNormalizationMode mode; | |
| 1436 uint32_t options; | |
| 1437 UChar input[12]; | |
| 1438 UChar expect[12]; | |
| 1439 } cases[]={ | |
| 1440 /* | |
| 1441 * special cases for UAX #15 bug | |
| 1442 * see Unicode Corrigendum #5: Normalization Idempotency | |
| 1443 * at http://unicode.org/versions/corrigendum5.html | |
| 1444 * (was Public Review Issue #29) | |
| 1445 */ | |
| 1446 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x
0300, 0x1161, 0x0327 } }, | |
| 1447 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x
0300, 0x1161, 0x0327, 0x11a8 } }, | |
| 1448 { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x
0327, 0x0300, 0x11a8 } }, | |
| 1449 { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x
0300, 0x0b3e } }, | |
| 1450 | |
| 1451 /* TODO: add test cases for UNORM_FCC here (j2151) */ | |
| 1452 }; | |
| 1453 | |
| 1454 UChar output[16]; | |
| 1455 UErrorCode errorCode; | |
| 1456 int32_t i, length; | |
| 1457 | |
| 1458 for(i=0; i<UPRV_LENGTHOF(cases); ++i) { | |
| 1459 errorCode=U_ZERO_ERROR; | |
| 1460 length=unorm_normalize( | |
| 1461 cases[i].input, -1, | |
| 1462 cases[i].mode, cases[i].options, | |
| 1463 output, UPRV_LENGTHOF(output), | |
| 1464 &errorCode); | |
| 1465 if( U_FAILURE(errorCode) || | |
| 1466 length!=u_strlen(cases[i].expect) || | |
| 1467 0!=u_memcmp(output, cases[i].expect, length) | |
| 1468 ) { | |
| 1469 log_data_err("unexpected result for case %d - (Are you missing data?
)\n", i); | |
| 1470 } | |
| 1471 } | |
| 1472 } | |
| 1473 | |
| 1474 static void | |
| 1475 TestGetDecomposition() { | |
| 1476 UChar decomp[32]; | |
| 1477 int32_t length; | |
| 1478 | |
| 1479 UErrorCode errorCode=U_ZERO_ERROR; | |
| 1480 const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIG
UOUS, &errorCode); | |
| 1481 if(U_FAILURE(errorCode)) { | |
| 1482 log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_
errorName(errorCode)); | |
| 1483 return; | |
| 1484 } | |
| 1485 | |
| 1486 length=unorm2_getDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &err
orCode); | |
| 1487 if(U_FAILURE(errorCode) || length>=0) { | |
| 1488 log_err("unorm2_getDecomposition(fcc, space) failed\n"); | |
| 1489 } | |
| 1490 errorCode=U_ZERO_ERROR; | |
| 1491 length=unorm2_getDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &err
orCode); | |
| 1492 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308
|| decomp[2]!=0) { | |
| 1493 log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n"); | |
| 1494 } | |
| 1495 errorCode=U_ZERO_ERROR; | |
| 1496 length=unorm2_getDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &e
rrorCode); | |
| 1497 if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x11
61 || decomp[2]!=0x11a8 || decomp[3]!=0) { | |
| 1498 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n")
; | |
| 1499 } | |
| 1500 errorCode=U_ZERO_ERROR; | |
| 1501 length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode); | |
| 1502 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) { | |
| 1503 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow f
ailed\n"); | |
| 1504 } | |
| 1505 errorCode=U_ZERO_ERROR; | |
| 1506 length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode); | |
| 1507 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
| 1508 log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n"); | |
| 1509 } | |
| 1510 errorCode=U_ZERO_ERROR; | |
| 1511 length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode); | |
| 1512 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
| 1513 log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n"); | |
| 1514 } | |
| 1515 } | |
| 1516 | |
| 1517 static void | |
| 1518 TestGetRawDecomposition() { | |
| 1519 UChar decomp[32]; | |
| 1520 int32_t length; | |
| 1521 | |
| 1522 UErrorCode errorCode=U_ZERO_ERROR; | |
| 1523 const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode); | |
| 1524 if(U_FAILURE(errorCode)) { | |
| 1525 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_err
orName(errorCode)); | |
| 1526 return; | |
| 1527 } | |
| 1528 /* | |
| 1529 * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping v
alues, | |
| 1530 * without recursive decomposition. | |
| 1531 */ | |
| 1532 | |
| 1533 length=unorm2_getRawDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &
errorCode); | |
| 1534 if(U_FAILURE(errorCode) || length>=0) { | |
| 1535 log_err("unorm2_getDecomposition(nfkc, space) failed\n"); | |
| 1536 } | |
| 1537 errorCode=U_ZERO_ERROR; | |
| 1538 length=unorm2_getRawDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &
errorCode); | |
| 1539 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308
|| decomp[2]!=0) { | |
| 1540 log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n"); | |
| 1541 } | |
| 1542 /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */ | |
| 1543 errorCode=U_ZERO_ERROR; | |
| 1544 length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, UPRV_LENGTHOF(decomp),
&errorCode); | |
| 1545 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301
|| decomp[2]!=0) { | |
| 1546 log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n"); | |
| 1547 } | |
| 1548 /* U+212B ANGSTROM SIGN */ | |
| 1549 errorCode=U_ZERO_ERROR; | |
| 1550 length=unorm2_getRawDecomposition(n2, 0x212b, decomp, UPRV_LENGTHOF(decomp),
&errorCode); | |
| 1551 if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) { | |
| 1552 log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n"); | |
| 1553 } | |
| 1554 errorCode=U_ZERO_ERROR; | |
| 1555 length=unorm2_getRawDecomposition(n2, 0xac00, decomp, UPRV_LENGTHOF(decomp),
&errorCode); | |
| 1556 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x11
61 || decomp[2]!=0) { | |
| 1557 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n"
); | |
| 1558 } | |
| 1559 /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */ | |
| 1560 errorCode=U_ZERO_ERROR; | |
| 1561 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp),
&errorCode); | |
| 1562 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11
a8 || decomp[2]!=0) { | |
| 1563 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n"
); | |
| 1564 } | |
| 1565 errorCode=U_ZERO_ERROR; | |
| 1566 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode); | |
| 1567 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) { | |
| 1568 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow
failed\n"); | |
| 1569 } | |
| 1570 errorCode=U_ZERO_ERROR; | |
| 1571 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode); | |
| 1572 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
| 1573 log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n"); | |
| 1574 } | |
| 1575 errorCode=U_ZERO_ERROR; | |
| 1576 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode); | |
| 1577 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
| 1578 log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n"); | |
| 1579 } | |
| 1580 } | |
| 1581 | |
| 1582 static void | |
| 1583 TestAppendRestoreMiddle() { | |
| 1584 UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 }; /* last chars are 'A' and
'cedilla' NFC */ | |
| 1585 static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 }; /* first char is 'ri
ng above' NFC */ | |
| 1586 /* NFC: C5 is 'A with ring above' */ | |
| 1587 static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0
x66 }; | |
| 1588 int32_t length; | |
| 1589 UErrorCode errorCode=U_ZERO_ERROR; | |
| 1590 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode); | |
| 1591 if(U_FAILURE(errorCode)) { | |
| 1592 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_erro
rName(errorCode)); | |
| 1593 return; | |
| 1594 } | |
| 1595 /* | |
| 1596 * Use length=-1 to fool the estimate of the ReorderingBuffer capacity. | |
| 1597 * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A> | |
| 1598 * still fits into a[] but the full result still overflows this capacity. | |
| 1599 * (Let it modify the destination buffer before reallocating internally.) | |
| 1600 */ | |
| 1601 length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode); | |
| 1602 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=UPRV_LENGTHOF(expected)) { | |
| 1603 log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)l
ength); | |
| 1604 return; | |
| 1605 } | |
| 1606 /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */ | |
| 1607 if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[
5]!=0) { | |
| 1608 log_err("unorm2_append(overflow) modified the first string\n"); | |
| 1609 return; | |
| 1610 } | |
| 1611 errorCode=U_ZERO_ERROR; | |
| 1612 length=unorm2_append(n2, a, -1, UPRV_LENGTHOF(a), b, -1, &errorCode); | |
| 1613 if(U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(expected) || 0!=u_memcmp(a,
expected, length)) { | |
| 1614 log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(erro
rCode), (int)length); | |
| 1615 return; | |
| 1616 } | |
| 1617 } | |
| 1618 | |
| 1619 static void | |
| 1620 TestGetEasyToUseInstance() { | |
| 1621 static const UChar in[]={ | |
| 1622 0xA0, /* -> <noBreak> 0020 */ | |
| 1623 0xC7, 0x301 /* = 1E08 = 0043 0327 0301 */ | |
| 1624 }; | |
| 1625 UChar out[32]; | |
| 1626 int32_t length; | |
| 1627 | |
| 1628 UErrorCode errorCode=U_ZERO_ERROR; | |
| 1629 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode); | |
| 1630 if(U_FAILURE(errorCode)) { | |
| 1631 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_erro
rName(errorCode)); | |
| 1632 return; | |
| 1633 } | |
| 1634 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out),
&errorCode); | |
| 1635 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) { | |
| 1636 log_err("unorm2_getNFCInstance() did not return an NFC instance (normali
zed length=%d; %s)\n", | |
| 1637 (int)length, u_errorName(errorCode)); | |
| 1638 } | |
| 1639 | |
| 1640 errorCode=U_ZERO_ERROR; | |
| 1641 n2=unorm2_getNFDInstance(&errorCode); | |
| 1642 if(U_FAILURE(errorCode)) { | |
| 1643 log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_erro
rName(errorCode)); | |
| 1644 return; | |
| 1645 } | |
| 1646 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out),
&errorCode); | |
| 1647 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[
2]!=0x327 || out[3]!=0x301) { | |
| 1648 log_err("unorm2_getNFDInstance() did not return an NFD instance (normali
zed length=%d; %s)\n", | |
| 1649 (int)length, u_errorName(errorCode)); | |
| 1650 } | |
| 1651 | |
| 1652 errorCode=U_ZERO_ERROR; | |
| 1653 n2=unorm2_getNFKCInstance(&errorCode); | |
| 1654 if(U_FAILURE(errorCode)) { | |
| 1655 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_err
orName(errorCode)); | |
| 1656 return; | |
| 1657 } | |
| 1658 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out),
&errorCode); | |
| 1659 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) { | |
| 1660 log_err("unorm2_getNFKCInstance() did not return an NFKC instance (norma
lized length=%d; %s)\n", | |
| 1661 (int)length, u_errorName(errorCode)); | |
| 1662 } | |
| 1663 | |
| 1664 errorCode=U_ZERO_ERROR; | |
| 1665 n2=unorm2_getNFKDInstance(&errorCode); | |
| 1666 if(U_FAILURE(errorCode)) { | |
| 1667 log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_err
orName(errorCode)); | |
| 1668 return; | |
| 1669 } | |
| 1670 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out),
&errorCode); | |
| 1671 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[
2]!=0x327 || out[3]!=0x301) { | |
| 1672 log_err("unorm2_getNFKDInstance() did not return an NFKD instance (norma
lized length=%d; %s)\n", | |
| 1673 (int)length, u_errorName(errorCode)); | |
| 1674 } | |
| 1675 | |
| 1676 errorCode=U_ZERO_ERROR; | |
| 1677 n2=unorm2_getNFKCCasefoldInstance(&errorCode); | |
| 1678 if(U_FAILURE(errorCode)) { | |
| 1679 log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n
", u_errorName(errorCode)); | |
| 1680 return; | |
| 1681 } | |
| 1682 length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out),
&errorCode); | |
| 1683 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) { | |
| 1684 log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefol
d instance (normalized length=%d; %s)\n", | |
| 1685 (int)length, u_errorName(errorCode)); | |
| 1686 } | |
| 1687 } | |
| 1688 | |
| 1689 #endif /* #if !UCONFIG_NO_NORMALIZATION */ | |
| OLD | NEW |