OLD | NEW |
1 | 1 |
2 /******************************************************************** | 2 /******************************************************************** |
3 * COPYRIGHT: | 3 * COPYRIGHT: |
4 * Copyright (c) 2001-2014, International Business Machines Corporation and | 4 * Copyright (c) 2001-2015, International Business Machines Corporation and |
5 * others. All Rights Reserved. | 5 * others. All Rights Reserved. |
6 ********************************************************************/ | 6 ********************************************************************/ |
7 /******************************************************************************* | 7 /******************************************************************************* |
8 * | 8 * |
9 * File cmsccoll.C | 9 * File cmsccoll.C |
10 * | 10 * |
11 *******************************************************************************/ | 11 *******************************************************************************/ |
12 /** | 12 /** |
13 * These are the tests specific to ICU 1.8 and above, that I didn't know where | 13 * These are the tests specific to ICU 1.8 and above, that I didn't know where |
14 * to fit. | 14 * to fit. |
(...skipping 1346 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
/*
 * TestCyrillicTailoring: feeds one fixed list of three escaped Cyrillic test
 * strings (U+0410 then b; U+0410 U+0306 then a; U+04D0 then A) to
 * genericLocaleStarter / genericRulesStarter, each time with a count of 3.
 *
 * Layout of the rows below: text left of the bar is the OLD revision, text
 * right of the bar is the NEW revision, each prefixed by its own line number.
 *
 * OLD runs the root locale plus six rule strings.
 * NEW comments out the root locale run and four of the rule strings, and keeps
 * only the two rule strings that put U+04D0 after U+0410.
 *
 * NOTE(review): both helpers are defined outside this view; presumably they
 * check that the strings collate in the listed order - confirm.
 */
1361 static void TestCyrillicTailoring(void) { | 1361 static void TestCyrillicTailoring(void) { |
1362 static const char *test[] = { | 1362 static const char *test[] = { |
1363 "\\u0410b", | 1363 "\\u0410b", |
1364 "\\u0410\\u0306a", | 1364 "\\u0410\\u0306a", |
1365 "\\u04d0A" | 1365 "\\u04d0A" |
1366 }; | 1366 }; |
1367 | 1367 |
1368 /* Russian overrides contractions, so this test is not valid anymore */ | 1368 /* Russian overrides contractions, so this test is not valid anymore */ |
1369 /*genericLocaleStarter("ru", test, 3);*/ | 1369 /*genericLocaleStarter("ru", test, 3);*/ |
1370 | 1370 |
1371 genericLocaleStarter("root", test, 3); | 1371 // Most of the following are commented out because UCA 8.0 |
1372 genericRulesStarter("&\\u0410 = \\u0410", test, 3); | 1372 // drops most of the Cyrillic contractions from the default order. |
1373 genericRulesStarter("&Z < \\u0410", test, 3); | 1373 // See CLDR ticket #7246 "root collation: remove Cyrillic contractions". |
| 1374 |
| 1375 // genericLocaleStarter("root", test, 3); |
| 1376 // genericRulesStarter("&\\u0410 = \\u0410", test, 3); |
| 1377 // genericRulesStarter("&Z < \\u0410", test, 3); |
1374 genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3); | 1378 genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3); |
1375 genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3); | 1379 genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3); |
1376 genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3); | 1380 // genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3); |
1377 genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3); | 1381 // genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3); |
1378 } | 1382 } |
1379 | 1383 |
1380 static void TestSuppressContractions(void) { | 1384 static void TestSuppressContractions(void) { |
1381 | 1385 |
1382 static const char *testNoCont2[] = { | 1386 static const char *testNoCont2[] = { |
1383 "\\u0410\\u0302a", | 1387 "\\u0410\\u0302a", |
1384 "\\u0410\\u0306b", | 1388 "\\u0410\\u0306b", |
1385 "\\u0410c" | 1389 "\\u0410c" |
1386 }; | 1390 }; |
1387 static const char *testNoCont[] = { | 1391 static const char *testNoCont[] = { |
(...skipping 3294 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4682 } | 4686 } |
4683 | 4687 |
4684 /* | 4688 /* |
4685 * Test reordering API. | 4689 * Test reordering API. |
4686 */ | 4690 */ |
4687 static void TestReorderingAPI(void) | 4691 static void TestReorderingAPI(void) |
4688 { | 4692 { |
4689 UErrorCode status = U_ZERO_ERROR; | 4693 UErrorCode status = U_ZERO_ERROR; |
4690 UCollator *myCollation; | 4694 UCollator *myCollation; |
4691 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUN
CTUATION}; | 4695 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUN
CTUATION}; |
4692 int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_RE
ORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS}; | 4696 int32_t duplicateReorderCodes[] = {USCRIPT_HIRAGANA, USCRIPT_GREEK, UCOL_REO
RDER_CODE_CURRENCY, USCRIPT_KATAKANA}; |
4693 int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCR
IPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; | 4697 int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCR
IPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; |
4694 int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE; | 4698 int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE; |
4695 UCollationResult collResult; | 4699 UCollationResult collResult; |
4696 int32_t retrievedReorderCodesLength; | 4700 int32_t retrievedReorderCodesLength; |
4697 int32_t retrievedReorderCodes[10]; | 4701 int32_t retrievedReorderCodes[10]; |
4698 UChar greekString[] = { 0x03b1 }; | 4702 UChar greekString[] = { 0x03b1 }; |
4699 UChar punctuationString[] = { 0x203e }; | 4703 UChar punctuationString[] = { 0x203e }; |
4700 int loopIndex; | 4704 int loopIndex; |
4701 | 4705 |
4702 log_verbose("Testing non-lead bytes in a sort key with and without reorderin
g\n"); | 4706 log_verbose("Testing non-lead bytes in a sort key with and without reorderin
g\n"); |
(...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4941 | 4945 |
/*
 * containsExpectedScript: linear membership test over an int32_t array.
 * The OLD (left) and NEW (right) columns are identical for this helper.
 *
 * @param scripts        array of codes to search
 * @param length         number of leading entries of scripts to examine
 * @param expectedScript value to look for
 * @return TRUE as soon as some scripts[i] with 0 <= i < length equals
 *         expectedScript; FALSE otherwise, including when length is not
 *         positive (the loop body then never runs).
 */
4942 static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int
32_t expectedScript) { | 4946 static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int
32_t expectedScript) { |
4943 int32_t i; | 4947 int32_t i; |
4944 for (i = 0; i < length; ++i) { | 4948 for (i = 0; i < length; ++i) { |
4945 if (expectedScript == scripts[i]) { return TRUE; } | 4949 if (expectedScript == scripts[i]) { return TRUE; } |
4946 } | 4950 } |
4947 return FALSE; | 4951 return FALSE; |
4948 } | 4952 } |
4949 | 4953 |
/*
 * TestEquivalentReorderingScripts: exercises ucol_getEquivalentReorderCodes()
 * and reports mismatches through log_err / log_err_status. Returns nothing.
 *
 * Layout of the rows below: text left of the bar is the OLD revision, text
 * right of the bar is the NEW revision. Rows with an empty side exist in only
 * one revision, so the two columns do not always show the same statement.
 *
 * OLD: the results for Gothic and for Shavian must each hold at least the 26
 *      codes listed in expectedScripts; the Gothic result must also be in
 *      strictly increasing order.
 * NEW: - Gothic must return exactly one code, USCRIPT_GOTHIC itself.
 *      - Hiragana must return exactly the three codes in expectedScripts, in
 *        strictly increasing order.
 *      - Katakana must return exactly those three codes (no order check).
 *      - USCRIPT_KATAKANA_OR_HIRAGANA must return the same number of codes.
 *      - Han, Simplified Han and Traditional Han must return 3 codes each.
 *      - Meroitic Cursive and Meroitic Hieroglyphs must return 2 codes each.
 *
 * For the Gothic, Hiragana, Shavian and Katakana calls a failed status is
 * logged with log_err_status and the function returns immediately.
 *
 * NOTE(review): LEN is defined outside this view; if it expands to a sizeof
 * based expression, the %d conversions that print LEN(expectedScripts) and the
 * comparisons against the signed length mix types - confirm.
 */
4950 static void TestEquivalentReorderingScripts(void) { | 4954 static void TestEquivalentReorderingScripts(void) { |
| 4955 // Beginning with ICU 55, collation reordering moves single scripts |
| 4956 // rather than groups of scripts, |
| 4957 // except where scripts share a range and sort primary-equal. |
4951 UErrorCode status = U_ZERO_ERROR; | 4958 UErrorCode status = U_ZERO_ERROR; |
4952 int32_t equivalentScripts[100]; | 4959 int32_t equivalentScripts[100]; |
4953 int32_t length; | 4960 int32_t length; |
4954 int i; | 4961 int i; |
4955 int32_t prevScript; | 4962 int32_t prevScript; |
4956 /* At least these scripts are expected to be equivalent. There may be more.
*/ | 4963 /* These scripts are expected to be equivalent. */ |
4957 static const int32_t expectedScripts[] = { | 4964 static const int32_t expectedScripts[] = { |
4958 USCRIPT_BOPOMOFO, | 4965 USCRIPT_HIRAGANA, |
4959 USCRIPT_LISU, | 4966 USCRIPT_KATAKANA, |
4960 USCRIPT_LYCIAN, | 4967 USCRIPT_KATAKANA_OR_HIRAGANA |
4961 USCRIPT_CARIAN, | |
4962 USCRIPT_LYDIAN, | |
4963 USCRIPT_YI, | |
4964 USCRIPT_OLD_ITALIC, | |
4965 USCRIPT_GOTHIC, | |
4966 USCRIPT_DESERET, | |
4967 USCRIPT_SHAVIAN, | |
4968 USCRIPT_OSMANYA, | |
4969 USCRIPT_LINEAR_B, | |
4970 USCRIPT_CYPRIOT, | |
4971 USCRIPT_OLD_SOUTH_ARABIAN, | |
4972 USCRIPT_AVESTAN, | |
4973 USCRIPT_IMPERIAL_ARAMAIC, | |
4974 USCRIPT_INSCRIPTIONAL_PARTHIAN, | |
4975 USCRIPT_INSCRIPTIONAL_PAHLAVI, | |
4976 USCRIPT_UGARITIC, | |
4977 USCRIPT_OLD_PERSIAN, | |
4978 USCRIPT_CUNEIFORM, | |
4979 USCRIPT_EGYPTIAN_HIEROGLYPHS, | |
4980 USCRIPT_PHONETIC_POLLARD, | |
4981 USCRIPT_SORA_SOMPENG, | |
4982 USCRIPT_MEROITIC_CURSIVE, | |
4983 USCRIPT_MEROITIC_HIEROGLYPHS | |
4984 }; | 4968 }; |
4985 | 4969 |
/*
 * Gothic query. NEW first stores 0 in slot 0 of the output buffer, presumably
 * so that a call which writes nothing cannot satisfy the check on
 * equivalentScripts[0] by accident - confirm.
 */
4986 /* UScript.GOTHIC */ | 4970 equivalentScripts[0] = 0; |
4987 length = ucol_getEquivalentReorderCodes( | 4971 length = ucol_getEquivalentReorderCodes( |
4988 USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status); | 4972 USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status); |
4989 if (U_FAILURE(status)) { | 4973 if (U_FAILURE(status)) { |
4990 log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder code
s: %s\n", myErrorName(status)); | 4974 log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder code
s: %s\n", myErrorName(status)); |
4991 return; | 4975 return; |
4992 } | 4976 } |
4993 if (length < LEN(expectedScripts)) { | 4977 if (length != 1 || equivalentScripts[0] != USCRIPT_GOTHIC) { |
4994 log_err("ERROR/Gothic: retrieved equivalent script length wrong: " | 4978 log_err("ERROR/Gothic: retrieved equivalent scripts wrong: " |
4995 "expected at least %d, was = %d\n", | 4979 "length expected 1, was = %d; expected [%d] was [%d]\n", |
| 4980 length, USCRIPT_GOTHIC, equivalentScripts[0]); |
| 4981 } |
| 4982 |
/* NEW only: Hiragana query; its result is checked against expectedScripts. */
| 4983 length = ucol_getEquivalentReorderCodes( |
| 4984 USCRIPT_HIRAGANA, equivalentScripts, LEN(equivalentScripts), &status
); |
| 4985 if (U_FAILURE(status)) { |
| 4986 log_err_status(status, "ERROR/Hiragana: retrieving equivalent reorder co
des: %s\n", myErrorName(status)); |
| 4987 return; |
| 4988 } |
| 4989 if (length != LEN(expectedScripts)) { |
| 4990 log_err("ERROR/Hiragana: retrieved equivalent script length wrong: " |
| 4991 "expected %d, was = %d\n", |
4996 LEN(expectedScripts), length); | 4992 LEN(expectedScripts), length); |
4997 } | 4993 } |
/*
 * Order check: every returned code must be strictly greater than the one
 * before it; an equal or smaller value is reported. The following loop then
 * checks that each entry of expectedScripts occurs in the result.
 */
4998 prevScript = -1; | 4994 prevScript = -1; |
4999 for (i = 0; i < length; ++i) { | 4995 for (i = 0; i < length; ++i) { |
5000 int32_t script = equivalentScripts[i]; | 4996 int32_t script = equivalentScripts[i]; |
5001 if (script <= prevScript) { | 4997 if (script <= prevScript) { |
5002 log_err("ERROR/Gothic: equivalent scripts out of order at index %d\n
", i); | 4998 log_err("ERROR/Hiragana: equivalent scripts out of order at index %d
\n", i); |
5003 } | 4999 } |
5004 prevScript = script; | 5000 prevScript = script; |
5005 } | 5001 } |
5006 for (i = 0; i < LEN(expectedScripts); i++) { | 5002 for (i = 0; i < LEN(expectedScripts); i++) { |
5007 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i
])) { | 5003 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i
])) { |
5008 log_err("ERROR/Gothic: equivalent scripts do not contain %d\n", | 5004 log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n", |
5009 expectedScripts[i]); | 5005 expectedScripts[i]); |
5010 } | 5006 } |
5011 } | 5007 } |
5012 | 5008 |
5013 /* UScript.SHAVIAN */ | |
5014 length = ucol_getEquivalentReorderCodes( | 5009 length = ucol_getEquivalentReorderCodes( |
5015 USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status)
; | 5010 USCRIPT_KATAKANA, equivalentScripts, LEN(equivalentScripts), &status
); |
5016 if (U_FAILURE(status)) { | 5011 if (U_FAILURE(status)) { |
5017 log_err_status(status, "ERROR/Shavian: retrieving equivalent reorder cod
es: %s\n", myErrorName(status)); | 5012 log_err_status(status, "ERROR/Katakana: retrieving equivalent reorder co
des: %s\n", myErrorName(status)); |
5018 return; | 5013 return; |
5019 } | 5014 } |
5020 if (length < LEN(expectedScripts)) { | 5015 if (length != LEN(expectedScripts)) { |
5021 log_err("ERROR/Shavian: retrieved equivalent script length wrong: " | 5016 log_err("ERROR/Katakana: retrieved equivalent script length wrong: " |
5022 "expected at least %d, was = %d\n", | 5017 "expected %d, was = %d\n", |
5023 LEN(expectedScripts), length); | 5018 LEN(expectedScripts), length); |
5024 } | 5019 } |
5025 for (i = 0; i < LEN(expectedScripts); i++) { | 5020 for (i = 0; i < LEN(expectedScripts); i++) { |
5026 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i
])) { | 5021 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i
])) { |
5027 log_err("ERROR/Shavian: equivalent scripts do not contain %d\n", | 5022 log_err("ERROR/Katakana: equivalent scripts do not contain %d\n", |
5028 expectedScripts[i]); | 5023 expectedScripts[i]); |
5029 } | 5024 } |
5030 } | 5025 } |
| 5026 |
/*
 * NEW only: the remaining queries check just the number of returned codes.
 * NOTE(review): from here on a failed status and a wrong length share one
 * message, there is no early return, and status is never reset. If
 * ucol_getEquivalentReorderCodes does nothing once status holds a failure
 * (not visible here - confirm), one failure would make every later check
 * report a length error as well.
 */
| 5027 length = ucol_getEquivalentReorderCodes( |
| 5028 USCRIPT_KATAKANA_OR_HIRAGANA, equivalentScripts, LEN(equivalentScrip
ts), &status); |
| 5029 if (U_FAILURE(status) || length != LEN(expectedScripts)) { |
| 5030 log_err("ERROR/Hrkt: retrieved equivalent script length wrong: " |
| 5031 "expected %d, was = %d\n", |
| 5032 LEN(expectedScripts), length); |
| 5033 } |
| 5034 |
| 5035 length = ucol_getEquivalentReorderCodes( |
| 5036 USCRIPT_HAN, equivalentScripts, LEN(equivalentScripts), &status); |
| 5037 if (U_FAILURE(status) || length != 3) { |
| 5038 log_err("ERROR/Hani: retrieved equivalent script length wrong: " |
| 5039 "expected 3, was = %d\n", length); |
| 5040 } |
| 5041 length = ucol_getEquivalentReorderCodes( |
| 5042 USCRIPT_SIMPLIFIED_HAN, equivalentScripts, LEN(equivalentScripts), &
status); |
| 5043 if (U_FAILURE(status) || length != 3) { |
| 5044 log_err("ERROR/Hans: retrieved equivalent script length wrong: " |
| 5045 "expected 3, was = %d\n", length); |
| 5046 } |
| 5047 length = ucol_getEquivalentReorderCodes( |
| 5048 USCRIPT_TRADITIONAL_HAN, equivalentScripts, LEN(equivalentScripts),
&status); |
| 5049 if (U_FAILURE(status) || length != 3) { |
| 5050 log_err("ERROR/Hant: retrieved equivalent script length wrong: " |
| 5051 "expected 3, was = %d\n", length); |
| 5052 } |
| 5053 |
| 5054 length = ucol_getEquivalentReorderCodes( |
| 5055 USCRIPT_MEROITIC_CURSIVE, equivalentScripts, LEN(equivalentScripts),
&status); |
| 5056 if (U_FAILURE(status) || length != 2) { |
| 5057 log_err("ERROR/Merc: retrieved equivalent script length wrong: " |
| 5058 "expected 2, was = %d\n", length); |
| 5059 } |
| 5060 length = ucol_getEquivalentReorderCodes( |
| 5061 USCRIPT_MEROITIC_HIEROGLYPHS, equivalentScripts, LEN(equivalentScrip
ts), &status); |
| 5062 if (U_FAILURE(status) || length != 2) { |
| 5063 log_err("ERROR/Mero: retrieved equivalent script length wrong: " |
| 5064 "expected 2, was = %d\n", length); |
| 5065 } |
5031 } | 5066 } |
5032 | 5067 |
5033 static void TestReorderingAcrossCloning(void) | 5068 static void TestReorderingAcrossCloning(void) |
5034 { | 5069 { |
5035 UErrorCode status = U_ZERO_ERROR; | 5070 UErrorCode status = U_ZERO_ERROR; |
5036 UCollator *myCollation; | 5071 UCollator *myCollation; |
5037 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUN
CTUATION}; | 5072 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUN
CTUATION}; |
5038 UCollator *clonedCollation; | 5073 UCollator *clonedCollation; |
5039 int32_t retrievedReorderCodesLength; | 5074 int32_t retrievedReorderCodesLength; |
5040 int32_t retrievedReorderCodes[10]; | 5075 int32_t retrievedReorderCodes[10]; |
(...skipping 892 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5933 TEST(TestHaniReorderWithOtherRules); | 5968 TEST(TestHaniReorderWithOtherRules); |
5934 TEST(TestMultipleReorder); | 5969 TEST(TestMultipleReorder); |
5935 TEST(TestReorderingAcrossCloning); | 5970 TEST(TestReorderingAcrossCloning); |
5936 TEST(TestReorderWithNumericCollation); | 5971 TEST(TestReorderWithNumericCollation); |
5937 | 5972 |
5938 TEST(TestCaseLevelBufferOverflow); | 5973 TEST(TestCaseLevelBufferOverflow); |
5939 TEST(TestNextSortKeyPartJaIdentical); | 5974 TEST(TestNextSortKeyPartJaIdentical); |
5940 } | 5975 } |
5941 | 5976 |
5942 #endif /* #if !UCONFIG_NO_COLLATION */ | 5977 #endif /* #if !UCONFIG_NO_COLLATION */ |
OLD | NEW |