Index: source/test/intltest/alphaindextst.cpp |
diff --git a/source/test/intltest/alphaindextst.cpp b/source/test/intltest/alphaindextst.cpp |
index bfc53e693ea2f0dcfd782f401b549d20bada4cc6..847434704e0943b168047bda0ebe6fa68fc1a628 100644 |
--- a/source/test/intltest/alphaindextst.cpp |
+++ b/source/test/intltest/alphaindextst.cpp |
@@ -1,10 +1,10 @@ |
/******************************************************************** |
* COPYRIGHT: |
- * Copyright (c) 2012-2013, International Business Machines Corporation |
+ * Copyright (c) 2012-2014, International Business Machines Corporation |
* and others. All Rights Reserved. |
********************************************************************/ |
// |
-// file: alphaindex.cpp |
+// file: alphaindextst.cpp |
// Alphabetic Index Tests. |
// |
// Note: please... no character literals cast to UChars.. use (UChar)0xZZZZ |
@@ -13,6 +13,7 @@ |
#include "intltest.h" |
#include "alphaindextst.h" |
+#include "cmemory.h" |
#include "unicode/alphaindex.h" |
#include "unicode/coll.h" |
@@ -25,8 +26,6 @@ |
// #include <string> |
// #include <iostream> |
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) |
- |
namespace { |
UnicodeString joinLabelsAndAppend(AlphabeticIndex::ImmutableIndex &index, UnicodeString &dest) { |
@@ -63,6 +62,8 @@ void AlphabeticIndexTest::runIndexedTest( int32_t index, UBool exec, const char* |
TESTCASE_AUTO(TestSchSt); |
TESTCASE_AUTO(TestNoLabels); |
TESTCASE_AUTO(TestChineseZhuyin); |
+ TESTCASE_AUTO(TestJapaneseKanji); |
+ TESTCASE_AUTO(TestChineseUnihan); |
TESTCASE_AUTO_END; |
} |
@@ -93,7 +94,8 @@ void AlphabeticIndexTest::APITest() { |
// Constructor from a Collator |
// |
status = U_ZERO_ERROR; |
- RuleBasedCollator *coll = dynamic_cast<RuleBasedCollator *>(Collator::createInstance(Locale::getChinese(), status)); |
+ RuleBasedCollator *coll = dynamic_cast<RuleBasedCollator *>( |
+ Collator::createInstance(Locale::getGerman(), status)); |
TEST_CHECK_STATUS; |
TEST_ASSERT(coll != NULL); |
index = new AlphabeticIndex(coll, status); |
@@ -314,7 +316,7 @@ void AlphabeticIndexTest::APITest() { |
// if Russian sorts Cyrillic first. |
int32_t reorderCodes[20]; |
int32_t expectedLatinIndex = 0; |
- if (index->getCollator().getReorderCodes(reorderCodes, LENGTHOF(reorderCodes), status) > 0) { |
+ if (index->getCollator().getReorderCodes(reorderCodes, UPRV_LENGTHOF(reorderCodes), status) > 0) { |
expectedLatinIndex = index->getBucketCount(status) - 1; |
} |
n = index->getBucketIndex(adam, status); |
@@ -530,7 +532,7 @@ static const char *localeAndIndexCharactersLists[][2] = { |
void AlphabeticIndexTest::TestIndexCharactersList() { |
UErrorCode status = U_ZERO_ERROR; |
- for (int32_t i = 0; i < LENGTHOF(localeAndIndexCharactersLists); ++i) { |
+ for (int32_t i = 0; i < UPRV_LENGTHOF(localeAndIndexCharactersLists); ++i) { |
const char *(&localeAndIndexCharacters)[2] = localeAndIndexCharactersLists[i]; |
const char *locale = localeAndIndexCharacters[0]; |
UnicodeString expectedIndexCharacters |
@@ -559,7 +561,7 @@ void AlphabeticIndexTest::TestHaniFirst() { |
return; |
} |
int32_t reorderCodes[] = { USCRIPT_HAN }; |
- coll->setReorderCodes(reorderCodes, LENGTHOF(reorderCodes), status); |
+ coll->setReorderCodes(reorderCodes, UPRV_LENGTHOF(reorderCodes), status); |
TEST_CHECK_STATUS; |
AlphabeticIndex index(coll.orphan(), status); |
TEST_CHECK_STATUS; |
@@ -572,9 +574,9 @@ void AlphabeticIndexTest::TestHaniFirst() { |
assertEquals("getBucketIndex(i)", 9, bucketIndex); |
bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x03B1), status); |
assertEquals("getBucketIndex(Greek alpha)", 27, bucketIndex); |
- // TODO: Test with an unassigned code point (not just U+FFFF) |
- // when unassigned code points are not in the Hani reordering group any more. |
- // String unassigned = UTF16.valueOf(0x50005); |
+ // U+50005 is an unassigned code point which sorts at the end, independent of the Hani group. |
+ bucketIndex = index.getBucketIndex(UnicodeString(0x50005), status); |
+ assertEquals("getBucketIndex(U+50005)", 27, bucketIndex); |
bucketIndex = index.getBucketIndex(UnicodeString((UChar)0xFFFF), status); |
assertEquals("getBucketIndex(U+FFFF)", 27, bucketIndex); |
} |
@@ -588,22 +590,22 @@ void AlphabeticIndexTest::TestPinyinFirst() { |
return; |
} |
int32_t reorderCodes[] = { USCRIPT_HAN }; |
- coll->setReorderCodes(reorderCodes, LENGTHOF(reorderCodes), status); |
+ coll->setReorderCodes(reorderCodes, UPRV_LENGTHOF(reorderCodes), status); |
TEST_CHECK_STATUS; |
AlphabeticIndex index(coll.orphan(), status); |
TEST_CHECK_STATUS; |
- assertEquals("getBucketCount()", 1, index.getBucketCount(status)); // ... (underflow only) |
+ assertEquals("getBucketCount()", 28, index.getBucketCount(status)); // ... A-Z ... |
index.addLabels(Locale::getChinese(), status); |
assertEquals("getBucketCount()", 28, index.getBucketCount(status)); // ... A-Z ... |
int32_t bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x897f), status); |
- assertEquals("getBucketIndex(U+897F)", (int32_t)((UChar)0x0058/*X*/ - (UChar)0x0041/*A*/ + 1), (int32_t)bucketIndex); |
+ assertEquals("getBucketIndex(U+897F)", (int32_t)((UChar)0x0058/*X*/ - (UChar)0x0041/*A*/ + 1), bucketIndex); |
bucketIndex = index.getBucketIndex("i", status); |
assertEquals("getBucketIndex(i)", 9, bucketIndex); |
bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x03B1), status); |
assertEquals("getBucketIndex(Greek alpha)", (int32_t)27, bucketIndex); |
- // TODO: Test with an unassigned code point (not just U+FFFF) |
- // when unassigned code points are not in the Hani reordering group any more. |
- // String unassigned = UTF16.valueOf(0x50005); |
+ // U+50005 is an unassigned code point which sorts at the end, independent of the Hani group. |
+ bucketIndex = index.getBucketIndex(UnicodeString(0x50005), status); |
+ assertEquals("getBucketIndex(U+50005)", 27, bucketIndex); |
bucketIndex = index.getBucketIndex(UnicodeString((UChar)0xFFFF), status); |
assertEquals("getBucketIndex(U+FFFF)", 27, bucketIndex); |
} |
@@ -637,7 +639,7 @@ void AlphabeticIndexTest::TestSchSt() { |
{ "Steiff", 22, "St" }, |
{ "Thomas", 23, "T" } |
}; |
- for (int32_t i = 0; i < LENGTHOF(testCases); ++i) { |
+ for (int32_t i = 0; i < UPRV_LENGTHOF(testCases); ++i) { |
const TestCase &testCase = testCases[i]; |
UnicodeString name = UnicodeString(testCase.name).unescape(); |
UnicodeString label = UnicodeString(testCase.bucketLabel).unescape(); |
@@ -670,7 +672,7 @@ void AlphabeticIndexTest::TestNoLabels() { |
void AlphabeticIndexTest::TestChineseZhuyin() { |
UErrorCode status = U_ZERO_ERROR; |
char loc[100]; |
- uloc_forLanguageTag("zh-u-co-zhuyin", loc, LENGTHOF(loc), NULL, &status); |
+ uloc_forLanguageTag("zh-u-co-zhuyin", loc, UPRV_LENGTHOF(loc), NULL, &status); |
AlphabeticIndex index(loc, status); |
LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status)); |
TEST_CHECK_STATUS; |
@@ -682,4 +684,49 @@ void AlphabeticIndexTest::TestChineseZhuyin() { |
assertEquals("label 5", UnicodeString((UChar)0x3109), immIndex->getBucket(5)->getLabel()); |
} |
+void AlphabeticIndexTest::TestJapaneseKanji() { |
+ UErrorCode status = U_ZERO_ERROR; |
+ AlphabeticIndex index(Locale::getJapanese(), status); |
+ LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status)); |
+ TEST_CHECK_STATUS; |
+ // Purpose: the Japanese standard collator has no index characters for Kanji, so every sample |
+ // Kanji below must land in the overflow bucket, i.e. the last bucket of the immutable index. |
+ static const UChar32 kanji[] = { 0x4E9C, 0x95C7, 0x4E00, 0x58F1 }; |
+ int32_t overflowIndex = immIndex->getBucketCount() - 1; |
+ for(int32_t i = 0; i < UPRV_LENGTHOF(kanji); ++i) { |
+ char msg[40]; /* the longest message formatted below is 34 characters plus the terminating NUL */ |
+ sprintf(msg, "kanji[%d]=U+%04lX in overflow bucket", (int)i, (long)kanji[i]); |
+ assertEquals(msg, overflowIndex, immIndex->getBucketIndex(UnicodeString(kanji[i]), status)); |
+ TEST_CHECK_STATUS; |
+ } |
+} |
+ |
+void AlphabeticIndexTest::TestChineseUnihan() { /* Checks bucket count and two bucket lookups for the "zh-u-co-unihan" index. */ |
+ UErrorCode status = U_ZERO_ERROR; |
+ AlphabeticIndex index("zh-u-co-unihan", status); |
+ if(U_FAILURE(status)) { |
+ dataerrln("unable create an AlphabeticIndex for Chinese/unihan: %s", u_errorName(status)); |
+ return; |
+ } |
+ index.setMaxLabelCount(500, status); // The ICU 54 default of 99 is below the 216 buckets required below, so raise the limit. |
+ LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status)); |
+ TEST_CHECK_STATUS; |
+ int32_t bucketCount = immIndex->getBucketCount(); |
+ if(bucketCount < 216) { |
+ // Expected minimum: one underflow label and one overflow label, |
+ // plus one label for each of the 214 radicals (2 + 214 = 216), |
+ // and possibly additional labels for simplified radicals. |
+ dataerrln("too few buckets/labels for Chinese/unihan: %d (is zh/unihan data available?)", |
+ bucketCount); |
+ return; |
+ } else { |
+ logln("Chinese/unihan has %d buckets/labels", bucketCount); |
+ } |
+ // The expected bucket index is the character's radical number, adjusted for any simplified-radical buckets below it. |
+ int32_t bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x4e5d), status); |
+ assertEquals("getBucketIndex(U+4E5D)", 5, bucketIndex); |
+ bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x7527), status); |
+ assertEquals("getBucketIndex(U+7527)", 100, bucketIndex); |
+} |
+ |
#endif |