Index: source/test/cintltst/cbiapts.c |
diff --git a/source/test/cintltst/cbiapts.c b/source/test/cintltst/cbiapts.c |
index c9080f2d153a828cfe44edce22c4665782b9a0bc..e654c094ad7ac573e7a9eaaf7c1968cbdaa2512a 100644 |
--- a/source/test/cintltst/cbiapts.c |
+++ b/source/test/cintltst/cbiapts.c |
@@ -1,6 +1,6 @@ |
/******************************************************************** |
* COPYRIGHT: |
- * Copyright (c) 1997-2013, International Business Machines Corporation and |
+ * Copyright (c) 1997-2015, International Business Machines Corporation and |
* others. All Rights Reserved. |
********************************************************************/ |
/******************************************************************************** |
@@ -31,6 +31,7 @@ |
#include "unicode/utext.h" |
#include "cintltst.h" |
#include "cbiapts.h" |
+#include "cmemory.h" |
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ |
log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}} |
@@ -47,6 +48,8 @@ static void TestBreakIteratorStatusVec(void); |
static void TestBreakIteratorUText(void); |
static void TestBreakIteratorTailoring(void); |
static void TestBreakIteratorRefresh(void); |
+static void TestBug11665(void); |
+static void TestBreakIteratorSuppressions(void); |
void addBrkIterAPITest(TestNode** root); |
@@ -62,6 +65,8 @@ void addBrkIterAPITest(TestNode** root) |
addTest(root, &TestBreakIteratorStatusVec, "tstxtbd/cbiapts/TestBreakIteratorStatusVec"); |
addTest(root, &TestBreakIteratorTailoring, "tstxtbd/cbiapts/TestBreakIteratorTailoring"); |
addTest(root, &TestBreakIteratorRefresh, "tstxtbd/cbiapts/TestBreakIteratorRefresh"); |
+ addTest(root, &TestBug11665, "tstxtbd/cbiapts/TestBug11665"); |
+ addTest(root, &TestBreakIteratorSuppressions, "tstxtbd/cbiapts/TestBreakIteratorSuppressions"); |
} |
#define CLONETEST_ITERATOR_COUNT 2 |
@@ -847,6 +852,9 @@ static void TestBreakIteratorRefresh(void) { |
bi = ubrk_open(UBRK_LINE, "en_US", NULL, 0, &status); |
TEST_ASSERT_SUCCESS(status); |
+ if (U_FAILURE(status)) { |
+ return; |
+ } |
utext_openUChars(&ut1, testStr, -1, &status); |
TEST_ASSERT_SUCCESS(status); |
@@ -879,4 +887,180 @@ static void TestBreakIteratorRefresh(void) { |
ubrk_close(bi); |
} |
+ |
+static void TestBug11665(void) { |
+    /* |
+     * Regression test for bug 11665: Japanese text beginning with Katakana |
+     * characters was broken incorrectly when no prior Japanese or Chinese text |
+     * had been encountered. Tested here in cintltst, rather than in intltest, |
+     * because only cintltst tests have the ability to reset ICU, which is needed |
+     * to get the bug to manifest itself. Two passes must yield identical breaks. |
+     */ |
+    static const UChar japaneseText[] = {0x30A2, 0x30EC, 0x30EB, 0x30AE, 0x30FC, 0x6027, 0x7D50, 0x819C, 0x708E}; |
+    int32_t boundaries[10] = {0}; |
+    UBreakIterator *bi = NULL; |
+    int32_t brk; |
+    int32_t brkIdx = 0; |
+    int32_t totalBreaks = 0; |
+    UErrorCode status = U_ZERO_ERROR; |
+    /* Reset ICU first so the iterator below is opened from a fresh state. */ |
+    ctest_resetICU(); |
+    bi = ubrk_open(UBRK_WORD, "en_US", japaneseText, UPRV_LENGTHOF(japaneseText), &status); |
+    TEST_ASSERT_SUCCESS(status); |
+    if (!bi) { |
+        return; |
+    } |
+    for (brk=ubrk_first(bi); brk != UBRK_DONE; brk=ubrk_next(bi)) { /* first pass: record every boundary */ |
+        boundaries[brkIdx] = brk; |
+        if (++brkIdx >= UPRV_LENGTHOF(boundaries) - 1) { /* cap reached: stop before the array fills */ |
+            break; |
+        } |
+    } |
+    if (brkIdx <= 2 || brkIdx >= UPRV_LENGTHOF(boundaries) - 1) { /* same cap as the loop above, so "too many" is detectable */ |
+        log_err("%s:%d too few or many breaks found.\n", __FILE__, __LINE__); |
+    } else { |
+        totalBreaks = brkIdx; |
+        brkIdx = 0; |
+        for (brk=ubrk_first(bi); brk != UBRK_DONE; brk=ubrk_next(bi)) { /* second pass: must match the first */ |
+            if (brk != boundaries[brkIdx]) { |
+                log_err("%s:%d Break #%d differs between first and second iteration.\n", __FILE__, __LINE__, brkIdx); |
+                break; |
+            } |
+            if (++brkIdx >= UPRV_LENGTHOF(boundaries) - 1) { |
+                log_err("%s:%d Too many breaks.\n", __FILE__, __LINE__); |
+                break; |
+            } |
+        } |
+        if (totalBreaks != brkIdx) { |
+            log_err("%s:%d Number of breaks differ between first and second iteration.\n", __FILE__, __LINE__); |
+        } |
+    } |
+    ubrk_close(bi); |
+} |
+ |
+/* |
+ * Each expected-offsets array below lists the expected break offsets in order and ends with -1. |
+ * In failure messages, "expected ... expOffset -1" means "expected the end of the offsets". |
+ */ |
+ |
+static const char testSentenceSuppressionsEn[] = "Mr. Jones comes home. Dr. Smith Ph.D. is out. In the U.S.A. it is hot."; |
+static const int32_t testSentSuppFwdOffsetsEn[] = { 22, 26, 46, 70, -1 }; /* With suppressions; the break after "Dr." (26) is currently not suppressed */ |
+static const int32_t testSentFwdOffsetsEn[] = { 4, 22, 26, 46, 70, -1 }; /* Without suppressions */ |
+static const int32_t testSentSuppRevOffsetsEn[] = { 46, 26, 22, 0, -1 }; /* With suppressions; the break after "Dr." (26) is currently not suppressed */ |
+static const int32_t testSentRevOffsetsEn[] = { 46, 26, 22, 4, 0, -1 }; /* Without suppressions */ |
+ |
+static const char testSentenceSuppressionsDe[] = "Wenn ich schon h\\u00F6re zu Guttenberg kommt evtl. zur\\u00FCck."; /* escapes are expanded by u_unescape() in the test */ |
+static const int32_t testSentSuppFwdOffsetsDe[] = { 53, -1 }; /* With suppressions */ |
+static const int32_t testSentFwdOffsetsDe[] = { 53, -1 }; /* Without suppressions; no break in evtl. zur due to casing */ |
+static const int32_t testSentSuppRevOffsetsDe[] = { 0, -1 }; /* With suppressions */ |
+static const int32_t testSentRevOffsetsDe[] = { 0, -1 }; /* Without suppressions */ |
+ |
+static const char testSentenceSuppressionsEs[] = "Te esperamos todos los miercoles en Bravo 416, Col. El Pueblo a las 7 PM."; |
+static const int32_t testSentSuppFwdOffsetsEs[] = { 73, -1 }; /* With suppressions */ |
+static const int32_t testSentFwdOffsetsEs[] = { 52, 73, -1 }; /* Without suppressions */ |
+static const int32_t testSentSuppRevOffsetsEs[] = { 0, -1 }; /* With suppressions */ |
+static const int32_t testSentRevOffsetsEs[] = { 52, 0, -1 }; /* Without suppressions */ |
+ |
+enum { kTextULenMax = 128 }; /* capacity, in UChars, of the buffer each test text is unescaped into */ |
+ |
+typedef struct { /* One sentence-break test case: locale, text, and expected break offsets. */ |
+ const char * locale; /* locale ID passed to ubrk_open(); NULL marks the end of the table */ |
+ const char * text; /* test text; run through u_unescape() before being given to the iterator */ |
+ const int32_t * expFwdOffsets; /* -1-terminated offsets expected from ubrk_next() and ubrk_following() */ |
+ const int32_t * expRevOffsets; /* -1-terminated offsets expected from ubrk_previous() and ubrk_preceding() */ |
+} TestBISuppressionsItem; |
+ |
+static const TestBISuppressionsItem testBISuppressionsItems[] = { /* test cases; terminated by an all-NULL entry */ |
+    { "en@ss=standard", testSentenceSuppressionsEn, testSentSuppFwdOffsetsEn, testSentSuppRevOffsetsEn }, |
+    { "en", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, |
+    { "fr@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, |
+    { "af@ss=standard", testSentenceSuppressionsEn, testSentSuppFwdOffsetsEn, testSentSuppRevOffsetsEn }, /* no brkiter data => en suppressions? */ |
+    { "zh@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressions */ |
+    { "zh_Hant@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressions */ |
+    { "fi@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressions */ |
+    { "ja@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressions */ |
+    { "de@ss=standard", testSentenceSuppressionsDe, testSentSuppFwdOffsetsDe, testSentSuppRevOffsetsDe }, |
+    { "de", testSentenceSuppressionsDe, testSentFwdOffsetsDe, testSentRevOffsetsDe }, |
+    { "es@ss=standard", testSentenceSuppressionsEs, testSentSuppFwdOffsetsEs, testSentSuppRevOffsetsEs }, |
+    { "es", testSentenceSuppressionsEs, testSentFwdOffsetsEs, testSentRevOffsetsEs }, |
+    { NULL, NULL, NULL, NULL } /* terminator: the test loop stops at the first NULL locale */ |
+}; |
+ |
+static void TestBreakIteratorSuppressions(void) { |
+    const TestBISuppressionsItem * itemPtr; |
+    /* For each table entry, walk the sentence breaks four ways and compare them with the -1-terminated expected offsets. */ |
+    for (itemPtr = testBISuppressionsItems; itemPtr->locale != NULL; itemPtr++) { |
+        UChar textU[kTextULenMax]; |
+        int32_t textULen = u_unescape(itemPtr->text, textU, kTextULenMax); |
+        UErrorCode status = U_ZERO_ERROR; |
+        UBreakIterator *bi = ubrk_open(UBRK_SENTENCE, itemPtr->locale, textU, textULen, &status); |
+        log_verbose("#%d: %s\n", (int)(itemPtr-testBISuppressionsItems), itemPtr->locale); /* cast: pointer difference is not an int */ |
+        if (U_SUCCESS(status)) { |
+            int32_t offset, start; |
+            const int32_t * expOffsetPtr; |
+            const int32_t * expOffsetStart; |
+            /* 1. Forward, stepping with ubrk_next(). */ |
+            expOffsetStart = expOffsetPtr = itemPtr->expFwdOffsets; |
+            ubrk_first(bi); |
+            for (; (offset = ubrk_next(bi)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) { |
+                if (offset != *expOffsetPtr) { |
+                    log_err("FAIL: ubrk_next loc \"%s\", expected %d, got %d\n", itemPtr->locale, *expOffsetPtr, offset); |
+                } |
+            } |
+            if (offset != UBRK_DONE || *expOffsetPtr >= 0) { /* both sequences must end together */ |
+                log_err("FAIL: ubrk_next loc \"%s\", expected UBRK_DONE & expOffset -1, got %d and %d\n", itemPtr->locale, offset, *expOffsetPtr); |
+            } |
+            /* 2. Forward, using ubrk_following() from one past the start and then one past each expected offset. */ |
+            expOffsetStart = expOffsetPtr = itemPtr->expFwdOffsets; |
+            start = ubrk_first(bi) + 1; |
+            for (; (offset = ubrk_following(bi, start)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) { |
+                if (offset != *expOffsetPtr) { |
+                    log_err("FAIL: ubrk_following(%d) loc \"%s\", expected %d, got %d\n", start, itemPtr->locale, *expOffsetPtr, offset); |
+                } |
+                start = *expOffsetPtr + 1; |
+            } |
+            if (offset != UBRK_DONE || *expOffsetPtr >= 0) { |
+                log_err("FAIL: ubrk_following(%d) loc \"%s\", expected UBRK_DONE & expOffset -1, got %d and %d\n", start, itemPtr->locale, offset, *expOffsetPtr); |
+            } |
+            /* 3. Backward, stepping with ubrk_previous() from the last boundary. */ |
+            expOffsetStart = expOffsetPtr = itemPtr->expRevOffsets; |
+            offset = ubrk_last(bi); |
+            log_verbose("___ @%d ubrk_last\n", offset); |
+            if (offset == 0) { |
+                log_err("FAIL: ubrk_last loc \"%s\" unexpected %d\n", itemPtr->locale, offset); |
+            } |
+            for (; (offset = ubrk_previous(bi)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) { |
+                if (offset != *expOffsetPtr) { |
+                    log_err("FAIL: ubrk_previous loc \"%s\", expected %d, got %d\n", itemPtr->locale, *expOffsetPtr, offset); |
+                } else { |
+                    log_verbose("[%d] @%d ubrk_previous()\n", (int)(expOffsetPtr - expOffsetStart), offset); |
+                } |
+            } |
+            if (offset != UBRK_DONE || *expOffsetPtr >= 0) { |
+                log_err("FAIL: ubrk_previous loc \"%s\", expected UBRK_DONE & expOffset[%d] -1, got %d and %d\n", itemPtr->locale, |
+                        (int)(expOffsetPtr - expOffsetStart), |
+                        offset, *expOffsetPtr); |
+            } |
+            /* 4. Backward, using ubrk_preceding() from one before the end and then one before each expected offset. */ |
+            expOffsetStart = expOffsetPtr = itemPtr->expRevOffsets; |
+            start = ubrk_last(bi) - 1; |
+            for (; (offset = ubrk_preceding(bi, start)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) { |
+                if (offset != *expOffsetPtr) { |
+                    log_err("FAIL: ubrk_preceding(%d) loc \"%s\", expected %d, got %d\n", start, itemPtr->locale, *expOffsetPtr, offset); |
+                } |
+                start = *expOffsetPtr - 1; |
+            } |
+            if (start >=0 && (offset != UBRK_DONE || *expOffsetPtr >= 0)) { /* final check is skipped once start has gone negative */ |
+                log_err("FAIL: ubrk_preceding loc(%d) \"%s\", expected UBRK_DONE & expOffset -1, got %d and %d\n", start, itemPtr->locale, offset, *expOffsetPtr); |
+            } |
+ |
+            ubrk_close(bi); |
+        } else { |
+            log_data_err("FAIL: ubrk_open(UBRK_SENTENCE, \"%s\", ...) status %s (Are you missing data?)\n", itemPtr->locale, u_errorName(status)); |
+        } |
+    } |
+} |
+ |
+ |
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |