| Index: source/test/cintltst/cbiapts.c
|
| diff --git a/source/test/cintltst/cbiapts.c b/source/test/cintltst/cbiapts.c
|
| index c9080f2d153a828cfe44edce22c4665782b9a0bc..e654c094ad7ac573e7a9eaaf7c1968cbdaa2512a 100644
|
| --- a/source/test/cintltst/cbiapts.c
|
| +++ b/source/test/cintltst/cbiapts.c
|
| @@ -1,6 +1,6 @@
|
| /********************************************************************
|
| * COPYRIGHT:
|
| - * Copyright (c) 1997-2013, International Business Machines Corporation and
|
| + * Copyright (c) 1997-2015, International Business Machines Corporation and
|
| * others. All Rights Reserved.
|
| ********************************************************************/
|
| /********************************************************************************
|
| @@ -31,6 +31,7 @@
|
| #include "unicode/utext.h"
|
| #include "cintltst.h"
|
| #include "cbiapts.h"
|
| +#include "cmemory.h"
|
|
|
| #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
|
| log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}}
|
| @@ -47,6 +48,8 @@ static void TestBreakIteratorStatusVec(void);
|
| static void TestBreakIteratorUText(void);
|
| static void TestBreakIteratorTailoring(void);
|
| static void TestBreakIteratorRefresh(void);
|
| +static void TestBug11665(void);
|
| +static void TestBreakIteratorSuppressions(void);
|
|
|
| void addBrkIterAPITest(TestNode** root);
|
|
|
| @@ -62,6 +65,8 @@ void addBrkIterAPITest(TestNode** root)
|
| addTest(root, &TestBreakIteratorStatusVec, "tstxtbd/cbiapts/TestBreakIteratorStatusVec");
|
| addTest(root, &TestBreakIteratorTailoring, "tstxtbd/cbiapts/TestBreakIteratorTailoring");
|
| addTest(root, &TestBreakIteratorRefresh, "tstxtbd/cbiapts/TestBreakIteratorRefresh");
|
| + addTest(root, &TestBug11665, "tstxtbd/cbiapts/TestBug11665");
|
| + addTest(root, &TestBreakIteratorSuppressions, "tstxtbd/cbiapts/TestBreakIteratorSuppressions");
|
| }
|
|
|
| #define CLONETEST_ITERATOR_COUNT 2
|
| @@ -847,6 +852,9 @@ static void TestBreakIteratorRefresh(void) {
|
|
|
| bi = ubrk_open(UBRK_LINE, "en_US", NULL, 0, &status);
|
| TEST_ASSERT_SUCCESS(status);
|
| + if (U_FAILURE(status)) {
|
| + return;
|
| + }
|
|
|
| utext_openUChars(&ut1, testStr, -1, &status);
|
| TEST_ASSERT_SUCCESS(status);
|
| @@ -879,4 +887,180 @@ static void TestBreakIteratorRefresh(void) {
|
| ubrk_close(bi);
|
| }
|
|
|
| +
|
| +static void TestBug11665(void) {
|
| + // The problem was with the incorrect breaking of Japanese text beginning
|
| + // with Katakana characters when no prior Japanese or Chinese text had been
|
| + // encountered.
|
| + //
|
| + // Tested here in cintltst, rather than in intltest, because only cintltst
|
| + // tests have the ability to reset ICU, which is needed to get the bug
|
| + // to manifest itself.
|
| +
|
| + static UChar japaneseText[] = {0x30A2, 0x30EC, 0x30EB, 0x30AE, 0x30FC, 0x6027, 0x7D50, 0x819C, 0x708E};
|
| + int32_t boundaries[10] = {0};
|
| + UBreakIterator *bi = NULL;
|
| + int32_t brk;
|
| + int32_t brkIdx = 0;
|
| + int32_t totalBreaks = 0;
|
| + UErrorCode status = U_ZERO_ERROR;
|
| +
|
| + ctest_resetICU();
|
| + bi = ubrk_open(UBRK_WORD, "en_US", japaneseText, UPRV_LENGTHOF(japaneseText), &status);
|
| + TEST_ASSERT_SUCCESS(status);
|
| + if (!bi) {
|
| + return;
|
| + }
|
| + for (brk=ubrk_first(bi); brk != UBRK_DONE; brk=ubrk_next(bi)) {
|
| + boundaries[brkIdx] = brk;
|
| + if (++brkIdx >= UPRV_LENGTHOF(boundaries) - 1) {
|
| + break;
|
| + }
|
| + }
|
| + if (brkIdx <= 2 || brkIdx >= UPRV_LENGTHOF(boundaries)) {
|
| + log_err("%s:%d too few or many breaks found.\n", __FILE__, __LINE__);
|
| + } else {
|
| + totalBreaks = brkIdx;
|
| + brkIdx = 0;
|
| + for (brk=ubrk_first(bi); brk != UBRK_DONE; brk=ubrk_next(bi)) {
|
| + if (brk != boundaries[brkIdx]) {
|
| + log_err("%s:%d Break #%d differs between first and second iteration.\n", __FILE__, __LINE__, brkIdx);
|
| + break;
|
| + }
|
| + if (++brkIdx >= UPRV_LENGTHOF(boundaries) - 1) {
|
| + log_err("%s:%d Too many breaks.\n", __FILE__, __LINE__);
|
| + break;
|
| + }
|
| + }
|
| + if (totalBreaks != brkIdx) {
|
| + log_err("%s:%d Number of breaks differ between first and second iteration.\n", __FILE__, __LINE__);
|
| + }
|
| + }
|
| + ubrk_close(bi);
|
| +}
|
| +
|
| +/*
|
| + * expOffset is the set of expected offsets, ending with '-1'.
|
| + * "Expected expOffset -1" means "expected the end of the offsets"
|
| + */
|
| +
|
| +static const char testSentenceSuppressionsEn[] = "Mr. Jones comes home. Dr. Smith Ph.D. is out. In the U.S.A. it is hot.";
|
| +static const int32_t testSentSuppFwdOffsetsEn[] = { 22, 26, 46, 70, -1 }; /* With suppressions, currently not handling Dr. */
|
| +static const int32_t testSentFwdOffsetsEn[] = { 4, 22, 26, 46, 70, -1 }; /* Without suppressions */
|
| +static const int32_t testSentSuppRevOffsetsEn[] = { 46, 26, 22, 0, -1 }; /* With suppressions, currently not handling Dr. */
|
| +static const int32_t testSentRevOffsetsEn[] = { 46, 26, 22, 4, 0, -1 }; /* Without suppressions */
|
| +
|
| +static const char testSentenceSuppressionsDe[] = "Wenn ich schon h\\u00F6re zu Guttenberg kommt evtl. zur\\u00FCck.";
|
| +static const int32_t testSentSuppFwdOffsetsDe[] = { 53, -1 }; /* With suppressions */
|
| +static const int32_t testSentFwdOffsetsDe[] = { 53, -1 }; /* Without suppressions; no break in evtl. zur due to casing */
|
| +static const int32_t testSentSuppRevOffsetsDe[] = { 0, -1 }; /* With suppressions */
|
| +static const int32_t testSentRevOffsetsDe[] = { 0, -1 }; /* Without suppressions */
|
| +
|
| +static const char testSentenceSuppressionsEs[] = "Te esperamos todos los miercoles en Bravo 416, Col. El Pueblo a las 7 PM.";
|
| +static const int32_t testSentSuppFwdOffsetsEs[] = { 73, -1 }; /* With suppressions */
|
| +static const int32_t testSentFwdOffsetsEs[] = { 52, 73, -1 }; /* Without suppressions */
|
| +static const int32_t testSentSuppRevOffsetsEs[] = { 0, -1 }; /* With suppressions */
|
| +static const int32_t testSentRevOffsetsEs[] = { 52, 0, -1 }; /* Without suppressions */
|
| +
|
| +enum { kTextULenMax = 128 };
|
| +
|
| +typedef struct {
|
| + const char * locale;
|
| + const char * text;
|
| + const int32_t * expFwdOffsets;
|
| + const int32_t * expRevOffsets;
|
| +} TestBISuppressionsItem;
|
| +
|
| +static const TestBISuppressionsItem testBISuppressionsItems[] = {
|
| + { "en@ss=standard", testSentenceSuppressionsEn, testSentSuppFwdOffsetsEn, testSentSuppRevOffsetsEn },
|
| + { "en", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn },
|
| + { "fr@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn },
|
| + { "af@ss=standard", testSentenceSuppressionsEn, testSentSuppFwdOffsetsEn, testSentSuppRevOffsetsEn }, /* no brkiter data => en suppressions? */
|
| + { "zh@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressions */
|
| + { "zh_Hant@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressions */
|
| + { "fi@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressions */
|
| + { "ja@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressions */
|
| + { "de@ss=standard", testSentenceSuppressionsDe, testSentSuppFwdOffsetsDe, testSentSuppRevOffsetsDe },
|
| + { "de", testSentenceSuppressionsDe, testSentFwdOffsetsDe, testSentRevOffsetsDe },
|
| + { "es@ss=standard", testSentenceSuppressionsEs, testSentSuppFwdOffsetsEs, testSentSuppRevOffsetsEs },
|
| + { "es", testSentenceSuppressionsEs, testSentFwdOffsetsEs, testSentRevOffsetsEs },
|
| + { NULL, NULL, NULL }
|
| +};
|
| +
|
| +static void TestBreakIteratorSuppressions(void) {
|
| + const TestBISuppressionsItem * itemPtr;
|
| +
|
| + for (itemPtr = testBISuppressionsItems; itemPtr->locale != NULL; itemPtr++) {
|
| + UChar textU[kTextULenMax];
|
| + int32_t textULen = u_unescape(itemPtr->text, textU, kTextULenMax);
|
| + UErrorCode status = U_ZERO_ERROR;
|
| + UBreakIterator *bi = ubrk_open(UBRK_SENTENCE, itemPtr->locale, textU, textULen, &status);
|
| + log_verbose("#%d: %s\n", (itemPtr-testBISuppressionsItems), itemPtr->locale);
|
| + if (U_SUCCESS(status)) {
|
| + int32_t offset, start;
|
| + const int32_t * expOffsetPtr;
|
| + const int32_t * expOffsetStart;
|
| +
|
| + expOffsetStart = expOffsetPtr = itemPtr->expFwdOffsets;
|
| + ubrk_first(bi);
|
| + for (; (offset = ubrk_next(bi)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) {
|
| + if (offset != *expOffsetPtr) {
|
| + log_err("FAIL: ubrk_next loc \"%s\", expected %d, got %d\n", itemPtr->locale, *expOffsetPtr, offset);
|
| + }
|
| + }
|
| + if (offset != UBRK_DONE || *expOffsetPtr >= 0) {
|
| + log_err("FAIL: ubrk_next loc \"%s\", expected UBRK_DONE & expOffset -1, got %d and %d\n", itemPtr->locale, offset, *expOffsetPtr);
|
| + }
|
| +
|
| + expOffsetStart = expOffsetPtr = itemPtr->expFwdOffsets;
|
| + start = ubrk_first(bi) + 1;
|
| + for (; (offset = ubrk_following(bi, start)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) {
|
| + if (offset != *expOffsetPtr) {
|
| + log_err("FAIL: ubrk_following(%d) loc \"%s\", expected %d, got %d\n", start, itemPtr->locale, *expOffsetPtr, offset);
|
| + }
|
| + start = *expOffsetPtr + 1;
|
| + }
|
| + if (offset != UBRK_DONE || *expOffsetPtr >= 0) {
|
| + log_err("FAIL: ubrk_following(%d) loc \"%s\", expected UBRK_DONE & expOffset -1, got %d and %d\n", start, itemPtr->locale, offset, *expOffsetPtr);
|
| + }
|
| +
|
| + expOffsetStart = expOffsetPtr = itemPtr->expRevOffsets;
|
| + offset = ubrk_last(bi);
|
| + log_verbose("___ @%d ubrk_last\n", offset);
|
| + if(offset == 0) {
|
| + log_err("FAIL: ubrk_last loc \"%s\" unexpected %d\n", itemPtr->locale, offset);
|
| + }
|
| + for (; (offset = ubrk_previous(bi)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) {
|
| + if (offset != *expOffsetPtr) {
|
| + log_err("FAIL: ubrk_previous loc \"%s\", expected %d, got %d\n", itemPtr->locale, *expOffsetPtr, offset);
|
| + } else {
|
| + log_verbose("[%d] @%d ubrk_previous()\n", (expOffsetPtr - expOffsetStart), offset);
|
| + }
|
| + }
|
| + if (offset != UBRK_DONE || *expOffsetPtr >= 0) {
|
| + log_err("FAIL: ubrk_previous loc \"%s\", expected UBRK_DONE & expOffset[%d] -1, got %d and %d\n", itemPtr->locale,
|
| + expOffsetPtr - expOffsetStart,
|
| + offset, *expOffsetPtr);
|
| + }
|
| +
|
| + expOffsetStart = expOffsetPtr = itemPtr->expRevOffsets;
|
| + start = ubrk_last(bi) - 1;
|
| + for (; (offset = ubrk_preceding(bi, start)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) {
|
| + if (offset != *expOffsetPtr) {
|
| + log_err("FAIL: ubrk_preceding(%d) loc \"%s\", expected %d, got %d\n", start, itemPtr->locale, *expOffsetPtr, offset);
|
| + }
|
| + start = *expOffsetPtr - 1;
|
| + }
|
| + if (start >=0 && (offset != UBRK_DONE || *expOffsetPtr >= 0)) {
|
| + log_err("FAIL: ubrk_preceding loc(%d) \"%s\", expected UBRK_DONE & expOffset -1, got %d and %d\n", start, itemPtr->locale, offset, *expOffsetPtr);
|
| + }
|
| +
|
| + ubrk_close(bi);
|
| + } else {
|
| + log_data_err("FAIL: ubrk_open(UBRK_SENTENCE, \"%s\", ...) status %s (Are you missing data?)\n", itemPtr->locale, u_errorName(status));
|
| + }
|
| + }
|
| +}
|
| +
|
| +
|
| #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
|
|