Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(17)

Unified Diff: source/test/cintltst/cbiapts.c

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/test/cintltst/capitst.c ('k') | source/test/cintltst/cbiditst.c » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/test/cintltst/cbiapts.c
diff --git a/source/test/cintltst/cbiapts.c b/source/test/cintltst/cbiapts.c
index c9080f2d153a828cfe44edce22c4665782b9a0bc..e654c094ad7ac573e7a9eaaf7c1968cbdaa2512a 100644
--- a/source/test/cintltst/cbiapts.c
+++ b/source/test/cintltst/cbiapts.c
@@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 1997-2013, International Business Machines Corporation and
+ * Copyright (c) 1997-2015, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/********************************************************************************
@@ -31,6 +31,7 @@
#include "unicode/utext.h"
#include "cintltst.h"
#include "cbiapts.h"
+#include "cmemory.h"
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}}
@@ -47,6 +48,8 @@ static void TestBreakIteratorStatusVec(void);
static void TestBreakIteratorUText(void);
static void TestBreakIteratorTailoring(void);
static void TestBreakIteratorRefresh(void);
+static void TestBug11665(void);
+static void TestBreakIteratorSuppressions(void);
void addBrkIterAPITest(TestNode** root);
@@ -62,6 +65,8 @@ void addBrkIterAPITest(TestNode** root)
addTest(root, &TestBreakIteratorStatusVec, "tstxtbd/cbiapts/TestBreakIteratorStatusVec");
addTest(root, &TestBreakIteratorTailoring, "tstxtbd/cbiapts/TestBreakIteratorTailoring");
addTest(root, &TestBreakIteratorRefresh, "tstxtbd/cbiapts/TestBreakIteratorRefresh");
+ addTest(root, &TestBug11665, "tstxtbd/cbiapts/TestBug11665");
+ addTest(root, &TestBreakIteratorSuppressions, "tstxtbd/cbiapts/TestBreakIteratorSuppressions");
}
#define CLONETEST_ITERATOR_COUNT 2
@@ -847,6 +852,9 @@ static void TestBreakIteratorRefresh(void) {
bi = ubrk_open(UBRK_LINE, "en_US", NULL, 0, &status);
TEST_ASSERT_SUCCESS(status);
+ if (U_FAILURE(status)) {
+ return;
+ }
utext_openUChars(&ut1, testStr, -1, &status);
TEST_ASSERT_SUCCESS(status);
@@ -879,4 +887,180 @@ static void TestBreakIteratorRefresh(void) {
ubrk_close(bi);
}
+
+/*
+ * TestBug11665
+ *
+ * The problem was with the incorrect breaking of Japanese text beginning
+ * with Katakana characters when no prior Japanese or Chinese text had been
+ * encountered.
+ *
+ * Tested here in cintltst, rather than in intltest, because only cintltst
+ * tests have the ability to reset ICU, which is needed to get the bug
+ * to manifest itself.
+ *
+ * The test resets ICU, iterates the word boundaries of the text twice with
+ * the same iterator, and reports an error if the two passes disagree or if
+ * the number of boundaries is implausible (2 or fewer, or enough to fill
+ * the boundaries[] scratch array).
+ */
+static void TestBug11665(void) {
+    static UChar japaneseText[] = {0x30A2, 0x30EC, 0x30EB, 0x30AE, 0x30FC, 0x6027, 0x7D50, 0x819C, 0x708E};
+    int32_t boundaries[10] = {0};
+    UBreakIterator *bi = NULL;
+    int32_t brk;
+    int32_t brkIdx = 0;
+    int32_t totalBreaks = 0;
+    UErrorCode status = U_ZERO_ERROR;
+
+    ctest_resetICU();
+    bi = ubrk_open(UBRK_WORD, "en_US", japaneseText, UPRV_LENGTHOF(japaneseText), &status);
+    TEST_ASSERT_SUCCESS(status);
+    if (!bi) {
+        return;
+    }
+
+    /* First pass: record the boundaries, stopping once the cap is reached. */
+    for (brk=ubrk_first(bi); brk != UBRK_DONE; brk=ubrk_next(bi)) {
+        boundaries[brkIdx] = brk;
+        if (++brkIdx >= UPRV_LENGTHOF(boundaries) - 1) {
+            break;
+        }
+    }
+
+    /*
+     * The loop above never lets brkIdx exceed UPRV_LENGTHOF(boundaries) - 1,
+     * so "too many" has to be detected by comparing against that same cap;
+     * comparing against the full array length could never be true.
+     */
+    if (brkIdx <= 2 || brkIdx >= UPRV_LENGTHOF(boundaries) - 1) {
+        log_err("%s:%d too few or many breaks found.\n", __FILE__, __LINE__);
+    } else {
+        /* Second pass: the same iterator must reproduce the same boundaries. */
+        totalBreaks = brkIdx;
+        brkIdx = 0;
+        for (brk=ubrk_first(bi); brk != UBRK_DONE; brk=ubrk_next(bi)) {
+            if (brk != boundaries[brkIdx]) {
+                log_err("%s:%d Break #%d differs between first and second iteration.\n", __FILE__, __LINE__, brkIdx);
+                break;
+            }
+            if (++brkIdx >= UPRV_LENGTHOF(boundaries) - 1) {
+                log_err("%s:%d Too many breaks.\n", __FILE__, __LINE__);
+                break;
+            }
+        }
+        if (totalBreaks != brkIdx) {
+            log_err("%s:%d Number of breaks differ between first and second iteration.\n", __FILE__, __LINE__);
+        }
+    }
+    ubrk_close(bi);
+}
+
+/*
+ * Test data for TestBreakIteratorSuppressions.
+ *
+ * Each text is paired with four expected-offset arrays: forward and reverse,
+ * each with and without suppressions. The texts are passed through
+ * u_unescape() before use, so the offsets refer to the unescaped text.
+ *
+ * The test calls ubrk_first() before stepping forward and ubrk_last() before
+ * stepping backward, so the forward arrays list the boundaries returned after
+ * the first one and the reverse arrays list those returned after the last one.
+ *
+ * expOffset is the set of expected offsets, ending with '-1'.
+ * "Expected expOffset -1" means "expected the end of the offsets"
+ */
+
+static const char testSentenceSuppressionsEn[] = "Mr. Jones comes home. Dr. Smith Ph.D. is out. In the U.S.A. it is hot.";
+static const int32_t testSentSuppFwdOffsetsEn[] = { 22, 26, 46, 70, -1 }; /* With suppressions, currently not handling Dr. */
+static const int32_t testSentFwdOffsetsEn[] = { 4, 22, 26, 46, 70, -1 }; /* Without suppressions */
+static const int32_t testSentSuppRevOffsetsEn[] = { 46, 26, 22, 0, -1 }; /* With suppressions, currently not handling Dr. */
+static const int32_t testSentRevOffsetsEn[] = { 46, 26, 22, 4, 0, -1 }; /* Without suppressions */
+
+static const char testSentenceSuppressionsDe[] = "Wenn ich schon h\\u00F6re zu Guttenberg kommt evtl. zur\\u00FCck.";
+static const int32_t testSentSuppFwdOffsetsDe[] = { 53, -1 }; /* With suppressions */
+static const int32_t testSentFwdOffsetsDe[] = { 53, -1 }; /* Without suppressions; no break in evtl. zur due to casing */
+static const int32_t testSentSuppRevOffsetsDe[] = { 0, -1 }; /* With suppressions */
+static const int32_t testSentRevOffsetsDe[] = { 0, -1 }; /* Without suppressions */
+
+static const char testSentenceSuppressionsEs[] = "Te esperamos todos los miercoles en Bravo 416, Col. El Pueblo a las 7 PM.";
+static const int32_t testSentSuppFwdOffsetsEs[] = { 73, -1 }; /* With suppressions */
+static const int32_t testSentFwdOffsetsEs[] = { 52, 73, -1 }; /* Without suppressions */
+static const int32_t testSentSuppRevOffsetsEs[] = { 0, -1 }; /* With suppressions */
+static const int32_t testSentRevOffsetsEs[] = { 52, 0, -1 }; /* Without suppressions */
+
+enum { kTextULenMax = 128 }; /* capacity, in UChars, of the buffer each test text is unescaped into */
+
+typedef struct {
+ const char * locale; /* locale ID passed to ubrk_open(); a NULL locale marks the end of the table */
+ const char * text; /* test text; converted with u_unescape() before iteration */
+ const int32_t * expFwdOffsets; /* offsets expected from ubrk_next()/ubrk_following(), ending with -1 */
+ const int32_t * expRevOffsets; /* offsets expected from ubrk_previous()/ubrk_preceding(), ending with -1 */
+} TestBISuppressionsItem;
+
+/*
+ * Cases for TestBreakIteratorSuppressions: one row per locale ID, giving the
+ * text to iterate and the forward and reverse offsets expected for it.
+ * The table is terminated by a row whose locale is NULL; that row sets all
+ * four fields explicitly so it matches the struct layout.
+ */
+static const TestBISuppressionsItem testBISuppressionsItems[] = {
+    { "en@ss=standard", testSentenceSuppressionsEn, testSentSuppFwdOffsetsEn, testSentSuppRevOffsetsEn },
+    { "en", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn },
+    { "fr@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn },
+    { "af@ss=standard", testSentenceSuppressionsEn, testSentSuppFwdOffsetsEn, testSentSuppRevOffsetsEn }, /* no brkiter data => en suppressions? */
+    { "zh@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressions */
+    { "zh_Hant@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressions */
+    { "fi@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressions */
+    { "ja@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressions */
+    { "de@ss=standard", testSentenceSuppressionsDe, testSentSuppFwdOffsetsDe, testSentSuppRevOffsetsDe },
+    { "de", testSentenceSuppressionsDe, testSentFwdOffsetsDe, testSentRevOffsetsDe },
+    { "es@ss=standard", testSentenceSuppressionsEs, testSentSuppFwdOffsetsEs, testSentSuppRevOffsetsEs },
+    { "es", testSentenceSuppressionsEs, testSentFwdOffsetsEs, testSentRevOffsetsEs },
+    { NULL, NULL, NULL, NULL }
+};
+
+/*
+ * TestBreakIteratorSuppressions
+ *
+ * For every row of testBISuppressionsItems, opens a UBRK_SENTENCE iterator
+ * for the row's locale over the unescaped text and checks the boundaries
+ * four ways against the row's expected offsets:
+ *   - ubrk_first() followed by repeated ubrk_next()
+ *   - repeated ubrk_following() from just past the previous expected offset
+ *   - ubrk_last() followed by repeated ubrk_previous()
+ *   - repeated ubrk_preceding() from just before the previous expected offset
+ * Mismatches are reported with log_err(); failure to open the iterator is
+ * reported with log_data_err().
+ */
+static void TestBreakIteratorSuppressions(void) {
+    const TestBISuppressionsItem * itemPtr;
+
+    for (itemPtr = testBISuppressionsItems; itemPtr->locale != NULL; itemPtr++) {
+        UChar textU[kTextULenMax];
+        int32_t textULen = u_unescape(itemPtr->text, textU, kTextULenMax);
+        UErrorCode status = U_ZERO_ERROR;
+        UBreakIterator *bi = NULL;
+
+        /* Pointer differences have type ptrdiff_t; cast to int to match %d. */
+        log_verbose("#%d: %s\n", (int)(itemPtr-testBISuppressionsItems), itemPtr->locale);
+
+        /*
+         * Guard against a test text that does not fit in textU: a length
+         * larger than the buffer must not be handed to ubrk_open().
+         * NOTE(review): relies on u_unescape() reporting the needed length
+         * when the destination is too small - confirm against ustring.h.
+         */
+        if (textULen > kTextULenMax) {
+            log_err("FAIL: u_unescape loc \"%s\", text needs %d UChars but buffer holds %d\n", itemPtr->locale, textULen, (int)kTextULenMax);
+            continue;
+        }
+
+        bi = ubrk_open(UBRK_SENTENCE, itemPtr->locale, textU, textULen, &status);
+        if (U_SUCCESS(status)) {
+            int32_t offset, start;
+            const int32_t * expOffsetPtr;
+            const int32_t * expOffsetStart;
+
+            /* Forward, stepping with ubrk_next(). */
+            expOffsetPtr = itemPtr->expFwdOffsets;
+            ubrk_first(bi);
+            for (; (offset = ubrk_next(bi)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) {
+                if (offset != *expOffsetPtr) {
+                    log_err("FAIL: ubrk_next loc \"%s\", expected %d, got %d\n", itemPtr->locale, *expOffsetPtr, offset);
+                }
+            }
+            /* Both the iterator and the expected list must be exhausted together. */
+            if (offset != UBRK_DONE || *expOffsetPtr >= 0) {
+                log_err("FAIL: ubrk_next loc \"%s\", expected UBRK_DONE & expOffset -1, got %d and %d\n", itemPtr->locale, offset, *expOffsetPtr);
+            }
+
+            /* Forward, using ubrk_following() from one past each expected offset. */
+            expOffsetPtr = itemPtr->expFwdOffsets;
+            start = ubrk_first(bi) + 1;
+            for (; (offset = ubrk_following(bi, start)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) {
+                if (offset != *expOffsetPtr) {
+                    log_err("FAIL: ubrk_following(%d) loc \"%s\", expected %d, got %d\n", start, itemPtr->locale, *expOffsetPtr, offset);
+                }
+                start = *expOffsetPtr + 1;
+            }
+            if (offset != UBRK_DONE || *expOffsetPtr >= 0) {
+                log_err("FAIL: ubrk_following(%d) loc \"%s\", expected UBRK_DONE & expOffset -1, got %d and %d\n", start, itemPtr->locale, offset, *expOffsetPtr);
+            }
+
+            /* Backward, stepping with ubrk_previous(). */
+            expOffsetStart = expOffsetPtr = itemPtr->expRevOffsets;
+            offset = ubrk_last(bi);
+            log_verbose("___ @%d ubrk_last\n", offset);
+            if(offset == 0) {
+                log_err("FAIL: ubrk_last loc \"%s\" unexpected %d\n", itemPtr->locale, offset);
+            }
+            for (; (offset = ubrk_previous(bi)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) {
+                if (offset != *expOffsetPtr) {
+                    log_err("FAIL: ubrk_previous loc \"%s\", expected %d, got %d\n", itemPtr->locale, *expOffsetPtr, offset);
+                } else {
+                    log_verbose("[%d] @%d ubrk_previous()\n", (int)(expOffsetPtr - expOffsetStart), offset);
+                }
+            }
+            if (offset != UBRK_DONE || *expOffsetPtr >= 0) {
+                log_err("FAIL: ubrk_previous loc \"%s\", expected UBRK_DONE & expOffset[%d] -1, got %d and %d\n", itemPtr->locale,
+                        (int)(expOffsetPtr - expOffsetStart),
+                        offset, *expOffsetPtr);
+            }
+
+            /* Backward, using ubrk_preceding() from one before each expected offset. */
+            expOffsetPtr = itemPtr->expRevOffsets;
+            start = ubrk_last(bi) - 1;
+            for (; (offset = ubrk_preceding(bi, start)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) {
+                if (offset != *expOffsetPtr) {
+                    log_err("FAIL: ubrk_preceding(%d) loc \"%s\", expected %d, got %d\n", start, itemPtr->locale, *expOffsetPtr, offset);
+                }
+                start = *expOffsetPtr - 1;
+            }
+            /* The end-of-list check is skipped once start has gone negative. */
+            if (start >=0 && (offset != UBRK_DONE || *expOffsetPtr >= 0)) {
+                log_err("FAIL: ubrk_preceding loc(%d) \"%s\", expected UBRK_DONE & expOffset -1, got %d and %d\n", start, itemPtr->locale, offset, *expOffsetPtr);
+            }
+
+            ubrk_close(bi);
+        } else {
+            log_data_err("FAIL: ubrk_open(UBRK_SENTENCE, \"%s\", ...) status %s (Are you missing data?)\n", itemPtr->locale, u_errorName(status));
+        }
+    }
+}
+
+
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
« no previous file with comments | « source/test/cintltst/capitst.c ('k') | source/test/cintltst/cbiditst.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698