source/test/intltest/rbbitst.cpp - Issue 845603002: Update ICU to 54.1 step 1

Unified Diff: source/test/intltest/rbbitst.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master

Patch Set: remove unused directories Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/test/intltest/rbbitst.cpp

diff --git a/source/test/intltest/rbbitst.cpp b/source/test/intltest/rbbitst.cpp

index 95bf8dde4df9fd75edfda691e6c79d82ec496fff..278176dc333130ba0576e54b66b26b7ba0a94374 100644

--- a/source/test/intltest/rbbitst.cpp

+++ b/source/test/intltest/rbbitst.cpp

@@ -1,6 +1,6 @@

/********************************************************************

* COPYRIGHT:

********************************************************************/

/************************************************************************

@@ -31,9 +31,9 @@

#include "intltest.h"

#include "rbbitst.h"

#include <string.h>

+#include "charstr.h"

#include "uvector.h"

#include "uvectr32.h"

-#include <string.h>

#include <stdio.h>

#include <stdlib.h>

#include "unicode/numfmt.h"

@@ -354,27 +354,19 @@ void RBBITest::TestStatusReturn() {

}

-static void printStringBreaks(UnicodeString ustr, int expected[],

- int expectedcount)

+static void printStringBreaks(UText *tstr, int expected[], int expectedCount) {

UErrorCode status = U_ZERO_ERROR;

char name[100];

printf("code alpha extend alphanum type word sent line name\n");

- int j;

- for (j = 0; j < ustr.length(); j ++) {

- if (expectedcount > 0) {

- int k;

- for (k = 0; k < expectedcount; k ++) {

- if (j == expected[k]) {

- printf("------------------------------------------------ %d\n",

- j);

- }

- UChar32 c = ustr.char32At(j);

- if (c > 0xffff) {

- j ++;

+ int nextExpectedIndex = 0;

+ utext_setNativeIndex(tstr, 0);

+ for (int j = 0; j < utext_nativeLength(tstr); j=utext_getNativeIndex(tstr)) {

+ if (nextExpectedIndex < expectedCount && j >= expected[nextExpectedIndex] ) {

+ printf("------------------------------------------------ %d\n", j);

+ ++nextExpectedIndex;

}

+ UChar32 c = utext_next32(tstr);

u_charName(c, U_UNICODE_CHAR_NAME, name, 100, &status);

printf("%7x %5d %6d %8d %4s %4s %4s %4s %s\n", (int)c,

u_isUAlphabetic(c),

@@ -400,6 +392,19 @@ static void printStringBreaks(UnicodeString ustr, int expected[],

}

+static void printStringBreaks(const UnicodeString &ustr, int expected[], int expectedCount) {

+ UErrorCode status = U_ZERO_ERROR;

+ UText *tstr = NULL;

+ tstr = utext_openConstUnicodeString(NULL, &ustr, &status);

+ if (U_FAILURE(status)) {

+ printf("printStringBreaks, utext_openConstUnicodeString() returns %s\n", u_errorName(status));

+ return;

+ }

+ printStringBreaks(tstr, expected, expectedCount);

+ utext_close(tstr);

void RBBITest::TestBug3818() {

UErrorCode status = U_ZERO_ERROR;

@@ -830,23 +835,173 @@ void RBBITest::TestBug5775() {

//------------------------------------------------------------------------------

struct TestParams {

- BreakIterator *bi;

- UnicodeString dataToBreak;

- UVector32 *expectedBreaks;

- UVector32 *srcLine;

+ BreakIterator *bi; // Break iterator is set while parsing test source.

+ // Changed out whenever test data changes break type.

+ UnicodeString dataToBreak; // Data that is built up while parsing the test.

+ UVector32 *expectedBreaks; // Expected break positions, matches dataToBreak UnicodeString.

+ UVector32 *srcLine; // Positions in source file, indexed same as dataToBreak.

UVector32 *srcCol;

+ UText *textToBreak; // UText, could be UTF8 or UTF16.

+ UVector32 *textMap; // Map from UTF-16 dataToBreak offsets to UText offsets.

+ CharString utf8String; // UTF-8 form of text to break.

+ TestParams(UErrorCode &status) : dataToBreak() {

+ bi = NULL;

+ expectedBreaks = new UVector32(status);

+ srcLine = new UVector32(status);

+ srcCol = new UVector32(status);

+ textToBreak = NULL;

+ textMap = new UVector32(status);

+ }

+ ~TestParams() {

+ delete bi;

+ delete expectedBreaks;

+ delete srcLine;

+ delete srcCol;

+ utext_close(textToBreak);

+ delete textMap;

+ }

+ int32_t getSrcLine(int32_t bp);

+ int32_t getExpectedBreak(int32_t bp);

+ int32_t getSrcCol(int32_t bp);

+ void setUTF16(UErrorCode &status);

+ void setUTF8(UErrorCode &status);

};

-void RBBITest::executeTest(TestParams *t) {

+// Append a UnicodeString to a CharString with UTF-8 encoding.

+// Substitute any invalid chars.

+// Note: this is used with test data that includes a few unpaired surrogates in the UTF-16 that will be substituted.

+static void CharStringAppend(CharString &dest, const UnicodeString &src, UErrorCode &status) {

+ if (U_FAILURE(status)) {

+ return;

+ }

+ int32_t utf8Length;

+ u_strToUTF8WithSub(NULL, 0, &utf8Length, // Output Buffer, NULL for preflight.

+ src.getBuffer(), src.length(), // UTF-16 data

+ 0xfffd, NULL, // Substitution char, number of subs.

+ &status);

+ if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {

+ return;

+ }

+ status = U_ZERO_ERROR;

+ int32_t capacity;

+ char *buffer = dest.getAppendBuffer(utf8Length, utf8Length, capacity, status);

+ u_strToUTF8WithSub(buffer, utf8Length, NULL,

+ src.getBuffer(), src.length(),

+ 0xfffd, NULL, &status);

+ dest.append(buffer, utf8Length, status);

+void TestParams::setUTF16(UErrorCode &status) {

+ textToBreak = utext_openUnicodeString(textToBreak, &dataToBreak, &status);

+ textMap->removeAllElements();

+ for (int32_t i=0; i<dataToBreak.length(); i++) {

+ if (i == dataToBreak.getChar32Start(i)) {

+ textMap->addElement(i, status);

+ } else {

+ textMap->addElement(-1, status);

+ }

+ textMap->addElement(dataToBreak.length(), status);

+ U_ASSERT(dataToBreak.length() + 1 == textMap->size());

+void TestParams::setUTF8(UErrorCode &status) {

+ if (U_FAILURE(status)) {

+ return;

+ }

+ utf8String.clear();

+ CharStringAppend(utf8String, dataToBreak, status);

+ textToBreak = utext_openUTF8(textToBreak, utf8String.data(), utf8String.length(), &status);

+ if (U_FAILURE(status)) {

+ return;

+ }

+ textMap->removeAllElements();

+ int32_t utf16Index = 0;

+ for (;;) {

+ textMap->addElement(utf16Index, status);

+ UChar32 c32 = utext_current32(textToBreak);

+ if (c32 < 0) {

+ break;

+ }

+ utf16Index += U16_LENGTH(c32);

+ utext_next32(textToBreak);

+ while (textMap->size() < utext_getNativeIndex(textToBreak)) {

+ textMap->addElement(-1, status);

+ }

+ U_ASSERT(utext_nativeLength(textToBreak) + 1 == textMap->size());

+int32_t TestParams::getSrcLine(int bp) {

+ if (bp >= textMap->size()) {

+ bp = textMap->size() - 1;

+ }

+ int32_t i = 0;

+ for(; bp >= 0 ; --bp) {

+ // Move to a character boundary if we are not on one already.

+ i = textMap->elementAti(bp);

+ if (i >= 0) {

+ break;

+ }

+ return srcLine->elementAti(i);

+int32_t TestParams::getExpectedBreak(int bp) {

+ if (bp >= textMap->size()) {

+ return 0;

+ }

+ int32_t i = textMap->elementAti(bp);

+ int32_t retVal = 0;

+ if (i >= 0) {

+ retVal = expectedBreaks->elementAti(i);

+ }

+ return retVal;

+int32_t TestParams::getSrcCol(int bp) {

+ if (bp >= textMap->size()) {

+ bp = textMap->size() - 1;

+ }

+ int32_t i = 0;

+ for(; bp >= 0; --bp) {

+ // Move bp to a character boundary if we are not on one already.

+ i = textMap->elementAti(bp);

+ if (i >= 0) {

+ break;

+ }

+ return srcCol->elementAti(i);

+void RBBITest::executeTest(TestParams *t, UErrorCode &status) {

int32_t bp;

int32_t prevBP;

int32_t i;

+ TEST_ASSERT_SUCCESS(status);

+ if (U_FAILURE(status)) {

+ return;

+ }

if (t->bi == NULL) {

return;

}

- t->bi->setText(t->dataToBreak);

+ t->bi->setText(t->textToBreak, status);

// Run the iterator forward

@@ -855,93 +1010,92 @@ void RBBITest::executeTest(TestParams *t) {

if (prevBP == bp) {

// Fail for lack of forward progress.

errln("Forward Iteration, no forward progress. Break Pos=%4d File line,col=%4d,%4d",

- bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));

+ bp, t->getSrcLine(bp), t->getSrcCol(bp));

break;

}

- // Check that there were we didn't miss an expected break between the last one

+ // Check that we didn't miss an expected break between the last one

// and this one.

for (i=prevBP+1; i<bp; i++) {

- if (t->expectedBreaks->elementAti(i) != 0) {

+ if (t->getExpectedBreak(i) != 0) {

int expected[] = {0, i};

printStringBreaks(t->dataToBreak, expected, 2);

errln("Forward Iteration, break expected, but not found. Pos=%4d File line,col= %4d,%4d",

- i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));

+ i, t->getSrcLine(i), t->getSrcCol(i));

}

// Check that the break we did find was expected

- if (t->expectedBreaks->elementAti(bp) == 0) {

+ if (t->getExpectedBreak(bp) == 0) {

int expected[] = {0, bp};

- printStringBreaks(t->dataToBreak, expected, 2);

+ printStringBreaks(t->textToBreak, expected, 2);

errln("Forward Iteration, break found, but not expected. Pos=%4d File line,col= %4d,%4d",

- bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));

+ bp, t->getSrcLine(bp), t->getSrcCol(bp));

} else {

// The break was expected.

// Check that the {nnn} tag value is correct.

- int32_t expectedTagVal = t->expectedBreaks->elementAti(bp);

+ int32_t expectedTagVal = t->getExpectedBreak(bp);

if (expectedTagVal == -1) {

expectedTagVal = 0;

}

- int32_t line = t->srcLine->elementAti(bp);

+ int32_t line = t->getSrcLine(bp);

int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus();

if (rs != expectedTagVal) {

errln("Incorrect status for forward break. Pos=%4d File line,col= %4d,%4d.\n"

" Actual, Expected status = %4d, %4d",

- bp, line, t->srcCol->elementAti(bp), rs, expectedTagVal);

+ bp, line, t->getSrcCol(bp), rs, expectedTagVal);

}

prevBP = bp;

}

// Verify that there were no missed expected breaks after the last one found

- for (i=prevBP+1; i<t->expectedBreaks->size(); i++) {

- if (t->expectedBreaks->elementAti(i) != 0) {

+ for (i=prevBP+1; i<utext_nativeLength(t->textToBreak); i++) {

+ if (t->getExpectedBreak(i) != 0) {

errln("Forward Iteration, break expected, but not found. Pos=%4d File line,col= %4d,%4d",

- i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));

+ i, t->getSrcLine(i), t->getSrcCol(i));

}

// Run the iterator backwards, verify that the same breaks are found.

- prevBP = t->dataToBreak.length()+2; // start with a phony value for the last break pos seen.

+ prevBP = utext_nativeLength(t->textToBreak)+2; // start with a phony value for the last break pos seen.

for (bp = t->bi->last(); bp != BreakIterator::DONE; bp = t->bi->previous()) {

if (prevBP == bp) {

// Fail for lack of progress.

errln("Reverse Iteration, no progress. Break Pos=%4d File line,col=%4d,%4d",

- bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));

+ bp, t->getSrcLine(bp), t->getSrcCol(bp));

break;

}

- // Check that there were we didn't miss an expected break between the last one

+ // Check that we didn't miss an expected break between the last one

// and this one. (UVector returns zeros for index out of bounds.)

for (i=prevBP-1; i>bp; i--) {

- if (t->expectedBreaks->elementAti(i) != 0) {

- errln("Reverse Itertion, break expected, but not found. Pos=%4d File line,col= %4d,%4d",

- i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));

+ if (t->getExpectedBreak(i) != 0) {

+ errln("Reverse Iteration, break expected, but not found. Pos=%4d File line,col= %4d,%4d",

+ i, t->getSrcLine(i), t->getSrcCol(i));

}

// Check that the break we did find was expected

- if (t->expectedBreaks->elementAti(bp) == 0) {

+ if (t->getExpectedBreak(bp) == 0) {

errln("Reverse Itertion, break found, but not expected. Pos=%4d File line,col= %4d,%4d",

- bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));

+ bp, t->getSrcLine(bp), t->getSrcCol(bp));

} else {

// The break was expected.

// Check that the {nnn} tag value is correct.

- int32_t expectedTagVal = t->expectedBreaks->elementAti(bp);

+ int32_t expectedTagVal = t->getExpectedBreak(bp);

if (expectedTagVal == -1) {

expectedTagVal = 0;

}

- int line = t->srcLine->elementAti(bp);

- int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus();

+ int line = t->getSrcLine(bp);

+ int32_t rs = t->bi->getRuleStatus();

if (rs != expectedTagVal) {

errln("Incorrect status for reverse break. Pos=%4d File line,col= %4d,%4d.\n"

" Actual, Expected status = %4d, %4d",

- bp, line, t->srcCol->elementAti(bp), rs, expectedTagVal);

+ bp, line, t->getSrcCol(bp), rs, expectedTagVal);

}

@@ -950,30 +1104,30 @@ void RBBITest::executeTest(TestParams *t) {

// Verify that there were no missed breaks prior to the last one found

for (i=prevBP-1; i>=0; i--) {

- if (t->expectedBreaks->elementAti(i) != 0) {

+ if (t->getExpectedBreak(i) != 0) {

errln("Forward Itertion, break expected, but not found. Pos=%4d File line,col= %4d,%4d",

- i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));

+ i, t->getSrcLine(i), t->getSrcCol(i));

}

// Check isBoundary()

- for (i=0; i<t->expectedBreaks->size(); i++) {

- UBool boundaryExpected = (t->expectedBreaks->elementAti(i) != 0);

+ for (i=0; i < utext_nativeLength(t->textToBreak); i++) {

+ UBool boundaryExpected = (t->getExpectedBreak(i) != 0);

UBool boundaryFound = t->bi->isBoundary(i);

if (boundaryExpected != boundaryFound) {

errln("isBoundary(%d) incorrect. File line,col= %4d,%4d\n"

" Expected, Actual= %s, %s",

- i, t->srcLine->elementAti(i), t->srcCol->elementAti(i),

+ i, t->getSrcLine(i), t->getSrcCol(i),

boundaryExpected ? "true":"false", boundaryFound? "true" : "false");

}

// Check following()

- for (i=0; i<t->expectedBreaks->size(); i++) {

+ for (i=0; i < utext_nativeLength(t->textToBreak); i++) {

int32_t actualBreak = t->bi->following(i);

int32_t expectedBreak = BreakIterator::DONE;

- for (int32_t j=i+1; j < t->expectedBreaks->size(); j++) {

- if (t->expectedBreaks->elementAti(j) != 0) {

+ for (int32_t j=i+1; j <= utext_nativeLength(t->textToBreak); j++) {

+ if (t->getExpectedBreak(j) != 0) {

expectedBreak = j;

break;

}

@@ -981,17 +1135,24 @@ void RBBITest::executeTest(TestParams *t) {

if (expectedBreak != actualBreak) {

errln("following(%d) incorrect. File line,col= %4d,%4d\n"

" Expected, Actual= %d, %d",

- i, t->srcLine->elementAti(i), t->srcCol->elementAti(i), expectedBreak, actualBreak);

+ i, t->getSrcLine(i), t->getSrcCol(i), expectedBreak, actualBreak);

}

// Check preceding()

- for (i=t->expectedBreaks->size(); i>=0; i--) {

+ for (i=utext_nativeLength(t->textToBreak); i>=0; i--) {

int32_t actualBreak = t->bi->preceding(i);

int32_t expectedBreak = BreakIterator::DONE;

- for (int32_t j=i-1; j >= 0; j--) {

- if (t->expectedBreaks->elementAti(j) != 0) {

+ // For UTF-8 & UTF-16 supplementals, all code units of a character are equivalent.

+ // preceding(trailing byte) will return the index of some preceding code point,

+ // not the lead byte of the current code point, even though that has a smaller index.

+ // Therefore, start looking at the expected break data not at i-1, but at

+ // the start of code point index - 1.

+ utext_setNativeIndex(t->textToBreak, i);

+ int32_t j = utext_getNativeIndex(t->textToBreak) - 1;

+ for (; j >= 0; j--) {

+ if (t->getExpectedBreak(j) != 0) {

expectedBreak = j;

break;

}

@@ -999,7 +1160,7 @@ void RBBITest::executeTest(TestParams *t) {

if (expectedBreak != actualBreak) {

errln("preceding(%d) incorrect. File line,col= %4d,%4d\n"

" Expected, Actual= %d, %d",

- i, t->srcLine->elementAti(i), t->srcCol->elementAti(i), expectedBreak, actualBreak);

+ i, t->getSrcLine(i), t->getSrcCol(i), expectedBreak, actualBreak);

}

@@ -1011,11 +1172,7 @@ void RBBITest::TestExtended() {

Locale locale("");

UnicodeString rules;

- TestParams tp;

- tp.bi = NULL;

- tp.expectedBreaks = new UVector32(status);

- tp.srcLine = new UVector32(status);

- tp.srcCol = new UVector32(status);

+ TestParams tp(status);

RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{Nd}_]*) *>"), 0, status);

if (U_FAILURE(status)) {

@@ -1190,7 +1347,16 @@ void RBBITest::TestExtended() {

charIdx += 6;

// RUN THE TEST!

- executeTest(&tp);

+ status = U_ZERO_ERROR;

+ tp.setUTF16(status);

+ executeTest(&tp, status);

+ TEST_ASSERT_SUCCESS(status);

+ // Run again, this time with UTF-8 text wrapped in a UText.

+ status = U_ZERO_ERROR;

+ tp.setUTF8(status);

+ TEST_ASSERT_SUCCESS(status);

+ executeTest(&tp, status);

break;

}

@@ -1356,10 +1522,6 @@ void RBBITest::TestExtended() {

}

end_test:

- delete tp.bi;

- delete tp.expectedBreaks;

- delete tp.srcLine;

- delete tp.srcCol;

delete [] testFile;

#endif

}

« no previous file with comments | « source/test/intltest/rbbitst.h ('k') | source/test/intltest/regcoll.h » ('j') | no next file with comments »