Index: source/test/intltest/rbbitst.cpp |
diff --git a/source/test/intltest/rbbitst.cpp b/source/test/intltest/rbbitst.cpp |
index 278176dc333130ba0576e54b66b26b7ba0a94374..2fb52604f9bec26ee630de56da46d5cc51b93d9d 100644 |
--- a/source/test/intltest/rbbitst.cpp |
+++ b/source/test/intltest/rbbitst.cpp |
@@ -1,6 +1,6 @@ |
/******************************************************************** |
* COPYRIGHT: |
- * Copyright (c) 1999-2014, International Business Machines Corporation and |
+ * Copyright (c) 1999-2015, International Business Machines Corporation and |
* others. All Rights Reserved. |
********************************************************************/ |
/************************************************************************ |
@@ -38,6 +38,11 @@ |
#include <stdlib.h> |
#include "unicode/numfmt.h" |
#include "unicode/uscript.h" |
+#include "cmemory.h" |
+ |
+#if !UCONFIG_NO_FILTERED_BREAK_ITERATION |
+#include "unicode/filteredbrk.h" |
+#endif // !UCONFIG_NO_FILTERED_BREAK_ITERATION |
#define TEST_ASSERT(x) {if (!(x)) { \ |
errln("Failure in file %s, line %d", __FILE__, __LINE__);}} |
@@ -1174,7 +1179,7 @@ void RBBITest::TestExtended() { |
UnicodeString rules; |
TestParams tp(status); |
- RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{Nd}_]*) *>"), 0, status); |
+ RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{Nd}_@&=-]*) *>"), 0, status); |
if (U_FAILURE(status)) { |
dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LINE__, u_errorName(status)); |
} |
@@ -1199,7 +1204,7 @@ void RBBITest::TestExtended() { |
} |
- |
+ bool skipTest = false; // Skip this test? |
// |
// Put the test data into a UnicodeString |
@@ -1267,25 +1272,28 @@ void RBBITest::TestExtended() { |
if (testString.compare(charIdx-1, 6, "<word>") == 0) { |
delete tp.bi; |
tp.bi = BreakIterator::createWordInstance(locale, status); |
+ skipTest = false; |
charIdx += 5; |
break; |
} |
if (testString.compare(charIdx-1, 6, "<char>") == 0) { |
delete tp.bi; |
tp.bi = BreakIterator::createCharacterInstance(locale, status); |
+ skipTest = false; |
charIdx += 5; |
break; |
} |
if (testString.compare(charIdx-1, 6, "<line>") == 0) { |
delete tp.bi; |
tp.bi = BreakIterator::createLineInstance(locale, status); |
+ skipTest = false; |
charIdx += 5; |
break; |
} |
if (testString.compare(charIdx-1, 6, "<sent>") == 0) { |
delete tp.bi; |
- tp.bi = NULL; |
tp.bi = BreakIterator::createSentenceInstance(locale, status); |
+ skipTest = false; |
charIdx += 5; |
break; |
} |
@@ -1346,17 +1354,19 @@ void RBBITest::TestExtended() { |
parseState = PARSE_TAG; |
charIdx += 6; |
- // RUN THE TEST! |
- status = U_ZERO_ERROR; |
- tp.setUTF16(status); |
- executeTest(&tp, status); |
- TEST_ASSERT_SUCCESS(status); |
- |
- // Run again, this time with UTF-8 text wrapped in a UText. |
- status = U_ZERO_ERROR; |
- tp.setUTF8(status); |
- TEST_ASSERT_SUCCESS(status); |
- executeTest(&tp, status); |
+ if (!skipTest) { |
+ // RUN THE TEST! |
+ status = U_ZERO_ERROR; |
+ tp.setUTF16(status); |
+ executeTest(&tp, status); |
+ TEST_ASSERT_SUCCESS(status); |
+ |
+ // Run again, this time with UTF-8 text wrapped in a UText. |
+ status = U_ZERO_ERROR; |
+ tp.setUTF8(status); |
+ TEST_ASSERT_SUCCESS(status); |
+ executeTest(&tp, status); |
+ } |
break; |
} |
@@ -1724,6 +1734,32 @@ void RBBITest::TestUnicodeFiles() { |
} |
+// Check for test cases from the Unicode test data files that are known to fail |
+// and should be skipped because ICU is not yet able to fully implement the spec. |
+// See ticket #7270. |
+ |
+UBool RBBITest::testCaseIsKnownIssue(const UnicodeString &testCase, const char *fileName) { |
+ static const UChar badTestCases[][4] = { // Line Numbers from Unicode 7.0.0 file. |
+ {(UChar)0x200B, (UChar)0x0020, (UChar)0x007D, (UChar)0x0000}, // Line 5198 |
+ {(UChar)0x200B, (UChar)0x0020, (UChar)0x0029, (UChar)0x0000}, // Line 5202 |
+ {(UChar)0x200B, (UChar)0x0020, (UChar)0x0021, (UChar)0x0000}, // Line 5214 |
+ {(UChar)0x200B, (UChar)0x0020, (UChar)0x002c, (UChar)0x0000}, // Line 5246 |
+ {(UChar)0x200B, (UChar)0x0020, (UChar)0x002f, (UChar)0x0000}, // Line 5298 |
+ {(UChar)0x200B, (UChar)0x0020, (UChar)0x2060, (UChar)0x0000} // Line 5302 |
+ }; |
+ if (strcmp(fileName, "LineBreakTest.txt") != 0) { |
+ return FALSE; |
+ } |
+ |
+ for (int i=0; i<UPRV_LENGTHOF(badTestCases); i++) { |
+ if (testCase == UnicodeString(badTestCases[i])) { |
+ return logKnownIssue("7270"); |
+ } |
+ } |
+ return FALSE; |
+} |
+ |
+ |
//-------------------------------------------------------------------------------------------- |
// |
// Run tests from one of the boundary test data files distributed by the Unicode Consortium |
@@ -1731,9 +1767,6 @@ void RBBITest::TestUnicodeFiles() { |
//------------------------------------------------------------------------------------------- |
void RBBITest::runUnicodeTestData(const char *fileName, RuleBasedBreakIterator *bi) { |
#if !UCONFIG_NO_REGULAR_EXPRESSIONS |
- // TODO(andy): Match line break behavior to Unicode 6.0 and remove this time bomb. Ticket #7270 |
- UBool isTicket7270Fixed = !logKnownIssue("7270"); |
- UBool isLineBreak = 0 == strcmp(fileName, "LineBreakTest.txt"); |
UErrorCode status = U_ZERO_ERROR; |
// |
@@ -1825,20 +1858,8 @@ void RBBITest::runUnicodeTestData(const char *fileName, RuleBasedBreakIterator * |
else if (tokenMatcher.start(4, status) >= 0) { |
// Scanned to end of a line, possibly skipping over a comment in the process. |
// If the line from the file contained test data, run the test now. |
- // |
- if (testString.length() > 0) { |
-// TODO(andy): Remove this time bomb code. Note: Failing line numbers may change when updating to new Unicode data. |
-// Rule 8 |
-// ZW SP* <break> |
-// is not yet implemented. |
-if (!(isLineBreak && !isTicket7270Fixed && (5198 == lineNumber || |
- 5202 == lineNumber || |
- 5214 == lineNumber || |
- 5246 == lineNumber || |
- 5298 == lineNumber || |
- 5302 == lineNumber ))) { |
+ if (testString.length() > 0 && !testCaseIsKnownIssue(testString, fileName)) { |
checkUnicodeTestCase(fileName, lineNumber, testString, &breakPositions, bi); |
-} |
} |
// Clear out this test case. |
@@ -2743,8 +2764,9 @@ int32_t RBBISentMonkey::next(int32_t prevPos) { |
continue; |
} |
- // Rule (7). Upper ATerm x Uppper |
- if (fUpperSet->contains(c0) && fATermSet->contains(c1) && fUpperSet->contains(c2)) { |
+ // Rule (7). (Upper | Lower) ATerm x Uppper |
+ if ((fUpperSet->contains(c0) || fLowerSet->contains(c0)) && |
+ fATermSet->contains(c1) && fUpperSet->contains(c2)) { |
continue; |
} |
@@ -3359,6 +3381,7 @@ int32_t RBBILineMonkey::next(int32_t startPos) { |
// LB 22 |
if ((fAL->contains(prevChar) && fIN->contains(thisChar)) || |
+ (fEX->contains(prevChar) && fIN->contains(thisChar)) || |
(fHL->contains(prevChar) && fIN->contains(thisChar)) || |
(fID->contains(prevChar) && fIN->contains(thisChar)) || |
(fIN->contains(prevChar) && fIN->contains(thisChar)) || |