Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(194)

Side by Side Diff: source/test/intltest/rbbitst.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/test/intltest/rbbitst.h ('k') | source/test/intltest/regcoll.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /******************************************************************** 1 /********************************************************************
2 * COPYRIGHT: 2 * COPYRIGHT:
3 * Copyright (c) 1999-2014, International Business Machines Corporation and 3 * Copyright (c) 1999-2015, International Business Machines Corporation and
4 * others. All Rights Reserved. 4 * others. All Rights Reserved.
5 ********************************************************************/ 5 ********************************************************************/
6 /************************************************************************ 6 /************************************************************************
7 * Date Name Description 7 * Date Name Description
8 * 12/15/99 Madhu Creation. 8 * 12/15/99 Madhu Creation.
9 * 01/12/2000 Madhu Updated for changed API and added new tests 9 * 01/12/2000 Madhu Updated for changed API and added new tests
10 ************************************************************************/ 10 ************************************************************************/
11 11
12 #include "utypeinfo.h" // for 'typeid' to work 12 #include "utypeinfo.h" // for 'typeid' to work
13 13
(...skipping 17 matching lines...) Expand all
31 #include "intltest.h" 31 #include "intltest.h"
32 #include "rbbitst.h" 32 #include "rbbitst.h"
33 #include <string.h> 33 #include <string.h>
34 #include "charstr.h" 34 #include "charstr.h"
35 #include "uvector.h" 35 #include "uvector.h"
36 #include "uvectr32.h" 36 #include "uvectr32.h"
37 #include <stdio.h> 37 #include <stdio.h>
38 #include <stdlib.h> 38 #include <stdlib.h>
39 #include "unicode/numfmt.h" 39 #include "unicode/numfmt.h"
40 #include "unicode/uscript.h" 40 #include "unicode/uscript.h"
41 #include "cmemory.h"
42
43 #if !UCONFIG_NO_FILTERED_BREAK_ITERATION
44 #include "unicode/filteredbrk.h"
45 #endif // !UCONFIG_NO_FILTERED_BREAK_ITERATION
41 46
42 #define TEST_ASSERT(x) {if (!(x)) { \ 47 #define TEST_ASSERT(x) {if (!(x)) { \
43 errln("Failure in file %s, line %d", __FILE__, __LINE__);}} 48 errln("Failure in file %s, line %d", __FILE__, __LINE__);}}
44 49
45 #define TEST_ASSERT_SUCCESS(errcode) { if (U_FAILURE(errcode)) { \ 50 #define TEST_ASSERT_SUCCESS(errcode) { if (U_FAILURE(errcode)) { \
46 errcheckln(errcode, "Failure in file %s, line %d, status = \"%s\"", __FILE__ , __LINE__, u_errorName(errcode));}} 51 errcheckln(errcode, "Failure in file %s, line %d, status = \"%s\"", __FILE__ , __LINE__, u_errorName(errcode));}}
47 52
48 53
49 //--------------------------------------------- 54 //---------------------------------------------
50 // runIndexedTest 55 // runIndexedTest
(...skipping 1116 matching lines...) Expand 10 before | Expand all | Expand 10 after
1167 1172
1168 1173
1169 void RBBITest::TestExtended() { 1174 void RBBITest::TestExtended() {
1170 #if !UCONFIG_NO_REGULAR_EXPRESSIONS 1175 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
1171 UErrorCode status = U_ZERO_ERROR; 1176 UErrorCode status = U_ZERO_ERROR;
1172 Locale locale(""); 1177 Locale locale("");
1173 1178
1174 UnicodeString rules; 1179 UnicodeString rules;
1175 TestParams tp(status); 1180 TestParams tp(status);
1176 1181
1177 RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{Nd}_]*) *>"), 0, status); 1182 RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{Nd}_@&=-]*) *>"), 0, status);
1178 if (U_FAILURE(status)) { 1183 if (U_FAILURE(status)) {
1179 dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LINE__, u_errorName(status)); 1184 dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LINE__, u_errorName(status));
1180 } 1185 }
1181 1186
1182 1187
1183 // 1188 //
1184 // Open and read the test data file. 1189 // Open and read the test data file.
1185 // 1190 //
1186 const char *testDataDirectory = IntlTest::getSourceTestData(status); 1191 const char *testDataDirectory = IntlTest::getSourceTestData(status);
1187 char testFileName[1000]; 1192 char testFileName[1000];
1188 if (testDataDirectory == NULL || strlen(testDataDirectory) >= sizeof(testFileName)) { 1193 if (testDataDirectory == NULL || strlen(testDataDirectory) >= sizeof(testFileName)) {
1189 errln("Can't open test data. Path too long."); 1194 errln("Can't open test data. Path too long.");
1190 return; 1195 return;
1191 } 1196 }
1192 strcpy(testFileName, testDataDirectory); 1197 strcpy(testFileName, testDataDirectory);
1193 strcat(testFileName, "rbbitst.txt"); 1198 strcat(testFileName, "rbbitst.txt");
1194 1199
1195 int len; 1200 int len;
1196 UChar *testFile = ReadAndConvertFile(testFileName, len, "UTF-8", status); 1201 UChar *testFile = ReadAndConvertFile(testFileName, len, "UTF-8", status);
1197 if (U_FAILURE(status)) { 1202 if (U_FAILURE(status)) {
1198 return; /* something went wrong, error already output */ 1203 return; /* something went wrong, error already output */
1199 } 1204 }
1200 1205
1201 1206
1202 1207 bool skipTest = false; // Skip this test?
1203 1208
1204 // 1209 //
1205 // Put the test data into a UnicodeString 1210 // Put the test data into a UnicodeString
1206 // 1211 //
1207 UnicodeString testString(FALSE, testFile, len); 1212 UnicodeString testString(FALSE, testFile, len);
1208 1213
1209 enum EParseState{ 1214 enum EParseState{
1210 PARSE_COMMENT, 1215 PARSE_COMMENT,
1211 PARSE_TAG, 1216 PARSE_TAG,
1212 PARSE_DATA, 1217 PARSE_DATA,
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
1260 parseState = PARSE_COMMENT; 1265 parseState = PARSE_COMMENT;
1261 savedState = PARSE_TAG; 1266 savedState = PARSE_TAG;
1262 break; 1267 break;
1263 } 1268 }
1264 if (u_isUWhiteSpace(c)) { 1269 if (u_isUWhiteSpace(c)) {
1265 break; 1270 break;
1266 } 1271 }
1267 if (testString.compare(charIdx-1, 6, "<word>") == 0) { 1272 if (testString.compare(charIdx-1, 6, "<word>") == 0) {
1268 delete tp.bi; 1273 delete tp.bi;
1269 tp.bi = BreakIterator::createWordInstance(locale, status); 1274 tp.bi = BreakIterator::createWordInstance(locale, status);
1275 skipTest = false;
1270 charIdx += 5; 1276 charIdx += 5;
1271 break; 1277 break;
1272 } 1278 }
1273 if (testString.compare(charIdx-1, 6, "<char>") == 0) { 1279 if (testString.compare(charIdx-1, 6, "<char>") == 0) {
1274 delete tp.bi; 1280 delete tp.bi;
1275 tp.bi = BreakIterator::createCharacterInstance(locale, status); 1281 tp.bi = BreakIterator::createCharacterInstance(locale, status);
1282 skipTest = false;
1276 charIdx += 5; 1283 charIdx += 5;
1277 break; 1284 break;
1278 } 1285 }
1279 if (testString.compare(charIdx-1, 6, "<line>") == 0) { 1286 if (testString.compare(charIdx-1, 6, "<line>") == 0) {
1280 delete tp.bi; 1287 delete tp.bi;
1281 tp.bi = BreakIterator::createLineInstance(locale, status); 1288 tp.bi = BreakIterator::createLineInstance(locale, status);
1289 skipTest = false;
1282 charIdx += 5; 1290 charIdx += 5;
1283 break; 1291 break;
1284 } 1292 }
1285 if (testString.compare(charIdx-1, 6, "<sent>") == 0) { 1293 if (testString.compare(charIdx-1, 6, "<sent>") == 0) {
1286 delete tp.bi; 1294 delete tp.bi;
1287 tp.bi = NULL;
1288 tp.bi = BreakIterator::createSentenceInstance(locale, status); 1295 tp.bi = BreakIterator::createSentenceInstance(locale, status);
1296 skipTest = false;
1289 charIdx += 5; 1297 charIdx += 5;
1290 break; 1298 break;
1291 } 1299 }
1292 if (testString.compare(charIdx-1, 7, "<title>") == 0) { 1300 if (testString.compare(charIdx-1, 7, "<title>") == 0) {
1293 delete tp.bi; 1301 delete tp.bi;
1294 tp.bi = BreakIterator::createTitleInstance(locale, status); 1302 tp.bi = BreakIterator::createTitleInstance(locale, status);
1295 charIdx += 6; 1303 charIdx += 6;
1296 break; 1304 break;
1297 } 1305 }
1298 1306
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
1339 if (testString.compare(charIdx-1, 7, "</data>") == 0) { 1347 if (testString.compare(charIdx-1, 7, "</data>") == 0) {
1340 // Add final entry to mappings from break location to source file position. 1348 // Add final entry to mappings from break location to source file position.
1341 // Need one extra because last break position returned is after the 1349 // Need one extra because last break position returned is after the
1342 // last char in the data, not at the last char. 1350 // last char in the data, not at the last char.
1343 tp.srcLine->addElement(lineNum, status); 1351 tp.srcLine->addElement(lineNum, status);
1344 tp.srcCol ->addElement(column, status); 1352 tp.srcCol ->addElement(column, status);
1345 1353
1346 parseState = PARSE_TAG; 1354 parseState = PARSE_TAG;
1347 charIdx += 6; 1355 charIdx += 6;
1348 1356
1349 // RUN THE TEST! 1357 if (!skipTest) {
1350 status = U_ZERO_ERROR; 1358 // RUN THE TEST!
1351 tp.setUTF16(status); 1359 status = U_ZERO_ERROR;
1352 executeTest(&tp, status); 1360 tp.setUTF16(status);
1353 TEST_ASSERT_SUCCESS(status); 1361 executeTest(&tp, status);
1362 TEST_ASSERT_SUCCESS(status);
1354 1363
1355 // Run again, this time with UTF-8 text wrapped in a UText. 1364 // Run again, this time with UTF-8 text wrapped in a UText.
1356 status = U_ZERO_ERROR; 1365 status = U_ZERO_ERROR;
1357 tp.setUTF8(status); 1366 tp.setUTF8(status);
1358 TEST_ASSERT_SUCCESS(status); 1367 TEST_ASSERT_SUCCESS(status);
1359 executeTest(&tp, status); 1368 executeTest(&tp, status);
1369 }
1360 break; 1370 break;
1361 } 1371 }
1362 1372
1363 if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{")) == 0) { 1373 if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{")) == 0) {
1364 // Named character, e.g. \N{COMBINING GRAVE ACCENT} 1374 // Named character, e.g. \N{COMBINING GRAVE ACCENT}
1365 // Get the code point from the name and insert it into the test data. 1375 // Get the code point from the name and insert it into the test data.
1366 // (Damn, no API takes names in Unicode !!! 1376 // (Damn, no API takes names in Unicode !!!
1367 // we've got to take it back to char *) 1377 // we've got to take it back to char *)
1368 int32_t nameEndIdx = testString.indexOf((UChar)0x7d/*'}'*/, charIdx); 1378 int32_t nameEndIdx = testString.indexOf((UChar)0x7d/*'}'*/, charIdx);
1369 int32_t nameLength = nameEndIdx - (charIdx+2); 1379 int32_t nameLength = nameEndIdx - (charIdx+2);
(...skipping 347 matching lines...) Expand 10 before | Expand all | Expand 10 after
1717 1727
1718 bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status); 1728 bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
1719 TEST_ASSERT_SUCCESS(status); 1729 TEST_ASSERT_SUCCESS(status);
1720 if (U_SUCCESS(status)) { 1730 if (U_SUCCESS(status)) {
1721 runUnicodeTestData("LineBreakTest.txt", bi); 1731 runUnicodeTestData("LineBreakTest.txt", bi);
1722 } 1732 }
1723 delete bi; 1733 delete bi;
1724 } 1734 }
1725 1735
1726 1736
1737 // Check for test cases from the Unicode test data files that are known to fail
1738 // and should be skipped because ICU is not yet able to fully implement the spec.
1739 // See ticket #7270.
1740
1741 UBool RBBITest::testCaseIsKnownIssue(const UnicodeString &testCase, const char * fileName) {
1742 static const UChar badTestCases[][4] = { // Line Numbers from Unicode 7.0.0 file.
1743 {(UChar)0x200B, (UChar)0x0020, (UChar)0x007D, (UChar)0x0000}, // Line 5198
1744 {(UChar)0x200B, (UChar)0x0020, (UChar)0x0029, (UChar)0x0000}, // Line 5202
1745 {(UChar)0x200B, (UChar)0x0020, (UChar)0x0021, (UChar)0x0000}, // Line 5214
1746 {(UChar)0x200B, (UChar)0x0020, (UChar)0x002c, (UChar)0x0000}, // Line 5246
1747 {(UChar)0x200B, (UChar)0x0020, (UChar)0x002f, (UChar)0x0000}, // Line 5298
1748 {(UChar)0x200B, (UChar)0x0020, (UChar)0x2060, (UChar)0x0000} // Line 5302
1749 };
1750 if (strcmp(fileName, "LineBreakTest.txt") != 0) {
1751 return FALSE;
1752 }
1753
1754 for (int i=0; i<UPRV_LENGTHOF(badTestCases); i++) {
1755 if (testCase == UnicodeString(badTestCases[i])) {
1756 return logKnownIssue("7270");
1757 }
1758 }
1759 return FALSE;
1760 }
1761
1762
1727 //------------------------------------------------------------------------------ -------------- 1763 //------------------------------------------------------------------------------ --------------
1728 // 1764 //
1729 // Run tests from one of the boundary test data files distributed by the Unicode Consortium 1765 // Run tests from one of the boundary test data files distributed by the Unicode Consortium
1730 // 1766 //
1731 //------------------------------------------------------------------------------ ------------- 1767 //------------------------------------------------------------------------------ -------------
1732 void RBBITest::runUnicodeTestData(const char *fileName, RuleBasedBreakIterator * bi) { 1768 void RBBITest::runUnicodeTestData(const char *fileName, RuleBasedBreakIterator * bi) {
1733 #if !UCONFIG_NO_REGULAR_EXPRESSIONS 1769 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
1734 // TODO(andy): Match line break behavior to Unicode 6.0 and remove this time bomb. Ticket #7270
1735 UBool isTicket7270Fixed = !logKnownIssue("7270");
1736 UBool isLineBreak = 0 == strcmp(fileName, "LineBreakTest.txt");
1737 UErrorCode status = U_ZERO_ERROR; 1770 UErrorCode status = U_ZERO_ERROR;
1738 1771
1739 // 1772 //
1740 // Open and read the test data file, put it into a UnicodeString. 1773 // Open and read the test data file, put it into a UnicodeString.
1741 // 1774 //
1742 const char *testDataDirectory = IntlTest::getSourceTestData(status); 1775 const char *testDataDirectory = IntlTest::getSourceTestData(status);
1743 char testFileName[1000]; 1776 char testFileName[1000];
1744 if (testDataDirectory == NULL || strlen(testDataDirectory) >= sizeof(testFileName)) { 1777 if (testDataDirectory == NULL || strlen(testDataDirectory) >= sizeof(testFileName)) {
1745 dataerrln("Can't open test data. Path too long."); 1778 dataerrln("Can't open test data. Path too long.");
1746 return; 1779 return;
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
1818 fileName, lineNumber); 1851 fileName, lineNumber);
1819 } 1852 }
1820 } else { 1853 } else {
1821 errln("Syntax Error: Hex Unicode Character value must have no more than 8 digits at \'%s\', line %d.\n", 1854 errln("Syntax Error: Hex Unicode Character value must have no more than 8 digits at \'%s\', line %d.\n",
1822 fileName, lineNumber); 1855 fileName, lineNumber);
1823 } 1856 }
1824 } 1857 }
1825 else if (tokenMatcher.start(4, status) >= 0) { 1858 else if (tokenMatcher.start(4, status) >= 0) {
1826 // Scanned to end of a line, possibly skipping over a comment in the process. 1859 // Scanned to end of a line, possibly skipping over a comment in the process.
1827 // If the line from the file contained test data, run the test now. 1860 // If the line from the file contained test data, run the test now.
1828 // 1861 if (testString.length() > 0 && !testCaseIsKnownIssue(testString, fileName)) {
1829 if (testString.length() > 0) {
1830 // TODO(andy): Remove this time bomb code. Note: Failing line numbers may change when updating to new Unicode data.
1831 // Rule 8
1832 // ZW SP* <break>
1833 // is not yet implemented.
1834 if (!(isLineBreak && !isTicket7270Fixed && (5198 == lineNumber ||
1835 5202 == lineNumber ||
1836 5214 == lineNumber ||
1837 5246 == lineNumber ||
1838 5298 == lineNumber ||
1839 5302 == lineNumber ))) {
1840 checkUnicodeTestCase(fileName, lineNumber, testString, &breakPositions, bi); 1862 checkUnicodeTestCase(fileName, lineNumber, testString, &breakPositions, bi);
1841 }
1842 } 1863 }
1843 1864
1844 // Clear out this test case. 1865 // Clear out this test case.
1845 // The string and breakPositions vector will be refilled as the next 1867 // The string and breakPositions vector will be refilled as the next
1846 // test case is parsed. 1867 // test case is parsed.
1847 testString.remove(); 1868 testString.remove();
1848 breakPositions.removeAllElements(); 1869 breakPositions.removeAllElements();
1849 lineNumber++; 1870 lineNumber++;
1850 } else { 1871 } else {
1851 // Scanner catchall. Something unrecognized appeared on the line. 1872 // Scanner catchall. Something unrecognized appeared on the line.
(...skipping 884 matching lines...) Expand 10 before | Expand all | Expand 10 after
2736 if (p2 == prevPos) { 2757 if (p2 == prevPos) {
2737 // Still warming up the loop. (won't work with zero length strings, but we don't care) 2758 // Still warming up the loop. (won't work with zero length strings, but we don't care)
2738 continue; 2759 continue;
2739 } 2760 }
2740 2761
2741 // Rule (6). ATerm x Numeric 2762 // Rule (6). ATerm x Numeric
2742 if (fATermSet->contains(c1) && fNumericSet->contains(c2)) { 2763 if (fATermSet->contains(c1) && fNumericSet->contains(c2)) {
2743 continue; 2764 continue;
2744 } 2765 }
2745 2766
2746 // Rule (7). Upper ATerm x Uppper 2767 // Rule (7). (Upper | Lower) ATerm x Uppper
2747 if (fUpperSet->contains(c0) && fATermSet->contains(c1) && fUpperSet->contains(c2)) { 2768 if ((fUpperSet->contains(c0) || fLowerSet->contains(c0)) &&
2769 fATermSet->contains(c1) && fUpperSet->contains(c2)) {
2748 continue; 2770 continue;
2749 } 2771 }
2750 2772
2751 // Rule (8) ATerm Close* Sp* x (not (OLettter | Upper | Lower | Sep | STerm | ATerm))* Lower 2773 // Rule (8) ATerm Close* Sp* x (not (OLettter | Upper | Lower | Sep | STerm | ATerm))* Lower
2752 // Note: STerm | ATerm are added to the negated part of the expression by a 2774 // Note: STerm | ATerm are added to the negated part of the expression by a
2753 // note to the Unicode 5.0 documents. 2775 // note to the Unicode 5.0 documents.
2754 int p8 = p1; 2776 int p8 = p1;
2755 while (fSpSet->contains(cAt(p8))) { 2777 while (fSpSet->contains(cAt(p8))) {
2756 p8 = moveBack(p8); 2778 p8 = moveBack(p8);
2757 } 2779 }
(...skipping 594 matching lines...) Expand 10 before | Expand all | Expand 10 after
3352 } 3374 }
3353 3375
3354 // LB 21b 3376 // LB 21b
3355 // SY x HL 3377 // SY x HL
3356 if (fSY->contains(prevChar) && fHL->contains(thisChar)) { 3378 if (fSY->contains(prevChar) && fHL->contains(thisChar)) {
3357 continue; 3379 continue;
3358 } 3380 }
3359 3381
3360 // LB 22 3382 // LB 22
3361 if ((fAL->contains(prevChar) && fIN->contains(thisChar)) || 3383 if ((fAL->contains(prevChar) && fIN->contains(thisChar)) ||
3384 (fEX->contains(prevChar) && fIN->contains(thisChar)) ||
3362 (fHL->contains(prevChar) && fIN->contains(thisChar)) || 3385 (fHL->contains(prevChar) && fIN->contains(thisChar)) ||
3363 (fID->contains(prevChar) && fIN->contains(thisChar)) || 3386 (fID->contains(prevChar) && fIN->contains(thisChar)) ||
3364 (fIN->contains(prevChar) && fIN->contains(thisChar)) || 3387 (fIN->contains(prevChar) && fIN->contains(thisChar)) ||
3365 (fNU->contains(prevChar) && fIN->contains(thisChar)) ) { 3388 (fNU->contains(prevChar) && fIN->contains(thisChar)) ) {
3366 continue; 3389 continue;
3367 } 3390 }
3368 3391
3369 3392
3370 // LB 23 ID x PO 3393 // LB 23 ID x PO
3371 // AL x NU 3394 // AL x NU
(...skipping 1128 matching lines...) Expand 10 before | Expand all | Expand 10 after
4500 UnicodeSet prependSet(UNICODE_STRING_SIMPLE("[:GCB=Prepend:]"), errorCode); 4523 UnicodeSet prependSet(UNICODE_STRING_SIMPLE("[:GCB=Prepend:]"), errorCode);
4501 if (!prependSet.isEmpty()) { 4524 if (!prependSet.isEmpty()) {
4502 errln( 4525 errln(
4503 "[:GCB=Prepend:] is not empty any more. " 4526 "[:GCB=Prepend:] is not empty any more. "
4504 "Uncomment relevant lines in source/data/brkitr/char.txt and " 4527 "Uncomment relevant lines in source/data/brkitr/char.txt and "
4505 "change this test to the opposite condition."); 4528 "change this test to the opposite condition.");
4506 } 4529 }
4507 } 4530 }
4508 4531
4509 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 4532 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
OLDNEW
« no previous file with comments | « source/test/intltest/rbbitst.h ('k') | source/test/intltest/regcoll.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698