source/test/intltest/rbbitst.cpp - Issue 1621843002: ICU 56 update step 1

Side by Side Diff: source/test/intltest/rbbitst.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /********************************************************************	1 /********************************************************************

2 * COPYRIGHT:	2 * COPYRIGHT:

3 * Copyright (c) 1999-2014, International Business Machines Corporation and	3 * Copyright (c) 1999-2015, International Business Machines Corporation and

4 * others. All Rights Reserved.	4 * others. All Rights Reserved.

5 ********************************************************************/	5 ********************************************************************/

6 /************************************************************************	6 /************************************************************************

7 * Date Name Description	7 * Date Name Description

8 * 12/15/99 Madhu Creation.	8 * 12/15/99 Madhu Creation.

9 * 01/12/2000 Madhu Updated for changed API and added new tests	9 * 01/12/2000 Madhu Updated for changed API and added new tests

10 ************************************************************************/	10 ************************************************************************/

11	11

12 #include "utypeinfo.h" // for 'typeid' to work	12 #include "utypeinfo.h" // for 'typeid' to work

13	13

(...skipping 17 matching lines...) Expand all Loading...
31 #include "intltest.h"	31 #include "intltest.h"

32 #include "rbbitst.h"	32 #include "rbbitst.h"

33 #include <string.h>	33 #include <string.h>

34 #include "charstr.h"	34 #include "charstr.h"

35 #include "uvector.h"	35 #include "uvector.h"

36 #include "uvectr32.h"	36 #include "uvectr32.h"

37 #include <stdio.h>	37 #include <stdio.h>

38 #include <stdlib.h>	38 #include <stdlib.h>

39 #include "unicode/numfmt.h"	39 #include "unicode/numfmt.h"

40 #include "unicode/uscript.h"	40 #include "unicode/uscript.h"

	41 #include "cmemory.h"

	42

	43 #if !UCONFIG_NO_FILTERED_BREAK_ITERATION

	44 #include "unicode/filteredbrk.h"

	45 #endif // !UCONFIG_NO_FILTERED_BREAK_ITERATION

41	46

42 #define TEST_ASSERT(x) {if (!(x)) { \	47 #define TEST_ASSERT(x) {if (!(x)) { \

43 errln("Failure in file %s, line %d", __FILE__, __LINE__);}}	48 errln("Failure in file %s, line %d", __FILE__, __LINE__);}}

44	49

45 #define TEST_ASSERT_SUCCESS(errcode) { if (U_FAILURE(errcode)) { \	50 #define TEST_ASSERT_SUCCESS(errcode) { if (U_FAILURE(errcode)) { \

46 errcheckln(errcode, "Failure in file %s, line %d, status = \"%s\"", __FILE__ , __LINE__, u_errorName(errcode));}}	51 errcheckln(errcode, "Failure in file %s, line %d, status = \"%s\"", __FILE__ , __LINE__, u_errorName(errcode));}}

47	52

48	53

49 //---------------------------------------------	54 //---------------------------------------------

50 // runIndexedTest	55 // runIndexedTest

(...skipping 1116 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1167	1172

1168	1173

1169 void RBBITest::TestExtended() {	1174 void RBBITest::TestExtended() {

1170 #if !UCONFIG_NO_REGULAR_EXPRESSIONS	1175 #if !UCONFIG_NO_REGULAR_EXPRESSIONS

1171 UErrorCode status = U_ZERO_ERROR;	1176 UErrorCode status = U_ZERO_ERROR;

1172 Locale locale("");	1177 Locale locale("");

1173	1178

1174 UnicodeString rules;	1179 UnicodeString rules;

1175 TestParams tp(status);	1180 TestParams tp(status);

1176	1181

1177 RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{Nd}_]*) *>"), 0, status);	1182 RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{Nd}_@&=-]*) *>"), 0, status);

1178 if (U_FAILURE(status)) {	1183 if (U_FAILURE(status)) {

1179 dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LINE__, u_errorName(status));	1184 dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LINE__, u_errorName(status));

1180 }	1185 }

1181	1186

1182	1187

1183 //	1188 //

1184 // Open and read the test data file.	1189 // Open and read the test data file.

1185 //	1190 //

1186 const char *testDataDirectory = IntlTest::getSourceTestData(status);	1191 const char *testDataDirectory = IntlTest::getSourceTestData(status);

1187 char testFileName[1000];	1192 char testFileName[1000];

1188 if (testDataDirectory == NULL || strlen(testDataDirectory) >= sizeof(testFileName)) {	1193 if (testDataDirectory == NULL || strlen(testDataDirectory) >= sizeof(testFileName)) {

1189 errln("Can't open test data. Path too long.");	1194 errln("Can't open test data. Path too long.");

1190 return;	1195 return;

1191 }	1196 }

1192 strcpy(testFileName, testDataDirectory);	1197 strcpy(testFileName, testDataDirectory);

1193 strcat(testFileName, "rbbitst.txt");	1198 strcat(testFileName, "rbbitst.txt");

1194	1199

1195 int len;	1200 int len;

1196 UChar *testFile = ReadAndConvertFile(testFileName, len, "UTF-8", status);	1201 UChar *testFile = ReadAndConvertFile(testFileName, len, "UTF-8", status);

1197 if (U_FAILURE(status)) {	1202 if (U_FAILURE(status)) {

1198 return; /* something went wrong, error already output */	1203 return; /* something went wrong, error already output */

1199 }	1204 }

1200	1205

1201	1206

1202	1207 bool skipTest = false; // Skip this test?

1203	1208

1204 //	1209 //

1205 // Put the test data into a UnicodeString	1210 // Put the test data into a UnicodeString

1206 //	1211 //

1207 UnicodeString testString(FALSE, testFile, len);	1212 UnicodeString testString(FALSE, testFile, len);

1208	1213

1209 enum EParseState{	1214 enum EParseState{

1210 PARSE_COMMENT,	1215 PARSE_COMMENT,

1211 PARSE_TAG,	1216 PARSE_TAG,

1212 PARSE_DATA,	1217 PARSE_DATA,

(...skipping 47 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1260 parseState = PARSE_COMMENT;	1265 parseState = PARSE_COMMENT;

1261 savedState = PARSE_TAG;	1266 savedState = PARSE_TAG;

1262 break;	1267 break;

1263 }	1268 }

1264 if (u_isUWhiteSpace(c)) {	1269 if (u_isUWhiteSpace(c)) {

1265 break;	1270 break;

1266 }	1271 }

1267 if (testString.compare(charIdx-1, 6, "<word>") == 0) {	1272 if (testString.compare(charIdx-1, 6, "<word>") == 0) {

1268 delete tp.bi;	1273 delete tp.bi;

1269 tp.bi = BreakIterator::createWordInstance(locale, status);	1274 tp.bi = BreakIterator::createWordInstance(locale, status);

	1275 skipTest = false;

1270 charIdx += 5;	1276 charIdx += 5;

1271 break;	1277 break;

1272 }	1278 }

1273 if (testString.compare(charIdx-1, 6, "<char>") == 0) {	1279 if (testString.compare(charIdx-1, 6, "<char>") == 0) {

1274 delete tp.bi;	1280 delete tp.bi;

1275 tp.bi = BreakIterator::createCharacterInstance(locale, status);	1281 tp.bi = BreakIterator::createCharacterInstance(locale, status);

	1282 skipTest = false;

1276 charIdx += 5;	1283 charIdx += 5;

1277 break;	1284 break;

1278 }	1285 }

1279 if (testString.compare(charIdx-1, 6, "<line>") == 0) {	1286 if (testString.compare(charIdx-1, 6, "<line>") == 0) {

1280 delete tp.bi;	1287 delete tp.bi;

1281 tp.bi = BreakIterator::createLineInstance(locale, status);	1288 tp.bi = BreakIterator::createLineInstance(locale, status);

	1289 skipTest = false;

1282 charIdx += 5;	1290 charIdx += 5;

1283 break;	1291 break;

1284 }	1292 }

1285 if (testString.compare(charIdx-1, 6, "<sent>") == 0) {	1293 if (testString.compare(charIdx-1, 6, "<sent>") == 0) {

1286 delete tp.bi;	1294 delete tp.bi;

1287 tp.bi = NULL;

1288 tp.bi = BreakIterator::createSentenceInstance(locale, status);	1295 tp.bi = BreakIterator::createSentenceInstance(locale, status);

	1296 skipTest = false;

1289 charIdx += 5;	1297 charIdx += 5;

1290 break;	1298 break;

1291 }	1299 }

1292 if (testString.compare(charIdx-1, 7, "<title>") == 0) {	1300 if (testString.compare(charIdx-1, 7, "<title>") == 0) {

1293 delete tp.bi;	1301 delete tp.bi;

1294 tp.bi = BreakIterator::createTitleInstance(locale, status);	1302 tp.bi = BreakIterator::createTitleInstance(locale, status);

1295 charIdx += 6;	1303 charIdx += 6;

1296 break;	1304 break;

1297 }	1305 }

1298	1306

(...skipping 40 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1339 if (testString.compare(charIdx-1, 7, "</data>") == 0) {	1347 if (testString.compare(charIdx-1, 7, "</data>") == 0) {

1340 // Add final entry to mappings from break location to source file position.	1348 // Add final entry to mappings from break location to source file position.

1341 // Need one extra because last break position returned is after the	1349 // Need one extra because last break position returned is after the

1342 // last char in the data, not at the last char.	1350 // last char in the data, not at the last char.

1343 tp.srcLine->addElement(lineNum, status);	1351 tp.srcLine->addElement(lineNum, status);

1344 tp.srcCol ->addElement(column, status);	1352 tp.srcCol ->addElement(column, status);

1345	1353

1346 parseState = PARSE_TAG;	1354 parseState = PARSE_TAG;

1347 charIdx += 6;	1355 charIdx += 6;

1348	1356

1349 // RUN THE TEST!	1357 if (!skipTest) {

1350 status = U_ZERO_ERROR;	1358 // RUN THE TEST!

1351 tp.setUTF16(status);	1359 status = U_ZERO_ERROR;

1352 executeTest(&tp, status);	1360 tp.setUTF16(status);

1353 TEST_ASSERT_SUCCESS(status);	1361 executeTest(&tp, status);

	1362 TEST_ASSERT_SUCCESS(status);

1354	1363

1355 // Run again, this time with UTF-8 text wrapped in a UText.	1364 // Run again, this time with UTF-8 text wrapped in a UText.

1356 status = U_ZERO_ERROR;	1365 status = U_ZERO_ERROR;

1357 tp.setUTF8(status);	1366 tp.setUTF8(status);

1358 TEST_ASSERT_SUCCESS(status);	1367 TEST_ASSERT_SUCCESS(status);

1359 executeTest(&tp, status);	1368 executeTest(&tp, status);

	1369 }

1360 break;	1370 break;

1361 }	1371 }

1362	1372

1363 if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{")) == 0) {	1373 if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{")) == 0) {

1364 // Named character, e.g. \N{COMBINING GRAVE ACCENT}	1374 // Named character, e.g. \N{COMBINING GRAVE ACCENT}

1365 // Get the code point from the name and insert it into the test data.	1375 // Get the code point from the name and insert it into the test data.

1366 // (Damn, no API takes names in Unicode !!!	1376 // (Damn, no API takes names in Unicode !!!

1367 // we've got to take it back to char *)	1377 // we've got to take it back to char *)

1368 int32_t nameEndIdx = testString.indexOf((UChar)0x7d/*'}'*/, charIdx);	1378 int32_t nameEndIdx = testString.indexOf((UChar)0x7d/*'}'*/, charIdx);

1369 int32_t nameLength = nameEndIdx - (charIdx+2);	1379 int32_t nameLength = nameEndIdx - (charIdx+2);

(...skipping 347 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1717	1727

1718 bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);	1728 bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);

1719 TEST_ASSERT_SUCCESS(status);	1729 TEST_ASSERT_SUCCESS(status);

1720 if (U_SUCCESS(status)) {	1730 if (U_SUCCESS(status)) {

1721 runUnicodeTestData("LineBreakTest.txt", bi);	1731 runUnicodeTestData("LineBreakTest.txt", bi);

1722 }	1732 }

1723 delete bi;	1733 delete bi;

1724 }	1734 }

1725	1735

1726	1736

	1737 // Check for test cases from the Unicode test data files that are known to fail

	1738 // and should be skipped because ICU is not yet able to fully implement the spec.

	1739 // See ticket #7270.

	1740

	1741 UBool RBBITest::testCaseIsKnownIssue(const UnicodeString &testCase, const char * fileName) {

	1742 static const UChar badTestCases[][4] = { // Line Numbers from Unicode 7.0.0 file.

	1743 {(UChar)0x200B, (UChar)0x0020, (UChar)0x007D, (UChar)0x0000}, // Line 5198

	1744 {(UChar)0x200B, (UChar)0x0020, (UChar)0x0029, (UChar)0x0000}, // Line 5202

	1745 {(UChar)0x200B, (UChar)0x0020, (UChar)0x0021, (UChar)0x0000}, // Line 5214

	1746 {(UChar)0x200B, (UChar)0x0020, (UChar)0x002c, (UChar)0x0000}, // Line 5246

	1747 {(UChar)0x200B, (UChar)0x0020, (UChar)0x002f, (UChar)0x0000}, // Line 5298

	1748 {(UChar)0x200B, (UChar)0x0020, (UChar)0x2060, (UChar)0x0000} // Line 5302

	1749 };

	1750 if (strcmp(fileName, "LineBreakTest.txt") != 0) {

	1751 return FALSE;

	1752 }

	1753

	1754 for (int i=0; i<UPRV_LENGTHOF(badTestCases); i++) {

	1755 if (testCase == UnicodeString(badTestCases[i])) {

	1756 return logKnownIssue("7270");

	1757 }

	1758 }

	1759 return FALSE;

	1760 }

	1761

	1762

1727 //------------------------------------------------------------------------------ --------------	1763 //------------------------------------------------------------------------------ --------------

1728 //	1764 //

1729 // Run tests from one of the boundary test data files distributed by the Unicode Consortium	1765 // Run tests from one of the boundary test data files distributed by the Unicode Consortium

1730 //	1766 //

1731 //------------------------------------------------------------------------------ -------------	1767 //------------------------------------------------------------------------------ -------------

1732 void RBBITest::runUnicodeTestData(const char *fileName, RuleBasedBreakIterator *bi) {	1768 void RBBITest::runUnicodeTestData(const char *fileName, RuleBasedBreakIterator *bi) {

1733 #if !UCONFIG_NO_REGULAR_EXPRESSIONS	1769 #if !UCONFIG_NO_REGULAR_EXPRESSIONS

1734 // TODO(andy): Match line break behavior to Unicode 6.0 and remove this time bomb. Ticket #7270

1735 UBool isTicket7270Fixed = !logKnownIssue("7270");

1736 UBool isLineBreak = 0 == strcmp(fileName, "LineBreakTest.txt");

1737 UErrorCode status = U_ZERO_ERROR;	1770 UErrorCode status = U_ZERO_ERROR;

1738	1771

1739 //	1772 //

1740 // Open and read the test data file, put it into a UnicodeString.	1773 // Open and read the test data file, put it into a UnicodeString.

1741 //	1774 //

1742 const char *testDataDirectory = IntlTest::getSourceTestData(status);	1775 const char *testDataDirectory = IntlTest::getSourceTestData(status);

1743 char testFileName[1000];	1776 char testFileName[1000];

1744 if (testDataDirectory == NULL || strlen(testDataDirectory) >= sizeof(testFileName)) {	1777 if (testDataDirectory == NULL || strlen(testDataDirectory) >= sizeof(testFileName)) {

1745 dataerrln("Can't open test data. Path too long.");	1778 dataerrln("Can't open test data. Path too long.");

1746 return;	1779 return;

(...skipping 71 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1818 fileName, lineNumber);	1851 fileName, lineNumber);

1819 }	1852 }

1820 } else {	1853 } else {

1821 errln("Syntax Error: Hex Unicode Character value must have no more than 8 digits at \'%s\', line %d.\n",	1854 errln("Syntax Error: Hex Unicode Character value must have no more than 8 digits at \'%s\', line %d.\n",

1822 fileName, lineNumber);	1855 fileName, lineNumber);

1823 }	1856 }

1824 }	1857 }

1825 else if (tokenMatcher.start(4, status) >= 0) {	1858 else if (tokenMatcher.start(4, status) >= 0) {

1826 // Scanned to end of a line, possibly skipping over a comment in the process.	1859 // Scanned to end of a line, possibly skipping over a comment in the process.

1827 // If the line from the file contained test data, run the test now.	1860 // If the line from the file contained test data, run the test now.

1828 //	1861 if (testString.length() > 0 && !testCaseIsKnownIssue(testString, fileName)) {

1829 if (testString.length() > 0) {

1830 // TODO(andy): Remove this time bomb code. Note: Failing line numbers may change when updating to new Unicode data.

1831 // Rule 8

1832 // ZW SP* <break>

1833 // is not yet implemented.

1834 if (!(isLineBreak && !isTicket7270Fixed && (5198 == lineNumber ||

1835 5202 == lineNumber ||

1836 5214 == lineNumber ||

1837 5246 == lineNumber ||

1838 5298 == lineNumber ||

1839 5302 == lineNumber ))) {

1840 checkUnicodeTestCase(fileName, lineNumber, testString, &breakPositions, bi);	1862 checkUnicodeTestCase(fileName, lineNumber, testString, &breakPositions, bi);

1841 }

1842 }	1863 }

1843	1864

1844 // Clear out this test case.	1865 // Clear out this test case.

1845 // The string and breakPositions vector will be refilled as the next	1866 // The string and breakPositions vector will be refilled as the next

1846 // test case is parsed.	1867 // test case is parsed.

1847 testString.remove();	1868 testString.remove();

1848 breakPositions.removeAllElements();	1869 breakPositions.removeAllElements();

1849 lineNumber++;	1870 lineNumber++;

1850 } else {	1871 } else {

1851 // Scanner catchall. Something unrecognized appeared on the line.	1872 // Scanner catchall. Something unrecognized appeared on the line.

(...skipping 884 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2736 if (p2 == prevPos) {	2757 if (p2 == prevPos) {

2737 // Still warming up the loop. (won't work with zero length strings, but we don't care)	2758 // Still warming up the loop. (won't work with zero length strings, but we don't care)

2738 continue;	2759 continue;

2739 }	2760 }

2740	2761

2741 // Rule (6). ATerm x Numeric	2762 // Rule (6). ATerm x Numeric

2742 if (fATermSet->contains(c1) && fNumericSet->contains(c2)) {	2763 if (fATermSet->contains(c1) && fNumericSet->contains(c2)) {

2743 continue;	2764 continue;

2744 }	2765 }

2745	2766

2746 // Rule (7). Upper ATerm x Upper	2767 // Rule (7). (Upper | Lower) ATerm x Upper

2747 if (fUpperSet->contains(c0) && fATermSet->contains(c1) && fUpperSet->contains(c2)) {	2768 if ((fUpperSet->contains(c0) || fLowerSet->contains(c0)) &&

	2769 fATermSet->contains(c1) && fUpperSet->contains(c2)) {

2748 continue;	2770 continue;

2749 }	2771 }

2750	2772

2751 // Rule (8) ATerm Close* Sp* x (not (OLetter | Upper | Lower | Sep | STerm | ATerm))* Lower	2773 // Rule (8) ATerm Close* Sp* x (not (OLetter | Upper | Lower | Sep | STerm | ATerm))* Lower

2752 // Note: STerm | ATerm are added to the negated part of the expression by a	2774 // Note: STerm | ATerm are added to the negated part of the expression by a

2753 // note to the Unicode 5.0 documents.	2775 // note to the Unicode 5.0 documents.

2754 int p8 = p1;	2776 int p8 = p1;

2755 while (fSpSet->contains(cAt(p8))) {	2777 while (fSpSet->contains(cAt(p8))) {

2756 p8 = moveBack(p8);	2778 p8 = moveBack(p8);

2757 }	2779 }

(...skipping 594 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3352 }	3374 }

3353	3375

3354 // LB 21b	3376 // LB 21b

3355 // SY x HL	3377 // SY x HL

3356 if (fSY->contains(prevChar) && fHL->contains(thisChar)) {	3378 if (fSY->contains(prevChar) && fHL->contains(thisChar)) {

3357 continue;	3379 continue;

3358 }	3380 }

3359	3381

3360 // LB 22	3382 // LB 22

3361 if ((fAL->contains(prevChar) && fIN->contains(thisChar)) ||	3383 if ((fAL->contains(prevChar) && fIN->contains(thisChar)) ||

	3384 (fEX->contains(prevChar) && fIN->contains(thisChar)) ||

3362 (fHL->contains(prevChar) && fIN->contains(thisChar)) ||	3385 (fHL->contains(prevChar) && fIN->contains(thisChar)) ||

3363 (fID->contains(prevChar) && fIN->contains(thisChar)) ||	3386 (fID->contains(prevChar) && fIN->contains(thisChar)) ||

3364 (fIN->contains(prevChar) && fIN->contains(thisChar)) ||	3387 (fIN->contains(prevChar) && fIN->contains(thisChar)) ||

3365 (fNU->contains(prevChar) && fIN->contains(thisChar)) ) {	3388 (fNU->contains(prevChar) && fIN->contains(thisChar)) ) {

3366 continue;	3389 continue;

3367 }	3390 }

3368	3391

3369	3392

3370 // LB 23 ID x PO	3393 // LB 23 ID x PO

3371 // AL x NU	3394 // AL x NU

(...skipping 1128 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4500 UnicodeSet prependSet(UNICODE_STRING_SIMPLE("[:GCB=Prepend:]"), errorCode);	4523 UnicodeSet prependSet(UNICODE_STRING_SIMPLE("[:GCB=Prepend:]"), errorCode);

4501 if (!prependSet.isEmpty()) {	4524 if (!prependSet.isEmpty()) {

4502 errln(	4525 errln(

4503 "[:GCB=Prepend:] is not empty any more. "	4526 "[:GCB=Prepend:] is not empty any more. "

4504 "Uncomment relevant lines in source/data/brkitr/char.txt and "	4527 "Uncomment relevant lines in source/data/brkitr/char.txt and "

4505 "change this test to the opposite condition.");	4528 "change this test to the opposite condition.");

4506 }	4529 }

4507 }	4530 }

4508	4531

4509 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */	4532 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */

OLD	NEW

« no previous file with comments | « source/test/intltest/rbbitst.h ('k') | source/test/intltest/regcoll.cpp » ('j') | no next file with comments »