| OLD | NEW |
| 1 /******************************************************************** | 1 /******************************************************************** |
| 2 * COPYRIGHT: | 2 * COPYRIGHT: |
| 3 * Copyright (c) 1999-2013, International Business Machines Corporation and | 3 * Copyright (c) 1999-2014, International Business Machines Corporation and |
| 4 * others. All Rights Reserved. | 4 * others. All Rights Reserved. |
| 5 ********************************************************************/ | 5 ********************************************************************/ |
| 6 /************************************************************************ | 6 /************************************************************************ |
| 7 * Date Name Description | 7 * Date Name Description |
| 8 * 12/15/99 Madhu Creation. | 8 * 12/15/99 Madhu Creation. |
| 9 * 01/12/2000 Madhu Updated for changed API and added new tests | 9 * 01/12/2000 Madhu Updated for changed API and added new tests |
| 10 ************************************************************************/ | 10 ************************************************************************/ |
| 11 | 11 |
| 12 #include "utypeinfo.h" // for 'typeid' to work | 12 #include "utypeinfo.h" // for 'typeid' to work |
| 13 | 13 |
| (...skipping 10 matching lines...) Expand all Loading... |
| 24 #include "unicode/schriter.h" | 24 #include "unicode/schriter.h" |
| 25 #include "unicode/uniset.h" | 25 #include "unicode/uniset.h" |
| 26 #if !UCONFIG_NO_REGULAR_EXPRESSIONS | 26 #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
| 27 #include "unicode/regex.h" | 27 #include "unicode/regex.h" |
| 28 #endif | 28 #endif |
| 29 #include "unicode/ustring.h" | 29 #include "unicode/ustring.h" |
| 30 #include "unicode/utext.h" | 30 #include "unicode/utext.h" |
| 31 #include "intltest.h" | 31 #include "intltest.h" |
| 32 #include "rbbitst.h" | 32 #include "rbbitst.h" |
| 33 #include <string.h> | 33 #include <string.h> |
| 34 #include "charstr.h" |
| 34 #include "uvector.h" | 35 #include "uvector.h" |
| 35 #include "uvectr32.h" | 36 #include "uvectr32.h" |
| 36 #include <string.h> | |
| 37 #include <stdio.h> | 37 #include <stdio.h> |
| 38 #include <stdlib.h> | 38 #include <stdlib.h> |
| 39 #include "unicode/numfmt.h" | 39 #include "unicode/numfmt.h" |
| 40 #include "unicode/uscript.h" | 40 #include "unicode/uscript.h" |
| 41 | 41 |
| 42 #define TEST_ASSERT(x) {if (!(x)) { \ | 42 #define TEST_ASSERT(x) {if (!(x)) { \ |
| 43 errln("Failure in file %s, line %d", __FILE__, __LINE__);}} | 43 errln("Failure in file %s, line %d", __FILE__, __LINE__);}} |
| 44 | 44 |
| 45 #define TEST_ASSERT_SUCCESS(errcode) { if (U_FAILURE(errcode)) { \ | 45 #define TEST_ASSERT_SUCCESS(errcode) { if (U_FAILURE(errcode)) { \ |
| 46 errcheckln(errcode, "Failure in file %s, line %d, status = \"%s\"", __FILE__
, __LINE__, u_errorName(errcode));}} | 46 errcheckln(errcode, "Failure in file %s, line %d, status = \"%s\"", __FILE__
, __LINE__, u_errorName(errcode));}} |
| (...skipping 300 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 347 errln("FAIL: break at %d, expected tag %d, got tag %d\n", pos,
brkStatus[i], tag); | 347 errln("FAIL: break at %d, expected tag %d, got tag %d\n", pos,
brkStatus[i], tag); |
| 348 break; | 348 break; |
| 349 } | 349 } |
| 350 i++; | 350 i++; |
| 351 } | 351 } |
| 352 } | 352 } |
| 353 delete bi; | 353 delete bi; |
| 354 } | 354 } |
| 355 | 355 |
| 356 | 356 |
| 357 static void printStringBreaks(UnicodeString ustr, int expected[], | 357 static void printStringBreaks(UText *tstr, int expected[], int expectedCount) { |
| 358 int expectedcount) | |
| 359 { | |
| 360 UErrorCode status = U_ZERO_ERROR; | 358 UErrorCode status = U_ZERO_ERROR; |
| 361 char name[100]; | 359 char name[100]; |
| 362 printf("code alpha extend alphanum type word sent line name\n"); | 360 printf("code alpha extend alphanum type word sent line name\n"); |
| 363 int j; | 361 int nextExpectedIndex = 0; |
| 364 for (j = 0; j < ustr.length(); j ++) { | 362 utext_setNativeIndex(tstr, 0); |
| 365 if (expectedcount > 0) { | 363 for (int j = 0; j < utext_nativeLength(tstr); j=utext_getNativeIndex(tstr))
{ |
| 366 int k; | 364 if (nextExpectedIndex < expectedCount && j >= expected[nextExpectedIndex
] ) { |
| 367 for (k = 0; k < expectedcount; k ++) { | 365 printf("------------------------------------------------ %d\n", j); |
| 368 if (j == expected[k]) { | 366 ++nextExpectedIndex; |
| 369 printf("------------------------------------------------ %d\
n", | |
| 370 j); | |
| 371 } | |
| 372 } | |
| 373 } | 367 } |
| 374 UChar32 c = ustr.char32At(j); | 368 |
| 375 if (c > 0xffff) { | 369 UChar32 c = utext_next32(tstr); |
| 376 j ++; | |
| 377 } | |
| 378 u_charName(c, U_UNICODE_CHAR_NAME, name, 100, &status); | 370 u_charName(c, U_UNICODE_CHAR_NAME, name, 100, &status); |
| 379 printf("%7x %5d %6d %8d %4s %4s %4s %4s %s\n", (int)c, | 371 printf("%7x %5d %6d %8d %4s %4s %4s %4s %s\n", (int)c, |
| 380 u_isUAlphabetic(c), | 372 u_isUAlphabetic(c), |
| 381 u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND), | 373 u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND), |
| 382 u_isalnum(c), | 374 u_isalnum(c), |
| 383 u_getPropertyValueName(UCHAR_GENERAL_CATEGORY, | 375 u_getPropertyValueName(UCHAR_GENERAL_CATEGORY, |
| 384 u_charType(c), | 376 u_charType(c), |
| 385 U_SHORT_PROPERTY_NAME), | 377 U_SHORT_PROPERTY_NAME), |
| 386 u_getPropertyValueName(UCHAR_WORD_BREAK, | 378 u_getPropertyValueName(UCHAR_WORD_BREAK, |
| 387 u_getIntPropertyValue(c, | 379 u_getIntPropertyValue(c, |
| 388 UCHAR_WORD_BREAK), | 380 UCHAR_WORD_BREAK), |
| 389 U_SHORT_PROPERTY_NAME), | 381 U_SHORT_PROPERTY_NAME), |
| 390 u_getPropertyValueName(UCHAR_SENTENCE_BREAK, | 382 u_getPropertyValueName(UCHAR_SENTENCE_BREAK, |
| 391 u_getIntPropertyValue(c, | 383 u_getIntPropertyValue(c, |
| 392 UCHAR_SENTENCE_BREAK), | 384 UCHAR_SENTENCE_BREAK), |
| 393 U_SHORT_PROPERTY_NAME), | 385 U_SHORT_PROPERTY_NAME), |
| 394 u_getPropertyValueName(UCHAR_LINE_BREAK, | 386 u_getPropertyValueName(UCHAR_LINE_BREAK, |
| 395 u_getIntPropertyValue(c, | 387 u_getIntPropertyValue(c, |
| 396 UCHAR_LINE_BREAK), | 388 UCHAR_LINE_BREAK), |
| 397 U_SHORT_PROPERTY_NAME), | 389 U_SHORT_PROPERTY_NAME), |
| 398 name); | 390 name); |
| 399 } | 391 } |
| 400 } | 392 } |
| 401 | 393 |
| 402 | 394 |
| 395 static void printStringBreaks(const UnicodeString &ustr, int expected[], int exp
ectedCount) { |
| 396 UErrorCode status = U_ZERO_ERROR; |
| 397 UText *tstr = NULL; |
| 398 tstr = utext_openConstUnicodeString(NULL, &ustr, &status); |
| 399 if (U_FAILURE(status)) { |
| 400 printf("printStringBreaks, utext_openConstUnicodeString() returns %s\n",
u_errorName(status)); |
| 401 return; |
| 402 } |
| 403 printStringBreaks(tstr, expected, expectedCount); |
| 404 utext_close(tstr); |
| 405 } |
| 406 |
| 407 |
| 403 void RBBITest::TestBug3818() { | 408 void RBBITest::TestBug3818() { |
| 404 UErrorCode status = U_ZERO_ERROR; | 409 UErrorCode status = U_ZERO_ERROR; |
| 405 | 410 |
| 406 // Four Thai words... | 411 // Four Thai words... |
| 407 static const UChar thaiWordData[] = { 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0
x0E2B,0x0E0D,0x0E48, | 412 static const UChar thaiWordData[] = { 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0
x0E2B,0x0E0D,0x0E48, |
| 408 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0
x0E2B,0x0E0D,0x0E48, 0 }; | 413 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0
x0E2B,0x0E0D,0x0E48, 0 }; |
| 409 UnicodeString thaiStr(thaiWordData); | 414 UnicodeString thaiStr(thaiWordData); |
| 410 | 415 |
| 411 BreakIterator* bi = BreakIterator::createWordInstance(Locale("th"), status); | 416 BreakIterator* bi = BreakIterator::createWordInstance(Locale("th"), status); |
| 412 if (U_FAILURE(status) || bi == NULL) { | 417 if (U_FAILURE(status) || bi == NULL) { |
| (...skipping 410 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 823 | 828 |
| 824 | 829 |
| 825 | 830 |
| 826 //------------------------------------------------------------------------------ | 831 //------------------------------------------------------------------------------ |
| 827 // | 832 // |
| 828 // RBBITest::Extended Run RBBI Tests from an external test data file | 833 // RBBITest::Extended Run RBBI Tests from an external test data file |
| 829 // | 834 // |
| 830 //------------------------------------------------------------------------------ | 835 //------------------------------------------------------------------------------ |
| 831 | 836 |
| 832 struct TestParams { | 837 struct TestParams { |
| 833 BreakIterator *bi; | 838 BreakIterator *bi; // Break iterator is set while parsin
g test source. |
| 834 UnicodeString dataToBreak; | 839 // Changed out whenever test data c
hanges break type. |
| 835 UVector32 *expectedBreaks; | 840 |
| 836 UVector32 *srcLine; | 841 UnicodeString dataToBreak; // Data that is built up while parsin
g the test. |
| 842 UVector32 *expectedBreaks; // Expected break positions, matches
dataToBreak UnicodeString. |
| 843 UVector32 *srcLine; // Positions in source file, indexed
same as dataToBreak. |
| 837 UVector32 *srcCol; | 844 UVector32 *srcCol; |
| 845 |
| 846 UText *textToBreak; // UText, could be UTF8 or UTF16. |
| 847 UVector32 *textMap; // Map from UTF-16 dataToBreak offset
s to UText offsets. |
| 848 CharString utf8String; // UTF-8 form of text to break. |
| 849 |
| 850 TestParams(UErrorCode &status) : dataToBreak() { |
| 851 bi = NULL; |
| 852 expectedBreaks = new UVector32(status); |
| 853 srcLine = new UVector32(status); |
| 854 srcCol = new UVector32(status); |
| 855 textToBreak = NULL; |
| 856 textMap = new UVector32(status); |
| 857 } |
| 858 |
| 859 ~TestParams() { |
| 860 delete bi; |
| 861 delete expectedBreaks; |
| 862 delete srcLine; |
| 863 delete srcCol; |
| 864 utext_close(textToBreak); |
| 865 delete textMap; |
| 866 } |
| 867 |
| 868 int32_t getSrcLine(int32_t bp); |
| 869 int32_t getExpectedBreak(int32_t bp); |
| 870 int32_t getSrcCol(int32_t bp); |
| 871 |
| 872 void setUTF16(UErrorCode &status); |
| 873 void setUTF8(UErrorCode &status); |
| 838 }; | 874 }; |
| 839 | 875 |
| 840 void RBBITest::executeTest(TestParams *t) { | 876 // Append a UnicodeString to a CharString with UTF-8 encoding. |
| 877 // Substitute any invalid chars. |
| 878 // Note: this is used with test data that includes a few unpaired surrogates i
n the UTF-16 that will be substituted. |
| 879 static void CharStringAppend(CharString &dest, const UnicodeString &src, UErrorC
ode &status) { |
| 880 if (U_FAILURE(status)) { |
| 881 return; |
| 882 } |
| 883 int32_t utf8Length; |
| 884 u_strToUTF8WithSub(NULL, 0, &utf8Length, // Output Buffer, NULL f
or preflight. |
| 885 src.getBuffer(), src.length(), // UTF-16 data |
| 886 0xfffd, NULL, // Substitution char, nu
mber of subs. |
| 887 &status); |
| 888 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) { |
| 889 return; |
| 890 } |
| 891 status = U_ZERO_ERROR; |
| 892 int32_t capacity; |
| 893 char *buffer = dest.getAppendBuffer(utf8Length, utf8Length, capacity, status
); |
| 894 u_strToUTF8WithSub(buffer, utf8Length, NULL, |
| 895 src.getBuffer(), src.length(), |
| 896 0xfffd, NULL, &status); |
| 897 dest.append(buffer, utf8Length, status); |
| 898 } |
| 899 |
| 900 |
| 901 void TestParams::setUTF16(UErrorCode &status) { |
| 902 textToBreak = utext_openUnicodeString(textToBreak, &dataToBreak, &status); |
| 903 textMap->removeAllElements(); |
| 904 for (int32_t i=0; i<dataToBreak.length(); i++) { |
| 905 if (i == dataToBreak.getChar32Start(i)) { |
| 906 textMap->addElement(i, status); |
| 907 } else { |
| 908 textMap->addElement(-1, status); |
| 909 } |
| 910 } |
| 911 textMap->addElement(dataToBreak.length(), status); |
| 912 U_ASSERT(dataToBreak.length() + 1 == textMap->size()); |
| 913 } |
| 914 |
| 915 |
| 916 void TestParams::setUTF8(UErrorCode &status) { |
| 917 if (U_FAILURE(status)) { |
| 918 return; |
| 919 } |
| 920 utf8String.clear(); |
| 921 CharStringAppend(utf8String, dataToBreak, status); |
| 922 textToBreak = utext_openUTF8(textToBreak, utf8String.data(), utf8String.leng
th(), &status); |
| 923 if (U_FAILURE(status)) { |
| 924 return; |
| 925 } |
| 926 |
| 927 textMap->removeAllElements(); |
| 928 int32_t utf16Index = 0; |
| 929 for (;;) { |
| 930 textMap->addElement(utf16Index, status); |
| 931 UChar32 c32 = utext_current32(textToBreak); |
| 932 if (c32 < 0) { |
| 933 break; |
| 934 } |
| 935 utf16Index += U16_LENGTH(c32); |
| 936 utext_next32(textToBreak); |
| 937 while (textMap->size() < utext_getNativeIndex(textToBreak)) { |
| 938 textMap->addElement(-1, status); |
| 939 } |
| 940 } |
| 941 U_ASSERT(utext_nativeLength(textToBreak) + 1 == textMap->size()); |
| 942 } |
| 943 |
| 944 |
| 945 int32_t TestParams::getSrcLine(int bp) { |
| 946 if (bp >= textMap->size()) { |
| 947 bp = textMap->size() - 1; |
| 948 } |
| 949 int32_t i = 0; |
| 950 for(; bp >= 0 ; --bp) { |
| 951 // Move to a character boundary if we are not on one already. |
| 952 i = textMap->elementAti(bp); |
| 953 if (i >= 0) { |
| 954 break; |
| 955 } |
| 956 } |
| 957 return srcLine->elementAti(i); |
| 958 } |
| 959 |
| 960 |
| 961 int32_t TestParams::getExpectedBreak(int bp) { |
| 962 if (bp >= textMap->size()) { |
| 963 return 0; |
| 964 } |
| 965 int32_t i = textMap->elementAti(bp); |
| 966 int32_t retVal = 0; |
| 967 if (i >= 0) { |
| 968 retVal = expectedBreaks->elementAti(i); |
| 969 } |
| 970 return retVal; |
| 971 } |
| 972 |
| 973 |
| 974 int32_t TestParams::getSrcCol(int bp) { |
| 975 if (bp >= textMap->size()) { |
| 976 bp = textMap->size() - 1; |
| 977 } |
| 978 int32_t i = 0; |
| 979 for(; bp >= 0; --bp) { |
| 980 // Move bp to a character boundary if we are not on one already. |
| 981 i = textMap->elementAti(bp); |
| 982 if (i >= 0) { |
| 983 break; |
| 984 } |
| 985 } |
| 986 return srcCol->elementAti(i); |
| 987 } |
| 988 |
| 989 |
| 990 void RBBITest::executeTest(TestParams *t, UErrorCode &status) { |
| 841 int32_t bp; | 991 int32_t bp; |
| 842 int32_t prevBP; | 992 int32_t prevBP; |
| 843 int32_t i; | 993 int32_t i; |
| 844 | 994 |
| 995 TEST_ASSERT_SUCCESS(status); |
| 996 if (U_FAILURE(status)) { |
| 997 return; |
| 998 } |
| 999 |
| 845 if (t->bi == NULL) { | 1000 if (t->bi == NULL) { |
| 846 return; | 1001 return; |
| 847 } | 1002 } |
| 848 | 1003 |
| 849 t->bi->setText(t->dataToBreak); | 1004 t->bi->setText(t->textToBreak, status); |
| 850 // | 1005 // |
| 851 // Run the iterator forward | 1006 // Run the iterator forward |
| 852 // | 1007 // |
| 853 prevBP = -1; | 1008 prevBP = -1; |
| 854 for (bp = t->bi->first(); bp != BreakIterator::DONE; bp = t->bi->next()) { | 1009 for (bp = t->bi->first(); bp != BreakIterator::DONE; bp = t->bi->next()) { |
| 855 if (prevBP == bp) { | 1010 if (prevBP == bp) { |
| 856 // Fail for lack of forward progress. | 1011 // Fail for lack of forward progress. |
| 857 errln("Forward Iteration, no forward progress. Break Pos=%4d File
line,col=%4d,%4d", | 1012 errln("Forward Iteration, no forward progress. Break Pos=%4d File
line,col=%4d,%4d", |
| 858 bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); | 1013 bp, t->getSrcLine(bp), t->getSrcCol(bp)); |
| 859 break; | 1014 break; |
| 860 } | 1015 } |
| 861 | 1016 |
| 862 // Check that there were we didn't miss an expected break between the la
st one | 1017 // Check that there we didn't miss an expected break between the last on
e |
| 863 // and this one. | 1018 // and this one. |
| 864 for (i=prevBP+1; i<bp; i++) { | 1019 for (i=prevBP+1; i<bp; i++) { |
| 865 if (t->expectedBreaks->elementAti(i) != 0) { | 1020 if (t->getExpectedBreak(i) != 0) { |
| 866 int expected[] = {0, i}; | 1021 int expected[] = {0, i}; |
| 867 printStringBreaks(t->dataToBreak, expected, 2); | 1022 printStringBreaks(t->dataToBreak, expected, 2); |
| 868 errln("Forward Iteration, break expected, but not found. Pos=%4
d File line,col= %4d,%4d", | 1023 errln("Forward Iteration, break expected, but not found. Pos=%4
d File line,col= %4d,%4d", |
| 869 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); | 1024 i, t->getSrcLine(i), t->getSrcCol(i)); |
| 870 } | 1025 } |
| 871 } | 1026 } |
| 872 | 1027 |
| 873 // Check that the break we did find was expected | 1028 // Check that the break we did find was expected |
| 874 if (t->expectedBreaks->elementAti(bp) == 0) { | 1029 if (t->getExpectedBreak(bp) == 0) { |
| 875 int expected[] = {0, bp}; | 1030 int expected[] = {0, bp}; |
| 876 printStringBreaks(t->dataToBreak, expected, 2); | 1031 printStringBreaks(t->textToBreak, expected, 2); |
| 877 errln("Forward Iteration, break found, but not expected. Pos=%4d F
ile line,col= %4d,%4d", | 1032 errln("Forward Iteration, break found, but not expected. Pos=%4d F
ile line,col= %4d,%4d", |
| 878 bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); | 1033 bp, t->getSrcLine(bp), t->getSrcCol(bp)); |
| 879 } else { | 1034 } else { |
| 880 // The break was expected. | 1035 // The break was expected. |
| 881 // Check that the {nnn} tag value is correct. | 1036 // Check that the {nnn} tag value is correct. |
| 882 int32_t expectedTagVal = t->expectedBreaks->elementAti(bp); | 1037 int32_t expectedTagVal = t->getExpectedBreak(bp); |
| 883 if (expectedTagVal == -1) { | 1038 if (expectedTagVal == -1) { |
| 884 expectedTagVal = 0; | 1039 expectedTagVal = 0; |
| 885 } | 1040 } |
| 886 int32_t line = t->srcLine->elementAti(bp); | 1041 int32_t line = t->getSrcLine(bp); |
| 887 int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus(); | 1042 int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus(); |
| 888 if (rs != expectedTagVal) { | 1043 if (rs != expectedTagVal) { |
| 889 errln("Incorrect status for forward break. Pos=%4d File line,c
ol= %4d,%4d.\n" | 1044 errln("Incorrect status for forward break. Pos=%4d File line,c
ol= %4d,%4d.\n" |
| 890 " Actual, Expected status = %4d, %4d", | 1045 " Actual, Expected status = %4d, %4d", |
| 891 bp, line, t->srcCol->elementAti(bp), rs, expectedTagVal); | 1046 bp, line, t->getSrcCol(bp), rs, expectedTagVal); |
| 892 } | 1047 } |
| 893 } | 1048 } |
| 894 | 1049 |
| 895 | |
| 896 prevBP = bp; | 1050 prevBP = bp; |
| 897 } | 1051 } |
| 898 | 1052 |
| 899 // Verify that there were no missed expected breaks after the last one found | 1053 // Verify that there were no missed expected breaks after the last one found |
| 900 for (i=prevBP+1; i<t->expectedBreaks->size(); i++) { | 1054 for (i=prevBP+1; i<utext_nativeLength(t->textToBreak); i++) { |
| 901 if (t->expectedBreaks->elementAti(i) != 0) { | 1055 if (t->getExpectedBreak(i) != 0) { |
| 902 errln("Forward Iteration, break expected, but not found. Pos=%4d F
ile line,col= %4d,%4d", | 1056 errln("Forward Iteration, break expected, but not found. Pos=%4d F
ile line,col= %4d,%4d", |
| 903 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); | 1057 i, t->getSrcLine(i), t->getSrcCol(i)); |
| 904 } | 1058 } |
| 905 } | 1059 } |
| 906 | 1060 |
| 907 // | 1061 // |
| 908 // Run the iterator backwards, verify that the same breaks are found. | 1062 // Run the iterator backwards, verify that the same breaks are found. |
| 909 // | 1063 // |
| 910 prevBP = t->dataToBreak.length()+2; // start with a phony value for the las
t break pos seen. | 1064 prevBP = utext_nativeLength(t->textToBreak)+2; // start with a phony value
for the last break pos seen. |
| 911 for (bp = t->bi->last(); bp != BreakIterator::DONE; bp = t->bi->previous())
{ | 1065 for (bp = t->bi->last(); bp != BreakIterator::DONE; bp = t->bi->previous())
{ |
| 912 if (prevBP == bp) { | 1066 if (prevBP == bp) { |
| 913 // Fail for lack of progress. | 1067 // Fail for lack of progress. |
| 914 errln("Reverse Iteration, no progress. Break Pos=%4d File line,col
=%4d,%4d", | 1068 errln("Reverse Iteration, no progress. Break Pos=%4d File line,col
=%4d,%4d", |
| 915 bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); | 1069 bp, t->getSrcLine(bp), t->getSrcCol(bp)); |
| 916 break; | 1070 break; |
| 917 } | 1071 } |
| 918 | 1072 |
| 919 // Check that there were we didn't miss an expected break between the la
st one | 1073 // Check that we didn't miss an expected break between the last one |
| 920 // and this one. (UVector returns zeros for index out of bounds.) | 1074 // and this one. (UVector returns zeros for index out of bounds.) |
| 921 for (i=prevBP-1; i>bp; i--) { | 1075 for (i=prevBP-1; i>bp; i--) { |
| 922 if (t->expectedBreaks->elementAti(i) != 0) { | 1076 if (t->getExpectedBreak(i) != 0) { |
| 923 errln("Reverse Itertion, break expected, but not found. Pos=%4d
File line,col= %4d,%4d", | 1077 errln("Reverse Iteration, break expected, but not found. Pos=%4
d File line,col= %4d,%4d", |
| 924 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); | 1078 i, t->getSrcLine(i), t->getSrcCol(i)); |
| 925 } | 1079 } |
| 926 } | 1080 } |
| 927 | 1081 |
| 928 // Check that the break we did find was expected | 1082 // Check that the break we did find was expected |
| 929 if (t->expectedBreaks->elementAti(bp) == 0) { | 1083 if (t->getExpectedBreak(bp) == 0) { |
| 930 errln("Reverse Itertion, break found, but not expected. Pos=%4d Fi
le line,col= %4d,%4d", | 1084 errln("Reverse Itertion, break found, but not expected. Pos=%4d Fi
le line,col= %4d,%4d", |
| 931 bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); | 1085 bp, t->getSrcLine(bp), t->getSrcCol(bp)); |
| 932 } else { | 1086 } else { |
| 933 // The break was expected. | 1087 // The break was expected. |
| 934 // Check that the {nnn} tag value is correct. | 1088 // Check that the {nnn} tag value is correct. |
| 935 int32_t expectedTagVal = t->expectedBreaks->elementAti(bp); | 1089 int32_t expectedTagVal = t->getExpectedBreak(bp); |
| 936 if (expectedTagVal == -1) { | 1090 if (expectedTagVal == -1) { |
| 937 expectedTagVal = 0; | 1091 expectedTagVal = 0; |
| 938 } | 1092 } |
| 939 int line = t->srcLine->elementAti(bp); | 1093 int line = t->getSrcLine(bp); |
| 940 int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus(); | 1094 int32_t rs = t->bi->getRuleStatus(); |
| 941 if (rs != expectedTagVal) { | 1095 if (rs != expectedTagVal) { |
| 942 errln("Incorrect status for reverse break. Pos=%4d File line,c
ol= %4d,%4d.\n" | 1096 errln("Incorrect status for reverse break. Pos=%4d File line,c
ol= %4d,%4d.\n" |
| 943 " Actual, Expected status = %4d, %4d", | 1097 " Actual, Expected status = %4d, %4d", |
| 944 bp, line, t->srcCol->elementAti(bp), rs, expectedTagVal); | 1098 bp, line, t->getSrcCol(bp), rs, expectedTagVal); |
| 945 } | 1099 } |
| 946 } | 1100 } |
| 947 | 1101 |
| 948 prevBP = bp; | 1102 prevBP = bp; |
| 949 } | 1103 } |
| 950 | 1104 |
| 951 // Verify that there were no missed breaks prior to the last one found | 1105 // Verify that there were no missed breaks prior to the last one found |
| 952 for (i=prevBP-1; i>=0; i--) { | 1106 for (i=prevBP-1; i>=0; i--) { |
| 953 if (t->expectedBreaks->elementAti(i) != 0) { | 1107 if (t->getExpectedBreak(i) != 0) { |
| 954 errln("Forward Itertion, break expected, but not found. Pos=%4d Fi
le line,col= %4d,%4d", | 1108 errln("Forward Itertion, break expected, but not found. Pos=%4d Fi
le line,col= %4d,%4d", |
| 955 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); | 1109 i, t->getSrcLine(i), t->getSrcCol(i)); |
| 956 } | 1110 } |
| 957 } | 1111 } |
| 958 | 1112 |
| 959 // Check isBoundary() | 1113 // Check isBoundary() |
| 960 for (i=0; i<t->expectedBreaks->size(); i++) { | 1114 for (i=0; i < utext_nativeLength(t->textToBreak); i++) { |
| 961 UBool boundaryExpected = (t->expectedBreaks->elementAti(i) != 0); | 1115 UBool boundaryExpected = (t->getExpectedBreak(i) != 0); |
| 962 UBool boundaryFound = t->bi->isBoundary(i); | 1116 UBool boundaryFound = t->bi->isBoundary(i); |
| 963 if (boundaryExpected != boundaryFound) { | 1117 if (boundaryExpected != boundaryFound) { |
| 964 errln("isBoundary(%d) incorrect. File line,col= %4d,%4d\n" | 1118 errln("isBoundary(%d) incorrect. File line,col= %4d,%4d\n" |
| 965 " Expected, Actual= %s, %s", | 1119 " Expected, Actual= %s, %s", |
| 966 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i), | 1120 i, t->getSrcLine(i), t->getSrcCol(i), |
| 967 boundaryExpected ? "true":"false", boundaryFound? "true" : "fa
lse"); | 1121 boundaryExpected ? "true":"false", boundaryFound? "true" : "fa
lse"); |
| 968 } | 1122 } |
| 969 } | 1123 } |
| 970 | 1124 |
| 971 // Check following() | 1125 // Check following() |
| 972 for (i=0; i<t->expectedBreaks->size(); i++) { | 1126 for (i=0; i < utext_nativeLength(t->textToBreak); i++) { |
| 973 int32_t actualBreak = t->bi->following(i); | 1127 int32_t actualBreak = t->bi->following(i); |
| 974 int32_t expectedBreak = BreakIterator::DONE; | 1128 int32_t expectedBreak = BreakIterator::DONE; |
| 975 for (int32_t j=i+1; j < t->expectedBreaks->size(); j++) { | 1129 for (int32_t j=i+1; j <= utext_nativeLength(t->textToBreak); j++) { |
| 976 if (t->expectedBreaks->elementAti(j) != 0) { | 1130 if (t->getExpectedBreak(j) != 0) { |
| 977 expectedBreak = j; | 1131 expectedBreak = j; |
| 978 break; | 1132 break; |
| 979 } | 1133 } |
| 980 } | 1134 } |
| 981 if (expectedBreak != actualBreak) { | 1135 if (expectedBreak != actualBreak) { |
| 982 errln("following(%d) incorrect. File line,col= %4d,%4d\n" | 1136 errln("following(%d) incorrect. File line,col= %4d,%4d\n" |
| 983 " Expected, Actual= %d, %d", | 1137 " Expected, Actual= %d, %d", |
| 984 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i), expect
edBreak, actualBreak); | 1138 i, t->getSrcLine(i), t->getSrcCol(i), expectedBreak, actualBre
ak); |
| 985 } | 1139 } |
| 986 } | 1140 } |
| 987 | 1141 |
| 988 // Check preceding() | 1142 // Check preceding() |
| 989 for (i=t->expectedBreaks->size(); i>=0; i--) { | 1143 for (i=utext_nativeLength(t->textToBreak); i>=0; i--) { |
| 990 int32_t actualBreak = t->bi->preceding(i); | 1144 int32_t actualBreak = t->bi->preceding(i); |
| 991 int32_t expectedBreak = BreakIterator::DONE; | 1145 int32_t expectedBreak = BreakIterator::DONE; |
| 992 | 1146 |
| 993 for (int32_t j=i-1; j >= 0; j--) { | 1147 // For UTF-8 & UTF-16 supplementals, all code units of a character are e
quivalent. |
| 994 if (t->expectedBreaks->elementAti(j) != 0) { | 1148 // preceding(trailing byte) will return the index of some preceding code
point, |
| 1149 // not the lead byte of the current code point, even though that has a s
maller index. |
| 1150 // Therefore, start looking at the expected break data not at i-1, but a
t |
| 1151 // the start of code point index - 1. |
| 1152 utext_setNativeIndex(t->textToBreak, i); |
| 1153 int32_t j = utext_getNativeIndex(t->textToBreak) - 1; |
| 1154 for (; j >= 0; j--) { |
| 1155 if (t->getExpectedBreak(j) != 0) { |
| 995 expectedBreak = j; | 1156 expectedBreak = j; |
| 996 break; | 1157 break; |
| 997 } | 1158 } |
| 998 } | 1159 } |
| 999 if (expectedBreak != actualBreak) { | 1160 if (expectedBreak != actualBreak) { |
| 1000 errln("preceding(%d) incorrect. File line,col= %4d,%4d\n" | 1161 errln("preceding(%d) incorrect. File line,col= %4d,%4d\n" |
| 1001 " Expected, Actual= %d, %d", | 1162 " Expected, Actual= %d, %d", |
| 1002 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i), expect
edBreak, actualBreak); | 1163 i, t->getSrcLine(i), t->getSrcCol(i), expectedBreak, actualBre
ak); |
| 1003 } | 1164 } |
| 1004 } | 1165 } |
| 1005 } | 1166 } |
| 1006 | 1167 |
| 1007 | 1168 |
| 1008 void RBBITest::TestExtended() { | 1169 void RBBITest::TestExtended() { |
| 1009 #if !UCONFIG_NO_REGULAR_EXPRESSIONS | 1170 #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
| 1010 UErrorCode status = U_ZERO_ERROR; | 1171 UErrorCode status = U_ZERO_ERROR; |
| 1011 Locale locale(""); | 1172 Locale locale(""); |
| 1012 | 1173 |
| 1013 UnicodeString rules; | 1174 UnicodeString rules; |
| 1014 TestParams tp; | 1175 TestParams tp(status); |
| 1015 tp.bi = NULL; | |
| 1016 tp.expectedBreaks = new UVector32(status); | |
| 1017 tp.srcLine = new UVector32(status); | |
| 1018 tp.srcCol = new UVector32(status); | |
| 1019 | 1176 |
| 1020 RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{
Nd}_]*) *>"), 0, status); | 1177 RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{
Nd}_]*) *>"), 0, status); |
| 1021 if (U_FAILURE(status)) { | 1178 if (U_FAILURE(status)) { |
| 1022 dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LI
NE__, u_errorName(status)); | 1179 dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LI
NE__, u_errorName(status)); |
| 1023 } | 1180 } |
| 1024 | 1181 |
| 1025 | 1182 |
| 1026 // | 1183 // |
| 1027 // Open and read the test data file. | 1184 // Open and read the test data file. |
| 1028 // | 1185 // |
| (...skipping 154 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1183 // Add final entry to mappings from break location to source fil
e position. | 1340 // Add final entry to mappings from break location to source fil
e position. |
| 1184 // Need one extra because last break position returned is after
the | 1341 // Need one extra because last break position returned is after
the |
| 1185 // last char in the data, not at the last char. | 1342 // last char in the data, not at the last char. |
| 1186 tp.srcLine->addElement(lineNum, status); | 1343 tp.srcLine->addElement(lineNum, status); |
| 1187 tp.srcCol ->addElement(column, status); | 1344 tp.srcCol ->addElement(column, status); |
| 1188 | 1345 |
| 1189 parseState = PARSE_TAG; | 1346 parseState = PARSE_TAG; |
| 1190 charIdx += 6; | 1347 charIdx += 6; |
| 1191 | 1348 |
| 1192 // RUN THE TEST! | 1349 // RUN THE TEST! |
| 1193 executeTest(&tp); | 1350 status = U_ZERO_ERROR; |
| 1351 tp.setUTF16(status); |
| 1352 executeTest(&tp, status); |
| 1353 TEST_ASSERT_SUCCESS(status); |
| 1354 |
| 1355 // Run again, this time with UTF-8 text wrapped in a UText. |
| 1356 status = U_ZERO_ERROR; |
| 1357 tp.setUTF8(status); |
| 1358 TEST_ASSERT_SUCCESS(status); |
| 1359 executeTest(&tp, status); |
| 1194 break; | 1360 break; |
| 1195 } | 1361 } |
| 1196 | 1362 |
| 1197 if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{"))
== 0) { | 1363 if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{"))
== 0) { |
| 1198 // Named character, e.g. \N{COMBINING GRAVE ACCENT} | 1364 // Named character, e.g. \N{COMBINING GRAVE ACCENT} |
| 1199 // Get the code point from the name and insert it into the test
data. | 1365 // Get the code point from the name and insert it into the test
data. |
| 1200 // (Damn, no API takes names in Unicode !!! | 1366 // (Damn, no API takes names in Unicode !!! |
| 1201 // we've got to take it back to char *) | 1367 // we've got to take it back to char *) |
| 1202 int32_t nameEndIdx = testString.indexOf((UChar)0x7d/*'}'*/, char
Idx); | 1368 int32_t nameEndIdx = testString.indexOf((UChar)0x7d/*'}'*/, char
Idx); |
| 1203 int32_t nameLength = nameEndIdx - (charIdx+2); | 1369 int32_t nameLength = nameEndIdx - (charIdx+2); |
| (...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1349 if (U_FAILURE(status)) { | 1515 if (U_FAILURE(status)) { |
| 1350 dataerrln("ICU Error %s while parsing test file at line %d.", | 1516 dataerrln("ICU Error %s while parsing test file at line %d.", |
| 1351 u_errorName(status), lineNum); | 1517 u_errorName(status), lineNum); |
| 1352 status = U_ZERO_ERROR; | 1518 status = U_ZERO_ERROR; |
| 1353 goto end_test; // Stop the test | 1519 goto end_test; // Stop the test |
| 1354 } | 1520 } |
| 1355 | 1521 |
| 1356 } | 1522 } |
| 1357 | 1523 |
| 1358 end_test: | 1524 end_test: |
| 1359 delete tp.bi; | |
| 1360 delete tp.expectedBreaks; | |
| 1361 delete tp.srcLine; | |
| 1362 delete tp.srcCol; | |
| 1363 delete [] testFile; | 1525 delete [] testFile; |
| 1364 #endif | 1526 #endif |
| 1365 } | 1527 } |
| 1366 | 1528 |
| 1367 | 1529 |
| 1368 //------------------------------------------------------------------------------
- | 1530 //------------------------------------------------------------------------------
- |
| 1369 // | 1531 // |
| 1370 // TestDictRules create a break iterator from source rules that includes a | 1532 // TestDictRules create a break iterator from source rules that includes a |
| 1371 // dictionary range. Regression for bug #7130. Source rules | 1533 // dictionary range. Regression for bug #7130. Source rules |
| 1372 // do not declare a break iterator type (word, line, sentence,
etc. | 1534 // do not declare a break iterator type (word, line, sentence,
etc. |
| (...skipping 2965 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4338 UnicodeSet prependSet(UNICODE_STRING_SIMPLE("[:GCB=Prepend:]"), errorCode); | 4500 UnicodeSet prependSet(UNICODE_STRING_SIMPLE("[:GCB=Prepend:]"), errorCode); |
| 4339 if (!prependSet.isEmpty()) { | 4501 if (!prependSet.isEmpty()) { |
| 4340 errln( | 4502 errln( |
| 4341 "[:GCB=Prepend:] is not empty any more. " | 4503 "[:GCB=Prepend:] is not empty any more. " |
| 4342 "Uncomment relevant lines in source/data/brkitr/char.txt and " | 4504 "Uncomment relevant lines in source/data/brkitr/char.txt and " |
| 4343 "change this test to the opposite condition."); | 4505 "change this test to the opposite condition."); |
| 4344 } | 4506 } |
| 4345 } | 4507 } |
| 4346 | 4508 |
| 4347 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | 4509 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
| OLD | NEW |