OLD | NEW |
1 /******************************************************************** | 1 /******************************************************************** |
2 * COPYRIGHT: | 2 * COPYRIGHT: |
3 * Copyright (c) 1999-2013, International Business Machines Corporation and | 3 * Copyright (c) 1999-2014, International Business Machines Corporation and |
4 * others. All Rights Reserved. | 4 * others. All Rights Reserved. |
5 ********************************************************************/ | 5 ********************************************************************/ |
6 /************************************************************************ | 6 /************************************************************************ |
7 * Date Name Description | 7 * Date Name Description |
8 * 12/15/99 Madhu Creation. | 8 * 12/15/99 Madhu Creation. |
9 * 01/12/2000 Madhu Updated for changed API and added new tests | 9 * 01/12/2000 Madhu Updated for changed API and added new tests |
10 ************************************************************************/ | 10 ************************************************************************/ |
11 | 11 |
12 #include "utypeinfo.h" // for 'typeid' to work | 12 #include "utypeinfo.h" // for 'typeid' to work |
13 | 13 |
(...skipping 10 matching lines...) Expand all Loading... |
24 #include "unicode/schriter.h" | 24 #include "unicode/schriter.h" |
25 #include "unicode/uniset.h" | 25 #include "unicode/uniset.h" |
26 #if !UCONFIG_NO_REGULAR_EXPRESSIONS | 26 #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
27 #include "unicode/regex.h" | 27 #include "unicode/regex.h" |
28 #endif | 28 #endif |
29 #include "unicode/ustring.h" | 29 #include "unicode/ustring.h" |
30 #include "unicode/utext.h" | 30 #include "unicode/utext.h" |
31 #include "intltest.h" | 31 #include "intltest.h" |
32 #include "rbbitst.h" | 32 #include "rbbitst.h" |
33 #include <string.h> | 33 #include <string.h> |
| 34 #include "charstr.h" |
34 #include "uvector.h" | 35 #include "uvector.h" |
35 #include "uvectr32.h" | 36 #include "uvectr32.h" |
36 #include <string.h> | |
37 #include <stdio.h> | 37 #include <stdio.h> |
38 #include <stdlib.h> | 38 #include <stdlib.h> |
39 #include "unicode/numfmt.h" | 39 #include "unicode/numfmt.h" |
40 #include "unicode/uscript.h" | 40 #include "unicode/uscript.h" |
41 | 41 |
42 #define TEST_ASSERT(x) {if (!(x)) { \ | 42 #define TEST_ASSERT(x) {if (!(x)) { \ |
43 errln("Failure in file %s, line %d", __FILE__, __LINE__);}} | 43 errln("Failure in file %s, line %d", __FILE__, __LINE__);}} |
44 | 44 |
45 #define TEST_ASSERT_SUCCESS(errcode) { if (U_FAILURE(errcode)) { \ | 45 #define TEST_ASSERT_SUCCESS(errcode) { if (U_FAILURE(errcode)) { \ |
46 errcheckln(errcode, "Failure in file %s, line %d, status = \"%s\"", __FILE__
, __LINE__, u_errorName(errcode));}} | 46 errcheckln(errcode, "Failure in file %s, line %d, status = \"%s\"", __FILE__
, __LINE__, u_errorName(errcode));}} |
(...skipping 300 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
347 errln("FAIL: break at %d, expected tag %d, got tag %d\n", pos,
brkStatus[i], tag); | 347 errln("FAIL: break at %d, expected tag %d, got tag %d\n", pos,
brkStatus[i], tag); |
348 break; | 348 break; |
349 } | 349 } |
350 i++; | 350 i++; |
351 } | 351 } |
352 } | 352 } |
353 delete bi; | 353 delete bi; |
354 } | 354 } |
355 | 355 |
356 | 356 |
357 static void printStringBreaks(UnicodeString ustr, int expected[], | 357 static void printStringBreaks(UText *tstr, int expected[], int expectedCount) { |
358 int expectedcount) | |
359 { | |
360 UErrorCode status = U_ZERO_ERROR; | 358 UErrorCode status = U_ZERO_ERROR; |
361 char name[100]; | 359 char name[100]; |
362 printf("code alpha extend alphanum type word sent line name\n"); | 360 printf("code alpha extend alphanum type word sent line name\n"); |
363 int j; | 361 int nextExpectedIndex = 0; |
364 for (j = 0; j < ustr.length(); j ++) { | 362 utext_setNativeIndex(tstr, 0); |
365 if (expectedcount > 0) { | 363 for (int j = 0; j < utext_nativeLength(tstr); j=utext_getNativeIndex(tstr))
{ |
366 int k; | 364 if (nextExpectedIndex < expectedCount && j >= expected[nextExpectedIndex
] ) { |
367 for (k = 0; k < expectedcount; k ++) { | 365 printf("------------------------------------------------ %d\n", j); |
368 if (j == expected[k]) { | 366 ++nextExpectedIndex; |
369 printf("------------------------------------------------ %d\
n", | |
370 j); | |
371 } | |
372 } | |
373 } | 367 } |
374 UChar32 c = ustr.char32At(j); | 368 |
375 if (c > 0xffff) { | 369 UChar32 c = utext_next32(tstr); |
376 j ++; | |
377 } | |
378 u_charName(c, U_UNICODE_CHAR_NAME, name, 100, &status); | 370 u_charName(c, U_UNICODE_CHAR_NAME, name, 100, &status); |
379 printf("%7x %5d %6d %8d %4s %4s %4s %4s %s\n", (int)c, | 371 printf("%7x %5d %6d %8d %4s %4s %4s %4s %s\n", (int)c, |
380 u_isUAlphabetic(c), | 372 u_isUAlphabetic(c), |
381 u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND), | 373 u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND), |
382 u_isalnum(c), | 374 u_isalnum(c), |
383 u_getPropertyValueName(UCHAR_GENERAL_CATEGORY, | 375 u_getPropertyValueName(UCHAR_GENERAL_CATEGORY, |
384 u_charType(c), | 376 u_charType(c), |
385 U_SHORT_PROPERTY_NAME), | 377 U_SHORT_PROPERTY_NAME), |
386 u_getPropertyValueName(UCHAR_WORD_BREAK, | 378 u_getPropertyValueName(UCHAR_WORD_BREAK, |
387 u_getIntPropertyValue(c, | 379 u_getIntPropertyValue(c, |
388 UCHAR_WORD_BREAK), | 380 UCHAR_WORD_BREAK), |
389 U_SHORT_PROPERTY_NAME), | 381 U_SHORT_PROPERTY_NAME), |
390 u_getPropertyValueName(UCHAR_SENTENCE_BREAK, | 382 u_getPropertyValueName(UCHAR_SENTENCE_BREAK, |
391 u_getIntPropertyValue(c, | 383 u_getIntPropertyValue(c, |
392 UCHAR_SENTENCE_BREAK), | 384 UCHAR_SENTENCE_BREAK), |
393 U_SHORT_PROPERTY_NAME), | 385 U_SHORT_PROPERTY_NAME), |
394 u_getPropertyValueName(UCHAR_LINE_BREAK, | 386 u_getPropertyValueName(UCHAR_LINE_BREAK, |
395 u_getIntPropertyValue(c, | 387 u_getIntPropertyValue(c, |
396 UCHAR_LINE_BREAK), | 388 UCHAR_LINE_BREAK), |
397 U_SHORT_PROPERTY_NAME), | 389 U_SHORT_PROPERTY_NAME), |
398 name); | 390 name); |
399 } | 391 } |
400 } | 392 } |
401 | 393 |
402 | 394 |
| 395 static void printStringBreaks(const UnicodeString &ustr, int expected[], int exp
ectedCount) { |
| 396 UErrorCode status = U_ZERO_ERROR; |
| 397 UText *tstr = NULL; |
| 398 tstr = utext_openConstUnicodeString(NULL, &ustr, &status); |
| 399 if (U_FAILURE(status)) { |
| 400 printf("printStringBreaks, utext_openConstUnicodeString() returns %s\n",
u_errorName(status)); |
| 401 return; |
| 402 } |
| 403 printStringBreaks(tstr, expected, expectedCount); |
| 404 utext_close(tstr); |
| 405 } |
| 406 |
| 407 |
403 void RBBITest::TestBug3818() { | 408 void RBBITest::TestBug3818() { |
404 UErrorCode status = U_ZERO_ERROR; | 409 UErrorCode status = U_ZERO_ERROR; |
405 | 410 |
406 // Four Thai words... | 411 // Four Thai words... |
407 static const UChar thaiWordData[] = { 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0
x0E2B,0x0E0D,0x0E48, | 412 static const UChar thaiWordData[] = { 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0
x0E2B,0x0E0D,0x0E48, |
408 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0
x0E2B,0x0E0D,0x0E48, 0 }; | 413 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0
x0E2B,0x0E0D,0x0E48, 0 }; |
409 UnicodeString thaiStr(thaiWordData); | 414 UnicodeString thaiStr(thaiWordData); |
410 | 415 |
411 BreakIterator* bi = BreakIterator::createWordInstance(Locale("th"), status); | 416 BreakIterator* bi = BreakIterator::createWordInstance(Locale("th"), status); |
412 if (U_FAILURE(status) || bi == NULL) { | 417 if (U_FAILURE(status) || bi == NULL) { |
(...skipping 410 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
823 | 828 |
824 | 829 |
825 | 830 |
826 //------------------------------------------------------------------------------ | 831 //------------------------------------------------------------------------------ |
827 // | 832 // |
828 // RBBITest::Extended Run RBBI Tests from an external test data file | 833 // RBBITest::Extended Run RBBI Tests from an external test data file |
829 // | 834 // |
830 //------------------------------------------------------------------------------ | 835 //------------------------------------------------------------------------------ |
831 | 836 |
832 struct TestParams { | 837 struct TestParams { |
833 BreakIterator *bi; | 838 BreakIterator *bi; // Break iterator is set while parsin
g test source. |
834 UnicodeString dataToBreak; | 839 // Changed out whenever test data c
hanges break type. |
835 UVector32 *expectedBreaks; | 840 |
836 UVector32 *srcLine; | 841 UnicodeString dataToBreak; // Data that is built up while parsin
g the test. |
| 842 UVector32 *expectedBreaks; // Expected break positions, matches
dataToBreak UnicodeString. |
| 843 UVector32 *srcLine; // Positions in source file, indexed
same as dataToBreak. |
837 UVector32 *srcCol; | 844 UVector32 *srcCol; |
| 845 |
| 846 UText *textToBreak; // UText, could be UTF8 or UTF16. |
| 847 UVector32 *textMap; // Map from UTF-16 dataToBreak offset
s to UText offsets. |
| 848 CharString utf8String; // UTF-8 form of text to break. |
| 849 |
| 850 TestParams(UErrorCode &status) : dataToBreak() { |
| 851 bi = NULL; |
| 852 expectedBreaks = new UVector32(status); |
| 853 srcLine = new UVector32(status); |
| 854 srcCol = new UVector32(status); |
| 855 textToBreak = NULL; |
| 856 textMap = new UVector32(status); |
| 857 } |
| 858 |
| 859 ~TestParams() { |
| 860 delete bi; |
| 861 delete expectedBreaks; |
| 862 delete srcLine; |
| 863 delete srcCol; |
| 864 utext_close(textToBreak); |
| 865 delete textMap; |
| 866 } |
| 867 |
| 868 int32_t getSrcLine(int32_t bp); |
| 869 int32_t getExpectedBreak(int32_t bp); |
| 870 int32_t getSrcCol(int32_t bp); |
| 871 |
| 872 void setUTF16(UErrorCode &status); |
| 873 void setUTF8(UErrorCode &status); |
838 }; | 874 }; |
839 | 875 |
840 void RBBITest::executeTest(TestParams *t) { | 876 // Append a UnicodeString to a CharString with UTF-8 encoding. |
| 877 // Substitute any invalid chars. |
| 878 // Note: this is used with test data that includes a few unpaired surrogates i
n the UTF-16 that will be substituted. |
| 879 static void CharStringAppend(CharString &dest, const UnicodeString &src, UErrorC
ode &status) { |
| 880 if (U_FAILURE(status)) { |
| 881 return; |
| 882 } |
| 883 int32_t utf8Length; |
| 884 u_strToUTF8WithSub(NULL, 0, &utf8Length, // Output Buffer, NULL f
or preflight. |
| 885 src.getBuffer(), src.length(), // UTF-16 data |
| 886 0xfffd, NULL, // Substitution char, nu
mber of subs. |
| 887 &status); |
| 888 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) { |
| 889 return; |
| 890 } |
| 891 status = U_ZERO_ERROR; |
| 892 int32_t capacity; |
| 893 char *buffer = dest.getAppendBuffer(utf8Length, utf8Length, capacity, status
); |
| 894 u_strToUTF8WithSub(buffer, utf8Length, NULL, |
| 895 src.getBuffer(), src.length(), |
| 896 0xfffd, NULL, &status); |
| 897 dest.append(buffer, utf8Length, status); |
| 898 } |
| 899 |
| 900 |
| 901 void TestParams::setUTF16(UErrorCode &status) { |
| 902 textToBreak = utext_openUnicodeString(textToBreak, &dataToBreak, &status); |
| 903 textMap->removeAllElements(); |
| 904 for (int32_t i=0; i<dataToBreak.length(); i++) { |
| 905 if (i == dataToBreak.getChar32Start(i)) { |
| 906 textMap->addElement(i, status); |
| 907 } else { |
| 908 textMap->addElement(-1, status); |
| 909 } |
| 910 } |
| 911 textMap->addElement(dataToBreak.length(), status); |
| 912 U_ASSERT(dataToBreak.length() + 1 == textMap->size()); |
| 913 } |
| 914 |
| 915 |
| 916 void TestParams::setUTF8(UErrorCode &status) { |
| 917 if (U_FAILURE(status)) { |
| 918 return; |
| 919 } |
| 920 utf8String.clear(); |
| 921 CharStringAppend(utf8String, dataToBreak, status); |
| 922 textToBreak = utext_openUTF8(textToBreak, utf8String.data(), utf8String.leng
th(), &status); |
| 923 if (U_FAILURE(status)) { |
| 924 return; |
| 925 } |
| 926 |
| 927 textMap->removeAllElements(); |
| 928 int32_t utf16Index = 0; |
| 929 for (;;) { |
| 930 textMap->addElement(utf16Index, status); |
| 931 UChar32 c32 = utext_current32(textToBreak); |
| 932 if (c32 < 0) { |
| 933 break; |
| 934 } |
| 935 utf16Index += U16_LENGTH(c32); |
| 936 utext_next32(textToBreak); |
| 937 while (textMap->size() < utext_getNativeIndex(textToBreak)) { |
| 938 textMap->addElement(-1, status); |
| 939 } |
| 940 } |
| 941 U_ASSERT(utext_nativeLength(textToBreak) + 1 == textMap->size()); |
| 942 } |
| 943 |
| 944 |
| 945 int32_t TestParams::getSrcLine(int bp) { |
| 946 if (bp >= textMap->size()) { |
| 947 bp = textMap->size() - 1; |
| 948 } |
| 949 int32_t i = 0; |
| 950 for(; bp >= 0 ; --bp) { |
| 951 // Move to a character boundary if we are not on one already. |
| 952 i = textMap->elementAti(bp); |
| 953 if (i >= 0) { |
| 954 break; |
| 955 } |
| 956 } |
| 957 return srcLine->elementAti(i); |
| 958 } |
| 959 |
| 960 |
| 961 int32_t TestParams::getExpectedBreak(int bp) { |
| 962 if (bp >= textMap->size()) { |
| 963 return 0; |
| 964 } |
| 965 int32_t i = textMap->elementAti(bp); |
| 966 int32_t retVal = 0; |
| 967 if (i >= 0) { |
| 968 retVal = expectedBreaks->elementAti(i); |
| 969 } |
| 970 return retVal; |
| 971 } |
| 972 |
| 973 |
| 974 int32_t TestParams::getSrcCol(int bp) { |
| 975 if (bp >= textMap->size()) { |
| 976 bp = textMap->size() - 1; |
| 977 } |
| 978 int32_t i = 0; |
| 979 for(; bp >= 0; --bp) { |
| 980 // Move bp to a character boundary if we are not on one already. |
| 981 i = textMap->elementAti(bp); |
| 982 if (i >= 0) { |
| 983 break; |
| 984 } |
| 985 } |
| 986 return srcCol->elementAti(i); |
| 987 } |
| 988 |
| 989 |
| 990 void RBBITest::executeTest(TestParams *t, UErrorCode &status) { |
841 int32_t bp; | 991 int32_t bp; |
842 int32_t prevBP; | 992 int32_t prevBP; |
843 int32_t i; | 993 int32_t i; |
844 | 994 |
| 995 TEST_ASSERT_SUCCESS(status); |
| 996 if (U_FAILURE(status)) { |
| 997 return; |
| 998 } |
| 999 |
845 if (t->bi == NULL) { | 1000 if (t->bi == NULL) { |
846 return; | 1001 return; |
847 } | 1002 } |
848 | 1003 |
849 t->bi->setText(t->dataToBreak); | 1004 t->bi->setText(t->textToBreak, status); |
850 // | 1005 // |
851 // Run the iterator forward | 1006 // Run the iterator forward |
852 // | 1007 // |
853 prevBP = -1; | 1008 prevBP = -1; |
854 for (bp = t->bi->first(); bp != BreakIterator::DONE; bp = t->bi->next()) { | 1009 for (bp = t->bi->first(); bp != BreakIterator::DONE; bp = t->bi->next()) { |
855 if (prevBP == bp) { | 1010 if (prevBP == bp) { |
856 // Fail for lack of forward progress. | 1011 // Fail for lack of forward progress. |
857 errln("Forward Iteration, no forward progress. Break Pos=%4d File
line,col=%4d,%4d", | 1012 errln("Forward Iteration, no forward progress. Break Pos=%4d File
line,col=%4d,%4d", |
858 bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); | 1013 bp, t->getSrcLine(bp), t->getSrcCol(bp)); |
859 break; | 1014 break; |
860 } | 1015 } |
861 | 1016 |
862 // Check that there were we didn't miss an expected break between the la
st one | 1017 // Check that there we didn't miss an expected break between the last on
e |
863 // and this one. | 1018 // and this one. |
864 for (i=prevBP+1; i<bp; i++) { | 1019 for (i=prevBP+1; i<bp; i++) { |
865 if (t->expectedBreaks->elementAti(i) != 0) { | 1020 if (t->getExpectedBreak(i) != 0) { |
866 int expected[] = {0, i}; | 1021 int expected[] = {0, i}; |
867 printStringBreaks(t->dataToBreak, expected, 2); | 1022 printStringBreaks(t->dataToBreak, expected, 2); |
868 errln("Forward Iteration, break expected, but not found. Pos=%4
d File line,col= %4d,%4d", | 1023 errln("Forward Iteration, break expected, but not found. Pos=%4
d File line,col= %4d,%4d", |
869 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); | 1024 i, t->getSrcLine(i), t->getSrcCol(i)); |
870 } | 1025 } |
871 } | 1026 } |
872 | 1027 |
873 // Check that the break we did find was expected | 1028 // Check that the break we did find was expected |
874 if (t->expectedBreaks->elementAti(bp) == 0) { | 1029 if (t->getExpectedBreak(bp) == 0) { |
875 int expected[] = {0, bp}; | 1030 int expected[] = {0, bp}; |
876 printStringBreaks(t->dataToBreak, expected, 2); | 1031 printStringBreaks(t->textToBreak, expected, 2); |
877 errln("Forward Iteration, break found, but not expected. Pos=%4d F
ile line,col= %4d,%4d", | 1032 errln("Forward Iteration, break found, but not expected. Pos=%4d F
ile line,col= %4d,%4d", |
878 bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); | 1033 bp, t->getSrcLine(bp), t->getSrcCol(bp)); |
879 } else { | 1034 } else { |
880 // The break was expected. | 1035 // The break was expected. |
881 // Check that the {nnn} tag value is correct. | 1036 // Check that the {nnn} tag value is correct. |
882 int32_t expectedTagVal = t->expectedBreaks->elementAti(bp); | 1037 int32_t expectedTagVal = t->getExpectedBreak(bp); |
883 if (expectedTagVal == -1) { | 1038 if (expectedTagVal == -1) { |
884 expectedTagVal = 0; | 1039 expectedTagVal = 0; |
885 } | 1040 } |
886 int32_t line = t->srcLine->elementAti(bp); | 1041 int32_t line = t->getSrcLine(bp); |
887 int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus(); | 1042 int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus(); |
888 if (rs != expectedTagVal) { | 1043 if (rs != expectedTagVal) { |
889 errln("Incorrect status for forward break. Pos=%4d File line,c
ol= %4d,%4d.\n" | 1044 errln("Incorrect status for forward break. Pos=%4d File line,c
ol= %4d,%4d.\n" |
890 " Actual, Expected status = %4d, %4d", | 1045 " Actual, Expected status = %4d, %4d", |
891 bp, line, t->srcCol->elementAti(bp), rs, expectedTagVal); | 1046 bp, line, t->getSrcCol(bp), rs, expectedTagVal); |
892 } | 1047 } |
893 } | 1048 } |
894 | 1049 |
895 | |
896 prevBP = bp; | 1050 prevBP = bp; |
897 } | 1051 } |
898 | 1052 |
899 // Verify that there were no missed expected breaks after the last one found | 1053 // Verify that there were no missed expected breaks after the last one found |
900 for (i=prevBP+1; i<t->expectedBreaks->size(); i++) { | 1054 for (i=prevBP+1; i<utext_nativeLength(t->textToBreak); i++) { |
901 if (t->expectedBreaks->elementAti(i) != 0) { | 1055 if (t->getExpectedBreak(i) != 0) { |
902 errln("Forward Iteration, break expected, but not found. Pos=%4d F
ile line,col= %4d,%4d", | 1056 errln("Forward Iteration, break expected, but not found. Pos=%4d F
ile line,col= %4d,%4d", |
903 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); | 1057 i, t->getSrcLine(i), t->getSrcCol(i)); |
904 } | 1058 } |
905 } | 1059 } |
906 | 1060 |
907 // | 1061 // |
908 // Run the iterator backwards, verify that the same breaks are found. | 1062 // Run the iterator backwards, verify that the same breaks are found. |
909 // | 1063 // |
910 prevBP = t->dataToBreak.length()+2; // start with a phony value for the las
t break pos seen. | 1064 prevBP = utext_nativeLength(t->textToBreak)+2; // start with a phony value
for the last break pos seen. |
911 for (bp = t->bi->last(); bp != BreakIterator::DONE; bp = t->bi->previous())
{ | 1065 for (bp = t->bi->last(); bp != BreakIterator::DONE; bp = t->bi->previous())
{ |
912 if (prevBP == bp) { | 1066 if (prevBP == bp) { |
913 // Fail for lack of progress. | 1067 // Fail for lack of progress. |
914 errln("Reverse Iteration, no progress. Break Pos=%4d File line,col
=%4d,%4d", | 1068 errln("Reverse Iteration, no progress. Break Pos=%4d File line,col
=%4d,%4d", |
915 bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); | 1069 bp, t->getSrcLine(bp), t->getSrcCol(bp)); |
916 break; | 1070 break; |
917 } | 1071 } |
918 | 1072 |
919 // Check that there were we didn't miss an expected break between the la
st one | 1073 // Check that we didn't miss an expected break between the last one |
920 // and this one. (UVector returns zeros for index out of bounds.) | 1074 // and this one. (UVector returns zeros for index out of bounds.) |
921 for (i=prevBP-1; i>bp; i--) { | 1075 for (i=prevBP-1; i>bp; i--) { |
922 if (t->expectedBreaks->elementAti(i) != 0) { | 1076 if (t->getExpectedBreak(i) != 0) { |
923 errln("Reverse Itertion, break expected, but not found. Pos=%4d
File line,col= %4d,%4d", | 1077 errln("Reverse Iteration, break expected, but not found. Pos=%4
d File line,col= %4d,%4d", |
924 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); | 1078 i, t->getSrcLine(i), t->getSrcCol(i)); |
925 } | 1079 } |
926 } | 1080 } |
927 | 1081 |
928 // Check that the break we did find was expected | 1082 // Check that the break we did find was expected |
929 if (t->expectedBreaks->elementAti(bp) == 0) { | 1083 if (t->getExpectedBreak(bp) == 0) { |
930 errln("Reverse Itertion, break found, but not expected. Pos=%4d Fi
le line,col= %4d,%4d", | 1084 errln("Reverse Itertion, break found, but not expected. Pos=%4d Fi
le line,col= %4d,%4d", |
931 bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); | 1085 bp, t->getSrcLine(bp), t->getSrcCol(bp)); |
932 } else { | 1086 } else { |
933 // The break was expected. | 1087 // The break was expected. |
934 // Check that the {nnn} tag value is correct. | 1088 // Check that the {nnn} tag value is correct. |
935 int32_t expectedTagVal = t->expectedBreaks->elementAti(bp); | 1089 int32_t expectedTagVal = t->getExpectedBreak(bp); |
936 if (expectedTagVal == -1) { | 1090 if (expectedTagVal == -1) { |
937 expectedTagVal = 0; | 1091 expectedTagVal = 0; |
938 } | 1092 } |
939 int line = t->srcLine->elementAti(bp); | 1093 int line = t->getSrcLine(bp); |
940 int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus(); | 1094 int32_t rs = t->bi->getRuleStatus(); |
941 if (rs != expectedTagVal) { | 1095 if (rs != expectedTagVal) { |
942 errln("Incorrect status for reverse break. Pos=%4d File line,c
ol= %4d,%4d.\n" | 1096 errln("Incorrect status for reverse break. Pos=%4d File line,c
ol= %4d,%4d.\n" |
943 " Actual, Expected status = %4d, %4d", | 1097 " Actual, Expected status = %4d, %4d", |
944 bp, line, t->srcCol->elementAti(bp), rs, expectedTagVal); | 1098 bp, line, t->getSrcCol(bp), rs, expectedTagVal); |
945 } | 1099 } |
946 } | 1100 } |
947 | 1101 |
948 prevBP = bp; | 1102 prevBP = bp; |
949 } | 1103 } |
950 | 1104 |
951 // Verify that there were no missed breaks prior to the last one found | 1105 // Verify that there were no missed breaks prior to the last one found |
952 for (i=prevBP-1; i>=0; i--) { | 1106 for (i=prevBP-1; i>=0; i--) { |
953 if (t->expectedBreaks->elementAti(i) != 0) { | 1107 if (t->getExpectedBreak(i) != 0) { |
954 errln("Forward Itertion, break expected, but not found. Pos=%4d Fi
le line,col= %4d,%4d", | 1108 errln("Forward Itertion, break expected, but not found. Pos=%4d Fi
le line,col= %4d,%4d", |
955 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); | 1109 i, t->getSrcLine(i), t->getSrcCol(i)); |
956 } | 1110 } |
957 } | 1111 } |
958 | 1112 |
959 // Check isBoundary() | 1113 // Check isBoundary() |
960 for (i=0; i<t->expectedBreaks->size(); i++) { | 1114 for (i=0; i < utext_nativeLength(t->textToBreak); i++) { |
961 UBool boundaryExpected = (t->expectedBreaks->elementAti(i) != 0); | 1115 UBool boundaryExpected = (t->getExpectedBreak(i) != 0); |
962 UBool boundaryFound = t->bi->isBoundary(i); | 1116 UBool boundaryFound = t->bi->isBoundary(i); |
963 if (boundaryExpected != boundaryFound) { | 1117 if (boundaryExpected != boundaryFound) { |
964 errln("isBoundary(%d) incorrect. File line,col= %4d,%4d\n" | 1118 errln("isBoundary(%d) incorrect. File line,col= %4d,%4d\n" |
965 " Expected, Actual= %s, %s", | 1119 " Expected, Actual= %s, %s", |
966 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i), | 1120 i, t->getSrcLine(i), t->getSrcCol(i), |
967 boundaryExpected ? "true":"false", boundaryFound? "true" : "fa
lse"); | 1121 boundaryExpected ? "true":"false", boundaryFound? "true" : "fa
lse"); |
968 } | 1122 } |
969 } | 1123 } |
970 | 1124 |
971 // Check following() | 1125 // Check following() |
972 for (i=0; i<t->expectedBreaks->size(); i++) { | 1126 for (i=0; i < utext_nativeLength(t->textToBreak); i++) { |
973 int32_t actualBreak = t->bi->following(i); | 1127 int32_t actualBreak = t->bi->following(i); |
974 int32_t expectedBreak = BreakIterator::DONE; | 1128 int32_t expectedBreak = BreakIterator::DONE; |
975 for (int32_t j=i+1; j < t->expectedBreaks->size(); j++) { | 1129 for (int32_t j=i+1; j <= utext_nativeLength(t->textToBreak); j++) { |
976 if (t->expectedBreaks->elementAti(j) != 0) { | 1130 if (t->getExpectedBreak(j) != 0) { |
977 expectedBreak = j; | 1131 expectedBreak = j; |
978 break; | 1132 break; |
979 } | 1133 } |
980 } | 1134 } |
981 if (expectedBreak != actualBreak) { | 1135 if (expectedBreak != actualBreak) { |
982 errln("following(%d) incorrect. File line,col= %4d,%4d\n" | 1136 errln("following(%d) incorrect. File line,col= %4d,%4d\n" |
983 " Expected, Actual= %d, %d", | 1137 " Expected, Actual= %d, %d", |
984 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i), expect
edBreak, actualBreak); | 1138 i, t->getSrcLine(i), t->getSrcCol(i), expectedBreak, actualBre
ak); |
985 } | 1139 } |
986 } | 1140 } |
987 | 1141 |
988 // Check preceding() | 1142 // Check preceding() |
989 for (i=t->expectedBreaks->size(); i>=0; i--) { | 1143 for (i=utext_nativeLength(t->textToBreak); i>=0; i--) { |
990 int32_t actualBreak = t->bi->preceding(i); | 1144 int32_t actualBreak = t->bi->preceding(i); |
991 int32_t expectedBreak = BreakIterator::DONE; | 1145 int32_t expectedBreak = BreakIterator::DONE; |
992 | 1146 |
993 for (int32_t j=i-1; j >= 0; j--) { | 1147 // For UTF-8 & UTF-16 supplementals, all code units of a character are e
quivalent. |
994 if (t->expectedBreaks->elementAti(j) != 0) { | 1148 // preceding(trailing byte) will return the index of some preceding code
point, |
| 1149 // not the lead byte of the current code point, even though that has a s
maller index. |
| 1150 // Therefore, start looking at the expected break data not at i-1, but a
t |
| 1151 // the start of code point index - 1. |
| 1152 utext_setNativeIndex(t->textToBreak, i); |
| 1153 int32_t j = utext_getNativeIndex(t->textToBreak) - 1; |
| 1154 for (; j >= 0; j--) { |
| 1155 if (t->getExpectedBreak(j) != 0) { |
995 expectedBreak = j; | 1156 expectedBreak = j; |
996 break; | 1157 break; |
997 } | 1158 } |
998 } | 1159 } |
999 if (expectedBreak != actualBreak) { | 1160 if (expectedBreak != actualBreak) { |
1000 errln("preceding(%d) incorrect. File line,col= %4d,%4d\n" | 1161 errln("preceding(%d) incorrect. File line,col= %4d,%4d\n" |
1001 " Expected, Actual= %d, %d", | 1162 " Expected, Actual= %d, %d", |
1002 i, t->srcLine->elementAti(i), t->srcCol->elementAti(i), expect
edBreak, actualBreak); | 1163 i, t->getSrcLine(i), t->getSrcCol(i), expectedBreak, actualBre
ak); |
1003 } | 1164 } |
1004 } | 1165 } |
1005 } | 1166 } |
1006 | 1167 |
1007 | 1168 |
1008 void RBBITest::TestExtended() { | 1169 void RBBITest::TestExtended() { |
1009 #if !UCONFIG_NO_REGULAR_EXPRESSIONS | 1170 #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
1010 UErrorCode status = U_ZERO_ERROR; | 1171 UErrorCode status = U_ZERO_ERROR; |
1011 Locale locale(""); | 1172 Locale locale(""); |
1012 | 1173 |
1013 UnicodeString rules; | 1174 UnicodeString rules; |
1014 TestParams tp; | 1175 TestParams tp(status); |
1015 tp.bi = NULL; | |
1016 tp.expectedBreaks = new UVector32(status); | |
1017 tp.srcLine = new UVector32(status); | |
1018 tp.srcCol = new UVector32(status); | |
1019 | 1176 |
1020 RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{
Nd}_]*) *>"), 0, status); | 1177 RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{
Nd}_]*) *>"), 0, status); |
1021 if (U_FAILURE(status)) { | 1178 if (U_FAILURE(status)) { |
1022 dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LI
NE__, u_errorName(status)); | 1179 dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LI
NE__, u_errorName(status)); |
1023 } | 1180 } |
1024 | 1181 |
1025 | 1182 |
1026 // | 1183 // |
1027 // Open and read the test data file. | 1184 // Open and read the test data file. |
1028 // | 1185 // |
(...skipping 154 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1183 // Add final entry to mappings from break location to source fil
e position. | 1340 // Add final entry to mappings from break location to source fil
e position. |
1184 // Need one extra because last break position returned is after
the | 1341 // Need one extra because last break position returned is after
the |
1185 // last char in the data, not at the last char. | 1342 // last char in the data, not at the last char. |
1186 tp.srcLine->addElement(lineNum, status); | 1343 tp.srcLine->addElement(lineNum, status); |
1187 tp.srcCol ->addElement(column, status); | 1344 tp.srcCol ->addElement(column, status); |
1188 | 1345 |
1189 parseState = PARSE_TAG; | 1346 parseState = PARSE_TAG; |
1190 charIdx += 6; | 1347 charIdx += 6; |
1191 | 1348 |
1192 // RUN THE TEST! | 1349 // RUN THE TEST! |
1193 executeTest(&tp); | 1350 status = U_ZERO_ERROR; |
| 1351 tp.setUTF16(status); |
| 1352 executeTest(&tp, status); |
| 1353 TEST_ASSERT_SUCCESS(status); |
| 1354 |
| 1355 // Run again, this time with UTF-8 text wrapped in a UText. |
| 1356 status = U_ZERO_ERROR; |
| 1357 tp.setUTF8(status); |
| 1358 TEST_ASSERT_SUCCESS(status); |
| 1359 executeTest(&tp, status); |
1194 break; | 1360 break; |
1195 } | 1361 } |
1196 | 1362 |
1197 if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{"))
== 0) { | 1363 if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{"))
== 0) { |
1198 // Named character, e.g. \N{COMBINING GRAVE ACCENT} | 1364 // Named character, e.g. \N{COMBINING GRAVE ACCENT} |
1199 // Get the code point from the name and insert it into the test
data. | 1365 // Get the code point from the name and insert it into the test
data. |
1200 // (Damn, no API takes names in Unicode !!! | 1366 // (Damn, no API takes names in Unicode !!! |
1201 // we've got to take it back to char *) | 1367 // we've got to take it back to char *) |
1202 int32_t nameEndIdx = testString.indexOf((UChar)0x7d/*'}'*/, char
Idx); | 1368 int32_t nameEndIdx = testString.indexOf((UChar)0x7d/*'}'*/, char
Idx); |
1203 int32_t nameLength = nameEndIdx - (charIdx+2); | 1369 int32_t nameLength = nameEndIdx - (charIdx+2); |
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1349 if (U_FAILURE(status)) { | 1515 if (U_FAILURE(status)) { |
1350 dataerrln("ICU Error %s while parsing test file at line %d.", | 1516 dataerrln("ICU Error %s while parsing test file at line %d.", |
1351 u_errorName(status), lineNum); | 1517 u_errorName(status), lineNum); |
1352 status = U_ZERO_ERROR; | 1518 status = U_ZERO_ERROR; |
1353 goto end_test; // Stop the test | 1519 goto end_test; // Stop the test |
1354 } | 1520 } |
1355 | 1521 |
1356 } | 1522 } |
1357 | 1523 |
1358 end_test: | 1524 end_test: |
1359 delete tp.bi; | |
1360 delete tp.expectedBreaks; | |
1361 delete tp.srcLine; | |
1362 delete tp.srcCol; | |
1363 delete [] testFile; | 1525 delete [] testFile; |
1364 #endif | 1526 #endif |
1365 } | 1527 } |
1366 | 1528 |
1367 | 1529 |
1368 //------------------------------------------------------------------------------
- | 1530 //------------------------------------------------------------------------------
- |
1369 // | 1531 // |
1370 // TestDictRules create a break iterator from source rules that includes a | 1532 // TestDictRules create a break iterator from source rules that includes a |
1371 // dictionary range. Regression for bug #7130. Source rules | 1533 // dictionary range. Regression for bug #7130. Source rules |
1372 // do not declare a break iterator type (word, line, sentence,
etc. | 1534 // do not declare a break iterator type (word, line, sentence,
etc. |
(...skipping 2965 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4338 UnicodeSet prependSet(UNICODE_STRING_SIMPLE("[:GCB=Prepend:]"), errorCode); | 4500 UnicodeSet prependSet(UNICODE_STRING_SIMPLE("[:GCB=Prepend:]"), errorCode); |
4339 if (!prependSet.isEmpty()) { | 4501 if (!prependSet.isEmpty()) { |
4340 errln( | 4502 errln( |
4341 "[:GCB=Prepend:] is not empty any more. " | 4503 "[:GCB=Prepend:] is not empty any more. " |
4342 "Uncomment relevant lines in source/data/brkitr/char.txt and " | 4504 "Uncomment relevant lines in source/data/brkitr/char.txt and " |
4343 "change this test to the opposite condition."); | 4505 "change this test to the opposite condition."); |
4344 } | 4506 } |
4345 } | 4507 } |
4346 | 4508 |
4347 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | 4509 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
OLD | NEW |