OLD | NEW |
1 /******************************************************************** | 1 /******************************************************************** |
2 * COPYRIGHT: | 2 * COPYRIGHT: |
3 * Copyright (c) 1997-2013, International Business Machines Corporation and | 3 * Copyright (c) 1997-2015, International Business Machines Corporation and |
4 * others. All Rights Reserved. | 4 * others. All Rights Reserved. |
5 ********************************************************************/ | 5 ********************************************************************/ |
6 /*******************************************************************************
* | 6 /*******************************************************************************
* |
7 * | 7 * |
8 * File CBIAPTS.C | 8 * File CBIAPTS.C |
9 * | 9 * |
10 * Modification History: | 10 * Modification History: |
11 * Name Description | 11 * Name Description |
12 * Madhu Katragadda Creation | 12 * Madhu Katragadda Creation |
13 ********************************************************************************
*/ | 13 ********************************************************************************
*/ |
(...skipping 10 matching lines...) Expand all Loading... |
24 | 24 |
25 #include <stdlib.h> | 25 #include <stdlib.h> |
26 #include <string.h> | 26 #include <string.h> |
27 #include "unicode/uloc.h" | 27 #include "unicode/uloc.h" |
28 #include "unicode/ubrk.h" | 28 #include "unicode/ubrk.h" |
29 #include "unicode/ustring.h" | 29 #include "unicode/ustring.h" |
30 #include "unicode/ucnv.h" | 30 #include "unicode/ucnv.h" |
31 #include "unicode/utext.h" | 31 #include "unicode/utext.h" |
32 #include "cintltst.h" | 32 #include "cintltst.h" |
33 #include "cbiapts.h" | 33 #include "cbiapts.h" |
| 34 #include "cmemory.h" |
34 | 35 |
/*
 * Reports a data error, tagged with the current file and line, when
 * `status` holds an ICU failure code. Execution continues afterwards.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}

/*
 * Reports a data error, tagged with the current file and line, when
 * `expr` evaluates to false. Execution continues afterwards.
 */
#define TEST_ASSERT(expr) { \
    if (!(expr)) { \
        log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", \
                     __FILE__, __LINE__); \
    } \
}
40 | 41 |
41 #if !UCONFIG_NO_FILE_IO | 42 #if !UCONFIG_NO_FILE_IO |
42 static void TestBreakIteratorSafeClone(void); | 43 static void TestBreakIteratorSafeClone(void); |
43 #endif | 44 #endif |
44 static void TestBreakIteratorRules(void); | 45 static void TestBreakIteratorRules(void); |
45 static void TestBreakIteratorRuleError(void); | 46 static void TestBreakIteratorRuleError(void); |
46 static void TestBreakIteratorStatusVec(void); | 47 static void TestBreakIteratorStatusVec(void); |
47 static void TestBreakIteratorUText(void); | 48 static void TestBreakIteratorUText(void); |
48 static void TestBreakIteratorTailoring(void); | 49 static void TestBreakIteratorTailoring(void); |
49 static void TestBreakIteratorRefresh(void); | 50 static void TestBreakIteratorRefresh(void); |
| 51 static void TestBug11665(void); |
| 52 static void TestBreakIteratorSuppressions(void); |
50 | 53 |
51 void addBrkIterAPITest(TestNode** root); | 54 void addBrkIterAPITest(TestNode** root); |
52 | 55 |
53 void addBrkIterAPITest(TestNode** root) | 56 void addBrkIterAPITest(TestNode** root) |
54 { | 57 { |
55 #if !UCONFIG_NO_FILE_IO | 58 #if !UCONFIG_NO_FILE_IO |
56 addTest(root, &TestBreakIteratorCAPI, "tstxtbd/cbiapts/TestBreakIteratorCAPI
"); | 59 addTest(root, &TestBreakIteratorCAPI, "tstxtbd/cbiapts/TestBreakIteratorCAPI
"); |
57 addTest(root, &TestBreakIteratorSafeClone, "tstxtbd/cbiapts/TestBreakIterato
rSafeClone"); | 60 addTest(root, &TestBreakIteratorSafeClone, "tstxtbd/cbiapts/TestBreakIterato
rSafeClone"); |
58 addTest(root, &TestBreakIteratorUText, "tstxtbd/cbiapts/TestBreakIteratorUTe
xt"); | 61 addTest(root, &TestBreakIteratorUText, "tstxtbd/cbiapts/TestBreakIteratorUTe
xt"); |
59 #endif | 62 #endif |
60 addTest(root, &TestBreakIteratorRules, "tstxtbd/cbiapts/TestBreakIteratorRul
es"); | 63 addTest(root, &TestBreakIteratorRules, "tstxtbd/cbiapts/TestBreakIteratorRul
es"); |
61 addTest(root, &TestBreakIteratorRuleError, "tstxtbd/cbiapts/TestBreakIterato
rRuleError"); | 64 addTest(root, &TestBreakIteratorRuleError, "tstxtbd/cbiapts/TestBreakIterato
rRuleError"); |
62 addTest(root, &TestBreakIteratorStatusVec, "tstxtbd/cbiapts/TestBreakIterato
rStatusVec"); | 65 addTest(root, &TestBreakIteratorStatusVec, "tstxtbd/cbiapts/TestBreakIterato
rStatusVec"); |
63 addTest(root, &TestBreakIteratorTailoring, "tstxtbd/cbiapts/TestBreakIterato
rTailoring"); | 66 addTest(root, &TestBreakIteratorTailoring, "tstxtbd/cbiapts/TestBreakIterato
rTailoring"); |
64 addTest(root, &TestBreakIteratorRefresh, "tstxtbd/cbiapts/TestBreakIteratorR
efresh"); | 67 addTest(root, &TestBreakIteratorRefresh, "tstxtbd/cbiapts/TestBreakIteratorR
efresh"); |
| 68 addTest(root, &TestBug11665, "tstxtbd/cbiapts/TestBug11665"); |
| 69 addTest(root, &TestBreakIteratorSuppressions, "tstxtbd/cbiapts/TestBreakIter
atorSuppressions"); |
65 } | 70 } |
66 | 71 |
67 #define CLONETEST_ITERATOR_COUNT 2 | 72 #define CLONETEST_ITERATOR_COUNT 2 |
68 | 73 |
69 /* | 74 /* |
70 * Utility function for converting char * to UChar * strings, to | 75 * Utility function for converting char * to UChar * strings, to |
71 * simplify the test code. Converted strings are put in heap allocated | 76 * simplify the test code. Converted strings are put in heap allocated |
72 * storage. A hook (probably a local in the caller's code) allows all | 77 * storage. A hook (probably a local in the caller's code) allows all |
73 * strings converted with that hook to be freed with a single call. | 78 * strings converted with that hook to be freed with a single call. |
74 */ | 79 */ |
(...skipping 765 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
840 */ | 845 */ |
841 UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /
* = " A B C D" */ | 846 UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /
* = " A B C D" */ |
842 UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0}; | 847 UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0}; |
843 UErrorCode status = U_ZERO_ERROR; | 848 UErrorCode status = U_ZERO_ERROR; |
844 UBreakIterator *bi; | 849 UBreakIterator *bi; |
845 UText ut1 = UTEXT_INITIALIZER; | 850 UText ut1 = UTEXT_INITIALIZER; |
846 UText ut2 = UTEXT_INITIALIZER; | 851 UText ut2 = UTEXT_INITIALIZER; |
847 | 852 |
848 bi = ubrk_open(UBRK_LINE, "en_US", NULL, 0, &status); | 853 bi = ubrk_open(UBRK_LINE, "en_US", NULL, 0, &status); |
849 TEST_ASSERT_SUCCESS(status); | 854 TEST_ASSERT_SUCCESS(status); |
| 855 if (U_FAILURE(status)) { |
| 856 return; |
| 857 } |
850 | 858 |
851 utext_openUChars(&ut1, testStr, -1, &status); | 859 utext_openUChars(&ut1, testStr, -1, &status); |
852 TEST_ASSERT_SUCCESS(status); | 860 TEST_ASSERT_SUCCESS(status); |
853 ubrk_setUText(bi, &ut1, &status); | 861 ubrk_setUText(bi, &ut1, &status); |
854 TEST_ASSERT_SUCCESS(status); | 862 TEST_ASSERT_SUCCESS(status); |
855 | 863 |
856 if (U_SUCCESS(status)) { | 864 if (U_SUCCESS(status)) { |
857 /* Line boundaries will occur before each letter in the original string
*/ | 865 /* Line boundaries will occur before each letter in the original string
*/ |
858 TEST_ASSERT(1 == ubrk_next(bi)); | 866 TEST_ASSERT(1 == ubrk_next(bi)); |
859 TEST_ASSERT(3 == ubrk_next(bi)); | 867 TEST_ASSERT(3 == ubrk_next(bi)); |
(...skipping 12 matching lines...) Expand all Loading... |
872 TEST_ASSERT(8 == ubrk_next(bi)); | 880 TEST_ASSERT(8 == ubrk_next(bi)); |
873 TEST_ASSERT(UBRK_DONE == ubrk_next(bi)); | 881 TEST_ASSERT(UBRK_DONE == ubrk_next(bi)); |
874 TEST_ASSERT_SUCCESS(status); | 882 TEST_ASSERT_SUCCESS(status); |
875 | 883 |
876 utext_close(&ut1); | 884 utext_close(&ut1); |
877 utext_close(&ut2); | 885 utext_close(&ut2); |
878 } | 886 } |
879 ubrk_close(bi); | 887 ubrk_close(bi); |
880 } | 888 } |
881 | 889 |
| 890 |
| 891 static void TestBug11665(void) { |
| 892 // The problem was with the incorrect breaking of Japanese text beginning |
| 893 // with Katakana characters when no prior Japanese or Chinese text had been |
| 894 // encountered. |
| 895 // |
| 896 // Tested here in cintltst, rather than in intltest, because only cintltst |
| 897 // tests have the ability to reset ICU, which is needed to get the bug |
| 898 // to manifest itself. |
| 899 |
| 900 static UChar japaneseText[] = {0x30A2, 0x30EC, 0x30EB, 0x30AE, 0x30FC, 0x602
7, 0x7D50, 0x819C, 0x708E}; |
| 901 int32_t boundaries[10] = {0}; |
| 902 UBreakIterator *bi = NULL; |
| 903 int32_t brk; |
| 904 int32_t brkIdx = 0; |
| 905 int32_t totalBreaks = 0; |
| 906 UErrorCode status = U_ZERO_ERROR; |
| 907 |
| 908 ctest_resetICU(); |
| 909 bi = ubrk_open(UBRK_WORD, "en_US", japaneseText, UPRV_LENGTHOF(japaneseText)
, &status); |
| 910 TEST_ASSERT_SUCCESS(status); |
| 911 if (!bi) { |
| 912 return; |
| 913 } |
| 914 for (brk=ubrk_first(bi); brk != UBRK_DONE; brk=ubrk_next(bi)) { |
| 915 boundaries[brkIdx] = brk; |
| 916 if (++brkIdx >= UPRV_LENGTHOF(boundaries) - 1) { |
| 917 break; |
| 918 } |
| 919 } |
| 920 if (brkIdx <= 2 || brkIdx >= UPRV_LENGTHOF(boundaries)) { |
| 921 log_err("%s:%d too few or many breaks found.\n", __FILE__, __LINE__); |
| 922 } else { |
| 923 totalBreaks = brkIdx; |
| 924 brkIdx = 0; |
| 925 for (brk=ubrk_first(bi); brk != UBRK_DONE; brk=ubrk_next(bi)) { |
| 926 if (brk != boundaries[brkIdx]) { |
| 927 log_err("%s:%d Break #%d differs between first and second iterat
ion.\n", __FILE__, __LINE__, brkIdx); |
| 928 break; |
| 929 } |
| 930 if (++brkIdx >= UPRV_LENGTHOF(boundaries) - 1) { |
| 931 log_err("%s:%d Too many breaks.\n", __FILE__, __LINE__); |
| 932 break; |
| 933 } |
| 934 } |
| 935 if (totalBreaks != brkIdx) { |
| 936 log_err("%s:%d Number of breaks differ between first and second iter
ation.\n", __FILE__, __LINE__); |
| 937 } |
| 938 } |
| 939 ubrk_close(bi); |
| 940 } |
| 941 |
| 942 /* |
| 943 * expOffset is the set of expected offsets, ending with '-1'. |
| 944 * "Expected expOffset -1" means "expected the end of the offsets" |
| 945 */ |
| 946 |
| 947 static const char testSentenceSuppressionsEn[] = "Mr. Jones comes home. Dr. Smi
th Ph.D. is out. In the U.S.A. it is hot."; |
| 948 static const int32_t testSentSuppFwdOffsetsEn[] = { 22, 26, 46, 70, -1 }; /*
With suppressions, currently not handling Dr. */ |
| 949 static const int32_t testSentFwdOffsetsEn[] = { 4, 22, 26, 46, 70, -1 }; /*
Without suppressions */ |
| 950 static const int32_t testSentSuppRevOffsetsEn[] = { 46, 26, 22, 0, -1 }; /*
With suppressions, currently not handling Dr. */ |
| 951 static const int32_t testSentRevOffsetsEn[] = { 46, 26, 22, 4, 0, -1 }; /*
Without suppressions */ |
| 952 |
| 953 static const char testSentenceSuppressionsDe[] = "Wenn ich schon h\\u00F6re zu
Guttenberg kommt evtl. zur\\u00FCck."; |
| 954 static const int32_t testSentSuppFwdOffsetsDe[] = { 53, -1 }; /* With supp
ressions */ |
| 955 static const int32_t testSentFwdOffsetsDe[] = { 53, -1 }; /* Without s
uppressions; no break in evtl. zur due to casing */ |
| 956 static const int32_t testSentSuppRevOffsetsDe[] = { 0, -1 }; /* With supp
ressions */ |
| 957 static const int32_t testSentRevOffsetsDe[] = { 0, -1 }; /* Without s
uppressions */ |
| 958 |
| 959 static const char testSentenceSuppressionsEs[] = "Te esperamos todos los mierco
les en Bravo 416, Col. El Pueblo a las 7 PM."; |
| 960 static const int32_t testSentSuppFwdOffsetsEs[] = { 73, -1 }; /* With supp
ressions */ |
| 961 static const int32_t testSentFwdOffsetsEs[] = { 52, 73, -1 }; /* Without s
uppressions */ |
| 962 static const int32_t testSentSuppRevOffsetsEs[] = { 0, -1 }; /* With supp
ressions */ |
| 963 static const int32_t testSentRevOffsetsEs[] = { 52, 0, -1 }; /* Without s
uppressions */ |
| 964 |
| 965 enum { kTextULenMax = 128 }; |
| 966 |
| 967 typedef struct { |
| 968 const char * locale; |
| 969 const char * text; |
| 970 const int32_t * expFwdOffsets; |
| 971 const int32_t * expRevOffsets; |
| 972 } TestBISuppressionsItem; |
| 973 |
| 974 static const TestBISuppressionsItem testBISuppressionsItems[] = { |
| 975 { "en@ss=standard", testSentenceSuppressionsEn, testSentSuppFwdOffsetsEn, te
stSentSuppRevOffsetsEn }, |
| 976 { "en", testSentenceSuppressionsEn, testSentFwdOffsetsEn, te
stSentRevOffsetsEn }, |
| 977 { "fr@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, te
stSentRevOffsetsEn }, |
| 978 { "af@ss=standard", testSentenceSuppressionsEn, testSentSuppFwdOffsetsEn, te
stSentSuppRevOffsetsEn }, /* no brkiter data => en suppressions? */ |
| 979 { "zh@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, te
stSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressio
ns */ |
| 980 { "zh_Hant@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, t
estSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressio
ns */ |
| 981 { "fi@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, te
stSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressio
ns */ |
| 982 { "ja@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, te
stSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressio
ns */ |
| 983 { "de@ss=standard", testSentenceSuppressionsDe, testSentSuppFwdOffsetsDe, te
stSentSuppRevOffsetsDe }, |
| 984 { "de", testSentenceSuppressionsDe, testSentFwdOffsetsDe, te
stSentRevOffsetsDe }, |
| 985 { "es@ss=standard", testSentenceSuppressionsEs, testSentSuppFwdOffsetsEs, te
stSentSuppRevOffsetsEs }, |
| 986 { "es", testSentenceSuppressionsEs, testSentFwdOffsetsEs, te
stSentRevOffsetsEs }, |
| 987 { NULL, NULL, NULL } |
| 988 }; |
| 989 |
| 990 static void TestBreakIteratorSuppressions(void) { |
| 991 const TestBISuppressionsItem * itemPtr; |
| 992 |
| 993 for (itemPtr = testBISuppressionsItems; itemPtr->locale != NULL; itemPtr++)
{ |
| 994 UChar textU[kTextULenMax]; |
| 995 int32_t textULen = u_unescape(itemPtr->text, textU, kTextULenMax); |
| 996 UErrorCode status = U_ZERO_ERROR; |
| 997 UBreakIterator *bi = ubrk_open(UBRK_SENTENCE, itemPtr->locale, textU, te
xtULen, &status); |
| 998 log_verbose("#%d: %s\n", (itemPtr-testBISuppressionsItems), itemPtr->loc
ale); |
| 999 if (U_SUCCESS(status)) { |
| 1000 int32_t offset, start; |
| 1001 const int32_t * expOffsetPtr; |
| 1002 const int32_t * expOffsetStart; |
| 1003 |
| 1004 expOffsetStart = expOffsetPtr = itemPtr->expFwdOffsets; |
| 1005 ubrk_first(bi); |
| 1006 for (; (offset = ubrk_next(bi)) != UBRK_DONE && *expOffsetPtr >= 0;
expOffsetPtr++) { |
| 1007 if (offset != *expOffsetPtr) { |
| 1008 log_err("FAIL: ubrk_next loc \"%s\", expected %d, got %d\n",
itemPtr->locale, *expOffsetPtr, offset); |
| 1009 } |
| 1010 } |
| 1011 if (offset != UBRK_DONE || *expOffsetPtr >= 0) { |
| 1012 log_err("FAIL: ubrk_next loc \"%s\", expected UBRK_DONE & expOff
set -1, got %d and %d\n", itemPtr->locale, offset, *expOffsetPtr); |
| 1013 } |
| 1014 |
| 1015 expOffsetStart = expOffsetPtr = itemPtr->expFwdOffsets; |
| 1016 start = ubrk_first(bi) + 1; |
| 1017 for (; (offset = ubrk_following(bi, start)) != UBRK_DONE && *expOffs
etPtr >= 0; expOffsetPtr++) { |
| 1018 if (offset != *expOffsetPtr) { |
| 1019 log_err("FAIL: ubrk_following(%d) loc \"%s\", expected %d, g
ot %d\n", start, itemPtr->locale, *expOffsetPtr, offset); |
| 1020 } |
| 1021 start = *expOffsetPtr + 1; |
| 1022 } |
| 1023 if (offset != UBRK_DONE || *expOffsetPtr >= 0) { |
| 1024 log_err("FAIL: ubrk_following(%d) loc \"%s\", expected UBRK_DONE
& expOffset -1, got %d and %d\n", start, itemPtr->locale, offset, *expOffsetPtr
); |
| 1025 } |
| 1026 |
| 1027 expOffsetStart = expOffsetPtr = itemPtr->expRevOffsets; |
| 1028 offset = ubrk_last(bi); |
| 1029 log_verbose("___ @%d ubrk_last\n", offset); |
| 1030 if(offset == 0) { |
| 1031 log_err("FAIL: ubrk_last loc \"%s\" unexpected %d\n", itemPtr->loc
ale, offset); |
| 1032 } |
| 1033 for (; (offset = ubrk_previous(bi)) != UBRK_DONE && *expOffsetPtr >=
0; expOffsetPtr++) { |
| 1034 if (offset != *expOffsetPtr) { |
| 1035 log_err("FAIL: ubrk_previous loc \"%s\", expected %d, got %d
\n", itemPtr->locale, *expOffsetPtr, offset); |
| 1036 } else { |
| 1037 log_verbose("[%d] @%d ubrk_previous()\n", (expOffsetPtr - ex
pOffsetStart), offset); |
| 1038 } |
| 1039 } |
| 1040 if (offset != UBRK_DONE || *expOffsetPtr >= 0) { |
| 1041 log_err("FAIL: ubrk_previous loc \"%s\", expected UBRK_DONE & ex
pOffset[%d] -1, got %d and %d\n", itemPtr->locale, |
| 1042 expOffsetPtr - expOffsetStart, |
| 1043 offset, *expOffsetPtr); |
| 1044 } |
| 1045 |
| 1046 expOffsetStart = expOffsetPtr = itemPtr->expRevOffsets; |
| 1047 start = ubrk_last(bi) - 1; |
| 1048 for (; (offset = ubrk_preceding(bi, start)) != UBRK_DONE && *expOffs
etPtr >= 0; expOffsetPtr++) { |
| 1049 if (offset != *expOffsetPtr) { |
| 1050 log_err("FAIL: ubrk_preceding(%d) loc \"%s\", expected %d, g
ot %d\n", start, itemPtr->locale, *expOffsetPtr, offset); |
| 1051 } |
| 1052 start = *expOffsetPtr - 1; |
| 1053 } |
| 1054 if (start >=0 && (offset != UBRK_DONE || *expOffsetPtr >= 0)) { |
| 1055 log_err("FAIL: ubrk_preceding loc(%d) \"%s\", expected UBRK_DONE
& expOffset -1, got %d and %d\n", start, itemPtr->locale, offset, *expOffsetPtr
); |
| 1056 } |
| 1057 |
| 1058 ubrk_close(bi); |
| 1059 } else { |
| 1060 log_data_err("FAIL: ubrk_open(UBRK_SENTENCE, \"%s\", ...) status %s
(Are you missing data?)\n", itemPtr->locale, u_errorName(status)); |
| 1061 } |
| 1062 } |
| 1063 } |
| 1064 |
| 1065 |
882 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | 1066 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
OLD | NEW |