icu46/source/test/intltest/rbbitst.cpp - Issue 6370014: CJK segmentation patch for ICU 4.6...

Side by Side Diff: icu46/source/test/intltest/rbbitst.cpp

Issue 6370014: CJK segmentation patch for ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 9 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /********************************************************************	1 /********************************************************************

2 * COPYRIGHT:	2 * COPYRIGHT:

3 * Copyright (c) 1999-2010, International Business Machines Corporation and	3 * Copyright (c) 1999-2010, International Business Machines Corporation and

4 * others. All Rights Reserved.	4 * others. All Rights Reserved.

5 ********************************************************************/	5 ********************************************************************/

6 /************************************************************************	6 /************************************************************************

7 * Date Name Description	7 * Date Name Description

8 * 12/15/99 Madhu Creation.	8 * 12/15/99 Madhu Creation.

9 * 01/12/2000 Madhu Updated for changed API and added new tests	9 * 01/12/2000 Madhu Updated for changed API and added new tests

10 ************************************************************************/	10 ************************************************************************/

(...skipping 17 matching lines...) Expand all Loading...
28 #include "unicode/utext.h"	28 #include "unicode/utext.h"

29 #include "intltest.h"	29 #include "intltest.h"

30 #include "rbbitst.h"	30 #include "rbbitst.h"

31 #include <string.h>	31 #include <string.h>

32 #include "uvector.h"	32 #include "uvector.h"

33 #include "uvectr32.h"	33 #include "uvectr32.h"

34 #include "triedict.h"	34 #include "triedict.h"

35 #include <string.h>	35 #include <string.h>

36 #include <stdio.h>	36 #include <stdio.h>

37 #include <stdlib.h>	37 #include <stdlib.h>

	38 #include "unicode/numfmt.h"

	39 #include "unicode/uscript.h"

38	40

39 #define TEST_ASSERT(x) {if (!(x)) { \	41 #define TEST_ASSERT(x) {if (!(x)) { \

40 errln("Failure in file %s, line %d", __FILE__, __LINE__);}}	42 errln("Failure in file %s, line %d", __FILE__, __LINE__);}}

41	43

42 #define TEST_ASSERT_SUCCESS(errcode) { if (U_FAILURE(errcode)) { \	44 #define TEST_ASSERT_SUCCESS(errcode) { if (U_FAILURE(errcode)) { \

43 errcheckln(errcode, "Failure in file %s, line %d, status = \"%s\"", __FILE__ , __LINE__, u_errorName(errcode));}}	45 errcheckln(errcode, "Failure in file %s, line %d, status = \"%s\"", __FILE__ , __LINE__, u_errorName(errcode));}}

44	46

45	47

46 //---------------------------------------------	48 //---------------------------------------------

47 // runIndexedTest	49 // runIndexedTest

(...skipping 83 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
131 case 20: name = "TestTrieDict";	133 case 20: name = "TestTrieDict";

132 if(exec) TestTrieDict(); break;	134 if(exec) TestTrieDict(); break;

133	135

134 #if !UCONFIG_NO_FILE_IO	136 #if !UCONFIG_NO_FILE_IO

135 case 21: name = "TestBug5775";	137 case 21: name = "TestBug5775";

136 if (exec) TestBug5775(); break;	138 if (exec) TestBug5775(); break;

137 case 22: name = "TestThaiBreaks";	139 case 22: name = "TestThaiBreaks";

138 if (exec) TestThaiBreaks(); break;	140 if (exec) TestThaiBreaks(); break;

139 case 23: name = "TestTailoredBreaks";	141 case 23: name = "TestTailoredBreaks";

140 if (exec) TestTailoredBreaks(); break;	142 if (exec) TestTailoredBreaks(); break;

	143 case 24: name = "TestTrieDictWithValue";

	144 if(exec) TestTrieDictWithValue(); break;

141 #else	145 #else

142 case 21: case 22: case 23: name = "skip";	146 case 21: case 22: case 23: case 24: name = "skip";

143 break;	147 break;

144 #endif	148 #endif

145 case 24: name = "TestDictRules";	149 case 25: name = "TestDictRules";

146 if (exec) TestDictRules(); break;	150 if (exec) TestDictRules(); break;

147 case 25: name = "TestBug5532";	151 case 25: name = "TestBug5532";

148 if (exec) TestBug5532(); break;	152 if (exec) TestBug5532(); break;

149 default: name = ""; break; //needed to end loop	153 default: name = ""; break; //needed to end loop

150 }	154 }

151 }	155 }

152	156

153	157

154 //---------------------------------------------------------------------------	158 //---------------------------------------------------------------------------

155 //	159 //

(...skipping 444 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
600 startOfSecondWord = bi->following(0);	604 startOfSecondWord = bi->following(0);

601 if (startOfSecondWord != 4) {	605 if (startOfSecondWord != 4) {

602 errln("Fail at file %s, line %d expected start of word at 4, got %d",	606 errln("Fail at file %s, line %d expected start of word at 4, got %d",

603 __FILE__, __LINE__, startOfSecondWord);	607 __FILE__, __LINE__, startOfSecondWord);

604 }	608 }

605 delete bi;	609 delete bi;

606 }	610 }

607	611

608	612

609 void RBBITest::TestJapaneseWordBreak() {	613 void RBBITest::TestJapaneseWordBreak() {

	614 // TODO: Rewrite this test for a dictionary-based word breaking.

	615 #if 0

610 UErrorCode status = U_ZERO_ERROR;	616 UErrorCode status = U_ZERO_ERROR;

611 BITestData japaneseWordSelection(status);	617 BITestData japaneseWordSelection(status);

612	618

613 ADD_DATACHUNK(japaneseWordSelection, NULL, 0, status); // Break at start of data	619 ADD_DATACHUNK(japaneseWordSelection, NULL, 0, status); // Break at start of data

614 ADD_DATACHUNK(japaneseWordSelection, "\\u4ECA\\u65E5", 400, status); //2	620 ADD_DATACHUNK(japaneseWordSelection, "\\u4ECA\\u65E5", 400, status); //2

615 ADD_DATACHUNK(japaneseWordSelection, "\\u306F\\u3044\\u3044", 300, status); //5	621 ADD_DATACHUNK(japaneseWordSelection, "\\u306F\\u3044\\u3044", 300, status); //5

616 ADD_DATACHUNK(japaneseWordSelection, "\\u5929\\u6C17", 400, status); //7	622 ADD_DATACHUNK(japaneseWordSelection, "\\u5929\\u6C17", 400, status); //7

617 ADD_DATACHUNK(japaneseWordSelection, "\\u3067\\u3059\\u306D", 300, status); //10	623 ADD_DATACHUNK(japaneseWordSelection, "\\u3067\\u3059\\u306D", 300, status); //10

618 ADD_DATACHUNK(japaneseWordSelection, "\\u3002", 0, status); //11	624 ADD_DATACHUNK(japaneseWordSelection, "\\u3002", 0, status); //11

619 ADD_DATACHUNK(japaneseWordSelection, "\\u000D\\u000A", 0, status); //12	625 ADD_DATACHUNK(japaneseWordSelection, "\\u000D\\u000A", 0, status); //12

620	626

621 RuleBasedBreakIterator* e = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(	627 RuleBasedBreakIterator* e = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(

622 Locale("ja"), status);	628 Locale("ja"), status);

623 if (U_FAILURE(status))	629 if (U_FAILURE(status))

624 {	630 {

625 errcheckln(status, "Failed to create the BreakIterator for Japanese locale in TestJapaneseWordBreak.\n");	631 errcheckln(status, "Failed to create the BreakIterator for Japanese locale in TestJapaneseWordBreak.\n");

626 return;	632 return;

627 }	633 }

628	634

629 generalIteratorTest(*e, japaneseWordSelection);	635 generalIteratorTest(*e, japaneseWordSelection);

630 delete e;	636 delete e;

	637 #endif

631 }	638 }

632	639

633 void RBBITest::TestTrieDict() {	640 void RBBITest::TestTrieDict() {

634 UErrorCode status = U_ZERO_ERROR;	641 UErrorCode status = U_ZERO_ERROR;

635	642

636 //	643 //

637 // Open and read the test data file.	644 // Open and read the test data file.

638 //	645 //

639 const char *testDataDirectory = IntlTest::getSourceTestData(status);	646 const char *testDataDirectory = IntlTest::getSourceTestData(status);

640 char testFileName[1000];	647 char testFileName[1000];

(...skipping 201 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
842 delete compactDict;	849 delete compactDict;

843 delete mutableDict;	850 delete mutableDict;

844 delete breaks;	851 delete breaks;

845 delete[] testFile;	852 delete[] testFile;

846 delete enumer1;	853 delete enumer1;

847 delete mutable2;	854 delete mutable2;

848 delete cloneEnum;	855 delete cloneEnum;

849 delete compact2;	856 delete compact2;

850 }	857 }

851	858

	859 /*TODO: delete later*/

	860 inline void writeEnumerationToFile(StringEnumeration *enumer, char *filename){

	861 UErrorCode status = U_ZERO_ERROR;

	862 FILE *outfile = fopen(filename,"w");

	863 UConverter *cvt = ucnv_open("UTF-8", &status);

	864 if (U_FAILURE(status))

	865 return;

	866 if(outfile != NULL){

	867 status = U_ZERO_ERROR;

	868 const UnicodeString *word = enumer->snext(status);

	869 while (word != NULL && U_SUCCESS(status)) {

	870 char u8word[500];

	871 status = U_ZERO_ERROR;

	872 ucnv_fromUChars(cvt, u8word, 500, word->getBuffer(), word->length(),

	873 &status);

	874 fprintf(outfile,"%s\n", u8word);

	875 status = U_ZERO_ERROR;

	876 word = enumer->snext(status);

	877 }

	878 fclose(outfile);

	879 }

	880 ucnv_close(cvt);

	881 }

	882

	883 // A very simple helper class to streamline the buffer handling in

	884 // TestTrieDictWithValue

	885 template<class T, size_t N>

	886 class AutoBuffer {

	887 public:

	888 AutoBuffer(size_t size) : buffer(stackBuffer) {

	889 if (size > N)

	890 buffer = new T[size];

	891 }

	892 ~AutoBuffer() {

	893 if (buffer != stackBuffer)

	894 delete [] buffer;

	895 }

	896 T* elems() {

	897 return buffer;

	898 }

	899 const T& operator[] (size_t i) const {

	900 return buffer[i];

	901 }

	902 T& operator[] (size_t i) {

	903 return buffer[i];

	904 }

	905 private:

	906 T stackBuffer[N];

	907 T* buffer;

	908 AutoBuffer();

	909 };

	910

	911 //----------------------------------------------------------------------------

	912 //

	913 // TestTrieDictWithValue Test trie dictionaries with logprob values and

	914 // more than 2^16 nodes after compaction.

	915 //

	916 //----------------------------------------------------------------------------

	917 void RBBITest::TestTrieDictWithValue() {

	918 UErrorCode status = U_ZERO_ERROR;

	919

	920 //

	921 // Open and read the test data file.

	922 //

	923 const char *testDataDirectory = IntlTest::getSourceTestData(status);

	924 const char *filename = "cjdict-truncated.txt";

	925 char testFileName[1000];

	926 if (testDataDirectory == NULL || strlen(testDataDirectory) + strlen(filename) + 10 >= sizeof(testFileName)) {

	927 errln("Can't open test data. Path too long.");

	928 return;

	929 }

	930 strcpy(testFileName, testDataDirectory);

	931 strcat(testFileName, filename);

	932

	933 // Items needing deleting at the end

	934 MutableTrieDictionary *mutableDict = NULL;

	935 CompactTrieDictionary *compactDict = NULL;

	936 UnicodeSet *breaks = NULL;

	937 UChar *testFile = NULL;

	938 StringEnumeration *enumer1 = NULL;

	939 StringEnumeration *enumer2 = NULL;

	940 MutableTrieDictionary *mutable2 = NULL;

	941 StringEnumeration *cloneEnum = NULL;

	942 CompactTrieDictionary *compact2 = NULL;

	943 NumberFormat *nf = NULL;

	944 UText *originalText = NULL, *cloneText = NULL;

	945

	946 const UnicodeString *originalWord = NULL;

	947 const UnicodeString *cloneWord = NULL;

	948 UChar *current;

	949 UChar *word;

	950 UChar uc;

	951 int32_t wordLen;

	952 int32_t wordCount;

	953 int32_t testCount;

	954 int32_t valueLen;

	955 int counter = 0;

	956

	957 int len;

	958 testFile = ReadAndConvertFile(testFileName, len, NULL, status);

	959 if (U_FAILURE(status)) {

	960 goto cleanup; /* something went wrong, error already output */

	961 }

	962

	963 mutableDict = new MutableTrieDictionary(0x0E1C, status, TRUE);

	964 if (U_FAILURE(status)) {

	965 errln("Error creating MutableTrieDictionary: %s\n", u_errorName(status));

	966 goto cleanup;

	967 }

	968

	969 breaks = new UnicodeSet;

	970 breaks->add(0x000A); // Line Feed

	971 breaks->add(0x000D); // Carriage Return

	972 breaks->add(0x2028); // Line Separator

	973 breaks->add(0x2029); // Paragraph Separator

	974 breaks->add(0x0009); // Tab character

	975

	976 // Now add each non-comment line of the file as a word.

	977 current = testFile;

	978 word = current;

	979 uc = *current++;

	980 wordLen = 0;

	981 wordCount = 0;

	982 nf = NumberFormat::createInstance(status);

	983

	984 while (uc) {

	985 UnicodeString ucharValue;

	986 valueLen = 0;

	987

	988 if (uc == 0x0023) { // #comment line, skip

	989 while (uc && !breaks->contains(uc)) {

	990 uc = *current++;

	991 }

	992 }

	993 else{

	994 while (uc && !breaks->contains(uc)) {

	995 ++wordLen;

	996 uc = *current++;

	997 }

	998 if(uc == 0x0009){ //separator is a tab char, read in num after tab

	999 uc = *current++;

	1000 while (uc && !breaks->contains(uc)) {

	1001 ucharValue.append(uc);

	1002 uc = *current++;

	1003 }

	1004 }

	1005 }

	1006 if (wordLen > 0) {

	1007 Formattable value((int32_t)0);

	1008 nf->parse(ucharValue.getTerminatedBuffer(), value, status);

	1009

	1010 if(U_FAILURE(status)){

	1011 errln("parsing of value failed when reading in dictionary\n");

	1012 goto cleanup;

	1013 }

	1014 mutableDict->addWord(word, wordLen, status, value.getLong());

	1015 if (U_FAILURE(status)) {

	1016 errln("Could not add word to mutable dictionary; status %s\n", u_errorName(status));

	1017 goto cleanup;

	1018 }

	1019 wordCount += 1;

	1020 }

	1021

	1022 // Find beginning of next line

	1023 while (uc && breaks->contains(uc)) {

	1024 uc = *current++;

	1025 }

	1026 word = current-1;

	1027 wordLen = 0;

	1028 }

	1029

	1030 if (wordCount < 50) {

	1031 errln("Word count (%d) unreasonably small\n", wordCount);

	1032 goto cleanup;

	1033 }

	1034

	1035 enumer1 = mutableDict->openWords(status);

	1036 if (U_FAILURE(status)) {

	1037 errln("Could not open mutable dictionary enumerator: %s\n", u_errorName(status));

	1038 goto cleanup;

	1039 }

	1040

	1041 testCount = 0;

	1042 if (wordCount != (testCount = enumer1->count(status))) {

	1043 errln("MutableTrieDictionary word count (%d) differs from file word count (%d), with status %s\n",

	1044 testCount, wordCount, u_errorName(status));

	1045 goto cleanup;

	1046 }

	1047

	1048 // Now compact it

	1049 compactDict = new CompactTrieDictionary(*mutableDict, status);

	1050 if (U_FAILURE(status)) {

	1051 errln("Failed to create CompactTrieDictionary: %s\n", u_errorName(status));

	1052 goto cleanup;

	1053 }

	1054

	1055 enumer2 = compactDict->openWords(status);

	1056 if (U_FAILURE(status)) {

	1057 errln("Could not open compact trie dictionary enumerator: %s\n", u_errorName(status));

	1058 goto cleanup;

	1059 }

	1060

	1061

	1062 //delete later

	1063 // writeEnumerationToFile(enumer1, "/home/jchye/mutable.txt");

	1064 // writeEnumerationToFile(enumer2, "/home/jchye/compact.txt");

	1065

	1066 enumer1->reset(status);

	1067 enumer2->reset(status);

	1068

	1069 originalWord = enumer1->snext(status);

	1070 cloneWord = enumer2->snext(status);

	1071 while (U_SUCCESS(status) && originalWord != NULL && cloneWord != NULL) {

	1072 if (originalWord != cloneWord) {

	1073 errln("MutableTrieDictionary and CompactTrieDictionary word mismatch at %d, lengths are %d and %d\n",

	1074 counter, originalWord->length(), cloneWord->length());

	1075 goto cleanup;

	1076 }

	1077

	1078 // check if attached values of the same word in both dictionaries tally

	1079 #if 0

	1080 int32_t lengths1[originalWord->length()], lengths2[cloneWord->length()];

	1081 uint16_t values1[originalWord->length()], values2[cloneWord->length()];

	1082 #endif

	1083 AutoBuffer<int32_t, 20> lengths1(originalWord->length());

	1084 AutoBuffer<int32_t, 20> lengths2(cloneWord->length());

	1085 AutoBuffer<uint16_t, 20> values1(originalWord->length());

	1086 AutoBuffer<uint16_t, 20> values2(cloneWord->length());

	1087

	1088 originalText = utext_openConstUnicodeString(originalText, originalWord, &status);

	1089 cloneText = utext_openConstUnicodeString(cloneText, cloneWord, &status);

	1090

	1091 int count1, count2;

	1092 mutableDict->matches(originalText, originalWord->length(), lengths1.elems(), count1, originalWord->length(), values1.elems());

	1093 compactDict->matches(cloneText, cloneWord->length(), lengths2.elems(), count2, cloneWord->length(), values2.elems());

	1094

	1095 if(values1[count1-1] != values2[count2-1]){

	1096 errln("Values of word %d in MutableTrieDictionary and CompactTrieDictionary do not match, with values %d and %d\n",

	1097 counter, values1[count1-1], values2[count2-1]);

	1098 goto cleanup;

	1099 }

	1100

	1101 counter++;

	1102 originalWord = enumer1->snext(status);

	1103 cloneWord = enumer2->snext(status);

	1104 }

	1105 if (enumer1->getDynamicClassID() == enumer2->getDynamicClassID()) {

	1106 errln("CompactTrieEnumeration and MutableTrieEnumeration ClassIDs are the same");

	1107 }

	1108

	1109 delete enumer1;

	1110 enumer1 = NULL;

	1111 delete enumer2;

	1112 enumer2 = NULL;

	1113

	1114 // Now un-compact it

	1115 mutable2 = compactDict->cloneMutable(status);

	1116 if (U_FAILURE(status)) {

	1117 errln("Could not clone CompactTrieDictionary to MutableTrieDictionary: %s\n", u_errorName(status));

	1118 goto cleanup;

	1119 }

	1120

	1121 cloneEnum = mutable2->openWords(status);

	1122 if (U_FAILURE(status)) {

	1123 errln("Could not create cloned mutable enumerator: %s\n", u_errorName(status));

	1124 goto cleanup;

	1125 }

	1126

	1127 if (wordCount != (testCount = cloneEnum->count(status))) {

	1128 errln("Cloned MutableTrieDictionary word count (%d) differs from file word count (%d), with status %s\n",

	1129 testCount, wordCount, u_errorName(status));

	1130 goto cleanup;

	1131 }

	1132

	1133 // Compact original dictionary to clone. Note that we can only compare the same kind of

	1134 // dictionary as the order of the enumerators is not guaranteed to be the same between

	1135 // different kinds

	1136 enumer1 = mutableDict->openWords(status);

	1137 if (U_FAILURE(status)) {

	1138 errln("Could not re-open mutable dictionary enumerator: %s\n", u_errorName(status));

	1139 goto cleanup;

	1140 }

	1141

	1142 counter = 0;

	1143 originalWord = enumer1->snext(status);

	1144 cloneWord = cloneEnum->snext(status);

	1145 while (U_SUCCESS(status) && originalWord != NULL && cloneWord != NULL) {

	1146 if (originalWord != cloneWord) {

	1147 errln("Original and cloned MutableTrieDictionary word mismatch\n");

	1148 goto cleanup;

	1149 }

	1150

	1151 // check if attached values of the same word in both dictionaries tally

	1152 AutoBuffer<int32_t, 20> lengths1(originalWord->length());

	1153 AutoBuffer<int32_t, 20> lengths2(cloneWord->length());

	1154 AutoBuffer<uint16_t, 20> values1(originalWord->length());

	1155 AutoBuffer<uint16_t, 20> values2(cloneWord->length());

	1156 originalText = utext_openConstUnicodeString(originalText, originalWord, &status);

	1157 cloneText = utext_openConstUnicodeString(cloneText, cloneWord, &status);

	1158

	1159 int count1, count2;

	1160 mutableDict->matches(originalText, originalWord->length(), lengths1.elems(), count1, originalWord->length(), values1.elems());

	1161 mutable2->matches(cloneText, cloneWord->length(), lengths2.elems(), count2, cloneWord->length(), values2.elems());

	1162

	1163 if(values1[count1-1] != values2[count2-1]){

	1164 errln("Values of word %d in original and cloned MutableTrieDictionary do not match, with values %d and %d\n",

	1165 counter, values1[count1-1], values2[count2-1]);

	1166 goto cleanup;

	1167 }

	1168

	1169 counter++;

	1170

	1171 originalWord = enumer1->snext(status);

	1172 cloneWord = cloneEnum->snext(status);

	1173 }

	1174

	1175 if (U_FAILURE(status)) {

	1176 errln("Enumeration failed: %s\n", u_errorName(status));

	1177 goto cleanup;

	1178 }

	1179

	1180 if (originalWord != cloneWord) {

	1181 errln("Original and cloned MutableTrieDictionary ended enumeration at different points\n");

	1182 goto cleanup;

	1183 }

	1184

	1185 // Test the data copying constructor for CompactTrieDict, and the data access APIs.

	1186 compact2 = new CompactTrieDictionary(compactDict->data(), status);

	1187 if (U_FAILURE(status)) {

	1188 errln("CompactTrieDictionary(const void *,...) failed\n");

	1189 goto cleanup;

	1190 }

	1191

	1192 if (compact2->dataSize() == 0) {

	1193 errln("CompactTrieDictionary->dataSize() == 0\n");

	1194 goto cleanup;

	1195 }

	1196

	1197 // Now count the words via the second dictionary

	1198 delete enumer1;

	1199 enumer1 = compact2->openWords(status);

	1200 if (U_FAILURE(status)) {

	1201 errln("Could not open compact trie dictionary 2 enumerator: %s\n", u_errorName(status));

	1202 goto cleanup;

	1203 }

	1204

	1205 if (wordCount != (testCount = enumer1->count(status))) {

	1206 errln("CompactTrieDictionary 2 word count (%d) differs from file word count (%d), with status %s\n",

	1207 testCount, wordCount, u_errorName(status));

	1208 goto cleanup;

	1209 }

	1210

	1211 cleanup:

	1212 delete compactDict;

	1213 delete mutableDict;

	1214 delete breaks;

	1215 delete[] testFile;

	1216 delete enumer1;

	1217 delete mutable2;

	1218 delete cloneEnum;

	1219 delete compact2;

	1220 utext_close(originalText);

	1221 utext_close(cloneText);

	1222

	1223

	1224 }

852	1225

853 //----------------------------------------------------------------------------	1226 //----------------------------------------------------------------------------

854 //	1227 //

855 // generalIteratorTest Given a break iterator and a set of test data,	1228 // generalIteratorTest Given a break iterator and a set of test data,

856 // Run the tests and report the results.	1229 // Run the tests and report the results.

857 //	1230 //

858 //----------------------------------------------------------------------------	1231 //----------------------------------------------------------------------------

859 void RBBITest::generalIteratorTest(RuleBasedBreakIterator& bi, BITestData &td)	1232 void RBBITest::generalIteratorTest(RuleBasedBreakIterator& bi, BITestData &td)

860 {	1233 {

861	1234

(...skipping 1001 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1863 // UBreakIteratorType UBRK_WORD, Locale "en_US_POSIX"	2236 // UBreakIteratorType UBRK_WORD, Locale "en_US_POSIX"

1864 // Words don't include colon or period (cldrbug #1969).	2237 // Words don't include colon or period (cldrbug #1969).

1865 static const char posxWordText[] = "Can't have breaks in xx:yy or struct.field for CS-types.";	2238 static const char posxWordText[] = "Can't have breaks in xx:yy or struct.field for CS-types.";

1866 static const int32_t posxWordTOffsets[] = { 5, 6, 10, 11, 17, 18, 20, 21, 23, 24, 26, 27, 29, 30, 36, 37, 42, 43, 46, 47, 49, 50, 55, 56 };	2239 static const int32_t posxWordTOffsets[] = { 5, 6, 10, 11, 17, 18, 20, 21, 23, 24, 26, 27, 29, 30, 36, 37, 42, 43, 46, 47, 49, 50, 55, 56 };

1867 static const int32_t posxWordROffsets[] = { 5, 6, 10, 11, 17, 18, 20, 21, 26, 27, 29, 30, 42, 43, 46, 47, 49, 50, 55, 56 };	2240 static const int32_t posxWordROffsets[] = { 5, 6, 10, 11, 17, 18, 20, 21, 26, 27, 29, 30, 42, 43, 46, 47, 49, 50, 55, 56 };

1868	2241

1869 // UBreakIteratorType UBRK_WORD, Locale "ja"	2242 // UBreakIteratorType UBRK_WORD, Locale "ja"

1870 // Don't break in runs of hiragana or runs of ideograph, where the latter includes \u3005 \u3007 \u303B (cldrbug #2009).	2243 // Don't break in runs of hiragana or runs of ideograph, where the latter includes \u3005 \u3007 \u303B (cldrbug #2009).

1871 static const char jaWordText[] = "\\u79C1\\u9054\\u306B\\u4E00\\u3007\\u3007\\u3007\\u306E\\u30B3\\u30F3\\u30D4\\u30E5\\u30FC\\u30BF"	2244 static const char jaWordText[] = "\\u79C1\\u9054\\u306B\\u4E00\\u3007\\u3007\\u3007\\u306E\\u30B3\\u30F3\\u30D4\\u30E5\\u30FC\\u30BF"

1872 "\\u304C\\u3042\\u308B\\u3002\\u5948\\u3005\\u306F\\u30EF\\u30FC\\u30C9\\u3067\\u3042\\u308B\\u3002";	2245 "\\u304C\\u3042\\u308B\\u3002\\u5948\\u3005\\u306F\\u30EF\\u30FC\\u30C9\\u3067\\u3042\\u308B\\u3002";

	2246 #if 0

1873 static const int32_t jaWordTOffsets[] = { 2, 3, 7, 8, 14, 17, 18, 20, 21, 24, 27, 28 };	2247 static const int32_t jaWordTOffsets[] = { 2, 3, 7, 8, 14, 17, 18, 20, 21, 24, 27, 28 };

1874 static const int32_t jaWordROffsets[] = { 1, 2, 3, 4, 5, 6, 7, 8, 14, 15, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28 };	2248 static const int32_t jaWordROffsets[] = { 1, 2, 3, 4, 5, 6, 7, 8, 14, 15, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28 };

	2249 #endif

	2250 // There's no separate Japanese word break iterator. Root is the same as Japanese.

	2251 // Our dictionary-based iterator has to be tweaked to better handle U+3005,

	2252 // U+3007, U+300B and some other cases.

	2253 static const int32_t jaWordTOffsets[] = { 1, 2, 3, 4, 5, 7, 8, 12, 13, 14, 15, 17, 18, 20, 21, 22, 23, 24, 25, 27, 28 };

	2254 static const int32_t jaWordROffsets[] = { 1, 2, 3, 4, 5, 7, 8, 12, 13, 14, 15, 17, 18, 20, 21, 22, 23, 24, 25, 27, 28 };

1875	2255

1876 // UBreakIteratorType UBRK_SENTENCE, Locale "el"	2256 // UBreakIteratorType UBRK_SENTENCE, Locale "el"

1877 // Add break after Greek question mark (cldrbug #2069).	2257 // Add break after Greek question mark (cldrbug #2069).

1878 static const char elSentText[] = "\\u0391\\u03B2, \\u03B3\\u03B4; \\u0395 \\u03B6\\u03B7\\u037E \\u0398 \\u03B9\\u03BA. "	2258 static const char elSentText[] = "\\u0391\\u03B2, \\u03B3\\u03B4; \\u0395 \\u03B6\\u03B7\\u037E \\u0398 \\u03B9\\u03BA. "

1879 "\\u039B\\u03BC \\u03BD\\u03BE! \\u039F\\u03C0, \\u03A1\\u03C2? \\u03A3";	2259 "\\u039B\\u03BC \\u03BD\\u03BE! \\u039F\\u03C0, \\u03A1\\u03C2? \\u03A3";

1880 static const int32_t elSentTOffsets[] = { 8, 14, 20, 27, 35, 36 };	2260 static const int32_t elSentTOffsets[] = { 8, 14, 20, 27, 35, 36 };

1881 static const int32_t elSentROffsets[] = { 20, 27, 35, 36 };	2261 static const int32_t elSentROffsets[] = { 20, 27, 35, 36 };

1882	2262

1883 // UBreakIteratorType UBRK_CHARACTER, Locale "th"	2263 // UBreakIteratorType UBRK_CHARACTER, Locale "th"

1884 // Clusters should not include spacing Thai/Lao vowels (prefix or postfix), except for [SARA] AM (cldrbug #2161).	2264 // Clusters should not include spacing Thai/Lao vowels (prefix or postfix), except for [SARA] AM (cldrbug #2161).

(...skipping 780 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2665 virtual void setText(const UnicodeString &s);	3045 virtual void setText(const UnicodeString &s);

2666 virtual int32_t next(int32_t i);	3046 virtual int32_t next(int32_t i);

2667 private:	3047 private:

2668 UVector *fSets;	3048 UVector *fSets;

2669	3049

2670 UnicodeSet *fCRSet;	3050 UnicodeSet *fCRSet;

2671 UnicodeSet *fLFSet;	3051 UnicodeSet *fLFSet;

2672 UnicodeSet *fNewlineSet;	3052 UnicodeSet *fNewlineSet;

2673 UnicodeSet *fKatakanaSet;	3053 UnicodeSet *fKatakanaSet;

2674 UnicodeSet *fALetterSet;	3054 UnicodeSet *fALetterSet;

	3055 // TODO(jungshik): Do we still need this change?

	3056 // UnicodeSet *fALetterSet; // matches ALetterPlus in word.txt

2675 UnicodeSet *fMidNumLetSet;	3057 UnicodeSet *fMidNumLetSet;

2676 UnicodeSet *fMidLetterSet;	3058 UnicodeSet *fMidLetterSet;

2677 UnicodeSet *fMidNumSet;	3059 UnicodeSet *fMidNumSet;

2678 UnicodeSet *fNumericSet;	3060 UnicodeSet *fNumericSet;

2679 UnicodeSet *fFormatSet;	3061 UnicodeSet *fFormatSet;

2680 UnicodeSet *fOtherSet;	3062 UnicodeSet *fOtherSet;

2681 UnicodeSet *fExtendSet;	3063 UnicodeSet *fExtendSet;

2682 UnicodeSet *fExtendNumLetSet;	3064 UnicodeSet *fExtendNumLetSet;

	3065 UnicodeSet *fDictionaryCjkSet;

2683	3066

2684 RegexMatcher *fMatcher;	3067 RegexMatcher *fMatcher;

2685	3068

2686 const UnicodeString *fText;	3069 const UnicodeString *fText;

2687 };	3070 };

2688	3071

2689	3072

2690 RBBIWordMonkey::RBBIWordMonkey()	3073 RBBIWordMonkey::RBBIWordMonkey()

2691 {	3074 {

2692 UErrorCode status = U_ZERO_ERROR;	3075 UErrorCode status = U_ZERO_ERROR;

2693	3076

2694 fSets = new UVector(status);	3077 fSets = new UVector(status);

2695	3078

2696 fCRSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = CR}]"), status);	3079 fCRSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = CR}]"), status);

2697 fLFSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = LF}]"), status);	3080 fLFSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = LF}]"), status);

2698 fNewlineSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Newline}]"), status);	3081 fNewlineSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Newline}]"), status);

2699 fALetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ALetter}]"), status);	3082 fDictionaryCjkSet= new UnicodeSet("[[\\uac00-\\ud7a3][:Han:][:Hiragana:]]", status);

	3083 // Exclude Hangul syllables from ALetterSet during testing.

	3084 // Leave CJK dictionary characters out from the monkey tests!

	3085 #if 0

	3086 fALetterSet = new UnicodeSet("[\\p{Word_Break = ALetter}"

	3087 "[\\p{Line_Break = Complex_Context}"

	3088 "-\\p{Grapheme_Cluster_Break = Extend}"

	3089 "-\\p{Grapheme_Cluster_Break = Control}"

	3090 "]]",

	3091 status);

	3092 #endif

	3093 fALetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ALetter}]"), status);

	3094 fALetterSet->removeAll(*fDictionaryCjkSet);

2700 fKatakanaSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Katakana}]"), status);	3095 fKatakanaSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Katakana}]"), status);

2701 fMidNumLetSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNumLet}]"), status);	3096 fMidNumLetSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNumLet}]"), status);

2702 fMidLetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidLetter}]"), status);	3097 fMidLetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidLetter}]"), status);

2703 fMidNumSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNum}]"), status);	3098 fMidNumSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNum}]"), status);

2704 fNumericSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Numeric}]"), status);	3099 fNumericSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Numeric}[\\uff10-\\uff19]]"), status);

2705 fFormatSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Format}]"), status);	3100 fFormatSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Format}]"), status);

2706 fExtendNumLetSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ExtendNumLet}]"), status);	3101 fExtendNumLetSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ExtendNumLet}]"), status);

2707 fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Extend}]"), status);	3102 fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Extend}]"), status);

2708	3103

2709 fOtherSet = new UnicodeSet();	3104 fOtherSet = new UnicodeSet();

2710 if(U_FAILURE(status)) {	3105 if(U_FAILURE(status)) {

2711 deferredStatus = status;	3106 deferredStatus = status;

2712 return;	3107 return;

2713 }	3108 }

2714	3109

2715 fOtherSet->complement();	3110 fOtherSet->complement();

2716 fOtherSet->removeAll(*fCRSet);	3111 fOtherSet->removeAll(*fCRSet);

2717 fOtherSet->removeAll(*fLFSet);	3112 fOtherSet->removeAll(*fLFSet);

2718 fOtherSet->removeAll(*fNewlineSet);	3113 fOtherSet->removeAll(*fNewlineSet);

2719 fOtherSet->removeAll(*fKatakanaSet);	3114 fOtherSet->removeAll(*fKatakanaSet);

2720 fOtherSet->removeAll(*fALetterSet);	3115 fOtherSet->removeAll(*fALetterSet);

2721 fOtherSet->removeAll(*fMidLetterSet);	3116 fOtherSet->removeAll(*fMidLetterSet);

2722 fOtherSet->removeAll(*fMidNumSet);	3117 fOtherSet->removeAll(*fMidNumSet);

2723 fOtherSet->removeAll(*fNumericSet);	3118 fOtherSet->removeAll(*fNumericSet);

2724 fOtherSet->removeAll(*fExtendNumLetSet);	3119 fOtherSet->removeAll(*fExtendNumLetSet);

2725 fOtherSet->removeAll(*fFormatSet);	3120 fOtherSet->removeAll(*fFormatSet);

2726 fOtherSet->removeAll(*fExtendSet);	3121 fOtherSet->removeAll(*fExtendSet);

2727 // Inhibit dictionary characters from being tested at all.	3122 // Inhibit dictionary characters from being tested at all.

	3123 fOtherSet->removeAll(*fDictionaryCjkSet);

2728 fOtherSet->removeAll(UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{LineBreak = Complex_Context}]"), status));	3124 fOtherSet->removeAll(UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{LineBreak = Complex_Context}]"), status));

2729	3125

2730 fSets->addElement(fCRSet, status);	3126 fSets->addElement(fCRSet, status);

2731 fSets->addElement(fLFSet, status);	3127 fSets->addElement(fLFSet, status);

2732 fSets->addElement(fNewlineSet, status);	3128 fSets->addElement(fNewlineSet, status);

2733 fSets->addElement(fALetterSet, status);	3129 fSets->addElement(fALetterSet, status);

2734 fSets->addElement(fKatakanaSet, status);	3130 //fSets->addElement(fKatakanaSet, status); //TODO: work out how to test katakana

2735 fSets->addElement(fMidLetterSet, status);	3131 fSets->addElement(fMidLetterSet, status);

2736 fSets->addElement(fMidNumLetSet, status);	3132 fSets->addElement(fMidNumLetSet, status);

2737 fSets->addElement(fMidNumSet, status);	3133 fSets->addElement(fMidNumSet, status);

2738 fSets->addElement(fNumericSet, status);	3134 fSets->addElement(fNumericSet, status);

2739 fSets->addElement(fFormatSet, status);	3135 fSets->addElement(fFormatSet, status);

2740 fSets->addElement(fExtendSet, status);	3136 fSets->addElement(fExtendSet, status);

2741 fSets->addElement(fOtherSet, status);	3137 fSets->addElement(fOtherSet, status);

2742 fSets->addElement(fExtendNumLetSet, status);	3138 fSets->addElement(fExtendNumLetSet, status);

2743	3139

2744 if (U_FAILURE(status)) {	3140 if (U_FAILURE(status)) {

(...skipping 1226 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3971 printStringBreaks(ustr, expected, expectedcount);	4367 printStringBreaks(ustr, expected, expectedcount);

3972 test->errln("isBoundary() failed. Not expecting boundary at position %d", j);	4368 test->errln("isBoundary() failed. Not expecting boundary at position %d", j);

3973 return;	4369 return;

3974 }	4370 }

3975 }	4371 }

3976 }	4372 }

3977	4373

3978 for (i = bi->last(); i != BreakIterator::DONE; i = bi->previous()) {	4374 for (i = bi->last(); i != BreakIterator::DONE; i = bi->previous()) {

3979 count --;	4375 count --;

3980 if (forward[count] != i) {	4376 if (forward[count] != i) {

	4377 printStringBreaks(ustr, expected, expectedcount);

3981 test->errln("happy break test previous() failed: expected %d but got %d",	4378 test->errln("happy break test previous() failed: expected %d but got %d",

3982 forward[count], i);	4379 forward[count], i);

3983 break;	4380 break;

3984 }	4381 }

3985 }	4382 }

3986 if (count != 0) {	4383 if (count != 0) {

3987 printStringBreaks(ustr, expected, expectedcount);	4384 printStringBreaks(ustr, expected, expectedcount);

3988 test->errln("break test previous() failed: missed a match");	4385 test->errln("break test previous() failed: missed a match");

3989 return;	4386 return;

3990 }	4387 }

(...skipping 13 matching lines...) Expand all Loading...
4004 }	4401 }

4005	4402

4006 void RBBITest::TestWordBreaks(void)	4403 void RBBITest::TestWordBreaks(void)

4007 {	4404 {

4008 #if !UCONFIG_NO_REGULAR_EXPRESSIONS	4405 #if !UCONFIG_NO_REGULAR_EXPRESSIONS

4009	4406

4010 Locale locale("en");	4407 Locale locale("en");

4011 UErrorCode status = U_ZERO_ERROR;	4408 UErrorCode status = U_ZERO_ERROR;

4012 // BreakIterator *bi = BreakIterator::createCharacterInstance(locale, status);	4409 // BreakIterator *bi = BreakIterator::createCharacterInstance(locale, status);

4013 BreakIterator *bi = BreakIterator::createWordInstance(locale, status);	4410 BreakIterator *bi = BreakIterator::createWordInstance(locale, status);

	4411 // Replaced any C+J characters in a row with a random sequence of characters

	4412 // of the same length to make our C+J segmentation not get in the way.

4014 static const char *strlist[] =	4413 static const char *strlist[] =

4015 {	4414 {

4016 "\\U000e0032\\u0097\\u0f94\\uc2d8\\u05f4\\U000e0031\\u060d",	4415 "\\U000e0032\\u0097\\u0f94\\uc2d8\\u05f4\\U000e0031\\u060d",

4017 "\\U000e0037\\u4666\\u1202\\u003a\\U000e0031\\u064d\\u0bea\\u591c\\U000e0040\\u003b",	4416 "\\U000e0037\\u2666\\u1202\\u003a\\U000e0031\\u064d\\u0bea\\u091c\\U000e0040\\u003b",

4018 "\\u0589\\u3e99\\U0001d7f3\\U000e0074\\u1810\\u200e\\U000e004b\\u0027\\U000e0061\\u003a",	4417 "\\u0589\\u3e99\\U0001d7f3\\U000e0074\\u1810\\u200e\\U000e004b\\u0027\\U000e0061\\u003a",

4019 "\\u398c\\U000104a5\\U0001d173\\u102d\\u002e\\uca3b\\u002e\\u002c\\u5622",	4418 "\\u398c\\U000104a5\\U0001d173\\u102d\\u002e\\uca3b\\u002e\\u002c\\u5622",

4020 "\\u90ca\\u3588\\u009c\\u0953\\u194b",	4419 "\\uac00\\u3588\\u009c\\u0953\\u194b",

4021 "\\u200e\\U000e0072\\u0a4b\\U000e003f\\ufd2b\\u2027\\u002e\\u002e",	4420 "\\u200e\\U000e0072\\u0a4b\\U000e003f\\ufd2b\\u2027\\u002e\\u002e",

4022 "\\u0602\\u2019\\ua191\\U000e0063\\u0a4c\\u003a\\ub4b5\\u003a\\u827f\\u002e",	4421 "\\u0602\\u2019\\ua191\\U000e0063\\u0a4c\\u003a\\ub4b5\\u003a\\u827f\\u002e",

4023 "\\u7f1f\\uc634\\u65f8\\u0944\\u04f2\\uacdf\\u1f9c\\u05f4\\u002e",	4422 "\\u2f1f\\u1634\\u05f8\\u0944\\u04f2\\u0cdf\\u1f9c\\u05f4\\u002e",

4024 "\\U000e0042\\u002e\\u0fb8\\u09ef\\u0ed1\\u2044",	4423 "\\U000e0042\\u002e\\u0fb8\\u09ef\\u0ed1\\u2044",

4025 "\\u003b\\u024a\\u102e\\U000e0071\\u0600",	4424 "\\u003b\\u024a\\u102e\\U000e0071\\u0600",

4026 "\\u2027\\U000e0067\\u0a47\\u00b7",	4425 "\\u2027\\U000e0067\\u0a47\\u00b7",

4027 "\\u1fcd\\u002c\\u07aa\\u0027\\u11b0",	4426 "\\u1fcd\\u002c\\u07aa\\u0027\\u11b0",

4028 "\\u002c\\U000e003c\\U0001d7f4\\u003a\\u0c6f\\u0027",	4427 "\\u002c\\U000e003c\\U0001d7f4\\u003a\\u0c6f\\u0027",

4029 "\\u0589\\U000e006e\\u0a42\\U000104a5",	4428 "\\u0589\\U000e006e\\u0a42\\U000104a5",

4030 "\\u4f66\\ub523\\u003a\\uacae\\U000e0047\\u003a",	4429 "\\u0f66\\u2523\\u003a\\u0cae\\U000e0047\\u003a",

4031 "\\u003a\\u0f21\\u0668\\u0dab\\u003a\\u0655\\u00b7",	4430 "\\u003a\\u0f21\\u0668\\u0dab\\u003a\\u0655\\u00b7",

4032 "\\u0027\\u11af\\U000e0057\\u0602",	4431 "\\u0027\\u11af\\U000e0057\\u0602",

4033 "\\U0001d7f2\\U000e007\\u0004\\u0589",	4432 "\\U0001d7f2\\U000e007\\u0004\\u0589",

4034 "\\U000e0022\\u003a\\u10b3\\u003a\\ua21b\\u002e\\U000e0058\\u1732\\U000e002b",	4433 "\\U000e0022\\u003a\\u10b3\\u003a\\ua21b\\u002e\\U000e0058\\u1732\\U000e002b",

4035 "\\U0001d7f2\\U000e007d\\u0004\\u0589",	4434 "\\U0001d7f2\\U000e007d\\u0004\\u0589",

4036 "\\u82ab\\u17e8\\u0736\\u2019\\U0001d64d",	4435 "\\u82ab\\u17e8\\u0736\\u2019\\U0001d64d",

4037 "\\ub55c\\u0a68\\U000e0037\\u0cd6\\u002c\\ub959",	4436 "\\ub55c\\u0a68\\U000e0037\\u0cd6\\u002c\\ub959",

4038 "\\U000e0065\\u302c\\uc986\\u09ee\\U000e0068",	4437 "\\U000e0065\\u302c\\uc986\\u09ee\\U000e0068",

4039 "\\u0be8\\u002e\\u0c68\\u066e\\u136d\\ufc99\\u59e7",	4438 "\\u0be8\\u002e\\u0c68\\u066e\\u136d\\ufc99\\u59e7",

4040 "\\u0233\\U000e0020\\u0a69\\u0d6a",	4439 "\\u0233\\U000e0020\\u0a69\\u0d6a",

4041 "\\u206f\\u0741\\ub3ab\\u2019\\ubcac\\u2019",	4440 "\\u206f\\u0741\\ub3ab\\u2019\\ubcac\\u2019",

4042 "\\u58f4\\U000e0049\\u20e7\\u2027",	4441 "\\u18f4\\U000e0049\\u20e7\\u2027",

4043 "\\ub315\\U0001d7e5\\U000e0073\\u0c47\\u06f2\\u0c6a\\u0037\\u10fe",	4442 "\\ub315\\U0001d7e5\\U000e0073\\u0c47\\u06f2\\u0c6a\\u0037\\u10fe",

4044 "\\ua183\\u102d\\u0bec\\u003a",	4443 "\\ua183\\u102d\\u0bec\\u003a",

4045 "\\u17e8\\u06e7\\u002e\\u096d\\u003b",	4444 "\\u17e8\\u06e7\\u002e\\u096d\\u003b",

4046 "\\u003a\\u0e57\\u0fad\\u002e",	4445 "\\u003a\\u0e57\\u0fad\\u002e",

4047 "\\u002e\\U000e004c\\U0001d7ea\\u05bb\\ud0fd\\u02de",	4446 "\\u002e\\U000e004c\\U0001d7ea\\u05bb\\ud0fd\\u02de",

4048 "\\u32e6\\U0001d7f6\\u0fa1\\u206a\\U000e003c\\u0cec\\u003a",	4447 "\\u32e6\\U0001d7f6\\u0fa1\\u206a\\U000e003c\\u0cec\\u003a",

4049 "\\U000e005d\\u2044\\u0731\\u0650\\u0061",	4448 "\\U000e005d\\u2044\\u0731\\u0650\\u0061",

4050 "\\u003a\\u0664\\u00b7\\u1fba",	4449 "\\u003a\\u0664\\u00b7\\u1fba",

4051 "\\u003b\\u0027\\u00b7\\u47a3",	4450 "\\u003b\\u0027\\u00b7\\u47a3",

4052 "\\u2027\\U000e0067\\u0a42\\u00b7\\ubddf\\uc26c\\u003a\\u4186\\u041b",	4451 "\\u2027\\U000e0067\\u0a42\\u00b7\\u4edf\\uc26c\\u003a\\u4186\\u041b",

4053 "\\u0027\\u003a\\U0001d70f\\U0001d7df\\ubf4a\\U0001d7f5\\U0001d177\\u003a\\u0e51\\u1058\\U000e0058\\u00b7\\u0673",	4452 "\\u0027\\u003a\\U0001d70f\\U0001d7df\\ubf4a\\U0001d7f5\\U0001d177\\u003a\\u0e51\\u1058\\U000e0058\\u00b7\\u0673",

4054 "\\uc30d\\u002e\\U000e002c\\u0c48\\u003a\\ub5a1\\u0661\\u002c",	4453 "\\uc30d\\u002e\\U000e002c\\u0c48\\u003a\\ub5a1\\u0661\\u002c",

4055 };	4454 };

4056 int loop;	4455 int loop;

4057 if (U_FAILURE(status)) {	4456 if (U_FAILURE(status)) {

4058 errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));	4457 errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));

4059 return;	4458 return;

4060 }	4459 }

4061 for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) {	4460 for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) {

4062 // printf("looping %d\n", loop);	4461 // printf("looping %d\n", loop);

(...skipping 34 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4097 "\\u002c\\U000e003c\\U0001d7f4\\u003a\\u0c6f\\u0027",	4496 "\\u002c\\U000e003c\\U0001d7f4\\u003a\\u0c6f\\u0027",

4098 "\\u0589\\U000e006e\\u0a42\\U000104a5",	4497 "\\u0589\\U000e006e\\u0a42\\U000104a5",

4099 "\\u4f66\\ub523\\u003a\\uacae\\U000e0047\\u003a",	4498 "\\u4f66\\ub523\\u003a\\uacae\\U000e0047\\u003a",

4100 "\\u003a\\u0f21\\u0668\\u0dab\\u003a\\u0655\\u00b7",	4499 "\\u003a\\u0f21\\u0668\\u0dab\\u003a\\u0655\\u00b7",

4101 "\\u0027\\u11af\\U000e0057\\u0602",	4500 "\\u0027\\u11af\\U000e0057\\u0602",

4102 "\\U0001d7f2\\U000e007\\u0004\\u0589",	4501 "\\U0001d7f2\\U000e007\\u0004\\u0589",

4103 "\\U000e0022\\u003a\\u10b3\\u003a\\ua21b\\u002e\\U000e0058\\u1732\\U000e002b",	4502 "\\U000e0022\\u003a\\u10b3\\u003a\\ua21b\\u002e\\U000e0058\\u1732\\U000e002b",

4104 "\\U0001d7f2\\U000e007d\\u0004\\u0589",	4503 "\\U0001d7f2\\U000e007d\\u0004\\u0589",

4105 "\\u82ab\\u17e8\\u0736\\u2019\\U0001d64d",	4504 "\\u82ab\\u17e8\\u0736\\u2019\\U0001d64d",

4106 "\\u0e01\\ub55c\\u0a68\\U000e0037\\u0cd6\\u002c\\ub959",	4505 "\\u0e01\\ub55c\\u0a68\\U000e0037\\u0cd6\\u002c\\ub959",

4107 "\\U000e0065\\u302c\\uc986\\u09ee\\U000e0068",	4506 "\\U000e0065\\u302c\\u09ee\\U000e0068",

4108 "\\u0be8\\u002e\\u0c68\\u066e\\u136d\\ufc99\\u59e7",	4507 "\\u0be8\\u002e\\u0c68\\u066e\\u136d\\ufc99\\u59e7",

4109 "\\u0233\\U000e0020\\u0a69\\u0d6a",	4508 "\\u0233\\U000e0020\\u0a69\\u0d6a",

4110 "\\u206f\\u0741\\ub3ab\\u2019\\ubcac\\u2019",	4509 "\\u206f\\u0741\\ub3ab\\u2019\\ubcac\\u2019",

4111 "\\u58f4\\U000e0049\\u20e7\\u2027",	4510 "\\u58f4\\U000e0049\\u20e7\\u2027",

4112 "\\ub315\\U0001d7e5\\U000e0073\\u0c47\\u06f2\\u0c6a\\u0037\\u10fe",	4511 "\\U0001d7e5\\U000e0073\\u0c47\\u06f2\\u0c6a\\u0037\\u10fe",

4113 "\\ua183\\u102d\\u0bec\\u003a",	4512 "\\ua183\\u102d\\u0bec\\u003a",

4114 "\\u17e8\\u06e7\\u002e\\u096d\\u003b",	4513 "\\u17e8\\u06e7\\u002e\\u096d\\u003b",

4115 "\\u003a\\u0e57\\u0fad\\u002e",	4514 "\\u003a\\u0e57\\u0fad\\u002e",

4116 "\\u002e\\U000e004c\\U0001d7ea\\u05bb\\ud0fd\\u02de",	4515 "\\u002e\\U000e004c\\U0001d7ea\\u05bb\\ud0fd\\u02de",

4117 "\\u32e6\\U0001d7f6\\u0fa1\\u206a\\U000e003c\\u0cec\\u003a",	4516 "\\u32e6\\U0001d7f6\\u0fa1\\u206a\\U000e003c\\u0cec\\u003a",

4118 "\\ua2a5\\u0038\\u2044\\u002e\\u0c67\\U000e003c\\u05f4\\u2027\\u05f4\\u2019",	4517 "\\ua2a5\\u0038\\u2044\\u002e\\u0c67\\U000e003c\\u05f4\\u2027\\u05f4\\u2019",

4119 "\\u003a\\u0664\\u00b7\\u1fba",	4518 "\\u003a\\u0664\\u00b7\\u1fba",

4120 "\\u003b\\u0027\\u00b7\\u47a3",	4519 "\\u003b\\u0027\\u00b7\\u47a3",

4121 };	4520 };

4122 int loop;	4521 int loop;

(...skipping 662 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4785 pos = bi->last();	5184 pos = bi->last();

4786 do {	5185 do {

4787 // ruleStatus = bi->getRuleStatus();	5186 // ruleStatus = bi->getRuleStatus();

4788 printf("%d\t%d\n", pos, ruleStatus);	5187 printf("%d\t%d\n", pos, ruleStatus);

4789 pos = bi->previous();	5188 pos = bi->previous();

4790 } while (pos != BreakIterator::DONE);	5189 } while (pos != BreakIterator::DONE);

4791 #endif	5190 #endif

4792 }	5191 }

4793	5192

4794 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */	5193 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */

OLD	NEW

« no previous file with comments | « icu46/source/test/intltest/rbbitst.h ('k') | icu46/source/test/testdata/rbbitst.txt » ('j') | no next file with comments »