OLD | NEW |
1 /******************************************************************** | 1 /******************************************************************** |
2 * COPYRIGHT: | 2 * COPYRIGHT: |
3 * Copyright (c) 1997-2013, International Business Machines Corporation and | 3 * Copyright (c) 1997-2014, International Business Machines Corporation and |
4 * others. All Rights Reserved. | 4 * others. All Rights Reserved. |
5 ********************************************************************/ | 5 ********************************************************************/ |
6 /*******************************************************************************
* | 6 /*******************************************************************************
* |
7 * | 7 * |
8 * File CITERTST.C | 8 * File CITERTST.C |
9 * | 9 * |
10 * Modification History: | 10 * Modification History: |
11 * Date Name Description | 11 * Date Name Description |
12 * Madhu Katragadda Ported for C API | 12 * Madhu Katragadda Ported for C API |
13 * 02/19/01 synwee Modified test case for new collation iterator | 13 * 02/19/01 synwee Modified test case for new collation iterator |
(...skipping 14 matching lines...) Expand all Loading... |
28 #include "unicode/ustring.h" | 28 #include "unicode/ustring.h" |
29 #include "unicode/putil.h" | 29 #include "unicode/putil.h" |
30 #include "callcoll.h" | 30 #include "callcoll.h" |
31 #include "cmemory.h" | 31 #include "cmemory.h" |
32 #include "cintltst.h" | 32 #include "cintltst.h" |
33 #include "citertst.h" | 33 #include "citertst.h" |
34 #include "ccolltst.h" | 34 #include "ccolltst.h" |
35 #include "filestrm.h" | 35 #include "filestrm.h" |
36 #include "cstring.h" | 36 #include "cstring.h" |
37 #include "ucol_imp.h" | 37 #include "ucol_imp.h" |
38 #include "ucol_tok.h" | |
39 #include "uparse.h" | 38 #include "uparse.h" |
40 #include <stdio.h> | 39 #include <stdio.h> |
41 | 40 |
42 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *); | 41 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *); |
43 | 42 |
44 void addCollIterTest(TestNode** root) | 43 void addCollIterTest(TestNode** root) |
45 { | 44 { |
46 addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious"); | 45 addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious"); |
47 addTest(root, &TestOffset, "tscoll/citertst/TestOffset"); | 46 addTest(root, &TestOffset, "tscoll/citertst/TestOffset"); |
48 addTest(root, &TestSetText, "tscoll/citertst/TestSetText"); | 47 addTest(root, &TestSetText, "tscoll/citertst/TestSetText"); |
49 addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion"); | 48 addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion"); |
50 addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar"); | 49 addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar"); |
51 addTest(root, &TestNormalizedUnicodeChar, | 50 addTest(root, &TestNormalizedUnicodeChar, |
52 "tscoll/citertst/TestNormalizedUnicodeChar"); | 51 "tscoll/citertst/TestNormalizedUnicodeChar"); |
53 addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization"); | 52 addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization"); |
54 addTest(root, &TestBug672, "tscoll/citertst/TestBug672"); | 53 addTest(root, &TestBug672, "tscoll/citertst/TestBug672"); |
55 addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize"); | 54 addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize"); |
56 addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer"); | 55 addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer"); |
57 addTest(root, &TestCEs, "tscoll/citertst/TestCEs"); | |
58 addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos"); | 56 addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos"); |
59 addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow")
; | |
60 addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity"); | |
61 addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity"); | |
62 addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollat
orElements"); | 57 addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollat
orElements"); |
63 } | 58 } |
64 | 59 |
65 /* The locales we support */ | 60 /* The locales we support */ |
66 | 61 |
67 static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"}; | 62 static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"}; |
68 | 63 |
69 static void TestBug672() { | 64 static void TestBug672() { |
70 UErrorCode status = U_ZERO_ERROR; | 65 UErrorCode status = U_ZERO_ERROR; |
71 UChar pattern[20]; | 66 UChar pattern[20]; |
(...skipping 684 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
756 { | 751 { |
757 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status
)); | 752 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status
)); |
758 } | 753 } |
759 else | 754 else |
760 { | 755 { |
761 assertEqual(iter1, iter2); | 756 assertEqual(iter1, iter2); |
762 } | 757 } |
763 | 758 |
764 /* Now set it to point to a null string with fake length*/ | 759 /* Now set it to point to a null string with fake length*/ |
765 ucol_setText(iter2, NULL, 2, &status); | 760 ucol_setText(iter2, NULL, 2, &status); |
766 if (U_FAILURE(status)) | 761 if (status != U_ILLEGAL_ARGUMENT_ERROR) |
767 { | 762 { |
768 log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status)
); | 763 log_err("call to iter2->setText(null, 2) should yield an illegal-argumen
t-error - %s\n", |
769 } | 764 myErrorName(status)); |
770 else | |
771 { | |
772 if (ucol_next(iter2, &status) != UCOL_NULLORDER) { | |
773 log_err("iter2 with null text expected to return UCOL_NULLORDER\n"); | |
774 } | |
775 } | 765 } |
776 | 766 |
777 ucol_closeElements(iter2); | 767 ucol_closeElements(iter2); |
778 ucol_closeElements(iter1); | 768 ucol_closeElements(iter1); |
779 ucol_close(en_us); | 769 ucol_close(en_us); |
780 } | 770 } |
781 | 771 |
782 /** @bug 4108762 | 772 /** @bug 4108762 |
783 * Test for getMaxExpansion() | 773 * Test for getMaxExpansion() |
784 */ | 774 */ |
(...skipping 219 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1004 ucol_next(testiter, &status); | 994 ucol_next(testiter, &status); |
1005 ucol_closeElements(testiter); | 995 ucol_closeElements(testiter); |
1006 ucol_closeElements(iter); | 996 ucol_closeElements(iter); |
1007 ucol_close(coll); | 997 ucol_close(coll); |
1008 } else { | 998 } else { |
1009 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(statu
s)); | 999 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(statu
s)); |
1010 } | 1000 } |
1011 } | 1001 } |
1012 | 1002 |
1013 /** | 1003 /** |
1014 * Sniplets of code from genuca | |
1015 */ | |
1016 static int32_t hex2num(char hex) { | |
1017 if(hex>='0' && hex <='9') { | |
1018 return hex-'0'; | |
1019 } else if(hex>='a' && hex<='f') { | |
1020 return hex-'a'+10; | |
1021 } else if(hex>='A' && hex<='F') { | |
1022 return hex-'A'+10; | |
1023 } else { | |
1024 return 0; | |
1025 } | |
1026 } | |
1027 | |
1028 /** | |
1029 * Getting codepoints from a string | |
1030 * @param str character string contain codepoints seperated by space and ended | |
1031 * by a semicolon | |
1032 * @param codepoints array for storage, assuming size > 5 | |
1033 * @return position at the end of the codepoint section | |
1034 */ | |
1035 static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) { | |
1036 UErrorCode errorCode = U_ZERO_ERROR; | |
1037 char *semi = uprv_strchr(str, ';'); | |
1038 char *pipe = uprv_strchr(str, '|'); | |
1039 char *s; | |
1040 *codepoints = 0; | |
1041 *contextCPs = 0; | |
1042 if(semi == NULL) { | |
1043 log_err("expected semicolon after code point string in FractionalUCA.txt
%s\n", str); | |
1044 return str; | |
1045 } | |
1046 if(pipe != NULL) { | |
1047 int32_t contextLength; | |
1048 *pipe = 0; | |
1049 contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode); | |
1050 *pipe = '|'; | |
1051 if(U_FAILURE(errorCode)) { | |
1052 log_err("error parsing precontext string from FractionalUCA.txt %s\n
", str); | |
1053 return str; | |
1054 } | |
1055 /* prepend the precontext string to the codepoints */ | |
1056 u_memcpy(codepoints, contextCPs, contextLength); | |
1057 codepoints += contextLength; | |
1058 /* start of the code point string */ | |
1059 s = pipe + 1; | |
1060 } else { | |
1061 s = str; | |
1062 } | |
1063 u_parseString(s, codepoints, 99, NULL, &errorCode); | |
1064 if(U_FAILURE(errorCode)) { | |
1065 log_err("error parsing code point string from FractionalUCA.txt %s\n", s
tr); | |
1066 return str; | |
1067 } | |
1068 return semi + 1; | |
1069 } | |
1070 | |
1071 /** | |
1072 * Sniplets of code from genuca | |
1073 */ | |
1074 static int32_t | |
1075 readElement(char **from, char *to, char separator, UErrorCode *status) | |
1076 { | |
1077 if (U_SUCCESS(*status)) { | |
1078 char buffer[1024]; | |
1079 int32_t i = 0; | |
1080 while (**from != separator) { | |
1081 if (**from != ' ') { | |
1082 *(buffer+i++) = **from; | |
1083 } | |
1084 (*from)++; | |
1085 } | |
1086 (*from)++; | |
1087 *(buffer + i) = 0; | |
1088 strcpy(to, buffer); | |
1089 return i/2; | |
1090 } | |
1091 | |
1092 return 0; | |
1093 } | |
1094 | |
1095 /** | |
1096 * Sniplets of code from genuca | |
1097 */ | |
1098 static uint32_t | |
1099 getSingleCEValue(char *primary, char *secondary, char *tertiary, | |
1100 UErrorCode *status) | |
1101 { | |
1102 if (U_SUCCESS(*status)) { | |
1103 uint32_t value = 0; | |
1104 char primsave = '\0'; | |
1105 char secsave = '\0'; | |
1106 char tersave = '\0'; | |
1107 char *primend = primary+4; | |
1108 char *secend = secondary+2; | |
1109 char *terend = tertiary+2; | |
1110 uint32_t primvalue; | |
1111 uint32_t secvalue; | |
1112 uint32_t tervalue; | |
1113 | |
1114 if (uprv_strlen(primary) > 4) { | |
1115 primsave = *primend; | |
1116 *primend = '\0'; | |
1117 } | |
1118 | |
1119 if (uprv_strlen(secondary) > 2) { | |
1120 secsave = *secend; | |
1121 *secend = '\0'; | |
1122 } | |
1123 | |
1124 if (uprv_strlen(tertiary) > 2) { | |
1125 tersave = *terend; | |
1126 *terend = '\0'; | |
1127 } | |
1128 | |
1129 primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0; | |
1130 secvalue = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0; | |
1131 tervalue = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0; | |
1132 if(primvalue <= 0xFF) { | |
1133 primvalue <<= 8; | |
1134 } | |
1135 | |
1136 value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK) | |
1137 | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK) | |
1138 | (tervalue & UCOL_TERTIARYORDERMASK); | |
1139 | |
1140 if(primsave!='\0') { | |
1141 *primend = primsave; | |
1142 } | |
1143 if(secsave!='\0') { | |
1144 *secend = secsave; | |
1145 } | |
1146 if(tersave!='\0') { | |
1147 *terend = tersave; | |
1148 } | |
1149 return value; | |
1150 } | |
1151 return 0; | |
1152 } | |
1153 | |
1154 /** | |
1155 * Getting collation elements generated from a string | |
1156 * @param str character string contain collation elements contained in [] and | |
1157 * seperated by space | |
1158 * @param ce array for storage, assuming size > 20 | |
1159 * @param status error status | |
1160 * @return position at the end of the codepoint section | |
1161 */ | |
1162 static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) { | |
1163 char *pStartCP = uprv_strchr(str, '['); | |
1164 int count = 0; | |
1165 char *pEndCP; | |
1166 char primary[100]; | |
1167 char secondary[100]; | |
1168 char tertiary[100]; | |
1169 | |
1170 while (*pStartCP == '[') { | |
1171 uint32_t primarycount = 0; | |
1172 uint32_t secondarycount = 0; | |
1173 uint32_t tertiarycount = 0; | |
1174 uint32_t CEi = 1; | |
1175 pEndCP = strchr(pStartCP, ']'); | |
1176 if(pEndCP == NULL) { | |
1177 break; | |
1178 } | |
1179 pStartCP ++; | |
1180 | |
1181 primarycount = readElement(&pStartCP, primary, ',', status); | |
1182 secondarycount = readElement(&pStartCP, secondary, ',', status); | |
1183 tertiarycount = readElement(&pStartCP, tertiary, ']', status); | |
1184 | |
1185 /* I want to get the CEs entered right here, including continuation */ | |
1186 ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status); | |
1187 if (U_FAILURE(*status)) { | |
1188 break; | |
1189 } | |
1190 | |
1191 while (2 * CEi < primarycount || CEi < secondarycount || | |
1192 CEi < tertiarycount) { | |
1193 uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */ | |
1194 if (2 * CEi < primarycount) { | |
1195 value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28); | |
1196 value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24); | |
1197 } | |
1198 | |
1199 if (2 * CEi + 1 < primarycount) { | |
1200 value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20); | |
1201 value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16); | |
1202 } | |
1203 | |
1204 if (CEi < secondarycount) { | |
1205 value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12); | |
1206 value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8); | |
1207 } | |
1208 | |
1209 if (CEi < tertiarycount) { | |
1210 value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4); | |
1211 value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF); | |
1212 } | |
1213 | |
1214 CEi ++; | |
1215 ces[count ++] = value; | |
1216 } | |
1217 | |
1218 pStartCP = pEndCP + 1; | |
1219 } | |
1220 ces[count] = 0; | |
1221 return pStartCP; | |
1222 } | |
1223 | |
1224 /** | |
1225 * Getting the FractionalUCA.txt file stream | |
1226 */ | |
1227 static FileStream * getFractionalUCA(void) | |
1228 { | |
1229 char newPath[256]; | |
1230 char backupPath[256]; | |
1231 FileStream *result = NULL; | |
1232 | |
1233 /* Look inside ICU_DATA first */ | |
1234 uprv_strcpy(newPath, ctest_dataSrcDir()); | |
1235 uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING ); | |
1236 uprv_strcat(newPath, "FractionalUCA.txt"); | |
1237 | |
1238 /* As a fallback, try to guess where the source data was located | |
1239 * at the time ICU was built, and look there. | |
1240 */ | |
1241 #if defined (U_TOPSRCDIR) | |
1242 strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data"); | |
1243 #else | |
1244 { | |
1245 UErrorCode errorCode = U_ZERO_ERROR; | |
1246 strcpy(backupPath, loadTestData(&errorCode)); | |
1247 strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_
SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data"); | |
1248 } | |
1249 #endif | |
1250 strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "Fractional
UCA.txt"); | |
1251 | |
1252 result = T_FileStream_open(newPath, "rb"); | |
1253 | |
1254 if (result == NULL) { | |
1255 result = T_FileStream_open(backupPath, "rb"); | |
1256 if (result == NULL) { | |
1257 log_err("Failed to open either %s or %s\n", newPath, backupPath); | |
1258 } | |
1259 } | |
1260 return result; | |
1261 } | |
1262 | |
1263 /** | |
1264 * Testing the CEs returned by the iterator | |
1265 */ | |
1266 static void TestCEs() { | |
1267 FileStream *file = NULL; | |
1268 char line[2048]; | |
1269 char *str; | |
1270 UChar codepoints[10]; | |
1271 uint32_t ces[20]; | |
1272 UErrorCode status = U_ZERO_ERROR; | |
1273 UCollator *coll = ucol_open("", &status); | |
1274 uint32_t lineNo = 0; | |
1275 UChar contextCPs[5]; | |
1276 | |
1277 if (U_FAILURE(status)) { | |
1278 log_err_status(status, "Error in opening root collator -> %s\n", u_error
Name(status)); | |
1279 return; | |
1280 } | |
1281 | |
1282 file = getFractionalUCA(); | |
1283 | |
1284 if (file == NULL) { | |
1285 log_err("*** unable to open input FractionalUCA.txt file ***\n"); | |
1286 return; | |
1287 } | |
1288 | |
1289 | |
1290 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) { | |
1291 int count = 0; | |
1292 UCollationElements *iter; | |
1293 int32_t preContextCeLen=0; | |
1294 lineNo++; | |
1295 /* skip this line if it is empty or a comment or is a return value | |
1296 or start of some variable section */ | |
1297 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' || | |
1298 line[0] == 0x000D || line[0] == '[') { | |
1299 continue; | |
1300 } | |
1301 | |
1302 str = getCodePoints(line, codepoints, contextCPs); | |
1303 | |
1304 /* these are 'fake' codepoints in the fractional UCA, and are used just | |
1305 * for positioning of indirect values. They should not go through this | |
1306 * test. | |
1307 */ | |
1308 if(*codepoints == 0xFDD0) { | |
1309 continue; | |
1310 } | |
1311 if (*contextCPs != 0) { | |
1312 iter = ucol_openElements(coll, contextCPs, -1, &status); | |
1313 if (U_FAILURE(status)) { | |
1314 log_err("Error in opening collation elements\n"); | |
1315 break; | |
1316 } | |
1317 while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t
)UCOL_NULLORDER) { | |
1318 preContextCeLen++; | |
1319 } | |
1320 ucol_closeElements(iter); | |
1321 } | |
1322 | |
1323 getCEs(str, ces+preContextCeLen, &status); | |
1324 if (U_FAILURE(status)) { | |
1325 log_err("Error in parsing collation elements in FractionalUCA.txt\n"
); | |
1326 break; | |
1327 } | |
1328 iter = ucol_openElements(coll, codepoints, -1, &status); | |
1329 if (U_FAILURE(status)) { | |
1330 log_err("Error in opening collation elements\n"); | |
1331 break; | |
1332 } | |
1333 for (;;) { | |
1334 uint32_t ce = (uint32_t)ucol_next(iter, &status); | |
1335 if (ce == 0xFFFFFFFF) { | |
1336 ce = 0; | |
1337 } | |
1338 /* we now unconditionally reorder Thai/Lao prevowels, so this | |
1339 * test would fail if we don't skip here. | |
1340 */ | |
1341 if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) { | |
1342 continue; | |
1343 } | |
1344 if (ce != ces[count] || U_FAILURE(status)) { | |
1345 log_err("Collation elements in FractionalUCA.txt and iterators d
o not match!\n"); | |
1346 break; | |
1347 } | |
1348 if (ces[count] == 0) { | |
1349 break; | |
1350 } | |
1351 count ++; | |
1352 } | |
1353 ucol_closeElements(iter); | |
1354 } | |
1355 | |
1356 T_FileStream_close(file); | |
1357 ucol_close(coll); | |
1358 } | |
1359 | |
1360 /** | |
1361 * Testing the discontigous contractions | 1004 * Testing the discontigous contractions |
1362 */ | 1005 */ |
1363 static void TestDiscontiguos() { | 1006 static void TestDiscontiguos() { |
1364 const char *rulestr = | 1007 const char *rulestr = |
1365 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315"; | 1008 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315"; |
1366 UChar rule[50]; | 1009 UChar rule[50]; |
1367 int rulelen = u_unescape(rulestr, rule, 50); | 1010 int rulelen = u_unescape(rulestr, rule, 50); |
1368 const char *src[] = { | 1011 const char *src[] = { |
1369 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC", | 1012 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC", |
1370 /* base character blocked */ | 1013 /* base character blocked */ |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1460 } | 1103 } |
1461 ucol_reset(iter); | 1104 ucol_reset(iter); |
1462 backAndForth(iter); | 1105 backAndForth(iter); |
1463 count ++; | 1106 count ++; |
1464 } | 1107 } |
1465 ucol_closeElements(resultiter); | 1108 ucol_closeElements(resultiter); |
1466 ucol_closeElements(iter); | 1109 ucol_closeElements(iter); |
1467 ucol_close(coll); | 1110 ucol_close(coll); |
1468 } | 1111 } |
1469 | 1112 |
1470 static void TestCEBufferOverflow() | |
1471 { | |
1472 UChar str[UCOL_EXPAND_CE_BUFFER_SIZE + 1]; | |
1473 UErrorCode status = U_ZERO_ERROR; | |
1474 UChar rule[10]; | |
1475 UCollator *coll; | |
1476 UCollationElements *iter; | |
1477 | |
1478 u_uastrcpy(rule, "&z < AB"); | |
1479 coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH,
NULL,&status); | |
1480 if (U_FAILURE(status)) { | |
1481 log_err_status(status, "Rule based collator not created for testing ce b
uffer overflow -> %s\n", u_errorName(status)); | |
1482 return; | |
1483 } | |
1484 | |
1485 /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic | |
1486 test. this will cause an overflow in getPrev */ | |
1487 str[0] = 0x0041; /* 'A' */ | |
1488 /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/ | |
1489 uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE); | |
1490 str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042; /* 'B' */ | |
1491 iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1, | |
1492 &status); | |
1493 if (ucol_previous(iter, &status) == UCOL_NULLORDER || | |
1494 status == U_BUFFER_OVERFLOW_ERROR) { | |
1495 log_err("CE buffer should not overflow with long string of trail surroga
tes\n"); | |
1496 } | |
1497 ucol_closeElements(iter); | |
1498 ucol_close(coll); | |
1499 } | |
1500 | |
1501 /** | |
1502 * Checking collation element validity. | |
1503 */ | |
1504 #define MAX_CODEPOINTS_TO_SHOW 10 | |
1505 static void showCodepoints(const UChar *codepoints, int length, char * codepoint
Text) { | |
1506 int i, lengthToUse = length; | |
1507 if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) { | |
1508 lengthToUse = MAX_CODEPOINTS_TO_SHOW; | |
1509 } | |
1510 for (i = 0; i < lengthToUse; ++i) { | |
1511 int bytesWritten = sprintf(codepointText, " %04X", *codepoints++); | |
1512 if (bytesWritten <= 0) { | |
1513 break; | |
1514 } | |
1515 codepointText += bytesWritten; | |
1516 } | |
1517 if (i < length) { | |
1518 sprintf(codepointText, " ..."); | |
1519 } | |
1520 } | |
1521 | |
1522 static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints, | |
1523 int length) | |
1524 { | |
1525 UErrorCode status = U_ZERO_ERROR; | |
1526 UCollationElements *iter = ucol_openElements(coll, codepoints, length, | |
1527 &status); | |
1528 UBool result = FALSE; | |
1529 UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE; | |
1530 const char * collLocale; | |
1531 | |
1532 if (U_FAILURE(status)) { | |
1533 log_err("Error creating iterator for testing validity\n"); | |
1534 return FALSE; | |
1535 } | |
1536 collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status); | |
1537 if (U_FAILURE(status) || collLocale==NULL) { | |
1538 status = U_ZERO_ERROR; | |
1539 collLocale = "?"; | |
1540 } | |
1541 | |
1542 for (;;) { | |
1543 uint32_t ce = ucol_next(iter, &status); | |
1544 uint32_t primary, p1, p2, secondary, tertiary; | |
1545 if (ce == UCOL_NULLORDER) { | |
1546 result = TRUE; | |
1547 break; | |
1548 } | |
1549 if (ce == 0) { | |
1550 continue; | |
1551 } | |
1552 if (ce == 0x02000202) { | |
1553 /* special CE for merge-sort character */ | |
1554 if (*codepoints == 0xFFFE /* && length == 1 */) { | |
1555 /* | |
1556 * Note: We should check for length==1 but the token parser appe
ars | |
1557 * to give us trailing NUL characters. | |
1558 * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTail
oredSet() | |
1559 * rather than the internal collation rule p
arser | |
1560 */ | |
1561 continue; | |
1562 } else { | |
1563 log_err("Special 02/02/02 weight for code point U+%04X [len %d]
!= U+FFFE\n", | |
1564 (int)*codepoints, (int)length); | |
1565 break; | |
1566 } | |
1567 } | |
1568 primary = UCOL_PRIMARYORDER(ce); | |
1569 p1 = primary >> 8; | |
1570 p2 = primary & 0xFF; | |
1571 secondary = UCOL_SECONDARYORDER(ce); | |
1572 tertiary = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION; | |
1573 | |
1574 if (!isContinuation(ce)) { | |
1575 if ((ce & UCOL_REMOVE_CONTINUATION) == 0) { | |
1576 log_err("Empty CE %08lX except for case bits\n", (long)ce); | |
1577 break; | |
1578 } | |
1579 if (p1 == 0) { | |
1580 if (p2 != 0) { | |
1581 log_err("Primary 00 xx in %08lX\n", (long)ce); | |
1582 break; | |
1583 } | |
1584 primaryDone = TRUE; | |
1585 } else { | |
1586 if (p1 <= 2 || p1 >= 0xF0) { | |
1587 /* Primary first bytes F0..FF are specials. */ | |
1588 log_err("Primary first byte of %08lX out of range\n", (long)
ce); | |
1589 break; | |
1590 } | |
1591 if (p2 == 0) { | |
1592 primaryDone = TRUE; | |
1593 } else { | |
1594 if (p2 <= 3 || p2 >= 0xFF) { | |
1595 /* Primary second bytes 03 and FF are sort key compressi
on terminators. */ | |
1596 log_err("Primary second byte of %08lX out of range\n", (
long)ce); | |
1597 break; | |
1598 } | |
1599 primaryDone = FALSE; | |
1600 } | |
1601 } | |
1602 if (secondary == 0) { | |
1603 if (primary != 0) { | |
1604 log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce); | |
1605 break; | |
1606 } | |
1607 secondaryDone = TRUE; | |
1608 } else { | |
1609 if (secondary <= 2 || | |
1610 (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COM
MON + 0x80)) | |
1611 ) { | |
1612 /* Secondary first bytes common+1..+0x80 are used for sort k
ey compression. */ | |
1613 log_err("Secondary byte of %08lX out of range\n", (long)ce); | |
1614 break; | |
1615 } | |
1616 secondaryDone = FALSE; | |
1617 } | |
1618 if (tertiary == 0) { | |
1619 /* We know that ce != 0. */ | |
1620 log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n",
(long)ce); | |
1621 break; | |
1622 } | |
1623 if (tertiary <= 2) { | |
1624 log_err("Tertiary byte of %08lX out of range\n", (long)ce); | |
1625 break; | |
1626 } | |
1627 tertiaryDone = FALSE; | |
1628 } else { | |
1629 if ((ce & UCOL_REMOVE_CONTINUATION) == 0) { | |
1630 log_err("Empty continuation %08lX\n", (long)ce); | |
1631 break; | |
1632 } | |
1633 if (primaryDone && primary != 0) { | |
1634 log_err("Primary was done but continues in %08lX\n", (long)ce); | |
1635 break; | |
1636 } | |
1637 if (p1 == 0) { | |
1638 if (p2 != 0) { | |
1639 log_err("Primary 00 xx in %08lX\n", (long)ce); | |
1640 break; | |
1641 } | |
1642 primaryDone = TRUE; | |
1643 } else { | |
1644 if (p1 <= 2) { | |
1645 log_err("Primary first byte of %08lX out of range\n", (long)
ce); | |
1646 break; | |
1647 } | |
1648 if (p2 == 0) { | |
1649 primaryDone = TRUE; | |
1650 } else { | |
1651 if (p2 <= 3) { | |
1652 log_err("Primary second byte of %08lX out of range\n", (
long)ce); | |
1653 break; | |
1654 } | |
1655 } | |
1656 } | |
1657 if (secondaryDone && secondary != 0) { | |
1658 log_err("Secondary was done but continues in %08lX\n", (long)ce)
; | |
1659 break; | |
1660 } | |
1661 if (secondary == 0) { | |
1662 secondaryDone = TRUE; | |
1663 } else { | |
1664 if (secondary <= 2) { | |
1665 log_err("Secondary byte of %08lX out of range\n", (long)ce); | |
1666 break; | |
1667 } | |
1668 } | |
1669 if (tertiaryDone && tertiary != 0) { | |
1670 log_err("Tertiary was done but continues in %08lX\n", (long)ce); | |
1671 break; | |
1672 } | |
1673 if (tertiary == 0) { | |
1674 tertiaryDone = TRUE; | |
1675 } else if (tertiary <= 2) { | |
1676 log_err("Tertiary byte of %08lX out of range\n", (long)ce); | |
1677 break; | |
1678 } | |
1679 } | |
1680 } | |
1681 if (!result) { | |
1682 char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5]; | |
1683 showCodepoints(codepoints, length, codepointText); | |
1684 log_err("Locale: %s Code point string: %s\n", collLocale, codepointText
); | |
1685 } | |
1686 ucol_closeElements(iter); | |
1687 return result; | |
1688 } | |
1689 | |
1690 static const UChar IMPORT[] = { 0x5B, 0x69, 0x6D, 0x70, 0x6F, 0x72, 0x74, 0 };
/* "[import" */ | |
1691 | |
1692 static void TestCEValidity() | |
1693 { | |
1694 /* testing UCA collation elements */ | |
1695 UErrorCode status = U_ZERO_ERROR; | |
1696 /* en_US has no tailorings */ | |
1697 UCollator *coll = ucol_open("root", &status); | |
1698 /* tailored locales */ | |
1699 char locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh
__PINYIN"}; | |
1700 const char *loc; | |
1701 FileStream *file = NULL; | |
1702 char line[2048]; | |
1703 UChar codepoints[11]; | |
1704 int count = 0; | |
1705 int maxCount = 0; | |
1706 UChar contextCPs[3]; | |
1707 UChar32 c; | |
1708 UParseError parseError; | |
1709 if (U_FAILURE(status)) { | |
1710 log_err_status(status, "en_US collator creation failed -> %s\n", u_error
Name(status)); | |
1711 return; | |
1712 } | |
1713 log_verbose("Testing UCA elements\n"); | |
1714 file = getFractionalUCA(); | |
1715 if (file == NULL) { | |
1716 log_err("Fractional UCA data can not be opened\n"); | |
1717 return; | |
1718 } | |
1719 | |
1720 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) { | |
1721 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' || | |
1722 line[0] == 0x000D || line[0] == '[') { | |
1723 continue; | |
1724 } | |
1725 | |
1726 getCodePoints(line, codepoints, contextCPs); | |
1727 checkCEValidity(coll, codepoints, u_strlen(codepoints)); | |
1728 } | |
1729 | |
1730 log_verbose("Testing UCA elements for the whole range of unicode characters\
n"); | |
1731 for (c = 0; c <= 0xffff; ++c) { | |
1732 if (u_isdefined(c)) { | |
1733 codepoints[0] = (UChar)c; | |
1734 checkCEValidity(coll, codepoints, 1); | |
1735 } | |
1736 } | |
1737 for (; c <= 0x10ffff; ++c) { | |
1738 if (u_isdefined(c)) { | |
1739 int32_t i = 0; | |
1740 U16_APPEND_UNSAFE(codepoints, i, c); | |
1741 checkCEValidity(coll, codepoints, i); | |
1742 } | |
1743 } | |
1744 | |
1745 ucol_close(coll); | |
1746 | |
1747 /* testing tailored collation elements */ | |
1748 log_verbose("Testing tailored elements\n"); | |
1749 if(getTestOption(QUICK_OPTION)) { | |
1750 maxCount = sizeof(locale)/sizeof(locale[0]); | |
1751 } else { | |
1752 maxCount = uloc_countAvailable(); | |
1753 } | |
1754 while (count < maxCount) { | |
1755 const UChar *rules = NULL, | |
1756 *current = NULL; | |
1757 UChar *rulesCopy = NULL; | |
1758 int32_t ruleLen = 0; | |
1759 | |
1760 uint32_t chOffset = 0; | |
1761 uint32_t chLen = 0; | |
1762 uint32_t exOffset = 0; | |
1763 uint32_t exLen = 0; | |
1764 uint32_t prefixOffset = 0; | |
1765 uint32_t prefixLen = 0; | |
1766 UBool startOfRules = TRUE; | |
1767 UColOptionSet opts; | |
1768 | |
1769 UColTokenParser src; | |
1770 uint32_t strength = 0; | |
1771 uint16_t specs = 0; | |
1772 | |
1773 (void)specs; /* Suppress set but not used warnings. */ | |
1774 (void)strength; | |
1775 (void)prefixLen; | |
1776 (void)prefixOffset; | |
1777 (void)exLen; | |
1778 (void)exOffset; | |
1779 | |
1780 if(getTestOption(QUICK_OPTION)) { | |
1781 loc = locale[count]; | |
1782 } else { | |
1783 loc = uloc_getAvailable(count); | |
1784 if(!hasCollationElements(loc)) { | |
1785 count++; | |
1786 continue; | |
1787 } | |
1788 } | |
1789 status = U_ZERO_ERROR; // clear status from previous loop iteration | |
1790 | |
1791 uprv_memset(&src, 0, sizeof(UColTokenParser)); | |
1792 | |
1793 log_verbose("Testing CEs for %s\n", loc); | |
1794 | |
1795 coll = ucol_open(loc, &status); | |
1796 if (U_FAILURE(status)) { | |
1797 log_err("%s collator creation failed with status %s\n", loc, u_error
Name(status)); | |
1798 return; | |
1799 } | |
1800 | |
1801 src.opts = &opts; | |
1802 rules = ucol_getRules(coll, &ruleLen); | |
1803 | |
1804 /* | |
1805 * We have not set up the UColTokenParser with a callback function | |
1806 * to fetch [import] sub-rules, | |
1807 * so skip testing tailorings that import others. | |
1808 * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet(
) | |
1809 * rather than the internal collation rule parser | |
1810 */ | |
1811 if (ruleLen > 0 && u_strstr(rules, IMPORT) == NULL) { | |
1812 rulesCopy = (UChar *)uprv_malloc((ruleLen + | |
1813 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar)); | |
1814 uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar)); | |
1815 src.current = src.source = rulesCopy; | |
1816 src.end = rulesCopy + ruleLen; | |
1817 src.extraCurrent = src.end; | |
1818 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE; | |
1819 | |
1820 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parse
NextToken can cause the pointer to | |
1821 the rules copy in src.source to get reallocated, freeing the
original pointer in rulesCopy */ | |
1822 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parse
Error,&status)) != NULL && U_SUCCESS(status)) { | |
1823 strength = src.parsedToken.strength; | |
1824 chOffset = src.parsedToken.charsOffset; | |
1825 chLen = src.parsedToken.charsLen; | |
1826 exOffset = src.parsedToken.extensionOffset; | |
1827 exLen = src.parsedToken.extensionLen; | |
1828 prefixOffset = src.parsedToken.prefixOffset; | |
1829 prefixLen = src.parsedToken.prefixLen; | |
1830 specs = src.parsedToken.flags; | |
1831 | |
1832 startOfRules = FALSE; | |
1833 uprv_memcpy(codepoints, src.source + chOffset, | |
1834 chLen * sizeof(UChar)); | |
1835 codepoints[chLen] = 0; | |
1836 checkCEValidity(coll, codepoints, chLen); | |
1837 } | |
1838 if (U_FAILURE(status)) { | |
1839 log_err("%s collator, ucol_tok_parseNextToken failed with status
%s\n", loc, u_errorName(status)); | |
1840 } | |
1841 uprv_free(src.source); | |
1842 uprv_free(src.reorderCodes); | |
1843 } | |
1844 | |
1845 ucol_close(coll); | |
1846 count ++; | |
1847 } | |
1848 T_FileStream_close(file); | |
1849 } | |
1850 | |
1851 static void printSortKeyError(const UChar *codepoints, int length, | |
1852 uint8_t *sortkey, int sklen) | |
1853 { | |
1854 int count = 0; | |
1855 log_err("Sortkey not valid for "); | |
1856 while (length > 0) { | |
1857 log_err("0x%04x ", *codepoints); | |
1858 length --; | |
1859 codepoints ++; | |
1860 } | |
1861 log_err("\nSortkey : "); | |
1862 while (count < sklen) { | |
1863 log_err("0x%02x ", sortkey[count]); | |
1864 count ++; | |
1865 } | |
1866 log_err("\n"); | |
1867 } | |
1868 | |
1869 /** | |
1870 * Checking sort key validity for all levels | |
1871 */ | |
1872 static UBool checkSortKeyValidity(UCollator *coll, | |
1873 const UChar *codepoints, | |
1874 int length) | |
1875 { | |
1876 UErrorCode status = U_ZERO_ERROR; | |
1877 UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY, | |
1878 UCOL_TERTIARY, UCOL_QUATERNARY, | |
1879 UCOL_IDENTICAL}; | |
1880 int strengthlen = 5; | |
1881 int strengthIndex = 0; | |
1882 int caselevel = 0; | |
1883 | |
1884 while (caselevel < 1) { | |
1885 if (caselevel == 0) { | |
1886 ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status); | |
1887 } | |
1888 else { | |
1889 ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status); | |
1890 } | |
1891 | |
1892 while (strengthIndex < strengthlen) { | |
1893 int count01 = 0; | |
1894 uint32_t count = 0; | |
1895 uint8_t sortkey[128]; | |
1896 uint32_t sklen; | |
1897 | |
1898 ucol_setStrength(coll, strength[strengthIndex]); | |
1899 sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128); | |
1900 while (sortkey[count] != 0) { | |
1901 if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 &
& strengthIndex != 4)) { | |
1902 printSortKeyError(codepoints, length, sortkey, sklen); | |
1903 return FALSE; | |
1904 } | |
1905 if (sortkey[count] == 1) { | |
1906 count01 ++; | |
1907 } | |
1908 count ++; | |
1909 } | |
1910 | |
1911 if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) { | |
1912 printSortKeyError(codepoints, length, sortkey, sklen); | |
1913 return FALSE; | |
1914 } | |
1915 strengthIndex ++; | |
1916 } | |
1917 caselevel ++; | |
1918 } | |
1919 return TRUE; | |
1920 } | |
1921 | |
1922 static void TestSortKeyValidity(void) | |
1923 { | |
1924 /* testing UCA collation elements */ | |
1925 UErrorCode status = U_ZERO_ERROR; | |
1926 /* en_US has no tailorings */ | |
1927 UCollator *coll = ucol_open("en_US", &status); | |
1928 /* tailored locales */ | |
1929 char locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"}; | |
1930 FileStream *file = NULL; | |
1931 char line[2048]; | |
1932 UChar codepoints[10]; | |
1933 int count = 0; | |
1934 UChar contextCPs[5]; | |
1935 UParseError parseError; | |
1936 if (U_FAILURE(status)) { | |
1937 log_err_status(status, "en_US collator creation failed -> %s\n", u_error
Name(status)); | |
1938 return; | |
1939 } | |
1940 log_verbose("Testing UCA elements\n"); | |
1941 file = getFractionalUCA(); | |
1942 if (file == NULL) { | |
1943 log_err("Fractional UCA data can not be opened\n"); | |
1944 return; | |
1945 } | |
1946 | |
1947 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) { | |
1948 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' || | |
1949 line[0] == 0x000D || line[0] == '[') { | |
1950 continue; | |
1951 } | |
1952 | |
1953 getCodePoints(line, codepoints, contextCPs); | |
1954 if(codepoints[0] == 0xFFFE) { | |
1955 /* Skip special merge-sort character U+FFFE which has otherwise ille
gal 02 weight bytes. */ | |
1956 continue; | |
1957 } | |
1958 checkSortKeyValidity(coll, codepoints, u_strlen(codepoints)); | |
1959 } | |
1960 | |
1961 log_verbose("Testing UCA elements for the whole range of unicode characters\
n"); | |
1962 codepoints[0] = 0; | |
1963 | |
1964 while (codepoints[0] < 0xFFFF) { | |
1965 if (u_isdefined((UChar32)codepoints[0])) { | |
1966 checkSortKeyValidity(coll, codepoints, 1); | |
1967 } | |
1968 codepoints[0] ++; | |
1969 } | |
1970 | |
1971 ucol_close(coll); | |
1972 | |
1973 /* testing tailored collation elements */ | |
1974 log_verbose("Testing tailored elements\n"); | |
1975 while (count < 5) { | |
1976 const UChar *rules = NULL, | |
1977 *current = NULL; | |
1978 UChar *rulesCopy = NULL; | |
1979 int32_t ruleLen = 0; | |
1980 | |
1981 uint32_t chOffset = 0; | |
1982 uint32_t chLen = 0; | |
1983 uint32_t exOffset = 0; | |
1984 uint32_t exLen = 0; | |
1985 uint32_t prefixOffset = 0; | |
1986 uint32_t prefixLen = 0; | |
1987 UBool startOfRules = TRUE; | |
1988 UColOptionSet opts; | |
1989 | |
1990 UColTokenParser src; | |
1991 uint32_t strength = 0; | |
1992 uint16_t specs = 0; | |
1993 status = U_ZERO_ERROR; // clear status from previous loop iteration | |
1994 | |
1995 (void)specs; | |
1996 (void)strength; | |
1997 (void)prefixLen; | |
1998 (void)prefixOffset; | |
1999 (void)exLen; | |
2000 (void)exOffset; | |
2001 | |
2002 uprv_memset(&src, 0, sizeof(UColTokenParser)); | |
2003 | |
2004 coll = ucol_open(locale[count], &status); | |
2005 if (U_FAILURE(status)) { | |
2006 log_err("%s collator creation failed with status %s\n", locale[count
], u_errorName(status)); | |
2007 return; | |
2008 } | |
2009 | |
2010 src.opts = &opts; | |
2011 rules = ucol_getRules(coll, &ruleLen); | |
2012 | |
2013 /* | |
2014 * We have not set up the UColTokenParser with a callback function | |
2015 * to fetch [import] sub-rules, | |
2016 * so skip testing tailorings that import others. | |
2017 * TODO: Ticket #8047: Change TestSortKeyValidity to use ucol_getTailore
dSet() | |
2018 * rather than the internal collation rule parser | |
2019 */ | |
2020 if (ruleLen > 0 && u_strstr(rules, IMPORT) == NULL) { | |
2021 rulesCopy = (UChar *)uprv_malloc((ruleLen + | |
2022 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar)); | |
2023 uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar)); | |
2024 src.current = src.source = rulesCopy; | |
2025 src.end = rulesCopy + ruleLen; | |
2026 src.extraCurrent = src.end; | |
2027 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE; | |
2028 | |
2029 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parse
NextToken can cause the pointer to | |
2030 the rules copy in src.source to get reallocated, freeing the
original pointer in rulesCopy */ | |
2031 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseE
rror, &status)) != NULL && U_SUCCESS(status)) { | |
2032 strength = src.parsedToken.strength; | |
2033 chOffset = src.parsedToken.charsOffset; | |
2034 chLen = src.parsedToken.charsLen; | |
2035 exOffset = src.parsedToken.extensionOffset; | |
2036 exLen = src.parsedToken.extensionLen; | |
2037 prefixOffset = src.parsedToken.prefixOffset; | |
2038 prefixLen = src.parsedToken.prefixLen; | |
2039 specs = src.parsedToken.flags; | |
2040 | |
2041 startOfRules = FALSE; | |
2042 uprv_memcpy(codepoints, src.source + chOffset, | |
2043 chLen * sizeof(UChar)); | |
2044 codepoints[chLen] = 0; | |
2045 if(codepoints[0] == 0xFFFE) { | |
2046 /* Skip special merge-sort character U+FFFE which has otherw
ise illegal 02 weight bytes. */ | |
2047 continue; | |
2048 } | |
2049 checkSortKeyValidity(coll, codepoints, chLen); | |
2050 } | |
2051 if (U_FAILURE(status)) { | |
2052 log_err("%s collator, ucol_tok_parseNextToken failed with status
%s\n", locale[count], u_errorName(status)); | |
2053 } | |
2054 uprv_free(src.source); | |
2055 uprv_free(src.reorderCodes); | |
2056 } | |
2057 | |
2058 ucol_close(coll); | |
2059 count ++; | |
2060 } | |
2061 T_FileStream_close(file); | |
2062 } | |
2063 | |
2064 /** | 1113 /** |
2065 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) wi
th | 1114 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) wi
th |
2066 * normalization on AND jamo tailoring, among other things. | 1115 * normalization on AND jamo tailoring, among other things. |
| 1116 * |
| 1117 * Note: This test is sensitive to changes of the root collator, |
| 1118 * for example whether the ae-ligature maps to three CEs (as in the DUCET) |
| 1119 * or to two CEs (as in the CLDR 24 FractionalUCA.txt). |
| 1120 * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding
. |
| 1121 * For example, the DUCET's artificial secondary CE in the ae-ligature |
| 1122 * may map to two 32-bit iterator CEs (as it did until ICU 52). |
2067 */ | 1123 */ |
2068 static const UChar tsceText[] = { /* Nothing in here should be ignorable */ | 1124 static const UChar tsceText[] = { /* Nothing in here should be ignorable */ |
2069 0x0020, 0xAC00, /* simple LV Hangul */ | 1125 0x0020, 0xAC00, /* simple LV Hangul */ |
2070 0x0020, 0xAC01, /* simple LVT Hangul */ | 1126 0x0020, 0xAC01, /* simple LVT Hangul */ |
2071 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */ | 1127 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */ |
2072 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */ | 1128 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */ |
2073 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */ | 1129 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */ |
2074 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */ | 1130 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */ |
2075 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands f
or search */ | 1131 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands f
or search */ |
2076 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for
search */ | 1132 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for
search */ |
2077 0x0020, 0x00E6, /* small letter ae, expands */ | 1133 0x0020, 0x00E6, /* small letter ae, expands */ |
2078 0x0020, 0x1E4D, /* small letter o with tilde and acute, deco
mposes */ | 1134 0x0020, 0x1E4D, /* small letter o with tilde and acute, deco
mposes */ |
2079 0x0020 | 1135 0x0020 |
2080 }; | 1136 }; |
2081 enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) }; | 1137 enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) }; |
2082 | 1138 |
2083 static const int32_t rootStandardOffsets[] = { | 1139 static const int32_t rootStandardOffsets[] = { |
2084 0, 1,2, | 1140 0, 1,2, |
2085 2, 3,4,4, | 1141 2, 3,4,4, |
2086 4, 5,6,6, | 1142 4, 5,6,6, |
2087 6, 7,8,8, | 1143 6, 7,8,8, |
2088 8, 9,10,11, | 1144 8, 9,10,11, |
2089 12, 13,14,15, | 1145 12, 13,14,15, |
2090 16, 17,18,19, | 1146 16, 17,18,19, |
2091 20, 21,22,23, | 1147 20, 21,22,23, |
2092 24, 25,26,26,26, | 1148 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs *
/ |
2093 26, 27,28,28, | 1149 26, 27,28,28, |
2094 28, | 1150 28, |
2095 29 | 1151 29 |
2096 }; | 1152 }; |
2097 enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets)/sizeof(rootStandar
dOffsets[0]) }; | 1153 enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets)/sizeof(rootStandar
dOffsets[0]) }; |
2098 | 1154 |
2099 static const int32_t rootSearchOffsets[] = { | 1155 static const int32_t rootSearchOffsets[] = { |
2100 0, 1,2, | 1156 0, 1,2, |
2101 2, 3,4,4, | 1157 2, 3,4,4, |
2102 4, 5,6,6,6, | 1158 4, 5,6,6,6, |
2103 6, 7,8,8,8,8,8,8, | 1159 6, 7,8,8,8,8,8,8, |
2104 8, 9,10,11, | 1160 8, 9,10,11, |
2105 12, 13,14,15, | 1161 12, 13,14,15, |
2106 16, 17,18,19,20, | 1162 16, 17,18,19,20, |
2107 20, 21,22,22,23,23,23,24, | 1163 20, 21,22,22,23,23,23,24, |
2108 24, 25,26,26,26, | 1164 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs *
/ |
2109 26, 27,28,28, | 1165 26, 27,28,28, |
2110 28, | 1166 28, |
2111 29 | 1167 29 |
2112 }; | 1168 }; |
2113 enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets)/sizeof(rootSearchOffse
ts[0]) }; | 1169 enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets)/sizeof(rootSearchOffse
ts[0]) }; |
2114 | 1170 |
2115 typedef struct { | 1171 typedef struct { |
2116 const char * locale; | 1172 const char * locale; |
2117 const int32_t * offsets; | 1173 const int32_t * offsets; |
2118 int32_t offsetsLen; | 1174 int32_t offsetsLen; |
(...skipping 16 matching lines...) Expand all Loading... |
2135 if ( U_SUCCESS(status) ) { | 1191 if ( U_SUCCESS(status) ) { |
2136 int32_t offset, element; | 1192 int32_t offset, element; |
2137 const int32_t * nextOffsetPtr; | 1193 const int32_t * nextOffsetPtr; |
2138 const int32_t * limitOffsetPtr; | 1194 const int32_t * limitOffsetPtr; |
2139 | 1195 |
2140 nextOffsetPtr = tsceItemPtr->offsets; | 1196 nextOffsetPtr = tsceItemPtr->offsets; |
2141 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen; | 1197 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen; |
2142 do { | 1198 do { |
2143 offset = ucol_getOffset(uce); | 1199 offset = ucol_getOffset(uce); |
2144 element = ucol_next(uce, &status); | 1200 element = ucol_next(uce, &status); |
| 1201 log_verbose("(%s) offset=%2d ce=%08x\n", tsceItemPtr->local
e, offset, element); |
2145 if ( element == 0 ) { | 1202 if ( element == 0 ) { |
2146 log_err("error, locale %s, ucol_next returned element 0\
n", tsceItemPtr->locale ); | 1203 log_err("error, locale %s, ucol_next returned element 0\
n", tsceItemPtr->locale ); |
2147 } | 1204 } |
2148 if ( nextOffsetPtr < limitOffsetPtr ) { | 1205 if ( nextOffsetPtr < limitOffsetPtr ) { |
2149 if (offset != *nextOffsetPtr) { | 1206 if (offset != *nextOffsetPtr) { |
2150 log_err("error, locale %s, expected ucol_next -> uco
l_getOffset %d, got %d\n", | 1207 log_err("error, locale %s, expected ucol_next -> uco
l_getOffset %d, got %d\n", |
2151 tsceItemPtr->locale,
*nextOffsetPtr, offset ); | 1208 tsceItemPtr->locale,
*nextOffsetPtr, offset ); |
2152 nextOffsetPtr = limitOffsetPtr; | 1209 nextOffsetPtr = limitOffsetPtr; |
2153 break; | 1210 break; |
2154 } | 1211 } |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2192 log_err("error, locale %s, ucol_openElements failed: %s\n", tsce
ItemPtr->locale, u_errorName(status) ); | 1249 log_err("error, locale %s, ucol_openElements failed: %s\n", tsce
ItemPtr->locale, u_errorName(status) ); |
2193 } | 1250 } |
2194 ucol_close(ucol); | 1251 ucol_close(ucol); |
2195 } else { | 1252 } else { |
2196 log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr
->locale, u_errorName(status) ); | 1253 log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr
->locale, u_errorName(status) ); |
2197 } | 1254 } |
2198 } | 1255 } |
2199 } | 1256 } |
2200 | 1257 |
2201 #endif /* #if !UCONFIG_NO_COLLATION */ | 1258 #endif /* #if !UCONFIG_NO_COLLATION */ |
OLD | NEW |