Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(394)

Side by Side Diff: source/test/cintltst/citertst.c

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unusued directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/test/cintltst/citertst.h ('k') | source/test/cintltst/cldrtest.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /******************************************************************** 1 /********************************************************************
2 * COPYRIGHT: 2 * COPYRIGHT:
3 * Copyright (c) 1997-2013, International Business Machines Corporation and 3 * Copyright (c) 1997-2014, International Business Machines Corporation and
4 * others. All Rights Reserved. 4 * others. All Rights Reserved.
5 ********************************************************************/ 5 ********************************************************************/
6 /******************************************************************************* * 6 /******************************************************************************* *
7 * 7 *
8 * File CITERTST.C 8 * File CITERTST.C
9 * 9 *
10 * Modification History: 10 * Modification History:
11 * Date Name Description 11 * Date Name Description
12 * Madhu Katragadda Ported for C API 12 * Madhu Katragadda Ported for C API
13 * 02/19/01 synwee Modified test case for new collation iterator 13 * 02/19/01 synwee Modified test case for new collation iterator
(...skipping 14 matching lines...) Expand all
28 #include "unicode/ustring.h" 28 #include "unicode/ustring.h"
29 #include "unicode/putil.h" 29 #include "unicode/putil.h"
30 #include "callcoll.h" 30 #include "callcoll.h"
31 #include "cmemory.h" 31 #include "cmemory.h"
32 #include "cintltst.h" 32 #include "cintltst.h"
33 #include "citertst.h" 33 #include "citertst.h"
34 #include "ccolltst.h" 34 #include "ccolltst.h"
35 #include "filestrm.h" 35 #include "filestrm.h"
36 #include "cstring.h" 36 #include "cstring.h"
37 #include "ucol_imp.h" 37 #include "ucol_imp.h"
38 #include "ucol_tok.h"
39 #include "uparse.h" 38 #include "uparse.h"
40 #include <stdio.h> 39 #include <stdio.h>
41 40
42 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *); 41 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
43 42
44 void addCollIterTest(TestNode** root) 43 void addCollIterTest(TestNode** root)
45 { 44 {
46 addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious"); 45 addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
47 addTest(root, &TestOffset, "tscoll/citertst/TestOffset"); 46 addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
48 addTest(root, &TestSetText, "tscoll/citertst/TestSetText"); 47 addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
49 addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion"); 48 addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
50 addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar"); 49 addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
51 addTest(root, &TestNormalizedUnicodeChar, 50 addTest(root, &TestNormalizedUnicodeChar,
52 "tscoll/citertst/TestNormalizedUnicodeChar"); 51 "tscoll/citertst/TestNormalizedUnicodeChar");
53 addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization"); 52 addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
54 addTest(root, &TestBug672, "tscoll/citertst/TestBug672"); 53 addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
55 addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize"); 54 addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
56 addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer"); 55 addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
57 addTest(root, &TestCEs, "tscoll/citertst/TestCEs");
58 addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos"); 56 addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
59 addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
60 addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
61 addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
62 addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements"); 57 addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");
63 } 58 }
64 59
65 /* The locales we support */ 60 /* The locales we support */
66 61
67 static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"}; 62 static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"};
68 63
69 static void TestBug672() { 64 static void TestBug672() {
70 UErrorCode status = U_ZERO_ERROR; 65 UErrorCode status = U_ZERO_ERROR;
71 UChar pattern[20]; 66 UChar pattern[20];
(...skipping 684 matching lines...) Expand 10 before | Expand all | Expand 10 after
756 { 751 {
757 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status)); 752 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
758 } 753 }
759 else 754 else
760 { 755 {
761 assertEqual(iter1, iter2); 756 assertEqual(iter1, iter2);
762 } 757 }
763 758
764 /* Now set it to point to a null string with fake length*/ 759 /* Now set it to point to a null string with fake length*/
765 ucol_setText(iter2, NULL, 2, &status); 760 ucol_setText(iter2, NULL, 2, &status);
766 if (U_FAILURE(status)) 761 if (status != U_ILLEGAL_ARGUMENT_ERROR)
767 { 762 {
768 log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status)); 763 log_err("call to iter2->setText(null, 2) should yield an illegal-argument-error - %s\n",
769 } 764 myErrorName(status));
770 else
771 {
772 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
773 log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
774 }
775 } 765 }
776 766
777 ucol_closeElements(iter2); 767 ucol_closeElements(iter2);
778 ucol_closeElements(iter1); 768 ucol_closeElements(iter1);
779 ucol_close(en_us); 769 ucol_close(en_us);
780 } 770 }
781 771
782 /** @bug 4108762 772 /** @bug 4108762
783 * Test for getMaxExpansion() 773 * Test for getMaxExpansion()
784 */ 774 */
(...skipping 219 matching lines...) Expand 10 before | Expand all | Expand 10 after
1004 ucol_next(testiter, &status); 994 ucol_next(testiter, &status);
1005 ucol_closeElements(testiter); 995 ucol_closeElements(testiter);
1006 ucol_closeElements(iter); 996 ucol_closeElements(iter);
1007 ucol_close(coll); 997 ucol_close(coll);
1008 } else { 998 } else {
1009 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status)); 999 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1010 } 1000 }
1011 } 1001 }
1012 1002
1013 /** 1003 /**
1014 * Sniplets of code from genuca
1015 */
1016 static int32_t hex2num(char hex) {
1017 if(hex>='0' && hex <='9') {
1018 return hex-'0';
1019 } else if(hex>='a' && hex<='f') {
1020 return hex-'a'+10;
1021 } else if(hex>='A' && hex<='F') {
1022 return hex-'A'+10;
1023 } else {
1024 return 0;
1025 }
1026 }
1027
1028 /**
1029 * Getting codepoints from a string
1030 * @param str character string contain codepoints seperated by space and ended
1031 * by a semicolon
1032 * @param codepoints array for storage, assuming size > 5
1033 * @return position at the end of the codepoint section
1034 */
1035 static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) {
1036 UErrorCode errorCode = U_ZERO_ERROR;
1037 char *semi = uprv_strchr(str, ';');
1038 char *pipe = uprv_strchr(str, '|');
1039 char *s;
1040 *codepoints = 0;
1041 *contextCPs = 0;
1042 if(semi == NULL) {
1043 log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str);
1044 return str;
1045 }
1046 if(pipe != NULL) {
1047 int32_t contextLength;
1048 *pipe = 0;
1049 contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode);
1050 *pipe = '|';
1051 if(U_FAILURE(errorCode)) {
1052 log_err("error parsing precontext string from FractionalUCA.txt %s\n ", str);
1053 return str;
1054 }
1055 /* prepend the precontext string to the codepoints */
1056 u_memcpy(codepoints, contextCPs, contextLength);
1057 codepoints += contextLength;
1058 /* start of the code point string */
1059 s = pipe + 1;
1060 } else {
1061 s = str;
1062 }
1063 u_parseString(s, codepoints, 99, NULL, &errorCode);
1064 if(U_FAILURE(errorCode)) {
1065 log_err("error parsing code point string from FractionalUCA.txt %s\n", str);
1066 return str;
1067 }
1068 return semi + 1;
1069 }
1070
1071 /**
1072 * Sniplets of code from genuca
1073 */
1074 static int32_t
1075 readElement(char **from, char *to, char separator, UErrorCode *status)
1076 {
1077 if (U_SUCCESS(*status)) {
1078 char buffer[1024];
1079 int32_t i = 0;
1080 while (**from != separator) {
1081 if (**from != ' ') {
1082 *(buffer+i++) = **from;
1083 }
1084 (*from)++;
1085 }
1086 (*from)++;
1087 *(buffer + i) = 0;
1088 strcpy(to, buffer);
1089 return i/2;
1090 }
1091
1092 return 0;
1093 }
1094
1095 /**
1096 * Sniplets of code from genuca
1097 */
1098 static uint32_t
1099 getSingleCEValue(char *primary, char *secondary, char *tertiary,
1100 UErrorCode *status)
1101 {
1102 if (U_SUCCESS(*status)) {
1103 uint32_t value = 0;
1104 char primsave = '\0';
1105 char secsave = '\0';
1106 char tersave = '\0';
1107 char *primend = primary+4;
1108 char *secend = secondary+2;
1109 char *terend = tertiary+2;
1110 uint32_t primvalue;
1111 uint32_t secvalue;
1112 uint32_t tervalue;
1113
1114 if (uprv_strlen(primary) > 4) {
1115 primsave = *primend;
1116 *primend = '\0';
1117 }
1118
1119 if (uprv_strlen(secondary) > 2) {
1120 secsave = *secend;
1121 *secend = '\0';
1122 }
1123
1124 if (uprv_strlen(tertiary) > 2) {
1125 tersave = *terend;
1126 *terend = '\0';
1127 }
1128
1129 primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;
1130 secvalue = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;
1131 tervalue = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;
1132 if(primvalue <= 0xFF) {
1133 primvalue <<= 8;
1134 }
1135
1136 value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
1137 | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
1138 | (tervalue & UCOL_TERTIARYORDERMASK);
1139
1140 if(primsave!='\0') {
1141 *primend = primsave;
1142 }
1143 if(secsave!='\0') {
1144 *secend = secsave;
1145 }
1146 if(tersave!='\0') {
1147 *terend = tersave;
1148 }
1149 return value;
1150 }
1151 return 0;
1152 }
1153
1154 /**
1155 * Getting collation elements generated from a string
1156 * @param str character string contain collation elements contained in [] and
1157 * seperated by space
1158 * @param ce array for storage, assuming size > 20
1159 * @param status error status
1160 * @return position at the end of the codepoint section
1161 */
1162 static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
1163 char *pStartCP = uprv_strchr(str, '[');
1164 int count = 0;
1165 char *pEndCP;
1166 char primary[100];
1167 char secondary[100];
1168 char tertiary[100];
1169
1170 while (*pStartCP == '[') {
1171 uint32_t primarycount = 0;
1172 uint32_t secondarycount = 0;
1173 uint32_t tertiarycount = 0;
1174 uint32_t CEi = 1;
1175 pEndCP = strchr(pStartCP, ']');
1176 if(pEndCP == NULL) {
1177 break;
1178 }
1179 pStartCP ++;
1180
1181 primarycount = readElement(&pStartCP, primary, ',', status);
1182 secondarycount = readElement(&pStartCP, secondary, ',', status);
1183 tertiarycount = readElement(&pStartCP, tertiary, ']', status);
1184
1185 /* I want to get the CEs entered right here, including continuation */
1186 ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
1187 if (U_FAILURE(*status)) {
1188 break;
1189 }
1190
1191 while (2 * CEi < primarycount || CEi < secondarycount ||
1192 CEi < tertiarycount) {
1193 uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
1194 if (2 * CEi < primarycount) {
1195 value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
1196 value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
1197 }
1198
1199 if (2 * CEi + 1 < primarycount) {
1200 value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
1201 value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
1202 }
1203
1204 if (CEi < secondarycount) {
1205 value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
1206 value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
1207 }
1208
1209 if (CEi < tertiarycount) {
1210 value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
1211 value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
1212 }
1213
1214 CEi ++;
1215 ces[count ++] = value;
1216 }
1217
1218 pStartCP = pEndCP + 1;
1219 }
1220 ces[count] = 0;
1221 return pStartCP;
1222 }
1223
1224 /**
1225 * Getting the FractionalUCA.txt file stream
1226 */
1227 static FileStream * getFractionalUCA(void)
1228 {
1229 char newPath[256];
1230 char backupPath[256];
1231 FileStream *result = NULL;
1232
1233 /* Look inside ICU_DATA first */
1234 uprv_strcpy(newPath, ctest_dataSrcDir());
1235 uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
1236 uprv_strcat(newPath, "FractionalUCA.txt");
1237
1238 /* As a fallback, try to guess where the source data was located
1239 * at the time ICU was built, and look there.
1240 */
1241 #if defined (U_TOPSRCDIR)
1242 strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data");
1243 #else
1244 {
1245 UErrorCode errorCode = U_ZERO_ERROR;
1246 strcpy(backupPath, loadTestData(&errorCode));
1247 strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
1248 }
1249 #endif
1250 strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
1251
1252 result = T_FileStream_open(newPath, "rb");
1253
1254 if (result == NULL) {
1255 result = T_FileStream_open(backupPath, "rb");
1256 if (result == NULL) {
1257 log_err("Failed to open either %s or %s\n", newPath, backupPath);
1258 }
1259 }
1260 return result;
1261 }
1262
1263 /**
1264 * Testing the CEs returned by the iterator
1265 */
1266 static void TestCEs() {
1267 FileStream *file = NULL;
1268 char line[2048];
1269 char *str;
1270 UChar codepoints[10];
1271 uint32_t ces[20];
1272 UErrorCode status = U_ZERO_ERROR;
1273 UCollator *coll = ucol_open("", &status);
1274 uint32_t lineNo = 0;
1275 UChar contextCPs[5];
1276
1277 if (U_FAILURE(status)) {
1278 log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status));
1279 return;
1280 }
1281
1282 file = getFractionalUCA();
1283
1284 if (file == NULL) {
1285 log_err("*** unable to open input FractionalUCA.txt file ***\n");
1286 return;
1287 }
1288
1289
1290 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1291 int count = 0;
1292 UCollationElements *iter;
1293 int32_t preContextCeLen=0;
1294 lineNo++;
1295 /* skip this line if it is empty or a comment or is a return value
1296 or start of some variable section */
1297 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1298 line[0] == 0x000D || line[0] == '[') {
1299 continue;
1300 }
1301
1302 str = getCodePoints(line, codepoints, contextCPs);
1303
1304 /* these are 'fake' codepoints in the fractional UCA, and are used just
1305 * for positioning of indirect values. They should not go through this
1306 * test.
1307 */
1308 if(*codepoints == 0xFDD0) {
1309 continue;
1310 }
1311 if (*contextCPs != 0) {
1312 iter = ucol_openElements(coll, contextCPs, -1, &status);
1313 if (U_FAILURE(status)) {
1314 log_err("Error in opening collation elements\n");
1315 break;
1316 }
1317 while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t )UCOL_NULLORDER) {
1318 preContextCeLen++;
1319 }
1320 ucol_closeElements(iter);
1321 }
1322
1323 getCEs(str, ces+preContextCeLen, &status);
1324 if (U_FAILURE(status)) {
1325 log_err("Error in parsing collation elements in FractionalUCA.txt\n");
1326 break;
1327 }
1328 iter = ucol_openElements(coll, codepoints, -1, &status);
1329 if (U_FAILURE(status)) {
1330 log_err("Error in opening collation elements\n");
1331 break;
1332 }
1333 for (;;) {
1334 uint32_t ce = (uint32_t)ucol_next(iter, &status);
1335 if (ce == 0xFFFFFFFF) {
1336 ce = 0;
1337 }
1338 /* we now unconditionally reorder Thai/Lao prevowels, so this
1339 * test would fail if we don't skip here.
1340 */
1341 if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {
1342 continue;
1343 }
1344 if (ce != ces[count] || U_FAILURE(status)) {
1345 log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
1346 break;
1347 }
1348 if (ces[count] == 0) {
1349 break;
1350 }
1351 count ++;
1352 }
1353 ucol_closeElements(iter);
1354 }
1355
1356 T_FileStream_close(file);
1357 ucol_close(coll);
1358 }
1359
1360 /**
1361 * Testing the discontigous contractions 1004 * Testing the discontigous contractions
1362 */ 1005 */
1363 static void TestDiscontiguos() { 1006 static void TestDiscontiguos() {
1364 const char *rulestr = 1007 const char *rulestr =
1365 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315"; 1008 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1366 UChar rule[50]; 1009 UChar rule[50];
1367 int rulelen = u_unescape(rulestr, rule, 50); 1010 int rulelen = u_unescape(rulestr, rule, 50);
1368 const char *src[] = { 1011 const char *src[] = {
1369 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC", 1012 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1370 /* base character blocked */ 1013 /* base character blocked */
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
1460 } 1103 }
1461 ucol_reset(iter); 1104 ucol_reset(iter);
1462 backAndForth(iter); 1105 backAndForth(iter);
1463 count ++; 1106 count ++;
1464 } 1107 }
1465 ucol_closeElements(resultiter); 1108 ucol_closeElements(resultiter);
1466 ucol_closeElements(iter); 1109 ucol_closeElements(iter);
1467 ucol_close(coll); 1110 ucol_close(coll);
1468 } 1111 }
1469 1112
1470 static void TestCEBufferOverflow()
1471 {
1472 UChar str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
1473 UErrorCode status = U_ZERO_ERROR;
1474 UChar rule[10];
1475 UCollator *coll;
1476 UCollationElements *iter;
1477
1478 u_uastrcpy(rule, "&z < AB");
1479 coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
1480 if (U_FAILURE(status)) {
1481 log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status));
1482 return;
1483 }
1484
1485 /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
1486 test. this will cause an overflow in getPrev */
1487 str[0] = 0x0041; /* 'A' */
1488 /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
1489 uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
1490 str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042; /* 'B' */
1491 iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
1492 &status);
1493 if (ucol_previous(iter, &status) == UCOL_NULLORDER ||
1494 status == U_BUFFER_OVERFLOW_ERROR) {
1495 log_err("CE buffer should not overflow with long string of trail surrogates\n");
1496 }
1497 ucol_closeElements(iter);
1498 ucol_close(coll);
1499 }
1500
1501 /**
1502 * Checking collation element validity.
1503 */
1504 #define MAX_CODEPOINTS_TO_SHOW 10
1505 static void showCodepoints(const UChar *codepoints, int length, char * codepointText) {
1506 int i, lengthToUse = length;
1507 if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) {
1508 lengthToUse = MAX_CODEPOINTS_TO_SHOW;
1509 }
1510 for (i = 0; i < lengthToUse; ++i) {
1511 int bytesWritten = sprintf(codepointText, " %04X", *codepoints++);
1512 if (bytesWritten <= 0) {
1513 break;
1514 }
1515 codepointText += bytesWritten;
1516 }
1517 if (i < length) {
1518 sprintf(codepointText, " ...");
1519 }
1520 }
1521
1522 static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
1523 int length)
1524 {
1525 UErrorCode status = U_ZERO_ERROR;
1526 UCollationElements *iter = ucol_openElements(coll, codepoints, length,
1527 &status);
1528 UBool result = FALSE;
1529 UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE;
1530 const char * collLocale;
1531
1532 if (U_FAILURE(status)) {
1533 log_err("Error creating iterator for testing validity\n");
1534 return FALSE;
1535 }
1536 collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status);
1537 if (U_FAILURE(status) || collLocale==NULL) {
1538 status = U_ZERO_ERROR;
1539 collLocale = "?";
1540 }
1541
1542 for (;;) {
1543 uint32_t ce = ucol_next(iter, &status);
1544 uint32_t primary, p1, p2, secondary, tertiary;
1545 if (ce == UCOL_NULLORDER) {
1546 result = TRUE;
1547 break;
1548 }
1549 if (ce == 0) {
1550 continue;
1551 }
1552 if (ce == 0x02000202) {
1553 /* special CE for merge-sort character */
1554 if (*codepoints == 0xFFFE /* && length == 1 */) {
1555 /*
1556 * Note: We should check for length==1 but the token parser appe ars
1557 * to give us trailing NUL characters.
1558 * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTail oredSet()
1559 * rather than the internal collation rule p arser
1560 */
1561 continue;
1562 } else {
1563 log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n",
1564 (int)*codepoints, (int)length);
1565 break;
1566 }
1567 }
1568 primary = UCOL_PRIMARYORDER(ce);
1569 p1 = primary >> 8;
1570 p2 = primary & 0xFF;
1571 secondary = UCOL_SECONDARYORDER(ce);
1572 tertiary = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION;
1573
1574 if (!isContinuation(ce)) {
1575 if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
1576 log_err("Empty CE %08lX except for case bits\n", (long)ce);
1577 break;
1578 }
1579 if (p1 == 0) {
1580 if (p2 != 0) {
1581 log_err("Primary 00 xx in %08lX\n", (long)ce);
1582 break;
1583 }
1584 primaryDone = TRUE;
1585 } else {
1586 if (p1 <= 2 || p1 >= 0xF0) {
1587 /* Primary first bytes F0..FF are specials. */
1588 log_err("Primary first byte of %08lX out of range\n", (long) ce);
1589 break;
1590 }
1591 if (p2 == 0) {
1592 primaryDone = TRUE;
1593 } else {
1594 if (p2 <= 3 || p2 >= 0xFF) {
1595 /* Primary second bytes 03 and FF are sort key compressi on terminators. */
1596 log_err("Primary second byte of %08lX out of range\n", ( long)ce);
1597 break;
1598 }
1599 primaryDone = FALSE;
1600 }
1601 }
1602 if (secondary == 0) {
1603 if (primary != 0) {
1604 log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce);
1605 break;
1606 }
1607 secondaryDone = TRUE;
1608 } else {
1609 if (secondary <= 2 ||
1610 (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COM MON + 0x80))
1611 ) {
1612 /* Secondary first bytes common+1..+0x80 are used for sort k ey compression. */
1613 log_err("Secondary byte of %08lX out of range\n", (long)ce);
1614 break;
1615 }
1616 secondaryDone = FALSE;
1617 }
1618 if (tertiary == 0) {
1619 /* We know that ce != 0. */
1620 log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce);
1621 break;
1622 }
1623 if (tertiary <= 2) {
1624 log_err("Tertiary byte of %08lX out of range\n", (long)ce);
1625 break;
1626 }
1627 tertiaryDone = FALSE;
1628 } else {
1629 if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
1630 log_err("Empty continuation %08lX\n", (long)ce);
1631 break;
1632 }
1633 if (primaryDone && primary != 0) {
1634 log_err("Primary was done but continues in %08lX\n", (long)ce);
1635 break;
1636 }
1637 if (p1 == 0) {
1638 if (p2 != 0) {
1639 log_err("Primary 00 xx in %08lX\n", (long)ce);
1640 break;
1641 }
1642 primaryDone = TRUE;
1643 } else {
1644 if (p1 <= 2) {
1645 log_err("Primary first byte of %08lX out of range\n", (long) ce);
1646 break;
1647 }
1648 if (p2 == 0) {
1649 primaryDone = TRUE;
1650 } else {
1651 if (p2 <= 3) {
1652 log_err("Primary second byte of %08lX out of range\n", ( long)ce);
1653 break;
1654 }
1655 }
1656 }
1657 if (secondaryDone && secondary != 0) {
1658 log_err("Secondary was done but continues in %08lX\n", (long)ce) ;
1659 break;
1660 }
1661 if (secondary == 0) {
1662 secondaryDone = TRUE;
1663 } else {
1664 if (secondary <= 2) {
1665 log_err("Secondary byte of %08lX out of range\n", (long)ce);
1666 break;
1667 }
1668 }
1669 if (tertiaryDone && tertiary != 0) {
1670 log_err("Tertiary was done but continues in %08lX\n", (long)ce);
1671 break;
1672 }
1673 if (tertiary == 0) {
1674 tertiaryDone = TRUE;
1675 } else if (tertiary <= 2) {
1676 log_err("Tertiary byte of %08lX out of range\n", (long)ce);
1677 break;
1678 }
1679 }
1680 }
1681 if (!result) {
1682 char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5];
1683 showCodepoints(codepoints, length, codepointText);
1684 log_err("Locale: %s Code point string: %s\n", collLocale, codepointText );
1685 }
1686 ucol_closeElements(iter);
1687 return result;
1688 }
1689
1690 static const UChar IMPORT[] = { 0x5B, 0x69, 0x6D, 0x70, 0x6F, 0x72, 0x74, 0 }; /* "[import" */
1691
1692 static void TestCEValidity()
1693 {
1694 /* testing UCA collation elements */
1695 UErrorCode status = U_ZERO_ERROR;
1696 /* en_US has no tailorings */
1697 UCollator *coll = ucol_open("root", &status);
1698 /* tailored locales */
1699 char locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh __PINYIN"};
1700 const char *loc;
1701 FileStream *file = NULL;
1702 char line[2048];
1703 UChar codepoints[11];
1704 int count = 0;
1705 int maxCount = 0;
1706 UChar contextCPs[3];
1707 UChar32 c;
1708 UParseError parseError;
1709 if (U_FAILURE(status)) {
1710 log_err_status(status, "en_US collator creation failed -> %s\n", u_error Name(status));
1711 return;
1712 }
1713 log_verbose("Testing UCA elements\n");
1714 file = getFractionalUCA();
1715 if (file == NULL) {
1716 log_err("Fractional UCA data can not be opened\n");
1717 return;
1718 }
1719
1720 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1721 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1722 line[0] == 0x000D || line[0] == '[') {
1723 continue;
1724 }
1725
1726 getCodePoints(line, codepoints, contextCPs);
1727 checkCEValidity(coll, codepoints, u_strlen(codepoints));
1728 }
1729
1730 log_verbose("Testing UCA elements for the whole range of unicode characters\ n");
1731 for (c = 0; c <= 0xffff; ++c) {
1732 if (u_isdefined(c)) {
1733 codepoints[0] = (UChar)c;
1734 checkCEValidity(coll, codepoints, 1);
1735 }
1736 }
1737 for (; c <= 0x10ffff; ++c) {
1738 if (u_isdefined(c)) {
1739 int32_t i = 0;
1740 U16_APPEND_UNSAFE(codepoints, i, c);
1741 checkCEValidity(coll, codepoints, i);
1742 }
1743 }
1744
1745 ucol_close(coll);
1746
1747 /* testing tailored collation elements */
1748 log_verbose("Testing tailored elements\n");
1749 if(getTestOption(QUICK_OPTION)) {
1750 maxCount = sizeof(locale)/sizeof(locale[0]);
1751 } else {
1752 maxCount = uloc_countAvailable();
1753 }
1754 while (count < maxCount) {
1755 const UChar *rules = NULL,
1756 *current = NULL;
1757 UChar *rulesCopy = NULL;
1758 int32_t ruleLen = 0;
1759
1760 uint32_t chOffset = 0;
1761 uint32_t chLen = 0;
1762 uint32_t exOffset = 0;
1763 uint32_t exLen = 0;
1764 uint32_t prefixOffset = 0;
1765 uint32_t prefixLen = 0;
1766 UBool startOfRules = TRUE;
1767 UColOptionSet opts;
1768
1769 UColTokenParser src;
1770 uint32_t strength = 0;
1771 uint16_t specs = 0;
1772
1773 (void)specs; /* Suppress set but not used warnings. */
1774 (void)strength;
1775 (void)prefixLen;
1776 (void)prefixOffset;
1777 (void)exLen;
1778 (void)exOffset;
1779
1780 if(getTestOption(QUICK_OPTION)) {
1781 loc = locale[count];
1782 } else {
1783 loc = uloc_getAvailable(count);
1784 if(!hasCollationElements(loc)) {
1785 count++;
1786 continue;
1787 }
1788 }
1789 status = U_ZERO_ERROR; // clear status from previous loop iteration
1790
1791 uprv_memset(&src, 0, sizeof(UColTokenParser));
1792
1793 log_verbose("Testing CEs for %s\n", loc);
1794
1795 coll = ucol_open(loc, &status);
1796 if (U_FAILURE(status)) {
1797 log_err("%s collator creation failed with status %s\n", loc, u_error Name(status));
1798 return;
1799 }
1800
1801 src.opts = &opts;
1802 rules = ucol_getRules(coll, &ruleLen);
1803
1804 /*
1805 * We have not set up the UColTokenParser with a callback function
1806 * to fetch [import] sub-rules,
1807 * so skip testing tailorings that import others.
1808 * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet( )
1809 * rather than the internal collation rule parser
1810 */
1811 if (ruleLen > 0 && u_strstr(rules, IMPORT) == NULL) {
1812 rulesCopy = (UChar *)uprv_malloc((ruleLen +
1813 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1814 uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1815 src.current = src.source = rulesCopy;
1816 src.end = rulesCopy + ruleLen;
1817 src.extraCurrent = src.end;
1818 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1819
1820 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parse NextToken can cause the pointer to
1821 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1822 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parse Error,&status)) != NULL && U_SUCCESS(status)) {
1823 strength = src.parsedToken.strength;
1824 chOffset = src.parsedToken.charsOffset;
1825 chLen = src.parsedToken.charsLen;
1826 exOffset = src.parsedToken.extensionOffset;
1827 exLen = src.parsedToken.extensionLen;
1828 prefixOffset = src.parsedToken.prefixOffset;
1829 prefixLen = src.parsedToken.prefixLen;
1830 specs = src.parsedToken.flags;
1831
1832 startOfRules = FALSE;
1833 uprv_memcpy(codepoints, src.source + chOffset,
1834 chLen * sizeof(UChar));
1835 codepoints[chLen] = 0;
1836 checkCEValidity(coll, codepoints, chLen);
1837 }
1838 if (U_FAILURE(status)) {
1839 log_err("%s collator, ucol_tok_parseNextToken failed with status %s\n", loc, u_errorName(status));
1840 }
1841 uprv_free(src.source);
1842 uprv_free(src.reorderCodes);
1843 }
1844
1845 ucol_close(coll);
1846 count ++;
1847 }
1848 T_FileStream_close(file);
1849 }
1850
1851 static void printSortKeyError(const UChar *codepoints, int length,
1852 uint8_t *sortkey, int sklen)
1853 {
1854 int count = 0;
1855 log_err("Sortkey not valid for ");
1856 while (length > 0) {
1857 log_err("0x%04x ", *codepoints);
1858 length --;
1859 codepoints ++;
1860 }
1861 log_err("\nSortkey : ");
1862 while (count < sklen) {
1863 log_err("0x%02x ", sortkey[count]);
1864 count ++;
1865 }
1866 log_err("\n");
1867 }
1868
1869 /**
1870 * Checking sort key validity for all levels
1871 */
1872 static UBool checkSortKeyValidity(UCollator *coll,
1873 const UChar *codepoints,
1874 int length)
1875 {
1876 UErrorCode status = U_ZERO_ERROR;
1877 UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
1878 UCOL_TERTIARY, UCOL_QUATERNARY,
1879 UCOL_IDENTICAL};
1880 int strengthlen = 5;
1881 int strengthIndex = 0;
1882 int caselevel = 0;
1883
1884 while (caselevel < 1) {
1885 if (caselevel == 0) {
1886 ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
1887 }
1888 else {
1889 ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
1890 }
1891
1892 while (strengthIndex < strengthlen) {
1893 int count01 = 0;
1894 uint32_t count = 0;
1895 uint8_t sortkey[128];
1896 uint32_t sklen;
1897
1898 ucol_setStrength(coll, strength[strengthIndex]);
1899 sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
1900 while (sortkey[count] != 0) {
1901 if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 & & strengthIndex != 4)) {
1902 printSortKeyError(codepoints, length, sortkey, sklen);
1903 return FALSE;
1904 }
1905 if (sortkey[count] == 1) {
1906 count01 ++;
1907 }
1908 count ++;
1909 }
1910
1911 if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) {
1912 printSortKeyError(codepoints, length, sortkey, sklen);
1913 return FALSE;
1914 }
1915 strengthIndex ++;
1916 }
1917 caselevel ++;
1918 }
1919 return TRUE;
1920 }
1921
1922 static void TestSortKeyValidity(void)
1923 {
1924 /* testing UCA collation elements */
1925 UErrorCode status = U_ZERO_ERROR;
1926 /* en_US has no tailorings */
1927 UCollator *coll = ucol_open("en_US", &status);
1928 /* tailored locales */
1929 char locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
1930 FileStream *file = NULL;
1931 char line[2048];
1932 UChar codepoints[10];
1933 int count = 0;
1934 UChar contextCPs[5];
1935 UParseError parseError;
1936 if (U_FAILURE(status)) {
1937 log_err_status(status, "en_US collator creation failed -> %s\n", u_error Name(status));
1938 return;
1939 }
1940 log_verbose("Testing UCA elements\n");
1941 file = getFractionalUCA();
1942 if (file == NULL) {
1943 log_err("Fractional UCA data can not be opened\n");
1944 return;
1945 }
1946
1947 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1948 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1949 line[0] == 0x000D || line[0] == '[') {
1950 continue;
1951 }
1952
1953 getCodePoints(line, codepoints, contextCPs);
1954 if(codepoints[0] == 0xFFFE) {
1955 /* Skip special merge-sort character U+FFFE which has otherwise ille gal 02 weight bytes. */
1956 continue;
1957 }
1958 checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
1959 }
1960
1961 log_verbose("Testing UCA elements for the whole range of unicode characters\ n");
1962 codepoints[0] = 0;
1963
1964 while (codepoints[0] < 0xFFFF) {
1965 if (u_isdefined((UChar32)codepoints[0])) {
1966 checkSortKeyValidity(coll, codepoints, 1);
1967 }
1968 codepoints[0] ++;
1969 }
1970
1971 ucol_close(coll);
1972
1973 /* testing tailored collation elements */
1974 log_verbose("Testing tailored elements\n");
1975 while (count < 5) {
1976 const UChar *rules = NULL,
1977 *current = NULL;
1978 UChar *rulesCopy = NULL;
1979 int32_t ruleLen = 0;
1980
1981 uint32_t chOffset = 0;
1982 uint32_t chLen = 0;
1983 uint32_t exOffset = 0;
1984 uint32_t exLen = 0;
1985 uint32_t prefixOffset = 0;
1986 uint32_t prefixLen = 0;
1987 UBool startOfRules = TRUE;
1988 UColOptionSet opts;
1989
1990 UColTokenParser src;
1991 uint32_t strength = 0;
1992 uint16_t specs = 0;
1993 status = U_ZERO_ERROR; // clear status from previous loop iteration
1994
1995 (void)specs;
1996 (void)strength;
1997 (void)prefixLen;
1998 (void)prefixOffset;
1999 (void)exLen;
2000 (void)exOffset;
2001
2002 uprv_memset(&src, 0, sizeof(UColTokenParser));
2003
2004 coll = ucol_open(locale[count], &status);
2005 if (U_FAILURE(status)) {
2006 log_err("%s collator creation failed with status %s\n", locale[count ], u_errorName(status));
2007 return;
2008 }
2009
2010 src.opts = &opts;
2011 rules = ucol_getRules(coll, &ruleLen);
2012
2013 /*
2014 * We have not set up the UColTokenParser with a callback function
2015 * to fetch [import] sub-rules,
2016 * so skip testing tailorings that import others.
2017 * TODO: Ticket #8047: Change TestSortKeyValidity to use ucol_getTailore dSet()
2018 * rather than the internal collation rule parser
2019 */
2020 if (ruleLen > 0 && u_strstr(rules, IMPORT) == NULL) {
2021 rulesCopy = (UChar *)uprv_malloc((ruleLen +
2022 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
2023 uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
2024 src.current = src.source = rulesCopy;
2025 src.end = rulesCopy + ruleLen;
2026 src.extraCurrent = src.end;
2027 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
2028
2029 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parse NextToken can cause the pointer to
2030 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
2031 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseE rror, &status)) != NULL && U_SUCCESS(status)) {
2032 strength = src.parsedToken.strength;
2033 chOffset = src.parsedToken.charsOffset;
2034 chLen = src.parsedToken.charsLen;
2035 exOffset = src.parsedToken.extensionOffset;
2036 exLen = src.parsedToken.extensionLen;
2037 prefixOffset = src.parsedToken.prefixOffset;
2038 prefixLen = src.parsedToken.prefixLen;
2039 specs = src.parsedToken.flags;
2040
2041 startOfRules = FALSE;
2042 uprv_memcpy(codepoints, src.source + chOffset,
2043 chLen * sizeof(UChar));
2044 codepoints[chLen] = 0;
2045 if(codepoints[0] == 0xFFFE) {
2046 /* Skip special merge-sort character U+FFFE which has otherw ise illegal 02 weight bytes. */
2047 continue;
2048 }
2049 checkSortKeyValidity(coll, codepoints, chLen);
2050 }
2051 if (U_FAILURE(status)) {
2052 log_err("%s collator, ucol_tok_parseNextToken failed with status %s\n", locale[count], u_errorName(status));
2053 }
2054 uprv_free(src.source);
2055 uprv_free(src.reorderCodes);
2056 }
2057
2058 ucol_close(coll);
2059 count ++;
2060 }
2061 T_FileStream_close(file);
2062 }
2063
2064 /** 1113 /**
2065 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) wi th 1114 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) wi th
2066 * normalization on AND jamo tailoring, among other things. 1115 * normalization on AND jamo tailoring, among other things.
1116 *
1117 * Note: This test is sensitive to changes of the root collator,
1118 * for example whether the ae-ligature maps to three CEs (as in the DUCET)
1119 * or to two CEs (as in the CLDR 24 FractionalUCA.txt).
1120 * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding .
1121 * For example, the DUCET's artificial secondary CE in the ae-ligature
1122 * may map to two 32-bit iterator CEs (as it did until ICU 52).
2067 */ 1123 */
2068 static const UChar tsceText[] = { /* Nothing in here should be ignorable */ 1124 static const UChar tsceText[] = { /* Nothing in here should be ignorable */
2069 0x0020, 0xAC00, /* simple LV Hangul */ 1125 0x0020, 0xAC00, /* simple LV Hangul */
2070 0x0020, 0xAC01, /* simple LVT Hangul */ 1126 0x0020, 0xAC01, /* simple LVT Hangul */
2071 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */ 1127 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */
2072 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */ 1128 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */
2073 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */ 1129 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
2074 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */ 1130 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
2075 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands f or search */ 1131 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands f or search */
2076 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */ 1132 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
2077 0x0020, 0x00E6, /* small letter ae, expands */ 1133 0x0020, 0x00E6, /* small letter ae, expands */
2078 0x0020, 0x1E4D, /* small letter o with tilde and acute, deco mposes */ 1134 0x0020, 0x1E4D, /* small letter o with tilde and acute, deco mposes */
2079 0x0020 1135 0x0020
2080 }; 1136 };
2081 enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) }; 1137 enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) };
2082 1138
/*
 * Expected ucol_getOffset() values, in iteration order, for the root
 * collator's standard element iterator over tsceText.
 * Each row lists the offsets for one group of tsceText (space + following text).
 */
static const int32_t rootStandardOffsets[] = {
    0,  1,2,
    2,  3,4,4,
    4,  5,6,6,
    6,  7,8,8,
    8,  9,10,11,
    12, 13,14,15,
    16, 17,18,19,
    20, 21,22,23,
    24, 25,26,  /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
    26, 27,28,28,
    28,
    29
};
/* Number of entries in rootStandardOffsets. */
enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets)/sizeof(rootStandardOffsets[0]) };
2098 1154
/*
 * Expected ucol_getOffset() values, in iteration order, for the search
 * collator's element iterator over tsceText.
 * Each row lists the offsets for one group of tsceText (space + following text);
 * rows are longer than in rootStandardOffsets where jamo expand for search.
 */
static const int32_t rootSearchOffsets[] = {
    0,  1,2,
    2,  3,4,4,
    4,  5,6,6,6,
    6,  7,8,8,8,8,8,8,
    8,  9,10,11,
    12, 13,14,15,
    16, 17,18,19,20,
    20, 21,22,22,23,23,23,24,
    24, 25,26,  /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
    26, 27,28,28,
    28,
    29
};
/* Number of entries in rootSearchOffsets. */
enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets)/sizeof(rootSearchOffsets[0]) };
2114 1170
2115 typedef struct { 1171 typedef struct {
2116 const char * locale; 1172 const char * locale;
2117 const int32_t * offsets; 1173 const int32_t * offsets;
2118 int32_t offsetsLen; 1174 int32_t offsetsLen;
(...skipping 16 matching lines...) Expand all
2135 if ( U_SUCCESS(status) ) { 1191 if ( U_SUCCESS(status) ) {
2136 int32_t offset, element; 1192 int32_t offset, element;
2137 const int32_t * nextOffsetPtr; 1193 const int32_t * nextOffsetPtr;
2138 const int32_t * limitOffsetPtr; 1194 const int32_t * limitOffsetPtr;
2139 1195
2140 nextOffsetPtr = tsceItemPtr->offsets; 1196 nextOffsetPtr = tsceItemPtr->offsets;
2141 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen; 1197 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
2142 do { 1198 do {
2143 offset = ucol_getOffset(uce); 1199 offset = ucol_getOffset(uce);
2144 element = ucol_next(uce, &status); 1200 element = ucol_next(uce, &status);
1201 log_verbose("(%s) offset=%2d ce=%08x\n", tsceItemPtr->local e, offset, element);
2145 if ( element == 0 ) { 1202 if ( element == 0 ) {
2146 log_err("error, locale %s, ucol_next returned element 0\ n", tsceItemPtr->locale ); 1203 log_err("error, locale %s, ucol_next returned element 0\ n", tsceItemPtr->locale );
2147 } 1204 }
2148 if ( nextOffsetPtr < limitOffsetPtr ) { 1205 if ( nextOffsetPtr < limitOffsetPtr ) {
2149 if (offset != *nextOffsetPtr) { 1206 if (offset != *nextOffsetPtr) {
2150 log_err("error, locale %s, expected ucol_next -> uco l_getOffset %d, got %d\n", 1207 log_err("error, locale %s, expected ucol_next -> uco l_getOffset %d, got %d\n",
2151 tsceItemPtr->locale, *nextOffsetPtr, offset ); 1208 tsceItemPtr->locale, *nextOffsetPtr, offset );
2152 nextOffsetPtr = limitOffsetPtr; 1209 nextOffsetPtr = limitOffsetPtr;
2153 break; 1210 break;
2154 } 1211 }
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
2192 log_err("error, locale %s, ucol_openElements failed: %s\n", tsce ItemPtr->locale, u_errorName(status) ); 1249 log_err("error, locale %s, ucol_openElements failed: %s\n", tsce ItemPtr->locale, u_errorName(status) );
2193 } 1250 }
2194 ucol_close(ucol); 1251 ucol_close(ucol);
2195 } else { 1252 } else {
2196 log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr ->locale, u_errorName(status) ); 1253 log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr ->locale, u_errorName(status) );
2197 } 1254 }
2198 } 1255 }
2199 } 1256 }
2200 1257
2201 #endif /* #if !UCONFIG_NO_COLLATION */ 1258 #endif /* #if !UCONFIG_NO_COLLATION */
OLDNEW
« no previous file with comments | « source/test/cintltst/citertst.h ('k') | source/test/cintltst/cldrtest.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698