OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ****************************************************************************** |
| 3 * |
| 4 * Copyright (C) 2002-2010, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ****************************************************************************** |
| 8 * file name: custrtst.c |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * created on: 2002oct09 |
| 14 * created by: Markus W. Scherer |
| 15 * |
| 16 * Tests of ustring.h Unicode string API functions. |
| 17 */ |
| 18 |
| 19 #include "unicode/ustring.h" |
| 20 #include "unicode/ucnv.h" |
| 21 #include "unicode/uiter.h" |
| 22 #include "cintltst.h" |
| 23 #include <string.h> |
| 24 |
| 25 #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) |
| 26 |
/*
 * Get the sign of an integer: yields -1, 0 or 1.
 * The argument is evaluated more than once, so it must not have side effects.
 * A comparison is used rather than an arithmetic right shift because
 * right-shifting a negative signed value is implementation-defined in C.
 */
#define _SIGN(value) ((value)==0 ? 0 : ((int32_t)(value)<0 ? -1 : 1))
| 29 |
| 30 /* test setup --------------------------------------------------------------- */ |
| 31 |
| 32 static void setUpDataTable(void); |
| 33 static void TestStringCopy(void); |
| 34 static void TestStringFunctions(void); |
| 35 static void TestStringSearching(void); |
| 36 static void TestSurrogateSearching(void); |
| 37 static void TestUnescape(void); |
| 38 static void TestCountChar32(void); |
| 39 static void TestUCharIterator(void); |
| 40 static void TestUNormIterator(void); |
| 41 static void TestBadUNormIterator(void); |
| 42 |
| 43 void addUStringTest(TestNode** root); |
| 44 |
| 45 void addUStringTest(TestNode** root) |
| 46 { |
| 47 addTest(root, &TestStringCopy, "tsutil/custrtst/TestStringCopy"); |
| 48 addTest(root, &TestStringFunctions, "tsutil/custrtst/TestStringFunctions"); |
| 49 addTest(root, &TestStringSearching, "tsutil/custrtst/TestStringSearching"); |
| 50 addTest(root, &TestSurrogateSearching, "tsutil/custrtst/TestSurrogateSearchi
ng"); |
| 51 addTest(root, &TestUnescape, "tsutil/custrtst/TestUnescape"); |
| 52 addTest(root, &TestCountChar32, "tsutil/custrtst/TestCountChar32"); |
| 53 addTest(root, &TestUCharIterator, "tsutil/custrtst/TestUCharIterator"); |
| 54 addTest(root, &TestUNormIterator, "tsutil/custrtst/TestUNormIterator"); |
| 55 addTest(root, &TestBadUNormIterator, "tsutil/custrtst/TestBadUNormIterator")
; |
| 56 } |
| 57 |
| 58 /* test data for TestStringFunctions ---------------------------------------- */ |
| 59 |
| 60 UChar*** dataTable = NULL; |
| 61 |
/*
 * Invariant-character source strings for the data table.
 * For every column j, raw[2][j] is raw[0][j] followed by raw[1][j].
 */
static const char* raw[3][4] = {

    /* First String */
    {   "English_",   "French_",   "Croatian_", "English_"},
    /* Second String */
    {   "United States",    "France",   "Croatia",  "United States"},

    /* Concatenated string */
    {   "English_United States", "French_France", "Croatian_Croatia", "English_United States"}
};
| 72 |
| 73 static void setUpDataTable() |
| 74 { |
| 75 int32_t i,j; |
| 76 if(dataTable == NULL) { |
| 77 dataTable = (UChar***)calloc(sizeof(UChar**),3); |
| 78 |
| 79 for (i = 0; i < 3; i++) { |
| 80 dataTable[i] = (UChar**)calloc(sizeof(UChar*),4); |
| 81 for (j = 0; j < 4; j++){ |
| 82 dataTable[i][j] = (UChar*) malloc(sizeof(UChar)*(strlen(raw[
i][j])+1)); |
| 83 u_uastrcpy(dataTable[i][j],raw[i][j]); |
| 84 } |
| 85 } |
| 86 } |
| 87 } |
| 88 |
| 89 static void cleanUpDataTable() |
| 90 { |
| 91 int32_t i,j; |
| 92 if(dataTable != NULL) { |
| 93 for (i=0; i<3; i++) { |
| 94 for(j = 0; j<4; j++) { |
| 95 free(dataTable[i][j]); |
| 96 } |
| 97 free(dataTable[i]); |
| 98 } |
| 99 free(dataTable); |
| 100 } |
| 101 dataTable = NULL; |
| 102 } |
| 103 |
| 104 /*Tests for u_strcat(),u_strcmp(), u_strlen(), u_strcpy(),u_strncat(),u_strncmp
(),u_strncpy, u_uastrcpy(),u_austrcpy(), u_uastrncpy(); */ |
| 105 static void TestStringFunctions() |
| 106 { |
| 107 int32_t i,j,k; |
| 108 UChar temp[512]; |
| 109 UChar nullTemp[512]; |
| 110 char test[512]; |
| 111 char tempOut[512]; |
| 112 |
| 113 setUpDataTable(); |
| 114 |
| 115 log_verbose("Testing u_strlen()\n"); |
| 116 if( u_strlen(dataTable[0][0])!= u_strlen(dataTable[0][3]) || u_strlen(dataTa
ble[0][0]) == u_strlen(dataTable[0][2])) |
| 117 log_err("There is an error in u_strlen()"); |
| 118 |
| 119 log_verbose("Testing u_memcpy() and u_memcmp()\n"); |
| 120 |
| 121 for(i=0;i<3;++i) |
| 122 { |
| 123 for(j=0;j<4;++j) |
| 124 { |
| 125 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j])); |
| 126 temp[0] = 0; |
| 127 temp[7] = 0xA4; /* Mark the end */ |
| 128 u_memcpy(temp,dataTable[i][j], 7); |
| 129 |
| 130 if(temp[7] != 0xA4) |
| 131 log_err("an error occured in u_memcpy()\n"); |
| 132 if(u_memcmp(temp, dataTable[i][j], 7)!=0) |
| 133 log_err("an error occured in u_memcpy() or u_memcmp()\n"); |
| 134 } |
| 135 } |
| 136 if(u_memcmp(dataTable[0][0], dataTable[1][1], 7)==0) |
| 137 log_err("an error occured in u_memcmp()\n"); |
| 138 |
| 139 log_verbose("Testing u_memset()\n"); |
| 140 nullTemp[0] = 0; |
| 141 nullTemp[7] = 0; |
| 142 u_memset(nullTemp, 0xa4, 7); |
| 143 for (i = 0; i < 7; i++) { |
| 144 if(nullTemp[i] != 0xa4) { |
| 145 log_err("an error occured in u_memset()\n"); |
| 146 } |
| 147 } |
| 148 if(nullTemp[7] != 0) { |
| 149 log_err("u_memset() went too far\n"); |
| 150 } |
| 151 |
| 152 u_memset(nullTemp, 0, 7); |
| 153 nullTemp[7] = 0xa4; |
| 154 temp[7] = 0; |
| 155 u_memcpy(temp,nullTemp, 7); |
| 156 if(u_memcmp(temp, nullTemp, 7)!=0 || temp[7]!=0) |
| 157 log_err("an error occured in u_memcpy() or u_memcmp()\n"); |
| 158 |
| 159 |
| 160 log_verbose("Testing u_memmove()\n"); |
| 161 for (i = 0; i < 7; i++) { |
| 162 temp[i] = (UChar)i; |
| 163 } |
| 164 u_memmove(temp + 1, temp, 7); |
| 165 if(temp[0] != 0) { |
| 166 log_err("an error occured in u_memmove()\n"); |
| 167 } |
| 168 for (i = 1; i <= 7; i++) { |
| 169 if(temp[i] != (i - 1)) { |
| 170 log_err("an error occured in u_memmove()\n"); |
| 171 } |
| 172 } |
| 173 |
| 174 log_verbose("Testing u_strcpy() and u_strcmp()\n"); |
| 175 |
| 176 for(i=0;i<3;++i) |
| 177 { |
| 178 for(j=0;j<4;++j) |
| 179 { |
| 180 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j])); |
| 181 temp[0] = 0; |
| 182 u_strcpy(temp,dataTable[i][j]); |
| 183 |
| 184 if(u_strcmp(temp,dataTable[i][j])!=0) |
| 185 log_err("something threw an error in u_strcpy() or u_strcmp()\n"
); |
| 186 } |
| 187 } |
| 188 if(u_strcmp(dataTable[0][0], dataTable[1][1])==0) |
| 189 log_err("an error occured in u_memcmp()\n"); |
| 190 |
| 191 log_verbose("testing u_strcat()\n"); |
| 192 i=0; |
| 193 for(j=0; j<2;++j) |
| 194 { |
| 195 u_uastrcpy(temp, ""); |
| 196 u_strcpy(temp,dataTable[i][j]); |
| 197 u_strcat(temp,dataTable[i+1][j]); |
| 198 if(u_strcmp(temp,dataTable[i+2][j])!=0) |
| 199 log_err("something threw an error in u_strcat()\n"); |
| 200 |
| 201 } |
| 202 log_verbose("Testing u_strncmp()\n"); |
| 203 for(i=0,j=0;j<4; ++j) |
| 204 { |
| 205 k=u_strlen(dataTable[i][j]); |
| 206 if(u_strncmp(dataTable[i][j],dataTable[i+2][j],k)!=0) |
| 207 log_err("Something threw an error in u_strncmp\n"); |
| 208 } |
| 209 if(u_strncmp(dataTable[0][0], dataTable[1][1], 7)==0) |
| 210 log_err("an error occured in u_memcmp()\n"); |
| 211 |
| 212 |
| 213 log_verbose("Testing u_strncat\n"); |
| 214 for(i=0,j=0;j<4; ++j) |
| 215 { |
| 216 k=u_strlen(dataTable[i][j]); |
| 217 |
| 218 u_uastrcpy(temp,""); |
| 219 |
| 220 if(u_strcmp(u_strncat(temp,dataTable[i+2][j],k),dataTable[i][j])!=0) |
| 221 log_err("something threw an error in u_strncat or u_uastrcpy()\n"); |
| 222 |
| 223 } |
| 224 |
| 225 log_verbose("Testing u_strncpy() and u_uastrcpy()\n"); |
| 226 for(i=2,j=0;j<4; ++j) |
| 227 { |
| 228 k=u_strlen(dataTable[i][j]); |
| 229 u_strncpy(temp, dataTable[i][j],k); |
| 230 temp[k] = 0xa4; |
| 231 |
| 232 if(u_strncmp(temp, dataTable[i][j],k)!=0) |
| 233 log_err("something threw an error in u_strncpy()\n"); |
| 234 |
| 235 if(temp[k] != 0xa4) |
| 236 log_err("something threw an error in u_strncpy()\n"); |
| 237 |
| 238 u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1); |
| 239 u_uastrncpy(temp, raw[i][j], k-1); |
| 240 if(u_strncmp(temp, dataTable[i][j],k-1)!=0) |
| 241 log_err("something threw an error in u_uastrncpy(k-1)\n"); |
| 242 |
| 243 if(temp[k-1] != 0x3F) |
| 244 log_err("something threw an error in u_uastrncpy(k-1)\n"); |
| 245 |
| 246 u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1); |
| 247 u_uastrncpy(temp, raw[i][j], k+1); |
| 248 if(u_strcmp(temp, dataTable[i][j])!=0) |
| 249 log_err("something threw an error in u_uastrncpy(k+1)\n"); |
| 250 |
| 251 if(temp[k] != 0) |
| 252 log_err("something threw an error in u_uastrncpy(k+1)\n"); |
| 253 |
| 254 u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1); |
| 255 u_uastrncpy(temp, raw[i][j], k); |
| 256 if(u_strncmp(temp, dataTable[i][j], k)!=0) |
| 257 log_err("something threw an error in u_uastrncpy(k)\n"); |
| 258 |
| 259 if(temp[k] != 0x3F) |
| 260 log_err("something threw an error in u_uastrncpy(k)\n"); |
| 261 } |
| 262 |
| 263 log_verbose("Testing u_strchr() and u_memchr()\n"); |
| 264 |
| 265 for(i=2,j=0;j<4;j++) |
| 266 { |
| 267 UChar saveVal = dataTable[i][j][0]; |
| 268 UChar *findPtr = u_strchr(dataTable[i][j], 0x005F); |
| 269 int32_t dataSize = (int32_t)(u_strlen(dataTable[i][j]) + 1); |
| 270 |
| 271 log_verbose("%s ", u_austrcpy(tempOut, findPtr)); |
| 272 |
| 273 if (findPtr == NULL || *findPtr != 0x005F) { |
| 274 log_err("u_strchr can't find '_' in the string\n"); |
| 275 } |
| 276 |
| 277 findPtr = u_strchr32(dataTable[i][j], 0x005F); |
| 278 if (findPtr == NULL || *findPtr != 0x005F) { |
| 279 log_err("u_strchr32 can't find '_' in the string\n"); |
| 280 } |
| 281 |
| 282 findPtr = u_strchr(dataTable[i][j], 0); |
| 283 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) { |
| 284 log_err("u_strchr can't find NULL in the string\n"); |
| 285 } |
| 286 |
| 287 findPtr = u_strchr32(dataTable[i][j], 0); |
| 288 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) { |
| 289 log_err("u_strchr32 can't find NULL in the string\n"); |
| 290 } |
| 291 |
| 292 findPtr = u_memchr(dataTable[i][j], 0, dataSize); |
| 293 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) { |
| 294 log_err("u_memchr can't find NULL in the string\n"); |
| 295 } |
| 296 |
| 297 findPtr = u_memchr32(dataTable[i][j], 0, dataSize); |
| 298 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) { |
| 299 log_err("u_memchr32 can't find NULL in the string\n"); |
| 300 } |
| 301 |
| 302 dataTable[i][j][0] = 0; |
| 303 /* Make sure we skip over the NULL termination */ |
| 304 findPtr = u_memchr(dataTable[i][j], 0x005F, dataSize); |
| 305 if (findPtr == NULL || *findPtr != 0x005F) { |
| 306 log_err("u_memchr can't find '_' in the string\n"); |
| 307 } |
| 308 |
| 309 findPtr = u_memchr32(dataTable[i][j], 0x005F, dataSize); |
| 310 if (findPtr == NULL || *findPtr != 0x005F) { |
| 311 log_err("u_memchr32 can't find '_' in the string\n"); |
| 312 } |
| 313 findPtr = u_memchr32(dataTable[i][j], 0xFFFD, dataSize); |
| 314 if (findPtr != NULL) { |
| 315 log_err("Should have found NULL when the character is not there.\n")
; |
| 316 } |
| 317 dataTable[i][j][0] = saveVal; /* Put it back for the other tests */ |
| 318 } |
| 319 |
| 320 /* |
| 321 * test that u_strchr32() |
| 322 * does not find surrogate code points when they are part of matched pairs |
| 323 * (= part of supplementary code points) |
| 324 * Jitterbug 1542 |
| 325 */ |
| 326 { |
| 327 static const UChar s[]={ |
| 328 /* 0 1 2 3 4 5 6 7
8 9 */ |
| 329 0x0061, 0xd841, 0xdc02, 0xd841, 0x0062, 0xdc02, 0xd841, 0xdc02, 0x00
63, 0 |
| 330 }; |
| 331 |
| 332 if(u_strchr32(s, 0xd841)!=(s+3) || u_strchr32(s, 0xdc02)!=(s+5)) { |
| 333 log_err("error: u_strchr32(surrogate) finds a partial supplementary
code point\n"); |
| 334 } |
| 335 if(u_memchr32(s, 0xd841, 9)!=(s+3) || u_memchr32(s, 0xdc02, 9)!=(s+5)) { |
| 336 log_err("error: u_memchr32(surrogate) finds a partial supplementary
code point\n"); |
| 337 } |
| 338 } |
| 339 |
| 340 log_verbose("Testing u_austrcpy()"); |
| 341 u_austrcpy(test,dataTable[0][0]); |
| 342 if(strcmp(test,raw[0][0])!=0) |
| 343 log_err("There is an error in u_austrcpy()"); |
| 344 |
| 345 |
| 346 log_verbose("Testing u_strtok_r()"); |
| 347 { |
| 348 const char tokString[] = " , 1 2 3 AHHHHH! 5.5 6 7 , 8\n"; |
| 349 const char *tokens[] = {",", "1", "2", "3", "AHHHHH!", "5.5", "6", "7",
"8\n"}; |
| 350 UChar delimBuf[sizeof(test)]; |
| 351 UChar currTokenBuf[sizeof(tokString)]; |
| 352 UChar *state; |
| 353 uint32_t currToken = 0; |
| 354 UChar *ptr; |
| 355 |
| 356 u_uastrcpy(temp, tokString); |
| 357 u_uastrcpy(delimBuf, " "); |
| 358 |
| 359 ptr = u_strtok_r(temp, delimBuf, &state); |
| 360 u_uastrcpy(delimBuf, " ,"); |
| 361 while (ptr != NULL) { |
| 362 u_uastrcpy(currTokenBuf, tokens[currToken]); |
| 363 if (u_strcmp(ptr, currTokenBuf) != 0) { |
| 364 log_err("u_strtok_r mismatch at %d. Got: %s, Expected: %s\n", cu
rrToken, ptr, tokens[currToken]); |
| 365 } |
| 366 ptr = u_strtok_r(NULL, delimBuf, &state); |
| 367 currToken++; |
| 368 } |
| 369 |
| 370 if (currToken != sizeof(tokens)/sizeof(tokens[0])) { |
| 371 log_err("Didn't get correct number of tokens\n"); |
| 372 } |
| 373 state = delimBuf; /* Give it an "invalid" saveState */ |
| 374 u_uastrcpy(currTokenBuf, ""); |
| 375 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) { |
| 376 log_err("Didn't get NULL for empty string\n"); |
| 377 } |
| 378 if (state != NULL) { |
| 379 log_err("State should be NULL for empty string\n"); |
| 380 } |
| 381 state = delimBuf; /* Give it an "invalid" saveState */ |
| 382 u_uastrcpy(currTokenBuf, ", ,"); |
| 383 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) { |
| 384 log_err("Didn't get NULL for a string of delimiters\n"); |
| 385 } |
| 386 if (state != NULL) { |
| 387 log_err("State should be NULL for a string of delimiters\n"); |
| 388 } |
| 389 |
| 390 state = delimBuf; /* Give it an "invalid" saveState */ |
| 391 u_uastrcpy(currTokenBuf, "q, ,"); |
| 392 if (u_strtok_r(currTokenBuf, delimBuf, &state) == NULL) { |
| 393 log_err("Got NULL for a string that does not begin with delimiters\n
"); |
| 394 } |
| 395 if (u_strtok_r(NULL, delimBuf, &state) != NULL) { |
| 396 log_err("Didn't get NULL for a string that ends in delimiters\n"); |
| 397 } |
| 398 if (state != NULL) { |
| 399 log_err("State should be NULL for empty string\n"); |
| 400 } |
| 401 |
| 402 state = delimBuf; /* Give it an "invalid" saveState */ |
| 403 u_uastrcpy(currTokenBuf, tokString); |
| 404 u_uastrcpy(temp, tokString); |
| 405 u_uastrcpy(delimBuf, "q"); /* Give it a delimiter that it can't find. *
/ |
| 406 ptr = u_strtok_r(currTokenBuf, delimBuf, &state); |
| 407 if (ptr == NULL || u_strcmp(ptr, temp) != 0) { |
| 408 log_err("Should have recieved the same string when there are no deli
miters\n"); |
| 409 } |
| 410 if (u_strtok_r(NULL, delimBuf, &state) != NULL) { |
| 411 log_err("Should not have found another token in a one token string\n
"); |
| 412 } |
| 413 } |
| 414 |
| 415 /* test u_strcmpCodePointOrder() */ |
| 416 { |
| 417 /* these strings are in ascending order */ |
| 418 static const UChar strings[][4]={ |
| 419 { 0x61, 0 }, /* U+0061 */ |
| 420 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */ |
| 421 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */ |
| 422 { 0xd800, 0 }, /* U+d800 */ |
| 423 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */ |
| 424 { 0xdfff, 0 }, /* U+dfff */ |
| 425 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */ |
| 426 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */ |
| 427 { 0xd800, 0xdc02, 0 }, /* U+10002 */ |
| 428 { 0xd84d, 0xdc56, 0 } /* U+23456 */ |
| 429 }; |
| 430 |
| 431 UCharIterator iter1, iter2; |
| 432 int32_t len1, len2, r1, r2; |
| 433 |
| 434 for(i=0; i<(sizeof(strings)/sizeof(strings[0])-1); ++i) { |
| 435 if(u_strcmpCodePointOrder(strings[i], strings[i+1])>=0) { |
| 436 log_err("error: u_strcmpCodePointOrder() fails for string %d and
the following one\n", i); |
| 437 } |
| 438 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 10)>=0) { |
| 439 log_err("error: u_strncmpCodePointOrder() fails for string %d an
d the following one\n", i); |
| 440 } |
| 441 |
| 442 /* There are at least 2 UChars in each string - verify that strncmp(
)==memcmp(). */ |
| 443 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 2)!=u_memcmpCod
ePointOrder(strings[i], strings[i+1], 2)) { |
| 444 log_err("error: u_strncmpCodePointOrder(2)!=u_memcmpCodePointOrd
er(2) for string %d and the following one\n", i); |
| 445 } |
| 446 |
| 447 /* test u_strCompare(TRUE) */ |
| 448 len1=u_strlen(strings[i]); |
| 449 len2=u_strlen(strings[i+1]); |
| 450 if( u_strCompare(strings[i], -1, strings[i+1], -1, TRUE)>=0 || |
| 451 u_strCompare(strings[i], -1, strings[i+1], len2, TRUE)>=0 || |
| 452 u_strCompare(strings[i], len1, strings[i+1], -1, TRUE)>=0 || |
| 453 u_strCompare(strings[i], len1, strings[i+1], len2, TRUE)>=0 |
| 454 ) { |
| 455 log_err("error: u_strCompare(code point order) fails for string
%d and the following one\n", i); |
| 456 } |
| 457 |
| 458 /* test u_strCompare(FALSE) */ |
| 459 r1=u_strCompare(strings[i], -1, strings[i+1], -1, FALSE); |
| 460 r2=u_strcmp(strings[i], strings[i+1]); |
| 461 if(_SIGN(r1)!=_SIGN(r2)) { |
| 462 log_err("error: u_strCompare(code unit order)!=u_strcmp() for st
ring %d and the following one\n", i); |
| 463 } |
| 464 |
| 465 /* test u_strCompareIter() */ |
| 466 uiter_setString(&iter1, strings[i], len1); |
| 467 uiter_setString(&iter2, strings[i+1], len2); |
| 468 if(u_strCompareIter(&iter1, &iter2, TRUE)>=0) { |
| 469 log_err("error: u_strCompareIter(code point order) fails for str
ing %d and the following one\n", i); |
| 470 } |
| 471 r1=u_strCompareIter(&iter1, &iter2, FALSE); |
| 472 if(_SIGN(r1)!=_SIGN(u_strcmp(strings[i], strings[i+1]))) { |
| 473 log_err("error: u_strCompareIter(code unit order)!=u_strcmp() fo
r string %d and the following one\n", i); |
| 474 } |
| 475 } |
| 476 } |
| 477 |
| 478 cleanUpDataTable(); |
| 479 } |
| 480 |
| 481 static void TestStringSearching() |
| 482 { |
| 483 const UChar testString[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0064, 0x0061,
0}; |
| 484 const UChar testSurrogateString[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff,
0x0063, 0x0064, 0x0064, 0xdbff, 0xdfff, 0xdb00, 0xdf00, 0x0061, 0}; |
| 485 const UChar surrMatchSet1[] = {0xdbff, 0xdfff, 0}; |
| 486 const UChar surrMatchSet2[] = {0x0061, 0x0062, 0xdbff, 0xdfff, 0}; |
| 487 const UChar surrMatchSet3[] = {0xdb00, 0xdf00, 0xdbff, 0xdfff, 0}; |
| 488 const UChar surrMatchSet4[] = {0x0000}; |
| 489 const UChar surrMatchSetBad[] = {0xdbff, 0x0061, 0}; |
| 490 const UChar surrMatchSetBad2[] = {0x0061, 0xdbff, 0}; |
| 491 const UChar surrMatchSetBad3[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0}
; /* has partial surrogate */ |
| 492 const UChar |
| 493 empty[] = { 0 }, |
| 494 a[] = { 0x61, 0 }, |
| 495 ab[] = { 0x61, 0x62, 0 }, |
| 496 ba[] = { 0x62, 0x61, 0 }, |
| 497 abcd[] = { 0x61, 0x62, 0x63, 0x64, 0 }, |
| 498 cd[] = { 0x63, 0x64, 0 }, |
| 499 dc[] = { 0x64, 0x63, 0 }, |
| 500 cdh[] = { 0x63, 0x64, 0x68, 0 }, |
| 501 f[] = { 0x66, 0 }, |
| 502 fg[] = { 0x66, 0x67, 0 }, |
| 503 gf[] = { 0x67, 0x66, 0 }; |
| 504 |
| 505 log_verbose("Testing u_strpbrk()"); |
| 506 |
| 507 if (u_strpbrk(testString, a) != &testString[0]) { |
| 508 log_err("u_strpbrk couldn't find first letter a.\n"); |
| 509 } |
| 510 if (u_strpbrk(testString, dc) != &testString[2]) { |
| 511 log_err("u_strpbrk couldn't find d or c.\n"); |
| 512 } |
| 513 if (u_strpbrk(testString, cd) != &testString[2]) { |
| 514 log_err("u_strpbrk couldn't find c or d.\n"); |
| 515 } |
| 516 if (u_strpbrk(testString, cdh) != &testString[2]) { |
| 517 log_err("u_strpbrk couldn't find c, d or h.\n"); |
| 518 } |
| 519 if (u_strpbrk(testString, f) != NULL) { |
| 520 log_err("u_strpbrk didn't return NULL for \"f\".\n"); |
| 521 } |
| 522 if (u_strpbrk(testString, fg) != NULL) { |
| 523 log_err("u_strpbrk didn't return NULL for \"fg\".\n"); |
| 524 } |
| 525 if (u_strpbrk(testString, gf) != NULL) { |
| 526 log_err("u_strpbrk didn't return NULL for \"gf\".\n"); |
| 527 } |
| 528 if (u_strpbrk(testString, empty) != NULL) { |
| 529 log_err("u_strpbrk didn't return NULL for \"\".\n"); |
| 530 } |
| 531 |
| 532 log_verbose("Testing u_strpbrk() with surrogates"); |
| 533 |
| 534 if (u_strpbrk(testSurrogateString, a) != &testSurrogateString[1]) { |
| 535 log_err("u_strpbrk couldn't find first letter a.\n"); |
| 536 } |
| 537 if (u_strpbrk(testSurrogateString, dc) != &testSurrogateString[5]) { |
| 538 log_err("u_strpbrk couldn't find d or c.\n"); |
| 539 } |
| 540 if (u_strpbrk(testSurrogateString, cd) != &testSurrogateString[5]) { |
| 541 log_err("u_strpbrk couldn't find c or d.\n"); |
| 542 } |
| 543 if (u_strpbrk(testSurrogateString, cdh) != &testSurrogateString[5]) { |
| 544 log_err("u_strpbrk couldn't find c, d or h.\n"); |
| 545 } |
| 546 if (u_strpbrk(testSurrogateString, f) != NULL) { |
| 547 log_err("u_strpbrk didn't return NULL for \"f\".\n"); |
| 548 } |
| 549 if (u_strpbrk(testSurrogateString, fg) != NULL) { |
| 550 log_err("u_strpbrk didn't return NULL for \"fg\".\n"); |
| 551 } |
| 552 if (u_strpbrk(testSurrogateString, gf) != NULL) { |
| 553 log_err("u_strpbrk didn't return NULL for \"gf\".\n"); |
| 554 } |
| 555 if (u_strpbrk(testSurrogateString, surrMatchSet1) != &testSurrogateString[3]
) { |
| 556 log_err("u_strpbrk couldn't find \"0xdbff, 0xdfff\".\n"); |
| 557 } |
| 558 if (u_strpbrk(testSurrogateString, surrMatchSet2) != &testSurrogateString[1]
) { |
| 559 log_err("u_strpbrk couldn't find \"0xdbff, a, b, 0xdbff, 0xdfff\".\n"); |
| 560 } |
| 561 if (u_strpbrk(testSurrogateString, surrMatchSet3) != &testSurrogateString[3]
) { |
| 562 log_err("u_strpbrk couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n")
; |
| 563 } |
| 564 if (u_strpbrk(testSurrogateString, surrMatchSet4) != NULL) { |
| 565 log_err("u_strpbrk should have returned NULL for empty string.\n"); |
| 566 } |
| 567 if (u_strpbrk(testSurrogateString, surrMatchSetBad) != &testSurrogateString[
0]) { |
| 568 log_err("u_strpbrk should have found bad surrogate.\n"); |
| 569 } |
| 570 |
| 571 log_verbose("Testing u_strcspn()"); |
| 572 |
| 573 if (u_strcspn(testString, a) != 0) { |
| 574 log_err("u_strcspn couldn't find first letter a.\n"); |
| 575 } |
| 576 if (u_strcspn(testString, dc) != 2) { |
| 577 log_err("u_strcspn couldn't find d or c.\n"); |
| 578 } |
| 579 if (u_strcspn(testString, cd) != 2) { |
| 580 log_err("u_strcspn couldn't find c or d.\n"); |
| 581 } |
| 582 if (u_strcspn(testString, cdh) != 2) { |
| 583 log_err("u_strcspn couldn't find c, d or h.\n"); |
| 584 } |
| 585 if (u_strcspn(testString, f) != u_strlen(testString)) { |
| 586 log_err("u_strcspn didn't return NULL for \"f\".\n"); |
| 587 } |
| 588 if (u_strcspn(testString, fg) != u_strlen(testString)) { |
| 589 log_err("u_strcspn didn't return NULL for \"fg\".\n"); |
| 590 } |
| 591 if (u_strcspn(testString, gf) != u_strlen(testString)) { |
| 592 log_err("u_strcspn didn't return NULL for \"gf\".\n"); |
| 593 } |
| 594 |
| 595 log_verbose("Testing u_strcspn() with surrogates"); |
| 596 |
| 597 if (u_strcspn(testSurrogateString, a) != 1) { |
| 598 log_err("u_strcspn couldn't find first letter a.\n"); |
| 599 } |
| 600 if (u_strcspn(testSurrogateString, dc) != 5) { |
| 601 log_err("u_strcspn couldn't find d or c.\n"); |
| 602 } |
| 603 if (u_strcspn(testSurrogateString, cd) != 5) { |
| 604 log_err("u_strcspn couldn't find c or d.\n"); |
| 605 } |
| 606 if (u_strcspn(testSurrogateString, cdh) != 5) { |
| 607 log_err("u_strcspn couldn't find c, d or h.\n"); |
| 608 } |
| 609 if (u_strcspn(testSurrogateString, f) != u_strlen(testSurrogateString)) { |
| 610 log_err("u_strcspn didn't return NULL for \"f\".\n"); |
| 611 } |
| 612 if (u_strcspn(testSurrogateString, fg) != u_strlen(testSurrogateString)) { |
| 613 log_err("u_strcspn didn't return NULL for \"fg\".\n"); |
| 614 } |
| 615 if (u_strcspn(testSurrogateString, gf) != u_strlen(testSurrogateString)) { |
| 616 log_err("u_strcspn didn't return NULL for \"gf\".\n"); |
| 617 } |
| 618 if (u_strcspn(testSurrogateString, surrMatchSet1) != 3) { |
| 619 log_err("u_strcspn couldn't find \"0xdbff, 0xdfff\".\n"); |
| 620 } |
| 621 if (u_strcspn(testSurrogateString, surrMatchSet2) != 1) { |
| 622 log_err("u_strcspn couldn't find \"a, b, 0xdbff, 0xdfff\".\n"); |
| 623 } |
| 624 if (u_strcspn(testSurrogateString, surrMatchSet3) != 3) { |
| 625 log_err("u_strcspn couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n")
; |
| 626 } |
| 627 if (u_strcspn(testSurrogateString, surrMatchSet4) != u_strlen(testSurrogateS
tring)) { |
| 628 log_err("u_strcspn should have returned strlen for empty string.\n"); |
| 629 } |
| 630 |
| 631 |
| 632 log_verbose("Testing u_strspn()"); |
| 633 |
| 634 if (u_strspn(testString, a) != 1) { |
| 635 log_err("u_strspn couldn't skip first letter a.\n"); |
| 636 } |
| 637 if (u_strspn(testString, ab) != 2) { |
| 638 log_err("u_strspn couldn't skip a or b.\n"); |
| 639 } |
| 640 if (u_strspn(testString, ba) != 2) { |
| 641 log_err("u_strspn couldn't skip a or b.\n"); |
| 642 } |
| 643 if (u_strspn(testString, f) != 0) { |
| 644 log_err("u_strspn didn't return 0 for \"f\".\n"); |
| 645 } |
| 646 if (u_strspn(testString, dc) != 0) { |
| 647 log_err("u_strspn couldn't find first letter a (skip d or c).\n"); |
| 648 } |
| 649 if (u_strspn(testString, abcd) != u_strlen(testString)) { |
| 650 log_err("u_strspn couldn't skip over the whole string.\n"); |
| 651 } |
| 652 if (u_strspn(testString, empty) != 0) { |
| 653 log_err("u_strspn should have returned 0 for empty string.\n"); |
| 654 } |
| 655 |
| 656 log_verbose("Testing u_strspn() with surrogates"); |
| 657 if (u_strspn(testSurrogateString, surrMatchSetBad) != 2) { |
| 658 log_err("u_strspn couldn't skip 0xdbff or a.\n"); |
| 659 } |
| 660 if (u_strspn(testSurrogateString, surrMatchSetBad2) != 2) { |
| 661 log_err("u_strspn couldn't skip 0xdbff or a.\n"); |
| 662 } |
| 663 if (u_strspn(testSurrogateString, f) != 0) { |
| 664 log_err("u_strspn couldn't skip d or c (skip first letter).\n"); |
| 665 } |
| 666 if (u_strspn(testSurrogateString, dc) != 0) { |
| 667 log_err("u_strspn couldn't skip d or c (skip first letter).\n"); |
| 668 } |
| 669 if (u_strspn(testSurrogateString, cd) != 0) { |
| 670 log_err("u_strspn couldn't skip d or c (skip first letter).\n"); |
| 671 } |
| 672 if (u_strspn(testSurrogateString, testSurrogateString) != u_strlen(testSurro
gateString)) { |
| 673 log_err("u_strspn couldn't skip whole string.\n"); |
| 674 } |
| 675 if (u_strspn(testSurrogateString, surrMatchSet1) != 0) { |
| 676 log_err("u_strspn couldn't skip \"0xdbff, 0xdfff\" (get first letter).\n
"); |
| 677 } |
| 678 if (u_strspn(testSurrogateString, surrMatchSetBad3) != 5) { |
| 679 log_err("u_strspn couldn't skip \"0xdbff, a, b, 0xdbff, 0xdfff\".\n"); |
| 680 } |
| 681 if (u_strspn(testSurrogateString, surrMatchSet4) != 0) { |
| 682 log_err("u_strspn should have returned 0 for empty string.\n"); |
| 683 } |
| 684 } |
| 685 |
| 686 /* |
| 687 * All binary Unicode string searches should behave the same for equivalent inpu
t. |
| 688 * See Jitterbug 2145. |
| 689 * There are some new functions, too - just test them all. |
| 690 */ |
| 691 static void |
| 692 TestSurrogateSearching() { |
| 693 static const UChar s[]={ |
| 694 /* 0 1 2 3 4 5 6 7 8 9
10 11 */ |
| 695 0x61, 0xd801, 0xdc02, 0x61, 0xdc02, 0x61, 0xd801, 0x61, 0xd801, 0xdc02,
0x61, 0 |
| 696 }, sub_a[]={ |
| 697 0x61, 0 |
| 698 }, sub_b[]={ |
| 699 0x62, 0 |
| 700 }, sub_lead[]={ |
| 701 0xd801, 0 |
| 702 }, sub_trail[]={ |
| 703 0xdc02, 0 |
| 704 }, sub_supp[]={ |
| 705 0xd801, 0xdc02, 0 |
| 706 }, sub_supp2[]={ |
| 707 0xd801, 0xdc03, 0 |
| 708 }, sub_a_lead[]={ |
| 709 0x61, 0xd801, 0 |
| 710 }, sub_trail_a[]={ |
| 711 0xdc02, 0x61, 0 |
| 712 }, sub_aba[]={ |
| 713 0x61, 0x62, 0x61, 0 |
| 714 }; |
| 715 static const UChar a=0x61, b=0x62, lead=0xd801, trail=0xdc02, nul=0; |
| 716 static const UChar32 supp=0x10402, supp2=0x10403, ill=0x123456; |
| 717 |
| 718 const UChar *first, *last; |
| 719 |
| 720 /* search for NUL code point: find end of string */ |
| 721 first=s+u_strlen(s); |
| 722 |
| 723 if( |
| 724 first!=u_strchr(s, nul) || |
| 725 first!=u_strchr32(s, nul) || |
| 726 first!=u_memchr(s, nul, LENGTHOF(s)) || |
| 727 first!=u_memchr32(s, nul, LENGTHOF(s)) || |
| 728 first!=u_strrchr(s, nul) || |
| 729 first!=u_strrchr32(s, nul) || |
| 730 first!=u_memrchr(s, nul, LENGTHOF(s)) || |
| 731 first!=u_memrchr32(s, nul, LENGTHOF(s)) |
| 732 ) { |
| 733 log_err("error: one of the u_str[|mem][r]chr[32](s, nul) does not find t
he terminator of s\n"); |
| 734 } |
| 735 |
| 736 /* search for empty substring: find beginning of string */ |
| 737 if( |
| 738 s!=u_strstr(s, &nul) || |
| 739 s!=u_strFindFirst(s, -1, &nul, -1) || |
| 740 s!=u_strFindFirst(s, -1, &nul, 0) || |
| 741 s!=u_strFindFirst(s, LENGTHOF(s), &nul, -1) || |
| 742 s!=u_strFindFirst(s, LENGTHOF(s), &nul, 0) || |
| 743 s!=u_strrstr(s, &nul) || |
| 744 s!=u_strFindLast(s, -1, &nul, -1) || |
| 745 s!=u_strFindLast(s, -1, &nul, 0) || |
| 746 s!=u_strFindLast(s, LENGTHOF(s), &nul, -1) || |
| 747 s!=u_strFindLast(s, LENGTHOF(s), &nul, 0) |
| 748 ) { |
| 749 log_err("error: one of the u_str[str etc](s, \"\") does not find s itsel
f\n"); |
| 750 } |
| 751 |
| 752 /* find 'a' in s[1..10[ */ |
| 753 first=s+3; |
| 754 last=s+7; |
| 755 if( |
| 756 first!=u_strchr(s+1, a) || |
| 757 first!=u_strchr32(s+1, a) || |
| 758 first!=u_memchr(s+1, a, 9) || |
| 759 first!=u_memchr32(s+1, a, 9) || |
| 760 first!=u_strstr(s+1, sub_a) || |
| 761 first!=u_strFindFirst(s+1, -1, sub_a, -1) || |
| 762 first!=u_strFindFirst(s+1, -1, &a, 1) || |
| 763 first!=u_strFindFirst(s+1, 9, sub_a, -1) || |
| 764 first!=u_strFindFirst(s+1, 9, &a, 1) || |
| 765 (s+10)!=u_strrchr(s+1, a) || |
| 766 (s+10)!=u_strrchr32(s+1, a) || |
| 767 last!=u_memrchr(s+1, a, 9) || |
| 768 last!=u_memrchr32(s+1, a, 9) || |
| 769 (s+10)!=u_strrstr(s+1, sub_a) || |
| 770 (s+10)!=u_strFindLast(s+1, -1, sub_a, -1) || |
| 771 (s+10)!=u_strFindLast(s+1, -1, &a, 1) || |
| 772 last!=u_strFindLast(s+1, 9, sub_a, -1) || |
| 773 last!=u_strFindLast(s+1, 9, &a, 1) |
| 774 ) { |
| 775 log_err("error: one of the u_str[chr etc]('a') does not find the correct
place\n"); |
| 776 } |
| 777 |
| 778 /* do not find 'b' in s[1..10[ */ |
| 779 if( |
| 780 NULL!=u_strchr(s+1, b) || |
| 781 NULL!=u_strchr32(s+1, b) || |
| 782 NULL!=u_memchr(s+1, b, 9) || |
| 783 NULL!=u_memchr32(s+1, b, 9) || |
| 784 NULL!=u_strstr(s+1, sub_b) || |
| 785 NULL!=u_strFindFirst(s+1, -1, sub_b, -1) || |
| 786 NULL!=u_strFindFirst(s+1, -1, &b, 1) || |
| 787 NULL!=u_strFindFirst(s+1, 9, sub_b, -1) || |
| 788 NULL!=u_strFindFirst(s+1, 9, &b, 1) || |
| 789 NULL!=u_strrchr(s+1, b) || |
| 790 NULL!=u_strrchr32(s+1, b) || |
| 791 NULL!=u_memrchr(s+1, b, 9) || |
| 792 NULL!=u_memrchr32(s+1, b, 9) || |
| 793 NULL!=u_strrstr(s+1, sub_b) || |
| 794 NULL!=u_strFindLast(s+1, -1, sub_b, -1) || |
| 795 NULL!=u_strFindLast(s+1, -1, &b, 1) || |
| 796 NULL!=u_strFindLast(s+1, 9, sub_b, -1) || |
| 797 NULL!=u_strFindLast(s+1, 9, &b, 1) |
| 798 ) { |
| 799 log_err("error: one of the u_str[chr etc]('b') incorrectly finds somethi
ng\n"); |
| 800 } |
| 801 |
| 802 /* do not find a non-code point in s[1..10[ */ |
| 803 if( |
| 804 NULL!=u_strchr32(s+1, ill) || |
| 805 NULL!=u_memchr32(s+1, ill, 9) || |
| 806 NULL!=u_strrchr32(s+1, ill) || |
| 807 NULL!=u_memrchr32(s+1, ill, 9) |
| 808 ) { |
| 809 log_err("error: one of the u_str[chr etc](illegal code point) incorrectl
y finds something\n"); |
| 810 } |
| 811 |
| 812 /* find U+d801 in s[1..10[ */ |
| 813 first=s+6; |
| 814 if( |
| 815 first!=u_strchr(s+1, lead) || |
| 816 first!=u_strchr32(s+1, lead) || |
| 817 first!=u_memchr(s+1, lead, 9) || |
| 818 first!=u_memchr32(s+1, lead, 9) || |
| 819 first!=u_strstr(s+1, sub_lead) || |
| 820 first!=u_strFindFirst(s+1, -1, sub_lead, -1) || |
| 821 first!=u_strFindFirst(s+1, -1, &lead, 1) || |
| 822 first!=u_strFindFirst(s+1, 9, sub_lead, -1) || |
| 823 first!=u_strFindFirst(s+1, 9, &lead, 1) || |
| 824 first!=u_strrchr(s+1, lead) || |
| 825 first!=u_strrchr32(s+1, lead) || |
| 826 first!=u_memrchr(s+1, lead, 9) || |
| 827 first!=u_memrchr32(s+1, lead, 9) || |
| 828 first!=u_strrstr(s+1, sub_lead) || |
| 829 first!=u_strFindLast(s+1, -1, sub_lead, -1) || |
| 830 first!=u_strFindLast(s+1, -1, &lead, 1) || |
| 831 first!=u_strFindLast(s+1, 9, sub_lead, -1) || |
| 832 first!=u_strFindLast(s+1, 9, &lead, 1) |
| 833 ) { |
| 834 log_err("error: one of the u_str[chr etc](U+d801) does not find the corr
ect place\n"); |
| 835 } |
| 836 |
| 837 /* find U+dc02 in s[1..10[ */ |
| 838 first=s+4; |
| 839 if( |
| 840 first!=u_strchr(s+1, trail) || |
| 841 first!=u_strchr32(s+1, trail) || |
| 842 first!=u_memchr(s+1, trail, 9) || |
| 843 first!=u_memchr32(s+1, trail, 9) || |
| 844 first!=u_strstr(s+1, sub_trail) || |
| 845 first!=u_strFindFirst(s+1, -1, sub_trail, -1) || |
| 846 first!=u_strFindFirst(s+1, -1, &trail, 1) || |
| 847 first!=u_strFindFirst(s+1, 9, sub_trail, -1) || |
| 848 first!=u_strFindFirst(s+1, 9, &trail, 1) || |
| 849 first!=u_strrchr(s+1, trail) || |
| 850 first!=u_strrchr32(s+1, trail) || |
| 851 first!=u_memrchr(s+1, trail, 9) || |
| 852 first!=u_memrchr32(s+1, trail, 9) || |
| 853 first!=u_strrstr(s+1, sub_trail) || |
| 854 first!=u_strFindLast(s+1, -1, sub_trail, -1) || |
| 855 first!=u_strFindLast(s+1, -1, &trail, 1) || |
| 856 first!=u_strFindLast(s+1, 9, sub_trail, -1) || |
| 857 first!=u_strFindLast(s+1, 9, &trail, 1) |
| 858 ) { |
| 859 log_err("error: one of the u_str[chr etc](U+dc02) does not find the corr
ect place\n"); |
| 860 } |
| 861 |
| 862 /* find U+10402 in s[1..10[ */ |
| 863 first=s+1; |
| 864 last=s+8; |
| 865 if( |
| 866 first!=u_strchr32(s+1, supp) || |
| 867 first!=u_memchr32(s+1, supp, 9) || |
| 868 first!=u_strstr(s+1, sub_supp) || |
| 869 first!=u_strFindFirst(s+1, -1, sub_supp, -1) || |
| 870 first!=u_strFindFirst(s+1, -1, sub_supp, 2) || |
| 871 first!=u_strFindFirst(s+1, 9, sub_supp, -1) || |
| 872 first!=u_strFindFirst(s+1, 9, sub_supp, 2) || |
| 873 last!=u_strrchr32(s+1, supp) || |
| 874 last!=u_memrchr32(s+1, supp, 9) || |
| 875 last!=u_strrstr(s+1, sub_supp) || |
| 876 last!=u_strFindLast(s+1, -1, sub_supp, -1) || |
| 877 last!=u_strFindLast(s+1, -1, sub_supp, 2) || |
| 878 last!=u_strFindLast(s+1, 9, sub_supp, -1) || |
| 879 last!=u_strFindLast(s+1, 9, sub_supp, 2) |
| 880 ) { |
| 881 log_err("error: one of the u_str[chr etc](U+10402) does not find the cor
rect place\n"); |
| 882 } |
| 883 |
| 884 /* do not find U+10402 in a single UChar */ |
| 885 if( |
| 886 NULL!=u_memchr32(s+1, supp, 1) || |
| 887 NULL!=u_strFindFirst(s+1, 1, sub_supp, -1) || |
| 888 NULL!=u_strFindFirst(s+1, 1, sub_supp, 2) || |
| 889 NULL!=u_memrchr32(s+1, supp, 1) || |
| 890 NULL!=u_strFindLast(s+1, 1, sub_supp, -1) || |
| 891 NULL!=u_strFindLast(s+1, 1, sub_supp, 2) || |
| 892 NULL!=u_memrchr32(s+2, supp, 1) || |
| 893 NULL!=u_strFindLast(s+2, 1, sub_supp, -1) || |
| 894 NULL!=u_strFindLast(s+2, 1, sub_supp, 2) |
| 895 ) { |
| 896 log_err("error: one of the u_str[chr etc](U+10402) incorrectly finds a s
upplementary c.p. in a single UChar\n"); |
| 897 } |
| 898 |
| 899 /* do not find U+10403 in s[1..10[ */ |
| 900 if( |
| 901 NULL!=u_strchr32(s+1, supp2) || |
| 902 NULL!=u_memchr32(s+1, supp2, 9) || |
| 903 NULL!=u_strstr(s+1, sub_supp2) || |
| 904 NULL!=u_strFindFirst(s+1, -1, sub_supp2, -1) || |
| 905 NULL!=u_strFindFirst(s+1, -1, sub_supp2, 2) || |
| 906 NULL!=u_strFindFirst(s+1, 9, sub_supp2, -1) || |
| 907 NULL!=u_strFindFirst(s+1, 9, sub_supp2, 2) || |
| 908 NULL!=u_strrchr32(s+1, supp2) || |
| 909 NULL!=u_memrchr32(s+1, supp2, 9) || |
| 910 NULL!=u_strrstr(s+1, sub_supp2) || |
| 911 NULL!=u_strFindLast(s+1, -1, sub_supp2, -1) || |
| 912 NULL!=u_strFindLast(s+1, -1, sub_supp2, 2) || |
| 913 NULL!=u_strFindLast(s+1, 9, sub_supp2, -1) || |
| 914 NULL!=u_strFindLast(s+1, 9, sub_supp2, 2) |
| 915 ) { |
| 916 log_err("error: one of the u_str[chr etc](U+10403) incorrectly finds som
ething\n"); |
| 917 } |
| 918 |
| 919 /* find <0061 d801> in s[1..10[ */ |
| 920 first=s+5; |
| 921 if( |
| 922 first!=u_strstr(s+1, sub_a_lead) || |
| 923 first!=u_strFindFirst(s+1, -1, sub_a_lead, -1) || |
| 924 first!=u_strFindFirst(s+1, -1, sub_a_lead, 2) || |
| 925 first!=u_strFindFirst(s+1, 9, sub_a_lead, -1) || |
| 926 first!=u_strFindFirst(s+1, 9, sub_a_lead, 2) || |
| 927 first!=u_strrstr(s+1, sub_a_lead) || |
| 928 first!=u_strFindLast(s+1, -1, sub_a_lead, -1) || |
| 929 first!=u_strFindLast(s+1, -1, sub_a_lead, 2) || |
| 930 first!=u_strFindLast(s+1, 9, sub_a_lead, -1) || |
| 931 first!=u_strFindLast(s+1, 9, sub_a_lead, 2) |
| 932 ) { |
| 933 log_err("error: one of the u_str[str etc](<0061 d801>) does not find the
correct place\n"); |
| 934 } |
| 935 |
| 936 /* find <dc02 0061> in s[1..10[ */ |
| 937 first=s+4; |
| 938 if( |
| 939 first!=u_strstr(s+1, sub_trail_a) || |
| 940 first!=u_strFindFirst(s+1, -1, sub_trail_a, -1) || |
| 941 first!=u_strFindFirst(s+1, -1, sub_trail_a, 2) || |
| 942 first!=u_strFindFirst(s+1, 9, sub_trail_a, -1) || |
| 943 first!=u_strFindFirst(s+1, 9, sub_trail_a, 2) || |
| 944 first!=u_strrstr(s+1, sub_trail_a) || |
| 945 first!=u_strFindLast(s+1, -1, sub_trail_a, -1) || |
| 946 first!=u_strFindLast(s+1, -1, sub_trail_a, 2) || |
| 947 first!=u_strFindLast(s+1, 9, sub_trail_a, -1) || |
| 948 first!=u_strFindLast(s+1, 9, sub_trail_a, 2) |
| 949 ) { |
| 950 log_err("error: one of the u_str[str etc](<dc02 0061>) does not find the
correct place\n"); |
| 951 } |
| 952 |
| 953 /* do not find "aba" in s[1..10[ */ |
| 954 if( |
| 955 NULL!=u_strstr(s+1, sub_aba) || |
| 956 NULL!=u_strFindFirst(s+1, -1, sub_aba, -1) || |
| 957 NULL!=u_strFindFirst(s+1, -1, sub_aba, 3) || |
| 958 NULL!=u_strFindFirst(s+1, 9, sub_aba, -1) || |
| 959 NULL!=u_strFindFirst(s+1, 9, sub_aba, 3) || |
| 960 NULL!=u_strrstr(s+1, sub_aba) || |
| 961 NULL!=u_strFindLast(s+1, -1, sub_aba, -1) || |
| 962 NULL!=u_strFindLast(s+1, -1, sub_aba, 3) || |
| 963 NULL!=u_strFindLast(s+1, 9, sub_aba, -1) || |
| 964 NULL!=u_strFindLast(s+1, 9, sub_aba, 3) |
| 965 ) { |
| 966 log_err("error: one of the u_str[str etc](\"aba\") incorrectly finds som
ething\n"); |
| 967 } |
| 968 } |
| 969 |
| 970 static void TestStringCopy() |
| 971 { |
| 972 UChar temp[40]; |
| 973 UChar *result=0; |
| 974 UChar subString[5]; |
| 975 UChar uchars[]={0x61, 0x62, 0x63, 0x00}; |
| 976 char charOut[40]; |
| 977 char chars[]="abc"; /* needs default codepage */ |
| 978 |
| 979 log_verbose("Testing u_uastrncpy() and u_uastrcpy()"); |
| 980 |
| 981 u_uastrcpy(temp, "abc"); |
| 982 if(u_strcmp(temp, uchars) != 0) { |
| 983 log_err("There is an error in u_uastrcpy() Expected %s Got %s\n", austrd
up(uchars), austrdup(temp)); |
| 984 } |
| 985 |
| 986 temp[0] = 0xFB; /* load garbage into it */ |
| 987 temp[1] = 0xFB; |
| 988 temp[2] = 0xFB; |
| 989 temp[3] = 0xFB; |
| 990 |
| 991 u_uastrncpy(temp, "abcabcabc", 3); |
| 992 if(u_strncmp(uchars, temp, 3) != 0){ |
| 993 log_err("There is an error in u_uastrncpy() Expected %s Got %s\n", austr
dup(uchars), austrdup(temp)); |
| 994 } |
| 995 if(temp[3] != 0xFB) { |
| 996 log_err("u_uastrncpy wrote past it's bounds. Expected undisturbed byte a
t 3\n"); |
| 997 } |
| 998 |
| 999 charOut[0] = (char)0x7B; /* load garbage into it */ |
| 1000 charOut[1] = (char)0x7B; |
| 1001 charOut[2] = (char)0x7B; |
| 1002 charOut[3] = (char)0x7B; |
| 1003 |
| 1004 temp[0] = 0x0061; |
| 1005 temp[1] = 0x0062; |
| 1006 temp[2] = 0x0063; |
| 1007 temp[3] = 0x0061; |
| 1008 temp[4] = 0x0062; |
| 1009 temp[5] = 0x0063; |
| 1010 temp[6] = 0x0000; |
| 1011 |
| 1012 u_austrncpy(charOut, temp, 3); |
| 1013 if(strncmp(chars, charOut, 3) != 0){ |
| 1014 log_err("There is an error in u_austrncpy() Expected %s Got %s\n", austr
dup(uchars), austrdup(temp)); |
| 1015 } |
| 1016 if(charOut[3] != (char)0x7B) { |
| 1017 log_err("u_austrncpy wrote past it's bounds. Expected undisturbed byte a
t 3\n"); |
| 1018 } |
| 1019 |
| 1020 /*Testing u_strchr()*/ |
| 1021 log_verbose("Testing u_strchr\n"); |
| 1022 temp[0]=0x42; |
| 1023 temp[1]=0x62; |
| 1024 temp[2]=0x62; |
| 1025 temp[3]=0x63; |
| 1026 temp[4]=0xd841; |
| 1027 temp[5]=0xd841; |
| 1028 temp[6]=0xdc02; |
| 1029 temp[7]=0; |
| 1030 result=u_strchr(temp, (UChar)0x62); |
| 1031 if(result != temp+1){ |
| 1032 log_err("There is an error in u_strchr() Expected match at position 1 Go
t %ld (pointer 0x%lx)\n", result-temp, result); |
| 1033 } |
| 1034 /*Testing u_strstr()*/ |
| 1035 log_verbose("Testing u_strstr\n"); |
| 1036 subString[0]=0x62; |
| 1037 subString[1]=0x63; |
| 1038 subString[2]=0; |
| 1039 result=u_strstr(temp, subString); |
| 1040 if(result != temp+2){ |
| 1041 log_err("There is an error in u_strstr() Expected match at position 2 Go
t %ld (pointer 0x%lx)\n", result-temp, result); |
| 1042 } |
| 1043 result=u_strstr(temp, subString+2); /* subString+2 is an empty string */ |
| 1044 if(result != temp){ |
| 1045 log_err("There is an error in u_strstr() Expected match at position 0 Go
t %ld (pointer 0x%lx)\n", result-temp, result); |
| 1046 } |
| 1047 result=u_strstr(subString, temp); |
| 1048 if(result != NULL){ |
| 1049 log_err("There is an error in u_strstr() Expected NULL \"not found\" Got
non-NULL \"found\" result\n"); |
| 1050 } |
| 1051 |
| 1052 /*Testing u_strchr32*/ |
| 1053 log_verbose("Testing u_strchr32\n"); |
| 1054 result=u_strchr32(temp, (UChar32)0x62); |
| 1055 if(result != temp+1){ |
| 1056 log_err("There is an error in u_strchr32() Expected match at position 1
Got %ld (pointer 0x%lx)\n", result-temp, result); |
| 1057 } |
| 1058 result=u_strchr32(temp, (UChar32)0xfb); |
| 1059 if(result != NULL){ |
| 1060 log_err("There is an error in u_strchr32() Expected NULL \"not found\" G
ot non-NULL \"found\" result\n"); |
| 1061 } |
| 1062 result=u_strchr32(temp, (UChar32)0x20402); |
| 1063 if(result != temp+5){ |
| 1064 log_err("There is an error in u_strchr32() Expected match at position 5
Got %ld (pointer 0x%lx)\n", result-temp, result); |
| 1065 } |
| 1066 |
| 1067 temp[7]=0xfc00; |
| 1068 result=u_memchr32(temp, (UChar32)0x20402, 7); |
| 1069 if(result != temp+5){ |
| 1070 log_err("There is an error in u_memchr32() Expected match at position 5
Got %ld (pointer 0x%lx)\n", result-temp, result); |
| 1071 } |
| 1072 result=u_memchr32(temp, (UChar32)0x20402, 6); |
| 1073 if(result != NULL){ |
| 1074 log_err("There is an error in u_memchr32() Expected no match Got %ld (po
inter 0x%lx)\n", result-temp, result); |
| 1075 } |
| 1076 result=u_memchr32(temp, (UChar32)0x20402, 1); |
| 1077 if(result != NULL){ |
| 1078 log_err("There is an error in u_memchr32() Expected no match Got %ld (po
inter 0x%lx)\n", result-temp, result); |
| 1079 } |
| 1080 result=u_memchr32(temp, (UChar32)0xfc00, 8); |
| 1081 if(result != temp+7){ |
| 1082 log_err("There is an error in u_memchr32() Expected match at position 7
Got %ld (pointer 0x%lx)\n", result-temp, result); |
| 1083 } |
| 1084 } |
| 1085 |
| 1086 /* test u_unescape() and u_unescapeAt() ------------------------------------- */ |
| 1087 |
| 1088 static void |
| 1089 TestUnescape() { |
| 1090 static UChar buffer[200]; |
| 1091 |
| 1092 static const char* input = |
| 1093 "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\e\\
cC\\n \\x1b\\x{263a}"; |
| 1094 |
| 1095 static const UChar expect[]={ |
| 1096 0x53, 0x63, 0x68, 0xf6, 0x6e, 0x65, 0x73, 0x20, 0x41, 0x75, 0x74, 0x6f,
0x3a, 0x20, |
| 1097 0x20ac, 0x20, 0x31, 0x31, 0x32, 0x34, 0x30, 0x2e, 0x0c, |
| 1098 0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20, |
| 1099 0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x
1b, 0x03, 0x0a, 0x20, 0x1b, 0x263A, 0 |
| 1100 }; |
| 1101 static const int32_t explength = sizeof(expect)/sizeof(expect[0])-1; |
| 1102 int32_t length; |
| 1103 |
| 1104 /* test u_unescape() */ |
| 1105 length=u_unescape(input, buffer, sizeof(buffer)/sizeof(buffer[0])); |
| 1106 if(length!=explength || u_strcmp(buffer, expect)!=0) { |
| 1107 log_err("failure in u_unescape(): length %d!=%d and/or incorrect result
string\n", length, |
| 1108 explength); |
| 1109 } |
| 1110 |
| 1111 /* try preflighting */ |
| 1112 length=u_unescape(input, NULL, sizeof(buffer)/sizeof(buffer[0])); |
| 1113 if(length!=explength || u_strcmp(buffer, expect)!=0) { |
| 1114 log_err("failure in u_unescape(preflighting): length %d!=%d\n", length,
explength); |
| 1115 } |
| 1116 |
| 1117 /* ### TODO: test u_unescapeAt() */ |
| 1118 } |
| 1119 |
| 1120 /* test code point counting functions --------------------------------------- */ |
| 1121 |
| 1122 /* reference implementation of u_strHasMoreChar32Than() */ |
| 1123 static int32_t |
| 1124 _refStrHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) { |
| 1125 int32_t count=u_countChar32(s, length); |
| 1126 return count>number; |
| 1127 } |
| 1128 |
| 1129 /* compare the real function against the reference */ |
| 1130 static void |
| 1131 _testStrHasMoreChar32Than(const UChar *s, int32_t i, int32_t length, int32_t num
ber) { |
| 1132 if(u_strHasMoreChar32Than(s, length, number)!=_refStrHasMoreChar32Than(s, le
ngth, number)) { |
| 1133 log_err("u_strHasMoreChar32Than(s+%d, %d, %d)=%hd is wrong\n", |
| 1134 i, length, number, u_strHasMoreChar32Than(s, length, number)); |
| 1135 } |
| 1136 } |
| 1137 |
| 1138 static void |
| 1139 TestCountChar32() { |
| 1140 static const UChar string[]={ |
| 1141 0x61, 0x62, 0xd800, 0xdc00, |
| 1142 0xd801, 0xdc01, 0x63, 0xd802, |
| 1143 0x64, 0xdc03, 0x65, 0x66, |
| 1144 0xd804, 0xdc04, 0xd805, 0xdc05, |
| 1145 0x67 |
| 1146 }; |
| 1147 UChar buffer[100]; |
| 1148 int32_t i, length, number; |
| 1149 |
| 1150 /* test u_strHasMoreChar32Than() with length>=0 */ |
| 1151 length=LENGTHOF(string); |
| 1152 while(length>=0) { |
| 1153 for(i=0; i<=length; ++i) { |
| 1154 for(number=-1; number<=((length-i)+2); ++number) { |
| 1155 _testStrHasMoreChar32Than(string+i, i, length-i, number); |
| 1156 } |
| 1157 } |
| 1158 --length; |
| 1159 } |
| 1160 |
| 1161 /* test u_strHasMoreChar32Than() with NUL-termination (length=-1) */ |
| 1162 length=LENGTHOF(string); |
| 1163 u_memcpy(buffer, string, length); |
| 1164 while(length>=0) { |
| 1165 buffer[length]=0; |
| 1166 for(i=0; i<=length; ++i) { |
| 1167 for(number=-1; number<=((length-i)+2); ++number) { |
| 1168 _testStrHasMoreChar32Than(string+i, i, -1, number); |
| 1169 } |
| 1170 } |
| 1171 --length; |
| 1172 } |
| 1173 |
| 1174 /* test u_strHasMoreChar32Than() with NULL string (bad input) */ |
| 1175 for(length=-1; length<=1; ++length) { |
| 1176 for(i=0; i<=length; ++i) { |
| 1177 for(number=-2; number<=2; ++number) { |
| 1178 _testStrHasMoreChar32Than(NULL, 0, length, number); |
| 1179 } |
| 1180 } |
| 1181 } |
| 1182 } |
| 1183 |
| 1184 /* UCharIterator ------------------------------------------------------------ */ |
| 1185 |
| 1186 /* |
| 1187 * Compare results from two iterators, should be same. |
| 1188 * Assume that the text is not empty and that |
| 1189 * iteration start==0 and iteration limit==length. |
| 1190 */ |
| 1191 static void |
| 1192 compareIterators(UCharIterator *iter1, const char *n1, |
| 1193 UCharIterator *iter2, const char *n2) { |
| 1194 int32_t i, pos1, pos2, middle, length; |
| 1195 UChar32 c1, c2; |
| 1196 |
| 1197 /* compare lengths */ |
| 1198 length=iter1->getIndex(iter1, UITER_LENGTH); |
| 1199 pos2=iter2->getIndex(iter2, UITER_LENGTH); |
| 1200 if(length!=pos2) { |
| 1201 log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1, leng
th, pos2, n2); |
| 1202 return; |
| 1203 } |
| 1204 |
| 1205 /* set into the middle */ |
| 1206 middle=length/2; |
| 1207 |
| 1208 pos1=iter1->move(iter1, middle, UITER_ZERO); |
| 1209 if(pos1!=middle) { |
| 1210 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n"
, n1, middle, pos1); |
| 1211 return; |
| 1212 } |
| 1213 |
| 1214 pos2=iter2->move(iter2, middle, UITER_ZERO); |
| 1215 if(pos2!=middle) { |
| 1216 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n"
, n2, middle, pos2); |
| 1217 return; |
| 1218 } |
| 1219 |
| 1220 /* test current() */ |
| 1221 c1=iter1->current(iter1); |
| 1222 c2=iter2->current(iter2); |
| 1223 if(c1!=c2) { |
| 1224 log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n
1, c1, c2, n2, middle); |
| 1225 return; |
| 1226 } |
| 1227 |
| 1228 /* move forward 3 UChars */ |
| 1229 for(i=0; i<3; ++i) { |
| 1230 c1=iter1->next(iter1); |
| 1231 c2=iter2->next(iter2); |
| 1232 if(c1!=c2) { |
| 1233 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in mi
ddle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); |
| 1234 return; |
| 1235 } |
| 1236 } |
| 1237 |
| 1238 /* move backward 5 UChars */ |
| 1239 for(i=0; i<5; ++i) { |
| 1240 c1=iter1->previous(iter1); |
| 1241 c2=iter2->previous(iter2); |
| 1242 if(c1!=c2) { |
| 1243 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (start
ed in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); |
| 1244 return; |
| 1245 } |
| 1246 } |
| 1247 |
| 1248 /* iterate forward from the beginning */ |
| 1249 pos1=iter1->move(iter1, 0, UITER_START); |
| 1250 if(pos1<0) { |
| 1251 log_err("%s->move(start) failed\n", n1); |
| 1252 return; |
| 1253 } |
| 1254 if(!iter1->hasNext(iter1)) { |
| 1255 log_err("%s->hasNext() at the start returns FALSE\n", n1); |
| 1256 return; |
| 1257 } |
| 1258 |
| 1259 pos2=iter2->move(iter2, 0, UITER_START); |
| 1260 if(pos2<0) { |
| 1261 log_err("%s->move(start) failed\n", n2); |
| 1262 return; |
| 1263 } |
| 1264 if(!iter2->hasNext(iter2)) { |
| 1265 log_err("%s->hasNext() at the start returns FALSE\n", n2); |
| 1266 return; |
| 1267 } |
| 1268 |
| 1269 do { |
| 1270 c1=iter1->next(iter1); |
| 1271 c2=iter2->next(iter2); |
| 1272 if(c1!=c2) { |
| 1273 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1, c1, c2
, n2, iter1->getIndex(iter1, UITER_CURRENT)); |
| 1274 return; |
| 1275 } |
| 1276 } while(c1>=0); |
| 1277 |
| 1278 if(iter1->hasNext(iter1)) { |
| 1279 log_err("%s->hasNext() at the end returns TRUE\n", n1); |
| 1280 return; |
| 1281 } |
| 1282 if(iter2->hasNext(iter2)) { |
| 1283 log_err("%s->hasNext() at the end returns TRUE\n", n2); |
| 1284 return; |
| 1285 } |
| 1286 |
| 1287 /* back to the middle */ |
| 1288 pos1=iter1->move(iter1, middle, UITER_ZERO); |
| 1289 if(pos1!=middle) { |
| 1290 log_err("%s->move(from end to middle %d)=%d does not move to the middle\
n", n1, middle, pos1); |
| 1291 return; |
| 1292 } |
| 1293 |
| 1294 pos2=iter2->move(iter2, middle, UITER_ZERO); |
| 1295 if(pos2!=middle) { |
| 1296 log_err("%s->move(from end to middle %d)=%d does not move to the middle\
n", n2, middle, pos2); |
| 1297 return; |
| 1298 } |
| 1299 |
| 1300 /* move to index 1 */ |
| 1301 pos1=iter1->move(iter1, 1, UITER_ZERO); |
| 1302 if(pos1!=1) { |
| 1303 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1, mid
dle, pos1); |
| 1304 return; |
| 1305 } |
| 1306 |
| 1307 pos2=iter2->move(iter2, 1, UITER_ZERO); |
| 1308 if(pos2!=1) { |
| 1309 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2, mid
dle, pos2); |
| 1310 return; |
| 1311 } |
| 1312 |
| 1313 /* iterate backward from the end */ |
| 1314 pos1=iter1->move(iter1, 0, UITER_LIMIT); |
| 1315 if(pos1<0) { |
| 1316 log_err("%s->move(limit) failed\n", n1); |
| 1317 return; |
| 1318 } |
| 1319 if(!iter1->hasPrevious(iter1)) { |
| 1320 log_err("%s->hasPrevious() at the end returns FALSE\n", n1); |
| 1321 return; |
| 1322 } |
| 1323 |
| 1324 pos2=iter2->move(iter2, 0, UITER_LIMIT); |
| 1325 if(pos2<0) { |
| 1326 log_err("%s->move(limit) failed\n", n2); |
| 1327 return; |
| 1328 } |
| 1329 if(!iter2->hasPrevious(iter2)) { |
| 1330 log_err("%s->hasPrevious() at the end returns FALSE\n", n2); |
| 1331 return; |
| 1332 } |
| 1333 |
| 1334 do { |
| 1335 c1=iter1->previous(iter1); |
| 1336 c2=iter2->previous(iter2); |
| 1337 if(c1!=c2) { |
| 1338 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1
, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); |
| 1339 return; |
| 1340 } |
| 1341 } while(c1>=0); |
| 1342 |
| 1343 if(iter1->hasPrevious(iter1)) { |
| 1344 log_err("%s->hasPrevious() at the start returns TRUE\n", n1); |
| 1345 return; |
| 1346 } |
| 1347 if(iter2->hasPrevious(iter2)) { |
| 1348 log_err("%s->hasPrevious() at the start returns TRUE\n", n2); |
| 1349 return; |
| 1350 } |
| 1351 } |
| 1352 |
| 1353 /* |
| 1354 * Test the iterator's getState() and setState() functions. |
| 1355 * iter1 and iter2 must be set up for the same iterator type and the same string |
| 1356 * but may be physically different structs (different addresses). |
| 1357 * |
| 1358 * Assume that the text is not empty and that |
| 1359 * iteration start==0 and iteration limit==length. |
| 1360 * It must be 2<=middle<=length-2. |
| 1361 */ |
| 1362 static void |
| 1363 testIteratorState(UCharIterator *iter1, UCharIterator *iter2, const char *n, int
32_t middle) { |
| 1364 UChar32 u[4]; |
| 1365 |
| 1366 UErrorCode errorCode; |
| 1367 UChar32 c; |
| 1368 uint32_t state; |
| 1369 int32_t i, j; |
| 1370 |
| 1371 /* get four UChars from the middle of the string */ |
| 1372 iter1->move(iter1, middle-2, UITER_ZERO); |
| 1373 for(i=0; i<4; ++i) { |
| 1374 c=iter1->next(iter1); |
| 1375 if(c<0) { |
| 1376 /* the test violates the assumptions, see comment above */ |
| 1377 log_err("test error: %s[%d]=%d\n", n, middle-2+i, c); |
| 1378 return; |
| 1379 } |
| 1380 u[i]=c; |
| 1381 } |
| 1382 |
| 1383 /* move to the middle and get the state */ |
| 1384 iter1->move(iter1, -2, UITER_CURRENT); |
| 1385 state=uiter_getState(iter1); |
| 1386 |
| 1387 /* set the state into the second iterator and compare the results */ |
| 1388 errorCode=U_ZERO_ERROR; |
| 1389 uiter_setState(iter2, state, &errorCode); |
| 1390 if(U_FAILURE(errorCode)) { |
| 1391 log_err("%s->setState(0x%x) failed: %s\n", n, state, u_errorName(errorCo
de)); |
| 1392 return; |
| 1393 } |
| 1394 |
| 1395 c=iter2->current(iter2); |
| 1396 if(c!=u[2]) { |
| 1397 log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n, middle, c, u[2]); |
| 1398 } |
| 1399 |
| 1400 c=iter2->previous(iter2); |
| 1401 if(c!=u[1]) { |
| 1402 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-1, c, u[1]); |
| 1403 } |
| 1404 |
| 1405 iter2->move(iter2, 2, UITER_CURRENT); |
| 1406 c=iter2->next(iter2); |
| 1407 if(c!=u[3]) { |
| 1408 log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n, middle+1, c, u[3]); |
| 1409 } |
| 1410 |
| 1411 iter2->move(iter2, -3, UITER_CURRENT); |
| 1412 c=iter2->previous(iter2); |
| 1413 if(c!=u[0]) { |
| 1414 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-2, c, u[0]); |
| 1415 } |
| 1416 |
| 1417 /* move the second iterator back to the middle */ |
| 1418 iter2->move(iter2, 1, UITER_CURRENT); |
| 1419 iter2->next(iter2); |
| 1420 |
| 1421 /* check that both are in the middle */ |
| 1422 i=iter1->getIndex(iter1, UITER_CURRENT); |
| 1423 j=iter2->getIndex(iter2, UITER_CURRENT); |
| 1424 if(i!=middle) { |
| 1425 log_err("%s->getIndex(current)=%d!=%d as expected\n", n, i, middle); |
| 1426 } |
| 1427 if(i!=j) { |
| 1428 log_err("%s->getIndex(current)=%d!=%d after setState()\n", n, j, i); |
| 1429 } |
| 1430 |
| 1431 /* compare lengths */ |
| 1432 i=iter1->getIndex(iter1, UITER_LENGTH); |
| 1433 j=iter2->getIndex(iter2, UITER_LENGTH); |
| 1434 if(i!=j) { |
| 1435 log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n, i, j
); |
| 1436 } |
| 1437 } |
| 1438 |
| 1439 static void |
| 1440 TestUCharIterator() { |
| 1441 static const UChar text[]={ |
| 1442 0x61, 0x62, 0x63, 0xd801, 0xdffd, 0x78, 0x79, 0x7a, 0 |
| 1443 }; |
| 1444 char bytes[40]; |
| 1445 |
| 1446 UCharIterator iter, iter1, iter2; |
| 1447 UConverter *cnv; |
| 1448 UErrorCode errorCode; |
| 1449 int32_t length; |
| 1450 |
| 1451 /* simple API/code coverage - test NOOP UCharIterator */ |
| 1452 uiter_setString(&iter, NULL, 0); |
| 1453 if( iter.current(&iter)!=-1 || iter.next(&iter)!=-1 || iter.previous(&iter)!
=-1 || |
| 1454 iter.move(&iter, 1, UITER_CURRENT) || iter.getIndex(&iter, UITER_CURRENT
)!=0 || |
| 1455 iter.hasNext(&iter) || iter.hasPrevious(&iter) |
| 1456 ) { |
| 1457 log_err("NOOP UCharIterator behaves unexpectedly\n"); |
| 1458 } |
| 1459 |
| 1460 /* test get/set state */ |
| 1461 length=LENGTHOF(text)-1; |
| 1462 uiter_setString(&iter1, text, -1); |
| 1463 uiter_setString(&iter2, text, length); |
| 1464 testIteratorState(&iter1, &iter2, "UTF16IteratorState", length/2); |
| 1465 testIteratorState(&iter1, &iter2, "UTF16IteratorStatePlus1", length/2+1); |
| 1466 |
| 1467 /* compare the same string between UTF-16 and UTF-8 UCharIterators ------ */ |
| 1468 errorCode=U_ZERO_ERROR; |
| 1469 u_strToUTF8(bytes, sizeof(bytes), &length, text, -1, &errorCode); |
| 1470 if(U_FAILURE(errorCode)) { |
| 1471 log_err("u_strToUTF8() failed, %s\n", u_errorName(errorCode)); |
| 1472 return; |
| 1473 } |
| 1474 |
| 1475 uiter_setString(&iter1, text, -1); |
| 1476 uiter_setUTF8(&iter2, bytes, length); |
| 1477 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator"); |
| 1478 |
| 1479 /* try again with length=-1 */ |
| 1480 uiter_setUTF8(&iter2, bytes, -1); |
| 1481 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator_1"); |
| 1482 |
| 1483 /* test get/set state */ |
| 1484 length=LENGTHOF(text)-1; |
| 1485 uiter_setUTF8(&iter1, bytes, -1); |
| 1486 testIteratorState(&iter1, &iter2, "UTF8IteratorState", length/2); |
| 1487 testIteratorState(&iter1, &iter2, "UTF8IteratorStatePlus1", length/2+1); |
| 1488 |
| 1489 /* compare the same string between UTF-16 and UTF-16BE UCharIterators --- */ |
| 1490 errorCode=U_ZERO_ERROR; |
| 1491 cnv=ucnv_open("UTF-16BE", &errorCode); |
| 1492 length=ucnv_fromUChars(cnv, bytes, sizeof(bytes), text, -1, &errorCode); |
| 1493 ucnv_close(cnv); |
| 1494 if(U_FAILURE(errorCode)) { |
| 1495 log_err("ucnv_fromUChars(UTF-16BE) failed, %s\n", u_errorName(errorCode)
); |
| 1496 return; |
| 1497 } |
| 1498 |
| 1499 /* terminate with a _pair_ of 0 bytes - a UChar NUL in UTF-16BE (length is k
nown to be ok) */ |
| 1500 bytes[length]=bytes[length+1]=0; |
| 1501 |
| 1502 uiter_setString(&iter1, text, -1); |
| 1503 uiter_setUTF16BE(&iter2, bytes, length); |
| 1504 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator"); |
| 1505 |
| 1506 /* try again with length=-1 */ |
| 1507 uiter_setUTF16BE(&iter2, bytes, -1); |
| 1508 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator_1"); |
| 1509 |
| 1510 /* try again after moving the bytes up one, and with length=-1 */ |
| 1511 memmove(bytes+1, bytes, length+2); |
| 1512 uiter_setUTF16BE(&iter2, bytes+1, -1); |
| 1513 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIteratorMoved1"); |
| 1514 |
| 1515 /* ### TODO test other iterators: CharacterIterator, Replaceable */ |
| 1516 } |
| 1517 |
| 1518 #if UCONFIG_NO_COLLATION |
| 1519 |
/* Placeholder used when UCONFIG_NO_COLLATION is set: there is nothing to test. */
static void
TestUNormIterator(void) {
}
| 1524 |
/* Placeholder used when UCONFIG_NO_COLLATION is set: there is nothing to test here either. */
static void
TestBadUNormIterator(void) {
}
| 1529 |
| 1530 #else |
| 1531 |
| 1532 #include "unicode/unorm.h" |
| 1533 #include "unorm_it.h" |
| 1534 |
| 1535 /* |
| 1536 * Compare results from two iterators, should be same. |
| 1537 * Assume that the text is not empty and that |
| 1538 * iteration start==0 and iteration limit==length. |
| 1539 * |
| 1540 * Modified version of compareIterators() but does not assume that indexes |
| 1541 * are available. |
| 1542 */ |
| 1543 static void |
| 1544 compareIterNoIndexes(UCharIterator *iter1, const char *n1, |
| 1545 UCharIterator *iter2, const char *n2, |
| 1546 int32_t middle) { |
| 1547 uint32_t state; |
| 1548 int32_t i; |
| 1549 UChar32 c1, c2; |
| 1550 UErrorCode errorCode; |
| 1551 |
| 1552 /* code coverage for unorm_it.c/unormIteratorGetIndex() */ |
| 1553 if( |
| 1554 iter2->getIndex(iter2, UITER_START)!=0 || |
| 1555 iter2->getIndex(iter2, UITER_LENGTH)!=UITER_UNKNOWN_INDEX |
| 1556 ) { |
| 1557 log_err("UNormIterator.getIndex() failed\n"); |
| 1558 } |
| 1559 |
| 1560 /* set into the middle */ |
| 1561 iter1->move(iter1, middle, UITER_ZERO); |
| 1562 iter2->move(iter2, middle, UITER_ZERO); |
| 1563 |
| 1564 /* test current() */ |
| 1565 c1=iter1->current(iter1); |
| 1566 c2=iter2->current(iter2); |
| 1567 if(c1!=c2) { |
| 1568 log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n
1, c1, c2, n2, middle); |
| 1569 return; |
| 1570 } |
| 1571 |
| 1572 /* move forward 3 UChars */ |
| 1573 for(i=0; i<3; ++i) { |
| 1574 c1=iter1->next(iter1); |
| 1575 c2=iter2->next(iter2); |
| 1576 if(c1!=c2) { |
| 1577 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in mi
ddle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); |
| 1578 return; |
| 1579 } |
| 1580 } |
| 1581 |
| 1582 /* move backward 5 UChars */ |
| 1583 for(i=0; i<5; ++i) { |
| 1584 c1=iter1->previous(iter1); |
| 1585 c2=iter2->previous(iter2); |
| 1586 if(c1!=c2) { |
| 1587 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (start
ed in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); |
| 1588 return; |
| 1589 } |
| 1590 } |
| 1591 |
| 1592 /* iterate forward from the beginning */ |
| 1593 iter1->move(iter1, 0, UITER_START); |
| 1594 if(!iter1->hasNext(iter1)) { |
| 1595 log_err("%s->hasNext() at the start returns FALSE\n", n1); |
| 1596 return; |
| 1597 } |
| 1598 |
| 1599 iter2->move(iter2, 0, UITER_START); |
| 1600 if(!iter2->hasNext(iter2)) { |
| 1601 log_err("%s->hasNext() at the start returns FALSE\n", n2); |
| 1602 return; |
| 1603 } |
| 1604 |
| 1605 do { |
| 1606 c1=iter1->next(iter1); |
| 1607 c2=iter2->next(iter2); |
| 1608 if(c1!=c2) { |
| 1609 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1, c1, c2
, n2, iter1->getIndex(iter1, UITER_CURRENT)); |
| 1610 return; |
| 1611 } |
| 1612 } while(c1>=0); |
| 1613 |
| 1614 if(iter1->hasNext(iter1)) { |
| 1615 log_err("%s->hasNext() at the end returns TRUE\n", n1); |
| 1616 return; |
| 1617 } |
| 1618 if(iter2->hasNext(iter2)) { |
| 1619 log_err("%s->hasNext() at the end returns TRUE\n", n2); |
| 1620 return; |
| 1621 } |
| 1622 |
| 1623 /* iterate backward */ |
| 1624 do { |
| 1625 c1=iter1->previous(iter1); |
| 1626 c2=iter2->previous(iter2); |
| 1627 if(c1!=c2) { |
| 1628 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1
, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); |
| 1629 return; |
| 1630 } |
| 1631 } while(c1>=0); |
| 1632 |
| 1633 /* back to the middle */ |
| 1634 iter1->move(iter1, middle, UITER_ZERO); |
| 1635 iter2->move(iter2, middle, UITER_ZERO); |
| 1636 |
| 1637 /* try get/set state */ |
| 1638 while((state=uiter_getState(iter2))==UITER_NO_STATE) { |
| 1639 if(!iter2->hasNext(iter2)) { |
| 1640 log_err("%s has no known state from middle=%d to the end\n", n2, mid
dle); |
| 1641 return; |
| 1642 } |
| 1643 iter2->next(iter2); |
| 1644 } |
| 1645 |
| 1646 errorCode=U_ZERO_ERROR; |
| 1647 |
| 1648 c2=iter2->current(iter2); |
| 1649 iter2->move(iter2, 0, UITER_ZERO); |
| 1650 uiter_setState(iter2, state, &errorCode); |
| 1651 c1=iter2->current(iter2); |
| 1652 if(U_FAILURE(errorCode) || c1!=c2) { |
| 1653 log_err("%s->current() differs across get/set state, U+%04x vs. U+%04x\n
", n2, c2, c1); |
| 1654 return; |
| 1655 } |
| 1656 |
| 1657 c2=iter2->previous(iter2); |
| 1658 iter2->move(iter2, 0, UITER_ZERO); |
| 1659 uiter_setState(iter2, state, &errorCode); |
| 1660 c1=iter2->previous(iter2); |
| 1661 if(U_FAILURE(errorCode) || c1!=c2) { |
| 1662 log_err("%s->previous() differs across get/set state, U+%04x vs. U+%04x\
n", n2, c2, c1); |
| 1663 return; |
| 1664 } |
| 1665 |
| 1666 /* iterate backward from the end */ |
| 1667 iter1->move(iter1, 0, UITER_LIMIT); |
| 1668 if(!iter1->hasPrevious(iter1)) { |
| 1669 log_err("%s->hasPrevious() at the end returns FALSE\n", n1); |
| 1670 return; |
| 1671 } |
| 1672 |
| 1673 iter2->move(iter2, 0, UITER_LIMIT); |
| 1674 if(!iter2->hasPrevious(iter2)) { |
| 1675 log_err("%s->hasPrevious() at the end returns FALSE\n", n2); |
| 1676 return; |
| 1677 } |
| 1678 |
| 1679 do { |
| 1680 c1=iter1->previous(iter1); |
| 1681 c2=iter2->previous(iter2); |
| 1682 if(c1!=c2) { |
| 1683 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1
, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT)); |
| 1684 return; |
| 1685 } |
| 1686 } while(c1>=0); |
| 1687 |
| 1688 if(iter1->hasPrevious(iter1)) { |
| 1689 log_err("%s->hasPrevious() at the start returns TRUE\n", n1); |
| 1690 return; |
| 1691 } |
| 1692 if(iter2->hasPrevious(iter2)) { |
| 1693 log_err("%s->hasPrevious() at the start returns TRUE\n", n2); |
| 1694 return; |
| 1695 } |
| 1696 } |
| 1697 |
| 1698 /* n2 must have a digit 1 at the end, will be incremented with the normalization
mode */ |
| 1699 static void |
| 1700 testUNormIteratorWithText(const UChar *text, int32_t textLength, int32_t middle, |
| 1701 const char *name1, const char *n2) { |
| 1702 UChar buffer[600]; |
| 1703 char name2[40]; |
| 1704 |
| 1705 UCharIterator iter1, iter2, *iter; |
| 1706 UNormIterator *uni; |
| 1707 |
| 1708 UNormalizationMode mode; |
| 1709 UErrorCode errorCode; |
| 1710 int32_t length; |
| 1711 |
| 1712 /* open a normalizing iterator */ |
| 1713 errorCode=U_ZERO_ERROR; |
| 1714 uni=unorm_openIter(NULL, 0, &errorCode); |
| 1715 if(U_FAILURE(errorCode)) { |
| 1716 log_err("unorm_openIter() fails: %s\n", u_errorName(errorCode)); |
| 1717 return; |
| 1718 } |
| 1719 |
| 1720 /* set iterator 2 to the original text */ |
| 1721 uiter_setString(&iter2, text, textLength); |
| 1722 |
| 1723 strcpy(name2, n2); |
| 1724 |
| 1725 /* test the normalizing iterator for each mode */ |
| 1726 for(mode=UNORM_NONE; mode<UNORM_MODE_COUNT; ++mode) { |
| 1727 length=unorm_normalize(text, textLength, mode, 0, buffer, LENGTHOF(buffe
r), &errorCode); |
| 1728 if(U_FAILURE(errorCode)) { |
| 1729 log_data_err("unorm_normalize(mode %d) failed: %s - (Are you missing
data?)\n", mode, u_errorName(errorCode)); |
| 1730 break; |
| 1731 } |
| 1732 |
| 1733 /* set iterator 1 to the normalized text */ |
| 1734 uiter_setString(&iter1, buffer, length); |
| 1735 |
| 1736 /* set the normalizing iterator to use iter2 */ |
| 1737 iter=unorm_setIter(uni, &iter2, mode, &errorCode); |
| 1738 if(U_FAILURE(errorCode)) { |
| 1739 log_err("unorm_setIter(mode %d) failed: %s\n", mode, u_errorName(err
orCode)); |
| 1740 break; |
| 1741 } |
| 1742 |
| 1743 compareIterNoIndexes(&iter1, name1, iter, name2, middle); |
| 1744 ++name2[strlen(name2)-1]; |
| 1745 } |
| 1746 |
| 1747 unorm_closeIter(uni); |
| 1748 } |
| 1749 |
| 1750 static void |
| 1751 TestUNormIterator() { |
| 1752 static const UChar text[]={ /* must contain <00C5 0327> see u_strchr() below
*/ |
| 1753 0x61, /* 'a' */ |
| 1754 0xe4, 0x61, 0x308, /* variations of
'a'+umlaut */ |
| 1755 0xc5, 0x327, 0x41, 0x30a, 0x327, 0x41, 0x327, 0x30a, /* variations of
'A'+ring+cedilla */ |
| 1756 0xfb03, 0xfb00, 0x69, 0x66, 0x66, 0x69, 0x66, 0xfb01 /* variations of
'ffi' */ |
| 1757 }; |
| 1758 static const UChar surrogateText[]={ |
| 1759 0x6e, 0xd900, 0x6a, 0xdc00, 0xd900, 0xdc00, 0x61 |
| 1760 }; |
| 1761 |
| 1762 UChar longText[600]; |
| 1763 int32_t i, middle, length; |
| 1764 |
| 1765 length=LENGTHOF(text); |
| 1766 testUNormIteratorWithText(text, length, length/2, "UCharIter", "UNormIter1")
; |
| 1767 testUNormIteratorWithText(text, length, length, "UCharIterEnd", "UNormIterEn
d1"); |
| 1768 |
| 1769 /* test again, this time with an insane string to cause internal buffer over
flows */ |
| 1770 middle=(int32_t)(u_strchr(text, 0x327)-text); /* see comment at text[] */ |
| 1771 memcpy(longText, text, middle*U_SIZEOF_UCHAR); |
| 1772 for(i=0; i<150; ++i) { |
| 1773 longText[middle+i]=0x30a; /* insert many rings between 'A-ring' and cedi
lla */ |
| 1774 } |
| 1775 memcpy(longText+middle+i, text+middle, (LENGTHOF(text)-middle)*U_SIZEOF_UCHA
R); |
| 1776 length=LENGTHOF(text)+i; |
| 1777 |
| 1778 /* append another copy of this string for more overflows */ |
| 1779 memcpy(longText+length, longText, length*U_SIZEOF_UCHAR); |
| 1780 length*=2; |
| 1781 |
| 1782 /* the first test of the following two starts at length/4, inside the sea of
combining rings */ |
| 1783 testUNormIteratorWithText(longText, length, length/4, "UCharIterLong", "UNor
mIterLong1"); |
| 1784 testUNormIteratorWithText(longText, length, length, "UCharIterLongEnd", "UNo
rmIterLongEnd1"); |
| 1785 |
| 1786 length=LENGTHOF(surrogateText); |
| 1787 testUNormIteratorWithText(surrogateText, length, length/4, "UCharIterSurr",
"UNormIterSurr1"); |
| 1788 testUNormIteratorWithText(surrogateText, length, length, "UCharIterSurrEnd",
"UNormIterSurrEnd1"); |
| 1789 } |
| 1790 |
| 1791 static void |
| 1792 TestBadUNormIterator(void) { |
| 1793 #if !UCONFIG_NO_NORMALIZATION |
| 1794 UErrorCode status = U_ILLEGAL_ESCAPE_SEQUENCE; |
| 1795 UNormIterator *uni; |
| 1796 |
| 1797 unorm_setIter(NULL, NULL, UNORM_NONE, &status); |
| 1798 if (status != U_ILLEGAL_ESCAPE_SEQUENCE) { |
| 1799 log_err("unorm_setIter changed the error code to: %s\n", u_errorName(sta
tus)); |
| 1800 } |
| 1801 status = U_ZERO_ERROR; |
| 1802 unorm_setIter(NULL, NULL, UNORM_NONE, &status); |
| 1803 if (status != U_ILLEGAL_ARGUMENT_ERROR) { |
| 1804 log_err("unorm_setIter didn't react correctly to bad arguments: %s\n", u
_errorName(status)); |
| 1805 } |
| 1806 status = U_ZERO_ERROR; |
| 1807 uni=unorm_openIter(NULL, 0, &status); |
| 1808 if(U_FAILURE(status)) { |
| 1809 log_err("unorm_openIter() fails: %s\n", u_errorName(status)); |
| 1810 return; |
| 1811 } |
| 1812 unorm_setIter(uni, NULL, UNORM_NONE, &status); |
| 1813 unorm_closeIter(uni); |
| 1814 #endif |
| 1815 } |
| 1816 |
| 1817 #endif |
OLD | NEW |