| OLD | NEW |
| (Empty) |
| 1 /******************************************************************** | |
| 2 * COPYRIGHT: | |
| 3 * Copyright (C) 2001-2012 IBM, Inc. All Rights Reserved. | |
| 4 * | |
| 5 ********************************************************************/ | |
| 6 /*******************************************************************************
* | |
| 7 * | |
| 8 * File CALLCOLL.C | |
| 9 * | |
| 10 * Modification History: | |
| 11 * Name Description | |
| 12 * Andy Heninger First Version | |
| 13 * | |
| 14 ********************************************************************************
* | |
| 15 */ | |
| 16 | |
| 17 // | |
| 18 // This program tests string collation and sort key generation performance. | |
| 19 // Three APIs can be tested: ICU C, Unix strcoll/strxfrm, and Windows LCMap
String | |
| 20 // A file of names is required as input, one per line. It must be in utf-8
or utf-16 format, | |
| 21 // and include a byte order mark. Either LE or BE format is OK. | |
| 22 // | |
| 23 | |
// Help text listing every command line option this program recognizes.
// Each option named here has a matching entry in the opts[] table.
const char gUsageString[] =
    "usage: collperf options...\n"
    "-help Display this message.\n"
    "-file file_name utf-16 format file of names.\n"
    "-locale name ICU locale to use. Default is en_US\n"
    "-rules file_name Collation rules file (overrides locale)\n"
    "-langid 0x1234 Windows Language ID number. Default to value for -locale option\n"
    " see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n"
    "-win Run test using Windows native services. (ICU is default)\n"
    "-unix Run test using Unix strxfrm, strcoll services.\n"
    "-uselen Use API with string lengths. Default is null-terminated strings\n"
    "-usekeys Run tests using sortkeys rather than strcoll\n"
    "-strcmp Run tests using u_strcmp rather than strcoll\n"
    "-strcmpCPO Run tests using u_strcmpCodePointOrder rather than strcoll\n"
    "-loop nnnn Loopcount for test. Adjust for reasonable total running time.\n"
    "-iloop n Inner Loop Count. Default = 1. Number of calls to function\n"
    " under test at each call point. For measuring test overhead.\n"
    "-terse Terse numbers-only output. Intended for use by scripts.\n"
    "-french French accent ordering\n"
    "-frenchoff No French accent ordering (for use with French locales.)\n"
    "-norm Normalizing mode on\n"
    "-shifted Shifted mode\n"
    "-lower Lower case first\n"
    "-upper Upper case first\n"
    "-case Enable separate case level\n"
    "-level n Sort level, 1 to 5, for Primary, Secondary, Tertiary, Quaternary, Identical\n"
    "-keyhist Produce a table sort key size vs. string length\n"
    "-binsearch Binary Search timing test\n"
    "-keygen Sort Key Generation timing test\n"
    "-qsort Quicksort timing test\n"
    "-iter Iteration Performance Test\n"
    "-dump Display strings, sort keys and CEs.\n"
    ;
| 57 | |
| 58 | |
| 59 | |
| 60 #include <stdio.h> | |
| 61 #include <string.h> | |
| 62 #include <stdlib.h> | |
| 63 #include <math.h> | |
| 64 #include <locale.h> | |
| 65 #include <errno.h> | |
| 66 | |
| 67 #include <unicode/utypes.h> | |
| 68 #include <unicode/ucol.h> | |
| 69 #include <unicode/ucoleitr.h> | |
| 70 #include <unicode/uloc.h> | |
| 71 #include <unicode/ustring.h> | |
| 72 #include <unicode/ures.h> | |
| 73 #include <unicode/uchar.h> | |
| 74 #include <unicode/ucnv.h> | |
| 75 #include <unicode/utf8.h> | |
| 76 | |
| 77 #ifdef WIN32 | |
| 78 #include <windows.h> | |
| 79 #else | |
| 80 // | |
| 81 // Stubs for Windows API functions when building on UNIXes. | |
| 82 // | |
| 83 typedef int DWORD; | |
| 84 inline int CompareStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;} | |
| 85 #include <sys/time.h> | |
// Stand-in for the Windows timeGetTime(): returns the current wall-clock time
// in milliseconds, derived from gettimeofday().
// The value may wrap around the range of unsigned long; the callers in this
// file only use the difference between two readings.
unsigned long timeGetTime() {
    struct timeval now;
    gettimeofday(&now, 0);
    // Convert to unsigned BEFORE scaling: unsigned arithmetic wraps in a
    // well-defined way, whereas overflowing the signed tv_sec * 1000 product
    // would be undefined behavior.
    unsigned long ms = (unsigned long)now.tv_sec * 1000UL;
    ms += (unsigned long)now.tv_usec / 1000UL;
    return ms;
}
| 93 inline int LCMapStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;} | |
| 94 const int LCMAP_SORTKEY = 0; | |
| 95 #define MAKELCID(a,b) 0 | |
| 96 const int SORT_DEFAULT = 0; | |
| 97 #endif | |
| 98 | |
| 99 | |
| 100 | |
| 101 // | |
| 102 // Command line option variables | |
| 103 // These global variables are set according to the options specified | |
| 104 // on the command line by the user. | |
| 105 char * opt_fName = 0; | |
| 106 const char * opt_locale = "en_US"; | |
| 107 int opt_langid = 0; // Defaults to value corresponding to opt_loc
ale. | |
| 108 char * opt_rules = 0; | |
| 109 UBool opt_help = FALSE; | |
| 110 int opt_loopCount = 1; | |
| 111 int opt_iLoopCount = 1; | |
| 112 UBool opt_terse = FALSE; | |
| 113 UBool opt_qsort = FALSE; | |
| 114 UBool opt_binsearch = FALSE; | |
| 115 UBool opt_icu = TRUE; | |
| 116 UBool opt_win = FALSE; // Run with Windows native functions. | |
| 117 UBool opt_unix = FALSE; // Run with UNIX strcoll, strxfrm functions. | |
| 118 UBool opt_uselen = FALSE; | |
| 119 UBool opt_usekeys = FALSE; | |
| 120 UBool opt_strcmp = FALSE; | |
| 121 UBool opt_strcmpCPO = FALSE; | |
| 122 UBool opt_norm = FALSE; | |
| 123 UBool opt_keygen = FALSE; | |
| 124 UBool opt_french = FALSE; | |
| 125 UBool opt_frenchoff = FALSE; | |
| 126 UBool opt_shifted = FALSE; | |
| 127 UBool opt_lower = FALSE; | |
| 128 UBool opt_upper = FALSE; | |
| 129 UBool opt_case = FALSE; | |
| 130 int opt_level = 0; | |
| 131 UBool opt_keyhist = FALSE; | |
| 132 UBool opt_itertest = FALSE; | |
| 133 UBool opt_dump = FALSE; | |
| 134 | |
| 135 | |
| 136 | |
| 137 // | |
| 138 // Definitions for the command line options | |
| 139 // | |
// One entry of the command line option table.
struct OptSpec {
    const char *name;       // Option text as typed on the command line, e.g. "-file".
    enum {
        FLAG,               // Takes no value; presence sets a UBool to TRUE.
        NUM,                // Followed by an integer value, stored in an int.
        STRING              // Followed by a string value, stored as a char pointer.
    } type;
    void *pVar;             // Address of the variable that receives the option's value.
};
| 145 | |
| 146 OptSpec opts[] = { | |
| 147 {"-file", OptSpec::STRING, &opt_fName}, | |
| 148 {"-locale", OptSpec::STRING, &opt_locale}, | |
| 149 {"-langid", OptSpec::NUM, &opt_langid}, | |
| 150 {"-rules", OptSpec::STRING, &opt_rules}, | |
| 151 {"-qsort", OptSpec::FLAG, &opt_qsort}, | |
| 152 {"-binsearch", OptSpec::FLAG, &opt_binsearch}, | |
| 153 {"-iter", OptSpec::FLAG, &opt_itertest}, | |
| 154 {"-win", OptSpec::FLAG, &opt_win}, | |
| 155 {"-unix", OptSpec::FLAG, &opt_unix}, | |
| 156 {"-uselen", OptSpec::FLAG, &opt_uselen}, | |
| 157 {"-usekeys", OptSpec::FLAG, &opt_usekeys}, | |
| 158 {"-strcmp", OptSpec::FLAG, &opt_strcmp}, | |
| 159 {"-strcmpCPO", OptSpec::FLAG, &opt_strcmpCPO}, | |
| 160 {"-norm", OptSpec::FLAG, &opt_norm}, | |
| 161 {"-french", OptSpec::FLAG, &opt_french}, | |
| 162 {"-frenchoff", OptSpec::FLAG, &opt_frenchoff}, | |
| 163 {"-shifted", OptSpec::FLAG, &opt_shifted}, | |
| 164 {"-lower", OptSpec::FLAG, &opt_lower}, | |
| 165 {"-upper", OptSpec::FLAG, &opt_upper}, | |
| 166 {"-case", OptSpec::FLAG, &opt_case}, | |
| 167 {"-level", OptSpec::NUM, &opt_level}, | |
| 168 {"-keyhist", OptSpec::FLAG, &opt_keyhist}, | |
| 169 {"-keygen", OptSpec::FLAG, &opt_keygen}, | |
| 170 {"-loop", OptSpec::NUM, &opt_loopCount}, | |
| 171 {"-iloop", OptSpec::NUM, &opt_iLoopCount}, | |
| 172 {"-terse", OptSpec::FLAG, &opt_terse}, | |
| 173 {"-dump", OptSpec::FLAG, &opt_dump}, | |
| 174 {"-help", OptSpec::FLAG, &opt_help}, | |
| 175 {"-?", OptSpec::FLAG, &opt_help}, | |
| 176 {0, OptSpec::FLAG, 0} | |
| 177 }; | |
| 178 | |
| 179 | |
| 180 //--------------------------------------------------------------------------- | |
| 181 // | |
| 182 // Global variables pointing to and describing the test file | |
| 183 // | |
| 184 //--------------------------------------------------------------------------- | |
| 185 | |
| 186 // | |
| 187 // struct Line | |
| 188 // | |
| 189 // Each line from the source file (containing a name, presumably) gets | |
| 190 // one of these structs. | |
| 191 // | |
| 192 struct Line { | |
| 193 UChar *name; | |
| 194 int len; | |
| 195 char *winSortKey; | |
| 196 char *icuSortKey; | |
| 197 char *unixSortKey; | |
| 198 char *unixName; | |
| 199 }; | |
| 200 | |
| 201 | |
| 202 | |
| 203 Line *gFileLines; // Ptr to array of Line structs, one per li
ne in the file. | |
| 204 int gNumFileLines; | |
| 205 UCollator *gCol; | |
| 206 DWORD gWinLCID; | |
| 207 | |
| 208 Line **gSortedLines; | |
| 209 Line **gRandomLines; | |
| 210 int gCount; | |
| 211 | |
| 212 | |
| 213 | |
| 214 //--------------------------------------------------------------------------- | |
| 215 // | |
| 216 // ProcessOptions() Function to read the command line options. | |
| 217 // | |
| 218 //--------------------------------------------------------------------------- | |
| 219 UBool ProcessOptions(int argc, const char **argv, OptSpec opts[]) | |
| 220 { | |
| 221 int i; | |
| 222 int argNum; | |
| 223 const char *pArgName; | |
| 224 OptSpec *pOpt; | |
| 225 | |
| 226 for (argNum=1; argNum<argc; argNum++) { | |
| 227 pArgName = argv[argNum]; | |
| 228 for (pOpt = opts; pOpt->name != 0; pOpt++) { | |
| 229 if (strcmp(pOpt->name, pArgName) == 0) { | |
| 230 switch (pOpt->type) { | |
| 231 case OptSpec::FLAG: | |
| 232 *(UBool *)(pOpt->pVar) = TRUE; | |
| 233 break; | |
| 234 case OptSpec::STRING: | |
| 235 argNum ++; | |
| 236 if (argNum >= argc) { | |
| 237 fprintf(stderr, "value expected for \"%s\" option.\n", p
Opt->name); | |
| 238 return FALSE; | |
| 239 } | |
| 240 *(const char **)(pOpt->pVar) = argv[argNum]; | |
| 241 break; | |
| 242 case OptSpec::NUM: | |
| 243 argNum ++; | |
| 244 if (argNum >= argc) { | |
| 245 fprintf(stderr, "value expected for \"%s\" option.\n", p
Opt->name); | |
| 246 return FALSE; | |
| 247 } | |
| 248 char *endp; | |
| 249 i = strtol(argv[argNum], &endp, 0); | |
| 250 if (endp == argv[argNum]) { | |
| 251 fprintf(stderr, "integer value expected for \"%s\" optio
n.\n", pOpt->name); | |
| 252 return FALSE; | |
| 253 } | |
| 254 *(int *)(pOpt->pVar) = i; | |
| 255 } | |
| 256 break; | |
| 257 } | |
| 258 } | |
| 259 if (pOpt->name == 0) | |
| 260 { | |
| 261 fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName); | |
| 262 return FALSE; | |
| 263 } | |
| 264 } | |
| 265 return TRUE; | |
| 266 } | |
| 267 | |
| 268 //------------------------------------------------------------------------------
--------- | |
| 269 // | |
| 270 // Comparison functions for use by qsort. | |
| 271 // | |
| 272 // Six flavors, ICU or Windows, SortKey or String Compare, Strings with le
ngth | |
| 273 // or null terminated. | |
| 274 // | |
| 275 //------------------------------------------------------------------------------
--------- | |
| 276 int ICUstrcmpK(const void *a, const void *b) { | |
| 277 gCount++; | |
| 278 int t = strcmp((*(Line **)a)->icuSortKey, (*(Line **)b)->icuSortKey); | |
| 279 return t; | |
| 280 } | |
| 281 | |
| 282 | |
| 283 int ICUstrcmpL(const void *a, const void *b) { | |
| 284 gCount++; | |
| 285 UCollationResult t; | |
| 286 t = ucol_strcoll(gCol, (*(Line **)a)->name, (*(Line **)a)->len, (*(Line **)b
)->name, (*(Line **)b)->len); | |
| 287 if (t == UCOL_LESS) return -1; | |
| 288 if (t == UCOL_GREATER) return +1; | |
| 289 return 0; | |
| 290 } | |
| 291 | |
| 292 | |
| 293 int ICUstrcmp(const void *a, const void *b) { | |
| 294 gCount++; | |
| 295 UCollationResult t; | |
| 296 t = ucol_strcoll(gCol, (*(Line **)a)->name, -1, (*(Line **)b)->name, -1); | |
| 297 if (t == UCOL_LESS) return -1; | |
| 298 if (t == UCOL_GREATER) return +1; | |
| 299 return 0; | |
| 300 } | |
| 301 | |
| 302 | |
| 303 int Winstrcmp(const void *a, const void *b) { | |
| 304 gCount++; | |
| 305 int t; | |
| 306 t = CompareStringW(gWinLCID, 0, (*(Line **)a)->name, -1, (*(Line **)b)->name
, -1); | |
| 307 return t-2; | |
| 308 } | |
| 309 | |
| 310 | |
| 311 int UNIXstrcmp(const void *a, const void *b) { | |
| 312 gCount++; | |
| 313 int t; | |
| 314 t = strcoll((*(Line **)a)->unixName, (*(Line **)b)->unixName); | |
| 315 return t; | |
| 316 } | |
| 317 | |
| 318 | |
| 319 int WinstrcmpL(const void *a, const void *b) { | |
| 320 gCount++; | |
| 321 int t; | |
| 322 t = CompareStringW(gWinLCID, 0, (*(Line **)a)->name, (*(Line **)a)->len, (*(
Line **)b)->name, (*(Line **)b)->len); | |
| 323 return t-2; | |
| 324 } | |
| 325 | |
| 326 | |
| 327 int WinstrcmpK(const void *a, const void *b) { | |
| 328 gCount++; | |
| 329 int t = strcmp((*(Line **)a)->winSortKey, (*(Line **)b)->winSortKey); | |
| 330 return t; | |
| 331 } | |
| 332 | |
| 333 | |
| 334 //------------------------------------------------------------------------------
--------- | |
| 335 // | |
| 336 // Function for sorting the names (lines) into a random order. | |
| 337 // Order is based on a hash of the ICU Sort key for the lines | |
| 338 // The randomized order is used as input for the sorting timing tests. | |
| 339 // | |
| 340 //------------------------------------------------------------------------------
--------- | |
| 341 int ICURandomCmp(const void *a, const void *b) { | |
| 342 char *ask = (*(Line **)a)->icuSortKey; | |
| 343 char *bsk = (*(Line **)b)->icuSortKey; | |
| 344 int aVal = 0; | |
| 345 int bVal = 0; | |
| 346 int retVal; | |
| 347 while (*ask != 0) { | |
| 348 aVal += aVal*37 + *ask++; | |
| 349 } | |
| 350 while (*bsk != 0) { | |
| 351 bVal += bVal*37 + *bsk++; | |
| 352 } | |
| 353 retVal = -1; | |
| 354 if (aVal == bVal) { | |
| 355 retVal = 0; | |
| 356 } | |
| 357 else if (aVal > bVal) { | |
| 358 retVal = 1; | |
| 359 } | |
| 360 return retVal; | |
| 361 } | |
| 362 | |
| 363 //------------------------------------------------------------------------------
--------- | |
| 364 // | |
| 365 // doKeyGen() Key Generation Timing Test | |
| 366 // | |
| 367 //------------------------------------------------------------------------------
--------- | |
| 368 void doKeyGen() | |
| 369 { | |
| 370 int line; | |
| 371 int loops = 0; | |
| 372 int iLoop; | |
| 373 int t; | |
| 374 int len=-1; | |
| 375 | |
| 376 // Adjust loop count to compensate for file size. Should be order n | |
| 377 double dLoopCount = double(opt_loopCount) * (1000. / double(gNumFileLines))
; | |
| 378 int adj_loopCount = int(dLoopCount); | |
| 379 if (adj_loopCount < 1) adj_loopCount = 1; | |
| 380 | |
| 381 | |
| 382 unsigned long startTime = timeGetTime(); | |
| 383 | |
| 384 if (opt_win) { | |
| 385 for (loops=0; loops<adj_loopCount; loops++) { | |
| 386 for (line=0; line < gNumFileLines; line++) { | |
| 387 if (opt_uselen) { | |
| 388 len = gFileLines[line].len; | |
| 389 } | |
| 390 for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { | |
| 391 t=LCMapStringW(gWinLCID, LCMAP_SORTKEY, | |
| 392 gFileLines[line].name, len, | |
| 393 (unsigned short *)gFileLines[line].winSortKey, 5000);
// TODO something with length. | |
| 394 } | |
| 395 } | |
| 396 } | |
| 397 } | |
| 398 else if (opt_icu) | |
| 399 { | |
| 400 for (loops=0; loops<adj_loopCount; loops++) { | |
| 401 for (line=0; line < gNumFileLines; line++) { | |
| 402 if (opt_uselen) { | |
| 403 len = gFileLines[line].len; | |
| 404 } | |
| 405 for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { | |
| 406 t = ucol_getSortKey(gCol, gFileLines[line].name, len, (unsig
ned char *)gFileLines[line].icuSortKey, 5000); | |
| 407 } | |
| 408 } | |
| 409 } | |
| 410 } | |
| 411 else if (opt_unix) | |
| 412 { | |
| 413 for (loops=0; loops<adj_loopCount; loops++) { | |
| 414 for (line=0; line < gNumFileLines; line++) { | |
| 415 for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { | |
| 416 t = strxfrm(gFileLines[line].unixSortKey, gFileLines[line].unixN
ame, 5000); | |
| 417 } | |
| 418 } | |
| 419 } | |
| 420 } | |
| 421 | |
| 422 unsigned long elapsedTime = timeGetTime() - startTime; | |
| 423 int ns = (int)(float(1000000) * (float)elapsedTime / (float)(adj_loopCount*g
NumFileLines)); | |
| 424 | |
| 425 if (opt_terse == FALSE) { | |
| 426 printf("Sort Key Generation: total # of keys = %d\n", loops*gNumFileLin
es); | |
| 427 printf("Sort Key Generation: time per key = %d ns\n", ns); | |
| 428 } | |
| 429 else { | |
| 430 printf("%d, ", ns); | |
| 431 } | |
| 432 | |
| 433 int totalKeyLen = 0; | |
| 434 int totalChars = 0; | |
| 435 for (line=0; line<gNumFileLines; line++) { | |
| 436 totalChars += u_strlen(gFileLines[line].name); | |
| 437 if (opt_win) { | |
| 438 totalKeyLen += strlen(gFileLines[line].winSortKey); | |
| 439 } | |
| 440 else if (opt_icu) { | |
| 441 totalKeyLen += strlen(gFileLines[line].icuSortKey); | |
| 442 } | |
| 443 else if (opt_unix) { | |
| 444 totalKeyLen += strlen(gFileLines[line].unixSortKey); | |
| 445 } | |
| 446 | |
| 447 } | |
| 448 if (opt_terse == FALSE) { | |
| 449 printf("Key Length / character = %f\n", (float)totalKeyLen / (float)tota
lChars); | |
| 450 } else { | |
| 451 printf("%f, ", (float)totalKeyLen / (float)totalChars); | |
| 452 } | |
| 453 } | |
| 454 | |
| 455 | |
| 456 | |
| 457 //------------------------------------------------------------------------------
--------- | |
| 458 // | |
| 459 // doBinarySearch() Binary Search timing test. Each name from the list | |
| 460 // is looked up in the full sorted list of names. | |
| 461 // | |
| 462 //------------------------------------------------------------------------------
--------- | |
| 463 void doBinarySearch() | |
| 464 { | |
| 465 | |
| 466 gCount = 0; | |
| 467 int line; | |
| 468 int loops = 0; | |
| 469 int iLoop = 0; | |
| 470 unsigned long elapsedTime = 0; | |
| 471 | |
| 472 // Adjust loop count to compensate for file size. Should be order n (looku
ps) * log n (compares/lookup) | |
| 473 // Accurate timings do not depend on this being perfect. The correction is
just to try to | |
| 474 // get total running times of about the right order, so the that user does
n't need to | |
| 475 // manually adjust the loop count for every different file size. | |
| 476 double dLoopCount = double(opt_loopCount) * 3000. / (log10((double)gNumFileL
ines) * double(gNumFileLines)); | |
| 477 if (opt_usekeys) dLoopCount *= 5; | |
| 478 int adj_loopCount = int(dLoopCount); | |
| 479 if (adj_loopCount < 1) adj_loopCount = 1; | |
| 480 | |
| 481 | |
| 482 for (;;) { // not really a loop, just allows "break" to work, to simplify | |
| 483 // inadvertantly running more than one test through here. | |
| 484 if (opt_strcmp || opt_strcmpCPO) | |
| 485 { | |
| 486 unsigned long startTime = timeGetTime(); | |
| 487 typedef int32_t (U_EXPORT2 *PF)(const UChar *, const UChar *); | |
| 488 PF pf = u_strcmp; | |
| 489 if (opt_strcmpCPO) {pf = u_strcmpCodePointOrder;} | |
| 490 //if (opt_strcmp && opt_win) {pf = (PF)wcscmp;} // Damn the differ
ence between int32_t and int | |
| 491 // which forces th
e use of a cast here. | |
| 492 | |
| 493 int r = 0; | |
| 494 for (loops=0; loops<adj_loopCount; loops++) { | |
| 495 | |
| 496 for (line=0; line < gNumFileLines; line++) { | |
| 497 int hi = gNumFileLines-1; | |
| 498 int lo = 0; | |
| 499 int guess = -1; | |
| 500 for (;;) { | |
| 501 int newGuess = (hi + lo) / 2; | |
| 502 if (newGuess == guess) | |
| 503 break; | |
| 504 guess = newGuess; | |
| 505 for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { | |
| 506 r = (*pf)((gSortedLines[line])->name, (gSortedLines[
guess])->name); | |
| 507 } | |
| 508 gCount++; | |
| 509 if (r== 0) | |
| 510 break; | |
| 511 if (r < 0) | |
| 512 hi = guess; | |
| 513 else | |
| 514 lo = guess; | |
| 515 } | |
| 516 } | |
| 517 } | |
| 518 elapsedTime = timeGetTime() - startTime; | |
| 519 break; | |
| 520 } | |
| 521 | |
| 522 | |
| 523 if (opt_icu) | |
| 524 { | |
| 525 unsigned long startTime = timeGetTime(); | |
| 526 UCollationResult r = UCOL_EQUAL; | |
| 527 for (loops=0; loops<adj_loopCount; loops++) { | |
| 528 | |
| 529 for (line=0; line < gNumFileLines; line++) { | |
| 530 int lineLen = -1; | |
| 531 int guessLen = -1; | |
| 532 if (opt_uselen) { | |
| 533 lineLen = (gSortedLines[line])->len; | |
| 534 } | |
| 535 int hi = gNumFileLines-1; | |
| 536 int lo = 0; | |
| 537 int guess = -1; | |
| 538 for (;;) { | |
| 539 int newGuess = (hi + lo) / 2; | |
| 540 if (newGuess == guess) | |
| 541 break; | |
| 542 guess = newGuess; | |
| 543 int ri = 0; | |
| 544 if (opt_usekeys) { | |
| 545 for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { | |
| 546 ri = strcmp((gSortedLines[line])->icuSortKey, (g
SortedLines[guess])->icuSortKey); | |
| 547 } | |
| 548 gCount++; | |
| 549 r=UCOL_GREATER; if(ri<0) {r=UCOL_LESS;} else if (ri=
=0) {r=UCOL_EQUAL;} | |
| 550 } | |
| 551 else | |
| 552 { | |
| 553 if (opt_uselen) { | |
| 554 guessLen = (gSortedLines[guess])->len; | |
| 555 } | |
| 556 for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { | |
| 557 r = ucol_strcoll(gCol, (gSortedLines[line])->nam
e, lineLen, (gSortedLines[guess])->name, guessLen); | |
| 558 } | |
| 559 gCount++; | |
| 560 } | |
| 561 if (r== UCOL_EQUAL) | |
| 562 break; | |
| 563 if (r == UCOL_LESS) | |
| 564 hi = guess; | |
| 565 else | |
| 566 lo = guess; | |
| 567 } | |
| 568 } | |
| 569 } | |
| 570 elapsedTime = timeGetTime() - startTime; | |
| 571 break; | |
| 572 } | |
| 573 | |
| 574 if (opt_win) | |
| 575 { | |
| 576 unsigned long startTime = timeGetTime(); | |
| 577 int r = 0; | |
| 578 for (loops=0; loops<adj_loopCount; loops++) { | |
| 579 | |
| 580 for (line=0; line < gNumFileLines; line++) { | |
| 581 int lineLen = -1; | |
| 582 int guessLen = -1; | |
| 583 if (opt_uselen) { | |
| 584 lineLen = (gSortedLines[line])->len; | |
| 585 } | |
| 586 int hi = gNumFileLines-1; | |
| 587 int lo = 0; | |
| 588 int guess = -1; | |
| 589 for (;;) { | |
| 590 int newGuess = (hi + lo) / 2; | |
| 591 if (newGuess == guess) | |
| 592 break; | |
| 593 guess = newGuess; | |
| 594 if (opt_usekeys) { | |
| 595 for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { | |
| 596 r = strcmp((gSortedLines[line])->winSortKey, (gS
ortedLines[guess])->winSortKey); | |
| 597 } | |
| 598 gCount++; | |
| 599 r+=2; | |
| 600 } | |
| 601 else | |
| 602 { | |
| 603 if (opt_uselen) { | |
| 604 guessLen = (gSortedLines[guess])->len; | |
| 605 } | |
| 606 for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { | |
| 607 r = CompareStringW(gWinLCID, 0, (gSortedLines[li
ne])->name, lineLen, (gSortedLines[guess])->name, guessLen); | |
| 608 } | |
| 609 if (r == 0) { | |
| 610 if (opt_terse == FALSE) { | |
| 611 fprintf(stderr, "Error returned from Windows
CompareStringW.\n"); | |
| 612 } | |
| 613 exit(-1); | |
| 614 } | |
| 615 gCount++; | |
| 616 } | |
| 617 if (r== 2) // strings == | |
| 618 break; | |
| 619 if (r == 1) // line < guess | |
| 620 hi = guess; | |
| 621 else // line > guess | |
| 622 lo = guess; | |
| 623 } | |
| 624 } | |
| 625 } | |
| 626 elapsedTime = timeGetTime() - startTime; | |
| 627 break; | |
| 628 } | |
| 629 | |
| 630 if (opt_unix) | |
| 631 { | |
| 632 unsigned long startTime = timeGetTime(); | |
| 633 int r = 0; | |
| 634 for (loops=0; loops<adj_loopCount; loops++) { | |
| 635 | |
| 636 for (line=0; line < gNumFileLines; line++) { | |
| 637 int hi = gNumFileLines-1; | |
| 638 int lo = 0; | |
| 639 int guess = -1; | |
| 640 for (;;) { | |
| 641 int newGuess = (hi + lo) / 2; | |
| 642 if (newGuess == guess) | |
| 643 break; | |
| 644 guess = newGuess; | |
| 645 if (opt_usekeys) { | |
| 646 for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { | |
| 647 r = strcmp((gSortedLines[line])->unixSortKey, (
gSortedLines[guess])->unixSortKey); | |
| 648 } | |
| 649 gCount++; | |
| 650 } | |
| 651 else | |
| 652 { | |
| 653 for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { | |
| 654 r = strcoll((gSortedLines[line])->unixName, (gSo
rtedLines[guess])->unixName); | |
| 655 } | |
| 656 errno = 0; | |
| 657 if (errno != 0) { | |
| 658 fprintf(stderr, "Error %d returned from strcoll.
\n", errno); | |
| 659 exit(-1); | |
| 660 } | |
| 661 gCount++; | |
| 662 } | |
| 663 if (r == 0) // strings == | |
| 664 break; | |
| 665 if (r < 0) // line < guess | |
| 666 hi = guess; | |
| 667 else // line > guess | |
| 668 lo = guess; | |
| 669 } | |
| 670 } | |
| 671 } | |
| 672 elapsedTime = timeGetTime() - startTime; | |
| 673 break; | |
| 674 } | |
| 675 break; | |
| 676 } | |
| 677 | |
| 678 int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount); | |
| 679 if (opt_terse == FALSE) { | |
| 680 printf("binary search: total # of string compares = %d\n", gCount); | |
| 681 printf("binary search: compares per loop = %d\n", gCount / loops); | |
| 682 printf("binary search: time per compare = %d ns\n", ns); | |
| 683 } else { | |
| 684 printf("%d, ", ns); | |
| 685 } | |
| 686 | |
| 687 } | |
| 688 | |
| 689 | |
| 690 | |
| 691 | |
| 692 //------------------------------------------------------------------------------
--------- | |
| 693 // | |
| 694 // doQSort() The quick sort timing test. Uses the C library qsort function
. | |
| 695 // | |
| 696 //------------------------------------------------------------------------------
--------- | |
| 697 void doQSort() { | |
| 698 int i; | |
| 699 Line **sortBuf = new Line *[gNumFileLines]; | |
| 700 | |
| 701 // Adjust loop count to compensate for file size. QSort should be n log(n) | |
| 702 double dLoopCount = double(opt_loopCount) * 3000. / (log10((double)gNumFileL
ines) * double(gNumFileLines)); | |
| 703 if (opt_usekeys) dLoopCount *= 5; | |
| 704 int adj_loopCount = int(dLoopCount); | |
| 705 if (adj_loopCount < 1) adj_loopCount = 1; | |
| 706 | |
| 707 | |
| 708 gCount = 0; | |
| 709 unsigned long startTime = timeGetTime(); | |
| 710 if (opt_win && opt_usekeys) { | |
| 711 for (i=0; i<opt_loopCount; i++) { | |
| 712 memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *)); | |
| 713 qsort(sortBuf, gNumFileLines, sizeof(Line *), WinstrcmpK); | |
| 714 } | |
| 715 } | |
| 716 | |
| 717 else if (opt_win && opt_uselen) { | |
| 718 for (i=0; i<adj_loopCount; i++) { | |
| 719 memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *)); | |
| 720 qsort(sortBuf, gNumFileLines, sizeof(Line *), WinstrcmpL); | |
| 721 } | |
| 722 } | |
| 723 | |
| 724 | |
| 725 else if (opt_win && !opt_uselen) { | |
| 726 for (i=0; i<adj_loopCount; i++) { | |
| 727 memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *)); | |
| 728 qsort(sortBuf, gNumFileLines, sizeof(Line *), Winstrcmp); | |
| 729 } | |
| 730 } | |
| 731 | |
| 732 else if (opt_icu && opt_usekeys) { | |
| 733 for (i=0; i<adj_loopCount; i++) { | |
| 734 memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *)); | |
| 735 qsort(sortBuf, gNumFileLines, sizeof(Line *), ICUstrcmpK); | |
| 736 } | |
| 737 } | |
| 738 | |
| 739 else if (opt_icu && opt_uselen) { | |
| 740 for (i=0; i<adj_loopCount; i++) { | |
| 741 memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *)); | |
| 742 qsort(sortBuf, gNumFileLines, sizeof(Line *), ICUstrcmpL); | |
| 743 } | |
| 744 } | |
| 745 | |
| 746 | |
| 747 else if (opt_icu && !opt_uselen) { | |
| 748 for (i=0; i<adj_loopCount; i++) { | |
| 749 memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *)); | |
| 750 qsort(sortBuf, gNumFileLines, sizeof(Line *), ICUstrcmp); | |
| 751 } | |
| 752 } | |
| 753 | |
| 754 else if (opt_unix && !opt_usekeys) { | |
| 755 for (i=0; i<adj_loopCount; i++) { | |
| 756 memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *)); | |
| 757 qsort(sortBuf, gNumFileLines, sizeof(Line *), UNIXstrcmp); | |
| 758 } | |
| 759 } | |
| 760 | |
| 761 unsigned long elapsedTime = timeGetTime() - startTime; | |
| 762 int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount); | |
| 763 if (opt_terse == FALSE) { | |
| 764 printf("qsort: total # of string compares = %d\n", gCount); | |
| 765 printf("qsort: time per compare = %d ns\n", ns); | |
| 766 } else { | |
| 767 printf("%d, ", ns); | |
| 768 } | |
| 769 } | |
| 770 | |
| 771 | |
| 772 | |
| 773 //------------------------------------------------------------------------------
--------- | |
| 774 // | |
| 775 // doKeyHist() Output a table of data for | |
| 776 // average sort key size vs. string length. | |
| 777 // | |
| 778 //------------------------------------------------------------------------------
--------- | |
| 779 void doKeyHist() { | |
| 780 int i; | |
| 781 int maxLen = 0; | |
| 782 | |
| 783 // Find the maximum string length | |
| 784 for (i=0; i<gNumFileLines; i++) { | |
| 785 if (gFileLines[i].len > maxLen) maxLen = gFileLines[i].len; | |
| 786 } | |
| 787 | |
| 788 // Allocate arrays to hold the histogram data | |
| 789 int *accumulatedLen = new int[maxLen+1]; | |
| 790 int *numKeysOfSize = new int[maxLen+1]; | |
| 791 for (i=0; i<=maxLen; i++) { | |
| 792 accumulatedLen[i] = 0; | |
| 793 numKeysOfSize[i] = 0; | |
| 794 } | |
| 795 | |
| 796 // Fill the arrays... | |
| 797 for (i=0; i<gNumFileLines; i++) { | |
| 798 int len = gFileLines[i].len; | |
| 799 accumulatedLen[len] += strlen(gFileLines[i].icuSortKey); | |
| 800 numKeysOfSize[len] += 1; | |
| 801 } | |
| 802 | |
| 803 // And write out averages | |
| 804 printf("String Length, Avg Key Length, Avg Key Len per char\n"); | |
| 805 for (i=1; i<=maxLen; i++) { | |
| 806 if (numKeysOfSize[i] > 0) { | |
| 807 printf("%d, %f, %f\n", i, (float)accumulatedLen[i] / (float)numKeysO
fSize[i], | |
| 808 (float)accumulatedLen[i] / (float)(numKeysOfSize[i] * i)); | |
| 809 } | |
| 810 } | |
| 811 delete []accumulatedLen; | |
| 812 delete []numKeysOfSize ; | |
| 813 } | |
| 814 | |
| 815 //------------------------------------------------------------------------------
--------- | |
| 816 // | |
| 817 // doForwardIterTest(UBool) Forward iteration test | |
| 818 // argument null-terminated string used | |
| 819 // | |
| 820 //------------------------------------------------------------------------------
--------- | |
| 821 void doForwardIterTest(UBool haslen) { | |
| 822 int count = 0; | |
| 823 | |
| 824 UErrorCode error = U_ZERO_ERROR; | |
| 825 printf("\n\nPerforming forward iteration performance test with "); | |
| 826 | |
| 827 if (haslen) { | |
| 828 printf("non-null terminated data -----------\n"); | |
| 829 } | |
| 830 else { | |
| 831 printf("null terminated data -----------\n"); | |
| 832 } | |
| 833 printf("performance test on strings from file -----------\n"); | |
| 834 | |
| 835 UChar dummytext[] = {0, 0}; | |
| 836 UCollationElements *iter = ucol_openElements(gCol, NULL, 0, &error); | |
| 837 ucol_setText(iter, dummytext, 1, &error); | |
| 838 | |
| 839 gCount = 0; | |
| 840 unsigned long startTime = timeGetTime(); | |
| 841 while (count < opt_loopCount) { | |
| 842 int linecount = 0; | |
| 843 while (linecount < gNumFileLines) { | |
| 844 UChar *str = gFileLines[linecount].name; | |
| 845 int strlen = haslen?gFileLines[linecount].len:-1; | |
| 846 ucol_setText(iter, str, strlen, &error); | |
| 847 while (ucol_next(iter, &error) != UCOL_NULLORDER) { | |
| 848 gCount++; | |
| 849 } | |
| 850 | |
| 851 linecount ++; | |
| 852 } | |
| 853 count ++; | |
| 854 } | |
| 855 unsigned long elapsedTime = timeGetTime() - startTime; | |
| 856 printf("elapsedTime %ld\n", elapsedTime); | |
| 857 | |
| 858 // empty loop recalculation | |
| 859 count = 0; | |
| 860 startTime = timeGetTime(); | |
| 861 while (count < opt_loopCount) { | |
| 862 int linecount = 0; | |
| 863 while (linecount < gNumFileLines) { | |
| 864 UChar *str = gFileLines[linecount].name; | |
| 865 int strlen = haslen?gFileLines[linecount].len:-1; | |
| 866 ucol_setText(iter, str, strlen, &error); | |
| 867 linecount ++; | |
| 868 } | |
| 869 count ++; | |
| 870 } | |
| 871 elapsedTime -= (timeGetTime() - startTime); | |
| 872 printf("elapsedTime %ld\n", elapsedTime); | |
| 873 | |
| 874 ucol_closeElements(iter); | |
| 875 | |
| 876 int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount); | |
| 877 printf("Total number of strings compared %d in %d loops\n", gNumFileLines, | |
| 878 opt_loopCount); | |
| 879 printf("Average time per ucol_next() nano seconds %d\n", ns); | |
| 880 | |
| 881 printf("performance test on skipped-5 concatenated strings from file -------
----\n"); | |
| 882 | |
| 883 UChar *str; | |
| 884 int strlen = 0; | |
| 885 // appending all the strings | |
| 886 int linecount = 0; | |
| 887 while (linecount < gNumFileLines) { | |
| 888 strlen += haslen?gFileLines[linecount].len: | |
| 889 u_strlen(gFileLines[linecount].name); | |
| 890 linecount ++; | |
| 891 } | |
| 892 str = (UChar *)malloc(sizeof(UChar) * strlen); | |
| 893 int strindex = 0; | |
| 894 linecount = 0; | |
| 895 while (strindex < strlen) { | |
| 896 int len = 0; | |
| 897 len += haslen?gFileLines[linecount].len: | |
| 898 u_strlen(gFileLines[linecount].name); | |
| 899 memcpy(str + strindex, gFileLines[linecount].name, | |
| 900 sizeof(UChar) * len); | |
| 901 strindex += len; | |
| 902 linecount ++; | |
| 903 } | |
| 904 | |
| 905 printf("Total size of strings %d\n", strlen); | |
| 906 | |
| 907 gCount = 0; | |
| 908 count = 0; | |
| 909 | |
| 910 if (!haslen) { | |
| 911 strlen = -1; | |
| 912 } | |
| 913 iter = ucol_openElements(gCol, str, strlen, &error); | |
| 914 if (!haslen) { | |
| 915 strlen = u_strlen(str); | |
| 916 } | |
| 917 strlen -= 5; // any left over characters are not iterated, | |
| 918 // this is to ensure the backwards and forwards iterators | |
| 919 // gets the same position | |
| 920 startTime = timeGetTime(); | |
| 921 while (count < opt_loopCount) { | |
| 922 int count5 = 5; | |
| 923 strindex = 0; | |
| 924 ucol_setOffset(iter, strindex, &error); | |
| 925 while (TRUE) { | |
| 926 if (ucol_next(iter, &error) == UCOL_NULLORDER) { | |
| 927 break; | |
| 928 } | |
| 929 gCount++; | |
| 930 count5 --; | |
| 931 if (count5 == 0) { | |
| 932 strindex += 10; | |
| 933 if (strindex > strlen) { | |
| 934 break; | |
| 935 } | |
| 936 ucol_setOffset(iter, strindex, &error); | |
| 937 count5 = 5; | |
| 938 } | |
| 939 } | |
| 940 count ++; | |
| 941 } | |
| 942 | |
| 943 elapsedTime = timeGetTime() - startTime; | |
| 944 printf("elapsedTime %ld\n", elapsedTime); | |
| 945 | |
| 946 // empty loop recalculation | |
| 947 int tempgCount = 0; | |
| 948 count = 0; | |
| 949 startTime = timeGetTime(); | |
| 950 while (count < opt_loopCount) { | |
| 951 int count5 = 5; | |
| 952 strindex = 0; | |
| 953 ucol_setOffset(iter, strindex, &error); | |
| 954 while (TRUE) { | |
| 955 tempgCount ++; | |
| 956 count5 --; | |
| 957 if (count5 == 0) { | |
| 958 strindex += 10; | |
| 959 if (strindex > strlen) { | |
| 960 break; | |
| 961 } | |
| 962 ucol_setOffset(iter, strindex, &error); | |
| 963 count5 = 5; | |
| 964 } | |
| 965 } | |
| 966 count ++; | |
| 967 } | |
| 968 elapsedTime -= (timeGetTime() - startTime); | |
| 969 printf("elapsedTime %ld\n", elapsedTime); | |
| 970 | |
| 971 ucol_closeElements(iter); | |
| 972 | |
| 973 printf("gCount %d\n", gCount); | |
| 974 ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount); | |
| 975 printf("Average time per ucol_next() nano seconds %d\n", ns); | |
| 976 } | |
| 977 | |
//---------------------------------------------------------------------------------------
//
//   doBackwardIterTest(UBool haslen)   Backwards iteration test.
//        haslen: TRUE to pass explicit string lengths to the iterator,
//                FALSE to pass -1 and rely on null-terminated strings.
//
//---------------------------------------------------------------------------------------
| 984 void doBackwardIterTest(UBool haslen) { | |
| 985 int count = 0; | |
| 986 UErrorCode error = U_ZERO_ERROR; | |
| 987 printf("\n\nPerforming backward iteration performance test with "); | |
| 988 | |
| 989 if (haslen) { | |
| 990 printf("non-null terminated data -----------\n"); | |
| 991 } | |
| 992 else { | |
| 993 printf("null terminated data -----------\n"); | |
| 994 } | |
| 995 | |
| 996 printf("performance test on strings from file -----------\n"); | |
| 997 | |
| 998 UCollationElements *iter = ucol_openElements(gCol, NULL, 0, &error); | |
| 999 UChar dummytext[] = {0, 0}; | |
| 1000 ucol_setText(iter, dummytext, 1, &error); | |
| 1001 | |
| 1002 gCount = 0; | |
| 1003 unsigned long startTime = timeGetTime(); | |
| 1004 while (count < opt_loopCount) { | |
| 1005 int linecount = 0; | |
| 1006 while (linecount < gNumFileLines) { | |
| 1007 UChar *str = gFileLines[linecount].name; | |
| 1008 int strlen = haslen?gFileLines[linecount].len:-1; | |
| 1009 ucol_setText(iter, str, strlen, &error); | |
| 1010 while (ucol_previous(iter, &error) != UCOL_NULLORDER) { | |
| 1011 gCount ++; | |
| 1012 } | |
| 1013 | |
| 1014 linecount ++; | |
| 1015 } | |
| 1016 count ++; | |
| 1017 } | |
| 1018 unsigned long elapsedTime = timeGetTime() - startTime; | |
| 1019 | |
| 1020 printf("elapsedTime %ld\n", elapsedTime); | |
| 1021 | |
| 1022 // empty loop recalculation | |
| 1023 count = 0; | |
| 1024 startTime = timeGetTime(); | |
| 1025 while (count < opt_loopCount) { | |
| 1026 int linecount = 0; | |
| 1027 while (linecount < gNumFileLines) { | |
| 1028 UChar *str = gFileLines[linecount].name; | |
| 1029 int strlen = haslen?gFileLines[linecount].len:-1; | |
| 1030 ucol_setText(iter, str, strlen, &error); | |
| 1031 linecount ++; | |
| 1032 } | |
| 1033 count ++; | |
| 1034 } | |
| 1035 elapsedTime -= (timeGetTime() - startTime); | |
| 1036 | |
| 1037 printf("elapsedTime %ld\n", elapsedTime); | |
| 1038 ucol_closeElements(iter); | |
| 1039 | |
| 1040 int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount); | |
| 1041 printf("Total number of strings compared %d in %d loops\n", gNumFileLines, | |
| 1042 opt_loopCount); | |
| 1043 printf("Average time per ucol_previous() nano seconds %d\n", ns); | |
| 1044 | |
| 1045 printf("performance test on skipped-5 concatenated strings from file -------
----\n"); | |
| 1046 | |
| 1047 UChar *str; | |
| 1048 int strlen = 0; | |
| 1049 // appending all the strings | |
| 1050 int linecount = 0; | |
| 1051 while (linecount < gNumFileLines) { | |
| 1052 strlen += haslen?gFileLines[linecount].len: | |
| 1053 u_strlen(gFileLines[linecount].name); | |
| 1054 linecount ++; | |
| 1055 } | |
| 1056 str = (UChar *)malloc(sizeof(UChar) * strlen); | |
| 1057 int strindex = 0; | |
| 1058 linecount = 0; | |
| 1059 while (strindex < strlen) { | |
| 1060 int len = 0; | |
| 1061 len += haslen?gFileLines[linecount].len: | |
| 1062 u_strlen(gFileLines[linecount].name); | |
| 1063 memcpy(str + strindex, gFileLines[linecount].name, | |
| 1064 sizeof(UChar) * len); | |
| 1065 strindex += len; | |
| 1066 linecount ++; | |
| 1067 } | |
| 1068 | |
| 1069 printf("Total size of strings %d\n", strlen); | |
| 1070 | |
| 1071 gCount = 0; | |
| 1072 count = 0; | |
| 1073 | |
| 1074 if (!haslen) { | |
| 1075 strlen = -1; | |
| 1076 } | |
| 1077 | |
| 1078 iter = ucol_openElements(gCol, str, strlen, &error); | |
| 1079 if (!haslen) { | |
| 1080 strlen = u_strlen(str); | |
| 1081 } | |
| 1082 | |
| 1083 startTime = timeGetTime(); | |
| 1084 while (count < opt_loopCount) { | |
| 1085 int count5 = 5; | |
| 1086 strindex = 5; | |
| 1087 ucol_setOffset(iter, strindex, &error); | |
| 1088 while (TRUE) { | |
| 1089 if (ucol_previous(iter, &error) == UCOL_NULLORDER) { | |
| 1090 break; | |
| 1091 } | |
| 1092 gCount ++; | |
| 1093 count5 --; | |
| 1094 if (count5 == 0) { | |
| 1095 strindex += 10; | |
| 1096 if (strindex > strlen) { | |
| 1097 break; | |
| 1098 } | |
| 1099 ucol_setOffset(iter, strindex, &error); | |
| 1100 count5 = 5; | |
| 1101 } | |
| 1102 } | |
| 1103 count ++; | |
| 1104 } | |
| 1105 | |
| 1106 elapsedTime = timeGetTime() - startTime; | |
| 1107 printf("elapsedTime %ld\n", elapsedTime); | |
| 1108 | |
| 1109 // empty loop recalculation | |
| 1110 count = 0; | |
| 1111 int tempgCount = 0; | |
| 1112 startTime = timeGetTime(); | |
| 1113 while (count < opt_loopCount) { | |
| 1114 int count5 = 5; | |
| 1115 strindex = 5; | |
| 1116 ucol_setOffset(iter, strindex, &error); | |
| 1117 while (TRUE) { | |
| 1118 tempgCount ++; | |
| 1119 count5 --; | |
| 1120 if (count5 == 0) { | |
| 1121 strindex += 10; | |
| 1122 if (strindex > strlen) { | |
| 1123 break; | |
| 1124 } | |
| 1125 ucol_setOffset(iter, strindex, &error); | |
| 1126 count5 = 5; | |
| 1127 } | |
| 1128 } | |
| 1129 count ++; | |
| 1130 } | |
| 1131 elapsedTime -= (timeGetTime() - startTime); | |
| 1132 printf("elapsedTime %ld\n", elapsedTime); | |
| 1133 ucol_closeElements(iter); | |
| 1134 | |
| 1135 printf("gCount %d\n", gCount); | |
| 1136 ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount); | |
| 1137 printf("Average time per ucol_previous() nano seconds %d\n", ns); | |
| 1138 } | |
| 1139 | |
| 1140 //------------------------------------------------------------------------------
--------- | |
| 1141 // | |
| 1142 // doIterTest() Iteration test | |
| 1143 // | |
| 1144 //------------------------------------------------------------------------------
--------- | |
| 1145 void doIterTest() { | |
| 1146 doForwardIterTest(opt_uselen); | |
| 1147 doBackwardIterTest(opt_uselen); | |
| 1148 } | |
| 1149 | |
| 1150 | |
//----------------------------------------------------------------------------------------
//
//   UnixConvert   -- Convert the lines of the file to the encoding for UNIX.
//                    Since it appears that Unicode support is going in the general
//                    direction of the use of UTF-8 locales, that is the approach
//                    that is used here.
//
//----------------------------------------------------------------------------------------
| 1159 void UnixConvert() { | |
| 1160 int line; | |
| 1161 | |
| 1162 UConverter *cvrtr; // An ICU code page converter. | |
| 1163 UErrorCode status = U_ZERO_ERROR; | |
| 1164 | |
| 1165 | |
| 1166 cvrtr = ucnv_open("utf-8", &status); // we are just doing UTF-8 locales f
or now. | |
| 1167 if (U_FAILURE(status)) { | |
| 1168 fprintf(stderr, "ICU Converter open failed.: %s\n", u_errorName(status))
; | |
| 1169 exit(-1); | |
| 1170 } | |
| 1171 | |
| 1172 for (line=0; line < gNumFileLines; line++) { | |
| 1173 int sizeNeeded = ucnv_fromUChars(cvrtr, | |
| 1174 0, // ptr to target buffer. | |
| 1175 0, // length of target buffe
r. | |
| 1176 gFileLines[line].name, | |
| 1177 -1, // source is null termin
ated | |
| 1178 &status); | |
| 1179 if (status != U_BUFFER_OVERFLOW_ERROR && status != U_ZERO_ERROR) { | |
| 1180 //fprintf(stderr, "Conversion from Unicode, something is wrong.\n"); | |
| 1181 //exit(-1); | |
| 1182 } | |
| 1183 status = U_ZERO_ERROR; | |
| 1184 gFileLines[line].unixName = new char[sizeNeeded+1]; | |
| 1185 sizeNeeded = ucnv_fromUChars(cvrtr, | |
| 1186 gFileLines[line].unixName, // ptr to ta
rget buffer. | |
| 1187 sizeNeeded+1, // length of target buffe
r. | |
| 1188 gFileLines[line].name, | |
| 1189 -1, // source is null termin
ated | |
| 1190 &status); | |
| 1191 if (U_FAILURE(status)) { | |
| 1192 fprintf(stderr, "ICU Conversion Failed.: %d\n", status); | |
| 1193 exit(-1); | |
| 1194 } | |
| 1195 gFileLines[line].unixName[sizeNeeded] = 0; | |
| 1196 }; | |
| 1197 ucnv_close(cvrtr); | |
| 1198 } | |
| 1199 | |
| 1200 | |
| 1201 //------------------------------------------------------------------------------
---------- | |
| 1202 // | |
| 1203 // class UCharFile Class to hide all the gorp to read a file in | |
| 1204 // and produce a stream of UChars. | |
| 1205 // | |
| 1206 //------------------------------------------------------------------------------
---------- | |
| 1207 class UCharFile { | |
| 1208 public: | |
| 1209 UCharFile(const char *fileName); | |
| 1210 ~UCharFile(); | |
| 1211 UChar get(); | |
| 1212 UBool eof() {return fEof;}; | |
| 1213 UBool error() {return fError;}; | |
| 1214 | |
| 1215 private: | |
| 1216 UCharFile (const UCharFile & /*other*/) {}; // No co
py constructor. | |
| 1217 UCharFile & operator = (const UCharFile &/*other*/) {return *this;}; // No
assignment op | |
| 1218 | |
| 1219 FILE *fFile; | |
| 1220 const char *fName; | |
| 1221 UBool fEof; | |
| 1222 UBool fError; | |
| 1223 UChar fPending2ndSurrogate; | |
| 1224 | |
| 1225 enum {UTF16LE, UTF16BE, UTF8} fEncoding; | |
| 1226 }; | |
| 1227 | |
| 1228 UCharFile::UCharFile(const char * fileName) { | |
| 1229 fEof = FALSE; | |
| 1230 fError = FALSE; | |
| 1231 fName = fileName; | |
| 1232 fFile = fopen(fName, "rb"); | |
| 1233 fPending2ndSurrogate = 0; | |
| 1234 if (fFile == NULL) { | |
| 1235 fprintf(stderr, "Can not open file \"%s\"\n", opt_fName); | |
| 1236 fError = TRUE; | |
| 1237 return; | |
| 1238 } | |
| 1239 // | |
| 1240 // Look for the byte order mark at the start of the file. | |
| 1241 // | |
| 1242 int BOMC1, BOMC2, BOMC3; | |
| 1243 BOMC1 = fgetc(fFile); | |
| 1244 BOMC2 = fgetc(fFile); | |
| 1245 | |
| 1246 if (BOMC1 == 0xff && BOMC2 == 0xfe) { | |
| 1247 fEncoding = UTF16LE; } | |
| 1248 else if (BOMC1 == 0xfe && BOMC2 == 0xff) { | |
| 1249 fEncoding = UTF16BE; } | |
| 1250 else if (BOMC1 == 0xEF && BOMC2 == 0xBB && (BOMC3 = fgetc(fFile)) == 0xBF )
{ | |
| 1251 fEncoding = UTF8; } | |
| 1252 else | |
| 1253 { | |
| 1254 fprintf(stderr, "collperf: file \"%s\" encoding must be UTF-8 or UTF-16
, and " | |
| 1255 "must include a BOM.\n", fileName); | |
| 1256 fError = true; | |
| 1257 return; | |
| 1258 } | |
| 1259 } | |
| 1260 | |
| 1261 | |
| 1262 UCharFile::~UCharFile() { | |
| 1263 fclose(fFile); | |
| 1264 } | |
| 1265 | |
| 1266 | |
| 1267 | |
| 1268 UChar UCharFile::get() { | |
| 1269 UChar c; | |
| 1270 switch (fEncoding) { | |
| 1271 case UTF16LE: | |
| 1272 { | |
| 1273 int cL, cH; | |
| 1274 cL = fgetc(fFile); | |
| 1275 cH = fgetc(fFile); | |
| 1276 c = cL | (cH << 8); | |
| 1277 if (cH == EOF) { | |
| 1278 c = 0; | |
| 1279 fEof = TRUE; | |
| 1280 } | |
| 1281 break; | |
| 1282 } | |
| 1283 case UTF16BE: | |
| 1284 { | |
| 1285 int cL, cH; | |
| 1286 cH = fgetc(fFile); | |
| 1287 cL = fgetc(fFile); | |
| 1288 c = cL | (cH << 8); | |
| 1289 if (cL == EOF) { | |
| 1290 c = 0; | |
| 1291 fEof = TRUE; | |
| 1292 } | |
| 1293 break; | |
| 1294 } | |
| 1295 case UTF8: | |
| 1296 { | |
| 1297 if (fPending2ndSurrogate != 0) { | |
| 1298 c = fPending2ndSurrogate; | |
| 1299 fPending2ndSurrogate = 0; | |
| 1300 break; | |
| 1301 } | |
| 1302 | |
| 1303 int ch = fgetc(fFile); // Note: c and ch are separate cause eof t
est doesn't work on UChar type. | |
| 1304 if (ch == EOF) { | |
| 1305 c = 0; | |
| 1306 fEof = TRUE; | |
| 1307 break; | |
| 1308 } | |
| 1309 | |
| 1310 if (ch <= 0x7f) { | |
| 1311 // It's ascii. No further utf-8 conversion. | |
| 1312 c = ch; | |
| 1313 break; | |
| 1314 } | |
| 1315 | |
| 1316 // Figure out the lenght of the char and read the rest of the bytes | |
| 1317 // into a temp array. | |
| 1318 int nBytes; | |
| 1319 if (ch >= 0xF0) {nBytes=4;} | |
| 1320 else if (ch >= 0xE0) {nBytes=3;} | |
| 1321 else if (ch >= 0xC0) {nBytes=2;} | |
| 1322 else { | |
| 1323 fprintf(stderr, "utf-8 encoded file contains corrupt data.\n"); | |
| 1324 fError = TRUE; | |
| 1325 return 0; | |
| 1326 } | |
| 1327 | |
| 1328 unsigned char bytes[10]; | |
| 1329 bytes[0] = (unsigned char)ch; | |
| 1330 int i; | |
| 1331 for (i=1; i<nBytes; i++) { | |
| 1332 bytes[i] = fgetc(fFile); | |
| 1333 if (bytes[i] < 0x80 || bytes[i] >= 0xc0) { | |
| 1334 fprintf(stderr, "utf-8 encoded file contains corrupt data.\n
"); | |
| 1335 fError = TRUE; | |
| 1336 return 0; | |
| 1337 } | |
| 1338 } | |
| 1339 | |
| 1340 // Convert the bytes from the temp array to a Unicode char. | |
| 1341 i = 0; | |
| 1342 uint32_t cp; | |
| 1343 U8_NEXT_UNSAFE(bytes, i, cp); | |
| 1344 c = (UChar)cp; | |
| 1345 | |
| 1346 if (cp >= 0x10000) { | |
| 1347 // The code point needs to be broken up into a utf-16 surrogate
pair. | |
| 1348 // Process first half this time through the main loop, and | |
| 1349 // remember the other half for the next time through. | |
| 1350 UChar utf16Buf[3]; | |
| 1351 i = 0; | |
| 1352 UTF16_APPEND_CHAR_UNSAFE(utf16Buf, i, cp); | |
| 1353 fPending2ndSurrogate = utf16Buf[1]; | |
| 1354 c = utf16Buf[0]; | |
| 1355 } | |
| 1356 break; | |
| 1357 }; | |
| 1358 default: | |
| 1359 c = 0xFFFD; /* Error, unspecified codepage*/ | |
| 1360 fprintf(stderr, "UCharFile: Error: unknown fEncoding\n"); | |
| 1361 exit(1); | |
| 1362 } | |
| 1363 return c; | |
| 1364 } | |
| 1365 | |
| 1366 //------------------------------------------------------------------------------
---------- | |
| 1367 // | |
| 1368 // openRulesCollator - Command line specified a rules file. Read it in | |
| 1369 // and open a collator with it. | |
| 1370 // | |
| 1371 //------------------------------------------------------------------------------
---------- | |
| 1372 UCollator *openRulesCollator() { | |
| 1373 UCharFile f(opt_rules); | |
| 1374 if (f.error()) { | |
| 1375 return 0; | |
| 1376 } | |
| 1377 | |
| 1378 int bufLen = 10000; | |
| 1379 UChar *buf = (UChar *)malloc(bufLen * sizeof(UChar)); | |
| 1380 UChar *tmp; | |
| 1381 int i = 0; | |
| 1382 | |
| 1383 for(;;) { | |
| 1384 buf[i] = f.get(); | |
| 1385 if (f.eof()) { | |
| 1386 break; | |
| 1387 } | |
| 1388 if (f.error()) { | |
| 1389 return 0; | |
| 1390 } | |
| 1391 i++; | |
| 1392 if (i >= bufLen) { | |
| 1393 tmp = buf; | |
| 1394 bufLen += 10000; | |
| 1395 buf = (UChar *)realloc(buf, bufLen); | |
| 1396 if (buf == NULL) { | |
| 1397 free(tmp); | |
| 1398 return 0; | |
| 1399 } | |
| 1400 } | |
| 1401 } | |
| 1402 buf[i] = 0; | |
| 1403 | |
| 1404 UErrorCode status = U_ZERO_ERROR; | |
| 1405 UCollator *coll = ucol_openRules(buf, u_strlen(buf), UCOL_OFF, | |
| 1406 UCOL_DEFAULT_STRENGTH, NULL, &status); | |
| 1407 if (U_FAILURE(status)) { | |
| 1408 fprintf(stderr, "ICU ucol_openRules() open failed.: %d\n", status); | |
| 1409 return 0; | |
| 1410 } | |
| 1411 free(buf); | |
| 1412 return coll; | |
| 1413 } | |
| 1414 | |
| 1415 | |
| 1416 | |
| 1417 | |
| 1418 | |
| 1419 //------------------------------------------------------------------------------
---------- | |
| 1420 // | |
| 1421 // Main -- process command line, read in and pre-process the test file, | |
| 1422 // call other functions to do the actual tests. | |
| 1423 // | |
| 1424 //------------------------------------------------------------------------------
---------- | |
| 1425 int main(int argc, const char** argv) { | |
| 1426 if (ProcessOptions(argc, argv, opts) != TRUE || opt_help || opt_fName == 0)
{ | |
| 1427 printf(gUsageString); | |
| 1428 exit (1); | |
| 1429 } | |
| 1430 | |
| 1431 // Make sure that we've only got one API selected. | |
| 1432 if (opt_unix || opt_win) opt_icu = FALSE; | |
| 1433 if (opt_unix) opt_win = FALSE; | |
| 1434 | |
| 1435 // | |
| 1436 // Set up an ICU collator | |
| 1437 // | |
| 1438 UErrorCode status = U_ZERO_ERROR; | |
| 1439 | |
| 1440 if (opt_rules != 0) { | |
| 1441 gCol = openRulesCollator(); | |
| 1442 if (gCol == 0) {return -1;} | |
| 1443 } | |
| 1444 else { | |
| 1445 gCol = ucol_open(opt_locale, &status); | |
| 1446 if (U_FAILURE(status)) { | |
| 1447 fprintf(stderr, "Collator creation failed.: %d\n", status); | |
| 1448 return -1; | |
| 1449 } | |
| 1450 } | |
| 1451 if (status==U_USING_DEFAULT_WARNING && opt_terse==FALSE) { | |
| 1452 fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", opt_locale)
; | |
| 1453 } | |
| 1454 if (status==U_USING_FALLBACK_WARNING && opt_terse==FALSE) { | |
| 1455 fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", opt_locale); | |
| 1456 } | |
| 1457 | |
| 1458 if (opt_norm) { | |
| 1459 ucol_setAttribute(gCol, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | |
| 1460 } | |
| 1461 if (opt_french && opt_frenchoff) { | |
| 1462 fprintf(stderr, "collperf: Error, specified both -french and -frenchoff
options."); | |
| 1463 exit(-1); | |
| 1464 } | |
| 1465 if (opt_french) { | |
| 1466 ucol_setAttribute(gCol, UCOL_FRENCH_COLLATION, UCOL_ON, &status); | |
| 1467 } | |
| 1468 if (opt_frenchoff) { | |
| 1469 ucol_setAttribute(gCol, UCOL_FRENCH_COLLATION, UCOL_OFF, &status); | |
| 1470 } | |
| 1471 if (opt_lower) { | |
| 1472 ucol_setAttribute(gCol, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, &status); | |
| 1473 } | |
| 1474 if (opt_upper) { | |
| 1475 ucol_setAttribute(gCol, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &status); | |
| 1476 } | |
| 1477 if (opt_case) { | |
| 1478 ucol_setAttribute(gCol, UCOL_CASE_LEVEL, UCOL_ON, &status); | |
| 1479 } | |
| 1480 if (opt_shifted) { | |
| 1481 ucol_setAttribute(gCol, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); | |
| 1482 } | |
| 1483 if (opt_level != 0) { | |
| 1484 switch (opt_level) { | |
| 1485 case 1: | |
| 1486 ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_PRIMARY, &status); | |
| 1487 break; | |
| 1488 case 2: | |
| 1489 ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_SECONDARY, &status); | |
| 1490 break; | |
| 1491 case 3: | |
| 1492 ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_TERTIARY, &status); | |
| 1493 break; | |
| 1494 case 4: | |
| 1495 ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_QUATERNARY, &status); | |
| 1496 break; | |
| 1497 case 5: | |
| 1498 ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_IDENTICAL, &status); | |
| 1499 break; | |
| 1500 default: | |
| 1501 fprintf(stderr, "-level param must be between 1 and 5\n"); | |
| 1502 exit(-1); | |
| 1503 } | |
| 1504 } | |
| 1505 | |
| 1506 if (U_FAILURE(status)) { | |
| 1507 fprintf(stderr, "Collator attribute setting failed.: %d\n", status); | |
| 1508 return -1; | |
| 1509 } | |
| 1510 | |
| 1511 | |
| 1512 // | |
| 1513 // Set up a Windows LCID | |
| 1514 // | |
| 1515 if (opt_langid != 0) { | |
| 1516 gWinLCID = MAKELCID(opt_langid, SORT_DEFAULT); | |
| 1517 } | |
| 1518 else { | |
| 1519 gWinLCID = uloc_getLCID(opt_locale); | |
| 1520 } | |
| 1521 | |
| 1522 | |
| 1523 // | |
| 1524 // Set the UNIX locale | |
| 1525 // | |
| 1526 if (opt_unix) { | |
| 1527 if (setlocale(LC_ALL, opt_locale) == 0) { | |
| 1528 fprintf(stderr, "setlocale(LC_ALL, %s) failed.\n", opt_locale); | |
| 1529 exit(-1); | |
| 1530 } | |
| 1531 } | |
| 1532 | |
| 1533 // Read in the input file. | |
| 1534 // File assumed to be utf-16. | |
| 1535 // Lines go onto heap buffers. Global index array to line starts is creat
ed. | |
| 1536 // Lines themselves are null terminated. | |
| 1537 // | |
| 1538 | |
| 1539 UCharFile f(opt_fName); | |
| 1540 if (f.error()) { | |
| 1541 exit(-1); | |
| 1542 } | |
| 1543 | |
| 1544 const int MAXLINES = 100000; | |
| 1545 gFileLines = new Line[MAXLINES]; | |
| 1546 UChar buf[1024]; | |
| 1547 int column = 0; | |
| 1548 | |
| 1549 // Read the file, split into lines, and save in memory. | |
| 1550 // Loop runs once per utf-16 value from the input file, | |
| 1551 // (The number of bytes read from file per loop iteration depends on exte
rnal encoding.) | |
| 1552 for (;;) { | |
| 1553 | |
| 1554 UChar c = f.get(); | |
| 1555 if (f.error()){ | |
| 1556 exit(-1); | |
| 1557 } | |
| 1558 | |
| 1559 | |
| 1560 // We now have a good UTF-16 value in c. | |
| 1561 | |
| 1562 // Watch for CR, LF, EOF; these finish off a line. | |
| 1563 if (c == 0xd) { | |
| 1564 continue; | |
| 1565 } | |
| 1566 | |
| 1567 if (f.eof() || c == 0x0a || c==0x2028) { // Unipad inserts 2028 line se
parators! | |
| 1568 buf[column++] = 0; | |
| 1569 if (column > 1) { | |
| 1570 gFileLines[gNumFileLines].name = new UChar[column]; | |
| 1571 gFileLines[gNumFileLines].len = column-1; | |
| 1572 memcpy(gFileLines[gNumFileLines].name, buf, column * sizeof(UCha
r)); | |
| 1573 gNumFileLines++; | |
| 1574 column = 0; | |
| 1575 if (gNumFileLines >= MAXLINES) { | |
| 1576 fprintf(stderr, "File too big. Max number of lines is %d\n"
, MAXLINES); | |
| 1577 exit(-1); | |
| 1578 } | |
| 1579 | |
| 1580 } | |
| 1581 if (c == 0xa || c == 0x2028) | |
| 1582 continue; | |
| 1583 else | |
| 1584 break; // EOF | |
| 1585 } | |
| 1586 buf[column++] = c; | |
| 1587 if (column >= 1023) | |
| 1588 { | |
| 1589 static UBool warnFlag = TRUE; | |
| 1590 if (warnFlag) { | |
| 1591 fprintf(stderr, "Warning - file line longer than 1023 chars trun
cated.\n"); | |
| 1592 warnFlag = FALSE; | |
| 1593 } | |
| 1594 column--; | |
| 1595 } | |
| 1596 } | |
| 1597 | |
| 1598 if (opt_terse == FALSE) { | |
| 1599 printf("file \"%s\", %d lines.\n", opt_fName, gNumFileLines); | |
| 1600 } | |
| 1601 | |
| 1602 | |
| 1603 // Convert the lines to the UNIX encoding. | |
| 1604 if (opt_unix) { | |
| 1605 UnixConvert(); | |
| 1606 } | |
| 1607 | |
| 1608 // | |
| 1609 // Pre-compute ICU sort keys for the lines of the file. | |
| 1610 // | |
| 1611 int line; | |
| 1612 int32_t t; | |
| 1613 | |
| 1614 for (line=0; line<gNumFileLines; line++) { | |
| 1615 t = ucol_getSortKey(gCol, gFileLines[line].name, -1, (unsigned char *)b
uf, sizeof(buf)); | |
| 1616 gFileLines[line].icuSortKey = new char[t]; | |
| 1617 | |
| 1618 if (t > (int32_t)sizeof(buf)) { | |
| 1619 t = ucol_getSortKey(gCol, gFileLines[line].name, -1, (unsigned char
*)gFileLines[line].icuSortKey , t); | |
| 1620 } | |
| 1621 else | |
| 1622 { | |
| 1623 memcpy(gFileLines[line].icuSortKey, buf, t); | |
| 1624 } | |
| 1625 } | |
| 1626 | |
| 1627 | |
| 1628 | |
| 1629 // | |
| 1630 // Pre-compute Windows sort keys for the lines of the file. | |
| 1631 // | |
| 1632 for (line=0; line<gNumFileLines; line++) { | |
| 1633 t=LCMapStringW(gWinLCID, LCMAP_SORTKEY, gFileLines[line].name, -1, buf,
sizeof(buf)); | |
| 1634 gFileLines[line].winSortKey = new char[t]; | |
| 1635 if (t > (int32_t)sizeof(buf)) { | |
| 1636 t = LCMapStringW(gWinLCID, LCMAP_SORTKEY, gFileLines[line].name, -1
, (unsigned short *)(gFileLines[line].winSortKey), t); | |
| 1637 } | |
| 1638 else | |
| 1639 { | |
| 1640 memcpy(gFileLines[line].winSortKey, buf, t); | |
| 1641 } | |
| 1642 } | |
| 1643 | |
| 1644 // | |
| 1645 // Pre-compute UNIX sort keys for the lines of the file. | |
| 1646 // | |
| 1647 if (opt_unix) { | |
| 1648 for (line=0; line<gNumFileLines; line++) { | |
| 1649 t=strxfrm((char *)buf, gFileLines[line].unixName, sizeof(buf)); | |
| 1650 gFileLines[line].unixSortKey = new char[t]; | |
| 1651 if (t > (int32_t)sizeof(buf)) { | |
| 1652 t = strxfrm(gFileLines[line].unixSortKey, gFileLines[line].unix
Name, sizeof(buf)); | |
| 1653 } | |
| 1654 else | |
| 1655 { | |
| 1656 memcpy(gFileLines[line].unixSortKey, buf, t); | |
| 1657 } | |
| 1658 } | |
| 1659 } | |
| 1660 | |
| 1661 | |
| 1662 // | |
| 1663 // Dump file lines, CEs, Sort Keys if requested. | |
| 1664 // | |
| 1665 if (opt_dump) { | |
| 1666 int i; | |
| 1667 for (line=0; line<gNumFileLines; line++) { | |
| 1668 for (i=0;;i++) { | |
| 1669 UChar c = gFileLines[line].name[i]; | |
| 1670 if (c == 0) | |
| 1671 break; | |
| 1672 if (c < 0x20 || c > 0x7e) { | |
| 1673 printf("\\u%.4x", c); | |
| 1674 } | |
| 1675 else { | |
| 1676 printf("%c", c); | |
| 1677 } | |
| 1678 } | |
| 1679 printf("\n"); | |
| 1680 | |
| 1681 printf(" CEs: "); | |
| 1682 UCollationElements *CEiter = ucol_openElements(gCol, gFileLines[line
].name, -1, &status); | |
| 1683 int32_t ce; | |
| 1684 i = 0; | |
| 1685 for (;;) { | |
| 1686 ce = ucol_next(CEiter, &status); | |
| 1687 if (ce == UCOL_NULLORDER) { | |
| 1688 break; | |
| 1689 } | |
| 1690 printf(" %.8x", ce); | |
| 1691 if (++i > 8) { | |
| 1692 printf("\n "); | |
| 1693 i = 0; | |
| 1694 } | |
| 1695 } | |
| 1696 printf("\n"); | |
| 1697 ucol_closeElements(CEiter); | |
| 1698 | |
| 1699 | |
| 1700 printf(" ICU Sort Key: "); | |
| 1701 for (i=0; ; i++) { | |
| 1702 unsigned char c = gFileLines[line].icuSortKey[i]; | |
| 1703 printf("%02x ", c); | |
| 1704 if (c == 0) { | |
| 1705 break; | |
| 1706 } | |
| 1707 if (i > 0 && i % 20 == 0) { | |
| 1708 printf("\n "); | |
| 1709 } | |
| 1710 } | |
| 1711 printf("\n"); | |
| 1712 } | |
| 1713 } | |
| 1714 | |
| 1715 | |
| 1716 // | |
| 1717 // Pre-sort the lines. | |
| 1718 // | |
| 1719 int i; | |
| 1720 gSortedLines = new Line *[gNumFileLines]; | |
| 1721 for (i=0; i<gNumFileLines; i++) { | |
| 1722 gSortedLines[i] = &gFileLines[i]; | |
| 1723 } | |
| 1724 | |
| 1725 if (opt_win) { | |
| 1726 qsort(gSortedLines, gNumFileLines, sizeof(Line *), Winstrcmp); | |
| 1727 } | |
| 1728 else if (opt_unix) { | |
| 1729 qsort(gSortedLines, gNumFileLines, sizeof(Line *), UNIXstrcmp); | |
| 1730 } | |
| 1731 else /* ICU */ | |
| 1732 { | |
| 1733 qsort(gSortedLines, gNumFileLines, sizeof(Line *), ICUstrcmp); | |
| 1734 } | |
| 1735 | |
| 1736 | |
| 1737 // | |
| 1738 // Make up a randomized order, will be used for sorting tests. | |
| 1739 // | |
| 1740 gRandomLines = new Line *[gNumFileLines]; | |
| 1741 for (i=0; i<gNumFileLines; i++) { | |
| 1742 gRandomLines[i] = &gFileLines[i]; | |
| 1743 } | |
| 1744 qsort(gRandomLines, gNumFileLines, sizeof(Line *), ICURandomCmp); | |
| 1745 | |
| 1746 | |
| 1747 | |
| 1748 | |
| 1749 // | |
| 1750 // We've got the file read into memory. Go do something with it. | |
| 1751 // | |
| 1752 | |
| 1753 if (opt_qsort) doQSort(); | |
| 1754 if (opt_binsearch) doBinarySearch(); | |
| 1755 if (opt_keygen) doKeyGen(); | |
| 1756 if (opt_keyhist) doKeyHist(); | |
| 1757 if (opt_itertest) doIterTest(); | |
| 1758 | |
| 1759 return 0; | |
| 1760 | |
| 1761 } | |
| OLD | NEW |