OLD | NEW |
(Empty) | |
| 1 /******************************************************************** |
| 2 * COPYRIGHT: |
| 3 * Copyright (C) 2001-2006 IBM, Inc. All Rights Reserved. |
| 4 * |
| 5 ********************************************************************/ |
| 6 |
| 7 #include <stdio.h> |
| 8 #include <stdlib.h> |
| 9 #include <locale.h> |
| 10 #include <limits.h> |
| 11 #include <string.h> |
| 12 #include "unicode/uperf.h" |
| 13 #include "uoptions.h" |
| 14 #include "unicode/coll.h" |
| 15 #include <unicode/ucoleitr.h> |
| 16 |
| 17 |
| 18 |
| 19 /* Stores an array of string<UNIT> in one contiguous block of memory. |
| 20 Since a string<UNIT> is itself treated as an array of UNIT, this |
| 21 class eases the memory management of an array of string<UNIT>. |
| 22 */ |
| 23 |
| 24 //template<typename UNIT> |
/*
 * COMPATCT_ARRAY(Name, UNIT) defines a struct `Name` that stores many
 * UNIT strings back to back in a single heap buffer.
 *
 *   count    - number of strings appended so far
 *   index    - count + 1 offsets (in UNITs) into data; index[0] is always 0
 *   data     - the shared buffer; string i occupies [index[i], index[i+1])
 *
 * append_one(len) reserves room for one more string of `len` UNITs (the
 * caller's length includes the terminator) and may move both buffers, so
 * pointers returned earlier by last()/dataOf() must not be kept across it.
 * The copy constructor and copy assignment are declared without a body.
 */
#define COMPATCT_ARRAY(CompactArrays, UNIT) \
struct CompactArrays { \
    CompactArrays(const CompactArrays &); \
    CompactArrays & operator=(const CompactArrays &); \
    \
    int32_t   count; /* how many strings are stored */ \
    int32_t * index; /* start offset of each string, plus one end marker */ \
    UNIT *    data;  /* storage holding every string */ \
    \
    CompactArrays() : count(0), index(NULL), data(NULL) { \
        index = (int32_t *) realloc(index, sizeof(int32_t)); \
        index[0] = 0; \
    } \
    ~CompactArrays() { \
        free(index); \
        free(data); \
    } \
    /* theLen counts the terminating zero as well */ \
    void append_one(int32_t theLen) { \
        ++count; \
        index = (int32_t *) realloc(index, sizeof(int32_t) * (count + 1)); \
        const int32_t newEnd = index[count - 1] + theLen; \
        index[count] = newEnd; \
        data = (UNIT *) realloc(data, sizeof(UNIT) * newEnd); \
    } \
    /* start of the most recently appended string */ \
    UNIT * last() { \
        return data + index[count - 1]; \
    } \
    /* start of string i */ \
    UNIT * dataOf(int32_t i) { \
        return data + index[i]; \
    } \
    /* length of string i, not counting its terminating zero */ \
    int32_t lengthOf(int i) { \
        return (index[i + 1] - index[i]) - 1; \
    } \
};
| 48 |
| 49 //typedef CompactArrays<UChar> CA_uchar; |
| 50 //typedef CompactArrays<char> CA_char; |
| 51 //typedef CompactArrays<uint8_t> CA_uint8; |
| 52 //typedef CompactArrays<WCHAR> CA_win_wchar; |
| 53 |
| 54 COMPATCT_ARRAY(CA_uchar, UChar) |
| 55 COMPATCT_ARRAY(CA_char, char) |
| 56 COMPATCT_ARRAY(CA_uint8, uint8_t) |
| 57 COMPATCT_ARRAY(CA_win_wchar, WCHAR) |
| 58 |
| 59 |
| 60 struct DataIndex { |
| 61 static DWORD win_langid; // for qsort callback function |
| 62 static UCollator * col; // for qsort callback function |
| 63 uint8_t * icu_key; |
| 64 UChar * icu_data; |
| 65 int32_t icu_data_len; |
| 66 char* posix_key; |
| 67 char* posix_data; |
| 68 int32_t posix_data_len; |
| 69 char* win_key; |
| 70 WCHAR * win_data; |
| 71 int32_t win_data_len; |
| 72 }; |
| 73 DWORD DataIndex::win_langid; |
| 74 UCollator * DataIndex::col; |
| 75 |
| 76 |
| 77 |
| 78 class CmdKeyGen : public UPerfFunction { |
| 79 typedef void (CmdKeyGen::* Func)(int32_t); |
| 80 enum{MAX_KEY_LENGTH = 5000}; |
| 81 UCollator * col; |
| 82 DWORD win_langid; |
| 83 int32_t count; |
| 84 DataIndex * data; |
| 85 Func fn; |
| 86 |
| 87 union { // to save sapce |
| 88 uint8_t icu_key[MAX_KEY_LENGTH]; |
| 89 char posix_key[MAX_KEY_LENGTH]; |
| 90 WCHAR win_key[MAX_KEY_LENGTH]; |
| 91 }; |
| 92 public: |
| 93 CmdKeyGen(UErrorCode, UCollator * col,DWORD win_langid, int32_t count, DataI
ndex * data,Func fn,int32_t) |
| 94 :col(col),win_langid(win_langid), count(count), data(data), fn(fn){} |
| 95 |
| 96 virtual long getOperationsPerIteration(){return count;} |
| 97 |
| 98 virtual void call(UErrorCode* status){ |
| 99 for(int32_t i = 0; i< count; i++){ |
| 100 (this->*fn)(i); |
| 101 } |
| 102 } |
| 103 |
| 104 void icu_key_null(int32_t i){ |
| 105 ucol_getSortKey(col, data[i].icu_data, -1, icu_key, MAX_KEY_LENGTH); |
| 106 } |
| 107 |
| 108 void icu_key_len(int32_t i){ |
| 109 ucol_getSortKey(col, data[i].icu_data, data[i].icu_data_len, icu_key
, MAX_KEY_LENGTH); |
| 110 } |
| 111 |
| 112 // pre-generated in CollPerfTest::prepareData(), need not to check error
here |
| 113 void win_key_null(int32_t i){ |
| 114 //LCMAP_SORTsk 0x00000400 // WC sort sk (normalize) |
| 115 LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, -1, win_ke
y, MAX_KEY_LENGTH); |
| 116 } |
| 117 |
| 118 void win_key_len(int32_t i){ |
| 119 LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, data[i].wi
n_data_len, win_key, MAX_KEY_LENGTH); |
| 120 } |
| 121 |
| 122 void posix_key_null(int32_t i){ |
| 123 strxfrm(posix_key, data[i].posix_data, MAX_KEY_LENGTH); |
| 124 } |
| 125 }; |
| 126 |
| 127 |
| 128 class CmdIter : public UPerfFunction { |
| 129 typedef void (CmdIter::* Func)(UErrorCode* , int32_t ); |
| 130 int32_t count; |
| 131 CA_uchar * data; |
| 132 Func fn; |
| 133 UCollationElements *iter; |
| 134 int32_t exec_count; |
| 135 public: |
| 136 CmdIter(UErrorCode & status, UCollator * col, int32_t count, CA_uchar *data,
Func fn, int32_t,int32_t) |
| 137 :count(count), data(data), fn(fn){ |
| 138 exec_count = 0; |
| 139 UChar dummytext[] = {0, 0}; |
| 140 iter = ucol_openElements(col, NULL, 0, &status); |
| 141 ucol_setText(iter, dummytext, 1, &status); |
| 142 } |
| 143 ~CmdIter(){ |
| 144 ucol_closeElements(iter); |
| 145 } |
| 146 |
| 147 virtual long getOperationsPerIteration(){return exec_count ? exec_count
: 1;} |
| 148 |
| 149 virtual void call(UErrorCode* status){ |
| 150 exec_count = 0; |
| 151 for(int32_t i = 0; i< count; i++){ |
| 152 (this->*fn)(status, i); |
| 153 } |
| 154 } |
| 155 |
| 156 void icu_forward_null(UErrorCode* status, int32_t i){ |
| 157 ucol_setText(iter, data->dataOf(i), -1, status); |
| 158 while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++; |
| 159 } |
| 160 |
| 161 void icu_forward_len(UErrorCode* status, int32_t i){ |
| 162 ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status); |
| 163 while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++; |
| 164 } |
| 165 |
| 166 void icu_backward_null(UErrorCode* status, int32_t i){ |
| 167 ucol_setText(iter, data->dataOf(i), -1, status); |
| 168 while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++; |
| 169 } |
| 170 |
| 171 void icu_backward_len(UErrorCode* status, int32_t i){ |
| 172 ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status); |
| 173 while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++; |
| 174 } |
| 175 }; |
| 176 |
| 177 class CmdIterAll : public UPerfFunction { |
| 178 typedef void (CmdIterAll::* Func)(UErrorCode* status); |
| 179 int32_t count; |
| 180 UChar * data; |
| 181 Func fn; |
| 182 UCollationElements *iter; |
| 183 int32_t exec_count; |
| 184 |
| 185 public: |
| 186 enum CALL {forward_null, forward_len, backward_null, backward_len}; |
| 187 |
| 188 ~CmdIterAll(){ |
| 189 ucol_closeElements(iter); |
| 190 } |
| 191 CmdIterAll(UErrorCode & status, UCollator * col, int32_t count, UChar * dat
a, CALL call,int32_t,int32_t) |
| 192 :count(count),data(data) |
| 193 { |
| 194 exec_count = 0; |
| 195 if (call == forward_null || call == backward_null) { |
| 196 iter = ucol_openElements(col, data, -1, &status); |
| 197 } else { |
| 198 iter = ucol_openElements(col, data, count, &status); |
| 199 } |
| 200 |
| 201 if (call == forward_null || call == forward_len){ |
| 202 fn = &CmdIterAll::icu_forward_all; |
| 203 } else { |
| 204 fn = &CmdIterAll::icu_backward_all; |
| 205 } |
| 206 } |
| 207 virtual long getOperationsPerIteration(){return exec_count ? exec_count : 1;
} |
| 208 |
| 209 virtual void call(UErrorCode* status){ |
| 210 (this->*fn)(status); |
| 211 } |
| 212 |
| 213 void icu_forward_all(UErrorCode* status){ |
| 214 int strlen = count - 5; |
| 215 int count5 = 5; |
| 216 int strindex = 0; |
| 217 ucol_setOffset(iter, strindex, status); |
| 218 while (TRUE) { |
| 219 if (ucol_next(iter, status) == UCOL_NULLORDER) { |
| 220 break; |
| 221 } |
| 222 exec_count++; |
| 223 count5 --; |
| 224 if (count5 == 0) { |
| 225 strindex += 10; |
| 226 if (strindex > strlen) { |
| 227 break; |
| 228 } |
| 229 ucol_setOffset(iter, strindex, status); |
| 230 count5 = 5; |
| 231 } |
| 232 } |
| 233 } |
| 234 |
| 235 void icu_backward_all(UErrorCode* status){ |
| 236 int strlen = count; |
| 237 int count5 = 5; |
| 238 int strindex = 5; |
| 239 ucol_setOffset(iter, strindex, status); |
| 240 while (TRUE) { |
| 241 if (ucol_previous(iter, status) == UCOL_NULLORDER) { |
| 242 break; |
| 243 } |
| 244 exec_count++; |
| 245 count5 --; |
| 246 if (count5 == 0) { |
| 247 strindex += 10; |
| 248 if (strindex > strlen) { |
| 249 break; |
| 250 } |
| 251 ucol_setOffset(iter, strindex, status); |
| 252 count5 = 5; |
| 253 } |
| 254 } |
| 255 } |
| 256 |
| 257 }; |
| 258 |
| 259 struct CmdQsort : public UPerfFunction{ |
| 260 |
| 261 static int q_random(const void * a, const void * b){ |
| 262 uint8_t * key_a = ((DataIndex *)a)->icu_key; |
| 263 uint8_t * key_b = ((DataIndex *)b)->icu_key; |
| 264 |
| 265 int val_a = 0; |
| 266 int val_b = 0; |
| 267 while (*key_a != 0) {val_a += val_a*37 + *key_a++;} |
| 268 while (*key_b != 0) {val_b += val_b*37 + *key_b++;} |
| 269 return val_a - val_b; |
| 270 } |
| 271 |
| 272 #define QCAST() \ |
| 273 DataIndex * da = (DataIndex *) a; \ |
| 274 DataIndex * db = (DataIndex *) b; \ |
| 275 ++exec_count |
| 276 |
| 277 static int icu_strcoll_null(const void *a, const void *b){ |
| 278 QCAST(); |
| 279 return ucol_strcoll(da->col, da->icu_data, -1, db->icu_data, -1) - UCOL_
EQUAL; |
| 280 } |
| 281 |
| 282 static int icu_strcoll_len(const void *a, const void *b){ |
| 283 QCAST(); |
| 284 return ucol_strcoll(da->col, da->icu_data, da->icu_data_len, db->icu_dat
a, db->icu_data_len) - UCOL_EQUAL; |
| 285 } |
| 286 |
| 287 static int icu_cmpkey (const void *a, const void *b){ |
| 288 QCAST(); |
| 289 return strcmp((char *) da->icu_key, (char *) db->icu_key); |
| 290 } |
| 291 |
| 292 static int win_cmp_null(const void *a, const void *b) { |
| 293 QCAST(); |
| 294 //CSTR_LESS_THAN 1 |
| 295 //CSTR_EQUAL 2 |
| 296 //CSTR_GREATER_THAN 3 |
| 297 int t = CompareStringW(da->win_langid, 0, da->win_data, -1, db->win_data
, -1); |
| 298 if (t == 0){ |
| 299 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastEr
ror()); |
| 300 exit(-1); |
| 301 } else{ |
| 302 return t - CSTR_EQUAL; |
| 303 } |
| 304 } |
| 305 |
| 306 static int win_cmp_len(const void *a, const void *b) { |
| 307 QCAST(); |
| 308 int t = CompareStringW(da->win_langid, 0, da->win_data, da->win_data_len
, db->win_data, db->win_data_len); |
| 309 if (t == 0){ |
| 310 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastEr
ror()); |
| 311 exit(-1); |
| 312 } else{ |
| 313 return t - CSTR_EQUAL; |
| 314 } |
| 315 } |
| 316 |
| 317 #define QFUNC(name, func, data) \ |
| 318 static int name (const void *a, const void *b){ \ |
| 319 QCAST(); \ |
| 320 return func(da->data, db->data); \ |
| 321 } |
| 322 |
| 323 QFUNC(posix_strcoll_null, strcoll, posix_data) |
| 324 QFUNC(posix_cmpkey, strcmp, posix_key) |
| 325 QFUNC(win_cmpkey, strcmp, win_key) |
| 326 QFUNC(win_wcscmp, wcscmp, win_data) |
| 327 QFUNC(icu_strcmp, u_strcmp, icu_data) |
| 328 QFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data) |
| 329 |
| 330 private: |
| 331 static int32_t exec_count; // potential muilt-thread problem |
| 332 |
| 333 typedef int (* Func)(const void *, const void *); |
| 334 |
| 335 Func fn; |
| 336 void * base; //Start of target array. |
| 337 int32_t num; //Array size in elements. |
| 338 int32_t width; //Element size in bytes. |
| 339 |
| 340 void * backup; //copy source of base |
| 341 public: |
| 342 CmdQsort(UErrorCode & status,void *theBase, int32_t num, int32_t width, Func
fn, int32_t,int32_t) |
| 343 :backup(theBase),num(num),width(width),fn(fn){ |
| 344 base = malloc(num * width); |
| 345 time_empty(100, &status); // warm memory/cache |
| 346 } |
| 347 |
| 348 ~CmdQsort(){ |
| 349 free(base); |
| 350 } |
| 351 |
| 352 void empty_call(){ |
| 353 exec_count = 0; |
| 354 memcpy(base, backup, num * width); |
| 355 } |
| 356 |
| 357 double time_empty(int32_t n, UErrorCode* status) { |
| 358 UTimer start, stop; |
| 359 utimer_getTime(&start); |
| 360 while (n-- > 0) { |
| 361 empty_call(); |
| 362 } |
| 363 utimer_getTime(&stop); |
| 364 return utimer_getDeltaSeconds(&start,&stop); // ms |
| 365 } |
| 366 |
| 367 virtual void call(UErrorCode* status){ |
| 368 exec_count = 0; |
| 369 memcpy(base, backup, num * width); |
| 370 qsort(base, num, width, fn); |
| 371 } |
| 372 virtual double time(int32_t n, UErrorCode* status) { |
| 373 double t1 = time_empty(n,status); |
| 374 double t2 = UPerfFunction::time(n, status); |
| 375 return t2-t1;// < 0 ? t2 : t2-t1; |
| 376 } |
| 377 |
| 378 virtual long getOperationsPerIteration(){ return exec_count?exec_count:1
;} |
| 379 }; |
| 380 int32_t CmdQsort::exec_count; |
| 381 |
| 382 |
| 383 class CmdBinSearch : public UPerfFunction{ |
| 384 public: |
| 385 typedef int (CmdBinSearch::* Func)(int, int); |
| 386 |
| 387 UCollator * col; |
| 388 DWORD win_langid; |
| 389 int32_t count; |
| 390 DataIndex * rnd; |
| 391 DataIndex * ord; |
| 392 Func fn; |
| 393 int32_t exec_count; |
| 394 |
| 395 CmdBinSearch(UErrorCode, UCollator * col,DWORD win_langid,int32_t count,Data
Index * rnd,DataIndex * ord,Func fn) |
| 396 :col(col),win_langid(win_langid), count(count), rnd(rnd), ord(ord), fn(f
n),exec_count(0){} |
| 397 |
| 398 |
| 399 virtual void call(UErrorCode* status){ |
| 400 exec_count = 0; |
| 401 for(int32_t i = 0; i< count; i++){ // search all data |
| 402 binary_search(i); |
| 403 } |
| 404 } |
| 405 virtual long getOperationsPerIteration(){ return exec_count?exec_count:1
;} |
| 406 |
| 407 void binary_search(int32_t random) { |
| 408 int low = 0; |
| 409 int high = count - 1; |
| 410 int guess; |
| 411 int last_guess = -1; |
| 412 int r; |
| 413 while (TRUE) { |
| 414 guess = (high + low)/2; |
| 415 if (last_guess == guess) break; // nothing to search |
| 416 |
| 417 r = (this->*fn)(random, guess); |
| 418 exec_count++; |
| 419 |
| 420 if (r == 0) |
| 421 return; // found, search end. |
| 422 if (r < 0) { |
| 423 high = guess; |
| 424 } else { |
| 425 low = guess; |
| 426 } |
| 427 last_guess = guess; |
| 428 } |
| 429 } |
| 430 |
| 431 int icu_strcoll_null(int32_t i, int32_t j){ |
| 432 return ucol_strcoll(col, rnd[i].icu_data, -1, ord[j].icu_data,-1); |
| 433 } |
| 434 |
| 435 int icu_strcoll_len(int32_t i, int32_t j){ |
| 436 return ucol_strcoll(col, rnd[i].icu_data, rnd[i].icu_data_len, ord[j
].icu_data, ord[j].icu_data_len); |
| 437 } |
| 438 |
| 439 int icu_cmpkey(int32_t i, int32_t j) { |
| 440 return strcmp( (char *) rnd[i].icu_key, (char *) ord[j].icu_key ); |
| 441 } |
| 442 |
| 443 int win_cmp_null(int32_t i, int32_t j) { |
| 444 int t = CompareStringW(win_langid, 0, rnd[i].win_data, -1, ord[j].wi
n_data, -1); |
| 445 if (t == 0){ |
| 446 fprintf(stderr, "CompareStringW error, error number %x\n", GetLa
stError()); |
| 447 exit(-1); |
| 448 } else{ |
| 449 return t - CSTR_EQUAL; |
| 450 } |
| 451 } |
| 452 |
| 453 int win_cmp_len(int32_t i, int32_t j) { |
| 454 int t = CompareStringW(win_langid, 0, rnd[i].win_data, rnd[i].win_da
ta_len, ord[j].win_data, ord[j].win_data_len); |
| 455 if (t == 0){ |
| 456 fprintf(stderr, "CompareStringW error, error number %x\n", GetLa
stError()); |
| 457 exit(-1); |
| 458 } else{ |
| 459 return t - CSTR_EQUAL; |
| 460 } |
| 461 } |
| 462 |
| 463 #define BFUNC(name, func, data) \ |
| 464 int name(int32_t i, int32_t j) { \ |
| 465 return func(rnd[i].data, ord[j].data); \ |
| 466 } |
| 467 |
| 468 BFUNC(posix_strcoll_null, strcoll, posix_data) |
| 469 BFUNC(posix_cmpkey, strcmp, posix_key) |
| 470 BFUNC(win_cmpkey, strcmp, win_key) |
| 471 BFUNC(win_wcscmp, wcscmp, win_data) |
| 472 BFUNC(icu_strcmp, u_strcmp, icu_data) |
| 473 BFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data) |
| 474 }; |
| 475 |
| 476 class CollPerfTest : public UPerfTest { |
| 477 public: |
| 478 UCollator * col; |
| 479 DWORD win_langid; |
| 480 |
| 481 UChar * icu_data_all; |
| 482 int32_t icu_data_all_len; |
| 483 |
| 484 int32_t count; |
| 485 CA_uchar * icu_data; |
| 486 CA_uint8 * icu_key; |
| 487 CA_char * posix_data; |
| 488 CA_char * posix_key; |
| 489 CA_win_wchar * win_data; |
| 490 CA_char * win_key; |
| 491 |
| 492 DataIndex * rnd_index; // random by icu key |
| 493 DataIndex * ord_win_data; |
| 494 DataIndex * ord_win_key; |
| 495 DataIndex * ord_posix_data; |
| 496 DataIndex * ord_posix_key; |
| 497 DataIndex * ord_icu_data; |
| 498 DataIndex * ord_icu_key; |
| 499 DataIndex * ord_win_wcscmp; |
| 500 DataIndex * ord_icu_strcmp; |
| 501 DataIndex * ord_icu_cmpcpo; |
| 502 |
| 503 virtual ~CollPerfTest(){ |
| 504 ucol_close(col); |
| 505 delete [] icu_data_all; |
| 506 delete icu_data; |
| 507 delete icu_key; |
| 508 delete posix_data; |
| 509 delete posix_key; |
| 510 delete win_data; |
| 511 delete win_key; |
| 512 delete[] rnd_index; |
| 513 delete[] ord_win_data; |
| 514 delete[] ord_win_key; |
| 515 delete[] ord_posix_data; |
| 516 delete[] ord_posix_key; |
| 517 delete[] ord_icu_data; |
| 518 delete[] ord_icu_key; |
| 519 delete[] ord_win_wcscmp; |
| 520 delete[] ord_icu_strcmp; |
| 521 delete[] ord_icu_cmpcpo; |
| 522 } |
| 523 |
| 524 CollPerfTest(int32_t argc, const char* argv[], UErrorCode& status):UPerfTest
(argc, argv, status){ |
| 525 col = NULL; |
| 526 icu_data_all = NULL; |
| 527 icu_data = NULL; |
| 528 icu_key = NULL; |
| 529 posix_data = NULL; |
| 530 posix_key = NULL; |
| 531 win_data =NULL; |
| 532 win_key = NULL; |
| 533 |
| 534 rnd_index = NULL; |
| 535 ord_win_data= NULL; |
| 536 ord_win_key= NULL; |
| 537 ord_posix_data= NULL; |
| 538 ord_posix_key= NULL; |
| 539 ord_icu_data= NULL; |
| 540 ord_icu_key= NULL; |
| 541 ord_win_wcscmp = NULL; |
| 542 ord_icu_strcmp = NULL; |
| 543 ord_icu_cmpcpo = NULL; |
| 544 |
| 545 if (U_FAILURE(status)){ |
| 546 return; |
| 547 } |
| 548 |
| 549 // Parse additional arguments |
| 550 |
| 551 UOption options[] = { |
| 552 UOPTION_DEF("langid", 'i', UOPT_REQUIRES_ARG), // Windows Lan
guage ID number. |
| 553 UOPTION_DEF("rulefile", 'r', UOPT_REQUIRES_ARG), // --rulef
ile <filename> |
| 554 // Collation related arguments. All are optional. |
| 555 // To simplify parsing, two choice arguments are disigned as NO_
ARG. |
| 556 // The default value is UPPER word in the comment |
| 557 UOPTION_DEF("c_french", 'f', UOPT_NO_ARG), // --french
<on | OFF> |
| 558 UOPTION_DEF("c_alternate", 'a', UOPT_NO_ARG), // --alterna
te <NON_IGNORE | shifted> |
| 559 UOPTION_DEF("c_casefirst", 'c', UOPT_REQUIRES_ARG), // --casefir
st <lower | upper | OFF> |
| 560 UOPTION_DEF("c_caselevel", 'l', UOPT_NO_ARG), // --caselev
el <on | OFF> |
| 561 UOPTION_DEF("c_normal", 'n', UOPT_NO_ARG), // --normal
<on | OFF> |
| 562 UOPTION_DEF("c_strength", 's', UOPT_REQUIRES_ARG), // --strengt
h <1-5> |
| 563 }; |
| 564 int32_t opt_len = (sizeof(options)/sizeof(options[0])); |
| 565 enum {i, r,f,a,c,l,n,s}; // The buffer between the option items' order
and their references |
| 566 |
| 567 _remainingArgc = u_parseArgs(_remainingArgc, (char**)argv, opt_len, opti
ons); |
| 568 |
| 569 if (_remainingArgc < 0){ |
| 570 status = U_ILLEGAL_ARGUMENT_ERROR; |
| 571 return; |
| 572 } |
| 573 |
| 574 if (locale == NULL){ |
| 575 locale = "en_US"; // set default locale |
| 576 } |
| 577 |
| 578 //#ifdef U_WINDOWS |
| 579 if (options[i].doesOccur) { |
| 580 char *endp; |
| 581 int tmp = strtol(options[i].value, &endp, 0); |
| 582 if (endp == options[i].value) { |
| 583 status = U_ILLEGAL_ARGUMENT_ERROR; |
| 584 return; |
| 585 } |
| 586 win_langid = MAKELCID(tmp, SORT_DEFAULT); |
| 587 } else { |
| 588 win_langid = uloc_getLCID(locale); |
| 589 } |
| 590 //#endif |
| 591 |
| 592 // Set up an ICU collator |
| 593 if (options[r].doesOccur) { |
| 594 // TODO: implement it |
| 595 } else { |
| 596 col = ucol_open(locale, &status); |
| 597 if (U_FAILURE(status)) { |
| 598 return; |
| 599 } |
| 600 } |
| 601 |
| 602 if (options[f].doesOccur) { |
| 603 ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_ON, &status); |
| 604 } else { |
| 605 ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_OFF, &status); |
| 606 } |
| 607 |
| 608 if (options[a].doesOccur) { |
| 609 ucol_setAttribute(col, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &statu
s); |
| 610 } |
| 611 |
| 612 if (options[c].doesOccur) { // strcmp() has i18n encoding problem |
| 613 if (strcmp("lower", options[c].value) == 0){ |
| 614 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, &statu
s); |
| 615 } else if (strcmp("upper", options[c].value) == 0) { |
| 616 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &statu
s); |
| 617 } else { |
| 618 status = U_ILLEGAL_ARGUMENT_ERROR; |
| 619 return; |
| 620 } |
| 621 } |
| 622 |
| 623 if (options[l].doesOccur){ |
| 624 ucol_setAttribute(col, UCOL_CASE_LEVEL, UCOL_ON, &status); |
| 625 } |
| 626 |
| 627 if (options[n].doesOccur){ |
| 628 ucol_setAttribute(col, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); |
| 629 } |
| 630 |
| 631 if (options[s].doesOccur) { |
| 632 char *endp; |
| 633 int tmp = strtol(options[l].value, &endp, 0); |
| 634 if (endp == options[l].value) { |
| 635 status = U_ILLEGAL_ARGUMENT_ERROR; |
| 636 return; |
| 637 } |
| 638 switch (tmp) { |
| 639 case 1: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_PRIMARY, &sta
tus); break; |
| 640 case 2: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_SECONDARY, &s
tatus); break; |
| 641 case 3: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_TERTIARY, &st
atus); break; |
| 642 case 4: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_QUATERNARY, &
status); break; |
| 643 case 5: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_IDENTICAL, &s
tatus); break; |
| 644 default: status = U_ILLEGAL_ARGUMENT_ERROR;
return; |
| 645 } |
| 646 } |
| 647 prepareData(status); |
| 648 } |
| 649 |
| 650 //to avoid use the annoying 'id' in TESTCASE(id,test) macro or the like |
| 651 #define TEST(testname, classname, arg1, arg2, arg3, arg4, arg5, arg6) \ |
| 652 if(temp == index) {\ |
| 653 name = #testname;\ |
| 654 if (exec) {\ |
| 655 UErrorCode status = U_ZERO_ERROR;\ |
| 656 UPerfFunction * t = new classname(status,arg1, arg2, arg3, arg4, arg5, arg6)
;\ |
| 657 if (U_FAILURE(status)) {\ |
| 658 delete t;\ |
| 659 return NULL;\ |
| 660 } else {\ |
| 661 return t;\ |
| 662 }\ |
| 663 } else {\ |
| 664 return NULL;\ |
| 665 }\ |
| 666 }\ |
| 667 temp++\ |
| 668 |
| 669 |
| 670 virtual UPerfFunction* runIndexedTest( /*[in]*/int32_t index, /*[in]*/UBool
exec, /*[out]*/const char* &name, /*[in]*/ char* par = NULL ){ |
| 671 int temp = 0; |
| 672 |
| 673 #define TEST_KEYGEN(testname, func)\ |
| 674 TEST(testname, CmdKeyGen, col, win_langid, count, rnd_index, &CmdKeyGen::fun
c, 0) |
| 675 TEST_KEYGEN(TestIcu_KeyGen_null, icu_key_null); |
| 676 TEST_KEYGEN(TestIcu_KeyGen_len, icu_key_len); |
| 677 TEST_KEYGEN(TestPosix_KeyGen_null, posix_key_null); |
| 678 TEST_KEYGEN(TestWin_KeyGen_null, win_key_null); |
| 679 TEST_KEYGEN(TestWin_KeyGen_len, win_key_len); |
| 680 |
| 681 #define TEST_ITER(testname, func)\ |
| 682 TEST(testname, CmdIter, col, count, icu_data, &CmdIter::func,0,0) |
| 683 TEST_ITER(TestIcu_ForwardIter_null, icu_forward_null); |
| 684 TEST_ITER(TestIcu_ForwardIter_len, icu_forward_len); |
| 685 TEST_ITER(TestIcu_BackwardIter_null, icu_backward_null); |
| 686 TEST_ITER(TestIcu_BackwardIter_len, icu_backward_len); |
| 687 |
| 688 #define TEST_ITER_ALL(testname, func)\ |
| 689 TEST(testname, CmdIterAll, col, icu_data_all_len, icu_data_all, CmdIterAll::
func,0,0) |
| 690 TEST_ITER_ALL(TestIcu_ForwardIter_all_null, forward_null); |
| 691 TEST_ITER_ALL(TestIcu_ForwardIter_all_len, forward_len); |
| 692 TEST_ITER_ALL(TestIcu_BackwardIter_all_null, backward_null); |
| 693 TEST_ITER_ALL(TestIcu_BackwardIter_all_len, backward_len); |
| 694 |
| 695 #define TEST_QSORT(testname, func)\ |
| 696 TEST(testname, CmdQsort, rnd_index, count, sizeof(DataIndex), CmdQsort::func
,0,0) |
| 697 TEST_QSORT(TestIcu_qsort_strcoll_null, icu_strcoll_null); |
| 698 TEST_QSORT(TestIcu_qsort_strcoll_len, icu_strcoll_len); |
| 699 TEST_QSORT(TestIcu_qsort_usekey, icu_cmpkey); |
| 700 TEST_QSORT(TestPosix_qsort_strcoll_null, posix_strcoll_null); |
| 701 TEST_QSORT(TestPosix_qsort_usekey, posix_cmpkey); |
| 702 TEST_QSORT(TestWin_qsort_CompareStringW_null, win_cmp_null); |
| 703 TEST_QSORT(TestWin_qsort_CompareStringW_len, win_cmp_len); |
| 704 TEST_QSORT(TestWin_qsort_usekey, win_cmpkey); |
| 705 |
| 706 #define TEST_BIN(testname, func)\ |
| 707 TEST(testname, CmdBinSearch, col, win_langid, count, rnd_index, ord_icu_key,
&CmdBinSearch::func) |
| 708 TEST_BIN(TestIcu_BinarySearch_strcoll_null, icu_strcoll_null); |
| 709 TEST_BIN(TestIcu_BinarySearch_strcoll_len, icu_strcoll_len); |
| 710 TEST_BIN(TestIcu_BinarySearch_usekey, icu_cmpkey); |
| 711 TEST_BIN(TestIcu_BinarySearch_strcmp, icu_strcmp); |
| 712 TEST_BIN(TestIcu_BinarySearch_cmpCPO, icu_cmpcpo); |
| 713 TEST_BIN(TestPosix_BinarySearch_strcoll_null, posix_strcoll_null); |
| 714 TEST_BIN(TestPosix_BinarySearch_usekey, posix_cmpkey); |
| 715 TEST_BIN(TestWin_BinarySearch_CompareStringW_null, win_cmp_null); |
| 716 TEST_BIN(TestWin_BinarySearch_CompareStringW_len, win_cmp_len); |
| 717 TEST_BIN(TestWin_BinarySearch_usekey, win_cmpkey); |
| 718 TEST_BIN(TestWin_BinarySearch_wcscmp, win_wcscmp); |
| 719 |
| 720 name=""; |
| 721 return NULL; |
| 722 } |
| 723 |
| 724 |
| 725 |
| 726 void prepareData(UErrorCode& status){ |
| 727 if(U_FAILURE(status)) return; |
| 728 if (icu_data) return; // prepared |
| 729 |
| 730 icu_data = new CA_uchar(); |
| 731 |
| 732 // Following code is borrowed from UPerfTest::getLines(); |
| 733 const UChar* line=NULL; |
| 734 int32_t len =0; |
| 735 for (;;) { |
| 736 line = ucbuf_readline(ucharBuf,&len,&status); |
| 737 if(line == NULL || U_FAILURE(status)){break;} |
| 738 |
| 739 // Refer to the source code of ucbuf_readline() |
| 740 // 1. 'len' includs the line terminal symbols |
| 741 // 2. The length of the line terminal symbols is only one character |
| 742 // 3. The Windows CR LF line terminal symbols will be converted to C
R |
| 743 |
| 744 if (len == 1) { |
| 745 continue; //skip empty line |
| 746 } else { |
| 747 icu_data->append_one(len); |
| 748 memcpy(icu_data->last(), line, len * sizeof(UChar)); |
| 749 icu_data->last()[len -1] = NULL; |
| 750 } |
| 751 } |
| 752 if(U_FAILURE(status)) return; |
| 753 |
| 754 // UTF-16 -> UTF-8 conversion. |
| 755 UConverter *conv = ucnv_open("utf-8", &status); // just UTF-8 for now. |
| 756 if (U_FAILURE(status)) return; |
| 757 |
| 758 count = icu_data->count; |
| 759 |
| 760 icu_data_all_len = icu_data->index[count]; // includes all NULLs |
| 761 icu_data_all_len -= count; // excludes all NULLs |
| 762 icu_data_all_len += 1; // the terminal NULL |
| 763 icu_data_all = new UChar[icu_data_all_len]; |
| 764 icu_data_all[icu_data_all_len - 1] = 0; //the terminal NULL |
| 765 |
| 766 icu_key = new CA_uint8; |
| 767 win_data = new CA_win_wchar; |
| 768 win_key = new CA_char; |
| 769 posix_data = new CA_char; |
| 770 posix_key = new CA_char; |
| 771 rnd_index = new DataIndex[count]; |
| 772 DataIndex::win_langid = win_langid; |
| 773 DataIndex::col = col; |
| 774 |
| 775 |
| 776 UChar * p = icu_data_all; |
| 777 int32_t s; |
| 778 int32_t t; |
| 779 for (int i=0; i < count; i++) { |
| 780 // ICU all data |
| 781 s = sizeof(UChar) * icu_data->lengthOf(i); |
| 782 memcpy(p, icu_data->dataOf(i), s); |
| 783 p += icu_data->lengthOf(i); |
| 784 |
| 785 // ICU data |
| 786 |
| 787 // ICU key |
| 788 s = ucol_getSortKey(col, icu_data->dataOf(i), -1,NULL, 0); |
| 789 icu_key->append_one(s); |
| 790 t = ucol_getSortKey(col, icu_data->dataOf(i), -1,icu_key->last(), s)
; |
| 791 if (t != s) {status = U_INVALID_FORMAT_ERROR;return;} |
| 792 |
| 793 // POSIX data |
| 794 s = ucnv_fromUChars(conv,NULL, 0, icu_data->dataOf(i), icu_data->len
gthOf(i), &status); |
| 795 if (status == U_BUFFER_OVERFLOW_ERROR || status == U_ZERO_ERROR){ |
| 796 status = U_ZERO_ERROR; |
| 797 } else { |
| 798 return; |
| 799 } |
| 800 posix_data->append_one(s + 1); // plus terminal NULL |
| 801 t = ucnv_fromUChars(conv,posix_data->last(), s, icu_data->dataOf(i),
icu_data->lengthOf(i), &status); |
| 802 if (U_FAILURE(status)) return; |
| 803 if ( t != s){status = U_INVALID_FORMAT_ERROR;return;} |
| 804 posix_data->last()[s] = 0; |
| 805 |
| 806 // POSIX key |
| 807 s = strxfrm(NULL, posix_data->dataOf(i), 0); |
| 808 if (s == INT_MAX){status = U_INVALID_FORMAT_ERROR;return;} |
| 809 posix_key->append_one(s); |
| 810 t = strxfrm(posix_key->last(), posix_data->dataOf(i), s); |
| 811 if (t != s) {status = U_INVALID_FORMAT_ERROR;return;} |
| 812 |
| 813 // Win data |
| 814 s = icu_data->lengthOf(i) + 1; // plus terminal NULL |
| 815 win_data->append_one(s); |
| 816 memcpy(win_data->last(), icu_data->dataOf(i), sizeof(WCHAR) * s); |
| 817 |
| 818 // Win key |
| 819 s = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win
_data->lengthOf(i), NULL,0); |
| 820 if (s == 0) {status = U_INVALID_FORMAT_ERROR;return;} |
| 821 win_key->append_one(s); |
| 822 t = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win
_data->lengthOf(i), (WCHAR *)(win_key->last()),s); |
| 823 if (t != s) {status = U_INVALID_FORMAT_ERROR;return;} |
| 824 |
| 825 }; |
| 826 |
| 827 // append_one() will make points shifting, should not merge following co
de into previous iteration |
| 828 for (int i=0; i < count; i++) { |
| 829 rnd_index[i].icu_key = icu_key->dataOf(i); |
| 830 rnd_index[i].icu_data = icu_data->dataOf(i); |
| 831 rnd_index[i].icu_data_len = icu_data->lengthOf(i); |
| 832 rnd_index[i].posix_key = posix_key->last(); |
| 833 rnd_index[i].posix_data = posix_data->dataOf(i); |
| 834 rnd_index[i].posix_data_len = posix_data->lengthOf(i); |
| 835 rnd_index[i].win_key = win_key->dataOf(i); |
| 836 rnd_index[i].win_data = win_data->dataOf(i); |
| 837 rnd_index[i].win_data_len = win_data->lengthOf(i); |
| 838 }; |
| 839 |
| 840 ucnv_close(conv); |
| 841 qsort(rnd_index, count, sizeof(DataIndex), CmdQsort::q_random); |
| 842 |
| 843 #define SORT(data, func) \ |
| 844 data = new DataIndex[count];\ |
| 845 memcpy(data, rnd_index, count * sizeof(DataIndex));\ |
| 846 qsort(data, count, sizeof(DataIndex), CmdQsort::func) |
| 847 |
| 848 SORT(ord_icu_data, icu_strcoll_len); |
| 849 SORT(ord_icu_key, icu_cmpkey); |
| 850 SORT(ord_posix_data, posix_strcoll_null); |
| 851 SORT(ord_posix_key, posix_cmpkey); |
| 852 SORT(ord_win_data, win_cmp_len); |
| 853 SORT(ord_win_key, win_cmpkey); |
| 854 SORT(ord_win_wcscmp, win_wcscmp); |
| 855 SORT(ord_icu_strcmp, icu_strcmp); |
| 856 SORT(ord_icu_cmpcpo, icu_cmpcpo); |
| 857 } |
| 858 }; |
| 859 |
| 860 |
| 861 int main(int argc, const char *argv[]) |
| 862 { |
| 863 |
| 864 UErrorCode status = U_ZERO_ERROR; |
| 865 CollPerfTest test(argc, argv, status); |
| 866 |
| 867 if (U_FAILURE(status)){ |
| 868 printf("The error is %s\n", u_errorName(status)); |
| 869 //TODO: print usage here |
| 870 return status; |
| 871 } |
| 872 |
| 873 if (test.run() == FALSE){ |
| 874 fprintf(stderr, "FAILED: Tests could not be run please check the " |
| 875 "arguments.\n"); |
| 876 return -1; |
| 877 } |
| 878 return 0; |
| 879 } |
| 880 |
OLD | NEW |