source/test/perf/collationperf/collperf.cpp - Issue 2435373002: Delete source/test

Unified Diff: source/test/perf/collationperf/collperf.cpp

Issue 2435373002: Delete source/test (Closed)

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: source/test/perf/collationperf/collperf.cpp

diff --git a/source/test/perf/collationperf/collperf.cpp b/source/test/perf/collationperf/collperf.cpp

deleted file mode 100644

index 1ce078176178ee80d2c288593729b3c2a2af3a5a..0000000000000000000000000000000000000000

--- a/source/test/perf/collationperf/collperf.cpp

+++ /dev/null

@@ -1,1761 +0,0 @@

-/********************************************************************

- * COPYRIGHT:

- *

- ********************************************************************/

-/********************************************************************************

-* File CALLCOLL.C

-* Modification History:

-* Name Description

-* Andy Heninger First Version

-*********************************************************************************

-*/

-//

-// This program tests string collation and sort key generation performance.

-// Three APIs can be teste: ICU C , Unix strcoll, strxfrm and Windows LCMapString

-// A file of names is required as input, one per line. It must be in utf-8 or utf-16 format,

-// and include a byte order mark. Either LE or BE format is OK.

-//

-const char gUsageString[] =

- "usage: collperf options...\n"

- "-help Display this message.\n"

- "-file file_name utf-16 format file of names.\n"

- "-locale name ICU locale to use. Default is en_US\n"

- "-rules file_name Collation rules file (overrides locale)\n"

- "-langid 0x1234 Windows Language ID number. Default to value for -locale option\n"

- " see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n"

- "-win Run test using Windows native services. (ICU is default)\n"

- "-unix Run test using Unix strxfrm, strcoll services.\n"

- "-uselen Use API with string lengths. Default is null-terminated strings\n"

- "-usekeys Run tests using sortkeys rather than strcoll\n"

- "-strcmp Run tests using u_strcmp rather than strcoll\n"

- "-strcmpCPO Run tests using u_strcmpCodePointOrder rather than strcoll\n"

- "-loop nnnn Loopcount for test. Adjust for reasonable total running time.\n"

- "-iloop n Inner Loop Count. Default = 1. Number of calls to function\n"

- " under test at each call point. For measuring test overhead.\n"

- "-terse Terse numbers-only output. Intended for use by scripts.\n"

- "-french French accent ordering\n"

- "-frenchoff No French accent ordering (for use with French locales.)\n"

- "-norm Normalizing mode on\n"

- "-shifted Shifted mode\n"

- "-lower Lower case first\n"

- "-upper Upper case first\n"

- "-case Enable separate case level\n"

- "-level n Sort level, 1 to 5, for Primary, Secndary, Tertiary, Quaternary, Identical\n"

- "-keyhist Produce a table sort key size vs. string length\n"

- "-binsearch Binary Search timing test\n"

- "-keygen Sort Key Generation timing test\n"

- "-qsort Quicksort timing test\n"

- "-iter Iteration Performance Test\n"

- "-dump Display strings, sort keys and CEs.\n"

- ;

-#include <stdio.h>

-#include <string.h>

-#include <stdlib.h>

-#include <math.h>

-#include <locale.h>

-#include <errno.h>

-#include <unicode/utypes.h>

-#include <unicode/ucol.h>

-#include <unicode/ucoleitr.h>

-#include <unicode/uloc.h>

-#include <unicode/ustring.h>

-#include <unicode/ures.h>

-#include <unicode/uchar.h>

-#include <unicode/ucnv.h>

-#include <unicode/utf8.h>

-#ifdef WIN32

-#include <windows.h>

-#else

-//

-// Stubs for Windows API functions when building on UNIXes.

-//

-typedef int DWORD;

-inline int CompareStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;}

-#include <sys/time.h>

-unsigned long timeGetTime() {

- struct timeval t;

- gettimeofday(&t, 0);

- unsigned long val = t.tv_sec * 1000; // Let it overflow. Who cares.

- val += t.tv_usec / 1000;

- return val;

-inline int LCMapStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;}

-const int LCMAP_SORTKEY = 0;

-#define MAKELCID(a,b) 0

-const int SORT_DEFAULT = 0;

-#endif

-//

-// Command line option variables

-// These global variables are set according to the options specified

-// on the command line by the user.

-char * opt_fName = 0;

-const char * opt_locale = "en_US";

-int opt_langid = 0; // Defaults to value corresponding to opt_locale.

-char * opt_rules = 0;

-UBool opt_help = FALSE;

-int opt_loopCount = 1;

-int opt_iLoopCount = 1;

-UBool opt_terse = FALSE;

-UBool opt_qsort = FALSE;

-UBool opt_binsearch = FALSE;

-UBool opt_icu = TRUE;

-UBool opt_win = FALSE; // Run with Windows native functions.

-UBool opt_unix = FALSE; // Run with UNIX strcoll, strxfrm functions.

-UBool opt_uselen = FALSE;

-UBool opt_usekeys = FALSE;

-UBool opt_strcmp = FALSE;

-UBool opt_strcmpCPO = FALSE;

-UBool opt_norm = FALSE;

-UBool opt_keygen = FALSE;

-UBool opt_french = FALSE;

-UBool opt_frenchoff = FALSE;

-UBool opt_shifted = FALSE;

-UBool opt_lower = FALSE;

-UBool opt_upper = FALSE;

-UBool opt_case = FALSE;

-int opt_level = 0;

-UBool opt_keyhist = FALSE;

-UBool opt_itertest = FALSE;

-UBool opt_dump = FALSE;

-//

-// Definitions for the command line options

-//

-struct OptSpec {

- const char *name;

- enum {FLAG, NUM, STRING} type;

- void *pVar;

-};

-OptSpec opts[] = {

- {"-file", OptSpec::STRING, &opt_fName},

- {"-locale", OptSpec::STRING, &opt_locale},

- {"-langid", OptSpec::NUM, &opt_langid},

- {"-rules", OptSpec::STRING, &opt_rules},

- {"-qsort", OptSpec::FLAG, &opt_qsort},

- {"-binsearch", OptSpec::FLAG, &opt_binsearch},

- {"-iter", OptSpec::FLAG, &opt_itertest},

- {"-win", OptSpec::FLAG, &opt_win},

- {"-unix", OptSpec::FLAG, &opt_unix},

- {"-uselen", OptSpec::FLAG, &opt_uselen},

- {"-usekeys", OptSpec::FLAG, &opt_usekeys},

- {"-strcmp", OptSpec::FLAG, &opt_strcmp},

- {"-strcmpCPO", OptSpec::FLAG, &opt_strcmpCPO},

- {"-norm", OptSpec::FLAG, &opt_norm},

- {"-french", OptSpec::FLAG, &opt_french},

- {"-frenchoff", OptSpec::FLAG, &opt_frenchoff},

- {"-shifted", OptSpec::FLAG, &opt_shifted},

- {"-lower", OptSpec::FLAG, &opt_lower},

- {"-upper", OptSpec::FLAG, &opt_upper},

- {"-case", OptSpec::FLAG, &opt_case},

- {"-level", OptSpec::NUM, &opt_level},

- {"-keyhist", OptSpec::FLAG, &opt_keyhist},

- {"-keygen", OptSpec::FLAG, &opt_keygen},

- {"-loop", OptSpec::NUM, &opt_loopCount},

- {"-iloop", OptSpec::NUM, &opt_iLoopCount},

- {"-terse", OptSpec::FLAG, &opt_terse},

- {"-dump", OptSpec::FLAG, &opt_dump},

- {"-help", OptSpec::FLAG, &opt_help},

- {"-?", OptSpec::FLAG, &opt_help},

- {0, OptSpec::FLAG, 0}

-};

-//---------------------------------------------------------------------------

-//

-// Global variables pointing to and describing the test file

-//

-//---------------------------------------------------------------------------

-//

-// struct Line

-//

-// Each line from the source file (containing a name, presumably) gets

-// one of these structs.

-//

-struct Line {

- UChar *name;

- int len;

- char *winSortKey;

- char *icuSortKey;

- char *unixSortKey;

- char *unixName;

-};

-Line *gFileLines; // Ptr to array of Line structs, one per line in the file.

-int gNumFileLines;

-UCollator *gCol;

-DWORD gWinLCID;

-Line **gSortedLines;

-Line **gRandomLines;

-int gCount;

-//---------------------------------------------------------------------------

-//

-// ProcessOptions() Function to read the command line options.

-//

-//---------------------------------------------------------------------------

-UBool ProcessOptions(int argc, const char **argv, OptSpec opts[])

- int i;

- int argNum;

- const char *pArgName;

- OptSpec *pOpt;

- for (argNum=1; argNum<argc; argNum++) {

- pArgName = argv[argNum];

- for (pOpt = opts; pOpt->name != 0; pOpt++) {

- if (strcmp(pOpt->name, pArgName) == 0) {

- switch (pOpt->type) {

- case OptSpec::FLAG:

- *(UBool *)(pOpt->pVar) = TRUE;

- break;

- case OptSpec::STRING:

- argNum ++;

- if (argNum >= argc) {

- fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name);

- return FALSE;

- }

- *(const char **)(pOpt->pVar) = argv[argNum];

- break;

- case OptSpec::NUM:

- argNum ++;

- if (argNum >= argc) {

- fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name);

- return FALSE;

- }

- char *endp;

- i = strtol(argv[argNum], &endp, 0);

- if (endp == argv[argNum]) {

- fprintf(stderr, "integer value expected for \"%s\" option.\n", pOpt->name);

- return FALSE;

- }

- *(int *)(pOpt->pVar) = i;

- }

- break;

- }

- if (pOpt->name == 0)

- {

- fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);

- return FALSE;

- }

-return TRUE;

-//---------------------------------------------------------------------------------------

-//

-// Comparison functions for use by qsort.

-//

-// Six flavors, ICU or Windows, SortKey or String Compare, Strings with length

-// or null terminated.

-//

-//---------------------------------------------------------------------------------------

-int ICUstrcmpK(const void *a, const void *b) {

- gCount++;

- int t = strcmp((*(Line **)a)->icuSortKey, (*(Line **)b)->icuSortKey);

- return t;

-int ICUstrcmpL(const void *a, const void *b) {

- gCount++;

- UCollationResult t;

- t = ucol_strcoll(gCol, (*(Line **)a)->name, (*(Line **)a)->len, (*(Line **)b)->name, (*(Line **)b)->len);

- if (t == UCOL_LESS) return -1;

- if (t == UCOL_GREATER) return +1;

- return 0;

-int ICUstrcmp(const void *a, const void *b) {

- gCount++;

- UCollationResult t;

- t = ucol_strcoll(gCol, (*(Line **)a)->name, -1, (*(Line **)b)->name, -1);

- if (t == UCOL_LESS) return -1;

- if (t == UCOL_GREATER) return +1;

- return 0;

-int Winstrcmp(const void *a, const void *b) {

- gCount++;

- int t;

- t = CompareStringW(gWinLCID, 0, (*(Line **)a)->name, -1, (*(Line **)b)->name, -1);

- return t-2;

-int UNIXstrcmp(const void *a, const void *b) {

- gCount++;

- int t;

- t = strcoll((*(Line **)a)->unixName, (*(Line **)b)->unixName);

- return t;

-int WinstrcmpL(const void *a, const void *b) {

- gCount++;

- int t;

- t = CompareStringW(gWinLCID, 0, (*(Line **)a)->name, (*(Line **)a)->len, (*(Line **)b)->name, (*(Line **)b)->len);

- return t-2;

-int WinstrcmpK(const void *a, const void *b) {

- gCount++;

- int t = strcmp((*(Line **)a)->winSortKey, (*(Line **)b)->winSortKey);

- return t;

-//---------------------------------------------------------------------------------------

-//

-// Function for sorting the names (lines) into a random order.

-// Order is based on a hash of the ICU Sort key for the lines

-// The randomized order is used as input for the sorting timing tests.

-//

-//---------------------------------------------------------------------------------------

-int ICURandomCmp(const void *a, const void *b) {

- char *ask = (*(Line **)a)->icuSortKey;

- char *bsk = (*(Line **)b)->icuSortKey;

- int aVal = 0;

- int bVal = 0;

- int retVal;

- while (*ask != 0) {

- aVal += aVal*37 + *ask++;

- }

- while (*bsk != 0) {

- bVal += bVal*37 + *bsk++;

- }

- retVal = -1;

- if (aVal == bVal) {

- retVal = 0;

- }

- else if (aVal > bVal) {

- retVal = 1;

- }

- return retVal;

-//---------------------------------------------------------------------------------------

-//

-// doKeyGen() Key Generation Timing Test

-//

-//---------------------------------------------------------------------------------------

-void doKeyGen()

- int line;

- int loops = 0;

- int iLoop;

- int t;

- int len=-1;

- // Adjust loop count to compensate for file size. Should be order n

- double dLoopCount = double(opt_loopCount) * (1000. / double(gNumFileLines));

- int adj_loopCount = int(dLoopCount);

- if (adj_loopCount < 1) adj_loopCount = 1;

- unsigned long startTime = timeGetTime();

- if (opt_win) {

- for (loops=0; loops<adj_loopCount; loops++) {

- for (line=0; line < gNumFileLines; line++) {

- if (opt_uselen) {

- len = gFileLines[line].len;

- }

- for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {

- t=LCMapStringW(gWinLCID, LCMAP_SORTKEY,

- gFileLines[line].name, len,

- (unsigned short *)gFileLines[line].winSortKey, 5000); // TODO something with length.

- }

- else if (opt_icu)

- {

- for (loops=0; loops<adj_loopCount; loops++) {

- for (line=0; line < gNumFileLines; line++) {

- if (opt_uselen) {

- len = gFileLines[line].len;

- }

- for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {

- t = ucol_getSortKey(gCol, gFileLines[line].name, len, (unsigned char *)gFileLines[line].icuSortKey, 5000);

- }

- else if (opt_unix)

- {

- for (loops=0; loops<adj_loopCount; loops++) {

- for (line=0; line < gNumFileLines; line++) {

- for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {

- t = strxfrm(gFileLines[line].unixSortKey, gFileLines[line].unixName, 5000);

- }

- unsigned long elapsedTime = timeGetTime() - startTime;

- int ns = (int)(float(1000000) * (float)elapsedTime / (float)(adj_loopCount*gNumFileLines));

- if (opt_terse == FALSE) {

- printf("Sort Key Generation: total # of keys = %d\n", loops*gNumFileLines);

- printf("Sort Key Generation: time per key = %d ns\n", ns);

- }

- else {

- printf("%d, ", ns);

- }

- int totalKeyLen = 0;

- int totalChars = 0;

- for (line=0; line<gNumFileLines; line++) {

- totalChars += u_strlen(gFileLines[line].name);

- if (opt_win) {

- totalKeyLen += strlen(gFileLines[line].winSortKey);

- }

- else if (opt_icu) {

- totalKeyLen += strlen(gFileLines[line].icuSortKey);

- }

- else if (opt_unix) {

- totalKeyLen += strlen(gFileLines[line].unixSortKey);

- }

- if (opt_terse == FALSE) {

- printf("Key Length / character = %f\n", (float)totalKeyLen / (float)totalChars);

- } else {

- printf("%f, ", (float)totalKeyLen / (float)totalChars);

- }

-//---------------------------------------------------------------------------------------

-//

-// doBinarySearch() Binary Search timing test. Each name from the list

-// is looked up in the full sorted list of names.

-//

-//---------------------------------------------------------------------------------------

-void doBinarySearch()

- gCount = 0;

- int line;

- int loops = 0;

- int iLoop = 0;

- unsigned long elapsedTime = 0;

- // Adjust loop count to compensate for file size. Should be order n (lookups) * log n (compares/lookup)

- // Accurate timings do not depend on this being perfect. The correction is just to try to

- // get total running times of about the right order, so the that user doesn't need to

- // manually adjust the loop count for every different file size.

- double dLoopCount = double(opt_loopCount) * 3000. / (log10((double)gNumFileLines) * double(gNumFileLines));

- if (opt_usekeys) dLoopCount *= 5;

- int adj_loopCount = int(dLoopCount);

- if (adj_loopCount < 1) adj_loopCount = 1;

- for (;;) { // not really a loop, just allows "break" to work, to simplify

- // inadvertantly running more than one test through here.

- if (opt_strcmp || opt_strcmpCPO)

- {

- unsigned long startTime = timeGetTime();

- typedef int32_t (U_EXPORT2 *PF)(const UChar *, const UChar *);

- PF pf = u_strcmp;

- if (opt_strcmpCPO) {pf = u_strcmpCodePointOrder;}

- //if (opt_strcmp && opt_win) {pf = (PF)wcscmp;} // Damn the difference between int32_t and int

- // which forces the use of a cast here.

- int r = 0;

- for (loops=0; loops<adj_loopCount; loops++) {

- for (line=0; line < gNumFileLines; line++) {

- int hi = gNumFileLines-1;

- int lo = 0;

- int guess = -1;

- for (;;) {

- int newGuess = (hi + lo) / 2;

- if (newGuess == guess)

- break;

- guess = newGuess;

- for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {

- r = (*pf)((gSortedLines[line])->name, (gSortedLines[guess])->name);

- }

- gCount++;

- if (r== 0)

- break;

- if (r < 0)

- hi = guess;

- else

- lo = guess;

- }

- elapsedTime = timeGetTime() - startTime;

- break;

- }

- if (opt_icu)

- {

- unsigned long startTime = timeGetTime();

- UCollationResult r = UCOL_EQUAL;

- for (loops=0; loops<adj_loopCount; loops++) {

- for (line=0; line < gNumFileLines; line++) {

- int lineLen = -1;

- int guessLen = -1;

- if (opt_uselen) {

- lineLen = (gSortedLines[line])->len;

- }

- int hi = gNumFileLines-1;

- int lo = 0;

- int guess = -1;

- for (;;) {

- int newGuess = (hi + lo) / 2;

- if (newGuess == guess)

- break;

- guess = newGuess;

- int ri = 0;

- if (opt_usekeys) {

- for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {

- ri = strcmp((gSortedLines[line])->icuSortKey, (gSortedLines[guess])->icuSortKey);

- }

- gCount++;

- r=UCOL_GREATER; if(ri<0) {r=UCOL_LESS;} else if (ri==0) {r=UCOL_EQUAL;}

- }

- else

- {

- if (opt_uselen) {

- guessLen = (gSortedLines[guess])->len;

- }

- for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {

- r = ucol_strcoll(gCol, (gSortedLines[line])->name, lineLen, (gSortedLines[guess])->name, guessLen);

- }

- gCount++;

- }

- if (r== UCOL_EQUAL)

- break;

- if (r == UCOL_LESS)

- hi = guess;

- else

- lo = guess;

- }

- elapsedTime = timeGetTime() - startTime;

- break;

- }

- if (opt_win)

- {

- unsigned long startTime = timeGetTime();

- int r = 0;

- for (loops=0; loops<adj_loopCount; loops++) {

- for (line=0; line < gNumFileLines; line++) {

- int lineLen = -1;

- int guessLen = -1;

- if (opt_uselen) {

- lineLen = (gSortedLines[line])->len;

- }

- int hi = gNumFileLines-1;

- int lo = 0;

- int guess = -1;

- for (;;) {

- int newGuess = (hi + lo) / 2;

- if (newGuess == guess)

- break;

- guess = newGuess;

- if (opt_usekeys) {

- for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {

- r = strcmp((gSortedLines[line])->winSortKey, (gSortedLines[guess])->winSortKey);

- }

- gCount++;

- r+=2;

- }

- else

- {

- if (opt_uselen) {

- guessLen = (gSortedLines[guess])->len;

- }

- for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {

- r = CompareStringW(gWinLCID, 0, (gSortedLines[line])->name, lineLen, (gSortedLines[guess])->name, guessLen);

- }

- if (r == 0) {

- if (opt_terse == FALSE) {

- fprintf(stderr, "Error returned from Windows CompareStringW.\n");

- }

- exit(-1);

- }

- gCount++;

- }

- if (r== 2) // strings ==

- break;

- if (r == 1) // line < guess

- hi = guess;

- else // line > guess

- lo = guess;

- }

- elapsedTime = timeGetTime() - startTime;

- break;

- }

- if (opt_unix)

- {

- unsigned long startTime = timeGetTime();

- int r = 0;

- for (loops=0; loops<adj_loopCount; loops++) {

- for (line=0; line < gNumFileLines; line++) {

- int hi = gNumFileLines-1;

- int lo = 0;

- int guess = -1;

- for (;;) {

- int newGuess = (hi + lo) / 2;

- if (newGuess == guess)

- break;

- guess = newGuess;

- if (opt_usekeys) {

- for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {

- r = strcmp((gSortedLines[line])->unixSortKey, (gSortedLines[guess])->unixSortKey);

- }

- gCount++;

- }

- else

- {

- for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {

- r = strcoll((gSortedLines[line])->unixName, (gSortedLines[guess])->unixName);

- }

- errno = 0;

- if (errno != 0) {

- fprintf(stderr, "Error %d returned from strcoll.\n", errno);

- exit(-1);

- }

- gCount++;

- }

- if (r == 0) // strings ==

- break;

- if (r < 0) // line < guess

- hi = guess;

- else // line > guess

- lo = guess;

- }

- elapsedTime = timeGetTime() - startTime;

- break;

- }

- break;

- }

- int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount);

- if (opt_terse == FALSE) {

- printf("binary search: total # of string compares = %d\n", gCount);

- printf("binary search: compares per loop = %d\n", gCount / loops);

- printf("binary search: time per compare = %d ns\n", ns);

- } else {

- printf("%d, ", ns);

- }

-//---------------------------------------------------------------------------------------

-//

-// doQSort() The quick sort timing test. Uses the C library qsort function.

-//

-//---------------------------------------------------------------------------------------

-void doQSort() {

- int i;

- Line **sortBuf = new Line *[gNumFileLines];

- // Adjust loop count to compensate for file size. QSort should be n log(n)

- double dLoopCount = double(opt_loopCount) * 3000. / (log10((double)gNumFileLines) * double(gNumFileLines));

- if (opt_usekeys) dLoopCount *= 5;

- int adj_loopCount = int(dLoopCount);

- if (adj_loopCount < 1) adj_loopCount = 1;

- gCount = 0;

- unsigned long startTime = timeGetTime();

- if (opt_win && opt_usekeys) {

- for (i=0; i<opt_loopCount; i++) {

- memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *));

- qsort(sortBuf, gNumFileLines, sizeof(Line *), WinstrcmpK);

- }

- else if (opt_win && opt_uselen) {

- for (i=0; i<adj_loopCount; i++) {

- memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *));

- qsort(sortBuf, gNumFileLines, sizeof(Line *), WinstrcmpL);

- }

- else if (opt_win && !opt_uselen) {

- for (i=0; i<adj_loopCount; i++) {

- memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *));

- qsort(sortBuf, gNumFileLines, sizeof(Line *), Winstrcmp);

- }

- else if (opt_icu && opt_usekeys) {

- for (i=0; i<adj_loopCount; i++) {

- memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *));

- qsort(sortBuf, gNumFileLines, sizeof(Line *), ICUstrcmpK);

- }

- else if (opt_icu && opt_uselen) {

- for (i=0; i<adj_loopCount; i++) {

- memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *));

- qsort(sortBuf, gNumFileLines, sizeof(Line *), ICUstrcmpL);

- }

- else if (opt_icu && !opt_uselen) {

- for (i=0; i<adj_loopCount; i++) {

- memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *));

- qsort(sortBuf, gNumFileLines, sizeof(Line *), ICUstrcmp);

- }

- else if (opt_unix && !opt_usekeys) {

- for (i=0; i<adj_loopCount; i++) {

- memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *));

- qsort(sortBuf, gNumFileLines, sizeof(Line *), UNIXstrcmp);

- }

- unsigned long elapsedTime = timeGetTime() - startTime;

- int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount);

- if (opt_terse == FALSE) {

- printf("qsort: total # of string compares = %d\n", gCount);

- printf("qsort: time per compare = %d ns\n", ns);

- } else {

- printf("%d, ", ns);

- }

-//---------------------------------------------------------------------------------------

-//

-// doKeyHist() Output a table of data for

-// average sort key size vs. string length.

-//

-//---------------------------------------------------------------------------------------

-void doKeyHist() {

- int i;

- int maxLen = 0;

- // Find the maximum string length

- for (i=0; i<gNumFileLines; i++) {

- if (gFileLines[i].len > maxLen) maxLen = gFileLines[i].len;

- }

- // Allocate arrays to hold the histogram data

- int *accumulatedLen = new int[maxLen+1];

- int *numKeysOfSize = new int[maxLen+1];

- for (i=0; i<=maxLen; i++) {

- accumulatedLen[i] = 0;

- numKeysOfSize[i] = 0;

- }

- // Fill the arrays...

- for (i=0; i<gNumFileLines; i++) {

- int len = gFileLines[i].len;

- accumulatedLen[len] += strlen(gFileLines[i].icuSortKey);

- numKeysOfSize[len] += 1;

- }

- // And write out averages

- printf("String Length, Avg Key Length, Avg Key Len per char\n");

- for (i=1; i<=maxLen; i++) {

- if (numKeysOfSize[i] > 0) {

- printf("%d, %f, %f\n", i, (float)accumulatedLen[i] / (float)numKeysOfSize[i],

- (float)accumulatedLen[i] / (float)(numKeysOfSize[i] * i));

- }

- delete []accumulatedLen;

- delete []numKeysOfSize ;

-//---------------------------------------------------------------------------------------

-//

-// doForwardIterTest(UBool) Forward iteration test

-// argument null-terminated string used

-//

-//---------------------------------------------------------------------------------------

-void doForwardIterTest(UBool haslen) {

- int count = 0;

- UErrorCode error = U_ZERO_ERROR;

- printf("\n\nPerforming forward iteration performance test with ");

- if (haslen) {

- printf("non-null terminated data -----------\n");

- }

- else {

- printf("null terminated data -----------\n");

- }

- printf("performance test on strings from file -----------\n");

- UChar dummytext[] = {0, 0};

- UCollationElements *iter = ucol_openElements(gCol, NULL, 0, &error);

- ucol_setText(iter, dummytext, 1, &error);

- gCount = 0;

- unsigned long startTime = timeGetTime();

- while (count < opt_loopCount) {

- int linecount = 0;

- while (linecount < gNumFileLines) {

- UChar *str = gFileLines[linecount].name;

- int strlen = haslen?gFileLines[linecount].len:-1;

- ucol_setText(iter, str, strlen, &error);

- while (ucol_next(iter, &error) != UCOL_NULLORDER) {

- gCount++;

- }

- linecount ++;

- }

- count ++;

- }

- unsigned long elapsedTime = timeGetTime() - startTime;

- printf("elapsedTime %ld\n", elapsedTime);

- // empty loop recalculation

- count = 0;

- startTime = timeGetTime();

- while (count < opt_loopCount) {

- int linecount = 0;

- while (linecount < gNumFileLines) {

- UChar *str = gFileLines[linecount].name;

- int strlen = haslen?gFileLines[linecount].len:-1;

- ucol_setText(iter, str, strlen, &error);

- linecount ++;

- }

- count ++;

- }

- elapsedTime -= (timeGetTime() - startTime);

- printf("elapsedTime %ld\n", elapsedTime);

- ucol_closeElements(iter);

- int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount);

- printf("Total number of strings compared %d in %d loops\n", gNumFileLines,

- opt_loopCount);

- printf("Average time per ucol_next() nano seconds %d\n", ns);

- printf("performance test on skipped-5 concatenated strings from file -----------\n");

- UChar *str;

- int strlen = 0;

- // appending all the strings

- int linecount = 0;

- while (linecount < gNumFileLines) {

- strlen += haslen?gFileLines[linecount].len:

- u_strlen(gFileLines[linecount].name);

- linecount ++;

- }

- str = (UChar *)malloc(sizeof(UChar) * strlen);

- int strindex = 0;

- linecount = 0;

- while (strindex < strlen) {

- int len = 0;

- len += haslen?gFileLines[linecount].len:

- u_strlen(gFileLines[linecount].name);

- memcpy(str + strindex, gFileLines[linecount].name,

- sizeof(UChar) * len);

- strindex += len;

- linecount ++;

- }

- printf("Total size of strings %d\n", strlen);

- gCount = 0;

- count = 0;

- if (!haslen) {

- strlen = -1;

- }

- iter = ucol_openElements(gCol, str, strlen, &error);

- if (!haslen) {

- strlen = u_strlen(str);

- }

- strlen -= 5; // any left over characters are not iterated,

- // this is to ensure the backwards and forwards iterators

- // gets the same position

- startTime = timeGetTime();

- while (count < opt_loopCount) {

- int count5 = 5;

- strindex = 0;

- ucol_setOffset(iter, strindex, &error);

- while (TRUE) {

- if (ucol_next(iter, &error) == UCOL_NULLORDER) {

- break;

- }

- gCount++;

- count5 --;

- if (count5 == 0) {

- strindex += 10;

- if (strindex > strlen) {

- break;

- }

- ucol_setOffset(iter, strindex, &error);

- count5 = 5;

- }

- count ++;

- }

- elapsedTime = timeGetTime() - startTime;

- printf("elapsedTime %ld\n", elapsedTime);

- // empty loop recalculation

- int tempgCount = 0;

- count = 0;

- startTime = timeGetTime();

- while (count < opt_loopCount) {

- int count5 = 5;

- strindex = 0;

- ucol_setOffset(iter, strindex, &error);

- while (TRUE) {

- tempgCount ++;

- count5 --;

- if (count5 == 0) {

- strindex += 10;

- if (strindex > strlen) {

- break;

- }

- ucol_setOffset(iter, strindex, &error);

- count5 = 5;

- }

- count ++;

- }

- elapsedTime -= (timeGetTime() - startTime);

- printf("elapsedTime %ld\n", elapsedTime);

- ucol_closeElements(iter);

- printf("gCount %d\n", gCount);

- ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount);

- printf("Average time per ucol_next() nano seconds %d\n", ns);

-//---------------------------------------------------------------------------------------

-//

-// doBackwardIterTest(UBool) Backwards iteration test

-// argument null-terminated string used

-//

-//---------------------------------------------------------------------------------------

-void doBackwardIterTest(UBool haslen) {

- int count = 0;

- UErrorCode error = U_ZERO_ERROR;

- printf("\n\nPerforming backward iteration performance test with ");

- if (haslen) {

- printf("non-null terminated data -----------\n");

- }

- else {

- printf("null terminated data -----------\n");

- }

- printf("performance test on strings from file -----------\n");

- UCollationElements *iter = ucol_openElements(gCol, NULL, 0, &error);

- UChar dummytext[] = {0, 0};

- ucol_setText(iter, dummytext, 1, &error);

- gCount = 0;

- unsigned long startTime = timeGetTime();

- while (count < opt_loopCount) {

- int linecount = 0;

- while (linecount < gNumFileLines) {

- UChar *str = gFileLines[linecount].name;

- int strlen = haslen?gFileLines[linecount].len:-1;

- ucol_setText(iter, str, strlen, &error);

- while (ucol_previous(iter, &error) != UCOL_NULLORDER) {

- gCount ++;

- }

- linecount ++;

- }

- count ++;

- }

- unsigned long elapsedTime = timeGetTime() - startTime;

- printf("elapsedTime %ld\n", elapsedTime);

- // empty loop recalculation

- count = 0;

- startTime = timeGetTime();

- while (count < opt_loopCount) {

- int linecount = 0;

- while (linecount < gNumFileLines) {

- UChar *str = gFileLines[linecount].name;

- int strlen = haslen?gFileLines[linecount].len:-1;

- ucol_setText(iter, str, strlen, &error);

- linecount ++;

- }

- count ++;

- }

- elapsedTime -= (timeGetTime() - startTime);

- printf("elapsedTime %ld\n", elapsedTime);

- ucol_closeElements(iter);

- int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount);

- printf("Total number of strings compared %d in %d loops\n", gNumFileLines,

- opt_loopCount);

- printf("Average time per ucol_previous() nano seconds %d\n", ns);

- printf("performance test on skipped-5 concatenated strings from file -----------\n");

- UChar *str;

- int strlen = 0;

- // appending all the strings

- int linecount = 0;

- while (linecount < gNumFileLines) {

- strlen += haslen?gFileLines[linecount].len:

- u_strlen(gFileLines[linecount].name);

- linecount ++;

- }

- str = (UChar *)malloc(sizeof(UChar) * strlen);

- int strindex = 0;

- linecount = 0;

- while (strindex < strlen) {

- int len = 0;

- len += haslen?gFileLines[linecount].len:

- u_strlen(gFileLines[linecount].name);

- memcpy(str + strindex, gFileLines[linecount].name,

- sizeof(UChar) * len);

- strindex += len;

- linecount ++;

- }

- printf("Total size of strings %d\n", strlen);

- gCount = 0;

- count = 0;

- if (!haslen) {

- strlen = -1;

- }

- iter = ucol_openElements(gCol, str, strlen, &error);

- if (!haslen) {

- strlen = u_strlen(str);

- }

- startTime = timeGetTime();

- while (count < opt_loopCount) {

- int count5 = 5;

- strindex = 5;

- ucol_setOffset(iter, strindex, &error);

- while (TRUE) {

- if (ucol_previous(iter, &error) == UCOL_NULLORDER) {

- break;

- }

- gCount ++;

- count5 --;

- if (count5 == 0) {

- strindex += 10;

- if (strindex > strlen) {

- break;

- }

- ucol_setOffset(iter, strindex, &error);

- count5 = 5;

- }

- count ++;

- }

- elapsedTime = timeGetTime() - startTime;

- printf("elapsedTime %ld\n", elapsedTime);

- // empty loop recalculation

- count = 0;

- int tempgCount = 0;

- startTime = timeGetTime();

- while (count < opt_loopCount) {

- int count5 = 5;

- strindex = 5;

- ucol_setOffset(iter, strindex, &error);

- while (TRUE) {

- tempgCount ++;

- count5 --;

- if (count5 == 0) {

- strindex += 10;

- if (strindex > strlen) {

- break;

- }

- ucol_setOffset(iter, strindex, &error);

- count5 = 5;

- }

- count ++;

- }

- elapsedTime -= (timeGetTime() - startTime);

- printf("elapsedTime %ld\n", elapsedTime);

- ucol_closeElements(iter);

- printf("gCount %d\n", gCount);

- ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount);

- printf("Average time per ucol_previous() nano seconds %d\n", ns);

-//---------------------------------------------------------------------------------------

-//

-// doIterTest() Iteration test

-//

-//---------------------------------------------------------------------------------------

-void doIterTest() {

- doForwardIterTest(opt_uselen);

- doBackwardIterTest(opt_uselen);

-//----------------------------------------------------------------------------------------

-//

-// UnixConvert -- Convert the lines of the file to the encoding for UNIX

-// Since it appears that Unicode support is going in the general

-// direction of the use of UTF-8 locales, that is the approach

-// that is used here.

-//

-//----------------------------------------------------------------------------------------

-void UnixConvert() {

- int line;

- UConverter *cvrtr; // An ICU code page converter.

- UErrorCode status = U_ZERO_ERROR;

- cvrtr = ucnv_open("utf-8", &status); // we are just doing UTF-8 locales for now.

- if (U_FAILURE(status)) {

- fprintf(stderr, "ICU Converter open failed.: %s\n", u_errorName(status));

- exit(-1);

- }

- for (line=0; line < gNumFileLines; line++) {

- int sizeNeeded = ucnv_fromUChars(cvrtr,

- 0, // ptr to target buffer.

- 0, // length of target buffer.

- gFileLines[line].name,

- -1, // source is null terminated

- &status);

- if (status != U_BUFFER_OVERFLOW_ERROR && status != U_ZERO_ERROR) {

- //fprintf(stderr, "Conversion from Unicode, something is wrong.\n");

- //exit(-1);

- }

- status = U_ZERO_ERROR;

- gFileLines[line].unixName = new char[sizeNeeded+1];

- sizeNeeded = ucnv_fromUChars(cvrtr,

- gFileLines[line].unixName, // ptr to target buffer.

- sizeNeeded+1, // length of target buffer.

- gFileLines[line].name,

- -1, // source is null terminated

- &status);

- if (U_FAILURE(status)) {

- fprintf(stderr, "ICU Conversion Failed.: %d\n", status);

- exit(-1);

- }

- gFileLines[line].unixName[sizeNeeded] = 0;

- };

- ucnv_close(cvrtr);

-//----------------------------------------------------------------------------------------

-//

-// class UCharFile Class to hide all the gorp to read a file in

-// and produce a stream of UChars.

-//

-//----------------------------------------------------------------------------------------

-class UCharFile {

-public:

- UCharFile(const char *fileName);

- ~UCharFile();

- UChar get();

- UBool eof() {return fEof;};

- UBool error() {return fError;};

-private:

- UCharFile (const UCharFile & /*other*/) {}; // No copy constructor.

- UCharFile & operator = (const UCharFile &/*other*/) {return *this;}; // No assignment op

- FILE *fFile;

- const char *fName;

- UBool fEof;

- UBool fError;

- UChar fPending2ndSurrogate;

- enum {UTF16LE, UTF16BE, UTF8} fEncoding;

-};

-UCharFile::UCharFile(const char * fileName) {

- fEof = FALSE;

- fError = FALSE;

- fName = fileName;

- fFile = fopen(fName, "rb");

- fPending2ndSurrogate = 0;

- if (fFile == NULL) {

- fprintf(stderr, "Can not open file \"%s\"\n", opt_fName);

- fError = TRUE;

- return;

- }

- //

- // Look for the byte order mark at the start of the file.

- //

- int BOMC1, BOMC2, BOMC3;

- BOMC1 = fgetc(fFile);

- BOMC2 = fgetc(fFile);

- if (BOMC1 == 0xff && BOMC2 == 0xfe) {

- fEncoding = UTF16LE; }

- else if (BOMC1 == 0xfe && BOMC2 == 0xff) {

- fEncoding = UTF16BE; }

- else if (BOMC1 == 0xEF && BOMC2 == 0xBB && (BOMC3 = fgetc(fFile)) == 0xBF ) {

- fEncoding = UTF8; }

- else

- {

- fprintf(stderr, "collperf: file \"%s\" encoding must be UTF-8 or UTF-16, and "

- "must include a BOM.\n", fileName);

- fError = true;

- return;

- }

-UCharFile::~UCharFile() {

- fclose(fFile);

-UChar UCharFile::get() {

- UChar c;

- switch (fEncoding) {

- case UTF16LE:

- {

- int cL, cH;

- cL = fgetc(fFile);

- cH = fgetc(fFile);

- c = cL | (cH << 8);

- if (cH == EOF) {

- c = 0;

- fEof = TRUE;

- }

- break;

- }

- case UTF16BE:

- {

- int cL, cH;

- cH = fgetc(fFile);

- cL = fgetc(fFile);

- c = cL | (cH << 8);

- if (cL == EOF) {

- c = 0;

- fEof = TRUE;

- }

- break;

- }

- case UTF8:

- {

- if (fPending2ndSurrogate != 0) {

- c = fPending2ndSurrogate;

- fPending2ndSurrogate = 0;

- break;

- }

- int ch = fgetc(fFile); // Note: c and ch are separate cause eof test doesn't work on UChar type.

- if (ch == EOF) {

- c = 0;

- fEof = TRUE;

- break;

- }

- if (ch <= 0x7f) {

- // It's ascii. No further utf-8 conversion.

- c = ch;

- break;

- }

- // Figure out the lenght of the char and read the rest of the bytes

- // into a temp array.

- int nBytes;

- if (ch >= 0xF0) {nBytes=4;}

- else if (ch >= 0xE0) {nBytes=3;}

- else if (ch >= 0xC0) {nBytes=2;}

- else {

- fprintf(stderr, "utf-8 encoded file contains corrupt data.\n");

- fError = TRUE;

- return 0;

- }

- unsigned char bytes[10];

- bytes[0] = (unsigned char)ch;

- int i;

- for (i=1; i<nBytes; i++) {

- bytes[i] = fgetc(fFile);

- if (bytes[i] < 0x80 || bytes[i] >= 0xc0) {

- fprintf(stderr, "utf-8 encoded file contains corrupt data.\n");

- fError = TRUE;

- return 0;

- }

- // Convert the bytes from the temp array to a Unicode char.

- i = 0;

- uint32_t cp;

- U8_NEXT_UNSAFE(bytes, i, cp);

- c = (UChar)cp;

- if (cp >= 0x10000) {

- // The code point needs to be broken up into a utf-16 surrogate pair.

- // Process first half this time through the main loop, and

- // remember the other half for the next time through.

- UChar utf16Buf[3];

- i = 0;

- UTF16_APPEND_CHAR_UNSAFE(utf16Buf, i, cp);

- fPending2ndSurrogate = utf16Buf[1];

- c = utf16Buf[0];

- }

- break;

- };

- default:

- c = 0xFFFD; /* Error, unspecified codepage*/

- fprintf(stderr, "UCharFile: Error: unknown fEncoding\n");

- exit(1);

- }

- return c;

-//----------------------------------------------------------------------------------------

-//

-// openRulesCollator - Command line specified a rules file. Read it in

-// and open a collator with it.

-//

-//----------------------------------------------------------------------------------------

-UCollator *openRulesCollator() {

- UCharFile f(opt_rules);

- if (f.error()) {

- return 0;

- }

- int bufLen = 10000;

- UChar *buf = (UChar *)malloc(bufLen * sizeof(UChar));

- UChar *tmp;

- int i = 0;

- for(;;) {

- buf[i] = f.get();

- if (f.eof()) {

- break;

- }

- if (f.error()) {

- return 0;

- }

- i++;

- if (i >= bufLen) {

- tmp = buf;

- bufLen += 10000;

- buf = (UChar *)realloc(buf, bufLen);

- if (buf == NULL) {

- free(tmp);

- return 0;

- }

- buf[i] = 0;

- UErrorCode status = U_ZERO_ERROR;

- UCollator *coll = ucol_openRules(buf, u_strlen(buf), UCOL_OFF,

- UCOL_DEFAULT_STRENGTH, NULL, &status);

- if (U_FAILURE(status)) {

- fprintf(stderr, "ICU ucol_openRules() open failed.: %d\n", status);

- return 0;

- }

- free(buf);

- return coll;

-//----------------------------------------------------------------------------------------

-//

-// Main -- process command line, read in and pre-process the test file,

-// call other functions to do the actual tests.

-//

-//----------------------------------------------------------------------------------------

-int main(int argc, const char** argv) {

- if (ProcessOptions(argc, argv, opts) != TRUE || opt_help || opt_fName == 0) {

- printf(gUsageString);

- exit (1);

- }

- // Make sure that we've only got one API selected.

- if (opt_unix || opt_win) opt_icu = FALSE;

- if (opt_unix) opt_win = FALSE;

- //

- // Set up an ICU collator

- //

- UErrorCode status = U_ZERO_ERROR;

- if (opt_rules != 0) {

- gCol = openRulesCollator();

- if (gCol == 0) {return -1;}

- }

- else {

- gCol = ucol_open(opt_locale, &status);

- if (U_FAILURE(status)) {

- fprintf(stderr, "Collator creation failed.: %d\n", status);

- return -1;

- }

- if (status==U_USING_DEFAULT_WARNING && opt_terse==FALSE) {

- fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", opt_locale);

- }

- if (status==U_USING_FALLBACK_WARNING && opt_terse==FALSE) {

- fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", opt_locale);

- }

- if (opt_norm) {

- ucol_setAttribute(gCol, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

- }

- if (opt_french && opt_frenchoff) {

- fprintf(stderr, "collperf: Error, specified both -french and -frenchoff options.");

- exit(-1);

- }

- if (opt_french) {

- ucol_setAttribute(gCol, UCOL_FRENCH_COLLATION, UCOL_ON, &status);

- }

- if (opt_frenchoff) {

- ucol_setAttribute(gCol, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);

- }

- if (opt_lower) {

- ucol_setAttribute(gCol, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, &status);

- }

- if (opt_upper) {

- ucol_setAttribute(gCol, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &status);

- }

- if (opt_case) {

- ucol_setAttribute(gCol, UCOL_CASE_LEVEL, UCOL_ON, &status);

- }

- if (opt_shifted) {

- ucol_setAttribute(gCol, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);

- }

- if (opt_level != 0) {

- switch (opt_level) {

- case 1:

- ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_PRIMARY, &status);

- break;

- case 2:

- ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_SECONDARY, &status);

- break;

- case 3:

- ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_TERTIARY, &status);

- break;

- case 4:

- ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_QUATERNARY, &status);

- break;

- case 5:

- ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_IDENTICAL, &status);

- break;

- default:

- fprintf(stderr, "-level param must be between 1 and 5\n");

- exit(-1);

- }

- if (U_FAILURE(status)) {

- fprintf(stderr, "Collator attribute setting failed.: %d\n", status);

- return -1;

- }

- //

- // Set up a Windows LCID

- //

- if (opt_langid != 0) {

- gWinLCID = MAKELCID(opt_langid, SORT_DEFAULT);

- }

- else {

- gWinLCID = uloc_getLCID(opt_locale);

- }

- //

- // Set the UNIX locale

- //

- if (opt_unix) {

- if (setlocale(LC_ALL, opt_locale) == 0) {

- fprintf(stderr, "setlocale(LC_ALL, %s) failed.\n", opt_locale);

- exit(-1);

- }

- // Read in the input file.

- // File assumed to be utf-16.

- // Lines go onto heap buffers. Global index array to line starts is created.

- // Lines themselves are null terminated.

- //

- UCharFile f(opt_fName);

- if (f.error()) {

- exit(-1);

- }

- const int MAXLINES = 100000;

- gFileLines = new Line[MAXLINES];

- UChar buf[1024];

- int column = 0;

- // Read the file, split into lines, and save in memory.

- // Loop runs once per utf-16 value from the input file,

- // (The number of bytes read from file per loop iteration depends on external encoding.)

- for (;;) {

- UChar c = f.get();

- if (f.error()){

- exit(-1);

- }

- // We now have a good UTF-16 value in c.

- // Watch for CR, LF, EOF; these finish off a line.

- if (c == 0xd) {

- continue;

- }

- if (f.eof() || c == 0x0a || c==0x2028) { // Unipad inserts 2028 line separators!

- buf[column++] = 0;

- if (column > 1) {

- gFileLines[gNumFileLines].name = new UChar[column];

- gFileLines[gNumFileLines].len = column-1;

- memcpy(gFileLines[gNumFileLines].name, buf, column * sizeof(UChar));

- gNumFileLines++;

- column = 0;

- if (gNumFileLines >= MAXLINES) {

- fprintf(stderr, "File too big. Max number of lines is %d\n", MAXLINES);

- exit(-1);

- }

- if (c == 0xa || c == 0x2028)

- continue;

- else

- break; // EOF

- }

- buf[column++] = c;

- if (column >= 1023)

- {

- static UBool warnFlag = TRUE;

- if (warnFlag) {

- fprintf(stderr, "Warning - file line longer than 1023 chars truncated.\n");

- warnFlag = FALSE;

- }

- column--;

- }

- if (opt_terse == FALSE) {

- printf("file \"%s\", %d lines.\n", opt_fName, gNumFileLines);

- }

- // Convert the lines to the UNIX encoding.

- if (opt_unix) {

- UnixConvert();

- }

- //

- // Pre-compute ICU sort keys for the lines of the file.

- //

- int line;

- int32_t t;

- for (line=0; line<gNumFileLines; line++) {

- t = ucol_getSortKey(gCol, gFileLines[line].name, -1, (unsigned char *)buf, sizeof(buf));

- gFileLines[line].icuSortKey = new char[t];

- if (t > (int32_t)sizeof(buf)) {

- t = ucol_getSortKey(gCol, gFileLines[line].name, -1, (unsigned char *)gFileLines[line].icuSortKey , t);

- }

- else

- {

- memcpy(gFileLines[line].icuSortKey, buf, t);

- }

- //

- // Pre-compute Windows sort keys for the lines of the file.

- //

- for (line=0; line<gNumFileLines; line++) {

- t=LCMapStringW(gWinLCID, LCMAP_SORTKEY, gFileLines[line].name, -1, buf, sizeof(buf));

- gFileLines[line].winSortKey = new char[t];

- if (t > (int32_t)sizeof(buf)) {

- t = LCMapStringW(gWinLCID, LCMAP_SORTKEY, gFileLines[line].name, -1, (unsigned short *)(gFileLines[line].winSortKey), t);

- }

- else

- {

- memcpy(gFileLines[line].winSortKey, buf, t);

- }

- //

- // Pre-compute UNIX sort keys for the lines of the file.

- //

- if (opt_unix) {

- for (line=0; line<gNumFileLines; line++) {

- t=strxfrm((char *)buf, gFileLines[line].unixName, sizeof(buf));

- gFileLines[line].unixSortKey = new char[t];

- if (t > (int32_t)sizeof(buf)) {

- t = strxfrm(gFileLines[line].unixSortKey, gFileLines[line].unixName, sizeof(buf));

- }

- else

- {

- memcpy(gFileLines[line].unixSortKey, buf, t);

- }

- //

- // Dump file lines, CEs, Sort Keys if requested.

- //

- if (opt_dump) {

- int i;

- for (line=0; line<gNumFileLines; line++) {

- for (i=0;;i++) {

- UChar c = gFileLines[line].name[i];

- if (c == 0)

- break;

- if (c < 0x20 || c > 0x7e) {

- printf("\\u%.4x", c);

- }

- else {

- printf("%c", c);

- }

- printf("\n");

- printf(" CEs: ");

- UCollationElements *CEiter = ucol_openElements(gCol, gFileLines[line].name, -1, &status);

- int32_t ce;

- i = 0;

- for (;;) {

- ce = ucol_next(CEiter, &status);

- if (ce == UCOL_NULLORDER) {

- break;

- }

- printf(" %.8x", ce);

- if (++i > 8) {

- printf("\n ");

- i = 0;

- }

- printf("\n");

- ucol_closeElements(CEiter);

- printf(" ICU Sort Key: ");

- for (i=0; ; i++) {

- unsigned char c = gFileLines[line].icuSortKey[i];

- printf("%02x ", c);

- if (c == 0) {

- break;

- }

- if (i > 0 && i % 20 == 0) {

- printf("\n ");

- }

- printf("\n");

- }

- //

- // Pre-sort the lines.

- //

- int i;

- gSortedLines = new Line *[gNumFileLines];

- for (i=0; i<gNumFileLines; i++) {

- gSortedLines[i] = &gFileLines[i];

- }

- if (opt_win) {

- qsort(gSortedLines, gNumFileLines, sizeof(Line *), Winstrcmp);

- }

- else if (opt_unix) {

- qsort(gSortedLines, gNumFileLines, sizeof(Line *), UNIXstrcmp);

- }

- else /* ICU */

- {

- qsort(gSortedLines, gNumFileLines, sizeof(Line *), ICUstrcmp);

- }

- //

- // Make up a randomized order, will be used for sorting tests.

- //

- gRandomLines = new Line *[gNumFileLines];

- for (i=0; i<gNumFileLines; i++) {

- gRandomLines[i] = &gFileLines[i];

- }

- qsort(gRandomLines, gNumFileLines, sizeof(Line *), ICURandomCmp);

- //

- // We've got the file read into memory. Go do something with it.

- //

- if (opt_qsort) doQSort();

- if (opt_binsearch) doBinarySearch();

- if (opt_keygen) doKeyGen();

- if (opt_keyhist) doKeyHist();

- if (opt_itertest) doIterTest();

- return 0;

« no previous file with comments | « source/test/perf/collationperf/Makefile.in ('k') | source/test/perf/collationperf/readme.html » ('j') | no next file with comments »