icu46/source/test/perf/ubrkperf/ubrkperfold.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/test/perf/ubrkperf/ubrkperfold.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /********************************************************************

	2 * COPYRIGHT:

	3 * Copyright (C) 2001-2005 IBM, Inc. All Rights Reserved.

	4 *

	5 ********************************************************************/

	6 /******************************************************************************* *

	7 *

	8 * File ubrkperf.cpp

	9 *

	10 * Modification History:

	11 * Name Description

	12 * Vladimir Weinstein First Version, based on collperf

	13 *

	14 ******************************************************************************** *

	15 */

	16

	17 //

	18 // This program tests break iterator performance

	19 // Currently we test only ICU APIs with the future possibility of testing *nix & win32 APIs

	20 // (if any)

	21 // A text file is required as input. It must be in utf-8 or utf-16 format,

	22 // and include a byte order mark. Either LE or BE format is OK.

	23 //

	24

	25 const char gUsageString[] =

	26 "usage: ubrkperf options...\n"

	27 "-help Display this message.\n"

	28 "-file file_name utf-16/utf-8 format file.\n"

	29 "-locale name ICU locale to use. Default is en_US\n"

	30 "-langid 0x1234 Windows Language ID number. Default to value fo r -locale option\n"

	31 " see http://msdn.microsoft.com/library/psdk/wi nbase/nls_8xo3.htm\n"

	32 "-win Run test using Windows native services. (current ly not working) (ICU is default)\n"

	33 "-unix Run test using Unix word breaking services. (cur rently not working) \n"

	34 "-mac Run test using MacOSX word breaking services.\n"

	35 "-uselen Use API with string lengths. Default is null-te rminated strings\n"

	36 "-char Use character break iterator\n"

	37 "-word Use word break iterator\n"

	38 "-line Use line break iterator\n"

	39 "-sentence Use sentence break iterator\n"

	40 "-loop nnnn Loopcount for test. Adjust for reasonable total running time.\n"

	41 "-iloop n Inner Loop Count. Default = 1. Number of calls to function\n"

	42 " under test at each call point. For measurin g test overhead.\n"

	43 "-terse Terse numbers-only output. Intended for use by scripts.\n"

	44 "-dump Display stuff.\n"

	45 "-capi Use C APIs instead of C++ APIs (currently not wo rking)\n"

	46 "-next Do the next test\n"

	47 "-isBound Do the isBound test\n"

	48 ;

	49

	50

	51 #include <stdio.h>

	52 #include <string.h>

	53 #include <stdlib.h>

	54 #include <math.h>

	55 #include <locale.h>

	56 #include <errno.h>

	57 #include <sys/stat.h>

	58

	59 #include <unicode/utypes.h>

	60 #include <unicode/ucol.h>

	61 #include <unicode/ucoleitr.h>

	62 #include <unicode/uloc.h>

	63 #include <unicode/ustring.h>

	64 #include <unicode/ures.h>

	65 #include <unicode/uchar.h>

	66 #include <unicode/ucnv.h>

	67 #include <unicode/utf8.h>

	68

	69 #include <unicode/brkiter.h>

	70

	71

	72 #ifdef U_WINDOWS

	73 #include <windows.h>

	74 #else

	75 //

	76 // Stubs for Windows API functions when building on UNIXes.

	77 //

	78 #include <sys/time.h>

	79 unsigned long timeGetTime() {

	80 struct timeval t;

	81 gettimeofday(&t, 0);

	82 unsigned long val = t.tv_sec * 1000; // Let it overflow. Who cares.

	83 val += t.tv_usec / 1000;

	84 return val;

	85 };

	86 #define MAKELCID(a,b) 0

	87 #endif

	88

	89

	90 //

	91 // Command line option variables

	92 // These global variables are set according to the options specified

	93 // on the command line by the user.

	94 char * opt_fName = 0;

	95 char * opt_locale = "en_US";

	96 int opt_langid = 0; // Defaults to value corresponding to opt_loc ale.

	97 char * opt_rules = 0;

	98 UBool opt_help = FALSE;

	99 int opt_time = 0;

	100 int opt_loopCount = 0;

	101 int opt_passesCount= 1;

	102 UBool opt_terse = FALSE;

	103 UBool opt_icu = TRUE;

	104 UBool opt_win = FALSE; // Run with Windows native functions.

	105 UBool opt_unix = FALSE; // Run with UNIX strcoll, strxfrm functions.

	106 UBool opt_mac = FALSE; // Run with MacOSX word break services.

	107 UBool opt_uselen = FALSE;

	108 UBool opt_dump = FALSE;

	109 UBool opt_char = FALSE;

	110 UBool opt_word = FALSE;

	111 UBool opt_line = FALSE;

	112 UBool opt_sentence = FALSE;

	113 UBool opt_capi = FALSE;

	114

	115 UBool opt_next = FALSE;

	116 UBool opt_isBound = FALSE;

	117

	118

	119

	120 //

	121 // Definitions for the command line options

	122 //

	123 struct OptSpec {

	124 const char *name;

	125 enum {FLAG, NUM, STRING} type;

	126 void *pVar;

	127 };

	128

	129 OptSpec opts[] = {

	130 {"-file", OptSpec::STRING, &opt_fName},

	131 {"-locale", OptSpec::STRING, &opt_locale},

	132 {"-langid", OptSpec::NUM, &opt_langid},

	133 {"-win", OptSpec::FLAG, &opt_win},

	134 {"-unix", OptSpec::FLAG, &opt_unix},

	135 {"-mac", OptSpec::FLAG, &opt_mac},

	136 {"-uselen", OptSpec::FLAG, &opt_uselen},

	137 {"-loop", OptSpec::NUM, &opt_loopCount},

	138 {"-time", OptSpec::NUM, &opt_time},

	139 {"-passes", OptSpec::NUM, &opt_passesCount},

	140 {"-char", OptSpec::FLAG, &opt_char},

	141 {"-word", OptSpec::FLAG, &opt_word},

	142 {"-line", OptSpec::FLAG, &opt_line},

	143 {"-sentence", OptSpec::FLAG, &opt_sentence},

	144 {"-terse", OptSpec::FLAG, &opt_terse},

	145 {"-dump", OptSpec::FLAG, &opt_dump},

	146 {"-capi", OptSpec::FLAG, &opt_capi},

	147 {"-next", OptSpec::FLAG, &opt_next},

	148 {"-isBound", OptSpec::FLAG, &opt_isBound},

	149 {"-help", OptSpec::FLAG, &opt_help},

	150 {"-?", OptSpec::FLAG, &opt_help},

	151 {0, OptSpec::FLAG, 0}

	152 };

	153

	154

	155 //---------------------------------------------------------------------------

	156 //

	157 // Global variables pointing to and describing the test file

	158 //

	159 //---------------------------------------------------------------------------

	160

	161 //DWORD gWinLCID;

	162 BreakIterator *brkit = NULL;

	163 UChar *text = NULL;

	164 int32_t textSize = 0;

	165

	166

	167

	168 #ifdef U_DARWIN

	169 #include <ApplicationServices/ApplicationServices.h>

	170 enum{

	171 kUCTextBreakAllMask = (kUCTextBreakClusterMask \| kUCTextBreakWordMask \| kUCTex tBreakLineMask)

	172 };

	173 UCTextBreakType breakTypes[4] = {kUCTextBreakCharMask, kUCTextBreakClusterMask, kUCTextBreakWordMask, kUCTextBreakLineMask};

	174 TextBreakLocatorRef breakRef;

	175 UCTextBreakType macBreakType;

	176

	177 void createMACBrkIt() {

	178 OSStatus status = noErr;

	179 LocaleRef lref;

	180 status = LocaleRefFromLocaleString(opt_locale, &lref);

	181 status = UCCreateTextBreakLocator(lref, 0, kUCTextBreakAllMask, (TextBreakLoca torRef*)&breakRef);

	182 if(opt_char == TRUE) {

	183 macBreakType = kUCTextBreakClusterMask;

	184 } else if(opt_word == TRUE) {

	185 macBreakType = kUCTextBreakWordMask;

	186 } else if(opt_line == TRUE) {

	187 macBreakType = kUCTextBreakLineMask;

	188 } else if(opt_sentence == TRUE) {

	189 // error

	190 // brkit = BreakIterator::createSentenceInstance(opt_locale, status);

	191 } else {

	192 // default is character iterator

	193 macBreakType = kUCTextBreakClusterMask;

	194 }

	195 }

	196 #endif

	197

	198 void createICUBrkIt() {

	199 //

	200 // Set up an ICU break iterator

	201 //

	202 UErrorCode status = U_ZERO_ERROR;

	203 if(opt_char == TRUE) {

	204 brkit = BreakIterator::createCharacterInstance(opt_locale, status);

	205 } else if(opt_word == TRUE) {

	206 brkit = BreakIterator::createWordInstance(opt_locale, status);

	207 } else if(opt_line == TRUE) {

	208 brkit = BreakIterator::createLineInstance(opt_locale, status);

	209 } else if(opt_sentence == TRUE) {

	210 brkit = BreakIterator::createSentenceInstance(opt_locale, status);

	211 } else {

	212 // default is character iterator

	213 brkit = BreakIterator::createCharacterInstance(opt_locale, status);

	214 }

	215 if (status==U_USING_DEFAULT_WARNING && opt_terse==FALSE) {

	216 fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", opt_locale);

	217 }

	218 if (status==U_USING_FALLBACK_WARNING && opt_terse==FALSE) {

	219 fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", opt_locale);

	220 }

	221

	222 }

	223

	224 //---------------------------------------------------------------------------

	225 //

	226 // ProcessOptions() Function to read the command line options.

	227 //

	228 //---------------------------------------------------------------------------

	229 UBool ProcessOptions(int argc, const char **argv, OptSpec opts[])

	230 {

	231 int i;

	232 int argNum;

	233 const char *pArgName;

	234 OptSpec *pOpt;

	235

	236 for (argNum=1; argNum<argc; argNum++) {

	237 pArgName = argv[argNum];

	238 for (pOpt = opts; pOpt->name != 0; pOpt++) {

	239 if (strcmp(pOpt->name, pArgName) == 0) {

	240 switch (pOpt->type) {

	241 case OptSpec::FLAG:

	242 (UBool )(pOpt->pVar) = TRUE;

	243 break;

	244 case OptSpec::STRING:

	245 argNum ++;

	246 if (argNum >= argc) {

	247 fprintf(stderr, "value expected for \"%s\" option.\n", p Opt->name);

	248 return FALSE;

	249 }

	250 (const char *)(pOpt->pVar) = argv[argNum];

	251 break;

	252 case OptSpec::NUM:

	253 argNum ++;

	254 if (argNum >= argc) {

	255 fprintf(stderr, "value expected for \"%s\" option.\n", p Opt->name);

	256 return FALSE;

	257 }

	258 char *endp;

	259 i = strtol(argv[argNum], &endp, 0);

	260 if (endp == argv[argNum]) {

	261 fprintf(stderr, "integer value expected for \"%s\" optio n.\n", pOpt->name);

	262 return FALSE;

	263 }

	264 (int )(pOpt->pVar) = i;

	265 }

	266 break;

	267 }

	268 }

	269 if (pOpt->name == 0)

	270 {

	271 fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);

	272 return FALSE;

	273 }

	274 }

	275 return TRUE;

	276 }

	277

	278

	279 void doForwardTest() {

	280 if (opt_terse == FALSE) {

	281 printf("Doing the forward test\n");

	282 }

	283 int32_t noBreaks = 0;

	284 int32_t i = 0;

	285 unsigned long startTime = timeGetTime();

	286 unsigned long elapsedTime = 0;

	287 if(opt_icu) {

	288 createICUBrkIt();

	289 brkit->setText(UnicodeString(text, textSize));

	290 brkit->first();

	291 if (opt_terse == FALSE) {

	292 printf("Warmup\n");

	293 }

	294 int j;

	295 while((j = brkit->next()) != BreakIterator::DONE) {

	296 noBreaks++;

	297 //fprintf(stderr, "%d ", j);

	298 }

	299

	300 if (opt_terse == FALSE) {

	301 printf("Measure\n");

	302 }

	303 startTime = timeGetTime();

	304 for(i = 0; i < opt_loopCount; i++) {

	305 brkit->first();

	306 while(brkit->next() != BreakIterator::DONE) {

	307 }

	308 }

	309

	310 elapsedTime = timeGetTime()-startTime;

	311 } else if(opt_mac) {

	312 #ifdef U_DARWIN

	313 createMACBrkIt();

	314 UniChar* filePtr = text;

	315 OSStatus status = noErr;

	316 UniCharCount startOffset = 0, breakOffset = 0, numUniChars = textSize;

	317 startOffset = 0;

	318 //printf("\t---Search forward--\n");

	319

	320 while (startOffset < numUniChars)

	321 {

	322 status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEdge Mask, filePtr, numUniChars,

	323 startOffset, &breakOffset);

	324 //require_action(status == noErr, EXIT, printf( "**UCFindTextBreak failed: startOffset %d, status %d\n", (int)startOffset, (int)status));

	325 //require_action((breakOffset <= numUniChars),EXIT, printf("**UCFindTextBr eak breakOffset too big: startOffset %d, breakOffset %d\n", (int)startOffset, (i nt)breakOffset));

	326

	327 // Output break

	328 //printf("\t%d\n", (int)breakOffset);

	329

	330 // Increment counters

	331 noBreaks++;

	332 startOffset = breakOffset;

	333 }

	334 startTime = timeGetTime();

	335 for(i = 0; i < opt_loopCount; i++) {

	336 startOffset = 0;

	337

	338 while (startOffset < numUniChars)

	339 {

	340 status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEd geMask, filePtr, numUniChars,

	341 startOffset, &breakOffset);

	342 // Increment counters

	343 startOffset = breakOffset;

	344 }

	345 }

	346 elapsedTime = timeGetTime()-startTime;

	347 UCDisposeTextBreakLocator(&breakRef);

	348 #endif

	349

	350

	351 }

	352

	353

	354 if (opt_terse == FALSE) {

	355 int32_t loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCoun t));

	356 int32_t timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize)) ;

	357 int32_t timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreak s));

	358 printf("forward break iteration average loop time %d\n", loopTime);

	359 printf("number of code units %d average time per code unit %d\n", textSize , timePerCU);

	360 printf("number of breaks %d average time per break %d\n", noBreaks, timePe rBreak);

	361 } else {

	362 printf("time=%d\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize);

	363 }

	364

	365

	366 }

	367

	368 void doIsBoundTest() {

	369 int32_t noBreaks = 0, hit = 0;

	370 int32_t i = 0, j = 0;

	371 unsigned long startTime = timeGetTime();

	372 unsigned long elapsedTime = 0;

	373 createICUBrkIt();

	374 brkit->setText(UnicodeString(text, textSize));

	375 brkit->first();

	376 for(j = 0; j < textSize; j++) {

	377 if(brkit->isBoundary(j)) {

	378 noBreaks++;

	379 //fprintf(stderr, "%d ", j);

	380 }

	381 }

	382 /*

	383 while(brkit->next() != BreakIterator::DONE) {

	384 noBreaks++;

	385 }

	386 */

	387

	388 startTime = timeGetTime();

	389 for(i = 0; i < opt_loopCount; i++) {

	390 for(j = 0; j < textSize; j++) {

	391 if(brkit->isBoundary(j)) {

	392 hit++;

	393 }

	394 }

	395 }

	396

	397 elapsedTime = timeGetTime()-startTime;

	398 int32_t loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCoun t));

	399 if (opt_terse == FALSE) {

	400 int32_t timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize)) ;

	401 int32_t timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreak s));

	402 printf("forward break iteration average loop time %d\n", loopTime);

	403 printf("number of code units %d average time per code unit %d\n", textSize , timePerCU);

	404 printf("number of breaks %d average time per break %d\n", noBreaks, timePe rBreak);

	405 } else {

	406 printf("time=%d\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize);

	407 }

	408 }

	409

	410 //------------------------------------------------------------------------------ ----------

	411 //

	412 // UnixConvert -- Convert the lines of the file to the encoding for UNIX

	413 // Since it appears that Unicode support is going in the gene ral

	414 // direction of the use of UTF-8 locales, that is the approac h

	415 // that is used here.

	416 //

	417 //------------------------------------------------------------------------------ ----------

	418 void UnixConvert() {

	419 #if 0

	420 int line;

	421

	422 UConverter *cvrtr; // An ICU code page converter.

	423 UErrorCode status = U_ZERO_ERROR;

	424

	425

	426 cvrtr = ucnv_open("utf-8", &status); // we are just doing UTF-8 locales f or now.

	427 if (U_FAILURE(status)) {

	428 fprintf(stderr, "ICU Converter open failed.: %d\n", &status);

	429 exit(-1);

	430 }

	431 // redo for unix

	432 for (line=0; line < gNumFileLines; line++) {

	433 int sizeNeeded = ucnv_fromUChars(cvrtr,

	434 0, // ptr to target buffer.

	435 0, // length of target buffe r.

	436 gFileLines[line].name,

	437 -1, // source is null termin ated

	438 &status);

	439 if (status != U_BUFFER_OVERFLOW_ERROR && status != U_ZERO_ERROR) {

	440 fprintf(stderr, "Conversion from Unicode, something is wrong.\n");

	441 exit(-1);

	442 }

	443 status = U_ZERO_ERROR;

	444 gFileLines[line].unixName = new char[sizeNeeded+1];

	445 sizeNeeded = ucnv_fromUChars(cvrtr,

	446 gFileLines[line].unixName, // ptr to ta rget buffer.

	447 sizeNeeded+1, // length of target buffe r.

	448 gFileLines[line].name,

	449 -1, // source is null termin ated

	450 &status);

	451 if (U_FAILURE(status)) {

	452 fprintf(stderr, "ICU Conversion Failed.: %d\n", status);

	453 exit(-1);

	454 }

	455 gFileLines[line].unixName[sizeNeeded] = 0;

	456 };

	457 ucnv_close(cvrtr);

	458 #endif

	459 }

	460

	461

	462 //------------------------------------------------------------------------------ ----------

	463 //

	464 // class UCharFile Class to hide all the gorp to read a file in

	465 // and produce a stream of UChars.

	466 //

	467 //------------------------------------------------------------------------------ ----------

	468 class UCharFile {

	469 public:

	470 UCharFile(const char *fileName);

	471 ~UCharFile();

	472 UChar get();

	473 UBool eof() {return fEof;};

	474 UBool error() {return fError;};

	475 int32_t size() { return fFileSize; };

	476

	477 private:

	478 UCharFile (const UCharFile &other) {}; // No copy co nstructor.

	479 UCharFile & operator = (const UCharFile &other) {return *this;}; // No ass ignment op

	480

	481 FILE *fFile;

	482 const char *fName;

	483 UBool fEof;

	484 UBool fError;

	485 UChar fPending2ndSurrogate;

	486 int32_t fFileSize;

	487

	488 enum {UTF16LE, UTF16BE, UTF8} fEncoding;

	489 };

	490

	491 UCharFile::UCharFile(const char * fileName) {

	492 fEof = FALSE;

	493 fError = FALSE;

	494 fName = fileName;

	495 struct stat buf;

	496 int32_t result = stat(fileName, &buf);

	497 if(result != 0) {

	498 fprintf(stderr, "Error getting info\n");

	499 fFileSize = -1;

	500 } else {

	501 fFileSize = buf.st_size;

	502 }

	503 fFile = fopen(fName, "rb");

	504 fPending2ndSurrogate = 0;

	505 if (fFile == NULL) {

	506 fprintf(stderr, "Can not open file \"%s\"\n", opt_fName);

	507 fError = TRUE;

	508 return;

	509 }

	510 //

	511 // Look for the byte order mark at the start of the file.

	512 //

	513 int BOMC1, BOMC2, BOMC3;

	514 BOMC1 = fgetc(fFile);

	515 BOMC2 = fgetc(fFile);

	516

	517 if (BOMC1 == 0xff && BOMC2 == 0xfe) {

	518 fEncoding = UTF16LE; }

	519 else if (BOMC1 == 0xfe && BOMC2 == 0xff) {

	520 fEncoding = UTF16BE; }

	521 else if (BOMC1 == 0xEF && BOMC2 == 0xBB && (BOMC3 = fgetc(fFile)) == 0xBF ) {

	522 fEncoding = UTF8; }

	523 else

	524 {

	525 fprintf(stderr, "collperf: file \"%s\" encoding must be UTF-8 or UTF-16 , and "

	526 "must include a BOM.\n", fileName);

	527 fError = true;

	528 return;

	529 }

	530 }

	531

	532

	533 UCharFile::~UCharFile() {

	534 fclose(fFile);

	535 }

	536

	537

	538

	539 UChar UCharFile::get() {

	540 UChar c;

	541 switch (fEncoding) {

	542 case UTF16LE:

	543 {

	544 int cL, cH;

	545 cL = fgetc(fFile);

	546 cH = fgetc(fFile);

	547 c = cL \| (cH << 8);

	548 if (cH == EOF) {

	549 c = 0;

	550 fEof = TRUE;

	551 }

	552 break;

	553 }

	554 case UTF16BE:

	555 {

	556 int cL, cH;

	557 cH = fgetc(fFile);

	558 cL = fgetc(fFile);

	559 c = cL \| (cH << 8);

	560 if (cL == EOF) {

	561 c = 0;

	562 fEof = TRUE;

	563 }

	564 break;

	565 }

	566 case UTF8:

	567 {

	568 if (fPending2ndSurrogate != 0) {

	569 c = fPending2ndSurrogate;

	570 fPending2ndSurrogate = 0;

	571 break;

	572 }

	573

	574 int ch = fgetc(fFile); // Note: c and ch are separate cause eof t est doesn't work on UChar type.

	575 if (ch == EOF) {

	576 c = 0;

	577 fEof = TRUE;

	578 break;

	579 }

	580

	581 if (ch <= 0x7f) {

	582 // It's ascii. No further utf-8 conversion.

	583 c = ch;

	584 break;

	585 }

	586

	587 // Figure out the lenght of the char and read the rest of the bytes

	588 // into a temp array.

	589 int nBytes;

	590 if (ch >= 0xF0) {nBytes=4;}

	591 else if (ch >= 0xE0) {nBytes=3;}

	592 else if (ch >= 0xC0) {nBytes=2;}

	593 else {

	594 fprintf(stderr, "not likely utf-8 encoded file %s contains corru pt data at offset %d.\n", fName, ftell(fFile));

	595 fError = TRUE;

	596 return 0;

	597 }

	598

	599 unsigned char bytes[10];

	600 bytes[0] = (unsigned char)ch;

	601 int i;

	602 for (i=1; i<nBytes; i++) {

	603 bytes[i] = fgetc(fFile);

	604 if (bytes[i] < 0x80 \|\| bytes[i] >= 0xc0) {

	605 fprintf(stderr, "utf-8 encoded file %s contains corrupt data at offset %d. Expected %d bytes, byte %d is invalid. First byte is %02X\n", fNa me, ftell(fFile), nBytes, i, ch);

	606 fError = TRUE;

	607 return 0;

	608 }

	609 }

	610

	611 // Convert the bytes from the temp array to a Unicode char.

	612 i = 0;

	613 uint32_t cp;

	614 UTF8_NEXT_CHAR_UNSAFE(bytes, i, cp);

	615 c = (UChar)cp;

	616

	617 if (cp >= 0x10000) {

	618 // The code point needs to be broken up into a utf-16 surrogate pair.

	619 // Process first half this time through the main loop, and

	620 // remember the other half for the next time through.

	621 UChar utf16Buf[3];

	622 i = 0;

	623 UTF16_APPEND_CHAR_UNSAFE(utf16Buf, i, cp);

	624 fPending2ndSurrogate = utf16Buf[1];

	625 c = utf16Buf[0];

	626 }

	627 break;

	628 };

	629 }

	630 return c;

	631 }

	632

	633

	634 //------------------------------------------------------------------------------ ----------

	635 //

	636 // Main -- process command line, read in and pre-process the test file,

	637 // call other functions to do the actual tests.

	638 //

	639 //------------------------------------------------------------------------------ ----------

	640 int main(int argc, const char** argv) {

	641 if (ProcessOptions(argc, argv, opts) != TRUE \|\| opt_help \|\| opt_fName == 0) {

	642 printf(gUsageString);

	643 exit (1);

	644 }

	645 // Make sure that we've only got one API selected.

	646 if (opt_mac \|\| opt_unix \|\| opt_win) opt_icu = FALSE;

	647 if (opt_mac \|\| opt_unix) opt_win = FALSE;

	648 if (opt_mac) opt_unix = FALSE;

	649

	650 UErrorCode status = U_ZERO_ERROR;

	651

	652

	653

	654 //

	655 // Set up a Windows LCID

	656 //

	657 /*

	658 if (opt_langid != 0) {

	659 gWinLCID = MAKELCID(opt_langid, SORT_DEFAULT);

	660 }

	661 else {

	662 gWinLCID = uloc_getLCID(opt_locale);

	663 }

	664 */

	665

	666 //

	667 // Set the UNIX locale

	668 //

	669 if (opt_unix) {

	670 if (setlocale(LC_ALL, opt_locale) == 0) {

	671 fprintf(stderr, "setlocale(LC_ALL, %s) failed.\n", opt_locale);

	672 exit(-1);

	673 }

	674 }

	675

	676 // Read in the input file.

	677 // File assumed to be utf-16.

	678 // Lines go onto heap buffers. Global index array to line starts is creat ed.

	679 // Lines themselves are null terminated.

	680 //

	681

	682 UCharFile f(opt_fName);

	683 if (f.error()) {

	684 exit(-1);

	685 }

	686 int32_t fileSize = f.size();

	687 const int STARTSIZE = 70000;

	688 int32_t bufSize = 0;

	689 int32_t charCount = 0;

	690 if(fileSize != -1) {

	691 text = (UChar )malloc(fileSizesizeof(UChar));

	692 bufSize = fileSize;

	693 } else {

	694 text = (UChar )malloc(STARTSIZEsizeof(UChar));

	695 bufSize = STARTSIZE;

	696 }

	697 if(text == NULL) {

	698 fprintf(stderr, "Allocating buffer failed\n");

	699 exit(-1);

	700 }

	701

	702

	703 // Read the file, split into lines, and save in memory.

	704 // Loop runs once per utf-16 value from the input file,

	705 // (The number of bytes read from file per loop iteration depends on exte rnal encoding.)

	706 for (;;) {

	707

	708 UChar c = f.get();

	709 if(f.eof()) {

	710 break;

	711 }

	712 if (f.error()){

	713 exit(-1);

	714 }

	715 // We now have a good UTF-16 value in c.

	716 text[charCount++] = c;

	717 if(charCount == bufSize) {

	718 text = (UChar )realloc(text, 2bufSize*sizeof(UChar));

	719 if(text == NULL) {

	720 fprintf(stderr, "Reallocating buffer failed\n");

	721 exit(-1);

	722 }

	723 bufSize *= 2;

	724 }

	725 }

	726

	727

	728 if (opt_terse == FALSE) {

	729 printf("file \"%s\", %d charCount code units.\n", opt_fName, charCount);

	730 }

	731

	732 textSize = charCount;

	733

	734

	735

	736

	737 //

	738 // Dump file contents if requested.

	739 //

	740 if (opt_dump) {

	741 // dump file, etc... possibly

	742 }

	743

	744

	745 //

	746 // We've got the file read into memory. Go do something with it.

	747 //

	748 int32_t i = 0;

	749 for(i = 0; i < opt_passesCount; i++) {

	750 if(opt_loopCount != 0) {

	751 if(opt_next) {

	752 doForwardTest();

	753 } else if(opt_isBound) {

	754 doIsBoundTest();

	755 } else {

	756 doForwardTest();

	757 }

	758 } else if(opt_time != 0) {

	759

	760 }

	761 }

	762

	763 if(text != NULL) {

	764 free(text);

	765 }

	766 if(brkit != NULL) {

	767 delete brkit;

	768 }

	769

	770 return 0;

	771 }

OLD	NEW

« no previous file with comments | « icu46/source/test/perf/ubrkperf/ubrkperf20.dsp ('k') | icu46/source/test/perf/ubrkperf/ubrkperfold.dsp » ('j') | no next file with comments »