Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(255)

Unified Diff: icu46/source/test/perf/ubrkperf/ubrkperfold.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/
Patch Set: Created 10 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « icu46/source/test/perf/ubrkperf/ubrkperf20.dsp ('k') | icu46/source/test/perf/ubrkperf/ubrkperfold.dsp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: icu46/source/test/perf/ubrkperf/ubrkperfold.cpp
===================================================================
--- icu46/source/test/perf/ubrkperf/ubrkperfold.cpp (revision 0)
+++ icu46/source/test/perf/ubrkperf/ubrkperfold.cpp (revision 0)
@@ -0,0 +1,771 @@
+/********************************************************************
+ * COPYRIGHT:
+ * Copyright (C) 2001-2005 IBM, Inc. All Rights Reserved.
+ *
+ ********************************************************************/
+/********************************************************************************
+*
+* File ubrkperf.cpp
+*
+* Modification History:
+* Name Description
+* Vladimir Weinstein First Version, based on collperf
+*
+*********************************************************************************
+*/
+
+//
+// This program tests break iterator performance
+// Currently we test only ICU APIs with the future possibility of testing *nix & win32 APIs
+// (if any)
+// A text file is required as input. It must be in utf-8 or utf-16 format,
+// and include a byte order mark. Either LE or BE format is OK.
+//
+
+// Usage text printed by main() when -help/-? is given, when no -file is
+// supplied, or when the command line cannot be parsed.
+// Keep this list in step with the opts[] table: the previous text advertised
+// -iloop (which opts[] does not accept) and omitted -passes and -time (which
+// it does accept).
+const char gUsageString[] =
+    "usage: ubrkperf options...\n"
+    "-help Display this message.\n"
+    "-file file_name utf-16/utf-8 format file.\n"
+    "-locale name ICU locale to use. Default is en_US\n"
+    "-langid 0x1234 Windows Language ID number. Default to value for -locale option\n"
+    " see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n"
+    "-win Run test using Windows native services. (currently not working) (ICU is default)\n"
+    "-unix Run test using Unix word breaking services. (currently not working) \n"
+    "-mac Run test using MacOSX word breaking services.\n"
+    "-uselen Use API with string lengths. Default is null-terminated strings\n"
+    "-char Use character break iterator\n"
+    "-word Use word break iterator\n"
+    "-line Use line break iterator\n"
+    "-sentence Use sentence break iterator\n"
+    "-loop nnnn Loopcount for test. Adjust for reasonable total running time.\n"
+    "-passes n Number of times the selected test is run. Default = 1.\n"
+    "-time nnnn Time-based test. (currently not working)\n"
+    "-terse Terse numbers-only output. Intended for use by scripts.\n"
+    "-dump Display stuff.\n"
+    "-capi Use C APIs instead of C++ APIs (currently not working)\n"
+    "-next Do the next test\n"
+    "-isBound Do the isBound test\n"
+    ;
+
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+#include <locale.h>
+#include <errno.h>
+#include <sys/stat.h>
+
+#include <unicode/utypes.h>
+#include <unicode/ucol.h>
+#include <unicode/ucoleitr.h>
+#include <unicode/uloc.h>
+#include <unicode/ustring.h>
+#include <unicode/ures.h>
+#include <unicode/uchar.h>
+#include <unicode/ucnv.h>
+#include <unicode/utf8.h>
+
+#include <unicode/brkiter.h>
+
+
+#ifdef U_WINDOWS
+#include <windows.h>
+#else
+//
+// Stubs for Windows API functions when building on UNIXes.
+//
+#include <sys/time.h>
+// Stand-in for the Win32 timeGetTime(): returns a millisecond tick count
+// derived from gettimeofday(). The value may wrap around; callers in this
+// file only ever subtract two readings taken close together.
+unsigned long timeGetTime() {
+    struct timeval t;
+    gettimeofday(&t, 0);
+    // Convert to unsigned before multiplying so that wrap-around is well
+    // defined; overflowing the signed time_t would be undefined behaviour.
+    unsigned long val = (unsigned long)t.tv_sec * 1000UL;
+    val += (unsigned long)t.tv_usec / 1000UL;
+    return val;
+}
+#define MAKELCID(a,b) 0
+#endif
+
+
+//
+// Command line option variables
+// These global variables are set according to the options specified
+// on the command line by the user.
+// String-valued options only ever point at string literals or argv[] entries,
+// so they are pointers to const (ISO C++ does not allow a string literal to be
+// bound to a plain char *). ProcessOptions() already stores into them through
+// a const char ** cast.
+const char *opt_fName = 0;          // -file; main() prints the usage text if it is not set.
+const char *opt_locale = "en_US";   // -locale
+int opt_langid = 0;                 // -langid. Defaults to value corresponding to opt_locale.
+const char *opt_rules = 0;          // No entry in opts[] sets this.
+UBool opt_help = FALSE;             // -help / -?
+int opt_time = 0;                   // -time; parsed, but main() does nothing with it yet.
+int opt_loopCount = 0;              // -loop; no test is run while this is 0.
+int opt_passesCount= 1;             // -passes; number of times main() runs the selected test.
+UBool opt_terse = FALSE;            // -terse
+UBool opt_icu = TRUE;               // Cleared by main() when -win, -unix or -mac is given.
+UBool opt_win = FALSE;              // Run with Windows native functions.
+UBool opt_unix = FALSE;             // Run with Unix word breaking services (also makes main() call setlocale).
+UBool opt_mac = FALSE;              // Run with MacOSX word break services.
+UBool opt_uselen = FALSE;           // -uselen
+UBool opt_dump = FALSE;             // -dump
+UBool opt_char = FALSE;             // -char
+UBool opt_word = FALSE;             // -word
+UBool opt_line = FALSE;             // -line
+UBool opt_sentence = FALSE;         // -sentence
+UBool opt_capi = FALSE;             // -capi
+
+UBool opt_next = FALSE;             // -next: run doForwardTest() (also the default test).
+UBool opt_isBound = FALSE;          // -isBound: run doIsBoundTest().
+
+
+
+//
+// Definitions for the command line options
+//
+struct OptSpec { // One row of the command-line option table consumed by ProcessOptions().
+ const char *name; // Option text as typed on the command line, e.g. "-file"; a null name ends the table.
+ enum {FLAG, NUM, STRING} type; // FLAG: takes no argument; NUM: integer argument; STRING: string argument.
+ void *pVar; // Address of the variable written on a match (a UBool, int or char pointer, according to type).
+};
+
+OptSpec opts[] = { // Option table passed to ProcessOptions() by main(); each row names the global it sets.
+ {"-file", OptSpec::STRING, &opt_fName},
+ {"-locale", OptSpec::STRING, &opt_locale},
+ {"-langid", OptSpec::NUM, &opt_langid},
+ {"-win", OptSpec::FLAG, &opt_win},
+ {"-unix", OptSpec::FLAG, &opt_unix},
+ {"-mac", OptSpec::FLAG, &opt_mac},
+ {"-uselen", OptSpec::FLAG, &opt_uselen},
+ {"-loop", OptSpec::NUM, &opt_loopCount},
+ {"-time", OptSpec::NUM, &opt_time},
+ {"-passes", OptSpec::NUM, &opt_passesCount},
+ {"-char", OptSpec::FLAG, &opt_char},
+ {"-word", OptSpec::FLAG, &opt_word},
+ {"-line", OptSpec::FLAG, &opt_line},
+ {"-sentence", OptSpec::FLAG, &opt_sentence},
+ {"-terse", OptSpec::FLAG, &opt_terse},
+ {"-dump", OptSpec::FLAG, &opt_dump},
+ {"-capi", OptSpec::FLAG, &opt_capi},
+ {"-next", OptSpec::FLAG, &opt_next},
+ {"-isBound", OptSpec::FLAG, &opt_isBound},
+ {"-help", OptSpec::FLAG, &opt_help},
+ {"-?", OptSpec::FLAG, &opt_help}, // Alias for -help.
+ {0, OptSpec::FLAG, 0} // Sentinel: ProcessOptions() stops scanning at the null name.
+};
+
+
+//---------------------------------------------------------------------------
+//
+// Global variables pointing to and describing the test file
+//
+//---------------------------------------------------------------------------
+
+//DWORD gWinLCID;
+BreakIterator *brkit = NULL; // ICU break iterator under test; created by createICUBrkIt(), deleted at the end of main().
+UChar *text = NULL; // Input file contents as UTF-16 code units; allocated and filled in main().
+int32_t textSize = 0; // Number of code units stored in text.
+
+
+
+#ifdef U_DARWIN
+#include <ApplicationServices/ApplicationServices.h>
+enum{ // Union of the cluster, word and line masks; createMACBrkIt() passes it to UCCreateTextBreakLocator().
+ kUCTextBreakAllMask = (kUCTextBreakClusterMask | kUCTextBreakWordMask | kUCTextBreakLineMask)
+ };
+UCTextBreakType breakTypes[4] = {kUCTextBreakCharMask, kUCTextBreakClusterMask, kUCTextBreakWordMask, kUCTextBreakLineMask}; // Not referenced by the code visible in this file.
+TextBreakLocatorRef breakRef; // Locator created by createMACBrkIt() and disposed at the end of doForwardTest().
+UCTextBreakType macBreakType; // Break type chosen by createMACBrkIt() from the -char/-word/-line options.
+
+// Creates the Mac OS X text break locator (breakRef) for opt_locale and
+// selects macBreakType from the -char/-word/-line options; cluster breaks
+// are the default. Exits if the locale or the locator cannot be set up,
+// because the measuring loop in doForwardTest() relies on UCFindTextBreak()
+// advancing the offset and cannot make progress without a valid locator.
+void createMACBrkIt() {
+    LocaleRef lref;
+    OSStatus status = LocaleRefFromLocaleString(opt_locale, &lref);
+    if (status != noErr) {
+        fprintf(stderr, "LocaleRefFromLocaleString(\"%s\") failed, status %d\n", opt_locale, (int)status);
+        exit(-1);
+    }
+    status = UCCreateTextBreakLocator(lref, 0, kUCTextBreakAllMask, (TextBreakLocatorRef*)&breakRef);
+    if (status != noErr) {
+        fprintf(stderr, "UCCreateTextBreakLocator failed, status %d\n", (int)status);
+        exit(-1);
+    }
+
+    if(opt_char == TRUE) {
+        macBreakType = kUCTextBreakClusterMask;
+    } else if(opt_word == TRUE) {
+        macBreakType = kUCTextBreakWordMask;
+    } else if(opt_line == TRUE) {
+        macBreakType = kUCTextBreakLineMask;
+    } else {
+        if(opt_sentence == TRUE) {
+            // The locator above is created without a sentence mask, so a
+            // sentence test cannot be run here. Previously macBreakType was
+            // simply left unselected in this case.
+            fprintf(stderr, "Warning, sentence breaks are not available with -mac; using character breaks\n");
+        }
+        // default is character iterator
+        macBreakType = kUCTextBreakClusterMask;
+    }
+}
+#endif
+
+// (Re)creates the global ICU break iterator brkit for opt_locale.
+// The iterator kind follows the first of -char, -word, -line, -sentence that
+// is set; a character iterator is the default. Exits with a message if ICU
+// cannot create the iterator, since every caller dereferences brkit
+// immediately afterwards.
+void createICUBrkIt() {
+    // The tests call this once per pass; release the iterator left over from
+    // an earlier pass instead of leaking it.
+    delete brkit;
+    brkit = NULL;
+
+    //
+    // Set up an ICU break iterator
+    //
+    UErrorCode status = U_ZERO_ERROR;
+    if(opt_char == TRUE) {
+        brkit = BreakIterator::createCharacterInstance(opt_locale, status);
+    } else if(opt_word == TRUE) {
+        brkit = BreakIterator::createWordInstance(opt_locale, status);
+    } else if(opt_line == TRUE) {
+        brkit = BreakIterator::createLineInstance(opt_locale, status);
+    } else if(opt_sentence == TRUE) {
+        brkit = BreakIterator::createSentenceInstance(opt_locale, status);
+    } else {
+        // default is character iterator
+        brkit = BreakIterator::createCharacterInstance(opt_locale, status);
+    }
+
+    if (U_FAILURE(status) || brkit == NULL) {
+        fprintf(stderr, "Creating the break iterator for %s failed: %s\n", opt_locale, u_errorName(status));
+        exit(-1);
+    }
+    if (status==U_USING_DEFAULT_WARNING && opt_terse==FALSE) {
+        fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", opt_locale);
+    }
+    if (status==U_USING_FALLBACK_WARNING && opt_terse==FALSE) {
+        // The message used to say U_USING_FALLBACK_ERROR, which is not an ICU code.
+        fprintf(stderr, "Warning, U_USING_FALLBACK_WARNING for %s\n", opt_locale);
+    }
+}
+
+//---------------------------------------------------------------------------
+//
+// ProcessOptions() Function to read the command line options.
+//
+//---------------------------------------------------------------------------
+// Parses argv[1..argc-1] against the option table opts (terminated by an
+// entry whose name is 0) and stores each value through the entry's pVar.
+//   FLAG   options set a UBool to TRUE.
+//   STRING options store a pointer to the following argument.
+//   NUM    options store the following argument parsed by strtol() with
+//          base 0, so decimal, 0x hex and leading-0 octal are accepted.
+// Returns TRUE on success. On an unknown option, a missing value or a
+// malformed number, a message is written to stderr and FALSE is returned.
+UBool ProcessOptions(int argc, const char **argv, OptSpec opts[])
+{
+    for (int argNum = 1; argNum < argc; argNum++) {
+        const char *pArgName = argv[argNum];
+
+        // Find the table entry for this argument.
+        OptSpec *pOpt;
+        for (pOpt = opts; pOpt->name != 0; pOpt++) {
+            if (strcmp(pOpt->name, pArgName) == 0) {
+                break;
+            }
+        }
+        if (pOpt->name == 0) {
+            fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);
+            return FALSE;
+        }
+
+        if (pOpt->type == OptSpec::FLAG) {
+            *(UBool *)(pOpt->pVar) = TRUE;
+            continue;
+        }
+
+        // STRING and NUM options consume the next argument as their value.
+        argNum++;
+        if (argNum >= argc) {
+            fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name);
+            return FALSE;
+        }
+
+        if (pOpt->type == OptSpec::STRING) {
+            *(const char **)(pOpt->pVar) = argv[argNum];
+            continue;
+        }
+
+        // OptSpec::NUM. Reject empty input, trailing garbage ("12abc") and
+        // values that do not fit in the int the option variable is declared as.
+        char *endp = 0;
+        errno = 0;
+        long value = strtol(argv[argNum], &endp, 0);
+        if (endp == argv[argNum] || *endp != '\0' ||
+            errno == ERANGE || (long)(int)value != value) {
+            fprintf(stderr, "integer value expected for \"%s\" option.\n", pOpt->name);
+            return FALSE;
+        }
+        *(int *)(pOpt->pVar) = (int)value;
+    }
+    return TRUE;
+}
+
+
+// Measures forward iteration over the whole input text: one untimed pass that
+// counts the boundaries, then opt_loopCount timed passes.
+// Uses ICU when opt_icu is set, or the Mac OS X text break services when
+// opt_mac is set and the file is built with U_DARWIN; otherwise nothing is
+// measured and zeros are reported.
+// Output: a human-readable summary, or time=/events=/size= lines when
+// opt_terse is set (time is the total elapsed timeGetTime() ticks).
+void doForwardTest() {
+    if (opt_terse == FALSE) {
+        printf("Doing the forward test\n");
+    }
+    int32_t noBreaks = 0;
+    int32_t i = 0;
+    unsigned long startTime = timeGetTime();
+    unsigned long elapsedTime = 0;
+    if(opt_icu) {
+        createICUBrkIt();
+        brkit->setText(UnicodeString(text, textSize));
+        brkit->first();
+        if (opt_terse == FALSE) {
+            printf("Warmup\n");
+        }
+        // Untimed warm-up pass; it also counts the boundaries for the report.
+        while(brkit->next() != BreakIterator::DONE) {
+            noBreaks++;
+        }
+
+        if (opt_terse == FALSE) {
+            printf("Measure\n");
+        }
+        startTime = timeGetTime();
+        for(i = 0; i < opt_loopCount; i++) {
+            brkit->first();
+            while(brkit->next() != BreakIterator::DONE) {
+            }
+        }
+
+        elapsedTime = timeGetTime()-startTime;
+    } else if(opt_mac) {
+#ifdef U_DARWIN
+        createMACBrkIt();
+        UniChar* filePtr = text;
+        OSStatus status = noErr;
+        UniCharCount startOffset = 0, breakOffset = 0, numUniChars = textSize;
+
+        // Untimed pass: counts the boundaries for the report.
+        while (startOffset < numUniChars)
+        {
+            status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEdgeMask, filePtr, numUniChars,
+                                     startOffset, &breakOffset);
+            noBreaks++;
+            startOffset = breakOffset;
+        }
+        startTime = timeGetTime();
+        for(i = 0; i < opt_loopCount; i++) {
+            startOffset = 0;
+
+            while (startOffset < numUniChars)
+            {
+                status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEdgeMask, filePtr, numUniChars,
+                                         startOffset, &breakOffset);
+                startOffset = breakOffset;
+            }
+        }
+        elapsedTime = timeGetTime()-startTime;
+        UCDisposeTextBreakLocator(&breakRef);
+#endif
+    }
+
+    if (opt_terse == FALSE) {
+        // Each step scales by 1000 before truncating to an integer. A zero
+        // denominator (no loops, empty text, no boundaries) reports 0 instead
+        // of converting an infinite float to int, which is undefined.
+        int32_t loopTime = 0;
+        if (opt_loopCount != 0) {
+            loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCount));
+        }
+        int32_t timePerCU = 0;
+        if (textSize != 0) {
+            timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize));
+        }
+        int32_t timePerBreak = 0;
+        if (noBreaks != 0) {
+            timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreaks));
+        }
+        printf("forward break iteration average loop time %d\n", loopTime);
+        printf("number of code units %d average time per code unit %d\n", textSize, timePerCU);
+        printf("number of breaks %d average time per break %d\n", noBreaks, timePerBreak);
+    } else {
+        // elapsedTime is unsigned long, so it needs %lu rather than %d.
+        printf("time=%lu\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize);
+    }
+}
+
+// Measures BreakIterator::isBoundary() called for every code unit offset of
+// the input text: one untimed pass that counts the boundaries, then
+// opt_loopCount timed passes. Always uses the ICU iterator.
+// Output: a human-readable summary, or time=/events=/size= lines when
+// opt_terse is set (time is the total elapsed timeGetTime() ticks).
+void doIsBoundTest() {
+    int32_t noBreaks = 0, hit = 0;
+    int32_t i = 0, j = 0;
+    unsigned long startTime = timeGetTime();
+    unsigned long elapsedTime = 0;
+    createICUBrkIt();
+    brkit->setText(UnicodeString(text, textSize));
+    brkit->first();
+    // Untimed pass: counts the boundaries for the report.
+    for(j = 0; j < textSize; j++) {
+        if(brkit->isBoundary(j)) {
+            noBreaks++;
+        }
+    }
+
+    startTime = timeGetTime();
+    for(i = 0; i < opt_loopCount; i++) {
+        for(j = 0; j < textSize; j++) {
+            if(brkit->isBoundary(j)) {
+                hit++;
+            }
+        }
+    }
+    elapsedTime = timeGetTime()-startTime;
+
+    if (opt_terse == FALSE) {
+        // Each step scales by 1000 before truncating to an integer. A zero
+        // denominator (no loops, empty text, no boundaries) reports 0 instead
+        // of converting an infinite float to int, which is undefined.
+        int32_t loopTime = 0;
+        if (opt_loopCount != 0) {
+            loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCount));
+        }
+        int32_t timePerCU = 0;
+        if (textSize != 0) {
+            timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize));
+        }
+        int32_t timePerBreak = 0;
+        if (noBreaks != 0) {
+            timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreaks));
+        }
+        // NOTE(review): the first label still reads "forward break iteration"
+        // although this is the isBoundary test; the text is left unchanged in
+        // case the output is compared against earlier runs.
+        printf("forward break iteration average loop time %d\n", loopTime);
+        printf("number of code units %d average time per code unit %d\n", textSize, timePerCU);
+        printf("number of breaks %d average time per break %d\n", noBreaks, timePerBreak);
+    } else {
+        // elapsedTime is unsigned long, so it needs %lu rather than %d.
+        printf("time=%lu\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize);
+    }
+}
+
+//----------------------------------------------------------------------------------------
+//
+// UnixConvert -- Convert the lines of the file to the encoding for UNIX
+// Since it appears that Unicode support is going in the general
+// direction of the use of UTF-8 locales, that is the approach
+// that is used here.
+//
+//----------------------------------------------------------------------------------------
+void UnixConvert() { // Currently a no-op: the entire body is compiled out by the #if 0 below.
+#if 0 // Disabled code; it refers to gNumFileLines/gFileLines, which are not defined in the visible part of this file.
+ int line;
+
+ UConverter *cvrtr; // An ICU code page converter.
+ UErrorCode status = U_ZERO_ERROR;
+
+
+ cvrtr = ucnv_open("utf-8", &status); // we are just doing UTF-8 locales for now.
+ if (U_FAILURE(status)) {
+ fprintf(stderr, "ICU Converter open failed.: %d\n", &status); // NOTE(review): passes the address of status to %d; the value was presumably intended.
+ exit(-1);
+ }
+ // redo for unix
+ for (line=0; line < gNumFileLines; line++) {
+ int sizeNeeded = ucnv_fromUChars(cvrtr, // First call, with a null target, only computes the required size.
+ 0, // ptr to target buffer.
+ 0, // length of target buffer.
+ gFileLines[line].name,
+ -1, // source is null terminated
+ &status);
+ if (status != U_BUFFER_OVERFLOW_ERROR && status != U_ZERO_ERROR) {
+ fprintf(stderr, "Conversion from Unicode, something is wrong.\n");
+ exit(-1);
+ }
+ status = U_ZERO_ERROR;
+ gFileLines[line].unixName = new char[sizeNeeded+1];
+ sizeNeeded = ucnv_fromUChars(cvrtr, // Second call performs the conversion into the new buffer.
+ gFileLines[line].unixName, // ptr to target buffer.
+ sizeNeeded+1, // length of target buffer.
+ gFileLines[line].name,
+ -1, // source is null terminated
+ &status);
+ if (U_FAILURE(status)) {
+ fprintf(stderr, "ICU Conversion Failed.: %d\n", status);
+ exit(-1);
+ }
+ gFileLines[line].unixName[sizeNeeded] = 0;
+ };
+ ucnv_close(cvrtr);
+#endif
+}
+
+
+//----------------------------------------------------------------------------------------
+//
+// class UCharFile Class to hide all the gorp to read a file in
+// and produce a stream of UChars.
+//
+//----------------------------------------------------------------------------------------
+class UCharFile { // Reads a file that starts with a UTF-8 or UTF-16 BOM and hands it out one UTF-16 code unit at a time.
+public:
+ UCharFile(const char *fileName); // Opens fileName and picks the encoding from its BOM; check error() afterwards.
+ ~UCharFile(); // Closes the underlying FILE.
+ UChar get(); // Returns the next UTF-16 code unit; returns 0 and sets eof() or error() when none can be produced.
+ UBool eof() {return fEof;};
+ UBool error() {return fError;};
+ int32_t size() { return fFileSize; }; // File size as reported by stat() (st_size), or -1 if stat() failed.
+
+private:
+ UCharFile (const UCharFile &other) {}; // No copy constructor.
+ UCharFile & operator = (const UCharFile &other) {return *this;}; // No assignment op
+
+ FILE *fFile; // Stream opened with fopen(fName, "rb").
+ const char *fName; // Pointer to the file name given to the constructor; not copied.
+ UBool fEof; // Set by get() when the end of the file is reached.
+ UBool fError; // Set when the file cannot be opened, has no recognized BOM, or contains corrupt UTF-8.
+ UChar fPending2ndSurrogate; // Second half of a surrogate pair, held by get() for its next call; 0 when nothing is pending.
+ int32_t fFileSize;
+
+ enum {UTF16LE, UTF16BE, UTF8} fEncoding; // Encoding selected from the BOM by the constructor.
+};
+
+// Opens fileName for binary reading and determines its encoding from the
+// byte order mark (FF FE = UTF-16LE, FE FF = UTF-16BE, EF BB BF = UTF-8).
+// On failure a message is written to stderr and error() returns TRUE.
+// size() afterwards reports the stat() size, or -1 if stat() failed.
+UCharFile::UCharFile(const char * fileName) {
+    fEof = FALSE;
+    fError = FALSE;
+    fName = fileName;
+    fPending2ndSurrogate = 0;
+    // Placeholder so the member never holds an indeterminate value on the
+    // error paths below; replaced as soon as the BOM is identified.
+    fEncoding = UTF8;
+
+    struct stat buf;
+    if (stat(fileName, &buf) != 0) {
+        fprintf(stderr, "Error getting info\n");
+        fFileSize = -1;
+    } else {
+        fFileSize = (int32_t)buf.st_size;
+    }
+
+    fFile = fopen(fName, "rb");
+    if (fFile == NULL) {
+        // Report the name this object was asked to open, not the opt_fName global.
+        fprintf(stderr, "Can not open file \"%s\"\n", fileName);
+        fError = TRUE;
+        return;
+    }
+
+    //
+    // Look for the byte order mark at the start of the file.
+    // The third byte is only read when the first two match the UTF-8 BOM.
+    //
+    const int BOMC1 = fgetc(fFile);
+    const int BOMC2 = fgetc(fFile);
+
+    if (BOMC1 == 0xff && BOMC2 == 0xfe) {
+        fEncoding = UTF16LE;
+    } else if (BOMC1 == 0xfe && BOMC2 == 0xff) {
+        fEncoding = UTF16BE;
+    } else if (BOMC1 == 0xEF && BOMC2 == 0xBB && fgetc(fFile) == 0xBF) {
+        fEncoding = UTF8;
+    } else {
+        // The prefix used to read "collperf", a leftover from the program
+        // this file was derived from.
+        fprintf(stderr, "ubrkperf: file \"%s\" encoding must be UTF-8 or UTF-16, and "
+                        "must include a BOM.\n", fileName);
+        fError = TRUE;
+        return;
+    }
+}
+
+
+// Closes the file. fFile is NULL when the constructor failed to open it, and
+// passing a null stream to fclose() is undefined, so that case is skipped.
+UCharFile::~UCharFile() {
+    if (fFile != NULL) {
+        fclose(fFile);
+        fFile = NULL;
+    }
+}
+
+
+
+// Returns the next UTF-16 code unit from the file.
+//   UTF-16 input: two bytes are combined according to the detected byte order.
+//   UTF-8 input:  one character is decoded per call; a supplementary code
+//                 point is returned as two code units over two calls, using
+//                 fPending2ndSurrogate to hold the second one.
+// At end of file 0 is returned and eof() becomes TRUE. On corrupt UTF-8 a
+// message is written to stderr, error() becomes TRUE and 0 is returned.
+UChar UCharFile::get() {
+    UChar c = 0;
+    switch (fEncoding) {
+    case UTF16LE:
+    case UTF16BE:
+        {
+            // Both byte orders read two bytes; only their roles differ.
+            // A missing second byte means end of file, as does a missing first.
+            int first = fgetc(fFile);
+            int second = fgetc(fFile);
+            if (first == EOF || second == EOF) {
+                c = 0;
+                fEof = TRUE;
+                break;
+            }
+            int cL = (fEncoding == UTF16LE) ? first : second;
+            int cH = (fEncoding == UTF16LE) ? second : first;
+            c = (UChar)(cL | (cH << 8));
+            break;
+        }
+    case UTF8:
+        {
+            if (fPending2ndSurrogate != 0) {
+                c = fPending2ndSurrogate;
+                fPending2ndSurrogate = 0;
+                break;
+            }
+
+            int ch = fgetc(fFile);   // Note:  c and ch are separate cause eof test doesn't work on UChar type.
+            if (ch == EOF) {
+                c = 0;
+                fEof = TRUE;
+                break;
+            }
+
+            if (ch <= 0x7f) {
+                // It's ascii.  No further utf-8 conversion.
+                c = (UChar)ch;
+                break;
+            }
+
+            // Figure out the length of the char and read the rest of the bytes
+            //   into a temp array.
+            // Lead bytes above 0xF4 cannot start a code point within the
+            // Unicode range (they used to be taken as 4-byte leads), and
+            // 0x80..0xBF are trail bytes, so both are reported as corrupt.
+            int nBytes;
+            if (ch >= 0xF0 && ch <= 0xF4) {nBytes=4;}
+            else if (ch >= 0xE0 && ch < 0xF0) {nBytes=3;}
+            else if (ch >= 0xC0 && ch < 0xE0) {nBytes=2;}
+            else {
+                // ftell() returns long, so it needs %ld rather than %d.
+                fprintf(stderr, "not likely utf-8 encoded file %s contains corrupt data at offset %ld.\n", fName, ftell(fFile));
+                fError = TRUE;
+                return 0;
+            }
+
+            unsigned char bytes[10];
+            bytes[0] = (unsigned char)ch;
+            int i;
+            for (i=1; i<nBytes; i++) {
+                // An EOF here is stored as 0xFF and rejected by the range test below.
+                bytes[i] = (unsigned char)fgetc(fFile);
+                if (bytes[i] < 0x80 || bytes[i] >= 0xc0) {
+                    fprintf(stderr, "utf-8 encoded file %s contains corrupt data at offset %ld. Expected %d bytes, byte %d is invalid. First byte is %02X\n", fName, ftell(fFile), nBytes, i, (unsigned int)ch);
+                    fError = TRUE;
+                    return 0;
+                }
+            }
+
+            // Convert the bytes from the temp array to a Unicode char.
+            i = 0;
+            uint32_t  cp;
+            UTF8_NEXT_CHAR_UNSAFE(bytes, i, cp);
+            c = (UChar)cp;
+
+            if (cp >= 0x10000) {
+                // The code point needs to be broken up into a utf-16 surrogate pair.
+                //  Process first half this time through the main loop, and
+                //   remember the other half for the next time through.
+                UChar utf16Buf[3];
+                i = 0;
+                UTF16_APPEND_CHAR_UNSAFE(utf16Buf, i, cp);
+                fPending2ndSurrogate = utf16Buf[1];
+                c = utf16Buf[0];
+            }
+            break;
+        }
+    }
+    return c;
+}
+
+
+//----------------------------------------------------------------------------------------
+//
+// Main -- process command line, read in and pre-process the test file,
+// call other functions to do the actual tests.
+//
+//----------------------------------------------------------------------------------------
+// Program entry point: parses the command line, reads the whole input file
+// into the global text buffer as UTF-16 code units, then runs the selected
+// test opt_passesCount times. Exit status is 0 on success, 1 after printing
+// the usage text, and -1 on any other error.
+int main(int argc, const char** argv) {
+    if (ProcessOptions(argc, argv, opts) != TRUE || opt_help || opt_fName == 0) {
+        // The usage text is data, so it is not passed to printf() as a format.
+        fputs(gUsageString, stdout);
+        exit (1);
+    }
+    // Make sure that we've only got one API selected.
+    // When several are given, -mac wins over -unix, which wins over -win.
+    if (opt_mac || opt_unix || opt_win) opt_icu = FALSE;
+    if (opt_mac || opt_unix) opt_win = FALSE;
+    if (opt_mac) opt_unix = FALSE;
+
+    //
+    // Set up a Windows LCID
+    //
+    /*
+    if (opt_langid != 0) {
+        gWinLCID = MAKELCID(opt_langid, SORT_DEFAULT);
+    }
+    else {
+        gWinLCID = uloc_getLCID(opt_locale);
+    }
+    */
+
+    //
+    // Set the UNIX locale
+    //
+    if (opt_unix) {
+        if (setlocale(LC_ALL, opt_locale) == 0) {
+            fprintf(stderr, "setlocale(LC_ALL, %s) failed.\n", opt_locale);
+            exit(-1);
+        }
+    }
+
+    // Read in the input file.
+    //   The file must be UTF-8 or UTF-16 with a BOM (checked by UCharFile).
+    //   Its contents are stored as one contiguous run of UTF-16 code units
+    //   in the global text buffer.
+    //
+    UCharFile f(opt_fName);
+    if (f.error()) {
+        exit(-1);
+    }
+
+    // Size the buffer from the file size when it is known. That is at least
+    // the number of code units for either encoding, as each code unit comes
+    // from one or more bytes. A size of 0 or -1 (stat() failed) falls back to
+    // STARTSIZE so that the buffer is never zero-length; it grows on demand.
+    const int STARTSIZE = 70000;
+    int32_t fileSize = f.size();
+    int32_t bufSize = (fileSize > 0) ? fileSize : STARTSIZE;
+    int32_t charCount = 0;
+    text = (UChar *)malloc(bufSize*sizeof(UChar));
+    if(text == NULL) {
+        fprintf(stderr, "Allocating buffer failed\n");
+        exit(-1);
+    }
+
+    // Loop runs once per utf-16 value from the input file,
+    //   (The number of bytes read from file per loop iteration depends on external encoding.)
+    for (;;) {
+        UChar c = f.get();
+        if(f.eof()) {
+            break;
+        }
+        if (f.error()){
+            exit(-1);
+        }
+        // We now have a good UTF-16 value in c.
+        text[charCount++] = c;
+        if(charCount == bufSize) {
+            // Buffer is full: double it.
+            UChar *grown = (UChar *)realloc(text, 2*bufSize*sizeof(UChar));
+            if(grown == NULL) {
+                fprintf(stderr, "Reallocating buffer failed\n");
+                exit(-1);
+            }
+            text = grown;
+            bufSize *= 2;
+        }
+    }
+
+    if (opt_terse == FALSE) {
+        printf("file \"%s\", %d charCount code units.\n", opt_fName, charCount);
+    }
+
+    textSize = charCount;
+
+    //
+    //  Dump file contents if requested.
+    //
+    if (opt_dump) {
+        // dump file, etc... possibly
+    }
+
+    //
+    //  We've got the file read into memory.  Go do something with it.
+    //  The forward test is the default when neither -next nor -isBound is given.
+    //
+    for(int32_t i = 0; i < opt_passesCount; i++) {
+        if(opt_loopCount != 0) {
+            if(opt_isBound && !opt_next) {
+                doIsBoundTest();
+            } else {
+                doForwardTest();
+            }
+        } else if(opt_time != 0) {
+            // Time-based runs are not implemented.
+        }
+    }
+
+    // free() and delete both accept null pointers.
+    free(text);
+    text = NULL;
+    delete brkit;
+    brkit = NULL;
+
+    return 0;
+}
Property changes on: icu46/source/test/perf/ubrkperf/ubrkperfold.cpp
___________________________________________________________________
Added: svn:eol-style
+ LF
« no previous file with comments | « icu46/source/test/perf/ubrkperf/ubrkperf20.dsp ('k') | icu46/source/test/perf/ubrkperf/ubrkperfold.dsp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698