Index: source/test/perf/ubrkperf/ubrkperfold.cpp |
diff --git a/source/test/perf/ubrkperf/ubrkperfold.cpp b/source/test/perf/ubrkperf/ubrkperfold.cpp |
deleted file mode 100644 |
index bfc2e5579aac5045d4247b382e9192ca668e3707..0000000000000000000000000000000000000000 |
--- a/source/test/perf/ubrkperf/ubrkperfold.cpp |
+++ /dev/null |
@@ -1,771 +0,0 @@ |
-/******************************************************************** |
- * COPYRIGHT: |
- * Copyright (C) 2001-2012 IBM, Inc. All Rights Reserved. |
- * |
- ********************************************************************/ |
-/******************************************************************************** |
-* |
-* File ubrkperf.cpp |
-* |
-* Modification History: |
-* Name Description |
-* Vladimir Weinstein First Version, based on collperf |
-* |
-********************************************************************************* |
-*/ |
- |
-// |
-// This program tests break iterator performance |
-// Currently we test only ICU APIs with the future possibility of testing *nix & win32 APIs |
-// (if any) |
-// A text file is required as input. It must be in utf-8 or utf-16 format, |
-// and include a byte order mark. Either LE or BE format is OK. |
-// |
- |
-const char gUsageString[] = |
- "usage: ubrkperf options...\n" |
- "-help Display this message.\n" |
- "-file file_name utf-16/utf-8 format file.\n" |
- "-locale name ICU locale to use. Default is en_US\n" |
- "-langid 0x1234 Windows Language ID number. Default to value for -locale option\n" |
- " see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n" |
- "-win Run test using Windows native services. (currently not working) (ICU is default)\n" |
- "-unix Run test using Unix word breaking services. (currently not working) \n" |
- "-mac Run test using MacOSX word breaking services.\n" |
- "-uselen Use API with string lengths. Default is null-terminated strings\n" |
- "-char Use character break iterator\n" |
- "-word Use word break iterator\n" |
- "-line Use line break iterator\n" |
- "-sentence Use sentence break iterator\n" |
- "-loop nnnn Loopcount for test. Adjust for reasonable total running time.\n" |
- "-iloop n Inner Loop Count. Default = 1. Number of calls to function\n" |
- " under test at each call point. For measuring test overhead.\n" |
- "-terse Terse numbers-only output. Intended for use by scripts.\n" |
- "-dump Display stuff.\n" |
- "-capi Use C APIs instead of C++ APIs (currently not working)\n" |
- "-next Do the next test\n" |
- "-isBound Do the isBound test\n" |
- ; |
- |
- |
-#include <stdio.h> |
-#include <string.h> |
-#include <stdlib.h> |
-#include <math.h> |
-#include <locale.h> |
-#include <errno.h> |
-#include <sys/stat.h> |
- |
-#include <unicode/utypes.h> |
-#include <unicode/ucol.h> |
-#include <unicode/ucoleitr.h> |
-#include <unicode/uloc.h> |
-#include <unicode/ustring.h> |
-#include <unicode/ures.h> |
-#include <unicode/uchar.h> |
-#include <unicode/ucnv.h> |
-#include <unicode/utf8.h> |
- |
-#include <unicode/brkiter.h> |
- |
- |
-#if U_PLATFORM_HAS_WIN32_API |
-#include <windows.h> |
-#else |
-// |
-// Stubs for Windows API functions when building on UNIXes. |
-// |
-#include <sys/time.h> |
-unsigned long timeGetTime() { |
- struct timeval t; |
- gettimeofday(&t, 0); |
- unsigned long val = t.tv_sec * 1000; // Let it overflow. Who cares. |
- val += t.tv_usec / 1000; |
- return val; |
-}; |
-#define MAKELCID(a,b) 0 |
-#endif |
- |
- |
-// |
-// Command line option variables |
-// These global variables are set according to the options specified |
-// on the command line by the user. |
-char * opt_fName = 0; |
-char * opt_locale = "en_US"; |
-int opt_langid = 0; // Defaults to value corresponding to opt_locale. |
-char * opt_rules = 0; |
-UBool opt_help = FALSE; |
-int opt_time = 0; |
-int opt_loopCount = 0; |
-int opt_passesCount= 1; |
-UBool opt_terse = FALSE; |
-UBool opt_icu = TRUE; |
-UBool opt_win = FALSE; // Run with Windows native functions. |
-UBool opt_unix = FALSE; // Run with UNIX strcoll, strxfrm functions. |
-UBool opt_mac = FALSE; // Run with MacOSX word break services. |
-UBool opt_uselen = FALSE; |
-UBool opt_dump = FALSE; |
-UBool opt_char = FALSE; |
-UBool opt_word = FALSE; |
-UBool opt_line = FALSE; |
-UBool opt_sentence = FALSE; |
-UBool opt_capi = FALSE; |
- |
-UBool opt_next = FALSE; |
-UBool opt_isBound = FALSE; |
- |
- |
- |
-// |
-// Definitions for the command line options |
-// |
-struct OptSpec { |
- const char *name; |
- enum {FLAG, NUM, STRING} type; |
- void *pVar; |
-}; |
- |
-OptSpec opts[] = { |
- {"-file", OptSpec::STRING, &opt_fName}, |
- {"-locale", OptSpec::STRING, &opt_locale}, |
- {"-langid", OptSpec::NUM, &opt_langid}, |
- {"-win", OptSpec::FLAG, &opt_win}, |
- {"-unix", OptSpec::FLAG, &opt_unix}, |
- {"-mac", OptSpec::FLAG, &opt_mac}, |
- {"-uselen", OptSpec::FLAG, &opt_uselen}, |
- {"-loop", OptSpec::NUM, &opt_loopCount}, |
- {"-time", OptSpec::NUM, &opt_time}, |
- {"-passes", OptSpec::NUM, &opt_passesCount}, |
- {"-char", OptSpec::FLAG, &opt_char}, |
- {"-word", OptSpec::FLAG, &opt_word}, |
- {"-line", OptSpec::FLAG, &opt_line}, |
- {"-sentence", OptSpec::FLAG, &opt_sentence}, |
- {"-terse", OptSpec::FLAG, &opt_terse}, |
- {"-dump", OptSpec::FLAG, &opt_dump}, |
- {"-capi", OptSpec::FLAG, &opt_capi}, |
- {"-next", OptSpec::FLAG, &opt_next}, |
- {"-isBound", OptSpec::FLAG, &opt_isBound}, |
- {"-help", OptSpec::FLAG, &opt_help}, |
- {"-?", OptSpec::FLAG, &opt_help}, |
- {0, OptSpec::FLAG, 0} |
-}; |
- |
- |
-//--------------------------------------------------------------------------- |
-// |
-// Global variables pointing to and describing the test file |
-// |
-//--------------------------------------------------------------------------- |
- |
-//DWORD gWinLCID; |
-BreakIterator *brkit = NULL; |
-UChar *text = NULL; |
-int32_t textSize = 0; |
- |
- |
- |
-#if U_PLATFORM_IS_DARWIN_BASED |
-#include <ApplicationServices/ApplicationServices.h> |
-enum{ |
- kUCTextBreakAllMask = (kUCTextBreakClusterMask | kUCTextBreakWordMask | kUCTextBreakLineMask) |
- }; |
-UCTextBreakType breakTypes[4] = {kUCTextBreakCharMask, kUCTextBreakClusterMask, kUCTextBreakWordMask, kUCTextBreakLineMask}; |
-TextBreakLocatorRef breakRef; |
-UCTextBreakType macBreakType; |
- |
-void createMACBrkIt() { |
- OSStatus status = noErr; |
- LocaleRef lref; |
- status = LocaleRefFromLocaleString(opt_locale, &lref); |
- status = UCCreateTextBreakLocator(lref, 0, kUCTextBreakAllMask, (TextBreakLocatorRef*)&breakRef); |
- if(opt_char == TRUE) { |
- macBreakType = kUCTextBreakClusterMask; |
- } else if(opt_word == TRUE) { |
- macBreakType = kUCTextBreakWordMask; |
- } else if(opt_line == TRUE) { |
- macBreakType = kUCTextBreakLineMask; |
- } else if(opt_sentence == TRUE) { |
- // error |
- // brkit = BreakIterator::createSentenceInstance(opt_locale, status); |
- } else { |
- // default is character iterator |
- macBreakType = kUCTextBreakClusterMask; |
- } |
-} |
-#endif |
- |
-void createICUBrkIt() { |
- // |
- // Set up an ICU break iterator |
- // |
- UErrorCode status = U_ZERO_ERROR; |
- if(opt_char == TRUE) { |
- brkit = BreakIterator::createCharacterInstance(opt_locale, status); |
- } else if(opt_word == TRUE) { |
- brkit = BreakIterator::createWordInstance(opt_locale, status); |
- } else if(opt_line == TRUE) { |
- brkit = BreakIterator::createLineInstance(opt_locale, status); |
- } else if(opt_sentence == TRUE) { |
- brkit = BreakIterator::createSentenceInstance(opt_locale, status); |
- } else { |
- // default is character iterator |
- brkit = BreakIterator::createCharacterInstance(opt_locale, status); |
- } |
- if (status==U_USING_DEFAULT_WARNING && opt_terse==FALSE) { |
- fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", opt_locale); |
- } |
- if (status==U_USING_FALLBACK_WARNING && opt_terse==FALSE) { |
- fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", opt_locale); |
- } |
- |
-} |
- |
-//--------------------------------------------------------------------------- |
-// |
-// ProcessOptions() Function to read the command line options. |
-// |
-//--------------------------------------------------------------------------- |
-UBool ProcessOptions(int argc, const char **argv, OptSpec opts[]) |
-{ |
- int i; |
- int argNum; |
- const char *pArgName; |
- OptSpec *pOpt; |
- |
- for (argNum=1; argNum<argc; argNum++) { |
- pArgName = argv[argNum]; |
- for (pOpt = opts; pOpt->name != 0; pOpt++) { |
- if (strcmp(pOpt->name, pArgName) == 0) { |
- switch (pOpt->type) { |
- case OptSpec::FLAG: |
- *(UBool *)(pOpt->pVar) = TRUE; |
- break; |
- case OptSpec::STRING: |
- argNum ++; |
- if (argNum >= argc) { |
- fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name); |
- return FALSE; |
- } |
- *(const char **)(pOpt->pVar) = argv[argNum]; |
- break; |
- case OptSpec::NUM: |
- argNum ++; |
- if (argNum >= argc) { |
- fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name); |
- return FALSE; |
- } |
- char *endp; |
- i = strtol(argv[argNum], &endp, 0); |
- if (endp == argv[argNum]) { |
- fprintf(stderr, "integer value expected for \"%s\" option.\n", pOpt->name); |
- return FALSE; |
- } |
- *(int *)(pOpt->pVar) = i; |
- } |
- break; |
- } |
- } |
- if (pOpt->name == 0) |
- { |
- fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName); |
- return FALSE; |
- } |
- } |
-return TRUE; |
-} |
- |
- |
-void doForwardTest() { |
- if (opt_terse == FALSE) { |
- printf("Doing the forward test\n"); |
- } |
- int32_t noBreaks = 0; |
- int32_t i = 0; |
- unsigned long startTime = timeGetTime(); |
- unsigned long elapsedTime = 0; |
- if(opt_icu) { |
- createICUBrkIt(); |
- brkit->setText(UnicodeString(text, textSize)); |
- brkit->first(); |
- if (opt_terse == FALSE) { |
- printf("Warmup\n"); |
- } |
- int j; |
- while((j = brkit->next()) != BreakIterator::DONE) { |
- noBreaks++; |
- //fprintf(stderr, "%d ", j); |
- } |
- |
- if (opt_terse == FALSE) { |
- printf("Measure\n"); |
- } |
- startTime = timeGetTime(); |
- for(i = 0; i < opt_loopCount; i++) { |
- brkit->first(); |
- while(brkit->next() != BreakIterator::DONE) { |
- } |
- } |
- |
- elapsedTime = timeGetTime()-startTime; |
- } else if(opt_mac) { |
-#if U_PLATFORM_IS_DARWIN_BASED |
- createMACBrkIt(); |
- UniChar* filePtr = text; |
- OSStatus status = noErr; |
- UniCharCount startOffset = 0, breakOffset = 0, numUniChars = textSize; |
- startOffset = 0; |
- //printf("\t---Search forward--\n"); |
- |
- while (startOffset < numUniChars) |
- { |
- status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEdgeMask, filePtr, numUniChars, |
- startOffset, &breakOffset); |
- //require_action(status == noErr, EXIT, printf( "**UCFindTextBreak failed: startOffset %d, status %d\n", (int)startOffset, (int)status)); |
- //require_action((breakOffset <= numUniChars),EXIT, printf("**UCFindTextBreak breakOffset too big: startOffset %d, breakOffset %d\n", (int)startOffset, (int)breakOffset)); |
- |
- // Output break |
- //printf("\t%d\n", (int)breakOffset); |
- |
- // Increment counters |
- noBreaks++; |
- startOffset = breakOffset; |
- } |
- startTime = timeGetTime(); |
- for(i = 0; i < opt_loopCount; i++) { |
- startOffset = 0; |
- |
- while (startOffset < numUniChars) |
- { |
- status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEdgeMask, filePtr, numUniChars, |
- startOffset, &breakOffset); |
- // Increment counters |
- startOffset = breakOffset; |
- } |
- } |
- elapsedTime = timeGetTime()-startTime; |
- UCDisposeTextBreakLocator(&breakRef); |
-#endif |
- |
- |
- } |
- |
- |
- if (opt_terse == FALSE) { |
- int32_t loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCount)); |
- int32_t timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize)); |
- int32_t timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreaks)); |
- printf("forward break iteration average loop time %d\n", loopTime); |
- printf("number of code units %d average time per code unit %d\n", textSize, timePerCU); |
- printf("number of breaks %d average time per break %d\n", noBreaks, timePerBreak); |
- } else { |
- printf("time=%d\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize); |
- } |
- |
- |
-} |
- |
-void doIsBoundTest() { |
- int32_t noBreaks = 0, hit = 0; |
- int32_t i = 0, j = 0; |
- unsigned long startTime = timeGetTime(); |
- unsigned long elapsedTime = 0; |
- createICUBrkIt(); |
- brkit->setText(UnicodeString(text, textSize)); |
- brkit->first(); |
- for(j = 0; j < textSize; j++) { |
- if(brkit->isBoundary(j)) { |
- noBreaks++; |
- //fprintf(stderr, "%d ", j); |
- } |
- } |
- /* |
- while(brkit->next() != BreakIterator::DONE) { |
- noBreaks++; |
- } |
- */ |
- |
- startTime = timeGetTime(); |
- for(i = 0; i < opt_loopCount; i++) { |
- for(j = 0; j < textSize; j++) { |
- if(brkit->isBoundary(j)) { |
- hit++; |
- } |
- } |
- } |
- |
- elapsedTime = timeGetTime()-startTime; |
- int32_t loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCount)); |
- if (opt_terse == FALSE) { |
- int32_t timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize)); |
- int32_t timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreaks)); |
- printf("forward break iteration average loop time %d\n", loopTime); |
- printf("number of code units %d average time per code unit %d\n", textSize, timePerCU); |
- printf("number of breaks %d average time per break %d\n", noBreaks, timePerBreak); |
- } else { |
- printf("time=%d\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize); |
- } |
-} |
- |
-//---------------------------------------------------------------------------------------- |
-// |
-// UnixConvert -- Convert the lines of the file to the encoding for UNIX |
-// Since it appears that Unicode support is going in the general |
-// direction of the use of UTF-8 locales, that is the approach |
-// that is used here. |
-// |
-//---------------------------------------------------------------------------------------- |
-void UnixConvert() { |
-#if 0 |
- int line; |
- |
- UConverter *cvrtr; // An ICU code page converter. |
- UErrorCode status = U_ZERO_ERROR; |
- |
- |
- cvrtr = ucnv_open("utf-8", &status); // we are just doing UTF-8 locales for now. |
- if (U_FAILURE(status)) { |
- fprintf(stderr, "ICU Converter open failed.: %d\n", &status); |
- exit(-1); |
- } |
- // redo for unix |
- for (line=0; line < gNumFileLines; line++) { |
- int sizeNeeded = ucnv_fromUChars(cvrtr, |
- 0, // ptr to target buffer. |
- 0, // length of target buffer. |
- gFileLines[line].name, |
- -1, // source is null terminated |
- &status); |
- if (status != U_BUFFER_OVERFLOW_ERROR && status != U_ZERO_ERROR) { |
- fprintf(stderr, "Conversion from Unicode, something is wrong.\n"); |
- exit(-1); |
- } |
- status = U_ZERO_ERROR; |
- gFileLines[line].unixName = new char[sizeNeeded+1]; |
- sizeNeeded = ucnv_fromUChars(cvrtr, |
- gFileLines[line].unixName, // ptr to target buffer. |
- sizeNeeded+1, // length of target buffer. |
- gFileLines[line].name, |
- -1, // source is null terminated |
- &status); |
- if (U_FAILURE(status)) { |
- fprintf(stderr, "ICU Conversion Failed.: %d\n", status); |
- exit(-1); |
- } |
- gFileLines[line].unixName[sizeNeeded] = 0; |
- }; |
- ucnv_close(cvrtr); |
-#endif |
-} |
- |
- |
-//---------------------------------------------------------------------------------------- |
-// |
-// class UCharFile Class to hide all the gorp to read a file in |
-// and produce a stream of UChars. |
-// |
-//---------------------------------------------------------------------------------------- |
-class UCharFile { |
-public: |
- UCharFile(const char *fileName); |
- ~UCharFile(); |
- UChar get(); |
- UBool eof() {return fEof;}; |
- UBool error() {return fError;}; |
- int32_t size() { return fFileSize; }; |
- |
-private: |
- UCharFile (const UCharFile &other) {}; // No copy constructor. |
- UCharFile & operator = (const UCharFile &other) {return *this;}; // No assignment op |
- |
- FILE *fFile; |
- const char *fName; |
- UBool fEof; |
- UBool fError; |
- UChar fPending2ndSurrogate; |
- int32_t fFileSize; |
- |
- enum {UTF16LE, UTF16BE, UTF8} fEncoding; |
-}; |
- |
-UCharFile::UCharFile(const char * fileName) { |
- fEof = FALSE; |
- fError = FALSE; |
- fName = fileName; |
- struct stat buf; |
- int32_t result = stat(fileName, &buf); |
- if(result != 0) { |
- fprintf(stderr, "Error getting info\n"); |
- fFileSize = -1; |
- } else { |
- fFileSize = buf.st_size; |
- } |
- fFile = fopen(fName, "rb"); |
- fPending2ndSurrogate = 0; |
- if (fFile == NULL) { |
- fprintf(stderr, "Can not open file \"%s\"\n", opt_fName); |
- fError = TRUE; |
- return; |
- } |
- // |
- // Look for the byte order mark at the start of the file. |
- // |
- int BOMC1, BOMC2, BOMC3; |
- BOMC1 = fgetc(fFile); |
- BOMC2 = fgetc(fFile); |
- |
- if (BOMC1 == 0xff && BOMC2 == 0xfe) { |
- fEncoding = UTF16LE; } |
- else if (BOMC1 == 0xfe && BOMC2 == 0xff) { |
- fEncoding = UTF16BE; } |
- else if (BOMC1 == 0xEF && BOMC2 == 0xBB && (BOMC3 = fgetc(fFile)) == 0xBF ) { |
- fEncoding = UTF8; } |
- else |
- { |
- fprintf(stderr, "collperf: file \"%s\" encoding must be UTF-8 or UTF-16, and " |
- "must include a BOM.\n", fileName); |
- fError = true; |
- return; |
- } |
-} |
- |
- |
-UCharFile::~UCharFile() { |
- fclose(fFile); |
-} |
- |
- |
- |
-UChar UCharFile::get() { |
- UChar c; |
- switch (fEncoding) { |
- case UTF16LE: |
- { |
- int cL, cH; |
- cL = fgetc(fFile); |
- cH = fgetc(fFile); |
- c = cL | (cH << 8); |
- if (cH == EOF) { |
- c = 0; |
- fEof = TRUE; |
- } |
- break; |
- } |
- case UTF16BE: |
- { |
- int cL, cH; |
- cH = fgetc(fFile); |
- cL = fgetc(fFile); |
- c = cL | (cH << 8); |
- if (cL == EOF) { |
- c = 0; |
- fEof = TRUE; |
- } |
- break; |
- } |
- case UTF8: |
- { |
- if (fPending2ndSurrogate != 0) { |
- c = fPending2ndSurrogate; |
- fPending2ndSurrogate = 0; |
- break; |
- } |
- |
- int ch = fgetc(fFile); // Note: c and ch are separate cause eof test doesn't work on UChar type. |
- if (ch == EOF) { |
- c = 0; |
- fEof = TRUE; |
- break; |
- } |
- |
- if (ch <= 0x7f) { |
- // It's ascii. No further utf-8 conversion. |
- c = ch; |
- break; |
- } |
- |
- // Figure out the lenght of the char and read the rest of the bytes |
- // into a temp array. |
- int nBytes; |
- if (ch >= 0xF0) {nBytes=4;} |
- else if (ch >= 0xE0) {nBytes=3;} |
- else if (ch >= 0xC0) {nBytes=2;} |
- else { |
- fprintf(stderr, "not likely utf-8 encoded file %s contains corrupt data at offset %d.\n", fName, ftell(fFile)); |
- fError = TRUE; |
- return 0; |
- } |
- |
- unsigned char bytes[10]; |
- bytes[0] = (unsigned char)ch; |
- int i; |
- for (i=1; i<nBytes; i++) { |
- bytes[i] = fgetc(fFile); |
- if (bytes[i] < 0x80 || bytes[i] >= 0xc0) { |
- fprintf(stderr, "utf-8 encoded file %s contains corrupt data at offset %d. Expected %d bytes, byte %d is invalid. First byte is %02X\n", fName, ftell(fFile), nBytes, i, ch); |
- fError = TRUE; |
- return 0; |
- } |
- } |
- |
- // Convert the bytes from the temp array to a Unicode char. |
- i = 0; |
- uint32_t cp; |
- U8_NEXT_UNSAFE(bytes, i, cp); |
- c = (UChar)cp; |
- |
- if (cp >= 0x10000) { |
- // The code point needs to be broken up into a utf-16 surrogate pair. |
- // Process first half this time through the main loop, and |
- // remember the other half for the next time through. |
- UChar utf16Buf[3]; |
- i = 0; |
- UTF16_APPEND_CHAR_UNSAFE(utf16Buf, i, cp); |
- fPending2ndSurrogate = utf16Buf[1]; |
- c = utf16Buf[0]; |
- } |
- break; |
- }; |
- } |
- return c; |
-} |
- |
- |
-//---------------------------------------------------------------------------------------- |
-// |
-// Main -- process command line, read in and pre-process the test file, |
-// call other functions to do the actual tests. |
-// |
-//---------------------------------------------------------------------------------------- |
-int main(int argc, const char** argv) { |
- if (ProcessOptions(argc, argv, opts) != TRUE || opt_help || opt_fName == 0) { |
- printf(gUsageString); |
- exit (1); |
- } |
- // Make sure that we've only got one API selected. |
- if (opt_mac || opt_unix || opt_win) opt_icu = FALSE; |
- if (opt_mac || opt_unix) opt_win = FALSE; |
- if (opt_mac) opt_unix = FALSE; |
- |
- UErrorCode status = U_ZERO_ERROR; |
- |
- |
- |
- // |
- // Set up a Windows LCID |
- // |
- /* |
- if (opt_langid != 0) { |
- gWinLCID = MAKELCID(opt_langid, SORT_DEFAULT); |
- } |
- else { |
- gWinLCID = uloc_getLCID(opt_locale); |
- } |
- */ |
- |
- // |
- // Set the UNIX locale |
- // |
- if (opt_unix) { |
- if (setlocale(LC_ALL, opt_locale) == 0) { |
- fprintf(stderr, "setlocale(LC_ALL, %s) failed.\n", opt_locale); |
- exit(-1); |
- } |
- } |
- |
- // Read in the input file. |
- // File assumed to be utf-16. |
- // Lines go onto heap buffers. Global index array to line starts is created. |
- // Lines themselves are null terminated. |
- // |
- |
- UCharFile f(opt_fName); |
- if (f.error()) { |
- exit(-1); |
- } |
- int32_t fileSize = f.size(); |
- const int STARTSIZE = 70000; |
- int32_t bufSize = 0; |
- int32_t charCount = 0; |
- if(fileSize != -1) { |
- text = (UChar *)malloc(fileSize*sizeof(UChar)); |
- bufSize = fileSize; |
- } else { |
- text = (UChar *)malloc(STARTSIZE*sizeof(UChar)); |
- bufSize = STARTSIZE; |
- } |
- if(text == NULL) { |
- fprintf(stderr, "Allocating buffer failed\n"); |
- exit(-1); |
- } |
- |
- |
- // Read the file, split into lines, and save in memory. |
- // Loop runs once per utf-16 value from the input file, |
- // (The number of bytes read from file per loop iteration depends on external encoding.) |
- for (;;) { |
- |
- UChar c = f.get(); |
- if(f.eof()) { |
- break; |
- } |
- if (f.error()){ |
- exit(-1); |
- } |
- // We now have a good UTF-16 value in c. |
- text[charCount++] = c; |
- if(charCount == bufSize) { |
- text = (UChar *)realloc(text, 2*bufSize*sizeof(UChar)); |
- if(text == NULL) { |
- fprintf(stderr, "Reallocating buffer failed\n"); |
- exit(-1); |
- } |
- bufSize *= 2; |
- } |
- } |
- |
- |
- if (opt_terse == FALSE) { |
- printf("file \"%s\", %d charCount code units.\n", opt_fName, charCount); |
- } |
- |
- textSize = charCount; |
- |
- |
- |
- |
- // |
- // Dump file contents if requested. |
- // |
- if (opt_dump) { |
- // dump file, etc... possibly |
- } |
- |
- |
- // |
- // We've got the file read into memory. Go do something with it. |
- // |
- int32_t i = 0; |
- for(i = 0; i < opt_passesCount; i++) { |
- if(opt_loopCount != 0) { |
- if(opt_next) { |
- doForwardTest(); |
- } else if(opt_isBound) { |
- doIsBoundTest(); |
- } else { |
- doForwardTest(); |
- } |
- } else if(opt_time != 0) { |
- |
- } |
- } |
- |
- if(text != NULL) { |
- free(text); |
- } |
- if(brkit != NULL) { |
- delete brkit; |
- } |
- |
- return 0; |
-} |