source/test/perf/ubrkperf/ubrkperfold.cpp - Issue 2435373002: Delete source/test

Side by Side Diff: source/test/perf/ubrkperf/ubrkperfold.cpp

Issue 2435373002: Delete source/test (Closed)

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 /********************************************************************

2 * COPYRIGHT:

3 * Copyright (C) 2001-2012 IBM, Inc. All Rights Reserved.

4 *

5 ********************************************************************/

6 /******************************************************************************* *

7 *

8 * File ubrkperf.cpp

9 *

10 * Modification History:

11 * Name Description

12 * Vladimir Weinstein First Version, based on collperf

13 *

14 ******************************************************************************** *

15 */

16

17 //

18 // This program tests break iterator performance

19 // Currently we test only ICU APIs, with the future possibility of testing *nix & win32 APIs

20 // (if any)

21 // A text file is required as input. It must be in utf-8 or utf-16 format,

22 // and include a byte order mark. Either LE or BE format is OK.

23 //

24

// Help text printed by main() when the options are invalid, when -help/-? is
// given, or when no -file was supplied.  Every option listed here has a
// matching entry in opts[]; "-passes" replaces the former "-iloop" entry,
// which the option table never accepted.
const char gUsageString[] =
    "usage:  ubrkperf options...\n"
    "-help                      Display this message.\n"
    "-file file_name            utf-16/utf-8 format file.\n"
    "-locale name               ICU locale to use.  Default is en_US\n"
    "-langid 0x1234             Windows Language ID number.  Default to value for -locale option\n"
    "                              see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n"
    "-win                       Run test using Windows native services. (currently not working) (ICU is default)\n"
    "-unix                      Run test using Unix word breaking services. (currently not working) \n"
    "-mac                       Run test using MacOSX word breaking services.\n"
    "-uselen                    Use API with string lengths.  Default is null-terminated strings\n"
    "-char                      Use character break iterator\n"
    "-word                      Use word break iterator\n"
    "-line                      Use line break iterator\n"
    "-sentence                  Use sentence break iterator\n"
    "-loop nnnn                 Loopcount for test.  Adjust for reasonable total running time.\n"
    "-passes n                  Number of times the selected test is repeated.  Default = 1.\n"
    "-terse                     Terse numbers-only output.  Intended for use by scripts.\n"
    "-dump                      Display stuff.\n"
    "-capi                      Use C APIs instead of C++ APIs (currently not working)\n"
    "-next                      Do the next test\n"
    "-isBound                   Do the isBound test\n"
    ;

49

50

51 #include <stdio.h>

52 #include <string.h>

53 #include <stdlib.h>

54 #include <math.h>

55 #include <locale.h>

56 #include <errno.h>

57 #include <sys/stat.h>

58

59 #include <unicode/utypes.h>

60 #include <unicode/ucol.h>

61 #include <unicode/ucoleitr.h>

62 #include <unicode/uloc.h>

63 #include <unicode/ustring.h>

64 #include <unicode/ures.h>

65 #include <unicode/uchar.h>

66 #include <unicode/ucnv.h>

67 #include <unicode/utf8.h>

68

69 #include <unicode/brkiter.h>

70

71

72 #if U_PLATFORM_HAS_WIN32_API

73 #include <windows.h>

74 #else

75 //

76 // Stubs for Windows API functions when building on UNIXes.

77 //

78 #include <sys/time.h>

// Stand-in for the Win32 timeGetTime(): returns the current wall-clock time
// in milliseconds.  The value is allowed to wrap; callers only ever use the
// difference between two readings.
unsigned long timeGetTime() {
    struct timeval now;
    gettimeofday(&now, NULL);
    // Convert to unsigned before scaling so that wrap-around is well defined
    // (a signed time_t multiplication could overflow).
    unsigned long millis = (unsigned long)now.tv_sec * 1000UL;
    millis += (unsigned long)now.tv_usec / 1000UL;
    return millis;
}

86 #define MAKELCID(a,b) 0

87 #endif

88

89

90 //

91 // Command line option variables

92 // These global variables are set according to the options specified

93 // on the command line by the user.

94 char * opt_fName = 0;

95 char * opt_locale = "en_US";

96 int opt_langid = 0; // Defaults to value corresponding to opt_loc ale.

97 char * opt_rules = 0;

98 UBool opt_help = FALSE;

99 int opt_time = 0;

100 int opt_loopCount = 0;

101 int opt_passesCount= 1;

102 UBool opt_terse = FALSE;

103 UBool opt_icu = TRUE;

104 UBool opt_win = FALSE; // Run with Windows native functions.

105 UBool opt_unix = FALSE; // Run with UNIX strcoll, strxfrm functions.

106 UBool opt_mac = FALSE; // Run with MacOSX word break services.

107 UBool opt_uselen = FALSE;

108 UBool opt_dump = FALSE;

109 UBool opt_char = FALSE;

110 UBool opt_word = FALSE;

111 UBool opt_line = FALSE;

112 UBool opt_sentence = FALSE;

113 UBool opt_capi = FALSE;

114

115 UBool opt_next = FALSE;

116 UBool opt_isBound = FALSE;

117

118

119

120 //

121 // Definitions for the command line options

122 //

// One row of the command line option table: the option's spelling, how its
// value is parsed, and the address of the variable that receives it.
struct OptSpec {
    // FLAG takes no argument; NUM and STRING consume the following argv entry.
    enum Kind { FLAG, NUM, STRING };

    const char *name;   // Option text including the leading '-'; 0 ends the table.
    Kind        type;   // How ProcessOptions() interprets the option.
    void       *pVar;   // Target variable; its real type is implied by 'type'.
};

128

129 OptSpec opts[] = {

130 {"-file", OptSpec::STRING, &opt_fName},

131 {"-locale", OptSpec::STRING, &opt_locale},

132 {"-langid", OptSpec::NUM, &opt_langid},

133 {"-win", OptSpec::FLAG, &opt_win},

134 {"-unix", OptSpec::FLAG, &opt_unix},

135 {"-mac", OptSpec::FLAG, &opt_mac},

136 {"-uselen", OptSpec::FLAG, &opt_uselen},

137 {"-loop", OptSpec::NUM, &opt_loopCount},

138 {"-time", OptSpec::NUM, &opt_time},

139 {"-passes", OptSpec::NUM, &opt_passesCount},

140 {"-char", OptSpec::FLAG, &opt_char},

141 {"-word", OptSpec::FLAG, &opt_word},

142 {"-line", OptSpec::FLAG, &opt_line},

143 {"-sentence", OptSpec::FLAG, &opt_sentence},

144 {"-terse", OptSpec::FLAG, &opt_terse},

145 {"-dump", OptSpec::FLAG, &opt_dump},

146 {"-capi", OptSpec::FLAG, &opt_capi},

147 {"-next", OptSpec::FLAG, &opt_next},

148 {"-isBound", OptSpec::FLAG, &opt_isBound},

149 {"-help", OptSpec::FLAG, &opt_help},

150 {"-?", OptSpec::FLAG, &opt_help},

151 {0, OptSpec::FLAG, 0}

152 };

153

154

155 //---------------------------------------------------------------------------

156 //

157 // Global variables pointing to and describing the test file

158 //

159 //---------------------------------------------------------------------------

160

161 //DWORD gWinLCID;

162 BreakIterator *brkit = NULL;

163 UChar *text = NULL;

164 int32_t textSize = 0;

165

166

167

#if U_PLATFORM_IS_DARWIN_BASED
#include <ApplicationServices/ApplicationServices.h>
enum{
  kUCTextBreakAllMask = (kUCTextBreakClusterMask | kUCTextBreakWordMask | kUCTextBreakLineMask)
};
UCTextBreakType breakTypes[4] = {kUCTextBreakCharMask, kUCTextBreakClusterMask, kUCTextBreakWordMask, kUCTextBreakLineMask};
TextBreakLocatorRef breakRef;      // Locator created by createMACBrkIt().
UCTextBreakType macBreakType;      // Break type passed to UCFindTextBreak().

// Creates the MacOSX text break locator for opt_locale and selects the break
// type from the -char / -word / -line options (character clusters when none
// is given).  Exits the program if the locator cannot be created or if
// -sentence was requested, for which no break type is selected here.
void createMACBrkIt() {
  LocaleRef lref;
  OSStatus status = LocaleRefFromLocaleString(opt_locale, &lref);
  if (status != noErr) {
    fprintf(stderr, "LocaleRefFromLocaleString failed for %s, status %d\n", opt_locale, (int)status);
    exit(-1);
  }
  status = UCCreateTextBreakLocator(lref, 0, kUCTextBreakAllMask, (TextBreakLocatorRef*)&breakRef);
  if (status != noErr) {
    fprintf(stderr, "UCCreateTextBreakLocator failed for %s, status %d\n", opt_locale, (int)status);
    exit(-1);
  }
  if(opt_char == TRUE) {
    macBreakType = kUCTextBreakClusterMask;
  } else if(opt_word == TRUE) {
    macBreakType = kUCTextBreakWordMask;
  } else if(opt_line == TRUE) {
    macBreakType = kUCTextBreakLineMask;
  } else if(opt_sentence == TRUE) {
    // The original code marked this case as an error and left macBreakType
    // unset; report it instead of running the test with no break type.
    fprintf(stderr, "-sentence is not supported together with -mac\n");
    exit(-1);
  } else {
    // default is character iterator
    macBreakType = kUCTextBreakClusterMask;
  }
}
#endif

197

198 void createICUBrkIt() {

199 //

200 // Set up an ICU break iterator

201 //

202 UErrorCode status = U_ZERO_ERROR;

203 if(opt_char == TRUE) {

204 brkit = BreakIterator::createCharacterInstance(opt_locale, status);

205 } else if(opt_word == TRUE) {

206 brkit = BreakIterator::createWordInstance(opt_locale, status);

207 } else if(opt_line == TRUE) {

208 brkit = BreakIterator::createLineInstance(opt_locale, status);

209 } else if(opt_sentence == TRUE) {

210 brkit = BreakIterator::createSentenceInstance(opt_locale, status);

211 } else {

212 // default is character iterator

213 brkit = BreakIterator::createCharacterInstance(opt_locale, status);

214 }

215 if (status==U_USING_DEFAULT_WARNING && opt_terse==FALSE) {

216 fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", opt_locale);

217 }

218 if (status==U_USING_FALLBACK_WARNING && opt_terse==FALSE) {

219 fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", opt_locale);

220 }

221

222 }

223

224 //---------------------------------------------------------------------------

225 //

226 // ProcessOptions() Function to read the command line options.

227 //

228 //---------------------------------------------------------------------------

229 UBool ProcessOptions(int argc, const char **argv, OptSpec opts[])

230 {

231 int i;

232 int argNum;

233 const char *pArgName;

234 OptSpec *pOpt;

235

236 for (argNum=1; argNum<argc; argNum++) {

237 pArgName = argv[argNum];

238 for (pOpt = opts; pOpt->name != 0; pOpt++) {

239 if (strcmp(pOpt->name, pArgName) == 0) {

240 switch (pOpt->type) {

241 case OptSpec::FLAG:

242 (UBool )(pOpt->pVar) = TRUE;

243 break;

244 case OptSpec::STRING:

245 argNum ++;

246 if (argNum >= argc) {

247 fprintf(stderr, "value expected for \"%s\" option.\n", p Opt->name);

248 return FALSE;

249 }

250 (const char *)(pOpt->pVar) = argv[argNum];

251 break;

252 case OptSpec::NUM:

253 argNum ++;

254 if (argNum >= argc) {

255 fprintf(stderr, "value expected for \"%s\" option.\n", p Opt->name);

256 return FALSE;

257 }

258 char *endp;

259 i = strtol(argv[argNum], &endp, 0);

260 if (endp == argv[argNum]) {

261 fprintf(stderr, "integer value expected for \"%s\" optio n.\n", pOpt->name);

262 return FALSE;

263 }

264 (int )(pOpt->pVar) = i;

265 }

266 break;

267 }

268 }

269 if (pOpt->name == 0)

270 {

271 fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);

272 return FALSE;

273 }

274 }

275 return TRUE;

276 }

277

278

279 void doForwardTest() {

280 if (opt_terse == FALSE) {

281 printf("Doing the forward test\n");

282 }

283 int32_t noBreaks = 0;

284 int32_t i = 0;

285 unsigned long startTime = timeGetTime();

286 unsigned long elapsedTime = 0;

287 if(opt_icu) {

288 createICUBrkIt();

289 brkit->setText(UnicodeString(text, textSize));

290 brkit->first();

291 if (opt_terse == FALSE) {

292 printf("Warmup\n");

293 }

294 int j;

295 while((j = brkit->next()) != BreakIterator::DONE) {

296 noBreaks++;

297 //fprintf(stderr, "%d ", j);

298 }

299

300 if (opt_terse == FALSE) {

301 printf("Measure\n");

302 }

303 startTime = timeGetTime();

304 for(i = 0; i < opt_loopCount; i++) {

305 brkit->first();

306 while(brkit->next() != BreakIterator::DONE) {

307 }

308 }

309

310 elapsedTime = timeGetTime()-startTime;

311 } else if(opt_mac) {

312 #if U_PLATFORM_IS_DARWIN_BASED

313 createMACBrkIt();

314 UniChar* filePtr = text;

315 OSStatus status = noErr;

316 UniCharCount startOffset = 0, breakOffset = 0, numUniChars = textSize;

317 startOffset = 0;

318 //printf("\t---Search forward--\n");

319

320 while (startOffset < numUniChars)

321 {

322 status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEdge Mask, filePtr, numUniChars,

323 startOffset, &breakOffset);

324 //require_action(status == noErr, EXIT, printf( "**UCFindTextBreak failed: startOffset %d, status %d\n", (int)startOffset, (int)status));

325 //require_action((breakOffset <= numUniChars),EXIT, printf("**UCFindTextBr eak breakOffset too big: startOffset %d, breakOffset %d\n", (int)startOffset, (i nt)breakOffset));

326

327 // Output break

328 //printf("\t%d\n", (int)breakOffset);

329

330 // Increment counters

331 noBreaks++;

332 startOffset = breakOffset;

333 }

334 startTime = timeGetTime();

335 for(i = 0; i < opt_loopCount; i++) {

336 startOffset = 0;

337

338 while (startOffset < numUniChars)

339 {

340 status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEd geMask, filePtr, numUniChars,

341 startOffset, &breakOffset);

342 // Increment counters

343 startOffset = breakOffset;

344 }

345 }

346 elapsedTime = timeGetTime()-startTime;

347 UCDisposeTextBreakLocator(&breakRef);

348 #endif

349

350

351 }

352

353

354 if (opt_terse == FALSE) {

355 int32_t loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCoun t));

356 int32_t timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize)) ;

357 int32_t timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreak s));

358 printf("forward break iteration average loop time %d\n", loopTime);

359 printf("number of code units %d average time per code unit %d\n", textSize , timePerCU);

360 printf("number of breaks %d average time per break %d\n", noBreaks, timePe rBreak);

361 } else {

362 printf("time=%d\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize);

363 }

364

365

366 }

367

368 void doIsBoundTest() {

369 int32_t noBreaks = 0, hit = 0;

370 int32_t i = 0, j = 0;

371 unsigned long startTime = timeGetTime();

372 unsigned long elapsedTime = 0;

373 createICUBrkIt();

374 brkit->setText(UnicodeString(text, textSize));

375 brkit->first();

376 for(j = 0; j < textSize; j++) {

377 if(brkit->isBoundary(j)) {

378 noBreaks++;

379 //fprintf(stderr, "%d ", j);

380 }

381 }

382 /*

383 while(brkit->next() != BreakIterator::DONE) {

384 noBreaks++;

385 }

386 */

387

388 startTime = timeGetTime();

389 for(i = 0; i < opt_loopCount; i++) {

390 for(j = 0; j < textSize; j++) {

391 if(brkit->isBoundary(j)) {

392 hit++;

393 }

394 }

395 }

396

397 elapsedTime = timeGetTime()-startTime;

398 int32_t loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCoun t));

399 if (opt_terse == FALSE) {

400 int32_t timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize)) ;

401 int32_t timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreak s));

402 printf("forward break iteration average loop time %d\n", loopTime);

403 printf("number of code units %d average time per code unit %d\n", textSize , timePerCU);

404 printf("number of breaks %d average time per break %d\n", noBreaks, timePe rBreak);

405 } else {

406 printf("time=%d\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize);

407 }

408 }

409

410 //------------------------------------------------------------------------------ ----------

411 //

412 // UnixConvert -- Convert the lines of the file to the encoding for UNIX

413 // Since it appears that Unicode support is going in the gene ral

414 // direction of the use of UTF-8 locales, that is the approac h

415 // that is used here.

416 //

417 //------------------------------------------------------------------------------ ----------

void UnixConvert() {
    // Intentionally empty.  The former body was compiled out with "#if 0":
    // it converted per-line data through a UTF-8 converter but referred to
    // gNumFileLines and gFileLines, which this file does not define.
}

460

461

462 //------------------------------------------------------------------------------ ----------

463 //

464 // class UCharFile Class to hide all the gorp to read a file in

465 // and produce a stream of UChars.

466 //

467 //------------------------------------------------------------------------------ ----------

468 class UCharFile {

469 public:

470 UCharFile(const char *fileName);

471 ~UCharFile();

472 UChar get();

473 UBool eof() {return fEof;};

474 UBool error() {return fError;};

475 int32_t size() { return fFileSize; };

476

477 private:

478 UCharFile (const UCharFile &other) {}; // No copy co nstructor.

479 UCharFile & operator = (const UCharFile &other) {return *this;}; // No ass ignment op

480

481 FILE *fFile;

482 const char *fName;

483 UBool fEof;

484 UBool fError;

485 UChar fPending2ndSurrogate;

486 int32_t fFileSize;

487

488 enum {UTF16LE, UTF16BE, UTF8} fEncoding;

489 };

490

491 UCharFile::UCharFile(const char * fileName) {

492 fEof = FALSE;

493 fError = FALSE;

494 fName = fileName;

495 struct stat buf;

496 int32_t result = stat(fileName, &buf);

497 if(result != 0) {

498 fprintf(stderr, "Error getting info\n");

499 fFileSize = -1;

500 } else {

501 fFileSize = buf.st_size;

502 }

503 fFile = fopen(fName, "rb");

504 fPending2ndSurrogate = 0;

505 if (fFile == NULL) {

506 fprintf(stderr, "Can not open file \"%s\"\n", opt_fName);

507 fError = TRUE;

508 return;

509 }

510 //

511 // Look for the byte order mark at the start of the file.

512 //

513 int BOMC1, BOMC2, BOMC3;

514 BOMC1 = fgetc(fFile);

515 BOMC2 = fgetc(fFile);

516

517 if (BOMC1 == 0xff && BOMC2 == 0xfe) {

518 fEncoding = UTF16LE; }

519 else if (BOMC1 == 0xfe && BOMC2 == 0xff) {

520 fEncoding = UTF16BE; }

521 else if (BOMC1 == 0xEF && BOMC2 == 0xBB && (BOMC3 = fgetc(fFile)) == 0xBF ) {

522 fEncoding = UTF8; }

523 else

524 {

525 fprintf(stderr, "collperf: file \"%s\" encoding must be UTF-8 or UTF-16 , and "

526 "must include a BOM.\n", fileName);

527 fError = true;

528 return;

529 }

530 }

531

532

533 UCharFile::~UCharFile() {

534 fclose(fFile);

535 }

536

537

538

539 UChar UCharFile::get() {

540 UChar c;

541 switch (fEncoding) {

542 case UTF16LE:

543 {

544 int cL, cH;

545 cL = fgetc(fFile);

546 cH = fgetc(fFile);

547 c = cL \| (cH << 8);

548 if (cH == EOF) {

549 c = 0;

550 fEof = TRUE;

551 }

552 break;

553 }

554 case UTF16BE:

555 {

556 int cL, cH;

557 cH = fgetc(fFile);

558 cL = fgetc(fFile);

559 c = cL \| (cH << 8);

560 if (cL == EOF) {

561 c = 0;

562 fEof = TRUE;

563 }

564 break;

565 }

566 case UTF8:

567 {

568 if (fPending2ndSurrogate != 0) {

569 c = fPending2ndSurrogate;

570 fPending2ndSurrogate = 0;

571 break;

572 }

573

574 int ch = fgetc(fFile); // Note: c and ch are separate cause eof t est doesn't work on UChar type.

575 if (ch == EOF) {

576 c = 0;

577 fEof = TRUE;

578 break;

579 }

580

581 if (ch <= 0x7f) {

582 // It's ascii. No further utf-8 conversion.

583 c = ch;

584 break;

585 }

586

587 // Figure out the lenght of the char and read the rest of the bytes

588 // into a temp array.

589 int nBytes;

590 if (ch >= 0xF0) {nBytes=4;}

591 else if (ch >= 0xE0) {nBytes=3;}

592 else if (ch >= 0xC0) {nBytes=2;}

593 else {

594 fprintf(stderr, "not likely utf-8 encoded file %s contains corru pt data at offset %d.\n", fName, ftell(fFile));

595 fError = TRUE;

596 return 0;

597 }

598

599 unsigned char bytes[10];

600 bytes[0] = (unsigned char)ch;

601 int i;

602 for (i=1; i<nBytes; i++) {

603 bytes[i] = fgetc(fFile);

604 if (bytes[i] < 0x80 \|\| bytes[i] >= 0xc0) {

605 fprintf(stderr, "utf-8 encoded file %s contains corrupt data at offset %d. Expected %d bytes, byte %d is invalid. First byte is %02X\n", fNa me, ftell(fFile), nBytes, i, ch);

606 fError = TRUE;

607 return 0;

608 }

609 }

610

611 // Convert the bytes from the temp array to a Unicode char.

612 i = 0;

613 uint32_t cp;

614 U8_NEXT_UNSAFE(bytes, i, cp);

615 c = (UChar)cp;

616

617 if (cp >= 0x10000) {

618 // The code point needs to be broken up into a utf-16 surrogate pair.

619 // Process first half this time through the main loop, and

620 // remember the other half for the next time through.

621 UChar utf16Buf[3];

622 i = 0;

623 UTF16_APPEND_CHAR_UNSAFE(utf16Buf, i, cp);

624 fPending2ndSurrogate = utf16Buf[1];

625 c = utf16Buf[0];

626 }

627 break;

628 };

629 }

630 return c;

631 }

632

633

634 //------------------------------------------------------------------------------ ----------

635 //

636 // Main -- process command line, read in and pre-process the test file,

637 // call other functions to do the actual tests.

638 //

639 //------------------------------------------------------------------------------ ----------

640 int main(int argc, const char** argv) {

641 if (ProcessOptions(argc, argv, opts) != TRUE \|\| opt_help \|\| opt_fName == 0) {

642 printf(gUsageString);

643 exit (1);

644 }

645 // Make sure that we've only got one API selected.

646 if (opt_mac \|\| opt_unix \|\| opt_win) opt_icu = FALSE;

647 if (opt_mac \|\| opt_unix) opt_win = FALSE;

648 if (opt_mac) opt_unix = FALSE;

649

650 UErrorCode status = U_ZERO_ERROR;

651

652

653

654 //

655 // Set up a Windows LCID

656 //

657 /*

658 if (opt_langid != 0) {

659 gWinLCID = MAKELCID(opt_langid, SORT_DEFAULT);

660 }

661 else {

662 gWinLCID = uloc_getLCID(opt_locale);

663 }

664 */

665

666 //

667 // Set the UNIX locale

668 //

669 if (opt_unix) {

670 if (setlocale(LC_ALL, opt_locale) == 0) {

671 fprintf(stderr, "setlocale(LC_ALL, %s) failed.\n", opt_locale);

672 exit(-1);

673 }

674 }

675

676 // Read in the input file.

677 // File assumed to be utf-16.

678 // Lines go onto heap buffers. Global index array to line starts is creat ed.

679 // Lines themselves are null terminated.

680 //

681

682 UCharFile f(opt_fName);

683 if (f.error()) {

684 exit(-1);

685 }

686 int32_t fileSize = f.size();

687 const int STARTSIZE = 70000;

688 int32_t bufSize = 0;

689 int32_t charCount = 0;

690 if(fileSize != -1) {

691 text = (UChar )malloc(fileSizesizeof(UChar));

692 bufSize = fileSize;

693 } else {

694 text = (UChar )malloc(STARTSIZEsizeof(UChar));

695 bufSize = STARTSIZE;

696 }

697 if(text == NULL) {

698 fprintf(stderr, "Allocating buffer failed\n");

699 exit(-1);

700 }

701

702

703 // Read the file, split into lines, and save in memory.

704 // Loop runs once per utf-16 value from the input file,

705 // (The number of bytes read from file per loop iteration depends on exte rnal encoding.)

706 for (;;) {

707

708 UChar c = f.get();

709 if(f.eof()) {

710 break;

711 }

712 if (f.error()){

713 exit(-1);

714 }

715 // We now have a good UTF-16 value in c.

716 text[charCount++] = c;

717 if(charCount == bufSize) {

718 text = (UChar )realloc(text, 2bufSize*sizeof(UChar));

719 if(text == NULL) {

720 fprintf(stderr, "Reallocating buffer failed\n");

721 exit(-1);

722 }

723 bufSize *= 2;

724 }

725 }

726

727

728 if (opt_terse == FALSE) {

729 printf("file \"%s\", %d charCount code units.\n", opt_fName, charCount);

730 }

731

732 textSize = charCount;

733

734

735

736

737 //

738 // Dump file contents if requested.

739 //

740 if (opt_dump) {

741 // dump file, etc... possibly

742 }

743

744

745 //

746 // We've got the file read into memory. Go do something with it.

747 //

748 int32_t i = 0;

749 for(i = 0; i < opt_passesCount; i++) {

750 if(opt_loopCount != 0) {

751 if(opt_next) {

752 doForwardTest();

753 } else if(opt_isBound) {

754 doIsBoundTest();

755 } else {

756 doForwardTest();

757 }

758 } else if(opt_time != 0) {

759

760 }

761 }

762

763 if(text != NULL) {

764 free(text);

765 }

766 if(brkit != NULL) {

767 delete brkit;

768 }

769

770 return 0;

771 }

OLD	NEW

« no previous file with comments | « source/test/perf/ubrkperf/ubrkperf20.dsp ('k') | source/test/perf/ubrkperf/ubrkperfold.dsp » ('j') | no next file with comments »