source/test/thaitest/thaitest.cpp - Issue 2435373002: Delete source/test

Unified Diff: source/test/thaitest/thaitest.cpp

Issue 2435373002: Delete source/test (Closed)

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: source/test/thaitest/thaitest.cpp

diff --git a/source/test/thaitest/thaitest.cpp b/source/test/thaitest/thaitest.cpp

deleted file mode 100644

index 657fab433c09d5b18ee125b2dd5b1f7bb1689f99..0000000000000000000000000000000000000000

--- a/source/test/thaitest/thaitest.cpp

+++ /dev/null

@@ -1,571 +0,0 @@

-/*

- ******************************************************************************

- */

-#include <errno.h>

-#include <stdio.h>

-#include <string.h>

-#include "unicode/utypes.h"

-#include "unicode/uchar.h"

-#include "unicode/uchriter.h"

-#include "unicode/brkiter.h"

-#include "unicode/locid.h"

-#include "unicode/unistr.h"

-#include "unicode/uniset.h"

-#include "unicode/ustring.h"

-/*

- * This program takes a Unicode text file containing Thai text with

- * spaces inserted where the word breaks are. It computes a copy of

- * the text without spaces and uses a word instance of a Thai BreakIterator

- * to compute the word breaks. The program reports any differences in the

- * breaks.

- *

- * NOTE: by its very nature, Thai word breaking is not exact, so it is

- * expected that this program will always report some differences.

- */

-/*

- * This class is a break iterator that counts words and spaces.

- */

-class SpaceBreakIterator

-public:

- // The constructor:

- // text - pointer to an array of UChars to iterate over

- // count - the number of UChars in text

- SpaceBreakIterator(const UChar *text, int32_t count);

- // the destructor

- ~SpaceBreakIterator();

- // return next break position

- int32_t next();

- // return current word count

- int32_t getWordCount();

- // return current space count

- int32_t getSpaceCount();

-private:

- // No arg constructor: private so clients can't call it.

- SpaceBreakIterator();

- // The underlying BreakIterator

- BreakIterator *fBreakIter;

- // address of the UChar array

- const UChar *fText;

- // number of UChars in fText

- int32_t fTextCount;

- // current word count

- int32_t fWordCount;

- // current space count

- int32_t fSpaceCount;

- // UnicodeSet of SA characters

- UnicodeSet fComplexContext;

- // true when fBreakIter has returned DONE

- UBool fDone;

-};

-/*

- * This is the main class. It compares word breaks and reports the differences.

- */

-class ThaiWordbreakTest

-public:

- // The main constructor:

- // spaces - pointer to a UChar array for the text with spaces

- // spaceCount - the number of characters in the spaces array

- // noSpaces - pointer to a UChar array for the text without spaces

- // noSpaceCount - the number of characters in the noSpaces array

- // verbose - report all breaks if true, otherwise just report differences

- ThaiWordbreakTest(const UChar *spaces, int32_t spaceCount, const UChar *noSpaces, int32_t noSpaceCount, UBool verbose);

- ~ThaiWordbreakTest();

- // returns the number of breaks that are in the spaces array

- // but aren't found in the noSpaces array

- int32_t getBreaksNotFound();

- // returns the number of breaks which are found in the noSpaces

- // array but aren't in the spaces array

- int32_t getInvalidBreaks();

- // returns the number of words found in the spaces array

- int32_t getWordCount();

- // reads the input Unicode text file:

- // fileName - the path name of the file

- // charCount - set to the number of UChars read from the file

- // returns - the address of the UChar array containing the characters

- static const UChar *readFile(char *fileName, int32_t &charCount);

- // removes spaces from the input UChar array:

- // spaces - pointer to the input UChar array

- // count - number of UChars in the spaces array

- // nonSpaceCount - the number of UChars in the result array

- // returns - the address of the UChar array with spaces removed

- static const UChar *crunchSpaces(const UChar *spaces, int32_t count, int32_t &nonSpaceCount);

-private:

- // The no arg constructor - private so clients can't call it

- ThaiWordbreakTest();

- // This does the actual comparison:

- // spaces - the address of the UChar array for the text with spaces

- // spaceCount - the number of UChars in the spaces array

- // noSpaces - the address of the UChar array for the text without spaces

- // noSpaceCount - the number of UChars in the noSpaces array

- // returns - TRUE if all breaks match, FALSE otherwise

- UBool compareWordBreaks(const UChar *spaces, int32_t spaceCount,

- const UChar *noSpaces, int32_t noSpaceCount);

- // helper method to report a break in the spaces

- // array that's not found in the noSpaces array

- void breakNotFound(int32_t br);

- // helper method to report a break that's found in

- // the noSpaces array that's not in the spaces array

- void foundInvalidBreak(int32_t br);

- // count of breaks in the spaces array that

- // aren't found in the noSpaces array

- int32_t fBreaksNotFound;

- // count of breaks found in the noSpaces array

- // that aren't in the spaces array

- int32_t fInvalidBreaks;

- // number of words found in the spaces array

- int32_t fWordCount;

- // report all breaks if true, otherwise just report differences

- UBool fVerbose;

-};

-/*

- * The main constructor: it calls compareWordBreaks and reports any differences

- */

-ThaiWordbreakTest::ThaiWordbreakTest(const UChar *spaces, int32_t spaceCount,

- const UChar *noSpaces, int32_t noSpaceCount, UBool verbose)

-: fBreaksNotFound(0), fInvalidBreaks(0), fWordCount(0), fVerbose(verbose)

- compareWordBreaks(spaces, spaceCount, noSpaces, noSpaceCount);

-/*

- * The no arg constructor

- */

-ThaiWordbreakTest::ThaiWordbreakTest()

- // nothing

-/*

- * The destructor

- */

-ThaiWordbreakTest::~ThaiWordbreakTest()

- // nothing?

-/*

- * returns the number of breaks in the spaces array

- * that aren't found in the noSpaces array

- */

-inline int32_t ThaiWordbreakTest::getBreaksNotFound()

- return fBreaksNotFound;

-/*

- * Returns the number of breaks found in the noSpaces

- * array that aren't in the spaces array

- */

-inline int32_t ThaiWordbreakTest::getInvalidBreaks()

- return fInvalidBreaks;

-/*

- * Returns the number of words found in the spaces array

- */

-inline int32_t ThaiWordbreakTest::getWordCount()

- return fWordCount;

-/*

- * This method does the actual break comparison and reports the results.

- * It uses a SpaceBreakIterator to iterate over the text with spaces,

- * and a word instance of a Thai BreakIterator to iterate over the text

- * without spaces.

- */

-UBool ThaiWordbreakTest::compareWordBreaks(const UChar *spaces, int32_t spaceCount,

- const UChar *noSpaces, int32_t noSpaceCount)

- UBool result = TRUE;

- Locale thai("th");

- UCharCharacterIterator *noSpaceIter = new UCharCharacterIterator(noSpaces, noSpaceCount);

- UErrorCode status = U_ZERO_ERROR;

- BreakIterator *breakIter = BreakIterator::createWordInstance(thai, status);

- breakIter->adoptText(noSpaceIter);

- SpaceBreakIterator spaceIter(spaces, spaceCount);

- int32_t nextBreak = 0;

- int32_t nextSpaceBreak = 0;

- int32_t iterCount = 0;

- while (TRUE) {

- nextSpaceBreak = spaceIter.next();

- nextBreak = breakIter->next();

- if (nextSpaceBreak == BreakIterator::DONE || nextBreak == BreakIterator::DONE) {

- if (nextBreak != BreakIterator::DONE) {

- fprintf(stderr, "break iterator didn't end.\n");

- } else if (nextSpaceBreak != BreakIterator::DONE) {

- fprintf(stderr, "premature break iterator end.\n");

- }

- break;

- }

- while (nextSpaceBreak != nextBreak &&

- nextSpaceBreak != BreakIterator::DONE && nextBreak != BreakIterator::DONE) {

- if (nextSpaceBreak < nextBreak) {

- breakNotFound(nextSpaceBreak);

- result = FALSE;

- nextSpaceBreak = spaceIter.next();

- } else if (nextSpaceBreak > nextBreak) {

- foundInvalidBreak(nextBreak);

- result = FALSE;

- nextBreak = breakIter->next();

- }

- if (fVerbose) {

- printf("%d %d\n", nextSpaceBreak, nextBreak);

- }

- fWordCount = spaceIter.getWordCount();

- delete breakIter;

- return result;

-/*

- * Report a break that's in the text with spaces but

- * not found in the text without spaces.

- */

-void ThaiWordbreakTest::breakNotFound(int32_t br)

- if (fVerbose) {

- printf("%d ****\n", br);

- } else {

- fprintf(stderr, "break not found: %d\n", br);

- }

- fBreaksNotFound += 1;

-/*

- * Report a break that's found in the text without spaces

- * that isn't in the text with spaces.

- */

-void ThaiWordbreakTest::foundInvalidBreak(int32_t br)

- if (fVerbose) {

- printf("**** %d\n", br);

- } else {

- fprintf(stderr, "found invalid break: %d\n", br);

- }

- fInvalidBreaks += 1;

-/*

- * Read the text from a file. The file is decoded as UTF-8 and must start

- * with a Unicode Byte Order Mark (BOM); the first character is always skipped.

- */

-const UChar *ThaiWordbreakTest::readFile(char *fileName, int32_t &charCount)

- FILE *f;

- int32_t fileSize;

- UChar *buffer;

- char *bufferChars;

- f = fopen(fileName, "rb");

- if( f == NULL ) {

- fprintf(stderr,"Couldn't open %s reason: %s \n", fileName, strerror(errno));

- return 0;

- }

- fseek(f, 0, SEEK_END);

- fileSize = ftell(f);

- fseek(f, 0, SEEK_SET);

- bufferChars = new char[fileSize];

- if(bufferChars == 0) {

- fprintf(stderr,"Couldn't get memory for reading %s reason: %s \n", fileName, strerror(errno));

- fclose(f);

- return 0;

- }

- fread(bufferChars, sizeof(char), fileSize, f);

- if( ferror(f) ) {

- fprintf(stderr,"Couldn't read %s reason: %s \n", fileName, strerror(errno));

- fclose(f);

- delete[] bufferChars;

- return 0;

- }

- fclose(f);

- UnicodeString myText(bufferChars, fileSize, "UTF-8");

- delete[] bufferChars;

- charCount = myText.length();

- buffer = new UChar[charCount];

- if(buffer == 0) {

- fprintf(stderr,"Couldn't get memory for reading %s reason: %s \n", fileName, strerror(errno));

- return 0;

- }

- myText.extract(1, myText.length(), buffer);

- charCount--; // skip the BOM

- buffer[charCount] = 0; // NULL terminate for easier reading in the debugger

- return buffer;

-/*

- * Remove spaces from the input UChar array.

- *

- * We check explicitly for a Unicode code value of 0x0020

- * because Unicode::isSpaceChar returns true for CR, LF, etc.

- *

- */

-const UChar *ThaiWordbreakTest::crunchSpaces(const UChar *spaces, int32_t count, int32_t &nonSpaceCount)

- int32_t i, out, spaceCount;

- spaceCount = 0;

- for (i = 0; i < count; i += 1) {

- if (spaces[i] == 0x0020 /*Unicode::isSpaceChar(spaces[i])*/) {

- spaceCount += 1;

- }

- nonSpaceCount = count - spaceCount;

- UChar *noSpaces = new UChar[nonSpaceCount];

- if (noSpaces == 0) {

- fprintf(stderr, "Couldn't allocate memory for the space stripped text.\n");

- return 0;

- }

- for (out = 0, i = 0; i < count; i += 1) {

- if (spaces[i] != 0x0020 /*! Unicode::isSpaceChar(spaces[i])*/) {

- noSpaces[out++] = spaces[i];

- }

- return noSpaces;

-/*

- * Generate a text file with spaces in it from a file without.

- */

-int generateFile(const UChar *chars, int32_t length) {

- Locale root("");

- UCharCharacterIterator *noSpaceIter = new UCharCharacterIterator(chars, length);

- UErrorCode status = U_ZERO_ERROR;

- UnicodeSet complexContext(UNICODE_STRING_SIMPLE("[:LineBreak=SA:]"), status);

- BreakIterator *breakIter = BreakIterator::createWordInstance(root, status);

- breakIter->adoptText(noSpaceIter);

- char outbuf[1024];

- int32_t strlength;

- UChar bom = 0xFEFF;

- printf("%s", u_strToUTF8(outbuf, sizeof(outbuf), &strlength, &bom, 1, &status));

- int32_t prevbreak = 0;

- while (U_SUCCESS(status)) {

- int32_t nextbreak = breakIter->next();

- if (nextbreak == BreakIterator::DONE) {

- break;

- }

- printf("%s", u_strToUTF8(outbuf, sizeof(outbuf), &strlength, &chars[prevbreak],

- nextbreak-prevbreak, &status));

- if (nextbreak > 0 && complexContext.contains(chars[nextbreak-1])

- && complexContext.contains(chars[nextbreak])) {

- printf(" ");

- }

- prevbreak = nextbreak;

- }

- if (U_FAILURE(status)) {

- fprintf(stderr, "generate failed: %s\n", u_errorName(status));

- return status;

- }

- else {

- return 0;

- }

-/*

- * The main routine. Read the command line arguments, read the text file,

- * remove the spaces, do the comparison and report the final results

- */

-int main(int argc, char **argv)

- char *fileName = "space.txt";

- int arg = 1;

- UBool verbose = FALSE;

- UBool generate = FALSE;

- if (argc >= 2 && strcmp(argv[1], "-generate") == 0) {

- generate = TRUE;

- arg += 1;

- }

- if (argc >= 2 && strcmp(argv[1], "-verbose") == 0) {

- verbose = TRUE;

- arg += 1;

- }

- if (arg == argc - 1) {

- fileName = argv[arg++];

- }

- if (arg != argc) {

- fprintf(stderr, "Usage: %s [-verbose] [<file>]\n", argv[0]);

- return 1;

- }

- int32_t spaceCount, nonSpaceCount;

- const UChar *spaces, *noSpaces;

- spaces = ThaiWordbreakTest::readFile(fileName, spaceCount);

- if (spaces == 0) {

- return 1;

- }

- if (generate) {

- return generateFile(spaces, spaceCount);

- }

- noSpaces = ThaiWordbreakTest::crunchSpaces(spaces, spaceCount, nonSpaceCount);

- if (noSpaces == 0) {

- return 1;

- }

- ThaiWordbreakTest test(spaces, spaceCount, noSpaces, nonSpaceCount, verbose);

- printf("word count: %d\n", test.getWordCount());

- printf("breaks not found: %d\n", test.getBreaksNotFound());

- printf("invalid breaks found: %d\n", test.getInvalidBreaks());

- return 0;

-/*

- * The main constructor. Clear all the counts and construct a default

- * word instance of a BreakIterator.

- */

-SpaceBreakIterator::SpaceBreakIterator(const UChar *text, int32_t count)

- : fBreakIter(0), fText(text), fTextCount(count), fWordCount(0), fSpaceCount(0), fDone(FALSE)

- UCharCharacterIterator *iter = new UCharCharacterIterator(text, count);

- UErrorCode status = U_ZERO_ERROR;

- fComplexContext.applyPattern(UNICODE_STRING_SIMPLE("[:LineBreak=SA:]"), status);

- Locale root("");

- fBreakIter = BreakIterator::createWordInstance(root, status);

- fBreakIter->adoptText(iter);

-SpaceBreakIterator::SpaceBreakIterator()

- // nothing

-/*

- * The destructor. delete the underlying BreakIterator

- */

-SpaceBreakIterator::~SpaceBreakIterator()

- delete fBreakIter;

-/*

- * Return the next break, counting words and spaces.

- */

-int32_t SpaceBreakIterator::next()

- if (fDone) {

- return BreakIterator::DONE;

- }

- int32_t nextBreak;

- do {

- nextBreak = fBreakIter->next();

- if (nextBreak == BreakIterator::DONE) {

- fDone = TRUE;

- return BreakIterator::DONE;

- }

- while(nextBreak > 0 && fComplexContext.contains(fText[nextBreak-1])

- && fComplexContext.contains(fText[nextBreak]));

- int32_t result = nextBreak - fSpaceCount;

- if (nextBreak < fTextCount) {

- if (fText[nextBreak] == 0x0020 /*Unicode::isSpaceChar(fText[nextBreak])*/) {

- fSpaceCount += fBreakIter->next() - nextBreak;

- }

- fWordCount += 1;

- return result;

-/*

- * Returns the current space count

- */

-int32_t SpaceBreakIterator::getSpaceCount()

- return fSpaceCount;

-/*

- * Returns the current word count

- */

-int32_t SpaceBreakIterator::getWordCount()

- return fWordCount;

« no previous file with comments | « source/test/thaitest/space.txt ('k') | source/test/thaitest/thaitest.dsp » ('j') | no next file with comments »