| Index: icu46/source/test/intltest/bidiconf.cpp
|
| ===================================================================
|
| --- icu46/source/test/intltest/bidiconf.cpp (revision 0)
|
| +++ icu46/source/test/intltest/bidiconf.cpp (revision 0)
|
| @@ -0,0 +1,488 @@
|
| +/*
|
| +*******************************************************************************
|
| +*
|
| +* Copyright (C) 2009-2010, International Business Machines
|
| +* Corporation and others. All Rights Reserved.
|
| +*
|
| +*******************************************************************************
|
| +* file name: bidiconf.cpp
|
| +* encoding: US-ASCII
|
| +* tab size: 8 (not used)
|
| +* indentation:4
|
| +*
|
| +* created on: 2009oct16
|
| +* created by: Markus W. Scherer
|
| +*
|
| +* BiDi conformance test, using the Unicode BidiTest.txt file.
|
| +*/
|
| +
|
| +#include <stdio.h>
|
| +#include <stdlib.h>
|
| +#include <string.h>
|
| +#include "unicode/utypes.h"
|
| +#include "unicode/ubidi.h"
|
| +#include "unicode/errorcode.h"
|
| +#include "unicode/localpointer.h"
|
| +#include "unicode/putil.h"
|
| +#include "unicode/unistr.h"
|
| +#include "intltest.h"
|
| +#include "uparse.h"
|
| +// IntlTest suite that checks ICU's UBiDi results against the Unicode BidiTest.txt conformance data.
|
| +class BiDiConformanceTest : public IntlTest {
|
| +public:
|
| + BiDiConformanceTest() :
|
| + directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0),
|
| + errorCount(0) {}
|
| + // IntlTest dispatch entry point: selects the test for the given index.
|
| + void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
|
| + // Reads BidiTest.txt line by line and checks levels and reordering for each data line.
|
| + void TestBidiTest();
|
| +private:
|
| + char *getUnidataPath(char path[]); // probes for UnicodeData.txt and writes the directory found into path
|
| + // Parsers for "@Levels:" lines, "@Reorder:" lines, and the BiDi class list of a data line.
|
| + UBool parseLevels(const char *start);
|
| + UBool parseOrdering(const char *start);
|
| + UBool parseInputStringFromBiDiClasses(const char *&start);
|
| + // Comparisons of UBiDi results with the expected values; both return FALSE after reporting a mismatch.
|
| + UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
|
| + const char *paraLevelName);
|
| + UBool checkOrdering(UBiDi *ubidi, const char *paraLevelName);
|
| + // Reports the current input line, input string and paragraph level, and increments errorCount.
|
| + void printErrorLine(const char *paraLevelName);
|
| + // Parser and test state shared by the methods above.
|
| + char line[10000]; // current line of BidiTest.txt, as filled by fgets()
|
| + UBiDiLevel levels[1000]; // expected levels from the last @Levels: line; 'x' entries hold UBIDI_DEFAULT_LTR
|
| + uint32_t directionBits; // bit 0 set if an even expected level was parsed, bit 1 if an odd one
|
| + int32_t ordering[1000]; // expected logical indexes in visual order from the last @Reorder: line
|
| + int32_t lineNumber; // 1-based number of the line currently held in line[]
|
| + int32_t levelsCount; // number of valid entries in levels[]
|
| + int32_t orderingCount; // number of valid entries in ordering[]
|
| + int32_t errorCount; // calls to printErrorLine() so far; TestBidiTest() stops reading at 10
|
| + UnicodeString inputString; // one stand-in character (see charFromBiDiClass) per input BiDi class
|
| +};
|
| +// Factory function: returns a newly allocated BiDiConformanceTest as an IntlTest pointer.
|
| +extern IntlTest *createBiDiConformanceTest() {
|
| + return new BiDiConformanceTest();
|
| +}
|
| +// IntlTest dispatcher for this suite: index 0 is TestBidiTest; any other index sets name to "".
|
| +void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
|
| + if(exec) { // Log the suite banner only when exec is set.
|
| + logln("TestSuite BiDiConformanceTest: ");
|
| + }
|
| + switch (index) {
|
| + TESTCASE(0, TestBidiTest); // TESTCASE is defined outside this file; presumably sets name and runs the test if exec.
|
| + default:
|
| + name="";
|
| + break; // needed to end the loop
|
| + }
|
| +}
|
| +// Probes for unidata/UnicodeData.txt, first under the ICU data directory and then under a
|
| +// guessed source location. On success the basename is cut off, so path names the directory,
|
| +// and path is returned; otherwise NULL is returned. No length check is done on path.
|
| +// TODO: Move to a common place (IntlTest?) to avoid duplication with UnicodeTest (ucdtest.cpp).
|
| +char *BiDiConformanceTest::getUnidataPath(char path[]) {
|
| +    IcuTestErrorCode errorCode(*this, "getUnidataPath");
|
| +    const int kBasenameLength=15;  // strlen("UnicodeData.txt")
|
| +
|
| +    for(int attempt=0; attempt<2; ++attempt) {
|
| +        if(attempt==0) {
|
| +            // Look inside ICU_DATA first.
|
| +            strcpy(path, pathToDataDirectory());
|
| +        } else {
|
| +            // As a fallback, try to guess where the source data was located
|
| +            // at the time ICU was built, and look there.
|
| +#           ifdef U_TOPSRCDIR
|
| +            strcpy(path, U_TOPSRCDIR U_FILE_SEP_STRING "data");
|
| +#           else
|
| +            strcpy(path, loadTestData(errorCode));
|
| +            strcat(path, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
|
| +                         U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
|
| +                         U_FILE_SEP_STRING "data");
|
| +#           endif
|
| +            strcat(path, U_FILE_SEP_STRING);
|
| +        }
|
| +        // Both candidate directories are probed the same way.
|
| +        strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
|
| +        FILE *probe=fopen(path, "r");
|
| +        if(probe!=NULL) {
|
| +            fclose(probe);
|
| +            path[strlen(path)-kBasenameLength]=0;  // Remove the basename.
|
| +            return path;
|
| +        }
|
| +    }
|
| +    return NULL;
|
| +}
|
| +// Defines LocalStdioFilePointer, a local-pointer wrapper for FILE * with fclose as its close function.
|
| +U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
|
| +// Parses the values of an "@Levels:" line into levels[], levelsCount and directionBits.
|
| +// An 'x' is stored as UBIDI_DEFAULT_LTR. Returns FALSE after errln() for bad or too many values.
|
| +UBool BiDiConformanceTest::parseLevels(const char *start) {
|
| +    directionBits=0;
|
| +    for(levelsCount=0; *start!=0 && *(start=u_skipWhitespace(start))!=0; ++levelsCount) {
|
| +        char *end=const_cast<char *>(start)+1;  // Where to resume after a one-character 'x'.
|
| +        uint32_t value=UBIDI_DEFAULT_LTR;  // An 'x' is stored as this marker value.
|
| +        UBool isOk=levelsCount<(int32_t)(sizeof(levels)/sizeof(levels[0]));  // Room left in levels[]?
|
| +        if(isOk && *start!='x') {
|
| +            value=(uint32_t)strtoul(start, &end, 10);
|
| +            isOk=end>start && (U_IS_INV_WHITESPACE(*end) || *end==0) && value<=(UBIDI_MAX_EXPLICIT_LEVEL+1);
|
| +            directionBits|=(1<<(value&1));  // Harmless if the value is rejected just below.
|
| +        }
|
| +        if(!isOk) {
|
| +            errln("@Levels: parse error or too many values at %s", start);
|
| +            return FALSE;
|
| +        }
|
| +        levels[levelsCount]=(UBiDiLevel)value;
|
| +        start=end;
|
| +    }
|
| +    return TRUE;
|
| +}
|
| +// Parses the values of an "@Reorder:" line into ordering[] and orderingCount; FALSE (after errln) on bad or too many values.
|
| +UBool BiDiConformanceTest::parseOrdering(const char *start) {
|
| +    for(orderingCount=0; *start!=0 && *(start=u_skipWhitespace(start))!=0; ++orderingCount) {
|
| +        char *end;
|
| +        uint32_t value=(uint32_t)strtoul(start, &end, 10);
|
| +        if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>=1000 ||
|
| +                orderingCount>=(int32_t)(sizeof(ordering)/sizeof(ordering[0]))) {  // would overflow ordering[]
|
| +            errln("@Reorder: parse error or too many values at %s", start);
|
| +            return FALSE;
|
| +        }
|
| +        ordering[orderingCount]=(int32_t)value;
|
| +        start=end;
|
| +    }
|
| +    return TRUE;
|
| +}
|
| +// Stand-in character for each BiDi class, indexed by UCharDirection value; biDiConfUBiDiClassCallback maps them back.
|
| +static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
|
| + 0x6c, // 'l' for L
|
| + 0x52, // 'R' for R
|
| + 0x33, // '3' for EN
|
| + 0x2d, // '-' for ES
|
| + 0x25, // '%' for ET
|
| + 0x39, // '9' for AN
|
| + 0x2c, // ',' for CS
|
| + 0x2f, // '/' for B
|
| + 0x5f, // '_' for S
|
| + 0x20, // ' ' for WS
|
| + 0x3d, // '=' for ON
|
| + 0x65, // 'e' for LRE
|
| + 0x6f, // 'o' for LRO
|
| + 0x41, // 'A' for AL
|
| + 0x45, // 'E' for RLE
|
| + 0x4f, // 'O' for RLO
|
| + 0x2a, // '*' for PDF
|
| + 0x60, // '`' for NSM
|
| + 0x7c // '|' for BN
|
| +};
|
| +
|
| +U_CDECL_BEGIN
|
| +// Class callback for this test: maps a stand-in character back to its BiDi class by searching charFromBiDiClass.
|
| +static UCharDirection U_CALLCONV
|
| +biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) {
|
| +    int classIndex=0;
|
| +    while(classIndex<U_CHAR_DIRECTION_COUNT && c!=charFromBiDiClass[classIndex]) {
|
| +        ++classIndex;
|
| +    }
|
| +    if(classIndex<U_CHAR_DIRECTION_COUNT) {
|
| +        return (UCharDirection)classIndex;
|
| +    }
|
| +    // Character not in our hardcoded table. Should not occur during testing.
|
| +    return U_BIDI_CLASS_DEFAULT;
|
| +}
|
| +U_CDECL_END
|
| +// Length of each short BiDi class name, indexed by UCharDirection value; the extra last entry (0) is for U_CHAR_DIRECTION_COUNT.
|
| +static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={
|
| + 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 0
|
| +};
|
| +// Parses whitespace-separated short BiDi class names at start, up to ';' or the end of the string, into inputString.
|
| +UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) {
|
| + inputString.remove();
|
| + /*
|
| + * Lengthy but fast BiDi class parser.
|
| + * A simple parser could terminate or extract the name string and use
|
| + * int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString);
|
| + * but that makes this test take significantly more time.
|
| + */
|
| + while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
|
| + UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT; // sentinel: no class recognized yet
|
| + // Compare each character once until we have a match on
|
| + // a complete, short BiDi class name.
|
| + if(start[0]=='L') {
|
| + if(start[1]=='R') {
|
| + if(start[2]=='E') {
|
| + biDiClass=U_LEFT_TO_RIGHT_EMBEDDING;
|
| + } else if(start[2]=='O') {
|
| + biDiClass=U_LEFT_TO_RIGHT_OVERRIDE;
|
| + }
|
| + } else {
|
| + biDiClass=U_LEFT_TO_RIGHT;
|
| + }
|
| + } else if(start[0]=='R') {
|
| + if(start[1]=='L') {
|
| + if(start[2]=='E') {
|
| + biDiClass=U_RIGHT_TO_LEFT_EMBEDDING;
|
| + } else if(start[2]=='O') {
|
| + biDiClass=U_RIGHT_TO_LEFT_OVERRIDE;
|
| + }
|
| + } else {
|
| + biDiClass=U_RIGHT_TO_LEFT;
|
| + }
|
| + } else if(start[0]=='E') {
|
| + if(start[1]=='N') {
|
| + biDiClass=U_EUROPEAN_NUMBER;
|
| + } else if(start[1]=='S') {
|
| + biDiClass=U_EUROPEAN_NUMBER_SEPARATOR;
|
| + } else if(start[1]=='T') {
|
| + biDiClass=U_EUROPEAN_NUMBER_TERMINATOR;
|
| + }
|
| + } else if(start[0]=='A') {
|
| + if(start[1]=='L') {
|
| + biDiClass=U_RIGHT_TO_LEFT_ARABIC;
|
| + } else if(start[1]=='N') {
|
| + biDiClass=U_ARABIC_NUMBER;
|
| + }
|
| + } else if(start[0]=='C' && start[1]=='S') {
|
| + biDiClass=U_COMMON_NUMBER_SEPARATOR;
|
| + } else if(start[0]=='B') {
|
| + if(start[1]=='N') {
|
| + biDiClass=U_BOUNDARY_NEUTRAL;
|
| + } else {
|
| + biDiClass=U_BLOCK_SEPARATOR;
|
| + }
|
| + } else if(start[0]=='S') {
|
| + biDiClass=U_SEGMENT_SEPARATOR;
|
| + } else if(start[0]=='W' && start[1]=='S') {
|
| + biDiClass=U_WHITE_SPACE_NEUTRAL;
|
| + } else if(start[0]=='O' && start[1]=='N') {
|
| + biDiClass=U_OTHER_NEUTRAL;
|
| + } else if(start[0]=='P' && start[1]=='D' && start[2]=='F') {
|
| + biDiClass=U_POP_DIRECTIONAL_FORMAT;
|
| + } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') {
|
| + biDiClass=U_DIR_NON_SPACING_MARK;
|
| + }
|
| + // Now we verify that the class name is terminated properly,
|
| + // and not just the start of a longer word.
|
| + int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass]; // the sentinel selects the extra 0 entry
|
| + char c=start[biDiClassNameLength];
|
| + if(biDiClass==U_CHAR_DIRECTION_COUNT || (!U_IS_INV_WHITESPACE(c) && c!=';' && c!=0)) {
|
| + errln("BiDi class string not recognized at %s", start);
|
| + return FALSE;
|
| + }
|
| + inputString.append(charFromBiDiClass[biDiClass]); // one stand-in character per class name
|
| + start+=biDiClassNameLength;
|
| + }
|
| + return TRUE; // start now points at the ';' or the terminating NUL
|
| +}
|
| +// Runs BidiTest.txt: @Levels:/@Reorder: lines set the expected results, and each data line is checked for every paragraph level in its bitset.
|
| +void BiDiConformanceTest::TestBidiTest() {
|
| +    IcuTestErrorCode errorCode(*this, "TestBidiTest");
|
| +    const char *sourceTestDataPath=getSourceTestData(errorCode);
|
| +    if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
|
| +                                      "folder (getSourceTestData())")) {
|
| +        return;
|
| +    }
|
| +    char bidiTestPath[400];
|
| +    // Guard the fixed-size buffer: strcpy()/strcat() below do not check lengths.
|
| +    if(strlen(sourceTestDataPath)+strlen("BidiTest.txt")>=sizeof(bidiTestPath)) {
|
| +        errln("path too long for BidiTest.txt: %s", sourceTestDataPath);
|
| +        return;
|
| +    }
|
| +    strcpy(bidiTestPath, sourceTestDataPath);
|
| +    strcat(bidiTestPath, "BidiTest.txt");
|
| +    LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
|
| +    if(bidiTestFile.isNull()) {
|
| +        errln("unable to open %s", bidiTestPath);
|
| +        return;
|
| +    }
|
| +    LocalUBiDiPointer ubidi(ubidi_open());
|
| +    ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL,
|
| +                           NULL, NULL, errorCode);
|
| +    if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) {
|
| +        return;
|
| +    }
|
| +    lineNumber=0;
|
| +    levelsCount=0;
|
| +    orderingCount=0;
|
| +    errorCount=0;
|
| +    while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
|
| +        ++lineNumber;
|
| +        // Remove trailing comments and whitespace.
|
| +        char *commentStart=strchr(line, '#');
|
| +        if(commentStart!=NULL) {
|
| +            *commentStart=0;
|
| +        }
|
| +        u_rtrim(line);
|
| +        const char *start=u_skipWhitespace(line);
|
| +        if(*start==0) {
|
| +            continue;  // Skip empty and comment-only lines.
|
| +        }
|
| +        if(*start=='@') {
|
| +            ++start;
|
| +            if(0==strncmp(start, "Levels:", 7)) {
|
| +                if(!parseLevels(start+7)) {
|
| +                    return;
|
| +                }
|
| +            } else if(0==strncmp(start, "Reorder:", 8)) {
|
| +                if(!parseOrdering(start+8)) {
|
| +                    return;
|
| +                }
|
| +            }
|
| +            // Skip unknown @Xyz: ...
|
| +        } else {
|
| +            if(!parseInputStringFromBiDiClasses(start)) {
|
| +                return;
|
| +            }
|
| +            start=u_skipWhitespace(start);
|
| +            if(*start!=';') {
|
| +                errln("missing ; separator on input line %s", line);
|
| +                return;
|
| +            }
|
| +            start=u_skipWhitespace(start+1);
|
| +            char *end;
|
| +            uint32_t bitset=(uint32_t)strtoul(start, &end, 16);
|
| +            if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) {
|
| +                errln("input bitset parse error at %s", start);
|
| +                return;
|
| +            }
|
| +            // Loop over the bitset.
|
| +            static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBIDI_DEFAULT_RTL };
|
| +            static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" };
|
| +            for(int i=0; i<=3; ++i) {
|
| +                if(bitset&(1<<i)) {
|
| +                    ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
|
| +                                  paraLevels[i], NULL, errorCode);
|
| +                    const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
|
| +                    if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
|
| +                        errln("Input line %d: %s", (int)lineNumber, line);
|
| +                        return;
|
| +                    }
|
| +                    // "continue outerLoop;" does not exist in C++, so a failed check just breaks out of the inner loop.
|
| +                    if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()),
|
| +                                    paraLevelNames[i]) ||
|
| +                            !checkOrdering(ubidi.getAlias(), paraLevelNames[i])) {
|
| +                        break;
|
| +                    }
|
| +                }
|
| +            }
|
| +        }
|
| +    }
|
| +}
|
| +// Maps a level to the single character used for it in the "Expected levels"/"Actual levels" messages.
|
| +static UChar printLevel(UBiDiLevel level) {
|
| +    // Values at or above UBIDI_DEFAULT_LTR are shown as 'x'.
|
| +    const UBool isPlaceholder=level>=UBIDI_DEFAULT_LTR;
|
| +    // Note: 0x30+level is a decimal digit only for levels 0..9;
|
| +    // higher levels yield the code units that follow '9'.
|
| +    return isPlaceholder ? (UChar)0x78 /* 'x' */ : (UChar)(0x30+level);
|
| +}
|
| +// Returns a bit set of the level parities that occur: bit 0 for even levels, bit 1 for odd levels.
|
| +static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) {
|
| +    uint32_t seenParities=0;
|
| +    for(int32_t remaining=actualCount; remaining>0; --remaining) {
|
| +        seenParities|=(1<<(actualLevels[remaining-1]&1));
|
| +    }
|
| +    return seenParities;
|
| +}
|
| +// Compares the levels computed by UBiDi with the expected levels[]; expected entries at or above
|
| +// UBIDI_DEFAULT_LTR ('x') are not compared. Returns FALSE after reporting the first mismatch.
|
| +UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
|
| +                                       const char *paraLevelName) {
|
| +    if(levelsCount!=actualCount) {
|
| +        errln("Wrong number of level values; expected %d actual %d",
|
| +              (int)levelsCount, (int)actualCount);
|
| +    } else {
|
| +        // Find the first position whose expected level is a real value that differs.
|
| +        int32_t mismatch=0;
|
| +        while(mismatch<actualCount &&
|
| +                (levels[mismatch]==actualLevels[mismatch] || levels[mismatch]>=UBIDI_DEFAULT_LTR)) {
|
| +            ++mismatch;
|
| +        }
|
| +        if(mismatch==actualCount) {
|
| +            return TRUE;  // Every compared level agrees.
|
| +        }
|
| +        if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) {
|
| +            // ICU used a shortcut:
|
| +            // Since the text is unidirectional, it did not store the resolved
|
| +            // levels but just returns all levels as the paragraph level 0 or 1.
|
| +            // The reordering result is the same, so this is fine.
|
| +            return TRUE;
|
| +        }
|
| +        errln("Wrong level value at index %d; expected %d actual %d",
|
| +              (int)mismatch, levels[mismatch], actualLevels[mismatch]);
|
| +    }
|
| +    // Only failures get here: report the input and both level sequences.
|
| +    printErrorLine(paraLevelName);
|
| +    UnicodeString expectedText("Expected levels: ");
|
| +    int32_t i;
|
| +    for(i=0; i<levelsCount; ++i) {
|
| +        expectedText.append((UChar)0x20).append(printLevel(levels[i]));
|
| +    }
|
| +    UnicodeString actualText("Actual levels: ");
|
| +    for(i=0; i<actualCount; ++i) {
|
| +        actualText.append((UChar)0x20).append(printLevel(actualLevels[i]));
|
| +    }
|
| +    errln(expectedText);
|
| +    errln(actualText);
|
| +    return FALSE;
|
| +}
|
| +// Compares the visual ordering computed by ubidi with ordering[]; on a mismatch reports both orderings and returns FALSE.
|
| +// Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS); does not work for
|
| +// custom BiDi class assignments and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here.
|
| +// Therefore we just skip the indexes for BiDi controls while comparing
|
| +// with the expected ordering that has them omitted.
|
| +UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi, const char *paraLevelName) {
|
| +    UBool isOk=TRUE;
|
| +    IcuTestErrorCode errorCode(*this, "TestBidiTest/checkOrdering()");
|
| +    int32_t resultLength=ubidi_getResultLength(ubidi);  // visual length including BiDi controls
|
| +    int32_t i, visualIndex;
|
| +    // Note: ubidi_countRuns()/ubidi_getVisualRun() should be faster, but that seems unnecessary for this test code.
|
| +    for(i=visualIndex=0; i<resultLength; ++i) {
|
| +        int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
|
| +        if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) {
|
| +            errln("Input line %d: %s", (int)lineNumber, line);
|
| +            return FALSE;
|
| +        }
|
| +        if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) {
|
| +            continue;  // BiDi control, omitted from expected ordering.
|
| +        }
|
| +        if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) {
|
| +            errln("Wrong ordering value at visual index %d; expected %d actual %d",
|
| +                  (int)visualIndex, ordering[visualIndex], logicalIndex);
|
| +            isOk=FALSE;
|
| +            break;
|
| +        }
|
| +        ++visualIndex;
|
| +    }
|
| +    // visualIndex is now the visual length minus the BiDi controls, which should match the length of the BidiTest.txt ordering.
|
| +    if(isOk && orderingCount!=visualIndex) {
|
| +        errln("Wrong number of ordering values; expected %d actual %d",
|
| +              (int)orderingCount, (int)visualIndex);
|
| +        isOk=FALSE;
|
| +    }
|
| +    if(!isOk) {
|
| +        printErrorLine(paraLevelName);
|
| +        char number[16];  // A space plus one decimal int32_t; indexes can have more than one digit.
|
| +        UnicodeString eord("Expected ordering: ");
|
| +        for(i=0; i<orderingCount; ++i) {
|
| +            sprintf(number, " %d", (int)ordering[i]);
|
| +            eord.append(UnicodeString(number, -1, US_INV));
|
| +        }
|
| +        UnicodeString aord("Actual ordering: ");
|
| +        for(i=0; i<resultLength; ++i) {
|
| +            int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
|
| +            if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) {
|
| +                sprintf(number, " %d", (int)logicalIndex);
|
| +                aord.append(UnicodeString(number, -1, US_INV));
|
| +            }
|
| +        }
|
| +        errln(eord);
|
| +        errln(aord);
|
| +    }
|
| +    return isOk;
|
| +}
|
| +// Counts one failed check and reports its context: the input line, the input string and the paragraph level.
|
| +void BiDiConformanceTest::printErrorLine(const char *paraLevelName) {
|
| +    ++errorCount;  // TestBidiTest() stops reading once this reaches 10.
|
| +    errln("Input line %5d: %s", (int)lineNumber, line);
|
| +    errln(UnicodeString("Input string: ").append(inputString));
|
| +    errln("Para level: %s", paraLevelName);
|
| +}
|
|
|
| Property changes on: icu46/source/test/intltest/bidiconf.cpp
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|