Index: icu46/source/test/intltest/bidiconf.cpp |
=================================================================== |
--- icu46/source/test/intltest/bidiconf.cpp (revision 0) |
+++ icu46/source/test/intltest/bidiconf.cpp (revision 0) |
@@ -0,0 +1,488 @@ |
+/* |
+******************************************************************************* |
+* |
+* Copyright (C) 2009-2010, International Business Machines |
+* Corporation and others. All Rights Reserved. |
+* |
+******************************************************************************* |
+* file name: bidiconf.cpp |
+* encoding: US-ASCII |
+* tab size: 8 (not used) |
+* indentation:4 |
+* |
+* created on: 2009oct16 |
+* created by: Markus W. Scherer |
+* |
+* BiDi conformance test, using the Unicode BidiTest.txt file. |
+*/ |
+ |
+#include <stdio.h> |
+#include <stdlib.h> |
+#include <string.h> |
+#include "unicode/utypes.h" |
+#include "unicode/ubidi.h" |
+#include "unicode/errorcode.h" |
+#include "unicode/localpointer.h" |
+#include "unicode/putil.h" |
+#include "unicode/unistr.h" |
+#include "intltest.h" |
+#include "uparse.h" |
+ |
+class BiDiConformanceTest : public IntlTest { |
+public: |
+ BiDiConformanceTest() : |
+ directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0), |
+ errorCount(0) {} |
+ |
+ void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); |
+ |
+ void TestBidiTest(); |
+private: |
+ char *getUnidataPath(char path[]); |
+ |
+ UBool parseLevels(const char *start); |
+ UBool parseOrdering(const char *start); |
+ UBool parseInputStringFromBiDiClasses(const char *&start); |
+ |
+ UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount, |
+ const char *paraLevelName); |
+ UBool checkOrdering(UBiDi *ubidi, const char *paraLevelName); |
+ |
+ void printErrorLine(const char *paraLevelName); |
+ |
+ char line[10000]; |
+ UBiDiLevel levels[1000]; |
+ uint32_t directionBits; |
+ int32_t ordering[1000]; |
+ int32_t lineNumber; |
+ int32_t levelsCount; |
+ int32_t orderingCount; |
+ int32_t errorCount; |
+ UnicodeString inputString; |
+}; |
+ |
+extern IntlTest *createBiDiConformanceTest() { |
+ return new BiDiConformanceTest(); |
+} |
+ |
+void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { |
+ if(exec) { |
+ logln("TestSuite BiDiConformanceTest: "); |
+ } |
+ switch (index) { |
+ TESTCASE(0, TestBidiTest); |
+ default: |
+ name=""; |
+ break; // needed to end the loop |
+ } |
+} |
+ |
+// TODO: Move to a common place (IntlTest?) to avoid duplication with UnicodeTest (ucdtest.cpp). |
+char *BiDiConformanceTest::getUnidataPath(char path[]) { |
+ IcuTestErrorCode errorCode(*this, "getUnidataPath"); |
+ const int kUnicodeDataTxtLength=15; // strlen("UnicodeData.txt") |
+ |
+ // Look inside ICU_DATA first. |
+ strcpy(path, pathToDataDirectory()); |
+ strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt"); |
+ FILE *f=fopen(path, "r"); |
+ if(f!=NULL) { |
+ fclose(f); |
+ *(strchr(path, 0)-kUnicodeDataTxtLength)=0; // Remove the basename. |
+ return path; |
+ } |
+ |
+ // As a fallback, try to guess where the source data was located |
+ // at the time ICU was built, and look there. |
+# ifdef U_TOPSRCDIR |
+ strcpy(path, U_TOPSRCDIR U_FILE_SEP_STRING "data"); |
+# else |
+ strcpy(path, loadTestData(errorCode)); |
+ strcat(path, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." |
+ U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." |
+ U_FILE_SEP_STRING "data"); |
+# endif |
+ strcat(path, U_FILE_SEP_STRING); |
+ strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt"); |
+ f=fopen(path, "r"); |
+ if(f!=NULL) { |
+ fclose(f); |
+ *(strchr(path, 0)-kUnicodeDataTxtLength)=0; // Remove the basename. |
+ return path; |
+ } |
+ return NULL; |
+} |
+ |
+U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose); |
+ |
+UBool BiDiConformanceTest::parseLevels(const char *start) { |
+ directionBits=0; |
+ levelsCount=0; |
+ while(*start!=0 && *(start=u_skipWhitespace(start))!=0) { |
+ if(*start=='x') { |
+ levels[levelsCount++]=UBIDI_DEFAULT_LTR; |
+ ++start; |
+ } else { |
+ char *end; |
+ uint32_t value=(uint32_t)strtoul(start, &end, 10); |
+ if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) { |
+ errln("@Levels: parse error at %s", start); |
+ return FALSE; |
+ } |
+ levels[levelsCount++]=(UBiDiLevel)value; |
+ directionBits|=(1<<(value&1)); |
+ start=end; |
+ } |
+ } |
+ return TRUE; |
+} |
+ |
+UBool BiDiConformanceTest::parseOrdering(const char *start) { |
+ orderingCount=0; |
+ while(*start!=0 && *(start=u_skipWhitespace(start))!=0) { |
+ char *end; |
+ uint32_t value=(uint32_t)strtoul(start, &end, 10); |
+ if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>=1000) { |
+ errln("@Reorder: parse error at %s", start); |
+ return FALSE; |
+ } |
+ ordering[orderingCount++]=(int32_t)value; |
+ start=end; |
+ } |
+ return TRUE; |
+} |
+ |
+static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={ |
+ 0x6c, // 'l' for L |
+ 0x52, // 'R' for R |
+ 0x33, // '3' for EN |
+ 0x2d, // '-' for ES |
+ 0x25, // '%' for ET |
+ 0x39, // '9' for AN |
+ 0x2c, // ',' for CS |
+ 0x2f, // '/' for B |
+ 0x5f, // '_' for S |
+ 0x20, // ' ' for WS |
+ 0x3d, // '=' for ON |
+ 0x65, // 'e' for LRE |
+ 0x6f, // 'o' for LRO |
+ 0x41, // 'A' for AL |
+ 0x45, // 'E' for RLE |
+ 0x4f, // 'O' for RLO |
+ 0x2a, // '*' for PDF |
+ 0x60, // '`' for NSM |
+ 0x7c // '|' for BN |
+}; |
+ |
+U_CDECL_BEGIN |
+ |
+static UCharDirection U_CALLCONV |
+biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) { |
+ for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) { |
+ if(c==charFromBiDiClass[i]) { |
+ return (UCharDirection)i; |
+ } |
+ } |
+ // Character not in our hardcoded table. |
+ // Should not occur during testing. |
+ return U_BIDI_CLASS_DEFAULT; |
+} |
+ |
+U_CDECL_END |
+ |
+static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={ |
+ 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 0 |
+}; |
+ |
+UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) { |
+ inputString.remove(); |
+ /* |
+ * Lengthy but fast BiDi class parser. |
+ * A simple parser could terminate or extract the name string and use |
+ * int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString); |
+ * but that makes this test take significantly more time. |
+ */ |
+ while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { |
+ UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT; |
+ // Compare each character once until we have a match on |
+ // a complete, short BiDi class name. |
+ if(start[0]=='L') { |
+ if(start[1]=='R') { |
+ if(start[2]=='E') { |
+ biDiClass=U_LEFT_TO_RIGHT_EMBEDDING; |
+ } else if(start[2]=='O') { |
+ biDiClass=U_LEFT_TO_RIGHT_OVERRIDE; |
+ } |
+ } else { |
+ biDiClass=U_LEFT_TO_RIGHT; |
+ } |
+ } else if(start[0]=='R') { |
+ if(start[1]=='L') { |
+ if(start[2]=='E') { |
+ biDiClass=U_RIGHT_TO_LEFT_EMBEDDING; |
+ } else if(start[2]=='O') { |
+ biDiClass=U_RIGHT_TO_LEFT_OVERRIDE; |
+ } |
+ } else { |
+ biDiClass=U_RIGHT_TO_LEFT; |
+ } |
+ } else if(start[0]=='E') { |
+ if(start[1]=='N') { |
+ biDiClass=U_EUROPEAN_NUMBER; |
+ } else if(start[1]=='S') { |
+ biDiClass=U_EUROPEAN_NUMBER_SEPARATOR; |
+ } else if(start[1]=='T') { |
+ biDiClass=U_EUROPEAN_NUMBER_TERMINATOR; |
+ } |
+ } else if(start[0]=='A') { |
+ if(start[1]=='L') { |
+ biDiClass=U_RIGHT_TO_LEFT_ARABIC; |
+ } else if(start[1]=='N') { |
+ biDiClass=U_ARABIC_NUMBER; |
+ } |
+ } else if(start[0]=='C' && start[1]=='S') { |
+ biDiClass=U_COMMON_NUMBER_SEPARATOR; |
+ } else if(start[0]=='B') { |
+ if(start[1]=='N') { |
+ biDiClass=U_BOUNDARY_NEUTRAL; |
+ } else { |
+ biDiClass=U_BLOCK_SEPARATOR; |
+ } |
+ } else if(start[0]=='S') { |
+ biDiClass=U_SEGMENT_SEPARATOR; |
+ } else if(start[0]=='W' && start[1]=='S') { |
+ biDiClass=U_WHITE_SPACE_NEUTRAL; |
+ } else if(start[0]=='O' && start[1]=='N') { |
+ biDiClass=U_OTHER_NEUTRAL; |
+ } else if(start[0]=='P' && start[1]=='D' && start[2]=='F') { |
+ biDiClass=U_POP_DIRECTIONAL_FORMAT; |
+ } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') { |
+ biDiClass=U_DIR_NON_SPACING_MARK; |
+ } |
+ // Now we verify that the class name is terminated properly, |
+ // and not just the start of a longer word. |
+ int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass]; |
+ char c=start[biDiClassNameLength]; |
+ if(biDiClass==U_CHAR_DIRECTION_COUNT || (!U_IS_INV_WHITESPACE(c) && c!=';' && c!=0)) { |
+ errln("BiDi class string not recognized at %s", start); |
+ return FALSE; |
+ } |
+ inputString.append(charFromBiDiClass[biDiClass]); |
+ start+=biDiClassNameLength; |
+ } |
+ return TRUE; |
+} |
+ |
+void BiDiConformanceTest::TestBidiTest() { |
+ IcuTestErrorCode errorCode(*this, "TestBidiTest"); |
+ const char *sourceTestDataPath=getSourceTestData(errorCode); |
+ if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata " |
+ "folder (getSourceTestData())")) { |
+ return; |
+ } |
+ char bidiTestPath[400]; |
+ strcpy(bidiTestPath, sourceTestDataPath); |
+ strcat(bidiTestPath, "BidiTest.txt"); |
+ LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r")); |
+ if(bidiTestFile.isNull()) { |
+ errln("unable to open %s", bidiTestPath); |
+ return; |
+ } |
+ LocalUBiDiPointer ubidi(ubidi_open()); |
+ ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL, |
+ NULL, NULL, errorCode); |
+ if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) { |
+ return; |
+ } |
+ lineNumber=0; |
+ levelsCount=0; |
+ orderingCount=0; |
+ errorCount=0; |
+ while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) { |
+ ++lineNumber; |
+ // Remove trailing comments and whitespace. |
+ char *commentStart=strchr(line, '#'); |
+ if(commentStart!=NULL) { |
+ *commentStart=0; |
+ } |
+ u_rtrim(line); |
+ const char *start=u_skipWhitespace(line); |
+ if(*start==0) { |
+ continue; // Skip empty and comment-only lines. |
+ } |
+ if(*start=='@') { |
+ ++start; |
+ if(0==strncmp(start, "Levels:", 7)) { |
+ if(!parseLevels(start+7)) { |
+ return; |
+ } |
+ } else if(0==strncmp(start, "Reorder:", 8)) { |
+ if(!parseOrdering(start+8)) { |
+ return; |
+ } |
+ } |
+ // Skip unknown @Xyz: ... |
+ } else { |
+ if(!parseInputStringFromBiDiClasses(start)) { |
+ return; |
+ } |
+ start=u_skipWhitespace(start); |
+ if(*start!=';') { |
+ errln("missing ; separator on input line %s", line); |
+ return; |
+ } |
+ start=u_skipWhitespace(start+1); |
+ char *end; |
+ uint32_t bitset=(uint32_t)strtoul(start, &end, 16); |
+ if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) { |
+ errln("input bitset parse error at %s", start); |
+ return; |
+ } |
+ // Loop over the bitset. |
+ static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBIDI_DEFAULT_RTL }; |
+ static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" }; |
+ for(int i=0; i<=3; ++i) { |
+ if(bitset&(1<<i)) { |
+ ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(), |
+ paraLevels[i], NULL, errorCode); |
+ const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode); |
+ if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) { |
+ errln("Input line %d: %s", (int)lineNumber, line); |
+ return; |
+ } |
+ if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()), |
+ paraLevelNames[i])) { |
+ // continue outerLoop; does not exist in C++ |
+ // so just break out of the inner loop. |
+ break; |
+ } |
+ if(!checkOrdering(ubidi.getAlias(), paraLevelNames[i])) { |
+ // continue outerLoop; does not exist in C++ |
+ // so just break out of the inner loop. |
+ break; |
+ } |
+ } |
+ } |
+ } |
+ } |
+} |
+ |
+static UChar printLevel(UBiDiLevel level) { |
+ if(level<UBIDI_DEFAULT_LTR) { |
+ return 0x30+level; |
+ } else { |
+ return 0x78; // 'x' |
+ } |
+} |
+ |
+static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) { |
+ uint32_t actualDirectionBits=0; |
+ for(int32_t i=0; i<actualCount; ++i) { |
+ actualDirectionBits|=(1<<(actualLevels[i]&1)); |
+ } |
+ return actualDirectionBits; |
+} |
+ |
+UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount, |
+ const char *paraLevelName) { |
+ UBool isOk=TRUE; |
+ if(levelsCount!=actualCount) { |
+ errln("Wrong number of level values; expected %d actual %d", |
+ (int)levelsCount, (int)actualCount); |
+ isOk=FALSE; |
+ } else { |
+ for(int32_t i=0; i<actualCount; ++i) { |
+ if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) { |
+ if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) { |
+ // ICU used a shortcut: |
+ // Since the text is unidirectional, it did not store the resolved |
+ // levels but just returns all levels as the paragraph level 0 or 1. |
+ // The reordering result is the same, so this is fine. |
+ break; |
+ } else { |
+ errln("Wrong level value at index %d; expected %d actual %d", |
+ (int)i, levels[i], actualLevels[i]); |
+ isOk=FALSE; |
+ break; |
+ } |
+ } |
+ } |
+ } |
+ if(!isOk) { |
+ printErrorLine(paraLevelName); |
+ UnicodeString els("Expected levels: "); |
+ int32_t i; |
+ for(i=0; i<levelsCount; ++i) { |
+ els.append((UChar)0x20).append(printLevel(levels[i])); |
+ } |
+ UnicodeString als("Actual levels: "); |
+ for(i=0; i<actualCount; ++i) { |
+ als.append((UChar)0x20).append(printLevel(actualLevels[i])); |
+ } |
+ errln(els); |
+ errln(als); |
+ } |
+ return isOk; |
+} |
+ |
+// Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS); |
+// does not work for custom BiDi class assignments |
+// and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here. |
+// Therefore we just skip the indexes for BiDi controls while comparing |
+// with the expected ordering that has them omitted. |
+UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi, const char *paraLevelName) { |
+ UBool isOk=TRUE; |
+ IcuTestErrorCode errorCode(*this, "TestBidiTest/checkOrdering()"); |
+ int32_t resultLength=ubidi_getResultLength(ubidi); // visual length including BiDi controls |
+ int32_t i, visualIndex; |
+ // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun() |
+ // and loop over each run's indexes, but that seems unnecessary for this test code. |
+ for(i=visualIndex=0; i<resultLength; ++i) { |
+ int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); |
+ if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) { |
+ errln("Input line %d: %s", (int)lineNumber, line); |
+ return FALSE; |
+ } |
+ if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) { |
+ continue; // BiDi control, omitted from expected ordering. |
+ } |
+ if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) { |
+ errln("Wrong ordering value at visual index %d; expected %d actual %d", |
+ (int)visualIndex, ordering[visualIndex], logicalIndex); |
+ isOk=FALSE; |
+ break; |
+ } |
+ ++visualIndex; |
+ } |
+ // visualIndex is now the visual length minus the BiDi controls, |
+ // which should match the length of the BidiTest.txt ordering. |
+ if(isOk && orderingCount!=visualIndex) { |
+ errln("Wrong number of ordering values; expected %d actual %d", |
+ (int)orderingCount, (int)visualIndex); |
+ isOk=FALSE; |
+ } |
+ if(!isOk) { |
+ printErrorLine(paraLevelName); |
+ UnicodeString eord("Expected ordering: "); |
+ for(i=0; i<orderingCount; ++i) { |
+ eord.append((UChar)0x20).append((UChar)(0x30+ordering[i])); |
+ } |
+ UnicodeString aord("Actual ordering: "); |
+ for(i=0; i<resultLength; ++i) { |
+ int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); |
+ if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) { |
+ aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex)); |
+ } |
+ } |
+ errln(eord); |
+ errln(aord); |
+ } |
+ return isOk; |
+} |
+ |
+void BiDiConformanceTest::printErrorLine(const char *paraLevelName) { |
+ ++errorCount; |
+ errln("Input line %5d: %s", (int)lineNumber, line); |
+ errln(UnicodeString("Input string: ")+inputString); |
+ errln("Para level: %s", paraLevelName); |
+} |
Property changes on: icu46/source/test/intltest/bidiconf.cpp |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |