| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 ******************************************************************************* | |
| 3 * | |
| 4 * Copyright (C) 2009-2014, International Business Machines | |
| 5 * Corporation and others. All Rights Reserved. | |
| 6 * | |
| 7 ******************************************************************************* | |
| 8 * file name: bidiconf.cpp | |
| 9 * encoding: US-ASCII | |
| 10 * tab size: 8 (not used) | |
| 11 * indentation:4 | |
| 12 * | |
| 13 * created on: 2009oct16 | |
| 14 * created by: Markus W. Scherer | |
| 15 * | |
| 16 * BiDi conformance test, using the Unicode BidiTest.txt and BidiCharacterTest.
txt files. | |
| 17 */ | |
| 18 | |
| 19 #include <stdio.h> | |
| 20 #include <stdlib.h> | |
| 21 #include <string.h> | |
| 22 #include "unicode/utypes.h" | |
| 23 #include "unicode/ubidi.h" | |
| 24 #include "unicode/errorcode.h" | |
| 25 #include "unicode/localpointer.h" | |
| 26 #include "unicode/putil.h" | |
| 27 #include "unicode/unistr.h" | |
| 28 #include "intltest.h" | |
| 29 #include "uparse.h" | |
| 30 | |
| 31 class BiDiConformanceTest : public IntlTest { | |
| 32 public: | |
| 33 BiDiConformanceTest() : | |
| 34 directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0), | |
| 35 errorCount(0) {} | |
| 36 | |
| 37 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=
NULL); | |
| 38 | |
| 39 void TestBidiTest(); | |
| 40 void TestBidiCharacterTest(); | |
| 41 private: | |
| 42 UBool parseLevels(const char *&start); | |
| 43 UBool parseOrdering(const char *start); | |
| 44 UBool parseInputStringFromBiDiClasses(const char *&start); | |
| 45 | |
| 46 UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount); | |
| 47 UBool checkOrdering(UBiDi *ubidi); | |
| 48 | |
| 49 void printErrorLine(); | |
| 50 | |
| 51 char line[10000]; | |
| 52 UBiDiLevel levels[1000]; | |
| 53 uint32_t directionBits; | |
| 54 int32_t ordering[1000]; | |
| 55 int32_t lineNumber; | |
| 56 int32_t levelsCount; | |
| 57 int32_t orderingCount; | |
| 58 int32_t errorCount; | |
| 59 UnicodeString inputString; | |
| 60 const char *paraLevelName; | |
| 61 char levelNameString[12]; | |
| 62 }; | |
| 63 | |
| 64 extern IntlTest *createBiDiConformanceTest() { | |
| 65 return new BiDiConformanceTest(); | |
| 66 } | |
| 67 | |
| 68 void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *
&name, char * /*par*/) { | |
| 69 if(exec) { | |
| 70 logln("TestSuite BiDiConformanceTest: "); | |
| 71 } | |
| 72 TESTCASE_AUTO_BEGIN; | |
| 73 TESTCASE_AUTO(TestBidiTest); | |
| 74 TESTCASE_AUTO(TestBidiCharacterTest); | |
| 75 TESTCASE_AUTO_END; | |
| 76 } | |
| 77 | |
| 78 U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose); | |
| 79 | |
| 80 UBool BiDiConformanceTest::parseLevels(const char *&start) { | |
| 81 directionBits=0; | |
| 82 levelsCount=0; | |
| 83 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { | |
| 84 if(*start=='x') { | |
| 85 levels[levelsCount++]=UBIDI_DEFAULT_LTR; | |
| 86 ++start; | |
| 87 } else { | |
| 88 char *end; | |
| 89 uint32_t value=(uint32_t)strtoul(start, &end, 10); | |
| 90 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';'
) | |
| 91 || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) { | |
| 92 errln("\nError on line %d: Levels parse error at %s", (int)lineN
umber, start); | |
| 93 printErrorLine(); | |
| 94 return FALSE; | |
| 95 } | |
| 96 levels[levelsCount++]=(UBiDiLevel)value; | |
| 97 directionBits|=(1<<(value&1)); | |
| 98 start=end; | |
| 99 } | |
| 100 } | |
| 101 return TRUE; | |
| 102 } | |
| 103 | |
| 104 UBool BiDiConformanceTest::parseOrdering(const char *start) { | |
| 105 orderingCount=0; | |
| 106 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { | |
| 107 char *end; | |
| 108 uint32_t value=(uint32_t)strtoul(start, &end, 10); | |
| 109 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';') ||
value>=1000) { | |
| 110 errln("\nError on line %d: Reorder parse error at %s", (int)lineNumb
er, start); | |
| 111 printErrorLine(); | |
| 112 return FALSE; | |
| 113 } | |
| 114 ordering[orderingCount++]=(int32_t)value; | |
| 115 start=end; | |
| 116 } | |
| 117 return TRUE; | |
| 118 } | |
| 119 | |
| 120 static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={ | |
| 121 0x6c, // 'l' for L | |
| 122 0x52, // 'R' for R | |
| 123 0x33, // '3' for EN | |
| 124 0x2d, // '-' for ES | |
| 125 0x25, // '%' for ET | |
| 126 0x39, // '9' for AN | |
| 127 0x2c, // ',' for CS | |
| 128 0x2f, // '/' for B | |
| 129 0x5f, // '_' for S | |
| 130 0x20, // ' ' for WS | |
| 131 0x3d, // '=' for ON | |
| 132 0x65, // 'e' for LRE | |
| 133 0x6f, // 'o' for LRO | |
| 134 0x41, // 'A' for AL | |
| 135 0x45, // 'E' for RLE | |
| 136 0x4f, // 'O' for RLO | |
| 137 0x2a, // '*' for PDF | |
| 138 0x60, // '`' for NSM | |
| 139 0x7c, // '|' for BN | |
| 140 // new in Unicode 6.3/ICU 52 | |
| 141 0x53, // 'S' for FSI | |
| 142 0x69, // 'i' for LRI | |
| 143 0x49, // 'I' for RLI | |
| 144 0x2e // '.' for PDI | |
| 145 }; | |
| 146 | |
| 147 U_CDECL_BEGIN | |
| 148 | |
| 149 static UCharDirection U_CALLCONV | |
| 150 biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) { | |
| 151 for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) { | |
| 152 if(c==charFromBiDiClass[i]) { | |
| 153 return (UCharDirection)i; | |
| 154 } | |
| 155 } | |
| 156 // Character not in our hardcoded table. | |
| 157 // Should not occur during testing. | |
| 158 return U_BIDI_CLASS_DEFAULT; | |
| 159 } | |
| 160 | |
| 161 U_CDECL_END | |
| 162 | |
| 163 static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={ | |
| 164 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3, 0 | |
| 165 }; | |
| 166 | |
| 167 UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) { | |
| 168 inputString.remove(); | |
| 169 /* | |
| 170 * Lengthy but fast BiDi class parser. | |
| 171 * A simple parser could terminate or extract the name string and use | |
| 172 * int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClass
String); | |
| 173 * but that makes this test take significantly more time. | |
| 174 */ | |
| 175 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { | |
| 176 UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT; | |
| 177 // Compare each character once until we have a match on | |
| 178 // a complete, short BiDi class name. | |
| 179 if(start[0]=='L') { | |
| 180 if(start[1]=='R') { | |
| 181 if(start[2]=='E') { | |
| 182 biDiClass=U_LEFT_TO_RIGHT_EMBEDDING; | |
| 183 } else if(start[2]=='I') { | |
| 184 biDiClass=U_LEFT_TO_RIGHT_ISOLATE; | |
| 185 } else if(start[2]=='O') { | |
| 186 biDiClass=U_LEFT_TO_RIGHT_OVERRIDE; | |
| 187 } | |
| 188 } else { | |
| 189 biDiClass=U_LEFT_TO_RIGHT; | |
| 190 } | |
| 191 } else if(start[0]=='R') { | |
| 192 if(start[1]=='L') { | |
| 193 if(start[2]=='E') { | |
| 194 biDiClass=U_RIGHT_TO_LEFT_EMBEDDING; | |
| 195 } else if(start[2]=='I') { | |
| 196 biDiClass=U_RIGHT_TO_LEFT_ISOLATE; | |
| 197 } else if(start[2]=='O') { | |
| 198 biDiClass=U_RIGHT_TO_LEFT_OVERRIDE; | |
| 199 } | |
| 200 } else { | |
| 201 biDiClass=U_RIGHT_TO_LEFT; | |
| 202 } | |
| 203 } else if(start[0]=='E') { | |
| 204 if(start[1]=='N') { | |
| 205 biDiClass=U_EUROPEAN_NUMBER; | |
| 206 } else if(start[1]=='S') { | |
| 207 biDiClass=U_EUROPEAN_NUMBER_SEPARATOR; | |
| 208 } else if(start[1]=='T') { | |
| 209 biDiClass=U_EUROPEAN_NUMBER_TERMINATOR; | |
| 210 } | |
| 211 } else if(start[0]=='A') { | |
| 212 if(start[1]=='L') { | |
| 213 biDiClass=U_RIGHT_TO_LEFT_ARABIC; | |
| 214 } else if(start[1]=='N') { | |
| 215 biDiClass=U_ARABIC_NUMBER; | |
| 216 } | |
| 217 } else if(start[0]=='C' && start[1]=='S') { | |
| 218 biDiClass=U_COMMON_NUMBER_SEPARATOR; | |
| 219 } else if(start[0]=='B') { | |
| 220 if(start[1]=='N') { | |
| 221 biDiClass=U_BOUNDARY_NEUTRAL; | |
| 222 } else { | |
| 223 biDiClass=U_BLOCK_SEPARATOR; | |
| 224 } | |
| 225 } else if(start[0]=='S') { | |
| 226 biDiClass=U_SEGMENT_SEPARATOR; | |
| 227 } else if(start[0]=='W' && start[1]=='S') { | |
| 228 biDiClass=U_WHITE_SPACE_NEUTRAL; | |
| 229 } else if(start[0]=='O' && start[1]=='N') { | |
| 230 biDiClass=U_OTHER_NEUTRAL; | |
| 231 } else if(start[0]=='P' && start[1]=='D') { | |
| 232 if(start[2]=='F') { | |
| 233 biDiClass=U_POP_DIRECTIONAL_FORMAT; | |
| 234 } else if(start[2]=='I') { | |
| 235 biDiClass=U_POP_DIRECTIONAL_ISOLATE; | |
| 236 } | |
| 237 } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') { | |
| 238 biDiClass=U_DIR_NON_SPACING_MARK; | |
| 239 } else if(start[0]=='F' && start[1]=='S' && start[2]=='I') { | |
| 240 biDiClass=U_FIRST_STRONG_ISOLATE; | |
| 241 } | |
| 242 // Now we verify that the class name is terminated properly, | |
| 243 // and not just the start of a longer word. | |
| 244 int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass]; | |
| 245 char c=start[biDiClassNameLength]; | |
| 246 if(biDiClass<U_CHAR_DIRECTION_COUNT && (U_IS_INV_WHITESPACE(c) || c==';'
|| c==0)) { | |
| 247 inputString.append(charFromBiDiClass[biDiClass]); | |
| 248 start+=biDiClassNameLength; | |
| 249 continue; | |
| 250 } | |
| 251 errln("\nError on line %d: BiDi class string not recognized at %s", (int
)lineNumber, start); | |
| 252 printErrorLine(); | |
| 253 return FALSE; | |
| 254 } | |
| 255 return TRUE; | |
| 256 } | |
| 257 | |
| 258 void BiDiConformanceTest::TestBidiTest() { | |
| 259 IcuTestErrorCode errorCode(*this, "TestBidiTest"); | |
| 260 const char *sourceTestDataPath=getSourceTestData(errorCode); | |
| 261 if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata " | |
| 262 "folder (getSourceTestData())")) { | |
| 263 return; | |
| 264 } | |
| 265 char bidiTestPath[400]; | |
| 266 strcpy(bidiTestPath, sourceTestDataPath); | |
| 267 strcat(bidiTestPath, "BidiTest.txt"); | |
| 268 LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r")); | |
| 269 if(bidiTestFile.isNull()) { | |
| 270 errln("unable to open %s", bidiTestPath); | |
| 271 return; | |
| 272 } | |
| 273 LocalUBiDiPointer ubidi(ubidi_open()); | |
| 274 ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL, | |
| 275 NULL, NULL, errorCode); | |
| 276 if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) { | |
| 277 return; | |
| 278 } | |
| 279 lineNumber=0; | |
| 280 levelsCount=0; | |
| 281 orderingCount=0; | |
| 282 errorCount=0; | |
| 283 // paraLevelName must be initialized in case the first non-comment line is i
n error | |
| 284 paraLevelName="N/A"; | |
| 285 while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias(
))!=NULL) { | |
| 286 ++lineNumber; | |
| 287 // Remove trailing comments and whitespace. | |
| 288 char *commentStart=strchr(line, '#'); | |
| 289 if(commentStart!=NULL) { | |
| 290 *commentStart=0; | |
| 291 } | |
| 292 u_rtrim(line); | |
| 293 const char *start=u_skipWhitespace(line); | |
| 294 if(*start==0) { | |
| 295 continue; // Skip empty and comment-only lines. | |
| 296 } | |
| 297 if(*start=='@') { | |
| 298 ++start; | |
| 299 if(0==strncmp(start, "Levels:", 7)) { | |
| 300 start+=7; | |
| 301 if(!parseLevels(start)) { | |
| 302 return; | |
| 303 } | |
| 304 } else if(0==strncmp(start, "Reorder:", 8)) { | |
| 305 if(!parseOrdering(start+8)) { | |
| 306 return; | |
| 307 } | |
| 308 } | |
| 309 // Skip unknown @Xyz: ... | |
| 310 } else { | |
| 311 if(!parseInputStringFromBiDiClasses(start)) { | |
| 312 return; | |
| 313 } | |
| 314 start=u_skipWhitespace(start); | |
| 315 if(*start!=';') { | |
| 316 errln("missing ; separator on input line %s", line); | |
| 317 return; | |
| 318 } | |
| 319 start=u_skipWhitespace(start+1); | |
| 320 char *end; | |
| 321 uint32_t bitset=(uint32_t)strtoul(start, &end, 16); | |
| 322 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0
)) { | |
| 323 errln("input bitset parse error at %s", start); | |
| 324 return; | |
| 325 } | |
| 326 // Loop over the bitset. | |
| 327 static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBID
I_DEFAULT_RTL }; | |
| 328 static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL"
, "auto/RTL" }; | |
| 329 for(int i=0; i<=3; ++i) { | |
| 330 if(bitset&(1<<i)) { | |
| 331 ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inp
utString.length(), | |
| 332 paraLevels[i], NULL, errorCode); | |
| 333 const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlia
s(), errorCode); | |
| 334 if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_
getLevels()")) { | |
| 335 errln("Input line %d: %s", (int)lineNumber, line); | |
| 336 return; | |
| 337 } | |
| 338 paraLevelName=paraLevelNames[i]; | |
| 339 if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi
.getAlias()))) { | |
| 340 // continue outerLoop; does not exist in C++ | |
| 341 // so just break out of the inner loop. | |
| 342 break; | |
| 343 } | |
| 344 if(!checkOrdering(ubidi.getAlias())) { | |
| 345 // continue outerLoop; does not exist in C++ | |
| 346 // so just break out of the inner loop. | |
| 347 break; | |
| 348 } | |
| 349 } | |
| 350 } | |
| 351 } | |
| 352 } | |
| 353 } | |
| 354 | |
| 355 /* | |
| 356 ******************************************************************************* | |
| 357 * | |
| 358 * created on: 2013jul01 | |
| 359 * created by: Matitiahu Allouche | |
| 360 | |
| 361 This function performs a conformance test for implementations of the | |
| 362 Unicode Bidirectional Algorithm, specified in UAX #9: Unicode | |
| 363 Bidirectional Algorithm, at http://www.unicode.org/unicode/reports/tr9/ | |
| 364 | |
| 365 Each test case is represented in a single line which is read from a file | |
| 366 named BidiCharacterTest.txt. Empty, blank and comment lines may also appear | |
| 367 in this file. | |
| 368 | |
| 369 The format of the test data is specified below. Note that each test | |
| 370 case constitutes a single line of text; reordering is applied within a | |
| 371 single line and independently of a rendering engine, and rules L3 and L4 | |
| 372 are out of scope. | |
| 373 | |
| 374 The number sign '#' is the comment character: everything is ignored from | |
| 375 the occurrence of '#' until the end of the line. | |
| 376 Empty lines and lines containing only spaces and/or comments are ignored. | |
| 377 | |
| 378 Lines which represent test cases consist of 4 or 5 fields separated by a | |
| 379 semicolon. Each field consists of tokens separated by whitespace (space | |
| 380 or Tab). Whitespace before and after semicolons is optional. | |
| 381 | |
| 382 Field 0: A sequence of hexadecimal code point values separated by space | |
| 383 | |
| 384 Field 1: A value representing the paragraph direction, as follows: | |
| 385 - 0 represents left-to-right | |
| 386 - 1 represents right-to-left | |
| 387 - 2 represents auto-LTR according to rules P2 and P3 of the algorithm | |
| 388 - 3 represents auto-RTL according to rules P2 and P3 of the algorithm | |
| 389 - a negative number whose absolute value is taken as paragraph level; | |
| 390 this may be useful to test cases where the embedding level approaches | |
| 391 or exceeds the maximum embedding level. | |
| 392 | |
| 393 Field 2: The resolved paragraph embedding level. If the input (field 0) | |
| 394 includes more than one paragraph, this field represents the | |
| 395 resolved level of the first paragraph. | |
| 396 | |
| 397 Field 3: An ordered list of resulting levels for each token in field 0 | |
| 398 (each token represents one source character). | |
| 399 The UBA does not assign levels to certain characters (e.g. LRO); | |
| 400 characters removed in rule X9 are indicated with an 'x'. | |
| 401 | |
| 402 Field 4: An ordered list of indices showing the resulting visual ordering | |
| 403 from left to right; characters with a resolved level of 'x' are | |
| 404 skipped. The numbers are zero-based. Each index corresponds to | |
| 405 a character in the reordered (visual) string. It represents the | |
| 406 index of the source character in the input (field 0). | |
| 407 This field is optional. When it is absent, the visual ordering | |
| 408 is not verified. | |
| 409 | |
| 410 Examples: | |
| 411 | |
| 412 # This is a comment line. | |
| 413 L L ON R ; 0 ; 0 ; 0 0 0 1 ; 0 1 2 3 | |
| 414 L L ON R;0;0;0 0 0 1;0 1 2 3 | |
| 415 | |
| 416 # Note: in the next line, 'B' represents a block separator, not the letter 'B'. | |
| 417 LRE A B C PDF;2;0;x 2 0 0 x;1 2 3 | |
| 418 # Note: in the next line, 'b' represents the letter 'b', not a block separator. | |
| 419 a b c 05d0 05d1 x ; 0 ; 0 ; 0 0 0 1 1 0 ; 0 1 2 4 3 5 | |
| 420 | |
| 421 a R R x ; 1 ; 1 ; 2 1 1 2 | |
| 422 L L R R R B R R L L L B ON ON ; 3 ; 0 ; 0 0 1 1 1 0 1 1 2 2 2 1 1 1 | |
| 423 | |
| 424 * | |
| 425 ******************************************************************************* | |
| 426 */ | |
| 427 void BiDiConformanceTest::TestBidiCharacterTest() { | |
| 428 IcuTestErrorCode errorCode(*this, "TestBidiCharacterTest"); | |
| 429 const char *sourceTestDataPath=getSourceTestData(errorCode); | |
| 430 if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata " | |
| 431 "folder (getSourceTestData())")) { | |
| 432 return; | |
| 433 } | |
| 434 char bidiTestPath[400]; | |
| 435 strcpy(bidiTestPath, sourceTestDataPath); | |
| 436 strcat(bidiTestPath, "BidiCharacterTest.txt"); | |
| 437 LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r")); | |
| 438 if(bidiTestFile.isNull()) { | |
| 439 errln("unable to open %s", bidiTestPath); | |
| 440 return; | |
| 441 } | |
| 442 LocalUBiDiPointer ubidi(ubidi_open()); | |
| 443 lineNumber=0; | |
| 444 levelsCount=0; | |
| 445 orderingCount=0; | |
| 446 errorCount=0; | |
| 447 while(errorCount<20 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias(
))!=NULL) { | |
| 448 ++lineNumber; | |
| 449 paraLevelName="N/A"; | |
| 450 inputString="N/A"; | |
| 451 // Remove trailing comments and whitespace. | |
| 452 char *commentStart=strchr(line, '#'); | |
| 453 if(commentStart!=NULL) { | |
| 454 *commentStart=0; | |
| 455 } | |
| 456 u_rtrim(line); | |
| 457 const char *start=u_skipWhitespace(line); | |
| 458 if(*start==0) { | |
| 459 continue; // Skip empty and comment-only lines. | |
| 460 } | |
| 461 // Parse the code point string in field 0. | |
| 462 UChar *buffer=inputString.getBuffer(200); | |
| 463 int32_t length=u_parseString(start, buffer, inputString.getCapacity(), N
ULL, errorCode); | |
| 464 if(errorCode.logIfFailureAndReset("Invalid string in field 0")) { | |
| 465 errln("Input line %d: %s", (int)lineNumber, line); | |
| 466 inputString.remove(); | |
| 467 continue; | |
| 468 } | |
| 469 inputString.releaseBuffer(length); | |
| 470 start=strchr(start, ';'); | |
| 471 if(start==NULL) { | |
| 472 errorCount++; | |
| 473 errln("\nError on line %d: Missing ; separator on line: %s", (int)li
neNumber, line); | |
| 474 continue; | |
| 475 } | |
| 476 start=u_skipWhitespace(start+1); | |
| 477 char *end; | |
| 478 int32_t paraDirection=(int32_t)strtol(start, &end, 10); | |
| 479 UBiDiLevel paraLevel=UBIDI_MAX_EXPLICIT_LEVEL+2; | |
| 480 if(paraDirection==0) { | |
| 481 paraLevel=0; | |
| 482 paraLevelName="LTR"; | |
| 483 } | |
| 484 else if(paraDirection==1) { | |
| 485 paraLevel=1; | |
| 486 paraLevelName="RTL"; | |
| 487 } | |
| 488 else if(paraDirection==2) { | |
| 489 paraLevel=UBIDI_DEFAULT_LTR; | |
| 490 paraLevelName="Auto/LTR"; | |
| 491 } | |
| 492 else if(paraDirection==3) { | |
| 493 paraLevel=UBIDI_DEFAULT_RTL; | |
| 494 paraLevelName="Auto/RTL"; | |
| 495 } | |
| 496 else if(paraDirection<0 && -paraDirection<=(UBIDI_MAX_EXPLICIT_LEVEL+1))
{ | |
| 497 paraLevel=(UBiDiLevel)(-paraDirection); | |
| 498 sprintf(levelNameString, "%d", (int)paraLevel); | |
| 499 paraLevelName=levelNameString; | |
| 500 } | |
| 501 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || | |
| 502 paraLevel==(UBIDI_MAX_EXPLICIT_LEVEL+2)) { | |
| 503 errln("\nError on line %d: Input paragraph direction incorrect at %s
", (int)lineNumber, start); | |
| 504 printErrorLine(); | |
| 505 continue; | |
| 506 } | |
| 507 start=u_skipWhitespace(end); | |
| 508 if(*start!=';') { | |
| 509 errorCount++; | |
| 510 errln("\nError on line %d: Missing ; separator on line: %s", (int)li
neNumber, line); | |
| 511 continue; | |
| 512 } | |
| 513 start++; | |
| 514 uint32_t resolvedParaLevel=(uint32_t)strtoul(start, &end, 10); | |
| 515 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || | |
| 516 resolvedParaLevel>1) { | |
| 517 errln("\nError on line %d: Resolved paragraph level incorrect at %s"
, (int)lineNumber, start); | |
| 518 printErrorLine(); | |
| 519 continue; | |
| 520 } | |
| 521 start=u_skipWhitespace(end); | |
| 522 if(*start!=';') { | |
| 523 errorCount++; | |
| 524 errln("\nError on line %d: Missing ; separator on line: %s", (int)li
neNumber, line); | |
| 525 return; | |
| 526 } | |
| 527 start++; | |
| 528 if(!parseLevels(start)) { | |
| 529 continue; | |
| 530 } | |
| 531 start=u_skipWhitespace(start); | |
| 532 if(*start==';') { | |
| 533 if(!parseOrdering(start+1)) { | |
| 534 continue; | |
| 535 } | |
| 536 } | |
| 537 else | |
| 538 orderingCount=-1; | |
| 539 | |
| 540 ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.len
gth(), | |
| 541 paraLevel, NULL, errorCode); | |
| 542 const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCo
de); | |
| 543 if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()"
)) { | |
| 544 errln("Input line %d: %s", (int)lineNumber, line); | |
| 545 continue; | |
| 546 } | |
| 547 UBiDiLevel actualLevel; | |
| 548 if((actualLevel=ubidi_getParaLevel(ubidi.getAlias()))!=resolvedParaLevel
) { | |
| 549 printErrorLine(); | |
| 550 errln("\nError on line %d: Wrong resolved paragraph level; expected
%d actual %d", | |
| 551 (int)lineNumber, resolvedParaLevel, actualLevel); | |
| 552 continue; | |
| 553 } | |
| 554 if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias())
)) { | |
| 555 continue; | |
| 556 } | |
| 557 if(orderingCount>=0 && !checkOrdering(ubidi.getAlias())) { | |
| 558 continue; | |
| 559 } | |
| 560 } | |
| 561 } | |
| 562 | |
| 563 static UChar printLevel(UBiDiLevel level) { | |
| 564 if(level<UBIDI_DEFAULT_LTR) { | |
| 565 return 0x30+level; | |
| 566 } else { | |
| 567 return 0x78; // 'x' | |
| 568 } | |
| 569 } | |
| 570 | |
| 571 static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actual
Count) { | |
| 572 uint32_t actualDirectionBits=0; | |
| 573 for(int32_t i=0; i<actualCount; ++i) { | |
| 574 actualDirectionBits|=(1<<(actualLevels[i]&1)); | |
| 575 } | |
| 576 return actualDirectionBits; | |
| 577 } | |
| 578 | |
| 579 UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t
actualCount) { | |
| 580 UBool isOk=TRUE; | |
| 581 if(levelsCount!=actualCount) { | |
| 582 errln("\nError on line %d: Wrong number of level values; expected %d act
ual %d", | |
| 583 (int)lineNumber, (int)levelsCount, (int)actualCount); | |
| 584 isOk=FALSE; | |
| 585 } else { | |
| 586 for(int32_t i=0; i<actualCount; ++i) { | |
| 587 if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) { | |
| 588 if(directionBits!=3 && directionBits==getDirectionBits(actualLev
els, actualCount)) { | |
| 589 // ICU used a shortcut: | |
| 590 // Since the text is unidirectional, it did not store the re
solved | |
| 591 // levels but just returns all levels as the paragraph level
0 or 1. | |
| 592 // The reordering result is the same, so this is fine. | |
| 593 break; | |
| 594 } else { | |
| 595 errln("\nError on line %d: Wrong level value at index %d; ex
pected %d actual %d", | |
| 596 (int)lineNumber, (int)i, levels[i], actualLevels[i]); | |
| 597 isOk=FALSE; | |
| 598 break; | |
| 599 } | |
| 600 } | |
| 601 } | |
| 602 } | |
| 603 if(!isOk) { | |
| 604 printErrorLine(); | |
| 605 UnicodeString els("Expected levels: "); | |
| 606 int32_t i; | |
| 607 for(i=0; i<levelsCount; ++i) { | |
| 608 els.append((UChar)0x20).append(printLevel(levels[i])); | |
| 609 } | |
| 610 UnicodeString als("Actual levels: "); | |
| 611 for(i=0; i<actualCount; ++i) { | |
| 612 als.append((UChar)0x20).append(printLevel(actualLevels[i])); | |
| 613 } | |
| 614 errln(els); | |
| 615 errln(als); | |
| 616 } | |
| 617 return isOk; | |
| 618 } | |
| 619 | |
| 620 // Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS); | |
| 621 // does not work for custom BiDi class assignments | |
| 622 // and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here. | |
| 623 // Therefore we just skip the indexes for BiDi controls while comparing | |
| 624 // with the expected ordering that has them omitted. | |
| 625 UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi) { | |
| 626 UBool isOk=TRUE; | |
| 627 IcuTestErrorCode errorCode(*this, "checkOrdering()"); | |
| 628 int32_t resultLength=ubidi_getResultLength(ubidi); // visual length includi
ng BiDi controls | |
| 629 int32_t i, visualIndex; | |
| 630 // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun() | |
| 631 // and loop over each run's indexes, but that seems unnecessary for this tes
t code. | |
| 632 for(i=visualIndex=0; i<resultLength; ++i) { | |
| 633 int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); | |
| 634 if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) { | |
| 635 errln("Input line %d: %s", (int)lineNumber, line); | |
| 636 return FALSE; | |
| 637 } | |
| 638 if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) { | |
| 639 continue; // BiDi control, omitted from expected ordering. | |
| 640 } | |
| 641 if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) { | |
| 642 errln("\nError on line %d: Wrong ordering value at visual index %d;
expected %d actual %d", | |
| 643 (int)lineNumber, (int)visualIndex, ordering[visualIndex], logi
calIndex); | |
| 644 isOk=FALSE; | |
| 645 break; | |
| 646 } | |
| 647 ++visualIndex; | |
| 648 } | |
| 649 // visualIndex is now the visual length minus the BiDi controls, | |
| 650 // which should match the length of the BidiTest.txt ordering. | |
| 651 if(isOk && orderingCount!=visualIndex) { | |
| 652 errln("\nError on line %d: Wrong number of ordering values; expected %d
actual %d", | |
| 653 (int)lineNumber, (int)orderingCount, (int)visualIndex); | |
| 654 isOk=FALSE; | |
| 655 } | |
| 656 if(!isOk) { | |
| 657 printErrorLine(); | |
| 658 UnicodeString eord("Expected ordering: "); | |
| 659 for(i=0; i<orderingCount; ++i) { | |
| 660 eord.append((UChar)0x20).append((UChar)(0x30+ordering[i])); | |
| 661 } | |
| 662 UnicodeString aord("Actual ordering: "); | |
| 663 for(i=0; i<resultLength; ++i) { | |
| 664 int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); | |
| 665 if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) { | |
| 666 aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex)); | |
| 667 } | |
| 668 } | |
| 669 errln(eord); | |
| 670 errln(aord); | |
| 671 } | |
| 672 return isOk; | |
| 673 } | |
| 674 | |
| 675 void BiDiConformanceTest::printErrorLine() { | |
| 676 ++errorCount; | |
| 677 errln("Input line %5d: %s", (int)lineNumber, line); | |
| 678 errln(UnicodeString("Input string: ")+inputString); | |
| 679 errln("Para level: %s", paraLevelName); | |
| 680 } | |
| OLD | NEW |