OLD | NEW |
| (Empty) |
1 /* | |
2 ******************************************************************************* | |
3 * | |
4 * Copyright (C) 2009-2014, International Business Machines | |
5 * Corporation and others. All Rights Reserved. | |
6 * | |
7 ******************************************************************************* | |
8 * file name: bidiconf.cpp | |
9 * encoding: US-ASCII | |
10 * tab size: 8 (not used) | |
11 * indentation:4 | |
12 * | |
13 * created on: 2009oct16 | |
14 * created by: Markus W. Scherer | |
15 * | |
16 * BiDi conformance test, using the Unicode BidiTest.txt and BidiCharacterTest.
txt files. | |
17 */ | |
18 | |
19 #include <stdio.h> | |
20 #include <stdlib.h> | |
21 #include <string.h> | |
22 #include "unicode/utypes.h" | |
23 #include "unicode/ubidi.h" | |
24 #include "unicode/errorcode.h" | |
25 #include "unicode/localpointer.h" | |
26 #include "unicode/putil.h" | |
27 #include "unicode/unistr.h" | |
28 #include "intltest.h" | |
29 #include "uparse.h" | |
30 | |
31 class BiDiConformanceTest : public IntlTest { | |
32 public: | |
33 BiDiConformanceTest() : | |
34 directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0), | |
35 errorCount(0) {} | |
36 | |
37 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=
NULL); | |
38 | |
39 void TestBidiTest(); | |
40 void TestBidiCharacterTest(); | |
41 private: | |
42 UBool parseLevels(const char *&start); | |
43 UBool parseOrdering(const char *start); | |
44 UBool parseInputStringFromBiDiClasses(const char *&start); | |
45 | |
46 UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount); | |
47 UBool checkOrdering(UBiDi *ubidi); | |
48 | |
49 void printErrorLine(); | |
50 | |
51 char line[10000]; | |
52 UBiDiLevel levels[1000]; | |
53 uint32_t directionBits; | |
54 int32_t ordering[1000]; | |
55 int32_t lineNumber; | |
56 int32_t levelsCount; | |
57 int32_t orderingCount; | |
58 int32_t errorCount; | |
59 UnicodeString inputString; | |
60 const char *paraLevelName; | |
61 char levelNameString[12]; | |
62 }; | |
63 | |
64 extern IntlTest *createBiDiConformanceTest() { | |
65 return new BiDiConformanceTest(); | |
66 } | |
67 | |
68 void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *
&name, char * /*par*/) { | |
69 if(exec) { | |
70 logln("TestSuite BiDiConformanceTest: "); | |
71 } | |
72 TESTCASE_AUTO_BEGIN; | |
73 TESTCASE_AUTO(TestBidiTest); | |
74 TESTCASE_AUTO(TestBidiCharacterTest); | |
75 TESTCASE_AUTO_END; | |
76 } | |
77 | |
// Defines LocalStdioFilePointer for FILE * with fclose as the close function.
// The tests below wrap fopen() results in it and use isNull()/getAlias().
// NOTE(review): exact ownership semantics come from U_DEFINE_LOCAL_OPEN_POINTER
// in unicode/localpointer.h, which is not visible here.
U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
79 | |
80 UBool BiDiConformanceTest::parseLevels(const char *&start) { | |
81 directionBits=0; | |
82 levelsCount=0; | |
83 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { | |
84 if(*start=='x') { | |
85 levels[levelsCount++]=UBIDI_DEFAULT_LTR; | |
86 ++start; | |
87 } else { | |
88 char *end; | |
89 uint32_t value=(uint32_t)strtoul(start, &end, 10); | |
90 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';'
) | |
91 || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) { | |
92 errln("\nError on line %d: Levels parse error at %s", (int)lineN
umber, start); | |
93 printErrorLine(); | |
94 return FALSE; | |
95 } | |
96 levels[levelsCount++]=(UBiDiLevel)value; | |
97 directionBits|=(1<<(value&1)); | |
98 start=end; | |
99 } | |
100 } | |
101 return TRUE; | |
102 } | |
103 | |
104 UBool BiDiConformanceTest::parseOrdering(const char *start) { | |
105 orderingCount=0; | |
106 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { | |
107 char *end; | |
108 uint32_t value=(uint32_t)strtoul(start, &end, 10); | |
109 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';') ||
value>=1000) { | |
110 errln("\nError on line %d: Reorder parse error at %s", (int)lineNumb
er, start); | |
111 printErrorLine(); | |
112 return FALSE; | |
113 } | |
114 ordering[orderingCount++]=(int32_t)value; | |
115 start=end; | |
116 } | |
117 return TRUE; | |
118 } | |
119 | |
120 static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={ | |
121 0x6c, // 'l' for L | |
122 0x52, // 'R' for R | |
123 0x33, // '3' for EN | |
124 0x2d, // '-' for ES | |
125 0x25, // '%' for ET | |
126 0x39, // '9' for AN | |
127 0x2c, // ',' for CS | |
128 0x2f, // '/' for B | |
129 0x5f, // '_' for S | |
130 0x20, // ' ' for WS | |
131 0x3d, // '=' for ON | |
132 0x65, // 'e' for LRE | |
133 0x6f, // 'o' for LRO | |
134 0x41, // 'A' for AL | |
135 0x45, // 'E' for RLE | |
136 0x4f, // 'O' for RLO | |
137 0x2a, // '*' for PDF | |
138 0x60, // '`' for NSM | |
139 0x7c, // '|' for BN | |
140 // new in Unicode 6.3/ICU 52 | |
141 0x53, // 'S' for FSI | |
142 0x69, // 'i' for LRI | |
143 0x49, // 'I' for RLI | |
144 0x2e // '.' for PDI | |
145 }; | |
146 | |
147 U_CDECL_BEGIN | |
148 | |
149 static UCharDirection U_CALLCONV | |
150 biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) { | |
151 for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) { | |
152 if(c==charFromBiDiClass[i]) { | |
153 return (UCharDirection)i; | |
154 } | |
155 } | |
156 // Character not in our hardcoded table. | |
157 // Should not occur during testing. | |
158 return U_BIDI_CLASS_DEFAULT; | |
159 } | |
160 | |
161 U_CDECL_END | |
162 | |
163 static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={ | |
164 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3, 0 | |
165 }; | |
166 | |
167 UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) { | |
168 inputString.remove(); | |
169 /* | |
170 * Lengthy but fast BiDi class parser. | |
171 * A simple parser could terminate or extract the name string and use | |
172 * int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClass
String); | |
173 * but that makes this test take significantly more time. | |
174 */ | |
175 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { | |
176 UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT; | |
177 // Compare each character once until we have a match on | |
178 // a complete, short BiDi class name. | |
179 if(start[0]=='L') { | |
180 if(start[1]=='R') { | |
181 if(start[2]=='E') { | |
182 biDiClass=U_LEFT_TO_RIGHT_EMBEDDING; | |
183 } else if(start[2]=='I') { | |
184 biDiClass=U_LEFT_TO_RIGHT_ISOLATE; | |
185 } else if(start[2]=='O') { | |
186 biDiClass=U_LEFT_TO_RIGHT_OVERRIDE; | |
187 } | |
188 } else { | |
189 biDiClass=U_LEFT_TO_RIGHT; | |
190 } | |
191 } else if(start[0]=='R') { | |
192 if(start[1]=='L') { | |
193 if(start[2]=='E') { | |
194 biDiClass=U_RIGHT_TO_LEFT_EMBEDDING; | |
195 } else if(start[2]=='I') { | |
196 biDiClass=U_RIGHT_TO_LEFT_ISOLATE; | |
197 } else if(start[2]=='O') { | |
198 biDiClass=U_RIGHT_TO_LEFT_OVERRIDE; | |
199 } | |
200 } else { | |
201 biDiClass=U_RIGHT_TO_LEFT; | |
202 } | |
203 } else if(start[0]=='E') { | |
204 if(start[1]=='N') { | |
205 biDiClass=U_EUROPEAN_NUMBER; | |
206 } else if(start[1]=='S') { | |
207 biDiClass=U_EUROPEAN_NUMBER_SEPARATOR; | |
208 } else if(start[1]=='T') { | |
209 biDiClass=U_EUROPEAN_NUMBER_TERMINATOR; | |
210 } | |
211 } else if(start[0]=='A') { | |
212 if(start[1]=='L') { | |
213 biDiClass=U_RIGHT_TO_LEFT_ARABIC; | |
214 } else if(start[1]=='N') { | |
215 biDiClass=U_ARABIC_NUMBER; | |
216 } | |
217 } else if(start[0]=='C' && start[1]=='S') { | |
218 biDiClass=U_COMMON_NUMBER_SEPARATOR; | |
219 } else if(start[0]=='B') { | |
220 if(start[1]=='N') { | |
221 biDiClass=U_BOUNDARY_NEUTRAL; | |
222 } else { | |
223 biDiClass=U_BLOCK_SEPARATOR; | |
224 } | |
225 } else if(start[0]=='S') { | |
226 biDiClass=U_SEGMENT_SEPARATOR; | |
227 } else if(start[0]=='W' && start[1]=='S') { | |
228 biDiClass=U_WHITE_SPACE_NEUTRAL; | |
229 } else if(start[0]=='O' && start[1]=='N') { | |
230 biDiClass=U_OTHER_NEUTRAL; | |
231 } else if(start[0]=='P' && start[1]=='D') { | |
232 if(start[2]=='F') { | |
233 biDiClass=U_POP_DIRECTIONAL_FORMAT; | |
234 } else if(start[2]=='I') { | |
235 biDiClass=U_POP_DIRECTIONAL_ISOLATE; | |
236 } | |
237 } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') { | |
238 biDiClass=U_DIR_NON_SPACING_MARK; | |
239 } else if(start[0]=='F' && start[1]=='S' && start[2]=='I') { | |
240 biDiClass=U_FIRST_STRONG_ISOLATE; | |
241 } | |
242 // Now we verify that the class name is terminated properly, | |
243 // and not just the start of a longer word. | |
244 int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass]; | |
245 char c=start[biDiClassNameLength]; | |
246 if(biDiClass<U_CHAR_DIRECTION_COUNT && (U_IS_INV_WHITESPACE(c) || c==';'
|| c==0)) { | |
247 inputString.append(charFromBiDiClass[biDiClass]); | |
248 start+=biDiClassNameLength; | |
249 continue; | |
250 } | |
251 errln("\nError on line %d: BiDi class string not recognized at %s", (int
)lineNumber, start); | |
252 printErrorLine(); | |
253 return FALSE; | |
254 } | |
255 return TRUE; | |
256 } | |
257 | |
258 void BiDiConformanceTest::TestBidiTest() { | |
259 IcuTestErrorCode errorCode(*this, "TestBidiTest"); | |
260 const char *sourceTestDataPath=getSourceTestData(errorCode); | |
261 if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata " | |
262 "folder (getSourceTestData())")) { | |
263 return; | |
264 } | |
265 char bidiTestPath[400]; | |
266 strcpy(bidiTestPath, sourceTestDataPath); | |
267 strcat(bidiTestPath, "BidiTest.txt"); | |
268 LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r")); | |
269 if(bidiTestFile.isNull()) { | |
270 errln("unable to open %s", bidiTestPath); | |
271 return; | |
272 } | |
273 LocalUBiDiPointer ubidi(ubidi_open()); | |
274 ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL, | |
275 NULL, NULL, errorCode); | |
276 if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) { | |
277 return; | |
278 } | |
279 lineNumber=0; | |
280 levelsCount=0; | |
281 orderingCount=0; | |
282 errorCount=0; | |
283 // paraLevelName must be initialized in case the first non-comment line is i
n error | |
284 paraLevelName="N/A"; | |
285 while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias(
))!=NULL) { | |
286 ++lineNumber; | |
287 // Remove trailing comments and whitespace. | |
288 char *commentStart=strchr(line, '#'); | |
289 if(commentStart!=NULL) { | |
290 *commentStart=0; | |
291 } | |
292 u_rtrim(line); | |
293 const char *start=u_skipWhitespace(line); | |
294 if(*start==0) { | |
295 continue; // Skip empty and comment-only lines. | |
296 } | |
297 if(*start=='@') { | |
298 ++start; | |
299 if(0==strncmp(start, "Levels:", 7)) { | |
300 start+=7; | |
301 if(!parseLevels(start)) { | |
302 return; | |
303 } | |
304 } else if(0==strncmp(start, "Reorder:", 8)) { | |
305 if(!parseOrdering(start+8)) { | |
306 return; | |
307 } | |
308 } | |
309 // Skip unknown @Xyz: ... | |
310 } else { | |
311 if(!parseInputStringFromBiDiClasses(start)) { | |
312 return; | |
313 } | |
314 start=u_skipWhitespace(start); | |
315 if(*start!=';') { | |
316 errln("missing ; separator on input line %s", line); | |
317 return; | |
318 } | |
319 start=u_skipWhitespace(start+1); | |
320 char *end; | |
321 uint32_t bitset=(uint32_t)strtoul(start, &end, 16); | |
322 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0
)) { | |
323 errln("input bitset parse error at %s", start); | |
324 return; | |
325 } | |
326 // Loop over the bitset. | |
327 static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBID
I_DEFAULT_RTL }; | |
328 static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL"
, "auto/RTL" }; | |
329 for(int i=0; i<=3; ++i) { | |
330 if(bitset&(1<<i)) { | |
331 ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inp
utString.length(), | |
332 paraLevels[i], NULL, errorCode); | |
333 const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlia
s(), errorCode); | |
334 if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_
getLevels()")) { | |
335 errln("Input line %d: %s", (int)lineNumber, line); | |
336 return; | |
337 } | |
338 paraLevelName=paraLevelNames[i]; | |
339 if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi
.getAlias()))) { | |
340 // continue outerLoop; does not exist in C++ | |
341 // so just break out of the inner loop. | |
342 break; | |
343 } | |
344 if(!checkOrdering(ubidi.getAlias())) { | |
345 // continue outerLoop; does not exist in C++ | |
346 // so just break out of the inner loop. | |
347 break; | |
348 } | |
349 } | |
350 } | |
351 } | |
352 } | |
353 } | |
354 | |
355 /* | |
356 ******************************************************************************* | |
357 * | |
358 * created on: 2013jul01 | |
359 * created by: Matitiahu Allouche | |
360 | |
361 This function performs a conformance test for implementations of the | |
362 Unicode Bidirectional Algorithm, specified in UAX #9: Unicode | |
363 Bidirectional Algorithm, at http://www.unicode.org/unicode/reports/tr9/ | |
364 | |
365 Each test case is represented in a single line which is read from a file | |
366 named BidiCharacterTest.txt. Empty, blank and comment lines may also appear | |
367 in this file. | |
368 | |
369 The format of the test data is specified below. Note that each test | |
370 case constitutes a single line of text; reordering is applied within a | |
371 single line and independently of a rendering engine, and rules L3 and L4 | |
372 are out of scope. | |
373 | |
374 The number sign '#' is the comment character: everything is ignored from | |
375 the occurrence of '#' until the end of the line. | |
376 Empty lines and lines containing only spaces and/or comments are ignored. | |
377 | |
378 Lines which represent test cases consist of 4 or 5 fields separated by a | |
379 semicolon. Each field consists of tokens separated by whitespace (space | |
380 or Tab). Whitespace before and after semicolons is optional. | |
381 | |
382 Field 0: A sequence of hexadecimal code point values separated by space | |
383 | |
384 Field 1: A value representing the paragraph direction, as follows: | |
385 - 0 represents left-to-right | |
386 - 1 represents right-to-left | |
387 - 2 represents auto-LTR according to rules P2 and P3 of the algorithm | |
388 - 3 represents auto-RTL according to rules P2 and P3 of the algorithm | |
389 - a negative number whose absolute value is taken as paragraph level; | |
390 this may be useful to test cases where the embedding level approaches | |
391 or exceeds the maximum embedding level. | |
392 | |
393 Field 2: The resolved paragraph embedding level. If the input (field 0) | |
394 includes more than one paragraph, this field represents the | |
395 resolved level of the first paragraph. | |
396 | |
397 Field 3: An ordered list of resulting levels for each token in field 0 | |
398 (each token represents one source character). | |
399 The UBA does not assign levels to certain characters (e.g. LRO); | |
400 characters removed in rule X9 are indicated with an 'x'. | |
401 | |
402 Field 4: An ordered list of indices showing the resulting visual ordering | |
403 from left to right; characters with a resolved level of 'x' are | |
404 skipped. The numbers are zero-based. Each index corresponds to | |
405 a character in the reordered (visual) string. It represents the | |
406 index of the source character in the input (field 0). | |
407 This field is optional. When it is absent, the visual ordering | |
408 is not verified. | |
409 | |
410 Examples: | |
411 | |
412 # This is a comment line. | |
413 L L ON R ; 0 ; 0 ; 0 0 0 1 ; 0 1 2 3 | |
414 L L ON R;0;0;0 0 0 1;0 1 2 3 | |
415 | |
416 # Note: in the next line, 'B' represents a block separator, not the letter 'B'. | |
417 LRE A B C PDF;2;0;x 2 0 0 x;1 2 3 | |
418 # Note: in the next line, 'b' represents the letter 'b', not a block separator. | |
419 a b c 05d0 05d1 x ; 0 ; 0 ; 0 0 0 1 1 0 ; 0 1 2 4 3 5 | |
420 | |
421 a R R x ; 1 ; 1 ; 2 1 1 2 | |
422 L L R R R B R R L L L B ON ON ; 3 ; 0 ; 0 0 1 1 1 0 1 1 2 2 2 1 1 1 | |
423 | |
424 * | |
425 ******************************************************************************* | |
426 */ | |
427 void BiDiConformanceTest::TestBidiCharacterTest() { | |
428 IcuTestErrorCode errorCode(*this, "TestBidiCharacterTest"); | |
429 const char *sourceTestDataPath=getSourceTestData(errorCode); | |
430 if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata " | |
431 "folder (getSourceTestData())")) { | |
432 return; | |
433 } | |
434 char bidiTestPath[400]; | |
435 strcpy(bidiTestPath, sourceTestDataPath); | |
436 strcat(bidiTestPath, "BidiCharacterTest.txt"); | |
437 LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r")); | |
438 if(bidiTestFile.isNull()) { | |
439 errln("unable to open %s", bidiTestPath); | |
440 return; | |
441 } | |
442 LocalUBiDiPointer ubidi(ubidi_open()); | |
443 lineNumber=0; | |
444 levelsCount=0; | |
445 orderingCount=0; | |
446 errorCount=0; | |
447 while(errorCount<20 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias(
))!=NULL) { | |
448 ++lineNumber; | |
449 paraLevelName="N/A"; | |
450 inputString="N/A"; | |
451 // Remove trailing comments and whitespace. | |
452 char *commentStart=strchr(line, '#'); | |
453 if(commentStart!=NULL) { | |
454 *commentStart=0; | |
455 } | |
456 u_rtrim(line); | |
457 const char *start=u_skipWhitespace(line); | |
458 if(*start==0) { | |
459 continue; // Skip empty and comment-only lines. | |
460 } | |
461 // Parse the code point string in field 0. | |
462 UChar *buffer=inputString.getBuffer(200); | |
463 int32_t length=u_parseString(start, buffer, inputString.getCapacity(), N
ULL, errorCode); | |
464 if(errorCode.logIfFailureAndReset("Invalid string in field 0")) { | |
465 errln("Input line %d: %s", (int)lineNumber, line); | |
466 inputString.remove(); | |
467 continue; | |
468 } | |
469 inputString.releaseBuffer(length); | |
470 start=strchr(start, ';'); | |
471 if(start==NULL) { | |
472 errorCount++; | |
473 errln("\nError on line %d: Missing ; separator on line: %s", (int)li
neNumber, line); | |
474 continue; | |
475 } | |
476 start=u_skipWhitespace(start+1); | |
477 char *end; | |
478 int32_t paraDirection=(int32_t)strtol(start, &end, 10); | |
479 UBiDiLevel paraLevel=UBIDI_MAX_EXPLICIT_LEVEL+2; | |
480 if(paraDirection==0) { | |
481 paraLevel=0; | |
482 paraLevelName="LTR"; | |
483 } | |
484 else if(paraDirection==1) { | |
485 paraLevel=1; | |
486 paraLevelName="RTL"; | |
487 } | |
488 else if(paraDirection==2) { | |
489 paraLevel=UBIDI_DEFAULT_LTR; | |
490 paraLevelName="Auto/LTR"; | |
491 } | |
492 else if(paraDirection==3) { | |
493 paraLevel=UBIDI_DEFAULT_RTL; | |
494 paraLevelName="Auto/RTL"; | |
495 } | |
496 else if(paraDirection<0 && -paraDirection<=(UBIDI_MAX_EXPLICIT_LEVEL+1))
{ | |
497 paraLevel=(UBiDiLevel)(-paraDirection); | |
498 sprintf(levelNameString, "%d", (int)paraLevel); | |
499 paraLevelName=levelNameString; | |
500 } | |
501 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || | |
502 paraLevel==(UBIDI_MAX_EXPLICIT_LEVEL+2)) { | |
503 errln("\nError on line %d: Input paragraph direction incorrect at %s
", (int)lineNumber, start); | |
504 printErrorLine(); | |
505 continue; | |
506 } | |
507 start=u_skipWhitespace(end); | |
508 if(*start!=';') { | |
509 errorCount++; | |
510 errln("\nError on line %d: Missing ; separator on line: %s", (int)li
neNumber, line); | |
511 continue; | |
512 } | |
513 start++; | |
514 uint32_t resolvedParaLevel=(uint32_t)strtoul(start, &end, 10); | |
515 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || | |
516 resolvedParaLevel>1) { | |
517 errln("\nError on line %d: Resolved paragraph level incorrect at %s"
, (int)lineNumber, start); | |
518 printErrorLine(); | |
519 continue; | |
520 } | |
521 start=u_skipWhitespace(end); | |
522 if(*start!=';') { | |
523 errorCount++; | |
524 errln("\nError on line %d: Missing ; separator on line: %s", (int)li
neNumber, line); | |
525 return; | |
526 } | |
527 start++; | |
528 if(!parseLevels(start)) { | |
529 continue; | |
530 } | |
531 start=u_skipWhitespace(start); | |
532 if(*start==';') { | |
533 if(!parseOrdering(start+1)) { | |
534 continue; | |
535 } | |
536 } | |
537 else | |
538 orderingCount=-1; | |
539 | |
540 ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.len
gth(), | |
541 paraLevel, NULL, errorCode); | |
542 const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCo
de); | |
543 if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()"
)) { | |
544 errln("Input line %d: %s", (int)lineNumber, line); | |
545 continue; | |
546 } | |
547 UBiDiLevel actualLevel; | |
548 if((actualLevel=ubidi_getParaLevel(ubidi.getAlias()))!=resolvedParaLevel
) { | |
549 printErrorLine(); | |
550 errln("\nError on line %d: Wrong resolved paragraph level; expected
%d actual %d", | |
551 (int)lineNumber, resolvedParaLevel, actualLevel); | |
552 continue; | |
553 } | |
554 if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias())
)) { | |
555 continue; | |
556 } | |
557 if(orderingCount>=0 && !checkOrdering(ubidi.getAlias())) { | |
558 continue; | |
559 } | |
560 } | |
561 } | |
562 | |
563 static UChar printLevel(UBiDiLevel level) { | |
564 if(level<UBIDI_DEFAULT_LTR) { | |
565 return 0x30+level; | |
566 } else { | |
567 return 0x78; // 'x' | |
568 } | |
569 } | |
570 | |
571 static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actual
Count) { | |
572 uint32_t actualDirectionBits=0; | |
573 for(int32_t i=0; i<actualCount; ++i) { | |
574 actualDirectionBits|=(1<<(actualLevels[i]&1)); | |
575 } | |
576 return actualDirectionBits; | |
577 } | |
578 | |
579 UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t
actualCount) { | |
580 UBool isOk=TRUE; | |
581 if(levelsCount!=actualCount) { | |
582 errln("\nError on line %d: Wrong number of level values; expected %d act
ual %d", | |
583 (int)lineNumber, (int)levelsCount, (int)actualCount); | |
584 isOk=FALSE; | |
585 } else { | |
586 for(int32_t i=0; i<actualCount; ++i) { | |
587 if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) { | |
588 if(directionBits!=3 && directionBits==getDirectionBits(actualLev
els, actualCount)) { | |
589 // ICU used a shortcut: | |
590 // Since the text is unidirectional, it did not store the re
solved | |
591 // levels but just returns all levels as the paragraph level
0 or 1. | |
592 // The reordering result is the same, so this is fine. | |
593 break; | |
594 } else { | |
595 errln("\nError on line %d: Wrong level value at index %d; ex
pected %d actual %d", | |
596 (int)lineNumber, (int)i, levels[i], actualLevels[i]); | |
597 isOk=FALSE; | |
598 break; | |
599 } | |
600 } | |
601 } | |
602 } | |
603 if(!isOk) { | |
604 printErrorLine(); | |
605 UnicodeString els("Expected levels: "); | |
606 int32_t i; | |
607 for(i=0; i<levelsCount; ++i) { | |
608 els.append((UChar)0x20).append(printLevel(levels[i])); | |
609 } | |
610 UnicodeString als("Actual levels: "); | |
611 for(i=0; i<actualCount; ++i) { | |
612 als.append((UChar)0x20).append(printLevel(actualLevels[i])); | |
613 } | |
614 errln(els); | |
615 errln(als); | |
616 } | |
617 return isOk; | |
618 } | |
619 | |
620 // Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS); | |
621 // does not work for custom BiDi class assignments | |
622 // and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here. | |
623 // Therefore we just skip the indexes for BiDi controls while comparing | |
624 // with the expected ordering that has them omitted. | |
625 UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi) { | |
626 UBool isOk=TRUE; | |
627 IcuTestErrorCode errorCode(*this, "checkOrdering()"); | |
628 int32_t resultLength=ubidi_getResultLength(ubidi); // visual length includi
ng BiDi controls | |
629 int32_t i, visualIndex; | |
630 // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun() | |
631 // and loop over each run's indexes, but that seems unnecessary for this tes
t code. | |
632 for(i=visualIndex=0; i<resultLength; ++i) { | |
633 int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); | |
634 if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) { | |
635 errln("Input line %d: %s", (int)lineNumber, line); | |
636 return FALSE; | |
637 } | |
638 if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) { | |
639 continue; // BiDi control, omitted from expected ordering. | |
640 } | |
641 if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) { | |
642 errln("\nError on line %d: Wrong ordering value at visual index %d;
expected %d actual %d", | |
643 (int)lineNumber, (int)visualIndex, ordering[visualIndex], logi
calIndex); | |
644 isOk=FALSE; | |
645 break; | |
646 } | |
647 ++visualIndex; | |
648 } | |
649 // visualIndex is now the visual length minus the BiDi controls, | |
650 // which should match the length of the BidiTest.txt ordering. | |
651 if(isOk && orderingCount!=visualIndex) { | |
652 errln("\nError on line %d: Wrong number of ordering values; expected %d
actual %d", | |
653 (int)lineNumber, (int)orderingCount, (int)visualIndex); | |
654 isOk=FALSE; | |
655 } | |
656 if(!isOk) { | |
657 printErrorLine(); | |
658 UnicodeString eord("Expected ordering: "); | |
659 for(i=0; i<orderingCount; ++i) { | |
660 eord.append((UChar)0x20).append((UChar)(0x30+ordering[i])); | |
661 } | |
662 UnicodeString aord("Actual ordering: "); | |
663 for(i=0; i<resultLength; ++i) { | |
664 int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); | |
665 if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) { | |
666 aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex)); | |
667 } | |
668 } | |
669 errln(eord); | |
670 errln(aord); | |
671 } | |
672 return isOk; | |
673 } | |
674 | |
675 void BiDiConformanceTest::printErrorLine() { | |
676 ++errorCount; | |
677 errln("Input line %5d: %s", (int)lineNumber, line); | |
678 errln(UnicodeString("Input string: ")+inputString); | |
679 errln("Para level: %s", paraLevelName); | |
680 } | |
OLD | NEW |