OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ****************************************************************************** |
| 3 * |
| 4 * Copyright (C) 1999-2010, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ****************************************************************************** |
| 8 * file name: ubidi.c |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * created on: 1999jul27 |
| 14 * created by: Markus W. Scherer, updated by Matitiahu Allouche |
| 15 */ |
| 16 |
| 17 #include "cmemory.h" |
| 18 #include "unicode/utypes.h" |
| 19 #include "unicode/ustring.h" |
| 20 #include "unicode/uchar.h" |
| 21 #include "unicode/ubidi.h" |
| 22 #include "ubidi_props.h" |
| 23 #include "ubidiimp.h" |
| 24 #include "uassert.h" |
| 25 |
| 26 /* |
| 27 * General implementation notes: |
| 28 * |
| 29 * Throughout the implementation, there are comments like (W2) that refer to |
| 30 * rules of the BiDi algorithm in its version 5, in this example to the second |
| 31 * rule of the resolution of weak types. |
| 32 * |
| 33 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32) |
| 34 * character according to UTF-16, the second UChar gets the directional property of |
| 35 * the entire character assigned, while the first one gets a BN, a boundary |
| 36 * neutral, type, which is ignored by most of the algorithm according to |
| 37 * rule (X9) and the implementation suggestions of the BiDi algorithm. |
| 38 * |
| 39 * Later, adjustWSLevels() will set the level for each BN to that of the |
| 40 * following character (UChar), which results in surrogate pairs getting the |
| 41 * same level on each of their surrogates. |
| 42 * |
| 43 * In a UTF-8 implementation, the same thing could be done: the last byte of |
| 44 * a multi-byte sequence would get the "real" property, while all previous |
| 45 * bytes of that sequence would get BN. |
| 46 * |
| 47 * It is not possible to assign all those parts of a character the same real |
| 48 * property because this would fail in the resolution of weak types with rules |
| 49 * that look at immediately surrounding types. |
| 50 * |
| 51 * As a related topic, this implementation does not remove Boundary Neutral |
| 52 * types from the input, but ignores them wherever this is relevant. |
| 53 * For example, the loop for the resolution of the weak types reads |
| 54 * types until it finds a non-BN. |
| 55 * Also, explicit embedding codes are neither changed into BN nor removed. |
| 56 * They are only treated the same way real BNs are. |
| 57 * As stated before, adjustWSLevels() takes care of them at the end. |
| 58 * For the purpose of conformance, the levels of all these codes |
| 59 * do not matter. |
| 60 * |
| 61 * Note that this implementation never modifies the dirProps |
| 62 * after the initial setup. |
| 63 * |
| 64 * |
| 65 * In this implementation, the resolution of weak types (Wn), |
| 66 * neutrals (Nn), and the assignment of the resolved level (In) |
| 67 * are all done in one single loop, in resolveImplicitLevels(). |
| 68 * Changes of dirProp values are done on the fly, without writing |
| 69 * them back to the dirProps array. |
| 70 * |
| 71 * |
| 72 * This implementation contains code that allows to bypass steps of the |
| 73 * algorithm that are not needed on the specific paragraph |
| 74 * in order to speed up the most common cases considerably, |
| 75 * like text that is entirely LTR, or RTL text without numbers. |
| 76 * |
| 77 * Most of this is done by setting a bit for each directional property |
| 78 * in a flags variable and later checking for whether there are |
| 79 * any LTR characters or any RTL characters, or both, whether |
| 80 * there are any explicit embedding codes, etc. |
| 81 * |
| 82 * If the (Xn) steps are performed, then the flags are re-evaluated, |
| 83 * because they will then not contain the embedding codes any more |
| 84 * and will be adjusted for override codes, so that subsequently |
| 85 * more bypassing may be possible than what the initial flags suggested. |
| 86 * |
| 87 * If the text is not mixed-directional, then the |
| 88 * algorithm steps for the weak type resolution are not performed, |
| 89 * and all levels are set to the paragraph level. |
| 90 * |
| 91 * If there are no explicit embedding codes, then the (Xn) steps |
| 92 * are not performed. |
| 93 * |
| 94 * If embedding levels are supplied as a parameter, then all |
| 95 * explicit embedding codes are ignored, and the (Xn) steps |
| 96 * are not performed. |
| 97 * |
| 98 * White Space types could get the level of the run they belong to, |
| 99 * and are checked with a test of (flags&MASK_EMBEDDING) to |
| 100 * consider if the paragraph direction should be considered in |
| 101 * the flags variable. |
| 102 * |
| 103 * If there are no White Space types in the paragraph, then |
| 104 * (L1) is not necessary in adjustWSLevels(). |
| 105 */ |
| 106 |
| 107 /* to avoid some conditional statements, use tiny constant arrays */ |
| 108 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) }; |
| 109 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) }; |
| 110 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; |
| 111 |
| 112 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1] |
| 113 #define DIRPROP_FLAG_E(level) flagE[(level)&1] |
| 114 #define DIRPROP_FLAG_O(level) flagO[(level)&1] |
| 115 |
| 116 /* UBiDi object management -------------------------------------------------- */ |
| 117 |
| 118 U_CAPI UBiDi * U_EXPORT2 |
| 119 ubidi_open(void) |
| 120 { |
| 121 UErrorCode errorCode=U_ZERO_ERROR; |
| 122 return ubidi_openSized(0, 0, &errorCode); |
| 123 } |
| 124 |
| 125 U_CAPI UBiDi * U_EXPORT2 |
| 126 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode)
{ |
| 127 UBiDi *pBiDi; |
| 128 |
| 129 /* check the argument values */ |
| 130 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
| 131 return NULL; |
| 132 } else if(maxLength<0 || maxRunCount<0) { |
| 133 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 134 return NULL; /* invalid arguments */ |
| 135 } |
| 136 |
| 137 /* allocate memory for the object */ |
| 138 pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi)); |
| 139 if(pBiDi==NULL) { |
| 140 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 141 return NULL; |
| 142 } |
| 143 |
| 144 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */ |
| 145 uprv_memset(pBiDi, 0, sizeof(UBiDi)); |
| 146 |
| 147 /* get BiDi properties */ |
| 148 pBiDi->bdp=ubidi_getSingleton(); |
| 149 |
| 150 /* allocate memory for arrays as requested */ |
| 151 if(maxLength>0) { |
| 152 if( !getInitialDirPropsMemory(pBiDi, maxLength) || |
| 153 !getInitialLevelsMemory(pBiDi, maxLength) |
| 154 ) { |
| 155 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 156 } |
| 157 } else { |
| 158 pBiDi->mayAllocateText=TRUE; |
| 159 } |
| 160 |
| 161 if(maxRunCount>0) { |
| 162 if(maxRunCount==1) { |
| 163 /* use simpleRuns[] */ |
| 164 pBiDi->runsSize=sizeof(Run); |
| 165 } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) { |
| 166 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 167 } |
| 168 } else { |
| 169 pBiDi->mayAllocateRuns=TRUE; |
| 170 } |
| 171 |
| 172 if(U_SUCCESS(*pErrorCode)) { |
| 173 return pBiDi; |
| 174 } else { |
| 175 ubidi_close(pBiDi); |
| 176 return NULL; |
| 177 } |
| 178 } |
| 179 |
| 180 /* |
| 181 * We are allowed to allocate memory if memory==NULL or |
| 182 * mayAllocate==TRUE for each array that we need. |
| 183 * We also try to grow memory as needed if we |
| 184 * allocate it. |
| 185 * |
| 186 * Assume sizeNeeded>0. |
| 187 * If *pMemory!=NULL, then assume *pSize>0. |
| 188 * |
| 189 * ### this realloc() may unnecessarily copy the old data, |
| 190 * which we know we don't need any more; |
| 191 * is this the best way to do this?? |
| 192 */ |
| 193 U_CFUNC UBool |
| 194 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAlloc
ate, int32_t sizeNeeded) { |
| 195 void **pMemory = (void **)bidiMem; |
| 196 /* check for existing memory */ |
| 197 if(*pMemory==NULL) { |
| 198 /* we need to allocate memory */ |
| 199 if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) { |
| 200 *pSize=sizeNeeded; |
| 201 return TRUE; |
| 202 } else { |
| 203 return FALSE; |
| 204 } |
| 205 } else { |
| 206 if(sizeNeeded<=*pSize) { |
| 207 /* there is already enough memory */ |
| 208 return TRUE; |
| 209 } |
| 210 else if(!mayAllocate) { |
| 211 /* not enough memory, and we must not allocate */ |
| 212 return FALSE; |
| 213 } else { |
| 214 /* we try to grow */ |
| 215 void *memory; |
| 216 /* in most cases, we do not need the copy-old-data part of |
| 217 * realloc, but it is needed when adding runs using getRunsMemory() |
| 218 * in setParaRunsOnly() |
| 219 */ |
| 220 if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) { |
| 221 *pMemory=memory; |
| 222 *pSize=sizeNeeded; |
| 223 return TRUE; |
| 224 } else { |
| 225 /* we failed to grow */ |
| 226 return FALSE; |
| 227 } |
| 228 } |
| 229 } |
| 230 } |
| 231 |
| 232 U_CAPI void U_EXPORT2 |
| 233 ubidi_close(UBiDi *pBiDi) { |
| 234 if(pBiDi!=NULL) { |
| 235 pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block
*/ |
| 236 if(pBiDi->dirPropsMemory!=NULL) { |
| 237 uprv_free(pBiDi->dirPropsMemory); |
| 238 } |
| 239 if(pBiDi->levelsMemory!=NULL) { |
| 240 uprv_free(pBiDi->levelsMemory); |
| 241 } |
| 242 if(pBiDi->runsMemory!=NULL) { |
| 243 uprv_free(pBiDi->runsMemory); |
| 244 } |
| 245 if(pBiDi->parasMemory!=NULL) { |
| 246 uprv_free(pBiDi->parasMemory); |
| 247 } |
| 248 if(pBiDi->insertPoints.points!=NULL) { |
| 249 uprv_free(pBiDi->insertPoints.points); |
| 250 } |
| 251 |
| 252 uprv_free(pBiDi); |
| 253 } |
| 254 } |
| 255 |
| 256 /* set to approximate "inverse BiDi" ---------------------------------------- */ |
| 257 |
| 258 U_CAPI void U_EXPORT2 |
| 259 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) { |
| 260 if(pBiDi!=NULL) { |
| 261 pBiDi->isInverse=isInverse; |
| 262 pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L |
| 263 : UBIDI_REORDER_DEFAULT; |
| 264 } |
| 265 } |
| 266 |
| 267 U_CAPI UBool U_EXPORT2 |
| 268 ubidi_isInverse(UBiDi *pBiDi) { |
| 269 if(pBiDi!=NULL) { |
| 270 return pBiDi->isInverse; |
| 271 } else { |
| 272 return FALSE; |
| 273 } |
| 274 } |
| 275 |
| 276 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of |
| 277 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre |
| 278 * concept of RUNS_ONLY which is a double operation. |
| 279 * It could be advantageous to divide this into 3 concepts: |
| 280 * a) Operation: direct / inverse / RUNS_ONLY |
| 281 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R |
| 282 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL |
| 283 * This would allow combinations not possible today like RUNS_ONLY with |
| 284 * NUMBERS_SPECIAL. |
| 285 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and |
| 286 * REMOVE_CONTROLS for the inverse step. |
| 287 * Not all combinations would be supported, and probably not all do make sense. |
| 288 * This would need to document which ones are supported and what are the |
| 289 * fallbacks for unsupported combinations. |
| 290 */ |
| 291 U_CAPI void U_EXPORT2 |
| 292 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) { |
| 293 if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT) |
| 294 && (reorderingMode < UBIDI_REORDER_COUNT)) { |
| 295 pBiDi->reorderingMode = reorderingMode; |
| 296 pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBE
RS_AS_L); |
| 297 } |
| 298 } |
| 299 |
| 300 U_CAPI UBiDiReorderingMode U_EXPORT2 |
| 301 ubidi_getReorderingMode(UBiDi *pBiDi) { |
| 302 if (pBiDi!=NULL) { |
| 303 return pBiDi->reorderingMode; |
| 304 } else { |
| 305 return UBIDI_REORDER_DEFAULT; |
| 306 } |
| 307 } |
| 308 |
| 309 U_CAPI void U_EXPORT2 |
| 310 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) { |
| 311 if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { |
| 312 reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; |
| 313 } |
| 314 if (pBiDi!=NULL) { |
| 315 pBiDi->reorderingOptions=reorderingOptions; |
| 316 } |
| 317 } |
| 318 |
| 319 U_CAPI uint32_t U_EXPORT2 |
| 320 ubidi_getReorderingOptions(UBiDi *pBiDi) { |
| 321 if (pBiDi!=NULL) { |
| 322 return pBiDi->reorderingOptions; |
| 323 } else { |
| 324 return 0; |
| 325 } |
| 326 } |
| 327 |
| 328 U_CAPI UBiDiDirection U_EXPORT2 |
| 329 ubidi_getBaseDirection(const UChar *text, |
| 330 int32_t length){ |
| 331 |
| 332 int32_t i; |
| 333 UChar32 uchar; |
| 334 UCharDirection dir; |
| 335 |
| 336 if( text==NULL || length<-1 ){ |
| 337 return UBIDI_NEUTRAL; |
| 338 } |
| 339 |
| 340 if(length==-1) { |
| 341 length=u_strlen(text); |
| 342 } |
| 343 |
| 344 for( i = 0 ; i < length; ) { |
| 345 /* i is incremented by U16_NEXT */ |
| 346 U16_NEXT(text, i, length, uchar); |
| 347 dir = u_charDirection(uchar); |
| 348 if( dir == U_LEFT_TO_RIGHT ) |
| 349 return UBIDI_LTR; |
| 350 if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC ) |
| 351 return UBIDI_RTL; |
| 352 } |
| 353 return UBIDI_NEUTRAL; |
| 354 } |
| 355 |
| 356 /* perform (P2)..(P3) ------------------------------------------------------- */ |
| 357 |
| 358 /* |
| 359 * Get the directional properties for the text, |
| 360 * calculate the flags bit-set, and |
| 361 * determine the paragraph level if necessary. |
| 362 */ |
/*
 * getDirProps(pBiDi): one pass over pBiDi->text (originalLength UChars).
 *
 * Reads:  text, originalLength, paraLevel, reorderingMode, reorderingOptions,
 *         orderParagraphsLTR.
 * Writes: dirPropsMemory[] - one entry per UChar, OR-ed with the current
 *           paragraph's paraDir (CONTEXT_RTL or 0); for a code point above
 *           U+FFFF the lead-surrogate slot gets BN and the trail slot gets
 *           the real class;
 *         flags, lastArabicPos, controlCount;
 *         paraCount - only incremented/decremented here, never reset, so the
 *           caller presumably initializes it (TODO confirm);
 *         paraLevel - replaced by GET_PARALEVEL(pBiDi, 0) when it was a
 *           default level;
 *         length - recomputed only when UBIDI_OPTION_STREAMING is set.
 */
| 363 static void |
| 364 getDirProps(UBiDi *pBiDi) { |
| 365 const UChar *text=pBiDi->text; |
| 366 DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */ |
| 367 |
| 368 int32_t i=0, i1, length=pBiDi->originalLength; |
| 369 Flags flags=0; /* collect all directionalities in the text */ |
| 370 UChar32 uchar; |
| 371 DirProp dirProp=0, paraDirDefault=0;/* initialize to avoid compiler warnings
*/ |
| 372 UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel); |
| 373 /* for inverse BiDi, the default para level is set to RTL if there is a |
| 374 strong R or AL character at either end of the text
*/ |
| 375 UBool isDefaultLevelInverse=isDefaultLevel && (UBool) |
| 376 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || |
| 377 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL); |
| 378 int32_t lastArabicPos=-1; |
| 379 int32_t controlCount=0; |
| 380 UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions & |
| 381 UBIDI_OPTION_REMOVE_CONTROLS); |
| 382 |
| 383 typedef enum { |
| 384 NOT_CONTEXTUAL, /* 0: not contextual paraLevel */ |
| 385 LOOKING_FOR_STRONG, /* 1: looking for first strong char */ |
| 386 FOUND_STRONG_CHAR /* 2: found first strong char */ |
| 387 } State; |
| 388 State state; |
| 389 int32_t paraStart=0; /* index of first char in paragraph */ |
| 390 DirProp paraDir; /* == CONTEXT_RTL within paragraphs |
| 391 starting with strong R char */ |
| 392 DirProp lastStrongDir=0; /* for default level & inverse BiDi */ |
| 393 int32_t lastStrongLTR=0; /* for STREAMING option */ |
| 394 |
| 395 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { |
| 396 pBiDi->length=0; |
| 397 lastStrongLTR=0; |
| 398 } |
/* a default (contextual) paraLevel starts every paragraph with the direction
   given by bit 0 of paraLevel until a strong character is seen */
| 399 if(isDefaultLevel) { |
| 400 paraDirDefault=pBiDi->paraLevel&1 ? CONTEXT_RTL : 0; |
| 401 paraDir=paraDirDefault; |
| 402 lastStrongDir=paraDirDefault; |
| 403 state=LOOKING_FOR_STRONG; |
| 404 } else { |
| 405 state=NOT_CONTEXTUAL; |
| 406 paraDir=0; |
| 407 } |
| 408 /* count paragraphs and determine the paragraph level (P2..P3) */ |
| 409 /* |
| 410 * see comment in ubidi.h: |
| 411 * the DEFAULT_XXX values are designed so that |
| 412 * their bit 0 alone yields the intended default |
| 413 */ |
| 414 for( /* i=0 above */ ; i<length; ) { |
| 415 /* i is incremented by U16_NEXT */ |
| 416 U16_NEXT(text, i, length, uchar); |
| 417 flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uch
ar)); |
| 418 dirProps[i-1]=dirProp|paraDir; |
| 419 if(uchar>0xffff) { /* set the lead surrogate's property to BN */ |
| 420 flags|=DIRPROP_FLAG(BN); |
| 421 dirProps[i-2]=(DirProp)(BN|paraDir); |
| 422 } |
/* the first strong character fixes the paragraph direction; if it differs
   from the assumed default, the entries already stored for this paragraph
   (paraStart..i-1) are patched, and the rest of the loop body is skipped
   for this character */
| 423 if(state==LOOKING_FOR_STRONG) { |
| 424 if(dirProp==L) { |
| 425 state=FOUND_STRONG_CHAR; |
| 426 if(paraDir) { |
| 427 paraDir=0; |
| 428 for(i1=paraStart; i1<i; i1++) { |
| 429 dirProps[i1]&=~CONTEXT_RTL; |
| 430 } |
| 431 } |
| 432 continue; |
| 433 } |
| 434 if(dirProp==R || dirProp==AL) { |
| 435 state=FOUND_STRONG_CHAR; |
| 436 if(paraDir==0) { |
| 437 paraDir=CONTEXT_RTL; |
| 438 for(i1=paraStart; i1<i; i1++) { |
| 439 dirProps[i1]|=CONTEXT_RTL; |
| 440 } |
| 441 } |
| 442 continue; |
| 443 } |
| 444 } |
/* track the most recent strong direction and positions */
| 445 if(dirProp==L) { |
| 446 lastStrongDir=0; |
| 447 lastStrongLTR=i; /* i is index to next character */ |
| 448 } |
| 449 else if(dirProp==R) { |
| 450 lastStrongDir=CONTEXT_RTL; |
| 451 } |
| 452 else if(dirProp==AL) { |
| 453 lastStrongDir=CONTEXT_RTL; |
| 454 lastArabicPos=i-1; |
| 455 } |
| 456 else if(dirProp==B) { |
| 457 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { |
| 458 pBiDi->length=i; /* i is index to next character */ |
| 459 } |
| 460 if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!
=lastStrongDir)) { |
| 461 for( ; paraStart<i; paraStart++) { |
| 462 dirProps[paraStart]|=CONTEXT_RTL; |
| 463 } |
| 464 } |
/* a CR that is directly followed by LF does not count as a paragraph end
   by itself; text[i] is safe to read here because i<length */
| 465 if(i<length) { /* B not last char in text */ |
| 466 if(!((uchar==CR) && (text[i]==LF))) { |
| 467 pBiDi->paraCount++; |
| 468 } |
| 469 if(isDefaultLevel) { |
| 470 state=LOOKING_FOR_STRONG; |
| 471 paraStart=i; /* i is index to next character */ |
| 472 paraDir=paraDirDefault; |
| 473 lastStrongDir=paraDirDefault; |
| 474 } |
| 475 } |
| 476 } |
| 477 if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) { |
| 478 controlCount++; |
| 479 } |
| 480 } |
/* same inverse-BiDi adjustment as at a B, applied to the final paragraph */
| 481 if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStr
ongDir)) { |
| 482 for(i1=paraStart; i1<length; i1++) { |
| 483 dirProps[i1]|=CONTEXT_RTL; |
| 484 } |
| 485 } |
| 486 if(isDefaultLevel) { |
| 487 pBiDi->paraLevel=GET_PARALEVEL(pBiDi, 0); |
| 488 } |
/* streaming: length was set to the position after the last B seen; it is
   extended to lastStrongLTR when that lies further on and the paragraph level
   there is 0; if the result is shorter than the input, paraCount is reduced
   by one */
| 489 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { |
| 490 if((lastStrongLTR>pBiDi->length) && |
| 491 (GET_PARALEVEL(pBiDi, lastStrongLTR)==0)) { |
| 492 pBiDi->length = lastStrongLTR; |
| 493 } |
| 494 if(pBiDi->length<pBiDi->originalLength) { |
| 495 pBiDi->paraCount--; |
| 496 } |
| 497 } |
| 498 /* The following line does nothing new for contextual paraLevel, but is |
| 499 needed for absolute paraLevel. */ |
| 500 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); |
| 501 |
| 502 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { |
| 503 flags|=DIRPROP_FLAG(L); |
| 504 } |
| 505 |
/* publish the collected results */
| 506 pBiDi->controlCount = controlCount; |
| 507 pBiDi->flags=flags; |
| 508 pBiDi->lastArabicPos=lastArabicPos; |
| 509 } |
| 510 |
| 511 /* perform (X1)..(X9) ------------------------------------------------------- */ |
| 512 |
| 513 /* determine if the text is mixed-directional or single-directional */ |
| 514 static UBiDiDirection |
| 515 directionFromFlags(UBiDi *pBiDi) { |
| 516 Flags flags=pBiDi->flags; |
| 517 /* if the text contains AN and neutrals, then some neutrals may become RTL *
/ |
| 518 if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N))
)) { |
| 519 return UBIDI_LTR; |
| 520 } else if(!(flags&MASK_LTR)) { |
| 521 return UBIDI_RTL; |
| 522 } else { |
| 523 return UBIDI_MIXED; |
| 524 } |
| 525 } |
| 526 |
| 527 /* |
| 528 * Resolve the explicit levels as specified by explicit embedding codes. |
| 529 * Recalculate the flags to have them reflect the real properties |
| 530 * after taking the explicit embeddings into account. |
| 531 * |
| 532 * The BiDi algorithm is designed to result in the same behavior whether embeddi
ng |
| 533 * levels are externally specified (from "styled text", supposedly the preferred |
| 534 * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text. |
| 535 * That is why (X9) instructs to remove all explicit codes (and BN). |
| 536 * However, in a real implementation, this removal of these codes and their inde
x |
| 537 * positions in the plain text is undesirable since it would result in |
| 538 * reallocated, reindexed text. |
| 539 * Instead, this implementation leaves the codes in there and just ignores them |
| 540 * in the subsequent processing. |
| 541 * In order to get the same reordering behavior, positions with a BN or an |
| 542 * explicit embedding code just get the same level assigned as the last "real" |
| 543 * character. |
| 544 * |
| 545 * Some implementations, not this one, then overwrite some of these |
| 546 * directionality properties at "real" same-level-run boundaries by |
| 547 * L or R codes so that the resolution of weak types can be performed on the |
| 548 * entire paragraph at once instead of having to parse it once more and |
| 549 * perform that resolution on same-level-runs. |
| 550 * This limits the scope of the implicit rules in effectively |
| 551 * the same way as the run limits. |
| 552 * |
| 553 * Instead, this implementation does not modify these codes. |
| 554 * On one hand, the paragraph has to be scanned for same-level-runs, but |
| 555 * on the other hand, this saves another loop to reset these codes, |
| 556 * or saves making and modifying a copy of dirProps[]. |
| 557 * |
| 558 * |
| 559 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algo
rithm. |
| 560 * |
| 561 * |
| 562 * Handling the stack of explicit levels (Xn): |
| 563 * |
| 564 * With the BiDi stack of explicit levels, |
| 565 * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF, |
| 566 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61. |
| 567 * |
| 568 * In order to have a correct push-pop semantics even in the case of overflows, |
| 569 * there are two overflow counters: |
| 570 * - countOver60 is incremented with each LRx at level 60 |
| 571 * - from level 60, one RLx increases the level to 61 |
| 572 * - countOver61 is incremented with each LRx and RLx at level 61 |
| 573 * |
| 574 * Popping levels with PDF must work in the opposite order so that level 61 |
| 575 * is correct at the correct point. Underflows (too many PDFs) must be checked. |
| 576 * |
| 577 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. |
| 578 */ |
/*
 * resolveExplicitLevels(pBiDi): returns the direction derived from the flags.
 *
 * Three paths, chosen from directionFromFlags(), paraCount, flags and
 * reorderingMode:
 *  1. single paragraph, not mixed: nothing is written, the initial
 *     direction is returned;
 *  2. single paragraph, and either no explicit codes in flags or
 *     reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL:
 *     every levels[i] is set to the paragraph level;
 *  3. otherwise the (Xn) pass runs: it fills levels[], records the start
 *     index of each following paragraph in pBiDi->paras[], recomputes and
 *     stores pBiDi->flags, and re-derives the direction from the new flags.
 */
| 579 static UBiDiDirection |
| 580 resolveExplicitLevels(UBiDi *pBiDi) { |
| 581 const DirProp *dirProps=pBiDi->dirProps; |
| 582 UBiDiLevel *levels=pBiDi->levels; |
| 583 const UChar *text=pBiDi->text; |
| 584 |
| 585 int32_t i=0, length=pBiDi->length; |
| 586 Flags flags=pBiDi->flags; /* collect all directionalities in the text
*/ |
| 587 DirProp dirProp; |
| 588 UBiDiLevel level=GET_PARALEVEL(pBiDi, 0); |
| 589 |
| 590 UBiDiDirection direction; |
| 591 int32_t paraIndex=0; |
| 592 |
| 593 /* determine if the text is mixed-directional or single-directional */ |
| 594 direction=directionFromFlags(pBiDi); |
| 595 |
| 596 /* we may not need to resolve any explicit levels, but for multiple |
| 597 paragraphs we want to loop on all chars to set the para boundaries */ |
| 598 if((direction!=UBIDI_MIXED) && (pBiDi->paraCount==1)) { |
| 599 /* not mixed directionality: levels don't matter - trailingWSStart will
be 0 */ |
| 600 } else if((pBiDi->paraCount==1) && |
| 601 (!(flags&MASK_EXPLICIT) || |
| 602 (pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL)))
{ |
| 603 /* mixed, but all characters are at the same embedding level */ |
| 604 /* or we are in "inverse BiDi" */ |
| 605 /* and we don't have contextual multiple paragraphs with some B char */ |
| 606 /* set all levels to the paragraph level */ |
| 607 for(i=0; i<length; ++i) { |
| 608 levels[i]=level; |
| 609 } |
| 610 } else { |
| 611 /* continue to perform (Xn) */ |
| 612 |
| 613 /* (X1) level is set for all codes, embeddingLevel keeps track of the pu
sh/pop operations */ |
| 614 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate th
e override status */ |
| 615 UBiDiLevel embeddingLevel=level, newLevel, stackTop=0; |
| 616 |
| 617 UBiDiLevel stack[UBIDI_MAX_EXPLICIT_LEVEL]; /* we never push anyt
hing >=UBIDI_MAX_EXPLICIT_LEVEL */ |
| 618 uint32_t countOver60=0, countOver61=0; /* count overflows of explicit l
evels */ |
| 619 |
| 620 /* recalculate the flags */ |
| 621 flags=0; |
| 622 |
| 623 for(i=0; i<length; ++i) { |
| 624 dirProp=NO_CONTEXT_RTL(dirProps[i]); |
| 625 switch(dirProp) { |
/* LRx: push the current embeddingLevel and move to the next even level;
   if that would exceed the maximum, only count the overflow */
| 626 case LRE: |
| 627 case LRO: |
| 628 /* (X3, X5) */ |
| 629 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|
1)); /* least greater even level */ |
| 630 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) { |
| 631 stack[stackTop]=embeddingLevel; |
| 632 ++stackTop; |
| 633 embeddingLevel=newLevel; |
| 634 if(dirProp==LRO) { |
| 635 embeddingLevel|=UBIDI_LEVEL_OVERRIDE; |
| 636 } |
| 637 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE |
| 638 since this has already been done for newLevel which is |
| 639 the source for embeddingLevel. |
| 640 */ |
| 641 } else if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPL
ICIT_LEVEL) { |
| 642 ++countOver61; |
| 643 } else /* (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPL
ICIT_LEVEL-1 */ { |
| 644 ++countOver60; |
| 645 } |
| 646 flags|=DIRPROP_FLAG(BN); |
| 647 break; |
/* RLx: push the current embeddingLevel and move to the next odd level;
   an overflow here is always counted in countOver61 */
| 648 case RLE: |
| 649 case RLO: |
| 650 /* (X2, X4) */ |
| 651 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)
|1); /* least greater odd level */ |
| 652 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) { |
| 653 stack[stackTop]=embeddingLevel; |
| 654 ++stackTop; |
| 655 embeddingLevel=newLevel; |
| 656 if(dirProp==RLO) { |
| 657 embeddingLevel|=UBIDI_LEVEL_OVERRIDE; |
| 658 } |
| 659 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for RLE |
| 660 since this has already been done for newLevel which is |
| 661 the source for embeddingLevel. |
| 662 */ |
| 663 } else { |
| 664 ++countOver61; |
| 665 } |
| 666 flags|=DIRPROP_FLAG(BN); |
| 667 break; |
/* PDF: undo overflow counts first (countOver61, then countOver60), and only
   then pop a real level; a PDF with nothing to undo is ignored */
| 668 case PDF: |
| 669 /* (X7) */ |
| 670 /* handle all the overflow cases first */ |
| 671 if(countOver61>0) { |
| 672 --countOver61; |
| 673 } else if(countOver60>0 && (embeddingLevel&~UBIDI_LEVEL_OVERRIDE
)!=UBIDI_MAX_EXPLICIT_LEVEL) { |
| 674 /* handle LRx overflows from level 60 */ |
| 675 --countOver60; |
| 676 } else if(stackTop>0) { |
| 677 /* this is the pop operation; it also pops level 61 while co
untOver60>0 */ |
| 678 --stackTop; |
| 679 embeddingLevel=stack[stackTop]; |
| 680 /* } else { (underflow) */ |
| 681 } |
| 682 flags|=DIRPROP_FLAG(BN); |
| 683 break; |
/* B: reset the stack and overflow counters; the separator itself gets its
   own paragraph's level, and embeddingLevel restarts at the level of the
   next paragraph; the start of that paragraph is recorded in paras[] unless
   this is a CR directly followed by LF */
| 684 case B: |
| 685 stackTop=0; |
| 686 countOver60=countOver61=0; |
| 687 level=GET_PARALEVEL(pBiDi, i); |
| 688 if((i+1)<length) { |
| 689 embeddingLevel=GET_PARALEVEL(pBiDi, i+1); |
| 690 if(!((text[i]==CR) && (text[i+1]==LF))) { |
| 691 pBiDi->paras[paraIndex++]=i+1; |
| 692 } |
| 693 } |
| 694 flags|=DIRPROP_FLAG(B); |
| 695 break; |
| 696 case BN: |
| 697 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */ |
| 698 /* they will get their levels set correctly in adjustWSLevels()
*/ |
| 699 flags|=DIRPROP_FLAG(BN); |
| 700 break; |
/* any other class: adopt embeddingLevel as the current level (flagging a
   level change as multiple runs); the character's own class is added to the
   flags only when the level is not an override level */
| 701 default: |
| 702 /* all other types get the "real" level */ |
| 703 if(level!=embeddingLevel) { |
| 704 level=embeddingLevel; |
| 705 if(level&UBIDI_LEVEL_OVERRIDE) { |
| 706 flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS; |
| 707 } else { |
| 708 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS; |
| 709 } |
| 710 } |
| 711 if(!(level&UBIDI_LEVEL_OVERRIDE)) { |
| 712 flags|=DIRPROP_FLAG(dirProp); |
| 713 } |
| 714 break; |
| 715 } |
| 716 |
| 717 /* |
| 718 * We need to set reasonable levels even on BN codes and |
| 719 * explicit codes because we will later look at same-level runs (X10
). |
| 720 */ |
| 721 levels[i]=level; |
| 722 } |
| 723 if(flags&MASK_EMBEDDING) { |
| 724 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); |
| 725 } |
| 726 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { |
| 727 flags|=DIRPROP_FLAG(L); |
| 728 } |
| 729 |
| 730 /* subsequently, ignore the explicit codes and BN (X9) */ |
| 731 |
| 732 /* again, determine if the text is mixed-directional or single-direction
al */ |
| 733 pBiDi->flags=flags; |
| 734 direction=directionFromFlags(pBiDi); |
| 735 } |
| 736 |
| 737 return direction; |
| 738 } |
| 739 |
| 740 /* |
| 741 * Use a pre-specified embedding levels array: |
| 742 * |
| 743 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), |
| 744 * ignore all explicit codes (X9), |
| 745 * and check all the preset levels. |
| 746 * |
| 747 * Recalculate the flags to have them reflect the real properties |
| 748 * after taking the explicit embeddings into account. |
| 749 */ |
| 750 static UBiDiDirection |
| 751 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { |
| 752 const DirProp *dirProps=pBiDi->dirProps; |
| 753 DirProp dirProp; |
| 754 UBiDiLevel *levels=pBiDi->levels; |
| 755 const UChar *text=pBiDi->text; |
| 756 |
| 757 int32_t i, length=pBiDi->length; |
| 758 Flags flags=0; /* collect all directionalities in the text */ |
| 759 UBiDiLevel level; |
| 760 uint32_t paraIndex=0; |
| 761 |
| 762 for(i=0; i<length; ++i) { |
| 763 level=levels[i]; |
| 764 dirProp=NO_CONTEXT_RTL(dirProps[i]); |
| 765 if(level&UBIDI_LEVEL_OVERRIDE) { |
| 766 /* keep the override flag in levels[i] but adjust the flags */ |
| 767 level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simp
ler */ |
| 768 flags|=DIRPROP_FLAG_O(level); |
| 769 } else { |
| 770 /* set the flags */ |
| 771 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp); |
| 772 } |
| 773 if((level<GET_PARALEVEL(pBiDi, i) && |
| 774 !((0==level)&&(dirProp==B))) || |
| 775 (UBIDI_MAX_EXPLICIT_LEVEL<level)) { |
| 776 /* level out of bounds */ |
| 777 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 778 return UBIDI_LTR; |
| 779 } |
| 780 if((dirProp==B) && ((i+1)<length)) { |
| 781 if(!((text[i]==CR) && (text[i+1]==LF))) { |
| 782 pBiDi->paras[paraIndex++]=i+1; |
| 783 } |
| 784 } |
| 785 } |
| 786 if(flags&MASK_EMBEDDING) { |
| 787 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); |
| 788 } |
| 789 |
| 790 /* determine if the text is mixed-directional or single-directional */ |
| 791 pBiDi->flags=flags; |
| 792 return directionFromFlags(pBiDi); |
| 793 } |
| 794 |
| 795 /****************************************************************** |
| 796 The Properties state machine table |
| 797 ******************************************************************* |
| 798 |
| 799 All table cells are 8 bits: |
| 800 bits 0..4: next state |
| 801 bits 5..7: action to perform (if > 0) |
| 802 |
| 803 Cells may be of format "n" where n represents the next state |
| 804 (except for the rightmost column). |
| 805 Cells may also be of format "s(x,y)" where x represents an action |
| 806 to perform and y represents the next state. |
| 807 |
| 808 ******************************************************************* |
| 809 Definitions and type for properties state table |
| 810 ******************************************************************* |
| 811 */ |
/* number of columns in impTabProps; the last column holds the reduced property */
#define IMPTABPROPS_COLUMNS 14
#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)

/* a table cell packs the next state in bits 0..4 and the action in bits 5..7 */
#define GET_STATEPROPS(cell) ((cell)&0x1f)
#define GET_ACTIONPROPS(cell) ((cell)>>5)

/*
 * Build a table cell from an action and a next state.
 * Both arguments are parenthesized so that an expression argument
 * (for example s(1, a&b)) is combined as a whole rather than being
 * re-associated by operator precedence inside the expansion.
 */
#define s(action, newState) ((uint8_t)((newState)+((action)<<5)))

/*
 * dirProp regrouped: maps each directional property (in the order listed
 * in the comment row) to its column index in impTabProps.
 */
static const uint8_t groupProp[] =
{
/*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  */
    0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10
};

/* reduced dirProp */
enum {
    DirProp_L=0,
    DirProp_R=1,
    DirProp_EN=2,
    DirProp_AN=3,
    DirProp_ON=4,
    DirProp_S=5,
    DirProp_B=6
};
| 824 |
| 825 /****************************************************************** |
| 826 |
| 827 PROPERTIES STATE TABLE |
| 828 |
| 829 In table impTabProps, |
| 830 - the ON column regroups ON and WS |
| 831 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF |
| 832 - the Res column is the reduced property assigned to a run |
| 833 |
| 834 Action 1: process current run1, init new run1 |
| 835 2: init new run2 |
| 836 3: process run1, process run2, init new run1 |
| 837 4: process run1, set run1=run2, init new run2 |
| 838 |
| 839 Notes: |
| 840 1) This table is used in resolveImplicitLevels(). |
| 841 2) This table triggers actions when there is a change in the Bidi |
| 842 property of incoming characters (action 1). |
| 843 3) Most such property sequences are processed immediately (in |
| 844 fact, passed to processPropertySeq(). |
| 845 4) However, numbers are assembled as one sequence. This means |
| 846 that undefined situations (like CS following digits, until |
| 847 it is known if the next char will be a digit) are held until |
| 848 following chars define them. |
| 849 Example: digits followed by CS, then comes another CS or ON; |
| 850 the digits will be processed, then the CS assigned |
| 851 as the start of an ON sequence (action 3). |
| 852 5) There are cases where more than one sequence must be |
| 853 processed, for instance digits followed by CS followed by L: |
| 854 the digits must be processed as one sequence, and the CS |
| 855 must be processed as an ON sequence, all this before starting |
| 856 assembling chars for the opening L sequence. |
| 857 |
| 858 |
| 859 */ |
| 860 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] = |
| 861 { |
| 862 /* L , R , EN , AN , ON , S , B ,
ES , ET , CS , BN , NSM , AL , Res */ |
| 863 /* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 ,
7 , 9 , 7 , 0 , 7 , 3 , DirProp_ON }, |
| 864 /* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(
1,7), s(1,9), s(1,7), 1 , 1 , s(1,3), DirProp_L }, |
| 865 /* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(
1,7), s(1,9), s(1,7), 2 , 2 , s(1,3), DirProp_R }, |
| 866 /* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(
1,8), s(1,8), s(1,8), 3 , 3 , 3 , DirProp_R }, |
| 867 /* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2
,10), 11 ,s(2,10), 4 , 4 , s(1,3), DirProp_EN }, |
| 868 /* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(
1,7), s(1,9),s(2,12), 5 , 5 , s(1,3), DirProp_AN }, |
| 869 /* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(
1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), DirProp_AN }, |
| 870 /* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17),
7 ,s(2,14), 7 , 7 , 7 , s(1,3), DirProp_ON }, |
| 871 /* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17),
8 , 8 , 8 , 8 , 8 , s(1,3), DirProp_ON }, |
| 872 /* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17),
7 , 9 , 7 , 9 , 9 , s(1,3), DirProp_ON }, |
| 873 /*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(
4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), DirProp_EN }, |
| 874 /*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(
1,7), 11 , s(1,7), 11 , 11 , s(1,3), DirProp_EN }, |
| 875 /*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(
4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3), DirProp_AN }, |
| 876 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(
4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), DirProp_AN }, |
| 877 /*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17),
7 , 14 , 7 , 14 , 14 , s(1,3), DirProp_ON }, |
| 878 /*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(
1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3), DirProp_S }, |
| 879 /*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(
1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3), DirProp_S }, |
| 880 /*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(
1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3), DirProp_B } |
| 881 }; |
| 882 |
/* We must undef macro s because the levels tables have a different
 * structure (4 bits for action and 4 bits for next state).
 */
| 886 #undef s |
| 887 |
/******************************************************************
 The levels state machine tables
*******************************************************************

 All table cells are 8 bits:
      bits 0..3:  next state
      bits 4..7:  action to perform (if > 0)

 Cells may be of format "n" where n represents the next state
 (except for the rightmost column).
 Cells may also be of format "s(x,y)" where x represents an action
 to perform and y represents the next state.

 This format limits each table to 16 states each and to 15 actions.

*******************************************************************
 Definitions and type for levels state tables
*******************************************************************
*/
/* one column per reduced dirProp (DirProp_L..DirProp_B) plus one more */
#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
/* index of the rightmost column of a row */
#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
/* isolate the next state (bits 0..3) of a cell */
#define GET_STATE(cell) ((cell)&0x0f)
/* isolate the action (bits 4..7) of a cell */
#define GET_ACTION(cell) ((cell)>>4)
/*
 * Build a cell from an action and a next state.
 * Both arguments are parenthesized so that expression arguments
 * (e.g. s(a|b, n)) are shifted/added as a whole.
 */
#define s(action, newState) ((uint8_t)((newState)+((action)<<4)))
| 912 |
| 913 typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS]; |
| 914 typedef uint8_t ImpAct[]; |
| 915 |
| 916 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct, |
| 917 * instead of having a pair of ImpTab and a pair of ImpAct. |
| 918 */ |
| 919 typedef struct ImpTabPair { |
| 920 const void * pImpTab[2]; |
| 921 const void * pImpAct[2]; |
| 922 } ImpTabPair; |
| 923 |
| 924 /****************************************************************** |
| 925 |
| 926 LEVELS STATE TABLES |
| 927 |
| 928 In all levels state tables, |
| 929 - state 0 is the initial state |
| 930 - the Res column is the increment to add to the text level |
| 931 for this property sequence. |
| 932 |
| 933 The impAct arrays for each table of a pair map the local action |
| 934 numbers of the table to the total list of actions. For instance, |
| 935 action 2 in a given table corresponds to the action number which |
| 936 appears in entry [2] of the impAct array for that table. |
| 937 The first entry of all impAct arrays must be 0. |
| 938 |
| 939 Action 1: init conditional sequence |
| 940 2: prepend conditional sequence to current sequence |
| 941 3: set ON sequence to new level - 1 |
| 942 4: init EN/AN/ON sequence |
| 943 5: fix EN/AN/ON sequence followed by R |
| 944 6: set previous level sequence to level 2 |
| 945 |
| 946 Notes: |
| 947 1) These tables are used in processPropertySeq(). The input |
| 948 is property sequences as determined by resolveImplicitLevels. |
| 949 2) Most such property sequences are processed immediately |
| 950 (levels are assigned). |
| 951 3) However, some sequences cannot be assigned a final level till |
| 952 one or more following sequences are received. For instance, |
| 953 ON following an R sequence within an even-level paragraph. |
| 954 If the following sequence is R, the ON sequence will be |
| 955 assigned basic run level+1, and so will the R sequence. |
| 956 4) S is generally handled like ON, since its level will be fixed |
| 957 to paragraph level in adjustWSLevels(). |
| 958 |
| 959 */ |
| 960 |
| 961 static const ImpTab impTabL_DEFAULT = /* Even paragraph level */ |
| 962 /* In this table, conditional sequences receive the higher possible level |
| 963 until proven otherwise. |
| 964 */ |
| 965 { |
| 966 /* L , R , EN , AN , ON , S , B , R
es */ |
| 967 /* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 ,
0 }, |
| 968 /* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 ,
1 }, |
| 969 /* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 ,
2 }, |
| 970 /* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 ,
2 }, |
| 971 /* 4 : R+ON */ { s(2,0), 1 , 3 , 3 , 4 , 4 , s(2,0),
1 }, |
| 972 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), 2 , 5 , 5 , s(2,0),
1 } |
| 973 }; |
| 974 static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */ |
| 975 /* In this table, conditional sequences receive the lower possible level |
| 976 until proven otherwise. |
| 977 */ |
| 978 { |
| 979 /* L , R , EN , AN , ON , S , B , R
es */ |
| 980 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 ,
0 }, |
| 981 /* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 ,
1 }, |
| 982 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 ,
1 }, |
| 983 /* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 ,
1 }, |
| 984 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 ,
0 }, |
| 985 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 ,
0 } |
| 986 }; |
| 987 static const ImpAct impAct0 = {0,1,2,3,4,5,6}; |
| 988 static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT, |
| 989 &impTabR_DEFAULT}, |
| 990 {&impAct0, &impAct0}}; |
| 991 |
| 992 static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */ |
| 993 /* In this table, conditional sequences receive the higher possible level |
| 994 until proven otherwise. |
| 995 */ |
| 996 { |
| 997 /* L , R , EN , AN , ON , S , B , R
es */ |
| 998 /* 0 : init */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 ,
0 }, |
| 999 /* 1 : L+EN/AN */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 ,
2 }, |
| 1000 /* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), 0 , 0 ,
1 }, |
| 1001 /* 3 : R+ON */ { s(2,0), 2 , 4 , 4 , 3 , 3 , s(2,0),
1 }, |
| 1002 /* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 ,
2 } |
| 1003 }; |
| 1004 static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL, |
| 1005 &impTabR_DEFAULT}, |
| 1006 {&impAct0, &impAct0}}; |
| 1007 |
| 1008 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R = |
| 1009 /* In this table, EN/AN+ON sequences receive levels as if associated with R |
| 1010 until proven that there is L or sor/eor on both sides. AN is handled like EN
. |
| 1011 */ |
| 1012 { |
| 1013 /* L , R , EN , AN , ON , S , B , R
es */ |
| 1014 /* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 ,
0 }, |
| 1015 /* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0),
2 }, |
| 1016 /* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0),
1 }, |
| 1017 /* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 ,
1 }, |
| 1018 /* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0),
1 }, |
| 1019 /* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 ,
2 } |
| 1020 }; |
| 1021 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R = |
| 1022 /* In this table, EN/AN+ON sequences receive levels as if associated with R |
| 1023 until proven that there is L on both sides. AN is handled like EN. |
| 1024 */ |
| 1025 { |
| 1026 /* L , R , EN , AN , ON , S , B , R
es */ |
| 1027 /* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 ,
0 }, |
| 1028 /* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 ,
1 }, |
| 1029 /* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 ,
1 }, |
| 1030 /* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 ,
0 }, |
| 1031 /* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 ,
1 } |
| 1032 }; |
| 1033 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = { |
| 1034 {&impTabL_GROUP_NUMBERS_WITH_R, |
| 1035 &impTabR_GROUP_NUMBERS_WITH_R}, |
| 1036 {&impAct0, &impAct0}}; |
| 1037 |
| 1038 |
| 1039 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L = |
| 1040 /* This table is identical to the Default LTR table except that EN and AN are |
| 1041 handled like L. |
| 1042 */ |
| 1043 { |
| 1044 /* L , R , EN , AN , ON , S , B , R
es */ |
| 1045 /* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 ,
0 }, |
| 1046 /* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 ,
1 }, |
| 1047 /* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 ,
2 }, |
| 1048 /* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 ,
2 }, |
| 1049 /* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0),
1 }, |
| 1050 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0),
1 } |
| 1051 }; |
| 1052 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L = |
| 1053 /* This table is identical to the Default RTL table except that EN and AN are |
| 1054 handled like L. |
| 1055 */ |
| 1056 { |
| 1057 /* L , R , EN , AN , ON , S , B , R
es */ |
| 1058 /* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 ,
0 }, |
| 1059 /* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 ,
1 }, |
| 1060 /* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 ,
1 }, |
| 1061 /* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 ,
1 }, |
| 1062 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 ,
0 }, |
| 1063 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 ,
0 } |
| 1064 }; |
| 1065 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = { |
| 1066 {&impTabL_INVERSE_NUMBERS_AS_L, |
| 1067 &impTabR_INVERSE_NUMBERS_AS_L}, |
| 1068 {&impAct0, &impAct0}}; |
| 1069 |
| 1070 static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */ |
| 1071 /* In this table, conditional sequences receive the lower possible level |
| 1072 until proven otherwise. |
| 1073 */ |
| 1074 { |
| 1075 /* L , R , EN , AN , ON , S , B , R
es */ |
| 1076 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 ,
0 }, |
| 1077 /* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 ,
1 }, |
| 1078 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 ,
1 }, |
| 1079 /* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0),
0 }, |
| 1080 /* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0),
3 }, |
| 1081 /* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0),
2 }, |
| 1082 /* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0),
1 } |
| 1083 }; |
| 1084 static const ImpAct impAct1 = {0,1,11,12}; |
| 1085 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc" |
| 1086 */ |
| 1087 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = { |
| 1088 {&impTabL_DEFAULT, |
| 1089 &impTabR_INVERSE_LIKE_DIRECT}, |
| 1090 {&impAct0, &impAct1}}; |
| 1091 |
| 1092 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS = |
| 1093 /* The case handled in this table is (visually): R EN L |
| 1094 */ |
| 1095 { |
| 1096 /* L , R , EN , AN , ON , S , B , R
es */ |
| 1097 /* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 ,
0 }, |
| 1098 /* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 ,
4 }, |
| 1099 /* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0),
3 }, |
| 1100 /* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 ,
3 }, |
| 1101 /* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0),
3 }, |
| 1102 /* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0),
4 }, |
| 1103 /* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0),
4 } |
| 1104 }; |
| 1105 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS = |
| 1106 /* The cases handled in this table are (visually): R EN L |
| 1107 R L AN L |
| 1108 */ |
| 1109 { |
| 1110 /* L , R , EN , AN , ON , S , B , R
es */ |
| 1111 /* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 ,
0 }, |
| 1112 /* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 ,
1 }, |
| 1113 /* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 ,
0 }, |
| 1114 /* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 ,
1 }, |
| 1115 /* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0),
0 }, |
| 1116 /* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0),
1 }, |
| 1117 /* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0),
3 } |
| 1118 }; |
| 1119 static const ImpAct impAct2 = {0,1,7,8,9,10}; |
| 1120 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = { |
| 1121 {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS, |
| 1122 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, |
| 1123 {&impAct0, &impAct2}}; |
| 1124 |
| 1125 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = { |
| 1126 {&impTabL_NUMBERS_SPECIAL, |
| 1127 &impTabR_INVERSE_LIKE_DIRECT}, |
| 1128 {&impAct0, &impAct1}}; |
| 1129 |
| 1130 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = |
| 1131 /* The case handled in this table is (visually): R EN L |
| 1132 */ |
| 1133 { |
| 1134 /* L , R , EN , AN , ON , S , B , R
es */ |
| 1135 /* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 ,
0 }, |
| 1136 /* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 ,
4 }, |
| 1137 /* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 ,
3 }, |
| 1138 /* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0),
3 }, |
| 1139 /* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0),
4 } |
| 1140 }; |
| 1141 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = { |
| 1142 {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, |
| 1143 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, |
| 1144 {&impAct0, &impAct2}}; |
| 1145 |
| 1146 #undef s |
| 1147 |
| 1148 typedef struct { |
| 1149 const ImpTab * pImpTab; /* level table pointer */ |
| 1150 const ImpAct * pImpAct; /* action map array */ |
| 1151 int32_t startON; /* start of ON sequence */ |
| 1152 int32_t startL2EN; /* start of level 2 sequence */ |
| 1153 int32_t lastStrongRTL; /* index of last found R or AL */ |
| 1154 int32_t state; /* current state */ |
| 1155 UBiDiLevel runLevel; /* run level before implicit solving */ |
| 1156 } LevState; |
| 1157 |
| 1158 /*------------------------------------------------------------------------*/ |
| 1159 |
| 1160 static void |
| 1161 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag) |
| 1162 /* param pos: position where to insert |
| 1163 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER |
| 1164 */ |
| 1165 { |
| 1166 #define FIRSTALLOC 10 |
| 1167 Point point; |
| 1168 InsertPoints * pInsertPoints=&(pBiDi->insertPoints); |
| 1169 |
| 1170 if (pInsertPoints->capacity == 0) |
| 1171 { |
| 1172 pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC); |
| 1173 if (pInsertPoints->points == NULL) |
| 1174 { |
| 1175 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; |
| 1176 return; |
| 1177 } |
| 1178 pInsertPoints->capacity=FIRSTALLOC; |
| 1179 } |
| 1180 if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point
*/ |
| 1181 { |
| 1182 void * savePoints=pInsertPoints->points; |
| 1183 pInsertPoints->points=uprv_realloc(pInsertPoints->points, |
| 1184 pInsertPoints->capacity*2*sizeof(Poin
t)); |
| 1185 if (pInsertPoints->points == NULL) |
| 1186 { |
| 1187 pInsertPoints->points=savePoints; |
| 1188 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; |
| 1189 return; |
| 1190 } |
| 1191 else pInsertPoints->capacity*=2; |
| 1192 } |
| 1193 point.pos=pos; |
| 1194 point.flag=flag; |
| 1195 pInsertPoints->points[pInsertPoints->size]=point; |
| 1196 pInsertPoints->size++; |
| 1197 #undef FIRSTALLOC |
| 1198 } |
| 1199 |
| 1200 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */ |
| 1201 |
| 1202 /* |
| 1203 * This implementation of the (Wn) rules applies all rules in one pass. |
| 1204 * In order to do so, it needs a look-ahead of typically 1 character |
| 1205 * (except for W5: sequences of ET) and keeps track of changes |
| 1206 * in a rule Wp that affect a later Wq (p<q). |
| 1207 * |
| 1208 * The (Nn) and (In) rules are also performed in that same single loop, |
| 1209 * but effectively one iteration behind for white space. |
| 1210 * |
| 1211 * Since all implicit rules are performed in one step, it is not necessary |
| 1212 * to actually store the intermediate directional properties in dirProps[]. |
| 1213 */ |
| 1214 |
| 1215 static void |
| 1216 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop, |
| 1217 int32_t start, int32_t limit) { |
| 1218 uint8_t cell, oldStateSeq, actionSeq; |
| 1219 const ImpTab * pImpTab=pLevState->pImpTab; |
| 1220 const ImpAct * pImpAct=pLevState->pImpAct; |
| 1221 UBiDiLevel * levels=pBiDi->levels; |
| 1222 UBiDiLevel level, addLevel; |
| 1223 InsertPoints * pInsertPoints; |
| 1224 int32_t start0, k; |
| 1225 |
| 1226 start0=start; /* save original start position */ |
| 1227 oldStateSeq=(uint8_t)pLevState->state; |
| 1228 cell=(*pImpTab)[oldStateSeq][_prop]; |
| 1229 pLevState->state=GET_STATE(cell); /* isolate the new state */ |
| 1230 actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */ |
| 1231 addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES]; |
| 1232 |
| 1233 if(actionSeq) { |
| 1234 switch(actionSeq) { |
| 1235 case 1: /* init ON seq */ |
| 1236 pLevState->startON=start0; |
| 1237 break; |
| 1238 |
| 1239 case 2: /* prepend ON seq to current seq */ |
| 1240 start=pLevState->startON; |
| 1241 break; |
| 1242 |
| 1243 case 3: /* L or S after possible relevant EN/AN
*/ |
| 1244 /* check if we had EN after R/AL */ |
| 1245 if (pLevState->startL2EN >= 0) { |
| 1246 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); |
| 1247 } |
| 1248 pLevState->startL2EN=-1; /* not within previous if since could also
be -2 */ |
| 1249 /* check if we had any relevant EN/AN after R/AL */ |
| 1250 pInsertPoints=&(pBiDi->insertPoints); |
| 1251 if ((pInsertPoints->capacity == 0) || |
| 1252 (pInsertPoints->size <= pInsertPoints->confirmed)) |
| 1253 { |
| 1254 /* nothing, just clean up */ |
| 1255 pLevState->lastStrongRTL=-1; |
| 1256 /* check if we have a pending conditional segment */ |
| 1257 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES]; |
| 1258 if ((level & 1) && (pLevState->startON > 0)) { /* after ON */ |
| 1259 start=pLevState->startON; /* reset to basic run level */ |
| 1260 } |
| 1261 if (_prop == DirProp_S) /* add LRM before S */ |
| 1262 { |
| 1263 addPoint(pBiDi, start0, LRM_BEFORE); |
| 1264 pInsertPoints->confirmed=pInsertPoints->size; |
| 1265 } |
| 1266 break; |
| 1267 } |
| 1268 /* reset previous RTL cont to level for LTR text */ |
| 1269 for (k=pLevState->lastStrongRTL+1; k<start0; k++) |
| 1270 { |
| 1271 /* reset odd level, leave runLevel+2 as is */ |
| 1272 levels[k]=(levels[k] - 2) & ~1; |
| 1273 } |
| 1274 /* mark insert points as confirmed */ |
| 1275 pInsertPoints->confirmed=pInsertPoints->size; |
| 1276 pLevState->lastStrongRTL=-1; |
| 1277 if (_prop == DirProp_S) /* add LRM before S */ |
| 1278 { |
| 1279 addPoint(pBiDi, start0, LRM_BEFORE); |
| 1280 pInsertPoints->confirmed=pInsertPoints->size; |
| 1281 } |
| 1282 break; |
| 1283 |
| 1284 case 4: /* R/AL after possible relevant EN/AN */ |
| 1285 /* just clean up */ |
| 1286 pInsertPoints=&(pBiDi->insertPoints); |
| 1287 if (pInsertPoints->capacity > 0) |
| 1288 /* remove all non confirmed insert points */ |
| 1289 pInsertPoints->size=pInsertPoints->confirmed; |
| 1290 pLevState->startON=-1; |
| 1291 pLevState->startL2EN=-1; |
| 1292 pLevState->lastStrongRTL=limit - 1; |
| 1293 break; |
| 1294 |
| 1295 case 5: /* EN/AN after R/AL + possible cont */ |
| 1296 /* check for real AN */ |
| 1297 if ((_prop == DirProp_AN) && (NO_CONTEXT_RTL(pBiDi->dirProps[start0]
) == AN) && |
| 1298 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIA
L)) |
| 1299 { |
| 1300 /* real AN */ |
| 1301 if (pLevState->startL2EN == -1) /* if no relevant EN already fou
nd */ |
| 1302 { |
| 1303 /* just note the righmost digit as a strong RTL */ |
| 1304 pLevState->lastStrongRTL=limit - 1; |
| 1305 break; |
| 1306 } |
| 1307 if (pLevState->startL2EN >= 0) /* after EN, no AN */ |
| 1308 { |
| 1309 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); |
| 1310 pLevState->startL2EN=-2; |
| 1311 } |
| 1312 /* note AN */ |
| 1313 addPoint(pBiDi, start0, LRM_BEFORE); |
| 1314 break; |
| 1315 } |
| 1316 /* if first EN/AN after R/AL */ |
| 1317 if (pLevState->startL2EN == -1) { |
| 1318 pLevState->startL2EN=start0; |
| 1319 } |
| 1320 break; |
| 1321 |
| 1322 case 6: /* note location of latest R/AL */ |
| 1323 pLevState->lastStrongRTL=limit - 1; |
| 1324 pLevState->startON=-1; |
| 1325 break; |
| 1326 |
| 1327 case 7: /* L after R+ON/EN/AN */ |
| 1328 /* include possible adjacent number on the left */ |
| 1329 for (k=start0-1; k>=0 && !(levels[k]&1); k--); |
| 1330 if(k>=0) { |
| 1331 addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */ |
| 1332 pInsertPoints=&(pBiDi->insertPoints); |
| 1333 pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */ |
| 1334 } |
| 1335 pLevState->startON=start0; |
| 1336 break; |
| 1337 |
| 1338 case 8: /* AN after L */ |
| 1339 /* AN numbers between L text on both sides may be trouble. */ |
| 1340 /* tentatively bracket with LRMs; will be confirmed if followed by L
*/ |
| 1341 addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */ |
| 1342 addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */ |
| 1343 break; |
| 1344 |
| 1345 case 9: /* R after L+ON/EN/AN */ |
| 1346 /* false alert, infirm LRMs around previous AN */ |
| 1347 pInsertPoints=&(pBiDi->insertPoints); |
| 1348 pInsertPoints->size=pInsertPoints->confirmed; |
| 1349 if (_prop == DirProp_S) /* add RLM before S */ |
| 1350 { |
| 1351 addPoint(pBiDi, start0, RLM_BEFORE); |
| 1352 pInsertPoints->confirmed=pInsertPoints->size; |
| 1353 } |
| 1354 break; |
| 1355 |
| 1356 case 10: /* L after L+ON/AN */ |
| 1357 level=pLevState->runLevel + addLevel; |
| 1358 for(k=pLevState->startON; k<start0; k++) { |
| 1359 if (levels[k]<level) |
| 1360 levels[k]=level; |
| 1361 } |
| 1362 pInsertPoints=&(pBiDi->insertPoints); |
| 1363 pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts *
/ |
| 1364 pLevState->startON=start0; |
| 1365 break; |
| 1366 |
| 1367 case 11: /* L after L+ON+EN/AN/ON */ |
| 1368 level=pLevState->runLevel; |
| 1369 for(k=start0-1; k>=pLevState->startON; k--) { |
| 1370 if(levels[k]==level+3) { |
| 1371 while(levels[k]==level+3) { |
| 1372 levels[k--]-=2; |
| 1373 } |
| 1374 while(levels[k]==level) { |
| 1375 k--; |
| 1376 } |
| 1377 } |
| 1378 if(levels[k]==level+2) { |
| 1379 levels[k]=level; |
| 1380 continue; |
| 1381 } |
| 1382 levels[k]=level+1; |
| 1383 } |
| 1384 break; |
| 1385 |
| 1386 case 12: /* R after L+ON+EN/AN/ON */ |
| 1387 level=pLevState->runLevel+1; |
| 1388 for(k=start0-1; k>=pLevState->startON; k--) { |
| 1389 if(levels[k]>level) { |
| 1390 levels[k]-=2; |
| 1391 } |
| 1392 } |
| 1393 break; |
| 1394 |
| 1395 default: /* we should never get here */ |
| 1396 U_ASSERT(FALSE); |
| 1397 break; |
| 1398 } |
| 1399 } |
| 1400 if((addLevel) || (start < start0)) { |
| 1401 level=pLevState->runLevel + addLevel; |
| 1402 for(k=start; k<limit; k++) { |
| 1403 levels[k]=level; |
| 1404 } |
| 1405 } |
| 1406 } |
| 1407 |
| 1408 static void |
| 1409 resolveImplicitLevels(UBiDi *pBiDi, |
| 1410 int32_t start, int32_t limit, |
| 1411 DirProp sor, DirProp eor) { |
| 1412 const DirProp *dirProps=pBiDi->dirProps; |
| 1413 |
| 1414 LevState levState; |
| 1415 int32_t i, start1, start2; |
| 1416 uint8_t oldStateImp, stateImp, actionImp; |
| 1417 uint8_t gprop, resProp, cell; |
| 1418 UBool inverseRTL; |
| 1419 DirProp nextStrongProp=R; |
| 1420 int32_t nextStrongPos=-1; |
| 1421 |
| 1422 levState.startON = -1; /* silence gcc flow analysis */ |
| 1423 |
| 1424 /* check for RTL inverse BiDi mode */ |
| 1425 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to |
| 1426 * loop on the text characters from end to start. |
| 1427 * This would need a different properties state table (at least different |
| 1428 * actions) and different levels state tables (maybe very similar to the |
| 1429 * LTR corresponding ones. |
| 1430 */ |
| 1431 inverseRTL=(UBool) |
| 1432 ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) && |
| 1433 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || |
| 1434 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)); |
| 1435 /* initialize for levels state table */ |
| 1436 levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MAR
KS */ |
| 1437 levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MAR
KS */ |
| 1438 levState.state=0; |
| 1439 levState.runLevel=pBiDi->levels[start]; |
| 1440 levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.run
Level&1]; |
| 1441 levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.run
Level&1]; |
| 1442 processPropertySeq(pBiDi, &levState, sor, start, start); |
| 1443 /* initialize for property state table */ |
| 1444 if(NO_CONTEXT_RTL(dirProps[start])==NSM) { |
| 1445 stateImp = 1 + sor; |
| 1446 } else { |
| 1447 stateImp=0; |
| 1448 } |
| 1449 start1=start; |
| 1450 start2=start; |
| 1451 |
| 1452 for(i=start; i<=limit; i++) { |
| 1453 if(i>=limit) { |
| 1454 gprop=eor; |
| 1455 } else { |
| 1456 DirProp prop, prop1; |
| 1457 prop=NO_CONTEXT_RTL(dirProps[i]); |
| 1458 if(inverseRTL) { |
| 1459 if(prop==AL) { |
| 1460 /* AL before EN does not make it AN */ |
| 1461 prop=R; |
| 1462 } else if(prop==EN) { |
| 1463 if(nextStrongPos<=i) { |
| 1464 /* look for next strong char (L/R/AL) */ |
| 1465 int32_t j; |
| 1466 nextStrongProp=R; /* set default */ |
| 1467 nextStrongPos=limit; |
| 1468 for(j=i+1; j<limit; j++) { |
| 1469 prop1=NO_CONTEXT_RTL(dirProps[j]); |
| 1470 if(prop1==L || prop1==R || prop1==AL) { |
| 1471 nextStrongProp=prop1; |
| 1472 nextStrongPos=j; |
| 1473 break; |
| 1474 } |
| 1475 } |
| 1476 } |
| 1477 if(nextStrongProp==AL) { |
| 1478 prop=AN; |
| 1479 } |
| 1480 } |
| 1481 } |
| 1482 gprop=groupProp[prop]; |
| 1483 } |
| 1484 oldStateImp=stateImp; |
| 1485 cell=impTabProps[oldStateImp][gprop]; |
| 1486 stateImp=GET_STATEPROPS(cell); /* isolate the new state */ |
| 1487 actionImp=GET_ACTIONPROPS(cell); /* isolate the action */ |
| 1488 if((i==limit) && (actionImp==0)) { |
| 1489 /* there is an unprocessed sequence if its property == eor */ |
| 1490 actionImp=1; /* process the last sequence */ |
| 1491 } |
| 1492 if(actionImp) { |
| 1493 resProp=impTabProps[oldStateImp][IMPTABPROPS_RES]; |
| 1494 switch(actionImp) { |
| 1495 case 1: /* process current seq1, init new seq1 */ |
| 1496 processPropertySeq(pBiDi, &levState, resProp, start1, i); |
| 1497 start1=i; |
| 1498 break; |
| 1499 case 2: /* init new seq2 */ |
| 1500 start2=i; |
| 1501 break; |
| 1502 case 3: /* process seq1, process seq2, init new seq1 */ |
| 1503 processPropertySeq(pBiDi, &levState, resProp, start1, start2); |
| 1504 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i); |
| 1505 start1=i; |
| 1506 break; |
| 1507 case 4: /* process seq1, set seq1=seq2, init new seq2 */ |
| 1508 processPropertySeq(pBiDi, &levState, resProp, start1, start2); |
| 1509 start1=start2; |
| 1510 start2=i; |
| 1511 break; |
| 1512 default: /* we should never get here */ |
| 1513 U_ASSERT(FALSE); |
| 1514 break; |
| 1515 } |
| 1516 } |
| 1517 } |
| 1518 /* flush possible pending sequence, e.g. ON */ |
| 1519 processPropertySeq(pBiDi, &levState, eor, limit, limit); |
| 1520 } |
| 1521 |
| 1522 /* perform (L1) and (X9) ---------------------------------------------------- */ |
| 1523 |
| 1524 /* |
| 1525 * Reset the embedding levels for some non-graphic characters (L1). |
| 1526 * This function also sets appropriate levels for BN, and |
| 1527 * explicit embedding types that are supposed to have been removed |
| 1528 * from the paragraph in (X9). |
| 1529 */ |
| 1530 static void |
| 1531 adjustWSLevels(UBiDi *pBiDi) { |
| 1532 const DirProp *dirProps=pBiDi->dirProps; |
| 1533 UBiDiLevel *levels=pBiDi->levels; |
| 1534 int32_t i; |
| 1535 |
| 1536 if(pBiDi->flags&MASK_WS) { |
| 1537 UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR; |
| 1538 Flags flag; |
| 1539 |
| 1540 i=pBiDi->trailingWSStart; |
| 1541 while(i>0) { |
| 1542 /* reset a sequence of WS/BN before eop and B/S to the paragraph par
aLevel */ |
| 1543 while(i>0 && (flag=DIRPROP_FLAG_NC(dirProps[--i]))&MASK_WS) { |
| 1544 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { |
| 1545 levels[i]=0; |
| 1546 } else { |
| 1547 levels[i]=GET_PARALEVEL(pBiDi, i); |
| 1548 } |
| 1549 } |
| 1550 |
| 1551 /* reset BN to the next character's paraLevel until B/S, which resta
rts above loop */ |
| 1552 /* here, i+1 is guaranteed to be <length */ |
| 1553 while(i>0) { |
| 1554 flag=DIRPROP_FLAG_NC(dirProps[--i]); |
| 1555 if(flag&MASK_BN_EXPLICIT) { |
| 1556 levels[i]=levels[i+1]; |
| 1557 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { |
| 1558 levels[i]=0; |
| 1559 break; |
| 1560 } else if(flag&MASK_B_S) { |
| 1561 levels[i]=GET_PARALEVEL(pBiDi, i); |
| 1562 break; |
| 1563 } |
| 1564 } |
| 1565 } |
| 1566 } |
| 1567 } |
| 1568 |
/* smaller of x and y; yields y when neither is smaller (arguments may be evaluated twice) */
#define BIDI_MIN(x, y)   ((y)>(x) ? (x) : (y))
/* absolute value of x (argument may be evaluated twice) */
#define BIDI_ABS(x)      ((x)<0 ? (-(x)) : (x))
| 1571 static void |
| 1572 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length, |
| 1573 UBiDiLevel paraLevel, UErrorCode *pErrorCode) { |
| 1574 void *runsOnlyMemory; |
| 1575 int32_t *visualMap; |
| 1576 UChar *visualText; |
| 1577 int32_t saveLength, saveTrailingWSStart; |
| 1578 const UBiDiLevel *levels; |
| 1579 UBiDiLevel *saveLevels; |
| 1580 UBiDiDirection saveDirection; |
| 1581 UBool saveMayAllocateText; |
| 1582 Run *runs; |
| 1583 int32_t visualLength, i, j, visualStart, logicalStart, |
| 1584 runCount, runLength, addedRuns, insertRemove, |
| 1585 start, limit, step, indexOddBit, logicalPos, |
| 1586 index0, index1; |
| 1587 uint32_t saveOptions; |
| 1588 |
| 1589 pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT; |
| 1590 if(length==0) { |
| 1591 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); |
| 1592 goto cleanup3; |
| 1593 } |
| 1594 /* obtain memory for mapping table and visual text */ |
| 1595 runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiD
iLevel))); |
| 1596 if(runsOnlyMemory==NULL) { |
| 1597 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 1598 goto cleanup3; |
| 1599 } |
| 1600 visualMap=runsOnlyMemory; |
| 1601 visualText=(UChar *)&visualMap[length]; |
| 1602 saveLevels=(UBiDiLevel *)&visualText[length]; |
| 1603 saveOptions=pBiDi->reorderingOptions; |
| 1604 if(saveOptions & UBIDI_OPTION_INSERT_MARKS) { |
| 1605 pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; |
| 1606 pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS; |
| 1607 } |
| 1608 paraLevel&=1; /* accept only 0 or 1 */ |
| 1609 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); |
| 1610 if(U_FAILURE(*pErrorCode)) { |
| 1611 goto cleanup3; |
| 1612 } |
| 1613 /* we cannot access directly pBiDi->levels since it is not yet set if |
| 1614 * direction is not MIXED |
| 1615 */ |
| 1616 levels=ubidi_getLevels(pBiDi, pErrorCode); |
| 1617 uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel)); |
| 1618 saveTrailingWSStart=pBiDi->trailingWSStart; |
| 1619 saveLength=pBiDi->length; |
| 1620 saveDirection=pBiDi->direction; |
| 1621 |
| 1622 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use |
| 1623 * the visual map and the dirProps array to drive the second call |
| 1624 * to ubidi_setPara (but must make provision for possible removal of |
| 1625 * BiDi controls. Alternatively, only use the dirProps array via |
| 1626 * customized classifier callback. |
| 1627 */ |
| 1628 visualLength=ubidi_writeReordered(pBiDi, visualText, length, |
| 1629 UBIDI_DO_MIRRORING, pErrorCode); |
| 1630 ubidi_getVisualMap(pBiDi, visualMap, pErrorCode); |
| 1631 if(U_FAILURE(*pErrorCode)) { |
| 1632 goto cleanup2; |
| 1633 } |
| 1634 pBiDi->reorderingOptions=saveOptions; |
| 1635 |
| 1636 pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT; |
| 1637 paraLevel^=1; |
| 1638 /* Because what we did with reorderingOptions, visualText may be shorter |
| 1639 * than the original text. But we don't want the levels memory to be |
| 1640 * reallocated shorter than the original length, since we need to restore |
| 1641 * the levels as after the first call to ubidi_setpara() before returning. |
| 1642 * We will force mayAllocateText to FALSE before the second call to |
| 1643 * ubidi_setpara(), and will restore it afterwards. |
| 1644 */ |
| 1645 saveMayAllocateText=pBiDi->mayAllocateText; |
| 1646 pBiDi->mayAllocateText=FALSE; |
| 1647 ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode); |
| 1648 pBiDi->mayAllocateText=saveMayAllocateText; |
| 1649 ubidi_getRuns(pBiDi, pErrorCode); |
| 1650 if(U_FAILURE(*pErrorCode)) { |
| 1651 goto cleanup1; |
| 1652 } |
| 1653 /* check if some runs must be split, count how many splits */ |
| 1654 addedRuns=0; |
| 1655 runCount=pBiDi->runCount; |
| 1656 runs=pBiDi->runs; |
| 1657 visualStart=0; |
| 1658 for(i=0; i<runCount; i++, visualStart+=runLength) { |
| 1659 runLength=runs[i].visualLimit-visualStart; |
| 1660 if(runLength<2) { |
| 1661 continue; |
| 1662 } |
| 1663 logicalStart=GET_INDEX(runs[i].logicalStart); |
| 1664 for(j=logicalStart+1; j<logicalStart+runLength; j++) { |
| 1665 index0=visualMap[j]; |
| 1666 index1=visualMap[j-1]; |
| 1667 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[i
ndex1])) { |
| 1668 addedRuns++; |
| 1669 } |
| 1670 } |
| 1671 } |
| 1672 if(addedRuns) { |
| 1673 if(getRunsMemory(pBiDi, runCount+addedRuns)) { |
| 1674 if(runCount==1) { |
| 1675 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */ |
| 1676 pBiDi->runsMemory[0]=runs[0]; |
| 1677 } |
| 1678 runs=pBiDi->runs=pBiDi->runsMemory; |
| 1679 pBiDi->runCount+=addedRuns; |
| 1680 } else { |
| 1681 goto cleanup1; |
| 1682 } |
| 1683 } |
| 1684 /* split runs which are not consecutive in source text */ |
| 1685 for(i=runCount-1; i>=0; i--) { |
| 1686 runLength= i==0 ? runs[0].visualLimit : |
| 1687 runs[i].visualLimit-runs[i-1].visualLimit; |
| 1688 logicalStart=runs[i].logicalStart; |
| 1689 indexOddBit=GET_ODD_BIT(logicalStart); |
| 1690 logicalStart=GET_INDEX(logicalStart); |
| 1691 if(runLength<2) { |
| 1692 if(addedRuns) { |
| 1693 runs[i+addedRuns]=runs[i]; |
| 1694 } |
| 1695 logicalPos=visualMap[logicalStart]; |
| 1696 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, |
| 1697 saveLevels[logicalPos]^indexOddBit); |
| 1698 continue; |
| 1699 } |
| 1700 if(indexOddBit) { |
| 1701 start=logicalStart; |
| 1702 limit=logicalStart+runLength-1; |
| 1703 step=1; |
| 1704 } else { |
| 1705 start=logicalStart+runLength-1; |
| 1706 limit=logicalStart; |
| 1707 step=-1; |
| 1708 } |
| 1709 for(j=start; j!=limit; j+=step) { |
| 1710 index0=visualMap[j]; |
| 1711 index1=visualMap[j+step]; |
| 1712 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[i
ndex1])) { |
| 1713 logicalPos=BIDI_MIN(visualMap[start], index0); |
| 1714 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, |
| 1715 saveLevels[logicalPos]^indexOddBit); |
| 1716 runs[i+addedRuns].visualLimit=runs[i].visualLimit; |
| 1717 runs[i].visualLimit-=BIDI_ABS(j-start)+1; |
| 1718 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER); |
| 1719 runs[i+addedRuns].insertRemove=insertRemove; |
| 1720 runs[i].insertRemove&=~insertRemove; |
| 1721 start=j+step; |
| 1722 addedRuns--; |
| 1723 } |
| 1724 } |
| 1725 if(addedRuns) { |
| 1726 runs[i+addedRuns]=runs[i]; |
| 1727 } |
| 1728 logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]); |
| 1729 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, |
| 1730 saveLevels[logicalPos]^indexOddBit); |
| 1731 } |
| 1732 |
| 1733 cleanup1: |
| 1734 /* restore initial paraLevel */ |
| 1735 pBiDi->paraLevel^=1; |
| 1736 cleanup2: |
| 1737 /* restore real text */ |
| 1738 pBiDi->text=text; |
| 1739 pBiDi->length=saveLength; |
| 1740 pBiDi->originalLength=length; |
| 1741 pBiDi->direction=saveDirection; |
| 1742 /* the saved levels should never excess levelsSize, but we check anyway */ |
| 1743 if(saveLength>pBiDi->levelsSize) { |
| 1744 saveLength=pBiDi->levelsSize; |
| 1745 } |
| 1746 uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel)); |
| 1747 pBiDi->trailingWSStart=saveTrailingWSStart; |
| 1748 /* free memory for mapping table and visual text */ |
| 1749 uprv_free(runsOnlyMemory); |
| 1750 if(pBiDi->runCount>1) { |
| 1751 pBiDi->direction=UBIDI_MIXED; |
| 1752 } |
| 1753 cleanup3: |
| 1754 pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY; |
| 1755 } |
| 1756 |
| 1757 /* ubidi_setPara ------------------------------------------------------------ */ |
| 1758 |
| 1759 U_CAPI void U_EXPORT2 |
| 1760 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, |
| 1761 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, |
| 1762 UErrorCode *pErrorCode) { |
| 1763 UBiDiDirection direction; |
| 1764 |
| 1765 /* check the argument values */ |
| 1766 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); |
| 1767 if(pBiDi==NULL || text==NULL || length<-1 || |
| 1768 (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) { |
| 1769 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 1770 return; |
| 1771 } |
| 1772 |
| 1773 if(length==-1) { |
| 1774 length=u_strlen(text); |
| 1775 } |
| 1776 |
| 1777 /* special treatment for RUNS_ONLY mode */ |
| 1778 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) { |
| 1779 setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode); |
| 1780 return; |
| 1781 } |
| 1782 |
| 1783 /* initialize the UBiDi structure */ |
| 1784 pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */ |
| 1785 pBiDi->text=text; |
| 1786 pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length; |
| 1787 pBiDi->paraLevel=paraLevel; |
| 1788 pBiDi->direction=UBIDI_LTR; |
| 1789 pBiDi->paraCount=1; |
| 1790 |
| 1791 pBiDi->dirProps=NULL; |
| 1792 pBiDi->levels=NULL; |
| 1793 pBiDi->runs=NULL; |
| 1794 pBiDi->insertPoints.size=0; /* clean up from last call */ |
| 1795 pBiDi->insertPoints.confirmed=0; /* clean up from last call */ |
| 1796 |
| 1797 /* |
| 1798 * Save the original paraLevel if contextual; otherwise, set to 0. |
| 1799 */ |
| 1800 if(IS_DEFAULT_LEVEL(paraLevel)) { |
| 1801 pBiDi->defaultParaLevel=paraLevel; |
| 1802 } else { |
| 1803 pBiDi->defaultParaLevel=0; |
| 1804 } |
| 1805 |
| 1806 if(length==0) { |
| 1807 /* |
| 1808 * For an empty paragraph, create a UBiDi object with the paraLevel and |
| 1809 * the flags and the direction set but without allocating zero-length ar
rays. |
| 1810 * There is nothing more to do. |
| 1811 */ |
| 1812 if(IS_DEFAULT_LEVEL(paraLevel)) { |
| 1813 pBiDi->paraLevel&=1; |
| 1814 pBiDi->defaultParaLevel=0; |
| 1815 } |
| 1816 if(paraLevel&1) { |
| 1817 pBiDi->flags=DIRPROP_FLAG(R); |
| 1818 pBiDi->direction=UBIDI_RTL; |
| 1819 } else { |
| 1820 pBiDi->flags=DIRPROP_FLAG(L); |
| 1821 pBiDi->direction=UBIDI_LTR; |
| 1822 } |
| 1823 |
| 1824 pBiDi->runCount=0; |
| 1825 pBiDi->paraCount=0; |
| 1826 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ |
| 1827 return; |
| 1828 } |
| 1829 |
| 1830 pBiDi->runCount=-1; |
| 1831 |
| 1832 /* |
| 1833 * Get the directional properties, |
| 1834 * the flags bit-set, and |
| 1835 * determine the paragraph level if necessary. |
| 1836 */ |
| 1837 if(getDirPropsMemory(pBiDi, length)) { |
| 1838 pBiDi->dirProps=pBiDi->dirPropsMemory; |
| 1839 getDirProps(pBiDi); |
| 1840 } else { |
| 1841 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 1842 return; |
| 1843 } |
| 1844 /* the processed length may have changed if UBIDI_OPTION_STREAMING */ |
| 1845 length= pBiDi->length; |
| 1846 pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */ |
| 1847 /* allocate paras memory */ |
| 1848 if(pBiDi->paraCount>1) { |
| 1849 if(getInitialParasMemory(pBiDi, pBiDi->paraCount)) { |
| 1850 pBiDi->paras=pBiDi->parasMemory; |
| 1851 pBiDi->paras[pBiDi->paraCount-1]=length; |
| 1852 } else { |
| 1853 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 1854 return; |
| 1855 } |
| 1856 } else { |
| 1857 /* initialize paras for single paragraph */ |
| 1858 pBiDi->paras=pBiDi->simpleParas; |
| 1859 pBiDi->simpleParas[0]=length; |
| 1860 } |
| 1861 |
| 1862 /* are explicit levels specified? */ |
| 1863 if(embeddingLevels==NULL) { |
| 1864 /* no: determine explicit levels according to the (Xn) rules */\ |
| 1865 if(getLevelsMemory(pBiDi, length)) { |
| 1866 pBiDi->levels=pBiDi->levelsMemory; |
| 1867 direction=resolveExplicitLevels(pBiDi); |
| 1868 } else { |
| 1869 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 1870 return; |
| 1871 } |
| 1872 } else { |
| 1873 /* set BN for all explicit codes, check that all levels are 0 or paraLev
el..UBIDI_MAX_EXPLICIT_LEVEL */ |
| 1874 pBiDi->levels=embeddingLevels; |
| 1875 direction=checkExplicitLevels(pBiDi, pErrorCode); |
| 1876 if(U_FAILURE(*pErrorCode)) { |
| 1877 return; |
| 1878 } |
| 1879 } |
| 1880 |
| 1881 /* |
| 1882 * The steps after (X9) in the UBiDi algorithm are performed only if |
| 1883 * the paragraph text has mixed directionality! |
| 1884 */ |
| 1885 pBiDi->direction=direction; |
| 1886 switch(direction) { |
| 1887 case UBIDI_LTR: |
| 1888 /* make sure paraLevel is even */ |
| 1889 pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1); |
| 1890 |
| 1891 /* all levels are implicitly at paraLevel (important for ubidi_getLevels
()) */ |
| 1892 pBiDi->trailingWSStart=0; |
| 1893 break; |
| 1894 case UBIDI_RTL: |
| 1895 /* make sure paraLevel is odd */ |
| 1896 pBiDi->paraLevel|=1; |
| 1897 |
| 1898 /* all levels are implicitly at paraLevel (important for ubidi_getLevels
()) */ |
| 1899 pBiDi->trailingWSStart=0; |
| 1900 break; |
| 1901 default: |
| 1902 /* |
| 1903 * Choose the right implicit state table |
| 1904 */ |
| 1905 switch(pBiDi->reorderingMode) { |
| 1906 case UBIDI_REORDER_DEFAULT: |
| 1907 pBiDi->pImpTabPair=&impTab_DEFAULT; |
| 1908 break; |
| 1909 case UBIDI_REORDER_NUMBERS_SPECIAL: |
| 1910 pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL; |
| 1911 break; |
| 1912 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R: |
| 1913 pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R; |
| 1914 break; |
| 1915 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L: |
| 1916 pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L; |
| 1917 break; |
| 1918 case UBIDI_REORDER_INVERSE_LIKE_DIRECT: |
| 1919 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { |
| 1920 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS; |
| 1921 } else { |
| 1922 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT; |
| 1923 } |
| 1924 break; |
| 1925 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL: |
| 1926 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { |
| 1927 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARK
S; |
| 1928 } else { |
| 1929 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL; |
| 1930 } |
| 1931 break; |
| 1932 default: |
| 1933 /* we should never get here */ |
| 1934 U_ASSERT(FALSE); |
| 1935 break; |
| 1936 } |
| 1937 /* |
| 1938 * If there are no external levels specified and there |
| 1939 * are no significant explicit level codes in the text, |
| 1940 * then we can treat the entire paragraph as one run. |
| 1941 * Otherwise, we need to perform the following rules on runs of |
| 1942 * the text with the same embedding levels. (X10) |
| 1943 * "Significant" explicit level codes are ones that actually |
| 1944 * affect non-BN characters. |
| 1945 * Examples for "insignificant" ones are empty embeddings |
| 1946 * LRE-PDF, LRE-RLE-PDF-PDF, etc. |
| 1947 */ |
| 1948 if(embeddingLevels==NULL && pBiDi->paraCount<=1 && |
| 1949 !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) { |
| 1950 resolveImplicitLevels(pBiDi, 0, length, |
| 1951 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)), |
| 1952 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, lengt
h-1))); |
| 1953 } else { |
| 1954 /* sor, eor: start and end types of same-level-run */ |
| 1955 UBiDiLevel *levels=pBiDi->levels; |
| 1956 int32_t start, limit=0; |
| 1957 UBiDiLevel level, nextLevel; |
| 1958 DirProp sor, eor; |
| 1959 |
| 1960 /* determine the first sor and set eor to it because of the loop bod
y (sor=eor there) */ |
| 1961 level=GET_PARALEVEL(pBiDi, 0); |
| 1962 nextLevel=levels[0]; |
| 1963 if(level<nextLevel) { |
| 1964 eor=GET_LR_FROM_LEVEL(nextLevel); |
| 1965 } else { |
| 1966 eor=GET_LR_FROM_LEVEL(level); |
| 1967 } |
| 1968 |
| 1969 do { |
| 1970 /* determine start and limit of the run (end points just behind
the run) */ |
| 1971 |
| 1972 /* the values for this run's start are the same as for the previ
ous run's end */ |
| 1973 start=limit; |
| 1974 level=nextLevel; |
| 1975 if((start>0) && (NO_CONTEXT_RTL(pBiDi->dirProps[start-1])==B)) { |
| 1976 /* except if this is a new paragraph, then set sor = para le
vel */ |
| 1977 sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start)); |
| 1978 } else { |
| 1979 sor=eor; |
| 1980 } |
| 1981 |
| 1982 /* search for the limit of this run */ |
| 1983 while(++limit<length && levels[limit]==level) {} |
| 1984 |
| 1985 /* get the correct level of the next run */ |
| 1986 if(limit<length) { |
| 1987 nextLevel=levels[limit]; |
| 1988 } else { |
| 1989 nextLevel=GET_PARALEVEL(pBiDi, length-1); |
| 1990 } |
| 1991 |
| 1992 /* determine eor from max(level, nextLevel); sor is last run's e
or */ |
| 1993 if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRID
E)) { |
| 1994 eor=GET_LR_FROM_LEVEL(nextLevel); |
| 1995 } else { |
| 1996 eor=GET_LR_FROM_LEVEL(level); |
| 1997 } |
| 1998 |
| 1999 /* if the run consists of overridden directional types, then the
re |
| 2000 are no implicit types to be resolved */ |
| 2001 if(!(level&UBIDI_LEVEL_OVERRIDE)) { |
| 2002 resolveImplicitLevels(pBiDi, start, limit, sor, eor); |
| 2003 } else { |
| 2004 /* remove the UBIDI_LEVEL_OVERRIDE flags */ |
| 2005 do { |
| 2006 levels[start++]&=~UBIDI_LEVEL_OVERRIDE; |
| 2007 } while(start<limit); |
| 2008 } |
| 2009 } while(limit<length); |
| 2010 } |
| 2011 /* check if we got any memory shortage while adding insert points */ |
| 2012 if (U_FAILURE(pBiDi->insertPoints.errorCode)) |
| 2013 { |
| 2014 *pErrorCode=pBiDi->insertPoints.errorCode; |
| 2015 return; |
| 2016 } |
| 2017 /* reset the embedding levels for some non-graphic characters (L1), (X9)
*/ |
| 2018 adjustWSLevels(pBiDi); |
| 2019 break; |
| 2020 } |
| 2021 /* add RLM for inverse Bidi with contextual orientation resolving |
| 2022 * to RTL which would not round-trip otherwise |
| 2023 */ |
| 2024 if((pBiDi->defaultParaLevel>0) && |
| 2025 (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) && |
| 2026 ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) || |
| 2027 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) { |
| 2028 int32_t i, j, start, last; |
| 2029 DirProp dirProp; |
| 2030 for(i=0; i<pBiDi->paraCount; i++) { |
| 2031 last=pBiDi->paras[i]-1; |
| 2032 if((pBiDi->dirProps[last] & CONTEXT_RTL)==0) { |
| 2033 continue; /* LTR paragraph */ |
| 2034 } |
| 2035 start= i==0 ? 0 : pBiDi->paras[i - 1]; |
| 2036 for(j=last; j>=start; j--) { |
| 2037 dirProp=NO_CONTEXT_RTL(pBiDi->dirProps[j]); |
| 2038 if(dirProp==L) { |
| 2039 if(j<last) { |
| 2040 while(NO_CONTEXT_RTL(pBiDi->dirProps[last])==B) { |
| 2041 last--; |
| 2042 } |
| 2043 } |
| 2044 addPoint(pBiDi, last, RLM_BEFORE); |
| 2045 break; |
| 2046 } |
| 2047 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) { |
| 2048 break; |
| 2049 } |
| 2050 } |
| 2051 } |
| 2052 } |
| 2053 |
| 2054 if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { |
| 2055 pBiDi->resultLength -= pBiDi->controlCount; |
| 2056 } else { |
| 2057 pBiDi->resultLength += pBiDi->insertPoints.size; |
| 2058 } |
| 2059 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ |
| 2060 } |
| 2061 |
| 2062 U_CAPI void U_EXPORT2 |
| 2063 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) { |
| 2064 if(pBiDi!=NULL) { |
| 2065 pBiDi->orderParagraphsLTR=orderParagraphsLTR; |
| 2066 } |
| 2067 } |
| 2068 |
| 2069 U_CAPI UBool U_EXPORT2 |
| 2070 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) { |
| 2071 if(pBiDi!=NULL) { |
| 2072 return pBiDi->orderParagraphsLTR; |
| 2073 } else { |
| 2074 return FALSE; |
| 2075 } |
| 2076 } |
| 2077 |
| 2078 U_CAPI UBiDiDirection U_EXPORT2 |
| 2079 ubidi_getDirection(const UBiDi *pBiDi) { |
| 2080 if(IS_VALID_PARA_OR_LINE(pBiDi)) { |
| 2081 return pBiDi->direction; |
| 2082 } else { |
| 2083 return UBIDI_LTR; |
| 2084 } |
| 2085 } |
| 2086 |
| 2087 U_CAPI const UChar * U_EXPORT2 |
| 2088 ubidi_getText(const UBiDi *pBiDi) { |
| 2089 if(IS_VALID_PARA_OR_LINE(pBiDi)) { |
| 2090 return pBiDi->text; |
| 2091 } else { |
| 2092 return NULL; |
| 2093 } |
| 2094 } |
| 2095 |
| 2096 U_CAPI int32_t U_EXPORT2 |
| 2097 ubidi_getLength(const UBiDi *pBiDi) { |
| 2098 if(IS_VALID_PARA_OR_LINE(pBiDi)) { |
| 2099 return pBiDi->originalLength; |
| 2100 } else { |
| 2101 return 0; |
| 2102 } |
| 2103 } |
| 2104 |
| 2105 U_CAPI int32_t U_EXPORT2 |
| 2106 ubidi_getProcessedLength(const UBiDi *pBiDi) { |
| 2107 if(IS_VALID_PARA_OR_LINE(pBiDi)) { |
| 2108 return pBiDi->length; |
| 2109 } else { |
| 2110 return 0; |
| 2111 } |
| 2112 } |
| 2113 |
| 2114 U_CAPI int32_t U_EXPORT2 |
| 2115 ubidi_getResultLength(const UBiDi *pBiDi) { |
| 2116 if(IS_VALID_PARA_OR_LINE(pBiDi)) { |
| 2117 return pBiDi->resultLength; |
| 2118 } else { |
| 2119 return 0; |
| 2120 } |
| 2121 } |
| 2122 |
| 2123 /* paragraphs API functions ------------------------------------------------- */ |
| 2124 |
| 2125 U_CAPI UBiDiLevel U_EXPORT2 |
| 2126 ubidi_getParaLevel(const UBiDi *pBiDi) { |
| 2127 if(IS_VALID_PARA_OR_LINE(pBiDi)) { |
| 2128 return pBiDi->paraLevel; |
| 2129 } else { |
| 2130 return 0; |
| 2131 } |
| 2132 } |
| 2133 |
| 2134 U_CAPI int32_t U_EXPORT2 |
| 2135 ubidi_countParagraphs(UBiDi *pBiDi) { |
| 2136 if(!IS_VALID_PARA_OR_LINE(pBiDi)) { |
| 2137 return 0; |
| 2138 } else { |
| 2139 return pBiDi->paraCount; |
| 2140 } |
| 2141 } |
| 2142 |
| 2143 U_CAPI void U_EXPORT2 |
| 2144 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, |
| 2145 int32_t *pParaStart, int32_t *pParaLimit, |
| 2146 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { |
| 2147 int32_t paraStart; |
| 2148 |
| 2149 /* check the argument values */ |
| 2150 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); |
| 2151 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode); |
| 2152 RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode); |
| 2153 |
| 2154 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ |
| 2155 if(paraIndex) { |
| 2156 paraStart=pBiDi->paras[paraIndex-1]; |
| 2157 } else { |
| 2158 paraStart=0; |
| 2159 } |
| 2160 if(pParaStart!=NULL) { |
| 2161 *pParaStart=paraStart; |
| 2162 } |
| 2163 if(pParaLimit!=NULL) { |
| 2164 *pParaLimit=pBiDi->paras[paraIndex]; |
| 2165 } |
| 2166 if(pParaLevel!=NULL) { |
| 2167 *pParaLevel=GET_PARALEVEL(pBiDi, paraStart); |
| 2168 } |
| 2169 } |
| 2170 |
| 2171 U_CAPI int32_t U_EXPORT2 |
| 2172 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, |
| 2173 int32_t *pParaStart, int32_t *pParaLimit, |
| 2174 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { |
| 2175 uint32_t paraIndex; |
| 2176 |
| 2177 /* check the argument values */ |
| 2178 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */ |
| 2179 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); |
| 2180 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); |
| 2181 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ |
| 2182 RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1); |
| 2183 |
| 2184 for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex]; paraIndex++); |
| 2185 ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLev
el, pErrorCode); |
| 2186 return paraIndex; |
| 2187 } |
| 2188 |
| 2189 U_CAPI void U_EXPORT2 |
| 2190 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, |
| 2191 const void *newContext, UBiDiClassCallback **oldFn, |
| 2192 const void **oldContext, UErrorCode *pErrorCode) |
| 2193 { |
| 2194 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); |
| 2195 if(pBiDi==NULL) { |
| 2196 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 2197 return; |
| 2198 } |
| 2199 if( oldFn ) |
| 2200 { |
| 2201 *oldFn = pBiDi->fnClassCallback; |
| 2202 } |
| 2203 if( oldContext ) |
| 2204 { |
| 2205 *oldContext = pBiDi->coClassCallback; |
| 2206 } |
| 2207 pBiDi->fnClassCallback = newFn; |
| 2208 pBiDi->coClassCallback = newContext; |
| 2209 } |
| 2210 |
| 2211 U_CAPI void U_EXPORT2 |
| 2212 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **conte
xt) |
| 2213 { |
| 2214 if(pBiDi==NULL) { |
| 2215 return; |
| 2216 } |
| 2217 if( fn ) |
| 2218 { |
| 2219 *fn = pBiDi->fnClassCallback; |
| 2220 } |
| 2221 if( context ) |
| 2222 { |
| 2223 *context = pBiDi->coClassCallback; |
| 2224 } |
| 2225 } |
| 2226 |
| 2227 U_CAPI UCharDirection U_EXPORT2 |
| 2228 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c) |
| 2229 { |
| 2230 UCharDirection dir; |
| 2231 |
| 2232 if( pBiDi->fnClassCallback == NULL || |
| 2233 (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_C
LASS_DEFAULT ) |
| 2234 { |
| 2235 return ubidi_getClass(pBiDi->bdp, c); |
| 2236 } else { |
| 2237 return dir; |
| 2238 } |
| 2239 } |
| 2240 |
OLD | NEW |